From e8692933db6d98ba95868d23dd7a45e7a0a39af1 Mon Sep 17 00:00:00 2001 From: Olivier 'reivilibre Date: Fri, 24 Apr 2026 11:53:21 +0100 Subject: [PATCH] Check state before leaving, to avoid race condition --- federation/handle.go | 2 + ...federation_room_join_partial_state_test.go | 44 ++++++++++++------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/federation/handle.go b/federation/handle.go index eaaba9b7..6ce5b1dd 100644 --- a/federation/handle.go +++ b/federation/handle.go @@ -568,6 +568,8 @@ func HandleTransactionRequests(pduCallback func(gomatrixserverlib.PDU), eduCallb // Add this PDU as a success to the response response.PDUs[event.EventID()] = fclient.PDUResult{} + /////// <--- Interrupted + // Run the PDU callback function with this event if pduCallback != nil { pduCallback(event) diff --git a/tests/msc3902/federation_room_join_partial_state_test.go b/tests/msc3902/federation_room_join_partial_state_test.go index 9e11b163..82b35f1d 100644 --- a/tests/msc3902/federation_room_join_partial_state_test.go +++ b/tests/msc3902/federation_room_join_partial_state_test.go @@ -128,6 +128,10 @@ func (s *server) WithWaitForLeave( t *testing.T, room *federation.ServerRoom, userID string, leaveAction func(), ) { leaveChannel := make(chan gomatrixserverlib.PDU, 10) + + // Register a PDU handler to make it so that we 'expect' the leave event + // Without this PDU handler being registered, the test fails upon encountering + // the unexpected leave event. removePDUHandler := s.AddPDUHandler( func(e gomatrixserverlib.PDU) bool { if membership, _ := e.Membership(); e.Type() == "m.room.member" && @@ -141,24 +145,34 @@ func (s *server) WithWaitForLeave( ) defer removePDUHandler() - leaveAction() - + // HAZARD: Check state _before_ triggering the `leaveAction`. + // + // If we trigger the `leaveAction` first, it's possible for the leave to proceed quickly enough that + // the current state gets updated _before_ we start waiting for the PDU handler to fire, + // leading us to remove the PDU handler and trigger the 'unhandled PDU' test failure. memberEvent := room.CurrentState("m.room.member", userID) - membership := "" - if memberEvent != nil { - membership, _ = memberEvent.Membership() - } - if membership == "leave" { - t.Logf("%s has already seen %s leave test room %s.", s.ServerName(), userID, room.RoomID) + if memberEvent == nil { + // NOTE: despite the test server not having a view of the membership yet, the + // `TestPartialStateJoin/Outgoing_device_list_updates/Device_list_updates_reach_newly_joined_servers_in_partial_state_rooms` + // test still relies on the `leaveAction` being triggered here. + t.Logf("WithWaitForLeave: %s has no membership in room %s, from the perspective of %s.", userID, room.RoomID, s.ServerName()) } else { - select { - case <-leaveChannel: - t.Logf("%s saw %s leave test room %s.", s.ServerName(), userID, room.RoomID) - break - case <-time.After(1 * time.Second): - t.Errorf("%s timed out waiting for %s to leave test room %s.", s.ServerName(), userID, room.RoomID) + membership, _ := memberEvent.Membership() + if membership == "leave" { + t.Logf("WithWaitForLeave: %s has already left test room %s, from the perspective of %s.", userID, room.RoomID, s.ServerName()) + return } } + + leaveAction() + + select { + case <-leaveChannel: + t.Logf("%s saw %s leave test room %s.", s.ServerName(), userID, room.RoomID) + break + case <-time.After(1 * time.Second): + t.Errorf("%s timed out waiting for %s to leave test room %s.", s.ServerName(), userID, room.RoomID) + } } // Wait for the server to receive the event with given event ID. @@ -2232,7 +2246,7 @@ func TestPartialStateJoin(t *testing.T) { // test that device list updates are sent to the remote homeservers listed in the // `/send_join` response in a room with partial state. - t.Run("Device list updates reach all servers in partial state rooms", func(t *testing.T) { + t.Run("Device list updates reach all servers in partial state rooms", func(t *testing.T) { // FAILS alice, server1, server2, deviceListUpdateChannel1, deviceListUpdateChannel2, room, cleanup := setupOutgoingDeviceListUpdateTest(t, deployment, "t23alice") defer cleanup()