From b744d2dd0c25205e0ee86c1b1f5163e3e687c9c6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 8 Jun 2026 19:19:15 +0100 Subject: [PATCH 1/2] Fix flaky TestPartialStateJoin/Outgoing_device_list_updates from a leave-handler race `WithWaitForLeave` registered a PDU handler (whose callback feeds `leaveChannel`) but then decided whether to wait by peeking at `room.CurrentState`. Because an incoming PDU is added to the room (`HandleTransactionRequests` calls `room.AddEvent`, updating `CurrentState`) *before* the PDU callback runs, the test goroutine could observe the updated state, take the "already seen" shortcut and return, firing the deferred `removePDUHandler()` in the window before the callback ran. The now-handler-less callback then flagged the (expected) leave as an unexpected PDU, failing the test. Wait on the channel fed by the handler instead of polling `room.CurrentState`, so the handler stays registered until its own callback has run. Every caller passes a joined user and an action that performs the leave, so the leave always arrives fresh via that callback. Make this contract explicit: snapshot membership before the action and fail if the user has already left (rather than silently treating it as "already seen"), and fail on timeout, since neither should happen in correct usage. The action still runs in the already-left case so any cleanup it performs is preserved. The `HandleTransactionRequests` ordering (callback after `room.AddEvent`) is left unchanged, as other tests rely on it (e.g. `federation_redaction_test.go` reads `serverRoom.Timeline` immediately after a callback-driven waiter). --- ...federation_room_join_partial_state_test.go | 37 ++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/tests/msc3902/federation_room_join_partial_state_test.go b/tests/msc3902/federation_room_join_partial_state_test.go index 9e11b163d..3bbda80ae 100644 --- a/tests/msc3902/federation_room_join_partial_state_test.go +++ b/tests/msc3902/federation_room_join_partial_state_test.go @@ -141,23 +141,34 @@ func (s *server) WithWaitForLeave( ) defer removePDUHandler() - leaveAction() - + // `WithWaitForLeave` expects the user to be in the room and to leave as a + // result of the action, so that the leave arrives as a PDU our handler + // matches. Snapshot the membership *before* the action: if the user has + // already left, no such PDU is coming, which is a test bug rather than + // something to wait for. memberEvent := room.CurrentState("m.room.member", userID) - membership := "" + membership := "leave" if memberEvent != nil { membership, _ = memberEvent.Membership() } - if membership == "leave" { - t.Logf("%s has already seen %s leave test room %s.", s.ServerName(), userID, room.RoomID) - } else { - select { - case <-leaveChannel: - t.Logf("%s saw %s leave test room %s.", s.ServerName(), userID, room.RoomID) - break - case <-time.After(1 * time.Second): - t.Errorf("%s timed out waiting for %s to leave test room %s.", s.ServerName(), userID, room.RoomID) - } + alreadyLeft := membership == "leave" + if alreadyLeft { + t.Errorf("%s: %s had already left test room %s before WithWaitForLeave ran.", s.ServerName(), userID, room.RoomID) + } + + leaveAction() + + // Wait on the channel fed by our PDU handler rather than polling + // `room.CurrentState`: the room's current state is updated (by + // `room.AddEvent`) *before* the PDU callback runs, so returning on a + // `CurrentState` check could deregister our handler in the window before + // the callback fires, making the (expected) leave look unexpected to + // `HandleTransactionRequests`. + select { + case <-leaveChannel: + t.Logf("%s saw %s leave test room %s.", s.ServerName(), userID, room.RoomID) + case <-time.After(1 * time.Second): + t.Errorf("%s timed out waiting for %s to leave test room %s.", s.ServerName(), userID, room.RoomID) } } From b48b4ebe15922c0873039e41525839073d855fc7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 9 Jun 2026 20:59:46 +0100 Subject: [PATCH 2/2] Update tests/msc3902/federation_room_join_partial_state_test.go Co-authored-by: Eric Eastwood --- tests/msc3902/federation_room_join_partial_state_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/msc3902/federation_room_join_partial_state_test.go b/tests/msc3902/federation_room_join_partial_state_test.go index 3bbda80ae..0557fdd87 100644 --- a/tests/msc3902/federation_room_join_partial_state_test.go +++ b/tests/msc3902/federation_room_join_partial_state_test.go @@ -153,7 +153,7 @@ func (s *server) WithWaitForLeave( } alreadyLeft := membership == "leave" if alreadyLeft { - t.Errorf("%s: %s had already left test room %s before WithWaitForLeave ran.", s.ServerName(), userID, room.RoomID) + t.Fatalf("%s: %s had already left test room %s before WithWaitForLeave ran.", s.ServerName(), userID, room.RoomID) } leaveAction()