Skip to content

Commit 4bdf5bb

Browse files
SvenGrootCopilot
andauthored
vmbus_server: always notify that the connection is modified when disconnecting (#1817)
This change fixes an issue where, if all channels are already reset when a disconnect happens, the server would not invoke `Notifier::modify_connection`. This means that the state such as the interrupt page and monitor pages is not reset, and in the case of OpenHCL the relay is not notified of the disconnect (which can leave host state intact, including monitor pages if MNF is handled by the host). This caused an issue where Linux would occasionally crash during resume from hibernate. When resuming, Linux makes two connections, first to read the memory image, and then to resume normal operations, both using MNF. When the first connection unloads, the overlay pages for the monitor pages were not removed until the reconnect, leading to memory corruption when Linux proceeds to use these pages as normal memory. This change also adds some tests ensuring the notifier is invoked for an unload with open channels, without open channels, and a forced disconnect when a new InitiateContact message is received. Cherry-picked from #1809 Co-authored-by: Copilot <[email protected]>
1 parent 48ff19c commit 4bdf5bb

File tree

1 file changed

+179
-27
lines changed

1 file changed

+179
-27
lines changed

vm/devices/vmbus/vmbus_server/src/channels.rs

Lines changed: 179 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,19 +1912,7 @@ impl<'a, N: 'a + Notifier> ServerWithNotifier<'a, N> {
19121912
modify_sent: false,
19131913
} => {
19141914
if self.are_channels_reset(matches!(next_action, ConnectionAction::Reset)) {
1915-
self.inner.state = ConnectionState::Disconnecting {
1916-
next_action,
1917-
modify_sent: true,
1918-
};
1919-
1920-
// Reset server state and disconnect the relay if there is one.
1921-
self.notifier
1922-
.modify_connection(ModifyConnectionRequest {
1923-
monitor_page: Update::Reset,
1924-
interrupt_page: Update::Reset,
1925-
..Default::default()
1926-
})
1927-
.expect("resetting state should not fail");
1915+
self.notify_disconnect(next_action);
19281916
}
19291917
}
19301918
ConnectionState::Disconnecting {
@@ -1936,6 +1924,25 @@ impl<'a, N: 'a + Notifier> ServerWithNotifier<'a, N> {
19361924
}
19371925
}
19381926

1927+
/// Informs the notifier to reset the connection state when disconnecting.
1928+
fn notify_disconnect(&mut self, next_action: ConnectionAction) {
1929+
// Assert this on debug only because it is an expensive check if there are many channels.
1930+
debug_assert!(self.are_channels_reset(matches!(next_action, ConnectionAction::Reset)));
1931+
self.inner.state = ConnectionState::Disconnecting {
1932+
next_action,
1933+
modify_sent: true,
1934+
};
1935+
1936+
// Reset server state and disconnect the relay if there is one.
1937+
self.notifier
1938+
.modify_connection(ModifyConnectionRequest {
1939+
monitor_page: Update::Reset,
1940+
interrupt_page: Update::Reset,
1941+
..Default::default()
1942+
})
1943+
.expect("resetting state should not fail");
1944+
}
1945+
19391946
/// If true, the server is mid-reset and cannot take certain actions such
19401947
/// as handling synic messages or saving state.
19411948
fn is_resetting(&self) -> bool {
@@ -2413,7 +2420,7 @@ impl<'a, N: 'a + Notifier> ServerWithNotifier<'a, N> {
24132420

24142421
ConnectionState::Connected { .. } => {
24152422
if self.are_channels_reset(vm_reset) {
2416-
self.inner.state = ConnectionState::Disconnected;
2423+
self.notify_disconnect(new_action);
24172424
} else {
24182425
self.inner.state = ConnectionState::Disconnecting {
24192426
next_action: new_action,
@@ -3897,32 +3904,34 @@ mod tests {
38973904

38983905
#[test]
38993906
fn test_version_negotiation_feature_flags() {
3900-
let (mut notifier, _recv) = TestNotifier::new();
3901-
let mut server = Server::new(Vtl::Vtl0, MESSAGE_CONNECTION_ID, 0);
3907+
let mut env = TestEnv::new();
39023908

39033909
// Test with no feature flags.
39043910
let mut target_info = TargetInfo::new()
39053911
.with_sint(SINT)
39063912
.with_vtl(0)
39073913
.with_feature_flags(FeatureFlags::new().into());
39083914
test_initiate_contact(
3909-
&mut server,
3910-
&mut notifier,
3915+
&mut env.server,
3916+
&mut env.notifier,
39113917
Version::Copper as u32,
39123918
target_info.into(),
39133919
true,
39143920
0,
39153921
);
39163922

3923+
env.c().handle_unload();
3924+
env.complete_reset();
3925+
env.notifier.messages.clear();
39173926
// Request supported feature flags.
39183927
target_info.set_feature_flags(
39193928
FeatureFlags::new()
39203929
.with_guest_specified_signal_parameters(true)
39213930
.into(),
39223931
);
39233932
test_initiate_contact(
3924-
&mut server,
3925-
&mut notifier,
3933+
&mut env.server,
3934+
&mut env.notifier,
39263935
Version::Copper as u32,
39273936
target_info.into(),
39283937
true,
@@ -3931,14 +3940,17 @@ mod tests {
39313940
.into(),
39323941
);
39333942

3943+
env.c().handle_unload();
3944+
env.complete_reset();
3945+
env.notifier.messages.clear();
39343946
// Request unsupported feature flags. This will succeed and report back the supported ones.
39353947
target_info.set_feature_flags(
39363948
u32::from(FeatureFlags::new().with_guest_specified_signal_parameters(true))
39373949
| 0xf0000000,
39383950
);
39393951
test_initiate_contact(
3940-
&mut server,
3941-
&mut notifier,
3952+
&mut env.server,
3953+
&mut env.notifier,
39423954
Version::Copper as u32,
39433955
target_info.into(),
39443956
true,
@@ -3947,11 +3959,14 @@ mod tests {
39473959
.into(),
39483960
);
39493961

3962+
env.c().handle_unload();
3963+
env.complete_reset();
3964+
env.notifier.messages.clear();
39503965
// Verify client ID feature flag.
39513966
target_info.set_feature_flags(FeatureFlags::new().with_client_id(true).into());
39523967
test_initiate_contact(
3953-
&mut server,
3954-
&mut notifier,
3968+
&mut env.server,
3969+
&mut env.notifier,
39553970
Version::Copper as u32,
39563971
target_info.into(),
39573972
true,
@@ -4589,9 +4604,7 @@ mod tests {
45894604
self.server.with_notifier(&mut self.notifier)
45904605
}
45914606

4592-
// Completes a reset operation if the server send a modify request as part of it. This
4593-
// shouldn't be called if the server was not connected or had no open channels or gpadls
4594-
// during the reset.
4607+
// Completes a reset operation if the server sends a modify request as part of it.
45954608
fn complete_reset(&mut self) {
45964609
let _ = self.next_action();
45974610
self.c()
@@ -5045,6 +5058,7 @@ mod tests {
50455058
env.c().reset();
50465059
// We have to "complete" the connection to let the reset go through.
50475060
env.complete_connect();
5061+
env.complete_reset();
50485062
env.notifier.check_reset();
50495063

50505064
env.c().restore(state).unwrap();
@@ -5107,6 +5121,7 @@ mod tests {
51075121

51085122
let state = env.server.save();
51095123
env.c().reset();
5124+
env.complete_reset();
51105125
env.notifier.check_reset();
51115126

51125127
env.c().restore(state).unwrap();
@@ -5359,6 +5374,7 @@ mod tests {
53595374

53605375
// Reserved channels and gpadls should stay open across unloads
53615376
env.c().handle_unload();
5377+
env.complete_reset();
53625378

53635379
// Closing while disconnected should work
53645380
env.close_reserved(2, 2, SINT.into());
@@ -5417,6 +5433,7 @@ mod tests {
54175433
env.c().open_complete(offer_id1, 0);
54185434

54195435
env.c().handle_unload();
5436+
env.complete_reset();
54205437

54215438
// Reset while disconnected should cleanup reserved channels
54225439
// and complete disconnect automatically
@@ -5439,6 +5456,7 @@ mod tests {
54395456
env.c().open_complete(offer_id2, 0);
54405457

54415458
env.c().handle_unload();
5459+
env.complete_reset();
54425460

54435461
env.close_reserved(2, 2, SINT.into());
54445462
env.c().close_complete(offer_id2);
@@ -5776,4 +5794,138 @@ mod tests {
57765794
}
57775795
);
57785796
}
5797+
5798+
#[test]
5799+
fn test_disconnect() {
5800+
let mut env = TestEnv::new();
5801+
let _offer_id1 = env.offer(1);
5802+
let _offer_id2 = env.offer(2);
5803+
let _offer_id3 = env.offer(3);
5804+
5805+
env.connect(Version::Win10, FeatureFlags::new());
5806+
env.c().handle_request_offers().unwrap();
5807+
5808+
// Send unload message with all channels already closed.
5809+
env.c().handle_unload();
5810+
5811+
// Check that modify_connection was invoked on the notifier.
5812+
let req = env.notifier.next_action();
5813+
assert_eq!(
5814+
req,
5815+
ModifyConnectionRequest {
5816+
monitor_page: Update::Reset,
5817+
interrupt_page: Update::Reset,
5818+
..Default::default()
5819+
}
5820+
);
5821+
5822+
env.notifier.messages.clear();
5823+
env.c().complete_disconnect();
5824+
env.notifier
5825+
.check_message(OutgoingMessage::new(&protocol::UnloadComplete {}));
5826+
}
5827+
5828+
#[test]
5829+
fn test_disconnect_open_channels() {
5830+
let mut env = TestEnv::new();
5831+
let offer_id1 = env.offer(1);
5832+
let offer_id2 = env.offer(2);
5833+
let _offer_id3 = env.offer(3);
5834+
5835+
env.connect(Version::Win10, FeatureFlags::new());
5836+
env.c().handle_request_offers().unwrap();
5837+
5838+
// Open two channels.
5839+
env.open(1);
5840+
env.open(2);
5841+
5842+
env.c().open_complete(offer_id1, 0);
5843+
env.c().open_complete(offer_id2, 0);
5844+
5845+
// Send unload message with channels still open.
5846+
env.c().handle_unload();
5847+
5848+
assert!(env.notifier.modify_requests.is_empty());
5849+
5850+
// Unload will close the channels, so complete that operation.
5851+
env.c().close_complete(offer_id1);
5852+
env.c().close_complete(offer_id2);
5853+
5854+
// Modify connection will be invoked once all channels are closed.
5855+
let req = env.notifier.next_action();
5856+
assert_eq!(
5857+
req,
5858+
ModifyConnectionRequest {
5859+
monitor_page: Update::Reset,
5860+
interrupt_page: Update::Reset,
5861+
..Default::default()
5862+
}
5863+
);
5864+
5865+
env.notifier.messages.clear();
5866+
env.c().complete_disconnect();
5867+
env.notifier
5868+
.check_message(OutgoingMessage::new(&protocol::UnloadComplete {}));
5869+
}
5870+
5871+
#[test]
5872+
fn test_reinitiate_contact() {
5873+
let mut env = TestEnv::new();
5874+
let _offer_id1 = env.offer(1);
5875+
let _offer_id2 = env.offer(2);
5876+
let _offer_id3 = env.offer(3);
5877+
5878+
env.connect(Version::Win10, FeatureFlags::new());
5879+
env.c().handle_request_offers().unwrap();
5880+
env.notifier.messages.clear();
5881+
5882+
// Send a new InitiateContact message to force a disconnect without using reload.
5883+
let result = env.c().handle_synic_message(in_msg_ex(
5884+
protocol::MessageType::INITIATE_CONTACT,
5885+
protocol::InitiateContact {
5886+
version_requested: Version::Win10 as u32,
5887+
interrupt_page_or_target_info: TargetInfo::new().with_sint(SINT).with_vtl(0).into(),
5888+
child_to_parent_monitor_page_gpa: 0x123f000,
5889+
parent_to_child_monitor_page_gpa: 0x321f000,
5890+
..FromZeros::new_zeroed()
5891+
},
5892+
false,
5893+
false,
5894+
));
5895+
assert!(result.is_ok());
5896+
5897+
// We will first receive a request indicating the forced disconnect.
5898+
let req = env.notifier.next_action();
5899+
assert_eq!(
5900+
req,
5901+
ModifyConnectionRequest {
5902+
monitor_page: Update::Reset,
5903+
interrupt_page: Update::Reset,
5904+
..Default::default()
5905+
}
5906+
);
5907+
5908+
env.c().complete_disconnect();
5909+
5910+
// No UnloadComplete is sent in this case since Unload was not sent.
5911+
assert!(env.notifier.messages.is_empty());
5912+
5913+
// Now we receive the request for the new connection.
5914+
let req = env.notifier.next_action();
5915+
assert_eq!(
5916+
req,
5917+
ModifyConnectionRequest {
5918+
version: Some(Version::Win10 as u32),
5919+
monitor_page: Update::Set(MonitorPageGpas {
5920+
child_to_parent: 0x123f000,
5921+
parent_to_child: 0x321f000,
5922+
}),
5923+
interrupt_page: Update::Reset,
5924+
target_message_vp: Some(0),
5925+
..Default::default()
5926+
}
5927+
);
5928+
5929+
env.complete_connect();
5930+
}
57795931
}

0 commit comments

Comments
 (0)