Skip to content

Commit 8389b67

Browse files
SvenGrootCopilot
andcommitted
vmbus_server: always notify that the connection is modified when disconnecting (microsoft#1809)
This change fixes an issue where, if all channels are already reset when a disconnect happens, the server would not invoke `Notifier::modify_connection`. This means that the state such as the interrupt page and monitor pages is not reset, and in the case of OpenHCL the relay is not notified of the disconnect (which can leave host state intact, including monitor pages if MNF is handled by the host). This caused an issue where Linux would occasionally crash during resume from hibernate. When resuming, Linux makes two connections, first to read the memory image, and then to resume normal operations, both using MNF. When the first connection unloads, the overlay pages for the monitor pages were not removed until the reconnect, leading to memory corruption when Linux proceeds to use these pages as normal memory. This change also adds some tests ensuring the notifier is invoked for an unload with open channels, without open channels, and a forced disconnect when a new InitiateContact message is received. --------- Co-authored-by: Copilot <[email protected]>
1 parent d961b6a commit 8389b67

File tree

1 file changed

+179
-27
lines changed

1 file changed

+179
-27
lines changed

vm/devices/vmbus/vmbus_server/src/channels.rs

Lines changed: 179 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,19 +1755,7 @@ impl<'a, N: 'a + Notifier> ServerWithNotifier<'a, N> {
17551755
modify_sent: false,
17561756
} => {
17571757
if self.are_channels_reset(matches!(next_action, ConnectionAction::Reset)) {
1758-
self.inner.state = ConnectionState::Disconnecting {
1759-
next_action,
1760-
modify_sent: true,
1761-
};
1762-
1763-
// Reset server state and disconnect the relay if there is one.
1764-
self.notifier
1765-
.modify_connection(ModifyConnectionRequest {
1766-
monitor_page: Update::Reset,
1767-
interrupt_page: Update::Reset,
1768-
..Default::default()
1769-
})
1770-
.expect("resetting state should not fail");
1758+
self.notify_disconnect(next_action);
17711759
}
17721760
}
17731761
ConnectionState::Disconnecting {
@@ -1779,6 +1767,25 @@ impl<'a, N: 'a + Notifier> ServerWithNotifier<'a, N> {
17791767
}
17801768
}
17811769

1770+
/// Informs the notifier to reset the connection state when disconnecting.
1771+
fn notify_disconnect(&mut self, next_action: ConnectionAction) {
1772+
// Assert this on debug only because it is an expensive check if there are many channels.
1773+
debug_assert!(self.are_channels_reset(matches!(next_action, ConnectionAction::Reset)));
1774+
self.inner.state = ConnectionState::Disconnecting {
1775+
next_action,
1776+
modify_sent: true,
1777+
};
1778+
1779+
// Reset server state and disconnect the relay if there is one.
1780+
self.notifier
1781+
.modify_connection(ModifyConnectionRequest {
1782+
monitor_page: Update::Reset,
1783+
interrupt_page: Update::Reset,
1784+
..Default::default()
1785+
})
1786+
.expect("resetting state should not fail");
1787+
}
1788+
17821789
/// If true, the server is mid-reset and cannot take certain actions such
17831790
/// as handling synic messages or saving state.
17841791
fn is_resetting(&self) -> bool {
@@ -2246,7 +2253,7 @@ impl<'a, N: 'a + Notifier> ServerWithNotifier<'a, N> {
22462253

22472254
ConnectionState::Connected { .. } => {
22482255
if self.are_channels_reset(vm_reset) {
2249-
self.inner.state = ConnectionState::Disconnected;
2256+
self.notify_disconnect(new_action);
22502257
} else {
22512258
self.inner.state = ConnectionState::Disconnecting {
22522259
next_action: new_action,
@@ -3613,32 +3620,34 @@ mod tests {
36133620

36143621
#[test]
36153622
fn test_version_negotiation_feature_flags() {
3616-
let (mut notifier, _recv) = TestNotifier::new();
3617-
let mut server = Server::new(Vtl::Vtl0, MESSAGE_CONNECTION_ID, 0);
3623+
let mut env = TestEnv::new();
36183624

36193625
// Test with no feature flags.
36203626
let mut target_info = TargetInfo::new()
36213627
.with_sint(SINT)
36223628
.with_vtl(0)
36233629
.with_feature_flags(FeatureFlags::new().into());
36243630
test_initiate_contact(
3625-
&mut server,
3626-
&mut notifier,
3631+
&mut env.server,
3632+
&mut env.notifier,
36273633
Version::Copper as u32,
36283634
target_info.into(),
36293635
true,
36303636
0,
36313637
);
36323638

3639+
env.c().handle_unload();
3640+
env.complete_reset();
3641+
env.notifier.messages.clear();
36333642
// Request supported feature flags.
36343643
target_info.set_feature_flags(
36353644
FeatureFlags::new()
36363645
.with_guest_specified_signal_parameters(true)
36373646
.into(),
36383647
);
36393648
test_initiate_contact(
3640-
&mut server,
3641-
&mut notifier,
3649+
&mut env.server,
3650+
&mut env.notifier,
36423651
Version::Copper as u32,
36433652
target_info.into(),
36443653
true,
@@ -3647,14 +3656,17 @@ mod tests {
36473656
.into(),
36483657
);
36493658

3659+
env.c().handle_unload();
3660+
env.complete_reset();
3661+
env.notifier.messages.clear();
36503662
// Request unsupported feature flags. This will succeed and report back the supported ones.
36513663
target_info.set_feature_flags(
36523664
u32::from(FeatureFlags::new().with_guest_specified_signal_parameters(true))
36533665
| 0xf0000000,
36543666
);
36553667
test_initiate_contact(
3656-
&mut server,
3657-
&mut notifier,
3668+
&mut env.server,
3669+
&mut env.notifier,
36583670
Version::Copper as u32,
36593671
target_info.into(),
36603672
true,
@@ -3663,11 +3675,14 @@ mod tests {
36633675
.into(),
36643676
);
36653677

3678+
env.c().handle_unload();
3679+
env.complete_reset();
3680+
env.notifier.messages.clear();
36663681
// Verify client ID feature flag.
36673682
target_info.set_feature_flags(FeatureFlags::new().with_client_id(true).into());
36683683
test_initiate_contact(
3669-
&mut server,
3670-
&mut notifier,
3684+
&mut env.server,
3685+
&mut env.notifier,
36713686
Version::Copper as u32,
36723687
target_info.into(),
36733688
true,
@@ -4298,9 +4313,7 @@ mod tests {
42984313
self.server.with_notifier(&mut self.notifier)
42994314
}
43004315

4301-
// Completes a reset operation if the server send a modify request as part of it. This
4302-
// shouldn't be called if the server was not connected or had no open channels or gpadls
4303-
// during the reset.
4316+
// Completes a reset operation if the server sends a modify request as part of it.
43044317
fn complete_reset(&mut self) {
43054318
let _ = self.next_action();
43064319
self.c()
@@ -4745,6 +4758,7 @@ mod tests {
47454758
env.c().reset();
47464759
// We have to "complete" the connection to let the reset go through.
47474760
env.complete_connect();
4761+
env.complete_reset();
47484762
env.notifier.check_reset();
47494763

47504764
env.server.restore(state).unwrap();
@@ -4809,6 +4823,7 @@ mod tests {
48094823

48104824
let state = env.server.save();
48114825
env.c().reset();
4826+
env.complete_reset();
48124827
env.notifier.check_reset();
48134828
env.server.restore(state).unwrap();
48144829
env.c().post_restore().unwrap();
@@ -4952,6 +4967,7 @@ mod tests {
49524967

49534968
// Reserved channels and gpadls should stay open across unloads
49544969
env.c().handle_unload();
4970+
env.complete_reset();
49554971

49564972
// Closing while disconnected should work
49574973
env.close_reserved(2, 2, SINT.into());
@@ -5010,6 +5026,7 @@ mod tests {
50105026
env.c().open_complete(offer_id1, 0);
50115027

50125028
env.c().handle_unload();
5029+
env.complete_reset();
50135030

50145031
// Reset while disconnected should cleanup reserved channels
50155032
// and complete disconnect automatically
@@ -5032,6 +5049,7 @@ mod tests {
50325049
env.c().open_complete(offer_id2, 0);
50335050

50345051
env.c().handle_unload();
5052+
env.complete_reset();
50355053

50365054
env.close_reserved(2, 2, SINT.into());
50375055
env.c().close_complete(offer_id2);
@@ -5369,4 +5387,138 @@ mod tests {
53695387
}
53705388
);
53715389
}
5390+
5391+
#[test]
5392+
fn test_disconnect() {
5393+
let mut env = TestEnv::new();
5394+
let _offer_id1 = env.offer(1);
5395+
let _offer_id2 = env.offer(2);
5396+
let _offer_id3 = env.offer(3);
5397+
5398+
env.connect(Version::Win10, FeatureFlags::new());
5399+
env.c().handle_request_offers().unwrap();
5400+
5401+
// Send unload message with all channels already closed.
5402+
env.c().handle_unload();
5403+
5404+
// Check that modify_connection was invoked on the notifier.
5405+
let req = env.notifier.next_action();
5406+
assert_eq!(
5407+
req,
5408+
ModifyConnectionRequest {
5409+
monitor_page: Update::Reset,
5410+
interrupt_page: Update::Reset,
5411+
..Default::default()
5412+
}
5413+
);
5414+
5415+
env.notifier.messages.clear();
5416+
env.c().complete_disconnect();
5417+
env.notifier
5418+
.check_message(OutgoingMessage::new(&protocol::UnloadComplete {}));
5419+
}
5420+
5421+
#[test]
5422+
fn test_disconnect_open_channels() {
5423+
let mut env = TestEnv::new();
5424+
let offer_id1 = env.offer(1);
5425+
let offer_id2 = env.offer(2);
5426+
let _offer_id3 = env.offer(3);
5427+
5428+
env.connect(Version::Win10, FeatureFlags::new());
5429+
env.c().handle_request_offers().unwrap();
5430+
5431+
// Open two channels.
5432+
env.open(1);
5433+
env.open(2);
5434+
5435+
env.c().open_complete(offer_id1, 0);
5436+
env.c().open_complete(offer_id2, 0);
5437+
5438+
// Send unload message with channels still open.
5439+
env.c().handle_unload();
5440+
5441+
assert!(env.notifier.modify_requests.is_empty());
5442+
5443+
// Unload will close the channels, so complete that operation.
5444+
env.c().close_complete(offer_id1);
5445+
env.c().close_complete(offer_id2);
5446+
5447+
// Modify connection will be invoked once all channels are closed.
5448+
let req = env.notifier.next_action();
5449+
assert_eq!(
5450+
req,
5451+
ModifyConnectionRequest {
5452+
monitor_page: Update::Reset,
5453+
interrupt_page: Update::Reset,
5454+
..Default::default()
5455+
}
5456+
);
5457+
5458+
env.notifier.messages.clear();
5459+
env.c().complete_disconnect();
5460+
env.notifier
5461+
.check_message(OutgoingMessage::new(&protocol::UnloadComplete {}));
5462+
}
5463+
5464+
#[test]
5465+
fn test_reinitiate_contact() {
5466+
let mut env = TestEnv::new();
5467+
let _offer_id1 = env.offer(1);
5468+
let _offer_id2 = env.offer(2);
5469+
let _offer_id3 = env.offer(3);
5470+
5471+
env.connect(Version::Win10, FeatureFlags::new());
5472+
env.c().handle_request_offers().unwrap();
5473+
env.notifier.messages.clear();
5474+
5475+
// Send a new InitiateContact message to force a disconnect without using reload.
5476+
let result = env.c().handle_synic_message(in_msg_ex(
5477+
protocol::MessageType::INITIATE_CONTACT,
5478+
protocol::InitiateContact {
5479+
version_requested: Version::Win10 as u32,
5480+
interrupt_page_or_target_info: TargetInfo::new().with_sint(SINT).with_vtl(0).into(),
5481+
child_to_parent_monitor_page_gpa: 0x123f000,
5482+
parent_to_child_monitor_page_gpa: 0x321f000,
5483+
..FromZeros::new_zeroed()
5484+
},
5485+
false,
5486+
false,
5487+
));
5488+
assert!(result.is_ok());
5489+
5490+
// We will first receive a request indicating the forced disconnect.
5491+
let req = env.notifier.next_action();
5492+
assert_eq!(
5493+
req,
5494+
ModifyConnectionRequest {
5495+
monitor_page: Update::Reset,
5496+
interrupt_page: Update::Reset,
5497+
..Default::default()
5498+
}
5499+
);
5500+
5501+
env.c().complete_disconnect();
5502+
5503+
// No UnloadComplete is sent in this case since Unload was not sent.
5504+
assert!(env.notifier.messages.is_empty());
5505+
5506+
// Now we receive the request for the new connection.
5507+
let req = env.notifier.next_action();
5508+
assert_eq!(
5509+
req,
5510+
ModifyConnectionRequest {
5511+
version: Some(Version::Win10 as u32),
5512+
monitor_page: Update::Set(MonitorPageGpas {
5513+
child_to_parent: 0x123f000,
5514+
parent_to_child: 0x321f000,
5515+
}),
5516+
interrupt_page: Update::Reset,
5517+
target_message_vp: Some(0),
5518+
..Default::default()
5519+
}
5520+
);
5521+
5522+
env.complete_connect();
5523+
}
53725524
}

0 commit comments

Comments
 (0)