Skip to content

Commit 4bccee1

Browse files
committed
Merge branch 'main' into contig-mem-from-chris
2 parents 0579d85 + 90d4041 commit 4bccee1

File tree

13 files changed

+1262
-635
lines changed

13 files changed

+1262
-635
lines changed

petri/src/vm/hyperv/powershell.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -722,14 +722,14 @@ const HYPERV_VMMS_TABLE: &str = "Microsoft-Windows-Hyper-V-VMMS-Admin";
722722

723723
/// Get Hyper-V event logs for a VM
724724
pub async fn hyperv_event_logs(
725-
vmid: &Guid,
725+
vmid: Option<&Guid>,
726726
start_time: &Timestamp,
727727
) -> anyhow::Result<Vec<WinEvent>> {
728-
let vmid = vmid.to_string();
728+
let vmid = vmid.map(|id| id.to_string());
729729
run_get_winevent(
730730
&[HYPERV_WORKER_TABLE, HYPERV_VMMS_TABLE],
731731
Some(start_time),
732-
Some(&vmid),
732+
vmid.as_deref(),
733733
&[],
734734
)
735735
.await
@@ -801,8 +801,10 @@ pub const MSVM_TRIPLE_FAULT_INVALID_VP_REGISTER_ERROR: u32 = 18550;
801801
pub const MSVM_TRIPLE_FAULT_UNRECOVERABLE_EXCEPTION_ERROR: u32 = 18560;
802802
/// The vm was hibernated successfully.
803803
pub const MSVM_GUEST_HIBERNATE_SUCCESS: u32 = 18608;
804+
/// The vm has quit unexpectedly (the worker process terminated).
805+
pub const MSVM_VMMS_VM_TERMINATE_ERROR: u32 = 14070;
804806

805-
const HALT_EVENT_IDS: [u32; 12] = [
807+
const HALT_EVENT_IDS: [u32; 13] = [
806808
MSVM_HOST_STOP_SUCCESS,
807809
MSVM_HOST_SHUTDOWN_SUCCESS,
808810
MSVM_GUEST_SHUTDOWN_SUCCESS,
@@ -815,6 +817,7 @@ const HALT_EVENT_IDS: [u32; 12] = [
815817
MSVM_TRIPLE_FAULT_INVALID_VP_REGISTER_ERROR,
816818
MSVM_TRIPLE_FAULT_UNRECOVERABLE_EXCEPTION_ERROR,
817819
MSVM_GUEST_HIBERNATE_SUCCESS,
820+
MSVM_VMMS_VM_TERMINATE_ERROR,
818821
];
819822

820823
/// Get Hyper-V halt event logs for a VM

petri/src/vm/hyperv/vm.rs

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub struct HyperVVM {
4646
// state
4747
destroyed: bool,
4848
last_start_time: Option<Timestamp>,
49+
last_log_flushed: Option<Timestamp>,
4950
}
5051

5152
impl HyperVVM {
@@ -126,6 +127,7 @@ impl HyperVVM {
126127
driver,
127128
destroyed: false,
128129
last_start_time: None,
130+
last_log_flushed: None,
129131
};
130132

131133
// Remove the default network adapter
@@ -172,25 +174,35 @@ impl HyperVVM {
172174
}
173175

174176
/// Get Hyper-V logs and write them to the log file
175-
pub async fn flush_logs(&self) -> anyhow::Result<()> {
176-
for event in powershell::hyperv_event_logs(&self.vmid, &self.create_time).await? {
177-
self.log_file.write_entry_fmt(
178-
Some(event.time_created),
179-
match event.level {
180-
1 | 2 => Level::ERROR,
181-
3 => Level::WARN,
182-
5 => Level::TRACE,
183-
_ => Level::INFO,
184-
},
185-
format_args!(
186-
"[{}] {}: ({}, {}) {}",
187-
event.time_created, event.provider_name, event.level, event.id, event.message,
188-
),
189-
);
177+
pub async fn flush_logs(&mut self) -> anyhow::Result<()> {
178+
let start_time = self.last_log_flushed.as_ref().unwrap_or(&self.create_time);
179+
for event in powershell::hyperv_event_logs(Some(&self.vmid), start_time).await? {
180+
self.log_winevent(&event);
181+
if self.last_log_flushed.is_none_or(|t| t < event.time_created) {
182+
// add 1ms to avoid duplicate log entries
183+
self.last_log_flushed =
184+
Some(event.time_created.checked_add(Duration::from_millis(1))?);
185+
}
190186
}
191187
Ok(())
192188
}
193189

190+
fn log_winevent(&self, event: &powershell::WinEvent) {
191+
self.log_file.write_entry_fmt(
192+
Some(event.time_created),
193+
match event.level {
194+
1 | 2 => Level::ERROR,
195+
3 => Level::WARN,
196+
5 => Level::TRACE,
197+
_ => Level::INFO,
198+
},
199+
format_args!(
200+
"[{}] {}: ({}, {}) {}",
201+
event.time_created, event.provider_name, event.level, event.id, event.message,
202+
),
203+
);
204+
}
205+
194206
/// Waits for an event emitted by the firmware about its boot status, and
195207
/// returns that status.
196208
pub async fn wait_for_boot_event(&mut self) -> anyhow::Result<FirmwareEvent> {
@@ -377,6 +389,7 @@ impl HyperVVM {
377389

378390
let (halt_reason, timestamp) = self.wait_for_some(Self::halt_event).await?;
379391
if halt_reason == PetriHaltReason::Reset {
392+
// add 1ms to avoid getting the same event again
380393
self.last_start_time = Some(timestamp.checked_add(Duration::from_millis(1))?);
381394
}
382395
Ok(halt_reason)
@@ -394,6 +407,19 @@ impl HyperVVM {
394407
}
395408
let event = events.first();
396409

410+
if event.is_some_and(|e| e.id == powershell::MSVM_VMMS_VM_TERMINATE_ERROR) {
411+
tracing::error!("hyper-v worker process crashed");
412+
// Get 5 seconds of non-vm-specific hyper-v logs preceding the crash
413+
const COLLECT_HYPERV_CRASH_LOGS_SECONDS: u64 = 5;
414+
let start_time = event
415+
.unwrap()
416+
.time_created
417+
.checked_sub(Duration::from_secs(COLLECT_HYPERV_CRASH_LOGS_SECONDS))?;
418+
for event in powershell::hyperv_event_logs(None, &start_time).await? {
419+
self.log_winevent(&event);
420+
}
421+
}
422+
397423
event
398424
.map(|e| {
399425
Ok((
@@ -411,7 +437,8 @@ impl HyperVVM {
411437
| powershell::MSVM_TRIPLE_FAULT_UNRECOVERABLE_EXCEPTION_ERROR => {
412438
PetriHaltReason::TripleFault
413439
}
414-
powershell::MSVM_STOP_CRITICAL_SUCCESS => PetriHaltReason::Other,
440+
powershell::MSVM_STOP_CRITICAL_SUCCESS
441+
| powershell::MSVM_VMMS_VM_TERMINATE_ERROR => PetriHaltReason::Other,
415442
id => anyhow::bail!("Unexpected event id: {id}"),
416443
},
417444
e.time_created,
@@ -421,7 +448,7 @@ impl HyperVVM {
421448
}
422449

423450
/// Wait for the VM shutdown ic
424-
pub async fn wait_for_enlightened_shutdown_ready(&self) -> anyhow::Result<()> {
451+
pub async fn wait_for_enlightened_shutdown_ready(&mut self) -> anyhow::Result<()> {
425452
self.wait_for(Self::shutdown_ic_status, powershell::VmShutdownIcStatus::Ok)
426453
.await
427454
.context("wait_for_enlightened_shutdown_ready")
@@ -440,10 +467,14 @@ impl HyperVVM {
440467
}
441468

442469
async fn wait_for<T: std::fmt::Debug + PartialEq>(
443-
&self,
470+
&mut self,
444471
f: impl AsyncFn(&Self) -> anyhow::Result<T>,
445472
target: T,
446473
) -> anyhow::Result<()> {
474+
// flush the logs every time we start waiting for something in case
475+
// they don't get flushed when the VM is destroyed.
476+
// TODO: run this periodically in a task.
477+
self.flush_logs().await?;
447478
loop {
448479
let state = f(self).await?;
449480
if state == target {
@@ -458,9 +489,11 @@ impl HyperVVM {
458489
}
459490

460491
async fn wait_for_some<T: std::fmt::Debug + PartialEq>(
461-
&self,
492+
&mut self,
462493
f: impl AsyncFn(&Self) -> anyhow::Result<Option<T>>,
463494
) -> anyhow::Result<T> {
495+
// see comment in `wait_for` above
496+
self.flush_logs().await?;
464497
loop {
465498
let state = f(self).await?;
466499
if let Some(state) = state {

0 commit comments

Comments
 (0)