From 3d011eae8c54ffd33b1f10f62afbc02cb69bec56 Mon Sep 17 00:00:00 2001 From: Srinivas Shekar Date: Thu, 2 Oct 2025 08:56:47 +0000 Subject: [PATCH 1/4] Updated the log type to info when there is a lack of h/w resources --- .../disk_nvme/nvme_driver/src/driver.rs | 66 ++++++++++++++----- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index c174340a17..32c6eb3a37 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -103,6 +103,23 @@ pub enum RestoreError { InvalidData, } +#[derive(Debug, Error)] +pub enum DeviceError { + #[error("no more io queues available, reached maximum {0}")] + NoMoreIoQueues(usize), + #[error("failed to map interrupt")] + InterruptMapFailure, + #[error("failed to create io queue pair {0}")] + IoQueuePairCreationFailure(u16), + #[error("failed to create io completion queue {0}")] + IoCompletionQueueFailure(u16), + #[error("failed to create io submission queue {0}")] + IoSubmissionQueueFailure(u16), + // Other device related errors + #[error(transparent)] + Other(anyhow::Error), +} + #[derive(Inspect)] struct IoQueue { queue: QueuePair, @@ -859,7 +876,7 @@ impl DriverWorkerTask { .await { Ok(issuer) => issuer, - Err(err) => { + Err(err) => { // Find a fallback queue close in index to the failed queue. let (fallback_cpu, fallback) = self.io_issuers.per_cpu[..cpu as usize] .iter() @@ -868,14 +885,29 @@ impl DriverWorkerTask { .find_map(|(i, issuer)| issuer.get().map(|issuer| (i, issuer))) .unwrap(); - tracing::error!( - cpu, - fallback_cpu, - error = err.as_ref() as &dyn std::error::Error, - "failed to create io queue, falling back" - ); + // Log the error as informational only when there is a lack of + // hardware resources from the device. + match err { + DeviceError::NoMoreIoQueues(_) => { + tracing::info!( + cpu, + fallback_cpu, + error = &err as &dyn std::error::Error, + "failed to create io queue, falling back" + ); + } + _ => { + tracing::info!( + cpu, + fallback_cpu, + error = &err as &dyn std::error::Error, + "failed to create io queue, falling back" + ); + } + } + fallback.clone() - } + }, }; self.io_issuers.per_cpu[cpu as usize] @@ -888,9 +920,9 @@ impl DriverWorkerTask { &mut self, state: &mut WorkerState, cpu: u32, - ) -> anyhow::Result { + ) -> Result { if self.io.len() >= state.max_io_queues as usize { - anyhow::bail!("no more io queues available"); + return Err(DeviceError::NoMoreIoQueues(state.max_io_queues as usize)); } let qid = self.io.len() as u16 + 1; @@ -902,7 +934,8 @@ impl DriverWorkerTask { let interrupt = self .device .map_interrupt(iv.into(), cpu) - .context("failed to map interrupt")?; + .context("failed to map interrupt") + .map_err(|_| DeviceError::InterruptMapFailure)?; let queue = QueuePair::new( self.driver.clone(), @@ -914,7 +947,8 @@ impl DriverWorkerTask { self.registers.clone(), self.bounce_buffer, ) - .with_context(|| format!("failed to create io queue pair {qid}"))?; + .with_context(|| format!("failed to create io queue pair {qid}")) + .map_err(|_| DeviceError::IoQueuePairCreationFailure(qid))?; let io_sq_addr = queue.sq_addr(); let io_cq_addr = queue.cq_addr(); @@ -943,7 +977,8 @@ impl DriverWorkerTask { ..admin_cmd(spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE) }) .await - .with_context(|| format!("failed to create io completion queue {qid}"))?; + .with_context(|| format!("failed to create io completion queue {qid}")) + .map_err(|_| DeviceError::IoCompletionQueueFailure(qid))?; created_completion_queue = true; @@ -961,7 +996,8 @@ impl DriverWorkerTask { ..admin_cmd(spec::AdminOpcode::CREATE_IO_SUBMISSION_QUEUE) }) .await - .with_context(|| format!("failed to create io submission queue {qid}"))?; + .with_context(|| format!("failed to create io submission queue {qid}")) + .map_err(|_| DeviceError::IoSubmissionQueueFailure(qid))?; Ok(()) }; @@ -983,7 +1019,7 @@ impl DriverWorkerTask { } let io = self.io.pop().unwrap(); io.queue.shutdown().await; - return Err(err); + return Err(DeviceError::Other(err)); } Ok(IoIssuer { From 7a2c31cc9ed0766ff40ba5dd36188618349cf82f Mon Sep 17 00:00:00 2001 From: SrinivasShekar Date: Fri, 3 Oct 2025 13:18:29 +0000 Subject: [PATCH 2/4] Addressed review comments --- .../disk_nvme/nvme_driver/src/driver.rs | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index 32c6eb3a37..ea5eb05025 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -108,13 +108,13 @@ pub enum DeviceError { #[error("no more io queues available, reached maximum {0}")] NoMoreIoQueues(usize), #[error("failed to map interrupt")] - InterruptMapFailure, - #[error("failed to create io queue pair {0}")] - IoQueuePairCreationFailure(u16), - #[error("failed to create io completion queue {0}")] - IoCompletionQueueFailure(u16), - #[error("failed to create io submission queue {0}")] - IoSubmissionQueueFailure(u16), + InterruptMapFailure(#[source] anyhow::Error), + #[error("failed to create io queue pair {1}")] + IoQueuePairCreationFailure(#[source] anyhow::Error, u16), + #[error("failed to create io completion queue {1}")] + IoCompletionQueueFailure(#[source] anyhow::Error, u16), + #[error("failed to create io submission queue {1}")] + IoSubmissionQueueFailure(#[source] anyhow::Error, u16), // Other device related errors #[error(transparent)] Other(anyhow::Error), @@ -876,7 +876,7 @@ impl DriverWorkerTask { .await { Ok(issuer) => issuer, - Err(err) => { + Err(err) => { // Find a fallback queue close in index to the failed queue. let (fallback_cpu, fallback) = self.io_issuers.per_cpu[..cpu as usize] .iter() @@ -907,7 +907,7 @@ impl DriverWorkerTask { } fallback.clone() - }, + } }; self.io_issuers.per_cpu[cpu as usize] @@ -934,8 +934,7 @@ impl DriverWorkerTask { let interrupt = self .device .map_interrupt(iv.into(), cpu) - .context("failed to map interrupt") - .map_err(|_| DeviceError::InterruptMapFailure)?; + .map_err(DeviceError::InterruptMapFailure)?; let queue = QueuePair::new( self.driver.clone(), @@ -947,8 +946,7 @@ impl DriverWorkerTask { self.registers.clone(), self.bounce_buffer, ) - .with_context(|| format!("failed to create io queue pair {qid}")) - .map_err(|_| DeviceError::IoQueuePairCreationFailure(qid))?; + .map_err(|err| DeviceError::IoQueuePairCreationFailure(err.into(), qid))?; let io_sq_addr = queue.sq_addr(); let io_cq_addr = queue.cq_addr(); @@ -977,8 +975,7 @@ impl DriverWorkerTask { ..admin_cmd(spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE) }) .await - .with_context(|| format!("failed to create io completion queue {qid}")) - .map_err(|_| DeviceError::IoCompletionQueueFailure(qid))?; + .map_err(|err| DeviceError::IoCompletionQueueFailure(err.into(), qid))?; created_completion_queue = true; @@ -996,8 +993,7 @@ impl DriverWorkerTask { ..admin_cmd(spec::AdminOpcode::CREATE_IO_SUBMISSION_QUEUE) }) .await - .with_context(|| format!("failed to create io submission queue {qid}")) - .map_err(|_| DeviceError::IoSubmissionQueueFailure(qid))?; + .map_err(|err| DeviceError::IoSubmissionQueueFailure(err.into(), qid))?; Ok(()) }; From 0ab1dc6f6aba5d9436165ab5eafcb4d78360858d Mon Sep 17 00:00:00 2001 From: SrinivasShekar Date: Fri, 3 Oct 2025 13:23:56 +0000 Subject: [PATCH 3/4] updated the log type --- vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index ea5eb05025..7651eee4a0 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -897,7 +897,7 @@ impl DriverWorkerTask { ); } _ => { - tracing::info!( + tracing::error!( cpu, fallback_cpu, error = &err as &dyn std::error::Error, From 9cbe691b6f81b56655845c07feb2b5766988af20 Mon Sep 17 00:00:00 2001 From: SrinivasShekar Date: Fri, 3 Oct 2025 16:54:11 +0000 Subject: [PATCH 4/4] Addressed lint errors --- .../storage/disk_nvme/nvme_driver/src/driver.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs index 7651eee4a0..fb2a9f9b4a 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs @@ -106,7 +106,7 @@ pub enum RestoreError { #[derive(Debug, Error)] pub enum DeviceError { #[error("no more io queues available, reached maximum {0}")] - NoMoreIoQueues(usize), + NoMoreIoQueues(u16), #[error("failed to map interrupt")] InterruptMapFailure(#[source] anyhow::Error), #[error("failed to create io queue pair {1}")] @@ -894,15 +894,15 @@ impl DriverWorkerTask { fallback_cpu, error = &err as &dyn std::error::Error, "failed to create io queue, falling back" - ); + ); } _ => { - tracing::error!( + tracing::error!( cpu, fallback_cpu, error = &err as &dyn std::error::Error, "failed to create io queue, falling back" - ); + ); } } @@ -922,7 +922,7 @@ impl DriverWorkerTask { cpu: u32, ) -> Result { if self.io.len() >= state.max_io_queues as usize { - return Err(DeviceError::NoMoreIoQueues(state.max_io_queues as usize)); + return Err(DeviceError::NoMoreIoQueues(state.max_io_queues)); } let qid = self.io.len() as u16 + 1; @@ -946,7 +946,7 @@ impl DriverWorkerTask { self.registers.clone(), self.bounce_buffer, ) - .map_err(|err| DeviceError::IoQueuePairCreationFailure(err.into(), qid))?; + .map_err(|err| DeviceError::IoQueuePairCreationFailure(err, qid))?; let io_sq_addr = queue.sq_addr(); let io_cq_addr = queue.cq_addr();