Skip to content

Commit 8177f77

Browse files
SrinivasShekar and Srinivas Shekar authored
disk_nvme: Demote NVMe queues availability message from error to info (#2063)
**Issue:** Currently, when we hit the hardware resource limit for the NVMe device, we log an error message, which is a red herring among other NVMe IO queue availability failures. Ref: #499 **Fix:** Demoted the log level to info for cases where we hit the hardware resource limit for the NVMe device. **Validation:** Ran NVMe-specific tests (a subset of vmm_tests) using the successfully built OpenHCL binary. --------- Co-authored-by: Srinivas Shekar <[email protected]>
1 parent b1fa9dd commit 8177f77

File tree

1 file changed

+45
-13
lines changed
  • vm/devices/storage/disk_nvme/nvme_driver/src

1 file changed

+45
-13
lines changed

vm/devices/storage/disk_nvme/nvme_driver/src/driver.rs

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,23 @@ pub enum RestoreError {
103103
InvalidData,
104104
}
105105

106+
/// Errors returned by the driver's IO queue creation path.
///
/// Splitting the failure causes into variants lets the caller tell the
/// "queue limit reached" case apart from the other failures and log it at a
/// lower severity.
#[derive(Debug, Error)]
107+
pub enum DeviceError {
108+
/// The number of IO queues already created has reached the allowed
/// maximum; the payload is that maximum (`max_io_queues`).
#[error("no more io queues available, reached maximum {0}")]
109+
NoMoreIoQueues(u16),
110+
/// Mapping the interrupt for the new queue failed.
#[error("failed to map interrupt")]
111+
InterruptMapFailure(#[source] anyhow::Error),
112+
/// Constructing the queue pair failed; the second field is the queue ID.
#[error("failed to create io queue pair {1}")]
113+
IoQueuePairCreationFailure(#[source] anyhow::Error, u16),
114+
/// The `CREATE_IO_COMPLETION_QUEUE` admin command failed; the second
/// field is the queue ID.
#[error("failed to create io completion queue {1}")]
115+
IoCompletionQueueFailure(#[source] anyhow::Error, u16),
116+
/// The `CREATE_IO_SUBMISSION_QUEUE` admin command failed; the second
/// field is the queue ID.
#[error("failed to create io submission queue {1}")]
117+
IoSubmissionQueueFailure(#[source] anyhow::Error, u16),
118+
// Other device related errors
119+
/// Wraps any other error; `transparent` forwards its display text and
/// source to the wrapped error.
#[error(transparent)]
120+
Other(anyhow::Error),
121+
}
122+
106123
#[derive(Inspect)]
107124
struct IoQueue {
108125
queue: QueuePair,
@@ -868,12 +885,27 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
868885
.find_map(|(i, issuer)| issuer.get().map(|issuer| (i, issuer)))
869886
.unwrap();
870887

871-
tracing::error!(
872-
cpu,
873-
fallback_cpu,
874-
error = err.as_ref() as &dyn std::error::Error,
875-
"failed to create io queue, falling back"
876-
);
888+
// Log the error as informational only when there is a lack of
889+
// hardware resources from the device.
890+
match err {
891+
DeviceError::NoMoreIoQueues(_) => {
892+
tracing::info!(
893+
cpu,
894+
fallback_cpu,
895+
error = &err as &dyn std::error::Error,
896+
"failed to create io queue, falling back"
897+
);
898+
}
899+
_ => {
900+
tracing::error!(
901+
cpu,
902+
fallback_cpu,
903+
error = &err as &dyn std::error::Error,
904+
"failed to create io queue, falling back"
905+
);
906+
}
907+
}
908+
877909
fallback.clone()
878910
}
879911
};
@@ -888,9 +920,9 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
888920
&mut self,
889921
state: &mut WorkerState,
890922
cpu: u32,
891-
) -> anyhow::Result<IoIssuer> {
923+
) -> Result<IoIssuer, DeviceError> {
892924
if self.io.len() >= state.max_io_queues as usize {
893-
anyhow::bail!("no more io queues available");
925+
return Err(DeviceError::NoMoreIoQueues(state.max_io_queues));
894926
}
895927

896928
let qid = self.io.len() as u16 + 1;
@@ -902,7 +934,7 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
902934
let interrupt = self
903935
.device
904936
.map_interrupt(iv.into(), cpu)
905-
.context("failed to map interrupt")?;
937+
.map_err(DeviceError::InterruptMapFailure)?;
906938

907939
let queue = QueuePair::new(
908940
self.driver.clone(),
@@ -914,7 +946,7 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
914946
self.registers.clone(),
915947
self.bounce_buffer,
916948
)
917-
.with_context(|| format!("failed to create io queue pair {qid}"))?;
949+
.map_err(|err| DeviceError::IoQueuePairCreationFailure(err, qid))?;
918950

919951
let io_sq_addr = queue.sq_addr();
920952
let io_cq_addr = queue.cq_addr();
@@ -943,7 +975,7 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
943975
..admin_cmd(spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE)
944976
})
945977
.await
946-
.with_context(|| format!("failed to create io completion queue {qid}"))?;
978+
.map_err(|err| DeviceError::IoCompletionQueueFailure(err.into(), qid))?;
947979

948980
created_completion_queue = true;
949981

@@ -961,7 +993,7 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
961993
..admin_cmd(spec::AdminOpcode::CREATE_IO_SUBMISSION_QUEUE)
962994
})
963995
.await
964-
.with_context(|| format!("failed to create io submission queue {qid}"))?;
996+
.map_err(|err| DeviceError::IoSubmissionQueueFailure(err.into(), qid))?;
965997

966998
Ok(())
967999
};
@@ -983,7 +1015,7 @@ impl<T: DeviceBacking> DriverWorkerTask<T> {
9831015
}
9841016
let io = self.io.pop().unwrap();
9851017
io.queue.shutdown().await;
986-
return Err(err);
1018+
return Err(DeviceError::Other(err));
9871019
}
9881020

9891021
Ok(IoIssuer {

0 commit comments

Comments
 (0)