@@ -46,18 +46,23 @@ use zerocopy::FromZeros;
 
 /// Value for unused PRP entries, to catch/mitigate buffer size mismatches.
 const INVALID_PAGE_ADDR: u64 = !(PAGE_SIZE as u64 - 1);
-/// Maximum SQ size in entries.
-pub const MAX_SQ_ENTRIES: u16 = (PAGE_SIZE / 64) as u16;
-/// Maximum CQ size in entries.
-pub const MAX_CQ_ENTRIES: u16 = (PAGE_SIZE / 16) as u16;
+
+const SQ_ENTRY_SIZE: usize = size_of::<spec::Command>();
+const CQ_ENTRY_SIZE: usize = size_of::<spec::Completion>();
 /// Submission Queue size in bytes.
-const SQ_SIZE: usize = PAGE_SIZE;
+const SQ_SIZE: usize = PAGE_SIZE * 4;
 /// Completion Queue size in bytes.
 const CQ_SIZE: usize = PAGE_SIZE;
+/// Maximum SQ size in entries.
+pub const MAX_SQ_ENTRIES: u16 = (SQ_SIZE / SQ_ENTRY_SIZE) as u16;
+/// Maximum CQ size in entries.
+pub const MAX_CQ_ENTRIES: u16 = (CQ_SIZE / CQ_ENTRY_SIZE) as u16;
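+// Worked numbers (illustrative, assuming 4 KiB pages and the spec's 64-byte
+// commands / 16-byte completions): MAX_SQ_ENTRIES = (4 * 4096) / 64 = 256,
+// up from 64 with a single-page SQ, and MAX_CQ_ENTRIES = 4096 / 16 = 256.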
 /// Number of pages per queue if bounce buffering.
 const PER_QUEUE_PAGES_BOUNCE_BUFFER: usize = 128;
 /// Number of pages per queue if not bounce buffering.
 const PER_QUEUE_PAGES_NO_BOUNCE_BUFFER: usize = 64;
+/// Number of SQ entries per page (64).
+const SQ_ENTRIES_PER_PAGE: usize = PAGE_SIZE / SQ_ENTRY_SIZE;
 
 #[derive(Inspect)]
 pub(crate) struct QueuePair<T: AerHandler> {
@@ -75,6 +80,8 @@ pub(crate) struct QueuePair<T: AerHandler> {
     sq_entries: u16,
     #[inspect(skip)]
     cq_entries: u16,
+    sq_addr: u64,
+    cq_addr: u64,
 }
 
 impl PendingCommands {
@@ -177,17 +184,31 @@ impl PendingCommands {
 }
 
 impl<T: AerHandler> QueuePair<T> {
+    /// Create a new queue pair.
+    ///
+    /// `sq_entries` and `cq_entries` are the requested sizes in entries.
+    /// Calling code should request the largest size it thinks the device
+    /// will support (see `CAP.MQES`). The sizes may be clamped down to what
+    /// fits in one page should this routine fail to allocate physically
+    /// contiguous memory to back the queues.
+    ///
+    /// IMPORTANT: Calling code should check the actual sizes via the
+    /// corresponding [`QueuePair::sq_entries`] and [`QueuePair::cq_entries`]
+    /// accessors AFTER calling this routine.
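+    ///
+    /// A minimal sketch of the intended call pattern (illustrative only;
+    /// argument values are elided and the variable names are not from this
+    /// change):
+    ///
+    /// ```ignore
+    /// let pair = QueuePair::new(/* spawner, device, qid, ... */)?;
+    /// // The requested sizes may have been clamped; use the actual values
+    /// // when telling the device how large the queues are.
+    /// let sq_entries = pair.sq_entries();
+    /// let cq_entries = pair.cq_entries();
+    /// ```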
     pub fn new(
         spawner: impl SpawnDriver,
         device: &impl DeviceBacking,
         qid: u16,
-        sq_entries: u16, // Requested SQ size in entries.
-        cq_entries: u16, // Requested CQ size in entries.
+        sq_entries: u16,
+        cq_entries: u16,
         interrupt: DeviceInterrupt,
         registers: Arc<DeviceRegisters<impl DeviceBacking>>,
         bounce_buffer: bool,
         aer_handler: T,
     ) -> anyhow::Result<Self> {
+        // FUTURE: Consider splitting this into several allocations, rather
+        // than allocating the sum total together. This can increase the
+        // likelihood of getting contiguous memory when falling back to the
+        // LockedMem allocator, but this is not the expected path. Be careful
+        // that any changes made here work with already established saved
+        // state.
         let total_size = SQ_SIZE
             + CQ_SIZE
             + if bounce_buffer {
@@ -196,6 +217,10 @@ impl<T: AerHandler> QueuePair<T> {
             PER_QUEUE_PAGES_NO_BOUNCE_BUFFER * PAGE_SIZE
         };
         let dma_client = device.dma_client();
+
+        // TODO: Keepalive: Detect when the allocation came from outside the
+        // private pool and put the device in a degraded state, so it is
+        // possible to inspect that a servicing with keepalive will fail.
         let mem = dma_client
             .allocate_dma_buffer(total_size)
             .context("failed to allocate memory for queues")?;
@@ -217,12 +242,11 @@ impl<T: AerHandler> QueuePair<T> {
         )
     }
 
-    /// Create new object or restore from saved state.
     fn new_or_restore(
         spawner: impl SpawnDriver,
         qid: u16,
-        sq_entries: u16, // Submission queue entries.
-        cq_entries: u16, // Completion queue entries.
+        sq_entries: u16,
+        cq_entries: u16,
         mut interrupt: DeviceInterrupt,
         registers: Arc<DeviceRegisters<impl DeviceBacking>>,
         mem: MemoryBlock,
@@ -235,6 +259,49 @@ impl<T: AerHandler> QueuePair<T> {
         let cq_mem_block = mem.subblock(SQ_SIZE, CQ_SIZE);
         let data_offset = SQ_SIZE + CQ_SIZE;
 
+        // Make sure that the queue memory is physically contiguous. While
+        // the NVMe spec allows queue memory to be non-contiguous in some
+        // cases, this depends on device support. At least one device that we
+        // must support requires contiguous memory (via the CAP.CQR bit), so
+        // just simplify the code paths to use contiguous memory.
+        //
+        // We could also seek through the memory block to find contiguous
+        // pages (for example, if the first 4 pages are not contiguous, but
+        // pages 5-8 are, use those), but other parts of this driver already
+        // assume the math to get the correct offsets.
+        //
+        // N.B. Allocations from the private pool are expected to always be
+        // contiguous, and that is the normal path. That can fail in some
+        // cases (e.g. if we got some guesses about memory size wrong), and
+        // we prefer to operate in a perf-degraded state rather than fail
+        // completely.
+
+        let (sq_is_contiguous, cq_is_contiguous) = (
+            sq_mem_block.contiguous_pfns(),
+            cq_mem_block.contiguous_pfns(),
+        );
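+        // (Assumed behavior of `contiguous_pfns`, noted here for
+        // illustration: it reports whether each page frame number directly
+        // follows the previous one, e.g. pfns [0x100, 0x101, 0x102, 0x103]
+        // are contiguous, while [0x100, 0x102, 0x103, 0x104] are not.)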
+
+        let (sq_entries, cq_entries) = if !sq_is_contiguous || !cq_is_contiguous {
+            tracing::warn!(
+                qid,
+                sq_is_contiguous,
+                sq_mem_block.pfns = ?sq_mem_block.pfns(),
+                cq_is_contiguous,
+                cq_mem_block.pfns = ?cq_mem_block.pfns(),
+                "non-contiguous queue memory detected, falling back to single page queues"
+            );
+            // Clamp both queues to the number of entries that fit in a
+            // single SQ page. With 64-byte SQEs and 16-byte CQEs, a page
+            // holds four times as many CQEs as SQEs, so the per-page SQ
+            // capacity is the smaller of the two and safely bounds both.
+            (SQ_ENTRIES_PER_PAGE as u16, SQ_ENTRIES_PER_PAGE as u16)
+        } else {
+            (sq_entries, cq_entries)
+        };
+
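+        // At this point the queues are known to be physically contiguous (or
+        // clamped to a single page), so the byte address of the first PFN
+        // addresses the whole queue; these become the base addresses handed
+        // to the device.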
+        let sq_addr = sq_mem_block.pfns()[0] * PAGE_SIZE64;
+        let cq_addr = cq_mem_block.pfns()[0] * PAGE_SIZE64;
 
         let mut queue_handler = match saved_state {
             Some(s) => QueueHandler::restore(sq_mem_block, cq_mem_block, s, aer_handler)?,
             None => {
@@ -296,15 +363,27 @@ impl<T: AerHandler> QueuePair<T> {
             qid,
             sq_entries,
             cq_entries,
+            sq_addr,
+            cq_addr,
         })
     }
 
+    /// Returns the actual number of SQ entries supported by this queue pair.
+    pub fn sq_entries(&self) -> u16 {
+        self.sq_entries
+    }
+
+    /// Returns the actual number of CQ entries supported by this queue pair.
+    pub fn cq_entries(&self) -> u16 {
+        self.cq_entries
+    }
+
     pub fn sq_addr(&self) -> u64 {
-        self.mem.pfns()[0] * PAGE_SIZE64
+        self.sq_addr
     }
 
     pub fn cq_addr(&self) -> u64 {
-        self.mem.pfns()[1] * PAGE_SIZE64
+        self.cq_addr
     }
 
     pub fn issuer(&self) -> &Arc<Issuer> {