@@ -361,26 +361,36 @@ FIO_STATIC struct ioengine_ops ioengine_client = {
361361
/*
 * Byte offset of the i-th io_u message buffer within the server's
 * messaging area. The argument is parenthesized so expressions like
 * IO_U_BUFF_OFF_SERVER(a + b) expand correctly; the stray space between
 * the macro name and its parameter list (which would have made this an
 * object-like macro) is removed.
 */
#define IO_U_BUFF_OFF_SERVER(i) ((i) * IO_U_BUF_LEN)
363363
/*
 * Two completion-processing strategies; server_init() selects one of them
 * (based on the busy_wait_polling engine option) and stores it in
 * server_cmpl_process_func.
 */
static int server_cmpl_poll(struct thread_data *td,
		struct rpma_cq *cq, struct ibv_wc *wc);
static int server_cmpl_wait_and_poll(struct thread_data *td,
		struct rpma_cq *cq, struct ibv_wc *wc);

/* the selected completion-processing function (set once in server_init()) */
static int (*server_cmpl_process_func)(struct thread_data *td,
		struct rpma_cq *cq, struct ibv_wc *wc);
/* per-thread server-side state */
struct server_data {
	/* aligned td->orig_buffer */
	char *orig_buffer_aligned;

	/* resources for messaging buffer from DRAM allocated by fio */
	struct rpma_mr_local *msg_mr;

	/* # of free SQ slots */
	uint32_t msg_sqe_available;

	/* receive CQ (obtained from the connection in server_open_file()) */
	struct rpma_cq *rcq;
};
377383
378384static int server_init (struct thread_data * td )
379385{
386+ struct librpma_fio_options_values * o = td -> eo ;
380387 struct librpma_fio_server_data * csd ;
381388 struct server_data * sd ;
382389 int ret = -1 ;
383390
391+ server_cmpl_process_func = o -> busy_wait_polling ? server_cmpl_poll :
392+ server_cmpl_wait_and_poll ;
393+
384394 if ((ret = librpma_fio_server_init (td )))
385395 return ret ;
386396
@@ -393,13 +403,6 @@ static int server_init(struct thread_data *td)
393403 goto err_server_cleanup ;
394404 }
395405
396- /* allocate in-memory queue */
397- sd -> msgs_queued = calloc (td -> o .iodepth , sizeof (* sd -> msgs_queued ));
398- if (sd -> msgs_queued == NULL ) {
399- td_verror (td , errno , "calloc" );
400- goto err_free_sd ;
401- }
402-
403406 /*
404407 * Assure a single io_u buffer can store both SEND and RECV messages and
405408 * an io_us buffer allocation is page-size-aligned which is required
@@ -412,9 +415,6 @@ static int server_init(struct thread_data *td)
412415
413416 return 0 ;
414417
415- err_free_sd :
416- free (sd );
417-
418418err_server_cleanup :
419419 librpma_fio_server_cleanup (td );
420420
@@ -485,7 +485,6 @@ static void server_cleanup(struct thread_data *td)
485485 if ((ret = rpma_mr_dereg (& sd -> msg_mr )))
486486 librpma_td_verror (td , ret , "rpma_mr_dereg" );
487487
488- free (sd -> msgs_queued );
489488 free (sd );
490489 }
491490
@@ -518,6 +517,7 @@ static int prepare_connection(struct thread_data *td,
518517static int server_open_file (struct thread_data * td , struct fio_file * f )
519518{
520519 struct librpma_fio_server_data * csd = td -> io_ops_data ;
520+ struct server_data * sd = csd -> server_data ;
521521 struct rpma_conn_cfg * cfg = NULL ;
522522 uint16_t max_msg_num = td -> o .iodepth ;
523523 int ret ;
@@ -531,13 +531,15 @@ static int server_open_file(struct thread_data *td, struct fio_file *f)
531531 }
532532
533533 /*
534- * Calculate the required queue sizes where :
534+ * The required queue sizes are :
535535 * - the send queue (SQ) has to be big enough to accommodate
536536 * all possible flush requests (SENDs)
537537 * - the receive queue (RQ) has to be big enough to accommodate
538538 * all flush responses (RECVs)
539- * - the completion queue (CQ) has to be big enough to accommodate
540- * all success and error completions (sq_size + rq_size)
539+ * - the main completion queue (CQ) has to be big enough to
540+ * accommodate all success and error completions (sq_size)
541+ * - the receive completion queue (RCQ) has to be big enough to
542+ * accommodate all success and error completions (rq_size)
541543 */
542544 if ((ret = rpma_conn_cfg_set_sq_size (cfg , max_msg_num ))) {
543545 librpma_td_verror (td , ret , "rpma_conn_cfg_set_sq_size" );
@@ -547,12 +549,21 @@ static int server_open_file(struct thread_data *td, struct fio_file *f)
547549 librpma_td_verror (td , ret , "rpma_conn_cfg_set_rq_size" );
548550 goto err_cfg_delete ;
549551 }
550- if ((ret = rpma_conn_cfg_set_cq_size (cfg , max_msg_num * 2 ))) {
552+ if ((ret = rpma_conn_cfg_set_cq_size (cfg , max_msg_num ))) {
551553 librpma_td_verror (td , ret , "rpma_conn_cfg_set_cq_size" );
552554 goto err_cfg_delete ;
553555 }
556+ if ((ret = rpma_conn_cfg_set_rcq_size (cfg , max_msg_num ))) {
557+ librpma_td_verror (td , ret , "rpma_conn_cfg_set_rcq_size" );
558+ goto err_cfg_delete ;
559+ }
554560
555- ret = librpma_fio_server_open_file (td , f , cfg );
561+ if ((ret = librpma_fio_server_open_file (td , f , cfg )))
562+ goto err_cfg_delete ;
563+
564+ /* get the connection's receive CQ */
565+ if ((ret = rpma_conn_get_rcq (csd -> conn , & sd -> rcq )))
566+ librpma_td_verror (td , ret , "rpma_conn_get_rcq" );
556567
557568err_cfg_delete :
558569 (void ) rpma_conn_cfg_delete (& cfg );
@@ -645,65 +656,26 @@ static int server_qe_process(struct thread_data *td, struct ibv_wc *wc)
645656 return -1 ;
646657}
647658
648- static inline int server_queue_process (struct thread_data * td )
/*
 * server_cmpl_poll - poll and process a completion
 *
 * Non-blocking: polls the given completion queue for at most one work
 * completion. A successful SEND completion frees one SQ slot
 * (sd->msg_sqe_available). An unsuccessful completion is treated as a
 * fatal error and terminates the thread.
 *
 * Return value:
 * 0 or 1 - number of received completions
 * -1 - in case of an error (td->terminate is set)
 */
static int server_cmpl_poll(struct thread_data *td, struct rpma_cq *cq,
		struct ibv_wc *wc)
{
	struct librpma_fio_server_data *csd = td->io_ops_data;
	struct server_data *sd = csd->server_data;
	int ret;

	ret = rpma_cq_get_wc(cq, 1, wc, NULL);
	if (ret == RPMA_E_NO_COMPLETION) {
		/* lack of completion is not an error */
		return 0;
	}
	if (ret) {
		librpma_td_verror(td, ret, "rpma_cq_get_wc");
		goto err_terminate;
	}

	/* an unsuccessful completion is fatal */
	if (wc->status != IBV_WC_SUCCESS)
		goto err_terminate;

	/* a completed SEND releases one SQ slot */
	if (wc->opcode == IBV_WC_SEND)
		++sd->msg_sqe_available;

	return 1;

err_terminate:
	td->terminate = true;

	return -1;
}
727697
/*
 * server_cmpl_wait_and_poll - wait, poll and process a completion
 *
 * First tries a non-blocking poll; if no completion is ready, blocks in
 * rpma_cq_wait() for a completion event and then polls once more.
 *
 * Return value:
 * 0 or 1 - number of received completions
 * -1 - in case of an error (td->terminate is set)
 */
static int server_cmpl_wait_and_poll(struct thread_data *td, struct rpma_cq *cq,
		struct ibv_wc *wc)
{
	int ret;

	/* fast path: a completion may already be available */
	ret = server_cmpl_poll(td, cq, wc);
	if (ret)
		return ret;

	/* lack of completion - wait for a completion event */
	ret = rpma_cq_wait(cq);
	if (ret == RPMA_E_NO_COMPLETION) {
		/* lack of completion is not an error */
		return 0;
	}
	if (ret) {
		librpma_td_verror(td, ret, "rpma_cq_wait");
		td->terminate = true;
		return -1;
	}

	/* the event fired - poll for the completion itself */
	return server_cmpl_poll(td, cq, wc);
}
728+
/*
 * server_queue_process - progress both completion queues and serve
 * at most one incoming request
 *
 * Drains the main CQ (reclaiming SQ slots from SEND completions), then
 * looks for one receive completion on the receive CQ using the selected
 * strategy (busy-poll or wait-and-poll). When a request arrives, spins on
 * the main CQ until at least one SQ slot is free for the response, then
 * hands the request to server_qe_process().
 *
 * Returns 0 on success (including "nothing to do"), negative on error.
 */
static inline int server_queue_process(struct thread_data *td)
{
	struct librpma_fio_server_data *csd = td->io_ops_data;
	struct server_data *sd = csd->server_data;
	struct ibv_wc cq_wc, rcq_wc;
	int ret;

	/* process as many send completions as possible */
	while ((ret = server_cmpl_poll(td, csd->cq, &cq_wc))) {
		if (ret < 0)
			return ret;
	}

	/* process the receive completion */
	ret = server_cmpl_process_func(td, sd->rcq, &rcq_wc);
	if (ret != 1)
		return ret; /* 0 - no request pending; -1 - error */

	/* ret == 1 means rcq_wc.opcode == IBV_WC_RECV */

	/* ensure that at least one SQ slot is available */
	while (sd->msg_sqe_available == 0) {
		/* process the send completion */
		ret = server_cmpl_process_func(td, csd->cq, &cq_wc);
		if (ret < 0)
			return ret;
	}

	return server_qe_process(td, &rcq_wc);
}
759+
728760static enum fio_q_status server_queue (struct thread_data * td , struct io_u * io_u )
729761{
730762 do {
731- if (server_cmpl_process (td ))
732- return FIO_Q_BUSY ;
733-
734763 if (server_queue_process (td ))
735764 return FIO_Q_BUSY ;
736765
0 commit comments