From 23b8647591d656c72e3444977b948aa99b3fae4e Mon Sep 17 00:00:00 2001
From: Tian Xin <tianx@yunsilicon.com>
Date: Tue, 24 Sep 2024 14:43:56 +0800
Subject: [PATCH 1/6] libxscale: Introduce xscale user space RDMA provider

libxscale is a user-space driver that provides RDMA
capabilities to user applications. This patch includes
the following components:
1. basic build framework
2. provider registration/unregistration via verbs
3. query_port
4. query_device_ex

Signed-off-by: Tian Xin <tianx@yunsilicon.com>
Signed-off-by: Wei Honggang <weihg@yunsilicon.com>
Signed-off-by: Zhao Qianwei <zhaoqw@yunsilicon.com>
Signed-off-by: Li Qiang <liq@yunsilicon.com>
Signed-off-by: Yan Lei <jacky@yunsilicon.com>
---
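
Notes (not part of the commit message): the snippet below is a rough,
hypothetical sketch of how an application reaches the paths added in
this patch through the standard libibverbs API. It simply opens the
first device returned by ibv_get_device_list() and assumes that device
is bound to this provider.

#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_device_attr_ex attr = {};
	struct ibv_port_attr port_attr;
	struct ibv_context *ctx;

	if (!list || !list[0])
		return 1;

	/* ends up in xsc_alloc_context() for an xscale device */
	ctx = ibv_open_device(list[0]);
	if (!ctx)
		return 1;

	/* dispatched to xsc_query_device_ex(); fw_ver is formatted by
	 * xsc_set_fw_version() as "vMAJOR.MINOR.PATCH[+TWEAK]"
	 */
	if (!ibv_query_device_ex(ctx, NULL, &attr))
		printf("fw %s, max_cqe %d\n", attr.orig_attr.fw_ver,
		       attr.orig_attr.max_cqe);

	/* dispatched to xsc_query_port() */
	if (!ibv_query_port(ctx, 1, &port_attr))
		printf("port 1 state %d\n", port_attr.state);

	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}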
 CMakeLists.txt                  |   1 +
 MAINTAINERS                     |   9 +
 providers/xscale/CMakeLists.txt |   4 +
 providers/xscale/verbs.c        |  75 +++++++++
 providers/xscale/xsc-abi.h      |  31 ++++
 providers/xscale/xscale.c       | 282 ++++++++++++++++++++++++++++++++
 providers/xscale/xscale.h       | 164 +++++++++++++++++++
 7 files changed, 566 insertions(+)
 create mode 100644 providers/xscale/CMakeLists.txt
 create mode 100644 providers/xscale/verbs.c
 create mode 100644 providers/xscale/xsc-abi.h
 create mode 100644 providers/xscale/xscale.c
 create mode 100644 providers/xscale/xscale.h
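
Reviewer note: the debug output added in this patch is controlled by two
environment variables read in open_debug_file() and set_debug_mask(). A
hypothetical way for a test program to enable context-level tracing
before the provider is initialized (setenv() is plain POSIX, nothing
provider-specific):

	/* XSC_DBG_CTX is bit 7 of the mask, see the enum in xscale.h */
	setenv("XSC_DEBUG_FILE", "/tmp/xsc_dbg.log", 1);
	setenv("XSC_DEBUG_MASK", "0x80", 1);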

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b2bd4f411..839761802 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -729,6 +729,7 @@ add_subdirectory(providers/mthca)
 add_subdirectory(providers/ocrdma)
 add_subdirectory(providers/qedr)
 add_subdirectory(providers/vmw_pvrdma)
+add_subdirectory(providers/xscale)
 endif()
 
 add_subdirectory(providers/hfi1verbs)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4b241171e..4f4aed695 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -189,3 +189,12 @@ PYVERBS
 M:	Edward Srouji <edwards@mellanox.com>
 S:	Supported
 F:	pyverbs/
+
+XSCALE USERSPACE PROVIDER (for xsc_ib.ko)
+M:	Wei Honggang <weihg@yunsilicon.com>
+M:	Zhao Qianwei <zhaoqw@yunsilicon.com>
+M:	Li Qiang <liq@yunsilicon.com>
+M:	Tian Xin <tianx@yunsilicon.com>
+M:	Yan Lei <jacky@yunsilicon.com>
+S:	Supported
+F:	providers/xscale/
diff --git a/providers/xscale/CMakeLists.txt b/providers/xscale/CMakeLists.txt
new file mode 100644
index 000000000..cfd05b492
--- /dev/null
+++ b/providers/xscale/CMakeLists.txt
@@ -0,0 +1,4 @@
+rdma_provider(xscale
+  xscale.c
+  verbs.c
+)
diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c
new file mode 100644
index 000000000..943665a8b
--- /dev/null
+++ b/providers/xscale/verbs.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdatomic.h>
+#include <string.h>
+#include <pthread.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <ccan/array_size.h>
+
+#include <util/compiler.h>
+#include <util/mmio.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+#include <infiniband/cmd_write.h>
+
+#include "xscale.h"
+#include "xsc-abi.h"
+
+int xsc_query_port(struct ibv_context *context, u8 port,
+		   struct ibv_port_attr *attr)
+{
+	struct ibv_query_port cmd;
+
+	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
+}
+
+static void xsc_set_fw_version(struct ibv_device_attr *attr,
+			       union xsc_ib_fw_ver *fw_ver)
+{
+	u8 ver_major = fw_ver->s.ver_major;
+	u8 ver_minor = fw_ver->s.ver_minor;
+	u16 ver_patch = fw_ver->s.ver_patch;
+	u32 ver_tweak = fw_ver->s.ver_tweak;
+
+	if (ver_tweak == 0) {
+		snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u",
+			 ver_major, ver_minor, ver_patch);
+	} else {
+		snprintf(attr->fw_ver, sizeof(attr->fw_ver), "v%u.%u.%u+%u",
+			 ver_major, ver_minor, ver_patch, ver_tweak);
+	}
+}
+
+int xsc_query_device_ex(struct ibv_context *context,
+			const struct ibv_query_device_ex_input *input,
+			struct ibv_device_attr_ex *attr, size_t attr_size)
+{
+	struct ib_uverbs_ex_query_device_resp resp;
+	size_t resp_size = sizeof(resp);
+	union xsc_ib_fw_ver raw_fw_ver;
+	int err;
+
+	raw_fw_ver.data = 0;
+	err = ibv_cmd_query_device_any(context, input, attr, attr_size,
+				       &resp, &resp_size);
+	if (err)
+		return err;
+
+	raw_fw_ver.data = resp.base.fw_ver;
+	xsc_set_fw_version(&attr->orig_attr, &raw_fw_ver);
+
+	return 0;
+}
diff --git a/providers/xscale/xsc-abi.h b/providers/xscale/xsc-abi.h
new file mode 100644
index 000000000..66d2bd1a4
--- /dev/null
+++ b/providers/xscale/xsc-abi.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSC_ABI_H
+#define XSC_ABI_H
+
+#include <infiniband/kern-abi.h>
+#include <infiniband/verbs.h>
+#include <rdma/xsc-abi.h>
+#include <kernel-abi/xsc-abi.h>
+
+#define XSC_UVERBS_MIN_ABI_VERSION 1
+#define XSC_UVERBS_MAX_ABI_VERSION 1
+
+DECLARE_DRV_CMD(xsc_alloc_ucontext, IB_USER_VERBS_CMD_GET_CONTEXT,
+		empty, xsc_ib_alloc_ucontext_resp);
+DECLARE_DRV_CMD(xsc_alloc_pd, IB_USER_VERBS_CMD_ALLOC_PD, empty,
+		xsc_ib_alloc_pd_resp);
+DECLARE_DRV_CMD(xsc_create_cq, IB_USER_VERBS_CMD_CREATE_CQ, xsc_ib_create_cq,
+		xsc_ib_create_cq_resp);
+DECLARE_DRV_CMD(xsc_create_cq_ex, IB_USER_VERBS_EX_CMD_CREATE_CQ,
+		xsc_ib_create_cq, xsc_ib_create_cq_resp);
+DECLARE_DRV_CMD(xsc_create_qp_ex, IB_USER_VERBS_EX_CMD_CREATE_QP,
+		xsc_ib_create_qp, xsc_ib_create_qp_resp);
+DECLARE_DRV_CMD(xsc_create_qp, IB_USER_VERBS_CMD_CREATE_QP, xsc_ib_create_qp,
+		xsc_ib_create_qp_resp);
+
+#endif /* XSC_ABI_H */
diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c
new file mode 100644
index 000000000..c7be81278
--- /dev/null
+++ b/providers/xscale/xscale.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#define _GNU_SOURCE
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <string.h>
+#include <sched.h>
+#include <sys/param.h>
+
+#include <util/mmio.h>
+#include <util/symver.h>
+
+#include "xscale.h"
+#include "xsc-abi.h"
+
+static const struct verbs_match_ent hca_table[] = {
+	VERBS_MODALIAS_MATCH("*xscale*", NULL),
+	{}
+};
+
+u32 xsc_debug_mask;
+static void xsc_free_context(struct ibv_context *ibctx);
+
+static const struct verbs_context_ops xsc_ctx_common_ops = {
+	.query_port = xsc_query_port,
+	.query_device_ex = xsc_query_device_ex,
+	.free_context = xsc_free_context,
+};
+
+static void open_debug_file(struct xsc_context *ctx)
+{
+	char *env;
+
+	env = getenv("XSC_DEBUG_FILE");
+	if (!env) {
+		ctx->dbg_fp = stderr;
+		return;
+	}
+
+	ctx->dbg_fp = fopen(env, "a+");
+	if (!ctx->dbg_fp) {
+		fprintf(stderr, "Failed opening debug file %s, using stderr\n",
+			env);
+		ctx->dbg_fp = stderr;
+		return;
+	}
+}
+
+static void close_debug_file(struct xsc_context *ctx)
+{
+	if (ctx->dbg_fp && ctx->dbg_fp != stderr)
+		fclose(ctx->dbg_fp);
+}
+
+static void set_debug_mask(void)
+{
+	char *env;
+
+	env = getenv("XSC_DEBUG_MASK");
+	if (env)
+		xsc_debug_mask = strtol(env, NULL, 0);
+}
+
+static int xsc_cmd_get_context(struct xsc_context *context,
+			       struct xsc_alloc_ucontext *req, size_t req_len,
+			       struct xsc_alloc_ucontext_resp *resp,
+			       size_t resp_len)
+{
+	struct verbs_context *verbs_ctx = &context->ibv_ctx;
+
+	return ibv_cmd_get_context(verbs_ctx, &req->ibv_cmd, req_len,
+				   &resp->ibv_resp, resp_len);
+}
+
+static int xsc_mmap(struct xsc_device *xdev, struct xsc_context *context,
+		    int cmd_fd, int size)
+{
+	u64 page_mask;
+
+	page_mask = (~(xdev->page_size - 1));
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "page size:%d\n", size);
+	context->sqm_reg_va =
+		mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
+		     context->qpm_tx_db & page_mask);
+	if (context->sqm_reg_va == MAP_FAILED)
+		return -1;
+
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n",
+		context->sqm_reg_va);
+
+	context->rqm_reg_va =
+		mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
+		     context->qpm_rx_db & page_mask);
+	if (context->rqm_reg_va == MAP_FAILED)
+		goto free_sqm;
+
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "qpm reg va:%p\n",
+		context->rqm_reg_va);
+
+	context->cqm_reg_va =
+		mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
+		     context->cqm_next_cid_reg & page_mask);
+	if (context->cqm_reg_va == MAP_FAILED)
+		goto free_rqm;
+
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm ci va:%p\n",
+		context->cqm_reg_va);
+	context->db_mmap_size = size;
+
+	context->cqm_armdb_va =
+		mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd,
+		     context->cqm_armdb & page_mask);
+	if (context->cqm_armdb_va == MAP_FAILED)
+		goto free_cqm;
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "cqm armdb va:%p\n",
+		context->cqm_armdb_va);
+
+	return 0;
+
+free_cqm:
+	munmap(context->cqm_reg_va, size);
+free_rqm:
+	munmap(context->rqm_reg_va, size);
+free_sqm:
+	munmap(context->sqm_reg_va, size);
+
+	return -1;
+}
+
+static void xsc_munmap(struct xsc_context *context)
+{
+	if (context->sqm_reg_va)
+		munmap(context->sqm_reg_va, context->db_mmap_size);
+
+	if (context->rqm_reg_va)
+		munmap(context->rqm_reg_va, context->db_mmap_size);
+
+	if (context->cqm_reg_va)
+		munmap(context->cqm_reg_va, context->db_mmap_size);
+
+	if (context->cqm_armdb_va)
+		munmap(context->cqm_armdb_va, context->db_mmap_size);
+}
+
+static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev,
+					       int cmd_fd, void *private_data)
+{
+	struct xsc_context *context;
+	struct xsc_alloc_ucontext req;
+	struct xsc_alloc_ucontext_resp resp;
+	int i;
+	int page_size;
+	struct xsc_device *xdev = to_xdev(ibdev);
+	struct verbs_context *v_ctx;
+	struct ibv_device_attr_ex device_attr;
+
+	context = verbs_init_and_alloc_context(ibdev, cmd_fd, context, ibv_ctx,
+					       RDMA_DRIVER_XSC);
+	if (!context)
+		return NULL;
+
+	v_ctx = &context->ibv_ctx;
+	page_size = xdev->page_size;
+
+	open_debug_file(context);
+	set_debug_mask();
+	if (gethostname(context->hostname, sizeof(context->hostname)))
+		strncpy(context->hostname, "host_unknown", NAME_BUFFER_SIZE - 1);
+
+	memset(&req, 0, sizeof(req));
+	memset(&resp, 0, sizeof(resp));
+
+	if (xsc_cmd_get_context(context, &req, sizeof(req), &resp,
+				sizeof(resp)))
+		goto err_free;
+
+	context->max_num_qps = resp.qp_tab_size;
+	context->max_sq_desc_sz = resp.max_sq_desc_sz;
+	context->max_rq_desc_sz = resp.max_rq_desc_sz;
+	context->max_send_wr = resp.max_send_wr;
+	context->num_ports = resp.num_ports;
+	context->max_recv_wr = resp.max_recv_wr;
+	context->qpm_tx_db = resp.qpm_tx_db;
+	context->qpm_rx_db = resp.qpm_rx_db;
+	context->cqm_next_cid_reg = resp.cqm_next_cid_reg;
+	context->cqm_armdb = resp.cqm_armdb;
+	context->send_ds_num = resp.send_ds_num;
+	context->send_ds_shift = xsc_ilog2(resp.send_ds_num);
+	context->recv_ds_num = resp.recv_ds_num;
+	context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num);
+
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX,
+		"max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u\n",
+		context->max_num_qps, context->max_sq_desc_sz,
+		context->max_rq_desc_sz);
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX,
+		"max_send_wr:%u, num_ports:%u, max_recv_wr:%u\n",
+		context->max_send_wr,
+		context->num_ports, context->max_recv_wr);
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX,
+		"send_ds_num:%u shift:%u recv_ds_num:%u shift:%u\n",
+		context->send_ds_num, context->send_ds_shift,
+		context->recv_ds_num, context->recv_ds_shift);
+
+	pthread_mutex_init(&context->qp_table_mutex, NULL);
+	for (i = 0; i < XSC_QP_TABLE_SIZE; ++i)
+		context->qp_table[i].refcnt = 0;
+
+	context->page_size = page_size;
+	if (xsc_mmap(xdev, context, cmd_fd, page_size))
+		goto err_free;
+
+	verbs_set_ops(v_ctx, &xsc_ctx_common_ops);
+
+	memset(&device_attr, 0, sizeof(device_attr));
+	if (!xsc_query_device_ex(&v_ctx->context, NULL, &device_attr,
+				 sizeof(struct ibv_device_attr_ex))) {
+		context->max_cqe = device_attr.orig_attr.max_cqe;
+	}
+
+	return v_ctx;
+
+err_free:
+	verbs_uninit_context(&context->ibv_ctx);
+	close_debug_file(context);
+	free(context);
+	return NULL;
+}
+
+static void xsc_free_context(struct ibv_context *ibctx)
+{
+	struct xsc_context *context = to_xctx(ibctx);
+
+	xsc_dbg(context->dbg_fp, XSC_DBG_CTX, "\n");
+	xsc_munmap(context);
+
+	verbs_uninit_context(&context->ibv_ctx);
+	close_debug_file(context);
+	free(context);
+}
+
+static void xsc_uninit_device(struct verbs_device *verbs_device)
+{
+	struct xsc_device *xdev = to_xdev(&verbs_device->device);
+
+	free(xdev);
+}
+
+static struct verbs_device *xsc_device_alloc(struct verbs_sysfs_dev *sysfs_dev)
+{
+	struct xsc_device *xdev;
+
+	xdev = calloc(1, sizeof(*xdev));
+	if (!xdev)
+		return NULL;
+
+	xdev->page_size = sysconf(_SC_PAGESIZE);
+
+	return &xdev->verbs_dev;
+}
+
+static const struct verbs_device_ops xsc_dev_ops = {
+	.name = "xscale",
+	.match_min_abi_version = XSC_UVERBS_MIN_ABI_VERSION,
+	.match_max_abi_version = XSC_UVERBS_MAX_ABI_VERSION,
+	.match_table = hca_table,
+	.alloc_device = xsc_device_alloc,
+	.uninit_device = xsc_uninit_device,
+	.alloc_context = xsc_alloc_context,
+};
+
+PROVIDER_DRIVER(xscale, xsc_dev_ops);
diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h
new file mode 100644
index 000000000..85538d931
--- /dev/null
+++ b/providers/xscale/xscale.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSCALE_H
+#define XSCALE_H
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdatomic.h>
+#include <util/compiler.h>
+
+#include <infiniband/driver.h>
+#include <util/udma_barrier.h>
+#include <ccan/list.h>
+#include <ccan/minmax.h>
+#include <valgrind/memcheck.h>
+
+#include "xsc-abi.h"
+
+typedef uint8_t   u8;
+typedef uint16_t  u16;
+typedef uint32_t  u32;
+typedef uint64_t  u64;
+
+enum {
+	XSC_DBG_QP = 1 << 0,
+	XSC_DBG_CQ = 1 << 1,
+	XSC_DBG_QP_SEND = 1 << 2,
+	XSC_DBG_QP_SEND_ERR = 1 << 3,
+	XSC_DBG_CQ_CQE = 1 << 4,
+	XSC_DBG_CONTIG = 1 << 5,
+	XSC_DBG_DR = 1 << 6,
+	XSC_DBG_CTX = 1 << 7,
+	XSC_DBG_PD = 1 << 8,
+	XSC_DBG_MR = 1 << 9,
+};
+
+extern u32 xsc_debug_mask;
+
+#define xsc_dbg(fp, mask, fmt, args...)                                        \
+	do {                                                                   \
+		if (xsc_debug_mask & (mask)) {                                 \
+			char host[256];                                        \
+			char timestr[32];                                      \
+			struct tm now_tm;                                      \
+			time_t now_time;                                       \
+			time(&now_time);                                       \
+			localtime_r(&now_time, &now_tm);                       \
+			strftime(timestr, sizeof(timestr), "%Y-%m-%d %X",      \
+				 &now_tm);                                     \
+			gethostname(host, 256);                                \
+			fprintf(fp, "[%s %s %s %d] " fmt, timestr, host,       \
+				__func__, __LINE__, ##args);                   \
+		}                                                              \
+	} while (0)
+
+#define xsc_err(fmt, args...)                                                  \
+	do {                                                                   \
+		char host[256];                                                \
+		char timestr[32];                                              \
+		struct tm now_tm;                                              \
+		time_t now_time;                                               \
+		time(&now_time);                                               \
+		localtime_r(&now_time, &now_tm);                               \
+		strftime(timestr, sizeof(timestr), "%Y-%m-%d %X", &now_tm);    \
+		gethostname(host, 256);                                        \
+		printf("[%s %s %s %d] " fmt, timestr, host, __func__,          \
+		       __LINE__, ##args);                                      \
+	} while (0)
+
+enum {
+	XSC_QP_TABLE_SHIFT = 12,
+	XSC_QP_TABLE_MASK = (1 << XSC_QP_TABLE_SHIFT) - 1,
+	XSC_QP_TABLE_SIZE = 1 << (24 - XSC_QP_TABLE_SHIFT),
+};
+
+struct xsc_device {
+	struct verbs_device verbs_dev;
+	int page_size;
+};
+
+#define NAME_BUFFER_SIZE 64
+
+struct xsc_context {
+	struct verbs_context ibv_ctx;
+	int max_num_qps;
+	struct {
+		struct xsc_qp **table;
+		int refcnt;
+	} qp_table[XSC_QP_TABLE_SIZE];
+	pthread_mutex_t qp_table_mutex;
+
+	int max_sq_desc_sz;
+	int max_rq_desc_sz;
+	int max_send_wr;
+	int max_recv_wr;
+	int num_ports;
+	char hostname[NAME_BUFFER_SIZE];
+	u32 max_cqe;
+	void *sqm_reg_va;
+	void *rqm_reg_va;
+	void *cqm_reg_va;
+	void *cqm_armdb_va;
+	int db_mmap_size;
+	u32 page_size;
+	u64 qpm_tx_db;
+	u64 qpm_rx_db;
+	u64 cqm_next_cid_reg;
+	u64 cqm_armdb;
+	u32 send_ds_num;
+	u32 recv_ds_num;
+	u32 send_ds_shift;
+	u32 recv_ds_shift;
+	FILE *dbg_fp;
+	struct xsc_hw_ops *hw_ops;
+};
+
+union xsc_ib_fw_ver {
+	u64 data;
+	struct {
+		u8 ver_major;
+		u8 ver_minor;
+		u16 ver_patch;
+		u32 ver_tweak;
+	} s;
+};
+
+static inline int xsc_ilog2(int n)
+{
+	int t;
+
+	if (n <= 0)
+		return -1;
+
+	t = 0;
+	while ((1 << t) < n)
+		++t;
+
+	return t;
+}
+
+static inline struct xsc_device *to_xdev(struct ibv_device *ibdev)
+{
+	return container_of(ibdev, struct xsc_device, verbs_dev.device);
+}
+
+static inline struct xsc_context *to_xctx(struct ibv_context *ibctx)
+{
+	return container_of(ibctx, struct xsc_context, ibv_ctx.context);
+}
+
+int xsc_query_device(struct ibv_context *context, struct ibv_device_attr *attr);
+int xsc_query_device_ex(struct ibv_context *context,
+			const struct ibv_query_device_ex_input *input,
+			struct ibv_device_attr_ex *attr, size_t attr_size);
+int xsc_query_port(struct ibv_context *context, u8 port,
+		   struct ibv_port_attr *attr);
+
+#endif /* XSCALE_H */

From 751c968f018638ceeeb8fce5f2f9d5c782edac4b Mon Sep 17 00:00:00 2001
From: Tian Xin <tianx@yunsilicon.com>
Date: Tue, 24 Sep 2024 15:06:40 +0800
Subject: [PATCH 2/6] libxscale: Add support for pd and mr

This patch adds support for PD and MR operations, including:
1. alloc_pd
2. dealloc_pd
3. reg_mr
4. dereg_mr

Signed-off-by: Tian Xin <tianx@yunsilicon.com>
Signed-off-by: Wei Honggang <weihg@yunsilicon.com>
Signed-off-by: Zhao Qianwei <zhaoqw@yunsilicon.com>
Signed-off-by: Li Qiang <liq@yunsilicon.com>
Signed-off-by: Yan Lei <jacky@yunsilicon.com>
---
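
Notes (not part of the commit message): a minimal, hypothetical smoke
test for the PD/MR paths added here, using only standard libibverbs
calls; "ctx" is assumed to come from ibv_open_device() on an xscale
device.

#include <infiniband/verbs.h>

static int pd_mr_smoke_test(struct ibv_context *ctx)
{
	static char buf[4096];
	struct ibv_pd *pd;
	struct ibv_mr *mr;

	pd = ibv_alloc_pd(ctx);			/* -> xsc_alloc_pd() */
	if (!pd)
		return -1;

	/* -> xsc_reg_mr(); lkey/rkey are logged at XSC_DBG_MR level */
	mr = ibv_reg_mr(pd, buf, sizeof(buf),
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
	if (!mr) {
		ibv_dealloc_pd(pd);
		return -1;
	}

	ibv_dereg_mr(mr);			/* -> xsc_dereg_mr() */
	return ibv_dealloc_pd(pd);		/* -> xsc_free_pd() */
}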
 providers/xscale/verbs.c  | 85 +++++++++++++++++++++++++++++++++++++++
 providers/xscale/xscale.c |  4 ++
 providers/xscale/xscale.h | 29 +++++++++++++
 3 files changed, 118 insertions(+)

diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c
index 943665a8b..ed265d6e0 100644
--- a/providers/xscale/verbs.c
+++ b/providers/xscale/verbs.c
@@ -36,6 +36,91 @@ int xsc_query_port(struct ibv_context *context, u8 port,
 	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
 }
 
+struct ibv_pd *xsc_alloc_pd(struct ibv_context *context)
+{
+	struct ibv_alloc_pd cmd;
+	struct xsc_alloc_pd_resp resp;
+	struct xsc_pd *pd;
+
+	pd = calloc(1, sizeof(*pd));
+	if (!pd)
+		return NULL;
+
+	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
+			     &resp.ibv_resp, sizeof(resp))) {
+		free(pd);
+		return NULL;
+	}
+
+	atomic_init(&pd->refcount, 1);
+	pd->pdn = resp.pdn;
+	xsc_dbg(to_xctx(context)->dbg_fp, XSC_DBG_PD, "pd number:%u\n",
+		pd->pdn);
+
+	return &pd->ibv_pd;
+}
+
+int xsc_free_pd(struct ibv_pd *pd)
+{
+	int ret;
+	struct xsc_pd *xpd = to_xpd(pd);
+
+	if (atomic_load(&xpd->refcount) > 1)
+		return EBUSY;
+
+	ret = ibv_cmd_dealloc_pd(pd);
+	if (ret)
+		return ret;
+
+	xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_PD, "dealloc pd\n");
+	free(xpd);
+
+	return 0;
+}
+
+struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			  u64 hca_va, int acc)
+{
+	struct xsc_mr *mr;
+	struct ibv_reg_mr cmd;
+	int ret;
+	enum ibv_access_flags access = (enum ibv_access_flags)acc;
+	struct ib_uverbs_reg_mr_resp resp;
+
+	mr = calloc(1, sizeof(*mr));
+	if (!mr)
+		return NULL;
+
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &mr->vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
+	if (ret) {
+		free(mr);
+		return NULL;
+	}
+	mr->alloc_flags = acc;
+
+	xsc_dbg(to_xctx(pd->context)->dbg_fp, XSC_DBG_MR, "lkey:%u, rkey:%u\n",
+		mr->vmr.ibv_mr.lkey, mr->vmr.ibv_mr.rkey);
+
+	return &mr->vmr.ibv_mr;
+}
+
+int xsc_dereg_mr(struct verbs_mr *vmr)
+{
+	int ret;
+
+	if (vmr->mr_type == IBV_MR_TYPE_NULL_MR)
+		goto free;
+
+	ret = ibv_cmd_dereg_mr(vmr);
+	if (ret)
+		return ret;
+
+free:
+	free(vmr);
+	return 0;
+}
+
 static void xsc_set_fw_version(struct ibv_device_attr *attr,
 			       union xsc_ib_fw_ver *fw_ver)
 {
diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c
index c7be81278..cdc37fbd3 100644
--- a/providers/xscale/xscale.c
+++ b/providers/xscale/xscale.c
@@ -33,6 +33,10 @@ static void xsc_free_context(struct ibv_context *ibctx);
 
 static const struct verbs_context_ops xsc_ctx_common_ops = {
 	.query_port = xsc_query_port,
+	.alloc_pd = xsc_alloc_pd,
+	.dealloc_pd = xsc_free_pd,
+	.reg_mr = xsc_reg_mr,
+	.dereg_mr = xsc_dereg_mr,
 	.query_device_ex = xsc_query_device_ex,
 	.free_context = xsc_free_context,
 };
diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h
index 85538d931..3cef67813 100644
--- a/providers/xscale/xscale.h
+++ b/providers/xscale/xscale.h
@@ -120,6 +120,17 @@ struct xsc_context {
 	struct xsc_hw_ops *hw_ops;
 };
 
+struct xsc_pd {
+	struct ibv_pd ibv_pd;
+	u32 pdn;
+	atomic_int refcount;
+};
+
+struct xsc_mr {
+	struct verbs_mr vmr;
+	u32 alloc_flags;
+};
+
 union xsc_ib_fw_ver {
 	u64 data;
 	struct {
@@ -154,6 +165,17 @@ static inline struct xsc_context *to_xctx(struct ibv_context *ibctx)
 	return container_of(ibctx, struct xsc_context, ibv_ctx.context);
 }
 
+/* to_xpd always returns the real xsc_pd object ie the protection domain. */
+static inline struct xsc_pd *to_xpd(struct ibv_pd *ibpd)
+{
+	return container_of(ibpd, struct xsc_pd, ibv_pd);
+}
+
+static inline struct xsc_mr *to_xmr(struct ibv_mr *ibmr)
+{
+	return container_of(ibmr, struct xsc_mr, vmr.ibv_mr);
+}
+
 int xsc_query_device(struct ibv_context *context, struct ibv_device_attr *attr);
 int xsc_query_device_ex(struct ibv_context *context,
 			const struct ibv_query_device_ex_input *input,
@@ -161,4 +183,11 @@ int xsc_query_device_ex(struct ibv_context *context,
 int xsc_query_port(struct ibv_context *context, u8 port,
 		   struct ibv_port_attr *attr);
 
+struct ibv_pd *xsc_alloc_pd(struct ibv_context *context);
+int xsc_free_pd(struct ibv_pd *pd);
+
+struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			  u64 hca_va, int access);
+int xsc_dereg_mr(struct verbs_mr *mr);
+
 #endif /* XSCALE_H */

From 03a1079e960980decc635fad046c9bb7c847d6ef Mon Sep 17 00:00:00 2001
From: Tian Xin <tianx@yunsilicon.com>
Date: Tue, 24 Sep 2024 15:08:07 +0800
Subject: [PATCH 3/6] libxscale: Add support for cq related verbs

This patch adds support for the following cq verbs:
1. create_cq
2. poll_cq
3. req_notify_cq
4. resize_cq
5. destroy_cq

Signed-off-by: Tian Xin <tianx@yunsilicon.com>
Signed-off-by: Wei Honggang <weihg@yunsilicon.com>
Signed-off-by: Zhao Qianwei <zhaoqw@yunsilicon.com>
Signed-off-by: Li Qiang <liq@yunsilicon.com>
Signed-off-by: Yan Lei <jacky@yunsilicon.com>
---
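
Notes (not part of the commit message): a rough, hypothetical sketch of
the CQ life cycle added here, again using only the standard libibverbs
entry points; "ctx" is assumed to come from ibv_open_device().

#include <stdio.h>
#include <infiniband/verbs.h>

static int cq_smoke_test(struct ibv_context *ctx)
{
	struct ibv_wc wc[16];
	struct ibv_cq *cq;
	int n;

	/* -> xsc_create_cq(): the requested depth is rounded up to a
	 * power of two and checked against ctx->max_cqe
	 */
	cq = ibv_create_cq(ctx, 256, NULL, NULL, 0);
	if (!cq)
		return -1;

	/* -> xsc_arm_cq(): writes the CQ arm doorbell */
	ibv_req_notify_cq(cq, 0);

	/* -> xsc_poll_cq(): returns 0 here, nothing has been posted yet */
	n = ibv_poll_cq(cq, 16, wc);
	if (n < 0)
		fprintf(stderr, "poll_cq failed\n");

	return ibv_destroy_cq(cq);		/* -> xsc_destroy_cq() */
}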
 providers/xscale/CMakeLists.txt |   3 +
 providers/xscale/buf.c          |  42 +++
 providers/xscale/cq.c           | 522 ++++++++++++++++++++++++++++++++
 providers/xscale/verbs.c        | 283 +++++++++++++++++
 providers/xscale/xsc_hsi.c      |  96 ++++++
 providers/xscale/xsc_hsi.h      | 178 +++++++++++
 providers/xscale/xscale.c       |  13 +-
 providers/xscale/xscale.h       |  91 ++++++
 8 files changed, 1226 insertions(+), 2 deletions(-)
 create mode 100644 providers/xscale/buf.c
 create mode 100644 providers/xscale/cq.c
 create mode 100644 providers/xscale/xsc_hsi.c
 create mode 100644 providers/xscale/xsc_hsi.h
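
Reviewer note on the completion path: a CQE is treated as software-owned
when its owner bit matches the parity of the consumer-index lap, see
xsc_get_cqe_sw_own(); create_cq() pre-sets every owner bit to 1 so that
nothing in a freshly allocated ring matches the lap-0 expectation of 0.
A simplified illustration (assuming a ring of 8 entries, i.e.
log2_cq_ring_sz == 3):

	/* cons_index 0..7  -> expected owner bit 0
	 * cons_index 8..15 -> expected owner bit 1, and so on
	 */
	static inline int example_sw_owned(unsigned int owner_bit,
					   unsigned int cons_index)
	{
		return owner_bit == ((cons_index >> 3) & 1);
	}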

diff --git a/providers/xscale/CMakeLists.txt b/providers/xscale/CMakeLists.txt
index cfd05b492..f9f174933 100644
--- a/providers/xscale/CMakeLists.txt
+++ b/providers/xscale/CMakeLists.txt
@@ -1,4 +1,7 @@
 rdma_provider(xscale
   xscale.c
   verbs.c
+  cq.c
+  xsc_hsi.c
+  buf.c
 )
diff --git a/providers/xscale/buf.c b/providers/xscale/buf.c
new file mode 100644
index 000000000..2096cef62
--- /dev/null
+++ b/providers/xscale/buf.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <signal.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "util/util.h"
+#include "xscale.h"
+
+int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size)
+{
+	int ret;
+	int al_size;
+
+	al_size = align(size, page_size);
+	ret = posix_memalign(&buf->buf, page_size, al_size);
+	if (ret)
+		return ret;
+
+	ret = ibv_dontfork_range(buf->buf, al_size);
+	if (ret)
+		free(buf->buf);
+
+	buf->length = al_size;
+
+	return ret;
+}
+
+void xsc_free_buf(struct xsc_buf *buf)
+{
+	ibv_dofork_range(buf->buf, buf->length);
+	free(buf->buf);
+}
diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c
new file mode 100644
index 000000000..1aeb7d337
--- /dev/null
+++ b/providers/xscale/cq.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <util/compiler.h>
+#include <infiniband/opcode.h>
+
+#include "xscale.h"
+#include "xsc_hsi.h"
+
+enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+
+static const u32 xsc_msg_opcode[][2][2] = {
+	[XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITHOUT_IMMDT] =
+		XSC_OPCODE_RDMA_REQ_SEND,
+	[XSC_MSG_OPCODE_SEND][XSC_REQ][XSC_WITH_IMMDT] =
+		XSC_OPCODE_RDMA_REQ_SEND_IMMDT,
+	[XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITHOUT_IMMDT] =
+		XSC_OPCODE_RDMA_RSP_RECV,
+	[XSC_MSG_OPCODE_SEND][XSC_RSP][XSC_WITH_IMMDT] =
+		XSC_OPCODE_RDMA_RSP_RECV_IMMDT,
+	[XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITHOUT_IMMDT] =
+		XSC_OPCODE_RDMA_REQ_WRITE,
+	[XSC_MSG_OPCODE_RDMA_WRITE][XSC_REQ][XSC_WITH_IMMDT] =
+		XSC_OPCODE_RDMA_REQ_WRITE_IMMDT,
+	[XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITHOUT_IMMDT] =
+		XSC_OPCODE_RDMA_CQE_ERROR,
+	[XSC_MSG_OPCODE_RDMA_WRITE][XSC_RSP][XSC_WITH_IMMDT] =
+		XSC_OPCODE_RDMA_RSP_WRITE_IMMDT,
+	[XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITHOUT_IMMDT] =
+		XSC_OPCODE_RDMA_REQ_READ,
+	[XSC_MSG_OPCODE_RDMA_READ][XSC_REQ][XSC_WITH_IMMDT] =
+		XSC_OPCODE_RDMA_CQE_ERROR,
+	[XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITHOUT_IMMDT] =
+		XSC_OPCODE_RDMA_CQE_ERROR,
+	[XSC_MSG_OPCODE_RDMA_READ][XSC_RSP][XSC_WITH_IMMDT] =
+		XSC_OPCODE_RDMA_CQE_ERROR,
+};
+
+static const u32 xsc_cqe_opcode[] = {
+	[XSC_OPCODE_RDMA_REQ_SEND] = IBV_WC_SEND,
+	[XSC_OPCODE_RDMA_REQ_SEND_IMMDT] = IBV_WC_SEND,
+	[XSC_OPCODE_RDMA_RSP_RECV] = IBV_WC_RECV,
+	[XSC_OPCODE_RDMA_RSP_RECV_IMMDT] = IBV_WC_RECV,
+	[XSC_OPCODE_RDMA_REQ_WRITE] = IBV_WC_RDMA_WRITE,
+	[XSC_OPCODE_RDMA_REQ_WRITE_IMMDT] = IBV_WC_RDMA_WRITE,
+	[XSC_OPCODE_RDMA_RSP_WRITE_IMMDT] = IBV_WC_RECV_RDMA_WITH_IMM,
+	[XSC_OPCODE_RDMA_REQ_READ] = IBV_WC_RDMA_READ,
+};
+
+static inline u8 xsc_get_cqe_opcode(struct xsc_context *ctx,
+				    struct xsc_cqe *cqe) ALWAYS_INLINE;
+static inline u8 xsc_get_cqe_opcode(struct xsc_context *ctx,
+				    struct xsc_cqe *cqe)
+{
+	u8 msg_opcode = ctx->hw_ops->get_cqe_msg_opcode(cqe);
+	u8 type = FIELD_GET(CQE_DATA0_TYPE_MASK, le32toh(cqe->data0));
+	u8 with_immdt = FIELD_GET(CQE_DATA0_WITH_IMMDT_MASK,
+				  le32toh(cqe->data0));
+
+	if (ctx->hw_ops->is_err_cqe(cqe))
+		return type ? XSC_OPCODE_RDMA_RSP_ERROR :
+				   XSC_OPCODE_RDMA_REQ_ERROR;
+	if (msg_opcode > XSC_MSG_OPCODE_RDMA_READ) {
+		printf("rdma cqe msg code should be send/write/read\n");
+		return XSC_OPCODE_RDMA_CQE_ERROR;
+	}
+	return xsc_msg_opcode[msg_opcode][type][with_immdt];
+}
+
+struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, u32 qpn)
+{
+	int tind = qpn >> XSC_QP_TABLE_SHIFT;
+
+	if (ctx->qp_table[tind].refcnt)
+		return ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK];
+	else
+		return NULL;
+}
+
+static inline int get_qp_ctx(struct xsc_context *xctx,
+			     struct xsc_resource **cur_rsc,
+			     u32 qpn) ALWAYS_INLINE;
+static inline int get_qp_ctx(struct xsc_context *xctx,
+			     struct xsc_resource **cur_rsc, u32 qpn)
+{
+	if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) {
+		/*
+		 * We do not have to take the QP table lock here,
+		 * because CQs will be locked while QPs are removed
+		 * from the table.
+		 */
+		*cur_rsc = (struct xsc_resource *)xsc_find_qp(xctx, qpn);
+		if (unlikely(!*cur_rsc))
+			return CQ_POLL_ERR;
+	}
+
+	return CQ_OK;
+}
+
+static void *get_cqe(struct xsc_cq *cq, int n)
+{
+	return cq->active_buf->buf + n * cq->cqe_sz;
+}
+
+static void *get_sw_cqe(struct xsc_cq *cq, int n)
+{
+	int cid = n & (cq->verbs_cq.cq_ex.cqe - 1);
+	struct xsc_cqe *cqe = get_cqe(cq, cid);
+
+	if (likely(xsc_get_cqe_sw_own(cqe, n, cq->log2_cq_ring_sz)))
+		return cqe;
+	else
+		return NULL;
+}
+
+void *xsc_get_send_wqe(struct xsc_qp *qp, int n)
+{
+	return qp->sq_start + (n << qp->sq.wqe_shift);
+}
+
+static void update_cons_index(struct xsc_cq *cq)
+{
+	struct xsc_context *ctx =
+		to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context);
+
+	ctx->hw_ops->set_cq_ci(cq->db, cq->cqn, cq->cons_index);
+}
+
+static void dump_cqe(void *buf)
+{
+	__le32 *p = buf;
+	int i;
+
+	for (i = 0; i < 8; i += 4)
+		printf("0x%08x 0x%08x 0x%08x 0x%08x\n", p[i], p[i + 1],
+		       p[i + 2], p[i + 3]);
+}
+
+static enum ibv_wc_status xsc_cqe_error_code(u8 error_code)
+{
+	switch (error_code) {
+	case XSC_ERR_CODE_NAK_RETRY:
+		return IBV_WC_RETRY_EXC_ERR;
+	case XSC_ERR_CODE_NAK_OPCODE:
+		return IBV_WC_REM_INV_REQ_ERR;
+	case XSC_ERR_CODE_NAK_MR:
+		return IBV_WC_REM_ACCESS_ERR;
+	case XSC_ERR_CODE_NAK_OPERATION:
+		return IBV_WC_REM_OP_ERR;
+	case XSC_ERR_CODE_NAK_RNR:
+		return IBV_WC_RNR_RETRY_EXC_ERR;
+	case XSC_ERR_CODE_LOCAL_MR:
+		return IBV_WC_LOC_PROT_ERR;
+	case XSC_ERR_CODE_LOCAL_LEN:
+		return IBV_WC_LOC_LEN_ERR;
+	case XSC_ERR_CODE_LEN_GEN_CQE:
+		return IBV_WC_LOC_LEN_ERR;
+	case XSC_ERR_CODE_OPERATION:
+		return IBV_WC_LOC_ACCESS_ERR;
+	case XSC_ERR_CODE_FLUSH:
+		return IBV_WC_WR_FLUSH_ERR;
+	case XSC_ERR_CODE_MALF_WQE_HOST:
+	case XSC_ERR_CODE_STRG_ACC_GEN_CQE:
+	case XSC_ERR_CODE_STRG_ACC:
+		return IBV_WC_FATAL_ERR;
+	case XSC_ERR_CODE_MR_GEN_CQE:
+		return IBV_WC_LOC_PROT_ERR;
+	case XSC_ERR_CODE_OPCODE_GEN_CQE:
+	case XSC_ERR_CODE_LOCAL_OPCODE:
+	default:
+		return IBV_WC_GENERAL_ERR;
+	}
+}
+
+static inline void handle_good_req(struct ibv_wc *wc, struct xsc_cqe *cqe,
+				   struct xsc_qp *qp, struct xsc_wq *wq,
+				   u8 opcode)
+{
+	int idx;
+	struct xsc_send_wqe_ctrl_seg *ctrl;
+
+	wc->opcode = xsc_cqe_opcode[opcode];
+	wc->status = IBV_WC_SUCCESS;
+	idx = FIELD_GET(CQE_DATA1_WQE_ID_MASK, le64toh(cqe->data1));
+	idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+	idx &= (wq->wqe_cnt - 1);
+	wc->wr_id = wq->wrid[idx];
+	wq->tail = wq->wqe_head[idx] + 1;
+	if (opcode == XSC_OPCODE_RDMA_REQ_READ) {
+		ctrl = xsc_get_send_wqe(qp, idx);
+		wc->byte_len = le32toh(ctrl->msg_len);
+	}
+	wq->flush_wqe_cnt--;
+
+	xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE,
+		"wqeid:%u, wq tail:%u\n", idx, wq->tail);
+}
+
+static inline void handle_good_responder(struct ibv_wc *wc, struct xsc_cqe *cqe,
+					 struct xsc_wq *wq, u8 opcode)
+{
+	u16 idx;
+	struct xsc_qp *qp = container_of(wq, struct xsc_qp, rq);
+
+	wc->byte_len = le32toh(cqe->msg_len);
+	wc->opcode = xsc_cqe_opcode[opcode];
+	wc->status = IBV_WC_SUCCESS;
+
+	idx = wq->tail & (wq->wqe_cnt - 1);
+	wc->wr_id = wq->wrid[idx];
+	++wq->tail;
+	wq->flush_wqe_cnt--;
+
+	xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_CQ_CQE,
+		"recv cqe idx:%u, len:%u\n", idx, wc->byte_len);
+}
+
+static inline void handle_bad_req(struct xsc_context *xctx, struct ibv_wc *wc,
+				  struct xsc_cqe *cqe, struct xsc_qp *qp,
+				  struct xsc_wq *wq)
+{
+	int idx;
+	u8 error_code = xctx->hw_ops->get_cqe_error_code(cqe);
+
+	wc->status = xsc_cqe_error_code(error_code);
+	wc->vendor_err = error_code;
+	idx = FIELD_GET(CQE_DATA1_WQE_ID_MASK, le64toh(cqe->data1));
+	idx >>= (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+	idx &= (wq->wqe_cnt - 1);
+	wq->tail = wq->wqe_head[idx] + 1;
+	wc->wr_id = wq->wrid[idx];
+	wq->flush_wqe_cnt--;
+	if (error_code != XSC_ERR_CODE_FLUSH) {
+		printf("%s: got completion with error:\n", xctx->hostname);
+		dump_cqe(cqe);
+	}
+	qp->ibv_qp->state = IBV_QPS_ERR;
+}
+
+static inline void handle_bad_responder(struct xsc_context *xctx,
+					struct ibv_wc *wc, struct xsc_cqe *cqe,
+					struct xsc_qp *qp, struct xsc_wq *wq)
+{
+	u8 error_code = xctx->hw_ops->get_cqe_error_code(cqe);
+
+	wc->status = xsc_cqe_error_code(error_code);
+	wc->vendor_err = error_code;
+
+	++wq->tail;
+	wq->flush_wqe_cnt--;
+	if (error_code != XSC_ERR_CODE_FLUSH) {
+		printf("%s: got completion with error:\n", xctx->hostname);
+		dump_cqe(cqe);
+	}
+	qp->ibv_qp->state = IBV_QPS_ERR;
+}
+
+static inline int xsc_parse_cqe(struct xsc_cq *cq, struct xsc_cqe *cqe,
+				struct xsc_resource **cur_rsc,
+				struct ibv_wc *wc, int lazy)
+{
+	struct xsc_wq *wq;
+	u32 qp_id;
+	u8 opcode;
+	int err = 0;
+	struct xsc_qp *xqp = NULL;
+	struct xsc_context *xctx;
+
+	xctx = to_xctx(ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex)->context);
+	qp_id = FIELD_GET(CQE_DATA0_QP_ID_MASK, le32toh(cqe->data0));
+	wc->wc_flags = 0;
+	wc->qp_num = qp_id;
+	opcode = xsc_get_cqe_opcode(xctx, cqe);
+
+	xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE, "opcode:0x%x qp_num:%u\n", opcode,
+		qp_id);
+	switch (opcode) {
+	case XSC_OPCODE_RDMA_REQ_SEND_IMMDT:
+	case XSC_OPCODE_RDMA_REQ_WRITE_IMMDT:
+		wc->wc_flags |= IBV_WC_WITH_IMM;
+		SWITCH_FALLTHROUGH;
+	case XSC_OPCODE_RDMA_REQ_SEND:
+	case XSC_OPCODE_RDMA_REQ_WRITE:
+	case XSC_OPCODE_RDMA_REQ_READ:
+		err = get_qp_ctx(xctx, cur_rsc, qp_id);
+		if (unlikely(err))
+			return CQ_EMPTY;
+		xqp = rsc_to_xqp(*cur_rsc);
+		wq = &xqp->sq;
+		handle_good_req(wc, cqe, xqp, wq, opcode);
+		break;
+	case XSC_OPCODE_RDMA_RSP_RECV_IMMDT:
+	case XSC_OPCODE_RDMA_RSP_WRITE_IMMDT:
+		wc->wc_flags |= IBV_WC_WITH_IMM;
+		wc->imm_data = htobe32(le32toh(cqe->imm_data));
+		SWITCH_FALLTHROUGH;
+	case XSC_OPCODE_RDMA_RSP_RECV:
+		err = get_qp_ctx(xctx, cur_rsc, qp_id);
+		if (unlikely(err))
+			return CQ_EMPTY;
+		xqp = rsc_to_xqp(*cur_rsc);
+		wq = &xqp->rq;
+		handle_good_responder(wc, cqe, wq, opcode);
+		break;
+	case XSC_OPCODE_RDMA_REQ_ERROR:
+		err = get_qp_ctx(xctx, cur_rsc, qp_id);
+		if (unlikely(err))
+			return CQ_POLL_ERR;
+		xqp = rsc_to_xqp(*cur_rsc);
+		wq = &xqp->sq;
+		handle_bad_req(xctx, wc, cqe, xqp, wq);
+		break;
+	case XSC_OPCODE_RDMA_RSP_ERROR:
+		err = get_qp_ctx(xctx, cur_rsc, qp_id);
+		if (unlikely(err))
+			return CQ_POLL_ERR;
+		xqp = rsc_to_xqp(*cur_rsc);
+		wq = &xqp->rq;
+		handle_bad_responder(xctx, wc, cqe, xqp, wq);
+		break;
+	case XSC_OPCODE_RDMA_CQE_ERROR:
+		printf("%s: got completion with cqe format error:\n",
+		       xctx->hostname);
+		dump_cqe(cqe);
+		SWITCH_FALLTHROUGH;
+	default:
+		return CQ_POLL_ERR;
+	}
+	return CQ_OK;
+}
+
+static inline int xsc_poll_one(struct xsc_cq *cq, struct xsc_resource **cur_rsc,
+			       struct ibv_wc *wc) ALWAYS_INLINE;
+static inline int xsc_poll_one(struct xsc_cq *cq, struct xsc_resource **cur_rsc,
+			       struct ibv_wc *wc)
+{
+	struct xsc_cqe *cqe = get_sw_cqe(cq, cq->cons_index);
+
+	if (!cqe)
+		return CQ_EMPTY;
+
+	memset(wc, 0, sizeof(*wc));
+
+	++cq->cons_index;
+
+	/*
+	 * Make sure we read CQ entry contents after we've checked the
+	 * ownership bit.
+	 */
+	udma_from_device_barrier();
+	return xsc_parse_cqe(cq, cqe, cur_rsc, wc, 0);
+}
+
+static inline void gen_flush_err_cqe(struct xsc_err_state_qp_node *err_node,
+				     u32 qp_id, struct xsc_wq *wq,
+				     u32 idx, struct ibv_wc *wc)
+{
+	memset(wc, 0, sizeof(*wc));
+	if (err_node->is_sq) {
+		switch (wq->wr_opcode[idx]) {
+		case IBV_WR_SEND:
+		case IBV_WR_SEND_WITH_IMM:
+		case IBV_WR_SEND_WITH_INV:
+			wc->opcode = IBV_WC_SEND;
+			break;
+		case IBV_WR_RDMA_WRITE:
+		case IBV_WR_RDMA_WRITE_WITH_IMM:
+			wc->opcode = IBV_WC_RDMA_WRITE;
+			break;
+		case IBV_WR_RDMA_READ:
+			wc->opcode = IBV_WC_RDMA_READ;
+		}
+	} else {
+		wc->opcode = IBV_WC_RECV;
+	}
+
+	wc->qp_num = qp_id;
+	wc->status = IBV_WC_WR_FLUSH_ERR;
+	wc->vendor_err = XSC_ERR_CODE_FLUSH;
+	wc->wr_id = wq->wrid[idx];
+	wq->tail++;
+	wq->flush_wqe_cnt--;
+}
+
+static inline int xsc_generate_flush_err_cqe(struct ibv_cq *ibcq, int ne,
+					     int *npolled, struct ibv_wc *wc)
+{
+	u32 qp_id = 0;
+	u32 flush_wqe_cnt = 0;
+	int sw_npolled = 0;
+	int ret = 0;
+	u32 idx = 0;
+	struct xsc_err_state_qp_node *err_qp_node, *tmp;
+	struct xsc_resource *res = NULL;
+	struct xsc_context *xctx = to_xctx(ibcq->context);
+	struct xsc_cq *cq = to_xcq(ibcq);
+	struct xsc_wq *wq;
+
+	list_for_each_safe(&cq->err_state_qp_list, err_qp_node, tmp, entry) {
+		if (!err_qp_node)
+			break;
+
+		sw_npolled = 0;
+		qp_id = err_qp_node->qp_id;
+		ret = get_qp_ctx(xctx, &res, qp_id);
+		if (unlikely(ret))
+			continue;
+		wq = err_qp_node->is_sq ? &(rsc_to_xqp(res)->sq) :
+					  &(rsc_to_xqp(res)->rq);
+		flush_wqe_cnt = wq->flush_wqe_cnt;
+		xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ_CQE,
+			"is_sq %d, flush_wq_cnt %d, ne %d, npolled %d, qp_id %d\n",
+			err_qp_node->is_sq, wq->flush_wqe_cnt, ne, *npolled,
+			qp_id);
+
+		if (flush_wqe_cnt <= (ne - *npolled)) {
+			while (sw_npolled < flush_wqe_cnt) {
+				idx = wq->tail & (wq->wqe_cnt - 1);
+				if (err_qp_node->is_sq &&
+				    !wq->need_flush[idx]) {
+					wq->tail++;
+					continue;
+				} else {
+					gen_flush_err_cqe(err_qp_node,
+							  err_qp_node->qp_id,
+							  wq, idx,
+							  wc + *npolled + sw_npolled);
+					++sw_npolled;
+				}
+			}
+			list_del(&err_qp_node->entry);
+			free(err_qp_node);
+			*npolled += sw_npolled;
+		} else {
+			while (sw_npolled < (ne - *npolled)) {
+				idx = wq->tail & (wq->wqe_cnt - 1);
+				if (err_qp_node->is_sq &&
+				    !wq->need_flush[idx]) {
+					wq->tail++;
+					continue;
+				} else {
+					gen_flush_err_cqe(err_qp_node,
+							  err_qp_node->qp_id,
+							  wq, idx,
+							  wc + *npolled + sw_npolled);
+					++sw_npolled;
+				}
+			}
+			*npolled = ne;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static inline int poll_cq(struct ibv_cq *ibcq, int ne,
+			  struct ibv_wc *wc) ALWAYS_INLINE;
+static inline int poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+	struct xsc_cq *cq = to_xcq(ibcq);
+	struct xsc_resource *rsc = NULL;
+	int npolled = 0;
+	int err = CQ_OK;
+	u32 next_cid = cq->cons_index;
+
+	xsc_spin_lock(&cq->lock);
+	for (npolled = 0; npolled < ne; ++npolled) {
+		err = xsc_poll_one(cq, &rsc, wc + npolled);
+		if (err != CQ_OK)
+			break;
+	}
+
+	if (err == CQ_EMPTY) {
+		if (npolled < ne && !(list_empty(&cq->err_state_qp_list)))
+			xsc_generate_flush_err_cqe(ibcq, ne, &npolled, wc);
+	}
+
+	udma_to_device_barrier();
+	if (next_cid != cq->cons_index)
+		update_cons_index(cq);
+	xsc_spin_unlock(&cq->lock);
+
+	return err == CQ_POLL_ERR ? err : npolled;
+}
+
+int xsc_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
+{
+	return poll_cq(ibcq, ne, wc);
+}
+
+int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq,
+		     struct xsc_buf *buf, int nent, int cqe_sz)
+{
+	struct xsc_device *xdev = to_xdev(xctx->ibv_ctx.context.device);
+	int ret;
+
+	ret = xsc_alloc_buf(buf, align(nent * cqe_sz, xdev->page_size),
+			    xdev->page_size);
+	if (ret)
+		return -1;
+
+	memset(buf->buf, 0, nent * cqe_sz);
+	return 0;
+}
+
+void xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf)
+{
+	return xsc_free_buf(buf);
+}
diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c
index ed265d6e0..29de73111 100644
--- a/providers/xscale/verbs.c
+++ b/providers/xscale/verbs.c
@@ -17,6 +17,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <inttypes.h>
 #include <sys/mman.h>
 #include <ccan/array_size.h>
 
@@ -27,6 +28,7 @@
 
 #include "xscale.h"
 #include "xsc-abi.h"
+#include "xsc_hsi.h"
 
 int xsc_query_port(struct ibv_context *context, u8 port,
 		   struct ibv_port_attr *attr)
@@ -121,6 +123,287 @@ int xsc_dereg_mr(struct verbs_mr *vmr)
 	return 0;
 }
 
+int xsc_round_up_power_of_two(long long sz)
+{
+	long long ret;
+
+	for (ret = 1; ret < sz; ret <<= 1)
+		; /* nothing */
+
+	if (ret > INT_MAX) {
+		fprintf(stderr, "%s: roundup overflow\n", __func__);
+		return -ENOMEM;
+	}
+
+	return (int)ret;
+}
+
+static int align_queue_size(long long req)
+{
+	return xsc_round_up_power_of_two(req);
+}
+
+enum { CREATE_CQ_SUPPORTED_WC_FLAGS =
+	       IBV_WC_STANDARD_FLAGS | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP |
+	       IBV_WC_EX_WITH_CVLAN | IBV_WC_EX_WITH_FLOW_TAG |
+	       IBV_WC_EX_WITH_TM_INFO |
+	       IBV_WC_EX_WITH_COMPLETION_TIMESTAMP_WALLCLOCK };
+
+enum { CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS };
+
+enum { CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED |
+				   IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN };
+
+static int xsc_cqe_depth_check(void)
+{
+	char *e;
+
+	e = getenv("XSC_CQE_DEPTH_CHECK");
+	if (e && !strcmp(e, "n"))
+		return 0;
+
+	return 1;
+}
+
+static struct ibv_cq_ex *create_cq(struct ibv_context *context,
+				   const struct ibv_cq_init_attr_ex *cq_attr,
+				   int cq_alloc_flags)
+{
+	struct xsc_create_cq cmd = {};
+	struct xsc_create_cq_resp resp = {};
+	struct xsc_create_cq_ex cmd_ex = {};
+	struct xsc_create_cq_ex_resp resp_ex = {};
+	struct xsc_ib_create_cq *cmd_drv;
+	struct xsc_ib_create_cq_resp *resp_drv;
+	struct xsc_cq *cq;
+	int cqe_sz;
+	int ret;
+	int ncqe;
+	struct xsc_context *xctx = to_xctx(context);
+	bool use_ex = false;
+	char *env;
+	int i;
+
+	if (!cq_attr->cqe) {
+		xsc_err("CQE invalid\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u\n", cq_attr->cqe);
+
+	if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) {
+		xsc_err("Unsupported comp_mask for create cq\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS &&
+	    cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) {
+		xsc_err("Unsupported creation flags requested for create cq\n");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) {
+		xsc_err("unsupported flags:0x%" PRIx64 "\n", cq_attr->wc_flags);
+		errno = ENOTSUP;
+		return NULL;
+	}
+
+	cq = calloc(1, sizeof(*cq));
+	if (!cq) {
+		xsc_err("Alloc CQ failed\n");
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) {
+		if (cq_attr->flags & IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN)
+			use_ex = true;
+	}
+
+	xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "use_ex:%u\n", use_ex);
+
+	cmd_drv = use_ex ? &cmd_ex.drv_payload : &cmd.drv_payload;
+	resp_drv = use_ex ? &resp_ex.drv_payload : &resp.drv_payload;
+
+	cq->cons_index = 0;
+
+	if (xsc_spinlock_init(&cq->lock))
+		goto err;
+
+	ncqe = align_queue_size(cq_attr->cqe);
+	if (ncqe < XSC_CQE_RING_DEPTH_MIN) {
+		xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ,
+			"CQE ring size %u is not enough, set it as %u\n", ncqe,
+			XSC_CQE_RING_DEPTH_MIN);
+		ncqe = XSC_CQE_RING_DEPTH_MIN;
+	}
+
+	if (ncqe > xctx->max_cqe) {
+		if (xsc_cqe_depth_check()) {
+			xsc_err("CQE ring size %u exceeds CQE ring depth %u, abort!\n",
+				ncqe, xctx->max_cqe);
+			errno = EINVAL;
+			goto err_spl;
+		} else {
+			xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ,
+				"CQE ring size %u exceeds the MAX ring size, set it as %u\n",
+				ncqe, xctx->max_cqe);
+			ncqe = xctx->max_cqe;
+		}
+	}
+
+	cqe_sz = XSC_CQE_SIZE;
+	xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "CQE number:%u, size:%u\n", ncqe,
+		cqe_sz);
+
+	if (xsc_alloc_cq_buf(to_xctx(context), cq, &cq->buf, ncqe, cqe_sz)) {
+		xsc_err("Alloc cq buffer failed.\n");
+		errno = ENOMEM;
+		goto err_spl;
+	}
+
+	cq->cqe_sz = cqe_sz;
+	cq->flags = cq_alloc_flags;
+
+	cmd_drv->buf_addr = (uintptr_t)cq->buf.buf;
+	cmd_drv->cqe_size = cqe_sz;
+
+	xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "buf_addr:%p\n", cq->buf.buf);
+
+	if (use_ex) {
+		struct ibv_cq_init_attr_ex cq_attr_ex = *cq_attr;
+
+		cq_attr_ex.cqe = ncqe;
+		ret = ibv_cmd_create_cq_ex(context, &cq_attr_ex, &cq->verbs_cq,
+					   &cmd_ex.ibv_cmd, sizeof(cmd_ex),
+					   &resp_ex.ibv_resp, sizeof(resp_ex),
+					   0);
+	} else {
+		ret = ibv_cmd_create_cq(context, ncqe, cq_attr->channel,
+					cq_attr->comp_vector,
+					ibv_cq_ex_to_cq(&cq->verbs_cq.cq_ex),
+					&cmd.ibv_cmd, sizeof(cmd),
+					&resp.ibv_resp, sizeof(resp));
+	}
+
+	if (ret) {
+		xsc_err("ibv_cmd_create_cq failed, ret %d\n", ret);
+		goto err_buf;
+	}
+
+	cq->active_buf = &cq->buf;
+	cq->resize_buf = NULL;
+	cq->cqn = resp_drv->cqn;
+
+	cq->db = xctx->cqm_reg_va +
+		 (xctx->cqm_next_cid_reg & (xctx->page_size - 1));
+	cq->armdb =
+		xctx->cqm_armdb_va + (xctx->cqm_armdb & (xctx->page_size - 1));
+	cq->cqe_cnt = ncqe;
+	cq->log2_cq_ring_sz = xsc_ilog2(ncqe);
+
+	for (i = 0; i < ncqe; i++) {
+		struct xsc_cqe *cqe = (struct xsc_cqe *)(cq->active_buf->buf +
+							 i * cq->cqe_sz);
+		u32 owner_data = 0;
+
+		owner_data |= FIELD_PREP(CQE_DATA2_OWNER_MASK, 1);
+		cqe->data2 = htole32(owner_data);
+	}
+
+	env = getenv("XSC_DISABLE_FLUSH_ERROR");
+	cq->disable_flush_error_cqe = env ? true : false;
+	xsc_dbg(xctx->dbg_fp, XSC_DBG_CQ, "cqe count:%u cqn:%u\n", cq->cqe_cnt,
+		cq->cqn);
+	list_head_init(&cq->err_state_qp_list);
+	return &cq->verbs_cq.cq_ex;
+
+err_buf:
+	xsc_free_cq_buf(to_xctx(context), &cq->buf);
+
+err_spl:
+	xsc_spinlock_destroy(&cq->lock);
+
+err:
+	free(cq);
+
+	return NULL;
+}
+
+struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe,
+			     struct ibv_comp_channel *channel, int comp_vector)
+{
+	struct ibv_cq_ex *cq;
+	struct ibv_cq_init_attr_ex cq_attr = { .cqe = cqe,
+					       .channel = channel,
+					       .comp_vector = comp_vector,
+					       .wc_flags =
+						       IBV_WC_STANDARD_FLAGS };
+
+	if (cqe <= 0) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	cq = create_cq(context, &cq_attr, 0);
+	return cq ? ibv_cq_ex_to_cq(cq) : NULL;
+}
+
+int xsc_arm_cq(struct ibv_cq *ibvcq, int solicited)
+{
+	struct xsc_cq *cq = to_xcq(ibvcq);
+	struct xsc_context *ctx = to_xctx(ibvcq->context);
+
+	ctx->hw_ops->update_cq_db(cq->armdb, cq->cqn, cq->cons_index,
+				  solicited);
+
+	return 0;
+}
+
+int xsc_resize_cq(struct ibv_cq *ibcq, int cqe)
+{
+	struct xsc_cq *cq = to_xcq(ibcq);
+
+	if (cqe < 0) {
+		errno = EINVAL;
+		return errno;
+	}
+
+	xsc_spin_lock(&cq->lock);
+	cq->active_cqes = cq->verbs_cq.cq_ex.cqe;
+	/* currently we don't change cqe size */
+	cq->resize_cqe_sz = cq->cqe_sz;
+	cq->resize_cqes = cq->verbs_cq.cq_ex.cqe;
+	xsc_spin_unlock(&cq->lock);
+	cq->resize_buf = NULL;
+	return 0;
+}
+
+int xsc_destroy_cq(struct ibv_cq *cq)
+{
+	int ret;
+	struct xsc_err_state_qp_node *tmp, *err_qp_node;
+
+	xsc_dbg(to_xctx(cq->context)->dbg_fp, XSC_DBG_CQ, "\n");
+	ret = ibv_cmd_destroy_cq(cq);
+	if (ret)
+		return ret;
+
+	list_for_each_safe(&to_xcq(cq)->err_state_qp_list, err_qp_node, tmp,
+			   entry) {
+		list_del(&err_qp_node->entry);
+		free(err_qp_node);
+	}
+
+	xsc_free_cq_buf(to_xctx(cq->context), to_xcq(cq)->active_buf);
+	free(to_xcq(cq));
+
+	return 0;
+}
+
 static void xsc_set_fw_version(struct ibv_device_attr *attr,
 			       union xsc_ib_fw_ver *fw_ver)
 {
diff --git a/providers/xscale/xsc_hsi.c b/providers/xscale/xsc_hsi.c
new file mode 100644
index 000000000..d84fb52ed
--- /dev/null
+++ b/providers/xscale/xsc_hsi.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <util/mmio.h>
+
+#include "xscale.h"
+#include "xsc_hsi.h"
+
+static void andes_ring_tx_doorbell(void *db_addr, u32 sqn,
+				   u32 next_pid)
+{
+	u32 tx_db = 0;
+
+	tx_db = FIELD_PREP(ANDES_SEND_DB_NEXT_PID_MASK, next_pid) |
+		FIELD_PREP(ANDES_SEND_DB_QP_ID_MASK, sqn);
+
+	udma_to_device_barrier();
+	mmio_write32_le(db_addr, htole32(tx_db));
+}
+
+static void andes_ring_rx_doorbell(void *db_addr, u32 rqn,
+				   u32 next_pid)
+{
+	u32 rx_db = 0;
+
+	rx_db = FIELD_PREP(ANDES_RECV_DB_NEXT_PID_MASK, next_pid) |
+		FIELD_PREP(ANDES_RECV_DB_QP_ID_MASK, rqn);
+
+	udma_to_device_barrier();
+	mmio_write32_le(db_addr, htole32(rx_db));
+}
+
+static void andes_update_cq_db(void *db_addr, u32 cqn, u32 next_cid,
+			       u8 solicited)
+{
+	u32 cq_db;
+
+	cq_db = FIELD_PREP(ANDES_CQ_DB_NEXT_CID_MASK, next_cid) |
+		FIELD_PREP(ANDES_CQ_DB_CQ_ID_MASK, cqn) |
+		FIELD_PREP(ANDES_CQ_DB_ARM_MASK, solicited);
+
+	udma_to_device_barrier();
+	mmio_wc_start();
+	mmio_write32_le(db_addr, htole32(cq_db));
+	mmio_flush_writes();
+}
+
+static void andes_set_cq_ci(void *db_addr, u32 cqn, u32 next_cid)
+{
+	u32 cq_db;
+
+	cq_db = FIELD_PREP(ANDES_CQ_DB_NEXT_CID_MASK, next_cid) |
+		FIELD_PREP(ANDES_CQ_DB_CQ_ID_MASK, cqn) |
+		FIELD_PREP(ANDES_CQ_DB_ARM_MASK, 0);
+
+	udma_to_device_barrier();
+	mmio_write32_le(db_addr, htole32(cq_db));
+}
+
+static bool andes_is_err_cqe(struct xsc_cqe *cqe)
+{
+	return FIELD_GET(CQE_DATA0_IS_ERR_MASK, le32toh(cqe->data0));
+}
+
+static u8 andes_get_cqe_error_code(struct xsc_cqe *cqe)
+{
+	return FIELD_GET(CQE_DATA0_ERROR_CODE_ANDES_MASK,
+			 le32toh(cqe->data0));
+}
+
+static u8 andes_get_msg_opcode(struct xsc_cqe *cqe)
+{
+	return FIELD_GET(CQE_DATA0_MSG_OPCODE_ANDES_MASK,
+			 le32toh(cqe->data0));
+}
+
+static struct xsc_hw_ops andes_ops = {
+	.ring_tx_doorbell = andes_ring_tx_doorbell,
+	.ring_rx_doorbell = andes_ring_rx_doorbell,
+	.update_cq_db = andes_update_cq_db,
+	.set_cq_ci = andes_set_cq_ci,
+	.is_err_cqe = andes_is_err_cqe,
+	.get_cqe_error_code = andes_get_cqe_error_code,
+	.get_cqe_msg_opcode = andes_get_msg_opcode,
+};
+
+void xsc_init_hw_ops(struct xsc_context *ctx)
+{
+	ctx->hw_ops = &andes_ops;
+}
diff --git a/providers/xscale/xsc_hsi.h b/providers/xscale/xsc_hsi.h
new file mode 100644
index 000000000..230d7101b
--- /dev/null
+++ b/providers/xscale/xsc_hsi.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef __XSC_HSI_H__
+#define __XSC_HSI_H__
+
+#include <linux/types.h>
+#include <endian.h>
+
+#include "util/util.h"
+#include "xscale.h"
+
+/* message opcode */
+enum {
+	XSC_MSG_OPCODE_SEND = 0,
+	XSC_MSG_OPCODE_RDMA_WRITE = 1,
+	XSC_MSG_OPCODE_RDMA_READ = 2,
+	XSC_MSG_OPCODE_MAD = 3,
+	XSC_MSG_OPCODE_RDMA_ACK = 4,
+	XSC_MSG_OPCODE_RDMA_ACK_READ = 5,
+	XSC_MSG_OPCODE_RDMA_CNP = 6,
+	XSC_MSG_OPCODE_RAW = 7,
+	XSC_MSG_OPCODE_VIRTIO_NET = 8,
+	XSC_MSG_OPCODE_VIRTIO_BLK = 9,
+	XSC_MSG_OPCODE_RAW_TPE = 10,
+	XSC_MSG_OPCODE_INIT_QP_REQ = 11,
+	XSC_MSG_OPCODE_INIT_QP_RSP = 12,
+	XSC_MSG_OPCODE_INIT_PATH_REQ = 13,
+	XSC_MSG_OPCODE_INIT_PATH_RSP = 14,
+};
+
+enum {
+	XSC_REQ = 0,
+	XSC_RSP = 1,
+};
+
+enum {
+	XSC_WITHOUT_IMMDT = 0,
+	XSC_WITH_IMMDT = 1,
+};
+
+enum {
+	XSC_ERR_CODE_NAK_RETRY = 0x40,
+	XSC_ERR_CODE_NAK_OPCODE = 0x41,
+	XSC_ERR_CODE_NAK_MR = 0x42,
+	XSC_ERR_CODE_NAK_OPERATION = 0x43,
+	XSC_ERR_CODE_NAK_RNR = 0x44,
+	XSC_ERR_CODE_LOCAL_MR = 0x45,
+	XSC_ERR_CODE_LOCAL_LEN = 0x46,
+	XSC_ERR_CODE_LOCAL_OPCODE = 0x47,
+	XSC_ERR_CODE_CQ_OVER_FLOW = 0x48,
+	XSC_ERR_CODE_STRG_ACC_GEN_CQE = 0x4b,
+	XSC_ERR_CODE_STRG_ACC = 0x4c,
+	XSC_ERR_CODE_CQE_ACC = 0x4d,
+	XSC_ERR_CODE_FLUSH = 0x4e,
+	XSC_ERR_CODE_MALF_WQE_HOST = 0x50,
+	XSC_ERR_CODE_MALF_WQE_INFO = 0x51,
+	XSC_ERR_CODE_MR_NON_NAK = 0x52,
+	XSC_ERR_CODE_OPCODE_GEN_CQE = 0x61,
+	XSC_ERR_CODE_MANY_READ = 0x62,
+	XSC_ERR_CODE_LEN_GEN_CQE = 0x63,
+	XSC_ERR_CODE_MR = 0x65,
+	XSC_ERR_CODE_MR_GEN_CQE = 0x66,
+	XSC_ERR_CODE_OPERATION = 0x67,
+	XSC_ERR_CODE_MALF_WQE_INFO_GEN_NAK = 0x68,
+};
+
+enum {
+	XSC_OPCODE_RDMA_REQ_SEND = 0,
+	XSC_OPCODE_RDMA_REQ_SEND_IMMDT = 1,
+	XSC_OPCODE_RDMA_RSP_RECV = 2,
+	XSC_OPCODE_RDMA_RSP_RECV_IMMDT = 3,
+	XSC_OPCODE_RDMA_REQ_WRITE = 4,
+	XSC_OPCODE_RDMA_REQ_WRITE_IMMDT = 5,
+	XSC_OPCODE_RDMA_RSP_WRITE_IMMDT = 6,
+	XSC_OPCODE_RDMA_REQ_READ = 7,
+	XSC_OPCODE_RDMA_REQ_ERROR = 8,
+	XSC_OPCODE_RDMA_RSP_ERROR = 9,
+	XSC_OPCODE_RDMA_CQE_ERROR = 10,
+};
+
+enum {
+	XSC_BASE_WQE_SHIFT = 4,
+};
+
+/*
+ * Descriptors that are allocated by SW and accessed by HW, 32-byte aligned
+ */
+#define CTRL_SEG_WQE_HDR_MSG_OPCODE_MASK GENMASK(7, 0)
+#define CTRL_SEG_WQE_HDR_WITH_IMMDT_MASK BIT(8)
+#define CTRL_SEG_WQE_HDR_DS_NUM_MASK GENMASK(15, 11)
+#define CTRL_SEG_WQE_HDR_WQE_ID_MASK GENMASK(31, 16)
+#define CTRL_SEG_DATA0_SE_MASK BIT(0)
+#define CTRL_SEG_DATA0_CE_MASK BIT(1)
+#define CTRL_SEG_DATA0_IN_LINE_MASK BIT(2)
+
+struct xsc_send_wqe_ctrl_seg {
+	__le32 wqe_hdr;
+	__le32 msg_len;
+	__le32 opcode_data;
+	__le32 data0;
+};
+
+#define DATA_SEG_DATA0_SEG_LEN_MASK GENMASK(31, 1)
+
+struct xsc_wqe_data_seg {
+	union {
+		struct {
+			__le32 data0;
+			__le32 mkey;
+			__le64 va;
+		};
+		struct {
+			u8 in_line_data[16];
+		};
+	};
+};
+
+#define CQE_DATA0_MSG_OPCODE_ANDES_MASK	GENMASK(7, 0)
+#define CQE_DATA0_ERROR_CODE_ANDES_MASK	GENMASK(6, 0)
+#define CQE_DATA0_IS_ERR_MASK		BIT(7)
+#define CQE_DATA0_QP_ID_MASK		GENMASK(22, 8)
+#define CQE_DATA0_SE_MASK		BIT(24)
+#define CQE_DATA0_HAS_PPH_MASK		BIT(25)
+#define CQE_DATA0_TYPE_MASK		BIT(26)
+#define CQE_DATA0_WITH_IMMDT_MASK	BIT(27)
+#define CQE_DATA0_CSUM_ERR_MASK		GENMASK(31, 28)
+#define CQE_DATA1_TS_MASK		GENMASK_ULL(47, 0)
+#define CQE_DATA1_WQE_ID_MASK		GENMASK_ULL(63, 48)
+#define CQE_DATA2_OWNER_MASK		BIT(31)
+
+struct xsc_cqe {
+	__le32 data0;
+	__le32 imm_data;
+	__le32 msg_len;
+	__le32 vni;
+	__le64 data1;
+	__le32 rsv;
+	__le32 data2;
+};
+
+#define ANDES_SEND_DB_NEXT_PID_MASK	GENMASK(15, 0)
+#define ANDES_SEND_DB_QP_ID_MASK	GENMASK(30, 16)
+#define ANDES_RECV_DB_NEXT_PID_MASK	GENMASK(12, 0)
+#define ANDES_RECV_DB_QP_ID_MASK	GENMASK(27, 13)
+#define ANDES_CQ_DB_NEXT_CID_MASK	GENMASK(15, 0)
+#define ANDES_CQ_DB_CQ_ID_MASK		GENMASK(30, 16)
+#define ANDES_CQ_DB_ARM_MASK		BIT(31)
+
+struct xsc_hw_ops {
+	void (*ring_tx_doorbell)(void *db, u32 sqn, u32 next_pid);
+	void (*ring_rx_doorbell)(void *db, u32 rqn, u32 next_pid);
+	void (*update_cq_db)(void *db, u32 cqn, u32 next_cid,
+			     u8 solicited);
+	void (*set_cq_ci)(void *db, u32 cqn, u32 next_cid);
+	bool (*is_err_cqe)(struct xsc_cqe *cqe);
+	u8 (*get_cqe_error_code)(struct xsc_cqe *cqe);
+	u8 (*get_cqe_msg_opcode)(struct xsc_cqe *cqe);
+};
+
+/* Size of CQE */
+#define XSC_CQE_SIZE sizeof(struct xsc_cqe)
+
+#define XSC_SEND_WQE_RING_DEPTH_MIN 16
+#define XSC_CQE_RING_DEPTH_MIN 2
+
+void xsc_init_hw_ops(struct xsc_context *ctx);
+static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid,
+				      int ring_sz) ALWAYS_INLINE;
+
+static inline bool xsc_get_cqe_sw_own(struct xsc_cqe *cqe, int cid, int ring_sz)
+{
+	return FIELD_GET(CQE_DATA2_OWNER_MASK, le32toh(cqe->data2)) == ((cid >> ring_sz) & 1);
+}
+#endif /* __XSC_HSI_H__ */
diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c
index cdc37fbd3..7b439f786 100644
--- a/providers/xscale/xscale.c
+++ b/providers/xscale/xscale.c
@@ -22,6 +22,7 @@
 
 #include "xscale.h"
 #include "xsc-abi.h"
+#include "xsc_hsi.h"
 
 static const struct verbs_match_ent hca_table[] = {
 	VERBS_MODALIAS_MATCH("*xscale*", NULL),
@@ -33,12 +34,19 @@ static void xsc_free_context(struct ibv_context *ibctx);
 
 static const struct verbs_context_ops xsc_ctx_common_ops = {
 	.query_port = xsc_query_port,
+	.query_device_ex = xsc_query_device_ex,
+	.free_context = xsc_free_context,
+
 	.alloc_pd = xsc_alloc_pd,
 	.dealloc_pd = xsc_free_pd,
 	.reg_mr = xsc_reg_mr,
 	.dereg_mr = xsc_dereg_mr,
-	.query_device_ex = xsc_query_device_ex,
-	.free_context = xsc_free_context,
+
+	.create_cq = xsc_create_cq,
+	.poll_cq = xsc_poll_cq,
+	.req_notify_cq = xsc_arm_cq,
+	.resize_cq = xsc_resize_cq,
+	.destroy_cq = xsc_destroy_cq,
 };
 
 static void open_debug_file(struct xsc_context *ctx)
@@ -202,6 +210,7 @@ static struct verbs_context *xsc_alloc_context(struct ibv_device *ibdev,
 	context->send_ds_shift = xsc_ilog2(resp.send_ds_num);
 	context->recv_ds_num = resp.recv_ds_num;
 	context->recv_ds_shift = xsc_ilog2(resp.recv_ds_num);
+	xsc_init_hw_ops(context);
 
 	xsc_dbg(context->dbg_fp, XSC_DBG_CTX,
 		"max_num_qps:%u, max_sq_desc_sz:%u max_rq_desc_sz:%u\n",
diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h
index 3cef67813..21df88c13 100644
--- a/providers/xscale/xscale.h
+++ b/providers/xscale/xscale.h
@@ -79,11 +79,19 @@ enum {
 	XSC_QP_TABLE_SIZE = 1 << (24 - XSC_QP_TABLE_SHIFT),
 };
 
+struct xsc_resource {
+	u32 rsn;
+};
+
 struct xsc_device {
 	struct verbs_device verbs_dev;
 	int page_size;
 };
 
+struct xsc_spinlock {
+	pthread_spinlock_t lock;
+};
+
 #define NAME_BUFFER_SIZE 64
 
 struct xsc_context {
@@ -120,12 +128,46 @@ struct xsc_context {
 	struct xsc_hw_ops *hw_ops;
 };
 
+struct xsc_buf {
+	void *buf;
+	size_t length;
+};
+
 struct xsc_pd {
 	struct ibv_pd ibv_pd;
 	u32 pdn;
 	atomic_int refcount;
 };
 
+struct xsc_err_state_qp_node {
+	struct list_node entry;
+	u32 qp_id;
+	int is_sq;
+};
+
+struct xsc_cq {
+	/* ibv_cq should always be a subset of ibv_cq_ex */
+	struct verbs_cq verbs_cq;
+	struct xsc_buf buf;
+	struct xsc_buf *active_buf;
+	struct xsc_buf *resize_buf;
+	int resize_cqes;
+	int active_cqes;
+	struct xsc_spinlock lock;
+	u32 cqn;
+	u32 cons_index;
+	__le32 *db;
+	__le32 *armdb;
+	u32 cqe_cnt;
+	int log2_cq_ring_sz;
+	int cqe_sz;
+	int resize_cqe_sz;
+	struct xsc_resource *cur_rsc;
+	u32 flags;
+	int disable_flush_error_cqe;
+	struct list_head err_state_qp_list;
+};
+
 struct xsc_mr {
 	struct verbs_mr vmr;
 	u32 alloc_flags;
@@ -171,11 +213,25 @@ static inline struct xsc_pd *to_xpd(struct ibv_pd *ibpd)
 	return container_of(ibpd, struct xsc_pd, ibv_pd);
 }
 
+static inline struct xsc_cq *to_xcq(struct ibv_cq *ibcq)
+{
+	return container_of((struct ibv_cq_ex *)ibcq, struct xsc_cq,
+			    verbs_cq.cq_ex);
+}
+
 static inline struct xsc_mr *to_xmr(struct ibv_mr *ibmr)
 {
 	return container_of(ibmr, struct xsc_mr, vmr.ibv_mr);
 }
 
+static inline struct xsc_qp *rsc_to_xqp(struct xsc_resource *rsc)
+{
+	return (struct xsc_qp *)rsc;
+}
+
+int xsc_alloc_buf(struct xsc_buf *buf, size_t size, int page_size);
+void xsc_free_buf(struct xsc_buf *buf);
+
 int xsc_query_device(struct ibv_context *context, struct ibv_device_attr *attr);
 int xsc_query_device_ex(struct ibv_context *context,
 			const struct ibv_query_device_ex_input *input,
@@ -189,5 +245,40 @@ int xsc_free_pd(struct ibv_pd *pd);
 struct ibv_mr *xsc_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 			  u64 hca_va, int access);
 int xsc_dereg_mr(struct verbs_mr *mr);
+struct ibv_cq *xsc_create_cq(struct ibv_context *context, int cqe,
+			     struct ibv_comp_channel *channel, int comp_vector);
+struct ibv_cq_ex *xsc_create_cq_ex(struct ibv_context *context,
+				   struct ibv_cq_init_attr_ex *cq_attr);
+int xsc_alloc_cq_buf(struct xsc_context *xctx, struct xsc_cq *cq,
+		     struct xsc_buf *buf, int nent, int cqe_sz);
+void xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf);
+int xsc_resize_cq(struct ibv_cq *cq, int cqe);
+int xsc_destroy_cq(struct ibv_cq *cq);
+int xsc_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
+int xsc_arm_cq(struct ibv_cq *cq, int solicited);
+void __xsc_cq_clean(struct xsc_cq *cq, u32 qpn);
+void xsc_cq_clean(struct xsc_cq *cq, u32 qpn);
+int xsc_round_up_power_of_two(long long sz);
+void *xsc_get_send_wqe(struct xsc_qp *qp, int n);
+
+static inline int xsc_spin_lock(struct xsc_spinlock *lock)
+{
+	return pthread_spin_lock(&lock->lock);
+}
+
+static inline int xsc_spin_unlock(struct xsc_spinlock *lock)
+{
+	return pthread_spin_unlock(&lock->lock);
+}
+
+static inline int xsc_spinlock_init(struct xsc_spinlock *lock)
+{
+	return pthread_spin_init(&lock->lock, PTHREAD_PROCESS_PRIVATE);
+}
+
+static inline int xsc_spinlock_destroy(struct xsc_spinlock *lock)
+{
+	return pthread_spin_destroy(&lock->lock);
+}
 
 #endif /* XSC_H */

From 7c8b8309a6372497c4c47386e7ba965c907de25f Mon Sep 17 00:00:00 2001
From: Tian Xin <tianx@yunsilicon.com>
Date: Tue, 24 Sep 2024 15:09:07 +0800
Subject: [PATCH 4/6] libxscale: Add support for qp management

This patch adds support for the following QP management verbs
(a short usage sketch follows the list):
1. create_qp
2. query_qp
3. modify_qp
4. destroy_qp
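
For reference, this is roughly how the new verbs are reached through
the standard libibverbs API. The snippet below is only an illustrative
sketch (pd and cq are assumed to have been created already, error
checking is omitted and all attribute values are arbitrary), not code
added by this patch:

	struct ibv_qp_init_attr init_attr = {
		.send_cq = cq,
		.recv_cq = cq,
		.cap = {
			.max_send_wr = 64,
			.max_recv_wr = 64,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
		.qp_type = IBV_QPT_RC,
	};
	struct ibv_qp_attr attr = {
		.qp_state = IBV_QPS_INIT,
		.pkey_index = 0,
		.port_num = 1,
		.qp_access_flags = 0,
	};
	struct ibv_qp *qp;

	qp = ibv_create_qp(pd, &init_attr);
	ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
		      IBV_QP_PORT | IBV_QP_ACCESS_FLAGS);
	ibv_query_qp(qp, &attr, IBV_QP_STATE, &init_attr);
	ibv_destroy_qp(qp);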

Signed-off-by: Tian Xin <tianx@yunsilicon.com>
Signed-off-by: Wei Honggang <weihg@yunsilicon.com>
Signed-off-by: Zhao Qianwei <zhaoqw@yunsilicon.com>
Signed-off-by: Li Qiang <liq@yunsilicon.com>
Signed-off-by: Yan Lei <jacky@yunsilicon.com>
---
 providers/xscale/CMakeLists.txt |   1 +
 providers/xscale/cq.c           |  71 +++-
 providers/xscale/qp.c           | 110 ++++++
 providers/xscale/verbs.c        | 570 ++++++++++++++++++++++++++++++++
 providers/xscale/xscale.c       |   5 +
 providers/xscale/xscale.h       |  72 ++++
 6 files changed, 819 insertions(+), 10 deletions(-)
 create mode 100644 providers/xscale/qp.c

diff --git a/providers/xscale/CMakeLists.txt b/providers/xscale/CMakeLists.txt
index f9f174933..63de71cf5 100644
--- a/providers/xscale/CMakeLists.txt
+++ b/providers/xscale/CMakeLists.txt
@@ -2,6 +2,7 @@ rdma_provider(xscale
   xscale.c
   verbs.c
   cq.c
+  qp.c
   xsc_hsi.c
   buf.c
 )
diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c
index 1aeb7d337..7c5284250 100644
--- a/providers/xscale/cq.c
+++ b/providers/xscale/cq.c
@@ -79,16 +79,6 @@ static inline u8 xsc_get_cqe_opcode(struct xsc_context *ctx,
 	return xsc_msg_opcode[msg_opcode][type][with_immdt];
 }
 
-struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, u32 qpn)
-{
-	int tind = qpn >> XSC_QP_TABLE_SHIFT;
-
-	if (ctx->qp_table[tind].refcnt)
-		return ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK];
-	else
-		return NULL;
-}
-
 static inline int get_qp_ctx(struct xsc_context *xctx,
 			     struct xsc_resource **cur_rsc,
 			     u32 qpn) ALWAYS_INLINE;
@@ -520,3 +510,64 @@ void xsc_free_cq_buf(struct xsc_context *ctx, struct xsc_buf *buf)
 {
 	return xsc_free_buf(buf);
 }
+
+void __xsc_cq_clean(struct xsc_cq *cq, u32 qpn)
+{
+	u32 prod_index;
+	int nfreed = 0;
+	void *cqe, *dest;
+
+	if (!cq)
+		return;
+	xsc_dbg(to_xctx(cq->verbs_cq.cq_ex.context)->dbg_fp, XSC_DBG_CQ, "\n");
+
+	/*
+	 * First we need to find the current producer index, so we
+	 * know where to start cleaning from.  It doesn't matter if HW
+	 * adds new entries after this loop -- the QP we're worried
+	 * about is already in RESET, so the new entries won't come
+	 * from our QP and therefore don't need to be checked.
+	 */
+	for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index);
+	     ++prod_index)
+		if (prod_index == cq->cons_index + cq->verbs_cq.cq_ex.cqe)
+			break;
+
+	/*
+	 * Now sweep backwards through the CQ, removing CQ entries
+	 * that match our QP by copying older entries on top of them.
+	 */
+	while ((int)(--prod_index) - (int)cq->cons_index >= 0) {
+		u32 qp_id;
+
+		cqe = get_cqe(cq, prod_index & (cq->verbs_cq.cq_ex.cqe - 1));
+		qp_id = FIELD_GET(CQE_DATA0_QP_ID_MASK,
+				  le32toh(((struct xsc_cqe *)cqe)->data0));
+		if (qpn == qp_id) {
+			++nfreed;
+		} else if (nfreed) {
+			dest = get_cqe(cq,
+				       (prod_index + nfreed) &
+					       (cq->verbs_cq.cq_ex.cqe - 1));
+			memcpy(dest, cqe, cq->cqe_sz);
+		}
+	}
+
+	if (nfreed) {
+		cq->cons_index += nfreed;
+		/*
+		 * Make sure update of buffer contents is done before
+		 * updating consumer index.
+		 */
+		udma_to_device_barrier();
+		update_cons_index(cq);
+	}
+}
+
+void xsc_cq_clean(struct xsc_cq *cq, uint32_t qpn)
+{
+	xsc_spin_lock(&cq->lock);
+	__xsc_cq_clean(cq, qpn);
+	xsc_spin_unlock(&cq->lock);
+}
+
diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c
new file mode 100644
index 000000000..7fa715c4b
--- /dev/null
+++ b/providers/xscale/qp.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <util/compiler.h>
+
+#include "xscale.h"
+#include "xsc_hsi.h"
+
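+/*
+ * The QP table is a two-level array: the top level is indexed by the
+ * high bits of the QPN, and each second-level chunk is allocated on
+ * demand and reference counted.
+ */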
+struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn)
+{
+	int tind = qpn >> XSC_QP_TABLE_SHIFT;
+
+	if (ctx->qp_table[tind].refcnt)
+		return ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK];
+	else
+		return NULL;
+}
+
+int xsc_store_qp(struct xsc_context *ctx, uint32_t qpn, struct xsc_qp *qp)
+{
+	int tind = qpn >> XSC_QP_TABLE_SHIFT;
+
+	if (!ctx->qp_table[tind].refcnt) {
+		ctx->qp_table[tind].table =
+			calloc(XSC_QP_TABLE_MASK + 1, sizeof(struct xsc_qp *));
+		if (!ctx->qp_table[tind].table)
+			return -1;
+	}
+
+	++ctx->qp_table[tind].refcnt;
+	ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = qp;
+	return 0;
+}
+
+void xsc_clear_qp(struct xsc_context *ctx, uint32_t qpn)
+{
+	int tind = qpn >> XSC_QP_TABLE_SHIFT;
+
+	if (!--ctx->qp_table[tind].refcnt)
+		free(ctx->qp_table[tind].table);
+	else
+		ctx->qp_table[tind].table[qpn & XSC_QP_TABLE_MASK] = NULL;
+}
+
+int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state,
+		     enum ibv_qp_state state)
+{
+	struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node;
+	struct xsc_qp *xqp = to_xqp(qp);
+	int ret = 0;
+
+	xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP,
+		"modify qp: qpid %d, cur_qp_state %d, qp_state %d\n",
+		xqp->rsc.rsn, cur_state, state);
+	if (cur_state == IBV_QPS_ERR && state != IBV_QPS_ERR) {
+		if (qp->recv_cq) {
+			list_for_each_safe(&to_xcq(qp->recv_cq)->err_state_qp_list,
+					   err_rq_node, tmp, entry) {
+				if (err_rq_node->qp_id == xqp->rsc.rsn) {
+					list_del(&err_rq_node->entry);
+					free(err_rq_node);
+				}
+			}
+		}
+
+		if (qp->send_cq) {
+			list_for_each_safe(&to_xcq(qp->send_cq)->err_state_qp_list,
+					   err_sq_node, tmp, entry) {
+				if (err_sq_node->qp_id == xqp->rsc.rsn) {
+					list_del(&err_sq_node->entry);
+					free(err_sq_node);
+				}
+			}
+		}
+		return ret;
+	}
+
+	if (cur_state != IBV_QPS_ERR && state == IBV_QPS_ERR) {
+		if (qp->recv_cq) {
+			err_rq_node = calloc(1, sizeof(*err_rq_node));
+			if (!err_rq_node)
+				return ENOMEM;
+			err_rq_node->qp_id = xqp->rsc.rsn;
+			err_rq_node->is_sq = false;
+			list_add_tail(&to_xcq(qp->recv_cq)->err_state_qp_list,
+				      &err_rq_node->entry);
+		}
+
+		if (qp->send_cq) {
+			err_sq_node = calloc(1, sizeof(*err_sq_node));
+			if (!err_sq_node)
+				return ENOMEM;
+			err_sq_node->qp_id = xqp->rsc.rsn;
+			err_sq_node->is_sq = true;
+			list_add_tail(&to_xcq(qp->send_cq)->err_state_qp_list,
+				      &err_sq_node->entry);
+		}
+	}
+	return ret;
+}
diff --git a/providers/xscale/verbs.c b/providers/xscale/verbs.c
index 29de73111..b11755e5f 100644
--- a/providers/xscale/verbs.c
+++ b/providers/xscale/verbs.c
@@ -441,3 +441,573 @@ int xsc_query_device_ex(struct ibv_context *context,
 
 	return 0;
 }
+
+static int xsc_calc_sq_size(struct xsc_context *ctx,
+			    struct ibv_qp_init_attr_ex *attr, struct xsc_qp *qp)
+{
+	int wqe_size;
+	int wq_size;
+	int wq_size_min = 0;
+
+	if (!attr->cap.max_send_wr)
+		return 0;
+
+	wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->send_ds_shift);
+
+	wq_size = xsc_round_up_power_of_two(attr->cap.max_send_wr);
+
+	if (attr->qp_type != IBV_QPT_RAW_PACKET)
+		wq_size_min = XSC_SEND_WQE_RING_DEPTH_MIN;
+	if (wq_size < wq_size_min) {
+		xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+			"WQE count %u is too small, setting it to %u\n", wq_size,
+			wq_size_min);
+		wq_size = wq_size_min;
+	}
+
+	if (wq_size > ctx->max_send_wr) {
+		xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+			"WQE count %u exceeds ring depth, setting it to %u\n",
+			wq_size, ctx->max_send_wr);
+		wq_size = ctx->max_send_wr;
+	}
+
+	qp->max_inline_data = attr->cap.max_inline_data;
+	qp->sq.wqe_cnt = wq_size;
+	qp->sq.ds_cnt = wq_size << ctx->send_ds_shift;
+	qp->sq.seg_cnt = 1 << ctx->send_ds_shift;
+	qp->sq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->send_ds_shift;
+	qp->sq.max_gs = attr->cap.max_send_sge;
+	qp->sq.max_post = qp->sq.wqe_cnt;
+	if (attr->cap.max_inline_data >
+	    (qp->sq.seg_cnt - 2) * sizeof(struct xsc_wqe_data_seg))
+		return -EINVAL;
+
+	xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+		"Send WQE count:%u, max post:%u wqe shift:%u\n", qp->sq.wqe_cnt,
+		qp->sq.max_post, qp->sq.wqe_shift);
+
+	return wqe_size * qp->sq.wqe_cnt;
+}
+
+static int xsc_calc_rq_size(struct xsc_context *ctx,
+			    struct ibv_qp_init_attr_ex *attr, struct xsc_qp *qp)
+{
+	int wqe_size;
+	int wq_size;
+	int wq_size_min = 0;
+
+	if (!attr->cap.max_recv_wr)
+		return 0;
+
+	wqe_size = 1 << (XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift);
+
+	wq_size = xsc_round_up_power_of_two(attr->cap.max_recv_wr);
+	/* Due to a hardware limitation, the RDMA RQ depth must be at
+	 * least the number of DS per send WQE.
+	 */
+	if (attr->qp_type != IBV_QPT_RAW_PACKET)
+		wq_size_min = ctx->send_ds_num;
+	if (wq_size < wq_size_min) {
+		xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+			"WQE count %u is too small, setting it to %u\n", wq_size,
+			wq_size_min);
+		wq_size = wq_size_min;
+	}
+
+	if (wq_size > ctx->max_recv_wr) {
+		xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+			"WQE count %u exceeds ring depth, setting it to %u\n",
+			wq_size, ctx->max_recv_wr);
+		wq_size = ctx->max_recv_wr;
+	}
+
+	qp->rq.wqe_cnt = wq_size;
+	qp->rq.ds_cnt = qp->rq.wqe_cnt << ctx->recv_ds_shift;
+	qp->rq.seg_cnt = 1 << ctx->recv_ds_shift;
+	qp->rq.wqe_shift = XSC_BASE_WQE_SHIFT + ctx->recv_ds_shift;
+	qp->rq.max_post = qp->rq.wqe_cnt;
+	qp->rq.max_gs = attr->cap.max_recv_sge;
+
+	xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+		"Recv WQE count:%u, max post:%u wqe shift:%u\n", qp->rq.wqe_cnt,
+		qp->rq.max_post, qp->rq.wqe_shift);
+	return wqe_size * qp->rq.wqe_cnt;
+}
+
+static int xsc_calc_wq_size(struct xsc_context *ctx,
+			    struct ibv_qp_init_attr_ex *attr, struct xsc_qp *qp)
+{
+	int ret;
+	int result;
+
+	ret = xsc_calc_sq_size(ctx, attr, qp);
+	if (ret < 0)
+		return ret;
+
+	result = ret;
+
+	ret = xsc_calc_rq_size(ctx, attr, qp);
+	if (ret < 0)
+		return ret;
+
+	result += ret;
+
+	qp->sq.offset = ret;
+	qp->rq.offset = 0;
+
+	return result;
+}
+
+static int xsc_alloc_qp_buf(struct ibv_context *context,
+			    struct ibv_qp_init_attr_ex *attr, struct xsc_qp *qp,
+			    int size)
+{
+	int err;
+
+	if (qp->sq.wqe_cnt) {
+		qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid));
+		if (!qp->sq.wrid) {
+			errno = ENOMEM;
+			err = -1;
+			return err;
+		}
+
+		qp->sq.wqe_head =
+			malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head));
+		if (!qp->sq.wqe_head) {
+			errno = ENOMEM;
+			err = -1;
+			goto ex_wrid;
+		}
+
+		qp->sq.need_flush =
+			malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.need_flush));
+		if (!qp->sq.need_flush) {
+			errno = ENOMEM;
+			err = -1;
+			goto ex_wrid;
+		}
+		memset(qp->sq.need_flush, 0, qp->sq.wqe_cnt);
+
+		qp->sq.wr_opcode =
+			malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_opcode));
+		if (!qp->sq.wr_opcode) {
+			errno = ENOMEM;
+			err = -1;
+			goto ex_wrid;
+		}
+	}
+
+	if (qp->rq.wqe_cnt) {
+		qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t));
+		if (!qp->rq.wrid) {
+			errno = ENOMEM;
+			err = -1;
+			goto ex_wrid;
+		}
+	}
+
+	err = xsc_alloc_buf(&qp->buf,
+			    align(qp->buf_size,
+				  to_xdev(context->device)->page_size),
+			    to_xdev(context->device)->page_size);
+	if (err) {
+		err = -ENOMEM;
+		goto ex_wrid;
+	}
+
+	memset(qp->buf.buf, 0, qp->buf_size);
+
+	if (attr->qp_type == IBV_QPT_RAW_PACKET) {
+		size_t aligned_sq_buf_size = align(qp->sq_buf_size,
+						   to_xdev(context->device)->page_size);
+		/* For Raw Packet QP, allocate a separate buffer for the SQ */
+		err = xsc_alloc_buf(&qp->sq_buf,
+				    aligned_sq_buf_size,
+				    to_xdev(context->device)->page_size);
+		if (err) {
+			err = -ENOMEM;
+			goto rq_buf;
+		}
+
+		memset(qp->sq_buf.buf, 0, aligned_sq_buf_size);
+	}
+
+	return 0;
+rq_buf:
+	xsc_free_buf(&qp->buf);
+ex_wrid:
+	if (qp->rq.wrid)
+		free(qp->rq.wrid);
+
+	if (qp->sq.wqe_head)
+		free(qp->sq.wqe_head);
+
+	if (qp->sq.wrid)
+		free(qp->sq.wrid);
+
+	if (qp->sq.need_flush)
+		free(qp->sq.need_flush);
+
+	if (qp->sq.wr_opcode)
+		free(qp->sq.wr_opcode);
+
+	return err;
+}
+
+static void xsc_free_qp_buf(struct xsc_context *ctx, struct xsc_qp *qp)
+{
+	xsc_free_buf(&qp->buf);
+
+	if (qp->sq_buf.buf)
+		xsc_free_buf(&qp->sq_buf);
+
+	if (qp->rq.wrid)
+		free(qp->rq.wrid);
+
+	if (qp->sq.wqe_head)
+		free(qp->sq.wqe_head);
+
+	if (qp->sq.wrid)
+		free(qp->sq.wrid);
+
+	if (qp->sq.need_flush)
+		free(qp->sq.need_flush);
+
+	if (qp->sq.wr_opcode)
+		free(qp->sq.wr_opcode);
+}
+
+enum { XSC_CREATE_QP_SUP_COMP_MASK =
+	       (IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_CREATE_FLAGS),
+};
+
+void xsc_init_qp_indices(struct xsc_qp *qp)
+{
+	qp->sq.head = 0;
+	qp->sq.tail = 0;
+	qp->rq.head = 0;
+	qp->rq.tail = 0;
+	qp->sq.cur_post = 0;
+}
+
+static struct ibv_qp *create_qp(struct ibv_context *context,
+				struct ibv_qp_init_attr_ex *attr)
+{
+	struct xsc_create_qp cmd;
+	struct xsc_create_qp_resp resp;
+	struct xsc_create_qp_ex_resp resp_ex;
+	struct xsc_qp *qp;
+	int ret;
+	struct xsc_context *ctx = to_xctx(context);
+	struct ibv_qp *ibqp;
+	struct xsc_device *xdev = to_xdev(context->device);
+
+	xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "comp_mask=0x%x.\n", attr->comp_mask);
+
+	if (attr->comp_mask & ~XSC_CREATE_QP_SUP_COMP_MASK) {
+		xsc_err("Unsupported comp_mask:0x%x\n", attr->comp_mask);
+		return NULL;
+	}
+
+	/* check qp_type */
+	if (attr->qp_type != IBV_QPT_RC &&
+	    attr->qp_type != IBV_QPT_RAW_PACKET) {
+		xsc_err("Unsupported qp_type:0x%x\n", attr->qp_type);
+		return NULL;
+	}
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp) {
+		xsc_err("QP calloc failed\n");
+		return NULL;
+	}
+
+	ibqp = &qp->verbs_qp.qp;
+	qp->ibv_qp = ibqp;
+
+	memset(&cmd, 0, sizeof(cmd));
+	memset(&resp, 0, sizeof(resp));
+	memset(&resp_ex, 0, sizeof(resp_ex));
+
+	ret = xsc_calc_wq_size(ctx, attr, qp);
+	if (ret < 0) {
+		xsc_err("Calculate WQ size failed\n");
+		errno = EINVAL;
+		goto err;
+	}
+
+	qp->buf_size = ret;
+	qp->sq_buf_size = 0;
+
+	if (xsc_alloc_qp_buf(context, attr, qp, ret)) {
+		xsc_err("Alloc QP buffer failed\n");
+		errno = ENOMEM;
+		goto err;
+	}
+
+	qp->sq_start = qp->buf.buf + qp->sq.offset;
+	qp->rq_start = qp->buf.buf + qp->rq.offset;
+	qp->sq.qend = qp->buf.buf + qp->sq.offset +
+		      (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+
+	xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+		"sq start:%p, sq qend:%p, buffer size:%u\n", qp->sq_start,
+		qp->sq.qend, qp->buf_size);
+
+	xsc_init_qp_indices(qp);
+
+	if (xsc_spinlock_init(&qp->sq.lock) ||
+	    xsc_spinlock_init(&qp->rq.lock))
+		goto err_free_qp_buf;
+
+	cmd.buf_addr = (uintptr_t)qp->buf.buf;
+	cmd.sq_wqe_count = qp->sq.ds_cnt;
+	cmd.rq_wqe_count = qp->rq.ds_cnt;
+	cmd.rq_wqe_shift = qp->rq.wqe_shift;
+
+	if (attr->qp_type == IBV_QPT_RAW_PACKET) {
+		if (attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) {
+			if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TSO) {
+				cmd.flags |= XSC_QP_FLAG_RAWPACKET_TSO;
+				xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+					"convert create_flags(0x%x) to cmd_flags(0x%x)\n",
+					attr->create_flags, cmd.flags);
+			}
+
+			if (attr->create_flags & XSC_QP_CREATE_RAWPACKET_TX) {
+				cmd.flags |= XSC_QP_FLAG_RAWPACKET_TX;
+				xsc_dbg(ctx->dbg_fp, XSC_DBG_QP,
+					"convert create_flags(0x%x) to cmd_flags(0x%x)\n",
+					attr->create_flags, cmd.flags);
+			}
+			attr->comp_mask &= ~IBV_QP_INIT_ATTR_CREATE_FLAGS;
+		}
+	}
+
+	pthread_mutex_lock(&ctx->qp_table_mutex);
+
+	ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr, &cmd.ibv_cmd,
+				   sizeof(cmd), &resp.ibv_resp, sizeof(resp));
+	if (ret) {
+		xsc_err("ibv_cmd_create_qp_ex failed, ret %d\n", ret);
+		errno = ret;
+		goto err_free_qp_buf;
+	}
+
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
+		ret = xsc_store_qp(ctx, ibqp->qp_num, qp);
+		if (ret) {
+			xsc_err("xsc_store_qp failed, ret %d\n", ret);
+			errno = EINVAL;
+			goto err_destroy;
+		}
+	}
+
+	pthread_mutex_unlock(&ctx->qp_table_mutex);
+
+	qp->rq.max_post = qp->rq.wqe_cnt;
+
+	if (attr->sq_sig_all)
+		qp->sq_signal_bits = 1;
+	else
+		qp->sq_signal_bits = 0;
+
+	attr->cap.max_send_wr = qp->sq.max_post;
+	attr->cap.max_recv_wr = qp->rq.max_post;
+	attr->cap.max_recv_sge = qp->rq.max_gs;
+
+	qp->rsc.rsn = ibqp->qp_num;
+
+	qp->rqn = ibqp->qp_num;
+	qp->sqn = ibqp->qp_num;
+
+	xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "qp rqn:%u, sqn:%u\n", qp->rqn,
+		qp->sqn);
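+	/* Add the in-page offset of the doorbell register to the mapping */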
+	qp->sq.db = ctx->sqm_reg_va + (ctx->qpm_tx_db & (xdev->page_size - 1));
+	qp->rq.db = ctx->rqm_reg_va + (ctx->qpm_rx_db & (xdev->page_size - 1));
+
+	if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS)
+		qp->verbs_qp.comp_mask |= VERBS_QP_EX;
+
+	return ibqp;
+
+err_destroy:
+	ibv_cmd_destroy_qp(ibqp);
+
+err_free_qp_buf:
+	pthread_mutex_unlock(&to_xctx(context)->qp_table_mutex);
+	xsc_free_qp_buf(ctx, qp);
+
+err:
+	free(qp);
+
+	return NULL;
+}
+
+struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{
+	struct ibv_qp *qp;
+	struct ibv_qp_init_attr_ex attrx;
+
+	memset(&attrx, 0, sizeof(attrx));
+	memcpy(&attrx, attr, sizeof(*attr));
+	attrx.comp_mask = IBV_QP_INIT_ATTR_PD;
+	attrx.pd = pd;
+	qp = create_qp(pd->context, &attrx);
+	if (qp)
+		memcpy(attr, &attrx, sizeof(*attr));
+
+	return qp;
+}
+
+static void xsc_lock_cqs(struct ibv_qp *qp)
+{
+	struct xsc_cq *send_cq = to_xcq(qp->send_cq);
+	struct xsc_cq *recv_cq = to_xcq(qp->recv_cq);
+
+	if (send_cq && recv_cq) {
+		if (send_cq == recv_cq) {
+			xsc_spin_lock(&send_cq->lock);
+		} else if (send_cq->cqn < recv_cq->cqn) {
+			xsc_spin_lock(&send_cq->lock);
+			xsc_spin_lock(&recv_cq->lock);
+		} else {
+			xsc_spin_lock(&recv_cq->lock);
+			xsc_spin_lock(&send_cq->lock);
+		}
+	} else if (send_cq) {
+		xsc_spin_lock(&send_cq->lock);
+	} else if (recv_cq) {
+		xsc_spin_lock(&recv_cq->lock);
+	}
+}
+
+static void xsc_unlock_cqs(struct ibv_qp *qp)
+{
+	struct xsc_cq *send_cq = to_xcq(qp->send_cq);
+	struct xsc_cq *recv_cq = to_xcq(qp->recv_cq);
+
+	if (send_cq && recv_cq) {
+		if (send_cq == recv_cq) {
+			xsc_spin_unlock(&send_cq->lock);
+		} else if (send_cq->cqn < recv_cq->cqn) {
+			xsc_spin_unlock(&recv_cq->lock);
+			xsc_spin_unlock(&send_cq->lock);
+		} else {
+			xsc_spin_unlock(&send_cq->lock);
+			xsc_spin_unlock(&recv_cq->lock);
+		}
+	} else if (send_cq) {
+		xsc_spin_unlock(&send_cq->lock);
+	} else if (recv_cq) {
+		xsc_spin_unlock(&recv_cq->lock);
+	}
+}
+
+int xsc_destroy_qp(struct ibv_qp *ibqp)
+{
+	struct xsc_qp *qp = to_xqp(ibqp);
+	struct xsc_context *ctx = to_xctx(ibqp->context);
+	int ret;
+	struct xsc_err_state_qp_node *tmp, *err_rq_node, *err_sq_node;
+
+	xsc_dbg(ctx->dbg_fp, XSC_DBG_QP, "\n");
+
+	pthread_mutex_lock(&ctx->qp_table_mutex);
+
+	ret = ibv_cmd_destroy_qp(ibqp);
+	if (ret) {
+		pthread_mutex_unlock(&ctx->qp_table_mutex);
+		return ret;
+	}
+
+	xsc_lock_cqs(ibqp);
+
+	list_for_each_safe(&to_xcq(ibqp->recv_cq)->err_state_qp_list,
+			   err_rq_node, tmp, entry) {
+		if (err_rq_node->qp_id == qp->rsc.rsn) {
+			list_del(&err_rq_node->entry);
+			free(err_rq_node);
+		}
+	}
+
+	list_for_each_safe(&to_xcq(ibqp->send_cq)->err_state_qp_list,
+			   err_sq_node, tmp, entry) {
+		if (err_sq_node->qp_id == qp->rsc.rsn) {
+			list_del(&err_sq_node->entry);
+			free(err_sq_node);
+		}
+	}
+
+	__xsc_cq_clean(to_xcq(ibqp->recv_cq), qp->rsc.rsn);
+	if (ibqp->send_cq != ibqp->recv_cq)
+		__xsc_cq_clean(to_xcq(ibqp->send_cq), qp->rsc.rsn);
+
+	if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
+		xsc_clear_qp(ctx, ibqp->qp_num);
+
+	xsc_unlock_cqs(ibqp);
+	pthread_mutex_unlock(&ctx->qp_table_mutex);
+
+	xsc_free_qp_buf(ctx, qp);
+
+	free(qp);
+
+	return 0;
+}
+
+int xsc_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask,
+		 struct ibv_qp_init_attr *init_attr)
+{
+	struct ibv_query_qp cmd;
+	struct xsc_qp *qp = to_xqp(ibqp);
+	int ret;
+
+	xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP, "\n");
+
+	ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd,
+			       sizeof(cmd));
+	if (ret)
+		return ret;
+
+	init_attr->cap.max_send_wr = qp->sq.max_post;
+	init_attr->cap.max_send_sge = qp->sq.max_gs;
+	init_attr->cap.max_inline_data = qp->max_inline_data;
+
+	attr->cap = init_attr->cap;
+	attr->qp_state = qp->ibv_qp->state;
+
+	return 0;
+}
+
+int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
+{
+	struct ibv_modify_qp cmd = {};
+	struct xsc_qp *xqp = to_xqp(qp);
+	int ret;
+
+	xsc_dbg(to_xctx(qp->context)->dbg_fp, XSC_DBG_QP, "\n");
+	ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
+
+	if (!ret && (attr_mask & IBV_QP_STATE) &&
+	    attr->qp_state == IBV_QPS_RESET) {
+		if (qp->recv_cq)
+			xsc_cq_clean(to_xcq(qp->recv_cq), xqp->rsc.rsn);
+
+		if (qp->send_cq != qp->recv_cq && qp->send_cq)
+			xsc_cq_clean(to_xcq(qp->send_cq), to_xqp(qp)->rsc.rsn);
+
+		xsc_init_qp_indices(xqp);
+	}
+
+	if (!ret && (attr_mask & IBV_QP_STATE))
+		qp->state = attr->qp_state;
+
+	/* Workaround: generate flush error CQEs when the QP turns to ERR */
+	if (!ret && (attr_mask & IBV_QP_STATE))
+		ret = xsc_err_state_qp(qp, attr->cur_qp_state, attr->qp_state);
+
+	return ret;
+}
diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c
index 7b439f786..4d048629a 100644
--- a/providers/xscale/xscale.c
+++ b/providers/xscale/xscale.c
@@ -47,6 +47,11 @@ static const struct verbs_context_ops xsc_ctx_common_ops = {
 	.req_notify_cq = xsc_arm_cq,
 	.resize_cq = xsc_resize_cq,
 	.destroy_cq = xsc_destroy_cq,
+
+	.create_qp = xsc_create_qp,
+	.query_qp = xsc_query_qp,
+	.modify_qp = xsc_modify_qp,
+	.destroy_qp = xsc_destroy_qp,
 };
 
 static void open_debug_file(struct xsc_context *ctx)
diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h
index 21df88c13..82514a594 100644
--- a/providers/xscale/xscale.h
+++ b/providers/xscale/xscale.h
@@ -27,6 +27,16 @@ typedef uint16_t  u16;
 typedef uint32_t  u32;
 typedef uint64_t  u64;
 
+enum {
+	XSC_QP_FLAG_RAWPACKET_TSO = 1 << 9,
+	XSC_QP_FLAG_RAWPACKET_TX = 1 << 10,
+};
+
+enum xsc_qp_create_flags {
+	XSC_QP_CREATE_RAWPACKET_TSO = 1 << 0,
+	XSC_QP_CREATE_RAWPACKET_TX = 1 << 1,
+};
+
 enum {
 	XSC_DBG_QP = 1 << 0,
 	XSC_DBG_CQ = 1 << 1,
@@ -168,11 +178,52 @@ struct xsc_cq {
 	struct list_head err_state_qp_list;
 };
 
+struct xsc_wq {
+	u64 *wrid;
+	unsigned int *wqe_head;
+	struct xsc_spinlock lock;
+	unsigned int wqe_cnt;
+	unsigned int max_post;
+	unsigned int head;
+	unsigned int tail;
+	unsigned int cur_post;
+	int max_gs;
+	int wqe_shift;
+	int offset;
+	void *qend;
+	__le32 *db;
+	unsigned int ds_cnt;
+	unsigned int seg_cnt;
+	unsigned int *wr_opcode;
+	unsigned int *need_flush;
+	unsigned int flush_wqe_cnt;
+};
+
 struct xsc_mr {
 	struct verbs_mr vmr;
 	u32 alloc_flags;
 };
 
+struct xsc_qp {
+	struct xsc_resource rsc; /* This struct must be first */
+	struct verbs_qp verbs_qp;
+	struct ibv_qp *ibv_qp;
+	struct xsc_buf buf;
+	void *sq_start;
+	void *rq_start;
+	int max_inline_data;
+	int buf_size;
+	/* For Raw Packet QP, use different buffers for the SQ and RQ */
+	struct xsc_buf sq_buf;
+	int sq_buf_size;
+	u8 sq_signal_bits;
+	struct xsc_wq sq;
+	struct xsc_wq rq;
+	u32 flags; /* Use enum xsc_qp_flags */
+	u32 rqn;
+	u32 sqn;
+};
+
 union xsc_ib_fw_ver {
 	u64 data;
 	struct {
@@ -219,6 +270,13 @@ static inline struct xsc_cq *to_xcq(struct ibv_cq *ibcq)
 			    verbs_cq.cq_ex);
 }
 
+static inline struct xsc_qp *to_xqp(struct ibv_qp *ibqp)
+{
+	struct verbs_qp *vqp = (struct verbs_qp *)ibqp;
+
+	return container_of(vqp, struct xsc_qp, verbs_qp);
+}
+
 static inline struct xsc_mr *to_xmr(struct ibv_mr *ibmr)
 {
 	return container_of(ibmr, struct xsc_mr, vmr.ibv_mr);
@@ -258,6 +316,20 @@ int xsc_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
 int xsc_arm_cq(struct ibv_cq *cq, int solicited);
 void __xsc_cq_clean(struct xsc_cq *cq, u32 qpn);
 void xsc_cq_clean(struct xsc_cq *cq, u32 qpn);
+
+struct ibv_qp *xsc_create_qp_ex(struct ibv_context *context,
+				struct ibv_qp_init_attr_ex *attr);
+struct ibv_qp *xsc_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+int xsc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
+		 struct ibv_qp_init_attr *init_attr);
+int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
+int xsc_destroy_qp(struct ibv_qp *qp);
+void xsc_init_qp_indices(struct xsc_qp *qp);
+struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, u32 qpn);
+int xsc_store_qp(struct xsc_context *ctx, u32 qpn, struct xsc_qp *qp);
+void xsc_clear_qp(struct xsc_context *ctx, u32 qpn);
+int xsc_err_state_qp(struct ibv_qp *qp, enum ibv_qp_state cur_state,
+		     enum ibv_qp_state state);
 int xsc_round_up_power_of_two(long long sz);
 void *xsc_get_send_wqe(struct xsc_qp *qp, int n);
 

From 1790accece3b88692f8235c5daec56fd3d8dc950 Mon Sep 17 00:00:00 2001
From: Tian Xin <tianx@yunsilicon.com>
Date: Tue, 24 Sep 2024 15:09:58 +0800
Subject: [PATCH 5/6] libxscale: Add support for posting verbs

This patch adds support for post_send and post_recv routines.
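
For context, this is roughly how an application reaches these routines
through the standard verbs API. The snippet below is only an
illustrative sketch (the QP, MR and data buffer are assumed to exist
already and error checking is omitted):

	struct ibv_sge sge = {
		.addr = (uintptr_t)buf,
		.length = buf_len,
		.lkey = mr->lkey,
	};
	struct ibv_recv_wr rwr = {
		.wr_id = 1,
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ibv_send_wr swr = {
		.wr_id = 2,
		.sg_list = &sge,
		.num_sge = 1,
		.opcode = IBV_WR_SEND,
		.send_flags = IBV_SEND_SIGNALED,
	};
	struct ibv_recv_wr *bad_rwr;
	struct ibv_send_wr *bad_swr;

	ibv_post_recv(qp, &rwr, &bad_rwr);
	ibv_post_send(qp, &swr, &bad_swr);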

Signed-off-by: Tian Xin <tianx@yunsilicon.com>
Signed-off-by: Wei Honggang <weihg@yunsilicon.com>
Signed-off-by: Zhao Qianwei <zhaoqw@yunsilicon.com>
Signed-off-by: Li Qiang <liq@yunsilicon.com>
Signed-off-by: Yan Lei <jacky@yunsilicon.com>
---
 providers/xscale/cq.c     |   5 -
 providers/xscale/qp.c     | 522 ++++++++++++++++++++++++++++++++++++++
 providers/xscale/xscale.c |   3 +
 providers/xscale/xscale.h |   4 +
 4 files changed, 529 insertions(+), 5 deletions(-)

diff --git a/providers/xscale/cq.c b/providers/xscale/cq.c
index 7c5284250..db1598ad8 100644
--- a/providers/xscale/cq.c
+++ b/providers/xscale/cq.c
@@ -115,11 +115,6 @@ static void *get_sw_cqe(struct xsc_cq *cq, int n)
 		return NULL;
 }
 
-void *xsc_get_send_wqe(struct xsc_qp *qp, int n)
-{
-	return qp->sq_start + (n << qp->sq.wqe_shift);
-}
-
 static void update_cons_index(struct xsc_cq *cq)
 {
 	struct xsc_context *ctx =
diff --git a/providers/xscale/qp.c b/providers/xscale/qp.c
index 7fa715c4b..551cf4dc1 100644
--- a/providers/xscale/qp.c
+++ b/providers/xscale/qp.c
@@ -16,6 +16,528 @@
 #include "xscale.h"
 #include "xsc_hsi.h"
 
+static const u32 xsc_ib_opcode[] = {
+	[IBV_WR_SEND] = XSC_MSG_OPCODE_SEND,
+	[IBV_WR_SEND_WITH_IMM] = XSC_MSG_OPCODE_SEND,
+	[IBV_WR_RDMA_WRITE] = XSC_MSG_OPCODE_RDMA_WRITE,
+	[IBV_WR_RDMA_WRITE_WITH_IMM] = XSC_MSG_OPCODE_RDMA_WRITE,
+	[IBV_WR_RDMA_READ] = XSC_MSG_OPCODE_RDMA_READ,
+	[IBV_WR_SEND_WITH_INV] = XSC_MSG_OPCODE_SEND,
+};
+
+static void *get_recv_wqe(struct xsc_qp *qp, int n)
+{
+	return qp->rq_start + (n << qp->rq.wqe_shift);
+}
+
+static void *get_seg_wqe(void *first, int n)
+{
+	return first + (n << XSC_BASE_WQE_SHIFT);
+}
+
+void *xsc_get_send_wqe(struct xsc_qp *qp, int n)
+{
+	return qp->sq_start + (n << qp->sq.wqe_shift);
+}
+
+static int xsc_wq_overflow(struct xsc_wq *wq, int nreq, struct xsc_cq *cq)
+{
+	unsigned int cur;
+
+	cur = wq->head - wq->tail;
+	if (cur + nreq < wq->max_post)
+		return 0;
+
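+	/* Recheck occupancy under the CQ lock to synchronize with CQ polling */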
+	xsc_spin_lock(&cq->lock);
+	cur = wq->head - wq->tail;
+	xsc_spin_unlock(&cq->lock);
+
+	return cur + nreq >= wq->max_post;
+}
+
+static inline void set_remote_addr_seg(struct xsc_wqe_data_seg *remote_seg,
+				       u32 msg_len, u64 remote_addr,
+				       u32 rkey)
+{
+	u32 ds_data0 = 0;
+
+	ds_data0 |= FIELD_PREP(DATA_SEG_DATA0_SEG_LEN_MASK, msg_len);
+	remote_seg->data0 = htole32(ds_data0);
+	remote_seg->mkey = htole32(rkey);
+	remote_seg->va = htole64(remote_addr);
+}
+
+static void set_local_data_seg(struct xsc_wqe_data_seg *data_seg,
+			       const struct ibv_sge *sg)
+{
+	u32 ds_data0 = 0;
+
+	ds_data0 |= FIELD_PREP(DATA_SEG_DATA0_SEG_LEN_MASK, sg->length);
+	data_seg->data0 = htole32(ds_data0);
+	data_seg->mkey = htole32(sg->lkey);
+	data_seg->va = htole64(sg->addr);
+}
+
+static __be32 send_ieth(struct ibv_send_wr *wr)
+{
+	switch (wr->opcode) {
+	case IBV_WR_SEND_WITH_IMM:
+	case IBV_WR_RDMA_WRITE_WITH_IMM:
+		return wr->imm_data;
+	default:
+		return 0;
+	}
+}
+
+static void *get_addr_from_wr(const void *list, int idx)
+{
+	const struct ibv_send_wr *wr = list;
+
+	return (void *)(uintptr_t)wr->sg_list[idx].addr;
+}
+
+static int get_len_from_wr(const void *list, int idx)
+{
+	const struct ibv_send_wr *wr = list;
+
+	return wr->sg_list[idx].length;
+}
+
+static int _set_wqe_inline(void *data_seg, size_t num_buf, const void *list,
+			   void *(*get_addr)(const void *, int),
+			   int (*get_len)(const void *, int))
+{
+	int i;
+	int ds_left_len = 0;
+	int len = 0;
+	void *addr;
+	void *data_seg_base = data_seg;
+	int seg_index = 0;
+	const int ds_len = sizeof(struct xsc_wqe_data_seg);
+
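+	/*
+	 * Pack the buffers into consecutive 16-byte data segments,
+	 * carrying any partially filled segment over to the next buffer.
+	 */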
+	for (i = 0; i < num_buf; i++) {
+		addr = get_addr(list, i);
+		len = get_len(list, i);
+		if (likely(len)) {
+			if (ds_left_len > 0) {
+				int copy_len = min_t(int, len, ds_left_len);
+
+				memcpy(data_seg, addr, copy_len);
+				addr += copy_len;
+				len -= copy_len;
+			}
+
+			while (len >= ds_len) {
+				data_seg =
+					get_seg_wqe(data_seg_base, seg_index);
+				seg_index++;
+				memcpy(data_seg, addr, ds_len);
+				addr += ds_len;
+				len -= ds_len;
+			}
+
+			if (len > 0) {
+				data_seg =
+					get_seg_wqe(data_seg_base, seg_index);
+				seg_index++;
+				memcpy(data_seg, addr, len);
+				data_seg += len;
+				ds_left_len = ds_len - len;
+			} else {
+				ds_left_len = 0;
+			}
+		}
+	}
+	return seg_index;
+}
+
+static int set_wqe_inline_from_wr(struct xsc_qp *qp, struct ibv_send_wr *wr,
+				  struct xsc_send_wqe_ctrl_seg *ctrl)
+{
+	void *data_seg;
+	unsigned int seg_index;
+	int msg_len = le32toh(ctrl->msg_len);
+	int filled_ds_num;
+	u32 tmp_ctrl_wqe_hdr = le32toh(ctrl->wqe_hdr);
+
+	if (wr->opcode == IBV_WR_SEND || wr->opcode == IBV_WR_SEND_WITH_IMM)
+		seg_index = 1;
+	else
+		seg_index = 2;
+	data_seg = get_seg_wqe(ctrl, seg_index);
+
+	if (unlikely(msg_len > qp->max_inline_data))
+		return -ENOMEM;
+
+	filled_ds_num = _set_wqe_inline(data_seg, wr->num_sge, wr,
+					get_addr_from_wr, get_len_from_wr);
+	tmp_ctrl_wqe_hdr |= FIELD_PREP(CTRL_SEG_WQE_HDR_DS_NUM_MASK,
+				       seg_index - 1 + filled_ds_num);
+	ctrl->wqe_hdr = htole32(tmp_ctrl_wqe_hdr);
+
+	return 0;
+}
+
+static void _zero_send_ds(int idx, struct xsc_qp *qp, int keep_ctrl)
+{
+	u64 *p;
+	void *seg;
+	int i;
+
+	seg = (void *)xsc_get_send_wqe(qp, idx);
+	for (i = keep_ctrl; i < qp->sq.seg_cnt; i++) {
+		p = get_seg_wqe(seg, i);
+		p[0] = 0;
+		p[1] = 0;
+	}
+}
+
+static void clear_send_wqe(int idx, struct xsc_qp *qp)
+{
+	_zero_send_ds(idx, qp, 0);
+}
+
+static void clear_send_wqe_except_ctrl(int idx, struct xsc_qp *qp)
+{
+	_zero_send_ds(idx, qp, 1);
+}
+
+static void clear_recv_wqe(int idx, struct xsc_qp *qp)
+{
+	u64 *p;
+	void *seg;
+	int i;
+
+	seg = (void *)get_recv_wqe(qp, idx);
+	for (i = 0; i < qp->rq.seg_cnt; i++) {
+		p = get_seg_wqe(seg, i);
+		p[0] = 0;
+		p[1] = 0;
+	}
+}
+
+static void dump_wqe(int type, int idx, struct xsc_qp *qp)
+{
+	u32 *p;
+	int i;
+	void *seg;
+
+	/* type 0 for send, type 1 for recv */
+	if (type == 0) {
+		seg = (void *)xsc_get_send_wqe(qp, idx);
+		xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+			"dump send wqe at %p\n", seg);
+		for (i = 0; i < qp->sq.seg_cnt; i++) {
+			p = get_seg_wqe(seg, i);
+			xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp,
+				XSC_DBG_QP, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+				p[0], p[1], p[2], p[3]);
+		}
+	} else if (type == 1) {
+		seg = (void *)get_recv_wqe(qp, idx);
+		xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+			"dump recv wqe at %p\n", seg);
+		for (i = 0; i < qp->rq.seg_cnt; i++) {
+			p = get_seg_wqe(seg, i);
+			xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp,
+				XSC_DBG_QP, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+				p[0], p[1], p[2], p[3]);
+		}
+	} else {
+		xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP,
+			"unknown type %d\n", type);
+	}
+}
+
+static inline void xsc_post_send_db(struct xsc_qp *qp, int nreq)
+{
+	struct xsc_context *ctx = to_xctx(qp->ibv_qp->context);
+	u32 next_pid;
+
+	if (unlikely(!nreq))
+		return;
+
+	qp->sq.head += nreq;
+	next_pid = qp->sq.head << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+	xsc_dbg(to_xctx(qp->ibv_qp->context)->dbg_fp, XSC_DBG_QP_SEND,
+		"nreq:%d\n", nreq);
+	ctx->hw_ops->ring_tx_doorbell(qp->sq.db, qp->sqn, next_pid);
+}
+
+static inline int _xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+				 struct ibv_send_wr **bad_wr)
+{
+	struct xsc_qp *qp = to_xqp(ibqp);
+	void *seg;
+	struct xsc_send_wqe_ctrl_seg *ctrl;
+	struct xsc_wqe_data_seg *data_seg;
+
+	int nreq;
+	int err = 0;
+	int i;
+	unsigned int idx;
+	unsigned int seg_index = 1;
+	unsigned int msg_len = 0;
+
+	if (unlikely(ibqp->state < IBV_QPS_RTS)) {
+		xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+			"qp state is %u, should not post send\n", ibqp->state);
+		err = EINVAL;
+		*bad_wr = wr;
+		return err;
+	}
+
+	xsc_spin_lock(&qp->sq.lock);
+
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		u8 ds_num;
+		u8 with_immdt;
+		u32 wqe_id;
+		u8 ce;
+		u32 wqe_hdr;
+		u32 ctrl_data0;
+
+		seg_index = 1;
+		msg_len = 0;
+		if (unlikely(wr->opcode < 0 ||
+			     wr->opcode >= sizeof(xsc_ib_opcode) /
+						   sizeof(xsc_ib_opcode[0]))) {
+			xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+				"bad opcode %d\n", wr->opcode);
+			err = EINVAL;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (unlikely(xsc_wq_overflow(&qp->sq, nreq,
+					     to_xcq(qp->ibv_qp->send_cq)))) {
+			xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+				"send work queue overflow\n");
+			err = ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+			xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+				"max gs exceeded %d (max = %d)\n", wr->num_sge,
+				qp->sq.max_gs);
+			err = ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (unlikely(wr->opcode == IBV_WR_RDMA_READ &&
+			     wr->num_sge > 1)) {
+			xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+				"rdma read, max gs exceeded %d (max = 1)\n",
+				wr->num_sge);
+			err = ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+		clear_send_wqe(idx, qp);
+		seg = xsc_get_send_wqe(qp, idx);
+		ctrl = seg;
+		ds_num = 0;
+		wqe_id = qp->sq.cur_post << (qp->sq.wqe_shift - XSC_BASE_WQE_SHIFT);
+		ce = qp->sq_signal_bits ? 1 :
+		     (wr->send_flags & IBV_SEND_SIGNALED ? 1 : 0);
+		for (i = 0; i < wr->num_sge; ++i) {
+			if (likely(wr->sg_list[i].length))
+				msg_len += wr->sg_list[i].length;
+		}
+		with_immdt = 0;
+
+		if (unlikely(wr->opcode == IBV_WR_RDMA_READ && msg_len == 0)) {
+			xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+				"rdma read, msg len should not be 0\n");
+			err = ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		switch (ibqp->qp_type) {
+		case IBV_QPT_RC:
+			switch (wr->opcode) {
+			case IBV_WR_SEND_WITH_INV:
+			case IBV_WR_SEND:
+				break;
+			case IBV_WR_SEND_WITH_IMM:
+				with_immdt = 1;
+				ctrl->opcode_data = htole32(be32toh(send_ieth(wr)));
+				break;
+			case IBV_WR_RDMA_WRITE_WITH_IMM:
+				with_immdt = 1;
+				ctrl->opcode_data = htole32(be32toh(send_ieth(wr)));
+				SWITCH_FALLTHROUGH;
+			case IBV_WR_RDMA_READ:
+			case IBV_WR_RDMA_WRITE:
+				if (msg_len == 0)
+					break;
+				ds_num++;
+				data_seg = get_seg_wqe(ctrl, seg_index);
+				set_remote_addr_seg(data_seg, msg_len,
+						    wr->wr.rdma.remote_addr,
+						    wr->wr.rdma.rkey);
+				seg_index++;
+				break;
+			default:
+				printf("debug: opcode:%u NOT supported\n",
+				       wr->opcode);
+				err = EPERM;
+				*bad_wr = wr;
+				goto out;
+			}
+			break;
+		default:
+			xsc_dbg(to_xctx(ibqp->context)->dbg_fp, XSC_DBG_QP_SEND,
+				"qp type:%u NOT supported\n", ibqp->qp_type);
+			err = EPERM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
+			err = set_wqe_inline_from_wr(qp, wr, ctrl);
+			if (unlikely(err)) {
+				*bad_wr = wr;
+				xsc_dbg(to_xctx(ibqp->context)->dbg_fp,
+					XSC_DBG_QP_SEND,
+					"inline layout failed, err %d\n", err);
+				goto out;
+			}
+		} else {
+			for (i = 0; i < wr->num_sge; ++i, ++seg_index) {
+				if (likely(wr->sg_list[i].length)) {
+					data_seg = get_seg_wqe(ctrl, seg_index);
+					set_local_data_seg(data_seg,
+							   &wr->sg_list[i]);
+					ds_num++;
+				}
+			}
+		}
+
+		wqe_hdr = FIELD_PREP(CTRL_SEG_WQE_HDR_MSG_OPCODE_MASK,
+				     xsc_ib_opcode[wr->opcode]) |
+			  FIELD_PREP(CTRL_SEG_WQE_HDR_WITH_IMMDT_MASK,
+				     with_immdt) |
+			  FIELD_PREP(CTRL_SEG_WQE_HDR_DS_NUM_MASK,
+				     ds_num) |
+			  FIELD_PREP(CTRL_SEG_WQE_HDR_WQE_ID_MASK,
+				     wqe_id);
+		ctrl_data0 = FIELD_PREP(CTRL_SEG_DATA0_SE_MASK,
+					wr->send_flags & IBV_SEND_SOLICITED ? 1 : 0) |
+			     FIELD_PREP(CTRL_SEG_DATA0_CE_MASK, ce) |
+			     FIELD_PREP(CTRL_SEG_DATA0_IN_LINE_MASK,
+					wr->send_flags & IBV_SEND_INLINE ? 1 : 0);
+		ctrl->wqe_hdr = htole32(wqe_hdr);
+		ctrl->msg_len = htole32(msg_len);
+		ctrl->data0 = htole32(ctrl_data0);
+
+		if (msg_len == 0) {
+			ds_num = 0;
+			clear_send_wqe_except_ctrl(idx, qp);
+		}
+		qp->sq.wrid[idx] = wr->wr_id;
+		qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+		qp->sq.cur_post += 1;
+		if (ce) {
+			qp->sq.flush_wqe_cnt++;
+			qp->sq.need_flush[idx] = 1;
+		}
+		qp->sq.wr_opcode[idx] = wr->opcode;
+
+		if (xsc_debug_mask & XSC_DBG_QP_SEND)
+			dump_wqe(0, idx, qp);
+	}
+
+out:
+	xsc_post_send_db(qp, nreq);
+	xsc_spin_unlock(&qp->sq.lock);
+
+	return err;
+}
+
+int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+		  struct ibv_send_wr **bad_wr)
+{
+	return _xsc_post_send(ibqp, wr, bad_wr);
+}
+
+int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		  struct ibv_recv_wr **bad_wr)
+{
+	struct xsc_qp *qp = to_xqp(ibqp);
+	struct xsc_wqe_data_seg *recv_head;
+	struct xsc_wqe_data_seg *data_seg;
+	int err = 0;
+	u32 next_pid = 0;
+	int nreq;
+	u16 idx;
+	int i;
+
+	xsc_spin_lock(&qp->rq.lock);
+
+	idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+	clear_recv_wqe(idx, qp);
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		if (unlikely(xsc_wq_overflow(&qp->rq, nreq,
+					     to_xcq(qp->ibv_qp->recv_cq)))) {
+			printf("recv work queue overflow\n");
+			err = ENOMEM;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+			printf("max gs exceeded %d (max = %d)\n", wr->num_sge,
+			       qp->rq.max_gs);
+			err = EINVAL;
+			*bad_wr = wr;
+			goto out;
+		}
+
+		recv_head = get_recv_wqe(qp, idx);
+
+		for (i = 0; i < wr->num_sge; ++i) {
+			u32 ds_data0 = 0;
+
+			if (unlikely(!wr->sg_list[i].length))
+				continue;
+			data_seg = get_seg_wqe(recv_head, i);
+			ds_data0 = FIELD_PREP(DATA_SEG_DATA0_SEG_LEN_MASK,
+					      wr->sg_list[i].length);
+			data_seg->data0 = htole32(ds_data0);
+			data_seg->mkey = htole32(wr->sg_list[i].lkey);
+			data_seg->va = htole64(wr->sg_list[i].addr);
+		}
+
+		qp->rq.wrid[idx] = wr->wr_id;
+
+		dump_wqe(1, idx, qp);
+		idx = (idx + 1) & (qp->rq.wqe_cnt - 1);
+		qp->rq.flush_wqe_cnt++;
+	}
+
+out:
+	if (likely(nreq)) {
+		struct xsc_context *ctx = to_xctx(ibqp->context);
+
+		qp->rq.head += nreq;
+		next_pid = qp->rq.head
+			   << (qp->rq.wqe_shift - XSC_BASE_WQE_SHIFT);
+		ctx->hw_ops->ring_rx_doorbell(qp->rq.db, qp->rqn, next_pid);
+	}
+
+	xsc_spin_unlock(&qp->rq.lock);
+
+	return err;
+}
+
 struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, uint32_t qpn)
 {
 	int tind = qpn >> XSC_QP_TABLE_SHIFT;
diff --git a/providers/xscale/xscale.c b/providers/xscale/xscale.c
index 4d048629a..a1c7779f7 100644
--- a/providers/xscale/xscale.c
+++ b/providers/xscale/xscale.c
@@ -52,6 +52,9 @@ static const struct verbs_context_ops xsc_ctx_common_ops = {
 	.query_qp = xsc_query_qp,
 	.modify_qp = xsc_modify_qp,
 	.destroy_qp = xsc_destroy_qp,
+
+	.post_send = xsc_post_send,
+	.post_recv = xsc_post_recv,
 };
 
 static void open_debug_file(struct xsc_context *ctx)
diff --git a/providers/xscale/xscale.h b/providers/xscale/xscale.h
index 82514a594..573789b66 100644
--- a/providers/xscale/xscale.h
+++ b/providers/xscale/xscale.h
@@ -325,6 +325,10 @@ int xsc_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
 int xsc_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask);
 int xsc_destroy_qp(struct ibv_qp *qp);
 void xsc_init_qp_indices(struct xsc_qp *qp);
+int xsc_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+		  struct ibv_send_wr **bad_wr);
+int xsc_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		  struct ibv_recv_wr **bad_wr);
 struct xsc_qp *xsc_find_qp(struct xsc_context *ctx, u32 qpn);
 int xsc_store_qp(struct xsc_context *ctx, u32 qpn, struct xsc_qp *qp);
 void xsc_clear_qp(struct xsc_context *ctx, u32 qpn);

From 9c45cb85b0b653bb535c06b39c2db19562459637 Mon Sep 17 00:00:00 2001
From: Tian Xin <tianx@yunsilicon.com>
Date: Tue, 24 Sep 2024 15:11:05 +0800
Subject: [PATCH 6/6] libxscale: Add xscale needed kernel headers

This patch adds the kernel ABI headers needed by xscale to the
kernel-headers directory.

Signed-off-by: Tian Xin <tianx@yunsilicon.com>
Signed-off-by: Wei Honggang <weihg@yunsilicon.com>
Signed-off-by: Zhao Qianwei <zhaoqw@yunsilicon.com>
Signed-off-by: Li Qiang <liq@yunsilicon.com>
Signed-off-by: Yan Lei <jacky@yunsilicon.com>
---
 kernel-headers/CMakeLists.txt             |  2 +
 kernel-headers/rdma/ib_user_ioctl_verbs.h |  1 +
 kernel-headers/rdma/xsc-abi.h             | 74 +++++++++++++++++++++++
 libibverbs/verbs.h                        |  1 +
 4 files changed, 78 insertions(+)
 create mode 100644 kernel-headers/rdma/xsc-abi.h

diff --git a/kernel-headers/CMakeLists.txt b/kernel-headers/CMakeLists.txt
index 82c191cad..56eb3984b 100644
--- a/kernel-headers/CMakeLists.txt
+++ b/kernel-headers/CMakeLists.txt
@@ -26,6 +26,7 @@ publish_internal_headers(rdma
   rdma/rvt-abi.h
   rdma/siw-abi.h
   rdma/vmw_pvrdma-abi.h
+  rdma/xsc-abi.h
   )
 
 publish_internal_headers(rdma/hfi
@@ -80,6 +81,7 @@ rdma_kernel_provider_abi(
   rdma/rdma_user_rxe.h
   rdma/siw-abi.h
   rdma/vmw_pvrdma-abi.h
+  rdma/xsc-abi.h
   )
 
 publish_headers(infiniband
diff --git a/kernel-headers/rdma/ib_user_ioctl_verbs.h b/kernel-headers/rdma/ib_user_ioctl_verbs.h
index fe15bc7e9..9f36fcf11 100644
--- a/kernel-headers/rdma/ib_user_ioctl_verbs.h
+++ b/kernel-headers/rdma/ib_user_ioctl_verbs.h
@@ -255,6 +255,7 @@ enum rdma_driver_id {
 	RDMA_DRIVER_SIW,
 	RDMA_DRIVER_ERDMA,
 	RDMA_DRIVER_MANA,
+	RDMA_DRIVER_XSC,
 };
 
 enum ib_uverbs_gid_type {
diff --git a/kernel-headers/rdma/xsc-abi.h b/kernel-headers/rdma/xsc-abi.h
new file mode 100644
index 000000000..456d6c474
--- /dev/null
+++ b/kernel-headers/rdma/xsc-abi.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 - 2022, Shanghai Yunsilicon Technology Co., Ltd.
+ * All rights reserved.
+ */
+
+#ifndef XSC_ABI_USER_H
+#define XSC_ABI_USER_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>	/* For ETH_ALEN. */
+#include <rdma/ib_user_ioctl_verbs.h>
+
+/* Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct xsc_ib_alloc_ucontext_resp {
+	__u32	qp_tab_size;
+	__u32	cache_line_size;
+	__u16	max_sq_desc_sz;
+	__u16	max_rq_desc_sz;
+	__u32	max_send_wr;
+	__u32	max_recv_wr;
+	__u16	num_ports;
+	__u16	device_id;
+	__aligned_u64	qpm_tx_db;
+	__aligned_u64	qpm_rx_db;
+	__aligned_u64	cqm_next_cid_reg;
+	__aligned_u64	cqm_armdb;
+	__u32	send_ds_num;
+	__u32	recv_ds_num;
+	__u32	resv;
+};
+
+struct xsc_ib_create_qp {
+	__aligned_u64 buf_addr;
+	__aligned_u64 db_addr;
+	__u32	sq_wqe_count;
+	__u32	rq_wqe_count;
+	__u32	rq_wqe_shift;
+	__u32	flags;
+	__u32	resv;
+};
+
+struct xsc_ib_create_qp_resp {
+	__u32   bfreg_index;
+	__u32   resv;
+};
+
+struct xsc_ib_create_cq {
+	__aligned_u64 buf_addr;
+	__u32	cqe_size;
+};
+
+struct xsc_ib_create_cq_resp {
+	__u32	cqn;
+	__u32	reserved;
+};
+
+struct xsc_ib_create_ah_resp {
+	__u32	response_length;
+	__u8	dmac[ETH_ALEN];
+	__u8	reserved[6];
+};
+
+struct xsc_ib_alloc_pd_resp {
+	__u32	pdn;
+};
+
+#endif /* XSC_ABI_USER_H */
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index cec005519..7127899ca 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2277,6 +2277,7 @@ extern const struct verbs_device_ops verbs_provider_qedr;
 extern const struct verbs_device_ops verbs_provider_rxe;
 extern const struct verbs_device_ops verbs_provider_siw;
 extern const struct verbs_device_ops verbs_provider_vmw_pvrdma;
+extern const struct verbs_device_ops verbs_provider_xscale;
 extern const struct verbs_device_ops verbs_provider_all;
 extern const struct verbs_device_ops verbs_provider_none;
 void ibv_static_providers(void *unused, ...);