Skip to content

Commit e0a5ca2

Browse files
committed
DAOS-19036 dtx: handle DTX race issues
Mainly including the following fixes: 1. When the DTX leader switches, it is possible that the old DTX leader wanted to abort a DTX but did not complete the abort before its eviction. The new DTX leader may then re-execute the related modification successfully and try to commit that DTX. Without control, the in-flight DTX ABORT RPCs from the old DTX leader may abort the DTX that is about to be committed by the new DTX leader, breaking DTX semantics. The patch adds a @version parameter to DTX abort: when the new DTX leader handles an RPC resent by the client, the related DTX version is refreshed if the DTX has already been prepared by the old DTX leader; whenever a DTX is aborted locally, the logic compares the version from the ABORT request with the related DTX version and skips stale ABORT RPCs. 2. vos_dtx_load_mbs() may be triggered before the related DTX has been prepared locally. In that case, the related MBS information is empty. We need to handle this case to avoid a segmentation fault. 3. Explicitly clean up non-prepared DTX after modification failure to avoid leaking stale active DTX (header) in the DTX table. Signed-off-by: Fan Yong <fan.yong@hpe.com>
1 parent 6e234f4 commit e0a5ca2

10 files changed

Lines changed: 207 additions & 156 deletions

File tree

src/dtx/dtx_coll.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,13 @@
3838
*/
3939

4040
struct dtx_coll_local_args {
41-
uuid_t dcla_po_uuid;
42-
uuid_t dcla_co_uuid;
43-
struct dtx_id dcla_xid;
44-
daos_epoch_t dcla_epoch;
45-
uint32_t dcla_opc;
46-
int *dcla_results;
41+
uuid_t dcla_po_uuid;
42+
uuid_t dcla_co_uuid;
43+
struct dtx_id dcla_xid;
44+
daos_epoch_t dcla_epoch;
45+
uint32_t dcla_ver;
46+
uint32_t dcla_opc;
47+
int *dcla_results;
4748
};
4849

4950
void
@@ -361,7 +362,7 @@ dtx_coll_local_one(void *args)
361362
rc = vos_dtx_commit(cont->sc_hdl, &dcla->dcla_xid, 1, false, NULL);
362363
break;
363364
case DTX_COLL_ABORT:
364-
rc = vos_dtx_abort(cont->sc_hdl, &dcla->dcla_xid, dcla->dcla_epoch);
365+
rc = vos_dtx_abort(cont->sc_hdl, &dcla->dcla_xid, dcla->dcla_epoch, dcla->dcla_ver);
365366
break;
366367
case DTX_COLL_CHECK:
367368
rc = vos_dtx_check(cont->sc_hdl, &dcla->dcla_xid, NULL, NULL, NULL, false);
@@ -397,7 +398,8 @@ dtx_coll_local_one(void *args)
397398

398399
int
399400
dtx_coll_local_exec(uuid_t po_uuid, uuid_t co_uuid, struct dtx_id *xid, daos_epoch_t epoch,
400-
uint32_t opc, uint32_t bitmap_sz, uint8_t *bitmap, int **p_results)
401+
uint32_t version, uint32_t opc, uint32_t bitmap_sz, uint8_t *bitmap,
402+
int **p_results)
401403
{
402404
struct dtx_coll_local_args dcla = { 0 };
403405
struct dss_coll_ops coll_ops = { 0 };
@@ -410,9 +412,10 @@ dtx_coll_local_exec(uuid_t po_uuid, uuid_t co_uuid, struct dtx_id *xid, daos_epo
410412

411413
uuid_copy(dcla.dcla_po_uuid, po_uuid);
412414
uuid_copy(dcla.dcla_co_uuid, co_uuid);
413-
dcla.dcla_xid = *xid;
415+
dcla.dcla_xid = *xid;
414416
dcla.dcla_epoch = epoch;
415-
dcla.dcla_opc = opc;
417+
dcla.dcla_ver = version;
418+
dcla.dcla_opc = opc;
416419

417420
coll_ops.co_func = dtx_coll_local_one;
418421
coll_args.ca_func_args = &dcla;

src/dtx/dtx_internal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,8 @@ int dtx_coll_prep(uuid_t po_uuid, daos_unit_oid_t oid, struct dtx_id *xid,
304304
struct dtx_memberships *mbs, uint32_t my_tgtid, uint32_t dtx_ver,
305305
uint32_t pm_ver, bool for_check, bool need_hint, struct dtx_coll_entry **p_dce);
306306
int dtx_coll_local_exec(uuid_t po_uuid, uuid_t co_uuid, struct dtx_id *xid, daos_epoch_t epoch,
307-
uint32_t opc, uint32_t bitmap_sz, uint8_t *bitmap, int **p_results);
307+
uint32_t version, uint32_t opc, uint32_t bitmap_sz, uint8_t *bitmap,
308+
int **p_results);
308309
/* clang-format on */
309310

310311
enum dtx_status_handle_result {

src/dtx/dtx_resync.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2019-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -392,7 +392,8 @@ dtx_status_handle(struct dtx_resync_args *dra)
392392
again:
393393
d_list_for_each_entry_safe(dre, next, &drh->drh_list, dre_link) {
394394
if (dre->dre_dte.dte_ver < dra->discard_version) {
395-
err = vos_dtx_abort(cont->sc_hdl, &dre->dre_xid, dre->dre_epoch);
395+
err = vos_dtx_abort(cont->sc_hdl, &dre->dre_xid, dre->dre_epoch,
396+
dre->dre_dte.dte_ver);
396397
if (err == -DER_NONEXIST)
397398
err = 0;
398399
if (err != 0)

src/dtx/dtx_rpc.c

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -963,7 +963,7 @@ dtx_abort(struct ds_cont_child *cont, struct dtx_entry *dte, daos_epoch_t epoch)
963963
* to resend sometime later.
964964
*/
965965
if (epoch != 0)
966-
rc1 = vos_dtx_abort(cont->sc_hdl, &dte->dte_xid, epoch);
966+
rc1 = vos_dtx_abort(cont->sc_hdl, &dte->dte_xid, epoch, dte->dte_ver);
967967
else
968968
rc1 = vos_dtx_set_flags(cont->sc_hdl, &dte->dte_xid, 1, DTE_CORRUPTED);
969969
if (rc1 > 0 || rc1 == -DER_NONEXIST)
@@ -1235,7 +1235,8 @@ dtx_refresh_internal(struct ds_cont_child *cont, int *check_count, d_list_t *che
12351235
d_list_del(&dsp->dsp_link);
12361236
dtx_dsp_free(dsp);
12371237
} else {
1238-
rc1 = vos_dtx_abort(cont->sc_hdl, &dsp->dsp_xid, dsp->dsp_epoch);
1238+
rc1 = vos_dtx_abort(cont->sc_hdl, &dsp->dsp_xid, dsp->dsp_epoch,
1239+
dsp->dsp_version);
12391240
D_ASSERT(rc1 != -DER_NO_PERM);
12401241

12411242
if (rc1 == 0 || !for_io) {
@@ -1655,8 +1656,8 @@ dtx_coll_commit(struct ds_cont_child *cont, struct dtx_coll_entry *dce, struct d
16551656
if (dce->dce_bitmap != NULL) {
16561657
clrbit(dce->dce_bitmap, dss_get_module_info()->dmi_tgt_id);
16571658
len = dtx_coll_local_exec(cont->sc_pool_uuid, cont->sc_uuid, &dce->dce_xid, 0,
1658-
DTX_COLL_COMMIT, dce->dce_bitmap_sz, dce->dce_bitmap,
1659-
&results);
1659+
dce->dce_ver, DTX_COLL_COMMIT, dce->dce_bitmap_sz,
1660+
dce->dce_bitmap, &results);
16601661
if (len < 0) {
16611662
rc1 = len;
16621663
} else {
@@ -1738,8 +1739,8 @@ dtx_coll_abort(struct ds_cont_child *cont, struct dtx_coll_entry *dce, daos_epoc
17381739
if (dce->dce_bitmap != NULL) {
17391740
clrbit(dce->dce_bitmap, dss_get_module_info()->dmi_tgt_id);
17401741
len = dtx_coll_local_exec(cont->sc_pool_uuid, cont->sc_uuid, &dce->dce_xid, epoch,
1741-
DTX_COLL_ABORT, dce->dce_bitmap_sz, dce->dce_bitmap,
1742-
&results);
1742+
dce->dce_ver, DTX_COLL_ABORT, dce->dce_bitmap_sz,
1743+
dce->dce_bitmap, &results);
17431744
if (len < 0) {
17441745
rc1 = len;
17451746
} else {
@@ -1759,7 +1760,7 @@ dtx_coll_abort(struct ds_cont_child *cont, struct dtx_coll_entry *dce, daos_epoc
17591760
}
17601761

17611762
if (epoch != 0)
1762-
rc2 = vos_dtx_abort(cont->sc_hdl, &dce->dce_xid, epoch);
1763+
rc2 = vos_dtx_abort(cont->sc_hdl, &dce->dce_xid, epoch, dce->dce_ver);
17631764
else
17641765
rc2 = vos_dtx_set_flags(cont->sc_hdl, &dce->dce_xid, 1, DTE_CORRUPTED);
17651766
if (rc2 > 0 || rc2 == -DER_NONEXIST)
@@ -1795,8 +1796,8 @@ dtx_coll_check(struct ds_cont_child *cont, struct dtx_coll_entry *dce, daos_epoc
17951796

17961797
if (dce->dce_bitmap != NULL) {
17971798
len = dtx_coll_local_exec(cont->sc_pool_uuid, cont->sc_uuid, &dce->dce_xid, epoch,
1798-
DTX_COLL_CHECK, dce->dce_bitmap_sz, dce->dce_bitmap,
1799-
&results);
1799+
dce->dce_ver, DTX_COLL_CHECK, dce->dce_bitmap_sz,
1800+
dce->dce_bitmap, &results);
18001801
if (len < 0) {
18011802
rc1 = len;
18021803
} else {

src/dtx/dtx_srv.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ dtx_handler(crt_rpc_t *rpc)
239239

240240
rc = vos_dtx_abort(cont->sc_hdl,
241241
(struct dtx_id *)din->di_dtx_array.ca_arrays,
242-
din->di_epoch);
242+
din->di_epoch, din->di_version);
243243
} else {
244244
rc = vos_dtx_set_flags(cont->sc_hdl,
245245
(struct dtx_id *)din->di_dtx_array.ca_arrays,
@@ -462,7 +462,7 @@ dtx_coll_handler(crt_rpc_t *rpc)
462462
}
463463

464464
len = dtx_coll_local_exec(dci->dci_po_uuid, dci->dci_co_uuid, &dci->dci_xid, dci->dci_epoch,
465-
opc, bitmap_sz, bitmap, &results);
465+
dci->dci_version, opc, bitmap_sz, bitmap, &results);
466466
if (len < 0)
467467
D_GOTO(out, rc = len);
468468

src/include/daos_srv/vos.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/**
22
* (C) Copyright 2015-2024 Intel Corporation.
3-
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP.
3+
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP.
44
*
55
* SPDX-License-Identifier: BSD-2-Clause-Patent
66
*/
@@ -199,14 +199,15 @@ vos_dtx_commit(daos_handle_t coh, struct dtx_id dtis[], int count, bool keep_act
199199
/**
200200
* Abort the specified DTXs.
201201
*
202-
* \param coh [IN] Container open handle.
203-
* \param dti [IN] The DTX identifiers to be aborted.
204-
* \param epoch [IN] The max epoch for the DTX to be aborted.
202+
* \param coh [IN] Container open handle.
203+
* \param dti [IN] The DTX identifiers to be aborted.
204+
* \param epoch [IN] The max epoch for the DTX to be aborted.
205+
* \param version [IN] The max version for the DTX to be aborted.
205206
*
206207
* \return Zero on success, negative value if error.
207208
*/
208209
int
209-
vos_dtx_abort(daos_handle_t coh, struct dtx_id *dti, daos_epoch_t epoch);
210+
vos_dtx_abort(daos_handle_t coh, struct dtx_id *dti, daos_epoch_t epoch, uint32_t version);
210211

211212
/**
212213
* Discard the active DTX entry's records if invalid.

0 commit comments

Comments
 (0)