diff --git a/include/sys/ddt.h b/include/sys/ddt.h index e0ccea56af49..5285614acba7 100644 --- a/include/sys/ddt.h +++ b/include/sys/ddt.h @@ -284,6 +284,9 @@ typedef struct { avl_tree_t ddt_tree; /* "live" (changed) entries this txg */ avl_tree_t ddt_repair_tree; /* entries being repaired */ + /* Protects ddt_object[] and ddt_object_dnode[]. */ + krwlock_t ddt_objects_lock ____cacheline_aligned; + /* * Log trees are stable during I/O, and only modified during sync * with exclusive access. diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index bbcfdfddc354..7835c64855c9 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -426,7 +426,6 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class, { spa_t *spa = ddt->ddt_spa; objset_t *os = ddt->ddt_os; - uint64_t *objectp = &ddt->ddt_object[type][class]; uint64_t count; char name[DDT_NAMELEN]; @@ -434,20 +433,24 @@ ddt_object_destroy(ddt_t *ddt, ddt_type_t type, ddt_class_t class, ddt_object_name(ddt, type, class, name); - ASSERT3U(*objectp, !=, 0); + ASSERT(ddt->ddt_object[type][class] != 0); ASSERT(ddt_histogram_empty(&ddt->ddt_histogram[type][class])); VERIFY0(ddt_object_count(ddt, type, class, &count)); VERIFY0(count); VERIFY0(zap_remove(os, ddt->ddt_dir_object, name, tx)); VERIFY0(zap_remove(os, spa->spa_ddt_stat_object, name, tx)); - if (ddt->ddt_object_dnode[type][class] != NULL) { - dnode_rele(ddt->ddt_object_dnode[type][class], ddt); - ddt->ddt_object_dnode[type][class] = NULL; - } - VERIFY0(ddt_ops[type]->ddt_op_destroy(os, *objectp, tx)); - memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t)); - *objectp = 0; + uint64_t object = ddt->ddt_object[type][class]; + dnode_t *dn = ddt->ddt_object_dnode[type][class]; + rw_enter(&ddt->ddt_objects_lock, RW_WRITER); + ddt->ddt_object[type][class] = 0; + ddt->ddt_object_dnode[type][class] = NULL; + rw_exit(&ddt->ddt_objects_lock); + + if (dn != NULL) + dnode_rele(dn, ddt); + VERIFY0(ddt_ops[type]->ddt_op_destroy(os, object, tx)); + memset(&ddt->ddt_object_stats[type][class], 0, sizeof (ddt_object_t)); } static int @@ -553,6 +556,20 @@ ddt_object_lookup(ddt_t *ddt, ddt_type_t type, ddt_class_t class, dde->dde_phys, DDT_PHYS_SIZE(ddt))); } +/* + * Like ddt_object_lookup(), but for open context where we need protection + * against concurrent object destruction by sync context. + */ +static int +ddt_object_lookup_open(ddt_t *ddt, ddt_type_t type, ddt_class_t class, + ddt_entry_t *dde) +{ + rw_enter(&ddt->ddt_objects_lock, RW_READER); + int error = ddt_object_lookup(ddt, type, class, dde); + rw_exit(&ddt->ddt_objects_lock); + return (error); +} + static int ddt_object_contains(ddt_t *ddt, ddt_type_t type, ddt_class_t class, const ddt_key_t *ddk) @@ -568,21 +585,33 @@ static void ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class, const ddt_key_t *ddk) { + /* + * Called from open context, so protect against concurrent + * object destruction by sync context. + */ + rw_enter(&ddt->ddt_objects_lock, RW_READER); + dnode_t *dn = ddt->ddt_object_dnode[type][class]; - if (dn == NULL) - return; + if (dn != NULL) + ddt_ops[type]->ddt_op_prefetch(dn, ddk); - ddt_ops[type]->ddt_op_prefetch(dn, ddk); + rw_exit(&ddt->ddt_objects_lock); } static void ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class) { + /* + * Called from open context, so protect against concurrent + * object destruction by sync context. + */ + rw_enter(&ddt->ddt_objects_lock, RW_READER); + dnode_t *dn = ddt->ddt_object_dnode[type][class]; - if (dn == NULL) - return; + if (dn != NULL) + ddt_ops[type]->ddt_op_prefetch_all(dn); - ddt_ops[type]->ddt_op_prefetch_all(dn); + rw_exit(&ddt->ddt_objects_lock); } static int @@ -610,16 +639,26 @@ int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t class, uint64_t *walk, ddt_lightweight_entry_t *ddlwe) { + /* + * Can be called from open context, so protect against concurrent + * object destruction by sync context. + */ + rw_enter(&ddt->ddt_objects_lock, RW_READER); + dnode_t *dn = ddt->ddt_object_dnode[type][class]; - ASSERT(dn != NULL); + if (dn == NULL) { + rw_exit(&ddt->ddt_objects_lock); + return (SET_ERROR(ENOENT)); + } int error = ddt_ops[type]->ddt_op_walk(dn, walk, &ddlwe->ddlwe_key, &ddlwe->ddlwe_phys, DDT_PHYS_SIZE(ddt)); if (error == 0) { ddlwe->ddlwe_type = type; ddlwe->ddlwe_class = class; - return (0); } + + rw_exit(&ddt->ddt_objects_lock); return (error); } @@ -627,10 +666,22 @@ int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t class, uint64_t *count) { + /* + * Can be called from open context, so protect against concurrent + * object destruction by sync context. + */ + rw_enter(&ddt->ddt_objects_lock, RW_READER); + dnode_t *dn = ddt->ddt_object_dnode[type][class]; - ASSERT(dn != NULL); + if (dn == NULL) { + rw_exit(&ddt->ddt_objects_lock); + return (SET_ERROR(ENOENT)); + } - return (ddt_ops[type]->ddt_op_count(dn, count)); + int error = ddt_ops[type]->ddt_op_count(dn, count); + + rw_exit(&ddt->ddt_objects_lock); + return (error); } int @@ -1698,6 +1749,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c) sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); avl_create(&ddt->ddt_repair_tree, ddt_key_compare, sizeof (ddt_entry_t), offsetof(ddt_entry_t, dde_node)); + rw_init(&ddt->ddt_objects_lock, NULL, RW_DEFAULT, NULL); ddt->ddt_checksum = c; ddt->ddt_spa = spa; @@ -1744,6 +1796,7 @@ ddt_table_free(ddt_t *ddt) } } } + rw_destroy(&ddt->ddt_objects_lock); ASSERT0(avl_numnodes(&ddt->ddt_tree)); ASSERT0(avl_numnodes(&ddt->ddt_repair_tree)); avl_destroy(&ddt->ddt_tree); @@ -1876,7 +1929,7 @@ ddt_repair_start(ddt_t *ddt, const blkptr_t *bp) * there's definitely only one copy, so don't even try. */ if (class != DDT_CLASS_UNIQUE && - ddt_object_lookup(ddt, type, class, dde) == 0) + ddt_object_lookup_open(ddt, type, class, dde) == 0) return (dde); } }