diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index c8deb5be419e..c5e2c586fb90 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -385,6 +385,7 @@ typedef enum { VDEV_PROP_TRIM_SUPPORT, VDEV_PROP_TRIM_ERRORS, VDEV_PROP_SLOW_IOS, + VDEV_PROP_SLOW_IO_EVENTS, VDEV_NUM_PROPS } vdev_prop_t; diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index 385d7224f2c5..f557ec701703 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -464,6 +464,7 @@ struct vdev { uint64_t vdev_checksum_t; uint64_t vdev_io_n; uint64_t vdev_io_t; + boolean_t vdev_slow_io_events; uint64_t vdev_slow_io_n; uint64_t vdev_slow_io_t; }; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 35ecdca767db..01abaab104c4 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -5930,7 +5930,8 @@ - + + diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7 index acabe6b6613a..5de6f518471c 100644 --- a/man/man7/vdevprops.7 +++ b/man/man7/vdevprops.7 @@ -45,7 +45,7 @@ section, below. Every vdev has a set of properties that export statistics about the vdev as well as control various behaviors. Properties are not inherited from top-level vdevs, with the exception of -checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t. +checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t. .Pp The values of numeric properties can be specified using human-readable suffixes .Po for example, @@ -126,7 +126,8 @@ Indicates if a leaf device supports trim operations. .Pp The following native properties can be used to change the behavior of a vdev. .Bl -tag -width "allocating" -.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_n , slow_io_t +.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_events, slow_io_n , +.It Sy slow_io_t Tune the fault management daemon by specifying checksum/io thresholds of errors in seconds, respectively. These properties can be set on leaf and top-level vdevs. 
diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 04ae9f986d8f..d1ea2b30679e 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -475,6 +475,9 @@ vdev_prop_init(void) zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table, sfeatures); + zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events", + B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", + "SLOW_IO_EVENTS", boolean_table, sfeatures); /* hidden properties */ zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING, diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 01758b0c54c0..0940d36bf07e 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -427,32 +427,53 @@ vdev_get_nparity(vdev_t *vd) } static int -vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value) +vdev_prop_get_objid(vdev_t *vd, uint64_t *objid) { - spa_t *spa = vd->vdev_spa; - objset_t *mos = spa->spa_meta_objset; - uint64_t objid; - int err; if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; + *objid = vd->vdev_root_zap; } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; + *objid = vd->vdev_top_zap; } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; + *objid = vd->vdev_leaf_zap; } else { return (EINVAL); } + return (0); +} + +static int +vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value) +{ + spa_t *spa = vd->vdev_spa; + objset_t *mos = spa->spa_meta_objset; + uint64_t objid; + int err; + + if (vdev_prop_get_objid(vd, &objid) != 0) + return (EINVAL); + err = zap_lookup(mos, objid, vdev_prop_to_name(prop), sizeof (uint64_t), 1, value); - if (err == ENOENT) *value = vdev_prop_default_numeric(prop); return (err); } +static int +vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue) +{ + int err; + uint64_t ivalue; + + err = vdev_prop_get_int(vd, prop, &ivalue); + *bvalue = ivalue != 0; + + return (err); +} + /* * Get the number of 
data disks for a top-level vdev. */ @@ -714,8 +735,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) */ vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N); vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T); + vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N); vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T); + + vd->vdev_slow_io_events = vdev_prop_default_numeric( + VDEV_PROP_SLOW_IO_EVENTS); vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N); vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T); @@ -3870,6 +3895,11 @@ vdev_load(vdev_t *vd) vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " "failed [error=%d]", (u_longlong_t)zapobj, error); + error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS, + &vd->vdev_slow_io_events); + if (error && error != ENOENT) + vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) " + "failed [error=%d]", (u_longlong_t)zapobj, error); error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N, &vd->vdev_slow_io_n); if (error && error != ENOENT) @@ -5917,15 +5947,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx) /* * Set vdev property values in the vdev props mos object. 
*/ - if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; - } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; - } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; - } else { + if (vdev_prop_get_objid(vd, &objid) != 0) panic("unexpected vdev type"); - } mutex_enter(&spa->spa_props_lock); @@ -6102,6 +6125,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) } vd->vdev_io_t = intval; break; + case VDEV_PROP_SLOW_IO_EVENTS: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_slow_io_events = intval != 0; + break; case VDEV_PROP_SLOW_IO_N: if (nvpair_value_uint64(elem, &intval) != 0) { error = EINVAL; @@ -6143,6 +6173,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) nvpair_t *elem = NULL; nvlist_t *nvprops = NULL; uint64_t intval = 0; + boolean_t boolval = 0; char *strval = NULL; const char *propname = NULL; vdev_prop_t prop; @@ -6156,15 +6187,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops); - if (vd->vdev_root_zap != 0) { - objid = vd->vdev_root_zap; - } else if (vd->vdev_top_zap != 0) { - objid = vd->vdev_top_zap; - } else if (vd->vdev_leaf_zap != 0) { - objid = vd->vdev_leaf_zap; - } else { + if (vdev_prop_get_objid(vd, &objid) != 0) return (SET_ERROR(EINVAL)); - } ASSERT(objid != 0); mutex_enter(&spa->spa_props_lock); @@ -6473,6 +6497,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) vdev_prop_add_list(outnvl, propname, strval, intval, src); break; + case VDEV_PROP_SLOW_IO_EVENTS: + err = vdev_prop_get_bool(vd, prop, &boolval); + if (err && err != ENOENT) + break; + + src = ZPROP_SRC_LOCAL; + if (boolval == vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + + vdev_prop_add_list(outnvl, propname, NULL, + boolval, src); + break; case VDEV_PROP_CHECKSUM_N: case VDEV_PROP_CHECKSUM_T: case VDEV_PROP_IO_N: diff --git a/module/zfs/zfs_fm.c 
b/module/zfs/zfs_fm.c index 221f24e381dc..ba376ccbaa73 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop) case VDEV_PROP_IO_T: propval = vd->vdev_io_t; break; + case VDEV_PROP_SLOW_IO_EVENTS: + propval = vd->vdev_slow_io_events; + break; case VDEV_PROP_SLOW_IO_N: propval = vd->vdev_slow_io_n; break; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 64f3d31f5655..6ea88f79464c 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -5414,9 +5414,12 @@ zio_done(zio_t *zio) zio->io_vd->vdev_stat.vs_slow_ios++; mutex_exit(&zio->io_vd->vdev_stat_lock); - (void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY, - zio->io_spa, zio->io_vd, &zio->io_bookmark, - zio, 0); + if (zio->io_vd->vdev_slow_io_events) { + (void) zfs_ereport_post( + FM_EREPORT_ZFS_DELAY, + zio->io_spa, zio->io_vd, + &zio->io_bookmark, zio, 0); + } } } } diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg index ccb5e9c15809..6d9aa28681c7 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg @@ -71,6 +71,7 @@ typeset -a properties=( checksum_t io_n io_t + slow_io_events slow_io_n slow_io_t trim_support diff --git a/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh b/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh index 0c68530ee9ef..570c3b0c62b6 100755 --- a/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh +++ b/tests/zfs-tests/tests/functional/events/zed_slow_io.ksh @@ -23,6 +23,7 @@ # # Copyright (c) 2023, Klara Inc. 
# Set slow_io_n/slow_io_t to 5 events in 60 seconds, turn slow_io_events
# off, then generate more than 5 slow I/Os.
# No zfs.delay ereport is expected and the vdev should not degrade.
function slow_io_degrade_disabled
{
	do_setup

	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_events=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Poll for up to 60 seconds to confirm that no zfs.delay ereport
	# was generated.  Stop as soon as one is seen: the check below
	# fails on any non-zero count, so further polling adds nothing.
	#
	typeset -i elapsed=0
	typeset -i events=0
	while (( elapsed < 60 && events == 0 )); do
		# Every dot is escaped so only the literal event class matches.
		events=$(zpool events | grep -c "ereport\.fs\.zfs\.delay")
		elapsed=$((elapsed + 1))
		sleep 1
	done
	log_note "$events delay events found"

	(( events == 0 )) || \
	    log_fail "expecting no delay events, found $events"

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
	do_clean
}