Skip to content

Add knob to disable slow io notifications #17477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
VDEV_PROP_SLOW_IO_EVENTS,
VDEV_NUM_PROPS
} vdev_prop_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ struct vdev {
uint64_t vdev_checksum_t;
uint64_t vdev_io_n;
uint64_t vdev_io_t;
boolean_t vdev_slow_io_events;
uint64_t vdev_slow_io_n;
uint64_t vdev_slow_io_t;
};
Expand Down
3 changes: 2 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -5930,7 +5930,8 @@
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
<enumerator name='VDEV_NUM_PROPS' value='52'/>
<enumerator name='VDEV_PROP_SLOW_IO_EVENTS' value='52'/>
<enumerator name='VDEV_NUM_PROPS' value='53'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
Expand Down
5 changes: 3 additions & 2 deletions man/man7/vdevprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ section, below.
Every vdev has a set of properties that export statistics about the vdev
as well as control various behaviors.
Properties are not inherited from top-level vdevs, with the exception of
checksum_n, checksum_t, io_n, io_t, slow_io_n, and slow_io_t.
checksum_n, checksum_t, io_n, io_t, slow_io_events, slow_io_n, and slow_io_t.
.Pp
The values of numeric properties can be specified using human-readable suffixes
.Po for example,
Expand Down Expand Up @@ -126,7 +126,8 @@ Indicates if a leaf device supports trim operations.
.Pp
The following native properties can be used to change the behavior of a vdev.
.Bl -tag -width "allocating"
.It Sy checksum_n , checksum_t , io_n , io_t , slow_io_n , slow_io_t
.It Xo
.Sy checksum_n , checksum_t , io_n , io_t ,
.Sy slow_io_events , slow_io_n , slow_io_t
.Xc
Tune the fault management daemon by specifying checksum/io thresholds of <N>
errors in <T> seconds, respectively.
These properties can be set on leaf and top-level vdevs.
Expand Down
3 changes: 3 additions & 0 deletions module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
sfeatures);
zprop_register_index(VDEV_PROP_SLOW_IO_EVENTS, "slow_io_events",
B_TRUE, PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off",
"SLOW_IO_EVENTS", boolean_table, sfeatures);

/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
Expand Down
86 changes: 61 additions & 25 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,32 +427,53 @@ vdev_get_nparity(vdev_t *vd)
}

/*
 * vdev_prop_get_objid: select the ZAP object id to use for this vdev's
 * properties and store it in *objid.
 *
 * The vdev's ZAP ids are checked in the order vdev_root_zap, vdev_top_zap,
 * vdev_leaf_zap, and the first non-zero one is used.
 *
 * Returns 0 on success, or EINVAL when all three ids are zero (in which
 * case *objid is not written).
 *
 * NOTE(review): this rendered hunk shows pre-change and post-change diff
 * lines together (for example both the "objid = ..." and "*objid = ..."
 * assignments); only the "*objid" forms appear to belong to the new helper.
 */
static int
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
vdev_prop_get_objid(vdev_t *vd, uint64_t *objid)
{
spa_t *spa = vd->vdev_spa;
objset_t *mos = spa->spa_meta_objset;
uint64_t objid;
int err;

if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
*objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
*objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
*objid = vd->vdev_leaf_zap;
} else {
return (EINVAL);
}

return (0);
}

/*
 * Look up the numeric value of a vdev property in the vdev's property ZAP.
 *
 * Returns EINVAL when vdev_prop_get_objid() cannot supply a ZAP object for
 * the vdev; otherwise returns the zap_lookup() result.  When the property
 * has no entry (ENOENT), *value is set to the property's default and
 * ENOENT is still returned so the caller can tell the value was defaulted.
 */
static int
vdev_prop_get_int(vdev_t *vd, vdev_prop_t prop, uint64_t *value)
{
	objset_t *mos = vd->vdev_spa->spa_meta_objset;
	uint64_t zapobj;
	int error;

	if (vdev_prop_get_objid(vd, &zapobj) != 0)
		return (EINVAL);

	error = zap_lookup(mos, zapobj, vdev_prop_to_name(prop),
	    sizeof (uint64_t), 1, value);
	if (error != ENOENT)
		return (error);

	/* No stored entry: hand back the default alongside ENOENT. */
	*value = vdev_prop_default_numeric(prop);
	return (ENOENT);
}

/*
 * Look up a boolean (on/off) vdev property.
 *
 * This is a thin wrapper around vdev_prop_get_int() that converts the
 * stored integer to a boolean_t (any non-zero value is B_TRUE).
 *
 * Returns the vdev_prop_get_int() result unchanged.  *bvalue is written
 * only when that result is 0 or ENOENT, the two cases in which
 * vdev_prop_get_int() fills in a value (ENOENT yields the property
 * default).  On any other error *bvalue is left untouched, so a failed
 * lookup never replaces the caller's current setting with an
 * indeterminate value.
 */
static int
vdev_prop_get_bool(vdev_t *vd, vdev_prop_t prop, boolean_t *bvalue)
{
	uint64_t ivalue = 0;
	int err;

	err = vdev_prop_get_int(vd, prop, &ivalue);
	if (err == 0 || err == ENOENT)
		*bvalue = (ivalue != 0) ? B_TRUE : B_FALSE;

	return (err);
}

/*
* Get the number of data disks for a top-level vdev.
*/
Expand Down Expand Up @@ -714,8 +735,12 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
*/
vd->vdev_checksum_n = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N);
vd->vdev_checksum_t = vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T);

vd->vdev_io_n = vdev_prop_default_numeric(VDEV_PROP_IO_N);
vd->vdev_io_t = vdev_prop_default_numeric(VDEV_PROP_IO_T);

vd->vdev_slow_io_events = vdev_prop_default_numeric(
VDEV_PROP_SLOW_IO_EVENTS);
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);

Expand Down Expand Up @@ -3870,6 +3895,11 @@ vdev_load(vdev_t *vd)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);

error = vdev_prop_get_bool(vd, VDEV_PROP_SLOW_IO_EVENTS,
&vd->vdev_slow_io_events);
if (error && error != ENOENT)
vdev_dbgmsg(vd, "vdev_load: zap_lookup(zap=%llu) "
"failed [error=%d]", (u_longlong_t)zapobj, error);
error = vdev_prop_get_int(vd, VDEV_PROP_SLOW_IO_N,
&vd->vdev_slow_io_n);
if (error && error != ENOENT)
Expand Down Expand Up @@ -5917,15 +5947,8 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
/*
* Set vdev property values in the vdev props mos object.
*/
if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
if (vdev_prop_get_objid(vd, &objid) != 0)
panic("unexpected vdev type");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to move the panic() into vdev_prop_get_objid() to save having to test it everywhere it is used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use it in vdev_prop_get_int, and then it is used for example here to generate an error.

So I would leave it as is for a consumer to decide if it should panic or not.

}

mutex_enter(&spa->spa_props_lock);

Expand Down Expand Up @@ -6102,6 +6125,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_io_t = intval;
break;
case VDEV_PROP_SLOW_IO_EVENTS:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_slow_io_events = intval != 0;
break;
case VDEV_PROP_SLOW_IO_N:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
Expand Down Expand Up @@ -6143,6 +6173,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
nvpair_t *elem = NULL;
nvlist_t *nvprops = NULL;
uint64_t intval = 0;
boolean_t boolval = 0;
char *strval = NULL;
const char *propname = NULL;
vdev_prop_t prop;
Expand All @@ -6156,15 +6187,8 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)

nvlist_lookup_nvlist(innvl, ZPOOL_VDEV_PROPS_GET_PROPS, &nvprops);

if (vd->vdev_root_zap != 0) {
objid = vd->vdev_root_zap;
} else if (vd->vdev_top_zap != 0) {
objid = vd->vdev_top_zap;
} else if (vd->vdev_leaf_zap != 0) {
objid = vd->vdev_leaf_zap;
} else {
if (vdev_prop_get_objid(vd, &objid) != 0)
return (SET_ERROR(EINVAL));
}
ASSERT(objid != 0);

mutex_enter(&spa->spa_props_lock);
Expand Down Expand Up @@ -6473,6 +6497,18 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
case VDEV_PROP_SLOW_IO_EVENTS:
err = vdev_prop_get_bool(vd, prop, &boolval);
if (err && err != ENOENT)
break;

src = ZPROP_SRC_LOCAL;
if (boolval == vdev_prop_default_numeric(prop))
src = ZPROP_SRC_DEFAULT;

vdev_prop_add_list(outnvl, propname, NULL,
boolval, src);
break;
case VDEV_PROP_CHECKSUM_N:
case VDEV_PROP_CHECKSUM_T:
case VDEV_PROP_IO_N:
Expand Down
3 changes: 3 additions & 0 deletions module/zfs/zfs_fm.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
case VDEV_PROP_IO_T:
propval = vd->vdev_io_t;
break;
case VDEV_PROP_SLOW_IO_EVENTS:
propval = vd->vdev_slow_io_events;
break;
case VDEV_PROP_SLOW_IO_N:
propval = vd->vdev_slow_io_n;
break;
Expand Down
9 changes: 6 additions & 3 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -5414,9 +5414,12 @@ zio_done(zio_t *zio)
zio->io_vd->vdev_stat.vs_slow_ios++;
mutex_exit(&zio->io_vd->vdev_stat_lock);

(void) zfs_ereport_post(FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd, &zio->io_bookmark,
zio, 0);
if (zio->io_vd->vdev_slow_io_events) {
(void) zfs_ereport_post(
FM_EREPORT_ZFS_DELAY,
zio->io_spa, zio->io_vd,
&zio->io_bookmark, zio, 0);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ typeset -a properties=(
checksum_t
io_n
io_t
slow_io_events
slow_io_n
slow_io_t
trim_support
Expand Down
44 changes: 42 additions & 2 deletions tests/zfs-tests/tests/functional/events/zed_slow_io.ksh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#
# Copyright (c) 2023, Klara Inc.
# Copyright (c) 2025, Mariusz Zaborski <[email protected]>
#

# DESCRIPTION:
Expand Down Expand Up @@ -140,8 +141,8 @@ function slow_io_degrade
{
do_setup

zpool set slow_io_n=5 $TESTPOOL $VDEV
zpool set slow_io_t=60 $TESTPOOL $VDEV
log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
log_must zpool set slow_io_t=60 $TESTPOOL $VDEV

start_slow_io
for i in {1..16}; do
Expand Down Expand Up @@ -193,6 +194,44 @@ function slow_io_no_degrade
do_clean
}

# Set slow_io_n/slow_io_t to 5 events in 60 seconds and turn
# slow_io_events off, then generate more than 5 slow I/Os.
# No delay ereports should be posted and the vdev should not degrade.
function slow_io_degrade_disabled
{
	do_setup

	log_must zpool set slow_io_n=5 $TESTPOOL $VDEV
	log_must zpool set slow_io_t=60 $TESTPOOL $VDEV
	log_must zpool set slow_io_events=off $TESTPOOL $VDEV

	start_slow_io
	for i in {1..16}; do
		dd if=${FILEPATH}$i of=/dev/null count=1 bs=512 2>/dev/null
		sleep 0.5
	done
	stop_slow_io
	zpool sync

	#
	# Poll for up to 60 seconds to confirm that zfs.delay was not
	# generated.  Stop polling as soon as one shows up, since the
	# test has already failed at that point.
	#
	typeset -i i=0
	typeset -i events=0
	while [[ $i -lt 60 ]]; do
		events=$(zpool events | \
		    grep "ereport\.fs\.zfs\.delay" | wc -l)
		[[ $events -gt 0 ]] && break
		i=$((i+1))
		sleep 1
	done
	log_note "$events delay events found"

	[[ $events -eq 0 ]] || \
	    log_fail "expecting no delay events, found $events"

	log_mustnot wait_vdev_state $TESTPOOL $VDEV "DEGRADED" 45
	do_clean
}

log_assert "Test ZED slow io configurability"
log_onexit cleanup

Expand All @@ -202,5 +241,6 @@ log_must zed_start
default_degrade
slow_io_degrade
slow_io_no_degrade
slow_io_degrade_disabled

log_pass "Test ZED slow io configurability"
Loading