Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -986,14 +986,18 @@ static void super_written(struct bio *bio)
if (bio->bi_status) {
pr_err("md: %s gets error=%d\n", __func__,
blk_status_to_errno(bio->bi_status));
if (bio->bi_opf & MD_FAILFAST)
set_bit(FailfastIOFailure, &rdev->flags);
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
pr_warn("md: %s: Metadata write will be repeated to %pg\n",
mdname(mddev), rdev->bdev);
set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
set_bit(LastDev, &rdev->flags);
}
} else
clear_bit(LastDev, &rdev->flags);
} else {
clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
}

bio_put(bio);

Expand Down Expand Up @@ -1035,7 +1039,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,

if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
test_bit(FailFast, &rdev->flags) &&
!test_bit(LastDev, &rdev->flags))
!test_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
bio->bi_opf |= MD_FAILFAST;

atomic_inc(&mddev->pending_writes);
Expand All @@ -1046,7 +1050,7 @@ int md_super_wait(struct mddev *mddev)
{
/* wait for all superblock writes that were scheduled to complete */
wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
if (test_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
return -EAGAIN;
return 0;
}
Expand Down
13 changes: 7 additions & 6 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,10 @@ enum flag_bits {
* It is expected that no bad block log
* is present.
*/
LastDev, /* Seems to be the last working dev as
* it didn't fail, so don't use FailFast
* any more for metadata
FailfastIOFailure, /* rdev with failfast IO failure
* but md_error not yet completed.
* If the last rdev has this flag,
* error_handler must not fail the array
*/
CollisionCheck, /*
* check if there is collision between raid1
Expand Down Expand Up @@ -331,8 +332,8 @@ struct md_cluster_operations;
* @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node, already took
* resync lock, need to release the lock.
* @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
* calls to md_error() will never cause the array to
* become failed.
* calls to md_error() with FailfastIOFailure will
* never cause the array to become failed.
* @MD_HAS_PPL: The raid array has PPL feature set.
* @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
* @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
Expand Down Expand Up @@ -360,7 +361,7 @@ enum mddev_sb_flags {
MD_SB_CHANGE_DEVS, /* Some device status has changed */
MD_SB_CHANGE_CLEAN, /* transition to or from 'clean' */
MD_SB_CHANGE_PENDING, /* switch from 'clean' to 'active' in progress */
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated, do not use failfast */
};

#define NR_SERIAL_INFOS 8
Expand Down
32 changes: 26 additions & 6 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@
(bio->bi_opf & MD_FAILFAST) &&
/* We never try FailFast to WriteMostly devices */
!test_bit(WriteMostly, &rdev->flags)) {
set_bit(FailfastIOFailure, &rdev->flags);
md_error(r1_bio->mddev, rdev);
}

Expand Down Expand Up @@ -1748,8 +1749,12 @@
* - recovery is interrupted.
* - &mddev->degraded is bumped.
*
* @rdev is marked as &Faulty excluding case when array is failed and
* &mddev->fail_last_dev is off.
* If @rdev has &FailfastIOFailure and it is the 'last' rdev,
* then @mddev and @rdev will not be marked as failed.
*
* @rdev is marked as &Faulty except in the following cases:
* - when @mddev is failed and &mddev->fail_last_dev is off
* - when @rdev is the last device and the &FailfastIOFailure flag is set
*/
static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
{
Expand All @@ -1760,7 +1765,19 @@

if (test_bit(In_sync, &rdev->flags) &&
(conf->raid_disks - mddev->degraded) == 1) {
if (test_and_clear_bit(FailfastIOFailure, &rdev->flags)) {
spin_unlock_irqrestore(&conf->device_lock, flags);
pr_warn_ratelimited("md/raid1:%s: Failfast IO failure on %pg, "
"last device but ignoring it\n",

Check failure on line 1771 in drivers/md/raid1.c

View workflow job for this annotation

GitHub Actions / per-patch-testing

WARNING: quoted string split across lines
mdname(mddev), rdev->bdev);
return;
}

set_bit(MD_BROKEN, &mddev->flags);
pr_crit("md/raid1:%s: Disk failure on %pg, this is the last device.\n"
"md/raid1:%s: Cannot continue operation (%d/%d failed).\n",
mdname(mddev), rdev->bdev,
mdname(mddev), mddev->degraded + 1, conf->raid_disks);

if (!mddev->fail_last_dev) {
conf->recovery_disabled = mddev->recovery_disabled;
Expand All @@ -1772,17 +1789,18 @@
if (test_and_clear_bit(In_sync, &rdev->flags))
mddev->degraded++;
set_bit(Faulty, &rdev->flags);
if ((conf->raid_disks - mddev->degraded) > 0)
pr_crit("md/raid1:%s: Disk failure on %pg, disabling device.\n"
"md/raid1:%s: Operation continuing on %d devices.\n",
mdname(mddev), rdev->bdev,
mdname(mddev), conf->raid_disks - mddev->degraded);
spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
pr_crit("md/raid1:%s: Disk failure on %pg, disabling device.\n"
"md/raid1:%s: Operation continuing on %d devices.\n",
mdname(mddev), rdev->bdev,
mdname(mddev), conf->raid_disks - mddev->degraded);
}

static void print_conf(struct r1conf *conf)
Expand Down Expand Up @@ -2150,6 +2168,7 @@
if (test_bit(FailFast, &rdev->flags)) {
/* Don't try recovering from here - just fail it
* ... unless it is the last working device of course */
set_bit(FailfastIOFailure, &rdev->flags);
md_error(mddev, rdev);
if (test_bit(Faulty, &rdev->flags))
/* Don't try to read from here, but make sure
Expand Down Expand Up @@ -2654,6 +2673,7 @@
fix_read_error(conf, r1_bio);
unfreeze_array(conf);
} else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) {
set_bit(FailfastIOFailure, &rdev->flags);
md_error(mddev, rdev);
} else {
r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED;
Expand Down
35 changes: 28 additions & 7 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@
dec_rdev = 0;
if (test_bit(FailFast, &rdev->flags) &&
(bio->bi_opf & MD_FAILFAST)) {
set_bit(FailfastIOFailure, &rdev->flags);
md_error(rdev->mddev, rdev);
}

Expand Down Expand Up @@ -1995,8 +1996,12 @@
* - recovery is interrupted.
* - &mddev->degraded is bumped.
*
* @rdev is marked as &Faulty excluding case when array is failed and
* &mddev->fail_last_dev is off.
* If @rdev has &FailfastIOFailure and it is the 'last' rdev,
* then @mddev and @rdev will not be marked as failed.
*
* @rdev is marked as &Faulty except in the following cases:
* - when @mddev is failed and &mddev->fail_last_dev is off
* - when @rdev is the last device and the &FailfastIOFailure flag is set
*/
static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
{
Expand All @@ -2006,7 +2011,19 @@
spin_lock_irqsave(&conf->device_lock, flags);

if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
if (test_and_clear_bit(FailfastIOFailure, &rdev->flags)) {
spin_unlock_irqrestore(&conf->device_lock, flags);
pr_warn_ratelimited("md/raid10:%s: Failfast IO failure on %pg, "
"last device but ignoring it\n",

Check failure on line 2017 in drivers/md/raid10.c

View workflow job for this annotation

GitHub Actions / per-patch-testing

WARNING: quoted string split across lines
mdname(mddev), rdev->bdev);
return;
}

set_bit(MD_BROKEN, &mddev->flags);
pr_crit("md/raid10:%s: Disk failure on %pg, this is the last device.\n"
"md/raid10:%s: Cannot continue operation (%d/%d failed).\n",
mdname(mddev), rdev->bdev,
mdname(mddev), mddev->degraded + 1, conf->geo.raid_disks);

if (!mddev->fail_last_dev) {
spin_unlock_irqrestore(&conf->device_lock, flags);
Expand All @@ -2021,11 +2038,12 @@
set_bit(Faulty, &rdev->flags);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
if (enough(conf, -1))
pr_crit("md/raid10:%s: Disk failure on %pg, disabling device.\n"
"md/raid10:%s: Operation continuing on %d devices.\n",
mdname(mddev), rdev->bdev,
mdname(mddev), conf->geo.raid_disks - mddev->degraded);
spin_unlock_irqrestore(&conf->device_lock, flags);
pr_crit("md/raid10:%s: Disk failure on %pg, disabling device.\n"
"md/raid10:%s: Operation continuing on %d devices.\n",
mdname(mddev), rdev->bdev,
mdname(mddev), conf->geo.raid_disks - mddev->degraded);
}

static void print_conf(struct r10conf *conf)
Expand Down Expand Up @@ -2413,6 +2431,7 @@
continue;
} else if (test_bit(FailFast, &rdev->flags)) {
/* Just give up on this device */
set_bit(FailfastIOFailure, &rdev->flags);
md_error(rdev->mddev, rdev);
continue;
}
Expand Down Expand Up @@ -2868,8 +2887,10 @@
freeze_array(conf, 1);
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
} else
} else {
set_bit(FailfastIOFailure, &rdev->flags);
md_error(mddev, rdev);
}

rdev_dec_pending(rdev, mddev);
r10_bio->state = 0;
Expand Down