diff --git a/.github/tools/.checkpatch.conf b/.github/tools/.checkpatch.conf index d6e3bc44..03114847 100644 --- a/.github/tools/.checkpatch.conf +++ b/.github/tools/.checkpatch.conf @@ -8,3 +8,4 @@ --exclude tests --ignore FILE_PATH_CHANGES --ignore EMAIL_SUBJECT +--ignore NEW_TYPEDEFS diff --git a/Incremental.c b/Incremental.c index 5e59b6d1..0167b56a 100644 --- a/Incremental.c +++ b/Incremental.c @@ -1645,22 +1645,6 @@ static int Incremental_container(struct supertype *st, char *devname, return rv; } -static void remove_from_member_array(struct mdstat_ent *memb, - struct mddev_dev *devlist, int verbose) -{ - int subfd = open_dev(memb->devnm); - - if (subfd >= 0) { - /* - * Ignore the return value because it's necessary - * to handle failure condition here. - */ - Manage_subdevs(memb->devnm, subfd, devlist, verbose, - 0, UOPT_UNDEFINED, 0); - close(subfd); - } -} - /** * is_devnode_path() - check if the devname passed might be devnode path. * @devnode: the path to check. @@ -1681,25 +1665,81 @@ static bool is_devnode_path(char *devnode) return false; } +/** + * Incremental_remove_external() - Remove the device from external container. + * @device_devnm: block device to remove. + * @container_devnm: the parent container + * @mdstat: mdstat file content. + * @verbose: verbose flag. + * + * Fail member device in each subarray and remove member device from external container. + * The resposibility of removing member disks from external subararys belongs to mdmon. + */ +static mdadm_status_t Incremental_remove_external(char *device_devnm, char *container_devnm, + struct mdstat_ent *mdstat, int verbose) +{ + mdadm_status_t rv = MDADM_STATUS_SUCCESS; + struct mdstat_ent *memb; + + for (memb = mdstat ; memb ; memb = memb->next) { + mdadm_status_t ret = MDADM_STATUS_SUCCESS; + int state_fd; + + if (!is_container_member(memb, container_devnm)) + continue; + + /* + * Checking mdstat is pointles because it might be outdated, try open descriptor + * instead. If it fails, we are fine with that, device is already gone. + */ + state_fd = sysfs_open_memb_attr(memb->devnm, device_devnm, "state", O_RDWR); + if (!is_fd_valid(state_fd)) + continue; + + ret = sysfs_set_memb_state_fd(state_fd, MEMB_STATE_FAULTY, NULL); + if (ret && verbose >= 0) + pr_err("Cannot fail member device %s in external subarray %s.\n", + device_devnm, memb->devnm); + + close_fd(&state_fd); + + /* + * Don't remove member device from container if it failed to remove it + * from any member array. + */ + rv |= ret; + } + + if (rv == MDADM_STATUS_SUCCESS) + rv = sysfs_set_memb_state(container_devnm, device_devnm, MEMB_STATE_REMOVE); + + if (rv && verbose >= 0) + pr_err("Cannot remove member device %s from container %s.\n", device_devnm, + container_devnm); + + return rv; +} + /** * Incremental_remove() - Remove the device from all raid arrays. * @devname: the device we want to remove, it could be kernel device name or devnode. * @id_path: optional, /dev/disk/by-path path to save for bare scenarios support. * @verbose: verbose flag. * - * First, fail the device (if needed) and then remove the device from native raid array or external - * container. If it is external container, the device is removed from each subarray first. + * First, fail the device (if needed) and then remove the device. This code is critical for system + * funtionality and that is why it is keept as simple as possible. We do not load devices using + * sysfs_read() because any unerelated failure may lead us to abort. We also do not call + * Manage_Subdevs(). */ int Incremental_remove(char *devname, char *id_path, int verbose) { + mdadm_status_t rv = MDADM_STATUS_SUCCESS; char *devnm = basename(devname); - struct mddev_dev devlist = {0}; char buf[SYSFS_MAX_BUF_SIZE]; struct mdstat_ent *mdstat; struct mdstat_ent *ent; struct mdinfo mdi; - int rv = 1; - int mdfd; + int mdfd = -1; if (strcmp(devnm, devname) != 0) if (!is_devnode_path(devname)) { @@ -1728,15 +1768,30 @@ int Incremental_remove(char *devname, char *id_path, int verbose) mdfd = open_dev_excl(ent->devnm); if (is_fd_valid(mdfd)) { + char *array_state_file = "array_state"; + + /** + * This is a workaround for the old issue. + * Incremental_remove() triggered from udev rule when disk is removed from OS + * tries to set array in auto-read-only mode. This can interrupt rebuild + * process which is started automatically, e.g. if array is mounted and + * spare disk is available (I/O errors limit might be achieved faster than disk is + * removed by mdadm). Prevent Incremental_remove() from setting array + * into "auto-read-only", by requiring exclusive open to succeed. + */ close_fd(&mdfd); - if (sysfs_get_str(&mdi, NULL, "array_state", - buf, sizeof(buf)) > 0) { - if (strncmp(buf, "active", 6) == 0 || - strncmp(buf, "clean", 5) == 0) - sysfs_set_str(&mdi, NULL, - "array_state", "read-auto"); + + if (sysfs_get_str(&mdi, NULL, array_state_file, buf, sizeof(buf)) > 0) { + char *str_read_auto = map_num_s(sysfs_array_states, ARRAY_READ_AUTO); + char *str_active = map_num_s(sysfs_array_states, ARRAY_ACTIVE); + char *str_clean = map_num_s(sysfs_array_states, ARRAY_CLEAN); + + if (strncmp(buf, str_active, strlen(str_active)) == 0 || + strncmp(buf, str_clean, strlen(str_clean)) == 0) + sysfs_set_str(&mdi, NULL, array_state_file, str_read_auto); } } + mdfd = open_dev(ent->devnm); if (mdfd < 0) { if (verbose >= 0) @@ -1746,38 +1801,31 @@ int Incremental_remove(char *devname, char *id_path, int verbose) if (id_path) { struct map_ent *map = NULL, *me; + me = map_by_devnm(&map, ent->devnm); if (me) policy_save_path(id_path, me); map_free(map); } - devlist.devname = devnm; - devlist.disposition = 'I'; - /* for a container, we must fail each member array */ if (is_mdstat_ent_external(ent)) { - struct mdstat_ent *memb; - for (memb = mdstat ; memb ; memb = memb->next) { - if (is_container_member(memb, ent->devnm)) - remove_from_member_array(memb, - &devlist, verbose); - } - } else { - /* - * This 'I' incremental remove is a try-best effort, - * the failure condition can be safely ignored - * because of the following up 'r' remove. - */ - Manage_subdevs(ent->devnm, mdfd, &devlist, - verbose, 0, UOPT_UNDEFINED, 0); + rv = Incremental_remove_external(devnm, ent->devnm, mdstat, verbose); + goto out; } - devlist.disposition = 'r'; - rv = Manage_subdevs(ent->devnm, mdfd, &devlist, - verbose, 0, UOPT_UNDEFINED, 0); + /* Native arrays are handled separatelly to provide more detailed error handling */ + rv = sysfs_set_memb_state(ent->devnm, devnm, MEMB_STATE_FAULTY); + if (rv && verbose >= 0) + pr_err("Cannot fail member device %s in array %s.\n", devnm, ent->devnm); + + if (rv == MDADM_STATUS_SUCCESS) + rv = sysfs_set_memb_state(ent->devnm, devnm, MEMB_STATE_REMOVE); + + if (rv && verbose >= 0) + pr_err("Cannot remove member device %s from %s.\n", devnm, ent->devnm); - close_fd(&mdfd); out: + close_fd(&mdfd); free_mdstat(mdstat); return rv; } diff --git a/Manage.c b/Manage.c index b9e55c43..7475622a 100644 --- a/Manage.c +++ b/Manage.c @@ -1381,8 +1381,6 @@ bool is_remove_safe(mdu_array_info_t *array, const int fd, char *devname, const * 'f' - set the device faulty SET_DISK_FAULTY * device can be 'detached' in which case any device that * is inaccessible will be marked faulty. - * 'I' - remove device by using incremental fail - * which is executed when device is removed surprisingly. * 'R' - mark this device as wanting replacement. * 'W' - this device is added if necessary and activated as * a replacement for a previous 'R' device. @@ -1532,9 +1530,9 @@ int Manage_subdevs(char *devname, int fd, /* This is a kernel-internal name like 'sda1' */ - if (!strchr("rfI", dv->disposition)) { - pr_err("%s only meaningful with -r, -f or -I, not -%c\n", - dv->devname, dv->disposition); + if (!strchr("rf", dv->disposition)) { + pr_err("%s only meaningful with -r, -f, not -%c\n", dv->devname, + dv->disposition); goto abort; } @@ -1661,11 +1659,9 @@ int Manage_subdevs(char *devname, int fd, close_fd(&sysfd); goto abort; } - case 'I': - if (is_fd_valid(sysfd)) { - static const char val[] = "faulty"; - rv = sysfs_write_descriptor(sysfd, val, strlen(val), &err); + if (is_fd_valid(sysfd)) { + rv = sysfs_set_memb_state_fd(sysfd, MEMB_STATE_FAULTY, &err); } else { rv = ioctl(fd, SET_DISK_FAULTY, rdev); if (rv) diff --git a/mdadm.h b/mdadm.h index b9945a29..e0c8cc1c 100644 --- a/mdadm.h +++ b/mdadm.h @@ -801,9 +801,29 @@ enum sysfs_read_flags { #define SYSFS_MAX_BUF_SIZE 64 +/** + * Defines md//state possible values. + * Note that remove can't be read-back from the file. + * + * This is not complete list. + */ +typedef enum memb_state { + MEMB_STATE_EXTERNAL_BBL, + MEMB_STATE_BLOCKED, + MEMB_STATE_SPARE, + MEMB_STATE_WRITE_MOSTLY, + MEMB_STATE_IN_SYNC, + MEMB_STATE_FAULTY, + MEMB_STATE_REMOVE, + MEMB_STATE_UNKNOWN +} memb_state_t; +char *map_memb_state(memb_state_t state); + extern mdadm_status_t sysfs_write_descriptor(const int fd, const char *value, const ssize_t len, int *errno_p); extern mdadm_status_t write_attr(const char *value, const int fd); +extern mdadm_status_t sysfs_set_memb_state_fd(int fd, memb_state_t state, int *err); +extern mdadm_status_t sysfs_set_memb_state(char *array_devnm, char *memb_devnm, memb_state_t state); extern void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf); extern int sysfs_open(char *devnm, char *devname, char *attr); diff --git a/monitor.c b/monitor.c index 3c54f8cb..b771707a 100644 --- a/monitor.c +++ b/monitor.c @@ -140,17 +140,17 @@ int read_dev_state(int fd) cp = buf; while (cp) { - if (sysfs_attr_match(cp, "faulty")) + if (sysfs_attr_match(cp, map_memb_state(MEMB_STATE_FAULTY))) rv |= DS_FAULTY; - if (sysfs_attr_match(cp, "in_sync")) + if (sysfs_attr_match(cp, map_memb_state(MEMB_STATE_IN_SYNC))) rv |= DS_INSYNC; - if (sysfs_attr_match(cp, "write_mostly")) + if (sysfs_attr_match(cp, map_memb_state(MEMB_STATE_WRITE_MOSTLY))) rv |= DS_WRITE_MOSTLY; - if (sysfs_attr_match(cp, "spare")) + if (sysfs_attr_match(cp, map_memb_state(MEMB_STATE_SPARE))) rv |= DS_SPARE; - if (sysfs_attr_match(cp, "blocked")) + if (sysfs_attr_match(cp, map_memb_state(MEMB_STATE_BLOCKED))) rv |= DS_BLOCKED; - if (sysfs_attr_match(cp, "external_bbl")) + if (sysfs_attr_match(cp, map_memb_state(MEMB_STATE_EXTERNAL_BBL))) rv |= DS_EXTERNAL_BB; cp = strchr(cp, ','); if (cp) diff --git a/sysfs.c b/sysfs.c index a3bcb432..c030d634 100644 --- a/sysfs.c +++ b/sysfs.c @@ -75,6 +75,23 @@ void sysfs_free(struct mdinfo *sra) sra = sra2; } } + +mapping_t sysfs_memb_states[] = { + {"external_bbl", MEMB_STATE_EXTERNAL_BBL}, + {"blocked", MEMB_STATE_BLOCKED}, + {"spare", MEMB_STATE_SPARE}, + {"write_mostly", MEMB_STATE_WRITE_MOSTLY}, + {"in_sync", MEMB_STATE_IN_SYNC}, + {"faulty", MEMB_STATE_FAULTY}, + {"remove", MEMB_STATE_REMOVE}, + {NULL, MEMB_STATE_UNKNOWN} +}; + +char *map_memb_state(memb_state_t state) +{ + return map_num_s(sysfs_memb_states, state); +} + /** * write_attr() - write value to fd, don't check errno. * @attr: value to write. @@ -117,6 +134,44 @@ mdadm_status_t sysfs_write_descriptor(const int fd, const char *value, const ssi return MDADM_STATUS_SUCCESS; } +/** + * sysfs_set_memb_state_fd() - write to md//state file. + * @fd: open file descriptor to the file. + * @state: enum value describing value to write + * @err: errno value pointer in case of error. + * + * This is helper to avoid inlining values, they are extracted from map now. + */ +mdadm_status_t sysfs_set_memb_state_fd(int fd, memb_state_t state, int *err) +{ + const char *val = map_memb_state(state); + + return sysfs_write_descriptor(fd, val, strlen(val), err); +} + +/** + * sysfs_set_memb_state() - write to member disk state file. + * @array_devnm: kernel name of the array. + * @memb_devnm: kernel name of member device. + * @state: value to write. + * + * Function expects that the device exists, error is unconditionally printed. + */ +mdadm_status_t sysfs_set_memb_state(char *array_devnm, char *memb_devnm, memb_state_t state) +{ + int state_fd = sysfs_open_memb_attr(array_devnm, memb_devnm, "state", O_RDWR); + + if (!is_fd_valid(state_fd)) { + pr_err("Cannot open file descriptor to %s in array %s, aborting.\n", + memb_devnm, array_devnm); + return MDADM_STATUS_ERROR; + } + + return sysfs_set_memb_state_fd(state_fd, state, NULL); + + close_fd(&state_fd); +} + /** * sysfs_get_container_devnm() - extract container device name. * @mdi: md_info describes member array, with GET_VERSION option. diff --git a/util.c b/util.c index a120c985..6aa44a80 100644 --- a/util.c +++ b/util.c @@ -1808,12 +1808,11 @@ int hot_remove_disk(int mdfd, unsigned long dev, int force) int sys_hot_remove_disk(int statefd, int force) { - static const char val[] = "remove"; int cnt = force ? 500 : 5; while (cnt--) { int err = 0; - int ret = sysfs_write_descriptor(statefd, val, strlen(val), &err); + int ret = sysfs_set_memb_state_fd(statefd, MEMB_STATE_REMOVE, &err); if (ret == MDADM_STATUS_SUCCESS) return 0;