diff options
author | Mateusz Kusiak <mateusz.kusiak@intel.com> | 2024-10-10 12:31:06 +0200 |
---|---|---|
committer | Mariusz Tkaczyk <mtkaczyk@kernel.org> | 2025-01-13 10:24:49 +0100 |
commit | 8a0d3fea424c1c19c51993c0849ea76ea41e8003 (patch) | |
tree | d45b48936296e2d1c5eae446937c344023c023af | |
parent | Detail: Export reshape status (diff) | |
download | mdadm-8a0d3fea424c1c19c51993c0849ea76ea41e8003.tar.xz mdadm-8a0d3fea424c1c19c51993c0849ea76ea41e8003.zip |
mdadm: Do not start reshape before switchroot
There are numerous issues for --grow --continue in switchroot phrase,
they include:
* Events being missed for restarting grow-continue service. This is
apparent mostly on OS on RAID scenarios. When a checkpoint (next step)
is committed, we have no reliable way to gracefully stop reshape until
it reaches that checkpoint. During boot, there's heavy I/O utilisation,
which causes sync speed drop, and naturally checkpoint takes longer to
reach. This further causes systemd to forcefully kill grow-continue
service due to timeouts, which results in udev event being missed for
grow-continue service restart.
* Grow-continue (seemingly) was not designed to be restarted without
reassembly, some things like stopping chunksize (to lower) migration
were straight up not working until recently.
This patch makes grow-continue (actual reshape) start after switchroot
phrase. This way we should not encounter issues related to restarting
the service.
Add checks not start a reshape if in initrd, let it initialise only.
Change grow-continue udev rule to be triggered whenever there's a
reshape happening in metadata, rely on udev event to kick reshape after
switchroot. Add handle_forking helper function for reshapes to avoid
duplicating code.
Signed-off-by: Mateusz Kusiak <mateusz.kusiak@intel.com>
-rw-r--r-- | Grow.c | 81 | ||||
-rw-r--r-- | mdadm_status.h | 3 | ||||
-rw-r--r-- | udev-md-raid-arrays.rules | 3 | ||||
-rw-r--r-- | util.c | 1 |
4 files changed, 61 insertions, 27 deletions
@@ -2995,6 +2995,34 @@ static void catch_term(int sig) sigterm = 1; } + +/** + * handle_forking() - Handle reshape forking. + * + * @forked: if already forked. + * @devname: device name. + * Returns: -1 if fork() failed, + * 0 if child process, + * 1 if job delegated to forked process or systemd. + * + * This function is a helper function for reshapes for fork handling. + */ +static mdadm_status_t handle_forking(bool forked, char *devname) +{ + if (forked) + return MDADM_STATUS_FORKED; + + if (devname && continue_via_systemd(devname, GROW_SERVICE, NULL)) + return MDADM_STATUS_SUCCESS; + + switch (fork()) { + case -1: return MDADM_STATUS_ERROR; /* error */ + case 0: return MDADM_STATUS_FORKED; /* child */ + default: return MDADM_STATUS_SUCCESS; /* parent */ + } + +} + static int reshape_array(char *container, int fd, char *devname, struct supertype *st, struct mdinfo *info, int force, struct mddev_dev *devlist, @@ -3485,33 +3513,35 @@ started: if (restart) sysfs_set_str(sra, NULL, "array_state", "active"); - if (!forked) - if (continue_via_systemd(container ?: sra->sys_name, - GROW_SERVICE, NULL)) { - free(fdlist); - free(offsets); - sysfs_free(sra); - return 0; - } + /* Do not run in initrd */ + if (in_initrd()) { + free(fdlist); + free(offsets); + sysfs_free(sra); + pr_info("Reshape has to be continued from location %llu when root filesystem has been mounted.\n", + sra->reshape_progress); + return 1; + } /* Now we just need to kick off the reshape and watch, while * handling backups of the data... * This is all done by a forked background process. */ - switch(forked ? 0 : fork()) { - case -1: + switch (handle_forking(forked, container ? container : sra->sys_name)) { + default: /* Unused, only to satisfy compiler. */ + case MDADM_STATUS_ERROR: /* error */ pr_err("Cannot run child to monitor reshape: %s\n", strerror(errno)); abort_reshape(sra); goto release; - default: + case MDADM_STATUS_FORKED: /* child */ + map_fork(); + break; + case MDADM_STATUS_SUCCESS: /* parent */ free(fdlist); free(offsets); sysfs_free(sra); return 0; - case 0: - map_fork(); - break; } /* Close unused file descriptor in the forked process */ @@ -3680,22 +3710,19 @@ int reshape_container(char *container, char *devname, */ ping_monitor(container); - if (!forked) - if (continue_via_systemd(container, GROW_SERVICE, NULL)) - return 0; - - switch (forked ? 0 : fork()) { - case -1: /* error */ + switch (handle_forking(forked, container)) { + default: /* Unused, only to satisfy compiler. */ + case MDADM_STATUS_ERROR: /* error */ perror("Cannot fork to complete reshape\n"); unfreeze(st); return 1; - default: /* parent */ - printf("%s: multi-array reshape continues in background\n", Name); - return 0; - case 0: /* child */ + case MDADM_STATUS_FORKED: /* child */ manage_fork_fds(0); map_fork(); break; + case MDADM_STATUS_SUCCESS: /* parent */ + printf("%s: multi-array reshape continues in background\n", Name); + return 0; } /* close unused handle in child process @@ -3791,6 +3818,12 @@ int reshape_container(char *container, char *devname, c->backup_file, c->verbose, 1, restart); close(fd); + /* Do not run reshape in initrd but let it initialize.*/ + if (in_initrd()) { + sysfs_free(cc); + exit(0); + } + restart = 0; if (rv) break; diff --git a/mdadm_status.h b/mdadm_status.h index 905105e2..e244127c 100644 --- a/mdadm_status.h +++ b/mdadm_status.h @@ -7,7 +7,8 @@ typedef enum mdadm_status { MDADM_STATUS_SUCCESS = 0, MDADM_STATUS_ERROR, MDADM_STATUS_UNDEF, - MDADM_STATUS_MEM_FAIL + MDADM_STATUS_MEM_FAIL, + MDADM_STATUS_FORKED } mdadm_status_t; #endif diff --git a/udev-md-raid-arrays.rules b/udev-md-raid-arrays.rules index 4e64b249..d8de6d00 100644 --- a/udev-md-raid-arrays.rules +++ b/udev-md-raid-arrays.rules @@ -15,7 +15,6 @@ ENV{DEVTYPE}=="partition", GOTO="md_ignore_state" ATTR{md/metadata_version}=="external:[A-Za-z]*", ATTR{md/array_state}=="inactive", GOTO="md_ignore_state" TEST!="md/array_state", ENV{SYSTEMD_READY}="0", GOTO="md_end" ATTR{md/array_state}=="clear*|inactive", ENV{SYSTEMD_READY}="0", GOTO="md_end" -ATTR{md/sync_action}=="reshape", ENV{RESHAPE_ACTIVE}="yes" LABEL="md_ignore_state" IMPORT{program}="BINDIR/mdadm --detail --no-devices --export $devnode" @@ -40,6 +39,6 @@ ENV{MD_LEVEL}=="raid[1-9]*", ENV{SYSTEMD_WANTS}+="mdmonitor.service" ENV{MD_LEVEL}=="raid[1-9]*", ENV{MD_CONTAINER}=="?*", PROGRAM="/usr/bin/readlink $env{MD_CONTAINER}", ENV{MD_MON_THIS}="%c" ENV{MD_MON_THIS}=="?*", TEST=="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@initrd-%c.service" ENV{MD_MON_THIS}=="?*", TEST!="/etc/initrd-release", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdmon@%c.service" -ENV{RESHAPE_ACTIVE}=="yes", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service" +ENV{MD_RESHAPE_ACTIVE}=="True", PROGRAM="/usr/bin/basename $env{MD_MON_THIS}", ENV{SYSTEMD_WANTS}+="mdadm-grow-continue@%c.service" LABEL="md_end" @@ -2307,6 +2307,7 @@ int continue_via_systemd(char *devnm, char *service_name, char *prefix) int pid, status; char pathbuf[1024]; + dprintf("Start %s service\n", service_name); /* Simply return that service cannot be started */ if (check_env("MDADM_NO_SYSTEMCTL")) return 0; |