Discussion:
[systemd-devel] [PATCH 1/4] Adding halt binary to shutdown the system
Gustavo Sverzut Barbieri
2010-10-01 05:28:37 UTC
Permalink
From: Fabiano Fidencio <***@profusion.mobi>

This functions are working as follows:
- Send a SIGTERM to all process
- Send a SIGKILL to all process
- Try to umount all mount points
- Try to remount read-only all mount points that can't
be umounted
- Call shutdown

If one step fail, shutdown will be aborted

also add .gitignore file.
---
.gitignore | 1 +
Makefile.am | 14 +++++++++-
src/halt.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 93 insertions(+), 1 deletions(-)
create mode 100644 src/halt.c

diff --git a/.gitignore b/.gitignore
index 9ba0758..e6788e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+systemd-halt
systemd-tmpfiles
systemd-readahead-collect
systemd-readahead-replay
diff --git a/Makefile.am b/Makefile.am
index 06b2cca..4d8e181 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -89,6 +89,7 @@ rootlibexec_PROGRAMS = \
systemd-update-utmp \
systemd-random-seed \
systemd-shutdownd \
+ systemd-halt \
systemd-modules-load \
systemd-remount-api-vfs \
systemd-kmsg-syslogd \
@@ -349,7 +350,7 @@ libsystemd_core_la_SOURCES = \
src/service.c \
src/automount.c \
src/mount.c \
- src/umount.c \
+ src/halt.c \
src/swap.c \
src/device.c \
src/target.c \
@@ -629,6 +630,17 @@ systemd_shutdownd_CFLAGS = \
systemd_shutdownd_LDADD = \
libsystemd-basic.la

+systemd_halt_SOURCES = \
+ src/mount-setup.c \
+ src/umount.c \
+ src/halt.c
+
+systemd_halt_CFLAGS = \
+ $(AM_CFLAGS)
+
+systemd_halt_LDADD = \
+ libsystemd-basic.la
+
systemd_modules_load_SOURCES = \
src/modules-load.c

diff --git a/src/halt.c b/src/halt.c
new file mode 100644
index 0000000..61cb794
--- /dev/null
+++ b/src/halt.c
@@ -0,0 +1,79 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+ This file is part of systemd.
+
+ Copyright 2010 ProFUSION embedded systems
+
+ systemd is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ systemd is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/reboot.h>
+#include <linux/reboot.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include "umount.h"
+
+static volatile unsigned int run = 1;
+
+static void sig_alarm(int sig) {
+ run = 0;
+}
+
+int main(int argc, char *argv[]) {
+ int c, cmd;
+
+ if (getpid() != 1)
+ return -1;
+
+ if (argc != 2)
+ return -1;
+
+ switch ((c = getopt(argc, argv, "hrp"))) {
+ case 'h':
+ cmd = LINUX_REBOOT_CMD_HALT;
+ case 'p':
+ cmd = RB_POWER_OFF;
+ break;
+ case 'r':
+ cmd = RB_AUTOBOOT;
+ break;
+ default:
+ return -1;
+ }
+
+ /* lock us into memory */
+ mlockall(MCL_CURRENT|MCL_FUTURE);
+
+ signal(SIGALRM, sig_alarm);
+ kill(-1, SIGTERM);
+ alarm(10);
+ while (run) {
+ pid_t r = wait(NULL);
+ if (r == (pid_t)-1 && errno == ECHILD)
+ run = 0;
+ }
+ alarm(0);
+ kill(-1, SIGKILL);
+
+ if (umount_init() < 0)
+ return -1;
+
+ sync();
+ return reboot(cmd);
+}
--
1.7.2.2
Gustavo Sverzut Barbieri
2010-10-01 05:28:38 UTC
Permalink
---
Makefile.am | 1 +
src/main.c | 39 ++++++++++++++++++++++++++++++++++++++-
src/manager.h | 3 +++
src/target.c | 8 ++++++++
4 files changed, 50 insertions(+), 1 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 4d8e181..bf9d9ab 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -50,6 +50,7 @@ AM_CPPFLAGS = \
-DSESSION_DATA_UNIT_PATH=\"$(sessionunitdir)\" \
-DCGROUP_AGENT_PATH=\"$(rootlibexecdir)/systemd-cgroups-agent\" \
-DSYSTEMD_BINARY_PATH=\"$(rootbindir)/systemd\" \
+ -DSYSTEMD_HALT_BINARY_PATH=\"$(rootlibexecdir)/systemd-halt\" \
-DSYSTEMCTL_BINARY_PATH=\"$(rootbindir)/systemctl\" \
-DRUNTIME_DIR=\"$(localstatedir)/run\" \
-DRANDOM_SEED=\"$(localstatedir)/lib/random-seed\" \
diff --git a/src/main.c b/src/main.c
index ee12e1f..e6dc157 100644
--- a/src/main.c
+++ b/src/main.c
@@ -896,6 +896,7 @@ int main(int argc, char *argv[]) {
int r, retval = EXIT_FAILURE;
FDSet *fds = NULL;
bool reexecute = false;
+ ManagerExitCode exit_action = _MANAGER_EXIT_CODE_INVALID;

if (getpid() != 1 && strstr(program_invocation_short_name, "init")) {
/* This is compatbility support for SysV, where
@@ -1127,6 +1128,21 @@ int main(int argc, char *argv[]) {
log_notice("Reexecuting.");
goto finish;

+ case MANAGER_REBOOT:
+ log_info("Reboot.");
+ exit_action = MANAGER_REBOOT;
+ goto finish;
+
+ case MANAGER_POWEROFF:
+ log_info("Poweroff.");
+ exit_action = MANAGER_POWEROFF;
+ goto finish;
+
+ case MANAGER_HALT:
+ log_info("Halt.");
+ exit_action = MANAGER_HALT;
+ goto finish;
+
default:
assert_not_reached("Unknown exit code.");
}
@@ -1206,8 +1222,29 @@ finish:
if (fds)
fdset_free(fds);

- if (getpid() == 1)
+ if (getpid() == 1) {
+ const char *args[3] = {SYSTEMD_HALT_BINARY_PATH, NULL, NULL};
+
+ switch (exit_action) {
+ case MANAGER_REBOOT:
+ args[1] = "-r";
+ break;
+ case MANAGER_POWEROFF:
+ args[1] = "-p";
+ break;
+ case MANAGER_HALT:
+ args[1] = "-h";
+ break;
+ default:
+ log_error("Unknown exit_action code %d", exit_action);
+ }
+
+ if (args[1]) {
+ execv(args[0], (char* const*) args);
+ log_error("failed to exit with action %d", exit_action);
+ }
freeze();
+ }

return retval;
}
diff --git a/src/manager.h b/src/manager.h
index 10ff24c..cb8988d 100644
--- a/src/manager.h
+++ b/src/manager.h
@@ -41,6 +41,9 @@ typedef enum ManagerExitCode {
MANAGER_EXIT,
MANAGER_RELOAD,
MANAGER_REEXECUTE,
+ MANAGER_REBOOT,
+ MANAGER_HALT,
+ MANAGER_POWEROFF,
_MANAGER_EXIT_CODE_MAX,
_MANAGER_EXIT_CODE_INVALID = -1
} ManagerExitCode;
diff --git a/src/target.c b/src/target.c
index 3522bf1..dc3af2e 100644
--- a/src/target.c
+++ b/src/target.c
@@ -172,6 +172,14 @@ static int target_start(Unit *u) {
assert(t->state == TARGET_DEAD);

target_set_state(t, TARGET_ACTIVE);
+
+ if (unit_has_name(u, SPECIAL_REBOOT_TARGET))
+ t->meta.manager->exit_code = MANAGER_REBOOT;
+ else if (unit_has_name(u, SPECIAL_POWEROFF_TARGET))
+ t->meta.manager->exit_code = MANAGER_POWEROFF;
+ else if (unit_has_name(u, SPECIAL_HALT_TARGET))
+ t->meta.manager->exit_code = MANAGER_HALT;
+
return 0;
}
--
1.7.2.2
Lennart Poettering
2010-10-01 13:54:00 UTC
Permalink
Post by Gustavo Sverzut Barbieri
+++ b/src/target.c
@@ -172,6 +172,14 @@ static int target_start(Unit *u) {
assert(t->state == TARGET_DEAD);
target_set_state(t, TARGET_ACTIVE);
+
+ if (unit_has_name(u, SPECIAL_REBOOT_TARGET))
+ t->meta.manager->exit_code = MANAGER_REBOOT;
+ else if (unit_has_name(u, SPECIAL_POWEROFF_TARGET))
+ t->meta.manager->exit_code = MANAGER_POWEROFF;
+ else if (unit_has_name(u, SPECIAL_HALT_TARGET))
+ t->meta.manager->exit_code = MANAGER_HALT;
+
return 0;
}
Please don't do this. Instead just place a normal service in these
targets that uses "systemctl" or "kill" to ask PID 1 to do the right
operation here...

Lennart
--
Lennart Poettering - Red Hat, Inc.
Gustavo Sverzut Barbieri
2010-10-01 14:44:18 UTC
Permalink
On Fri, Oct 1, 2010 at 10:54 AM, Lennart Poettering
Post by Lennart Poettering
Post by Gustavo Sverzut Barbieri
+++ b/src/target.c
@@ -172,6 +172,14 @@ static int target_start(Unit *u) {
         assert(t->state == TARGET_DEAD);
         target_set_state(t, TARGET_ACTIVE);
+
+        if (unit_has_name(u, SPECIAL_REBOOT_TARGET))
+                t->meta.manager->exit_code = MANAGER_REBOOT;
+        else if (unit_has_name(u, SPECIAL_POWEROFF_TARGET))
+                t->meta.manager->exit_code = MANAGER_POWEROFF;
+        else if (unit_has_name(u, SPECIAL_HALT_TARGET))
+                t->meta.manager->exit_code = MANAGER_HALT;
+
         return 0;
 }
Please don't do this. Instead just place a normal service in these
targets that uses "systemctl" or "kill" to ask PID 1 to do the right
operation here...
yes, this is hackish, but your solution does need some replication as
I need to have it specifically for dbus, then signals (which to use?),
then socket then any other command interface we choose. At least this
is ugly but at a central place that always work.

another option is to have one more entry in target units that specify
the manager exit_code. Then the unit parser converts it to
ManagerExitCode and if != _MANAGER_EXIT_CODE_INVALID then it applies
to t->meta.manager->exit_code. It would be more generic, but really,
we already named the others "special targets". If you want I can make
this conversion to ManagerExitCode at parse time.
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Lennart Poettering
2010-10-01 16:33:58 UTC
Permalink
Post by Gustavo Sverzut Barbieri
On Fri, Oct 1, 2010 at 10:54 AM, Lennart Poettering
Post by Lennart Poettering
Post by Gustavo Sverzut Barbieri
+++ b/src/target.c
@@ -172,6 +172,14 @@ static int target_start(Unit *u) {
         assert(t->state == TARGET_DEAD);
         target_set_state(t, TARGET_ACTIVE);
+
+        if (unit_has_name(u, SPECIAL_REBOOT_TARGET))
+                t->meta.manager->exit_code = MANAGER_REBOOT;
+        else if (unit_has_name(u, SPECIAL_POWEROFF_TARGET))
+                t->meta.manager->exit_code = MANAGER_POWEROFF;
+        else if (unit_has_name(u, SPECIAL_HALT_TARGET))
+                t->meta.manager->exit_code = MANAGER_HALT;
+
         return 0;
 }
Please don't do this. Instead just place a normal service in these
targets that uses "systemctl" or "kill" to ask PID 1 to do the right
operation here...
yes, this is hackish, but your solution does need some replication as
I need to have it specifically for dbus, then signals (which to use?),
then socket then any other command interface we choose. At least this
is ugly but at a central place that always work.
This isn't necessary. Just add this to systemctl, which already does the
D-Bus fallback properly, and implements this logic similar to "systemctl
daemon-reexec" and "systemctl daemon-exit" are already implemented. In
fact, the handling for exit.service (which is useful only for sessions)
should be almost identical to what you need to do for
reboot/poweroff/halt here.
Post by Gustavo Sverzut Barbieri
another option is to have one more entry in target units that specify
the manager exit_code. Then the unit parser converts it to
ManagerExitCode and if != _MANAGER_EXIT_CODE_INVALID then it applies
to t->meta.manager->exit_code. It would be more generic, but really,
we already named the others "special targets". If you want I can make
this conversion to ManagerExitCode at parse time.
I am not a fan of hardcoding numeric codes everywhere. Also, I strongly
in favour of using similar codepaths for session exit, daemon reexec,
poweroff, halt, reboot and kexec.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Gustavo Sverzut Barbieri
2010-10-01 16:45:50 UTC
Permalink
On Fri, Oct 1, 2010 at 1:33 PM, Lennart Poettering
Post by Lennart Poettering
Post by Gustavo Sverzut Barbieri
On Fri, Oct 1, 2010 at 10:54 AM, Lennart Poettering
Post by Lennart Poettering
Post by Gustavo Sverzut Barbieri
+++ b/src/target.c
@@ -172,6 +172,14 @@ static int target_start(Unit *u) {
         assert(t->state == TARGET_DEAD);
         target_set_state(t, TARGET_ACTIVE);
+
+        if (unit_has_name(u, SPECIAL_REBOOT_TARGET))
+                t->meta.manager->exit_code = MANAGER_REBOOT;
+        else if (unit_has_name(u, SPECIAL_POWEROFF_TARGET))
+                t->meta.manager->exit_code = MANAGER_POWEROFF;
+        else if (unit_has_name(u, SPECIAL_HALT_TARGET))
+                t->meta.manager->exit_code = MANAGER_HALT;
+
         return 0;
 }
Please don't do this. Instead just place a normal service in these
targets that uses "systemctl" or "kill" to ask PID 1 to do the right
operation here...
yes, this is hackish, but your solution does need some replication as
I need to have it specifically for dbus, then signals (which to use?),
then socket then any other command interface we choose. At least this
is ugly but at a central place that always work.
This isn't necessary. Just add this to systemctl, which already does the
D-Bus fallback properly, and implements this logic similar to "systemctl
daemon-reexec" and "systemctl daemon-exit" are already implemented. In
fact, the handling for exit.service (which is useful only for sessions)
should be almost identical to what you need to do for
reboot/poweroff/halt here.
Post by Gustavo Sverzut Barbieri
another option is to have one more entry in target units that specify
the manager exit_code. Then the unit parser converts it to
ManagerExitCode and if != _MANAGER_EXIT_CODE_INVALID then it applies
to t->meta.manager->exit_code. It would be more generic, but really,
we already named the others "special targets". If you want I can make
this conversion to ManagerExitCode  at parse time.
I am not a fan of hardcoding numeric codes everywhere. Also, I strongly
in favour of using similar codepaths for session exit, daemon reexec,
poweroff, halt, reboot and kexec.
Okay, okay... it's Fidencio that is doing this anyway :-) Until code
is ready I'll keep using this as it is working nicely.
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Gustavo Sverzut Barbieri
2010-10-01 05:28:39 UTC
Permalink
---
units/halt.target | 2 --
units/hwclock-save.service | 2 +-
units/poweroff.target | 2 --
units/reboot.target | 2 --
units/systemd-random-seed-save.service.in | 2 +-
units/systemd-update-utmp-runlevel.service.in | 2 +-
units/systemd-update-utmp-shutdown.service.in | 2 +-
units/tmpwatch.service | 2 +-
8 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/units/halt.target b/units/halt.target
index 04b42cd..31b8bae 100644
--- a/units/halt.target
+++ b/units/halt.target
@@ -10,8 +10,6 @@
[Unit]
Description=Halt
DefaultDependencies=no
-Requires=halt.service
-After=halt.service
AllowIsolate=yes

[Install]
diff --git a/units/hwclock-save.service b/units/hwclock-save.service
index 067196f..567348c 100644
--- a/units/hwclock-save.service
+++ b/units/hwclock-save.service
@@ -8,7 +8,7 @@
[Unit]
Description=Update RTC With System Clock
DefaultDependencies=no
-Before=poweroff.service reboot.service halt.service killall.service
+Before=poweroff.target reboot.target halt.target

[Service]
Type=oneshot
diff --git a/units/poweroff.target b/units/poweroff.target
index 975b088..b0c8101 100644
--- a/units/poweroff.target
+++ b/units/poweroff.target
@@ -11,8 +11,6 @@
Description=Power-Off
DefaultDependencies=no
Names=runlevel0.target
-Requires=poweroff.service
-After=poweroff.service
AllowIsolate=yes

[Install]
diff --git a/units/reboot.target b/units/reboot.target
index 2cd46a0..813768a 100644
--- a/units/reboot.target
+++ b/units/reboot.target
@@ -11,8 +11,6 @@
Description=Reboot
DefaultDependencies=no
Names=runlevel6.target
-Requires=reboot.service
-After=reboot.service
AllowIsolate=yes

[Install]
diff --git a/units/systemd-random-seed-save.service.in b/units/systemd-random-seed-save.service.in
index 7cfd2c0..c44d814 100644
--- a/units/systemd-random-seed-save.service.in
+++ b/units/systemd-random-seed-save.service.in
@@ -8,7 +8,7 @@
[Unit]
Description=Save Random Seed
DefaultDependencies=no
-Before=poweroff.service reboot.service halt.service killall.service
+Before=poweroff.target reboot.target halt.target
Conflicts=systemd-random-seed-load.service

[Service]
diff --git a/units/systemd-update-utmp-runlevel.service.in b/units/systemd-update-utmp-runlevel.service.in
index 0cbde37..6fee3ee 100644
--- a/units/systemd-update-utmp-runlevel.service.in
+++ b/units/systemd-update-utmp-runlevel.service.in
@@ -10,7 +10,7 @@ Description=Notify Audit System and Update UTMP about System Runlevel Changes
DefaultDependencies=no
Wants=local-fs.target sysinit.target
After=local-fs.target sysinit.target auditd.service runlevel1.target runlevel2.target runlevel3.target runlevel4.target runlevel5.target
-Before=poweroff.service reboot.service halt.service killall.service
+Before=poweroff.target reboot.target halt.target

[Service]
Type=oneshot
diff --git a/units/systemd-update-utmp-shutdown.service.in b/units/systemd-update-utmp-shutdown.service.in
index 0b4fe92..350367d 100644
--- a/units/systemd-update-utmp-shutdown.service.in
+++ b/units/systemd-update-utmp-shutdown.service.in
@@ -10,7 +10,7 @@ Description=Notify Audit System and Update UTMP about System Shutdown
DefaultDependencies=no
Wants=local-fs.target sysinit.target
After=local-fs.target sysinit.target auditd.service systemd-update-utmp-runlevel.service
-Before=poweroff.service reboot.service halt.service killall.service
+Before=poweroff.target reboot.target halt.target

[Service]
Type=oneshot
diff --git a/units/tmpwatch.service b/units/tmpwatch.service
index 5cc0b6b..e0d8198 100644
--- a/units/tmpwatch.service
+++ b/units/tmpwatch.service
@@ -10,7 +10,7 @@ Description=Cleanup of Temporary Directories
DefaultDependencies=no
Wants=local-fs.target
After=systemd-readahead-collect.service systemd-readahead-replay.service local-fs.target
-Before=poweroff.service reboot.service halt.service killall.service
+Before=poweroff.target reboot.target halt.target

[Service]
Type=oneshot
--
1.7.2.2
Gustavo Sverzut Barbieri
2010-10-01 05:28:40 UTC
Permalink
---
Makefile.am | 4 ----
1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index bf9d9ab..04e4ad9 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -291,10 +291,6 @@ endif

if TARGET_GENTOO
dist_systemunit_DATA += \
- units/gentoo/halt.service \
- units/gentoo/killall.service \
- units/gentoo/poweroff.service \
- units/gentoo/reboot.service \
units/gentoo/xdm.service
endif
--
1.7.2.2
Gustavo Sverzut Barbieri
2010-10-01 05:33:58 UTC
Permalink
On Fri, Oct 1, 2010 at 2:28 AM, Gustavo Sverzut Barbieri
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
   - Call shutdown
   If one step fail, shutdown will be aborted
yay! with this and the other 3 patches I'm now able to shutdown and
unmount using only systemd!

Fabiano's patch did not include halt support and had a bug with
getppid() instead of getpid()... that was forcing early quit.

Please send comments, I just removed gentoo specific services, but we
should remove them all.

BR,
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Lennart Poettering
2010-10-01 13:52:25 UTC
Permalink
Post by Gustavo Sverzut Barbieri
- Send a SIGTERM to all process
- Send a SIGKILL to all process
- Try to umount all mount points
- Try to remount read-only all mount points that can't
be umounted
- Call shutdown
If one step fail, shutdown will be aborted
This shouldn't fail, ever. We need to make sure that we don't ever hang in
the middle of nowhere.
Post by Gustavo Sverzut Barbieri
+ systemd-halt \
Could we name this "systemd-shutdown" please? I kinda see "shutdown" as
a higher level term for all of "halt", "reboot", "poweroff" and "kexec".
Post by Gustavo Sverzut Barbieri
systemd-modules-load \
systemd-remount-api-vfs \
systemd-kmsg-syslogd \
@@ -349,7 +350,7 @@ libsystemd_core_la_SOURCES = \
src/service.c \
src/automount.c \
src/mount.c \
- src/umount.c \
+ src/halt.c \
Hmm, this doesn't belong in libsystemd_core, and neither does umount.c.
Post by Gustavo Sverzut Barbieri
+static void sig_alarm(int sig) {
+ run = 0;
+}
Grmbl. Using signals like this is racy, since they might get delivered
between the check for 'run' in the loop below, and the next call to
wait(). The effect of that is the timout might get eaten up and the
system freezes.

Or in other words: never ever use SIGLARM or alarm().
Post by Gustavo Sverzut Barbieri
+int main(int argc, char *argv[]) {
+ int c, cmd;
+
+ if (getpid() != 1)
+ return -1;
Returning -1 in main() is not really defined.
Post by Gustavo Sverzut Barbieri
+
+ if (argc != 2)
+ return -1;
+
+ switch ((c = getopt(argc, argv, "hrp"))) {
+ cmd = LINUX_REBOOT_CMD_HALT;
There's a break missing here.
Post by Gustavo Sverzut Barbieri
+ cmd = RB_POWER_OFF;
+ break;
+ cmd = RB_AUTOBOOT;
+ break;
+ return -1;
+ }
I'd kinda prefer a proper verb on the command line here. I.e. invoking
"systemd-shutdown halt" should halt the system, "systemd-shutdown
reboot" should reboot it, and so on.
Post by Gustavo Sverzut Barbieri
+
+ /* lock us into memory */
+ mlockall(MCL_CURRENT|MCL_FUTURE);
+
+ signal(SIGALRM, sig_alarm);
+ kill(-1, SIGTERM);
Unfortunately this will probably not be sufficient. Some PIDs probably
must be spared (for example: mdmon and stuff). While I am not entirely
sure how we best should allow apps to mark themselves to be spared here
(my ideal solution would be to patch the kernel to allow user xattrs on
/proc/$PID...), I think we must iterate through the processes and kill
things individually, potentially omitting a few while doing that.

If you do this then it is probably wise to make sure that the SIGTERM is
delivered for all processes at the same time and not one after the
other, hence it is recommended to use kill(-1, SIGSTOP) before and
kill(-1, SIGCONT) afterwards. This is btw what the sysv implementation
of killall5 does.
Post by Gustavo Sverzut Barbieri
+ alarm(10);
Do not use alram.
Post by Gustavo Sverzut Barbieri
+ while (run) {
+ pid_t r = wait(NULL);
+ if (r == (pid_t)-1 && errno == ECHILD)
+ run = 0;
+ }
While I like the idea of using wait() like this it won't work anymore if
we need to spare some processes.

You can use sigtimedwait() + waitpid(WNOHANG) to wait for SIGCHLD and
apply a timeout.

Make sure that if you iterate through the processes you count the
processes you killed so that we know how many processes to wait for.
Post by Gustavo Sverzut Barbieri
+ alarm(0);
+ kill(-1, SIGKILL);
A similar loop is needed here.
Post by Gustavo Sverzut Barbieri
+
+ if (umount_init() < 0)
+ return -1;
+
+ sync();
This is redundant, since the kernel does that anyway when you call
reboot(). We probably should remove all invocations of sync()
everywhere.
Post by Gustavo Sverzut Barbieri
+ return reboot(cmd);
+}
Here are a couple of things I'd still like to see added, but which are
not necessary to get the basic version merged:

- use the devicemapper libraries to remove all dm disks after each
unmounting run that can be removed (in particular useful for crypto
disks). Also remove all loopback devices (i.e. /dev/loop).

- we need code that disables all swaps, similar to the code that umounts
everything. This should probably be integrated into the umount loop, to
properly deal with swap files.

- As a minor optimization we might want an umount run for all
non-API tmp disks before disabling all swaps. Why? Because when removing
the swaps everything that is swapped out on it will be swapped back into
memory. If this is tmpfs data that is not necessary, hence let's unmount
the tmpfs stuff first and then remove the swaps. (the fedora reboot
script does this). For the API tmpfs file systems (i.e. /sys/fs/cgroup
and /dev this should not be necessary since it does not store any big
files).

- having support for kexec would be cool. As last step simply invoke
/sbin/kexec -e -x -f, and if that fails fall back to a normal reboot.

The umount/unswap loop should probably look like this:

do {
p = umount_all();
q = unswap_all();
r = undm_all();
s = unloop_all();
} while (p > 0 || q > 0 || r > 0 || s > 0);

where the various xxx_all() functions return the number of
unmounts/unspaws/undms/unloops they executed.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Gustavo Sverzut Barbieri
2010-10-01 14:37:25 UTC
Permalink
I'm replying to this as couple of the bad ideas were my fault :-)


On Fri, Oct 1, 2010 at 10:52 AM, Lennart Poettering
Post by Lennart Poettering
    - Send a SIGTERM to all process
    - Send a SIGKILL to all process
    - Try to umount all mount points
    - Try to remount read-only all mount points that can't
    be umounted
    - Call shutdown
    If one step fail, shutdown will be aborted
This shouldn't fail, ever. We need to make sure that we don't ever hang in
the middle of nowhere.
yeah, actually I've noticed that getppid() was failing (of course) and
I was getting a kernel oops "trying to kill init" that I've spent
hours to debug as I thought the behavior of kill(-1, X) was not like
the documented. at the end it was just returning and thus it was not
defined. But we need to figure out what to do in case of errors.
Post by Lennart Poettering
+     systemd-halt \
Could we name this "systemd-shutdown" please? I kinda see "shutdown" as
a higher level term for all of "halt", "reboot", "poweroff" and "kexec".
this was not my idea, but I guess Fabiano did it due sysvinit main
tool and others being links.
Post by Lennart Poettering
      systemd-modules-load \
      systemd-remount-api-vfs \
      systemd-kmsg-syslogd \
@@ -349,7 +350,7 @@ libsystemd_core_la_SOURCES = \
      src/service.c \
      src/automount.c \
      src/mount.c \
-     src/umount.c \
+     src/halt.c \
Hmm, this doesn't belong in libsystemd_core, and neither does umount.c.
+static void sig_alarm(int sig) {
+        run = 0;
+}
Grmbl. Using signals like this is racy, since they might get delivered
between the check for 'run' in the loop below, and the next call to
wait(). The effect of that is the timout might get eaten up and the
system freezes.
Or in other words: never ever use SIGLARM or alarm().
well, my idea as well, but I like your race-free more ;-)
Post by Lennart Poettering
+int main(int argc, char *argv[]) {
+        int c, cmd;
+
+        if (getpid() != 1)
+                return -1;
Returning -1 in main() is not really defined.
+
+        if (argc != 2)
+                return -1;
+
+        switch ((c = getopt(argc, argv, "hrp"))) {
+                        cmd = LINUX_REBOOT_CMD_HALT;
There's a break missing here.
my stupid fault, i added but not tested it.
Post by Lennart Poettering
+                        cmd = RB_POWER_OFF;
+                        break;
+                        cmd = RB_AUTOBOOT;
+                        break;
+                        return -1;
+        }
I'd kinda prefer a proper verb on the command line here. I.e. invoking
"systemd-shutdown halt" should halt the system, "systemd-shutdown
reboot" should reboot it, and so on.
makes sense, and as we're just taking few pre-determined options we
can easily strcmp instead of going getopt().
Post by Lennart Poettering
+
+        /* lock us into memory */
+        mlockall(MCL_CURRENT|MCL_FUTURE);
+
+        signal(SIGALRM, sig_alarm);
+        kill(-1, SIGTERM);
Unfortunately this will probably not be sufficient. Some PIDs probably
must be spared (for example: mdmon and stuff). While I am not entirely
sure how we best should allow apps to mark themselves to be spared here
(my ideal solution would be to patch the kernel to allow user xattrs on
/proc/$PID...), I think we must iterate through the processes and kill
things individually, potentially omitting a few while doing that.
If you do this then it is probably wise to make sure that the SIGTERM is
delivered for all processes at the same time and not one after the
other, hence it is recommended to use kill(-1, SIGSTOP) before and
kill(-1, SIGCONT) afterwards. This is btw what the sysv implementation
of killall5 does.
we saw the STOP->sig->CONT, but I asked him to remove as we could do
it better being pid1. His first implementation was very much like
sysv.

can you confirm we need this for mdmon and all? If they just ignore
SIGTERM, all we have to do is try to umount devicemapper before
SIGKILL.
Post by Lennart Poettering
+        alarm(10);
Do not use alram.
+        while (run) {
+                pid_t r = wait(NULL);
+                if (r == (pid_t)-1 && errno == ECHILD)
+                        run = 0;
+        }
While I like the idea of using wait() like this it won't work anymore if
we need to spare some processes.
You can use sigtimedwait() + waitpid(WNOHANG) to wait for SIGCHLD and
apply a timeout.
Make sure that if you iterate through the processes you count the
processes you killed so that we know how many processes to wait for.
+        alarm(0);
+        kill(-1, SIGKILL);
A similar loop is needed here.
why? things will just die... but yeah, the loop should exit quickly.
Post by Lennart Poettering
+
+        if (umount_init() < 0)
+                return -1;
+
+        sync();
This is redundant, since the kernel does that anyway when you call
reboot(). We probably should remove all invocations of sync()
everywhere.
I've added it during debug, I read the man page that reboot() did not
ensure sync, and in the process I've lost some of my data... but at
the end it seems that the reason was something else and this ended
dangling... but it does not hurt either ;-)
Post by Lennart Poettering
+        return reboot(cmd);
+}
Here are a couple of things I'd still like to see added, but which are
- use the devicemapper libraries to remove all dm disks after each
unmounting run that can be removed (in particular useful for crypto
disks). Also remove all loopback devices (i.e. /dev/loop).
shouldn't this be done BEFORE going to this step? Like in the regular
systemd units? When we run this binary we should pretty much done,
we're just giving things a last chance, but in a fully systemd
compliant system we should have no more left processes when their
units are stopped.

if you worry about an unity not being unmounted due processes keeping
it busy, maybe we can also introduce a non-pid1 killall that SIGTERM
all processes and that should help dm.

and do we really need to remove the devices themselves? isn't
unmounting their filesystems enough? In my not-so-experienced point of
view the data is already on media and removing the loop/crypto/dm
devices will just free system memory, but we're exiting linux anyway
(it's pretty much like calling free() before ending a process).
Post by Lennart Poettering
- we need code that disables all swaps, similar to the code that umounts
everything. This should probably be integrated into the umount loop, to
properly deal with swap files.
- As a minor optimization we might want an umount run for all
non-API tmp disks before disabling all swaps. Why? Because when removing
the swaps everything that is swapped out on it will be swapped back into
memory. If this is tmpfs data that is not necessary, hence let's unmount
the tmpfs stuff first and then remove the swaps. (the fedora reboot
script does this). For the API tmpfs file systems (i.e. /sys/fs/cgroup
and /dev this should not be necessary since it does not store any big
files).
so we just do a pre-loop to umount tmpfs, then swap, then the regular one?
Post by Lennart Poettering
- having support for kexec would be cool. As last step simply invoke
/sbin/kexec -e -x -f, and if that fails fall back to a normal reboot.
do we really have to exec /sbin/kexec or is it possible to do some syscall?
Post by Lennart Poettering
do {
       p = umount_all();
       q = unswap_all();
       r = undm_all();
       s = unloop_all();
} while (p > 0 || q > 0 || r > 0 || s > 0);
where the various xxx_all() functions return the number of
unmounts/unspaws/undms/unloops they executed.
seems simple.
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Lennart Poettering
2010-10-01 17:43:32 UTC
Permalink
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
If you do this then it is probably wise to make sure that the SIGTERM is
delivered for all processes at the same time and not one after the
other, hence it is recommended to use kill(-1, SIGSTOP) before and
kill(-1, SIGCONT) afterwards. This is btw what the sysv implementation
of killall5 does.
we saw the STOP->sig->CONT, but I asked him to remove as we could do
it better being pid1. His first implementation was very much like
sysv.
can you confirm we need this for mdmon and all? If they just ignore
SIGTERM, all we have to do is try to umount devicemapper before
SIGKILL.
Well, the fact that mdmon needs that sucks, and I can only say that I
find this a major design flaw of MD. Kay and I have tried to track down
what this is necessary for and it seems it this is support for the
on-disk format of some hw raid adapters.

But be that is it may we probably do have to support this in some
way. How exactly the matching will work in the end we can leave open,
but we do need the loop unfortunately.

(The whole mdmon idea is just broken, because if it really wants to stay
around it must be ebale to reexec itself to release mmaped files on
disk. But AFAICS this isn't the case)
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
A similar loop is needed here.
why? things will just die... but yeah, the loop should exit quickly.
AFAIK SIGKILL isn't any different from other signals and hence is
asynchronous, and before you go on and unmount the file systems you must
make sure all processes are really gone so that they don't keep the file
system busy.
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
+
+        if (umount_init() < 0)
+                return -1;
+
+        sync();
This is redundant, since the kernel does that anyway when you call
reboot(). We probably should remove all invocations of sync()
everywhere.
I've added it during debug, I read the man page that reboot() did not
ensure sync, and in the process I've lost some of my data... but at
the end it seems that the reason was something else and this ended
dangling... but it does not hurt either ;-)
Interesting. I am pretty sure the man page isn't correct here
though. But for now it's probably better to leave the sync in.
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
- use the devicemapper libraries to remove all dm disks after each
unmounting run that can be removed (in particular useful for crypto
disks). Also remove all loopback devices (i.e. /dev/loop).
shouldn't this be done BEFORE going to this step? Like in the regular
systemd units? When we run this binary we should pretty much done,
we're just giving things a last chance, but in a fully systemd
compliant system we should have no more left processes when their
units are stopped.
Well, the whole binary is mostly a safety net anyway, so yes, everythng
should ideally happy cleanly before, but that doesn't relieve us from
coding this safety net correctly.
Post by Gustavo Sverzut Barbieri
if you worry about an unity not being unmounted due processes keeping
it busy, maybe we can also introduce a non-pid1 killall that SIGTERM
all processes and that should help dm.
Kay and I have discussed this a bit, and what we came up with is a
scheme like this: there should be a little .service that is active as
long as a user can log in. When it starts up it removes the /etc/nologin
files, and when it shuts down it reccreates it and goes through all
sessions in /sys/fs/cgroups/user/ and kills them. It should be one of
the last services started and the first to be shutdown. It's service
file should probably look something like this:

[Service]
ExecStart=/lib/systemd/systemd-sessions start
ExecStop=/lib/systemd/systemd-sessions stop
RemainAfterExit=yes
Type=oneshot

And then /lib/systemd/systemd-sessions should be a simple binary that
manages /etc/nologin and kills those sessions.

(Oh, and we probably should also managed /var/run/nologin or so, since
/etc/nologin is kinda sucky on r/o root dirs)
Post by Gustavo Sverzut Barbieri
and do we really need to remove the devices themselves? isn't
unmounting their filesystems enough? In my not-so-experienced point of
view the data is already on media and removing the loop/crypto/dm
devices will just free system memory, but we're exiting linux anyway
(it's pretty much like calling free() before ending a process).
Well, consider a file system on a loop device which is created from a
file on some file system. To unmount the latter fs you need to unmount
the former fs AND remove the loop device, otherwise you will get EBUSY
if you try to unmount the latter fs.

Getting rid of the loop device is actually easy. The equivalent of
"losetup -d" is a LOOP_CLR_FD ioctl call on the loop device. Really
trivial to implement.
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
- As a minor optimization we might want an umount run for all
non-API tmp disks before disabling all swaps. Why? Because when removing
the swaps everything that is swapped out on it will be swapped back into
memory. If this is tmpfs data that is not necessary, hence let's unmount
the tmpfs stuff first and then remove the swaps. (the fedora reboot
script does this). For the API tmpfs file systems (i.e. /sys/fs/cgroup
and /dev this should not be necessary since it does not store any big
files).
so we just do a pre-loop to umount tmpfs, then swap, then the regular one?
I'd integrate this into a single loop, to deal with weird stuff such as
fs-on-loop-on-tmpfs-on-swap-on-fs-on-loop-....
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
- having support for kexec would be cool. As last step simply invoke
/sbin/kexec -e -x -f, and if that fails fall back to a normal reboot.
do we really have to exec /sbin/kexec or is it possible to do some syscall?
Unless you want to reimplement the complex kernel loader logic
/sbin/kexec is the only solution.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Michael Biebl
2010-10-01 14:36:40 UTC
Permalink
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.

Cheers,
Michael
--
Why is it that all of the instruments seeking intelligent life in the
universe are pointed away from Earth?
Gustavo Sverzut Barbieri
2010-10-01 14:39:15 UTC
Permalink
Post by Michael Biebl
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.
IMO these should be stopped while at systemd's units. After all, if
units were started and then stopped by systemd, the server processes
are over already.
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Michael Biebl
2010-10-01 14:50:14 UTC
Permalink
Post by Gustavo Sverzut Barbieri
Post by Michael Biebl
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.
IMO these should be stopped while at systemd's units. After all, if
units were started and then stopped by systemd, the server processes
are over already.
Sorry, I don't understand what you are saying.

Say I have an NFS and/or fuse mount in /etc/fstab.
Which unit file you are talking about should be dealing with
unmounting this nfs mount?
--
Why is it that all of the instruments seeking intelligent life in the
universe are pointed away from Earth?
Fabiano Fidêncio
2010-10-01 15:31:50 UTC
Permalink
Post by Michael Biebl
Post by Gustavo Sverzut Barbieri
Post by Michael Biebl
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.
IMO these should be stopped while at systemd's units. After all, if
units were started and then stopped by systemd, the server processes
are over already.
Sorry, I don't understand what you are saying.
Say I have an NFS and/or fuse mount in /etc/fstab.
Which unit file you are talking about should be dealing with
unmounting this nfs mount?
NFS is being umounted. I'm passing MNT_FORCE to umount2 (in umount.c)
About fuse, Which is the problem in try to umount using umount2?
Post by Michael Biebl
--
Why is it that all of the instruments seeking intelligent life in the
universe are pointed away from Earth?
_______________________________________________
systemd-devel mailing list
http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
Fabiano Fidêncio
ProFUSION embedded systems
http://www.profusion.mobi
Bill Nottingham
2010-10-01 15:42:18 UTC
Permalink
Post by Fabiano Fidêncio
Post by Michael Biebl
Sorry, I don't understand what you are saying.
Say I have an NFS and/or fuse mount in /etc/fstab.
Which unit file you are talking about should be dealing with
unmounting this nfs mount?
NFS is being umounted. I'm passing MNT_FORCE to umount2 (in umount.c)
About fuse, Which is the problem in try to umount using umount2?
It's certianly *nicer* to unmount network mounts before you drop
the network connection (whether via NM, legacy network scripts, or
native systemd units); historically in RH/Fedora we have a SysV script
that does this.

Bill
Michael Biebl
2010-10-01 16:16:49 UTC
Permalink
Post by Fabiano Fidêncio
About fuse, Which is the problem in try to umount using umount2?
I'm not an expert regarding fuse, but say I have a partition mounted
using ntfs-3g.
If I kill the ntfs-3g process, the mount will go away.
During your "kill" stage, the order of processes being killed is
random, I guess.
So there still might be processes accessing that ntfs partition.

It would definitely be nicer, if you kill all running processes
(besides the ntfs-3g process), and then unmount the NTFS partition.
The nfs case is similar.
killall5 (at least in Debian) has an -o flag [1], and e.g. portmap or
ntfs-3g use that mechanism to not be killed by the killall script.

As I already wrote for the LVM/mdadm/cryptsetup case, imo we need a
mechanism how those tools can hook into the shutdown process.
Maybe having a single binary doing all steps in on go does not offer
the necessary flexibility.

Michael


[1] omitpid Tells killall5 to omit processes with that process id.
--
Why is it that all of the instruments seeking intelligent life in the
universe are pointed away from Earth?
Lennart Poettering
2010-10-01 16:50:42 UTC
Permalink
Post by Michael Biebl
Post by Fabiano Fidêncio
About fuse, Which is the problem in try to umount using umount2?
I'm not an expert regarding fuse, but say I have a partition mounted
using ntfs-3g.
If I kill the ntfs-3g process, the mount will go away.
During your "kill" stage, the order of processes being killed is
random, I guess.
So there still might be processes accessing that ntfs partition.
It would definitely be nicer, if you kill all running processes
(besides the ntfs-3g process), and then unmount the NTFS partition.
The nfs case is similar.
killall5 (at least in Debian) has an -o flag [1], and e.g. portmap or
ntfs-3g use that mechanism to not be killed by the killall script.
As I already wrote for the LVM/mdadm/cryptsetup case, imo we need a
mechanism how those tools can hook into the shutdown process.
Maybe having a single binary doing all steps in on go does not offer
the necessary flexibility.
As mentioned, Fabianos code is intended as last resort. The proper order
in which to shut down stuff should be ensured with with the usual
brefore/after dependencies.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Gustavo Sverzut Barbieri
2010-10-01 16:53:35 UTC
Permalink
On Fri, Oct 1, 2010 at 1:50 PM, Lennart Poettering
Post by Lennart Poettering
Post by Michael Biebl
Post by Fabiano Fidêncio
About fuse, Which is the problem in try to umount using umount2?
I'm not an expert regarding fuse, but say I have a partition mounted
using ntfs-3g.
If I kill the ntfs-3g process, the mount will go away.
During your "kill" stage, the order of processes being killed is
random, I guess.
So there still might be processes accessing that ntfs partition.
It would definitely be nicer, if you kill all running processes
(besides the ntfs-3g process), and then unmount the NTFS partition.
The nfs case is similar.
killall5 (at least in Debian) has an -o flag [1], and e.g. portmap or
ntfs-3g use that mechanism to not be killed by the killall script.
As I already wrote for the LVM/mdadm/cryptsetup case, imo we need a
mechanism how those tools can hook into the shutdown process.
Maybe having a single binary doing all steps in on go does not offer
the necessary flexibility.
As mentioned, Fabianos code is intended as last resort. The proper order
in which to shut down stuff should be ensured with with the usual
brefore/after dependencies.
yes!

Just one thing: do we still need a killall.service? Or systemd will
just handle it automatically?
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Lennart Poettering
2010-10-01 16:55:22 UTC
Permalink
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
As mentioned, Fabianos code is intended as last resort. The proper order
in which to shut down stuff should be ensured with with the usual
brefore/after dependencies.
yes!
Just one thing: do we still need a killall.service? Or systemd will
just handle it automatically?
The fedora killall.service should definitely be unnecessary.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Gustavo Sverzut Barbieri
2010-10-01 17:02:18 UTC
Permalink
On Fri, Oct 1, 2010 at 1:55 PM, Lennart Poettering
Post by Lennart Poettering
Post by Gustavo Sverzut Barbieri
Post by Lennart Poettering
As mentioned, Fabianos code is intended as last resort. The proper order
in which to shut down stuff should be ensured with with the usual
brefore/after dependencies.
yes!
Just one thing: do we still need a killall.service? Or systemd will
just handle it automatically?
The fedora killall.service should definitely be unnecessary.
so as a summary of this thread:

- rename halt.c to shutdown.c and fix Makefile.am linking it to
core as it is not needed.
- replace getopt with strcmp of verbs: poweroff, reboot, halt
- sigtimedwait() + waitpid(WNOHANG) as replacement for alarm()
- remove sync()
- umount swap after others
- exec kexec before trying reboot action.


do we have a consensus over:
- handling of devicemapper?
- umount loop? shouldn't be required...
- waitpid() after SIGKILL
- is SIGSTOP -> SIGXYZ -> SIGCONT required?
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Lennart Poettering
2010-10-01 17:47:37 UTC
Permalink
Post by Gustavo Sverzut Barbieri
- handling of devicemapper?
needed.
Post by Gustavo Sverzut Barbieri
- umount loop? shouldn't be required...
needed.
Post by Gustavo Sverzut Barbieri
- waitpid() after SIGKILL
needed.
Post by Gustavo Sverzut Barbieri
- is SIGSTOP -> SIGXYZ -> SIGCONT required?
needed.

Yupp, sucks.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Michael Biebl
2010-10-01 17:07:23 UTC
Permalink
Post by Lennart Poettering
Post by Michael Biebl
Post by Fabiano Fidêncio
About fuse, Which is the problem in try to umount using umount2?
I'm not an expert regarding fuse, but say I have a partition mounted
using ntfs-3g.
If I kill the ntfs-3g process, the mount will go away.
During your "kill" stage, the order of processes being killed is
random, I guess.
So there still might be processes accessing that ntfs partition.
It would definitely be nicer, if you kill all running processes
(besides the ntfs-3g process), and then unmount the NTFS partition.
The nfs case is similar.
killall5 (at least in Debian) has an -o flag [1], and e.g. portmap or
ntfs-3g use that mechanism to not be killed by the killall script.
As I already wrote for the LVM/mdadm/cryptsetup case, imo we need a
mechanism how those tools can hook into the shutdown process.
Maybe having a single binary doing all steps in on go does not offer
the necessary flexibility.
As mentioned, Fabianos code is intended as last resort. The proper order
in which to shut down stuff should be ensured with with the usual
brefore/after dependencies.
If an LVM/NFS/RAID mount is defined in /etc/fstab, and those mount
units are created implicitly, how can one add before/after
dependencies there?

I still fail to see the big picture here and how this is supposed to work.
Could you explain in greater detail how you intend to handle the
aforementioned cases.


Cheers,
Michael
--
Why is it that all of the instruments seeking intelligent life in the
universe are pointed away from Earth?
Gustavo Sverzut Barbieri
2010-10-01 15:35:45 UTC
Permalink
Post by Michael Biebl
Post by Gustavo Sverzut Barbieri
Post by Michael Biebl
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.
IMO these should be stopped while at systemd's units. After all, if
units were started and then stopped by systemd, the server processes
are over already.
Sorry, I don't understand what you are saying.
Say I have an NFS and/or fuse mount in /etc/fstab.
Which unit file you are talking about should be dealing with
unmounting this nfs mount?
rpcbind/portmapper are required to operate on nfs, no? if they go down
shouldn't you umount them? I'm not doing NFS since 2001 so I don't
remember how it work from bring-up and shutdown PoV.
--
Gustavo Sverzut Barbieri
http://profusion.mobi embedded systems
--------------------------------------
MSN: ***@gmail.com
Skype: gsbarbieri
Mobile: +55 (19) 9225-2202
Lennart Poettering
2010-10-01 16:43:18 UTC
Permalink
Post by Michael Biebl
Post by Gustavo Sverzut Barbieri
Post by Michael Biebl
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.
IMO these should be stopped while at systemd's units. After all, if
units were started and then stopped by systemd, the server processes
are over already.
Sorry, I don't understand what you are saying.
Say I have an NFS and/or fuse mount in /etc/fstab.
Which unit file you are talking about should be dealing with
unmounting this nfs mount?
All mount units have an implicit conflicts dependency on
umount.target. Activating the latter means that all mount points go
away. This is done as part of shutdown.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Michael Biebl
2010-10-01 16:02:56 UTC
Permalink
Post by Michael Biebl
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.
Other important use cases:
- LVM
- RAID/mdam
- luks/cryptsetup

How can those tools hook into the shutdown process so they can
correctly tear down their devices *after* running processes have been
stopped?
In Debian we have dedicated sysv init scripts which run after the
killall script and before the shutdown script.
--
Why is it that all of the instruments seeking intelligent life in the
universe are pointed away from Earth?
Lennart Poettering
2010-10-01 16:41:24 UTC
Permalink
Post by Michael Biebl
   - Send a SIGTERM to all process
   - Send a SIGKILL to all process
   - Try to umount all mount points
   - Try to remount read-only all mount points that can't
   be umounted
What about remote mounts (e.g. NFS requiring portmap) or fuse mounts?
If you kill their processes before unmounting you can not unmount
those fs cleanly.
I am pretty sure that both NFS and FUSE mount points can actually be
unmounted properly even if portmap resp. the providing processes are
gone. Since the providing processes will be killed with SIGTERM first,
they should have the chance to get rid of the mount points themselves
safely.

Also note that the code in question is needed only as last resort. At
the point this is executed all mount points and services should already
have been shut down cleanly and in the proper order. This code only
kills and unmounts what is left. Stuff will only be left if processes
actively tried to escape systemd's supervision (by playing games with
/sys/fs/cgroup/systemd) or turned out to be unkillable or suchlike.

In short: I don't think this is a problem and we already have all the
right hooks to get rid of the mount points and services correctly and
cleanly.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Andrey Borzenkov
2010-10-01 17:01:54 UTC
Permalink
Post by Lennart Poettering
In short: I don't think this is a problem and we already have all the
right hooks to get rid of the mount points and services correctly and
cleanly.
Hmmm... On my system home.mount and tmp.mount always fail
on shutdown (I was lucky to notice it before failure message was squashed :p )
This happens most likely due to various daemons started during logon
session; I would guess, ssh-agent and/or gpg-agent. They never terminate
and keep both home and tmp busy. Do I miss something?
Lennart Poettering
2010-10-01 17:48:33 UTC
Permalink
Post by Andrey Borzenkov
Post by Lennart Poettering
In short: I don't think this is a problem and we already have all the
right hooks to get rid of the mount points and services correctly and
cleanly.
Hmmm... On my system home.mount and tmp.mount always fail
on shutdown (I was lucky to notice it before failure message was squashed :p )
This happens most likely due to various daemons started during logon
session; I would guess, ssh-agent and/or gpg-agent. They never terminate
and keep both home and tmp busy. Do I miss something?
As mentioned we still need a little service that is able to shut down
the remaining sessions when the system goes down. This should fix this
problem.

Lennart
--
Lennart Poettering - Red Hat, Inc.
Loading...