Respawn cleanup and new features; usage

Signed-off-by: fbt <fbt@fleshless.org>
This commit is contained in:
Jack L. Frost 2018-03-04 16:55:53 +03:00
parent 92b54483bd
commit 39eb1aff49
1 changed files with 131 additions and 36 deletions

167
ssm
View File

@ -5,6 +5,12 @@ shopt -s nullglob
# Succeed when $1 names a shell function.
# Arguments: $1 - the name to look up
# Returns:   0 if `type -t` reports 'function' for that name, non-zero otherwise
# The argument is quoted so that a value containing whitespace or glob
# characters is looked up as one name rather than being split into several
# (which could yield 'function' for the first word alone).
is_function() [[ $(type -t "$1" 2>/dev/null) == 'function' ]]
readonly -f is_function
# Print the one-line synopsis of the command to stdout.
usage() {
    printf '%s\n' 'Usage: ssm <service> <function>'
}
var() {
declare varname=$1; shift
@ -108,40 +114,84 @@ die() {
exit "$code"
}; readonly -f die
# Replace the current shell with the given command, with its output sent to
# the service log files.
# Arguments: "$@" - the command and its arguments
# Globals:   service_logfile_out, service_logfile_err (read)
# When both log paths are the same string, stderr is duplicated onto stdout so
# that the file is opened only once; otherwise each stream gets its own file.
spawn() {
    case $service_logfile_err in
        "$service_logfile_out")
            exec "$@" >"$service_logfile_out" 2>&1
            ;;
        *)
            exec "$@" >"$service_logfile_out" 2>"$service_logfile_err"
            ;;
    esac
}
## Run the command and wait for it to die
##
## Supervises "$@": starts it in the background, records its PID, waits for it
## to exit and, depending on the respawn settings, loops to start it again.
## TERM is trapped to stop the job and clean up; HUP is trapped to forward the
## reload signal to the job.
##
## NOTE(review): this span looks like a rendered diff in which pre-change and
## post-change lines are interleaved (job_pid is declared twice, the cgroup
## setup appears twice, and the command is launched twice). Confirm against
## the real file before relying on the statement order shown here.
svc() {
declare job_pid
var job_pid
# Cgroups
# This variant puts the supervising shell itself ($BASHPID) into the cgroup.
if cgroups; then
mkdir -p "$cgroup_home/$service_cgroup_name"
echo "$BASHPID" > "$cgroup_home/$service_cgroup_name/cgroup.procs"
fi
declare job_pid job_exit job_success last_respawn fail_counter date
var job_pid job_exit job_success last_respawn fail_counter date
# TERM handler: send the stop signal to the job, wait for that PID, remove the
# pidfile and the ready flag, then leave through die with code 0.
# NOTE(review): nullexec and pid_wait are defined elsewhere; presumably
# "run quietly" and "block until the PID is gone" -- confirm.
svc::cleanup() {
nullexec kill -n "$service_stop_signal" "$job_pid"
pid_wait "$job_pid"
rm -f "$svc_pidfile" "$service_ready_flag"
die 0
}; trap 'svc::cleanup' TERM
# HUP handler: forward the configured reload signal to the job.
svc::reload() {
kill -n "$service_reload_signal" "$job_pid"
}; trap 'svc::reload' HUP
# Write this shell's own PID into the pidfile.
printf '%s' $BASHPID > "$svc_pidfile"
"$@" 1>"$service_logfile_out" 2>"$service_logfile_err" & job_pid = "$!"
while true; do
job_success = 1 # Needs to be reset
printf '%s' "$job_pid" > "$svc_pidfile"
wait "$job_pid"
# Spawn the process and record the PID
spawn "$@" & job_pid = "$!"
# Cgroups
# This variant puts the spawned job's PID into the cgroup.
if cgroups; then
mkdir -p "$cgroup_home/$service_cgroup_name"
echo "$job_pid" > "$cgroup_home/$service_cgroup_name/cgroup.procs"
fi
# Wait for the process to exit and record the exit code
# NOTE(review): this is a plain shell assignment, while the rest of the
# function uses the `name = value` form -- confirm both reach the same storage.
wait -n; job_exit=$?
# Classify the exit code against service_success_exit; on failure the exit
# code is written into the failed flag file.
# NOTE(review): `fail_counter--` and `fail_counter++` stand alone as command
# words here, not inside (( )); confirm they really adjust the counter.
if service_success_exit u "$job_exit"; then
job_success = 1
(( fail_counter )) && fail_counter--
else
job_success = 0
fail_counter++
printf '%s\n' "$job_exit" > "$service_failed_flag"
fi
# Back off if the service exits too much AND too quickly.
# "Too much" is a fail_counter of 3 or more; "too quickly" is 5 seconds or
# less since last_respawn was recorded. Both values are epoch seconds
# produced by printf's %(%s)T conversion.
if (( fail_counter >= 3 )); then
printf -v date '%(%s)T'
(( (date - last_respawn) <= 5 )) && break
fi
# Respawn, if necessary
# Leave the loop when respawning is off, or when the policy in
# service_respawn does not match the outcome of the last run. Any other
# policy value falls through and the loop continues.
service_respawn_flag || break
case $service_respawn in
(on-success) job_success || break;;
(on-failure) job_success && break;;
esac
# Remove the failed flag, we're going to attempt a restart.
rm -f "$service_failed_flag"
# Record the time every time we restart the loop
printf -v last_respawn '%(%s)T'
done
# The loop has ended without a restart: run the same cleanup as the TERM trap.
svc::cleanup
}; readonly -f svc
## Respawn
respawn() {
declare job_pid
var job_pid
declare job_pid job_exit job_success
var job_pid job_exit
var job_success = 0
respawn::cleanup() {
kill -n "$service_stop_signal" "$job_pid"
@ -167,11 +217,16 @@ respawn() {
}; respawn::set_traps
while true; do
exec "$@" & job_pid = "$!"
exec "$@" & wait -n; job_exit = $?
while nullexec kill -n 0 "$job_pid"; do
wait "$job_pid"
done
if service_success_exit u "$job_exit"; then
job_success = 1
fi
case $service_respawn in
(on-failure) job_success && break;;
(on-success) job_success || break;;
esac
done
}; readonly -f respawn
@ -309,11 +364,7 @@ start() {
rm -f "$service_stopped_flag"
if service_managed; then
if service_respawn; then
svc respawn "${service_command[@]}" &
else
svc "${service_command[@]}" &
fi
svc "${service_command[@]}" &
if timer "$service_ready_timeout" ready; then
printf '1' > "$service_ready_flag"
@ -321,7 +372,7 @@ start() {
return 5
fi
elif service_oneshot; then
"${service_command[@]}" 1>"$service_logfile_out" 2>"$service_logfile_err"; res=$?
spawn "${service_command[@]}"; res=$?
(( res )) && return "$res"
printf '1' > "$service_enabled_flag"
else
@ -331,7 +382,7 @@ start() {
echo "$BASHPID" > "$cgroup_home/$service_cgroup_name/cgroup.procs"
fi
exec "${service_command[@]}" 1>"$service_logfile_out" 2>"$service_logfile_err" &
spawn "${service_command[@]}" &
fi
return 0
@ -398,8 +449,11 @@ info() {
"Exec" "${service_command[*]}" \
"Respawn" "${service_respawn:-false}" \
"Config path" "$service_config" \
"Output log" "$service_logfile_out" \
"Error log" "$service_logfile_err"
"Output log" "$service_logfile_out"
service_logfile_out == "$service_logfile_err" || {
_info_items+=( "Error log" "$service_logfile_err" )
}
if _status == 'yes'; then
_info_items += \
@ -424,13 +478,20 @@ restart() {
}
# Open the service config in the user's editor.
# Globals: EDITOR (read), service_config (read)
# The editor value is expanded unquoted on purpose so that it may carry
# arguments (e.g. "emacs -nw"). When EDITOR is unset or empty, fall back to vi;
# without a fallback the expansion vanishes and the config path itself would
# be executed as a command.
edit() { ${EDITOR:-vi} "$service_config"; }
# Print the paths of the stdout log and the stderr log, one per line.
logs() {
    printf '%s\n%s\n' "$service_logfile_out" "$service_logfile_err"
}
# Show the service logs.
# Globals: PAGER, service_logfile_out, service_logfile_err (read)
# When stdout and stderr share a single log file, open that file in the pager;
# otherwise print the two log paths, one per line.
# The pager value is expanded unquoted on purpose so that it may carry
# arguments. When PAGER is unset or empty, fall back to less; without a
# fallback the expansion vanishes and the log file itself would be executed.
logs() {
    if service_logfile_out == "$service_logfile_err"; then
        ${PAGER:-less} "$service_logfile_out"
    else
        printf '%s\n' "$service_logfile_out" "$service_logfile_err"
    fi
}
## Status is a bit of a special case. It's talkative.
## Maps the service state flags to a return code, first match wins:
##   running or enabled -> 0, stopped -> 7, failed -> 9, none of them -> 1.
status() {
    if service_running || service_enabled; then
        return 0
    elif service_stopped; then
        return 7
    elif service_failed; then
        return 9
    fi

    return 1
}
@ -457,6 +518,7 @@ var service_pid \
service_ready_flag \
service_enabled_flag \
service_stopped_flag \
service_failed_flag \
service_cgroup_name \
service_cgroup_procs \
service_cgroup_path \
@ -478,7 +540,7 @@ var XDG_RUNTIME_DIR := "/run/user/$UID"
## Let's set some defaults
# These are meaningful to reconfigure.
var service_respawn = 0 # Respawn the service if it exits
var service_respawn = 'no' # Respawn the service if it exits
var service_workdir = '/'
var service_stop_timeout = 30
var service_ready_timeout = 15
@ -487,6 +549,7 @@ var service_reload_signal = 1
var service_stop_signal = 15
var service_cgroup_exclusive = 0 # Refuse to start the service if its cgroup is not empty
var service_cgroup_wait = 0 # Wait on all the members of the cgroup to exit when stopping the service.
var service_success_exit = 0 # Array, takes exit codes that are to be treated as successful termination.
# Global config
var cgroups = 0 # Enable cgroup-related functions
@ -499,9 +562,11 @@ var service_oneshot = 0
var service_running = 0
var service_enabled = 0
var service_stopped = 0
var service_failed = 0
var service_nologs = 0
var service_cgroup_empty = 1
var service_cgroup_empty = 1
var service_respawn_flag = 0
# These depend on who we are
if (( $UID )); then
@ -547,6 +612,18 @@ for (( idx=${#cfg_path[@]}-1; idx>=0; idx-- )); do
done
done
# Parse arguments
# Only leading options are handled here: -h/--help prints the usage text and
# exits 0, `--` is consumed and ends option parsing, any other dash-prefixed
# word is reported on stderr and exits 1, and the first non-option word is
# left in place for the code that follows.
while (( $# > 0 )); do
    if [[ $1 == -h || $1 == --help ]]; then
        usage
        exit 0
    elif [[ $1 == -- ]]; then
        shift
        break
    elif [[ $1 == -* ]]; then
        printf 'Unknown key: %s\n' "$1" >&2
        exit 1
    else
        break
    fi

    shift
done
# Now create the needed runtime stuff
for d in "$rundir" "$logdir"; do
mkdir -p "$d" || die 3 "Failed to create runtime dir: $d"
@ -555,6 +632,9 @@ done
# Common service path
service_path += "$XDG_CONFIG_HOME/ssm/services" '/etc/ssm/services' "$rundir/services" "$usrdir/services"
# This script requires at least two arguments: <service> and <function>.
# Too few arguments is a usage error, so the synopsis goes to stderr.
(( $# >= 2 )) || { usage >&2; exit 2; }
# If $1 is a full path, source it.
# If not, search for it in the service path.
if [[ $1 == /* ]]; then
@ -584,16 +664,22 @@ done
# Get the service config
source -- "$service_config" "${@:3}" || die 7 "Failed to read the service config: $service_config"
# Validate the respawn policy read from the service config.
# 'no' leaves respawning off. Any other value must be one of the recognised
# policies, which turns service_respawn_flag on; anything else aborts through
# die with code 88.
if ! service_respawn == 'no'; then
case $service_respawn in
(on-failure|on-success|always) service_respawn_flag = 1;;
(*) die 88 "Wrong value for service_respawn";;
esac
fi
# Legacy
service_args && service_command += "${service_args[@]}"
service_respawn == 'true' && service_respawn = 1
service_type == 'oneshot' && service_oneshot = 1
service_oneshot && service_managed = 0
service_pidfile && service_managed = 0
if ! service_managed; then
service_respawn && die 21 "Refusing to respawn a service that manages itself."
service_respawn_flag && die 21 "Refusing to respawn a service that manages itself."
fi
# Semi-hardcoded stuff
@ -601,13 +687,15 @@ svc_pidfile = "$rundir/$service_name.pid"
# Service-level defaults
service_pidfile := "$svc_pidfile"
service_logfile_out := "$logdir/${service_name}.out.log"
service_logfile_err := "$logdir/${service_name}.err.log"
service_logfile_out := "$logdir/${service_name}.log"
service_logfile_err := "$service_logfile_out"
service_ready_flag := "$rundir/$service_name.ready"
service_enabled_flag := "$rundir/$service_name.enabled"
service_stopped_flag := "$rundir/$service_name.stopped"
service_failed_flag := "$rundir/$service_name.failed"
service_cgroup_name := "$service_name"
service_cgroup_path := "$cgroup_home/$service_name"
service_success_exit := 0
# A shortcut for disabling logging
if service_nologs; then
@ -637,6 +725,12 @@ if service_stopped_flag is file; then
service_stopped = 1
fi
# Maybe it has failed?
# svc writes the job's exit code into the failed flag file when that code is
# not accepted as a successful exit.
# NOTE(review): `is file` is presumably a regular-file test -- confirm.
if service_failed_flag is file; then
# The flag is there: remember the failure so that status can return 9.
service_failed = 1
fi
# Check cgroups, if enabled
if cgroups; then
if service_cgroup_path is dir; then
@ -709,7 +803,8 @@ case "$2" in
result "$res" \
0 "$service_name is running" \
1 "$service_name is not running" \
7 "$service_name was stopped"
7 "$service_name was stopped" \
9 "$service_name has failed"
fi
;;
esac