From 39eb1aff49b663c0e4be85ed19ce012e88b8c539 Mon Sep 17 00:00:00 2001 From: fbt Date: Sun, 4 Mar 2018 16:55:53 +0300 Subject: [PATCH] Respawn cleanup and new features; usage Signed-off-by: fbt --- ssm | 167 +++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 131 insertions(+), 36 deletions(-) diff --git a/ssm b/ssm index bc06982..36558dd 100755 --- a/ssm +++ b/ssm @@ -5,6 +5,12 @@ shopt -s nullglob is_function() [[ $(type -t $1 2>/dev/null) == 'function' ]] readonly -f is_function +usage() { + cat <<- EOF + Usage: ssm + EOF +} + var() { declare varname=$1; shift @@ -108,40 +114,84 @@ die() { exit "$code" }; readonly -f die +spawn() { + if [[ $service_logfile_out == "$service_logfile_err" ]]; then + exec "$@" >"$service_logfile_out" 2>&1 + else + exec "$@" >"$service_logfile_out" 2>"$service_logfile_err" + fi +} + ## Run the command and wait for it to die svc() { - declare job_pid - var job_pid - - # Cgroups - if cgroups; then - mkdir -p "$cgroup_home/$service_cgroup_name" - echo "$BASHPID" > "$cgroup_home/$service_cgroup_name/cgroup.procs" - fi + declare job_pid job_exit job_success last_respawn fail_counter date + var job_pid job_exit job_success last_respawn fail_counter date svc::cleanup() { nullexec kill -n "$service_stop_signal" "$job_pid" pid_wait "$job_pid" rm -f "$svc_pidfile" "$service_ready_flag" + + die 0 }; trap 'svc::cleanup' TERM - svc::reload() { - kill -n "$service_reload_signal" "$job_pid" - }; trap 'svc::reload' HUP + printf '%s' $BASHPID > "$svc_pidfile" - "$@" 1>"$service_logfile_out" 2>"$service_logfile_err" & job_pid = "$!" + while true; do + job_success = 1 # Needs to be reset - printf '%s' "$job_pid" > "$svc_pidfile" - wait "$job_pid" + # Spawn the process and record the PID + spawn "$@" & job_pid = "$!" 
+ + # Cgroups + if cgroups; then + mkdir -p "$cgroup_home/$service_cgroup_name" + echo "$job_pid" > "$cgroup_home/$service_cgroup_name/cgroup.procs" + fi + + # Wait for the process to exit and record the exit code + wait -n; job_exit=$? + + if service_success_exit u "$job_exit"; then + job_success = 1 + (( fail_counter )) && fail_counter-- + else + job_success = 0 + fail_counter++ + + printf '%s\n' "$job_exit" > "$service_failed_flag" + fi + + # Back off if the service exits too much AND too quickly. + if (( fail_counter >= 3 )); then + printf -v date '%(%s)T' + + (( (date - last_respawn) <= 5 )) && break + fi + + # Respawn, if necessary + service_respawn_flag || break + case $service_respawn in + (on-success) job_success || break;; + (on-failure) job_success && break;; + esac + + # Remove the failed flag, we're going to attempt a restart. + rm -f "$service_failed_flag" + + # Record the time every time we restart the loop + printf -v last_respawn '%(%s)T' + done svc::cleanup }; readonly -f svc ## Respawn respawn() { - declare job_pid - var job_pid + declare job_pid job_exit job_success + var job_pid job_exit + var job_success = 0 respawn::cleanup() { kill -n "$service_stop_signal" "$job_pid" @@ -167,11 +217,16 @@ respawn() { }; respawn::set_traps while true; do - exec "$@" & job_pid = "$!" + exec "$@" & wait -n; job_exit = $? 
- while nullexec kill -n 0 "$job_pid"; do - wait "$job_pid" - done + if service_success_exit u "$job_exit"; then + job_success = 1 + fi + + case $service_respawn in + (on-failure) job_success && break;; + (on-success) job_success || break;; + esac done }; readonly -f respawn @@ -309,11 +364,7 @@ start() { rm -f "$service_stopped_flag" if service_managed; then - if service_respawn; then - svc respawn "${service_command[@]}" & - else - svc "${service_command[@]}" & - fi + svc "${service_command[@]}" & if timer "$service_ready_timeout" ready; then printf '1' > "$service_ready_flag" @@ -321,7 +372,7 @@ start() { return 5 fi elif service_oneshot; then - "${service_command[@]}" 1>"$service_logfile_out" 2>"$service_logfile_err"; res=$? + spawn "${service_command[@]}"; res=$? (( res )) && return "$res" printf '1' > "$service_enabled_flag" else @@ -331,7 +382,7 @@ start() { echo "$BASHPID" > "$cgroup_home/$service_cgroup_name/cgroup.procs" fi - exec "${service_command[@]}" 1>"$service_logfile_out" 2>"$service_logfile_err" & + spawn "${service_command[@]}" & fi return 0 @@ -398,8 +449,11 @@ info() { "Exec" "${service_command[*]}" \ "Respawn" "${service_respawn:-false}" \ "Config path" "$service_config" \ - "Output log" "$service_logfile_out" \ - "Error log" "$service_logfile_err" + "Output log" "$service_logfile_out" + + service_logfile_out == "$service_logfile_err" || { + _info_items+=( "Error log" "$service_logfile_err" ) + } if _status == 'yes'; then _info_items += \ @@ -424,13 +478,20 @@ restart() { } edit() { $EDITOR "$service_config"; } -logs() { printf '%s\n' "$service_logfile_out" "$service_logfile_err"; } +logs() { + if service_logfile_out == "$service_logfile_err"; then + $PAGER "$service_logfile_out" + else + printf '%s\n' "$service_logfile_out" "$service_logfile_err" + fi +} ## Status is a bit of a special case. It's talkative. 
status() { service_running && return 0 service_enabled && return 0 service_stopped && return 7 + service_failed && return 9 return 1 } @@ -457,6 +518,7 @@ var service_pid \ service_ready_flag \ service_enabled_flag \ service_stopped_flag \ + service_failed_flag \ service_cgroup_name \ service_cgroup_procs \ service_cgroup_path \ @@ -478,7 +540,7 @@ var XDG_RUNTIME_DIR := "/run/user/$UID" ## Let's set some defaults # These are meaningful to reconfigure. -var service_respawn = 0 # Respawn the service if it exits +var service_respawn = 'no' # Respawn the service if it exits var service_workdir = '/' var service_stop_timeout = 30 var service_ready_timeout = 15 @@ -487,6 +549,7 @@ var service_reload_signal = 1 var service_stop_signal = 15 var service_cgroup_exclusive = 0 # Refuse to start the service if its cgroup is not empty var service_cgroup_wait = 0 # Wait on all the members of the cgroup to exit when stopping the service. +var service_success_exit = 0 # Array, takes exit codes that are to be treated as successful termination. 
# Global config var cgroups = 0 # Enable cgroup-related functions @@ -499,9 +562,11 @@ var service_oneshot = 0 var service_running = 0 var service_enabled = 0 var service_stopped = 0 +var service_failed = 0 var service_nologs = 0 var service_cgroup_empty = 1 var service_cgroup_empty = 1 +var service_respawn_flag = 0 # These depend on who we are if (( $UID )); then @@ -547,6 +612,18 @@ for (( idx=${#cfg_path[@]}-1; idx>=0; idx-- )); do done done +# Parse arguments +while (( $# )); do + case $1 in + (-h|--help) usage; exit 0;; + (--) shift; break;; + (-*) printf 'Unknown key: %s\n' "$1" >&2; exit 1;; + (*) break;; + esac + + shift +done + # Now create the needed runtime stuff for d in "$rundir" "$logdir"; do mkdir -p "$d" || die 3 "Failed to create runtime dir: $d" @@ -555,6 +632,9 @@ done # Common service path service_path += "$XDG_CONFIG_HOME/ssm/services" '/etc/ssm/services' "$rundir/services" "$usrdir/services" +# This script requires at least two arguments +(( $# >= 2 )) || { usage; exit 2; } + # If $1 is a full path, source it. # If not, search for it in the service path. if [[ $1 == /* ]]; then @@ -584,16 +664,22 @@ done # Get the service config source -- "$service_config" "${@:3}" || die 7 "Failed to read the service config: $service_config" +if ! service_respawn == 'no'; then + case $service_respawn in + (on-failure|on-success|always) service_respawn_flag = 1;; + (*) die 88 "Wrong value for service_respawn";; + esac +fi + # Legacy service_args && service_command += "${service_args[@]}" -service_respawn == 'true' && service_respawn = 1 service_type == 'oneshot' && service_oneshot = 1 service_oneshot && service_managed = 0 service_pidfile && service_managed = 0 if ! service_managed; then - service_respawn && die 21 "Refusing to respawn a service that manages itself." 
fi # Semi-hardcoded stuff @@ -601,13 +687,15 @@ svc_pidfile = "$rundir/$service_name.pid" # Service-level defaults service_pidfile := "$svc_pidfile" -service_logfile_out := "$logdir/${service_name}.out.log" -service_logfile_err := "$logdir/${service_name}.err.log" +service_logfile_out := "$logdir/${service_name}.log" +service_logfile_err := "$service_logfile_out" service_ready_flag := "$rundir/$service_name.ready" service_enabled_flag := "$rundir/$service_name.enabled" service_stopped_flag := "$rundir/$service_name.stopped" +service_failed_flag := "$rundir/$service_name.failed" service_cgroup_name := "$service_name" service_cgroup_path := "$cgroup_home/$service_name" +service_success_exit := 0 # A shortcut for disabling logging if service_nologs; then @@ -637,6 +725,12 @@ if service_stopped_flag is file; then service_stopped = 1 fi +# Maybe it has failed? +if service_failed_flag is file; then + # :( + service_failed = 1 +fi + # Check cgroups, if enabled if cgroups; then if service_cgroup_path is dir; then @@ -709,7 +803,8 @@ case "$2" in result "$res" \ 0 "$service_name is running" \ 1 "$service_name is not running" \ - 7 "$service_name was stopped" + 7 "$service_name was stopped" \ + 9 "$service_name has failed" fi ;; esac