From 7a3bb1a5f0cb76bb9c3fa96f4577e5713b65285c Mon Sep 17 00:00:00 2001 From: fbt Date: Sun, 4 Mar 2018 19:26:40 +0300 Subject: [PATCH] respawn rework Signed-off-by: fbt --- ssm | 194 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 95 insertions(+), 99 deletions(-) diff --git a/ssm b/ssm index 36558dd..2abfbc9 100755 --- a/ssm +++ b/ssm @@ -12,19 +12,27 @@ usage() { } var() { - declare varname=$1; shift + declare var_function=$1; shift + declare var_name # This enforces bash's grammar against things like # var 'cat /etc/shadow; foo' ... - [[ $varname =~ ^[a-zA-Z_][a-zA-Z0-9_]+?$ ]] || { - die 73 "On line $LINENO, in $FUNCNAME: Invalid identifier: '$varname'" + [[ $var_function =~ ^[a-zA-Z_][a-zA-Z0-9_]+?$ ]] || { + die 73 "On line $LINENO, in $FUNCNAME: Invalid identifier: '$var_function'" } - if ! is_function "$varname"; then + if [[ "$1" == '-v' ]]; then + var_name=$2 + shift 2 + else + var_name=$var_function + fi + + if ! is_function "$var_function"; then eval " - ${varname}() { + ${var_function}() { declare mode=set - declare -n _var=\"${varname}\" + declare -n _var=\"${var_name}\" if (( \$# )); then case \"\$1\" in @@ -40,7 +48,7 @@ var() { ('u') mode=includes;; - (*) die 71 \"Syntax error in ${varname}!\";; + (*) die 71 \"Syntax error in ${var_function}!\";; esac shift else @@ -75,18 +83,18 @@ var() { (is_file) [[ -f \"\$_var\" ]];; (is_dir|is_directory) [[ -d \"\$_var\" ]];; - (*) die 71 \"Syntax error in ${varname}!\";; + (*) die 71 \"Syntax error in ${var_function}!\";; esac - }; readonly -f \"${varname}\" + }; readonly -f \"${var_function}\" - ${varname}++() { - declare -n _var=\"${varname}\" - (( ${varname}++ )) + ${var_function}++() { + declare -n _var=\"${var_name}\" + (( _var++ )) } - ${varname}--() { - declare -n _var=\"${varname}\" - (( ${varname}-- )) + ${var_function}--() { + declare -n _var=\"${var_name}\" + (( _var-- )) } " fi @@ -94,7 +102,7 @@ var() { if (( $# )); then case "$1" in ('='|'=='|'=~'|'+='|'_='|':=') - "$varname" "$@" + "$var_function" "$@" ;; (*) @@ -124,22 +132,26 @@ spawn() { ## Run the command and wait for it to die svc() { - declare job_pid job_exit job_success last_respawn fail_counter date - var job_pid job_exit job_success last_respawn fail_counter date + declare job_pid job_exit job_success last_respawn fail_counter date counter + var job_pid job_exit job_success last_respawn fail_counter date counter svc::cleanup() { nullexec kill -n "$service_stop_signal" "$job_pid" - pid_wait "$job_pid" + anywait "$job_pid" "$service_stop_timeout" rm -f "$svc_pidfile" "$service_ready_flag" die 0 }; trap 'svc::cleanup' TERM + svc::reload() { + nullexec kill -n "$service_reload_signal" "$job_pid" + }; trap 'svc::reload' HUP + printf '%s' $BASHPID > "$svc_pidfile" while true; do - job_success = 1 # Needs to be reset + job_success = 0 # Needs to be reset # Spawn the process and record the PID spawn "$@" & job_pid = "$!" @@ -151,7 +163,25 @@ svc() { fi # Wait for the process to exit and record the exit code - wait -n; job_exit=$? + # This depends on a few things + if service_managed; then + wait "$job_pid"; job_exit=$? + else + # We need to wait for the service to write down its pidfile + until service_pidfile is file; do + (( counter >= service_pidfile_timeout*10 )) && { + printf 'No pidfile' > "$service_failed_flag" + break + } + counter++ + sleep 0.1 + done + + read -r job_pid < "$service_pidfile" + + # We consider any termination of an unmanaged service to be a failure + anywait "$job_pid"; job_exit=127 + fi if service_success_exit u "$job_exit"; then job_success = 1 @@ -160,15 +190,17 @@ svc() { job_success = 0 fail_counter++ - printf '%s\n' "$job_exit" > "$service_failed_flag" + printf '%s' "$job_exit" > "$service_failed_flag" fi # Back off if the service exits too much AND too quickly. - if (( fail_counter >= 3 )); then - printf -v date '%(%s)T' + service_respawn_force || { + if (( fail_counter >= 3 )); then + printf -v date '%(%s)T' - (( (date - last_respawn) <= 5 )) && break - fi + (( (date - last_respawn) <= 5 )) && break + fi + } # Respawn, if necessary service_respawn_flag || break @@ -187,66 +219,27 @@ svc() { svc::cleanup }; readonly -f svc -## Respawn -respawn() { - declare job_pid job_exit job_success - var job_pid job_exit - var job_success = 0 - - respawn::cleanup() { - kill -n "$service_stop_signal" "$job_pid" - wait "$job_pid" - - rm -f "$svc_pidfile" "$service_ready_flag" - - exit 0 - }; trap 'respawn::cleanup' TERM - - respawn::sigpass() { - declare sig pid - var sig = "$1" - var pid = "$2" - - kill -n "$sig" "$pid" - } - - respawn::set_traps() { - for s in "${service_signals[@]}"; do - trap "respawn::sigpass $s \$job_pid" "$s" - done - }; respawn::set_traps - - while true; do - exec "$@" & wait -n; job_exit = $? - - if service_success_exit u "$job_exit"; then - job_success = 1 - fi - - case $service_respawn in - (on-failure) job_success && break;; - (on-success) job_success || break;; - esac - done -}; readonly -f respawn - ## Run a command with its output discarded nullexec() { "$@" &>/dev/null; } readonly -f nullexec -## Wait for a pid to die -pid_wait() { - declare cnt - var cnt = 0 +## Wait for a pid, indefinitely +anywait() { + declare counter timeout + var counter = 0 + var timeout = "$2" while nullexec kill -0 "$1"; do - (( cnt >= (service_stop_timeout*10) )) && return 1 + timeout && { + (( counter >= timeout )) && return 1 + counter++ + } + sleep 0.1 - cnt++ done return 0 -}; readonly -f pid_wait +}; readonly -f anywait ## Simple timer timer() { @@ -363,7 +356,14 @@ start() { rm -f "$service_stopped_flag" - if service_managed; then + if service_oneshot; then + spawn "${service_command[@]}"; res=$? + (( res )) && { + printf '%s' "$res" > "$service_failed_flag" + return "$res" + } + printf '1' > "$service_enabled_flag" + else svc "${service_command[@]}" & if timer "$service_ready_timeout" ready; then @@ -371,18 +371,6 @@ start() { else return 5 fi - elif service_oneshot; then - spawn "${service_command[@]}"; res=$? - (( res )) && return "$res" - printf '1' > "$service_enabled_flag" - else - # Put ourselves into the cgroup, so that even when we die, whatever we started stays in it - if cgroups; then - mkdir -p "$cgroup_home/$service_cgroup_name" - echo "$BASHPID" > "$cgroup_home/$service_cgroup_name/cgroup.procs" - fi - - spawn "${service_command[@]}" & fi return 0 @@ -393,7 +381,11 @@ start() { reload() { service_running || return 3 - kill -n "$service_reload_signal" "$service_pid" + if service_managed; then + kill -n 1 "$service_pid" + else + kill -n "$service_reload_signal" "$service_pid" + fi } ## Stop the service @@ -409,16 +401,20 @@ stop() { else service_running || return 3 - nullexec kill -n "$service_stop_signal" "$service_pid" || return 1 + if service_managed; then + kill -n 15 "$service_pid" || return 1 + else + kill -n "$service_stop_signal" "$service_pid" || return 1 + fi - pid_wait "$service_pid" || return 5 + anywait "$service_pid" "$service_stop_timeout" || return 5 > "$service_stopped_flag" # Cgroup stuff if cgroups; then if service_cgroup_wait; then for p in "${service_cgroup_procs[@]}"; do - pid_wait "$p" & + anywait "$p" "$service_stop_timeout" & wait || return 5 done fi @@ -447,7 +443,7 @@ info() { "Type" "$_type" \ "$_status_label" "$_status" \ "Exec" "${service_command[*]}" \ - "Respawn" "${service_respawn:-false}" \ + "Respawn" "$service_respawn" \ "Config path" "$service_config" \ "Output log" "$service_logfile_out" @@ -502,6 +498,9 @@ qstatus() { nullexec status; } ## By default there is no ready check ready() { :; } +## Reset failes +reset-failed() { rm -f "$service_failed_flag"; } + # Main code ## Empty declarations var service_pid \ @@ -550,6 +549,7 @@ var service_stop_signal = 15 var service_cgroup_exclusive = 0 # Refuse to start the service if its cgroup is not empty var service_cgroup_wait = 0 # Wait on all the members of the cgroup to exit when stopping the service. var service_success_exit = 0 # Array, takes exit codes that are to be treated as successful termination. +var service_pidfile_timeout = 15 # How long to wait for unmanaged services to create their pidfiles. # Global config var cgroups = 0 # Enable cgroup-related functions @@ -567,6 +567,7 @@ var service_nologs = 0 var service_cgroup_empty = 1 var service_cgroup_empty = 1 var service_respawn_flag = 0 +var service_respawn_force = 0 # These depend on who we are if (( $UID )); then @@ -675,13 +676,8 @@ fi service_args && service_command += "${service_args[@]}" service_type == 'oneshot' && service_oneshot = 1 -service_oneshot && service_managed = 0 service_pidfile && service_managed = 0 -if ! service_managed; then - service_respawn_flag && die 21 "Refusing to respawn a service that manages itself." -fi - # Semi-hardcoded stuff svc_pidfile = "$rundir/$service_name.pid"