respawn rework

Signed-off-by: fbt <fbt@fleshless.org>
This commit is contained in:
Jack L. Frost 2018-03-04 19:26:40 +03:00
parent 39eb1aff49
commit 7a3bb1a5f0
1 changed files with 95 additions and 99 deletions

194
ssm
View File

@ -12,19 +12,27 @@ usage() {
}
var() {
declare varname=$1; shift
declare var_function=$1; shift
declare var_name
# Only accept names that are valid bash identifiers, which rejects things like
# var 'cat /etc/shadow; foo' ...
[[ $varname =~ ^[a-zA-Z_][a-zA-Z0-9_]+?$ ]] || {
die 73 "On line $LINENO, in $FUNCNAME: Invalid identifier: '$varname'"
[[ $var_function =~ ^[a-zA-Z_][a-zA-Z0-9_]+?$ ]] || {
die 73 "On line $LINENO, in $FUNCNAME: Invalid identifier: '$var_function'"
}
if ! is_function "$varname"; then
if [[ "$1" == '-v' ]]; then
var_name=$2
shift 2
else
var_name=$var_function
fi
if ! is_function "$var_function"; then
eval "
${varname}() {
${var_function}() {
declare mode=set
declare -n _var=\"${varname}\"
declare -n _var=\"${var_name}\"
if (( \$# )); then
case \"\$1\" in
@ -40,7 +48,7 @@ var() {
('u') mode=includes;;
(*) die 71 \"Syntax error in ${varname}!\";;
(*) die 71 \"Syntax error in ${var_function}!\";;
esac
shift
else
@ -75,18 +83,18 @@ var() {
(is_file) [[ -f \"\$_var\" ]];;
(is_dir|is_directory) [[ -d \"\$_var\" ]];;
(*) die 71 \"Syntax error in ${varname}!\";;
(*) die 71 \"Syntax error in ${var_function}!\";;
esac
}; readonly -f \"${varname}\"
}; readonly -f \"${var_function}\"
${varname}++() {
declare -n _var=\"${varname}\"
(( ${varname}++ ))
${var_function}++() {
declare -n _var=\"${var_name}\"
(( _var++ ))
}
${varname}--() {
declare -n _var=\"${varname}\"
(( ${varname}-- ))
${var_function}--() {
declare -n _var=\"${var_name}\"
(( _var-- ))
}
"
fi
@ -94,7 +102,7 @@ var() {
if (( $# )); then
case "$1" in
('='|'=='|'=~'|'+='|'_='|':=')
"$varname" "$@"
"$var_function" "$@"
;;
(*)
@ -124,22 +132,26 @@ spawn() {
## Run the command and wait for it to die
svc() {
declare job_pid job_exit job_success last_respawn fail_counter date
var job_pid job_exit job_success last_respawn fail_counter date
declare job_pid job_exit job_success last_respawn fail_counter date counter
var job_pid job_exit job_success last_respawn fail_counter date counter
svc::cleanup() {
nullexec kill -n "$service_stop_signal" "$job_pid"
pid_wait "$job_pid"
anywait "$job_pid" "$service_stop_timeout"
rm -f "$svc_pidfile" "$service_ready_flag"
die 0
}; trap 'svc::cleanup' TERM
svc::reload() {
nullexec kill -n "$service_reload_signal" "$job_pid"
}; trap 'svc::reload' HUP
printf '%s' $BASHPID > "$svc_pidfile"
while true; do
job_success = 1 # Needs to be reset
job_success = 0 # Needs to be reset
# Spawn the process and record the PID
spawn "$@" & job_pid = "$!"
@ -151,7 +163,25 @@ svc() {
fi
# Wait for the process to exit and record the exit code
wait -n; job_exit=$?
# This depends on a few things
if service_managed; then
wait "$job_pid"; job_exit=$?
else
# We need to wait for the service to write down its pidfile
until service_pidfile is file; do
(( counter >= service_pidfile_timeout*10 )) && {
printf 'No pidfile' > "$service_failed_flag"
break
}
counter++
sleep 0.1
done
read -r job_pid < "$service_pidfile"
# We consider any termination of an unmanaged service to be a failure
anywait "$job_pid"; job_exit=127
fi
if service_success_exit u "$job_exit"; then
job_success = 1
@ -160,15 +190,17 @@ svc() {
job_success = 0
fail_counter++
printf '%s\n' "$job_exit" > "$service_failed_flag"
printf '%s' "$job_exit" > "$service_failed_flag"
fi
# Back off if the service exits too often AND too quickly.
if (( fail_counter >= 3 )); then
printf -v date '%(%s)T'
service_respawn_force || {
if (( fail_counter >= 3 )); then
printf -v date '%(%s)T'
(( (date - last_respawn) <= 5 )) && break
fi
(( (date - last_respawn) <= 5 )) && break
fi
}
# Respawn, if necessary
service_respawn_flag || break
@ -187,66 +219,27 @@ svc() {
svc::cleanup
}; readonly -f svc
## Respawn
# Run the given command in the background and start it again each time it
# exits, until the $service_respawn policy says to stop.
# Arguments: "$@" - the command to run, with its arguments
# Globals read: service_stop_signal, svc_pidfile, service_ready_flag,
#               service_signals, service_respawn, service_success_exit
respawn() {
declare job_pid job_exit job_success
# `var` (defined earlier in this file) creates a function named after the
# variable; that is what makes the `name = value` statements below work.
var job_pid job_exit
var job_success = 0
# TERM handler: send $service_stop_signal to the job, wait for it, remove the
# pidfile and the ready flag, then exit 0.
# NOTE(review): nothing visible in this function assigns job_pid -- confirm
# where it is expected to be set before these handlers run.
respawn::cleanup() {
kill -n "$service_stop_signal" "$job_pid"
wait "$job_pid"
rm -f "$svc_pidfile" "$service_ready_flag"
exit 0
}; trap 'respawn::cleanup' TERM
# Forward signal $1 to pid $2.
respawn::sigpass() {
declare sig pid
var sig = "$1"
var pid = "$2"
kill -n "$sig" "$pid"
}
# Install a trap for every signal listed in service_signals that passes the
# signal on to the job. \$job_pid is escaped, so it is expanded when the trap
# fires rather than when the trap is installed.
respawn::set_traps() {
for s in "${service_signals[@]}"; do
trap "respawn::sigpass $s \$job_pid" "$s"
done
}; respawn::set_traps
while true; do
# Start the command in the background, wait for a background job to finish
# and record its exit status.
exec "$@" & wait -n; job_exit = $?
# The 'u' operator selects the accessor's "includes" mode; presumably this
# checks that job_exit is one of the configured success codes -- TODO confirm.
# NOTE(review): job_success is never reset to 0 inside the loop, so one
# success appears to stick for all later iterations -- confirm intent.
if service_success_exit u "$job_exit"; then
job_success = 1
fi
# Decide whether to loop again: on-failure stops after a successful exit,
# on-success stops after a failed one; any other value keeps respawning.
# (Calling job_success with no arguments presumably tests its truthiness.)
case $service_respawn in
(on-failure) job_success && break;;
(on-success) job_success || break;;
esac
done
}; readonly -f respawn
## Execute the given command line, throwing away both stdout and stderr.
## The command's exit status is passed through unchanged.
nullexec() {
	"$@" >/dev/null 2>&1
}
readonly -f nullexec
## Wait for a pid to die
pid_wait() {
declare cnt
var cnt = 0
## Wait for a pid to exit; waits indefinitely unless a timeout is given as $2
anywait() {
declare counter timeout
var counter = 0
var timeout = "$2"
while nullexec kill -0 "$1"; do
(( cnt >= (service_stop_timeout*10) )) && return 1
timeout && {
(( counter >= timeout )) && return 1
counter++
}
sleep 0.1
cnt++
done
return 0
}; readonly -f pid_wait
}; readonly -f anywait
## Simple timer
timer() {
@ -363,7 +356,14 @@ start() {
rm -f "$service_stopped_flag"
if service_managed; then
if service_oneshot; then
spawn "${service_command[@]}"; res=$?
(( res )) && {
printf '%s' "$res" > "$service_failed_flag"
return "$res"
}
printf '1' > "$service_enabled_flag"
else
svc "${service_command[@]}" &
if timer "$service_ready_timeout" ready; then
@ -371,18 +371,6 @@ start() {
else
return 5
fi
elif service_oneshot; then
spawn "${service_command[@]}"; res=$?
(( res )) && return "$res"
printf '1' > "$service_enabled_flag"
else
# Put ourselves into the cgroup, so that even when we die, whatever we started stays in it
if cgroups; then
mkdir -p "$cgroup_home/$service_cgroup_name"
echo "$BASHPID" > "$cgroup_home/$service_cgroup_name/cgroup.procs"
fi
spawn "${service_command[@]}" &
fi
return 0
@ -393,7 +381,11 @@ start() {
reload() {
service_running || return 3
kill -n "$service_reload_signal" "$service_pid"
if service_managed; then
kill -n 1 "$service_pid"
else
kill -n "$service_reload_signal" "$service_pid"
fi
}
## Stop the service
@ -409,16 +401,20 @@ stop() {
else
service_running || return 3
nullexec kill -n "$service_stop_signal" "$service_pid" || return 1
if service_managed; then
kill -n 15 "$service_pid" || return 1
else
kill -n "$service_stop_signal" "$service_pid" || return 1
fi
pid_wait "$service_pid" || return 5
anywait "$service_pid" "$service_stop_timeout" || return 5
> "$service_stopped_flag"
# Cgroup stuff
if cgroups; then
if service_cgroup_wait; then
for p in "${service_cgroup_procs[@]}"; do
pid_wait "$p" &
anywait "$p" "$service_stop_timeout" &
wait || return 5
done
fi
@ -447,7 +443,7 @@ info() {
"Type" "$_type" \
"$_status_label" "$_status" \
"Exec" "${service_command[*]}" \
"Respawn" "${service_respawn:-false}" \
"Respawn" "$service_respawn" \
"Config path" "$service_config" \
"Output log" "$service_logfile_out"
@ -502,6 +498,9 @@ qstatus() { nullexec status; }
## Default readiness check: there is none, so always report success
ready() {
	return 0
}
## Clear a recorded failure by removing the failed flag file
reset-failed() {
	rm -f "$service_failed_flag"
}
# Main code
## Empty declarations
var service_pid \
@ -550,6 +549,7 @@ var service_stop_signal = 15
var service_cgroup_exclusive = 0 # Refuse to start the service if its cgroup is not empty
var service_cgroup_wait = 0 # Wait on all the members of the cgroup to exit when stopping the service.
var service_success_exit = 0 # Array, takes exit codes that are to be treated as successful termination.
var service_pidfile_timeout = 15 # How long to wait for unmanaged services to create their pidfiles.
# Global config
var cgroups = 0 # Enable cgroup-related functions
@ -567,6 +567,7 @@ var service_nologs = 0
var service_cgroup_empty = 1
var service_cgroup_empty = 1
var service_respawn_flag = 0
var service_respawn_force = 0
# These depend on who we are
if (( $UID )); then
@ -675,13 +676,8 @@ fi
service_args && service_command += "${service_args[@]}"
service_type == 'oneshot' && service_oneshot = 1
service_oneshot && service_managed = 0
service_pidfile && service_managed = 0
if ! service_managed; then
service_respawn_flag && die 21 "Refusing to respawn a service that manages itself."
fi
# Semi-hardcoded stuff
svc_pidfile = "$rundir/$service_name.pid"