respawn rework

Signed-off-by: fbt <fbt@fleshless.org>
This commit is contained in:
Jack L. Frost 2018-03-04 19:26:40 +03:00
parent 39eb1aff49
commit 7a3bb1a5f0

184
ssm
View File

@ -12,19 +12,27 @@ usage() {
} }
var() { var() {
declare varname=$1; shift declare var_function=$1; shift
declare var_name
# This enforces bash's grammar against things like # This enforces bash's grammar against things like
# var 'cat /etc/shadow; foo' ... # var 'cat /etc/shadow; foo' ...
[[ $varname =~ ^[a-zA-Z_][a-zA-Z0-9_]+?$ ]] || { [[ $var_function =~ ^[a-zA-Z_][a-zA-Z0-9_]+?$ ]] || {
die 73 "On line $LINENO, in $FUNCNAME: Invalid identifier: '$varname'" die 73 "On line $LINENO, in $FUNCNAME: Invalid identifier: '$var_function'"
} }
if ! is_function "$varname"; then if [[ "$1" == '-v' ]]; then
var_name=$2
shift 2
else
var_name=$var_function
fi
if ! is_function "$var_function"; then
eval " eval "
${varname}() { ${var_function}() {
declare mode=set declare mode=set
declare -n _var=\"${varname}\" declare -n _var=\"${var_name}\"
if (( \$# )); then if (( \$# )); then
case \"\$1\" in case \"\$1\" in
@ -40,7 +48,7 @@ var() {
('u') mode=includes;; ('u') mode=includes;;
(*) die 71 \"Syntax error in ${varname}!\";; (*) die 71 \"Syntax error in ${var_function}!\";;
esac esac
shift shift
else else
@ -75,18 +83,18 @@ var() {
(is_file) [[ -f \"\$_var\" ]];; (is_file) [[ -f \"\$_var\" ]];;
(is_dir|is_directory) [[ -d \"\$_var\" ]];; (is_dir|is_directory) [[ -d \"\$_var\" ]];;
(*) die 71 \"Syntax error in ${varname}!\";; (*) die 71 \"Syntax error in ${var_function}!\";;
esac esac
}; readonly -f \"${varname}\" }; readonly -f \"${var_function}\"
${varname}++() { ${var_function}++() {
declare -n _var=\"${varname}\" declare -n _var=\"${var_name}\"
(( ${varname}++ )) (( _var++ ))
} }
${varname}--() { ${var_function}--() {
declare -n _var=\"${varname}\" declare -n _var=\"${var_name}\"
(( ${varname}-- )) (( _var-- ))
} }
" "
fi fi
@ -94,7 +102,7 @@ var() {
if (( $# )); then if (( $# )); then
case "$1" in case "$1" in
('='|'=='|'=~'|'+='|'_='|':=') ('='|'=='|'=~'|'+='|'_='|':=')
"$varname" "$@" "$var_function" "$@"
;; ;;
(*) (*)
@ -124,22 +132,26 @@ spawn() {
## Run the command and wait for it to die ## Run the command and wait for it to die
svc() { svc() {
declare job_pid job_exit job_success last_respawn fail_counter date declare job_pid job_exit job_success last_respawn fail_counter date counter
var job_pid job_exit job_success last_respawn fail_counter date var job_pid job_exit job_success last_respawn fail_counter date counter
svc::cleanup() { svc::cleanup() {
nullexec kill -n "$service_stop_signal" "$job_pid" nullexec kill -n "$service_stop_signal" "$job_pid"
pid_wait "$job_pid" anywait "$job_pid" "$service_stop_timeout"
rm -f "$svc_pidfile" "$service_ready_flag" rm -f "$svc_pidfile" "$service_ready_flag"
die 0 die 0
}; trap 'svc::cleanup' TERM }; trap 'svc::cleanup' TERM
svc::reload() {
nullexec kill -n "$service_reload_signal" "$job_pid"
}; trap 'svc::reload' HUP
printf '%s' $BASHPID > "$svc_pidfile" printf '%s' $BASHPID > "$svc_pidfile"
while true; do while true; do
job_success = 1 # Needs to be reset job_success = 0 # Needs to be reset
# Spawn the process and record the PID # Spawn the process and record the PID
spawn "$@" & job_pid = "$!" spawn "$@" & job_pid = "$!"
@ -151,7 +163,25 @@ svc() {
fi fi
# Wait for the process to exit and record the exit code # Wait for the process to exit and record the exit code
wait -n; job_exit=$? # This depends on a few things
if service_managed; then
wait "$job_pid"; job_exit=$?
else
# We need to wait for the service to write down its pidfile
until service_pidfile is file; do
(( counter >= service_pidfile_timeout*10 )) && {
printf 'No pidfile' > "$service_failed_flag"
break
}
counter++
sleep 0.1
done
read -r job_pid < "$service_pidfile"
# We consider any termination of an unmanaged service to be a failure
anywait "$job_pid"; job_exit=127
fi
if service_success_exit u "$job_exit"; then if service_success_exit u "$job_exit"; then
job_success = 1 job_success = 1
@ -160,15 +190,17 @@ svc() {
job_success = 0 job_success = 0
fail_counter++ fail_counter++
printf '%s\n' "$job_exit" > "$service_failed_flag" printf '%s' "$job_exit" > "$service_failed_flag"
fi fi
# Back off if the service exits too much AND too quickly. # Back off if the service exits too much AND too quickly.
service_respawn_force || {
if (( fail_counter >= 3 )); then if (( fail_counter >= 3 )); then
printf -v date '%(%s)T' printf -v date '%(%s)T'
(( (date - last_respawn) <= 5 )) && break (( (date - last_respawn) <= 5 )) && break
fi fi
}
# Respawn, if necessary # Respawn, if necessary
service_respawn_flag || break service_respawn_flag || break
@ -187,66 +219,27 @@ svc() {
svc::cleanup svc::cleanup
}; readonly -f svc }; readonly -f svc
## Respawn
## Run a command in the background and restart it according to $service_respawn.
##
## Arguments: the command line to run ("$@").
## Reads:     service_respawn, service_signals, service_stop_signal,
##            svc_pidfile, service_ready_flag, service_success_exit.
## Loop exit: with 'on-failure' the loop ends after a successful exit, with
##            'on-success' it ends after an unsuccessful one; any other value
##            restarts the command every time it exits.
respawn() {
	declare job_pid job_exit job_success
	var job_pid job_exit
	var job_success = 0

	# On TERM: stop the job, reap it, remove our state files and exit.
	respawn::cleanup() {
		kill -n "$service_stop_signal" "$job_pid"
		wait "$job_pid"
		rm -f "$svc_pidfile" "$service_ready_flag"
		exit 0
	}; trap 'respawn::cleanup' TERM

	# Forward signal $1 to pid $2.
	respawn::sigpass() {
		declare sig pid
		var sig = "$1"
		var pid = "$2"

		kill -n "$sig" "$pid"
	}

	# Install a forwarding trap for every signal listed in service_signals.
	respawn::set_traps() {
		declare s # keep the loop variable out of the global scope

		for s in "${service_signals[@]}"; do
			# \$job_pid is expanded when the trap fires, not when it is set.
			trap "respawn::sigpass $s \$job_pid" "$s"
		done
	}; respawn::set_traps

	while true; do
		job_success = 0 # Needs to be reset, or one success sticks forever

		# Record the PID: the cleanup and forwarding traps need it.
		exec "$@" & job_pid = "$!"

		# A trapped signal makes `wait` return early (status > 128) while the
		# job is still running; keep waiting until the job is really gone so
		# that a forwarded signal does not cause a second copy to be spawned.
		while true; do
			wait "$job_pid"; job_exit = "$?"
			nullexec kill -0 "$job_pid" || break
		done

		if service_success_exit u "$job_exit"; then
			job_success = 1
		fi

		case $service_respawn in
			(on-failure) job_success && break;;
			(on-success) job_success || break;;
		esac
	done
}; readonly -f respawn
## Run a command with its output discarded ## Run a command with its output discarded
nullexec() { "$@" &>/dev/null; } nullexec() { "$@" &>/dev/null; }
readonly -f nullexec readonly -f nullexec
## Wait for a pid to die ## Wait for a pid, indefinitely
pid_wait() { anywait() {
declare cnt declare counter timeout
var cnt = 0 var counter = 0
var timeout = "$2"
while nullexec kill -0 "$1"; do while nullexec kill -0 "$1"; do
(( cnt >= (service_stop_timeout*10) )) && return 1 timeout && {
(( counter >= timeout )) && return 1
counter++
}
sleep 0.1 sleep 0.1
cnt++
done done
return 0 return 0
}; readonly -f pid_wait }; readonly -f anywait
## Simple timer ## Simple timer
timer() { timer() {
@ -363,7 +356,14 @@ start() {
rm -f "$service_stopped_flag" rm -f "$service_stopped_flag"
if service_managed; then if service_oneshot; then
spawn "${service_command[@]}"; res=$?
(( res )) && {
printf '%s' "$res" > "$service_failed_flag"
return "$res"
}
printf '1' > "$service_enabled_flag"
else
svc "${service_command[@]}" & svc "${service_command[@]}" &
if timer "$service_ready_timeout" ready; then if timer "$service_ready_timeout" ready; then
@ -371,18 +371,6 @@ start() {
else else
return 5 return 5
fi fi
elif service_oneshot; then
spawn "${service_command[@]}"; res=$?
(( res )) && return "$res"
printf '1' > "$service_enabled_flag"
else
# Put ourselves into the cgroup, so that even when we die, whatever we started stays in it
if cgroups; then
mkdir -p "$cgroup_home/$service_cgroup_name"
echo "$BASHPID" > "$cgroup_home/$service_cgroup_name/cgroup.procs"
fi
spawn "${service_command[@]}" &
fi fi
return 0 return 0
@ -393,7 +381,11 @@ start() {
reload() { reload() {
service_running || return 3 service_running || return 3
if service_managed; then
kill -n 1 "$service_pid"
else
kill -n "$service_reload_signal" "$service_pid" kill -n "$service_reload_signal" "$service_pid"
fi
} }
## Stop the service ## Stop the service
@ -409,16 +401,20 @@ stop() {
else else
service_running || return 3 service_running || return 3
nullexec kill -n "$service_stop_signal" "$service_pid" || return 1 if service_managed; then
kill -n 15 "$service_pid" || return 1
else
kill -n "$service_stop_signal" "$service_pid" || return 1
fi
pid_wait "$service_pid" || return 5 anywait "$service_pid" "$service_stop_timeout" || return 5
> "$service_stopped_flag" > "$service_stopped_flag"
# Cgroup stuff # Cgroup stuff
if cgroups; then if cgroups; then
if service_cgroup_wait; then if service_cgroup_wait; then
for p in "${service_cgroup_procs[@]}"; do for p in "${service_cgroup_procs[@]}"; do
pid_wait "$p" & anywait "$p" "$service_stop_timeout" &
wait || return 5 wait || return 5
done done
fi fi
@ -447,7 +443,7 @@ info() {
"Type" "$_type" \ "Type" "$_type" \
"$_status_label" "$_status" \ "$_status_label" "$_status" \
"Exec" "${service_command[*]}" \ "Exec" "${service_command[*]}" \
"Respawn" "${service_respawn:-false}" \ "Respawn" "$service_respawn" \
"Config path" "$service_config" \ "Config path" "$service_config" \
"Output log" "$service_logfile_out" "Output log" "$service_logfile_out"
@ -502,6 +498,9 @@ qstatus() { nullexec status; }
## By default there is no ready check ## By default there is no ready check
ready() { :; } ready() { :; }
## Reset failures: remove the failure flag file, if there is one
reset-failed() {
	rm -f "$service_failed_flag"
}
# Main code # Main code
## Empty declarations ## Empty declarations
var service_pid \ var service_pid \
@ -550,6 +549,7 @@ var service_stop_signal = 15
var service_cgroup_exclusive = 0 # Refuse to start the service if its cgroup is not empty var service_cgroup_exclusive = 0 # Refuse to start the service if its cgroup is not empty
var service_cgroup_wait = 0 # Wait on all the members of the cgroup to exit when stopping the service. var service_cgroup_wait = 0 # Wait on all the members of the cgroup to exit when stopping the service.
var service_success_exit = 0 # Array, takes exit codes that are to be treated as successful termination. var service_success_exit = 0 # Array, takes exit codes that are to be treated as successful termination.
var service_pidfile_timeout = 15 # How long to wait for unmanaged services to create their pidfiles.
# Global config # Global config
var cgroups = 0 # Enable cgroup-related functions var cgroups = 0 # Enable cgroup-related functions
@ -567,6 +567,7 @@ var service_nologs = 0
var service_cgroup_empty = 1 var service_cgroup_empty = 1
var service_cgroup_empty = 1 var service_cgroup_empty = 1
var service_respawn_flag = 0 var service_respawn_flag = 0
var service_respawn_force = 0
# These depend on who we are # These depend on who we are
if (( $UID )); then if (( $UID )); then
@ -675,13 +676,8 @@ fi
service_args && service_command += "${service_args[@]}" service_args && service_command += "${service_args[@]}"
service_type == 'oneshot' && service_oneshot = 1 service_type == 'oneshot' && service_oneshot = 1
service_oneshot && service_managed = 0
service_pidfile && service_managed = 0 service_pidfile && service_managed = 0
if ! service_managed; then
service_respawn_flag && die 21 "Refusing to respawn a service that manages itself."
fi
# Semi-hardcoded stuff # Semi-hardcoded stuff
svc_pidfile = "$rundir/$service_name.pid" svc_pidfile = "$rundir/$service_name.pid"