After using the simple watchdog for a while I needed to monitor another network interface. Since the old script couldn’t be extended nicely (without creating lots of redundant code), I decided to rewrite the whole thing! This new version allows me to set up multiple watchdogs with custom commands within the configuration file. See how you can use it too:


Configuration

First create a new configuration file /etc/config/watchping:

config watchping 'wifi'
	option enabled '1'
	option host 'wifi.accesspoint'
	option timeout '3'
	option command '/sbin/wifi' # "If wifi is already up when you run "wifi" with no parameters, the wifi system will be stopped and restarted."

config watchping 'network'
	option enabled '0'
	option host 'network.uplink'
	option timeout '3'
	option command '/etc/init.d/network restart'

config watchping 'openvpn'
	option enabled '1'
	option host 'my.vpn.server'
	option timeout '5'
	option command '/etc/init.d/openvpn restart'

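This configures watchping with three instances (the 'network' one is disabled in this example). Each enabled instance pings its host once a minute and runs the configured command once the host has failed to answer for "timeout" consecutive checks, i.e. after roughly that many minutes. Customize this file to your needs, e.g. add your own hosts or disable instances you don’t need.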

The scripts

Next create the folder /usr/lib/watchping/ and upload the following script as /usr/lib/watchping/watchping.sh:

#!/bin/sh 
#
# Pings a remote host and restarts WiFi/OpenVPN if the connection is down
# Requires "fping" -> opkg install fping
#
# Copyright (C) 2020 luani.de

. /lib/functions.sh

# check prerequisites: fping must resolve to an executable file
[ -x "$(command -v fping)" ] || {
	echo 'watchping error: fping is not installed, exiting' >&2
	exit 1
}

# load the watchping configuration; give up if it cannot be loaded
config_load watchping || {
	echo 'watchping error: configuration not found, exiting' >&2
	exit 1
}

# setup function, parse config & check ranges
#
# Callback for config_foreach: reads one watchping section and, if it is
# enabled, registers it as a job.
#
# Globals:   joblist (appended to); <job>_host, <job>_timeout,
#            <job>_command and <job>_failcount (written)
# Arguments: $1 - name of the configuration section (job name)
# Returns:   1 if the job name is empty or an enabled job has no command,
#            otherwise the status of the last command executed
setup_check() {
	local job=$1
	if [ -z "$job" ]; then return 1; fi

	local enabled
	config_get_bool enabled "${job}" enabled "false"
	# string comparison: never errors, even if the value is not numeric
	if [ "$enabled" = "1" ]; then
		local host
		local timeout
		local command

		config_get host "${job}" host "localhost"
		config_get timeout "${job}" timeout "5"
		config_get command "${job}" command ""

		# fall back to 5 for empty, negative or otherwise non-numeric values,
		# which would break the numeric comparisons in the check loop
		case "$timeout" in
			''|*[!0-9]*) timeout=5 ;;
		esac
		if [ "$timeout" -le 0 ]; then
			timeout=5
		fi
		if [ -z "$command" ]; then
			return 1
		fi

		# keep the per-job state in globals named after the job
		eval "${job}_host"=\$host
		eval "${job}_timeout"=\$timeout
		eval "${job}_command"=\$command
		eval "${job}_failcount"=0

		joblist="${joblist} ${job}"
		logger -t "watchping" "job '${job}' setup: host '${host}', timeout ${timeout} min"
	fi
}

# check function
#
# Pings the host of one job once and runs the job's command after the
# configured number of consecutive failures.
#
# Globals:   <job>_host, <job>_timeout, <job>_command (read);
#            <job>_failcount (read and written)
# Arguments: $1 - job name as registered by the setup function
# Returns:   0 if the host answered the ping, 1 otherwise
perform_check() {
	local job=$1

	local host
	local timeout
	local command
	local failcount
	local failure=0

	eval host=\$"${job}_host"
	eval timeout=\$"${job}_timeout"
	eval command=\$"${job}_command"
	eval failcount=\$"${job}_failcount"

	# ping host
	# POSIX redirection instead of "&>": a strict POSIX sh parses "&>" as
	# "run in background, then redirect", which makes $? always 0
	if fping --ipv4 --quiet --count 1 --random "${host}" > /dev/null 2>&1; then
		failcount=0
	else
		failure=1
		# POSIX arithmetic instead of the non-portable "let"
		failcount=$((failcount + 1))
	fi

	if [ "$failcount" -ge "$timeout" ]; then
		# start counting from scratch after the command was triggered
		failcount=0
		logger -t "watchping" "job '${job}' failure: host (${host}) is down for $timeout min, command initiated!"

		# restart service
		eval "$command"
	fi

	eval "${job}_failcount"=\$failcount
	return $failure
}


# read configuration: setup_check adds every enabled job to joblist
joblist=""
config_foreach setup_check watchping

# check loop: wait a minute, then check every job in turn.
# Only the first pass logs the outcome of each job.
firstrun=true
while sleep 60; do
	for job in $joblist; do
		if perform_check "${job}"; then
			if [ "$firstrun" = true ]; then
				logger -t "watchping" "job '${job}' first run: executed successfully, host reachable"
			fi
		elif [ "$firstrun" = true ]; then
			logger -t "watchping" "job '${job}' first run: failed, host unavailable"
		fi
	done
	firstrun=false
done

exit 0

Finally upload the init script to /etc/init.d/watchping:

#!/bin/sh /etc/rc.common
# 
# start watchdog
#
# https://openwrt.org/docs/guide-developer/procd-init-script-example
# Copyright (C) 2020 luani.de

# Init sequence: start and stop priorities of this service
# NOTE(review): presumably evaluated by /etc/rc.common when the service is
# enabled (higher START = later at boot) - confirm against the OpenWrt docs
START=99
STOP=10

# PROCD: declares this as a procd-style init script
# NOTE(review): presumably makes rc.common call start_service/stop_service
# below instead of plain start/stop - confirm
USE_PROCD=1

start_service() {
	procd_open_instance 'watchping'

	# the watchdog script is run through /bin/sh
	procd_set_param command '/bin/sh' '/usr/lib/watchping/watchping.sh'

	# respawn parameters; each value can be overridden through the
	# respawn_threshold / respawn_timeout / respawn_retry variables
	procd_set_param respawn \
		${respawn_threshold:-3600} \
		${respawn_timeout:-5} \
		${respawn_retry:-5}

	# forward stdout and stderr of the command to logd
	procd_set_param stdout 1
	procd_set_param stderr 1

	procd_set_param pidfile '/var/run/watchping.pid'

	procd_close_instance
}

stop_service() {
	# leave a trace in the system log when the watchdog is stopped
	logger -t 'watchping' 'watchdog stopped!'
}

service_triggers() {
	# register a reload trigger for the "watchping" configuration
	procd_add_reload_trigger 'watchping'
}

reload_service() {
	# a reload is implemented as a full restart: stop first, then start again
	stop
	start
}

Final touches

The watchping script requires fping to be installed on the router:

# install dependencies
opkg update
opkg install fping

# make executable
chmod +x /usr/lib/watchping/watchping.sh
chmod +x /etc/init.d/watchping

# enable & start service
# you can also do this from LuCI, system -> startup
/etc/init.d/watchping enable
/etc/init.d/watchping start

Check your system log and verify the watchdog started correctly:

user.notice watchping: job 'wifi' setup: host 'wifi.accesspoint', timeout 3 min
user.notice watchping: job 'wifi' first run: executed successfully, host reachable

Tested with OpenWrt 19.07

2 Comments

Schreibe einen Kommentar

Deine E-Mail-Adresse wird nicht veröffentlicht. Erforderliche Felder sind mit * markiert.