@ -26,67 +26,116 @@ while true; do
done
# Install the watchdog script as /usr/local/bin/ping-instances.sh.
#
# The script body is written with a quoted here-document so that it is copied
# verbatim: the body contains single quotes (awk programs, date formats, the
# searchip pattern) that would otherwise terminate a single-quoted echo
# argument and expose "(" and "|" to the installing shell.
cat <<'PING_INSTANCES_EOF' >/usr/local/bin/ping-instances.sh
#!/usr/bin/env bash
# Pings every container (pct) and virtual machine (qm) that is configured to
# start on boot and restarts the ones that do not answer.
#
# Usage: ping-instances.sh [excluded_vmid ...]
# State: /tmp/<vmid>.count holds the number of consecutive restarts.
# Output: everything printed inside the main loop goes to
#         /var/log/ping-instances.log.

# Maximum number of restarts
maxrestartcount=3
# Emails for restarts
mailaddress='admin@domain.tld'
mail="false"
# Search string for IP address, here more precise selection, IP addresses can be excluded.
searchip='192\.168\.|10\.'
# Read excluded instances from command line arguments
excluded_instances=("$@")

# Print the arguments prefixed with the current timestamp.
log() {
  printf '%s: %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$*"
}

# Mail $1 to $mailaddress when mail notifications are enabled.
# Reads the global NAME of the instance currently being checked.
notify() {
  if [ "$mail" == "true" ]; then
    echo "$1" | mail -s "$(date +'%Y-%m-%d %H:%M:%S'): $(hostname) - $NAME" "$mailaddress"
  fi
}

log "Excluded instances: ${excluded_instances[*]}"

while true; do
  for vmid in $(pct list | awk '{if(NR>1) print $1}'; qm list | awk '{if(NR>1) print $1}'); do
    # Skip excluded instances (whole-word match against the argument list)
    if [[ " ${excluded_instances[*]} " == *" ${vmid} "* ]]; then
      log "Skipping $vmid because it is excluded"
      continue
    fi

    IP=
    skip="false"
    count_file="/tmp/${vmid}.count"
    count=0
    if [ -f "$count_file" ]; then
      count=$(cat "$count_file")
    fi
    # An empty or corrupted counter file counts as zero restarts.
    [[ "$count" =~ ^[0-9]+$ ]] || count=0

    # Determine the type of the instance (container or virtual machine)
    if pct status "$vmid" >/dev/null 2>&1; then
      # It is a container
      kind="CT"
      config_cmd=(pct config)
      if pct status "$vmid" | grep -q "status: running"; then
        # Only the first address is used so that ping gets a single target.
        IP=$(pct exec "$vmid" ip a s dev eth0 | awk '/inet / {print $2}' | cut -d/ -f1 | head -n 1)
      fi
    else
      # It is a virtual machine
      kind="VM"
      config_cmd=(qm config)
      if qm status "$vmid" | grep -q "status: running"; then
        IP=$(qm guest cmd "$vmid" network-get-interfaces | grep -E -o "([0-9]{1,3}\.){3}[0-9]{1,3}" | grep -E "$searchip" | head -n 1)
      fi
    fi

    # Read the configuration once and derive name, onboot and template from it.
    config=$("${config_cmd[@]}" "$vmid")
    NAME=$(awk '/name:/ { print $2 }' <<<"$config")

    # Skip instances based on onboot and templates
    onboot="false"
    if [ "$(awk '$1 == "onboot:" { print $2 }' <<<"$config")" == "1" ]; then
      onboot="true"
    fi
    template="false"
    if [ "$(awk '$1 == "template:" { print $2 }' <<<"$config")" == "1" ]; then
      template="true"
    fi
    if [ "$onboot" == "false" ]; then
      log "Skipping $vmid $NAME because it is set not to boot"
      skip="true"
    fi
    if [ "$template" == "true" ]; then
      log "Skipping $vmid $NAME because it is a template"
      skip="true"
    fi
    if [ "$skip" == "true" ]; then
      continue
    fi

    # Ping the instance; an empty IP (instance not running) fails the ping
    # and therefore takes the restart path below.
    if ping -c 1 "$IP" >/dev/null 2>&1; then
      log "CT/VM $vmid $NAME with ip $IP is pingable..."
      echo "0" >"$count_file"
      continue
    fi

    # Stop after exactly maxrestartcount consecutive restarts.
    if ((count >= maxrestartcount)); then
      log "$kind $vmid $NAME max restart count $count reached"
      notify "$kind $vmid $NAME max restart count $count reached"
      continue
    fi
    count=$((count + 1))

    # If the instance can not be pinged, stop and start it
    if [ "$kind" == "CT" ]; then
      log "CT $vmid $NAME is not responding, restarting..."
      notify "CT $vmid $NAME is not responding, restarting"
      pct stop "$vmid" >/dev/null 2>&1
      sleep 5
      pct start "$vmid" >/dev/null 2>&1
    else
      if qm status "$vmid" | grep -q "status: running"; then
        log "VM $vmid $NAME is not responding, restarting..."
        notify "VM $vmid $NAME is not responding, restarting"
        qm stop "$vmid" >/dev/null 2>&1
        sleep 5
      else
        log "VM $vmid $NAME is not running, starting..."
      fi
      qm start "$vmid" >/dev/null 2>&1
    fi
    # Persist the counter for containers and VMs alike so both are capped.
    echo "$count" >"$count_file"
  done

  # Wait for 5 minutes. (Edit to your needs)
  log "Pausing for 5 minutes..."
  sleep 300
done >/var/log/ping-instances.log 2>&1
PING_INSTANCES_EOF
touch /var/log/ping-instances.log