#!/bin/ksh
# @(#)23 1.12.1.10 src/avs/fs/mmfs/samples/nfscluster/nfsmonitor, mmfs, avs_rgpfs24, rgpfs24s011a 3/20/07 16:46:39
#
# HA-NFS monitoring
# Usage: nfsmonitor [start|stop|restart|status]

# Monitor levels
LEVEL0=0  # NO_MONITORING
LEVEL1=1  # ALERT_ONLY
LEVEL2=2  # RESTART (not applicable for all services)
LEVEL3=3  # FAILOVER

# Per-service monitor levels; a value already present in the environment wins.
MONITOR_INTERVAL=${MONITOR_INTERVAL-15}
MONITOR_NETWORK=${MONITOR_NETWORK-3}
MONITOR_PORTMAP=${MONITOR_PORTMAP-3}
MONITOR_NFSD=${MONITOR_NFSD-3}
MONITOR_MOUNTD=${MONITOR_MOUNTD-3}
MONITOR_STATD=${MONITOR_STATD-3}
MONITOR_SSHD=${MONITOR_SSHD-3}

# All times below are in seconds
NFS_RPC_ACT_SAMPLE_INTERVAL=3
RESTART_TIMEOUT=7

# Internal flags
NUMBER_OF_RESTARTS=3

# Functions return values (used only within this file)
SERVICE_RUNNING=1
SERVICE_NOT_RUNNING=2
SERVICE_UNKNOWN=3

# The helper functions used throughout this script come from nfsfuncs.
# NOTE(review): exits with status 0 when the helpers are missing; presumably
# deliberate so that callers do not treat an unconfigured node as an error.
if [ ! -f /var/mmfs/etc/nfsfuncs ]; then
  echo "$0: Can't find NFS functions in /var/mmfs/etc"
  exit 0
fi
. /var/mmfs/etc/nfsfuncs

# Display an alert message in syslog. Optionally, call a customer-provided
# alert script (/var/mmfs/etc/alert) with the message and $GPFS_IP.
# Usage: alert service actionmsg [comment ...]
alert() {
  service=$1
  actionmsg=$2
  shift 2
  # Reset on every call so a comment from a previous alert cannot leak in.
  comment=""
  [ -n "$*" ] && comment="($*)"
  message="Monitoring detected $service is inactive, $actionmsg"
  if [ -n "$comment" ]; then
    message="$message $comment"
  fi
  msg "$message"
  if [ -e /var/mmfs/etc/alert ]; then
    /var/mmfs/etc/alert "$message" "$GPFS_IP"
  fi
}

# Alert that a service is inactive and that no action is taken.
# Usage: alertmsg service [comment ...]
alertmsg() {
  service=$1
  shift
  alert $service "no action taken as configured" "$@"
}

# Alert that a service is inactive and that node failure is being initiated.
# Usage: failovermsg service [comment ...]
failovermsg() {
  service=$1
  shift
  alert $service "node failure initiated as configured" "$@"
}

# Terminate all processes that are running the nfs tool.
# This is useful for the case that the restart process is hanging...
nfsToolKill() {
  if [ -f /etc/init.d/nfsserver ]; then
    NFSTOOL=/etc/init.d/nfsserver
  else
    NFSTOOL=/etc/init.d/nfs
  fi
  nfsToolBase=${NFSTOOL##*/}
  # Look the tool up by full path first, then by base name; this shell and
  # its parent are excluded from the result.
  nfsToolPid=$(pidof -o $$ -o $PPID -o %PPID -x $NFSTOOL ||
    pidof -o $$ -o $PPID -o %PPID -x $nfsToolBase)
  debugmsg2 "The pids of $NFSTOOL are $nfsToolPid"
  if [[ -z $nfsToolPid ]]; then
    return
  fi
  kill -9 $nfsToolPid
}

# Check if a given service is configured to be restarted.
# Returns: the number of restarts or 0 if configured for no restart
# Usage: checkRestart SERVICE   (SERVICE is the suffix of a MONITOR_* variable)
checkRestart() {
  service=$1
  # Indirect lookup of MONITOR_<service>
  eval service_level='$'MONITOR_$service
  debugmsg2 "$service level is $service_level"
  # An unset/empty level is treated as 0 (no restart) instead of making the
  # numeric test fail with an error.
  if [ "${service_level:-0}" -ge "$LEVEL2" ]; then
    return $NUMBER_OF_RESTARTS
  fi
  return 0
}

# Get the status of a given service.
# Returns: SERVICE_RUNNING or SERVICE_NOT_RUNNING
getStatus() {
  service=$1
  checkStatus $service
  status=$?
  if [ $status -eq 0 ]; then
    debugmsg2 "$service is running"
    return $SERVICE_RUNNING
  else
    debugmsg "$service is not running (status $status)"
    return $SERVICE_NOT_RUNNING
  fi
}

# Fail this node over: stop NFS, stop GPFS, then exit the monitor.
invokeFailover() {
  debugmsg "Invoking failover..."

  # Stop nfs gracefully to prevent client from getting ESTALE
  debuglog2 touch /tmp/ha-nfs-reboot 2> /dev/null
  stop.nfs

  # Kill the gpfs daemon on the node to invoke failover
  debugmsg "Stopping GPFS..."
  /etc/init.d/gpfs stop
  exit
}

# Same as invokeFailover, but reboots the node after GPFS has been stopped.
invokeFailoverReboot() {
  debugmsg "Invoking failover with reboot..."

  # Stop nfs gracefully to prevent client from getting ESTALE
  stop.nfs

  # Kill the gpfs daemon on the node to invoke failover
  debugmsg "Stopping GPFS before reboot..."
  /etc/init.d/gpfs stop
  reboot
  exit
}

#################
# Monitoring nfsd
#################

# This function samples /proc/net/rpc/nfsd twice within a given interval
# and compares the two samples to detect any nfsd rpc activity
# Returns: if activity was detected -> SERVICE_RUNNING
#          if there is no entry in proc or no activity was detected,
#          no conclusion -> SERVICE_UNKNOWN
detectNfsdActivity() {
  # Field names of the proc2/proc3 lines, used for indexing and debug output
  set -A v2procs NAME COUNT NULL GETATTR SETATTR ROOT LOOKUP READLINK \
    READ WRCACHE WRITE CREATE REMOVE RENAME \
    LINK SYMLINK MKDIR RMDIR READDIR FSSTAT
  set -A v3procs NAME COUNT NULL GETATTR SETATTR LOOKUP ACCESS READLINK \
    READ WRITE CREATE MKDIR SYMLINK MKNOD REMOVE RMDIR RENAME \
    LINK READDIR READDIRPLUS FSSTAT FSINFO PATHCONF COMMIT

  procfile=/proc/net/rpc/nfsd
  if [ ! -f $procfile ]; then
    msg "Monitoring could not find /proc/net/rpc/nfsd"
    return $SERVICE_UNKNOWN
  fi

  # Sample v2/v3 activity (using proc) in the next NFS_RPC_ACT_SAMPLE_INTERVAL sec
  set -A v2procs1 $(grep -w proc2 $procfile)
  set -A v3procs1 $(grep -w proc3 $procfile)
  debugmsg2 "Sleeping for $NFS_RPC_ACT_SAMPLE_INTERVAL"
  sleep $NFS_RPC_ACT_SAMPLE_INTERVAL
  set -A v2procs2 $(grep -w proc2 $procfile)
  set -A v3procs2 $(grep -w proc3 $procfile)

  # Use the samples to detect activity.
  # Missing counters (no proc2/proc3 line, or a shorter line than expected)
  # count as 0 so the subtraction never sees an empty operand.
  p=2 # skipping name and count fields
  n=${#v2procs[@]}
  while [ $p -lt $n ]; do
    activity=$(( ${v2procs2[$p]:-0} - ${v2procs1[$p]:-0} ))
    if [ $activity -gt 0 ]; then
      debugmsg2 "nfsd activity detected: $activity ${v2procs[$p]} operations in $NFS_RPC_ACT_SAMPLE_INTERVAL sec"
      return $SERVICE_RUNNING
    fi
    p=$((p+1))
  done

  p=2 # skipping name and count fields
  n=${#v3procs[@]}
  while [ $p -lt $n ]; do
    activity=$(( ${v3procs2[$p]:-0} - ${v3procs1[$p]:-0} ))
    if [ $activity -gt 0 ]; then
      debugmsg2 "nfsd activity detected: $activity ${v3procs[$p]} operations in $NFS_RPC_ACT_SAMPLE_INTERVAL sec"
      return $SERVICE_RUNNING
    fi
    p=$((p+1))
  done

  # no activity detected
  debugmsg2 "Could not detect nfsd activity using /proc"
  return $SERVICE_UNKNOWN
}

# If nfsd is configured to be restarted, this function attempts to start/restart
# the nfsd process as many times as configured.
# The success of restart is determined by checking the status of nfsd.
# Note that portmap has to be running in order to restart nfsd.
# If restart has failed, alert and failover are invoked as configured.
# For nfsd, we use the /etc/init.d/nfs(RH)|nfsserver(SUSE) utility since it stops
# the service before restarting it (which is important to make sure that nfsd
# re-registers with portmap, for example, in the case of restarting nfsd after
# restarting portmap). This utility also reloads /etc/exports.
startNfsd() {
  checkRestart NFSD
  # This call takes care of MONITOR_NFSD < LEVEL2 (numberOfRestarts=0)
  numberOfRestarts=$?
  attemptNo=0
  debugmsg2 "About to start nfsd (up to $numberOfRestarts times)"
  while [ $numberOfRestarts -gt 0 ]; do
    attemptNo=$((attemptNo+1))
    debugmsg2 "In startnfsd, attempt number = $attemptNo"
    nfsService start

    # give the restart/start chance to complete
    sleep $RESTART_TIMEOUT
    getStatus nfsd
    status=$?
    debugmsg2 "The nfsd status after $attemptNo attempts to start is $status"
    [[ $status == $SERVICE_RUNNING ]] && return

    # start attempt has failed/hangs -> kill the process and retry
    debugmsg "start nfsd failed/hangs, about to kill the start process."
    nfsToolKill
    # Count down the loop variable itself; otherwise the loop never ends and
    # the failover/alert code below is unreachable.
    numberOfRestarts=$((numberOfRestarts-1))
  done

  # If we get here, it means that all start attempts have failed
  # Failed to restart nfsd, check for failover configuration parameters
  if [[ $MONITOR_NFSD -eq $LEVEL3 ]]; then
    failovermsg nfsd
    invokeFailover
  else
    alertmsg nfsd
  fi
}

# NULL RPC test: use rpcinfo to send a null rpc to nfs v3
# Returns: SERVICE_RUNNING or SERVICE_NOT_RUNNING
nfsdNullRpcTest() {
  hostname=$(hostname)

  # Use rpcinfo to send a null rpc to nfs v3 using UDP
  debuglog2 rpcinfo -u $hostname nfs 3
  if [ $? -eq 0 ]; then
    debugmsg2 "Sent nullrpc to nfsd v3 using rpcinfo, the service is up and running"
    return $SERVICE_RUNNING
  else
    msg "Sent nullrpc to nfsd v3 using rpcinfo, the service is not available (it maybe because portmap was started after nfsd, or that rpcinfo is not installed)"
    return $SERVICE_NOT_RUNNING
  fi
}

# Run all the tests for monitoring nfsd, and take actions according
# to the monitoring level.
monitorNfsd() {
  ifGPFSDownExit $GPFS_IP

  # Check that the nfsd process is running
  getStatus nfsd
  runStatus=$?
  case $runStatus in
    $SERVICE_RUNNING)
      debugmsg2 "Perform more tests, to make sure that nfsd is functioning"
      detectNfsdActivity
      case $? in
        $SERVICE_RUNNING)
          return
          ;;
        $SERVICE_UNKNOWN)
          # No rpc activity seen: probe the service actively
          nfsdNullRpcTest
          case $? in
            $SERVICE_NOT_RUNNING)
              startNfsd
              ;;
            $SERVICE_RUNNING)
              return
              ;;
          esac
          ;;
      esac
      ;;
    $SERVICE_NOT_RUNNING)
      # Note that if nfsd was not running and the start has succeeded, we assume
      # that the process is running, and only perform the next level of tests
      # (rpc activity, and null rpc) next time.
      startNfsd
      return
      ;;
  esac
}

####################
# Monitoring mountd
####################

# Attempt to restart mountd as many times as configured; on failure, alert
# and invoke failover as configured.
restartMountd() {
  checkRestart MOUNTD
  # This takes care of MONITOR_MOUNTD < LEVEL2 (numberOfRestarts=0)
  numberOfRestarts=$?
  attemptNo=0
  debugmsg2 "About to restart mountd (up to $numberOfRestarts times as configured, note: 0 means that the configuration level is lower than LEVEL2)"
  while [ $numberOfRestarts -gt 0 ]; do
    attemptNo=$((attemptNo+1))
    startMountd
    # NOTE(review): presumably startMountd launches the start command in the
    # background, which is what makes $! meaningful here -- confirm in nfsfuncs.
    restartPID=$!
    debugmsg2 "restartPID=$restartPID"

    # give the restart a chance to complete
    sleep $RESTART_TIMEOUT

    # check the status of mountd after the restart
    debugmsg2 "Try to restart mountd (attempt $attemptNo), checking the status:"
    getStatus /usr/sbin/rpc.mountd
    if [ $? -eq $SERVICE_RUNNING ]; then
      debugmsg2 "Succeeded to restart the mountd service at $attemptNo attempt"
      return
    fi

    # restart attempt has failed/hangs -> kill the process and retry
    debugmsg "Restarting mountd failed/hangs, about to kill the restart process."
    if [ -e /sbin/startproc ]; then
      # Only kill when a background pid was actually recorded
      if [[ -n $restartPID ]]; then
        debuglog kill -9 $restartPID
      fi
    else
      nfsToolKill # FIX
    fi
    numberOfRestarts=$((numberOfRestarts-1))
  done

  # If we get to this point, it means that all restart attempts have failed
  msg "Failed to restart the mountd (tried $attemptNo times as configured)"

  # Failed to restart mountd, check for failover configuration parameters
  if [[ $MONITOR_MOUNTD == $LEVEL3 ]]; then
    failovermsg mountd
    invokeFailover
  else
    alertmsg mountd
  fi
}

# This is the main function for monitoring mountd, and take actions according
# to the monitoring level.
monitorMountd() {
  ifGPFSDownExit $GPFS_IP
  getStatus /usr/sbin/rpc.mountd
  case $? in
    $SERVICE_RUNNING)
      return
      ;;
    $SERVICE_NOT_RUNNING)
      restartMountd
      ;;
  esac
}

########################
# Monitoring the network
########################

# Monitor the network.
# For now the only tests performed are: (1) whether the link is connected or not,
# using ethtool. (2) whether all NFS IP addresses are enabled. (3) ping the
# gateway. More tests can be added here later.
monitorNetwork() {
  ifGPFSDownExit $GPFS_IP

  # TEST1: make sure that all interfaces that are used for nfs serving are connected
  nfsIfs=$(getNfsIFs $GPFS_IP)
  if [[ -z $nfsIfs ]]; then
    msg "No configured NFS IP addresses detected on any of the node's interfaces"
    nwFailoverCondition "no configured nfs interfaces"
  else
    for eth in $nfsIfs; do
      tmp=$(mmgetifconf | grep -w -- "$eth" | awk '{print $1}')
      if [[ -z $tmp ]]; then
        nwFailoverCondition "interface is down"
      fi
      checkLinkStatus $eth
      if [ $? -ne 0 ]; then
        nwFailoverCondition "link is not connected"
      fi
    done
  fi

  # TEST2: check that all NFS IP addresses are enabled
  # The address is matched as a fixed whole word so that, e.g., 10.0.0.1 is
  # not considered up just because 10.0.0.11 is configured.
  nfsIPs=$(getNfsIPs $GPFS_IP)
  for ip in $nfsIPs; do
    mmgetifconf | grep -qwF -- "$ip"
    [ $? -eq 0 ] && continue
    debugmsg "monitor detected $ip is down, restarting"
    ifUp $ip
  done

  # Now check that all NFS IP addresses for failover nodes are enabled
  nfsIPs=
  for ip in $(getFailedNodes $GPFS_IP); do
    nfsIPs="$nfsIPs $(getNfsIPs $ip)"
  done
  for ip in $nfsIPs; do
    mmgetifconf | grep -qwF -- "$ip"
    [ $? -eq 0 ] && continue
    [ "$(IPaddr $ip monitor)" == "OK" ] && continue
    debugmsg "monitor detected $ip is down, restarting"
    debuglog IPaddr $ip start
  done

  # TEST3: ping the gateway
  pingDefaultGateway
}

# List all interfaces used for NFS serving from NODELIST
# Usage: getNfsIFs gpfs_ip
# Output: one base interface name per line on stdout
getNfsIFs () {
  thisGpfsIP=$1
  eth=""

  # Get the list of nfs ip addresses for the given gpfs ip address
  nfsIPList=$(getNfsIPs $thisGpfsIP)

  # Handle the case that there is no entry for the node in nfs.nodes
  if [[ -z $nfsIPList ]]; then
    debugmsg "ALERT: No entry was found in nfs.nodes for this node (gpfs ip address:$thisGpfsIP)"
    nwFailoverCondition "no configured nfs interfaces"
  fi
  debugmsg2 "The list of ips is $nfsIPList"
  for nfsIP in $nfsIPList; do
    # get the "original" interface (e.g. the original interface for eth0:1 is eth0)
    origEth=$(getEthInterface $nfsIP | awk -F: '{print $1}')
    if [[ -z $origEth ]]; then
      debugmsg "ALERT: The nfs ip address $nfsIP is not assigned an interface"
      continue
    fi
    echo $origEth
    debugmsg2 "the actual interface for $nfsIP is $origEth"
  done
}

# Get the interface for a given IP address
# The format of nfs.nodes: GPFS_IP NFS_IP1 NFS_IP2 ...
# NOTE: The original function was copied from Marc and was changed.
# May require future integration.
# Usage: getEthInterface ip
# Output: the interface name on stdout (an empty line if none matches)
getEthInterface() {
  # mmgetifconf prints one "interface ip mask" line per interface on the
  # machine. The last line whose address equals $1 wins. Appending "" forces
  # a string comparison in awk. Reading the pipe directly avoids a predictable
  # temp file in /tmp and a file descriptor that was never closed.
  eth=$(mmgetifconf | awk -v ip="$1" '$2 "" == ip "" { iface = $1 } END { print iface }')

  # eth may be empty if there is no interface associated with this ip address
  echo $eth
}

# Locate and ping the default gateway.
# On failure, alert and invoke failover if configured.
pingDefaultGateway() {
  # Take the first gateway route only, so that several UG routes do not turn
  # into several words on the ping command line.
  gwIP=$(route -n | awk '/UG/ {print $2; exit}')
  [[ -z $gwIP ]] && return

  # Make sure the local machine is not set as the default gateway.
  # Reading the pipe directly means there is no temp file or descriptor left
  # behind when we return early.
  if mmgetifconf | awk -v gw="$gwIP" '$2 "" == gw "" { found = 1 } END { exit !found }'; then
    return
  fi

  # try to ping the gateway
  ping -c 1 -w 5 $gwIP > /dev/null
  outPing=$?
  if [ $outPing -ne 0 ]; then
    msg "Failed to ping the gateway at $gwIP (err $outPing)"
    nwFailoverCondition "can't ping the gateway"
  else
    debugmsg2 "Succeeded to ping the gateway at $gwIP (ping returns $outPing)"
  fi
}

# Handle the network alert and failover if configured.
# Usage: nwFailoverCondition message
nwFailoverCondition() {
  message=$1
  debugmsg2 nwFailoverCondition $message
  if [[ $MONITOR_NETWORK -eq $LEVEL3 ]]; then
    failovermsg network $message
    invokeFailover
  else
    alertmsg network $message
  fi
}

########################
# Monitoring portmap
########################

# Check if portmap is up; invoke failover and/or alert if configured.
# TODO: we can test if this node is mounting anything, and if not
# we can restart portmap, and re-register the nfs processes with it.
# Currently, if this machine is mounting anything, lockd does not re-register
# with portmap.
monitorPortmap() {
  getStatus /sbin/portmap
  case $? in
    $SERVICE_RUNNING)
      return
      ;;
    $SERVICE_NOT_RUNNING)
      # Any level above ALERT_ONLY results in failover with reboot
      if [[ $MONITOR_PORTMAP -eq $LEVEL1 ]]; then
        alertmsg portmap
      else
        failovermsg portmap
        invokeFailoverReboot
      fi
      ;;
  esac
}

######################################
# Monitoring locking (lockd and statd)
######################################

# This is the main function for monitoring locking (lockd, and statd).
# Both daemons are governed by MONITOR_STATD.
monitorLocking() {
  ifGPFSDownExit $GPFS_IP

  # Monitor lockd.
  getStatus lockd
  case $? in
    $SERVICE_RUNNING)
      ;;
    $SERVICE_NOT_RUNNING)
      if [[ $MONITOR_STATD -eq $LEVEL3 ]]; then
        failovermsg lockd
        invokeFailover
      else
        alertmsg lockd
      fi
      ;;
  esac

  # Monitor statd
  if [ -f /sbin/rpc.statd ]; then
    getStatus /sbin/rpc.statd
    case $? in
      $SERVICE_RUNNING)
        ;;
      $SERVICE_NOT_RUNNING)
        if [[ $MONITOR_STATD -ge $LEVEL2 ]]; then
          restartStatd
        else
          alertmsg statd
        fi
        ;;
    esac
  fi
}

# This function attempts to restart statd (only once).
restartStatd() {
  [ ! -f /sbin/rpc.statd ] && return

  # Kill the statd process if exists (important for registering with portmap).
  # kill is skipped when pidof finds nothing, so it is never run without a pid.
  statdPid=$(/sbin/pidof -x /sbin/rpc.statd)
  if [[ -n $statdPid ]]; then
    debuglog kill -9 $statdPid
  fi
  debuglog /etc/init.d/nfslock start
  sleep $RESTART_TIMEOUT
  getStatus /sbin/rpc.statd
  case $? in
    $SERVICE_RUNNING)
      ;;
    $SERVICE_NOT_RUNNING)
      if [[ $MONITOR_STATD -eq $LEVEL3 ]]; then
        failovermsg statd
        invokeFailover
      else
        alertmsg statd
      fi
      ;;
  esac
}

######################################
# Monitoring rsh/ssh daemon
######################################

# Attempt to restart the remote-shell daemon as many times as configured; on
# failure, alert and invoke failover as configured.
startSshd() {
  checkRestart SSHD
  # This call takes care of MONITOR_SSHD < LEVEL2 (numberOfRestarts=0)
  numberOfRestarts=$?
  service=$(rshService)
  attemptNo=0
  debugmsg2 "About to restart sshd (up to $numberOfRestarts times as configured, note: 0 means that the configuration level is lower than LEVEL2)"
  while [[ $numberOfRestarts -gt 0 ]]; do
    attemptNo=$((attemptNo+1))
    # Braces are required: $service_restart would name a different variable.
    /etc/init.d/$service restart > /tmp/${service}_restart.out 2>&1 &
    restartPID=$!
    # Wait longer on each successive attempt
    sleep $((attemptNo*RESTART_TIMEOUT))
    debugmsg2 "Try to restart $service (attempt $attemptNo), checking the status:"
    getStatus $service
    restartStatus=$?
    debugmsg2 "The $service status after the $attemptNo attempt of restart is $restartStatus"
    if [[ $restartStatus == $SERVICE_RUNNING ]]; then
      return
    fi
    debugmsg "Restarting $service failed/hangs, about to kill the restart process. The output of restart attempt is in /tmp/${service}_restart.out"
    kill -9 $restartPID 2>&1
    # Count down the loop variable itself so the loop terminates
    numberOfRestarts=$((numberOfRestarts-1))
  done

  # Failed to restart service, check for failover configuration parameters
  msg "Failed to restart the $service process (tried $attemptNo times as configured)"
  if [[ $MONITOR_SSHD == $LEVEL3 ]]; then
    failovermsg $service
    invokeFailover
  else
    alertmsg $service
  fi
}

# Check the remote-shell daemon and restart it when it is not running.
monitorSshd() {
  ifGPFSDownExit $GPFS_IP
  service=$(rshService)
  getStatus $service
  case $? in
    $SERVICE_RUNNING)
      return
      ;;
    $SERVICE_NOT_RUNNING)
      startSshd
      ;;
  esac
}

######################################
# Monitoring gpfs daemon
######################################

# Alert and exit the monitor when ifGPFSDownExit returns a non-zero status.
monitorGPFS() {
  ifGPFSDownExit $GPFS_IP
  if [ $? != 0 ]; then
    failovermsg GPFS
    exit
  fi
}

######################################
# Main
######################################

# The monitoring loop: sleeps MONITOR_INTERVAL seconds, then runs every
# monitor whose level is above LEVEL0. Never returns.
nfsMonitor() {
  GPFS_IP=$(myGPFSIP)
  while true; do
    sleep $MONITOR_INTERVAL

    # GPFS monitoring
    # NOTE(review): MONITOR_GPFS has no default in this file and the test is
    # "-gt LEVEL3", so this branch looks disabled for levels 0-3 -- confirm
    # that this is intended.
    if [[ $MONITOR_GPFS -gt $LEVEL3 ]]; then
      debugmsg2 "==========GPFS monitoring==============="
      monitorGPFS
      debugmsg2 "done monitoring GPFS"
    fi

    # Network monitoring
    if [[ $MONITOR_NETWORK -gt $LEVEL0 ]]; then
      debugmsg2 "==========NW monitoring==============="
      monitorNetwork
      debugmsg2 "done monitoring the network"
    fi

    # Monitoring portmap
    # Note that all of the rpc services have to be registered with portmap
    # in order for new clients to access them. In our monitoring script,
    # we only test for portmap once, but if it fails afterward, the services
    # may not be available for new clients even though they are running.
    # This is the case until portmap is restarted again,
    # and the rpc processes re-register with it.
    if [[ $MONITOR_PORTMAP -gt $LEVEL0 ]]; then
      debugmsg2 "===========portmap monitoring=============="
      monitorPortmap
      debugmsg2 "done monitoring portmap"
    fi

    # Monitoring nfsd
    # The system monitors nfsd only if the nfsd is configured to be monitored in
    # the configuration file.
    # There are several monitoring methods/levels:
    # (test 1) Check that the nfsd process is running.
    # (test 2) Monitor the rpc-nfs activity.
    # (test 3) Send null rpc to the nfsd service.
    #
    # Order of tests:
    # Perform (test 1). If the process is not running, goto Action.
    # If the process is running, perform (test 2), if there is nfs activity,
    # goto Done.
    # If no activity is detected, perform (test 3); if fails -- goto Action;
    # if pass -- goto Done.
    # Action: if nfsd is not running & configured to be restarted then the nfsd is
    # restarted.
    # If all restart attempts have failed, the node is declared "dead" for nfs
    # serving, and if nfsd is configured as "failover" then the node is failed over
    # to another node, and a user level alert is invoked.
    # Done: nfsd is up and running, continue.
    if [[ $MONITOR_NFSD -gt $LEVEL0 ]]; then
      debugmsg2 "==========nfsd monitoring==============="
      monitorNfsd
      debugmsg2 "done monitoring nfsd"
    fi

    # Monitoring mountd
    # The system monitors mountd only if mountd is configured to be monitored.
    # (test 1) Check that the mountd process is running.
    # Order of tests:
    # Perform (test 1). If the process is not running, goto Action.
    # If the process is running, goto Done.
    # Action: if mountd is not running & mountd is configured to be restarted then
    # the mountd is restarted.
    # If all restart attempts have failed, the node is declared "dead" for nfs
    # serving. If mountd is configured as "failover" then the node is failed over
    # to another node, and alert is sent.
    if [[ $MONITOR_MOUNTD -gt $LEVEL0 ]]; then
      debugmsg2 "============mountd monitoring============="
      monitorMountd
      debugmsg2 "done monitoring mountd"
    fi

    # Monitoring locking (lockd and statd)
    # The system monitors locking only if configured.
    # There are several monitoring methods/levels:
    # (test 1) Check that the lockd process is running
    # (test 2) Check that the statd process is running
    # If lockd is not running, the node is declared "dead" for nfs serving,
    # and if locking is configured as "failover" then the node is failed over to
    # another node, and alert is sent.
    # If statd is not running, a restart takes place if statd is configured to be
    # restarted.
    # If the restart has failed, the node is declared "dead" for nfs serving,
    # and if configured, the node is failed over to another
    # node, and alert is sent.
    if [[ $MONITOR_STATD -gt $LEVEL0 ]]; then
      debugmsg2 "==========statd monitoring==============="
      monitorLocking
      debugmsg2 "done monitoring statd"
    fi

    # Monitoring sshd
    # The system monitors sshd only if sshd is configured to be monitored.
    # (test 1) Check that the sshd process is running.
    # Order of tests:
    # Perform (test 1). If the process is not running, goto Action.
    # If the process is running, goto Done.
    # Action: if sshd is not running and is configured to be restarted, then
    # restart.
    # If all restart attempts have failed, the node is declared "dead" for ssh
    # serving. If sshd is configured as "failover" then the node is failed over
    # to another node, and alert is sent.
    if [[ $MONITOR_SSHD -gt $LEVEL0 ]]; then
      debugmsg2 "============sshd monitoring============="
      monitorSshd
      debugmsg2 "done monitoring sshd"
    fi
  done
}

# Kill every running ./nfsmonitor process other than this shell and its parent.
stopNfsMonitor() {
  nfsMonToolPid=$(pidof -o $$ -o $PPID -o %PPID -x ./nfsmonitor)
  if [[ -z $nfsMonToolPid ]]; then
    debugmsg2 "Warning: Couldn't find the monitoring process to stop"
    return
  fi
  kill -9 $nfsMonToolPid
  msg "Monitoring has stopped."
}

# Start the monitoring loop in the background.
startNfsMonitor() {
  nfsMonitor &
  msg "Monitoring has started."
}

# Stop any running monitor, then start a new one.
restartNfsMonitor() {
  stopNfsMonitor
  startNfsMonitor
}

# Report (through debugmsg) whether another ./nfsmonitor process is running.
statusNfsMonitor() {
  nfsMonToolPid=$(pidof -o $$ -o $PPID -o %PPID -x ./nfsmonitor)
  if [[ -z $nfsMonToolPid ]]; then
    debugmsg "nfsmonitor is not running"
    return
  else
    debugmsg "nfsmonitor is running"
  fi
}

#################################
# Main program
#################################
case "$1" in
  -s|start)
    startNfsMonitor
    ;;
  -e|stop|end)
    # "end" is accepted because earlier usage text advertised it
    stopNfsMonitor
    ;;
  -r|restart)
    restartNfsMonitor
    ;;
  -q|status)
    statusNfsMonitor
    ;;
  *)
    echo $"Usage: $0 [-s|start|-e|stop|-r|restart|-q|status]"
    exit 1
    ;;
esac