#!/bin/ksh
##
## SCRIPT NAME: rmtcmds4topps_cumcpu_sortOFallhosts
##
## Where: in $FEDIR/scripts   where $FEDIR=/apps/nns_com/fea
##
############################################################################
## PURPOSE: To show the TOP PROCESSES on SGI hosts ---
##          'A SORTED OVERVIEW OF ALL HOSTS ON THE NETWORK' ---
##          'TOP' being in terms of Cumulative CPU time.
##
##          Helps find 'forgotten' processes, in a tight processing
##          loop, gobbling lots of CPU cycles, on network clients.
##
##          Processes are selected to be shown based on a CPUMIN
##          threshold variable (minutes of CPU time), default 5.
##
##          The script puts the hostname beside each more-than-$CPUMIN
##          'ps' line, sorts the resulting list for all hosts by
##          Cumulative-CPU-time (top processes first), writes the
##          report to a file and displays it.
##
############################################################################
## CALLED BY: nethosts_tools -> nethosts_tools.chestdef in $FEDIR/scripts
##
############################################################################
## CALL FORMAT:
##     /apps/nns_com/fea/scripts/rmtcmds4topps_cumcpu_sortOFallhosts
## or
##     /apps/nns_com/fea/scripts/rmtcmds4topps_cumcpu_sortOFallhosts [minutes]
##
##     where [minutes] is a number to be loaded into the CPUMIN
##     threshold variable.
##
##     See nethosts_tools.chestdef in $FEDIR/scripts
##     for a call initiated within a 'winterm'.
############################################################################
## INPUT VARS: $1     optional value for the CPUMIN threshold (minutes).
##             $FEDIR optional; defaults to /apps/nns_com/fea.
##
## EXIT STATUS: 2 if $1 is given and is not a non-negative number.
##
############################################################################
## MAINTENANCE HISTORY:
## Written by: B.Montandon O06 14Apr2000  Based on
##                                        'runcmd_on_pingable_hosts_bygui'
##                                        and 'runcmd_topps_cumcpu_1host'
##                                        in $FEDIR/scripts
## Updated by: B.Montandon O06  9May2000  Add 2 chars to PID & PPID fields.
##
############################################################################

FEDIR=${FEDIR:-/apps/nns_com/fea}

############################################################################
## Set and validate the CPUMIN threshold variable.
## A non-numeric value would make awk fall back to a string comparison
## and silently select the wrong processes, so reject it here.
############################################################################
# CPUMIN=2
# CPUMIN=120
CPUMIN=${1:-5}

case "$CPUMIN" in
    '' | . | *.*.* | *[!0-9.]*)
        echo "Usage: $0 [minutes]" >&2
        echo "       minutes must be a non-negative number; got '$CPUMIN'." >&2
        exit 2
        ;;
esac

############################################################################
## host_is_pingable HOST
##
## Returns 0 when a single small (4-byte) ping to HOST produced output
## that does not report total packet loss; returns 1 otherwise.
## 'ping' returns faster on an inaccessible host than 'rsh' does.
## Empty ping output (e.g. the name could not be resolved) is treated
## as "not ping-able": there is no evidence that the host is up.
############################################################################
host_is_pingable()
{
    ping_output=$(/usr/etc/ping -s 4 -c 1 "$1")

    case "$ping_output" in
        '')                         return 1 ;;
        *100?0%' packet loss'*)     return 1 ;;
    esac
    return 0
}

############################################################################
## top_procs_on_host HOST
##
## Runs 'ps -ef' on HOST via 'rsh' and writes to stdout one reformatted
## line, CPU-time first, for each process whose CPU minutes exceed
## $CPUMIN and whose reformatted line does not mention Xsgi.
##
## Handles either format of 'ps -ef' output (date or time started):
##
##   $1      $2     $3   $4  $5  $6     $7    $8    $9
##   root    408    393   0  Mar 13     ?     1:17  /usr/etc/lpd
##
##   $1      $2     $3   $4  $5         $6    $7    $8
##   bmo01 84332  86747   0  08:33:49   ?     7:00  netscape -mail
##
## The minutes value is the part of the CPU-time field before the
## first ':'.  Only the first word of the command is reported.
############################################################################
top_procs_on_host()
{
    rsh "$1" ps -ef |
    awk -v CPUMIN="$CPUMIN" '
        NR == 1 { next }                    # skip the ps column-heading line
        {
            if ($5 ~ /..:..:../) {          # started today: STIME is hh:mm:ss
                cputime = $7
                line = sprintf("%13s %8s %7s %7s %8s %s",
                               $7, $1, $2, $3, $5, $8)
            } else {                        # started earlier: STIME is "Mon dd"
                cputime = $8
                line = sprintf("%13s %8s %7s %7s %3s %-4s %s",
                               $8, $1, $2, $3, $5, $6, $9)
            }

            if (split(cputime, hms, ":") < 2)   # not a min:sec value
                next

            if ((hms[1] + 0 > CPUMIN + 0) && (line !~ /Xsgi/))
                print line
        }'
}

############################################################################
## GENERATE THE NAME OF THE OUTPUT-REPORT FILE.
## NOTE(review): set_localoutlist is presumably what defines OUTLIST;
## it is not visible here -- confirm.
############################################################################
. "$FEDIR/scripts/set_localoutlist"

OUTLIST=${OUTLIST}_topps_cumcpu
OUTLIST_PRESORT=${OUTLIST}_presort

rm -f "$OUTLIST" "$OUTLIST_PRESORT"

## Create the work file now, so the final 'sort' has an input file even
## when the host list is empty or no host answers the ping.
: > "$OUTLIST_PRESORT"

############################################################################
## GENERATE THE HOSTLIST from NIS (ypcat hosts):
## third field of every non-comment line that mentions 'iaw'.
############################################################################
HOSTLIST=$(ypcat hosts | awk '!/^#/ && /iaw/ {print $3}' | sort)

# HOSTLIST=$HOSTLIST" engvis00 engprd00 sgia sgib"

## FOR TESTING:
# HOSTLIST="iaw005 iaw007 iaw021 iaw030 iaw141"

############################################################################
## PREPARE A HEADER FOR THE REPORT.
## The column headings line up with the printf widths used in
## top_procs_on_host, preceded by the host name and one blank.
############################################################################
REPORT_TIME=$(date '+%Y %b %d %T%p')

cat > "$OUTLIST" <<EOF
************************* $REPORT_TIME ***************************

NNS SGI NETWORK HOST PROCESSES THAT HAVE ACCUMULATED
** MORE THAN $CPUMIN MINUTES OF CPU-TIME ** since their start.

SORTED BY CUMULATIVE-CPU-TIME --- TOP PROCESSES AT THE TOP.
_____________________________________________________________________________

Following is data for processes --- extracted from 'ps -ef' records.
(The X-server process 'Xsgi' on each host is excluded.)
_____________________________________________________________________________

             CUM-CPU                   Parent START
HOST            TIME     User Process Process -TIME
NAME         min:sec       ID      ID      ID /DATE    COMMAND
------ --------------    ----- ------- ------- -------- ----------------------

EOF

############################################################################
## 'rsh' needs the user to have a .rhosts file.
## NOTE(review): mak_rhosts presumably takes care of that -- confirm.
############################################################################
# echo "+ $USER" > $HOME/.rhosts
. "$FEDIR/scripts/mak_rhosts"

############################################################################
## LOOP THRU HOSTS: ping first, then collect the over-threshold process
## lines with the hostname prepended to each line.
############################################################################
for HOST in $HOSTLIST
do
    echo "\nPinging $HOST to check availability."

    if host_is_pingable "$HOST"
    then
        top_procs_on_host "$HOST" | sed "s|^|$HOST |" >> "$OUTLIST_PRESORT"
    else
        echo " NO OUTPUT from $HOST. NOT PING-ABLE."
    fi
done

############################################################################
## Sort the collected lines by the Cum-CPU column and add to $OUTLIST.
##
## The key is compared as a STRING in reverse order, on purpose: the
## CPU time is right-justified in a fixed-width field, so a string
## compare also orders the seconds part.  A numeric compare would stop
## at the ':'.
## '-k 2r,2' is the current spelling of the obsolescent '+1r -2'.
############################################################################
sort -k 2r,2 "$OUTLIST_PRESORT" >> "$OUTLIST"
rm -f "$OUTLIST_PRESORT"

## FOR TESTING:
# xpg $OUTLIST

############################################################################
## Add TRAILER to report.
############################################################################
REPORT_TIME=$(date '+%Y %b %d %T%p')
SCRIPT_BASENAME=$(basename "$0")

cat >> "$OUTLIST" <<EOF

************************* $REPORT_TIME ***************************

The list above shows TOP PROCESSES on currently ping-able SGI 'network' hosts
--- 'TOP' being in terms of *Accumulated* CPU time
(not current-%-of-CPU-cycles).

The time is accumulated since the respective startup time of each process.

Processes were selected to be shown based on a CPUMIN threshold variable,
which, in this run, was set to the value $CPUMIN minutes.

***********************************************************************
This list can be used to find 'forgotten' processes that are in a tight
processing loop, gobbling lots of CPU cycles, on network hosts.
***********************************************************************

This report was generated from SGI 'network' hosts --- by userid $USER ---
with the candidate hostnames provided via NIS = Network Information Service,
i.e. by the 'ypcat hosts' command.

Hence, the hosts are limited to 'network' hosts on which $USER can login.

--------
The list was assembled by the script
$0

The script uses a sequence of commands ('ps','awk','sed') on each host,
followed by a 'sort', by Cumulative-CPU-time, applied to the collected
output from all hosts --- so the TOP PROCESSES ON THE NETWORK
POP UP TO THE TOP OF THE LIST, with hostname attached.

--------
IMPLEMENTATION METHODS:

The ' $SCRIPT_BASENAME ' script is intended to be run periodically
(preferably almost daily) by SGI network Administrators ---
Application (CAD/FEA) or System (root) Administrators.

It could be run at a 'quiet' time --- like noon-hour or evening/night.

The script could be implemented as
- a command alias, via an Administrator's .profile file;
- a desktop icon, via the 'Find, File QuickFind' tool drawers;
- a drawer in the SGI toolchest, via an Administrator's .auxchestrc file.

Or the script could be accessed via a drawer in a command
toolchest-utility, like 'nethosts_tools'.

This report can be generated via nnsFEAmenu option 'u n ?'
(Utilities, Net-vu, ?).

...........................................................................

EOF

############################################################################
## SHOW THE REPORT.
##
## $FEDIR/scripts/shofil does not work in an 'xwsh' from toolchest ---
## apparently because of the '&' batch invocation of shofil.tk within
## that script.  So shofil.tk is run here directly, in the foreground,
## with the report file name passed through the environment.
############################################################################
# $FEDIR/scripts/shofil $OUTLIST

SHOFILENAME=$OUTLIST
export SHOFILENAME

XLPHP_FORMAT="AV"
export XLPHP_FORMAT

## "$FEDIR/tkGUIs/shofil.tk" &     ## DOES NOT SHOW UP in an 'xwsh'.
"$FEDIR/tkGUIs/shofil.tk"