#!/bin/ksh
##
## SCRIPT NAME: rmtcmds4netstat_colls_sortOFallhosts
##
## Where: in $FEDIR/scripts   where $FEDIR=/apps/nns_com/fea
##
############################################################################
## PURPOSE: To show the TOP Ethernet COLLISIONS on SGI hosts ---
##          'A SORTED OVERVIEW OF ALL HOSTS ON THE NETWORK' ---
##          'TOP' being in terms of % of Out-Packets.
##
##          Helps find 'forgotten' processes, in a tight processing loop,
##          that are generating lots of network I/O from network clients.
##          (Such processes can have a worse impact on the network than
##          'forgotten' CPU-gobbling processes that generate no network I/O.)
##
##          This script puts report output in a file and displays it.
##
##          The script puts the hostname beside each netstat %-collisions
##          line --- and it sorts the resulting list, for all hosts,
##          by %-collisions --- so the hosts with the highest %-collisions
##          pop up to the top of the list, with hostname attached.
##
############################################################################
## CALLED BY: nethosts_tools -> nethosts_tools.chestdef
##            in $FEDIR/scripts
##
############################################################################
## CALL FORMAT:
##    /apps/nns_com/fea/scripts/rmtcmds4netstat_colls_sortOFallhosts
##
##    See nethosts_tools.chestdef in $FEDIR/scripts
##    for a call initiated within a 'winterm'.
##
############################################################################
## ENVIRONMENT:
##    FEDIR   - optional; defaults to /apps/nns_com/fea when unset/empty.
##    USER    - used in report text and in the 'login failed' status line.
##    OUTLIST - presumably set by $FEDIR/scripts/set_localoutlist (sourced
##              below); this script appends '_netstat_colls' to it.
##
## FILES WRITTEN:
##    ${OUTLIST}_netstat_colls          - the finished report
##    ${OUTLIST}_netstat_colls_presort  - per-host lines before sorting
##
############################################################################
## MAINTENANCE HISTORY:
## Written by: B.Montandon O06 17Apr2000
## Updated by: B.Montandon O06 17Apr2000
## Updated   : guard %-collisions against zero Out-Packets; POSIX 'sort -k'
##             keys instead of obsolete '+pos -pos'; quoting; de-duplicated
##             host list; boot time fetched only for hosts that answered.
############################################################################

FEDIR=${FEDIR:-/apps/nns_com/fea}

##############################################################################
## add_collision_pct
##
## Filter: reads '/usr/etc/netstat -i' output on stdin and writes one
## reformatted line per interface on stdout, with %-collisions of
## Out-Packets appended as a 10th column.
##
## Expected input columns:
##
##   Name Mtu  Network  Address          Ipkts Ierrs Opkts Oerrs Coll
##   ec0  1500 150.10.2 iaw030.cae.nns~  69668     0 26274     0  229
##   lo0  8304 loopback localhost         5097     0  5097     0    0
##
##   $1   $2   $3       $4               $5    $6    $7    $8    $9
##
## - The first line (column titles) is dropped.
## - Blank lines are dropped.
## - When Out-Packets ($7) is zero the percentage is reported as 0.0
##   rather than dividing by zero (which aborts most awks and would lose
##   the remaining interface lines of the host).
##
## Ref. O'Reilly Assoc. 'System Performance Tuning', p.181-184 & 187-194
##############################################################################
add_collision_pct() {
   awk '
      NR == 1 { next }
      NF == 0 { next }
      {
         pct = 0
         if ($7 + 0 > 0) pct = 100 * $9 / $7
         printf ("%-4s %-5s %-11s %-15s %12s %10s %12s %10s %12s %7.1f \n", \
                 $1, $2, $3, $4, $5, $6, $7, $8, $9, pct)
      }
   '
}

##############################################################################
## GENERATE THE NAME OF THE OUTPUT-REPORT FILE.
##############################################################################

. "$FEDIR/scripts/set_localoutlist"

OUTLIST="${OUTLIST}_netstat_colls"
OUTLIST_PRESORT="${OUTLIST}_presort"

rm -f "$OUTLIST" "$OUTLIST_PRESORT"

##############################################################################
## GENERATE THE HOSTLIST from NIS (ypcat hosts).
## Keeps non-comment lines containing 'iaw' and takes the 3rd column.
## 'sort -u' drops repeated names so a host is not probed/reported twice.
##############################################################################
## Alternatives that were used at some point:
##   . /apps/nns_com/fea/scripts/alarm_sethostlist
##   . /apps/ideas/cron/set_hostlist

HOSTLIST=$(ypcat hosts | grep -v "^#" | grep iaw | awk '{print $3}' | sort -u)

# HOSTLIST=$HOSTLIST" engvis00 engprd00 sgia sgib"

## FOR TESTING:
# HOSTLIST="iaw005 iaw007 iaw021 iaw030 iaw141"

#############################################################################
## PREPARE A HEADER FOR THE REPORT.
## The column titles line up with the printf widths in add_collision_pct,
## shifted right by the hostname that is prefixed to every line.
#############################################################################

echo "\
************************* `date '+%Y %b %d %T%p'` ***********************************

          NNS SGI NETWORK HOSTS AND THEIR
          ** ETHERNET COLLISION PERCENTAGES ** since their last re-boot.

          SORTED BY %-COLLISIONS --- HIGHEST PERCENTAGES AT THE TOP. (column 11)

          (The percent was calculated by dividing Collisions by the
           total Out-Packets. See comments at bottom of this report.)
________________________________________________________________________________________________________________________

  1     2     3        4             5              6           7           8           9          10          11
       Net  MTU
      Inter Max                                                                                               (Ave.
                                                                                                           since boot)
HOST   face Xfer                                                                                           %Collisions
NAME   Name Unit  Network     Address           In-Packets    In-Errs  Out-Packets   Out-Errs   Collisions of Out-pkts
------ ---- ----- ----------- --------------- ------------ ---------- ------------ ---------- ------------ -----------
" > "$OUTLIST"

##############################################################################
## 'rsh' needs the user to have a usable .rhosts file;
## mak_rhosts presumably takes care of that.
##############################################################################
# echo "+ $USER" > $HOME/.rhosts

. "$FEDIR/scripts/mak_rhosts"

##############################################################################
## LOOP THRU HOSTS -- TO RUN 'netstat -i' ON EACH ONE, with 'rsh'.
##############################################################################
## 'ping' is used to check the accessibility of the host before issuing
## 'rsh', because 'ping' returns faster on an inaccessible host than 'rsh'.
## A single ping with a small 4-byte packet is sent and its output caught.
##############################################################################

for HOST in $HOSTLIST
do

   echo "\nPinging $HOST to check availability."

   ## FOR TESTING:
   # set -x

   PINGOUT=$(/usr/etc/ping -s 4 -c 1 "$HOST")

   ## FOR TESTING:
   # echo "$PINGOUT"
   # set -

   ## A host counts as unreachable only when ping reports total packet
   ## loss; both '100.0% packet loss' and '100% packet loss' are accepted.
   case "$PINGOUT" in
      *100?0%' packet loss'* | *100%' packet loss'*)
         echo " NO OUTPUT from $HOST. NOT PING-ABLE."
         echo "$HOST not ping-able" >> "$OUTLIST_PRESORT"
         continue
         ;;
   esac

   #########################################################################
   ## Fetch 'netstat -i' from the host and append the %-collisions column.
   ## Empty output is taken to mean that the remote login did not work.
   #########################################################################

   ## FOR TESTING:
   # set -x

   COLL_LINES=$(rsh "$HOST" /usr/etc/netstat -i | add_collision_pct)

   if test "$COLL_LINES" = ""
   then
      echo "$HOST $USER-login failed" >> "$OUTLIST_PRESORT"
   else
      ## Boot time is only needed when there are lines to annotate, so the
      ## second 'rsh' is skipped for hosts that returned nothing.
      BOOTTIME=$(rsh "$HOST" who -b)

      ## Add boot-time at the end and hostname at the front of each line.
      echo "$COLL_LINES" | sed "s|$| $BOOTTIME|" | sed "s|^|$HOST |" >> "$OUTLIST_PRESORT"
   fi

done
## END OF for HOST in $HOSTLIST

########################################################################
## Sort $OUTLIST_PRESORT by the %-Collisions column (field 11, highest
## first), then by interface name (field 2), then by hostname (field 1),
## and add the result to $OUTLIST.
##
## The key is compared as text, not numerically: the percentage is
## right-aligned in a fixed width, so text order equals numeric order,
## and status lines that have no 11th field ('not ping-able',
## 'login failed') end up at the bottom. LC_ALL=C keeps the leading
## blanks significant regardless of the caller's locale.
########################################################################

## FOR TESTING:
## (AT THIS POINT, THE RECORDS ARE STILL GROUPED BY HOST.)
# xpg $OUTLIST_PRESORT

LC_ALL=C sort -k 11,11r -k 2,2 -k 1,1 "$OUTLIST_PRESORT" >> "$OUTLIST"

## FOR TESTING:
# xpg $OUTLIST

########################################################################
## Add TRAILER to report.
########################################################################

echo "
************************* `date '+%Y %b %d %T%p'` ***************************

  The list above shows currently ping-able SGI 'network' hosts --- with
  hosts with highest Ethernet Out-packet Collision-%s at the TOP.

  The Collisions & Out-packet data were accumulated since the last re-boot
  of each host.

*****************************************************************************
  This list can be used to find 'forgotten' processes that are/were in
  a tight processing loop on a host, generating lots of packets
  on the network.
*****************************************************************************

  The collisions total on each host is accumlated since last bootup ---
  and the activity that generated bursts of collisions may have been
  days ago. Hence the situation may be 'quiet' now on hosts that
  experienced many, intense collisions in the past.

  However, these totals can still prove to be quite valuable in revealing
  hosts (& applications & users) that occasionally generate intense
  processing situations.

--------
  NOTE: Most experienced network administrators consider a collision %
        greater than 10 to indicate an 'unhealthy' situation.
        Some even consider 5% to be in need of attention --- especially
        if it is a long-term average rather than a 'burst-rate' percentage.

--------
  This report was generated from SGI 'network' hosts --- by userid $USER
  --- with the candidate hostnames provided via
  NIS = Network Information Service, i.e. by the 'ypcat hosts' command.

  Hence, the hosts are limited to 'network' hosts on which $USER can login.

--------
  The list was assembled by the script

       $0

  The script uses a sequence of commands
  ('/usr/etc/netstat -i','awk','sed') on each host, followed by a 'sort',
  by %-Collisions, applied to the collected output from all hosts ---
  so the HOSTS WITH THE HIGHEST %-COLLISIONS ON THE NETWORK
  POP UP TO THE TOP OF THE LIST.

--------
  IMPLEMENTATION METHODS:

  The ' `basename $0` ' script
  is intended to be run periodically (preferably almost daily)
  by SGI network Administrators ---
  Application (CAD/FEA) or System (root) Administrators.

  It could be run at a 'quiet' time --- like noon-hour or evening/night.

  The script could be implemented as
     - a command alias, via an Administrator's .profile file;
     - a desktop icon, via the 'Find, File QuickFind' tool drawers;
     - a drawer in the SGI toolchest, via an Administrator's
       .auxchestrc file.

  Or the script could be accessed via a drawer in a command
  toolchest-utility, like 'nethosts_tools'.

  This report can be generated via nnsFEAmenu option 'u n ?'
  (Utilities, Net-vu, ?).

...........................................................................
" >> "$OUTLIST"

#####################################################################
## SHOW THE REPORT.
#####################################################################

## FOR TESTING:
# echo "DISPLAY: $DISPLAY"
# set -x

#####################################################################
## $FEDIR/scripts/shofil does not work in an 'xwsh' from toolchest,
## apparently because of the '&' batch invocation of shofil.tk within
## that script. So shofil.tk is run directly, in the foreground; it
## is handed the file name and format through the environment.
#####################################################################
# $FEDIR/scripts/shofil $OUTLIST

SHOFILENAME=$OUTLIST
export SHOFILENAME

XLPHP_FORMAT="AV"
export XLPHP_FORMAT

## $FEDIR/tkGUIs/shofil.tk &     ## DOES NOT SHOW UP in an 'xwsh'.
"$FEDIR/tkGUIs/shofil.tk"