#!/bin/ksh
##
## SCRIPT NAME: diruse_files_all_levs_sizesort_bygui
##
## Where: in $FEDIR/scripts where $FEDIR=/apps/nns_com/fea
##
##############################################################################
## PURPOSE: Lists the files 'under' a user-specified directory (and host)
##          --- at all sub-directory levels --- SORTED BY SIZE.
##
##          A Tcl-Tk script presents a GUI that prompts for hostid
##          and a directory name.
##
##############################################################################
## CALLED BY: 'spacetools' script in $FEDIR/scripts,
##            actually via 'spacetools.chestdef' in $FEDIR/scripts.
##
##            The 'spacetools' toolchest is available via
##            the site toolchest-drawer hierarchy-path:
##
##               SGI Toolchest ->
##                  HandyTools ->
##                     AnyHost - SpaceTools (Files)
##
##############################################################################
## METHOD OF OPERATION:
##
## This script runs a 'pipe' of find-ls-sort-awk commands on the
## specified host (local or remote). The pipe was originally of the form
##
##   find $DIRNAME -local -type f -exec ls -l {} \; | sort -k5nr | \
##
## and, since the 08apr2004 change listed in the maintenance history below,
## is of the form
##
##   find $DIRNAME -local -type f -exec ls -l {} \; | sort +4 -5nr | \
##     awk '{printf ("%13.6f ... %5s %s\n", $5/1000000, $1, ..., $9 )}' \
##
## where {} represents a filename.
##
## The Unix 'find' command is used to recursively travel through
## ALL the sub-directories of the specified directory
##       $DIRNAME
## and execute the 'ls -l {}' command to provide a list
##     - without breaks with sub-directory 'section' names
##       (which is what would happen if 'ls -lR' were used)
## and, instead of 'relative' filenames, provide a list
##     - with fully-qualified filenames.
##
## I.e. a list is produced that is suitable for sorting (re-ordering
## the records) and still has complete filenames for unambiguous
## identification.
##-----
## In the case that a remote host was specified for the query,
## the find|sort|awk pipe of commands is quoted together in the
##       'rsh $HOST_ID "commands"'
## statement, so that find-AND-sort-AND-awk execute on the remote
## host (usually a powerful server machine like engprd00 or engvis00).
##############################################################################
## NOTE: You can see only the non-commented, executable lines of this
##       script by using
##            egrep -v '^ *##|^ *# '
##       or
##            grep -v '^ *##' | grep -v '^ *# '
##############################################################################
## MAINTENANCE HISTORY:
## Written by: B.Montandon O06 17Mar2000 Based on
##                                       'find_big_or_old_files4dir_bygui'
##                                       and 'diruse_files_all_levs_alphasort_bygui'
##                                       in $FEDIR/scripts
## Updated by: B.Montandon O06 17Apr2000 Comment out directory existence
##                                       check; needs to handle remote dirs.
## Updated by: B.Montandon O06  2May2000 Chg 'sort' to '-k' format.
## Updated by: B. Montandon O06  6Sep2000 Add 'Unknown host' check.
## Updated by: B. Montandon O06  7Sep2000 Add local/remote DIRCHECK routine.
## Updated by: B. Montandon O06 22Sep2000 Eliminate the call to separate script
##                                       'diruse_files_all_levs_sizesort' by
##                                       putting its find-sort-awk in this
##                                       script, for local&remote-host cases.
##
## Updated by: Blaise Montandon 28apr2003 In the 'awk' commands,
##                                        replaced the '$9' field by a
##                                        'COLfilnam = index($0,$9)' technique
##                                        --- to return complete filenames when
##                                        there are embedded blanks in the name.
##                                        (This is not 'ideal', as described
##                                        below at the awk calls.)
##
## Updated by: Blaise Montandon 29apr2003 Replace Tk-GUI script
##                                        'enter_hostid_and_dir.tk' by
##                                        'enter_hostid_and_dir_toghelp-scroll.tk'.
##
## Updated by: Blaise Montandon 26aug2003 Load HOST_ID & DIRNAME vars from
##                                        script args $1 & $2. If not loaded
##                                        (i.e. = "") then use the previous
##                                        defaults.
##
## Updated by: Blaise Montandon 04sep2003 Improve format & content of WIN_INFO
##                                        and the report trailer.
##
## Updated by: Blaise Montandon 08apr2004 To avoid a bug in new SGI 'sort'
##                                        in IRIX 6.5.22, changed several cases
##                                        of 'sort -k5nr' to
##                                        'sort +4 -5nr'.
##############################################################################

#############################################################################
## SET A UTILITY SCRIPTS PATHNAME in case this toolchest is not started from
## the HandyTools toolchest or the nnsFEAmenu toolchest --- and a
## $FEDIR utility script (or help file) is needed.
#############################################################################

if test "$FEDIR" = ""
then
   FEDIR=/apps/nns_com/fea
fi

##############################################################################
## SET query hostname and directory from script arguments 1 & 2
## --- if not already set.
## (Values already present in the environment take precedence over the
##  arguments.)
##############################################################################

if test "$HOST_ID" = ""
then
   HOST_ID="$1"
fi

if test "$DIRNAME" = ""
then
   DIRNAME="$2"
fi

##############################################################################
## SET LOCAL HOST NAME FOR MESSAGES & REPORTS & LOCAL-HOST-CHECKS.
##############################################################################

THISHOST=$(hostname)

SERVER="engprd00"
FEA_SERVER="engfea00"
VIZ_SERVER="engvis00"

########################################################################
## SET GUI VARIABLES, including default HOSTNAME and DIRECTORY NAME.
########################################################################

WINTITLE="FileSizes_for_given_Host:Dirname (ALL dir levels) -- SIZE SORTED"
export WINTITLE

WIN_INFO="\
Enter 1) the name of an SGI (Unix) host on the site network
and   2) a directory name.

This utility prints all the 'ordinary file' names (NOT directory names),
the FULL-FILENAMES, at *ALL LEVELS* under the specified directory,
in *SIZE* sort order.

---

For small to moderate sized ('shallow') directories, this report is
typically generated in 3 to 30 seconds.

If there are MANY sub-directories and/or files under the specified
directory, i.e. if the specified directory is 'deep',
IT MAY TAKE MORE THAN A MINUTE OR TWO.

USE THIS UTILITY SPARINGLY ON HUGE DIRECTORIES.

------------------
'LOCAL' PROCESSING (for speed):

This utility uses a 'find' command in the form

   find  -local -type f -exec ls -l {} \; | sort ... | awk ...

The '-type f' parameter selects 'ordinary files', rather than
directories, for printing.

The '-local' parameter is used to list only the files 'local to'
the specified host --- NOT files in a directory mounted from a
remote machine.

This helps eliminate cases like following through all the subdirectories
of a mounted directory like '$SERVER:/usr/people'.

So FOR PROPER RESULTS (AND FOR FAST PERFORMANCE), it is necessary to
SPECIFY THE HOST AT WHICH THE DIRECTORY'S DISK IS LOCALLY ATTACHED.

Examples:
           $SERVER and $HOME
           $SERVER and /data//cae/$USER
           $THISHOST and /local/scratch/$USER
           $THISHOST and /local/scratch
           $FEA_SERVER and /local/scratch/$USER
           $VIZ_SERVER and /division/...

Local/remote directories references:
   The 'ManyDirs' button --- or the 'df -m' command on the host --- or the
   'Show FILE-SYSTEM-SIZES @AnyHost (%-USED SORT)' drawer
   in the HandyTools-SpaceTools toolchest.

----------
THE OUTPUT (and its use):

This utility shows the DISK USAGE of EACH OF THE FILES under the
specified directory --- in ALL sub-directories of that directory,
at ALL LEVELS.

The SIZE-SORTED file information is especially helpful, when a file
system is near-full, to RAPIDLY locate files which will yield the most
pay-back if they could be REMOVED or COMPRESSED or ARCHIVED 'off-line'.

-------
THE GUI:

Note that you can expand this window to enter-and-see the
full directory name.

You can do queries on multiple hosts/directories and
bring the report windows side-by-side for comparison.

---------------------------------------------------------------------------
This GUI is presented by the 'wrapper' script
   $0

This 'wrapper' script also performs the report generation
--- with a find-ls-sort-awk 'pipe' of commands ---
and then shows the report."

export WIN_INFO

## IN FACT, THIS UTILITY RESTRICTS ITS ATTENTION TO FILES 'LOCAL TO' THE
## SPECIFIED HOST --- for performance and to make sure it does not follow
## (unintentionally) links to huge directories that are mounted from another
## (remote) host --- like $SERVER:/usr/people.

## WIN_GEOM="+40+40"
## export WIN_GEOM

## WIN_COLOR="#AAAAAA"
## export WIN_COLOR

################################
## SET DEFAULT HOST & DIR NAMES.
################################

## HOSTS_LIST="`hostname` engfea00 engvis00 engprd00 ews###"
## export HOSTS_LIST

if test "$HOST_ID" = ""
then
   HOST_ID="$SERVER"
   # HOST_ID="`hostname`"
fi
export HOST_ID

if test "$DIRNAME" = ""
then
   DIRNAME="$HOME"
   # DIRNAME="/local/scratch"
   # DIRNAME="/local/scratch/$USER"
fi
export DIRNAME

########################################################################
## START OF PROMPTING LOOP, FOR DIRECTORY NAME. (Not implemented.)
########################################################################
## # while true
## while :
## do

################################
## PROMPT FOR HOST & DIR NAMES.
################################
## NOTE(review): the GUI's stdout is eval'ed, so it presumably prints shell
## assignments for HOST_ID and DIRNAME --- confirm against the Tk script.
## Whatever it prints is executed by this shell.

# TEMP=`$FEDIR/tkGUIs/enter_hostid_and_dir.tk`
TEMP=$("$FEDIR/tkGUIs/enter_hostid_and_dir_toghelp-scroll.tk")

eval "$TEMP"

## FOR TESTING:
# echo $TEMP

## An empty host or directory after the prompt ends the script quietly.

if test "$HOST_ID" = ""
then
   exit
fi

if test "$DIRNAME" = ""
then
   exit
fi

#####################################################################
## show_notice_and_exit TEXT
##
## Exports the CONFIRM_* variables (text, geometry, min-size, title,
## color, font), starts the 'confirm.tk' dialog in the background with
## the single argument DISMISS, and then exits this script.
## Used by the host check and the directory check below, which
## previously each carried their own copy of these statements.
#####################################################################
show_notice_and_exit()
{
   CONFIRM_TEXT="$1"
   export CONFIRM_TEXT

   CONFIRM_GEOM="+080+080"
   CONFIRM_MINSIZE="350 150"
   CONFIRM_TITLE="$WINTITLE"
   export CONFIRM_TITLE CONFIRM_MINSIZE CONFIRM_GEOM

   CONFIRM_COLOR="#3DADFF"
   export CONFIRM_COLOR

   CONFIRM_FONT="-adobe-helvetica-bold-r-normal--*-100-*-*-p-*-*-*"
   export CONFIRM_FONT

   ## The dialog's stdout is captured only to discard it; the dialog runs
   ## in the background so that this script can exit immediately.
   JUNK=$("$FEDIR/tkGUIs/confirm.tk" DISMISS) &

   exit
   ## continue
}

#####################################################################
## CHECK THAT THE HOST_ID EXISTS, if not THISHOST.
#####################################################################

## FOR TESTING:
# set -x

if test ! "$HOST_ID" = "$THISHOST"
then

   ## Only an 'Unknown host' complaint from rsh counts as a failure here.
   HOSTCHECK=$(rsh "$HOST_ID" cd 2>&1 | grep 'Unknown host')

   if test ! "$HOSTCHECK" = ""
   then
      show_notice_and_exit "\
Specified Host:
   $HOST_ID
is Unknown.
"
   fi
   ## END OF if test ! "$HOSTCHECK" = ""

fi
## END OF if test ! "$HOST_ID" = "$THISHOST"

#####################################################################
## CHECK THAT THE DIRNAME IS ACCESSIBLE/EXISTS.
## -- on $HOST_ID (do local or remote check).
#####################################################################
## For a slightly different technique of handling 'stdout & stderr'
## (with 2>&1), see $FEDIR/scripts/find_big_or_old_files4dir_bygui
#####################################################################
## $DIRNAME is quoted --- locally, and again inside the remote command
## string --- so that a directory name with embedded blanks is passed
## to 'ls' as ONE argument.  The remote quotes are double quotes, so
## the remote shell still expands any variables in the name.
#####################################################################

## FOR TESTING:
# set -x

if test "$HOST_ID" = "$THISHOST"
then
   DIRCHECK=$(ls -d "$DIRNAME" 2> /dev/null)
else
   ## NOTE(review): mak_rhosts presumably prepares rsh access (.rhosts)
   ## --- confirm against that script.
   . "$FEDIR/scripts/mak_rhosts"
   DIRCHECK=$(rsh "$HOST_ID" "ls -d \"$DIRNAME\"" 2> /dev/null)
fi

if test "$DIRCHECK" = ""
then
   show_notice_and_exit "\
Specified Directory:
   $DIRNAME
Not found or does not exist, according to $HOST_ID.

Exiting.
"
fi
## END OF if test "$DIRCHECK" = ""

###########################################################
## EXIT IF USER SPECIFIES '/' for the directory.
##########################################################
## if test "$DIRNAME" = "/"
## then
##    echo "
## Try a lower level directory than the root directory (/).
##
## *** Press Enter to continue."
##    read NNS_JUNK
##
##    continue
##    # exit
## fi
##########################################################

########################################################################
## If the directory $DIRNAME is NFS-mounted on host $HOST_ID,
## show a message on possible performance improvement in doing the
## query on a remote host-server --- and exit without running the query.
########################################################################
## $DIRNAME is quoted (locally, and inside the remote command string)
## so that a directory name with embedded blanks stays ONE argument.
########################################################################

THISHOST=$(hostname)

if test "$HOST_ID" = "$THISHOST"
then
   NFSCHECK=$(stat "$DIRNAME" | grep 'st_fstype: nfs')
else
   ## NOTE(review): mak_rhosts presumably prepares rsh access (.rhosts)
   ## --- confirm against that script.
   . "$FEDIR/scripts/mak_rhosts"
   ## 'stat' runs on the remote host; 'grep' runs locally on its output.
   NFSCHECK=$(rsh "$HOST_ID" "stat \"$DIRNAME\"" | grep 'st_fstype: nfs')
fi

if test "$NFSCHECK" != ""
then

   CONFIRM_TEXT="\
The directory name that you specified
   $DIRNAME
is NFS-mounted to the host you specified, $HOST_ID,
i.e. the directory is remote from the specified host, NOT 'local to' it.

USE A HOST FOR WHICH THE DIRECTORY IS IN A 'LOCAL' FILE SYSTEM.
I.E. USE A HOST THAT IS SERVING-OUT THE DIRECTORY.
"
   export CONFIRM_TEXT

   CONFIRM_GEOM="+080+080"
   CONFIRM_MINSIZE="550 250"
   CONFIRM_TITLE="$WINTITLE"
   export CONFIRM_TITLE CONFIRM_MINSIZE CONFIRM_GEOM

   CONFIRM_COLOR="#3DADFF"
   export CONFIRM_COLOR

   ## GOOD FIXED-WIDTH CANDIDATES (for env var CONFIRM_FONT):
   # set TEXT_FONT "-adobe-courier-bold-r-normal--*-120-*-*-m-92-*-*"
   # set TEXT_FONT "-adobe-courier-bold-r-normal--*-100-*-*-m-90-*-*"
   # set TEXT_FONT "-adobe-courier-medium-r-normal--*-100-*-*-m-90-*-*"
   # set TEXT_FONT "-misc-fixed-bold-r-normal--*-120-*-*-c-70-*-*"
   # set TEXT_FONT "-misc-fixed-bold-r-normal--*-120-*-*-c-80-*-*"
   # set TEXT_FONT "-misc-fixed-bold-r-normal--*-120-*-*-c-90-*-*"
   # set TEXT_FONT "-b&h-lucidatypewriter-bold-r-normal-sans-*-140-*-*-m-80-*-*"
   # set TEXT_FONT "-schumacher-clean-bold-r-normal--*-130-*-*-c-80-*-*"

   ## GOOD VARIABLE-WIDTH CANDIDATES (for env var CONFIRM_FONT):
   # set TEXT_FONT "-adobe-helvetica-bold-r-normal--*-100-*-*-p-82-*-*"
   # set TEXT_FONT "-adobe-times-bold-r-normal--*-100-*-*-p-76-*-*"
   # set TEXT_FONT "-adobe-palatino-bold-r-normal--*-100-*-*-p-81-*-*"
   # set TEXT_FONT "-adobe-new century schoolbook-bold-r-normal--*-100-*-*-p-87-*-*"
   # set TEXT_FONT "-adobe-utopia-regular-r-normal--*-100-*-*-p-75-*-*"
   # set TEXT_FONT "-adobe-utopia-medium-r-normal--*-100-*-*-p-75-*-*"

   CONFIRM_FONT="-adobe-helvetica-bold-r-normal--*-100-*-*-p-*-*-*"
   export CONFIRM_FONT

   ## The dialog's stdout is captured only to discard it; the dialog runs
   ## in the background so that this script can exit immediately.
   JUNK=$("$FEDIR/tkGUIs/confirm.tk" DISMISS) &

   exit

fi
## END OF if test "$NFSCHECK" != ""

#####################################################################
## Set a filename for report output, in $OUTLIST.
#####################################################################
## NOTE(review): set_localoutlist is expected to set OUTLIST --- confirm
## against that script.  Without a filename every redirection below
## would fail, so stop here with a message instead.
#####################################################################

. "$FEDIR/scripts/set_localoutlist"

if test "$OUTLIST" = ""
then
   echo "$0: OUTLIST was not set by $FEDIR/scripts/set_localoutlist. Exiting." >&2
   exit 1
fi

rm -f "$OUTLIST"

##########################################################################
## SET REPORT HEADING on size of sub-directories..
##########################################################################

echo "
................... $(date '+%Y %b %d %a %T%p') ............................

DISK USAGE OF FILES IN *ALL* THE SUB-DIRECTORIES UNDER

   ${HOST_ID}:$DIRNAME

   **** SORTED BY *SIZE* --- BIGGEST FILES AT THE TOP. ****

**************
  Disk usage                                Last-Modified
 (MegaBytes)   Permissions Owner    Group    Date-Time/Yr Filename
-------------- ----------- -------- -------- ------------ ----------------------
GigMeg.KilByt       |         |        |          |" > "$OUTLIST"

#####################################################################
## PERFORM THE QUERY -- on the specified host.
#####################################################################
## NOTE on filenames with embedded spaces and 'awk':
##
## The filename is everything after the 8th blank-separated field of
## the 'ls -l' record.  Printing '$9' would truncate a name with
## embedded blanks at its first blank.
##
## Earlier versions of this script located the filename with
##      COLfilnam = index($0,$9)  ...  substr($0,COLfilnam)
## but index() returns the FIRST place the text of $9 occurs in the
## record, so when the contents of field $9 also occurred inside one of
## the fields $1 thru $8, extra chars before the filename were printed.
##
## The awk program below instead copies the record and removes the
## first 8 fields (each with the blanks around it) from the front of
## the copy; what is left is the complete filename.
##
## This uses the awk 'sub' function, i.e. it needs a 'new'/POSIX awk.
## NOTE(review): confirm that 'awk' on the queried hosts provides sub().
##
## The program text is held in ONE shell variable so that the local and
## remote branches cannot drift apart.  It is kept on a single line and
## contains no single-quote characters, so it can be wrapped in single
## quotes inside the remote command string.
#########################################################################

LSFMT_AWK='{ fn = $0 ; for (i = 1; i <= 8; i++) sub(/^ *[^ ]+ +/, "", fn) ; '
LSFMT_AWK="$LSFMT_AWK"'printf ("%13.6f %-10s %-8s %-8s %-3s %2s %5s %s\n", $5/1000000, $1, $3, $4, $6, $7, $8, fn ) }'

if test "$HOST_ID" = "$THISHOST"
then

   ## FOR TESTING:
   # set -x

   ##OLD## $FEDIR/scripts/diruse_files_all_levs_sizesort $DIRNAME > $OUTLIST

   ## 'sort +4 -5nr' = numeric, descending sort on the 5th field (the
   ## size) in the old key syntax; see the 08apr2004 history entry on
   ## the IRIX 6.5.22 'sort -k' bug.
   find "$DIRNAME" -local -type f -exec ls -l {} \; | \
      sort +4 -5nr | \
      awk "$LSFMT_AWK" \
      >> "$OUTLIST"

   # sort -k5nr | \

   ## OLD VERSION that did not handle embedded blanks:
   # awk '{printf ("%13.6f %-10s %-8s %-8s %-3s %2s %5s %s\n", $5/1000000, $1, $3, $4, $6, $7, $8, $9 )}' \

   ## FOR TESTING:
   # set -

else

   . "$FEDIR/scripts/mak_rhosts"

   ## FOR TESTING:
   # set -x

   ##OLD## rsh $HOST_ID $FEDIR/scripts/diruse_files_all_levs_sizesort \
   ##OLD##     $DIRNAME > $OUTLIST

   ## We put quotes around the entire find|sort|awk pipe to get sort & awk
   ## to execute at the remote host, which is often a powerful server.
   ## The backslash-newlines inside the double quotes are removed by this
   ## shell, so the remote shell receives the pipe as a single line.

   rsh "$HOST_ID" "find \"$DIRNAME\" -local -type f -exec ls -l {} \; | \
      sort +4 -5nr | \
      awk '$LSFMT_AWK'" \
      >> "$OUTLIST"

   # sort -k5nr | \

   ## OLD VERSION that did not handle embedded blanks:
   # awk '{printf (\"%13.6f %-10s %-8s %-8s %-3s %2s %5s %s\n\", \$5/1000000, \$1, \$3, \$4, \$6, \$7, \$8, \$9 )}' " \

   ## FOR TESTING:
   # set -

fi
## END OF if test "$HOST_ID" = "$THISHOST"

########################################################################
## Add TRAILER to report.
########################################################################

echo "\
                    |         |        |          |
GigMeg.KilByt
-------------- ----------- -------- -------- ------------ ----------------------
 (MegaBytes)   Permissions Owner    Group    Date-Time/Yr Filename
  Disk usage                                Last-Modified
*************

................... $(date '+%Y %b %d %a %T%p') ............................

The SIZE-SORTED output above was generated by the script

   $0

which ran the 'find' and 'ls -l' commands on host $HOST_ID .

-----------------
PROCESSING METHOD:

A 'pipe' of several commands (find, ls, sort, awk) was used, of the form:

   find  -local -type f -exec ls -l {} \; | \\
      sort +4 -5nr | awk '{printf ( ... )}'

where {} represents a filename.

The Unix 'find' command was used to recursively travel through the
sub-directories of the specified directory
   $DIRNAME
and execute the 'ls -l {}' command.

NOTE: If the directory name you specify is actually a 'link' to an
      actual directory, the list above may be empty.
      Re-try, specifying the actual directory name.

------------
FEATURE NOTE:

This technique provides a list
   - without breaks at sub-directory 'section' names
     (which is what would happen if 'ls -lR' were used instead of
     the 'find ... -exec ls -l {}' command)
and, instead of 'relative' filenames, provides a list
   - with FULLY-QUALIFIED filenames.

I.e. a list is produced that is suitable for sorting (re-ordering
the records) and still have complete filenames for unambiguous
identification.

----------------------
IMPLEMENTATION METHODS:

This utility-script can be accessed via a drawer in a 'spacetools'
command-toolchest utility --- with a drawer-name like
   'Show FILE-SIZES 4aDir@AnyHost (ALL levels, SIZE-SORT)'.

The site toolchest-drawer hierarchy-path is

   SGI Toolchest ->
      HandyTools ->
         AnyHost - SpaceTools (Files) ->
            Show FILE-SIZES 4aDir@AnyHost (ALL levels, SIZE-SORT)

OR, you can implement the script (or 'spacetools') as
   - a command alias, via your \$HOME/.profile file;
   - a desktop icon, via the 'Find, File QuickFind' tool drawers;
   - a drawer in the SGI toolchest, via your \$HOME/.auxchestrc file.

........................................................................
" >> "$OUTLIST"

#####################################################################
## SHOW THE REPORT, with option to print.
#####################################################################

"$FEDIR/scripts/shofil" "$OUTLIST"

## done
########################################################################
## END OF prompting loop, for directory name. (Not implemented.)
########################################################################
## RETURN TO DIRECTORY PROMPT.
########################################################################