#!/bin/ksh
##
## SCRIPT NAME: findfils4type_basename_bygui
##
## Where: in $FEDIR/scripts   where $FEDIR=/apps/nns_com/fea
##
##############################################################################
## PURPOSE:
##   Finds files for a given file-type and for a host:directory
##   'start point'.  Looks through all the sub-directories --- for
##   files that are LOCAL to the specified host.
##
##   Example host|directory|file-type:  engprd00, /data/e46, executable
##
##   More specifically, this script creates a list of files whose type
##   (determined by the 'file' command) contains a specified keyword,
##   like 'executable' or 'text' or 'data' or 'GIF' or 'JPEG' or ...
##
## -------------------------------------------------------------------------
##
## NOTE: THIS LISTING HAS THE 'basename' OF THE SELECTED FILES SEPARATED
##       INTO COLUMN 1 --- SO THAT THE LIST CAN BE SORTED BY 'basename'.
##
##       For example, this can be used to look for duplicate executables
##       scattered among sub-directories of a given directory.  By
##       showing both name and file size (in bytes), one can determine
##       if files with duplicate names are likely to have identical contents.
##
##       I.e. this utility (and basename-sort method) can QUICKLY find
##       copies of the same-named or similarly-named executables
##       in scattered sub-directories of a given directory.
##############################################################################
## IMPLEMENTATION NOTE: THIS IS A PRELIMINARY IMPLEMENTATION.
##
##                      COULD USE CHANGES TO HEAD OFF
##                      COMPLETE TRAVERSAL OF HUGE
##                      DIRECTORIES WITH 'find'.
##
##############################################################################
## ORIGINAL INTENT:
##        The original intent was to find duplicate executables
##        in user-department/project directories (like /data/e46)
##        on a 'File Server' like 'engprd00'.
##
##############################################################################
##
## CALLED BY:  'findtools'
##             in $FEDIR/scripts
##
##             May eventually be called by nnsFEAmenu option 'u f ??', i.e.
##             feamain -> feautils.menu -> fileman.menu
##
##             See implementation notes in 'findtools'.
##
##############################################################################
## POSSIBLE ENHANCEMENTS FOR MORE AUTOMATION:
##
## 1a) With some userid selecting logic (and mail-address-finding/checking
##     logic), this script could automatically generate e-mail
##     to users who have many duplicate files in scattered sub-directories
##     under a given directory.
##
## 1b) Or the e-mail could be sent to 'ideasadm' or 'caeadm' or 'root' userids,
##     or specific Administrator ids (like 'bmo01' or 'tkj00')
##     for manual notification of the appropriate users.  The notification
##     of 'userids' could be done with a userid/to/mail-address lookup aid,
##     like a lookup aid in the 'mailtools' utility-cmd in $FEDIR/scripts.
##
###########################################################################
## NOTE: You can see only the non-commented, executable lines of this
##       script by using
##                         egrep -v '^ *##|^ *# '
##       or
##                         grep -v '^ *##' | grep -v '^ *# '
##############################################################################
## MAINTENANCE HISTORY:
## Written by: B.Montandon O06 24Aug2000 Based on preliminary script
##                                       'find_files4filetype'
##                                       in $FEDIR/scripts
##
## Updated by: B.Montandon O06  6oct2000 Chg default prompts to
##                                       engprd00, /data/e46, executable.
##                                       Add quotes around the 'rsh find' pipe
##                                       to fix 'incomplete statement' error.
##
## Updated by: Blaise Montandon 29apr2003 Replace Tk-GUI script
##                                 'enter_hostid_dir_string.tk' by
##                                 'enter_hostid_dir_string_toghelp-scroll.tk'.
##############################################################################
##############################################################################
## SET APPS DIRECTORY NAME --- for utility scripts, if any ---
## in case this script is not called from within nnsFEAmenu script system.
##############################################################################

if test "$FEDIR" = ""
then
   FEDIR="/apps/nns_com/fea"
fi

##############################################################################
## SET MAIN SERVER NAME --- used in window messages only.
## (The user sets the host where the directory is actually queried.)
##############################################################################

SERVER="engprd00"

##############################################################################
## SET LIMIT ON NUMBER OF FILES FOR WHICH FILE INFO IS FOUND,
## which can be slow processing, at one command issued per file.
##############################################################################

## FILELIM=10     ## FOR TESTING.
FILELIM=2000

##############################################################################
## SET LOCAL HOST NAME FOR MESSAGES & REPORTS & LOCAL-HOST-CHECKS.
##############################################################################

THISHOST=`hostname`

##############################################################################
## HELPER: show_confirm  TEXT  MINSIZE
##
## Exports the CONFIRM_* environment variables and starts the 'confirm.tk'
## popup in the background with a single DISMISS button.
##   $1 - message text for the popup (exported as CONFIRM_TEXT)
##   $2 - minimum window size, e.g. "350 150" (exported as CONFIRM_MINSIZE)
## Reads the globals FEDIR and WINTITLE.  The caller decides whether to
## exit afterwards; every call site in this script does.
##
## GOOD FIXED-WIDTH CANDIDATES (for env var CONFIRM_FONT):
# set TEXT_FONT "-adobe-courier-bold-r-normal--*-120-*-*-m-92-*-*"
# set TEXT_FONT "-adobe-courier-bold-r-normal--*-100-*-*-m-90-*-*"
# set TEXT_FONT "-adobe-courier-medium-r-normal--*-100-*-*-m-90-*-*"
# set TEXT_FONT "-misc-fixed-bold-r-normal--*-120-*-*-c-70-*-*"
# set TEXT_FONT "-misc-fixed-bold-r-normal--*-120-*-*-c-80-*-*"
# set TEXT_FONT "-misc-fixed-bold-r-normal--*-120-*-*-c-90-*-*"
# set TEXT_FONT "-b&h-lucidatypewriter-bold-r-normal-sans-*-140-*-*-m-80-*-*"
# set TEXT_FONT "-schumacher-clean-bold-r-normal--*-130-*-*-c-80-*-*"
## GOOD VARIABLE-WIDTH CANDIDATES (for env var CONFIRM_FONT):
# set TEXT_FONT "-adobe-helvetica-bold-r-normal--*-100-*-*-p-82-*-*"
# set TEXT_FONT "-adobe-times-bold-r-normal--*-100-*-*-p-76-*-*"
# set TEXT_FONT "-adobe-palatino-bold-r-normal--*-100-*-*-p-81-*-*"
# set TEXT_FONT "-adobe-new century schoolbook-bold-r-normal--*-100-*-*-p-87-*-*"
# set TEXT_FONT "-adobe-utopia-regular-r-normal--*-100-*-*-p-75-*-*"
# set TEXT_FONT "-adobe-utopia-medium-r-normal--*-100-*-*-p-75-*-*"
##############################################################################

show_confirm()
{
   CONFIRM_TEXT="$1"
   CONFIRM_MINSIZE="$2"
   CONFIRM_GEOM="+080+080"
   CONFIRM_TITLE="$WINTITLE"
   CONFIRM_COLOR="#3DADFF"
   CONFIRM_FONT="-adobe-helvetica-bold-r-normal--*-100-*-*-p-*-*-*"
   export CONFIRM_TEXT CONFIRM_MINSIZE CONFIRM_GEOM CONFIRM_TITLE
   export CONFIRM_COLOR CONFIRM_FONT

   ## Run in the background so this script can exit while the popup stays up.
   JUNK=`$FEDIR/tkGUIs/confirm.tk DISMISS` &
}

########################################################################
## GET HOSTID and DIRECTORY NAME and STRING (FILENAME MASK).
########################################################################

WINTITLE=\
"Files_under_Host:Dirname (ALL dir levels) -- BASENAMES FOR A GIVEN FILE-TYPE"
export WINTITLE

WIN_INFO="\
Enter the name of an SGI host on the NNS network and a directory name.
At the String prompt, enter a file-type or portion thereof (explained below).
--------
This utility shows the FILENAMES under the specified host:directory ---
in ALL SUB-directories of that directory.

It shows filenames that match the SPECIFIED FILE TYPE,
as determined by the Unix 'file' command.

BASICALLY, THERE ARE THREE FILE-TYPES:  text, executable, and 'data'.

THE LIST IS SORTED BY FILE 'basename', IRRESPECTIVE OF ITS DIRECTORY.
THIS HELPS LOOK FOR DUPLICATES and files with similar name prefixes.
--------
For fast performance, it may be BEST TO SPECIFY THE HOST AT WHICH
THE DIRECTORY RESIDES.     Reference: The 'ManyDirs' button.

IN FACT, THIS UTILITY RESTRICTS ITS ATTENTION TO FILES 'LOCAL TO' THE
SPECIFIED HOST --- for better performance AND to reduce likelihood of
following (unintentionally) links to huge directories that are mounted
from another (remote) host --- like ${SERVER}:/usr/people.
--------
Some example file-type strings, to use for matching, follow.

TEXT:  'text' OR 'ascii text' OR 'c program text' OR 'English text' OR
       'commands text' OR 'c program text with garbage'

EXE's: 'executable' OR 'ELF N32 MSB mips-4 dynamic executable ...'

DATA:  'data' OR 'image' OR 'SGI imagelib' OR 'GIF' OR 'JPEG' OR
       'compressed' OR 'compressed data' OR 'tar'

Note: When the 'file' command returns 'data', it means the file contains
      'binary' (NOT text-only) data, like numbers in
      easily-machine-readable, not easily-human-readable, format.

Directories:
This utility could be used to search
your home directory --- ${SERVER}:$HOME ;
or a local scratch directory --- ${THISHOST}:/local/scratch/$USER ;
or a /data sub-directory ---
       ${SERVER}:/data/subs/cae/$USER or
       ${SERVER}:/data/e46 or
       ${SERVER}:/data/foundry or
       ${SERVER}:/data/cvn##/ideas/team or
       ${SERVER}:/data/cvn##.
--------
Note that you can expand this window to enter-and-see the full directory name.

You can do queries on multiple hosts/directories and bring the report
windows side-by-side for comparison.

For moderate sized directories, this report is typically generated in
10 to 30 seconds.   If there are MANY sub-directories & files,
IT MAY TAKE MORE THAN A MINUTE OR TWO.

USE THIS UTILITY SPARINGLY ON BIG DIRECTORIES, like ${SERVER}:/usr/people."
export WIN_INFO

## WIN_GEOM="+40+40"
## export WIN_GEOM
##
## WIN_MINWIDTH1="620"
## export WIN_MINWIDTH1
##
## WIN_MINHEIGHT1="130"
## export WIN_MINHEIGHT1
##
## WIN_COLOR="#AAAAAA"
## export WIN_COLOR

################################
## SET DEFAULT HOST & DIR NAMES.
################################

# HOST_ID="`hostname`"
# DIRNAME="/local/scratch"
# STRING="text"

HOST_ID="engprd00"
DIRNAME="/data/e46"
STRING="executable"

# HOST_ID="engprd00"
# DIRNAME="$HOME"
# STRING="text"

export HOST_ID DIRNAME STRING

##############################################################################
## PROMPTING LOOP -- for host:directory-name:file-type-string.
## (The loop itself is currently disabled; the script does one query.)
##############################################################################
## # while true
## while :
## do

##############################################################################
## GET host:directory-name:file-type-string.
##
## NOTE(review): 'eval' executes whatever the GUI script prints ---
## presumably assignments to HOST_ID, DIRNAME and STRING; confirm against
## the .tk script before letting it emit any user-typed text unquoted.
##############################################################################

# TEMP=`$FEDIR/tkGUIs/enter_hostid_dir_string.tk`
TEMP=`$FEDIR/tkGUIs/enter_hostid_dir_string_toghelp-scroll.tk`

eval "$TEMP"

## FOR TESTING:
#  echo $TEMP

## An empty value in any of the three fields ends the script quietly.
if test "$HOST_ID" = ""
then
   exit
fi

if test "$DIRNAME" = ""
then
   exit
fi

if test "$STRING" = ""
then
   exit
fi

#####################################################################
## CHECK THAT THE DIRNAME IS ACCESSIBLE/EXISTS.
## NOTE(review): this test runs on the LOCAL host even when $HOST_ID
## is a remote host, so the path must also resolve locally.
#####################################################################

if test ! -d "$DIRNAME"
then
   show_confirm "\
Specified Directory: $DIRNAME

Not found or does not exist.
" "350 150"
   exit
fi

## if test "$DIRNAME" = "/"
## then
##    echo "
##    Try a lower level directory than the root directory (/).
##
##    *** Press Enter to continue."
##    read NNS_JUNK
##
##    continue
##    # exit
## fi

########################################################################
## If the directory $DIRNAME is NFS-mounted on host $HOST_ID,
## show a message on possible performance improvement in doing the
## query on a remote host-server.
########################################################################

## THISHOST=`hostname`    ## WAS SET ABOVE.

if test "$HOST_ID" = "$THISHOST"
then
   NFSCHECK=`stat "$DIRNAME" | grep 'st_fstype: nfs'`
else
   . $FEDIR/scripts/mak_rhosts
   NFSCHECK=`rsh "$HOST_ID" stat "$DIRNAME" | grep 'st_fstype: nfs'`
fi

if test "$NFSCHECK" != ""
then
   show_confirm "\
The directory name that you specified
    $DIRNAME
is NFS-mounted to the host you specified, $HOST_ID,
i.e. the directory is remote from the specified host, NOT 'local to' it.

USE A HOST FOR WHICH THE DIRECTORY IS IN A 'LOCAL' FILE SYSTEM.
I.E. USE A HOST THAT IS SERVING-OUT THE DIRECTORY.
" "550 250"
   exit
fi

#####################################################################
## SET A FILENAME FOR REPORT OUTPUT, in $OUTLIST.
#####################################################################

. $FEDIR/scripts/set_localoutlist

## Without a report filename every redirection below would fail.
if test "$OUTLIST" = ""
then
   echo "$0: OUTLIST was not set by set_localoutlist; exiting." >&2
   exit 1
fi

rm -f "$OUTLIST"

#####################################################################
## PREPARE REPORT HEADING, in $OUTLIST.
## Column positions match the 'awk' printf format used further below.
#####################################################################

echo "\
............................. `date '+%Y %b %d %a %T%p %Z'` .................

LISTING OF FILES WHOSE 'file'-TYPE INCLUDES THE STRING:  $STRING

AT ALL LEVELS, UNDER THE DIRECTORY  $DIRNAME

( ONLY FILES *LOCAL TO* HOST  $HOST_ID )

*** SORTED BY FILE BASENAME. ***   (Helps find duplicate files.)

( TYPICALLY, PRINT THIS LANDSCAPE. )
..............................................................................................................................

Truncates at 28 chars      |               Owner    FileSize(Meg) Last Modify  FileType via
File Basename              V Permissions   Userid   GigMeg.KilByt Date-Time    'file' command   Directory
---------------------------- ----------    -------- |  |   |  |   ------------ ---------------- ------------------------------
" > "$OUTLIST"

## This query was run on Host: $HOST_ID

#########################################################################
## COLLECT FULLY-QUALIFIED FILENAMES FOR FILES WHICH MATCH $STRING,
## AT ANY LEVEL UNDER THE GIVEN $DIRNAME.
#########################################################################
## Could build a counter into this and stop if too many found within
## the 'find' command.
##
## The grep pattern is anchored after the ':' that 'file' prints between
## the pathname and the type, so a keyword that merely occurs in a path
## name does not select the file.
##
## $CMD holds the pipeline as text.  It is what the remote shell executes
## in the 'rsh' case, and it is shown in the report trailer.  Single
## quotes around the pattern keep multi-word types (like 'ascii text')
## together on the remote side.
#########################################################################

## FOR TESTING:
#  set -x

## FOR TESTING:
#  echo "FINDING FILES WHOSE TYPE MATCHES $STRING."

CMD="find $DIRNAME -local -type f -exec file {} \; | grep ':.*$STRING' | cut -d: -f1"

if test "$HOST_ID" = "$THISHOST"
then
   FILELIST=`find "$DIRNAME" -local -type f -exec file {} \; | \
             grep ":.*$STRING" | cut -d: -f1`
else
   . $FEDIR/scripts/mak_rhosts
   FILELIST=`rsh "$HOST_ID" "$CMD"`
   CMD="rsh $HOST_ID \"$CMD\""
fi

#########################################################################
## EXIT IF 'TOO MANY' FILES FOUND.
## NOTE: names are counted (and looped over below) as whitespace-separated
## words, so pathnames containing blanks are not supported.
#########################################################################

FILECNT=`echo "$FILELIST" | wc -w`

## FILELIM=2000      ## SET AT THE TOP OF THIS SCRIPT.

if test $FILECNT -gt $FILELIM
then
   show_confirm "\
Files Basename Query using
     host     : $HOST_ID
     directory: $DIRNAME
     file-type: $STRING

More than $FILELIM files of type $STRING were found: $FILECNT

EXITING --- without
    1) doing 'ls -l' for each file,
    2) separating out file-basename
    3) formatting report lines and
    4) doing sort.

TRY A QUERY ON A SMALLER/LOWER-LEVEL DIRECTORY.
" "500 50"
   exit
fi

#########################################################################
## COLLECT 'ls -l' INFO FOR THE COLLECTED FULLY-QUALIFIED FILENAMES.
##
## 'ls' and 'file' run on the LOCAL host, even for a remote $HOST_ID.
##
## The per-file directory goes into FILEDIR, not DIRNAME, so that the
## user-specified $DIRNAME is still intact for the report trailer.
#########################################################################

## FOR TESTING:
#   echo "FINDING FILE INFO (basename,dirname,'ls -l',file) FOR FILES
#     WHOSE TYPE MATCHES $STRING."

UNSORTEDLIST="${OUTLIST}_unsorted"
rm -f "$UNSORTEDLIST"

## FOR TESTING:
#  set -x

for FILE in $FILELIST
do
   BASENAME=`basename "$FILE"`
   FILEDIR=`dirname "$FILE"`
   FILEINFO=`ls -l "$FILE"`
   ## Keep 16 chars of the type text, blanks turned into '_' so that the
   ## type stays a single awk field.
   FILETYPE=`file "$FILE" | cut -d: -f2 | cut -c2-17 | tr ' ' '_'`

   # BASENAMECUT=`echo "$BASENAME                     " | cut -c1-28`
   # FILETYPECUT=`echo "$FILETYPE                     " | cut -c1-16`
   # echo "$BASENAMECUT" "$FILEINFO" "$FILETYPECUT" "$FILEDIR" >> $UNSORTEDLIST

   ## awk fields: 1=basename 2=permissions 3=links 4=owner 5=group 6=bytes
   ##             7-9=date/time 10=full name 11=file-type 12=directory
   echo "$BASENAME" "$FILEINFO" "$FILETYPE" "$FILEDIR" | awk \
      '{printf ("%-28s %-13s %-8s %13.6f %3s %2s %5s %-16s %s\n",
                $1, $2, $4, $6/1000000, $7, $8, $9, $11, $12)}' >> "$UNSORTEDLIST"
done

## FOR TESTING:
#  set -

#####################################################################
## SORT THE FILE INFO LIST, BY FILE BASENAME.
## ATTACH THIS OUTPUT TO THE REPORT.
#####################################################################

## FOR TESTING:
#  cat $UNSORTEDLIST >> $OUTLIST

## FOR TESTING:
#  echo "STARTING SORT OF REPORT."

## The scratch file does not exist when no files matched.
if test -f "$UNSORTEDLIST"
then
   sort -k1 "$UNSORTEDLIST" >> "$OUTLIST"
   rm -f "$UNSORTEDLIST"
fi

########################################################################
## Add TRAILER to report.
########################################################################

echo "
............................. `date '+%Y %b %d %a %T%p %Z'` .................

Number of files found: $FILECNT

The output above was generated by the script
   $0
--------
The Unix 'find' command was used to recursively travel through the
sub-directories of the specified directory
   $DIRNAME
and to provide a list
  - without breaks with sub-directory 'section' names
    (such as the list produced by 'ls -lR') and, instead,
  - with fully-qualified filenames.
I.e. a list is produced that is suitable for sorting.
--------
Command used:
   $CMD
--------
IMPLEMENTATION METHODS:

You can implement the script as
  - a command alias, via your .profile file;
  - a desktop icon, via the 'Find, File QuickFind' tool drawers;
  - a drawer in the SGI toolchest, via your .auxchestrc file.

Or the script could be accessed via a drawer in a
'findtools' or 'filetools' command-utility.

This report COULD be generated via nnsFEAmenu option 'u f f?'
(Utilities, File-tools, Find-File).
........................................................................
" >> "$OUTLIST"

#####################################################################
## SHOW THE REPORT, with option to print.
#####################################################################

. $FEDIR/scripts/shofil "$OUTLIST"

########################################################################
## RETURN TO DIRECTORY PROMPT.
########################################################################
## done
########################################################################
## END OF prompting loop, for directory name.
########################################################################