#!/bin/sh
##
## Nautilus
## SCRIPT: 10_oneSVGfile_EXTRACTpath-etcXYdata_egrep-fold-awk-sed-txteditor.sh
##
## PURPOSE: For a user-selected SVG (Scalable Vector Graphics) file,
##          the file contents are passed through 'egrep' ('grep -E')
##          and 'fold' and 'awk' and 'sed' utilities to extract and
##          put the numeric x,y coordinate data into a separate text file.
##
##          The x,y coordinate data is between '<path ' and '/>' markup
##          indicators --- and in some special SVG 'element' statements
##          such as '<rect ', '<line ', '<polyline ', '<circle ', and
##          '<text '.
##
##          The '<path ' and '/>' indicators may span multiple lines.
##          We may assume the others are on a single line.
##
##          Several types of character replacement are performed in
##          a 'path' statement's line of x,y data --- including:
##          - each occurrence of the 'm' or 'M' or 'c' or 'C'
##            and other such one-character commands is replaced by
##            that character preceded by a line-feed.
##            This helps break up long 'path' statements into much
##            shorter lines of x,y data pairs.
##
##          (NOTE: We do not attempt to replace spline data with
##           data giving an accurate rendering of the splines. We
##           simply pass along the control-point data --- hoping
##           that that data is near enough the actual spline curve.
##           However, that is often not the case.)
##
##          The output of 'grep -E' and a pipeline of 'fold', 'awk',
##          and 'sed' commands is directed into a separate text file ---
##          in the same directory as the input file.
##
##          (An alternative is to put the output file into the /tmp
##           directory. This might be advisable if the output files
##           are typically huge. But most SVG files are not massive.)
##
## NOTE:
##     The SVG coordinate data between '<path ' and '/>' markup
##     indicators can often go on for thousands of characters in a
##     single line.
##     We basically want to 'fold' these extra-long lines
##     into lines with relatively few pairs of x,y numeric values.
##     This is much more manageable to manually edit for
##     final touchup into 2 columns of x,y numbers.
##
## HOW TO USE: In Nautilus, select a SVG (text) file in a directory.
##             (The selected file should NOT be a directory.)
##             Right click and, from the 'Scripts >' submenus,
##             choose to run this script (name above).
##
##             The first argument ($1) is the selected filename.
##             When the script is started without any argument,
##             there is nothing to process and it does nothing.
##
## Created: 2017sep17 Based on the FE Nautilus 'MAPtools' script
##                    '00_oneKMLfile_REFORMAT-FoldLines_sed-txteditor.sh'
##                    and the FE Nautilus 'HTMLtools' script
##                    '02_anyfile_HTMLfile-REMOVE-SCRIPTs-STYLEs_sed-awk-txteditor.sh'
## Changed: 2017

## FOR TESTING: (show statements as they execute)
# set -x

##############################################
## Set maximum length for lines in input to 'awk'.
## Set maximum length for lines in output file.
##############################################

MAXCHARSperLINEin=3071
MAXCHARSperLINEout=80


####################################################
## svg_fileext FILENAME
##
## Print the extension of FILENAME --- the text after
## the LAST '.' of the last path component. Print an
## empty line when that component contains no '.'.
## (Dots in directory names are ignored.)
####################################################

svg_fileext() {
   case "${1##*/}" in
      *.*) printf '%s\n' "${1##*.}" ;;
      *)   printf '\n' ;;
   esac
}


####################################################
## svg_midname FILENAME
##
## Print the 'midname' of FILENAME --- FILENAME (with
## its directory part, if any) minus the last '.' and
## the extension. A name without an extension is
## printed unchanged.
####################################################

svg_midname() {
   case "${1##*/}" in
      *.*) printf '%s\n' "${1%.*}" ;;
      *)   printf '%s\n' "$1" ;;
   esac
}


#######################################################
## svg_extract_xy FILENAME
##
## Write the extracted data of SVG file FILENAME to stdout.
##
## The 'grep -E' and the fold-awk-sed pipe do the following:
##  0) Use 'grep -E' to copy the following 'element' lines
##     to the output:
##       '<rect ', '<line ', '<polyline ', '<circle ', '<text '
##     We assume each is on one line.
##  1) Use 'fold' to fold lines longer than $MAXCHARSperLINEin.
##     NOTE: Some 'awk' implementations fail with an
##           'Input record too long' error that stops awk dead
##           when a line is longer than about 3071 chars.
##           The 'fold' prevents that. The '-s' option makes
##           'fold' break at a blank when there is one, so that
##           a number is not cut in two.
##  2) Use an 'awk' program to print '<path' ... '/>'
##     sections even if they span more than one line.
##  3) Use 'sed' to remove path parameters like
##           id="..." and style="..."
##     which would be broken up by the following 'sed' steps.
##     'transform' and 'sodipodi:' parameters are replaced by
##     '#X' and '#Y' marker lines.
##
##     NOTE: The regular expression "[^"]*" removes
##     the double-quotes, and the stuff between them, up to
##     the SECOND double-quote --- not to a LAST double-quote
##     in the line. Ref: page 54 of O'Reilly 'Sed and Awk' book.
##  4) Remove '<path' and '/>' and double-quote characters.
##     ('<path' must go before step 5 --- it contains the
##      letters 'a', 't' and 'h'.)
##  5) Replace each occurrence of the 'm' or 'M' or 'c' or 'C'
##     or 'v' or 'V' or 'l' or 'L' etc. 'path' characters by a
##     line-feed followed by the character.
##     Also replace '-' by ' -'.
##     (This also splits a number written with a negative
##      exponent, like 1e-5.)
##  6) Replace each comma character by a space character.
##  7) Replace each occurrence of one or more space characters
##     with exactly one space character.
##  8) Trim a leading or trailing space and remove blank lines.
##     (Done in a second 'sed', which sees the line-feeds
##      inserted above as real line ends.)
##  9) Use 'fold' to fold lines longer than $MAXCHARSperLINEout.
##
## The '\n' in the 'sed' replacement strings requires GNU 'sed'.
##
## NOTE(review): the markup literals ('<path', '/>', the element
##     list) and the command-letter 'sed' step were restored from
##     this script's own description of its processing steps ---
##     confirm against a pristine copy of the script.
#######################################################

svg_extract_xy() {

   grep -E '<(rect|line|polyline|circle|text) ' -- "$1"

   fold -b -s -w "$MAXCHARSperLINEin" < "$1" | \
   awk '
   BEGIN { GetNEXTpathLine0or1 = 0 }
   {
      ## If this is a continued "<path" section, check whether
      ## this line holds the end-indicator "/>". If so, set the
      ## GET flag to 0. In any case, print this "path" line.
      if ( GetNEXTpathLine0or1 == 1 ) {
         if ( index($0, "/>") != 0 ) { GetNEXTpathLine0or1 = 0 }
         print
         next
      }

      ## Check if this is a "<path" line. If so, print it.
      ## If it does not also contain the end-indicator "/>",
      ## set the GET flag to 1 so that the following lines are
      ## printed too.
      if ( index($0, "<path") != 0 ) {
         print
         if ( index($0, "/>") != 0 ) {
            GetNEXTpathLine0or1 = 0
         } else {
            GetNEXTpathLine0or1 = 1
         }
      }

      ## No print of other lines --- they are not part of
      ## a "path" section.
   }' | \
   sed -e 's|id="[^"]*"||g' \
       -e 's|style="[^"]*"||g' \
       -e 's|transform="[^"]*"|\n#X|g' \
       -e 's|sodipodi:[^"]*="[^"]*"|\n#Y|g' \
       -e 's|<path||g' \
       -e 's|/>||g' \
       -e 's|"||g' \
       -e 's|[MmLlHhVvCcSsQqTtAaZz]|\n&|g' \
       -e 's|-| -|g' \
       -e 's|,| |g' \
       -e 's|[ ][ ]*| |g' | \
   sed -e 's|^ ||' \
       -e 's| $||' \
       -e '/^$/d' | \
   fold -b -s -w "$MAXCHARSperLINEout"
}


#######################################################
## main FILENAME
##
## Show the informative message, check the selected file,
## make the output file and show it in a text editor.
#######################################################

main() {

   ############################################
   ## Set a couple of input filename variables.
   ############################################

   FILENAME="$1"
   BASENAME=$(basename -- "$FILENAME")

   ############################################
   ## Set a couple of script name variables.
   ############################################

   SCRIPTDIRNAME=$(dirname -- "$0")
   SCRIPTBASENAME=$(basename -- "$0")

   ############################################
   ## Show an informative message with 'zenity'.
   ############################################

   Q1="'"
   LT="<"

   MSG2USER="\
This utility expects the file you chose:
   $BASENAME
to be an SVG (Scalable Vector Graphics) file.

This script processes the file contents with the ${Q1}egrep${Q1}
and ${Q1}fold${Q1} and ${Q1}awk${Q1} and ${Q1}sed${Q1} utilities --- to reformat
the data into a separate text file.

The x,y coordinate data between ${Q1}${LT}path ${Q1} and ${Q1}/>${Q1}
markup indicators --- and in some special SVG ${Q1}element${Q1}
statements such as ${Q1}${LT}rect ${Q1}, ${Q1}${LT}line ${Q1}, ${Q1}${LT}polyline ${Q1},
${Q1}${LT}circle ${Q1}, and ${Q1}${LT}text ${Q1} --- are retained.

Some extensive manual editing will typically be required
to create a file of x,y data in two columns -- for defining
line-segments --- especially if RELATIVE (lower-case) SVG
commands are used.

The output file may include some ${Q1}left-over${Q1} character strings.

The main intent of this utility is to make a text file that is
easily edited to contain only a pair of x,y coordinate numbers
per line --- along with a comment line or two at the top that may
incorporate some description data, like the input filename ---
and comment lines to separate ${Q1}loops${Q1} of line segments.

A major purpose of this utility is to ${Q1}fold${Q1} extra-long lines
of coordinate data into relatively short lines containing
x,y coordinate numbers.

The user-selected file is processed and the output is put in
a file with a string like ${Q1}_EXTRACTED${Q1} appended to the midname
of the user-selected file.

The output file is put in the same directory with the
original selected file.

This script is:
   $SCRIPTBASENAME
in directory
   $SCRIPTDIRNAME"

   ## FOR TESTING:
   #  echo "$MSG2USER"

   zenity --warning --width=600 --no-wrap \
      --title "SVG file processor - EXTRACT x,y DATA lines" \
      --text "$MSG2USER"

   ###############################################
   ## Exit if the selected file is a directory.
   ###############################################

   if test -d "$FILENAME"
   then
      exit
   fi

   ####################################################
   ## Get the file extension and check that it is not
   ## blank. Skip the filename if it has no extension.
   ## (The extension is whatever follows the LAST '.'
   ##  in the name of the file itself.)
   ####################################################

   FILEEXT=$(svg_fileext "$FILENAME")

   if test "$FILEEXT" = ""
   then
      exit
   fi

   ####################################################
   ## Exit if the file extension is not 'svg'.
   ## COMMENTED, for now.
   ####################################################

   # if test "$FILEEXT" != "svg"
   # then
   #    exit
   # fi

   ####################################################
   ## Get the 'midname' of the file, the part before
   ## the period and the extension.
   ####################################################

   MIDNAME=$(svg_midname "$FILENAME")

   ###################################
   ## Make the output filename.
   ###################################

   OUTNAME="${MIDNAME}_EXTRACTEDsvgXYdata.txt"

   rm -f -- "$OUTNAME"

   ##############################################################
   ## Make the new output file:
   ## the input filename in a comment line at the top,
   ## followed by the extracted 'element' and 'path' data.
   ##############################################################
   ## Could try running the command in a window,
   ## to see err msgs, if any.
   ## Could use zenity to offer this as an option.
   ##
   ##    xterm -fg white -bg black -hold -e \
   ##############################################################

   ## FOR TESTING:
   #  set -x

   {
      printf '# %s\n' "$FILENAME"
      svg_extract_xy "$FILENAME"
   } > "$OUTNAME"

   ## FOR TESTING:
   #  set -

   ###################################
   ## Show the output file.
   ###################################

   ## . $HOME/.gnome2/nautilus-scripts/.set_VIEWERvars.shi

   . "$HOME/.freedomenv/feNautilusScripts/set_DIR_NautilusScripts.shi"
   . "$DIR_NautilusScripts/.set_VIEWERvars.shi"

   # $TXTVIEWER "$OUTNAME" &

   ## $TXTEDITOR is left unquoted on purpose, in case it holds
   ## a command name plus options.
   $TXTEDITOR "$OUTNAME" &
}


#######################################################
## Run the utility only when a file was selected.
#######################################################

if test $# -gt 0
then
   main "$@"
fi