#!/bin/sh
##
## Nautilus
## SCRIPT: 10_oneSVGfile_EXTRACTpath-etcXYdata_egrep-fold-awk-sed-txteditor.sh
##
## PURPOSE: For a user-selected SVG (Scalable Vector Graphics) file,
## the file contents are passed through 'egrep' and 'fold' and 'awk'
## and 'sed' utilities to extract and put the numeric
## x,y coordinate data into a separate text file.
##
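## The x,y coordinate data is between '<path ' and '/>' markup
## indicators --- and in some special SVG 'element' statements such as
## '<rect ', '<line ', '<polyline ', '<circle ', and '<text '.
## A '<path ' statement and its closing '/>' indicator may span multiple lines.
## We may assume the others are on a single line.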
##
## Several types of character replacement are performed in
## a 'path' statement's line of x,y data --- including:
## - each occurrence of the 'm' or 'M' or 'c' or 'C'
## and other such one-character commands is replaced
## by that character preceded by a line-feed.
## This helps break up long 'path' statements into much
## shorter lines of x,y data pairs.
##
## (NOTE: We do not attempt to replace spline data with
## data giving an accurate rendering of the splines. We
## simply pass along the control-point data --- hoping
## that that data is near enough the actual spline curve.
## However, that is often not the case.)
##
## The output of 'egrep' and a pipeline of 'fold', 'awk',
## and 'sed' commands is directed into a separate text file ---
## in the same directory as the input file.
##
## (An alternative is to put the output file into the /tmp
## directory. This might be advisable if the output files
## are typically huge. But most SVG files are not massive.)
##
## NOTE:
## The SVG coordinate data between '<path ' and '/>' markup indicators
## can often go on for thousands of characters in a single line.
## We basically want to 'fold' these extra-long lines
## into lines with relatively few pairs of x,y numeric values.
## This is much more manageable to manually edit for
## final touchup into 2 columns of x,y numbers.
##
## HOW TO USE: In Nautilus, select a SVG (text) file in a directory.
## (The selected file should NOT be a directory.)
## Right click and, from the 'Scripts >' submenus,
## choose to run this script (name above).
##
## Created: 2017sep17 Based on the FE Nautilus 'MAPtools' script
## '00_oneKMLfile_REFORMAT-FoldLines_sed-txteditor.sh'
## and the FE Nautilus 'HTMLtools' script
## '02_anyfile_HTMLfile-REMOVE-SCRIPTs-STYLEs_sed-awk-txteditor.sh'
## Changed: 2017
## FOR TESTING: (show statements as they execute)
# set -x
############################################
## Set a couple of input filename variables.
############################################
FILENAME="$1"
## The expansion is quoted so that a selected name containing spaces
## is passed to 'basename' as ONE operand (unquoted, the second word
## would be taken as a suffix to strip).
BASENAME=$(basename -- "$FILENAME")
############################################
## Set a couple of script name variables.
## (Quoted for the same reason: the script path may contain spaces.)
############################################
SCRIPTDIRNAME=$(dirname -- "$0")
SCRIPTBASENAME=$(basename -- "$0")
##############################################
## Set maximum length for lines in input to 'awk'.
## Set maximum length for lines in output file.
##############################################
MAXCHARSperLINEin=3071
MAXCHARSperLINEout=80
############################################
## Show an informative message with 'zenity'.
############################################
## Q1 and LT hold the single-quote and less-than characters.
Q1="'"
LT="<"
## Build the text of the introductory dialog. Single-quote and '<'
## characters are literal inside a double-quoted string, so they are
## written directly here. The only expansions in the text are
## $BASENAME, $SCRIPTBASENAME and $SCRIPTDIRNAME.
MSG2USER="\
This utility expects the file you chose:
$BASENAME
to be an SVG (Scalable Vector Graphics) file. This script processes
the file contents with the 'egrep' and 'fold' and 'awk' and
'sed' utilities --- to reformat the data into a separate text file.
The x,y coordinate data between '<path ' and '/>' markup indicators
--- and in some special SVG 'element' statements such as '<rect ',
'<line ', '<polyline ', '<circle ', and '<text ' --- are retained.
Some extensive manual editing will typically be required to create
a file of x,y data in two columns -- for defining line-segments
--- especially if RELATIVE (lower-case) SVG commands are used.
The output file may include some 'left-over' character strings.
The main intent of this utility is to make a text file that is
easily edited to contain only a pair of x,y coordinate numbers
per line --- along with a comment line or two at the top that
may incorporate some description data, like the input filename
--- and comment lines to separate 'loops' of line segments.
A major purpose of this utility is to 'fold' extra-long
lines of coordinate data into relatively short lines containing
x,y coordinate numbers.
The user-selected file is processed and the output is put
in a file with a string like '_EXTRACTED' appended to
the midname of the user-selected file.
The output file is put in the same directory with the
original selected file.
This script is:
$SCRIPTBASENAME
in directory
$SCRIPTDIRNAME"
## FOR TESTING:
# echo "$MSG2USER"
## Present the text in a zenity 'warning' dialog.
zenity --warning \
  --no-wrap --width=600 \
  --title "SVG file processor - EXTRACT x,y DATA lines" \
  --text "$MSG2USER"
###############################################
## Exit if the selected file is a directory.
###############################################
## A directory cannot be processed: leave quietly.
if [ -d "$FILENAME" ]; then
  exit
fi
####################################################
## Get the file extension and check that it is not
## blank. Skip the filename if it has no extension.
##
## The extension is the text after the LAST '.' of the final path
## component. Only that component is examined, so a '.' in a
## directory name (as in './pic.svg' or '/home/u/.local/pic.svg')
## is not mistaken for the start of the extension. A name without
## any '.', or ending in '.', yields an empty extension.
####################################################
case "${FILENAME##*/}" in
  *.?*) FILEEXT="${FILENAME##*.}" ;;
  *)    FILEEXT="" ;;
esac
if [ -z "$FILEEXT" ]; then
  exit
fi
####################################################
## Exit if the file extension is not 'svg'.
## COMMENTED, for now.
####################################################
# if test "$FILEEXT" != "svg"
# then
# exit
# fi
####################################################
## Get the 'midname' of the file, the part before
## the period and the extension.
####################################################
## strip_file_extension NAME
##   Write NAME to stdout without its extension, i.e. without the
##   last '.' of the final path component and everything after it.
##   A NAME whose final component contains no '.' is written unchanged,
##   so a '.' in a directory part of the path is never used as the cut.
strip_file_extension() {
  case "${1##*/}" in
    *.*) printf '%s\n' "${1%.*}" ;;
    *)   printf '%s\n' "$1" ;;
  esac
}
MIDNAME=$(strip_file_extension "$FILENAME")
###################################
## Make the output filename.
## Any previous output file of the same name is removed first.
###################################
OUTNAME="${MIDNAME}_EXTRACTEDsvgXYdata.txt"
rm -f -- "$OUTNAME"
###########################################
## Put the input filename in a comment line
## at the top of the output file.
## ('printf' writes the name verbatim, whatever characters it holds.)
###########################################
printf '# %s\n' "$FILENAME" > "$OUTNAME"
##############################################################
## Use 'fold' and 'awk' and 'sed' to make the new output file.
##############################################################
## Could try running the command in a window,
## to see err msgs, if any.
## Could use zenity to offer this as an option.
##
## xterm -fg white -bg black -hold -e \
#######################################################
## The 'egrep' and the awk-and-sed pipe below does the following:
## 0) Use 'egrep' to concatenate the following 'element' lines
## onto the output file:
## '<rect '
## '<line '
## '<polyline '
## '<circle '
## '<text '
## We assume each is on one line.
## 1) Use 'fold' to fold lines longer than $MAXCHARSperLINEin.
## NOTE: The following awk will fail with a
## 'Input record too long' error that stops awk dead
## when a line is longer than about 3071 chars.
## The 'fold' prevents that.
## 2) Use an 'awk' program to print
## sections even if they span more than
## one line.
# 3) Use 'sed' to remove path parameters like
## id="..." and style="..."
## which would be broken up by the following sed.
##
## NOTE: The regular expression "[^"]*" removes
## the double-quotes, and the stuff between them, up to
## the SECOND double-quote --- not to a LAST double-quote
## in the line. Ref: page 54 of O'Reilly 'Sed and Awk' book.
## 4) Use 'sed' to replace each occurrence of the
## 'm' or 'M' or 'c' or 'C' or 'v' or 'V' or
## 'l' or 'L' etc. 'path' characters by a line-feed
## followed by the character.
## Also replace '-' by ' -'.
## Other not-so-critical changes on the "path" stuff:
## 5) Use 'sed' to replace '<path' and '/>' with null.
## 7) Use 'sed' to replace each occurrence of one or more space
## characters with exactly one space character.
## 8) Use 'sed' to replace each occurrence of a comma character
## by a space character.
## 9) Remove blank lines.
## 10) Remove double-quote characters.
## 5-10 are not necessarily in this order.
#######################################################
#######################################################
## Step 0: append the single-line 'element' statements
## (rect, line, polyline, circle, text) to the output file.
#######################################################
grep -E '<rect |<line |<polyline |<circle |<text ' -- "$FILENAME" >> "$OUTNAME"

## extract_path_lines
##   Filter stdin to stdout, keeping only the lines that belong to a
##   "<path" statement: the line on which "<path" appears (followed by
##   white space or the end of the line) and every following line up
##   to and including the first one containing the "/>" end-indicator.
##   All other lines are dropped.
extract_path_lines() {
  awk '
    BEGIN { in_path = 0 }
    {
      ## Inside a path statement that started on an earlier line:
      ## print the line; the statement ends on the line holding "/>".
      if (in_path == 1) {
        if (index($0, "/>") != 0) {
          in_path = 0
        }
        print
        next
      }
      ## Start of a path statement. Only the text from "<path" onward
      ## is searched for "/>", so the end of an earlier element on
      ## the same line is not mistaken for the end of this path.
      if (match($0, /<path([ \t]|$)/)) {
        print
        if (index(substr($0, RSTART), "/>") != 0) {
          in_path = 0
        } else {
          in_path = 1
        }
        next
      }
      ## Any other line is not part of a path statement: no print.
    }'
}

## reformat_path_data
##   Filter stdin (path-statement lines) to stdout (short data lines).
##   Stage 1: remove id="..." and style="..." parameters; replace a
##            transform="..." parameter by a line-feed and "#X".
##   Stage 2: replace sodipodi:...="..." parameters by a line-feed and
##            "#Y"; remove "<path" and "/>"; put a line-feed in front
##            of every one-letter path command; put a space in front
##            of every "-"; remove double-quotes; squeeze each run of
##            spaces to one space.
##   Stage 3: remove blank lines; replace commas by spaces.
##   A new "sed" process starts after each stage that inserts
##   line-feeds, so the next stage works on the resulting short lines.
##   The "\n" in a replacement is a GNU sed feature.
reformat_path_data() {
  sed -e 's|id="[^"]*"||g' \
      -e 's|style="[^"]*"||g' \
      -e 's|transform="[^"]*"|\n#X|g' \
    | sed -e 's|sodipodi:[^"]*="[^"]*"|\n#Y|g' \
          -e 's|<path||g' \
          -e 's|[MmZzLlHhVvCcSsQqTtAa]|\n&|g' \
          -e 's|-| -|g' \
          -e 's|/>||g' \
          -e 's|"||g' \
          -e 's|  *| |g' \
    | sed -e '/^ *$/d' \
          -e 's|,| |g'
}

## FOR TESTING:
# set -x
#######################################################
## Steps 1-10: fold the input into lines that 'awk' can accept, keep
## the path statements, reformat them, fold the result to the output
## line length, and append it to the output file.
## NOTE: 'fold -b' cuts at a byte count, so an indicator lying across
## a cut is not recognized.
#######################################################
fold -b -w "$MAXCHARSperLINEin" -- "$FILENAME" \
  | extract_path_lines \
  | reformat_path_data \
  | fold -b -w "$MAXCHARSperLINEout" >> "$OUTNAME"
## FOR TESTING:
# set -
###################################
## Show the output file.
###################################
## . $HOME/.gnome2/nautilus-scripts/.set_VIEWERvars.shi
## Source the two include files. The first is expected to set
## DIR_NautilusScripts, which locates the second; the second presumably
## sets TXTVIEWER and TXTEDITOR (those files are not visible here).
## The paths are quoted so a home or scripts directory whose name
## contains spaces is passed to '.' as a single file name.
. "$HOME/.freedomenv/feNautilusScripts/set_DIR_NautilusScripts.shi"
. "$DIR_NautilusScripts/.set_VIEWERvars.shi"
# $TXTVIEWER "$OUTNAME" &
## TXTEDITOR is left unquoted on purpose so that a value made of a
## command name plus options is split into separate words.
$TXTEDITOR "$OUTNAME" &