#!/bin/bash
#
# Provenance: forked from GitHub/gf-rgl (Bash, executable file, 52 lines, 1.6 KiB).

# Usage line shown when the script is called incorrectly.
USAGE="usage: ./remove_sense_distinctions.sh <concrete syntax file>"

# Disclaimer printed at the start of every run.
NOTE="This is not extremely useful, it will just create a file with only those entries that are homonymous in dictionary form, but differ in other forms. The purpose of the file is for you to look at/do small experiments with. The real job is done in MkMorphoDict.hs."

# String manipulation: every file name below is derived from the single
# command-line argument.

CONC="${1:-}"     # e.g. MorphoDictFin.gf
BAK="$CONC.bak"   # e.g. MorphoDictFin.gf.bak

# Module name = the argument with everything from the first '.' of the *file
# name* removed. The directory part is split off first so that paths such as
# ./MorphoDictFin.gf or ../lang.d/MorphoDictFin.gf keep their directory
# instead of being cut at the first dot of the whole path.
conc_base="${CONC##*/}"           # file name without leading directories
conc_dir="${CONC%"$conc_base"}"   # directory prefix incl. trailing '/', may be empty
NAME="${conc_dir}${conc_base%%.*}"   # e.g. MorphoDictFin
unset conc_base conc_dir

ABS="${NAME}Abs.gf"               # e.g. MorphoDictFinAbs.gf
CONC_HEADER="$NAME.header"        # e.g. MorphoDictFin.header
ABS_HEADER="${NAME}Abs.header"    # e.g. MorphoDictFinAbs.header

#######################################
# Rewrite $CONC so that it contains only the header, the entries whose
# function name occurs at least twice in the number-stripped copy of the
# grammar, and a closing "}".
#
# The number-stripped copy is read from /tmp/<file name of $CONC>; the
# matching entries themselves are copied from the untouched backup $BAK.
# A function name such as "kuu_N" is reduced to the prefix "kuu_" (digits and
# upper-case letters removed), and every backup line containing
# "lin <prefix>" is emitted.
#
# Globals:   CONC (written), CONC_HEADER (read), BAK (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success; 1 if an input file is unreadable, in which case
#            $CONC is left untouched
#######################################
find_duplicates() {
  local deduped="/tmp/${CONC##*/}"
  local input prefix

  # Check inputs before truncating $CONC: a missing header or backup would
  # otherwise leave an unusable grammar behind.
  for input in "$CONC_HEADER" "$deduped" "$BAK"; do
    if [[ ! -r "$input" ]]; then
      echo "find_duplicates: cannot read '$input'; leaving $CONC untouched" >&2
      return 1
    fi
  done

  echo "Putting (temporarily) only homonyms in $CONC"
  echo "cat $CONC_HEADER > $CONC"
  cat -- "$CONC_HEADER" > "$CONC"

  # Pipeline, stage by stage:
  #   cut       second space-separated field = function name
  #   uniq -c   number of entries per name; sort -nr puts the largest first
  #   awk       keep names seen at least twice (numeric test, so counts such
  #             as 10 or 11 are not missed)
  #   grep      keep names that start with lower-case letters followed by "_";
  #             å/ä/ö are spelled as alternatives so that the match does not
  #             depend on a multibyte-aware locale
  #   sed       drop digits and upper-case letters (the category), leaving the
  #             lemma prefix; done with sed because byte-oriented tr would
  #             also damage lower-case å/ä/ö
  #   awk       drop repeated prefixes so no entry is copied twice
  while IFS= read -r prefix; do
    grep -F -e "lin $prefix" -- "$BAK" >> "$CONC"
  done < <(
    cut -d ' ' -f 2 -- "$deduped" \
      | sort | uniq -c | sort -nr \
      | awk '$1 >= 2 { print $2 }' \
      | grep -E '^([a-z]|å|ä|ö)+_' \
      | sed -E 's/[0-9A-Z]|Å|Ä|Ö//g' \
      | awk '!seen[$0]++'
  )

  echo "}" >> "$CONC"
}

#######################################
# Back up $CONC to $CONC.bak, then write a copy of it with single-digit
# sense markers removed ("_1_" -> "_") and adjacent identical lines
# collapsed to /tmp/<file name of $CONC>.
#
# NOTE(review): the backup is overwritten on every run, so running the script
# twice on the same file replaces the original backup with the reduced file.
#
# Globals:   CONC (read)
# Arguments: none
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success; 1 if $CONC is unreadable, the backup fails, or
#            the stripped copy cannot be written
#######################################
remove_numbers() {
  # Only the file name is used under /tmp, so a $CONC with a directory
  # component does not require that directory to exist there.
  local deduped="/tmp/${CONC##*/}"
  local -a stage_status

  if [[ -z "$CONC" || ! -r "$CONC" ]]; then
    echo "remove_numbers: cannot read concrete syntax file '$CONC'" >&2
    return 1
  fi

  echo "cp $CONC{,.bak}"
  # Without a backup the later steps would destroy the only copy: stop here.
  if ! cp -- "$CONC" "$CONC.bak"; then
    echo "remove_numbers: could not back up $CONC; aborting" >&2
    return 1
  fi

  echo "cat $CONC | sed -E 's/_[0-9]_/_/g' | uniq > $deduped"
  sed -E 's/_[0-9]_/_/g' < "$CONC" | uniq > "$deduped"
  stage_status=("${PIPESTATUS[@]}")
  if [[ "${stage_status[0]}" -ne 0 || "${stage_status[1]}" -ne 0 ]]; then
    echo "remove_numbers: could not write $deduped" >&2
    return 1
  fi

  echo "Done removing numbers."
}

#### Action starts here

echo "$NOTE"

# Refuse to run without an argument (an empty $CONC would make the later
# steps read from stdin) or when the abstract syntax file was given instead
# of the concrete one.
if [[ -z "$CONC" || "$CONC" == *"Abs.gf" ]]; then
  echo "$USAGE" >&2
  exit 1
fi

if [[ ! -f "$CONC" ]]; then
  echo "error: $CONC: no such file" >&2
  echo "$USAGE" >&2
  exit 1
fi

# Each step depends on the previous one; stop at the first failure so that
# $CONC is never rewritten without a valid backup.
remove_numbers || exit 1
find_duplicates || exit 1

# echo "gf -v=0 -make $CONC"
# gf -v=0 -make $CONC

echo "$CONC contains now only homonyms. Original file is found in $BAK."