1
0
forked from GitHub/gf-rgl
Files
gf-rgl/src/morphodict/utils/only_homonyms.sh
2021-06-04 13:41:42 +08:00

52 lines
1.6 KiB
Bash
Executable File

#!/bin/bash
USAGE="usage: ./remove_sense_distinctions.sh <concrete syntax file>"
NOTE="This is not extremely useful, it will just create a file with only those entries that are homonymous in dictionary form, but differ in other forms. The purpose of the file is for you to look at/do small experiments with. The real job is done in MkMorphoDict.hs."
# String manipulation
CONC=$1 # e.g. MorphoDictFin.gf
BAK="$CONC.bak" # e.g. MorphoDictFin.gf.bak
NAME=`echo $CONC | cut -f 1 -d '.'` # e.g. MorphoDictFin
ABS="${NAME}Abs.gf" # e.g. MorphoDictFinAbs.gf
CONC_HEADER="$NAME.header" # e.g. MorphoDictFin.header
ABS_HEADER="${NAME}Abs.header" # e.g. MorphoDictFinAbs.header
find_duplicates() {
echo "Putting (temporarily) only homonyms in $CONC"
echo "cat $CONC_HEADER > $CONC"
cat $CONC_HEADER > $CONC
DUPLS=`cut -f 2 -d ' ' /tmp/$CONC \
| sort | uniq -c | sort -nr \
| egrep "^ +1?[2-9][0-9]? [a-zåäö]+_" \
| tr -d '[0-9][A-ZÅÄÖ]'`
for d in $DUPLS
do
grep "lin $d" $BAK >> $CONC
done
echo "}" >> $CONC
}
remove_numbers() {
echo "cp $CONC{,.bak}"
cp $CONC{,.bak}
echo "cat $CONC | sed -E 's/_[0-9]_/_/g' | uniq > /tmp/$CONC"
cat $CONC | sed -E 's/_[0-9]_/_/g' | uniq > /tmp/$CONC
echo "Done removing numbers."
}
#### Action starts here
echo $NOTE
if [[ $CONC == *"Abs.gf" ]]
then
echo $USAGE
else
remove_numbers
find_duplicates
# echo "gf -v=0 -make $CONC"
# gf -v=0 -make $CONC
echo "$CONC contains now only homonyms. Original file is found in $BAK."
fi