#!/bin/bash
#
# remove_sense_distinctions.sh
#
# Rewrites a GF concrete lexicon so that it (temporarily) contains only its
# homonyms, i.e. the entries that still clash once the one-digit sense
# numbers ("_1_", "_2_", ...) are removed from the identifiers.
#
# usage: ./remove_sense_distinctions.sh <concrete syntax file>
#
# Files involved, for the argument MorphoDictFin.gf:
#   MorphoDictFin.gf       input; OVERWRITTEN with header + homonyms + "}"
#   MorphoDictFin.gf.bak   copy of the input made before anything is changed
#   MorphoDictFin.header   module header copied to the top of the new file
#   /tmp/MorphoDictFin.gf  input with sense numbers removed and adjacent
#                          identical lines merged
#
# The header file is expected next to the lexicon.  The one shipped together
# with this script, src/morphodict/MorphoDictFin.header, reads as follows
# (and has no trailing newline):
#
#   concrete MorphoDictFin of MorphoDictFinAbs = CatFin ** open
#     ParadigmsFin,
#   --  MorphoFin,
#     Kotus
#   --  Prelude
#     in {
#
#   -- extracted from http://kaino.kotus.fi/sanat/nykysuomi/, licensed under LGPL
#
#   flags coding = utf8 ;
#
# Exit status: 0 on success, non-zero on a usage error, an unreadable input
# or a failing `gf` run.

# No `set -e`: every critical step is checked explicitly and failures are
# propagated through return codes.
set -uo pipefail

USAGE="usage: ./remove_sense_distinctions.sh <concrete syntax file, e.g. MorphoDictFin.gf>"

# String manipulation
CONC=${1:-}                       # e.g. MorphoDictFin.gf
BAK="$CONC.bak"                   # e.g. MorphoDictFin.gf.bak

# Cut the name at the first dot of the *basename* only, so that "./Foo.gf"
# or "some.dir/Foo.gf" still yield ".../Foo".
conc_dir=""
if [[ "$CONC" == */* ]]; then
  conc_dir="${CONC%/*}/"
fi
conc_base=${CONC##*/}
NAME="${conc_dir}${conc_base%%.*}"   # e.g. MorphoDictFin
# shellcheck disable=SC2034  # not used below; kept alongside the other names
ABS="${NAME}Abs.gf"               # e.g. MorphoDictFinAbs.gf
CONC_HEADER="$NAME.header"        # e.g. MorphoDictFin.header
# Sense-number-free copy of the lexicon.  Only the basename is used so that
# an argument with a directory part still maps to a path that exists.
STRIPPED="/tmp/${conc_base}"      # e.g. /tmp/MorphoDictFin.gf

#######################################
# Replace $CONC by: header, every homonym entry, closing brace.
# An entry of $BAK is a homonym when its identifier, with the sense numbers
# removed, occurs on at least two "lin" lines of $STRIPPED.
# Globals:   CONC (written), CONC_HEADER, BAK, STRIPPED (read)
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success, 1 if an input is unreadable or a step fails
#######################################
find_duplicates() {
  local input
  # Check the inputs first: the redirection below truncates $CONC.
  for input in "$CONC_HEADER" "$BAK" "$STRIPPED"; do
    if [[ ! -r "$input" ]]; then
      printf 'error: cannot read %s\n' "$input" >&2
      return 1
    fi
  done

  echo "Putting (temporarily) only homonyms in $CONC"
  echo "cat $CONC_HEADER > $CONC"
  cat < "$CONC_HEADER" > "$CONC" || return 1
  # The header may lack a final newline; do not glue the first entry to it.
  if [[ -n "$(tail -c 1 < "$CONC_HEADER")" ]]; then
    echo >> "$CONC" || return 1
  fi

  # Pass 1 counts the identifiers of the number-free file.  Pass 2 prints the
  # original entries whose number-free identifier was counted twice or more.
  # Identifiers are compared as whole strings, which keeps non-ASCII letters
  # intact and never selects an entry merely sharing a prefix.
  awk '
    pass == 1 { if ($1 == "lin") seen[$2]++; next }
    $1 == "lin" {
      key = $2
      gsub(/_[0-9]_/, "_", key)
      if (seen[key] >= 2) print
    }
  ' pass=1 "$STRIPPED" pass=2 "$BAK" >> "$CONC" || return 1

  echo "}" >> "$CONC"
}

#######################################
# Back up $CONC to $BAK and write a copy without sense numbers, with
# adjacent identical lines merged, to $STRIPPED.
# Globals:   CONC, BAK, STRIPPED (read)
# Outputs:   progress messages on stdout
# Returns:   0 on success, 1 if copying or filtering fails
#######################################
remove_numbers() {
  echo "cp $CONC $BAK"
  cp -- "$CONC" "$BAK" || return 1
  echo "sed -E 's/_[0-9]_/_/g' $CONC | uniq > $STRIPPED"
  sed -E 's/_[0-9]_/_/g' < "$CONC" | uniq > "$STRIPPED" || return 1
  echo "Done removing numbers."
}

#######################################
# Validate the argument, run both steps and compile the result with gf.
# Globals:   USAGE, CONC, BAK, CONC_HEADER, STRIPPED (read)
# Returns:   0 on success, non-zero otherwise (gf's status if gf fails)
#######################################
main() {
  local status=0

  # The abstract syntax (…Abs.gf) is not a valid argument.
  if [[ -z "$CONC" || "$CONC" == *"Abs.gf" ]]; then
    printf '%s\n' "$USAGE" >&2
    return 1
  fi
  if [[ ! -f "$CONC" ]]; then
    printf 'error: no such file: %s\n' "$CONC" >&2
    return 1
  fi
  if [[ ! -r "$CONC_HEADER" ]]; then
    printf 'error: cannot read header %s\n' "$CONC_HEADER" >&2
    return 1
  fi
  # Writing the number-free copy must not overwrite the lexicon itself.
  if [[ "$STRIPPED" -ef "$CONC" ]]; then
    printf 'error: %s and %s are the same file\n' "$CONC" "$STRIPPED" >&2
    return 1
  fi

  remove_numbers || return 1
  find_duplicates || return 1

  echo "gf -v=0 -make $CONC"
  gf -v=0 -make "$CONC" || status=$?
  if (( status != 0 )); then
    printf 'warning: gf exited with status %d\n' "$status" >&2
  fi
  echo "$CONC contains now only homonyms. Original file is found in $BAK."
  return "$status"
}

main