added DictCompFin; started frequency-based Finnish grammar

This commit is contained in:
aarne
2010-12-29 11:42:56 +00:00
parent fbbd1981e3
commit 5d84849e70
5 changed files with 35368 additions and 35319 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,30 +1,57 @@
main = interact (unlines . concatMap (prEntry . mkOne) . lines)
import qualified Data.Set as S
prEntry (f,c,w) = [
unwords [f, ":", c]
-- | Entry point.  Reads the newspaper frequency list and kotusfuns.txt,
-- collects the first word of every line of kotusfuns.txt into a set, and
-- prints the lines that prEntry produces for each frequency-list line
-- (after mkOne has turned the line into an entry tuple).
main = do
  src <- readFile "src/suomen-sanomalehtikielen-taajuussanasto-utf8.txt"
  kotus <- readFile "kotusfuns.txt"
  -- The pattern `name:_` silently skips blank lines; the previous
  -- `head . words` raised an exception on them once the set was forced.
  let funSet = S.fromList [name | name:_ <- map words (lines kotus)]
  mapM_ putStrLn $ concatMap (prEntry funSet . mkOne) $ lines src
-- Words that mkOne refuses to turn into entries: a line whose word field is
-- a member of this set is mapped to the placeholder entry `none`.
stoplist = S.fromList ["ei","olla","ei_ihme","eikä"]
-- | Render one entry tuple as output lines.
--
-- The tuple is (function name, category, word, (constructor suffix, mode),
-- identifier).  Mode 0 yields no lines at all.  Otherwise two lines are
-- produced: a "fun" line giving the category and a "lin" line giving the
-- linearization expression.
--
-- In mode 2 the expression is "mk" ++ category applied either to the key
-- identifier ++ "_" ++ suffix, when that key is a member of funSet, or to
-- the quoted word.  In every other mode it is "mk" ++ suffix applied to the
-- quoted word.
prEntry funSet (f,c,w,(k,n),i)
  | n == 0    = []
  | otherwise = [funLine, linLine]
  where
    funLine = unwords ["fun",f, " : ", c,";"]
    linLine = unwords ["lin",f, "=", linExpr,";"]
    -- Key under which a ready-made entry may exist in funSet.
    dictKey = i ++ "_" ++ k
    linExpr
      | n == 2 && S.member dictKey funSet = "mk" ++ c ++ " " ++ dictKey
      | n == 2                            = "mk" ++ c ++ " " ++ quoted w
      | otherwise                         = "mk" ++ k ++ " " ++ quoted w
-- Turn one line of the frequency list into an entry tuple.  The line is
-- split on whitespace; the fourth field is taken as the word and the fifth
-- as its category tag.  Lines with fewer than five fields fall through to
-- the catch-all alternative.
-- NOTE(review): the alternatives below look like the pre-commit version
-- (4-tuple result, `[]` fallback) followed by the post-commit version
-- (stoplist guard, 5-tuple result, `none` fallback).  Only the latter match
-- the 5-tuple that prEntry destructures; confirm against the real file.
mkOne line = case words line of
_:_:_:w:c0:_ -> let c = mkCat c0 in (mkFun w c, c, w, mkLin c)
_ -> []
_:_:_:w:c0:_ | S.member w stoplist -> none
_:_:_:w:c0:_ -> let c = mkCat c0 in (mkFun w c, c, w, mkLin c0, mkId w)
_ -> none
-- Placeholder entry returned by mkOne for lines it skips.  The number in the
-- fourth component is 0, which makes prEntry produce no output for it.
none = ("","","",("",0),"")
-- Wrap a string in double-quote characters.  The contents are copied
-- verbatim; nothing inside the string is escaped.
quoted s = concat ["\"", s, "\""]
-- Projections of catlin's result pair: mkCat yields its first component
-- (the category name) and mkLin its second (the pair that prEntry matches
-- as (k,n)).
mkCat = fst . catlin
mkLin = snd . catlin
-- Map a parenthesised word-class tag from the frequency list to category
-- information; any unrecognised tag is treated as "Junk".
-- NOTE(review): two sets of alternatives are listed.  The first maps each
-- tag to a bare string; the second wraps the same names with rep / kotus /
-- hide, giving the pair shape that `mkCat = fst . catlin` and
-- `mkLin = snd . catlin` need.  This looks like the old and new sides of the
-- diff shown together; confirm against the real file.
catlin c = case c of
"(adjektiivi)" -> "A",
"(adverbi)" -> "Adv"
"(erisnimi)" -> "PN"
"(interjektio)" -> "Interj"
"(konjunktio)" -> "Conj"
"(lukusana)" -> "Numeral"
"(lyhenne)" -> "Abbr"
"(prepositio)" -> "Prep"
"(pronomini)" -> "Pron"
"(substantiivi)" -> "N"
"(verbi)" -> "V"
_ -> "Junk"
"(adjektiivi)" -> kotus "A"
"(adverbi)" -> rep "Adv"
"(erisnimi)" -> rep "PN"
"(interjektio)" -> hide "Interj"
"(konjunktio)" -> hide "Conj"
"(lukusana)" -> hide "Numeral"
"(lyhenne)" -> hide "Abbr"
"(prepositio)" -> hide "Prep"
"(pronomini)" -> hide "Pron"
"(substantiivi)" -> kotus "N"
"(verbi)" -> kotus "V"
_ -> hide "Junk"
-- Builders for catlin results, each of the form (category, (name, mode)).
-- prEntry branches on the mode number.
-- rep: mode 1, name equal to the category; prEntry then emits "mk" ++ name
-- applied to the quoted word.
rep s = (s,(s,1))
-- kotus: mode 2, name is the category with "K" appended; prEntry uses the
-- name to build the key it looks up in funSet.
kotus s = (s,(s ++ "K",2))
--- for entries not to be included (mode 0 makes prEntry return no lines)
hide s = (s,(s,0))
-- Build a function name from a word and its category: the identifier that
-- mkId derives from the word, an underscore, then the category.
mkFun w c = concat [mkId w, "_", c]

View File

@@ -0,0 +1,14 @@
# Builds the frequency-based Finnish lexicon files FreqFinAbs.gf and FreqFin.gf.
#
# No target here names a file that its recipe creates, so all of them are
# declared phony; otherwise a stray file called "all", "funs" or "freq" would
# make the corresponding rule a silent no-op.
.PHONY: all funs freq

all: freq

# Write the function names of DictCompFinAbs to kotusfuns.txt.
# NOTE(review): presumably this must be run before "freq", since the Haskell
# generator reads a file called kotusfuns.txt; confirm.
funs:
	echo "pg -funs | wf -file=kotusfuns.txt" | gf -run ../DictCompFinAbs.gf

# Run the generator, then assemble each grammar file as
#   prelude + matching generated lines + closing brace.
# Lines containing ":" go to the abstract syntax, lines containing "=" to the
# concrete syntax.
freq:
	runghc Freq.hs >src.tmp
	cp prelFreqFinAbs FreqFinAbs.gf
	grep ":" src.tmp >>FreqFinAbs.gf
	echo "}" >>FreqFinAbs.gf
	cp prelFreqFin FreqFin.gf
	grep "=" src.tmp >>FreqFin.gf
	echo "}" >>FreqFin.gf

View File

@@ -0,0 +1,6 @@
--# -path=.:alltenses:..
concrete FreqFin of FreqFinAbs = CatFin, StructuralFin ** open ParadigmsFin, DictCompFin in {
flags coding = utf8 ;

View File

@@ -0,0 +1,2 @@
abstract FreqFinAbs = Cat, Structural ** {