1
0
forked from GitHub/gf-core

added DictCompFin; started frequency-based Finnish grammar

This commit is contained in:
aarne
2010-12-29 11:42:56 +00:00
parent fbbd1981e3
commit 5d84849e70
5 changed files with 35368 additions and 35319 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,30 +1,57 @@
main = interact (unlines . concatMap (prEntry . mkOne) . lines) import qualified Data.Set as S
prEntry (f,c,w) = [ main = do
unwords [f, ":", c] src <- readFile "src/suomen-sanomalehtikielen-taajuussanasto-utf8.txt"
kotus <- readFile "kotusfuns.txt"
let funSet = S.fromList (map (head . words) (lines kotus))
mapM_ putStrLn $ concatMap (prEntry funSet . mkOne) $ lines src
stoplist = S.fromList ["ei","olla","ei_ihme","eikä"]
prEntry funSet (f,c,w,p@(k,n),i) = if n == 0 then [] else [
unwords ["fun",f, " : ", c,";"],
unwords ["lin",f, "=", para,";"]
] ]
where
para = case n of
2 -> let kf = i ++ "_" ++ k in
if S.member kf funSet
then "mk" ++ c ++ " " ++ kf
else "mk" ++ c ++ " " ++ quoted w
_ -> "mk" ++ k ++ " " ++ quoted w
mkOne line = case words line of mkOne line = case words line of
_:_:_:w:c0:_ -> let c = mkCat c0 in (mkFun w c, c, w, mkLin c) _:_:_:w:c0:_ | S.member w stoplist -> none
_ -> [] _:_:_:w:c0:_ -> let c = mkCat c0 in (mkFun w c, c, w, mkLin c0, mkId w)
_ -> none
-- Placeholder entry returned by mkOne for stop-listed or unrecognized lines;
-- its mode field is 0, so prEntry prints nothing for it.
none = ("","","",("",0),"")
-- Wrap a string in double quotes; the content itself is not escaped.
quoted s = "\"" ++ s ++ "\""
mkCat = fst . catlin mkCat = fst . catlin
mkLin = snd . catlin mkLin = snd . catlin
catlin c = case c of catlin c = case c of
"(adjektiivi)" -> "A", "(adjektiivi)" -> kotus "A"
"(adverbi)" -> "Adv" "(adverbi)" -> rep "Adv"
"(erisnimi)" -> "PN" "(erisnimi)" -> rep "PN"
"(interjektio)" -> "Interj" "(interjektio)" -> hide "Interj"
"(konjunktio)" -> "Conj" "(konjunktio)" -> hide "Conj"
"(lukusana)" -> "Numeral" "(lukusana)" -> hide "Numeral"
"(lyhenne)" -> "Abbr" "(lyhenne)" -> hide "Abbr"
"(prepositio)" -> "Prep" "(prepositio)" -> hide "Prep"
"(pronomini)" -> "Pron" "(pronomini)" -> hide "Pron"
"(substantiivi)" -> "N" "(substantiivi)" -> kotus "N"
"(verbi)" -> "V" "(verbi)" -> kotus "V"
_ -> "Junk" _ -> hide "Junk"
-- Helpers for the catlin table. Each returns (category, (paradigm name, mode));
-- prEntry dispatches on the mode number.
-- Mode 1: prEntry linearizes with "mk" ++ paradigm name applied to the quoted word.
rep s = (s,(s,1))
-- Mode 2: the paradigm name is the category plus "K". prEntry looks up
-- <word id>_<paradigm name> in the function set read from kotusfuns.txt and
-- passes that function to "mk" ++ category if present, else the quoted word.
kotus s = (s,(s ++ "K",2))
--- for entries not to be included (mode 0: prEntry emits nothing)
hide s = (s,(s,0))
mkFun w c = mkId w ++ "_" ++ c mkFun w c = mkId w ++ "_" ++ c

View File

@@ -0,0 +1,14 @@
# Builds the frequency-based Finnish grammar files FreqFinAbs.gf and FreqFin.gf.
#
# None of the targets names a file that its recipe creates, so they are
# declared phony: otherwise a stray file called "freq" or "funs" in this
# directory would make the corresponding rule silently do nothing.
.PHONY: all funs freq

all: freq

# Write the function names of ../DictCompFinAbs.gf to kotusfuns.txt by piping
# a "pg -funs | wf" command line into the GF shell.
# NOTE(review): kotusfuns.txt is presumably the file the generator script
# reads; "freq" does not depend on this target, so run it first by hand.
funs:
	echo "pg -funs | wf -file=kotusfuns.txt" | gf -run ../DictCompFinAbs.gf

# Run the generator once into src.tmp, then assemble each grammar file as
# prelude + matching generated lines + closing brace:
#   lines containing ":" go to the abstract syntax FreqFinAbs.gf,
#   lines containing "=" go to the concrete syntax FreqFin.gf.
# Recipe lines must start with a TAB character.
freq:
	runghc Freq.hs >src.tmp
	cp prelFreqFinAbs FreqFinAbs.gf
	grep ":" src.tmp >>FreqFinAbs.gf
	echo "}" >>FreqFinAbs.gf
	cp prelFreqFin FreqFin.gf
	grep "=" src.tmp >>FreqFin.gf
	echo "}" >>FreqFin.gf

View File

@@ -0,0 +1,6 @@
--# -path=.:alltenses:..
concrete FreqFin of FreqFinAbs = CatFin, StructuralFin ** open ParadigmsFin, DictCompFin in {
flags coding = utf8 ;

View File

@@ -0,0 +1,2 @@
abstract FreqFinAbs = Cat, Structural ** {