forked from GitHub/gf-core
added DictCompFin; started frequency-based Finnish grammar
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,30 +1,57 @@
|
||||
main = interact (unlines . concatMap (prEntry . mkOne) . lines)
|
||||
import qualified Data.Set as S
|
||||
|
||||
prEntry (f,c,w) = [
|
||||
unwords [f, ":", c]
|
||||
-- | Print the generated entry lines for the frequency word list.
--
-- Reads the word list and @kotusfuns.txt@, collects the first word of every
-- line of the latter into a set, and prints whatever 'prEntry' produces for
-- each line of the word list.
main :: IO ()
main = do
  src <- readFile "src/suomen-sanomalehtikielen-taajuussanasto-utf8.txt"
  -- Named kotusSrc so that the top-level function 'kotus' is not shadowed.
  kotusSrc <- readFile "kotusfuns.txt"
  -- 'take 1' rather than 'head': a blank line contributes nothing to the set
  -- instead of aborting the run with an empty-list exception.
  let funSet = S.fromList (concatMap (take 1 . words) (lines kotusSrc))
  mapM_ putStrLn $ concatMap (prEntry funSet . mkOne) $ lines src
|
||||
|
||||
-- | Set of words that 'mkOne' looks up; a line whose word is listed here
-- is mapped to 'none'.
stoplist =
  S.fromList
    [ "ei"
    , "olla"
    , "ei_ihme"
    , "eikä"
    ]
|
||||
|
||||
-- | Render one entry @(f, c, w, (k, n), i)@ as its output lines.
--
-- * @n == 0@: nothing is emitted.
-- * otherwise: a @fun@ line for @f@ and @c@, and a @lin@ line for @f@.
--
-- The right-hand side of the @lin@ line depends on @n@:
--
-- * @n == 2@ and @i ++ "_" ++ k@ is a member of @funSet@: @mk@ ++ @c@
--   applied to that name;
-- * @n == 2@ otherwise: @mk@ ++ @c@ applied to the quoted @w@;
-- * any other @n@: @mk@ ++ @k@ applied to the quoted @w@.
prEntry funSet (f, c, w, (k, n), i)
  | n == 0 = []
  | otherwise = [funLine, linLine]
  where
    funLine = unwords ["fun", f, " : ", c, ";"]
    linLine = unwords ["lin", f, "=", rhs, ";"]
    -- Candidate function name that may already exist in funSet.
    kf = i ++ "_" ++ k
    rhs
      | n == 2 = "mk" ++ c ++ " " ++ (if kf `S.member` funSet then kf else quoted w)
      | otherwise = "mk" ++ k ++ " " ++ quoted w
|
||||
|
||||
|
||||
-- Turn one input line into the entry tuple consumed by prEntry.
-- The line is split into words; the fourth word (w) and the fifth word (c0)
-- are the only ones used.
-- NOTE(review): the alternatives below interleave two versions of this
-- function (one yielding a 4-tuple or [], one yielding a 5-tuple or none).
-- They cannot type-check together; confirm which set is current.
mkOne line = case words line of
|
||||
_:_:_:w:c0:_ -> let c = mkCat c0 in (mkFun w c, c, w, mkLin c)
|
||||
_ -> []
|
||||
_:_:_:w:c0:_ | S.member w stoplist -> none
|
||||
_:_:_:w:c0:_ -> let c = mkCat c0 in (mkFun w c, c, w, mkLin c0, mkId w)
|
||||
_ -> none
|
||||
|
||||
-- | Entry whose strings are all empty and whose number is 0; 'prEntry'
-- emits nothing for an entry whose number is 0.
none = (blank, blank, blank, (blank, 0), blank)
  where
    blank = ""
|
||||
|
||||
-- | Wrap a string in double quotes. Characters inside the string are
-- copied as they are, without escaping.
quoted s = concat ["\"", s, "\""]
|
||||
|
||||
-- | First component of the pair that 'catlin' returns for a label.
mkCat label = fst (catlin label)
|
||||
-- | Second component of the pair that 'catlin' returns for a label.
mkLin label = snd (catlin label)
|
||||
|
||||
-- Map a parenthesised part-of-speech label such as "(verbi)" to category
-- information; any unlisted label falls through to the "Junk" alternative.
-- NOTE(review): the alternatives below interleave two versions of this
-- function. The first group returns a bare category string (and its first
-- alternative ends in a stray comma); the second group returns the pair
-- built by kotus, rep or hide. Confirm which group is current.
catlin c = case c of
|
||||
"(adjektiivi)" -> "A",
|
||||
"(adverbi)" -> "Adv"
|
||||
"(erisnimi)" -> "PN"
|
||||
"(interjektio)" -> "Interj"
|
||||
"(konjunktio)" -> "Conj"
|
||||
"(lukusana)" -> "Numeral"
|
||||
"(lyhenne)" -> "Abbr"
|
||||
"(prepositio)" -> "Prep"
|
||||
"(pronomini)" -> "Pron"
|
||||
"(substantiivi)" -> "N"
|
||||
"(verbi)" -> "V"
|
||||
_ -> "Junk"
|
||||
"(adjektiivi)" -> kotus "A"
|
||||
"(adverbi)" -> rep "Adv"
|
||||
"(erisnimi)" -> rep "PN"
|
||||
"(interjektio)" -> hide "Interj"
|
||||
"(konjunktio)" -> hide "Conj"
|
||||
"(lukusana)" -> hide "Numeral"
|
||||
"(lyhenne)" -> hide "Abbr"
|
||||
"(prepositio)" -> hide "Prep"
|
||||
"(pronomini)" -> hide "Pron"
|
||||
"(substantiivi)" -> kotus "N"
|
||||
"(verbi)" -> kotus "V"
|
||||
_ -> hide "Junk"
|
||||
|
||||
|
||||
-- | Pair a category name with itself and the number 1.
rep s = (s, lin)
  where
    lin = (s, 1)
|
||||
-- | Pair a category name with its @K@-suffixed form and the number 2.
kotus s = (s, (suffixed, 2))
  where
    suffixed = s ++ "K"
|
||||
-- | For entries that are not to be included: pairs a category name with
-- itself and the number 0.
hide s = (s, lin)
  where
    lin = (s, 0)
|
||||
|
||||
-- | Build a function name from a word and a category: @mkId w@, an
-- underscore, then @c@. (@mkId@ is defined outside the visible part of the file.)
mkFun w c = concat [mkId w, "_", c]
|
||||
|
||||
|
||||
14
lib/src/finnish/frequency/Makefile
Normal file
14
lib/src/finnish/frequency/Makefile
Normal file
@@ -0,0 +1,14 @@
|
||||
# The targets are commands, not files: declare them phony so that a file
# named "all", "funs" or "freq" cannot make them appear up to date.
.PHONY: all funs freq

all: freq

# Run the gf command "pg -funs | wf -file=kotusfuns.txt" on ../DictCompFinAbs.gf.
funs:
	echo "pg -funs | wf -file=kotusfuns.txt" | gf -run ../DictCompFinAbs.gf

# Run Freq.hs into src.tmp, then assemble the two grammar files: each one
# starts as a copy of its prelude file, gets its generated lines appended,
# and is closed with "}".
# Lines are selected by their "fun " / "lin " prefix (the form printed by
# prEntry) rather than by containing ":" or "=", so that a word containing
# one of those characters cannot send a line to the wrong file.
freq:
	runghc Freq.hs >src.tmp
	cp prelFreqFinAbs FreqFinAbs.gf
	grep "^fun " src.tmp >>FreqFinAbs.gf
	echo "}" >>FreqFinAbs.gf
	cp prelFreqFin FreqFin.gf
	grep "^lin " src.tmp >>FreqFin.gf
	echo "}" >>FreqFin.gf
|
||||
|
||||
6
lib/src/finnish/frequency/prelFreqFin
Normal file
6
lib/src/finnish/frequency/prelFreqFin
Normal file
@@ -0,0 +1,6 @@
|
||||
--# -path=.:alltenses:..
|
||||
|
||||
concrete FreqFin of FreqFinAbs = CatFin, StructuralFin ** open ParadigmsFin, DictCompFin in {
|
||||
|
||||
flags coding = utf8 ;
|
||||
|
||||
2
lib/src/finnish/frequency/prelFreqFinAbs
Normal file
2
lib/src/finnish/frequency/prelFreqFinAbs
Normal file
@@ -0,0 +1,2 @@
|
||||
abstract FreqFinAbs = Cat, Structural ** {
|
||||
|
||||
Reference in New Issue
Block a user