started project with Finnish frequency dictionary

This commit is contained in:
aarne
2010-12-28 20:47:27 +00:00
parent ed4e07d0c2
commit 3f9313cc0f
3 changed files with 10044 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
-- | Program entry point: read all of standard input, convert every line
-- with 'mkOne', and write the converted lines to standard output.
main :: IO ()
main = interact convertAll
  where
    -- One output line is produced per input line.
    convertAll input = unlines [mkOne entry | entry <- lines input]
-- | Convert one input line into an entry of the form @<id>_<cat> : <cat>@.
--
-- The line is split on whitespace; the fourth field is taken as the word
-- and the fifth as its part-of-speech tag. The first three fields and
-- anything after the fifth are ignored. A line with fewer than five
-- fields yields the empty string.
mkOne :: String -> String
mkOne line =
  case drop 3 (words line) of
    word : tag : _ -> render word (cat tag)
    _              -> ""
  where
    -- The category appears twice: as an identifier suffix and after the colon.
    render word category = unwords [mkId word ++ "_" ++ category, ":", category]
-- | Map a parenthesised Finnish part-of-speech tag (for example
-- @"(verbi)"@) to its short category name. Any tag that is not listed
-- in the table is mapped to @"Junk"@.
cat :: String -> String
cat tag = maybe "Junk" id (lookup tag categories)
  where
    categories =
      [ ("(adjektiivi)",   "A")
      , ("(adverbi)",      "Adv")
      , ("(erisnimi)",     "PN")
      , ("(interjektio)",  "Interj")
      , ("(konjunktio)",   "Conj")
      , ("(lukusana)",     "Numeral")
      , ("(lyhenne)",      "Abbr")
      , ("(prepositio)",   "Prep")
      , ("(pronomini)",    "Pron")
      , ("(substantiivi)", "N")
      , ("(verbi)",        "V")
      ]
-- | Turn a word into an identifier fragment, character by character.
--
-- * The letters A-Z and a-z are kept unchanged.
-- * Four accented letters are spelled with apostrophes:
--   à becomes @a''@, ä becomes @a'@, ö becomes @o'@, ü becomes @u'@.
-- * Every other character (including space, hyphen and digits) becomes
--   a single underscore.
mkId :: String -> String
mkId = concatMap encode
  where
    encode ch
      | ch == '\224'    = "a''"  -- à
      | ch == '\228'    = "a'"   -- ä
      | ch == '\246'    = "o'"   -- ö
      | ch == '\252'    = "u'"   -- ü
      | isPlainLetter ch = [ch]
      | otherwise       = "_"
    -- Code points 65-90 and 97-122, i.e. the unaccented Latin letters.
    isPlainLetter ch = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')