forked from GitHub/gf-core
DictFin: 43k lemmas from KOTUS word list
This commit is contained in:
43542
lib/src/finnish/DictFin.gf
Normal file
43542
lib/src/finnish/DictFin.gf
Normal file
File diff suppressed because it is too large
Load Diff
43538
lib/src/finnish/DictFinAbs.gf
Normal file
43538
lib/src/finnish/DictFinAbs.gf
Normal file
File diff suppressed because it is too large
Load Diff
@@ -12,3 +12,10 @@ tuntea_V =
|
|||||||
nahda_V
|
nahda_V
|
||||||
|
|
||||||
tehda_V
|
tehda_V
|
||||||
|
|
||||||
|
tietaa_V
|
||||||
|
|
||||||
|
taitaa_V
|
||||||
|
|
||||||
|
siita_V
|
||||||
|
|
||||||
|
|||||||
@@ -196,6 +196,10 @@ resource MorphoFin = ResFin ** open Prelude in {
|
|||||||
raetta = case <rae : Str> of {
|
raetta = case <rae : Str> of {
|
||||||
_ + "e" =>
|
_ + "e" =>
|
||||||
<rae + "tt" + a, rakee + "seen"> ; -- raetta,rakeeseen
|
<rae + "tt" + a, rakee + "seen"> ; -- raetta,rakeeseen
|
||||||
|
_ + "u" =>
|
||||||
|
<rae + "tt" + a, rakee + "seen"> ; -- kiiru, kiiruuseen
|
||||||
|
_ + "i" =>
|
||||||
|
<rae + "tt" + a, rakee + "seen"> ; -- ori, oriin
|
||||||
_ + "s" =>
|
_ + "s" =>
|
||||||
<rae + "t" + a, rakee + "seen"> ; -- rengasta,renkaaseen
|
<rae + "t" + a, rakee + "seen"> ; -- rengasta,renkaaseen
|
||||||
_ + "t" =>
|
_ + "t" =>
|
||||||
@@ -804,9 +808,12 @@ resource MorphoFin = ResFin ** open Prelude in {
|
|||||||
"ll" + a => "lt" + a ;
|
"ll" + a => "lt" + a ;
|
||||||
h@("h" | "l") + "je" + e => h + "ke" ; -- pohje/lahje impossible
|
h@("h" | "l") + "je" + e => h + "ke" ; -- pohje/lahje impossible
|
||||||
("tk" | "hk" | "sk" | "sp" | "st") + _ => nke ; -- viuhke,kuiske
|
("tk" | "hk" | "sk" | "sp" | "st") + _ => nke ; -- viuhke,kuiske
|
||||||
a + k@("k"|"p"|"t") + e@("e"|"a"|"ä"|"u"|"i"|"o"|"ö") => a + k + k + e ;
|
a + k@("k"|"p"|"t") + e@("e"|"a"|"ä"|"u"|"y"|"i"|"o"|"ö") => a + k + k + e ;
|
||||||
a + "d" + e@("e"|"a"|"ä"|"u"|"i"|"o"|"ö") => a + "t" + e ;
|
a + "d" + e@("e"|"a"|"ä"|"u"|"i"|"o"|"ö") => a + "t" + e ;
|
||||||
s + a@("a" | "ä") + "e" => s + a + "ke" ; -- säe, tae
|
s + a@("a" | "ä") + "e" => s + a + "ke" ; -- säe, tae
|
||||||
|
s + "ui" => s + "uki" ; -- ruis
|
||||||
|
s + "aa" => s + "aka" ; -- taata
|
||||||
|
s + "i" + a@("a" | "e" | "i") => s + "ik" + a ; -- liata, siitä, pietä
|
||||||
a + "v" + e@("e"|"a"|"ä"|"u"|"i") => a + "p" + e ; -- taive/toive imposs
|
a + "v" + e@("e"|"a"|"ä"|"u"|"i") => a + "p" + e ; -- taive/toive imposs
|
||||||
ase => ase
|
ase => ase
|
||||||
} ;
|
} ;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
--# -path=.:alltenses
|
--# -path=.:alltenses
|
||||||
|
|
||||||
resource Kotus = Declensions ** open MorphoFin,CatFin,Prelude in {
|
resource Kotus = open MorphoFin, Prelude in {
|
||||||
|
|
||||||
oper vowelHarmony = vowHarmony ;
|
oper vowelHarmony = vowHarmony ;
|
||||||
|
|
||||||
@@ -18,7 +18,10 @@ oper
|
|||||||
= \s -> let ws = weakGrade s in
|
= \s -> let ws = weakGrade s in
|
||||||
dSilakka s (ws + "n") (ws + "it" + getHarmony (last s)) ;
|
dSilakka s (ws + "n") (ws + "it" + getHarmony (last s)) ;
|
||||||
d05 : Str -> NForms -- 3212 öljymaali
|
d05 : Str -> NForms -- 3212 öljymaali
|
||||||
= \s -> dPaatti s (s + "n") ;
|
= \s -> case last s of {
|
||||||
|
"i" => dPaatti s (s + "n") ;
|
||||||
|
_ => dUnix s
|
||||||
|
} ;
|
||||||
d05A : Str -> NForms -- 1959 öylätti
|
d05A : Str -> NForms -- 1959 öylätti
|
||||||
= \s -> dPaatti s (weakGrade s + "n") ;
|
= \s -> dPaatti s (weakGrade s + "n") ;
|
||||||
d06 : Str -> NForms -- 1231 öykkäri
|
d06 : Str -> NForms -- 1231 öykkäri
|
||||||
@@ -176,7 +179,7 @@ oper
|
|||||||
d47 : Str -> NForms -- 46 ylirasittunut
|
d47 : Str -> NForms -- 46 ylirasittunut
|
||||||
= dOttanut ;
|
= dOttanut ;
|
||||||
d48 : Str -> NForms -- 346 äpäre
|
d48 : Str -> NForms -- 346 äpäre
|
||||||
= \s -> dRae s (s + "en") ;
|
= \s -> dRae s (s + last s + "n") ;
|
||||||
d48A : Str -> NForms -- 481 äänne
|
d48A : Str -> NForms -- 481 äänne
|
||||||
= \s -> dRae s (strongGrade s + "en") ;
|
= \s -> dRae s (strongGrade s + "en") ;
|
||||||
d49 : Str -> NForms -- 31 vempele
|
d49 : Str -> NForms -- 31 vempele
|
||||||
@@ -267,29 +270,32 @@ oper
|
|||||||
= \s -> cValjeta s (Predef.tk 2 s + "ni") ;
|
= \s -> cValjeta s (Predef.tk 2 s + "ni") ;
|
||||||
c72A : Str -> VForms -- 52 yhdetä
|
c72A : Str -> VForms -- 52 yhdetä
|
||||||
= \s -> cValjeta s (strongGrade (Predef.tk 2 s) + "ni") ;
|
= \s -> cValjeta s (strongGrade (Predef.tk 2 s) + "ni") ;
|
||||||
{-
|
|
||||||
c73 : Str -> VForms -- 600 äkseerata
|
c73 : Str -> VForms -- 600 äkseerata
|
||||||
= \s -> ;
|
= \s -> cPudota s (Predef.tk 2 s + "si") ;
|
||||||
c73A : Str -> VForms -- 313 änkätä
|
c73A : Str -> VForms -- 313 änkätä
|
||||||
= \s -> ;
|
= \s -> cPudota s (strongGrade (Predef.tk 2 s) + "si") ;
|
||||||
c74 : Str -> VForms -- 99 öljytä
|
c74 : Str -> VForms -- 99 öljytä
|
||||||
= \s -> ;
|
= \s -> cPudota s (Predef.tk 2 s + "si") ;
|
||||||
c74A : Str -> VForms -- 72 ängetä
|
c74A : Str -> VForms -- 72 ängetä
|
||||||
= \s -> ;
|
= \s -> cPudota s (strongGrade (Predef.tk 2 s) + "si") ;
|
||||||
c75 : Str -> VForms -- 39 viritä
|
c75 : Str -> VForms -- 39 viritä
|
||||||
= \s -> ;
|
= \s -> cPudota s (Predef.tk 2 s + "si") ;
|
||||||
c75A : Str -> VForms -- 9 siitä
|
c75A : Str -> VForms -- 9 siitä
|
||||||
= \s -> ;
|
= \s -> cPudota s (strongGrade (Predef.tk 2 s) + "si") ;
|
||||||
c76A : Str -> VForms -- 2 tietää
|
c76A : Str -> VForms -- 2 tietää
|
||||||
= \s -> ;
|
= \s -> let tieta = init s ; tieda = weakGrade tieta ; ties = Predef.tk 2 tieta + "s" in
|
||||||
|
cOttaa s (tieda + "n") (ties + "in") (ties + "i") ; -- only tietaa, taitaa
|
||||||
|
-- defective verbs
|
||||||
c77 : Str -> VForms -- 3 vipajaa
|
c77 : Str -> VForms -- 3 vipajaa
|
||||||
= \s -> ;
|
= c56A ; ----
|
||||||
c78 : Str -> VForms -- 31 ähkää
|
c78 : Str -> VForms -- 31 ähkää
|
||||||
= \s -> ;
|
= c56A ; ----
|
||||||
c78A : Str -> VForms -- 1 tuikkaa
|
c78A : Str -> VForms -- 1 tuikkaa
|
||||||
= \s -> ;
|
= c56A ; ----
|
||||||
c99 : Str -> VForms -- 5453 öykkärimäisesti
|
c99 : Str -> {s : Str} -- 5453 öykkärimäisesti
|
||||||
= \s -> ;
|
= \s -> {s = s} ;
|
||||||
-}
|
|
||||||
|
c101 : Str -> {s : Str} -- pronouns etc
|
||||||
|
= c99 ; -- dummy
|
||||||
}
|
}
|
||||||
|
|
||||||
57
lib/src/finnish/kotus/Kotus.hs
Normal file
57
lib/src/finnish/kotus/Kotus.hs
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
-- | Filter from stdin to stdout: every input line is handed to 'mkOne' and
-- the resulting GF source lines are printed one per line.
main :: IO ()
main = interact $ \input -> unlines (concatMap mkOne (lines input))
|
||||||
|
|
||||||
|
-- | Convert one line of the word list into zero or more GF source lines by
-- analysing its tags and rendering the result.
mkOne :: String -> [String]
mkOne line = mkEntry (analyse line)
|
||||||
|
|
||||||
|
-- | Render one analysed entry as GF source: a @fun@ declaration followed by
-- a @lin@ rule, or an empty list when the entry is skipped.
--
-- The argument is (word, paradigm number, gradation tag, homonym number) as
-- produced by 'analyse'; a tag value of @"0"@ means the tag was absent.
mkEntry :: (String, Int, String, String) -> [String]
mkEntry (w, p, g, h)
  | p `elem` [0, 50, 51] = [] -- no inflection information, or a compound
  | null w = [] -- nothing left to name; also keeps 'last' below total
  | '-' : stem <- w = mkEntry (stem, p, g, h) -- suffix only
  | last w == 't' && p `notElem` [5, 43, 47] = [] -- plurale tantum --- to do
  | otherwise = [mkFun fun cat, mkLin fun par w]
  where
    -- Category is chosen by paradigm number range.
    cat
      | p < 50 = catNoun
      | p < 99 = catVerb
      | otherwise = catAdverb
    -- Function name: identifier-safe word, optional homonym number, category.
    fun = mkId w ++ "_" ++ (if h == "0" then "" else h ++ "_") ++ cat
    -- Paradigm operation name: "d" or "c" prefix, two-digit number, and an
    -- "A" suffix when a gradation tag is present.
    par = (if p < 52 then "d" else "c") ++ num ++ (if g == "0" then "" else "A")
    -- Paradigm numbers below 10 are zero-padded to two digits.
    num = if p < 10 then "0" ++ show p else show p
|
||||||
|
|
||||||
|
-- | Build the abstract-syntax declaration line @fun NAME : CAT ;@.
mkFun :: String -> String -> String
mkFun fun cat = "fun " ++ fun ++ " : " ++ cat ++ " ;"
|
||||||
|
-- | Build the concrete-syntax rule line @lin NAME = PARADIGM "WORD" ;@.
-- The word is wrapped in double quotes without any escaping.
mkLin :: String -> String -> String -> String
mkLin fun par w = concat ["lin ", fun, " = ", par, " \"", w, "\" ;"]
|
||||||
|
|
||||||
|
-- | Turn a word into an identifier: ASCII letters are kept, four accented
-- letters get an apostrophe spelling, and every other character (including
-- space, hyphen and digits) becomes an underscore.
mkId :: String -> String
mkId = concatMap trim
  where
    trim c
      | c == ' ' || c == '-' = "_"
      | c == '\224' = "a''" -- a with grave accent
      | c == '\228' = "a'" -- a with diaeresis
      | c == '\246' = "o'" -- o with diaeresis
      | c == '\252' = "u'" -- u with diaeresis
      | isAsciiLetter c = [c]
      | otherwise = "_"
    isAsciiLetter c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|
||||||
|
|
||||||
|
-- | Wrap a string in double quotes; the contents are not escaped.
quoted :: String -> String
quoted s = '"' : s ++ ['"']
|
||||||
|
|
||||||
|
-- | Extract (word, paradigm number, gradation tag, homonym number) from one
-- line of the word list, using the tags @s@, @tn@, @av@ and @hn@.
--
-- A missing tag yields @"0"@ (see 'tagged'). A @tn@ value that is not an
-- integer yields paradigm 0 instead of aborting the run the way 'read' would.
analyse :: String -> (String, Int, String, String)
analyse s = (word, paradigm, gradation, homonym)
  where
    tags = getTags s
    word = tagged "s" tags
    -- Same acceptance rule as 'read' (one parse, only white space left
    -- over), but total: anything else falls back to 0.
    paradigm =
      case [n | (n, rest) <- reads (tagged "tn" tags), ("", "") <- lex rest] of
        [n] -> n
        _ -> 0
    gradation = tagged "av" tags
    homonym = tagged "hn" tags
|
||||||
|
|
||||||
|
-- | Look up the value of a tag, returning @"0"@ when the tag is absent.
tagged :: String -> Tags -> String
tagged key tags =
  case lookup key tags of
    Just value -> value
    Nothing -> "0"
|
||||||
|
|
||||||
|
-- | Tag name paired with the text that directly follows the tag.
type Tags = [(String, String)]

-- | Collect the values of leaf tags.
--
-- The input must start with @<@. For each @<name>@, the text up to the next
-- @<@ is recorded under @name@ unless it is empty; an empty text means the
-- tag is immediately followed by another tag, and scanning just continues.
-- Scanning stops at the first position that does not start a complete tag.
getTags :: String -> Tags
getTags ('<' : afterOpen)
  | (name, _ : body) <- break (== '>') afterOpen =
      let (value, remainder) = break (== '<') body
       in if null value
            then getTags body
            else (name, value) : getTags remainder
getTags _ = []
|
||||||
|
|
||||||
|
-- | Names of the GF categories used in the generated rules.
catNoun, catVerb, catAdverb :: String
catNoun = "NK"
catVerb = "VK"
catAdverb = "AdvK"
|
||||||
10
lib/src/finnish/kotus/Makefile
Normal file
10
lib/src/finnish/kotus/Makefile
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Regenerate the GF dictionary modules from the KOTUS word list.
# Each generated file is its prelude template, followed by the matching
# lines of Kotus.hs output, followed by the closing brace of the module.
# None of the targets names a real file, so they are declared phony; a stray
# file called "abs", "cnc" or "gf" would otherwise suppress the rebuild.
.PHONY: gf abs cnc

gf: abs cnc

# Abstract syntax: keep the "fun NAME : CAT ;" lines (they contain ":").
abs:
	cp prelDictFinAbs DictFinAbs.gf
	runghc Kotus.hs <src/kotus-sanalista_v1.xml | grep ":" >>DictFinAbs.gf
	echo "}" >>DictFinAbs.gf

# Concrete syntax: keep the "lin NAME = PARADIGM "WORD" ;" lines (they contain "=").
cnc:
	cp prelDictFin DictFin.gf
	runghc Kotus.hs <src/kotus-sanalista_v1.xml | grep "=" >>DictFin.gf
	echo "}" >>DictFin.gf
|
||||||
7
lib/src/finnish/kotus/prelDictFin
Normal file
7
lib/src/finnish/kotus/prelDictFin
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
--# -path=.:alltenses
|
||||||
|
|
||||||
|
concrete DictFin of DictFinAbs = open MorphoFin, Kotus, Prelude in {
|
||||||
|
|
||||||
|
flags coding = utf8 ;
|
||||||
|
|
||||||
|
lincat NK = NForms ; VK = VForms ;AdvK = SS ;
|
||||||
3
lib/src/finnish/kotus/prelDictFinAbs
Normal file
3
lib/src/finnish/kotus/prelDictFinAbs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
abstract DictFinAbs = {
|
||||||
|
cat NK ; VK ; AdvK ;
|
||||||
|
|
||||||
Reference in New Issue
Block a user