forked from GitHub/gf-core
started adding KOTUS verbs in Finnish
This commit is contained in:
14
lib/src/finnish/IrregFin.gf
Normal file
14
lib/src/finnish/IrregFin.gf
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
kayda_V =
  -- Twelve forms of the verb "käydä", built from the dictionary form minus
  -- its last two letters (kay) and a past stem ending in "vi" (kavi).
  -- The eleventh form is kay + "ty" ("käyty"), not "käytty".
  -- NOTE(review): `s` is not bound anywhere in this visible definition;
  -- confirm where the dictionary form is supposed to come from.
  let kay = Predef.tk 2 s ; kavi = init kay + "vi" in
  vForms12 s (kay + "n") kay (kay + "vät") (kay + "kää") (kay + "dään")
    (kavi + "n") kavi (kavi + "si") (kay + "nyt") (kay + "ty")
    (kay + "nee") ;
|
||||
tuntea_V =
|
||||
let tunte = init s ; tunne = weakGrade tunte ; tuns = Predef.tk 2 tunte + "s" in
|
||||
vForms12 s (tunne + "n") (tunte + "e") (tunte + "vat") (tunte + "kaa") (tunne + "taan")
|
||||
(tuns + "in") (tuns + "i") (init tunte + "isi") (tunte + "nut") (tunne + "ttu")
|
||||
(tunte + "nee") ;
|
||||
nahda_V
|
||||
|
||||
tehda_V
|
||||
@@ -147,7 +147,7 @@ lin
|
||||
policeman_N = mkN "poliisi" ;
|
||||
priest_N = mkN "pappi" ;
|
||||
probable_AS = mkAS --- for vowel harmony
|
||||
(mkA (mkN "todennäköinen") "tonennäköisempi" "todennälöisin") ; ---- sta
|
||||
(mkA (mkN "todennäköinen") "todennäköisempi" "todennäköisin") ; ---- sta
|
||||
queen_N = mkN "kuningatar" ;
|
||||
radio_N = mk2N "radio" "radioita" ;
|
||||
rain_V0 = mkV0 (mk2V "sataa" "satoi") ;
|
||||
|
||||
@@ -643,6 +643,7 @@ resource MorphoFin = ResFin ** open Prelude in {
|
||||
kuunnel = Predef.tk 2 kuunnella ;
|
||||
kuuntel = Predef.tk 2 kuuntelin ;
|
||||
u = uyHarmony a ;
|
||||
l = last kuunnel
|
||||
in vForms12
|
||||
kuunnella
|
||||
(kuuntel + "en")
|
||||
@@ -653,9 +654,9 @@ resource MorphoFin = ResFin ** open Prelude in {
|
||||
(kuuntel + "in")
|
||||
(kuuntel + "i")
|
||||
(kuuntel + "isi")
|
||||
(kuunnel + "l" + u + "t")
|
||||
(kuunnel + l + u + "t")
|
||||
(kuunnel + "t" + u)
|
||||
(kuunnel + "lee") ;
|
||||
(kuunnel + l + "ee") ;
|
||||
|
||||
-- auxiliaries
|
||||
|
||||
|
||||
28
lib/src/finnish/kotus/src/kotus-sanalista.dtd
Normal file
28
lib/src/finnish/kotus/src/kotus-sanalista.dtd
Normal file
@@ -0,0 +1,28 @@
|
||||
<!--
|
||||
Copyright (C) Kotimaisten kielten tutkimuskeskus 2006
|
||||
Kotimaisten kielten tutkimuskeskuksen nykysuomen sanalista, versio 1
|
||||
Julkaistu 15.12.2006
|
||||
|
||||
Sanalista julkaistaan GNU LGPL -lisenssillä.
|
||||
Lisenssiteksti luettavissa osoitteessa http://www.gnu.org/licenses/lgpl.html
|
||||
Listaan perustuvien sovellusten mukana on aina toimitettava alkuperäinen
|
||||
sanalista lukukelpoisessa muodossaan.
|
||||
-->
|
||||
|
||||
<!ELEMENT kotus-sanalista (st*) >
|
||||
|
||||
<!ELEMENT st (s, hn?, t*) >
|
||||
|
||||
<!ELEMENT s (#PCDATA) >
|
||||
|
||||
<!ELEMENT hn (#PCDATA) >
|
||||
|
||||
<!ELEMENT t (tn, av?)* >
|
||||
<!ATTLIST t taivutus CDATA #IMPLIED>
|
||||
|
||||
<!ELEMENT tn (#PCDATA) >
|
||||
|
||||
<!ELEMENT av (#PCDATA) >
|
||||
<!ATTLIST av astevaihtelu CDATA #IMPLIED>
|
||||
|
||||
|
||||
94125
lib/src/finnish/kotus/src/kotus-sanalista_v1.xml
Normal file
94125
lib/src/finnish/kotus/src/kotus-sanalista_v1.xml
Normal file
File diff suppressed because it is too large
Load Diff
295
lib/src/finnish/kotus/uusisuomi/Kotus.gf
Normal file
295
lib/src/finnish/kotus/uusisuomi/Kotus.gf
Normal file
@@ -0,0 +1,295 @@
|
||||
--# -path=.:alltenses
|
||||
|
||||
resource Kotus = Declensions ** open MorphoFin,CatFin,Prelude in {
|
||||
|
||||
oper vowelHarmony = vowHarmony ;
|
||||
|
||||
oper
|
||||
|
||||
d01 : Str -> NForms -- 1780 öljy
|
||||
= \s -> dUkko s (s + "n") ;
|
||||
d01A : Str -> NForms -- 166 yökkö
|
||||
= \s -> dUkko s (weakGrade s + "n") ;
|
||||
d02 : Str -> NForms -- 1189 ääntely
|
||||
= \s -> dSilakka s (s + "n") (s + "j" + getHarmony (last s)) ;
|
||||
d03 : Str -> NForms -- 481 ääntiö
|
||||
= \s -> dSilakka s (s + "n") (s + "it" + vowelHarmony s) ;
|
||||
d04A : Str -> NForms -- 273 äpärikkö
|
||||
= \s -> let ws = weakGrade s in
|
||||
dSilakka s (ws + "n") (ws + "it" + getHarmony (last s)) ;
|
||||
d05 : Str -> NForms -- 3212 öljymaali
|
||||
= \s -> dPaatti s (s + "n") ;
|
||||
d05A : Str -> NForms -- 1959 öylätti
|
||||
= \s -> dPaatti s (weakGrade s + "n") ;
|
||||
d06 : Str -> NForms -- 1231 öykkäri
|
||||
= \s -> dTohtori s ;
|
||||
d07 : Str -> NForms -- 81 vuoksi
|
||||
= \s -> dArpi s (init s + "en") ;
|
||||
d07A : Str -> NForms -- 70 väki
|
||||
= \s -> dArpi s (init (weakGrade s) + "en") ;
|
||||
d08 : Str -> NForms -- 99 à la carte
|
||||
= \s -> dNukke s (s + "n") ;
|
||||
d08A : Str -> NForms -- 5 vinaigrette
|
||||
= \s -> dNukke s (weakGrade s + "n") ;
|
||||
d09 : Str -> NForms -- 696 ääriraja
|
||||
= \s -> let a = last s in dSilakka s
|
||||
(s + "n")
|
||||
(init s + case a of {"a" => "o" ; _ => "ö"} + "j" + a) ;
|
||||
d09A : Str -> NForms -- 1040 ääniraita
|
||||
= \s -> let a = last s in dSilakka s
|
||||
(weakGrade s + "n")
|
||||
(init s + case a of {"a" => "o" ; _ => "ö"} + "j" + a) ;
|
||||
d10 : Str -> NForms -- 2119 äänittäjä
|
||||
= \s -> dSilakka s (s + "n") (init s + "i" + vowelHarmony (last s)) ;
|
||||
d10A : Str -> NForms -- 284 änkkä
|
||||
= \s -> dSilakka s (weakGrade s + "n") (init s + "i" + vowelHarmony (last s)) ;
|
||||
d11 : Str -> NForms -- 46 ödeema
|
||||
= \s -> dSilakka s (weakGrade s + "n") (init s + "i" + vowelHarmony (last s)) ;
|
||||
d12 : Str -> NForms -- 1125 örinä
|
||||
= \s -> let a = vowelHarmony (last s) in
|
||||
dSilakka s (s + "n")
|
||||
(init s + case a of {"a" => "o" ; _ => "ö"} + "it" + a) ;
|
||||
d13 : Str -> NForms -- 157 virtaska
|
||||
= \s -> let a = vowelHarmony (last s) in
|
||||
dSilakka s (s + "n")
|
||||
(init s + case a of {"a" => "o" ; _ => "ö"} + "j" + a) ;
|
||||
d14A : Str -> NForms -- 244 ötökkä
|
||||
= \s -> let a = vowelHarmony (last s) ; ws = weakGrade s in
|
||||
dSilakka s (ws + "n")
|
||||
(init ws + case a of {"a" => "o" ; _ => "ö"} + "it" + a) ;
|
||||
d15 : Str -> NForms -- 170 äreä
|
||||
= dKorkea ;
|
||||
d16 : Str -> NForms -- 2 kumpikin --?
|
||||
= \s -> let kumpi = Predef.take 5 s ; kin = Predef.drop 5 s in
|
||||
\\i => (dSuurempi kumpi ! i + kin) ;
|
||||
d16A : Str -> NForms -- 20 ylempi
|
||||
= dSuurempi ;
|
||||
d17 : Str -> NForms -- 38 virkkuu
|
||||
= dPaluu ;
|
||||
d18 : Str -> NForms -- voi, tee, sää
|
||||
= dPuu ;
|
||||
d19 : Str -> NForms -- 6 yö
|
||||
= dSuo ;
|
||||
d20 : Str -> NForms -- 46 voodoo
|
||||
= dPaluu ;
|
||||
d21 : Str -> NForms -- 22 tax-free --? rosé
|
||||
= dPuu ;
|
||||
d22 : Str -> NForms -- 13 tournedos
|
||||
= \s -> nForms10
|
||||
s (s + "'n") (s + "'ta") (s + "'na") (s + "'hon")
|
||||
(s + "'iden") (s + "'ita") (s + "'ina") (s + "'issa") (s + "'ihin") ;
|
||||
d23 : Str -> NForms -- 9 vuohi
|
||||
= \s -> dArpi s (init s + "en") ;
|
||||
d24 : Str -> NForms -- 20 uni
|
||||
= \s -> dArpi s (init s + "en") ;
|
||||
d25 : Str -> NForms -- 9 tuomi
|
||||
= \s -> dArpi s (init s + "en") ;
|
||||
d26 : Str -> NForms -- 113 ääri
|
||||
= \s -> dArpi s (init s + "en") ;
|
||||
d27 : Str -> NForms -- 23 vuosi
|
||||
= \s -> dArpi s (Predef.tk 2 s + "den") ;
|
||||
d28 : Str -> NForms -- 13 virsi
|
||||
= \s -> dArpi s (Predef.tk 2 s + "ren") ;
|
||||
d28A : Str -> NForms -- 1 jälsi
|
||||
= \s -> dArpi s (Predef.tk 2 s + "len") ;
|
||||
d29 : Str -> NForms -- 1 lapsi
  -- Same table as dArpi with the "-en" form (lapsen), except that slot 2 is
  -- overridden: the last three letters are dropped and "sta" is appended
  -- (lapsi -> la + sta = lasta), parallel to the "stä" override in d30.
  = \s -> let lapsi = dArpi s (init s + "en") in
    table {2 => Predef.tk 3 s + "sta" ; i => lapsi ! i} ;
|
||||
d30 : Str -> NForms -- 2 veitsi
|
||||
= \s -> let lapsi = dArpi s (init s + "en") in
|
||||
table {2 => Predef.tk 3 s + "stä" ; i => lapsi ! i} ;
|
||||
d31 : Str -> NForms -- 3 yksi
|
||||
= \s -> let
|
||||
y = Predef.tk 3 s ;
|
||||
a = vowelHarmony y
|
||||
in nForms10
|
||||
s (y + "hden") (y + "ht" + a) (y + "hten" + a) (y + "hteen")
|
||||
(s + "en") (s + a) (s + "n" + a) (s + "ss" + a) (s + "in") ;
|
||||
d32 : Str -> NForms -- 20 uumen
|
||||
= \s -> dPiennar s (s + "en") ;
|
||||
d32A : Str -> NForms -- 54 ystävätär
|
||||
= \s -> dPiennar s (strongGrade (init s) + last s + "en") ;
|
||||
d33 : Str -> NForms -- 168 väistin
|
||||
= \s -> dLiitin s (init s + "men") ;
|
||||
d33A : Str -> NForms -- 181 yllytin
|
||||
= \s -> dLiitin s (strongGrade (init s) + "men") ;
|
||||
d34 : Str -> NForms -- 1 alaston
  -- The stem used for most forms swaps the final letter for "m"
  -- (alaston -> alastom-), giving alastoman, alastomana, alastomaan, ...
  -- Only the third form is built on the dictionary form itself (alastonta).
  = \s -> let alastom = init s + "m" in
    nForms10
      s (alastom + "an") (s + "ta") (alastom + "ana") (alastom + "aan")
      (alastom + "ien") (alastom + "ia") (alastom + "ina") (alastom + "issa")
      (alastom + "iin") ;
|
||||
d34A : Str -> NForms -- 569 ääretön
|
||||
= dOnneton ;
|
||||
d35A : Str -> NForms -- 1 lämmin
|
||||
= \s -> let lämpim = strongGrade (init s) + "m" in
|
||||
nForms10
|
||||
s (lämpim + "än") (s + "tä") (lämpim + "änä") (lämpim + "ään")
|
||||
(lämpim + "ien") (lämpim + "iä") (lämpim + "inä") (lämpim + "issä")
|
||||
(lämpim + "iin") ;
|
||||
d36 : Str -> NForms -- 11 ylin
|
||||
= dSuurin ;
|
||||
d37 : Str -> NForms -- 1 vasen
|
||||
= \s -> let vasem = init s + "m" in
|
||||
nForms10
|
||||
s (vasem + "man") (s + "ta") (vasem + "pana") (vasem + "paan")
|
||||
(vasem + "pien") (vasem + "pia") (vasem + "pina") (vasem + "missa")
|
||||
(vasem + "piin") ;
|
||||
d38 : Str -> NForms -- 4195 öykkärimäinen
|
||||
= dNainen ;
|
||||
d39 : Str -> NForms -- 2730 örähdys
|
||||
= dJalas ;
|
||||
d40 : Str -> NForms -- 2482 öykkärimäisyys
|
||||
= dLujuus ;
|
||||
d41 : Str -> NForms -- 127 äyräs
|
||||
= \s -> let is = init s in dRae s (is + last is + "n") ;
|
||||
d41A : Str -> NForms -- 401 öljykangas
|
||||
= \s -> let is = init s in dRae s (strongGrade is + last is + "n") ;
|
||||
d42 : Str -> NForms -- 1 mies
  -- The stem used for most forms swaps the final letter for "h"
  -- (mies -> mieh-), giving miehen, miehenä, mieheen, miehiä, ...
  -- The third and sixth forms stay on the dictionary form (miestä, miesten).
  = \s -> let mieh = init s + "h" in
    nForms10
      s (mieh + "en") (s + "tä") (mieh + "enä") (mieh + "een")
      (s + "ten") (mieh + "iä") (mieh + "inä") (mieh + "issä")
      (mieh + "iin") ;
|
||||
d43 : Str -> NForms -- 11 tiehyt
|
||||
= \s -> dRae s (init s + "en") ;
|
||||
d43A : Str -> NForms -- 1 immyt
|
||||
= \s -> dRae s (strongGrade (init s) + "en") ;
|
||||
d44 : Str -> NForms -- 1 kevät
  -- Every form except the dictionary form and the third one (kevättä) is
  -- built on the word minus its final letter (kevä-): kevään, keväänä,
  -- kevääseen, keväiden, keväitä, keväinä, keväissä, keväisiin.
  = \s -> let kevä = init s in
    nForms10
      s (kevä + "än") (s + "tä") (kevä + "änä") (kevä + "äseen")
      (kevä + "iden") (kevä + "itä") (kevä + "inä") (kevä + "issä")
      (kevä + "isiin") ;
|
||||
d45 : Str -> NForms -- 23 yhdes
|
||||
= \s -> let yhde = init s ; a = vowelHarmony s in
|
||||
nForms10
|
||||
s (yhde + "nnen") (yhde + "tt" + a) (yhde + "nten" + a) (yhde + "nteen")
|
||||
(yhde + "nsien") (yhde + "nsi" + a) (yhde + "nsin" + a) (yhde + "nsiss" + a)
|
||||
(yhde + "nsiin") ;
|
||||
d46 : Str -> NForms -- 1 tuhat
|
||||
= \s -> let tuha = init s ; a = vowelHarmony s in
|
||||
nForms10
|
||||
s (tuha + "nnen") (tuha + "tt" + a) (tuha + "nten" + a) (tuha + "nteen")
|
||||
(tuha + "nsien") (tuha + "nsi" + a) (tuha + "nsin" + a) (tuha + "nsiss" + a)
|
||||
(tuha + "nsiin") ;
|
||||
d47 : Str -> NForms -- 46 ylirasittunut
|
||||
= dOttanut ;
|
||||
d48 : Str -> NForms -- 346 äpäre
|
||||
= \s -> dRae s (s + "en") ;
|
||||
d48A : Str -> NForms -- 481 äänne
|
||||
= \s -> dRae s (strongGrade s + "en") ;
|
||||
d49 : Str -> NForms -- 31 vempele
|
||||
= \s -> case last s of {
|
||||
"e" => dRae s (s + "en") ;
|
||||
_ => dPiennar s (s + "en")
|
||||
} ;
|
||||
d49A : Str -> NForms -- 11 vemmel
|
||||
= \s -> dPiennar s (strongGrade (init s) + "len") ;
|
||||
{-
|
||||
d50 : Str -> NForms -- 520 vääräsääri
|
||||
= \s -> ;
|
||||
d51 : Str -> NForms -- 62 vierasmies
|
||||
= \s -> ;
|
||||
-}
|
||||
c52 : Str -> VForms -- 667 ärjyä
|
||||
= \s -> cHukkua s (init s + "n") ;
|
||||
c52A : Str -> VForms -- 1568 öljyyntyä
|
||||
= \s -> cHukkua s (weakGrade (init s) + "n") ;
|
||||
c53 : Str -> VForms -- 605 äänestää
|
||||
= \s -> let ott = Predef.tk 2 s in
|
||||
cOttaa s (init s + "n") (ott + "in") (ott + "i") ;
|
||||
c53A : Str -> VForms -- 2121 örähtää
|
||||
= \s -> let ota = weakGrade (init s) in
|
||||
cOttaa s (ota + "n") (init ota + "in") (Predef.tk 2 s + "i") ;
|
||||
c54 : Str -> VForms -- 2 pieksää
|
||||
= \s -> let ott = Predef.tk 2 s in
|
||||
cOttaa s (init s + "n") (ott + "in") (ott + "i") ;
|
||||
c54A : Str -> VForms -- 316 ääntää
|
||||
= \s -> let ota = weakGrade (init s) ; o = Predef.tk 2 ota in
|
||||
cOttaa s (ota + "n") (o + "sin") (o + "si") ;
|
||||
c55A : Str -> VForms -- 7 yltää
|
||||
= c54A ; --? diff: variation ylti/ylsi
|
||||
c56 : Str -> VForms -- 22 valaa
|
||||
= \s -> let val = Predef.tk 2 s in
|
||||
cOttaa s (init s + "n") (val + "oin") (val + "oi") ; -- never ö
|
||||
c56A : Str -> VForms -- 28 virkkaa
  -- ota: weak-grade stem (dictionary form minus its last letter, weakened);
  -- ot: that stem minus its final vowel, used for the "-oin" form (virkoin).
  -- The last argument keeps the unweakened stem, the same way c53A builds
  -- its last argument from Predef.tk 2 s (virkkaa -> virkk + oi = virkkoi).
  = \s -> let ota = weakGrade (init s) ; ot = init ota in
    cOttaa s (ota + "n") (ot + "oin") (Predef.tk 2 s + "oi") ;
|
||||
c57A : Str -> VForms -- 3 saartaa
|
||||
= c56A ; --? diff: saartoi/saarsi
|
||||
c58 : Str -> VForms -- 13 suitsea
|
||||
= \s -> cHukkua s (init s + "n") ;
|
||||
c58A : Str -> VForms -- 19 tunkea
|
||||
= \s -> cHukkua s (weakGrade (init s) + "n") ;
|
||||
c59A : Str -> VForms -- 1 tuntea
|
||||
= \s -> let tunte = init s ; tunne = weakGrade tunte ; tuns = Predef.tk 2 tunte + "s" in
|
||||
vForms12 s (tunne + "n") (tunte + "e") (tunte + "vat") (tunte + "kaa") (tunne + "taan")
|
||||
(tuns + "in") (tuns + "i") (init tunte + "isi") (tunte + "nut") (tunne + "ttu")
|
||||
(tunte + "nee") ; -- just one verb
|
||||
c60A : Str -> VForms -- 1 lähteä
|
||||
= c58A ; --? diff lähti/läksi, just one verb
|
||||
c61 : Str -> VForms -- 249 äyskiä
|
||||
= \s -> cHukkua s (init s + "n") ;
|
||||
c61A : Str -> VForms -- 153 vääntelehtiä
|
||||
= \s -> cHukkua s (weakGrade (init s) + "n") ;
|
||||
c62 : Str -> VForms -- 684 öykkäröidä
|
||||
= \s -> cJuoda s ;
|
||||
c63 : Str -> VForms -- 3 saada
|
||||
= c62 ;
|
||||
c64 : Str -> VForms -- 8 viedä
|
||||
= c62 ;
|
||||
c65 : Str -> VForms -- 1 käydä
  -- kay: dictionary form minus its last two letters (käydä -> käy);
  -- kavi: kay with its last letter replaced by "vi" (kävi).
  -- The eleventh form is kay + "ty" (käyty), not "käytty".
  = \s -> let kay = Predef.tk 2 s ; kavi = init kay + "vi" in
    vForms12 s (kay + "n") kay (kay + "vät") (kay + "kää") (kay + "dään")
      (kavi + "n") kavi (kavi + "si") (kay + "nyt") (kay + "ty")
      (kay + "nee") ; -- just one verb
|
||||
c66 : Str -> VForms -- 268 öristä
|
||||
= \s -> cKuunnella s (Predef.tk 2 s + "in") ;
|
||||
c66A : Str -> VForms -- 3 vavista
|
||||
= \s -> cKuunnella s (strongGrade (Predef.tk 3 s) + "sin") ;
|
||||
c67 : Str -> VForms -- 704 ällistellä
|
||||
= \s -> cKuunnella s (Predef.tk 2 s + "in") ;
|
||||
c67A : Str -> VForms -- 634 äännellä
|
||||
= \s -> cKuunnella s (strongGrade (Predef.tk 3 s) + "lin") ;
|
||||
c68 : Str -> VForms -- 49 viheriöidä
|
||||
= c62 ; -- diff viheriöin/viheriöitsen
|
||||
c69 : Str -> VForms -- 48 villitä
|
||||
= \s -> cHarkita s ;
|
||||
c70 : Str -> VForms -- 3 syöstä
|
||||
= \s -> cJuosta s (Predef.tk 3 s + "ksen") ;
|
||||
c71 : Str -> VForms -- 2 tehdä
  -- te: dictionary form minus its last three letters (tehdä -> te,
  -- nähdä -> nä). The second form is te + "en" (teen); the seventh form,
  -- which sits in the same position as tuns + "in" in c59A, is te + "in"
  -- (tein) and must not repeat the second form.
  = \s -> let te = Predef.tk 3 s in
    vForms12 s (te + "en") (te + "kee") (te + "kevät") (te + "hkää") (te + "hdään")
      (te + "in") (te + "ki") (te + "kisi") (te + "hnyt") (te + "hty")
      (te + "hnee") ; -- just two verbs: nähdä, tehdä
|
||||
c72 : Str -> VForms -- 93 yletä
|
||||
= \s -> cValjeta s (Predef.tk 2 s + "ni") ;
|
||||
c72A : Str -> VForms -- 52 yhdetä
|
||||
= \s -> cValjeta s (strongGrade (Predef.tk 2 s) + "ni") ;
|
||||
{-
|
||||
c73 : Str -> VForms -- 600 äkseerata
|
||||
= \s -> ;
|
||||
c73A : Str -> VForms -- 313 änkätä
|
||||
= \s -> ;
|
||||
c74 : Str -> VForms -- 99 öljytä
|
||||
= \s -> ;
|
||||
c74A : Str -> VForms -- 72 ängetä
|
||||
= \s -> ;
|
||||
c75 : Str -> VForms -- 39 viritä
|
||||
= \s -> ;
|
||||
c75A : Str -> VForms -- 9 siitä
|
||||
= \s -> ;
|
||||
c76A : Str -> VForms -- 2 tietää
|
||||
= \s -> ;
|
||||
c77 : Str -> VForms -- 3 vipajaa
|
||||
= \s -> ;
|
||||
c78 : Str -> VForms -- 31 ähkää
|
||||
= \s -> ;
|
||||
c78A : Str -> VForms -- 1 tuikkaa
|
||||
= \s -> ;
|
||||
c99 : Str -> VForms -- 5453 öykkärimäisesti
|
||||
= \s -> ;
|
||||
-}
|
||||
}
|
||||
|
||||
54
lib/src/finnish/kotus/uusisuomi/Makefile
Normal file
54
lib/src/finnish/kotus/uusisuomi/Makefile
Normal file
@@ -0,0 +1,54 @@
|
||||
LEX=Omat
|
||||
CAT=N
|
||||
|
||||
.PHONY: kotus
|
||||
|
||||
all: nouns
|
||||
|
||||
verbs:
|
||||
export CAT=V ; export LEX=VNSSK ; make -e tests
|
||||
export CAT=V ; export LEX=VOmat ; make -e tests
|
||||
export CAT=V ; export LEX=VSwadesh ; make -e tests
|
||||
export CAT=V ; export LEX=VDictionary ; make -e tests
|
||||
cat all-diff-V* >all-differences-V
|
||||
cat all-differences-V
|
||||
|
||||
nouns:
|
||||
# export LEX=NSSK ; make -e tests
|
||||
# export LEX=Omat ; make -e tests
|
||||
export LEX=Swadesh ; make -e tests
|
||||
export LEX=Dictionary ; make -e tests
|
||||
export LEX=Duodecim ; make -e tests
|
||||
export LEX=Aino ; make -e tests
|
||||
cat all-diff-* >all-differences
|
||||
cat all-differences
|
||||
|
||||
kotus:
|
||||
export LEX=KOTUS ; make -e tests
|
||||
cat all-diff-KOTUS
|
||||
|
||||
CSC:
|
||||
export LEX=NCSC ; make -e tests
|
||||
cat all-diff-NCSC
|
||||
|
||||
gf-files:
|
||||
runghc MkLex.hs 0 $(CAT) $(LEX) > $(LEX)Abs.gf
|
||||
# runghc MkLex.hs 1 $(CAT) $(LEX) > $(LEX)1.gf
|
||||
runghc MkLex.hs 2 $(CAT) $(LEX) > $(LEX)2.gf
|
||||
runghc MkLex.hs 3 $(CAT) $(LEX) > $(LEX)3.gf
|
||||
runghc MkLex.hs 4 $(CAT) $(LEX) > $(LEX)4.gf
|
||||
|
||||
experiments: gf-files
|
||||
# echo "gt -cat=Utt | l | wf exper1-$(LEX).txt" | gf -s $(LEX)1.gf
|
||||
echo "gt -cat=Utt | l | wf exper2-$(LEX).txt" | gf -s $(LEX)2.gf
|
||||
echo "gt -cat=Utt | l | wf exper3-$(LEX).txt" | gf -s $(LEX)3.gf
|
||||
echo "gt -cat=Utt | l | wf exper4-$(LEX).txt" | gf -s $(LEX)4.gf
|
||||
|
||||
tests: experiments
|
||||
runghc MyDiff.hs correct-$(LEX).txt exper1-$(LEX).txt >diff1-$(LEX).txt
|
||||
runghc MyDiff.hs correct-$(LEX).txt exper2-$(LEX).txt >diff2-$(LEX).txt
|
||||
runghc MyDiff.hs correct-$(LEX).txt exper3-$(LEX).txt >diff3-$(LEX).txt
|
||||
runghc MyDiff.hs correct-$(LEX).txt exper4-$(LEX).txt >diff4-$(LEX).txt
|
||||
date >all-diff-$(LEX)
|
||||
echo $(LEX) >>all-diff-$(LEX)
|
||||
wc -l diff?-$(LEX).txt >>all-diff-$(LEX)
|
||||
118
lib/src/finnish/kotus/uusisuomi/MkLex.hs
Normal file
118
lib/src/finnish/kotus/uusisuomi/MkLex.hs
Normal file
@@ -0,0 +1,118 @@
|
||||
module Main where
|
||||
|
||||
import System
|
||||
import Char
|
||||
|
||||
-- generate Finnish lexicon implementations with 1 or more
|
||||
-- characteristic arguments
|
||||
-- usage: runghc MkLex.hs 3 cat name
|
||||
|
||||
main = do
|
||||
i:cat:tgt:_ <- getArgs
|
||||
let src = "correct-" ++ tgt ++ ".txt"
|
||||
ss <- readFile src >>= return . filter (not . (all isSpace)) . lines
|
||||
initiate tgt cat i
|
||||
mapM_ (mkLex cat (read i) . uncurry (++)) (zip nums ss)
|
||||
putStrLn "}"
|
||||
|
||||
initiate tgt cat i = mapM_ putStrLn [
|
||||
"--# -path=.:alltenses",
|
||||
"",
|
||||
header i,
|
||||
""
|
||||
]
|
||||
where
|
||||
header i = case i of
|
||||
"0" -> unlines [
|
||||
"abstract " ++ tgt ++ "Abs = Cat ** {",
|
||||
"fun testN : N -> Utt ;",
|
||||
"fun testV : V -> Utt ;"
|
||||
]
|
||||
_ -> unlines [
|
||||
"concrete " ++ tgt ++ i ++
|
||||
" of " ++ tgt ++
|
||||
"Abs = CatFin ** open Nominal, Verbal, ResFin, Prelude in {",
|
||||
"",
|
||||
"lin testN = showN ;",
|
||||
"lin testV = showV ;"
|
||||
]
|
||||
|
||||
nums = map prt [10001 ..] where
|
||||
---- prt i = (if i < 10 then "0" else "") ++ show i ++ ". "
|
||||
prt i = show i ++ ". "
|
||||
|
||||
-- W is the flag for mixed-class word lists
|
||||
mkLex "W" 0 line = case words line of
|
||||
num:cat:sana:_ -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "fun " ++ nimi ++ "_" ++ cat ++ " : " ++ cat ++ " ;"
|
||||
_ -> return ()
|
||||
|
||||
mkLex "W" 1 line = case words line of
|
||||
num:cat:sanat@(sana:_) -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "lin " ++ nimi ++
|
||||
"_" ++ cat ++ " = mk" ++ cat ++ " " ++
|
||||
unwords (map prQuoted sanat) ++" ;"
|
||||
_ -> return ()
|
||||
|
||||
mkLex cat 0 line = case words line of
|
||||
num:sana:_ -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "fun " ++ nimi ++ "_" ++ cat ++ " : " ++ cat ++ " ;"
|
||||
_ -> return ()
|
||||
|
||||
mkLex cat 1 line = case words line of
|
||||
num:sana:_ -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "lin " ++ nimi ++
|
||||
"_" ++ cat ++ " = mk" ++ cat ++ " \"" ++ sana ++ "\" ;"
|
||||
_ -> return ()
|
||||
|
||||
mkLex "V" _ line = case words line of
|
||||
num:sana:_:_:_:_:_:_:sanan:_ -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "lin " ++ nimi ++
|
||||
"_V = mkV \"" ++ sana ++ "\" \"" ++ sanan ++ "\" ;"
|
||||
_ -> return ()
|
||||
|
||||
mkLex "N" 2 line = case words line of
|
||||
-- num:sana:sanan:_ -> do
|
||||
num:sana:_:_:_:_:_:sanan:_ -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "lin " ++ nimi ++
|
||||
"_N = mkN \"" ++ sana ++ "\" \"" ++ sanan ++ "\" ;"
|
||||
_ -> return ()
|
||||
|
||||
mkLex "N" 3 line = case words line of
|
||||
---- num:sana:sanan:sanoja:_ -> do
|
||||
num:sana:sanan:_:_:_:_:sanoja:_ -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "lin " ++ nimi ++
|
||||
"_N = mkN \"" ++ sana ++ "\" \"" ++ sanan ++ "\" \"" ++ sanoja ++ "\" ;"
|
||||
_ -> return ()
|
||||
|
||||
mkLex "N" 4 line = case words line of
|
||||
num:sana:sanan:sanaa:_:_:_:sanoja:_ -> do
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "lin " ++ nimi ++
|
||||
"_N = mkN \"" ++ sana ++ "\" \"" ++ sanan ++
|
||||
"\" \"" ++ sanoja ++ "\" \"" ++ sanaa ++ "\" ;"
|
||||
_ -> return ()
|
||||
|
||||
-- to initiate from a noun list that has compounds
|
||||
|
||||
mkLex "N" 11 line = case words line of
|
||||
_:"--":_ -> return ()
|
||||
num:sana0:_ -> do
|
||||
let sana = uncompound sana0
|
||||
let nimi = "n" ++ init num ++ "_" ++ sana
|
||||
putStrLn $ "fun " ++ nimi ++ "_N : N ;"
|
||||
putStrLn $ "lin " ++ nimi ++ "_N = mkN \"" ++ sana ++ "\" ;"
|
||||
_ -> return ()
|
||||
|
||||
prQuoted s = concat ["\"",s,"\""]
|
||||
|
||||
-- from sora+tie to tie
|
||||
|
||||
uncompound = reverse . takeWhile (/= '+') . reverse
|
||||
25
lib/src/finnish/kotus/uusisuomi/MyDiff.hs
Normal file
25
lib/src/finnish/kotus/uusisuomi/MyDiff.hs
Normal file
@@ -0,0 +1,25 @@
|
||||
module Main where
|
||||
|
||||
import System
|
||||
|
||||
-- compare lines word-by-word, returning difference pairs with their positions
|
||||
|
||||
main = do
|
||||
x:y:_ <- getArgs
|
||||
old <- readFile x >>= return . lines
|
||||
new <- readFile y >>= return . lines
|
||||
mapM_ comp (zip old new)
|
||||
|
||||
comp (ws1,ws2) = do
|
||||
let diffs = [form ++ ":" ++ w1 ++ "-" ++ w2 |
|
||||
(form,(w1,w2)) <- zip forms (zip (words ws1) (words ws2)), diff w2 w1]
|
||||
putStr $ unwords diffs
|
||||
if null diffs then return () else putStrLn ""
|
||||
|
||||
forms = map show [1..]
|
||||
|
||||
diff w ws = notElem w (chop ws) where
|
||||
chop cs = case span (/='/') cs of
|
||||
([],_) -> []
|
||||
(w1,ww) -> w1:chop (drop 1 ww)
|
||||
|
||||
45
lib/src/finnish/kotus/uusisuomi/bootstrapping.txt
Normal file
45
lib/src/finnish/kotus/uusisuomi/bootstrapping.txt
Normal file
@@ -0,0 +1,45 @@
|
||||
1. write a word list - one noun per line, save in file correct-Foo.txt
|
||||
|
||||
2. create a first compilable grammar:
|
||||
|
||||
% runghc MkLex.hs 0 N Foo >FooAbs.gf
|
||||
% runghc MkLex.hs 1 N Foo >Foo1.gf
|
||||
|
||||
3. compile this and create a first full-form word list
|
||||
|
||||
% gf Foo1.gf
|
||||
> gt -cat=Utt | l | wf correct-Foo.txt
|
||||
|
||||
4. manually correct some singular genitive forms (the largest error source)
|
||||
|
||||
uutuus uutuuksen ... => uutuus uutuuden ...
|
||||
|
||||
5. create a second compilable grammar:
|
||||
|
||||
% runghc MkLex.hs 2 N Foo >Foo2.gf
|
||||
|
||||
6. compile this into a second full-form word list
|
||||
|
||||
% gf Foo2.gf
|
||||
> gt -cat=Utt | l | wf correct-Foo.txt
|
||||
|
||||
7. manually correct the remaining partitive forms (mostly plural)
|
||||
|
||||
8. create yet another grammar:
|
||||
|
||||
% runghc MkLex.hs 4 N Foo >Foo4.gf
|
||||
|
||||
9. compile this into yet another full-form word list
|
||||
|
||||
% gf Foo4.gf
|
||||
> gt -cat=Utt | l | wf correct-Foo.txt
|
||||
|
||||
10. manually correct any remaining errors (which should be rare now)
|
||||
|
||||
11. if relevant, run a test of the regularity of the vocabulary:
|
||||
|
||||
% export LEX=Foo ; make -e
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user