1
0
forked from GitHub/gf-core

started adding KOTUS verbs in Finnish

This commit is contained in:
aarne
2010-12-27 13:38:22 +00:00
parent 34e6ed3220
commit 90a9149a5c
10 changed files with 94708 additions and 3 deletions

View File

@@ -0,0 +1,14 @@
-- käydä: present forms are built on the stem without "dä" (käy-), past and
-- conditional forms on that stem with its last letter replaced by "vi" (kävi-).
-- NOTE(review): `s` (the infinitive) is not bound anywhere in this fragment.
kayda_V =
  let kay = Predef.tk 2 s ; kavi = init kay + "vi" in
  vForms12 s (kay + "n") kay (kay + "vät") (kay + "kää") (kay + "dään")
    (kavi + "n") kavi (kavi + "si") (kay + "nyt")
    (kay + "ty") -- passive participle is "käyty"; the original produced "käytty"
    (kay + "nee") ;
-- tuntea: three stems are derived from the infinitive `s`:
--   tunte = s without its last letter,
--   tunne = weakGrade of tunte,
--   tuns  = tunte without its last two letters, plus "s" (used for the two "-in"/"-i" forms).
-- NOTE(review): `s` is not bound anywhere in this fragment.
tuntea_V =
let tunte = init s ; tunne = weakGrade tunte ; tuns = Predef.tk 2 tunte + "s" in
vForms12 s (tunne + "n") (tunte + "e") (tunte + "vat") (tunte + "kaa") (tunne + "taan")
(tuns + "in") (tuns + "i") (init tunte + "isi") (tunte + "nut") (tunne + "ttu")
(tunte + "nee") ;
nahda_V
tehda_V

View File

@@ -147,7 +147,7 @@ lin
policeman_N = mkN "poliisi" ;
priest_N = mkN "pappi" ;
probable_AS = mkAS --- for vowel harmony
(mkA (mkN "todennäköinen") "tonennäköisempi" "todennälöisin") ; ---- sta
(mkA (mkN "todennäköinen") "todennäköisempi" "todennäköisin") ; ---- sta
queen_N = mkN "kuningatar" ;
radio_N = mk2N "radio" "radioita" ;
rain_V0 = mkV0 (mk2V "sataa" "satoi") ;

View File

@@ -643,6 +643,7 @@ resource MorphoFin = ResFin ** open Prelude in {
kuunnel = Predef.tk 2 kuunnella ;
kuuntel = Predef.tk 2 kuuntelin ;
u = uyHarmony a ;
l = last kuunnel
in vForms12
kuunnella
(kuuntel + "en")
@@ -653,9 +654,9 @@ resource MorphoFin = ResFin ** open Prelude in {
(kuuntel + "in")
(kuuntel + "i")
(kuuntel + "isi")
(kuunnel + "l" + u + "t")
(kuunnel + l + u + "t")
(kuunnel + "t" + u)
(kuunnel + "lee") ;
(kuunnel + l + "ee") ;
-- auxiliaries

View File

@@ -0,0 +1,28 @@
<!--
Copyright (C) Kotimaisten kielten tutkimuskeskus 2006
Kotimaisten kielten tutkimuskeskuksen nykysuomen sanalista, versio 1
Julkaistu 15.12.2006
Sanalista julkaistaan GNU LGPL -lisenssillä.
Lisenssiteksti luettavissa osoitteessa http://www.gnu.org/licenses/lgpl.html
Listaan perustuvien sovellusten mukana on aina toimitettava alkuperäinen
sanalista lukukelpoisessa muodossaan.
-->
<!-- Root element: the word list, a sequence of zero or more entries (st). -->
<!ELEMENT kotus-sanalista (st*) >
<!-- One entry: the element s, an optional hn element, then any number of t elements. -->
<!ELEMENT st (s, hn?, t*) >
<!-- s: text content; presumably the headword itself (TODO confirm). -->
<!ELEMENT s (#PCDATA) >
<!-- hn: text content; presumably a homonym number (TODO confirm). -->
<!ELEMENT hn (#PCDATA) >
<!-- t: zero or more (tn, optional av) groups. The attribute name "taivutus" is Finnish for "inflection". -->
<!ELEMENT t (tn, av?)* >
<!ATTLIST t taivutus CDATA #IMPLIED>
<!-- tn: text content; presumably the inflection class number (TODO confirm). -->
<!ELEMENT tn (#PCDATA) >
<!-- av: text content; the attribute name "astevaihtelu" is Finnish for "consonant gradation". -->
<!ELEMENT av (#PCDATA) >
<!ATTLIST av astevaihtelu CDATA #IMPLIED>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,295 @@
--# -path=.:alltenses
-- Numbered paradigm functions: dNN : Str -> NForms builds noun forms and
-- cNN : Str -> VForms builds verb forms from a single dictionary form.
-- The trailing comment on each signature gives a number and an example word
-- (presumably the number of word-list entries in that class - TODO confirm).
-- Functions whose name ends in A are the variants that go through weakGrade or
-- strongGrade, or delegate to a paradigm that is assumed to handle gradation.
resource Kotus = Declensions ** open MorphoFin,CatFin,Prelude in {
-- Local alias so that the definitions below can say vowelHarmony.
oper vowelHarmony = vowHarmony ;
oper
d01 : Str -> NForms -- 1780 öljy
= \s -> dUkko s (s + "n") ;
d01A : Str -> NForms -- 166 yökkö
= \s -> dUkko s (weakGrade s + "n") ;
d02 : Str -> NForms -- 1189 ääntely
= \s -> dSilakka s (s + "n") (s + "j" + getHarmony (last s)) ;
d03 : Str -> NForms -- 481 ääntiö
= \s -> dSilakka s (s + "n") (s + "it" + vowelHarmony s) ;
d04A : Str -> NForms -- 273 äpärikkö
= \s -> let ws = weakGrade s in
dSilakka s (ws + "n") (ws + "it" + getHarmony (last s)) ;
d05 : Str -> NForms -- 3212 öljymaali
= \s -> dPaatti s (s + "n") ;
d05A : Str -> NForms -- 1959 öylätti
= \s -> dPaatti s (weakGrade s + "n") ;
d06 : Str -> NForms -- 1231 öykkäri
= \s -> dTohtori s ;
d07 : Str -> NForms -- 81 vuoksi
= \s -> dArpi s (init s + "en") ;
d07A : Str -> NForms -- 70 väki
= \s -> dArpi s (init (weakGrade s) + "en") ;
d08 : Str -> NForms -- 99 à la carte
= \s -> dNukke s (s + "n") ;
d08A : Str -> NForms -- 5 vinaigrette
= \s -> dNukke s (weakGrade s + "n") ;
-- d09/d09A: the third argument replaces the final vowel by "o" (after "a") or "ö"
-- (otherwise) and appends "j" plus the original final letter.
d09 : Str -> NForms -- 696 ääriraja
= \s -> let a = last s in dSilakka s
(s + "n")
(init s + case a of {"a" => "o" ; _ => "ö"} + "j" + a) ;
d09A : Str -> NForms -- 1040 ääniraita
= \s -> let a = last s in dSilakka s
(weakGrade s + "n")
(init s + case a of {"a" => "o" ; _ => "ö"} + "j" + a) ;
d10 : Str -> NForms -- 2119 äänittäjä
= \s -> dSilakka s (s + "n") (init s + "i" + vowelHarmony (last s)) ;
d10A : Str -> NForms -- 284 änkkä
= \s -> dSilakka s (weakGrade s + "n") (init s + "i" + vowelHarmony (last s)) ;
-- NOTE(review): d11 has the same body as d10A (it applies weakGrade) although its
-- name carries no A suffix - confirm that this is intended.
d11 : Str -> NForms -- 46 ödeema
= \s -> dSilakka s (weakGrade s + "n") (init s + "i" + vowelHarmony (last s)) ;
d12 : Str -> NForms -- 1125 örinä
= \s -> let a = vowelHarmony (last s) in
dSilakka s (s + "n")
(init s + case a of {"a" => "o" ; _ => "ö"} + "it" + a) ;
d13 : Str -> NForms -- 157 virtaska
= \s -> let a = vowelHarmony (last s) in
dSilakka s (s + "n")
(init s + case a of {"a" => "o" ; _ => "ö"} + "j" + a) ;
d14A : Str -> NForms -- 244 ötökkä
= \s -> let a = vowelHarmony (last s) ; ws = weakGrade s in
dSilakka s (ws + "n")
(init ws + case a of {"a" => "o" ; _ => "ö"} + "it" + a) ;
d15 : Str -> NForms -- 170 äreä
= dKorkea ;
-- d16: inflects the first five letters with dSuurempi and re-attaches the rest
-- of the word unchanged to every form.
d16 : Str -> NForms -- 2 kumpikin --?
= \s -> let kumpi = Predef.take 5 s ; kin = Predef.drop 5 s in
\\i => (dSuurempi kumpi ! i + kin) ;
d16A : Str -> NForms -- 20 ylempi
= dSuurempi ;
d17 : Str -> NForms -- 38 virkkuu
= dPaluu ;
d18 : Str -> NForms -- voi, tee, sää
= dPuu ;
d19 : Str -> NForms -- 6 yö
= dSuo ;
d20 : Str -> NForms -- 46 voodoo
= dPaluu ;
d21 : Str -> NForms -- 22 tax-free --? rosé
= dPuu ;
-- d22: every ending is attached after an apostrophe; the endings (including the
-- back-vowel "ta"/"na" and the illative "hon") are hard-coded.
d22 : Str -> NForms -- 13 tournedos
= \s -> nForms10
s (s + "'n") (s + "'ta") (s + "'na") (s + "'hon")
(s + "'iden") (s + "'ita") (s + "'ina") (s + "'issa") (s + "'ihin") ;
-- d23-d26 currently share the body of d07.
d23 : Str -> NForms -- 9 vuohi
= \s -> dArpi s (init s + "en") ;
d24 : Str -> NForms -- 20 uni
= \s -> dArpi s (init s + "en") ;
d25 : Str -> NForms -- 9 tuomi
= \s -> dArpi s (init s + "en") ;
d26 : Str -> NForms -- 113 ääri
= \s -> dArpi s (init s + "en") ;
-- d27, d28, d28A: the final "si" is replaced by "den", "ren" and "len" respectively.
d27 : Str -> NForms -- 23 vuosi
= \s -> dArpi s (Predef.tk 2 s + "den") ;
d28 : Str -> NForms -- 13 virsi
= \s -> dArpi s (Predef.tk 2 s + "ren") ;
d28A : Str -> NForms -- 1 jälsi
= \s -> dArpi s (Predef.tk 2 s + "len") ;
-- d29 (lapsi): the dArpi forms, except that form 2 (the slot that holds the
-- singular partitive in the nForms10 tables of this module) is overridden.
-- The word loses its last three letters ("psi") and gets "sta": lapsi -> lasta.
-- The original appended only "ta", which produced "lata".
d29 : Str -> NForms -- 1 lapsi
  = \s -> let lapsi = dArpi s (init s + "en") in
    table {2 => Predef.tk 3 s + "sta" ; i => lapsi ! i} ;
-- d30: the dArpi forms with form 2 replaced by the word minus its last three
-- letters plus "stä" (veitsi -> veistä); the front-vowel ending is hard-coded.
d30 : Str -> NForms -- 2 veitsi
= \s -> let lapsi = dArpi s (init s + "en") in
table {2 => Predef.tk 3 s + "stä" ; i => lapsi ! i} ;
-- d31: the first five forms are built on the word minus its last three letters
-- (y-) followed by hd/ht, the last five on the full word.
d31 : Str -> NForms -- 3 yksi
= \s -> let
y = Predef.tk 3 s ;
a = vowelHarmony y
in nForms10
s (y + "hden") (y + "ht" + a) (y + "hten" + a) (y + "hteen")
(s + "en") (s + a) (s + "n" + a) (s + "ss" + a) (s + "in") ;
d32 : Str -> NForms -- 20 uumen
= \s -> dPiennar s (s + "en") ;
-- d32A: strongGrade is applied to the word without its final letter, which is
-- then re-attached before "en".
d32A : Str -> NForms -- 54 ystävätär
= \s -> dPiennar s (strongGrade (init s) + last s + "en") ;
-- d33/d33A: the final letter is dropped and "men" appended (d33A after strongGrade).
d33 : Str -> NForms -- 168 väistin
= \s -> dLiitin s (init s + "men") ;
d33A : Str -> NForms -- 181 yllytin
= \s -> dLiitin s (strongGrade (init s) + "men") ;
-- d34 (alaston): all forms except the first and third are built on a stem in
-- which the final "n" is replaced by "m" (alastom-), as d37 does for vasen.
-- The original dropped the "n" without adding "m", giving "alastoan" etc.
d34 : Str -> NForms -- 1 alaston
  = \s -> let alastom = init s + "m" in
    nForms10
      s (alastom + "an") (s + "ta") (alastom + "ana") (alastom + "aan")
      (alastom + "ien") (alastom + "ia") (alastom + "ina") (alastom + "issa")
      (alastom + "iin") ;
d34A : Str -> NForms -- 569 ääretön
= dOnneton ;
-- d35A: the stem is strongGrade of the word without its last letter, plus "m";
-- the front-vowel endings are hard-coded.
d35A : Str -> NForms -- 1 lämmin
= \s -> let lämpim = strongGrade (init s) + "m" in
nForms10
s (lämpim + "än") (s + "tä") (lämpim + "änä") (lämpim + "ään")
(lämpim + "ien") (lämpim + "iä") (lämpim + "inä") (lämpim + "issä")
(lämpim + "iin") ;
d36 : Str -> NForms -- 11 ylin
= dSuurin ;
-- d37: the final letter is replaced by "m" (vasem-); the endings supply the
-- following "m" or "p", and the back-vowel endings are hard-coded.
d37 : Str -> NForms -- 1 vasen
= \s -> let vasem = init s + "m" in
nForms10
s (vasem + "man") (s + "ta") (vasem + "pana") (vasem + "paan")
(vasem + "pien") (vasem + "pia") (vasem + "pina") (vasem + "missa")
(vasem + "piin") ;
d38 : Str -> NForms -- 4195 öykkärimäinen
= dNainen ;
d39 : Str -> NForms -- 2730 örähdys
= dJalas ;
d40 : Str -> NForms -- 2482 öykkärimäisyys
= dLujuus ;
-- d41/d41A: the final letter is dropped, the then-last letter is doubled and "n"
-- is appended (d41A applies strongGrade to the shortened word first).
d41 : Str -> NForms -- 127 äyräs
= \s -> let is = init s in dRae s (is + last is + "n") ;
d41A : Str -> NForms -- 401 öljykangas
= \s -> let is = init s in dRae s (strongGrade is + last is + "n") ;
-- d42 (mies): most forms use a stem in which the final "s" is replaced by "h"
-- (mieh-): miehen, miehenä, mieheen, miehiä, ...; the third and sixth forms keep
-- the full word (miestä, miesten). The front-vowel endings are hard-coded.
-- The original re-appended "s", so the stem was just "mies" again ("miesen").
d42 : Str -> NForms -- 1 mies
  = \s -> let mieh = init s + "h" in
    nForms10
      s (mieh + "en") (s + "tä") (mieh + "enä") (mieh + "een")
      (s + "ten") (mieh + "iä") (mieh + "inä") (mieh + "issä")
      (mieh + "iin") ;
-- d43/d43A: the final letter is dropped and "en" appended (d43A after strongGrade).
d43 : Str -> NForms -- 11 tiehyt
= \s -> dRae s (init s + "en") ;
d43A : Str -> NForms -- 1 immyt
= \s -> dRae s (strongGrade (init s) + "en") ;
-- d44 (kevät): every form except the first and third is built on the word
-- without its final "t" (kevä-); the front-vowel endings are hard-coded.
-- The sixth form is now kevä + "iden" = "keväiden"; the original attached
-- "iden" to the full word and produced "kevätiden".
d44 : Str -> NForms -- 1 kevät
  = \s -> let kevä = init s in
    nForms10
      s (kevä + "än") (s + "tä") (kevä + "änä") (kevä + "äseen")
      (kevä + "iden") (kevä + "itä") (kevä + "inä") (kevä + "issä")
      (kevä + "isiin") ;
-- d45 and d46 have identical bodies: the final letter is dropped and endings
-- starting with nn/tt/nt/ns are attached, with the vowel chosen by vowelHarmony.
d45 : Str -> NForms -- 23 yhdes
= \s -> let yhde = init s ; a = vowelHarmony s in
nForms10
s (yhde + "nnen") (yhde + "tt" + a) (yhde + "nten" + a) (yhde + "nteen")
(yhde + "nsien") (yhde + "nsi" + a) (yhde + "nsin" + a) (yhde + "nsiss" + a)
(yhde + "nsiin") ;
d46 : Str -> NForms -- 1 tuhat
= \s -> let tuha = init s ; a = vowelHarmony s in
nForms10
s (tuha + "nnen") (tuha + "tt" + a) (tuha + "nten" + a) (tuha + "nteen")
(tuha + "nsien") (tuha + "nsi" + a) (tuha + "nsin" + a) (tuha + "nsiss" + a)
(tuha + "nsiin") ;
d47 : Str -> NForms -- 46 ylirasittunut
= dOttanut ;
d48 : Str -> NForms -- 346 äpäre
= \s -> dRae s (s + "en") ;
d48A : Str -> NForms -- 481 äänne
= \s -> dRae s (strongGrade s + "en") ;
-- d49: words ending in "e" go through dRae, all others through dPiennar.
d49 : Str -> NForms -- 31 vempele
= \s -> case last s of {
"e" => dRae s (s + "en") ;
_ => dPiennar s (s + "en")
} ;
d49A : Str -> NForms -- 11 vemmel
= \s -> dPiennar s (strongGrade (init s) + "len") ;
-- d50 and d51 are not implemented; their stubs are kept commented out.
{-
d50 : Str -> NForms -- 520 vääräsääri
= \s -> ;
d51 : Str -> NForms -- 62 vierasmies
= \s -> ;
-}
-- Verb paradigms (cNN : Str -> VForms) start here.
c52 : Str -> VForms -- 667 ärjyä
= \s -> cHukkua s (init s + "n") ;
c52A : Str -> VForms -- 1568 öljyyntyä
= \s -> cHukkua s (weakGrade (init s) + "n") ;
-- c53-c56: cOttaa receives the infinitive followed by three further forms, ending
-- in "n", "in" and "i" respectively.
c53 : Str -> VForms -- 605 äänestää
= \s -> let ott = Predef.tk 2 s in
cOttaa s (init s + "n") (ott + "in") (ott + "i") ;
-- c53A: the "n" and "in" forms use the weak-grade stem, the "i" form the
-- ungraded infinitive stem.
c53A : Str -> VForms -- 2121 örähtää
= \s -> let ota = weakGrade (init s) in
cOttaa s (ota + "n") (init ota + "in") (Predef.tk 2 s + "i") ;
c54 : Str -> VForms -- 2 pieksää
= \s -> let ott = Predef.tk 2 s in
cOttaa s (init s + "n") (ott + "in") (ott + "i") ;
-- c54A: the last two letters of the weak-grade stem are replaced by "sin"/"si".
c54A : Str -> VForms -- 316 ääntää
= \s -> let ota = weakGrade (init s) ; o = Predef.tk 2 ota in
cOttaa s (ota + "n") (o + "sin") (o + "si") ;
c55A : Str -> VForms -- 7 yltää
= c54A ; --? diff: variation ylti/ylsi
c56 : Str -> VForms -- 22 valaa
= \s -> let val = Predef.tk 2 s in
cOttaa s (init s + "n") (val + "oin") (val + "oi") ; -- never ö
-- c56A (virkkaa, and via c57A also saartaa): like c56 but with consonant
-- gradation. The "n" and "oin" forms use the weak-grade stem (virkan, virkoin),
-- while the final "oi" form is built on the ungraded infinitive stem (virkkoi),
-- in the same way as c53A builds its last argument from Predef.tk 2 s.
-- The original used the weak stem for that form too, giving "virkoi".
c56A : Str -> VForms -- 28 virkkaa
  = \s -> let ota = weakGrade (init s) ; ot = init ota in
    cOttaa s (ota + "n") (ot + "oin") (Predef.tk 2 s + "oi") ;
c57A : Str -> VForms -- 3 saartaa
= c56A ; --? diff: saartoi/saarsi
c58 : Str -> VForms -- 13 suitsea
= \s -> cHukkua s (init s + "n") ;
c58A : Str -> VForms -- 19 tunkea
= \s -> cHukkua s (weakGrade (init s) + "n") ;
-- c59A: all twelve forms are spelled out from three stems: tunte (infinitive
-- minus its last letter), tunne (its weak grade) and tuns (tunte minus two
-- letters, plus "s"). The back-vowel endings are hard-coded.
c59A : Str -> VForms -- 1 tuntea
= \s -> let tunte = init s ; tunne = weakGrade tunte ; tuns = Predef.tk 2 tunte + "s" in
vForms12 s (tunne + "n") (tunte + "e") (tunte + "vat") (tunte + "kaa") (tunne + "taan")
(tuns + "in") (tuns + "i") (init tunte + "isi") (tunte + "nut") (tunne + "ttu")
(tunte + "nee") ; -- just one verb
c60A : Str -> VForms -- 1 lähteä
= c58A ; --? diff lähti/läksi, just one verb
c61 : Str -> VForms -- 249 äyskiä
= \s -> cHukkua s (init s + "n") ;
c61A : Str -> VForms -- 153 vääntelehtiä
= \s -> cHukkua s (weakGrade (init s) + "n") ;
-- c62-c64 all delegate to cJuoda.
c62 : Str -> VForms -- 684 öykkäröidä
= \s -> cJuoda s ;
c63 : Str -> VForms -- 3 saada
= c62 ;
c64 : Str -> VForms -- 8 viedä
= c62 ;
-- c65 (käydä): all twelve forms are spelled out. kay is the infinitive without
-- "dä" (käy-); kavi replaces its last letter by "vi" (kävi-) for the past and
-- conditional forms. The front-vowel endings are hard-coded.
-- The passive participle is "käyty"; the original produced "käytty".
c65 : Str -> VForms -- 1 käydä
  = \s -> let kay = Predef.tk 2 s ; kavi = init kay + "vi" in
    vForms12 s (kay + "n") kay (kay + "vät") (kay + "kää") (kay + "dään")
      (kavi + "n") kavi (kavi + "si") (kay + "nyt") (kay + "ty")
      (kay + "nee") ; -- just one verb
-- c66/c67: cKuunnella receives the infinitive and a second form ending in "in",
-- built from the infinitive without its last two letters.
c66 : Str -> VForms -- 268 öristä
= \s -> cKuunnella s (Predef.tk 2 s + "in") ;
-- c66A/c67A: the last three letters are dropped, strongGrade is applied, and
-- "sin" / "lin" is appended.
c66A : Str -> VForms -- 3 vavista
= \s -> cKuunnella s (strongGrade (Predef.tk 3 s) + "sin") ;
c67 : Str -> VForms -- 704 ällistellä
= \s -> cKuunnella s (Predef.tk 2 s + "in") ;
c67A : Str -> VForms -- 634 äännellä
= \s -> cKuunnella s (strongGrade (Predef.tk 3 s) + "lin") ;
c68 : Str -> VForms -- 49 viheriöidä
= c62 ; -- diff viheriöin/viheriöitsen
c69 : Str -> VForms -- 48 villitä
= \s -> cHarkita s ;
-- c70: the last three letters are replaced by "ksen" (syöstä -> syöksen).
c70 : Str -> VForms -- 3 syöstä
= \s -> cJuosta s (Predef.tk 3 s + "ksen") ;
-- c71 (tehdä, nähdä): all twelve forms are spelled out from the infinitive
-- without its last three letters (te-, nä-). The front-vowel endings are
-- hard-coded.
-- The seventh form is the first-person past "tein"/"näin" (compare "tunsin" in
-- the same slot of c59A); the original repeated the present form "teen" there.
c71 : Str -> VForms -- 2 tehdä
  = \s -> let te = Predef.tk 3 s in
    vForms12 s (te + "en") (te + "kee") (te + "kevät") (te + "hkää") (te + "hdään")
      (te + "in") (te + "ki") (te + "kisi") (te + "hnyt") (te + "hty")
      (te + "hnee") ; -- just two verbs: nähdä, tehdä
-- c72/c72A: cValjeta receives the infinitive and a second form in which the last
-- two letters are replaced by "ni" (c72A after strongGrade).
c72 : Str -> VForms -- 93 yletä
= \s -> cValjeta s (Predef.tk 2 s + "ni") ;
c72A : Str -> VForms -- 52 yhdetä
= \s -> cValjeta s (strongGrade (Predef.tk 2 s) + "ni") ;
-- The remaining classes are not implemented; their empty stubs are kept
-- commented out.
{-
c73 : Str -> VForms -- 600 äkseerata
= \s -> ;
c73A : Str -> VForms -- 313 änkätä
= \s -> ;
c74 : Str -> VForms -- 99 öljytä
= \s -> ;
c74A : Str -> VForms -- 72 ängetä
= \s -> ;
c75 : Str -> VForms -- 39 viritä
= \s -> ;
c75A : Str -> VForms -- 9 siitä
= \s -> ;
c76A : Str -> VForms -- 2 tietää
= \s -> ;
c77 : Str -> VForms -- 3 vipajaa
= \s -> ;
c78 : Str -> VForms -- 31 ähkää
= \s -> ;
c78A : Str -> VForms -- 1 tuikkaa
= \s -> ;
c99 : Str -> VForms -- 5453 öykkärimäisesti
= \s -> ;
-}
}

View File

@@ -0,0 +1,54 @@
# Defaults for the generic targets (gf-files, experiments, tests). The driver
# targets below override them through the environment together with `make -e`.
LEX=Omat
CAT=N

# No target in this file produces a file of its own name, so declare all of
# them phony (the original declared only `kotus`).
.PHONY: all verbs nouns kotus CSC gf-files experiments tests

all: nouns

# Run the tests on each verb lexicon, then collect and print the summaries.
# Recursive invocations go through $(MAKE) so that the same make program and
# its command-line flags are used for the sub-make.
verbs:
	export CAT=V ; export LEX=VNSSK ; $(MAKE) -e tests
	export CAT=V ; export LEX=VOmat ; $(MAKE) -e tests
	export CAT=V ; export LEX=VSwadesh ; $(MAKE) -e tests
	export CAT=V ; export LEX=VDictionary ; $(MAKE) -e tests
	cat all-diff-V* >all-differences-V
	cat all-differences-V

# Run the tests on each noun lexicon (CAT keeps its default), then summarise.
nouns:
#	export LEX=NSSK ; make -e tests
#	export LEX=Omat ; make -e tests
	export LEX=Swadesh ; $(MAKE) -e tests
	export LEX=Dictionary ; $(MAKE) -e tests
	export LEX=Duodecim ; $(MAKE) -e tests
	export LEX=Aino ; $(MAKE) -e tests
	cat all-diff-* >all-differences
	cat all-differences

kotus:
	export LEX=KOTUS ; $(MAKE) -e tests
	cat all-diff-KOTUS

CSC:
	export LEX=NCSC ; $(MAKE) -e tests
	cat all-diff-NCSC

# Generate the abstract syntax and the grade 2-4 concrete lexicons from
# correct-$(LEX).txt. Grade 1 is currently disabled.
gf-files:
	runghc MkLex.hs 0 $(CAT) $(LEX) > $(LEX)Abs.gf
#	runghc MkLex.hs 1 $(CAT) $(LEX) > $(LEX)1.gf
	runghc MkLex.hs 2 $(CAT) $(LEX) > $(LEX)2.gf
	runghc MkLex.hs 3 $(CAT) $(LEX) > $(LEX)3.gf
	runghc MkLex.hs 4 $(CAT) $(LEX) > $(LEX)4.gf

# Linearise every generated grammar into a full-form list exper<N>-$(LEX).txt.
experiments: gf-files
#	echo "gt -cat=Utt | l | wf exper1-$(LEX).txt" | gf -s $(LEX)1.gf
	echo "gt -cat=Utt | l | wf exper2-$(LEX).txt" | gf -s $(LEX)2.gf
	echo "gt -cat=Utt | l | wf exper3-$(LEX).txt" | gf -s $(LEX)3.gf
	echo "gt -cat=Utt | l | wf exper4-$(LEX).txt" | gf -s $(LEX)4.gf

# Compare each generated list with the corrected one and record the number of
# differing lines per grade in all-diff-$(LEX).
# NOTE(review): exper1-$(LEX).txt is no longer regenerated above (grade 1 is
# commented out), so the first comparison reads whatever file is already on
# disk and fails if there is none - confirm whether it should be disabled too.
tests: experiments
	runghc MyDiff.hs correct-$(LEX).txt exper1-$(LEX).txt >diff1-$(LEX).txt
	runghc MyDiff.hs correct-$(LEX).txt exper2-$(LEX).txt >diff2-$(LEX).txt
	runghc MyDiff.hs correct-$(LEX).txt exper3-$(LEX).txt >diff3-$(LEX).txt
	runghc MyDiff.hs correct-$(LEX).txt exper4-$(LEX).txt >diff4-$(LEX).txt
	date >all-diff-$(LEX)
	echo $(LEX) >>all-diff-$(LEX)
	wc -l diff?-$(LEX).txt >>all-diff-$(LEX)

View File

@@ -0,0 +1,118 @@
module Main where
import System
import Char
-- generate Finnish lexicon implementations with 1 or more
-- characteristic arguments
-- usage: runghc MkLex.hs 3 cat name
-- Entry point. Expects three arguments: grade, category and lexicon name.
-- Reads correct-<name>.txt, drops blank lines, prints the module header, then
-- one rule per remaining line (each prefixed with its running number from
-- 'nums'), and finally the closing brace of the module.
main = do
  grade : category : lexName : _ <- getArgs
  contents <- readFile ("correct-" ++ lexName ++ ".txt")
  let entries = [l | l <- lines contents, not (all isSpace l)]
  initiate lexName category grade
  mapM_ (\(label, entry) -> mkLex category (read grade) (label ++ entry))
        (zip nums entries)
  putStrLn "}"
-- Print the opening of the generated GF module: the path pragma, a blank line,
-- the module header and another blank line. Grade "0" yields the abstract
-- module <tgt>Abs; any other grade yields the concrete module <tgt><grade>.
-- The category argument is accepted but not used here.
initiate tgt cat i =
  mapM_ putStrLn ["--# -path=.:alltenses", "", header, ""]
  where
    header
      | i == "0"  = unlines abstractHeader
      | otherwise = unlines concreteHeader
    abstractHeader =
      [ "abstract " ++ tgt ++ "Abs = Cat ** {"
      , "fun testN : N -> Utt ;"
      , "fun testV : V -> Utt ;"
      ]
    concreteHeader =
      [ "concrete " ++ tgt ++ i ++
        " of " ++ tgt ++
        "Abs = CatFin ** open Nominal, Verbal, ResFin, Prelude in {"
      , ""
      , "lin testN = showN ;"
      , "lin testV = showV ;"
      ]
-- Infinite list of running line labels: "10001. ", "10002. ", ...
-- Starting at 10001 gives every label the same width (an earlier version
-- zero-padded small numbers instead).
nums = [show n ++ ". " | n <- [10001 ..]]
-- Identifier stem for a lexicon entry: "n" ++ number (without its trailing
-- dot) ++ "_" ++ word.
lexIdent num sana = "n" ++ init num ++ "_" ++ sana

-- A GF abstract-syntax declaration: fun <ident>_<cat> : <cat> ;
funDecl ident cat = "fun " ++ ident ++ "_" ++ cat ++ " : " ++ cat ++ " ;"

-- A GF linearization rule: lin <ident>_<cat> = mk<cat> "form1" "form2" ... ;
linDecl ident cat forms =
  "lin " ++ ident ++ "_" ++ cat ++ " = mk" ++ cat ++ " " ++
  unwords (map prQuoted forms) ++ " ;"

-- Print the GF rule for one numbered word-list line. The arguments are the
-- category flag, the grade and the line, whose first word is the running
-- number added in 'main'. Grade 0 prints a 'fun' declaration, grade 1 a 'lin'
-- rule with one form; higher grades pick further forms from fixed word
-- positions in the line. Lines with too few words print nothing.
-- Clause order matters: the "W" and generic grade-0/1 clauses come first.

-- W is the flag for mixed-class word lists
mkLex "W" 0 line = case words line of
  num:cat:sana:_ -> putStrLn (funDecl (lexIdent num sana) cat)
  _ -> return ()
mkLex "W" 1 line = case words line of
  num:cat:sanat@(sana:_) -> putStrLn (linDecl (lexIdent num sana) cat sanat)
  _ -> return ()
mkLex cat 0 line = case words line of
  num:sana:_ -> putStrLn (funDecl (lexIdent num sana) cat)
  _ -> return ()
mkLex cat 1 line = case words line of
  num:sana:_ -> putStrLn (linDecl (lexIdent num sana) cat [sana])
  _ -> return ()
-- Verbs, any remaining grade: first form plus the form in word position 8.
mkLex "V" _ line = case words line of
  num:sana:_:_:_:_:_:_:muoto8:_ ->
    putStrLn (linDecl (lexIdent num sana) "V" [sana, muoto8])
  _ -> return ()
-- Nouns, grade 2: first form plus the form in word position 7.
mkLex "N" 2 line = case words line of
  num:sana:_:_:_:_:_:muoto7:_ ->
    putStrLn (linDecl (lexIdent num sana) "N" [sana, muoto7])
  _ -> return ()
-- Nouns, grade 3: forms in word positions 1, 2 and 7.
mkLex "N" 3 line = case words line of
  num:sana:sanan:_:_:_:_:sanoja:_ ->
    putStrLn (linDecl (lexIdent num sana) "N" [sana, sanan, sanoja])
  _ -> return ()
-- Nouns, grade 4: forms in word positions 1, 2, 7 and 3, in that output order.
mkLex "N" 4 line = case words line of
  num:sana:sanan:sanaa:_:_:_:sanoja:_ ->
    putStrLn (linDecl (lexIdent num sana) "N" [sana, sanan, sanoja, sanaa])
  _ -> return ()
-- to initiate from a noun list that has compounds
mkLex "N" 11 line = case words line of
  _:"--":_ -> return ()
  num:sana0:_ -> do
    let sana = uncompound sana0
        ident = lexIdent num sana
    putStrLn (funDecl ident "N")
    putStrLn (linDecl ident "N" [sana])
  _ -> return ()
-- Wrap a string in double quotes (no escaping is performed).
prQuoted s = "\"" ++ s ++ "\""
-- Keep only the part after the last '+': from sora+tie to tie.
-- A word without '+' is returned unchanged.
uncompound w = case break (== '+') w of
  (_, _ : rest) -> uncompound rest
  (whole, [])   -> whole

View File

@@ -0,0 +1,25 @@
module Main where
import System
-- compare lines word-by-word, returning difference pairs with their positions
-- Entry point. Expects two file names, reads both files and compares them line
-- by line with 'comp'. Lines are paired with zip, so if one file is longer
-- its extra lines are not compared.
main = do
  expectedPath : actualPath : _ <- getArgs
  expected <- fmap lines (readFile expectedPath)
  actual   <- fmap lines (readFile actualPath)
  mapM_ comp (zip expected actual)
-- Compare one pair of lines word by word. For every position where 'diff'
-- reports a mismatch, emit "<position>:<word1>-<word2>"; all mismatches of the
-- pair go on one output line, and nothing is printed when there are none.
-- Words are paired positionally, so surplus words on either side are ignored.
comp (ws1, ws2) =
  if null diffs then return () else putStrLn (unwords diffs)
  where
    diffs =
      [ form ++ ":" ++ w1 ++ "-" ++ w2
      | (form, w1, w2) <- zip3 forms (words ws1) (words ws2)
      , diff w2 w1
      ]
-- Position labels "1", "2", "3", ... for the words of a line.
forms = [show n | n <- [1 ..]]
-- True when word w is NOT among the alternatives listed in ws, where ws is a
-- '/'-separated list of accepted variants (e.g. "omenien/omenoiden").
-- Empty segments are skipped rather than ending the scan: the original stopped
-- at the first empty segment, so for "/b" or "a//b" the alternative "b" was
-- lost and reported as a difference.
diff w ws = notElem w (chop ws) where
  -- Split on '/', dropping empty segments.
  chop cs = case break (== '/') cs of
    (alt, [])       -> keep alt
    (alt, _ : rest) -> keep alt ++ chop rest
  keep alt = [alt | not (null alt)]

View File

@@ -0,0 +1,45 @@
1. write a word list - one noun per line, save in file correct-Foo.txt
2. create a first compilable grammar:
% runghc MkLex.hs 0 N Foo >FooAbs.gf
% runghc MkLex.hs 1 N Foo >Foo1.gf
3. compile this and create a first full-form word list
% gf Foo1.gf
> gt -cat=Utt | l | wf correct-Foo.txt
4. manually correct some singular genitive forms (the largest error source)
uutuus uutuuksen ... => uutuus uutuuden ...
5. create a second compilable grammar:
% runghc MkLex.hs 2 N Foo >Foo2.gf
6. compile this into a second full-form word list
% gf Foo2.gf
> gt -cat=Utt | l | wf correct-Foo.txt
7. manually correct the remaining partitive forms (mostly plural)
8. create yet another grammar:
% runghc MkLex.hs 4 N Foo >Foo4.gf
9. compile this into yet another full-form word list
% gf Foo4.gf
> gt -cat=Utt | l | wf correct-Foo.txt
10. manually correct any remaining errors (which should be rare now)
11. if relevant, run a test of the regularity of the vocabulary:
% export LEX=Foo ; make -e tests