From 19d635046c7315d9a0325f340d1837fce797c69e Mon Sep 17 00:00:00 2001 From: aarne Date: Sun, 26 Jun 2005 19:40:31 +0000 Subject: [PATCH] Finnish close to production ; Predef.occurs --- lib/prelude/Predef.gf | 1 + lib/resource/finnish/BasicFin.gf | 68 +++++++------- lib/resource/finnish/MorphoFin.gf | 51 +---------- lib/resource/finnish/NumeralsFin.gf | 4 +- lib/resource/finnish/ParadigmsFin.gf | 129 +++++++++++++++++++++++---- src/GF/Compile/MkConcrete.hs | 4 +- src/GF/Grammar/AppPredefined.hs | 8 +- src/Makefile | 3 +- 8 files changed, 158 insertions(+), 110 deletions(-) diff --git a/lib/prelude/Predef.gf b/lib/prelude/Predef.gf index 0b93b7f57..11d094793 100644 --- a/lib/prelude/Predef.gf +++ b/lib/prelude/Predef.gf @@ -20,6 +20,7 @@ resource Predef = { oper plus : Int -> Int -> Int = variants {} ; -- add integers oper eqStr : Tok -> Tok -> PBool = variants {} ; -- test if equal strings oper occur : Tok -> Tok -> PBool = variants {} ; -- test if occurs as substring + oper occurs : Tok -> Tok -> PBool = variants {} ; -- test if any char occurs oper show : (P : Type) -> P -> Tok = variants {} ; -- convert param to string oper read : (P : Type) -> Tok -> P = variants {} ; -- convert string to param oper toStr : (L : Type) -> L -> Str = variants {} ; -- find the "first" string diff --git a/lib/resource/finnish/BasicFin.gf b/lib/resource/finnish/BasicFin.gf index 114b9974d..9bec4c63f 100644 --- a/lib/resource/finnish/BasicFin.gf +++ b/lib/resource/finnish/BasicFin.gf @@ -9,40 +9,40 @@ flags lin + airplane_N = regN "lentokone" ; +---- answer_V2S = mkV2S (regV "answer") "to" ; + apartment_N = reg3N "asunto" "asunnon" "asuntoja" ; + apple_N = reg3N "omena" "omenan" "omenoita" ; + art_N = reg3N "taide" "taiteen" "taiteita" ; +---- ask_V2Q = mkV2Q (regV "ask") [] ; + baby_N = regN "vauva" ; + bad_ADeg = mkADeg (regN "paha") "pahempi" "pahin" ; + bank_N = reg3N "pankki" "pankin" "pankkeja" ; + beautiful_ADeg = mkADeg (regN "kaunis") "kauniimpi" "kaunein" ; +---- become_VA = mkVA (irregV "become" "became" "become") ; + beer_N = reg3N "olut" "oluen" "oluita" ; +---- beg_V2V = mkV2V (regDuplV "beg") [] "to" ; + big_ADeg = mkADeg (reg3N "suuri" "suuren" "suuria") "suurempi" "suurin" ; + bike_N = regN "polkupyörä" ; + bird_N = reg3N "lintu" "linnun" "lintuja" ; + black_ADeg = mkADeg (reg3N "musta" "mustan" "mustia") "mustempi" "mustin" ; + blue_ADeg = mkADeg (regN "sininen") "sinisempi" "sinisin" ; + boat_N = regN "vene" ; + book_N = regN "kirja" ; + boot_N = reg3N "saapas" "saappaan" "saappaita" ; + boss_N = regN "pomo" ; + boy_N = reg3N "poika" "pojan" "poikia" ; + bread_N = reg3N "leipä" "leivän" "leipiä" ; + break_V2 = dirV2 (reg3V "rikkoa" "rikon" "rikoin") ; + broad_ADeg = mkADeg (regN "leveä") "leveämpi" "levein" ; +---- brother_N2 = regN2 "brother" ; + brown_ADeg = mkADeg (regN "ruskea") "ruskeampi" "ruskein" ; + butter_N = reg3N "voi" "voin" "voita" ; + buy_V2 = dirV2 (regV "ostaa") ; + camera_N = regN "kamera" ; + cap_N = reg3N "lakki" "lakin" "lakkeja" ; + car_N = regN "auto" ; {- - airplane_N = regN "airplane" ; - answer_V2S = mkV2S (regV "answer") "to" ; - apartment_N = regN "apartment" ; - apple_N = regN "apple" ; - art_N = regN "art" ; - ask_V2Q = mkV2Q (regV "ask") [] ; - baby_N = regN "baby" ; - bad_ADeg = regADeg "bad" ; - bank_N = regN "bank" ; - beautiful_ADeg = regADeg "beautiful" ; - become_VA = mkVA (irregV "become" "became" "become") ; - beer_N = regN "beer" ; - beg_V2V = mkV2V (regDuplV "beg") [] "to" ; - big_ADeg = regADeg "big" ; - bike_N = regN "bike" ; - bird_N = regN "bird" ; - black_ADeg = regADeg "black" ; - blue_ADeg = regADeg "blue" ; - boat_N = regN "boat" ; - book_N = regN "book" ; - boot_N = regN "boot" ; - boss_N = regN "boss" ; - boy_N = regN "boy" ; - bread_N = regN "bread" ; - break_V2 = dirV2 (irregV "break" "broke" "broken") ; - broad_ADeg = regADeg "broad" ; - brother_N2 = regN2 "brother" ; - brown_ADeg = regADeg "brown" ; - butter_N = regN "butter" ; - buy_V2 = dirV2 (irregV "buy" "bought" "bought") ; - camera_N = regN "camera" ; - cap_N = regN "cap" ; - car_N = regN "car" ; carpet_N = regN "carpet" ; cat_N = regN "cat" ; ceiling_N = regN "ceiling" ; @@ -68,7 +68,7 @@ lin dog_N = regN "dog" ; door_N = regN "door" ; -} - drink_V2 = tvDir (regV "juoda") ; + drink_V2 = dirV2 (regV "juoda") ; {- easy_A2V = mkA2V (regA "easy") "for" ; eat_V2 = dirV2 (irregV "eat" "ate" "eaten") ; diff --git a/lib/resource/finnish/MorphoFin.gf b/lib/resource/finnish/MorphoFin.gf index 77a28539a..a76264a33 100644 --- a/lib/resource/finnish/MorphoFin.gf +++ b/lib/resource/finnish/MorphoFin.gf @@ -419,55 +419,6 @@ oper (suurimpi + a) (suurimpi + "in") ; --- The almost-regular heuristic analyses three forms. - -reg3Noun : (_,_,_ : Str) -> CommonNoun = \vesi,veden,vesiä -> - let - esi = Predef.dp 3 vesi ; -- analysis: suffixes - si = Predef.dp 2 esi ; - i = last si ; - s = init si ; - den = Predef.dp 3 veden ; - d = Predef.tk 2 den ; - esiä = Predef.dp 4 vesiä ; - a = last vesiä ; - ves = init vesi ; -- synthesis: prefixes - ve = init ves ; - ved = Predef.tk 2 veden ; - sRae_vesi = sRae vesi (veden + a) ; - in - case si of { - "aa" | "ee" | "ii" | "oo" | "uu" | "yy" | "ää" | "öö" => sPuu vesi ; - "ie" | "uo" | "yö" => sSuo vesi ; - "is" => sNauris (vesi + ("t" + a)) ; - "ut" | "yt" => sRae vesi (ves + ("en" + a)) ; - "us" | "ys" => - ifTok CommonNoun d "s" - (sTilaus vesi (veden + a)) - (sRakkaus vesi) ; - _ => case esi of { - "nen" => sNainen (Predef.tk 3 vesi + ("st" + a)) ; - _ => case esiä of { - "oita" | "öitä" => sPeruna vesi ; - _ => case den of { - "een" => sRae_vesi ; - _ => case i of { - "a" | "ä" => sKukko vesi veden vesiä ; - "i" => case (last (init vesiä)) of { - "i" => case s of { - "s" => sSusi vesi veden (ve + ("ten" + a)) ; - _ => sKorpi vesi veden (veden + a) - } ; - _ => sBaari (vesi + a) - } ; - "o" | "u" | "y" | "ö" => sKukko vesi veden vesiä ; - _ => sLinux (vesi + "i" + a) - } - } - } - } - } ; - -- This auxiliary resolves vowel harmony from a given letter. getHarmony : Str -> Str = \u -> case u of { @@ -917,7 +868,7 @@ caseTable : Number -> CommonNoun -> Case => Str = \n,cn -> -- For "poistaa", "ryystää". vPoistaa : Str -> Verb = \poistaa -> - vOttaa poistaa ((Predef.tk 2 poistaa + "n")) ; + vOttaa poistaa ((Predef.tk 1 poistaa + "n")) ; -- For "osata", "lisätä" diff --git a/lib/resource/finnish/NumeralsFin.gf b/lib/resource/finnish/NumeralsFin.gf index 98ecdb5fe..a15feeb70 100644 --- a/lib/resource/finnish/NumeralsFin.gf +++ b/lib/resource/finnish/NumeralsFin.gf @@ -14,8 +14,8 @@ lin n3 = mkSubst "a" "kolme" "kolme" "kolme" "kolmea" "kolmeen" "kolmi" "kolmi" "kolmien" "kolmia" "kolmiin" ; n4 = regN "neljä" ; - n5 = reg3Noun "viisi" "viiden" "viisiä" ; - n6 = reg3Noun "kuusi" "kuuden" "kuutta" ; + n5 = reg3N "viisi" "viiden" "viisiä" ; + n6 = reg3N "kuusi" "kuuden" "kuusia" ; n7 = mkSubst "ä" "seitsemän" "seitsemä" "seitsemä" "seitsemää" "seitsemään" "seitsemi" "seitsemi" "seitsemien" "seitsemiä" "seitsemiin" ; diff --git a/lib/resource/finnish/ParadigmsFin.gf b/lib/resource/finnish/ParadigmsFin.gf index 099e7508f..b911a2fa4 100644 --- a/lib/resource/finnish/ParadigmsFin.gf +++ b/lib/resource/finnish/ParadigmsFin.gf @@ -62,6 +62,10 @@ oper regN : (talo : Str) -> N ; +-- The almost-regular heuristic analyses three forms. + + reg3N : (vesi,veden,vesiä : Str) -> N ; + -- Nouns with partitive "a"/"ä" are a large group. -- To determine for grade and vowel alternation, three forms are usually needed: -- singular nominative and genitive, and plural partitive. @@ -181,6 +185,9 @@ oper mkADeg : (kiva : N) -> (kivempaa,kivinta : Str) -> ADeg ; +--- Unfortunately, this function expands to enormous size. + +---- regADeg : (suuri : Str) -> ADeg ; --2 Verbs -- @@ -190,6 +197,7 @@ oper mkV : (tulla,tulee,tulen,tulevat,tulkaa,tullaan, tuli,tulin,tulisi,tullut,tultu,tullun : Str) -> V ; regV : (soutaa : Str) -> V ; + reg3V : (soutaa,soudan,soudin : Str) -> V ; -- A simple special case is the one with just one stem and no grade alternation. @@ -228,12 +236,12 @@ oper -- If both are empty, the following special function can be used. - tvCase : V -> Case -> V2 ; + caseV2 : V -> Case -> V2 ; -- Verbs with a direct (accusative) object -- are special, since their complement case is finally decided in syntax. - tvDir : V -> V2 ; + dirV2 : V -> V2 ; -- The definitions should not bother the user of the API. So they are -- hidden from the document. @@ -267,8 +275,7 @@ regN = \vesi -> si = Predef.dp 2 esi ; i = last si ; s = init si ; - occ : Str -> Bool = \a -> pbool2bool (Predef.occur a vesi) ; - a = if_then_Str (orB (occ "a") (orB (occ "o") (occ "u"))) "a" "ä" ; + a = if_then_Str (pbool2bool (Predef.occurs "aou" vesi)) "a" "ä" ; ves = init vesi ; -- synthesis: prefixes ve = init ves ; in @@ -279,20 +286,71 @@ regN = \vesi -> _ => case si of { "aa" | "ee" | "ii" | "oo" | "uu" | "yy" | "ää" | "öö" => sPuu vesi ; "ie" | "uo" | "yö" => sSuo vesi ; + "ea" | "eä" => + mkNoun + vesi (vesi + "n") (vesi + "n"+a) (vesi + a) (vesi + a+"n") + (ves + "in"+a) (ves + "iss"+a) (ves + "iden") (ves + "it"+a) + (ves + "isiin") ; "is" => sNauris (vesi + ("t" + a)) ; "ut" | "yt" => sRae vesi (ves + ("en" + a)) ; "uus" | "yys" => sRakkaus vesi ; "us" | "ys" => sTilaus vesi (ves + ("ksen" + a)) ; _ => case i of { "i" => sBaari (vesi + a) ; - "e" => sRae vesi (strongGrade ves + ("een" + a)) ; + "e" => sRae vesi (ves + ("een" + a)) ; +---- "e" => sRae vesi (strongGrade ves + ("een" + a)) ; "a" | "o" | "u" | "y" | "ä" | "ö" => sLukko vesi ; _ => sLinux (vesi + "i" + a) } } } ** {g = NonHuman ; lock_N = <>} ; - +reg3N = \vesi,veden,vesiä -> + let + esi = Predef.dp 3 vesi ; -- analysis: suffixes + si = Predef.dp 2 esi ; + i = last si ; + s = init si ; + den = Predef.dp 3 veden ; + d = Predef.tk 2 den ; + esiä = Predef.dp 4 vesiä ; + a = last vesiä ; + ves = init vesi ; -- synthesis: prefixes + ve = init ves ; + ved = Predef.tk 2 veden ; + sRae_vesi = sRae vesi (veden + a) ; + in + case si of { + "aa" | "ee" | "ii" | "oo" | "uu" | "yy" | "ää" | "öö" => sPuu vesi ; + "ie" | "uo" | "yö" => sSuo vesi ; + "is" => sNauris (vesi + ("t" + a)) ; + "ut" | "yt" => sRae vesi (ves + ("en" + a)) ; + "us" | "ys" => + ifTok CommonNoun d "s" + (sTilaus vesi (veden + a)) + (sRakkaus vesi) ; + _ => case esi of { + "nen" => sNainen (Predef.tk 3 vesi + ("st" + a)) ; + _ => case esiä of { + "oita" | "öitä" => sPeruna vesi ; + _ => case den of { + "een" => sRae_vesi ; + _ => case i of { + "a" | "ä" => sKukko vesi veden vesiä ; + "i" => case (last (init vesiä)) of { + "i" => case s of { + "s" => sSusi vesi veden (ve + ("ten" + a)) ; + _ => sKorpi vesi veden (veden + a) + } ; + _ => sBaari (vesi + a) + } ; + "o" | "u" | "y" | "ö" => sKukko vesi veden vesiä ; + _ => sLinux (vesi + "i" + a) + } + } + } + } + } ** {g = NonHuman ; lock_N = <>} ; nKukko = \a,b,c -> sKukko a b c ** {g = nonhuman ; lock_N = <>} ; @@ -322,28 +380,65 @@ regN = \vesi -> mkA = \x -> noun2adj x ** {lock_A = <>} ; mkA2 = \x,c -> mkA x ** {c = NPCase c ; lock_A2 = <>} ; - mkADeg x y z = regAdjDegr x y z ** {lock_ADeg = <>} ; + mkADeg x kivempi kivin = + let + a = last (x.s ! ((NCase Sg Part))) ; ---- gives "kivinta" + kivempaa = init kivempi + a + a ; + kivinta = kivin + "t" + a + in + regAdjDegr x kivempaa kivinta ** {lock_ADeg = <>} ; + +{- ---- + regADeg suuri = + let suur = regN suuri in + mkADeg + suur + (init (suur.s ! NCase Sg Gen) + "mpi") + (init (suur.s ! NCase Pl Ess)) ; +-} mkV a b c d e f g h i j k l = mkVerb a b c d e f g h i j k l ** {lock_V = <>} ; regV soutaa = let taa = Predef.dp 3 soutaa ; + ta = init taa ; aa = Predef.dp 2 taa ; ---- souda = weakGrade (init soutaa) ; + soudan = Predef.tk 2 soutaa + "en" ; juo = Predef.tk 2 soutaa ; o = Predef.dp 1 juo ; + a = last aa ; + u = ifTok Str a "a" "u" "y" ; joi = Predef.tk 2 juo + (o + "i") - in case taa of { - "taa" | "tää" => vPoistaa soutaa ; ---- "taa" | "tää" => vOttaa soutaa (souda + "n") ; ---- "sta" | "stä" => vJuosta ottaa otan ottanut otettu ---- "nna" | "nnä" => vJuosta ottaa otan ottanut otettu + in case ta of { + "ta" | "tä" => vPoistaa soutaa ; + "st" => vJuosta soutaa soudan (juo + "s"+u+"t") (juo + "t"+u) ; + "nn" | "rr" | "ll" => vJuosta soutaa soudan (juo + o+u+"t") (juo + "t"+u) ; _ => case aa of { "da" | "dä" => vJuoda soutaa joi ; "ta" | "tä" => vOsata soutaa ; _ => vSanoa soutaa ---- _ => vHukkua soutaa souda + }} ** {lock_V = <>} ; + +reg3V soutaa soudan soudin = + let + taa = Predef.dp 3 soutaa ; + ta = init taa ; + aa = Predef.dp 2 taa ; + souda = init soudan ; + juo = Predef.tk 2 soutaa ; + o = last juo ; + a = last aa ; + u = ifTok Str a "a" "u" "y" ; + joi = init soudin + in case ta of { + "ta" | "tä" => vOttaa soutaa soudan ; + "st" => vJuosta soutaa soudan (juo + "s"+u+"t") (juo + "t"+u) ; + "nn" | "rr" | "ll" => vJuosta soutaa soudan (juo + o+u+"t") (juo + "t"+u) ; + _ => case aa of { + "da" | "dä" => vJuoda soutaa joi ; + "ta" | "tä" => vOsata soutaa ; + _ => vHukkua soudan souda }} ** {lock_V = <>} ; vValua v = vSanoa v ** {lock_V = <>} ; @@ -354,8 +449,8 @@ regV soutaa = vOlla = verbOlla ** {lock_V = <>} ; vEi = verbEi ** {lock_V = <>} ; ----- mkV2 = \v,c,p,o -> v ** {s3 = p ; s4 = o ; c = c ; lock_V2 = <>} ; - tvCase = \v,c -> mkV2 v c [] [] ; - tvDir v = mkTransVerbDir v ** {lock_V2 = <>} ; + mkV2 = \v,c,p,o -> v ** {s3 = p ; s4 = o ; c = CCase c ; lock_V2 = <>} ; + caseV2 = \v,c -> mkV2 v c [] [] ; + dirV2 v = mkTransVerbDir v ** {lock_V2 = <>} ; } ; diff --git a/src/GF/Compile/MkConcrete.hs b/src/GF/Compile/MkConcrete.hs index 7d228de39..d96e3e774 100644 --- a/src/GF/Compile/MkConcrete.hs +++ b/src/GF/Compile/MkConcrete.hs @@ -63,8 +63,8 @@ mkCncGroups (res,files) = do putStrLnFlush $ "Going to preprocess examples in " ++ unwords files putStrLn $ "Compiling resource " ++ res egr <- appIOE $ - optFile2grammar (options - [useOptimizer "share",fromSource,beSilent,notEmitCode]) res --- for -mcfg + optFile2grammar (options [beSilent]) res + -- [useOptimizer "share",fromSource,beSilent,notEmitCode]) res --- for -mcfg gr <- err (\s -> putStrLn s >> error "resource file rejected") return egr let parser cat = errVal ([],"No parse") . optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr diff --git a/src/GF/Grammar/AppPredefined.hs b/src/GF/Grammar/AppPredefined.hs index 1ec023a63..da81c0d83 100644 --- a/src/GF/Grammar/AppPredefined.hs +++ b/src/GF/Grammar/AppPredefined.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/21 16:22:19 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.11 $ +-- > CVS $Date: 2005/06/26 20:40:33 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.12 $ -- -- Predefined function type signatures and definitions. ----------------------------------------------------------------------------- @@ -41,6 +41,7 @@ typPredefined c@(IC f) = case f of "eqStr" -> return $ mkFunType [typeTok,typeTok] (cnPredef "PBool") "length" -> return $ mkFunType [typeTok] (cnPredef "Int") "occur" -> return $ mkFunType [typeTok,typeTok] (cnPredef "PBool") + "occurs" -> return $ mkFunType [typeTok,typeTok] (cnPredef "PBool") "plus" -> return $ mkFunType [cnPredef "Int",cnPredef "Int"] (cnPredef "Int") ---- "read" -> (P : Type) -> Tok -> P "show" -> return $ mkProd -- (P : PType) -> P -> Tok @@ -77,6 +78,7 @@ appPredefined t = case t of ("dp", EInt i, K s) -> retb $ K (drop (max 0 (length s - i)) s) ("eqStr",K s, K t) -> retb $ if s == t then predefTrue else predefFalse ("occur",K s, K t) -> retb $ if substring s t then predefTrue else predefFalse + ("occurs",K s, K t) -> retb $ if any (flip elem t) s then predefTrue else predefFalse ("eqInt",EInt i, EInt j) -> retb $ if i==j then predefTrue else predefFalse ("lessInt",EInt i, EInt j) -> retb $ if i retb $ EInt $ i+j diff --git a/src/Makefile b/src/Makefile index 52bff9a33..9a5c4db7f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -113,9 +113,8 @@ today: tools/mktoday.sh $(PACKAGE_VERSION) javac: - $(JAVAC) $(JAVAFLAGS) JavaGUI/*.java $(JAVAC) $(JAVAFLAGS) -classpath $(GFEDITOR):$(GFEDITOR)/log4j-1.2.8.jar:$(GFEDITOR)/jargs-1.0.jar $(GFEDITOR)/de/uka/ilkd/key/ocl/gf/*.java - + $(JAVAC) $(JAVAFLAGS) JavaGUI/*.java jar: javac cd JavaGUI; $(JAR) -cmf manifest.txt gf-java.jar *.class ; cd ..