From 5df34601af0decabacc4c81e8b4a74e049ec1b48 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 23 Jun 2005 13:32:43 +0000 Subject: [PATCH] finnish vowel harmony unlexer ; optimizations and mcfg --- lib/resource/finnish/SyntaxFin.gf | 7 ++++++- src/GF/Canon/CMacros.hs | 13 +++++++------ src/GF/Canon/Look.hs | 12 +++++++----- src/GF/Conversion/GFCtoSimple.hs | 7 ++++--- src/GF/Text/Text.hs | 32 ++++++++++++++++++++++++++----- src/GF/UseGrammar/Custom.hs | 7 ++++--- src/GF/UseGrammar/Linear.hs | 7 ++++--- 7 files changed, 59 insertions(+), 26 deletions(-) diff --git a/lib/resource/finnish/SyntaxFin.gf b/lib/resource/finnish/SyntaxFin.gf index 1b7e42a88..ae9343c09 100644 --- a/lib/resource/finnish/SyntaxFin.gf +++ b/lib/resource/finnish/SyntaxFin.gf @@ -10,6 +10,11 @@ resource SyntaxFin = MorphoFin ** open Prelude, (CO = Coordination) in { +-- To glue a particle to the preceding word. The lexer and unlexer +-- are expected to deal with actual gluing and vowel harmony. + + glueParticle : Str -> Str -> Str = \word,part -> word ++ "&*" ++ part ; + --2 Common Nouns -- -- Simple common nouns are defined as the type $CommNoun$ in $MorphoFin$. @@ -502,7 +507,7 @@ oper } } ; - questPart : Str -> Str = \s -> glue s "ko" ; --- "kö" + questPart : Str -> Str = \s -> glueParticle s "ko" ; --- "kö" mkSats : NounPhrase -> Verb -> Sats = \subj,verb -> {subj = subj.s ! NPCase Nom ; --- "minusta tulee poliisi" diff --git a/src/GF/Canon/CMacros.hs b/src/GF/Canon/CMacros.hs index 77f4b0027..c5268b8cb 100644 --- a/src/GF/Canon/CMacros.hs +++ b/src/GF/Canon/CMacros.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/23 13:23:01 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.26 $ +-- > CVS $Date: 2005/06/23 14:32:43 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.27 $ -- -- Macros for building and analysing terms in GFC concrete syntax. -- @@ -290,7 +290,8 @@ composOp co trm = do as' <- mapM co as return (FV as') - -- peb tried to do this, but then there were errors in GF.Canon.Look.ccompute: - -- V x as -> do as' <- mapM co as - -- return (V x as') + V x as -> + do + as' <- mapM co as + return (V x as') _ -> return trm -- covers Arg, I, LI, K, E diff --git a/src/GF/Canon/Look.hs b/src/GF/Canon/Look.hs index 231014abc..bcd73f97d 100644 --- a/src/GF/Canon/Look.hs +++ b/src/GF/Canon/Look.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/17 14:15:17 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.14 $ +-- > CVS $Date: 2005/06/23 14:32:43 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.15 $ -- -- lookup in GFC. AR 2003 ----------------------------------------------------------------------------- @@ -125,8 +125,8 @@ ccompute :: CanonGrammar -> [Term] -> Term -> Err Term ccompute cnc = comp [] where comp g xs t = case t of - Arg (A _ i) -> errIn ("argument list") $ xs !? fromInteger i - Arg (AB _ _ i) -> errIn ("argument list for binding") $ xs !? fromInteger i + Arg (A _ i) -> err (const (return t)) return $ xs !? fromInteger i + Arg (AB _ _ i) -> err (const (return t)) return $ xs !? fromInteger i I c -> look c LI c -> lookVar c g @@ -194,8 +194,10 @@ ccompute cnc = comp [] noVar v = case v of LI _ -> False + Arg _ -> False R rs -> all noVar [t | Ass _ t <- rs] Par _ ts -> all noVar ts FV ts -> all noVar ts S x y -> noVar x && noVar y + P t _ -> noVar t _ -> True --- other cases that can be values to pattern match? diff --git a/src/GF/Conversion/GFCtoSimple.hs b/src/GF/Conversion/GFCtoSimple.hs index bd895155d..07a1da9e2 100644 --- a/src/GF/Conversion/GFCtoSimple.hs +++ b/src/GF/Conversion/GFCtoSimple.hs @@ -4,9 +4,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/23 13:23:01 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.11 $ +-- > CVS $Date: 2005/06/23 14:32:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.12 $ -- -- Converting GFC to SimpleGFC -- @@ -138,6 +138,7 @@ convertPatt (A.PC con pats) = con :^ map convertPatt pats -- convertPatt (A.PW) = Wildcard convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ] convertPatt (A.PI n) = error "GFCtoSimple.convertPatt: cannot handle 'PI' constructor" +convertPatt p = error $ "GFCtoSimple.convertPatt: cannot handle " ++ show p ---------------------------------------------------------------------- diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs index dc9130f90..7b7f18469 100644 --- a/src/GF/Text/Text.hs +++ b/src/GF/Text/Text.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/21 16:23:41 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.9 $ +-- > CVS $Date: 2005/06/23 14:32:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.10 $ -- -- elementary text postprocessing. AR 21\/11\/2001. -- @@ -26,6 +26,7 @@ module GF.Text.Text (untokWithXML, formatAsLatex, formatAsCode, performBinds, + performBindsFinnish, unStringLit, concatRemSpace ) where @@ -101,12 +102,33 @@ formatAsCode = rend 0 . words where space t s = if null s then t else t ++ " " ++ s performBinds :: String -> String -performBinds = unwords . format . words where +performBinds = performBindsOpt (\x y -> y) + + +-- The function defines an effect of the former on the latter part, +-- such as in vowel harmony. It is triggered by the binder token "&*" + +performBindsOpt :: (String -> String -> String) -> String -> String +performBindsOpt harm = unwords . format . words where format ws = case ws of - w : "&+" : u : ws -> format ((w ++ u) : ws) + w : "&+" : u : ws -> format ((w ++ u) : ws) + w : "&*" : u : ws -> format ((w ++ harm w u) : ws) w : ws -> w : format ws [] -> [] +-- unlexer for Finnish particles +-- Notice: left associativity crucial for "tie &* ko &* han" --> "tieköhän" + +performBindsFinnish :: String -> String +performBindsFinnish = performBindsOpt vowelHarmony where + vowelHarmony w p = if any (flip elem "aouAOU") w then p else map toFront p + toFront c = case c of + 'A' -> 'Ä' + 'O' -> 'Ö' + 'a' -> 'ä' + 'o' -> 'ö' + _ -> c + unStringLit :: String -> String unStringLit s = case s of c : cs | strlim c && strlim (last cs) -> init cs diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index e158a19c4..40c625612 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/17 12:46:05 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.64 $ +-- > CVS $Date: 2005/06/23 14:32:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.65 $ -- -- A database for customizable GF shell commands. -- @@ -404,6 +404,7 @@ customUntokenizer = ,(strCI "codelit", const $ formatAsCodeLit) ,(strCI "concat", const $ concatRemSpace) ,(strCI "glue", const $ performBinds) + ,(strCI "finnish", const $ performBindsFinnish) ,(strCI "reverse", const $ reverse) ,(strCI "bind", const $ performBinds) -- backward compat -- add your own untokenizers here diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs index 4df459ec6..3899aa48f 100644 --- a/src/GF/UseGrammar/Linear.hs +++ b/src/GF/UseGrammar/Linear.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/06/23 09:43:40 $ --- > CVS $Author: peb $ --- > CVS $Revision: 1.16 $ +-- > CVS $Date: 2005/06/23 14:32:44 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.17 $ -- -- Linearization for canonical GF. AR 7\/6\/2003 ----------------------------------------------------------------------------- @@ -93,6 +93,7 @@ linearizeNoMark gr = linearizeToRecord gr noMark -- | expand tables in linearized term to full, normal-order tables -- -- NB expand from inside-out so that values are not looked up in copies of branches + expandLinTables :: CanonGrammar -> Term -> Err Term expandLinTables gr t = case t of R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]