From 5df34601af0decabacc4c81e8b4a74e049ec1b48 Mon Sep 17 00:00:00 2001
From: aarne <unknown>
Date: Thu, 23 Jun 2005 13:32:43 +0000
Subject: [PATCH] finnish vowel harmony unlexer ; optimizations and mcfg

---
 lib/resource/finnish/SyntaxFin.gf |  7 ++++++-
 src/GF/Canon/CMacros.hs           | 13 +++++++------
 src/GF/Canon/Look.hs              | 12 +++++++-----
 src/GF/Conversion/GFCtoSimple.hs  |  7 ++++---
 src/GF/Text/Text.hs               | 32 ++++++++++++++++++++++++++-----
 src/GF/UseGrammar/Custom.hs       |  7 ++++---
 src/GF/UseGrammar/Linear.hs       |  7 ++++---
 7 files changed, 59 insertions(+), 26 deletions(-)
diff --git a/lib/resource/finnish/SyntaxFin.gf b/lib/resource/finnish/SyntaxFin.gf
index 1b7e42a88..ae9343c09 100644
--- a/lib/resource/finnish/SyntaxFin.gf
+++ b/lib/resource/finnish/SyntaxFin.gf
@@ -10,6 +10,11 @@
 
 resource SyntaxFin = MorphoFin ** open Prelude, (CO = Coordination) in {
 
+-- To glue a particle to the preceding word. The lexer and unlexer
+-- are expected to deal with actual gluing and vowel harmony.
+
+ glueParticle : Str -> Str -> Str = \word,part -> word ++ "&*" ++ part ;
+
 --2 Common Nouns
 --
 -- Simple common nouns are defined as the type $CommNoun$ in $MorphoFin$.
@@ -502,7 +507,7 @@ oper
        }
     } ;
 
-  questPart : Str -> Str = \s -> glue s "ko" ; --- "kö"
+  questPart : Str -> Str = \s -> glueParticle s "ko" ; --- "kö"
 
   mkSats : NounPhrase -> Verb -> Sats = \subj,verb ->
     {subj = subj.s ! NPCase Nom ; --- "minusta tulee poliisi"
diff --git a/src/GF/Canon/CMacros.hs b/src/GF/Canon/CMacros.hs
index 77f4b0027..c5268b8cb 100644
--- a/src/GF/Canon/CMacros.hs
+++ b/src/GF/Canon/CMacros.hs
@@ -5,9 +5,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/06/23 13:23:01 $ 
--- > CVS $Author: peb $
--- > CVS $Revision: 1.26 $
+-- > CVS $Date: 2005/06/23 14:32:43 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.27 $
 --
 -- Macros for building and analysing terms in GFC concrete syntax.
 --
@@ -290,7 +290,8 @@ composOp co trm =
       do
       as' <- mapM co as
       return (FV as')
-  -- peb tried to do this, but then there were errors in GF.Canon.Look.ccompute:
-  -- V x as -> do as' <- mapM co as
-  -- 	       return (V x as')
+  V x as -> 
+      do 
+      as' <- mapM co as
+      return (V x as')
   _ -> return trm -- covers Arg, I, LI, K, E
diff --git a/src/GF/Canon/Look.hs b/src/GF/Canon/Look.hs
index 231014abc..bcd73f97d 100644
--- a/src/GF/Canon/Look.hs
+++ b/src/GF/Canon/Look.hs
@@ -5,9 +5,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/06/17 14:15:17 $ 
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.14 $
+-- > CVS $Date: 2005/06/23 14:32:43 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.15 $
 --
 -- lookup in GFC. AR 2003
 -----------------------------------------------------------------------------
@@ -125,8 +125,8 @@ ccompute :: CanonGrammar -> [Term] -> Term -> Err Term
 ccompute cnc = comp [] 
  where
   comp g xs t = case t of
-    Arg (A _ i)    -> errIn ("argument list") $ xs !? fromInteger i
-    Arg (AB _ _ i) -> errIn ("argument list for binding") $ xs !? fromInteger i
+    Arg (A _ i)    -> err (const (return t)) return $ xs !? fromInteger i
+    Arg (AB _ _ i) -> err (const (return t)) return $ xs !? fromInteger i
     I c     -> look c 
     LI c    -> lookVar c g
 
@@ -194,8 +194,10 @@ ccompute cnc = comp []
 
      noVar v = case v of
        LI _ -> False
+       Arg _ -> False
        R rs -> all noVar [t | Ass _ t <- rs]
        Par _ ts -> all noVar ts
        FV ts -> all noVar ts
        S x y -> noVar x && noVar y
+       P t _ -> noVar t
        _    -> True --- other cases that can be values to pattern match?
diff --git a/src/GF/Conversion/GFCtoSimple.hs b/src/GF/Conversion/GFCtoSimple.hs
index bd895155d..07a1da9e2 100644
--- a/src/GF/Conversion/GFCtoSimple.hs
+++ b/src/GF/Conversion/GFCtoSimple.hs
@@ -4,9 +4,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/06/23 13:23:01 $ 
--- > CVS $Author: peb $
--- > CVS $Revision: 1.11 $
+-- > CVS $Date: 2005/06/23 14:32:44 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.12 $
 --
 -- Converting GFC to SimpleGFC
 --
@@ -138,6 +138,7 @@ convertPatt (A.PC con pats) = con :^ map convertPatt pats
 -- convertPatt (A.PW)          = Wildcard
 convertPatt (A.PR rec)      = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ]
 convertPatt (A.PI n)        = error "GFCtoSimple.convertPatt: cannot handle 'PI' constructor"
+convertPatt p               = error $ "GFCtoSimple.convertPatt: cannot handle " ++ show p
 
 ----------------------------------------------------------------------
 
diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs
index dc9130f90..7b7f18469 100644
--- a/src/GF/Text/Text.hs
+++ b/src/GF/Text/Text.hs
@@ -5,9 +5,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/04/21 16:23:41 $ 
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.9 $
+-- > CVS $Date: 2005/06/23 14:32:44 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.10 $
 --
 -- elementary text postprocessing. AR 21\/11\/2001.
 --
@@ -26,6 +26,7 @@ module GF.Text.Text (untokWithXML,
 	     formatAsLatex,
 	     formatAsCode,
 	     performBinds,
+	     performBindsFinnish,
 	     unStringLit,
 	     concatRemSpace
 	    ) where
@@ -101,12 +102,33 @@ formatAsCode = rend 0 . words where
   space t s = if null s then t else t ++ " " ++ s
 
 performBinds :: String -> String
-performBinds = unwords . format . words where
+performBinds = performBindsOpt (\x y -> y) 
+
+
+-- The function defines an effect of the former on the latter part,
+-- such as in vowel harmony. It is triggered by the binder token "&*"
+
+performBindsOpt :: (String -> String -> String) -> String -> String
+performBindsOpt harm = unwords . format . words where
   format ws = case ws of
-    w : "&+" : u : ws -> format ((w ++ u) : ws)
+    w : "&+" : u : ws -> format ((w ++        u) : ws)
+    w : "&*" : u : ws -> format ((w ++ harm w u) : ws)
     w : ws            -> w : format ws
     []                -> []
 
+-- unlexer for Finnish particles
+-- Notice: left associativity crucial for "tie &* ko &* han" --> "tieköhän"
+
+performBindsFinnish :: String -> String
+performBindsFinnish = performBindsOpt vowelHarmony where
+  vowelHarmony w p = if any (flip elem "aouAOU") w then p else map toFront p
+  toFront c = case c of
+    'A' -> 'Ä'
+    'O' -> 'Ö'
+    'a' -> 'ä'
+    'o' -> 'ö'
+    _ -> c
+
 unStringLit :: String -> String
 unStringLit s = case s of
   c : cs | strlim c && strlim (last cs) -> init cs
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index e158a19c4..40c625612 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -5,9 +5,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/06/17 12:46:05 $ 
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.64 $
+-- > CVS $Date: 2005/06/23 14:32:44 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.65 $
 --
 -- A database for customizable GF shell commands. 
 --
@@ -404,6 +404,7 @@ customUntokenizer =
   ,(strCI "codelit",   const $ formatAsCodeLit)
   ,(strCI "concat",    const $ concatRemSpace)
   ,(strCI "glue",      const $ performBinds)
+  ,(strCI "finnish",   const $ performBindsFinnish)
   ,(strCI "reverse",   const $ reverse)
   ,(strCI "bind",      const $ performBinds) -- backward compat
 -- add your own untokenizers here
diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs
index 4df459ec6..3899aa48f 100644
--- a/src/GF/UseGrammar/Linear.hs
+++ b/src/GF/UseGrammar/Linear.hs
@@ -5,9 +5,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/06/23 09:43:40 $ 
--- > CVS $Author: peb $
--- > CVS $Revision: 1.16 $
+-- > CVS $Date: 2005/06/23 14:32:44 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.17 $
 --
 -- Linearization for canonical GF. AR 7\/6\/2003
 -----------------------------------------------------------------------------
@@ -93,6 +93,7 @@ linearizeNoMark gr = linearizeToRecord gr noMark
 -- | expand tables in linearized term to full, normal-order tables
 --
 -- NB expand from inside-out so that values are not looked up in copies of branches
+
 expandLinTables :: CanonGrammar -> Term -> Err Term
 expandLinTables gr t = case t of
   R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]