finnish vowel harmony unlexer ; optimizations and mcfg

This commit is contained in:
aarne
2005-06-23 13:32:43 +00:00
parent 6c5ce5fd9b
commit 5df34601af
7 changed files with 59 additions and 26 deletions

View File

@@ -10,6 +10,11 @@
resource SyntaxFin = MorphoFin ** open Prelude, (CO = Coordination) in { resource SyntaxFin = MorphoFin ** open Prelude, (CO = Coordination) in {
-- To glue a particle to the preceding word. The lexer and unlexer
-- are expected to deal with actual gluing and vowel harmony.
glueParticle : Str -> Str -> Str = \word,part -> word ++ "&*" ++ part ;
--2 Common Nouns --2 Common Nouns
-- --
-- Simple common nouns are defined as the type $CommNoun$ in $MorphoFin$. -- Simple common nouns are defined as the type $CommNoun$ in $MorphoFin$.
@@ -502,7 +507,7 @@ oper
} }
} ; } ;
questPart : Str -> Str = \s -> glue s "ko" ; --- "kö" questPart : Str -> Str = \s -> glueParticle s "ko" ; --- "kö"
mkSats : NounPhrase -> Verb -> Sats = \subj,verb -> mkSats : NounPhrase -> Verb -> Sats = \subj,verb ->
{subj = subj.s ! NPCase Nom ; --- "minusta tulee poliisi" {subj = subj.s ! NPCase Nom ; --- "minusta tulee poliisi"

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/06/23 13:23:01 $ -- > CVS $Date: 2005/06/23 14:32:43 $
-- > CVS $Author: peb $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.26 $ -- > CVS $Revision: 1.27 $
-- --
-- Macros for building and analysing terms in GFC concrete syntax. -- Macros for building and analysing terms in GFC concrete syntax.
-- --
@@ -290,7 +290,8 @@ composOp co trm =
do do
as' <- mapM co as as' <- mapM co as
return (FV as') return (FV as')
-- peb tried to do this, but then there were errors in GF.Canon.Look.ccompute: V x as ->
-- V x as -> do as' <- mapM co as do
-- return (V x as') as' <- mapM co as
return (V x as')
_ -> return trm -- covers Arg, I, LI, K, E _ -> return trm -- covers Arg, I, LI, K, E

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/06/17 14:15:17 $ -- > CVS $Date: 2005/06/23 14:32:43 $
-- > CVS $Author: bringert $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.14 $ -- > CVS $Revision: 1.15 $
-- --
-- lookup in GFC. AR 2003 -- lookup in GFC. AR 2003
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
@@ -125,8 +125,8 @@ ccompute :: CanonGrammar -> [Term] -> Term -> Err Term
ccompute cnc = comp [] ccompute cnc = comp []
where where
comp g xs t = case t of comp g xs t = case t of
Arg (A _ i) -> errIn ("argument list") $ xs !? fromInteger i Arg (A _ i) -> err (const (return t)) return $ xs !? fromInteger i
Arg (AB _ _ i) -> errIn ("argument list for binding") $ xs !? fromInteger i Arg (AB _ _ i) -> err (const (return t)) return $ xs !? fromInteger i
I c -> look c I c -> look c
LI c -> lookVar c g LI c -> lookVar c g
@@ -194,8 +194,10 @@ ccompute cnc = comp []
noVar v = case v of noVar v = case v of
LI _ -> False LI _ -> False
Arg _ -> False
R rs -> all noVar [t | Ass _ t <- rs] R rs -> all noVar [t | Ass _ t <- rs]
Par _ ts -> all noVar ts Par _ ts -> all noVar ts
FV ts -> all noVar ts FV ts -> all noVar ts
S x y -> noVar x && noVar y S x y -> noVar x && noVar y
P t _ -> noVar t
_ -> True --- other cases that can be values to pattern match? _ -> True --- other cases that can be values to pattern match?

View File

@@ -4,9 +4,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/06/23 13:23:01 $ -- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: peb $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.11 $ -- > CVS $Revision: 1.12 $
-- --
-- Converting GFC to SimpleGFC -- Converting GFC to SimpleGFC
-- --
@@ -138,6 +138,7 @@ convertPatt (A.PC con pats) = con :^ map convertPatt pats
-- convertPatt (A.PW) = Wildcard -- convertPatt (A.PW) = Wildcard
convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ] convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ]
convertPatt (A.PI n) = error "GFCtoSimple.convertPatt: cannot handle 'PI' constructor" convertPatt (A.PI n) = error "GFCtoSimple.convertPatt: cannot handle 'PI' constructor"
convertPatt p = error $ "GFCtoSimple.convertPatt: cannot handle " ++ show p
---------------------------------------------------------------------- ----------------------------------------------------------------------

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/04/21 16:23:41 $ -- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: bringert $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.9 $ -- > CVS $Revision: 1.10 $
-- --
-- elementary text postprocessing. AR 21\/11\/2001. -- elementary text postprocessing. AR 21\/11\/2001.
-- --
@@ -26,6 +26,7 @@ module GF.Text.Text (untokWithXML,
formatAsLatex, formatAsLatex,
formatAsCode, formatAsCode,
performBinds, performBinds,
performBindsFinnish,
unStringLit, unStringLit,
concatRemSpace concatRemSpace
) where ) where
@@ -101,12 +102,33 @@ formatAsCode = rend 0 . words where
space t s = if null s then t else t ++ " " ++ s space t s = if null s then t else t ++ " " ++ s
performBinds :: String -> String performBinds :: String -> String
performBinds = unwords . format . words where performBinds = performBindsOpt (\x y -> y)
-- The function defines an effect of the former on the latter part,
-- such as in vowel harmony. It is triggered by the binder token "&*"
performBindsOpt :: (String -> String -> String) -> String -> String
performBindsOpt harm = unwords . format . words where
format ws = case ws of format ws = case ws of
w : "&+" : u : ws -> format ((w ++ u) : ws) w : "&+" : u : ws -> format ((w ++ u) : ws)
w : "&*" : u : ws -> format ((w ++ harm w u) : ws)
w : ws -> w : format ws w : ws -> w : format ws
[] -> [] [] -> []
-- unlexer for Finnish particles
-- Notice: left associativity crucial for "tie &* ko &* han" --> "tieköhän"
performBindsFinnish :: String -> String
performBindsFinnish = performBindsOpt vowelHarmony where
vowelHarmony w p = if any (flip elem "aouAOU") w then p else map toFront p
toFront c = case c of
'A' -> 'Ä'
'O' -> 'Ö'
'a' -> 'ä'
'o' -> 'ö'
_ -> c
unStringLit :: String -> String unStringLit :: String -> String
unStringLit s = case s of unStringLit s = case s of
c : cs | strlim c && strlim (last cs) -> init cs c : cs | strlim c && strlim (last cs) -> init cs

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/06/17 12:46:05 $ -- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: bringert $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.64 $ -- > CVS $Revision: 1.65 $
-- --
-- A database for customizable GF shell commands. -- A database for customizable GF shell commands.
-- --
@@ -404,6 +404,7 @@ customUntokenizer =
,(strCI "codelit", const $ formatAsCodeLit) ,(strCI "codelit", const $ formatAsCodeLit)
,(strCI "concat", const $ concatRemSpace) ,(strCI "concat", const $ concatRemSpace)
,(strCI "glue", const $ performBinds) ,(strCI "glue", const $ performBinds)
,(strCI "finnish", const $ performBindsFinnish)
,(strCI "reverse", const $ reverse) ,(strCI "reverse", const $ reverse)
,(strCI "bind", const $ performBinds) -- backward compat ,(strCI "bind", const $ performBinds) -- backward compat
-- add your own untokenizers here -- add your own untokenizers here

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/06/23 09:43:40 $ -- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: peb $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.16 $ -- > CVS $Revision: 1.17 $
-- --
-- Linearization for canonical GF. AR 7\/6\/2003 -- Linearization for canonical GF. AR 7\/6\/2003
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
@@ -93,6 +93,7 @@ linearizeNoMark gr = linearizeToRecord gr noMark
-- | expand tables in linearized term to full, normal-order tables -- | expand tables in linearized term to full, normal-order tables
-- --
-- NB expand from inside-out so that values are not looked up in copies of branches -- NB expand from inside-out so that values are not looked up in copies of branches
expandLinTables :: CanonGrammar -> Term -> Err Term expandLinTables :: CanonGrammar -> Term -> Err Term
expandLinTables gr t = case t of expandLinTables gr t = case t of
R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs] R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]