finnish vowel harmony unlexer ; optimizations and mcfg

This commit is contained in:
aarne
2005-06-23 13:32:43 +00:00
parent 6c5ce5fd9b
commit 5df34601af
7 changed files with 59 additions and 26 deletions

View File

@@ -10,6 +10,11 @@
resource SyntaxFin = MorphoFin ** open Prelude, (CO = Coordination) in {
-- To glue a particle to the preceding word. The lexer and unlexer
-- are expected to deal with actual gluing and vowel harmony.
glueParticle : Str -> Str -> Str = \word,part -> word ++ "&*" ++ part ;
--2 Common Nouns
--
-- Simple common nouns are defined as the type $CommNoun$ in $MorphoFin$.
@@ -502,7 +507,7 @@ oper
}
} ;
questPart : Str -> Str = \s -> glue s "ko" ; --- "kö"
questPart : Str -> Str = \s -> glueParticle s "ko" ; --- "kö"
mkSats : NounPhrase -> Verb -> Sats = \subj,verb ->
{subj = subj.s ! NPCase Nom ; --- "minusta tulee poliisi"

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/23 13:23:01 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.26 $
-- > CVS $Date: 2005/06/23 14:32:43 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.27 $
--
-- Macros for building and analysing terms in GFC concrete syntax.
--
@@ -290,7 +290,8 @@ composOp co trm =
do
as' <- mapM co as
return (FV as')
-- peb tried to do this, but then there were errors in GF.Canon.Look.ccompute:
-- V x as -> do as' <- mapM co as
-- return (V x as')
V x as ->
do
as' <- mapM co as
return (V x as')
_ -> return trm -- covers Arg, I, LI, K, E

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 14:15:17 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.14 $
-- > CVS $Date: 2005/06/23 14:32:43 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.15 $
--
-- lookup in GFC. AR 2003
-----------------------------------------------------------------------------
@@ -125,8 +125,8 @@ ccompute :: CanonGrammar -> [Term] -> Term -> Err Term
ccompute cnc = comp []
where
comp g xs t = case t of
Arg (A _ i) -> errIn ("argument list") $ xs !? fromInteger i
Arg (AB _ _ i) -> errIn ("argument list for binding") $ xs !? fromInteger i
Arg (A _ i) -> err (const (return t)) return $ xs !? fromInteger i
Arg (AB _ _ i) -> err (const (return t)) return $ xs !? fromInteger i
I c -> look c
LI c -> lookVar c g
@@ -194,8 +194,10 @@ ccompute cnc = comp []
noVar v = case v of
LI _ -> False
Arg _ -> False
R rs -> all noVar [t | Ass _ t <- rs]
Par _ ts -> all noVar ts
FV ts -> all noVar ts
S x y -> noVar x && noVar y
P t _ -> noVar t
_ -> True --- other cases that can be values to pattern match?

View File

@@ -4,9 +4,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/23 13:23:01 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.11 $
-- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.12 $
--
-- Converting GFC to SimpleGFC
--
@@ -138,6 +138,7 @@ convertPatt (A.PC con pats) = con :^ map convertPatt pats
-- convertPatt (A.PW) = Wildcard
convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ]
convertPatt (A.PI n) = error "GFCtoSimple.convertPatt: cannot handle 'PI' constructor"
convertPatt p = error $ "GFCtoSimple.convertPatt: cannot handle " ++ show p
----------------------------------------------------------------------

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:41 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.9 $
-- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.10 $
--
-- elementary text postprocessing. AR 21\/11\/2001.
--
@@ -26,6 +26,7 @@ module GF.Text.Text (untokWithXML,
formatAsLatex,
formatAsCode,
performBinds,
performBindsFinnish,
unStringLit,
concatRemSpace
) where
@@ -101,12 +102,33 @@ formatAsCode = rend 0 . words where
space t s = if null s then t else t ++ " " ++ s
performBinds :: String -> String
performBinds = unwords . format . words where
performBinds = performBindsOpt (\x y -> y)
-- The function defines an effect of the former on the latter part,
-- such as in vowel harmony. It is triggered by the binder token "&*"
performBindsOpt :: (String -> String -> String) -> String -> String
performBindsOpt harm = unwords . format . words where
format ws = case ws of
w : "&+" : u : ws -> format ((w ++ u) : ws)
w : "&+" : u : ws -> format ((w ++ u) : ws)
w : "&*" : u : ws -> format ((w ++ harm w u) : ws)
w : ws -> w : format ws
[] -> []
-- unlexer for Finnish particles
-- Notice: left associativity crucial for "tie &* ko &* han" --> "tieköhän"
performBindsFinnish :: String -> String
performBindsFinnish = performBindsOpt vowelHarmony where
vowelHarmony w p = if any (flip elem "aouAOU") w then p else map toFront p
toFront c = case c of
'A' -> 'Ä'
'O' -> 'Ö'
'a' -> 'ä'
'o' -> 'ö'
_ -> c
unStringLit :: String -> String
unStringLit s = case s of
c : cs | strlim c && strlim (last cs) -> init cs

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 12:46:05 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.64 $
-- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.65 $
--
-- A database for customizable GF shell commands.
--
@@ -404,6 +404,7 @@ customUntokenizer =
,(strCI "codelit", const $ formatAsCodeLit)
,(strCI "concat", const $ concatRemSpace)
,(strCI "glue", const $ performBinds)
,(strCI "finnish", const $ performBindsFinnish)
,(strCI "reverse", const $ reverse)
,(strCI "bind", const $ performBinds) -- backward compat
-- add your own untokenizers here

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/23 09:43:40 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.16 $
-- > CVS $Date: 2005/06/23 14:32:44 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.17 $
--
-- Linearization for canonical GF. AR 7\/6\/2003
-----------------------------------------------------------------------------
@@ -93,6 +93,7 @@ linearizeNoMark gr = linearizeToRecord gr noMark
-- | expand tables in linearized term to full, normal-order tables
--
-- NB expand from inside-out so that values are not looked up in copies of branches
expandLinTables :: CanonGrammar -> Term -> Err Term
expandLinTables gr t = case t of
R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]