GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

This commit is contained in:
aarne
2008-05-21 09:26:44 +00:00
parent b24ca795ca
commit 2bab9286f1
536 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/09 09:28:44 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- Adding coercion functions to a MCFG if necessary.
-----------------------------------------------------------------------------
module GF.Conversion.SimpleToMCFG.Coercions
(addCoercions) where
import GF.System.Tracing
import GF.Infra.Print
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Conversion.Types
import GF.Data.SortedList
import Data.List (groupBy)
----------------------------------------------------------------------
addCoercions :: EGrammar -> EGrammar
addCoercions rules = coercions ++ rules
where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) |
Rule (Abs head args _) (Cnc lbls _ _) <- rules ]
allHeadSet = nubsort allHeads
allArgSet = union allArgs <\\> map fst allHeadSet
coercions = tracePrt "SimpleToMCFG.Coercions - MCFG coercions" (prt . length) $
concat $
tracePrt "SimpleToMCFG.Coercions - MCFG coercions per category"
(prtList . map length) $
combineCoercions
(groupBy sameECatFst allHeadSet)
(groupBy sameECat allArgSet)
sameECatFst a b = sameECat (fst a) (fst b)
combineCoercions [] _ = []
combineCoercions _ [] = []
combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs)
= case compare (ecat2scat $ fst $ head heads) (ecat2scat $ head args) of
LT -> combineCoercions allHeads allArgs'
GT -> combineCoercions allHeads' allArgs
EQ -> makeCoercion heads args : combineCoercions allHeads allArgs
makeCoercion heads args
= [ Rule (Abs arg [head] coercionName) (Cnc lbls [lbls] lins) |
(head@(ECat _ headCns), lbls) <- heads,
let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ],
arg@(ECat _ argCns) <- args,
argCns `subset` headCns ]

View File

@@ -0,0 +1,256 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/17 08:27:29 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.7 $
--
-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically.
-- Afterwards, the grammar has to be extended with coercion functions,
-- from the module 'GF.Conversion.SimpleToMCFG.Coercions'
--
-- the resulting grammars might be /very large/
--
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------
module GF.Conversion.SimpleToMCFG.Nondet
(convertGrammar) where
import GF.System.Tracing
import GF.Infra.Print
import Control.Monad
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Formalism.SimpleGFC
import GF.Conversion.Types
import GF.Data.BacktrackM
import GF.Data.Utilities (notLongerThan, updateNthM)
------------------------------------------------------------
-- type declarations
type CnvMonad a = BacktrackM Env a
type Env = (ECat, [ECat], LinRec, [SLinType]) -- variable bindings: [(Var, STerm)]
type LinRec = [Lin SCat MLabel Token]
----------------------------------------------------------------------
-- main conversion function
maxNrRules :: Int
maxNrRules = 5000
convertGrammar :: SGrammar -> EGrammar
convertGrammar rules = traceCalcFirst rules' $
tracePrt "SimpleToMCFG.Nondet - MCFG rules" (prt . length) $
rules'
where rules' = rules >>= convertRule
-- solutions conversion undefined
-- where conversion = member rules >>= convertRule
convertRule :: SRule -> [ERule] -- CnvMonad ERule
convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) =
-- | prt(name2fun fun) `elem`
-- words "UseCl PosTP TPast ASimul SPredV IndefOneNP DefOneNP UseN2 mother_N2 jump_V" =
if notLongerThan maxNrRules rules
then tracePrt ("SimpeToMCFG.Nondet - MCFG rules for " ++ prt fun) (prt . length) $
rules
else trace2 "SimpeToMCFG.Nondet - TOO MANY RULES, function not converted"
("More than " ++ show maxNrRules ++ " MCFG rules for " ++ prt fun) $
[]
where rules = flip solutions undefined $
do let cat : args = map decl2cat (decl : decls)
writeState (initialECat cat, map initialECat args, [], ctypes)
rterm <- simplifyTerm term
reduceTerm ctype emptyPath rterm
(newCat, newArgs, linRec, _) <- readState
let newLinRec = map (instantiateArgs newArgs) linRec
catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes)
-- checkLinRec argsPaths catPaths newLinRec
return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec)
convertRule _ = [] -- failure
----------------------------------------------------------------------
-- "type-checking" the resulting linearization
-- should not be necessary, if the algorithms (type-checking and conversion) are correct
checkLinRec args lbls = mapM (checkLin args lbls)
checkLin args lbls (Lin lbl lin)
| lbl `elem` lbls = mapM (symbol (checkArg args) (const (return ()))) lin
| otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" "Label mismatch" $
failure
checkArg args (_cat, lbl, nr)
| lbl `elem` (args !! nr) = return ()
-- | otherwise = trace2 "SimpleToMCFG.Nondet - ERROR" ("Label mismatch in arg " ++ prt nr) $
-- failure
| otherwise = trace2 ("SimpleToMCFG.Nondet - ERROR: Label mismatch in arg " ++ prt nr)
(prt lbl ++ " `notElem` " ++ prt (args!!nr)) $
failure
----------------------------------------------------------------------
-- term simplification
simplifyTerm :: STerm -> CnvMonad STerm
simplifyTerm (term :! sel)
= do sterm <- simplifyTerm term
ssel <- simplifyTerm sel
case sterm of
Tbl table -> do (pat, val) <- member table
pat =?= ssel
return val
_ -> do sel' <- expandTerm ssel
return (sterm +! sel')
-- simplifyTerm (Var x) = readBinding x
simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms
simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record
simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term
simplifyTerm (Tbl table) = liftM Tbl $ mapM simplifyCase table
simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms
simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2)
simplifyTerm term = return term
simplifyAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term
simplifyCase :: (STerm, STerm) -> CnvMonad (STerm, STerm)
simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term)
------------------------------------------------------------
-- reducing simplified terms, collecting MCF rules
reduceTerm :: SLinType -> SPath -> STerm -> CnvMonad ()
--reduceTerm ctype path (Variants terms)
-- = member terms >>= reduceTerm ctype path
reduceTerm (StrT) path term = updateLin (path, term)
reduceTerm (ConT _) path term = do pat <- expandTerm term
updateHead (path, pat)
reduceTerm (RecT rtype) path term
= sequence_ [ reduceTerm ctype (path ++. lbl) (term +. lbl) | (lbl, ctype) <- rtype ]
reduceTerm (TblT pats vtype) path table
= sequence_ [ reduceTerm vtype (path ++! pat) (table +! pat) | pat <- pats ]
------------------------------------------------------------
-- expanding a term to ground terms
expandTerm :: STerm -> CnvMonad STerm
expandTerm arg@(Arg nr _ path)
= do ctypes <- readArgCTypes
unifyPType arg $ lintypeFollowPath path $ ctypes !! nr
-- expandTerm arg@(Arg nr _ path)
-- = do ctypes <- readArgCTypes
-- pat <- member $ enumeratePatterns $ lintypeFollowPath path $ ctypes !! nr
-- pat =?= arg
-- return pat
expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms
expandTerm (Rec record) = liftM Rec $ mapM expandAssign record
--expandTerm (Variants terms) = liftM Variants $ mapM expandTerm terms
expandTerm (Variants terms) = member terms >>= expandTerm
expandTerm term = error $ "expandTerm: " ++ prt term
expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm)
expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term
unifyPType :: STerm -> SLinType -> CnvMonad STerm
unifyPType arg (RecT prec) =
liftM Rec $
sequence [ liftM ((,) lbl) $
unifyPType (arg +. lbl) ptype |
(lbl, ptype) <- prec ]
unifyPType (Arg nr _ path) (ConT terms) =
do (_, args, _, _) <- readState
case lookup path (ecatConstraints (args !! nr)) of
Just term -> return term
Nothing -> do term <- member terms
updateArg nr (path, term)
return term
------------------------------------------------------------
-- unification of patterns and selection terms
(=?=) :: STerm -> STerm -> CnvMonad ()
-- Wildcard =?= _ = return ()
-- Var x =?= term = addBinding x term
Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) |
(lbl, pat) <- precord ]
pat =?= Arg nr _ path = updateArg nr (path, pat)
(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms)
sequence_ $ zipWith (=?=) pats terms
Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm |
(lbl, pat) <- precord,
let mterm = lookup lbl record ]
-- variants are not allowed in patterns, but in selection terms:
term =?= Variants terms = member terms >>= (term =?=)
pat =?= term = error $ "(=?=): " ++ prt pat ++ " =?= " ++ prt term
----------------------------------------------------------------------
-- variable bindings (does not work correctly)
{-
addBinding x term = do (a, b, c, d, bindings) <- readState
writeState (a, b, c, d, (x,term):bindings)
readBinding x = do (_, _, _, _, bindings) <- readState
return $ maybe (Var x) id $ lookup x bindings
-}
------------------------------------------------------------
-- updating the MCF rule
readArgCTypes :: CnvMonad [SLinType]
readArgCTypes = do (_, _, _, env) <- readState
return env
updateArg :: Int -> Constraint -> CnvMonad ()
updateArg arg cn
= do (head, args, lins, env) <- readState
args' <- updateNthM (addToECat cn) arg args
writeState (head, args', lins, env)
updateHead :: Constraint -> CnvMonad ()
updateHead cn
= do (head, args, lins, env) <- readState
head' <- addToECat cn head
writeState (head', args, lins, env)
updateLin :: Constraint -> CnvMonad ()
updateLin (path, term)
= do let newLins = term2lins term
(head, args, lins, env) <- readState
let lins' = lins ++ map (Lin path) newLins
writeState (head, args, lins', env)
term2lins :: STerm -> [[Symbol (SCat, SPath, Int) Token]]
term2lins (Arg nr cat path) = return [Cat (cat, path, nr)]
term2lins (Token str) = return [Tok str]
term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2)
term2lins (Empty) = return []
term2lins (Variants terms) = terms >>= term2lins
term2lins term = error $ "term2lins: " ++ show term
addToECat :: Constraint -> ECat -> CnvMonad ECat
addToECat cn (ECat cat cns) = liftM (ECat cat) $ addConstraint cn cns
addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint]
addConstraint cn0 (cn : cns)
| fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns)
| fst cn0 == fst cn = guard (snd cn0 == snd cn) >>
return (cn : cns)
addConstraint cn0 cns = return (cn0 : cns)

View File

@@ -0,0 +1,129 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/09 09:28:44 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.5 $
--
-- Converting SimpleGFC grammars to MCFG grammars, deterministic.
--
-- the resulting grammars might be /very large/
--
-- the conversion is only equivalent if the GFC grammar has a context-free backbone.
-----------------------------------------------------------------------------
module GF.Conversion.SimpleToMCFG.Strict
(convertGrammar) where
import GF.System.Tracing
import GF.Infra.Print
import Control.Monad
import GF.Formalism.Utilities
import GF.Formalism.GCFG
import GF.Formalism.MCFG
import GF.Formalism.SimpleGFC
import GF.Conversion.Types
import GF.Data.BacktrackM
import GF.Data.SortedList
----------------------------------------------------------------------
-- main conversion function
type CnvMonad a = BacktrackM () a
convertGrammar :: SGrammar -> EGrammar
convertGrammar rules = tracePrt "SimpleToMCFG.Strict - MCFG rules" (prt . length) $
solutions conversion undefined
where conversion = member rules >>= convertRule
convertRule :: SRule -> CnvMonad ERule
convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term)))
= do let cat : args = map decl2cat (decl : decls)
args_ctypes = zip3 [0..] args ctypes
instArgs <- mapM enumerateArg args_ctypes
let instTerm = substitutePaths instArgs term
newCat <- extractECat cat ctype instTerm
newArgs <- mapM (extractArg instArgs) args_ctypes
let linRec = strPaths ctype instTerm >>= extractLin newArgs
let newLinRec = map (instantiateArgs newArgs) linRec
catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes)
return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec)
convertRule _ = failure
----------------------------------------------------------------------
-- category extraction
extractArg :: [STerm] -> (Int, SCat, SLinType) -> CnvMonad ECat
extractArg args (nr, cat, ctype) = extractECat cat ctype (args !! nr)
extractECat :: SCat -> SLinType -> STerm -> CnvMonad ECat
extractECat cat ctype term = member $ map (ECat cat) $ parPaths ctype term
enumerateArg :: (Int, SCat, SLinType) -> CnvMonad STerm
enumerateArg (nr, cat, ctype) = member $ enumerateTerms (Just (Arg nr cat emptyPath)) ctype
----------------------------------------------------------------------
-- Substitute each instantiated parameter path for its instantiation
substitutePaths :: [STerm] -> STerm -> STerm
substitutePaths arguments = subst
where subst (Arg nr _ path) = termFollowPath path (arguments !! nr)
subst (con :^ terms) = con :^ map subst terms
subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ]
subst (term :. lbl) = subst term +. lbl
subst (Tbl table) = Tbl [ (pat, subst term) |
(pat, term) <- table ]
subst (term :! select) = subst term +! subst select
subst (term :++ term') = subst term ?++ subst term'
subst (Variants terms) = Variants $ map subst terms
subst term = term
----------------------------------------------------------------------
-- term paths extaction
termPaths :: SLinType -> STerm -> [(SPath, (SLinType, STerm))]
termPaths ctype (Variants terms) = terms >>= termPaths ctype
termPaths (RecT rtype) (Rec record)
= [ (path ++. lbl, value) |
(lbl, term) <- record,
let Just ctype = lookup lbl rtype,
(path, value) <- termPaths ctype term ]
termPaths (TblT _ ctype) (Tbl table)
= [ (path ++! pat, value) |
(pat, term) <- table,
(path, value) <- termPaths ctype term ]
termPaths ctype term | isBaseType ctype = [ (emptyPath, (ctype, term)) ]
{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt):
{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2}
[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2]
-}
parPaths :: SLinType -> STerm -> [[(SPath, STerm)]]
parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $
nubsort [ (path, value) |
(path, (ConT _, value)) <- termPaths ctype term ]
strPaths :: SLinType -> STerm -> [(SPath, STerm)]
strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ]
where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ]
----------------------------------------------------------------------
-- linearization extraction
extractLin :: [ECat] -> (SPath, STerm) -> [Lin ECat MLabel Token]
extractLin args (path, term) = map (Lin path) (convertLin term)
where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2)
convertLin (Empty) = [[]]
convertLin (Token tok) = [[Tok tok]]
convertLin (Variants terms) = concatMap convertLin terms
convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]]
convertLin t = error $ "convertLin: " ++ prt t ++ " " ++ prt (args, path)