From 9be3569798a591d6d9a96b90aaa0e85f022905d0 Mon Sep 17 00:00:00 2001 From: peb Date: Mon, 21 Mar 2005 21:31:43 +0000 Subject: [PATCH] "Committed_by_peb" --- src/GF/Parsing/CFGrammar.hs | 153 ++++++++++ src/GF/Parsing/ConvertGFCtoMCFG.hs | 34 +++ src/GF/Parsing/ConvertGFCtoMCFG/Coercions.hs | 70 +++++ src/GF/Parsing/ConvertGFCtoMCFG/Nondet.hs | 281 ++++++++++++++++++ src/GF/Parsing/ConvertGFCtoMCFG/Old.hs | 277 +++++++++++++++++ src/GF/Parsing/ConvertGFCtoMCFG/Strict.hs | 195 ++++++++++++ src/GF/Parsing/ConvertGFCtoMCFG/Utils.hs | 237 +++++++++++++++ src/GF/Parsing/ConvertGrammar.hs | 42 +++ src/GF/Parsing/ConvertMCFGtoCFG.hs | 52 ++++ src/GF/Parsing/GeneralChart.hs | 29 +- src/GF/Parsing/GrammarTypes.hs | 146 +++++++++ src/GF/Parsing/IncrementalChart.hs | 17 +- src/GF/Parsing/MCFGrammar.hs | 206 +++++++++++++ src/GF/Parsing/ParseCF.hs | 8 +- src/GF/Parsing/ParseCFG.hs | 12 +- .../General.hs} | 11 +- .../Incremental.hs} | 10 +- src/GF/Parsing/ParseGFC.hs | 12 +- src/GF/Parsing/ParseMCFG.hs | 12 +- .../{MCFParserBasic.hs => ParseMCFG/Basic.hs} | 10 +- src/GF/Parsing/{Parser.hs => Utilities.hs} | 23 +- 21 files changed, 1766 insertions(+), 71 deletions(-) create mode 100644 src/GF/Parsing/CFGrammar.hs create mode 100644 src/GF/Parsing/ConvertGFCtoMCFG.hs create mode 100644 src/GF/Parsing/ConvertGFCtoMCFG/Coercions.hs create mode 100644 src/GF/Parsing/ConvertGFCtoMCFG/Nondet.hs create mode 100644 src/GF/Parsing/ConvertGFCtoMCFG/Old.hs create mode 100644 src/GF/Parsing/ConvertGFCtoMCFG/Strict.hs create mode 100644 src/GF/Parsing/ConvertGFCtoMCFG/Utils.hs create mode 100644 src/GF/Parsing/ConvertGrammar.hs create mode 100644 src/GF/Parsing/ConvertMCFGtoCFG.hs create mode 100644 src/GF/Parsing/GrammarTypes.hs create mode 100644 src/GF/Parsing/MCFGrammar.hs rename src/GF/Parsing/{CFParserGeneral.hs => ParseCFG/General.hs} (94%) rename src/GF/Parsing/{CFParserIncremental.hs => ParseCFG/Incremental.hs} (96%) rename src/GF/Parsing/{MCFParserBasic.hs => ParseMCFG/Basic.hs} (97%) rename src/GF/Parsing/{Parser.hs => Utilities.hs} (93%) diff --git a/src/GF/Parsing/CFGrammar.hs b/src/GF/Parsing/CFGrammar.hs new file mode 100644 index 000000000..d75b4807b --- /dev/null +++ b/src/GF/Parsing/CFGrammar.hs @@ -0,0 +1,153 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFGrammar +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Definitions of context-free grammars, +-- parser information and chart conversion +---------------------------------------------------------------------- + +module GF.Parsing.CFGrammar + (-- * Type definitions + Grammar, + Rule(..), + CFParser, + -- * Parser information + pInfo, + PInfo(..), + -- * Building parse charts + edges2chart, + -- * Grammar checking + checkGrammar + ) where + +import Tracing + +-- haskell modules: +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +import qualified CF +-- parser modules: +import GF.Parsing.Utilities +import GF.Printing.PrintParser + + +------------------------------------------------------------ +-- type definitions + +type Grammar n c t = [Rule n c t] +data Rule n c t = Rule c [Symbol c t] n + deriving (Eq, Ord, Show) + + +type CFParser n c t = PInfo n c t -> [c] -> Input t -> [Edge (Rule n c t)] +-- - - - - - - - - - - - - - - - - - ^^^ possible starting categories + + +------------------------------------------------------------ +-- parser information + +pInfo :: (Ord n, Ord c, Ord t) => Grammar n c t -> PInfo n c t + +data PInfo n c t + = PInfo { grammarTokens :: SList t, + nameRules :: Assoc n (SList (Rule n c t)), + topdownRules :: Assoc c (SList (Rule n c t)), + bottomupRules :: Assoc (Symbol c t) (SList (Rule n c t)), + emptyLeftcornerRules :: Assoc c (SList (Rule n c t)), + emptyCategories :: Set c, + cyclicCategories :: SList c, + -- ^^ONLY FOR DIRECT CYCLIC RULES!!! + leftcornerTokens :: Assoc c (SList t) + -- ^^DOES NOT WORK WITH EMPTY RULES!!! + } + +-- this is not permanent... +pInfo grammar = pInfo' (filter (not.isCyclic) grammar) + +pInfo' grammar = tracePrt "#parserInfo" prt $ + PInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks + where grToks = union [ nubsort [ tok | Tok tok <- rhs ] | Rule _ rhs _ <- grammar ] + nmRules = accumAssoc id [ (name, rule) | rule@(Rule _ _ name) <- grammar ] + tdRules = accumAssoc id [ (cat, rule) | rule@(Rule cat _ _) <- grammar ] + buRules = accumAssoc id [ (next, rule) | rule@(Rule _ (next:_) _) <- grammar ] + elcRules = accumAssoc id $ limit lc emptyRules + leftToks = accumAssoc id $ limit lc $ + nubsort [ (cat, token) | Rule cat (Tok token:_) _ <- grammar ] + lc (left, res) = nubsort [ (cat, res) | Rule cat _ _ <- buRules ? Cat left ] + emptyRules = nubsort [ (cat, rule) | rule@(Rule cat [] _) <- grammar ] + emptyCats = listSet $ limitEmpties $ map fst emptyRules + limitEmpties es = if es==es' then es else limitEmpties es' + where es' = nubsort [ cat | Rule cat rhs _ <- grammar, + all (symbol (`elem` es) (const False)) rhs ] + cyclicCats = nubsort [ cat | Rule cat [Cat cat'] _ <- grammar, cat == cat' ] + +isCyclic (Rule cat [Cat cat'] _) = cat==cat' +isCyclic _ = False + +------------------------------------------------------------ +-- building parse charts + +edges2chart :: (Ord n, Ord c, Ord t) => Input t -> + [Edge (Rule n c t)] -> ParseChart n (Edge c) + +---------- + +edges2chart input edges + = accumAssoc id [ (Edge i k cat, (name, children i k rhs)) | + Edge i k (Rule cat rhs name) <- edges ] + where children i k [] = [ [] | i == k ] + children i k (Tok tok:rhs) = [ rest | i <= k, + j <- (inputFrom input ! i) ? tok, + rest <- children j k rhs ] + children i k (Cat cat:rhs) = [ Edge i j cat : rest | i <= k, + j <- echart ? (i, cat), + rest <- children j k rhs ] + echart = accumAssoc id [ ((i, cat), j) | Edge i j (Rule cat _ _) <- edges ] + + +------------------------------------------------------------ +-- grammar checking + +checkGrammar :: (Ord n, Ord c, Ord t, Print n, Print c, Print t) => + Grammar n c t -> [String] + +---------- + +checkGrammar rules = [ "rhs category does not exist: " ++ prt cat ++ "\n" ++ + " in rule: " ++ prt rule | + rule@(Rule _ rhs _) <- rules, + Cat cat <- rhs, cat `notElem` cats ] + where cats = nubsort [ cat | Rule cat _ _ <- rules ] + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print n, Print c, Print t) => Print (Rule n c t) where + prt (Rule cat rhs name) = prt name ++ ". " ++ prt cat ++ " -> " ++ prt rhs ++ + (if null rhs then ".\n" else "\n") + prtList = concatMap prt + + +instance (Ord n, Ord c, Ord t) => Print (PInfo n c t) where + prt pI = "[ tokens=" ++ show (length (grammarTokens pI)) ++ + "; names=" ++ sla nameRules ++ + "; tdCats=" ++ sla topdownRules ++ + "; buCats=" ++ sla bottomupRules ++ + "; elcCats=" ++ sla emptyLeftcornerRules ++ + "; eCats=" ++ sla emptyCategories ++ + "; cCats=" ++ show (length (cyclicCategories pI)) ++ + -- "; lctokCats=" ++ sla leftcornerTokens ++ + " ]" + where sla f = show $ length $ aElems $ f pI + + diff --git a/src/GF/Parsing/ConvertGFCtoMCFG.hs b/src/GF/Parsing/ConvertGFCtoMCFG.hs new file mode 100644 index 000000000..224d1d6ab --- /dev/null +++ b/src/GF/Parsing/ConvertGFCtoMCFG.hs @@ -0,0 +1,34 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:46 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All different conversions from GFC to MCFG +----------------------------------------------------------------------------- + + +module GF.Parsing.ConvertGFCtoMCFG + (convertGrammar) where + +import GFC (CanonGrammar) +import GF.Parsing.GrammarTypes +import Ident (Ident(..)) +import Option +import Tracing + +import qualified GF.Parsing.ConvertGFCtoMCFG.Old as Old +import qualified GF.Parsing.ConvertGFCtoMCFG.Nondet as Nondet +import qualified GF.Parsing.ConvertGFCtoMCFG.Strict as Strict +import qualified GF.Parsing.ConvertGFCtoMCFG.Coercions as Coerce + +convertGrammar :: String -> (CanonGrammar, Ident) -> MCFGrammar +convertGrammar "nondet" = Coerce.addCoercions . Nondet.convertGrammar +convertGrammar "strict" = Strict.convertGrammar +convertGrammar "old" = Old.convertGrammar + diff --git a/src/GF/Parsing/ConvertGFCtoMCFG/Coercions.hs b/src/GF/Parsing/ConvertGFCtoMCFG/Coercions.hs new file mode 100644 index 000000000..a0bac995c --- /dev/null +++ b/src/GF/Parsing/ConvertGFCtoMCFG/Coercions.hs @@ -0,0 +1,70 @@ +---------------------------------------------------------------------- +-- | +-- Module : AddCoercions +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +----------------------------------------------------------------------------- + + +module GF.Parsing.ConvertGFCtoMCFG.Coercions (addCoercions) where + +import Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import qualified Ident +import GF.Parsing.Utilities +import GF.Parsing.GrammarTypes +import GF.Parsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList +import List (groupBy) -- , transpose) + +---------------------------------------------------------------------- + +addCoercions :: MCFGrammar -> MCFGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule head args lins _ <- rules, + let lbls = [ lbl | Lin lbl _ <- lins ] ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "#coercions total" (prt . length) $ + concat $ + tracePrt "#coercions per cat" (prtList . map length) $ + combineCoercions + (groupBy sameCatFst allHeadSet) + (groupBy sameCat allArgSet) + sameCatFst a b = sameCat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (mainCat $ fst $ head heads) (mainCat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args = [ Rule arg [head] lins coercionName | + (head@(MCFCat _ headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@(MCFCat _ argCns) <- args, + argCns `subset` headCns ] + + +coercionName = Ident.IW + +mainCat (MCFCat c _) = c + +sameCat mc1 mc2 = mainCat mc1 == mainCat mc2 + + diff --git a/src/GF/Parsing/ConvertGFCtoMCFG/Nondet.hs b/src/GF/Parsing/ConvertGFCtoMCFG/Nondet.hs new file mode 100644 index 000000000..34ce30ad1 --- /dev/null +++ b/src/GF/Parsing/ConvertGFCtoMCFG/Nondet.hs @@ -0,0 +1,281 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Nondet +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.Parsing.ConvertGFCtoMCFG.Nondet (convertGrammar) where + +import Tracing +import IOExts (unsafePerformIO) +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.Parsing.Utilities +import GF.Parsing.GrammarTypes +import GF.Parsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +type GrammarEnv = (CanonGrammar, Ident) + +convertGrammar :: GrammarEnv -- ^ the canonical grammar, together with the selected language + -> MCFGrammar -- ^ the resulting MCF grammar +convertGrammar gram = trace2 "language" (prt (snd gram)) $ + trace2 "modules" (prtSep " " modnames) $ + tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion gram undefined + where Gr modules = grammar2canon (fst gram) + modnames = uncurry M.allExtends gram + conversion = member modules >>= convertModule + convertModule (Mod (MTCnc modname _) _ _ _ defs) + | modname `elem` modnames = member defs >>= convertDef + convertModule _ = failure + +convertDef :: Def -> CnvMonad MCFRule +convertDef (CncDFun fun (CIQ _ cat) args term _) + | trace2 "converting function" (prt fun) True + = do let iCat : iArgs = map initialMCat (cat : map catOfArg args) + writeState (iCat, iArgs, []) + convertTerm cat term + (newCat, newArgs, linRec) <- readState + let newTerm = map (instLin newArgs) linRec + traceDot $ + return (Rule newCat newArgs newTerm fun) +convertDef _ = failure + +instLin newArgs (Lin lbl lin) = Lin lbl (map instSym lin) + where instSym = mapSymbol instCat id + instCat (_, lbl, arg) = (newArgs !! arg, lbl, arg) + +convertTerm :: Cat -> Term -> CnvMonad () +convertTerm cat term = do rterm <- simplifyTerm term + env <- readEnv + let ctype = lookupCType env cat + reduce ctype rterm emptyPath + +------------------------------------------------------------ + +type CnvMonad a = BacktrackM GrammarEnv CMRule a + +type CMRule = (MCFCat, [MCFCat], LinRec) +type LinRec = [Lin Cat Path Tokn] + +initialMCat :: Cat -> MCFCat +initialMCat cat = MCFCat cat [] + +---------------------------------------------------------------------- + +simplifyTerm :: Term -> CnvMonad STerm +simplifyTerm (Arg (A cat nr)) = return (SArg (fromInteger nr) cat emptyPath) +simplifyTerm (Con con terms) = liftM (SCon con) $ mapM simplifyTerm terms +simplifyTerm (R record) = liftM SRec $ mapM simplifyAssign record +simplifyTerm (P term lbl) = liftM (+. lbl) $ simplifyTerm term +simplifyTerm (T ct table) = liftM STbl $ sequence $ concatMap simplifyCase table +simplifyTerm (V ct terms) + = do env <- readEnv + liftM STbl $ sequence [ liftM ((,) pat) (simplifyTerm term) | + (pat, term) <- zip (groundTerms env ct) terms ] +simplifyTerm (S term sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + STbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm ssel + return (sterm +! sel') +simplifyTerm (FV terms) = liftM SVariants $ mapM simplifyTerm terms +simplifyTerm (term1 `C` term2) = liftM2 (SConcat) (simplifyTerm term1) (simplifyTerm term2) +simplifyTerm (K tokn) = return $ SToken tokn +simplifyTerm (E) = return $ SEmpty +simplifyTerm x = error $ "simplifyTerm: " ++ show x +-- error constructors: +-- (I CIdent) - from resource +-- (LI Ident) - pattern variable +-- (EInt Integer) - integer + +simplifyAssign :: Assign -> CnvMonad (Label, STerm) +simplifyAssign (Ass lbl term) = liftM ((,) lbl) $ simplifyTerm term + +simplifyCase :: Case -> [CnvMonad (STerm, STerm)] +simplifyCase (Cas pats term) = [ liftM2 (,) (simplifyPattern pat) (simplifyTerm term) | + pat <- pats ] + +simplifyPattern :: Patt -> CnvMonad STerm +simplifyPattern (PC con pats) = liftM (SCon con) $ mapM simplifyPattern pats +simplifyPattern (PW) = return SWildcard +simplifyPattern (PR record) = do record' <- mapM simplifyPattAssign record + case filter (\row -> snd row /= SWildcard) record' of + [] -> return SWildcard + record'' -> return (SRec record') +simplifyPattern x = error $ "simplifyPattern: " ++ show x +-- error constructors: +-- (PV Ident) - pattern variable + +simplifyPattAssign :: PattAssign -> CnvMonad (Label, STerm) +simplifyPattAssign (PAss lbl pat) = liftM ((,) lbl) $ simplifyPattern pat + + +------------------------------------------------------------ +-- reducing simplified terms, collecting mcf rules + +reduce :: CType -> STerm -> Path -> CnvMonad () +reduce TStr term path = updateLin (path, term) +reduce (Cn _) term path + = do pat <- expandTerm term + updateHead (path, pat) +reduce ctype (SVariants terms) path + = do term <- member terms + reduce ctype term path +reduce (RecType rtype) term path + = sequence_ [ reduce ctype (term +. lbl) (path ++. lbl) | + Lbg lbl ctype <- rtype ] +reduce (Table _ ctype) (STbl table) path + = sequence_ [ reduce ctype term (path ++! pat) | + (pat, term) <- table ] +reduce (Table ptype vtype) arg@(SArg _ _ _) path + = do env <- readEnv + sequence_ [ reduce vtype (arg +! pat) (path ++! pat) | + pat <- groundTerms env ptype ] +reduce ctype term path = error ("reduce:\n ctype = (" ++ show ctype ++ + ")\n term = (" ++ show term ++ + ")\n path = (" ++ show path ++ ")\n") + + +------------------------------------------------------------ +-- expanding a term to ground terms + +expandTerm :: STerm -> CnvMonad STerm +expandTerm arg@(SArg _ _ _) + = do env <- readEnv + pat <- member $ groundTerms env $ cTypeForArg env arg + pat =?= arg + return pat +expandTerm (SCon con terms) = liftM (SCon con) $ mapM expandTerm terms +expandTerm (SRec record) = liftM SRec $ mapM expandAssign record +expandTerm (SVariants terms) = member terms >>= expandTerm +expandTerm term = error $ "expandTerm: " ++ show term + +expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm) +expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term + +------------------------------------------------------------ +-- unification of patterns and selection terms + +(=?=) :: STerm -> STerm -> CnvMonad () +SWildcard =?= _ = return () +SRec precord =?= arg@(SArg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= SArg arg _ path = updateArg arg (path, pat) +SCon con pats =?= SCon con' terms = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +SRec precord =?= SRec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +pat =?= term = error $ "(=?=): " ++ show pat ++ " =?= " ++ show term + + +------------------------------------------------------------ +-- updating the mcf rule + +updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins) <- readState + args' <- updateNth (addToMCFCat cn) arg args + writeState (head, args', lins) + +updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins) <- readState + head' <- addToMCFCat cn head + writeState (head', args, lins) + +updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins) <- readState + let lins' = lins ++ map (Lin path) newLins + writeState (head, args, lins') + +term2lins :: STerm -> [[Symbol (Cat, Path, Int) Tokn]] +term2lins (SArg arg cat path) = return [Cat (cat, path, arg)] +term2lins (SToken str) = return [Tok str] +term2lins (SConcat t1 t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (SEmpty) = return [] +term2lins (SVariants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +addToMCFCat :: Constraint -> MCFCat -> CnvMonad MCFCat +addToMCFCat cn (MCFCat cat cns) = liftM (MCFCat cat) $ addConstraint cn cns + +addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + +catOfArg (A aCat _) = aCat +catOfArg (AB aCat _ _) = aCat + +lookupCType :: GrammarEnv -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (CIQ (snd env) cat) + +groundTerms :: GrammarEnv -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +cTypeForArg :: GrammarEnv -> STerm -> CType +cTypeForArg env (SArg nr cat (Path path)) + = follow path $ lookupCType env cat + where follow [] ctype = ctype + follow (Right pat : path) (Table _ ctype) = follow path ctype + follow (Left lbl : path) (RecType rec) + = case [ ctype | Lbg lbl' ctype <- rec, lbl == lbl' ] of + [ctype] -> follow path ctype + err -> error $ "follow: " ++ show rec ++ " . " ++ show lbl ++ + " results in " ++ show err + +term2spattern (R rec) = SRec [ (lbl, term2spattern term) | Ass lbl term <- rec ] +term2spattern (Con con terms) = SCon con $ map term2spattern terms + diff --git a/src/GF/Parsing/ConvertGFCtoMCFG/Old.hs b/src/GF/Parsing/ConvertGFCtoMCFG/Old.hs new file mode 100644 index 000000000..90044fa0d --- /dev/null +++ b/src/GF/Parsing/ConvertGFCtoMCFG/Old.hs @@ -0,0 +1,277 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.Parsing.ConvertGFCtoMCFG.Old (convertGrammar) where + +import Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +--import PrintGFC +import qualified PrGrammar as PG + +import Monad (liftM, liftM2, guard) +-- import Maybe (listToMaybe) +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.Parsing.Utilities +import GF.Parsing.GrammarTypes +import GF.Parsing.MCFGrammar (Rule(..), Lin(..)) +import SortedList (nubsort, groupPairs) +import Maybe (listToMaybe) +import List (groupBy, transpose) + +---------------------------------------------------------------------- +-- old style types + +data XMCFCat = XMCFCat Cat [(XPath, Term)] deriving (Eq, Ord, Show) +type XMCFLabel = XPath + +cnvXMCFCat :: XMCFCat -> MCFCat +cnvXMCFCat (XMCFCat cat constrs) = MCFCat cat [ (cnvXPath path, cnvTerm term) | + (path, term) <- constrs ] + +cnvXMCFLabel :: XMCFLabel -> MCFLabel +cnvXMCFLabel = cnvXPath + +cnvXMCFLin :: Lin XMCFCat XMCFLabel Tokn -> Lin MCFCat MCFLabel Tokn +cnvXMCFLin (Lin lbl lin) = Lin (cnvXMCFLabel lbl) $ + map (mapSymbol cnvSym id) lin + where cnvSym (cat, lbl, nr) = (cnvXMCFCat cat, cnvXMCFLabel lbl, nr) + +-- Term -> STerm + +cnvTerm (R rec) = SRec [ (lbl, cnvTerm term) | Ass lbl term <- rec ] +cnvTerm (T _ tbl) = STbl [ (cnvPattern pat, cnvTerm term) | + Cas pats term <- tbl, pat <- pats ] +cnvTerm (Con con terms) = SCon con $ map cnvTerm terms +cnvTerm term + | isArgPath term = cnvArgPath term + +cnvPattern (PR rec) = SRec [ (lbl, cnvPattern term) | PAss lbl term <- rec ] +cnvPattern (PC con pats) = SCon con $ map cnvPattern pats +cnvPattern (PW) = SWildcard + +isArgPath (Arg _) = True +isArgPath (P _ _) = True +isArgPath (S _ _) = True +isArgPath _ = False + +cnvArgPath (Arg (A cat nr)) = SArg (fromInteger nr) cat emptyPath +cnvArgPath (term `P` lbl) = cnvArgPath term +. lbl +cnvArgPath (term `S` sel) = cnvArgPath term +! cnvTerm sel + +-- old style paths + +newtype XPath = XPath [Either Label Term] deriving (Eq, Ord, Show) + +cnvXPath :: XPath -> Path +cnvXPath (XPath path) = Path (map (either Left (Right . cnvTerm)) (reverse path)) + +emptyXPath :: XPath +emptyXPath = XPath [] + +(++..) :: XPath -> Label -> XPath +XPath path ++.. lbl = XPath (Left lbl : path) + +(++!!) :: XPath -> Term -> XPath +XPath path ++!! sel = XPath (Right sel : path) + +---------------------------------------------------------------------- + +-- | combining alg. 1 and alg. 2 from Ljunglöf's PhD thesis +convertGrammar :: (CanonGrammar, Ident) -> MCFGrammar +convertGrammar (gram, lng) = trace2 "language" (prt lng) $ + trace2 "modules" (prtSep " " modnames) $ + trace2 "#lin-terms" (prt (length cncdefs)) $ + tracePrt "#mcf-rules total" (prt.length) $ + concat $ + tracePrt "#mcf-rules per fun" + (\rs -> concat [" "++show n++"="++show (length r) | + (n, r) <- zip [1..] rs]) $ + map (convertDef gram lng) cncdefs + where Gr mods = grammar2canon gram + cncdefs = [ def | Mod (MTCnc modname _) _ _ _ defs <- mods, + modname `elem` modnames, + def@(CncDFun _ _ _ _ _) <- defs ] + modnames = M.allExtends gram lng + + +convertDef :: CanonGrammar -> Ident -> Def -> [MCFRule] +convertDef gram lng (CncDFun fun (CIQ _ cat) args term _) + = [ Rule (cnvXMCFCat newCat) (map cnvXMCFCat newArgs) (map cnvXMCFLin newTerm) fun | + let ctype = lookupCType gram lng cat, + instArgs <- mapM (enumerateInsts gram lng) args, + let instTerm = substitutePaths gram lng instArgs term, + newCat <- emcfCat gram lng cat instTerm, + newArgs <- mapM (extractArg gram lng instArgs) args, + let newTerm = concatMap (extractLin newArgs) $ strPaths gram lng ctype instTerm + ] + + +-- gammalt skräp: +-- mergeArgs = zipWith mergeRec +-- mergeRec (R r1) (R r2) = R (r1 ++ r2) + +extractArg :: CanonGrammar -> Ident -> [Term] -> ArgVar -> [XMCFCat] +extractArg gram lng args (A cat nr) = emcfCat gram lng cat (args !!! nr) + + +emcfCat :: CanonGrammar -> Ident -> Ident -> Term -> [XMCFCat] +emcfCat gram lng cat = map (XMCFCat cat) . parPaths gram lng (lookupCType gram lng cat) + + +extractLin :: [XMCFCat] -> (XPath, Term) -> [Lin XMCFCat XMCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `C` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (E) = [[]] + convertLin (K tok) = [[Tok tok]] + convertLin (FV terms) = concatMap convertLin terms + convertLin term = map (return . Cat) $ flattenTerm emptyXPath term + flattenTerm path (Arg (A _ nr)) = [(args !!! nr, path, fromInteger nr)] + flattenTerm path (term `P` lbl) = flattenTerm (path ++.. lbl) term + flattenTerm path (term `S` sel) = flattenTerm (path ++!! sel) term + flattenTerm path (FV terms) = concatMap (flattenTerm path) terms + flattenTerm path term = error $ "flattenTerm: \n " ++ show path ++ "\n " ++ prt term + + +enumerateInsts :: CanonGrammar -> Ident -> ArgVar -> [Term] +enumerateInsts gram lng arg@(A argCat _) = enumerate (Arg arg) (lookupCType gram lng argCat) + where enumerate path (TStr) = [ path ] + enumerate path (Cn con) = okError $ lookupParamValues gram con + enumerate path (RecType r) + = map R $ sequence [ map (lbl `Ass`) $ + enumerate (path `P` lbl) ctype | + lbl `Lbg` ctype <- r ] + enumerate path (Table s t) + = map (T s) $ sequence [ map ([term2pattern sel] `Cas`) $ + enumerate (path `S` sel) t | + sel <- enumerate (error "enumerate") s ] + + + +termPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, (CType, Term))] +termPaths gr l (TStr) term = [ (emptyXPath, (TStr, term)) ] +termPaths gr l (RecType rtype) (R record) + = [ (path ++.. lbl, value) | + lbl `Ass` term <- record, + let ctype = okError $ maybeErr "termPaths/record" $ lookupLabelling lbl rtype, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (T _ table) + = [ (path ++!! pattern2term pat, value) | + pats `Cas` term <- table, pat <- pats, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (V ptype table) + = [ (path ++!! pat, value) | + (pat, term) <- zip (okError $ allParamValues gr ptype) table, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l ctype (FV terms) + = concatMap (termPaths gr l ctype) terms +termPaths gr l (Cn pc) term = [ (emptyXPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: CanonGrammar -> Ident -> CType -> Term -> [[(XPath, Term)]] +parPaths gr l ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths gr l ctype term ] + +strPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, Term)] +strPaths gr l ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths gr l ctype term ] + + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: CanonGrammar -> Ident -> [Term] -> Term -> Term +substitutePaths gr l arguments trm = subst trm + where subst (con `Con` terms) = con `Con` map subst terms + subst (R record) = R $ map substAss record + subst (term `P` lbl) = subst term `evalP` lbl + subst (T ptype table) = T ptype $ map substCas table + subst (V ptype table) = T ptype [ [term2pattern pat] `Cas` subst term | + (pat, term) <- zip (okError $ allParamValues gr ptype) table ] + subst (term `S` select) = subst term `evalS` subst select + subst (term `C` term') = subst term `C` subst term' + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !!! arg + subst term = term + + substAss (l `Ass` term) = l `Ass` subst term + substCas (p `Cas` term) = p `Cas` subst term + + +evalP (R record) lbl = okError $ maybeErr errStr $ lookupAssign lbl record + where errStr = "evalP: " ++ prt (R record `P` lbl) +evalP (FV terms) lbl = evalFV [ evalP term lbl | term <- terms ] +evalP term lbl = term `P` lbl + +evalS t@(T _ tbl) sel = maybe (t `S` sel) id $ lookupCase sel tbl +evalS (FV terms) sel = evalFV [ term `evalS` sel | term <- terms ] +evalS term (FV sels)= evalFV [ term `evalS` sel | sel <- sels ] +evalS term sel = term `S` sel + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> FV terms + where flattenFV (FV ts) = ts + flattenFV t = [t] + + +---------------------------------------------------------------------- +-- utilities + +-- lookup a CType for an Ident +lookupCType :: CanonGrammar -> Ident -> Ident -> CType +lookupCType gr lng c = errVal defLinType $ lookupLincat gr (CIQ lng c) + +-- lookup a label in a (record / record ctype / table) +lookupAssign :: Label -> [Assign] -> Maybe Term +lookupLabelling :: Label -> [Labelling] -> Maybe CType +lookupCase :: Term -> [Case] -> Maybe Term + +lookupAssign lbl rec = listToMaybe [ term | lbl' `Ass` term <- rec, lbl == lbl' ] +lookupLabelling lbl rtyp = listToMaybe [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] +lookupCase sel tbl = listToMaybe [ term | pats `Cas` term <- tbl, sel `matchesPats` pats ] + +matchesPats :: Term -> [Patt] -> Bool +matchesPats term patterns = or [ term == pattern2term pattern | pattern <- patterns ] + +-- converting between patterns and terms +pattern2term :: Patt -> Term +term2pattern :: Term -> Patt + +pattern2term (con `PC` patterns) = con `Con` map pattern2term patterns +pattern2term (PR record) = R [ lbl `Ass` pattern2term pattern | + lbl `PAss` pattern <- record ] + +term2pattern (con `Con` terms) = con `PC` map term2pattern terms +term2pattern (R record) = PR [ lbl `PAss` term2pattern term | + lbl `Ass` term <- record ] + +-- list lookup for Integers instead of Ints +(!!!) :: [a] -> Integer -> a +xs !!! n = xs !! fromInteger n diff --git a/src/GF/Parsing/ConvertGFCtoMCFG/Strict.hs b/src/GF/Parsing/ConvertGFCtoMCFG/Strict.hs new file mode 100644 index 000000000..de3ad7d5f --- /dev/null +++ b/src/GF/Parsing/ConvertGFCtoMCFG/Strict.hs @@ -0,0 +1,195 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Strict +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.Parsing.ConvertGFCtoMCFG.Strict (convertGrammar) where + +import Tracing +import IOExts (unsafePerformIO) +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.Parsing.Utilities +import GF.Parsing.GrammarTypes +import GF.Parsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +type GrammarEnv = (CanonGrammar, Ident) + +convertGrammar :: GrammarEnv -- ^ the canonical grammar, together with the selected language + -> MCFGrammar -- ^ the resulting MCF grammar +convertGrammar gram = trace2 "language" (prt (snd gram)) $ + trace2 "modules" (prtSep " " modnames) $ + tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion gram undefined + where Gr modules = grammar2canon (fst gram) + modnames = uncurry M.allExtends gram + conversion = member modules >>= convertModule + convertModule (Mod (MTCnc modname _) _ _ _ defs) + | modname `elem` modnames = member defs >>= convertDef + convertModule _ = failure + +convertDef :: Def -> CnvMonad MCFRule +convertDef (CncDFun fun (CIQ _ cat) args term _) + | trace2 "converting function" (prt fun) True + = do env <- readEnv + let ctype = lookupCType env cat + instArgs <- mapM enumerateArg args + let instTerm = substitutePaths env instArgs term + newCat <- emcfCat cat instTerm + newArgs <- mapM (extractArg instArgs) args + let newTerm = strPaths env ctype instTerm >>= extractLin newArgs + traceDot $ + return (Rule newCat newArgs newTerm fun) +convertDef _ = failure + +------------------------------------------------------------ + +type CnvMonad a = BacktrackM GrammarEnv () a + +---------------------------------------------------------------------- +-- strict conversion + +extractArg :: [STerm] -> ArgVar -> CnvMonad MCFCat +extractArg args (A cat nr) = emcfCat cat (args !! fromInteger nr) + +emcfCat :: Cat -> STerm -> CnvMonad MCFCat +emcfCat cat term = do env <- readEnv + member $ map (MCFCat cat) $ parPaths env (lookupCType env cat) term + +enumerateArg :: ArgVar -> CnvMonad STerm +enumerateArg (A cat nr) = do env <- readEnv + let ctype = lookupCType env cat + enumerate (SArg (fromInteger nr) cat emptyPath) ctype + where enumerate arg (TStr) = return arg + enumerate arg ctype@(Cn _) = do env <- readEnv + member $ groundTerms env ctype + enumerate arg (RecType rtype) + = liftM SRec $ sequence [ liftM ((,) lbl) $ + enumerate (arg +. lbl) ctype | + lbl `Lbg` ctype <- rtype ] + enumerate arg (Table stype ctype) + = do env <- readEnv + state <- readState + liftM STbl $ sequence [ liftM ((,) sel) $ + enumerate (arg +! sel) ctype | + sel <- solutions (enumerate err stype) env state ] + where err = error "enumerate: parameter type should not be string" + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: GrammarEnv -> [STerm] -> Term -> STerm +substitutePaths env arguments trm = subst trm + where subst (con `Con` terms) = con `SCon` map subst terms + subst (R record) = SRec [ (lbl, subst term) | lbl `Ass` term <- record ] + subst (term `P` lbl) = subst term +. lbl + subst (T ptype table) = STbl [ (pattern2sterm pat, subst term) | + pats `Cas` term <- table, pat <- pats ] + subst (V ptype table) = STbl [ (pat, subst term) | + (pat, term) <- zip (groundTerms env ptype) table ] + subst (term `S` select) = subst term +! subst select + subst (term `C` term') = subst term `SConcat` subst term' + subst (K str) = SToken str + subst (E) = SEmpty + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !! fromInteger arg + + +termPaths :: GrammarEnv -> CType -> STerm -> [(Path, (CType, STerm))] +termPaths env (TStr) term = [ (emptyPath, (TStr, term)) ] +termPaths env (RecType rtype) (SRec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let ctype = lookupLabelling lbl rtype, + (path, value) <- termPaths env ctype term ] +termPaths env (Table _ ctype) (STbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths env ctype term ] +termPaths env ctype (SVariants terms) + = terms >>= termPaths env ctype +termPaths env (Cn pc) term = [ (emptyPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: GrammarEnv -> CType -> STerm -> [[(Path, STerm)]] +parPaths env ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths env ctype term ] + +strPaths :: GrammarEnv -> CType -> STerm -> [(Path, STerm)] +strPaths env ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths env ctype term ] + +extractLin :: [MCFCat] -> (Path, STerm) -> [Lin MCFCat MCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `SConcat` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (SEmpty) = [[]] + convertLin (SToken tok) = [[Tok tok]] + convertLin (SVariants terms) = concatMap convertLin terms + convertLin (SArg nr _ path) = [[Cat (args !! nr, path, nr)]] + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> SVariants terms + where flattenFV (SVariants ts) = ts + flattenFV t = [t] + +---------------------------------------------------------------------- +-- utilities + +lookupCType :: GrammarEnv -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (CIQ (snd env) cat) + +lookupLabelling :: Label -> [Labelling] -> CType +lookupLabelling lbl rtyp = case [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] of + [ctyp] -> ctyp + err -> error $ "lookupLabelling:" ++ show err + +groundTerms :: GrammarEnv -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +term2spattern (R rec) = SRec [ (lbl, term2spattern term) | Ass lbl term <- rec ] +term2spattern (Con con terms) = SCon con $ map term2spattern terms + +pattern2sterm :: Patt -> STerm +pattern2sterm (con `PC` patterns) = con `SCon` map pattern2sterm patterns +pattern2sterm (PR record) = SRec [ (lbl, pattern2sterm pattern) | + lbl `PAss` pattern <- record ] + diff --git a/src/GF/Parsing/ConvertGFCtoMCFG/Utils.hs b/src/GF/Parsing/ConvertGFCtoMCFG/Utils.hs new file mode 100644 index 000000000..4fd91e894 --- /dev/null +++ b/src/GF/Parsing/ConvertGFCtoMCFG/Utils.hs @@ -0,0 +1,237 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFGnondet +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.Conversion.ConvertGFCtoMCFG.Utils where + +import Tracing +import IOExts (unsafePerformIO) +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.Parsing.Parser +import GF.Parsing.GrammarTypes +import GF.Parsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +type GrammarEnv = (CanonGrammar, Ident) + +buildConversion :: (Def -> BacktrackM GrammarEnv state MCFRule) + -> GrammarEnv -> MCFGrammar +buildConversion cnvDef env = trace2 "language" (prt (snd gram)) $ + trace2 "modules" (prtSep " " modnames) $ + tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion env undefined + where Gr modules = grammar2canon (fst gram) + modnames = uncurry M.allExtends gram + conversion = member modules >>= convertModule + convertModule (Mod (MTCnc modname _) _ _ _ defs) + | modname `elem` modnames = member defs >>= cnvDef cnvtype + convertModule _ = failure + + +---------------------------------------------------------------------- +-- strict conversion + +extractArg :: [STerm] -> ArgVar -> CnvMonad MCFCat +extractArg args (A cat nr) = emcfCat cat (args !! fromInteger nr) + +emcfCat :: Cat -> STerm -> CnvMonad MCFCat +emcfCat cat term = do env <- readEnv + member $ map (MCFCat cat) $ parPaths env (lookupCType env cat) term + +enumerateArg :: ArgVar -> CnvMonad STerm +enumerateArg (A cat nr) = do env <- readEnv + let ctype = lookupCType env cat + enumerate (SArg (fromInteger nr) cat emptyPath) ctype + where enumerate arg (TStr) = return arg + enumerate arg ctype@(Cn _) = do env <- readEnv + member $ groundTerms env ctype + enumerate arg (RecType rtype) + = liftM SRec $ sequence [ liftM ((,) lbl) $ + enumerate (arg +. lbl) ctype | + lbl `Lbg` ctype <- rtype ] + enumerate arg (Table stype ctype) + = do env <- readEnv + state <- readState + liftM STbl $ sequence [ liftM ((,) sel) $ + enumerate (arg +! sel) ctype | + sel <- solutions (enumerate err stype) env state ] + where err = error "enumerate: parameter type should not be string" + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: GrammarEnv -> [STerm] -> Term -> STerm +substitutePaths env arguments trm = subst trm + where subst (con `Con` terms) = con `SCon` map subst terms + subst (R record) = SRec [ (lbl, subst term) | lbl `Ass` term <- record ] + subst (term `P` lbl) = subst term +. lbl + subst (T ptype table) = STbl [ (pattern2sterm pat, subst term) | + pats `Cas` term <- table, pat <- pats ] + subst (V ptype table) = STbl [ (pat, subst term) | + (pat, term) <- zip (groundTerms env ptype) table ] + subst (term `S` select) = subst term +! subst select + subst (term `C` term') = subst term `SConcat` subst term' + subst (K str) = SToken str + subst (E) = SEmpty + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !! fromInteger arg + + +termPaths :: GrammarEnv -> CType -> STerm -> [(Path, (CType, STerm))] +termPaths env (TStr) term = [ (emptyPath, (TStr, term)) ] +termPaths env (RecType rtype) (SRec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let ctype = lookupLabelling lbl rtype, + (path, value) <- termPaths env ctype term ] +termPaths env (Table _ ctype) (STbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths env ctype term ] +termPaths env ctype (SVariants terms) + = terms >>= termPaths env ctype +termPaths env (Cn pc) term = [ (emptyPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: GrammarEnv -> CType -> STerm -> [[(Path, STerm)]] +parPaths env ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths env ctype term ] + +strPaths :: GrammarEnv -> CType -> STerm -> [(Path, STerm)] +strPaths env ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths env ctype term ] + +extractLin :: [MCFCat] -> (Path, STerm) -> [Lin MCFCat MCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `SConcat` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (SEmpty) = [[]] + convertLin (SToken tok) = [[Tok tok]] + convertLin (SVariants terms) = concatMap convertLin terms + convertLin (SArg nr _ path) = [[Cat (args !! nr, path, nr)]] + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> SVariants terms + where flattenFV (SVariants ts) = ts + flattenFV t = [t] + +lookupLabelling :: Label -> [Labelling] -> CType +lookupLabelling lbl rtyp = case [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] of + [ctyp] -> ctyp + err -> error $ "lookupLabelling:" ++ show err + +pattern2sterm :: Patt -> STerm +pattern2sterm (con `PC` patterns) = con `SCon` map pattern2sterm patterns +pattern2sterm (PR record) = SRec [ (lbl, pattern2sterm pattern) | + lbl `PAss` pattern <- record ] + +------------------------------------------------------------ +-- updating the mcf rule + +updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins) <- readState + args' <- updateNth (addToMCFCat cn) arg args + writeState (head, args', lins) + +updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins) <- readState + head' <- addToMCFCat cn head + writeState (head', args, lins) + +updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins) <- readState + let lins' = lins ++ map (Lin path) newLins + writeState (head, args, lins') + +term2lins :: STerm -> [[Symbol (Cat, Path, Int) Tokn]] +term2lins (SArg arg cat path) = return [Cat (cat, path, arg)] +term2lins (SToken str) = return [Tok str] +term2lins (SConcat t1 t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (SEmpty) = return [] +term2lins (SVariants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +addToMCFCat :: Constraint -> MCFCat -> CnvMonad MCFCat +addToMCFCat cn (MCFCat cat cns) = liftM (MCFCat cat) $ addConstraint cn cns + +addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + +catOfArg (A aCat _) = aCat +catOfArg (AB aCat _ _) = aCat + +lookupCType :: GrammarEnv -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (CIQ (snd env) cat) + +groundTerms :: GrammarEnv -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +cTypeForArg :: GrammarEnv -> STerm -> CType +cTypeForArg env (SArg nr cat (Path path)) + = follow path $ lookupCType env cat + where follow [] ctype = ctype + follow (Right pat : path) (Table _ ctype) = follow path ctype + follow (Left lbl : path) (RecType rec) + = case [ ctype | Lbg lbl' ctype <- rec, lbl == lbl' ] of + [ctype] -> follow path ctype + err -> error $ "follow: " ++ show rec ++ " . " ++ show lbl ++ + " results in " ++ show err + +term2spattern (R rec) = SRec [ (lbl, term2spattern term) | Ass lbl term <- rec ] +term2spattern (Con con terms) = SCon con $ map term2spattern terms + diff --git a/src/GF/Parsing/ConvertGrammar.hs b/src/GF/Parsing/ConvertGrammar.hs new file mode 100644 index 000000000..f8ce9335f --- /dev/null +++ b/src/GF/Parsing/ConvertGrammar.hs @@ -0,0 +1,42 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGrammar +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:46 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All (?) grammar conversions which are used in GF +----------------------------------------------------------------------------- + + +module GF.Parsing.ConvertGrammar + (pInfo, emptyPInfo, + module GF.Parsing.GrammarTypes + ) where + +import GFC (CanonGrammar) +import GF.Parsing.GrammarTypes +import Ident (Ident(..)) +import Option +import Tracing + +import qualified GF.Parsing.ConvertGFCtoMCFG as G2M +import qualified GF.Parsing.ConvertMCFGtoCFG as M2C +import qualified GF.Parsing.MCFGrammar as MCFG +import qualified GF.Parsing.CFGrammar as CFG + +pInfo :: Options -> CanonGrammar -> Ident -> PInfo +pInfo opts canon lng = PInfo mcfg cfg mcfp cfp + where mcfg = G2M.convertGrammar cnv (canon, lng) + cnv = maybe "nondet" id $ getOptVal opts gfcConversion + cfg = M2C.convertGrammar mcfg + mcfp = MCFG.pInfo mcfg + cfp = CFG.pInfo cfg + +emptyPInfo :: PInfo +emptyPInfo = PInfo [] [] (MCFG.pInfo []) (CFG.pInfo []) + diff --git a/src/GF/Parsing/ConvertMCFGtoCFG.hs b/src/GF/Parsing/ConvertMCFGtoCFG.hs new file mode 100644 index 000000000..41618ffdd --- /dev/null +++ b/src/GF/Parsing/ConvertMCFGtoCFG.hs @@ -0,0 +1,52 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertMCFGtoCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:47 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting MCFG grammars to (possibly overgenerating) CFG +----------------------------------------------------------------------------- + + +module GF.Parsing.ConvertMCFGtoCFG + (convertGrammar) where + +import Tracing +import GF.Printing.PrintParser + +import Monad +import GF.Parsing.Utilities +import qualified GF.Parsing.MCFGrammar as MCFG +import qualified GF.Parsing.CFGrammar as CFG +import GF.Parsing.GrammarTypes + +convertGrammar :: MCFGrammar -> CFGrammar +convertGrammar gram = tracePrt "#cf-rules" (prt.length) $ + concatMap convertRule gram + +convertRule :: MCFRule -> [CFRule] +convertRule (MCFG.Rule cat args record name) + = [ CFG.Rule (CFCat cat lbl) rhs (CFName name profile) | + MCFG.Lin lbl lin <- record, + let rhs = map (mapSymbol convertArg id) lin, + let profile = map (argPlaces lin) [0 .. length args-1] + ] + +convertArg (cat, lbl, _arg) = CFCat cat lbl + +argPlaces lin arg = [ place | ((_cat, _lbl, arg'), place) <- + zip (filterCats lin) [0::Int ..], arg == arg' ] + +filterCats syms = [ cat | Cat cat <- syms ] + + + + + + + diff --git a/src/GF/Parsing/GeneralChart.hs b/src/GF/Parsing/GeneralChart.hs index 61f933932..c8fe2b202 100644 --- a/src/GF/Parsing/GeneralChart.hs +++ b/src/GF/Parsing/GeneralChart.hs @@ -5,26 +5,27 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Date: 2005/03/21 22:31:48 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Simple implementation of deductive chart parsing ----------------------------------------------------------------------------- -module GF.Parsing.GeneralChart (-- * Type definition - Chart, - -- * Main functions - chartLookup, - buildChart, - -- * Probably not needed - emptyChart, - chartMember, - chartInsert, - chartList, - addToChart - ) where +module GF.Parsing.GeneralChart + (-- * Type definition + Chart, + -- * Main functions + chartLookup, + buildChart, + -- * Probably not needed + emptyChart, + chartMember, + chartInsert, + chartList, + addToChart + ) where -- import Trace diff --git a/src/GF/Parsing/GrammarTypes.hs b/src/GF/Parsing/GrammarTypes.hs new file mode 100644 index 000000000..326ad343c --- /dev/null +++ b/src/GF/Parsing/GrammarTypes.hs @@ -0,0 +1,146 @@ +---------------------------------------------------------------------- +-- | +-- Module : GrammarTypes +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:48 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All possible instantiations of different grammar formats used for parsing +-- +-- Plus some helper types and utilities +----------------------------------------------------------------------------- + + +module GF.Parsing.GrammarTypes + (-- * Main parser information + PInfo(..), + -- * Multiple context-free grammars + MCFGrammar, MCFRule, MCFPInfo, + MCFCat(..), MCFLabel, + Constraint, + -- * Context-free grammars + CFGrammar, CFRule, CFPInfo, + CFProfile, CFName(..), CFCat(..), + -- * Assorted types + Cat, Name, Constr, Label, Tokn, + -- * Simplified terms + STerm(..), (+.), (+!), + -- * Record\/table paths + Path(..), emptyPath, + (++.), (++!) + ) where + +import Ident (Ident(..)) +import AbsGFC +import qualified GF.Parsing.CFGrammar as CFG +import qualified GF.Parsing.MCFGrammar as MCFG +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +---------------------------------------------------------------------- + +data PInfo = PInfo { mcfg :: MCFGrammar, + cfg :: CFGrammar, + mcfPInfo :: MCFPInfo, + cfPInfo :: CFPInfo } + +type MCFGrammar = MCFG.Grammar Name MCFCat MCFLabel Tokn +type MCFRule = MCFG.Rule Name MCFCat MCFLabel Tokn +type MCFPInfo = MCFG.PInfo Name MCFCat MCFLabel Tokn + +data MCFCat = MCFCat Cat [Constraint] deriving (Eq, Ord, Show) +type MCFLabel = Path + +type Constraint = (Path, STerm) + +type CFGrammar = CFG.Grammar CFName CFCat Tokn +type CFRule = CFG.Rule CFName CFCat Tokn +type CFPInfo = CFG.PInfo CFName CFCat Tokn + +type CFProfile = [[Int]] +data CFName = CFName Name CFProfile deriving (Eq, Ord, Show) +data CFCat = CFCat MCFCat MCFLabel deriving (Eq, Ord, Show) + +---------------------------------------------------------------------- + +type Cat = Ident +type Name = Ident +type Constr = CIdent + +data STerm = SArg Int Cat Path -- ^ argument variable, the 'Path' is a path + -- pointing into the term + | SCon Constr [STerm] -- ^ constructor + | SRec [(Label, STerm)] -- ^ record + | STbl [(STerm, STerm)] -- ^ table of patterns/terms + | SVariants [STerm] -- ^ variants + | SConcat STerm STerm -- ^ concatenation + | SToken Tokn -- ^ single token + | SEmpty -- ^ empty string + | SWildcard -- ^ wildcard pattern variable + + -- | SRes CIdent -- resource identifier + -- | SVar Ident -- bound pattern variable + -- | SInt Integer -- integer + deriving (Eq, Ord, Show) + +(+.) :: STerm -> Label -> STerm +SRec record +. lbl = maybe err id $ lookup lbl record + where err = error $ "(+.), label not in record: " ++ show (SRec record) ++ " +. " ++ show lbl +SArg arg cat path +. lbl = SArg arg cat (path ++. lbl) +SVariants terms +. lbl = SVariants $ map (+. lbl) terms +sterm +. lbl = error $ "(+.): " ++ show sterm ++ " +. " ++ show lbl + +(+!) :: STerm -> STerm -> STerm +STbl table +! pat = maybe err id $ lookup pat table + where err = error $ "(+!), pattern not in table: " ++ show (STbl table) ++ " +! " ++ show pat +SArg arg cat path +! pat = SArg arg cat (path ++! pat) +SVariants terms +! pat = SVariants $ map (+! pat) terms +term +! SVariants pats = SVariants $ map (term +!) pats +sterm +! pat = error $ "(+!): " ++ show sterm ++ " +! " ++ show pat + +---------------------------------------------------------------------- + +newtype Path = Path [Either Label STerm] deriving (Eq, Ord, Show) + +emptyPath :: Path +emptyPath = Path [] + +(++.) :: Path -> Label -> Path +Path path ++. lbl = Path (Left lbl : path) + +(++!) :: Path -> STerm -> Path +Path path ++! sel = Path (Right sel : path) + +------------------------------------------------------------ + +instance Print STerm where + prt (SArg n c p) = prt c ++ "@" ++ prt n ++ prt p + prt (SCon c []) = prt c + prt (SCon c ts) = prt c ++ prtList ts + prt (SRec rec) = "{" ++ concat [ prt l ++ "=" ++ prt t ++ ";" | (l,t) <- rec ] ++ "}" + prt (STbl tbl) = "[" ++ concat [ prt p ++ "=>" ++ prt t ++ ";" | (p,t) <- tbl ] ++ "}" + prt (SVariants ts) = "{| " ++ prtSep " | " ts ++ " |}" + prt (SConcat t1 t2) = prt t1 ++ "++" ++ prt t2 + prt (SToken t) = prt t + prt (SEmpty) = "[]" + prt (SWildcard) = "_" + +instance Print MCFCat where + prt (MCFCat cat params) + = prt cat ++ "{" ++ concat [ prt path ++ "=" ++ prt term ++ ";" | + (path, term) <- params ] ++ "}" + +instance Print CFName where + prt (CFName name profile) = prt name ++ prt profile + +instance Print CFCat where + prt (CFCat cat lbl) = prt cat ++ prt lbl + +instance Print Path where + prt (Path path) = concatMap prtEither (reverse path) + where prtEither (Left lbl) = "." ++ prt lbl + prtEither (Right patt) = "!" ++ prt patt diff --git a/src/GF/Parsing/IncrementalChart.hs b/src/GF/Parsing/IncrementalChart.hs index a040ddd60..a5d9f54b1 100644 --- a/src/GF/Parsing/IncrementalChart.hs +++ b/src/GF/Parsing/IncrementalChart.hs @@ -5,21 +5,22 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Date: 2005/03/21 22:31:49 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Implementation of /incremental/ deductive parsing, -- i.e. parsing one word at the time. ----------------------------------------------------------------------------- -module GF.Parsing.IncrementalChart (-- * Type definitions - IncrementalChart, - -- * Functions - buildChart, - chartList - ) where +module GF.Parsing.IncrementalChart + (-- * Type definitions + IncrementalChart, + -- * Functions + buildChart, + chartList + ) where import Array import GF.Data.SortedList diff --git a/src/GF/Parsing/MCFGrammar.hs b/src/GF/Parsing/MCFGrammar.hs new file mode 100644 index 000000000..c8ff0c329 --- /dev/null +++ b/src/GF/Parsing/MCFGrammar.hs @@ -0,0 +1,206 @@ +---------------------------------------------------------------------- +-- | +-- Module : MCFGrammar +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 22:31:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Definitions of multiple context-free grammars, +-- parser information and chart conversion +----------------------------------------------------------------------------- + +module GF.Parsing.MCFGrammar + (-- * Type definitions + Grammar, + Rule(..), + Lin(..), + -- * Parser information + MCFParser, + MEdge, + edges2chart, + PInfo, + pInfo, + -- * Ranges + Range(..), + makeRange, + concatRange, + unifyRange, + unionRange, + failRange, + -- * Utilities + select, + updateIndex + ) where + +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +-- parser modules: +import GF.Parsing.Utilities +import GF.Printing.PrintParser + + + +select :: [a] -> [(a, [a])] +select [] = [] +select (x:xs) = (x,xs) : [ (y,x:ys) | (y,ys) <- select xs ] + +updateIndex :: Functor f => Int -> [a] -> (a -> f a) -> f [a] +updateIndex 0 (a:as) f = fmap (:as) $ f a +updateIndex n (a:as) f = fmap (a:) $ updateIndex (n-1) as f +updateIndex _ _ _ = error "ParserUtils.updateIndex: Index out of range" + + +------------------------------------------------------------ +-- grammar types + +type Grammar n c l t = [Rule n c l t] +data Rule n c l t = Rule c [c] [Lin c l t] n + deriving (Eq, Ord, Show) +data Lin c l t = Lin l [Symbol (c, l, Int) t] + deriving (Eq, Ord, Show) + +-- variants is simply several linearizations with the same label + + +------------------------------------------------------------ +-- parser information + +type PInfo n c l t = Grammar n c l t + +pInfo :: Grammar n c l t -> PInfo n c l t +pInfo = id + +type MCFParser n c l t = PInfo n c l t -> [c] -> Input t -> ParseChart n (MEdge c l) + +type MEdge c l = (c, [(l, Range)]) + +edges2chart :: (Ord n, Ord c, Ord l) => + [(n, MEdge c l, [MEdge c l])] -> ParseChart n (MEdge c l) +edges2chart edges = fmap groupPairs $ accumAssoc id $ + [ (medge, (name, medges)) | (name, medge, medges) <- edges ] + + +------------------------------------------------------------ +-- ranges as sets of int-pairs + +newtype Range = Rng (SList (Int, Int)) deriving (Eq, Ord, Show) + +makeRange :: SList (Int, Int) -> Range +makeRange rho = Rng rho + +concatRange :: Range -> Range -> Range +concatRange (Rng rho) (Rng rho') = Rng $ nubsort [ (i,k) | (i,j) <- rho, (j',k) <- rho', j==j' ] + +unifyRange :: Range -> Range -> Range +unifyRange (Rng rho) (Rng rho') = Rng $ rho <**> rho' + +unionRange :: Range -> Range -> Range +unionRange (Rng rho) (Rng rho') = Rng $ rho <++> rho' + +failRange :: Range +failRange = Rng [] + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print n, Print c, Print l, Print t) => Print (Rule n c l t) where + prt (Rule cat args record name) + = prt name ++ ". " ++ prt cat ++ " -> " ++ prtSep " " args ++ "\n" ++ prt record + prtList = concatMap prt + +instance (Print c, Print l, Print t) => Print (Lin c l t) where + prt (Lin lbl lin) = prt lbl ++ " = " ++ prtSep " " (map (symbol prArg (show.prt)) lin) + where prArg (cat, lbl, arg) = prt cat ++ "@" ++ prt arg ++ "." ++ prt lbl + prtList = prtBeforeAfter "\t" "\n" + +instance Print Range where + prt (Rng rho) = "(" ++ prtSep "|" [ show i ++ "-" ++ show j | (i,j) <- rho ] ++ ")" + +{- +------------------------------------------------------------ +-- items & forests + +data Item n c l = Item n (MEdge c l) [[MEdge c l]] + deriving (Eq, Ord, Show) +type MEdge c l = (c, [Edge l]) + +items2forests :: (Ord n, Ord c, Ord l) => Edge ((c, l) -> Bool) -> [Item n c l] -> [ParseForest n] + +---------- + +items2forests (Edge i0 k0 startCat) items + = concatMap edge2forests $ filter checkEdge $ aElems chart + where edge2forests (cat, []) = [FMeta] + edge2forests edge = filter checkForest $ map item2forest (chart ? edge) + + item2forest (Item name _ children) = FNode name [ forests | edges <- children, + forests <- mapM edge2forests edges ] + + checkEdge (cat, [Edge i k lbl]) = i == i0 && k == k0 && startCat (cat, lbl) + checkEdge _ = False + + checkForest (FNode _ children) = not (null children) + + chart = accumAssoc id [ (edge, item) | item@(Item _ edge _) <- items ] +-} + + +------------------------------------------------------------ +-- grammar checking +{- +--checkGrammar :: (Ord c, Ord l, Print n, Print c, Print l, Print t) => Grammar n c l t -> [String] + +checkGrammar rules + = do rule@(Rule cat rhs record name) <- rules + if null record + then [ "empty linearization record in rule: " ++ prt rule ] + else [ "category does not exist: " ++ prt rcat ++ "\n" ++ + " - in rule: " ++ prt rule | + rcat <- rhs, rcat `notElem` lhsCats ] ++ + do Lin _ lin <- record + Cat (arg, albl) <- lin + if arg<0 || arg>=length rhs + then [ "argument index out of range: " ++ show arg ++ "/" ++ prt albl ++ "\n" ++ + " - in rule: " ++ prt rule ] + else [ "label does not exist: " ++ prt albl ++ "\n" ++ + " - from rule: " ++ prt rule ++ + " - in rule: " ++ prt arule | + arule@(Rule _ acat _ arecord) <- rules, + acat == rhs !! arg, + albl `notElem` [ lbl | Lin lbl _ <- arecord ] ] + where lhsCats = nubsort [ cat | Rule _ cat _ _ <- rules ] +-} + + + + + +{----- +------------------------------------------------------------ +-- simplifications + +splitMRule :: (Ord n, Ord c, Ord l, Ord t) => Grammar n c l t -> Rule n c l t -> [Rule n c l t] +splitMRule rules (Rule name cat args record) = nubsort [ (Rule name cat args splitrec) | + (cat', lbls) <- rhsCats, cat == cat', + let splitrec = [ lin | lin@(Lin lbl _) <- record, lbl `elem` lbls ] ] + where rhsCats = limit rhsC lhsCats + lhsCats = nubsort [ (cat, [lbl]) | Rule _ cat _ record <- rules, Lin lbl _ <- record ] + rhsC (cat, lbls) = nubsort [ (rcat, rlbls) | + Rule _ cat' rhs lins <- rules, cat == cat', + (arg, rcat) <- zip [0..] rhs, + let rlbls = nubsort [ rlbl | Lin lbl lin <- lins, lbl `elem` lbls, + Cat (arg', rlbl) <- lin, arg == arg' ], + not $ null rlbls + ] + + +----} + + + diff --git a/src/GF/Parsing/ParseCF.hs b/src/GF/Parsing/ParseCF.hs index 20f45e3f2..b6c6b6ae5 100644 --- a/src/GF/Parsing/ParseCF.hs +++ b/src/GF/Parsing/ParseCF.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Date: 2005/03/21 22:31:50 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Chart parsing of grammars in CF format ----------------------------------------------------------------------------- @@ -22,8 +22,8 @@ import GF.Data.SortedList (nubsort) import GF.Data.Assoc import qualified CF import qualified CFIdent as CFI -import GF.Parsing.Parser -import GF.Conversion.CFGrammar +import GF.Parsing.Utilities +import GF.Parsing.CFGrammar import qualified GF.Parsing.ParseCFG as P type Token = CFI.CFTok diff --git a/src/GF/Parsing/ParseCFG.hs b/src/GF/Parsing/ParseCFG.hs index 1005d5656..c613ca312 100644 --- a/src/GF/Parsing/ParseCFG.hs +++ b/src/GF/Parsing/ParseCFG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Date: 2005/03/21 22:31:51 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Main parsing module for context-free grammars ----------------------------------------------------------------------------- @@ -16,10 +16,10 @@ module GF.Parsing.ParseCFG (parse) where import Char (toLower) -import GF.Parsing.Parser -import GF.Conversion.CFGrammar -import qualified GF.Parsing.CFParserGeneral as PGen -import qualified GF.Parsing.CFParserIncremental as PInc +import GF.Parsing.Utilities +import GF.Parsing.CFGrammar +import qualified GF.Parsing.ParseCFG.General as PGen +import qualified GF.Parsing.ParseCFG.Incremental as PInc parse :: (Ord n, Ord c, Ord t, Show t) => diff --git a/src/GF/Parsing/CFParserGeneral.hs b/src/GF/Parsing/ParseCFG/General.hs similarity index 94% rename from src/GF/Parsing/CFParserGeneral.hs rename to src/GF/Parsing/ParseCFG/General.hs index cc24820b7..a1cd21c2c 100644 --- a/src/GF/Parsing/CFParserGeneral.hs +++ b/src/GF/Parsing/ParseCFG/General.hs @@ -5,21 +5,20 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:41 $ +-- > CVS $Date: 2005/03/21 22:31:54 $ -- > CVS $Author: peb $ -- > CVS $Revision: 1.1 $ -- -- Several implementations of CFG chart parsing ----------------------------------------------------------------------------- -module GF.Parsing.CFParserGeneral (parse, - Strategy - ) where +module GF.Parsing.ParseCFG.General + (parse, Strategy) where import Tracing -import GF.Parsing.Parser -import GF.Conversion.CFGrammar +import GF.Parsing.Utilities +import GF.Parsing.CFGrammar import GF.Parsing.GeneralChart import GF.Data.Assoc diff --git a/src/GF/Parsing/CFParserIncremental.hs b/src/GF/Parsing/ParseCFG/Incremental.hs similarity index 96% rename from src/GF/Parsing/CFParserIncremental.hs rename to src/GF/Parsing/ParseCFG/Incremental.hs index 3b9951721..b5f91aec5 100644 --- a/src/GF/Parsing/CFParserIncremental.hs +++ b/src/GF/Parsing/ParseCFG/Incremental.hs @@ -5,7 +5,7 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:41 $ +-- > CVS $Date: 2005/03/21 22:31:54 $ -- > CVS $Author: peb $ -- > CVS $Revision: 1.1 $ -- @@ -14,8 +14,8 @@ -module GF.Parsing.CFParserIncremental (parse, - Strategy) where +module GF.Parsing.ParseCFG.Incremental + (parse, Strategy) where import Tracing import GF.Printing.PrintParser @@ -27,8 +27,8 @@ import GF.Data.SortedList import GF.Data.Assoc import Operations -- parser modules: -import GF.Parsing.Parser -import GF.Conversion.CFGrammar +import GF.Parsing.Utilities +import GF.Parsing.CFGrammar import GF.Parsing.IncrementalChart diff --git a/src/GF/Parsing/ParseGFC.hs b/src/GF/Parsing/ParseGFC.hs index 0d0d5c662..f43162c16 100644 --- a/src/GF/Parsing/ParseGFC.hs +++ b/src/GF/Parsing/ParseGFC.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Date: 2005/03/21 22:31:51 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- The main parsing module, parsing GFC grammars -- by translating to simpler formats, such as PMCFG and CFG @@ -34,11 +34,11 @@ import Operations import GF.Data.SortedList -- Conversion and parser modules import GF.Data.Assoc -import GF.Parsing.Parser +import GF.Parsing.Utilities -- import ConvertGrammar -import GF.Conversion.GrammarTypes -import qualified GF.Conversion.MCFGrammar as M -import qualified GF.Conversion.CFGrammar as C +import GF.Parsing.GrammarTypes +import qualified GF.Parsing.MCFGrammar as M +import qualified GF.Parsing.CFGrammar as C import qualified GF.Parsing.ParseMCFG as PM import qualified GF.Parsing.ParseCFG as PC --import MCFRange diff --git a/src/GF/Parsing/ParseMCFG.hs b/src/GF/Parsing/ParseMCFG.hs index 4afc44bb7..296a4d4d0 100644 --- a/src/GF/Parsing/ParseMCFG.hs +++ b/src/GF/Parsing/ParseMCFG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Date: 2005/03/21 22:31:52 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Main module for MCFG parsing ----------------------------------------------------------------------------- @@ -16,9 +16,9 @@ module GF.Parsing.ParseMCFG (parse) where import Char (toLower) -import GF.Parsing.Parser -import GF.Conversion.MCFGrammar -import qualified GF.Parsing.MCFParserBasic as PBas +import GF.Parsing.Utilities +import GF.Parsing.MCFGrammar +import qualified GF.Parsing.ParseMCFG.Basic as PBas import GF.Printing.PrintParser ---- import qualified MCFParserBasic2 as PBas2 -- file not found AR @@ -30,7 +30,7 @@ parse str = decodeParser (map toLower str) decodeParser "b" = PBas.parse ---- decodeParser "c" = PBas2.parse -decodeParser _ = decodeParser "c" +decodeParser _ = decodeParser "b" diff --git a/src/GF/Parsing/MCFParserBasic.hs b/src/GF/Parsing/ParseMCFG/Basic.hs similarity index 97% rename from src/GF/Parsing/MCFParserBasic.hs rename to src/GF/Parsing/ParseMCFG/Basic.hs index 03a1d8b9d..f75756267 100644 --- a/src/GF/Parsing/MCFParserBasic.hs +++ b/src/GF/Parsing/ParseMCFG/Basic.hs @@ -5,21 +5,21 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Date: 2005/03/21 22:31:55 $ -- > CVS $Author: peb $ -- > CVS $Revision: 1.1 $ -- -- Simplest possible implementation of MCFG chart parsing ----------------------------------------------------------------------------- -module GF.Parsing.MCFParserBasic (parse - ) where +module GF.Parsing.ParseMCFG.Basic + (parse) where import Tracing import Ix -import GF.Parsing.Parser -import GF.Conversion.MCFGrammar +import GF.Parsing.Utilities +import GF.Parsing.MCFGrammar import GF.Parsing.GeneralChart import GF.Data.Assoc import GF.Data.SortedList diff --git a/src/GF/Parsing/Parser.hs b/src/GF/Parsing/Utilities.hs similarity index 93% rename from src/GF/Parsing/Parser.hs rename to src/GF/Parsing/Utilities.hs index 0c18514f9..295389d52 100644 --- a/src/GF/Parsing/Parser.hs +++ b/src/GF/Parsing/Utilities.hs @@ -5,7 +5,7 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Date: 2005/03/21 22:31:52 $ -- > CVS $Author: peb $ -- > CVS $Revision: 1.1 $ -- @@ -13,16 +13,17 @@ ----------------------------------------------------------------------------- -module GF.Parsing.Parser ( -- * Symbols - Symbol(..), symbol, mapSymbol, - -- * Edges - Edge(..), - -- * Parser input - Input(..), makeInput, input, inputMany, - -- * charts, parse forests & trees - ParseChart, ParseForest(..), ParseTree(..), - chart2forests, forest2trees - ) where +module GF.Parsing.Utilities + ( -- * Symbols + Symbol(..), symbol, mapSymbol, + -- * Edges + Edge(..), + -- * Parser input + Input(..), makeInput, input, inputMany, + -- * charts, parse forests & trees + ParseChart, ParseForest(..), ParseTree(..), + chart2forests, forest2trees + ) where -- haskell modules: import Monad