diff --git a/grammars/ljung/fin_dep_types/Findep.gf b/grammars/ljung/fin_dep_types/Findep.gf new file mode 100644 index 000000000..cd722ec35 --- /dev/null +++ b/grammars/ljung/fin_dep_types/Findep.gf @@ -0,0 +1,36 @@ + +concrete Findep of FindepAbs = { + +lin + +Sg = {s = "SINGULAR"}; +-- Pl = {s = "PLURAL"}; + +s n g b x y = {s = x.s ++ y.s}; +np n g b x y = {s = x.s ++ y.s}; +vp n g b x y = {s = x.s ++ y.s}; + +npBest n g x = {s = x.s}; +npPl g b x = {s = x.s}; + +en = {s = "en"}; +ett = {s = "ett"}; +den = {s = "den"}; +det = {s = "det"}; + +alla g = {s = "alla"}; +de g = {s = "de"}; + +katt = {s = "katt"}; +katter = {s = "katter"}; +katten = {s = "katten"}; +katterna = {s = "katterna"}; + +barn n = {s = "barn"}; +barnet = {s = "barnet"}; +barnen = {s = "barnen"}; + +jagar = {s = "jagar"}; + +} + diff --git a/grammars/ljung/fin_dep_types/FindepAbs.gf b/grammars/ljung/fin_dep_types/FindepAbs.gf new file mode 100644 index 000000000..3f3bad71c --- /dev/null +++ b/grammars/ljung/fin_dep_types/FindepAbs.gf @@ -0,0 +1,43 @@ + +abstract FindepAbs = { + +cat +Num; Gen; Def; +S; V; VP; +D Num Gen Def; N Num Gen Def; NP Num Gen Def; + +fun + +Sg, Pl : Num; +Best, OBest : Def; +Utr, Neu : Gen; + + +s : (n:Num) -> (g:Gen) -> (b:Def) -> NP n g b -> VP -> S; +np : (n:Num) -> (g:Gen) -> (b:Def) -> D n g b -> N n g b -> NP n g b; +vp : (n:Num) -> (g:Gen) -> (b:Def) -> V -> NP n g b -> VP; + +npBest : (n:Num) -> (g:Gen) -> N n g Best -> NP n g Best; +npPl : (g:Gen) -> (b:Def) -> N Pl g b -> NP Pl g b; + +en : D Sg Utr OBest; +ett : D Sg Neu OBest; +den : D Sg Utr Best; +det : D Sg Neu OBest; + +alla : (g:Gen) -> D Pl g OBest; +de : (g:Gen) -> D Pl g Best; + +katt : N Sg Utr OBest; +katten : N Sg Utr Best; +katter : N Pl Utr OBest; +katterna : N Pl Utr Best; + +barn : (n:Num) -> N n Neu OBest; +barnet : N Sg Neu Best; +barnen : N Pl Neu Best; + +jagar : V; + +} + diff --git a/grammars/ljung/thesis/FragmentAbstract.gf b/grammars/ljung/thesis/FragmentAbstract.gf new file mode 100644 index 000000000..9c1f5df57 --- /dev/null +++ b/grammars/ljung/thesis/FragmentAbstract.gf @@ -0,0 +1,17 @@ + +abstract FragmentAbstract = { + +cat S; NP; VP; D; N; V; + +fun + +s_p : NP -> VP -> S; +np_d : D -> N -> NP; +np_p : N -> NP; +vp_t : V -> NP -> VP; +d_a, d_m : D; +n_c, n_f : N; +v_e : V; + +} + diff --git a/grammars/ljung/thesis/FragmentNumber.gf b/grammars/ljung/thesis/FragmentNumber.gf new file mode 100644 index 000000000..9dd05adb2 --- /dev/null +++ b/grammars/ljung/thesis/FragmentNumber.gf @@ -0,0 +1,26 @@ + +concrete FragmentNumber of FragmentAbstract = open FragmentResource in { + +lincat + +N = { s : Num => Str }; +V = { s : Num => Str }; +VP = { s : Num => Str }; + +D = { s : Str; n : Num }; +NP = { s : Str; n : Num }; + +lin + +s_p x y = { s = x.s ++ y.s!x.n }; +np_d x y = { s = x.s ++ y.s!x.n; n = x.n }; +np_p x = { s = x.s!Pl; n = Pl }; +vp_t x y = { s = table { z => x.s!z ++ y.s } }; +d_a = { s = "a"; n = Sg }; +d_m = { s = "many"; n = Pl }; +n_c = { s = table { Sg => "lion"; Pl => "lions" } }; +n_f = { s = table { _ => "fish" } }; +v_e = { s = table { Sg => "eats" ; Pl => "eat" } }; + +} + diff --git a/grammars/ljung/thesis/FragmentResource.gf b/grammars/ljung/thesis/FragmentResource.gf new file mode 100644 index 000000000..154b50a52 --- /dev/null +++ b/grammars/ljung/thesis/FragmentResource.gf @@ -0,0 +1,10 @@ + +resource FragmentResource = { + +param + +Num = Sg | Pl; +Gen = Neu | Utr; +Order = Dir | Indir | Sub | Top; + +} diff --git a/grammars/ljung/thesis/FragmentSimple.gf b/grammars/ljung/thesis/FragmentSimple.gf new file mode 100644 index 000000000..bdf2581d0 --- /dev/null +++ b/grammars/ljung/thesis/FragmentSimple.gf @@ -0,0 +1,17 @@ + +concrete FragmentSimple of FragmentAbstract = { + +lin + +s_p x y = { s = x.s ++ y.s }; +np_d x y = { s = x.s ++ y.s }; +np_p x = { s = x.s }; +vp_t x y = { s = x.s ++ y.s }; +d_a = { s = "a" }; +d_m = { s = "many" }; +n_c = { s = variants { "lion" ; "lions" } }; +n_f = { s = "fish" }; +v_e = { s = variants { "eats" ; "eat" } }; + +} + diff --git a/grammars/ljung/thesis/FragmentSwedish.gf b/grammars/ljung/thesis/FragmentSwedish.gf new file mode 100644 index 000000000..db8396bd1 --- /dev/null +++ b/grammars/ljung/thesis/FragmentSwedish.gf @@ -0,0 +1,26 @@ + +concrete FragmentSwedish of FragmentAbstract = open FragmentResource in { + +lincat + +S = { s : Order => Str }; +VP = { s1 : Str; s2 : Str }; +N = { s : Num => Str; g : Gen }; +D = { s : Gen => Str; n : Num }; + +lin + +s_p x y = { s = table { Indir => y.s1 ++ x.s ++ y.s2; + Top => y.s2 ++ y.s1 ++ x.s; + _ => x.s ++ y.s1 ++ y.s2 } }; +np_d x y = { s = x.s!y.g ++ y.s!x.n }; +np_p x = { s = x.s!Pl }; +vp_t x y = { s1 = x.s; s2 = y.s }; +d_a = { s = table { Utr => "en"; Neu => "ett" }; n = Sg }; +d_m = { s = table { _ => "maanga" }; n = Pl }; +n_c = { s = table { _ => "lejon" }; g = Neu }; +n_f = { s = table { Sg => "fisk"; Pl => "fiskar" }; g = Utr }; +v_e = { s = "aeter" }; + +} + diff --git a/grammars/ljung/timeflies/TimeFlies.gf b/grammars/ljung/timeflies/TimeFlies.gf new file mode 100644 index 000000000..f46592b55 --- /dev/null +++ b/grammars/ljung/timeflies/TimeFlies.gf @@ -0,0 +1,24 @@ + +concrete TimeFlies of TimeFliesAbs = { + +lin +s1 x y = {s = x.s ++ y.s}; +vp1 x = {s = x.s}; +vp2 x y = {s = x.s ++ y.s}; +vp3 x y = {s = x.s ++ y.s}; +np1 x = {s = x.s}; +np2 x y = {s = x.s ++ y.s}; +np3 x y = {s = x.s ++ y.s}; +pp1 x y = {s = x.s ++ y.s}; + +flyV = {s = "flies"}; +timeV = {s = "time"}; +likeV = {s = "like"}; +flyN = {s = "flies"}; +timeN = {s = "time"}; +arrowN = {s = "arrow"}; +anD = {s = "an"}; +timeD = {s = "time"}; +likeP = {s = "like"}; + +} diff --git a/grammars/ljung/timeflies/TimeFliesAbs.gf b/grammars/ljung/timeflies/TimeFliesAbs.gf new file mode 100644 index 000000000..fe52f82b6 --- /dev/null +++ b/grammars/ljung/timeflies/TimeFliesAbs.gf @@ -0,0 +1,27 @@ + +abstract TimeFliesAbs = { + +cat +S; VP; NP; PP; V; N; D; P; + +fun +s1 : NP -> VP -> S; +vp1 : V -> VP; +vp2 : V -> NP -> VP; +vp3 : VP -> PP -> VP; +np1 : N -> NP; +np2 : D -> N -> NP; +np3 : NP -> PP -> NP; +pp1 : P -> NP -> PP; + +flyV : V; +timeV : V; +likeV : V; +flyN : N; +timeN : N; +arrowN : N; +anD : D; +timeD : D; +likeP : P; +} + diff --git a/grammars/ljung/timeflies/TimeFliesCnc.gf b/grammars/ljung/timeflies/TimeFliesCnc.gf new file mode 100644 index 000000000..5952dda49 --- /dev/null +++ b/grammars/ljung/timeflies/TimeFliesCnc.gf @@ -0,0 +1,2 @@ + +resource diff --git a/grammars/ljung/variants/TestVars.gf b/grammars/ljung/variants/TestVars.gf new file mode 100644 index 000000000..5341f12fe --- /dev/null +++ b/grammars/ljung/variants/TestVars.gf @@ -0,0 +1,14 @@ + +concrete TestVars of TestVarsA = open TestVarsR in { + +lincat S = { s : XYZ => Str; p : { s : Str; a : AB } }; + +lin a = { s = table { X _ => variants { "x1" ; "x2" }; + Y => variants { "y1" ; "y2" }; + _ => variants { "z1" ; "z2" } }; + p = variants { { s = "s1" ; a = A } ; + { s = "s2" ; a = B } }; + }; + +} + diff --git a/grammars/ljung/variants/TestVarsA.gf b/grammars/ljung/variants/TestVarsA.gf new file mode 100644 index 000000000..253af1320 --- /dev/null +++ b/grammars/ljung/variants/TestVarsA.gf @@ -0,0 +1,9 @@ + +abstract TestVarsA = { + +cat S; + +fun a : S; + +} + diff --git a/grammars/ljung/variants/TestVarsR.gf b/grammars/ljung/variants/TestVarsR.gf new file mode 100644 index 000000000..c0c02926c --- /dev/null +++ b/grammars/ljung/variants/TestVarsR.gf @@ -0,0 +1,27 @@ + +resource TestVarsR = { + +param AB = A | B; +param XYZ = X AB | Y | Z AB; + +} + + + + + + + + + + + + + + + + + + + + diff --git a/src/GF/CFGM/PrintCFGrammar.hs b/src/GF/CFGM/PrintCFGrammar.hs index afde02884..f4c01b39a 100644 --- a/src/GF/CFGM/PrintCFGrammar.hs +++ b/src/GF/CFGM/PrintCFGrammar.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 22:40:03 $ +-- > CVS $Date: 2005/04/11 13:53:38 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.12 $ +-- > CVS $Revision: 1.13 $ -- -- Handles printing a CFGrammar in CFGM format. ----------------------------------------------------------------------------- @@ -19,12 +19,12 @@ import qualified PrintCFG import Ident import GFC import Modules -import qualified GF.Parsing.ConvertGrammar as Cnv +import qualified GF.OldParsing.ConvertGrammar as Cnv import qualified GF.Printing.PrintParser as Prt -import qualified GF.Parsing.CFGrammar as CFGrammar -import qualified GF.Parsing.GrammarTypes as GT +import qualified GF.OldParsing.CFGrammar as CFGrammar +import qualified GF.OldParsing.GrammarTypes as GT import qualified AbsCFG -import qualified GF.Parsing.Utilities as Parser +import qualified GF.OldParsing.Utilities as Parser import ErrM import qualified Option diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs index f2cf3b094..580bdeb5f 100644 --- a/src/GF/Compile/ShellState.hs +++ b/src/GF/Compile/ShellState.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 22:40:03 $ +-- > CVS $Date: 2005/04/11 13:53:38 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.40 $ +-- > CVS $Revision: 1.41 $ -- -- (Description of the module) ----------------------------------------------------------------------------- @@ -34,9 +34,9 @@ import Option import Ident import Arch (ModTime) --- peb 25/5-04 --- import CFtoCFG -import qualified GF.Parsing.ConvertGrammar as Cnv +import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE +import qualified GF.Conversion.GFC as Cnv +import qualified GF.NewParsing.GFC as Prs import List (nub,nubBy) @@ -49,8 +49,12 @@ data ShellState = ShSt { concretes :: [((Ident,Ident),Bool)], -- ^ list of all concretes, and whether active canModules :: CanonGrammar , -- ^ compiled abstracts and concretes srcModules :: G.SourceGrammar , -- ^ saved resource modules - cfs :: [(Ident,CF)] , -- ^ context-free grammars - pInfos :: [(Ident,Cnv.PInfo)], -- ^ parser information, peb 18\/6-04 + cfs :: [(Ident,CF)] , -- ^ context-free grammars (small, no parameters, very over-generating) + pInfosOld :: [(Ident,CnvOld.PInfo)], -- ^ parser information, peb 18\/6-04 (OBSOLETE) + mcfgs :: [(Ident, Cnv.MGrammar)], -- ^ MCFG, converted according to Ljunglöf (2004, ch 3) + cfgs :: [(Ident, Cnv.CGrammar)], -- ^ CFG, converted from mcfg + -- (large, with parameters, no-so overgenerating) + pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars) morphos :: [(Ident,Morpho)], -- ^ morphologies gloptions :: Options, -- ^ global options readFiles :: [(FilePath,ModTime)],-- ^ files read @@ -76,7 +80,10 @@ emptyShellState = ShSt { canModules = M.emptyMGrammar, srcModules = M.emptyMGrammar, cfs = [], - pInfos = [], -- peb 18/6 + pInfosOld = [], -- peb 18/6 (OBSOLETE) + mcfgs = [], + cfgs = [], + pInfos = [], morphos = [], gloptions = noOptions, readFiles = [], @@ -97,23 +104,29 @@ prLanguage = prIdent -- | grammar for one language in a state, comprising its abs and cnc data StateGrammar = StGr { - absId :: Ident, - cncId :: Ident, - grammar :: CanonGrammar, - cf :: CF, - pInfo :: Cnv.PInfo, -- peb 8/6 - morpho :: Morpho, + absId :: Ident, + cncId :: Ident, + grammar :: CanonGrammar, + cf :: CF, + pInfoOld :: CnvOld.PInfo, -- peb 8/6 (OBSOLETE) + mcfg :: Cnv.MGrammar, + cfg :: Cnv.CGrammar, + pInfo :: Prs.PInfo, + morpho :: Morpho, loptions :: Options } emptyStateGrammar :: StateGrammar emptyStateGrammar = StGr { - absId = identC "#EMPTY", --- - cncId = identC "#EMPTY", --- - grammar = M.emptyMGrammar, - cf = emptyCF, - pInfo = Cnv.emptyPInfo, -- peb 18/6 - morpho = emptyMorpho, + absId = identC "#EMPTY", --- + cncId = identC "#EMPTY", --- + grammar = M.emptyMGrammar, + cf = emptyCF, + pInfoOld = CnvOld.emptyPInfo, -- peb 18/6 (OBSOLETE) + mcfg = [], + cfg = [], + pInfo = Prs.buildPInfo [] [], + morpho = emptyMorpho, loptions = noOptions } @@ -121,17 +134,25 @@ emptyStateGrammar = StGr { stateGrammarST :: StateGrammar -> CanonGrammar stateCF :: StateGrammar -> CF -statePInfo :: StateGrammar -> Cnv.PInfo +statePInfoOld :: StateGrammar -> CnvOld.PInfo -- OBSOLETE +stateMCFG :: StateGrammar -> Cnv.MGrammar +stateCFG :: StateGrammar -> Cnv.CGrammar +statePInfo :: StateGrammar -> Prs.PInfo stateMorpho :: StateGrammar -> Morpho stateOptions :: StateGrammar -> Options stateGrammarWords :: StateGrammar -> [String] +stateGrammarLang :: StateGrammar -> (CanonGrammar, Ident) stateGrammarST = grammar stateCF = cf +statePInfoOld = pInfoOld -- OBSOLETE +stateMCFG = mcfg +stateCFG = cfg statePInfo = pInfo stateMorpho = morpho stateOptions = loptions stateGrammarWords = allMorphoWords . stateMorpho +stateGrammarLang st = (grammar st, cncId st) cncModuleIdST :: StateGrammar -> CanonGrammar cncModuleIdST = stateGrammarST @@ -166,7 +187,23 @@ updateShellState opts mcnc sh ((_,sgr,gr),rts) = do notInrts f = notElem f $ map fst rts cfs <- mapM (canon2cf opts cgr) concrs --- would not need to update all... - let pinfos = map (Cnv.pInfo opts cgr) concrs -- peb 18/6 + let pinfosOld = map (CnvOld.pInfo opts cgr) concrs -- peb 18/6 (OBSOLETE) + + let g2s = Cnv.gfc2simple + fin = Cnv.simple2finite + s2mN = Cnv.simple2mcfg_nondet + s2mS = Cnv.simple2mcfg_strict + -- ____ kan man ha flera '-conversion=X -conversion=Y'? + (simpleCnv, mcfgCnv) = case getOptVal opts gfcConversion of + Just "strict" -> (g2s, s2mS) + Just "finite" -> (fin . g2s, s2mN) + Just "finite-strict" -> (fin . g2s, s2mS) + _ -> (g2s, s2mN) + cfgCnv = Cnv.mcfg2cfg + + let simples = map (curry simpleCnv cgr) concrs + mcfgs = map mcfgCnv simples + cfgs = map cfgCnv mcfgs let funs = funRulesOf cgr let cats = allCatsOf cgr @@ -185,7 +222,10 @@ updateShellState opts mcnc sh ((_,sgr,gr),rts) = do canModules = cgr, srcModules = src, cfs = zip concrs cfs, - pInfos = zip concrs pinfos, -- peb 8/6 + pInfosOld = zip concrs pinfosOld, -- peb 8/6 (OBSOLETE) + mcfgs = zip concrs mcfgs, + cfgs = zip concrs cfgs, + pInfos = zip concrs $ zipWith Prs.buildPInfo mcfgs cfgs, morphos = zip concrs (map (mkMorpho cgr) concrs), gloptions = gloptions sh, --- opts, -- this would be command-line options readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts, @@ -243,6 +283,9 @@ purgeShellState sh = ShSt { canModules = M.MGrammar $ purge $ M.modules $ canModules sh, srcModules = M.emptyMGrammar, cfs = cfs sh, + pInfosOld = pInfosOld sh, -- OBSOLETE + mcfgs = mcfgs sh, + cfgs = cfgs sh, pInfos = pInfos sh, morphos = morphos sh, gloptions = gloptions sh, @@ -256,15 +299,15 @@ purgeShellState sh = ShSt { acncs = maybe [] singleton (abstract sh) ++ map (snd . fst) (concretes sh) changeMain :: Maybe Ident -> ShellState -> Err ShellState -changeMain Nothing (ShSt _ _ cs ms ss cfs pis mos os rs acs s) = - return (ShSt Nothing Nothing [] ms ss cfs pis mos os rs acs s) -changeMain (Just c) st@(ShSt _ _ cs ms ss cfs pis mos os rs acs s) = +changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) = + return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) +changeMain (Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) = case lookup c (M.modules ms) of Just _ -> do a <- M.abstractOfConcrete ms c let cas = M.allConcretes ms a let cs' = [((c,c),True) | c <- cas] - return (ShSt (Just a) (Just c) cs' ms ss cfs pis mos os rs acs s) + return (ShSt (Just a) (Just c) cs' ms ss cfs old_pis mcfgs cfgs pinfos mos os rs acs s) _ -> P.prtBad "The state has no concrete syntax named" c -- | form just one state grammar, if unique, from a canonical grammar @@ -286,7 +329,10 @@ stateGrammarOfLang st l = StGr { cncId = l, grammar = can, cf = maybe emptyCF id (lookup l (cfs st)), - pInfo = maybe Cnv.emptyPInfo id (lookup l (pInfos st)), -- peb 18/6 + pInfoOld = maybe CnvOld.emptyPInfo id (lookup l (pInfosOld st)), -- peb 18/6 (OBSOLETE) + mcfg = maybe [] id $ lookup l $ mcfgs st, + cfg = maybe [] id $ lookup l $ cfgs st, + pInfo = maybe (Prs.buildPInfo [] []) id $ lookup l $ pInfos st, morpho = maybe emptyMorpho id (lookup l (morphos st)), loptions = errVal noOptions $ lookupOptionsCan can } @@ -316,12 +362,15 @@ mkStateGrammar = stateGrammarOfLang stateAbstractGrammar :: ShellState -> StateGrammar stateAbstractGrammar st = StGr { - absId = maybe (identC "Abs") id (abstract st), --- - cncId = identC "#Cnc", --- - grammar = canModules st, ---- only abstarct ones - cf = emptyCF, - pInfo = Cnv.emptyPInfo, -- peb 18/6 - morpho = emptyMorpho, + absId = maybe (identC "Abs") id (abstract st), --- + cncId = identC "#Cnc", --- + grammar = canModules st, ---- only abstarct ones + cf = emptyCF, + pInfoOld = CnvOld.emptyPInfo, -- peb 18/6 (OBSOLETE) + mcfg = [], + cfg = [], + pInfo = Prs.buildPInfo [] [], + morpho = emptyMorpho, loptions = gloptions st ---- } @@ -459,9 +508,10 @@ languageOn = languageOnOff True languageOff = languageOnOff False languageOnOff :: Bool -> Language -> ShellStateOper -languageOnOff b lang (ShSt a c cs cg sg cfs pinfos ms os fs cats sts) = - ShSt a c cs' cg sg cfs pinfos ms os fs cats sts where - cs' = [if lang==l then ((l,c),b) else i | i@((l,c),_) <- cs] +--- __________ this is OBSOLETE +languageOnOff b lang (ShSt a c cs cg sg cfs old_pinfos mcfgs cfgs pinfos ms os fs cats sts) = + ShSt a c cs' cg sg cfs old_pinfos mcfgs cfgs pinfos ms os fs cats sts where + cs' = [if lang==l then (lc,b) else i | i@(lc@(l,c),_) <- cs] {- updateLanguage :: FilePath -> (Language, StateConcrete) -> ShellStateOper @@ -476,13 +526,16 @@ initWithAbstract ab st@(ShSt (ma,cs,os)) = removeLanguage :: Language -> ShellStateOper removeLanguage lang (ShSt (ab,gs,os)) = ShSt (ab,removeAssoc lang gs, os) -} + changeOptions :: (Options -> Options) -> ShellStateOper -changeOptions f (ShSt a c cs can src cfs pinfos ms os ff ts ss) = - ShSt a c cs can src cfs pinfos ms (f os) ff ts ss +--- __________ this is OBSOLETE +changeOptions f (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff ts ss) = + ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms (f os) ff ts ss changeModTimes :: [(FilePath,ModTime)] -> ShellStateOper -changeModTimes mfs (ShSt a c cs can src cfs pinfos ms os ff ts ss) = - ShSt a c cs can src cfs pinfos ms os ff' ts ss +--- __________ this is OBSOLETE +changeModTimes mfs (ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff ts ss) = + ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms os ff' ts ss where ff' = mfs ++ [mf | mf@(f,_) <- ff, notElem f (map fst mfs)] diff --git a/src/GF/Conversion/GFC.hs b/src/GF/Conversion/GFC.hs new file mode 100644 index 000000000..6a4adc253 --- /dev/null +++ b/src/GF/Conversion/GFC.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:48 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All conversions from GFC +----------------------------------------------------------------------------- + +module GF.Conversion.GFC + (module GF.Conversion.GFC, + SimpleGrammar, MGrammar, CGrammar) where + +import GFC (CanonGrammar) +import Ident (Ident) +import GF.Formalism.SimpleGFC (SimpleGrammar) +import GF.Conversion.Types (CGrammar, MGrammar) + +import qualified GF.Conversion.GFCtoSimple as G2S +import qualified GF.Conversion.SimpleToFinite as S2Fin +import qualified GF.Conversion.SimpleToMCFG as S2M +import qualified GF.Conversion.MCFGtoCFG as M2C + +gfc2simple :: (CanonGrammar, Ident) -> SimpleGrammar +gfc2simple = G2S.convertGrammar + +simple2finite :: SimpleGrammar -> SimpleGrammar +simple2finite = S2Fin.convertGrammar + +simple2mcfg_nondet :: SimpleGrammar -> MGrammar +simple2mcfg_nondet = S2M.convertGrammarNondet + +simple2mcfg_strict :: SimpleGrammar -> MGrammar +simple2mcfg_strict = S2M.convertGrammarStrict + +mcfg2cfg :: MGrammar -> CGrammar +mcfg2cfg = M2C.convertGrammar + + diff --git a/src/GF/Conversion/GFCtoSimple.hs b/src/GF/Conversion/GFCtoSimple.hs new file mode 100644 index 000000000..1764f1644 --- /dev/null +++ b/src/GF/Conversion/GFCtoSimple.hs @@ -0,0 +1,135 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:48 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC to SimpleGFC +-- +-- the conversion might fail if the GFC grammar has dependent or higher-order types +----------------------------------------------------------------------------- + +module GF.Conversion.GFCtoSimple + (convertGrammar) where + +import qualified AbsGFC as A +import qualified Ident as I +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC + +import GFC (CanonGrammar) +import MkGFC (grammar2canon) +import qualified Look (lookupLin, allParamValues, lookupLincat) +import qualified CMacros (defLinType) +import Operations (err, errVal) +--import qualified Modules as M + +import GF.System.Tracing +import GF.Infra.Print + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, I.Ident) + +convertGrammar :: Env -> SimpleGrammar +convertGrammar gram = trace2 "converting language" (show (snd gram)) $ + tracePrt "#simpleGFC rules" (show . length) $ + [ convertAbsFun gram fun typing | + A.Mod (A.MTAbs modname) _ _ _ defs <- modules, + A.AbsDFun fun typing _ <- defs ] + where A.Gr modules = grammar2canon (fst gram) + +convertAbsFun :: Env -> I.Ident -> A.Exp -> SimpleRule +convertAbsFun gram fun typing = Rule abs cnc + where abs = convertAbstract [] fun typing + cnc = convertConcrete gram abs + +---------------------------------------------------------------------- +-- abstract definitions + +convertAbstract :: [Decl] -> Name -> A.Exp -> Abstract Decl Name +convertAbstract env fun (A.EProd x a b) + = convertAbstract ((x' ::: convertType [] a) : env) fun b + where x' = if x==I.identC "h_" then anyVar else x +convertAbstract env fun a = Abs (anyVar ::: convertType [] a) (reverse env) fun + +convertType :: [Atom] -> A.Exp -> Type +convertType args (A.EApp a (A.EAtom at)) = convertType (convertAtom at : args) a +convertType args (A.EAtom at) = convertCat at :@ args + +convertAtom :: A.Atom -> Atom +convertAtom (A.AC con) = ACon con +convertAtom (A.AV var) = AVar var + +convertCat :: A.Atom -> Cat +convertCat (A.AC (A.CIQ _ cat)) = cat +convertCat at = error $ "convertCat: " ++ show at + +---------------------------------------------------------------------- +-- concrete definitions + +convertConcrete :: Env -> Abstract Decl Name -> Concrete LinType (Maybe Term) +convertConcrete gram (Abs decl args fun) = Cnc ltyp largs term + where term = fmap (convertTerm gram) $ lookupLin gram fun + ltyp : largs = map (convertCType gram . lookupCType gram) (decl : args) + +convertCType :: Env -> A.CType -> LinType +convertCType gram (A.RecType rec) + = RecT [ (lbl, convertCType gram ctype) | A.Lbg lbl ctype <- rec ] +convertCType gram (A.Table ptype vtype) + = TblT (convertCType gram ptype) (convertCType gram vtype) +convertCType gram ct@(A.Cn con) = ConT con $ map (convertTerm gram) $ groundTerms gram ct +convertCType gram (A.TStr) = StrT +convertCType gram (A.TInts n) = error "convertCType: cannot handle 'TInts' constructor" + +convertTerm :: Env -> A.Term -> Term +convertTerm gram (A.Arg arg) = convertArgVar arg +convertTerm gram (A.Con con terms) = con :^ map (convertTerm gram) terms +convertTerm gram (A.LI var) = Var var +convertTerm gram (A.R rec) = Rec [ (lbl, convertTerm gram term) | A.Ass lbl term <- rec ] +convertTerm gram (A.P term lbl) = convertTerm gram term +. lbl +convertTerm gram (A.V ctype terms) = Tbl [ (convertTerm gram pat, convertTerm gram term) | + (pat, term) <- zip (groundTerms gram ctype) terms ] +convertTerm gram (A.T ctype tbl) = Tbl [ (convertPatt pat, convertTerm gram term) | + A.Cas pats term <- tbl, pat <- pats ] +convertTerm gram (A.S term sel) = convertTerm gram term +! convertTerm gram sel +convertTerm gram (A.C term1 term2) = convertTerm gram term1 ?++ convertTerm gram term2 +convertTerm gram (A.FV terms) = Variants (map (convertTerm gram) terms) +-- 'pre' tokens are converted to variants (over-generating): +convertTerm gram (A.K (A.KP [s] vs)) + = Variants $ Token s : [ Token v | A.Var [v] _ <- vs ] +convertTerm gram (A.K (A.KP _ _)) = error "convertTerm: don't know how to handle string lists in 'pre' tokens" +convertTerm gram (A.K (A.KS tok)) = Token tok +convertTerm gram (A.E) = Empty +convertTerm gram (A.I con) = error "convertTerm: cannot handle 'I' constructor" +convertTerm gram (A.EInt int) = error "convertTerm: cannot handle 'EInt' constructor" + +convertArgVar :: A.ArgVar -> Term +convertArgVar (A.A cat nr) = Arg (fromInteger nr) cat emptyPath +convertArgVar (A.AB cat bindings nr) = Arg (fromInteger nr) cat emptyPath + +convertPatt (A.PC con pats) = con :^ map convertPatt pats +convertPatt (A.PV x) = Var x +convertPatt (A.PW) = Wildcard +convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ] +convertPatt (A.PI n) = error "convertPatt: cannot handle 'PI' constructor" + +---------------------------------------------------------------------- + +lookupLin :: Env -> Name -> Maybe A.Term +lookupLin gram fun = err fail Just $ + Look.lookupLin (fst gram) (A.CIQ (snd gram) fun) + +lookupCType :: Env -> Decl -> A.CType +lookupCType env decl + = errVal CMacros.defLinType $ + Look.lookupLincat (fst env) (A.CIQ (snd env) (decl2cat decl)) + +groundTerms :: Env -> A.CType -> [A.Term] +groundTerms gram ctype = err error id $ + Look.allParamValues (fst gram) ctype + diff --git a/src/GF/Conversion/MCFGtoCFG.hs b/src/GF/Conversion/MCFGtoCFG.hs new file mode 100644 index 000000000..c12bb6b53 --- /dev/null +++ b/src/GF/Conversion/MCFGtoCFG.hs @@ -0,0 +1,49 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:48 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting MCFG grammars to (possibly overgenerating) CFG +----------------------------------------------------------------------------- + + +module GF.Conversion.MCFGtoCFG + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Monad +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.CFG +import GF.Conversion.Types + +convertGrammar :: MGrammar -> CGrammar +convertGrammar gram = tracePrt "#context-free rules" (prt.length) $ + concatMap convertRule gram + +convertRule :: MRule -> [CRule] +convertRule (Rule (Abs cat args name) (Cnc _ _ record)) + = [ CFRule (CCat cat lbl) rhs (CName name profile) | + Lin lbl lin <- record, + let rhs = map (mapSymbol convertArg id) lin, + let profile = map (argPlaces lin) [0 .. length args-1] + ] + +convertArg :: (MCat, MLabel, Int) -> CCat +convertArg (cat, lbl, _) = CCat cat lbl + +argPlaces :: [Symbol (cat, lbl, Int) tok] -> Int -> [Int] +argPlaces lin nr = [ place | (nr', place) <- zip linArgs [0..], nr == nr' ] + where linArgs = [ nr' | (_, _, nr') <- filterCats lin ] + + + + diff --git a/src/GF/Conversion/SimpleToFinite.hs b/src/GF/Conversion/SimpleToFinite.hs new file mode 100644 index 000000000..4abc22356 --- /dev/null +++ b/src/GF/Conversion/SimpleToFinite.hs @@ -0,0 +1,134 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:48 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Calculating the finiteness of each type in a grammar +----------------------------------------------------------------------------- + +module GF.Conversion.SimpleToFinite + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.BacktrackM +import GF.Data.Utilities (lookupList) + +import Ident (Ident(..)) + +type CnvMonad a = BacktrackM () a + +convertGrammar :: SimpleGrammar -> SimpleGrammar +convertGrammar rules = tracePrt "#finite simpleGFC rules" (prt . length) $ + solutions cnvMonad () + where split = calcSplitable rules + cnvMonad = member rules >>= convertRule split + +convertRule :: Splitable -> SimpleRule -> CnvMonad SimpleRule +convertRule split (Rule abs cnc) + = do newAbs <- convertAbstract split abs + return $ Rule newAbs cnc + +convertAbstract :: Splitable -> Abstract Decl Name -> CnvMonad (Abstract Decl Name) +convertAbstract split (Abs (_ ::: typ) decls fun) + = case splitableFun split fun of + Just newCat -> return $ Abs (anyVar ::: (newCat :@ [])) decls fun + Nothing -> expandTyping split fun [] typ decls [] + + +expandTyping :: Splitable -> Name -> [(Var, Cat)] -> Type -> [Decl] -> [Decl] + -> CnvMonad (Abstract Decl Name) +expandTyping split fun env (cat :@ atoms) [] decls + = return $ Abs decl (reverse decls) fun + where decl = anyVar ::: substAtoms split env cat atoms [] +expandTyping split fun env typ ((x ::: (xcat :@ xatoms)) : declsToDo) declsDone + = do (xcat', env') <- calcNewEnv + let decl = x ::: substAtoms split env xcat' xatoms [] + expandTyping split fun env' typ declsToDo (decl : declsDone) + where calcNewEnv = case splitableCat split xcat of + Just newCats -> do newCat <- member newCats + return (newCat, (x,newCat) : env) + Nothing -> return (xcat, env) + +substAtoms :: Splitable -> [(Var, Cat)] -> Cat -> [Atom] -> [Atom] -> Type +substAtoms split env cat [] atoms = cat :@ reverse atoms +substAtoms split env cat (atom:atomsToDo) atomsDone + = case atomLookup split env atom of + Just newCat -> substAtoms split env (mergeArg cat newCat) atomsToDo atomsDone + Nothing -> substAtoms split env cat atomsToDo (atom : atomsDone) + +atomLookup split env (AVar x) = lookup x env +atomLookup split env (ACon con) = splitableFun split (constr2name con) + + +---------------------------------------------------------------------- +-- splitable categories (finite, no dependencies) +-- they should also be used as some dependency + +type Splitable = (Assoc Cat [Cat], Assoc Name Cat) + +splitableCat :: Splitable -> Cat -> Maybe [Cat] +splitableCat = lookupAssoc . fst + +splitableFun :: Splitable -> Name -> Maybe Cat +splitableFun = lookupAssoc . snd + +calcSplitable :: [SimpleRule] -> Splitable +calcSplitable rules = (listAssoc splitableCat2Funs, listAssoc splitableFun2Cat) + where splitableCat2Funs = groupPairs $ nubsort + [ (cat, mergeFun fun cat) | (cat, fun) <- splitableCatFuns ] + + splitableFun2Cat = nubsort + [ (fun, mergeFun fun cat) | (cat, fun) <- splitableCatFuns ] + + -- cat-fun pairs that are splitable + splitableCatFuns = [ (cat, fun) | + Rule (Abs (_ ::: (cat :@ [])) [] fun) _ <- rules, + splitableCats ?= cat ] + + -- all cats that are splitable + splitableCats = listSet $ + tracePrt "finite categories to split" prt $ + (nondepCats <**> depCats) <\\> resultCats + + -- all result cats for some pure function + resultCats = nubsort [ cat | Rule (Abs (_ ::: (cat :@ _)) decls _) _ <- rules, + not (null decls) ] + + -- all cats in constants without dependencies + nondepCats = nubsort [ cat | Rule (Abs (_ ::: (cat :@ [])) [] _) _ <- rules ] + + -- all cats occurring as some dependency of another cat + depCats = nubsort [ cat | Rule (Abs decl decls _) _ <- rules, + cat <- varCats [] (decls ++ [decl]) ] + + varCats _ [] = [] + varCats env ((x ::: (xcat :@ atoms)) : decls) + = varCats ((x,xcat) : env) decls ++ + [ cat | AVar y <- atoms, cat <- lookupList y env ] + + +---------------------------------------------------------------------- +-- utilities +-- mergeing categories + +mergeCats :: String -> String -> String -> Cat -> Cat -> Cat +mergeCats before middle after (IC cat) (IC arg) + = IC (before ++ cat ++ middle ++ arg ++ after) + +mergeFun, mergeArg :: Cat -> Cat -> Cat +mergeFun = mergeCats "{" ":" "}" +mergeArg = mergeCats "" "" "" + + diff --git a/src/GF/Conversion/SimpleToMCFG.hs b/src/GF/Conversion/SimpleToMCFG.hs new file mode 100644 index 000000000..5e299c8a0 --- /dev/null +++ b/src/GF/Conversion/SimpleToMCFG.hs @@ -0,0 +1,26 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:48 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All different conversions from SimpleGFC to MCFG +----------------------------------------------------------------------------- + +module GF.Conversion.SimpleToMCFG where + +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import qualified GF.Conversion.SimpleToMCFG.Strict as Strict +import qualified GF.Conversion.SimpleToMCFG.Nondet as Nondet +import qualified GF.Conversion.SimpleToMCFG.Coercions as Coerce + +convertGrammarNondet, convertGrammarStrict :: SimpleGrammar -> MGrammar +convertGrammarNondet = Coerce.addCoercions . Nondet.convertGrammar +convertGrammarStrict = Strict.convertGrammar + diff --git a/src/GF/Conversion/SimpleToMCFG/Coercions.hs b/src/GF/Conversion/SimpleToMCFG/Coercions.hs new file mode 100644 index 000000000..c1dc5b07c --- /dev/null +++ b/src/GF/Conversion/SimpleToMCFG/Coercions.hs @@ -0,0 +1,62 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Adding coercion functions to a MCFG if necessary. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Coercions + (addCoercions) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Conversion.Types +import GF.Data.SortedList +import List (groupBy) + +---------------------------------------------------------------------- + +addCoercions :: MGrammar -> MGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule (Abs head args _) (Cnc lbls _ _) <- rules ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "#MCFG coercions" (prt . length) $ + concat $ + tracePrt "#MCFG coercions per category" (prtList . map length) $ + combineCoercions + (groupBy sameCatFst allHeadSet) + (groupBy sameCat allArgSet) + sameCatFst a b = sameCat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (mcat2cat $ fst $ head heads) (mcat2cat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args + = [ Rule (Abs arg [head] coercionName) (Cnc lbls [lbls] lins) | + (head@(MCat _ headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@(MCat _ argCns) <- args, + argCns `subset` headCns ] + + + diff --git a/src/GF/Conversion/SimpleToMCFG/Nondet.hs b/src/GF/Conversion/SimpleToMCFG/Nondet.hs new file mode 100644 index 000000000..b98b368ff --- /dev/null +++ b/src/GF/Conversion/SimpleToMCFG/Nondet.hs @@ -0,0 +1,203 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically. +-- Afterwards, the grammar has to be extended with coercion functions, +-- from the module 'GF.Conversion.SimpleToMCFG.Coercions' +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Nondet + (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM + + +------------------------------------------------------------ +-- type declarations + +type CnvMonad a = BacktrackM Env a + +type Env = (MCat, [MCat], LinRec, [LinType]) +type LinRec = [Lin Cat MLabel Token] + + +---------------------------------------------------------------------- +-- main conversion function + +convertGrammar :: SimpleGrammar -> MGrammar +convertGrammar rules = tracePrt "Nondet conversion: #MCFG rules" (prt . length) $ + solutions conversion undefined + where conversion = member rules >>= convertRule + +convertRule :: SimpleRule -> CnvMonad MRule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) + = do let cat : args = map decl2cat (decl : decls) + writeState (initialMCat cat, map initialMCat args, [], ctypes) + rterm <- simplifyTerm term + reduceTerm ctype emptyPath rterm + (newCat, newArgs, linRec, _) <- readState + let newLinRec = map (instantiateArgs newArgs) linRec + catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) + return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) +convertRule _ = failure + + +---------------------------------------------------------------------- +-- term simplification + +simplifyTerm :: Term -> CnvMonad Term +simplifyTerm (term :! sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + Tbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm ssel + return (sterm +! sel') +simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms +simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record +simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term +simplifyTerm (Tbl table) = liftM Tbl $ mapM simplifyCase table +simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms +simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2) +simplifyTerm term = return term +-- error constructors: +-- (I CIdent) - from resource +-- (LI Ident) - pattern variable +-- (EInt Integer) - integer + +simplifyAssign :: (Label, Term) -> CnvMonad (Label, Term) +simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term + +simplifyCase :: (Term, Term) -> CnvMonad (Term, Term) +simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term) + + +------------------------------------------------------------ +-- reducing simplified terms, collecting MCF rules + +reduceTerm :: LinType -> Path -> Term -> CnvMonad () +reduceTerm ctype path (Variants terms) + = member terms >>= reduceTerm ctype path +reduceTerm (StrT) path term = updateLin (path, term) +reduceTerm (ConT _ _) path term = do pat <- expandTerm term + updateHead (path, pat) +reduceTerm (RecT rtype) path term + = sequence_ [ reduceTerm ctype (path ++. lbl) (term +. lbl) | + (lbl, ctype) <- rtype ] +reduceTerm (TblT ptype vtype) path table + = sequence_ [ reduceTerm vtype (path ++! pat) (table +! pat) | + pat <- enumeratePatterns ptype ] + + +------------------------------------------------------------ +-- expanding a term to ground terms + +expandTerm :: Term -> CnvMonad Term +expandTerm arg@(Arg nr _ path) + = do ctypes <- readArgCTypes + pat <- member $ enumeratePatterns $ lintypeFollowPath path $ ctypes !! nr + pat =?= arg + return pat +expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms +expandTerm (Rec record) = liftM Rec $ mapM expandAssign record +expandTerm (Variants terms) = member terms >>= expandTerm +expandTerm term = error $ "expandTerm: " ++ prt term + +expandAssign :: (Label, Term) -> CnvMonad (Label, Term) +expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term + + +------------------------------------------------------------ +-- unification of patterns and selection terms + +(=?=) :: Term -> Term -> CnvMonad () +Wildcard =?= _ = return () +Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= Arg nr _ path = updateArg nr (path, pat) +(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +pat =?= term = error $ "(=?=): " ++ prt pat ++ " =?= " ++ prt term + + +------------------------------------------------------------ +-- updating the MCF rule + +readArgCTypes :: CnvMonad [LinType] +readArgCTypes = do (_, _, _, env) <- readState + return env + +updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins, env) <- readState + args' <- updateNth (addToMCat cn) arg args + writeState (head, args', lins, env) + +updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins, env) <- readState + head' <- addToMCat cn head + writeState (head', args, lins, env) + +updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins, env) <- readState + let lins' = lins ++ map (Lin path) newLins + writeState (head, args, lins', env) + +term2lins :: Term -> [[Symbol (Cat, Path, Int) Token]] +term2lins (Arg nr cat path) = return [Cat (cat, path, nr)] +term2lins (Token str) = return [Tok str] +term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (Empty) = return [] +term2lins (Variants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +addToMCat :: Constraint -> MCat -> CnvMonad MCat +addToMCat cn (MCat cat cns) = liftM (MCat cat) $ addConstraint cn cns + +addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + + diff --git a/src/GF/Conversion/SimpleToMCFG/Strict.hs b/src/GF/Conversion/SimpleToMCFG/Strict.hs new file mode 100644 index 000000000..17c2293ec --- /dev/null +++ b/src/GF/Conversion/SimpleToMCFG/Strict.hs @@ -0,0 +1,128 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, deterministic. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.Conversion.SimpleToMCFG.Strict where -- (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM +import GF.Data.SortedList + +---------------------------------------------------------------------- +-- main conversion function + +type CnvMonad a = BacktrackM () a + +convertGrammar :: SimpleGrammar -> MGrammar +convertGrammar rules = tracePrt "Strict conversion: #MCFG rules" (prt . length) $ + solutions conversion undefined + where conversion = member rules >>= convertRule + +convertRule :: SimpleRule -> CnvMonad MRule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) + = do let cat : args = map decl2cat (decl : decls) + args_ctypes = zip3 [0..] args ctypes + instArgs <- mapM enumerateArg args_ctypes + let instTerm = substitutePaths instArgs term + newCat <- extractMCat cat ctype instTerm + newArgs <- mapM (extractArg instArgs) args_ctypes + let linRec = strPaths ctype instTerm >>= extractLin newArgs + let newLinRec = map (instantiateArgs newArgs) linRec + catPaths : argsPaths = map (lintype2paths emptyPath) (ctype : ctypes) + return $ Rule (Abs newCat newArgs fun) (Cnc catPaths argsPaths newLinRec) +convertRule _ = failure + +---------------------------------------------------------------------- +-- category extraction + +extractArg :: [Term] -> (Int, Cat, LinType) -> CnvMonad MCat +extractArg args (nr, cat, ctype) = extractMCat cat ctype (args !! nr) + +extractMCat :: Cat -> LinType -> Term -> CnvMonad MCat +extractMCat cat ctype term = member $ map (MCat cat) $ parPaths ctype term + +enumerateArg :: (Int, Cat, LinType) -> CnvMonad Term +enumerateArg (nr, cat, ctype) = member $ enumerateTerms (Just (Arg nr cat emptyPath)) ctype + +---------------------------------------------------------------------- +-- Substitute each instantiated parameter path for its instantiation + +substitutePaths :: [Term] -> Term -> Term +substitutePaths arguments = subst + where subst (Arg nr _ path) = termFollowPath path (arguments !! nr) + subst (con :^ terms) = con :^ map subst terms + subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ] + subst (term :. lbl) = subst term +. lbl + subst (Tbl table) = Tbl [ (pat, subst term) | + (pat, term) <- table ] + subst (term :! select) = subst term +! subst select + subst (term :++ term') = subst term ?++ subst term' + subst (Variants terms) = Variants $ map subst terms + subst term = term + +---------------------------------------------------------------------- +-- term paths extaction + +termPaths :: LinType -> Term -> [(Path, (LinType, Term))] +termPaths ctype (Variants terms) = terms >>= termPaths ctype +termPaths (RecT rtype) (Rec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let Just ctype = lookup lbl rtype, + (path, value) <- termPaths ctype term ] +termPaths (TblT _ ctype) (Tbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths ctype term ] +termPaths ctype term | isBaseType ctype = [ (emptyPath, (ctype, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: LinType -> Term -> [[(Path, Term)]] +parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $ + nubsort [ (path, value) | + (path, (ConT _ _, value)) <- termPaths ctype term ] + +strPaths :: LinType -> Term -> [(Path, Term)] +strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ] + +---------------------------------------------------------------------- +-- linearization extraction + +extractLin :: [MCat] -> (Path, Term) -> [Lin MCat MLabel Token] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (Empty) = [[]] + convertLin (Token tok) = [[Tok tok]] + convertLin (Variants terms) = concatMap convertLin terms + convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]] + convertLin t = error $ "convertLin: " ++ prt t ++ " " ++ prt (args, path) + diff --git a/src/GF/Conversion/Types.hs b/src/GF/Conversion/Types.hs new file mode 100644 index 000000000..d6b43bd58 --- /dev/null +++ b/src/GF/Conversion/Types.hs @@ -0,0 +1,79 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All possible instantiations of different grammar formats used in conversion from GFC +----------------------------------------------------------------------------- + + +module GF.Conversion.Types where + +import qualified Ident +import qualified Grammar (Term) +import qualified Macros + +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Formalism.MCFG +import GF.Formalism.CFG +import GF.Infra.Print + +---------------------------------------------------------------------- +-- * MCFG + +type MGrammar = MCFGrammar MCat Name MLabel Token +type MRule = MCFRule MCat Name MLabel Token +data MCat = MCat Cat [Constraint] deriving (Eq, Ord, Show) +type MLabel = Path + +type Constraint = (Path, Term) + +initialMCat :: Cat -> MCat +initialMCat cat = MCat cat [] + +mcat2cat :: MCat -> Cat +mcat2cat (MCat cat _) = cat + +sameCat :: MCat -> MCat -> Bool +sameCat mc1 mc2 = mcat2cat mc1 == mcat2cat mc2 + +coercionName :: Name +coercionName = Ident.wildIdent + +isCoercion :: Name -> Bool +isCoercion = Ident.isWildIdent + +---------------------------------------------------------------------- +-- * CFG + +type CGrammar = CFGrammar CCat CName Token +type CRule = CFRule CCat CName Token + +data CCat = CCat MCat MLabel + deriving (Eq, Ord, Show) +data CName = CName Name Profile + deriving (Eq, Ord, Show) +type Profile = [[Int]] + +---------------------------------------------------------------------- +-- * pretty-printing + +instance Print MCat where + prt (MCat cat constrs) = prt cat ++ "{" ++ + concat [ prt path ++ "=" ++ prt term ++ ";" | + (path, term) <- constrs ] ++ "}" + +instance Print CCat where + prt (CCat cat label) = prt cat ++ prt label + +instance Print CName where + prt (CName fun args) = prt fun ++ prt args + + + diff --git a/src/GF/Data/BacktrackM.hs b/src/GF/Data/BacktrackM.hs index 555f5fec1..ba03884fd 100644 --- a/src/GF/Data/BacktrackM.hs +++ b/src/GF/Data/BacktrackM.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/29 11:17:54 $ +-- > CVS $Date: 2005/04/11 13:52:49 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.2 $ +-- > CVS $Revision: 1.3 $ -- -- Backtracking state monad, with r\/o environment ----------------------------------------------------------------------------- @@ -19,7 +19,6 @@ module GF.Data.BacktrackM ( -- * the backtracking state monad failure, (|||), -- * handling the state & environment - readEnv, readState, writeState, -- * monad specific utilities @@ -37,53 +36,51 @@ import Monad -- * controlling the monad -failure :: BacktrackM e s a -(|||) :: BacktrackM e s a -> BacktrackM e s a -> BacktrackM e s a +failure :: BacktrackM s a +(|||) :: BacktrackM s a -> BacktrackM s a -> BacktrackM s a -instance MonadPlus (BacktrackM e s) where +instance MonadPlus (BacktrackM s) where mzero = failure mplus = (|||) -- * handling the state & environment -readEnv :: BacktrackM e s e -readState :: BacktrackM e s s -writeState :: s -> BacktrackM e s () +readState :: BacktrackM s s +writeState :: s -> BacktrackM s () --- * monad specific utilities +-- * specific functions on the backtracking monad -member :: [a] -> BacktrackM e s a +member :: [a] -> BacktrackM s a member = msum . map return -- * running the monad -runBM :: BacktrackM e s a -> e -> s -> [(s, a)] +runBM :: BacktrackM s a -> s -> [(s, a)] -solutions :: BacktrackM e s a -> e -> s -> [a] -solutions bm e s = map snd $ runBM bm e s +solutions :: BacktrackM s a -> s -> [a] +solutions bm = map snd . runBM bm -finalStates :: BacktrackM e s () -> e -> s -> [s] -finalStates bm e s = map fst $ runBM bm e s +finalStates :: BacktrackM s () -> s -> [s] +finalStates bm = map fst . runBM bm {- ---------------------------------------------------------------------- -- implementation as lists of successes -newtype BacktrackM e s a = BM (e -> s -> [(s, a)]) +newtype BacktrackM s a = BM (s -> [(s, a)]) runBM (BM m) = m -readEnv = BM (\e s -> [(s, e)]) -readState = BM (\e s -> [(s, s)]) -writeState s = BM (\e _ -> [(s, ())]) +readState = BM (\s -> [(s, s)]) +writeState s = BM (\_ -> [(s, ())]) -failure = BM (\e s -> []) -BM m ||| BM n = BM (\e s -> m e s ++ n e s) +failure = BM (\s -> []) +BM m ||| BM n = BM (\s -> m s ++ n s) -instance Monad (BacktrackM e s) where - return a = BM (\e s -> [(s, a)]) - BM m >>= k = BM (\e s -> concat [ n e s' | (s', a) <- m e s, let BM n = k a ]) +instance Monad (BacktrackM s) where + return a = BM (\s -> [(s, a)]) + BM m >>= k = BM (\s -> concat [ n s' | (s', a) <- m s, let BM n = k a ]) fail _ = failure -} @@ -105,19 +102,17 @@ runB (B m) = m (:) [] -- BacktrackM = state monad transformer over the backtracking monad -newtype BacktrackM e s a = BM (e -> s -> Backtr (s, a)) +newtype BacktrackM s a = BM (s -> Backtr (s, a)) -runBM (BM m) e s = runB (m e s) +runBM (BM m) s = runB (m s) -readEnv = BM (\e s -> return (s, e)) -readState = BM (\e s -> return (s, s)) -writeState s = BM (\e _ -> return (s, ())) +readState = BM (\s -> return (s, s)) +writeState s = BM (\_ -> return (s, ())) -failure = BM (\e s -> failureB) -BM m ||| BM n = BM (\e s -> m e s |||| n e s) +failure = BM (\s -> failureB) +BM m ||| BM n = BM (\s -> m s |||| n s) -instance Monad (BacktrackM e s) where - return a = BM (\e s -> return (s, a)) - BM m >>= k = BM (\e s -> do (s', a) <- m e s - unBM (k a) e s') +instance Monad (BacktrackM s) where + return a = BM (\s -> return (s, a)) + BM m >>= k = BM (\s -> do (s', a) <- m s ; unBM (k a) s') where unBM (BM m) = m diff --git a/src/GF/Data/GeneralDeduction.hs b/src/GF/Data/GeneralDeduction.hs new file mode 100644 index 000000000..75511ee7a --- /dev/null +++ b/src/GF/Data/GeneralDeduction.hs @@ -0,0 +1,117 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simple implementation of deductive chart parsing +----------------------------------------------------------------------------- + +module GF.NewParsing.GeneralChart + (-- * Type definition + ParseChart, + -- * Main functions + chartLookup, + buildChart, buildChartM, + -- * Probably not needed + emptyChart, + chartMember, + chartInsert, chartInsertM, + chartList, + addToChart, addToChartM + ) where + +-- import Trace + +import GF.Data.RedBlackSet +import Monad (foldM) + +---------------------------------------------------------------------- +-- main functions + +chartLookup :: (Ord item, Ord key) => ParseChart item key -> key -> [item] +chartList :: (Ord item, Ord key) => ParseChart item key -> [item] +buildChart :: (Ord item, Ord key) => + (item -> key) -- ^ key lookup function + -> [ParseChart item key -> item -> [item]] -- ^ list of inference rules as functions + -- from triggering items to lists of items + -> [item] -- ^ initial chart + -> ParseChart item key -- ^ final chart +buildChartM :: (Ord item, Ord key) => + (item -> [key]) -- ^ many-valued key lookup function + -> [ParseChart item key -> item -> [item]] -- ^ list of inference rules as functions + -- from triggering items to lists of items + -> [item] -- ^ initial chart + -> ParseChart item key -- ^ final chart + +buildChart keyof rules axioms = addItems axioms emptyChart + where addItems [] = id + addItems (item:items) = addItems items . addItem item + -- addItem item | trace ("+ "++show item++"\n") False = undefined + addItem item = addToChart item (keyof item) + (\chart -> foldr (consequence item) chart rules) + consequence item rule chart = addItems (rule chart item) chart + +buildChartM keysof rules axioms = addItems axioms emptyChart + where addItems [] = id + addItems (item:items) = addItems items . addItem item + -- addItem item | trace ("+ "++show item++"\n") False = undefined + addItem item = addToChartM item (keysof item) + (\chart -> foldr (consequence item) chart rules) + consequence item rule chart = addItems (rule chart item) chart + +-- probably not needed + +emptyChart :: (Ord item, Ord key) => ParseChart item key +chartMember :: (Ord item, Ord key) => ParseChart item key + -> item -> key -> Bool +chartInsert :: (Ord item, Ord key) => ParseChart item key + -> item -> key -> Maybe (ParseChart item key) +chartInsertM :: (Ord item, Ord key) => ParseChart item key + -> item -> [key] -> Maybe (ParseChart item key) + +addToChart :: (Ord item, Ord key) => item -> key + -> (ParseChart item key -> ParseChart item key) + -> ParseChart item key -> ParseChart item key +addToChart item keys after chart = maybe chart after (chartInsert chart item keys) + +addToChartM :: (Ord item, Ord key) => item -> [key] + -> (ParseChart item key -> ParseChart item key) + -> ParseChart item key -> ParseChart item key +addToChartM item keys after chart = maybe chart after (chartInsertM chart item keys) + + +-------------------------------------------------------------------------------- +-- key charts as red/black trees + +newtype ParseChart item key = KC (RedBlackMap key item) + deriving Show + +emptyChart = KC rbmEmpty +chartMember (KC tree) item key = rbmElem key item tree +chartLookup (KC tree) key = rbmLookup key tree +chartList (KC tree) = concatMap snd (rbmList tree) +chartInsert (KC tree) item key = fmap KC (rbmInsert key item tree) + +chartInsertM (KC tree) item keys = fmap KC (foldM insertItem tree keys) + where insertItem tree key = rbmInsert key item tree + +--------------------------------------------------------------------------------} + + +{-------------------------------------------------------------------------------- +-- key charts as unsorted association lists -- OBSOLETE! + +newtype Chart item key = SC [(key, item)] + +emptyChart = SC [] +chartMember (SC chart) item key = (key,item) `elem` chart +chartInsert (SC chart) item key = if (key,item) `elem` chart then Nothing else Just (SC ((key,item):chart)) +chartLookup (SC chart) key = [ item | (key',item) <- chart, key == key' ] +chartList (SC chart) = map snd chart +--------------------------------------------------------------------------------} + diff --git a/src/GF/Data/IncrementalDeduction.hs b/src/GF/Data/IncrementalDeduction.hs new file mode 100644 index 000000000..072a1334f --- /dev/null +++ b/src/GF/Data/IncrementalDeduction.hs @@ -0,0 +1,64 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Implementation of /incremental/ deductive parsing, +-- i.e. parsing one word at the time. +----------------------------------------------------------------------------- + +module GF.NewParsing.IncrementalChart + (-- * Type definitions + IncrementalChart, + -- * Functions + chartLookup, + buildChart, + chartList + ) where + +import Array +import GF.Data.SortedList +import GF.Data.Assoc + +---------------------------------------------------------------------- +-- main functions + +chartLookup :: (Ord item, Ord key) => + IncrementalChart item key + -> Int -> key -> SList item + +buildChart :: (Ord item, Ord key) => + (item -> key) -- ^ key lookup function + -> (Int -> item -> SList item) -- ^ all inference rules for position k, collected + -> (Int -> SList item) -- ^ all axioms for position k, collected + -> (Int, Int) -- ^ input bounds + -> IncrementalChart item key + +chartList :: (Ord item, Ord key) => + IncrementalChart item key -- ^ the final chart + -> (Int -> item -> edge) -- ^ function building an edge from + -- the position and the item + -> [edge] + +type IncrementalChart item key = Array Int (Assoc key (SList item)) + +---------- + +chartLookup chart k key = (chart ! k) ? key + +buildChart keyof rules axioms bounds = finalChartArray + where buildState k = limit (rules k) $ axioms k + finalChartList = map buildState [fst bounds .. snd bounds] + finalChartArray = listArray bounds $ map stateAssoc finalChartList + stateAssoc state = accumAssoc id [ (keyof item, item) | item <- state ] + +chartList chart combine = [ combine k item | + (k, state) <- assocs chart, + item <- concatMap snd $ aAssocs state ] + + diff --git a/src/GF/Data/SortedList.hs b/src/GF/Data/SortedList.hs index 0b340b533..8f96bdc59 100644 --- a/src/GF/Data/SortedList.hs +++ b/src/GF/Data/SortedList.hs @@ -1,13 +1,12 @@ ---------------------------------------------------------------------- -- | --- Module : SortedList -- Maintainer : Peter Ljunglöf -- Stability : stable -- Portability : portable -- --- > CVS $Date: 2005/03/21 14:17:39 $ +-- > CVS $Date: 2005/04/11 13:52:49 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Sets as sorted lists -- @@ -18,29 +17,37 @@ -- * /O(n^2)/ fixed point iteration ----------------------------------------------------------------------------- -module GF.Data.SortedList ( SList, - nubsort, union, - (<++>), (<\\>), (<**>), - limit, - hasCommonElements, subset, - groupPairs, groupUnion - ) where +module GF.Data.SortedList + ( -- * type declarations + SList, SMap, + -- * set operations + nubsort, union, + (<++>), (<\\>), (<**>), + limit, + hasCommonElements, subset, + -- * map operations + groupPairs, groupUnion, + unionMap, mergeMap + ) where import List (groupBy) +import GF.Data.Utilities (split, foldMerge) -- | The list must be sorted and contain no duplicates. type SList a = [a] --- | Group a set of key-value pairs into --- a set of unique keys with sets of values -groupPairs :: Ord a => SList (a, b) -> SList (a, SList b) +-- | A sorted map also has unique keys, +-- i.e. 'map fst m :: SList a', if 'm :: SMap a b' +type SMap a b = SList (a, b) + +-- | Group a set of key-value pairs into a sorted map +groupPairs :: Ord a => SList (a, b) -> SMap a (SList b) groupPairs = map mapFst . groupBy eqFst where mapFst as = (fst (head as), map snd as) eqFst a b = fst a == fst b --- | Group a set of key-(sets-of-values) pairs into --- a set of unique keys with sets of values -groupUnion :: (Ord a, Ord b) => SList (a, SList b) -> SList (a, SList b) +-- | Group a set of key-(sets-of-values) pairs into a sorted map +groupUnion :: (Ord a, Ord b) => SList (a, SList b) -> SMap a (SList b) groupUnion = map unionSnd . groupPairs where unionSnd (a, bs) = (a, union bs) @@ -57,13 +64,25 @@ xs `subset` ys = null (xs <\\> ys) nubsort :: Ord a => [a] -> SList a nubsort = union . map return +-- | the union of a list of sorted maps +unionMap :: Ord a => (b -> b -> b) + -> [SMap a b] -> SMap a b +unionMap plus = foldMerge (mergeMap plus) [] + +-- | merging two sorted maps +mergeMap :: Ord a => (b -> b -> b) + -> SMap a b -> SMap a b -> SMap a b +mergeMap plus [] abs = abs +mergeMap plus abs [] = abs +mergeMap plus abs@(ab@(a,bs):abs') cds@(cd@(c,ds):cds') + = case compare a c of + EQ -> (a, plus bs ds) : mergeMap plus abs' cds' + LT -> ab : mergeMap plus abs' cds + GT -> cd : mergeMap plus abs cds' + -- | The union of a list of sets union :: Ord a => [SList a] -> SList a -union [] = [] -union [as] = as -union abs = let (as, bs) = split abs in union as <++> union bs - where split (a:b:abs) = let (as, bs) = split abs in (a:as, b:bs) - split as = (as, []) +union = foldMerge (<++>) [] -- | The union of two sets (<++>) :: Ord a => SList a -> SList a -> SList a diff --git a/src/GF/Data/Utilities.hs b/src/GF/Data/Utilities.hs new file mode 100644 index 000000000..6f93add28 --- /dev/null +++ b/src/GF/Data/Utilities.hs @@ -0,0 +1,53 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic functions not in the standard libraries +----------------------------------------------------------------------------- + + +module GF.Data.Utilities where + +-- * functions on lists + +sameLength :: [a] -> [a] -> Bool +sameLength [] [] = True +sameLength (_:xs) (_:ys) = sameLength xs ys +sameLength _ _ = False + +lookupList :: Eq a => a -> [(a, b)] -> [b] +lookupList a [] = [] +lookupList a (p:ps) | a == fst p = snd p : lookupList a ps + | otherwise = lookupList a ps + +split :: [a] -> ([a], [a]) +split (x : y : as) = (x:xs, y:ys) + where (xs, ys) = split as +split as = (as, []) + +splitBy :: (a -> Bool) -> [a] -> ([a], [a]) +splitBy p [] = ([], []) +splitBy p (a : as) = if p a then (a:xs, ys) else (xs, a:ys) + where (xs, ys) = splitBy p as + +foldMerge :: (a -> a -> a) -> a -> [a] -> a +foldMerge merge zero = fm + where fm [] = zero + fm [a] = a + fm abs = let (as, bs) = split abs in fm as `merge` fm bs + +-- * functions on pairs + +mapFst :: (a -> a') -> (a, b) -> (a', b) +mapFst f (a, b) = (f a, b) + +mapSnd :: (b -> b') -> (a, b) -> (a, b') +mapSnd f (a, b) = (a, f b) + + diff --git a/src/GF/Formalism/CFG.hs b/src/GF/Formalism/CFG.hs new file mode 100644 index 000000000..2eb090131 --- /dev/null +++ b/src/GF/Formalism/CFG.hs @@ -0,0 +1,50 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:49 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG formalism +----------------------------------------------------------------------------- + +module GF.Formalism.CFG where + +import GF.Formalism.Utilities +import GF.Infra.Print +import GF.Data.Assoc (accumAssoc) +import GF.Data.SortedList (groupPairs) +import GF.Data.Utilities (mapSnd) + +------------------------------------------------------------ +-- type definitions + +type CFGrammar c n t = [CFRule c n t] +data CFRule c n t = CFRule c [Symbol c t] n + deriving (Eq, Ord, Show) + +type CFChart c n t = CFGrammar (Edge c) n t + + +------------------------------------------------------------ +-- building syntax charts from grammars + +grammar2chart :: (Ord n, Ord e) => CFGrammar e n t -> SyntaxChart n e +grammar2chart cfchart = accumAssoc groupPairs $ + [ (lhs, (name, filterCats rhs)) | + CFRule lhs rhs name <- cfchart ] + + +---------------------------------------------------------------------- +-- pretty-printing + +instance (Print n, Print c, Print t) => Print (CFRule c n t) where + prt (CFRule cat rhs name) = prt name ++ " : " ++ prt cat ++ + ( if null rhs then "" + else " --> " ++ prtSep " " rhs ) + prtList = prtSep "\n" + + diff --git a/src/GF/Formalism/GCFG.hs b/src/GF/Formalism/GCFG.hs new file mode 100644 index 000000000..407b85bc5 --- /dev/null +++ b/src/GF/Formalism/GCFG.hs @@ -0,0 +1,45 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:50 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic GCFG formalism (derived from Pollard 1984) +----------------------------------------------------------------------------- + +module GF.Formalism.GCFG + ( Grammar, Rule(..), Abstract(..), Concrete(..) + ) where + +import GF.Infra.Print + +---------------------------------------------------------------------- + +type Grammar c n l t = [Rule c n l t] +data Rule c n l t = Rule (Abstract c n) (Concrete l t) + deriving (Eq, Ord, Show) + +data Abstract cat name = Abs cat [cat] name + deriving (Eq, Ord, Show) +data Concrete lin term = Cnc lin [lin] term + deriving (Eq, Ord, Show) + +---------------------------------------------------------------------- + +instance (Print c, Print n, Print l, Print t) => Print (Rule n c l t) where + prt (Rule abs cnc) = prt abs ++ " := " ++ prt cnc + prtList = prtSep "\n" + +instance (Print c, Print n) => Print (Abstract c n) where + prt (Abs cat args name) = prt name ++ ". " ++ prt cat ++ + ( if null args then "" + else " -> " ++ prtSep " " args ) + +instance (Print l, Print t) => Print (Concrete l t) where + prt (Cnc lcat args term) = prt term ++ " : " ++ prt lcat ++ + ( if null args then "" + else " / " ++ prtSep " " args) diff --git a/src/GF/Formalism/MCFG.hs b/src/GF/Formalism/MCFG.hs new file mode 100644 index 000000000..b4abdc76a --- /dev/null +++ b/src/GF/Formalism/MCFG.hs @@ -0,0 +1,47 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:50 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Definitions of multiple context-free grammars +----------------------------------------------------------------------------- + +module GF.Formalism.MCFG where + +import GF.Formalism.Utilities +import GF.Formalism.GCFG + +import GF.Infra.Print + +------------------------------------------------------------ +-- grammar types + +-- | the lables in the linearization record should be in the same +-- order as specified by the linearization type @[lbl]@ +type MCFGrammar cat name lbl tok = Grammar cat name [lbl] [Lin cat lbl tok] +type MCFRule cat name lbl tok = Rule cat name [lbl] [Lin cat lbl tok] + +-- | variants are encoded as several linearizations with the same label +data Lin cat lbl tok = Lin lbl [Symbol (cat, lbl, Int) tok] + deriving (Eq, Ord, Show) + +instantiateArgs :: [cat] -> Lin cat' lbl tok -> Lin cat lbl tok +instantiateArgs args (Lin lbl lin) = Lin lbl (map instSym lin) + where instSym = mapSymbol instCat id + instCat (_, lbl, nr) = (args !! nr, lbl, nr) + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print l, Print t) => Print (Lin c l t) where + prt (Lin lbl lin) = prt lbl ++ " = " ++ prtSep " " (map (symbol prArg (show.prt)) lin) + where prArg (cat, lbl, nr) = prt cat ++ "@" ++ prt nr ++ prt lbl + prtList = prtBefore "\n\t" + + + diff --git a/src/GF/Formalism/SimpleGFC.hs b/src/GF/Formalism/SimpleGFC.hs new file mode 100644 index 000000000..78837a975 --- /dev/null +++ b/src/GF/Formalism/SimpleGFC.hs @@ -0,0 +1,217 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:50 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplistic GFC format +----------------------------------------------------------------------------- + +module GF.Formalism.SimpleGFC where + +import Monad (liftM) +import qualified AbsGFC +import qualified Ident +import GF.Formalism.GCFG +import GF.Infra.Print + +---------------------------------------------------------------------- + +-- * basic (leaf) types + +type Name = Ident.Ident +type Cat = Ident.Ident +type Constr = AbsGFC.CIdent +type Var = Ident.Ident +type Token = String +type Label = AbsGFC.Label + +-- ** type coercions etc + +constr2name :: Constr -> Name +constr2name (AbsGFC.CIQ _ name) = name + +anyVar :: Var +anyVar = Ident.wildIdent + +---------------------------------------------------------------------- + +-- * simple GFC + +type SimpleGrammar = Grammar Decl Name LinType (Maybe Term) +type SimpleRule = Rule Decl Name LinType (Maybe Term) + +-- ** dependent type declarations + +data Decl = Var ::: Type + deriving (Eq, Ord, Show) +data Type = Cat :@ [Atom] + deriving (Eq, Ord, Show) +data Atom = ACon Constr + | AVar Var + deriving (Eq, Ord, Show) + +decl2cat :: Decl -> Cat +decl2cat (_ ::: (cat :@ _)) = cat + +-- ** linearization types and terms + +data LinType = RecT [(Label, LinType)] + | TblT LinType LinType + | ConT Constr [Term] + | StrT + deriving (Eq, Ord, Show) + +isBaseType :: LinType -> Bool +isBaseType (ConT _ _) = True +isBaseType (StrT) = True +isBaseType _ = False + +data Term = Arg Int Cat Path -- ^ argument variable, the 'Path' is a path + -- pointing into the term + | Constr :^ [Term] -- ^ constructor + | Rec [(Label, Term)] -- ^ record + | Term :. Label -- ^ record projection + | Tbl [(Term, Term)] -- ^ table of patterns\/terms + | Term :! Term -- ^ table selection + | Variants [Term] -- ^ variants + | Term :++ Term -- ^ concatenation + | Token Token -- ^ single token + | Empty -- ^ empty string + | Wildcard -- ^ wildcard pattern variable + | Var Var -- ^ bound pattern variable + + -- Res CIdent -- resource identifier + -- Int Integer -- integer + deriving (Eq, Ord, Show) + +-- ** calculations on terms + +(+.) :: Term -> Label -> Term +Variants terms +. lbl = variants $ map (+. lbl) terms +Rec record +. lbl = maybe err id $ lookup lbl record + where err = error $ "(+.), label not in record: " ++ show (Rec record) ++ " +. " ++ show lbl +Arg arg cat path +. lbl = Arg arg cat (path ++. lbl) +term +. lbl = term :. lbl + +(+!) :: Term -> Term -> Term +Variants terms +! pat = variants $ map (+! pat) terms +term +! Variants pats = variants $ map (term +!) pats +term +! arg@(Arg _ _ _) = term :! arg +Tbl table +! pat = maybe err id $ lookup pat table + where err = error $ "(+!), pattern not in table: " ++ show (Tbl table) ++ " +! " ++ show pat +Arg arg cat path +! pat = Arg arg cat (path ++! pat) +term +! pat = term :! pat + +(?++) :: Term -> Term -> Term +Variants terms ?++ term = variants $ map (?++ term) terms +term ?++ Variants terms = variants $ map (term ?++) terms +Empty ?++ term = term +term ?++ Empty = term +term1 ?++ term2 = term1 :++ term2 + +variants :: [Term] -> Term +variants terms0 = case concatMap flatten terms0 of + [term] -> term + terms -> Variants terms + where flatten (Variants ts) = ts + flatten t = [t] + +-- ** enumerations + +enumerateTerms :: Maybe Term -> LinType -> [Term] +enumerateTerms arg (StrT) = maybe err return arg + where err = error "enumeratePatterns: parameter type should not be string" +enumerateTerms arg (ConT _ terms) = terms +enumerateTerms arg (RecT rtype) + = liftM Rec $ mapM enumAssign rtype + where enumAssign (lbl, ctype) = liftM ((,) lbl) $ enumerateTerms arg ctype +enumerateTerms arg (TblT ptype ctype) + = liftM Tbl $ mapM enumCase $ enumeratePatterns ptype + where enumCase pat = liftM ((,) pat) $ enumerateTerms (fmap (+! pat) arg) ctype + +enumeratePatterns :: LinType -> [Term] +enumeratePatterns = enumerateTerms Nothing + +---------------------------------------------------------------------- + +-- * paths of record projections and table selections + +newtype Path = Path [Either Label Term] deriving (Eq, Ord, Show) + +emptyPath :: Path +emptyPath = Path [] + +-- ** calculations on paths + +(++.) :: Path -> Label -> Path +Path path ++. lbl = Path (Left lbl : path) + +(++!) :: Path -> Term -> Path +Path path ++! sel = Path (Right sel : path) + +lintypeFollowPath :: Path -> LinType -> LinType +lintypeFollowPath (Path path) = follow path + where follow [] ctype = ctype + follow (Right pat : path) (TblT _ ctype) = follow path ctype + follow (Left lbl : path) (RecT rec) + = maybe err (follow path) $ lookup lbl rec + where err = error $ "follow: " ++ prt rec ++ " . " ++ prt lbl + +termFollowPath :: Path -> Term -> Term +termFollowPath (Path path) = follow (reverse path) + where follow [] term = term + follow (Right pat : path) term = follow path (term +! pat) + follow (Left lbl : path) term = follow path (term +. lbl) + +lintype2paths :: Path -> LinType -> [Path] +lintype2paths path (ConT _ _) = [] +lintype2paths path (StrT) = [ path ] +lintype2paths path (RecT rec) = concat [ lintype2paths (path ++. lbl) ctype | + (lbl, ctype) <- rec ] +lintype2paths path (TblT pt vt) = concat [ lintype2paths (path ++! pat) vt | + pat <- enumeratePatterns pt ] + +---------------------------------------------------------------------- + +instance Print Decl where + prt (var ::: typ) + | var == anyVar = prt typ + | otherwise = prt var ++ ":" ++ prt typ + +instance Print Type where + prt (cat :@ ats) = prt cat ++ prtList ats + +instance Print Atom where + prt (ACon con) = prt con + prt (AVar var) = "?" ++ prt var + +instance Print LinType where + prt (RecT rec) = "{" ++ concat [ prt l ++ ":" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}" + prt (TblT t1 t2) = "(" ++ prt t1 ++ " => " ++ prt t2 ++ ")" + prt (ConT t ts) = prt t ++ "[" ++ prtSep "|" ts ++ "]" + prt (StrT) = "Str" + +instance Print Term where + prt (Arg n c p) = prt c ++ "@" ++ prt n ++ "(" ++ prt p ++ ")" + prt (c :^ []) = prt c + prt (c :^ ts) = prt c ++ prtList ts + prt (Rec rec) = "{" ++ concat [ prt l ++ "=" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}" + prt (Tbl tbl) = "[" ++ concat [ prt p ++ "=>" ++ prt t ++ "; " | (p,t) <- tbl ] ++ "]" + prt (Variants ts) = "{| " ++ prtSep " | " ts ++ " |}" + prt (t1 :++ t2) = prt t1 ++ "++" ++ prt t2 + prt (Token t) = prt t + prt (Empty) = "[]" + prt (Wildcard) = "_" + prt (term :. lbl) = prt term ++ "." ++ prt lbl + prt (term :! sel) = prt term ++ "!" ++ prt sel + prt (Var var) = "?" ++ prt var + +instance Print Path where + prt (Path path) = concatMap prtEither (reverse path) + where prtEither (Left lbl) = "." ++ prt lbl + prtEither (Right patt) = "!" ++ prt patt diff --git a/src/GF/Formalism/Symbol.hs b/src/GF/Formalism/Symbol.hs new file mode 100644 index 000000000..184dd1023 --- /dev/null +++ b/src/GF/Formalism/Symbol.hs @@ -0,0 +1,46 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:50 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic type declarations and functions to be used in grammar formalisms +----------------------------------------------------------------------------- + + +module GF.Formalism.Symbol where + +import GF.Infra.Print + +------------------------------------------------------------ +-- symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow . prt) + where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\"" + mkEsc '\\' = "\\\\" + mkEsc '\"' = "\\\"" + mkEsc '\n' = "\\n" + mkEsc '\t' = "\\t" + mkEsc chr = [chr] + prtList = prtSep " " + + + diff --git a/src/GF/Formalism/Utilities.hs b/src/GF/Formalism/Utilities.hs new file mode 100644 index 000000000..166534bc4 --- /dev/null +++ b/src/GF/Formalism/Utilities.hs @@ -0,0 +1,271 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:50 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic type declarations and functions for grammar formalisms +----------------------------------------------------------------------------- + + +module GF.Formalism.Utilities where + +import Monad +import Array +import List (groupBy) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.Utilities (sameLength, foldMerge, splitBy) + +import GF.Infra.Print + +------------------------------------------------------------ +-- * symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + +filterCats :: [Symbol c t] -> [c] +filterCats syms = [ cat | Cat cat <- syms ] + +filterToks :: [Symbol c t] -> [t] +filterToks syms = [ tok | Tok tok <- syms ] + + +------------------------------------------------------------ +-- * edges + +data Edge s = Edge Int Int s + deriving (Eq, Ord, Show) + +instance Functor Edge where + fmap f (Edge i j s) = Edge i j (f s) + + +------------------------------------------------------------ +-- * representaions of input tokens + +data Input t = MkInput { inputEdges :: [Edge t], + inputBounds :: (Int, Int), + inputFrom :: Array Int (Assoc t [Int]), + inputTo :: Array Int (Assoc t [Int]), + inputToken :: Assoc t [(Int, Int)] + } + +makeInput :: Ord t => [Edge t] -> Input t +input :: Ord t => [t] -> Input t +inputMany :: Ord t => [[t]] -> Input t + +instance Show t => Show (Input t) where + show input = "makeInput " ++ show (inputEdges input) + +---------- + +makeInput inEdges | null inEdges = input [] + | otherwise = MkInput inEdges inBounds inFrom inTo inToken + where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ] + where minmax (a, b) (a', b') = (min a a', max b b') + inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $ + [ (i, [(tok, j)]) | Edge i j tok <- inEdges ] + inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds + [ (j, [(tok, i)]) | Edge i j tok <- inEdges ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +input toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = zipWith3 Edge [0..] [1..] toks + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +inputMany toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ] + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ] + ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ + [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + + +------------------------------------------------------------ +-- * charts, forests & trees + +-- | The values of the chart, a list of key-daughters pairs, +-- has unique keys. In essence, it is a map from 'n' to daughters. +-- The daughters should be a set (not necessarily sorted) of rhs's. +type SyntaxChart n e = Assoc e [(n, [[e]])] + +-- better(?) representation of forests: +-- data Forest n = F (SMap n (SList [Forest n])) Bool +-- == +-- type Forest n = GeneralTrie n (SList [Forest n]) Bool +-- (the Bool == isMeta) + +data SyntaxForest n = FMeta + | FNode n [[SyntaxForest n]] + -- ^ The outer list should be a set (not necessarily sorted) + -- of possible alternatives. Ie. the outer list + -- is a disjunctive node, and the inner lists + -- are (conjunctive) concatenative nodes + deriving (Eq, Ord, Show) + +data SyntaxTree n = TMeta | TNode n [SyntaxTree n] + deriving (Eq, Ord, Show) + +forestName :: SyntaxForest n -> Maybe n +forestName (FNode n _) = Just n +forestName (FMeta) = Nothing + +treeName :: SyntaxTree n -> Maybe n +treeName (TNode n _) = Just n +treeName (TMeta) = Nothing + +instance Functor SyntaxTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap f (TMeta) = TMeta + +instance Functor SyntaxForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap f (FMeta) = FMeta + +{- måste tänka mer på detta: +compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n) +compactForests = map joinForests . groupBy eqNames . sortForests + where eqNames f g = forestName f == forestName g + sortForests = foldMerge mergeForests [] . map return + mergeForests [] gs = gs + mergeForests fs [] = fs + mergeForests fs@(f:fs') gs@(g:gs') + = case forestName f `compare` forestName g of + LT -> f : mergeForests fs' gs + GT -> g : mergeForests fs gs' + EQ -> f : g : mergeForests fs' gs' + joinForests fs = case forestName (head fs) of + Nothing -> FMeta + Just name -> FNode name $ + compactDaughters $ + concat [ fss | FNode _ fss <- fs ] + compactDaughters fss = case head fss of + []  -> [[]] + [_] -> map return $ compactForests $ concat fss + _ -> nubsort fss +-} + +-- ** conversions between representations + +forest2trees :: SyntaxForest n -> SList (SyntaxTree n) +forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees +forest2trees (FMeta) = [TMeta] + +chart2forests :: (Ord n, Ord e) => + SyntaxChart n e -- ^ The complete chart + -> (e -> Bool) -- ^ When is an edge 'FMeta'? + -> [e] -- ^ The starting edges + -> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together. + -- In essence, the result is a map from 'n' to forest daughters + +-- simplest implementation +chart2forests chart isMeta = concatMap edge2forests + where edge2forests edge = if isMeta edge then [FMeta] + else map item2forest $ chart ? edge + item2forest (name, children) = FNode name $ children >>= mapM edge2forests + +{- +-- more intelligent(?) implementation, +-- requiring that charts and forests are sorted maps and sorted sets +chart2forests chart isMeta = es2fs + where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e + es2fs es = if null metas then fs else FMeta : fs + where (metas, nonMetas) = splitBy isMeta es + fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas + i2f (name, children) = FNode name $ + case head children of + [] -> [[]] + [_] -> map return $ es2fs $ concat children + _ -> children >>= mapM e2fs +-} + + +-- ** operations on forests + +unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n) +unifyManyForests = foldM unifyForests FMeta + +-- | two forests can be unified, if either is 'FMeta', or both have the same parent, +-- and all children can be unified +unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n) +unifyForests FMeta forest = return forest +unifyForests forest FMeta = return forest +unifyForests (FNode name1 children1) (FNode name2 children2) + | name1 == name2 && not (null children) = return $ FNode name1 children + | otherwise = fail "forest unification failure" + where children = [ forests | forests1 <- children1, forests2 <- children2, + sameLength forests1 forests2, + forests <- zipWithM unifyForests forests1 forests2 ] + + +-- ** operations on trees + +unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n) +unifyManyTrees = foldM unifyTrees TMeta + +-- | two trees can be unified, if either is 'TMeta', +-- or both have the same parent, and their children can be unified +unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n) +unifyTrees TMeta tree = return tree +unifyTrees tree TMeta = return tree +unifyTrees (TNode name1 children1) (TNode name2 children2) + | name1 == name2 && sameLength children1 children2 + = liftM (TNode name1) $ zipWithM unifyTrees children1 children2 + | otherwise = fail "tree unification failure" + + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow . prt) + where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\"" + mkEsc '\\' = "\\\\" + mkEsc '\"' = "\\\"" + mkEsc '\n' = "\\n" + mkEsc '\t' = "\\t" + mkEsc chr = [chr] + prtList = prtSep " " + +instance Print t => Print (Input t) where + prt input = "input " ++ prt (inputEdges input) + +instance (Print s) => Print (Edge s) where + prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]" + prtList = prtSep "" + +instance (Print s) => Print (SyntaxTree s) where + prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}" + prt (TMeta) = "?" + prtList = prtAfter "\n" + +instance (Print s) => Print (SyntaxForest s) where + prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}" + prt (FMeta) = "?" + prtList = prtAfter "\n" + + diff --git a/src/GF/Infra/Option.hs b/src/GF/Infra/Option.hs index 401e02cab..41ed3c447 100644 --- a/src/GF/Infra/Option.hs +++ b/src/GF/Infra/Option.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/18 10:17:10 $ +-- > CVS $Date: 2005/04/11 13:53:38 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.23 $ +-- > CVS $Revision: 1.24 $ -- -- Options and flags used in GF shell commands and files. -- @@ -151,7 +151,7 @@ dontParse = iOpt "read" showAbstr, showXML, showOld, showLatex, showFullForm, showEBNF, showCF, showWords, showOpts, isCompiled, isHaskell, noCompOpers, retainOpers, - newParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option + newParser, newerParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option defaultGrOpts :: [Option] showAbstr = iOpt "abs" @@ -170,6 +170,7 @@ noCompOpers = iOpt "nocomp" retainOpers = iOpt "retain" defaultGrOpts = [] newParser = iOpt "new" +newerParser = iOpt "newer" noCF = iOpt "nocf" checkCirc = iOpt "nocirc" noCheckCirc = iOpt "nocheckcirc" diff --git a/src/GF/Infra/Print.hs b/src/GF/Infra/Print.hs new file mode 100644 index 000000000..8feeae3a0 --- /dev/null +++ b/src/GF/Infra/Print.hs @@ -0,0 +1,176 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:50 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Pretty-printing +----------------------------------------------------------------------------- + +module GF.Infra.Print + (Print(..), + prtBefore, prtAfter, prtSep, + prtBeforeAfter, + prIO + ) where + +-- haskell modules: +import List (intersperse) +import Char (toUpper) +-- gf modules: +import Operations (Err(..)) +import Ident (Ident(..)) +import AbsGFC +import CF +import CFIdent +import qualified PrintGFC as P + +------------------------------------------------------------ + +prtBefore :: Print a => String -> [a] -> String +prtBefore before = prtBeforeAfter before "" + +prtAfter :: Print a => String -> [a] -> String +prtAfter after = prtBeforeAfter "" after + +prtSep :: Print a => String -> [a] -> String +prtSep sep = concat . intersperse sep . map prt + +prtBeforeAfter :: Print a => String -> String -> [a] -> String +prtBeforeAfter before after as = concat [ before ++ prt a ++ after | a <- as ] + +prIO :: Print a => a -> IO () +prIO = putStr . prt + +class Print a where + prt :: a -> String + prtList :: [a] -> String + prtList as = "[" ++ prtSep "," as ++ "]" + +instance Print a => Print [a] where + prt = prtList + +instance (Print a, Print b) => Print (a, b) where + prt (a, b) = "(" ++ prt a ++ "," ++ prt b ++ ")" + +instance (Print a, Print b, Print c) => Print (a, b, c) where + prt (a, b, c) = "(" ++ prt a ++ "," ++ prt b ++ "," ++ prt c ++ ")" + +instance (Print a, Print b, Print c, Print d) => Print (a, b, c, d) where + prt (a, b, c, d) = "(" ++ prt a ++ "," ++ prt b ++ "," ++ prt c ++ "," ++ prt d ++ ")" + +instance Print Char where + prt = return + prtList = id + +instance Print Int where + prt = show + +instance Print Integer where + prt = show + +instance Print a => Print (Maybe a) where + prt (Just a) = prt a + prt Nothing = "Nothing" + +instance Print a => Print (Err a) where + prt (Ok a) = prt a + prt (Bad str) = str + +---------------------------------------------------------------------- + +instance Print Ident where + prt = P.printTree + +instance Print Term where + prt (Arg arg) = prt arg + prt (con `Con` []) = prt con + prt (con `Con` terms) = prt con ++ "(" ++ prtSep ", " terms ++ ")" + prt (LI ident) = "$" ++ prt ident + prt (R record) = "{" ++ prtSep "; " record ++ "}" + prt (term `P` lbl) = prt term ++ "." ++ prt lbl + prt (T _ table) = "table{" ++ prtSep "; " table ++ "}" + prt (V _ terms) = "values{" ++ prtSep "; " terms ++ "}" + prt (term `S` sel) = "(" ++ prt term ++ " ! " ++ prt sel ++ ")" + prt (FV terms) = "variants{" ++ prtSep " | " terms ++ "}" + prt (term `C` term') = prt term ++ " " ++ prt term' + prt (EInt n) = prt n + prt (K tokn) = show (prt tokn) + prt (E) = show "" + +instance Print Patt where + prt (con `PC` []) = prt con + prt (con `PC` pats) = prt con ++ "(" ++ prtSep "," pats ++ ")" + prt (PV ident) = "$" ++ prt ident + prt (PW) = "_" + prt (PR record) = "{" ++ prtSep ";" record ++ "}" + +instance Print Label where + prt (L ident) = prt ident + prt (LV nr) = "$" ++ show nr + +instance Print Tokn where + prt (KS str) = str + prt tokn@(KP _ _) = show tokn + +instance Print ArgVar where + prt (A cat argNr) = prt cat ++ "#" ++ show argNr + +instance Print CIdent where + prt (CIQ _ ident) = prt ident + +instance Print Case where + prt (pats `Cas` term) = prtSep "|" pats ++ "=>" ++ prt term + +instance Print Assign where + prt (lbl `Ass` term) = prt lbl ++ "=" ++ prt term + +instance Print PattAssign where + prt (lbl `PAss` pat) = prt lbl ++ "=" ++ prt pat + +instance Print Atom where + prt (AC c) = prt c + prt (AD c) = "<" ++ prt c ++ ">" + prt (AV i) = "$" ++ prt i + prt (AM n) = "?" ++ show n + prt atom = show atom + +instance Print CType where + prt (RecType rtype) = "{" ++ prtSep "; " rtype ++ "}" + prt (Table ptype vtype) = "(" ++ prt ptype ++ " => " ++ prt vtype ++ ")" + prt (Cn cn) = prt cn + prt (TStr) = "Str" + +instance Print Labelling where + prt (lbl `Lbg` ctype) = prt lbl ++ ":" ++ prt ctype + +instance Print CFItem where + prt (CFTerm regexp) = prt regexp + prt (CFNonterm cat) = prt cat + +instance Print RegExp where + prt (RegAlts words) = "("++prtSep "|" words ++ ")" + prt (RegSpec tok) = prt tok + +instance Print CFTok where + prt (TS str) = str + prt (TC (c:str)) = '(' : toUpper c : ')' : str + prt (TL str) = show str + prt (TI n) = "#" ++ show n + prt (TV x) = "$" ++ prt x + prt (TM n s) = "?" ++ show n ++ s + +instance Print CFCat where + prt (CFCat (cid,lbl)) = prt cid ++ "-" ++ prt lbl + +instance Print CFFun where + prt (CFFun fun) = prt (fst fun) + +instance Print Exp where + prt = P.printTree + + diff --git a/src/GF/OldParsing/CFGrammar.hs b/src/GF/OldParsing/CFGrammar.hs new file mode 100644 index 000000000..6c6269626 --- /dev/null +++ b/src/GF/OldParsing/CFGrammar.hs @@ -0,0 +1,153 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFGrammar +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Definitions of context-free grammars, +-- parser information and chart conversion +---------------------------------------------------------------------- + +module GF.OldParsing.CFGrammar + (-- * Type definitions + Grammar, + Rule(..), + CFParser, + -- * Parser information + pInfo, + PInfo(..), + -- * Building parse charts + edges2chart, + -- * Grammar checking + checkGrammar + ) where + +import GF.System.Tracing + +-- haskell modules: +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +import qualified CF +-- parser modules: +import GF.OldParsing.Utilities +import GF.Printing.PrintParser + + +------------------------------------------------------------ +-- type definitions + +type Grammar n c t = [Rule n c t] +data Rule n c t = Rule c [Symbol c t] n + deriving (Eq, Ord, Show) + + +type CFParser n c t = PInfo n c t -> [c] -> Input t -> [Edge (Rule n c t)] +-- - - - - - - - - - - - - - - - - - ^^^ possible starting categories + + +------------------------------------------------------------ +-- parser information + +pInfo :: (Ord n, Ord c, Ord t) => Grammar n c t -> PInfo n c t + +data PInfo n c t + = PInfo { grammarTokens :: SList t, + nameRules :: Assoc n (SList (Rule n c t)), + topdownRules :: Assoc c (SList (Rule n c t)), + bottomupRules :: Assoc (Symbol c t) (SList (Rule n c t)), + emptyLeftcornerRules :: Assoc c (SList (Rule n c t)), + emptyCategories :: Set c, + cyclicCategories :: SList c, + -- ^^ONLY FOR DIRECT CYCLIC RULES!!! + leftcornerTokens :: Assoc c (SList t) + -- ^^DOES NOT WORK WITH EMPTY RULES!!! + } + +-- this is not permanent... +pInfo grammar = pInfo' (filter (not.isCyclic) grammar) + +pInfo' grammar = tracePrt "#parserInfo" prt $ + PInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks + where grToks = union [ nubsort [ tok | Tok tok <- rhs ] | Rule _ rhs _ <- grammar ] + nmRules = accumAssoc id [ (name, rule) | rule@(Rule _ _ name) <- grammar ] + tdRules = accumAssoc id [ (cat, rule) | rule@(Rule cat _ _) <- grammar ] + buRules = accumAssoc id [ (next, rule) | rule@(Rule _ (next:_) _) <- grammar ] + elcRules = accumAssoc id $ limit lc emptyRules + leftToks = accumAssoc id $ limit lc $ + nubsort [ (cat, token) | Rule cat (Tok token:_) _ <- grammar ] + lc (left, res) = nubsort [ (cat, res) | Rule cat _ _ <- buRules ? Cat left ] + emptyRules = nubsort [ (cat, rule) | rule@(Rule cat [] _) <- grammar ] + emptyCats = listSet $ limitEmpties $ map fst emptyRules + limitEmpties es = if es==es' then es else limitEmpties es' + where es' = nubsort [ cat | Rule cat rhs _ <- grammar, + all (symbol (`elem` es) (const False)) rhs ] + cyclicCats = nubsort [ cat | Rule cat [Cat cat'] _ <- grammar, cat == cat' ] + +isCyclic (Rule cat [Cat cat'] _) = cat==cat' +isCyclic _ = False + +------------------------------------------------------------ +-- building parse charts + +edges2chart :: (Ord n, Ord c, Ord t) => Input t -> + [Edge (Rule n c t)] -> ParseChart n (Edge c) + +---------- + +edges2chart input edges + = accumAssoc id [ (Edge i k cat, (name, children i k rhs)) | + Edge i k (Rule cat rhs name) <- edges ] + where children i k [] = [ [] | i == k ] + children i k (Tok tok:rhs) = [ rest | i <= k, + j <- (inputFrom input ! i) ? tok, + rest <- children j k rhs ] + children i k (Cat cat:rhs) = [ Edge i j cat : rest | i <= k, + j <- echart ? (i, cat), + rest <- children j k rhs ] + echart = accumAssoc id [ ((i, cat), j) | Edge i j (Rule cat _ _) <- edges ] + + +------------------------------------------------------------ +-- grammar checking + +checkGrammar :: (Ord n, Ord c, Ord t, Print n, Print c, Print t) => + Grammar n c t -> [String] + +---------- + +checkGrammar rules = [ "rhs category does not exist: " ++ prt cat ++ "\n" ++ + " in rule: " ++ prt rule | + rule@(Rule _ rhs _) <- rules, + Cat cat <- rhs, cat `notElem` cats ] + where cats = nubsort [ cat | Rule cat _ _ <- rules ] + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print n, Print c, Print t) => Print (Rule n c t) where + prt (Rule cat rhs name) = prt name ++ ". " ++ prt cat ++ " -> " ++ prt rhs ++ + (if null rhs then ".\n" else "\n") + prtList = concatMap prt + + +instance (Ord n, Ord c, Ord t) => Print (PInfo n c t) where + prt pI = "[ tokens=" ++ show (length (grammarTokens pI)) ++ + "; names=" ++ sla nameRules ++ + "; tdCats=" ++ sla topdownRules ++ + "; buCats=" ++ sla bottomupRules ++ + "; elcCats=" ++ sla emptyLeftcornerRules ++ + "; eCats=" ++ sla emptyCategories ++ + "; cCats=" ++ show (length (cyclicCategories pI)) ++ + -- "; lctokCats=" ++ sla leftcornerTokens ++ + " ]" + where sla f = show $ length $ aElems $ f pI + + diff --git a/src/GF/OldParsing/ConvertFiniteGFC.hs b/src/GF/OldParsing/ConvertFiniteGFC.hs new file mode 100644 index 000000000..61486023e --- /dev/null +++ b/src/GF/OldParsing/ConvertFiniteGFC.hs @@ -0,0 +1,283 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Calculating the finiteness of each type in a grammar +----------------------------------------------------------------------------- + +module GF.OldParsing.ConvertFiniteGFC where + +import Operations +import GFC +import MkGFC +import AbsGFC +import Ident (Ident(..)) +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.BacktrackM + +type Cat = Ident +type Name = Ident + +type CnvMonad a = BacktrackM () a + +convertGrammar :: CanonGrammar -> CanonGrammar +convertGrammar = canon2grammar . convertCanon . grammar2canon + +convertCanon :: Canon -> Canon +convertCanon (Gr modules) = Gr (map (convertModule split) modules) + where split = calcSplitable modules + +convertModule :: Splitable -> Module -> Module +convertModule split (Mod mtyp ext op fl defs) + = Mod mtyp ext op fl newDefs + where newDefs = solutions defMonad () + defMonad = member defs >>= convertDef split + +---------------------------------------------------------------------- +-- the main conversion function +convertDef :: Splitable -> Def -> CnvMonad Def + +-- converting abstract "cat" definitions +convertDef split (AbsDCat cat decls cidents) + = case splitableCat split cat of + Just newCats -> do newCat <- member newCats + return $ AbsDCat newCat decls cidents + Nothing -> do (newCat, newDecls) <- expandDecls cat decls + return $ AbsDCat newCat newDecls cidents + where expandDecls cat [] = return (cat, []) + expandDecls cat (decl@(Decl var typ) : decls) + = do (newCat, newDecls) <- expandDecls cat decls + let argCat = resultCat typ + case splitableCat split argCat of + Nothing -> return (newCat, decl : newDecls) + Just newArgs -> do newArg <- member newArgs + return (mergeArg newCat newArg, newDecls) + +-- converting abstract "fun" definitions +convertDef split (AbsDFun fun typ@(EAtom (AC (CIQ mod cat))) def) + = case splitableFun split fun of + Just newCat -> return (AbsDFun fun (EAtom (AC (CIQ mod newCat))) def) + Nothing -> do newTyp <- expandType split [] typ + return (AbsDFun fun newTyp def) +convertDef split (AbsDFun fun typ def) + = do newTyp <- expandType split [] typ + return (AbsDFun fun newTyp def) + +-- converting concrete "lincat" definitions +convertDef split (CncDCat cat ctype x y) + = case splitableCat split cat of + Just newCats -> do newCat <- member newCats + return $ CncDCat newCat ctype x y + Nothing -> return $ CncDCat cat ctype x y + +-- converting concrete "lin" definitions +convertDef split (CncDFun fun (CIQ mod cat) args linterm x) + = case splitableFun split fun of + Just newCat -> return $ CncDFun fun (CIQ mod newCat) args linterm x + Nothing -> return $ CncDFun fun (CIQ mod cat) args linterm x + +convertDef _ def = return def + +---------------------------------------------------------------------- +-- expanding type expressions + +expandType :: Splitable -> [(Ident, Cat)] -> Exp -> CnvMonad Exp +expandType split env (EProd x a@(EAtom (AC (CIQ mod cat))) b) + = case splitableCat split cat of + Nothing -> do b' <- expandType split env b + return (EProd x a b') + Just newCats -> do newCat <- member newCats + b' <- expandType split ((x,newCat):env) b + return (EProd x (EAtom (AC (CIQ mod newCat))) b') +expandType split env (EProd x a b) + = do a' <- expandType split env a + b' <- expandType split env b + return (EProd x a' b') +expandType split env app + = expandApp split env [] app + +expandApp :: Splitable -> [(Ident, Cat)] -> [Cat] -> Exp -> CnvMonad Exp +expandApp split env addons (EAtom (AC (CIQ mod cat))) + = return (EAtom (AC (CIQ mod (foldl mergeArg cat addons)))) +expandApp split env addons (EApp exp arg@(EAtom (AC (CIQ mod fun)))) + = case splitableFun split fun of + Just newCat -> expandApp split env (newCat:addons) exp + Nothing -> do exp' <- expandApp split env addons exp + return (EApp exp' arg) +expandApp split env addons (EApp exp arg@(EAtom (AV x))) + = case lookup x env of + Just newCat -> expandApp split env (newCat:addons) exp + Nothing -> do exp' <- expandApp split env addons exp + return (EApp exp' arg) + +---------------------------------------------------------------------- +-- splitable categories (finite, no dependencies) +-- they should also be used as some dependency + +type Splitable = (Assoc Cat [Cat], Assoc Name Cat) + +splitableCat :: Splitable -> Cat -> Maybe [Cat] +splitableCat = lookupAssoc . fst + +splitableFun :: Splitable -> Name -> Maybe Cat +splitableFun = lookupAssoc . snd + +calcSplitable :: [Module] -> Splitable +calcSplitable modules = (listAssoc splitableCats, listAssoc splitableFuns) + where splitableCats = tracePrt "splitableCats" (prtSep " ") $ + groupPairs $ nubsort + [ (cat, mergeFun fun cat) | (cat, fun) <- constantCats ] + + splitableFuns = tracePrt "splitableFuns" (prtSep " ") $ + nubsort + [ (fun, mergeFun fun cat) | (cat, fun) <- constantCats ] + + constantCats = tracePrt "constantCats" (prtSep " ") $ + [ (cat, fun) | + AbsDFun fun (EAtom (AC (CIQ _ cat))) _ <- absDefs, + dependentConstants ?= cat ] + + dependentConstants = listSet $ + tracePrt "dep consts" prt $ + dependentCats <\\> funCats + + funCats = tracePrt "fun cats" prt $ + nubsort [ resultCat typ | + AbsDFun _ typ@(EProd _ _ _) _ <- absDefs ] + + dependentCats = tracePrt "dep cats" prt $ + nubsort [ cat | AbsDCat _ decls _ <- absDefs, + Decl _ (EAtom (AC (CIQ _ cat))) <- decls ] + + absDefs = concat [ defs | Mod (MTAbs _) _ _ _ defs <- modules ] + + +---------------------------------------------------------------------- +-- utilities + +-- the main result category of a type expression +resultCat :: Exp -> Cat +resultCat (EProd _ _ b) = resultCat b +resultCat (EApp a _) = resultCat a +resultCat (EAtom (AC (CIQ _ cat))) = cat + +-- mergeing categories +mergeCats :: String -> String -> String -> Cat -> Cat -> Cat +mergeCats before middle after (IC cat) (IC arg) + = IC (before ++ cat ++ middle ++ arg ++ after) + +mergeFun, mergeArg :: Cat -> Cat -> Cat +mergeFun = mergeCats "{" ":" "}" +mergeArg = mergeCats "" "" "" + +---------------------------------------------------------------------- +-- obsolete? + +{- +type FiniteCats = Assoc Cat Integer + +calculateFiniteness :: Canon -> FiniteCats +calculateFiniteness canon@(Gr modules) + = trace2 "#typeInfo" (prt tInfo) $ + finiteCats + + where finiteCats = listAssoc [ (cat, fin) | (cat, Just fin) <- finiteInfo ] + finiteInfo = map finInfo groups + + finInfo :: (Cat, [[Cat]]) -> (Cat, Maybe Integer) + finInfo (cat, ctxts) + | cyclicCats ?= cat = (cat, Nothing) + | otherwise = (cat, fmap (sum . map product) $ + sequence (map (sequence . map lookFinCat) ctxts)) + + lookFinCat :: Cat -> Maybe Integer + lookFinCat cat = maybe (error "lookFinCat: Nothing") id $ + lookup cat finiteInfo + + cyclicCats :: Set Cat + cyclicCats = listSet $ + tracePrt "cyclic cats" prt $ + union $ map nubsort $ cyclesIn dependencies + + dependencies :: [(Cat, [Cat])] + dependencies = tracePrt "dependencies" (prtAfter "\n") $ + mapSnd (union . nubsort) groups + + groups :: [(Cat, [[Cat]])] + groups = tracePrt "groups" (prtAfter "\n") $ + mapSnd (map snd) $ groupPairs (nubsort allFuns) + + allFuns = tracePrt "all funs" (prtAfter "\n") $ + [ (cat, (fun, ctxt)) | + Mod (MTAbs _) _ _ _ defs <- modules, + AbsDFun fun typ _ <- defs, + let (cat, ctxt) = err error id $ typeForm typ ] + + tInfo = calculateTypeInfo 30 finiteCats (splitDefs canon) + +-- | stolen from 'Macros.qTypeForm', converted to GFC, and severely simplified +typeForm :: Monad m => Exp -> m (Cat, [Cat]) +typeForm t = case t of + EProd x a b -> do + (cat, ctxt) <- typeForm b + a' <- stripType a + return (cat, a':ctxt) + EApp c a -> do + (cat, _) <- typeForm c + return (cat, []) + EAtom (AC (CIQ _ con)) -> + return (con, []) + _ -> + fail $ "no normal form of type: " ++ prt t + +stripType :: Monad m => Exp -> m Cat +stripType (EApp c a) = stripType c +stripType (EAtom (AC (CIQ _ con))) = return con +stripType t = fail $ "can't strip type: " ++ prt t + +mapSnd f xs = [ (a, f b) | (a, b) <- xs ] +-} + +---------------------------------------------------------------------- +-- obsolete? + +{- +type SplitDefs = ([Def], [Def], [Def], [Def]) +----- AbsDCat AbsDFun CncDCat CncDFun + +splitDefs :: Canon -> SplitDefs +splitDefs (Gr modules) = foldr splitDef ([], [], [], []) $ + concat [ defs | Mod _ _ _ _ defs <- modules ] + +splitDef :: Def -> SplitDefs -> SplitDefs +splitDef ac@(AbsDCat _ _ _) (acs, afs, ccs, cfs) = (ac:acs, afs, ccs, cfs) +splitDef af@(AbsDFun _ _ _) (acs, afs, ccs, cfs) = (acs, af:afs, ccs, cfs) +splitDef cc@(CncDCat _ _ _ _) (acs, afs, ccs, cfs) = (acs, afs, cc:ccs, cfs) +splitDef cf@(CncDFun _ _ _ _ _) (acs, afs, ccs, cfs) = (acs, afs, ccs, cf:cfs) +splitDef _ sd = sd + +--calculateTypeInfo :: Integer -> FiniteCats -> SplitDefs -> ? +calculateTypeInfo maxFin allFinCats (acs, afs, ccs, cfs) + = (depCatsToExpand, catsToSplit) + where absDefsToExpand = tracePrt "absDefsToExpand" prt $ + [ ((cat, fin), cats) | + AbsDCat cat args _ <- acs, + not (null args), + cats <- mapM catOfDecl args, + fin <- lookupAssoc allFinCats cat, + fin <= maxFin + ] + (depCatsToExpand, argsCats') = unzip absDefsToExpand + catsToSplit = union (map nubsort argsCats') + catOfDecl (Decl _ exp) = err fail return $ stripType exp +-} diff --git a/src/GF/OldParsing/ConvertFiniteSimple.hs b/src/GF/OldParsing/ConvertFiniteSimple.hs new file mode 100644 index 000000000..7aac39cb2 --- /dev/null +++ b/src/GF/OldParsing/ConvertFiniteSimple.hs @@ -0,0 +1,121 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Calculating the finiteness of each type in a grammar +----------------------------------------------------------------------------- + +module GF.OldParsing.ConvertFiniteSimple + (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import Operations +import Ident (Ident(..)) +import GF.OldParsing.SimpleGFC +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Data.BacktrackM + +type CnvMonad a = BacktrackM () a + +convertGrammar :: Grammar -> Grammar +convertGrammar rules = solutions cnvMonad () + where split = calcSplitable rules + cnvMonad = member rules >>= convertRule split + +convertRule :: Splitable -> Rule -> CnvMonad Rule +convertRule split (Rule name typing term) + = do newTyping <- convertTyping split name typing + return $ Rule name newTyping term + +convertTyping :: Splitable -> Name -> Typing -> CnvMonad Typing +convertTyping split name (typ, decls) + = case splitableFun split name of + Just newCat -> return (newCat :@ [], decls) + Nothing -> expandTyping split [] typ decls [] + + +expandTyping :: Splitable -> [(Var, Cat)] -> Type -> [Decl] -> [Decl] -> CnvMonad Typing +expandTyping split env (cat :@ atoms) [] decls + = return (substAtoms split env cat atoms [], reverse decls) +expandTyping split env typ ((x ::: (xcat :@ xatoms)) : declsToDo) declsDone + = do env' <- calcNewEnv + expandTyping split env' typ declsToDo (decl : declsDone) + where decl = x ::: substAtoms split env xcat xatoms [] + calcNewEnv = case splitableCat split xcat of + Just newCats -> do newCat <- member newCats + return ((x,newCat) : env) + Nothing -> return env + +substAtoms :: Splitable -> [(Var, Cat)] -> Cat -> [Atom] -> [Atom] -> Type +substAtoms split env cat [] atoms = cat :@ reverse atoms +substAtoms split env cat (atom:atomsToDo) atomsDone + = case atomLookup split env atom of + Just newCat -> substAtoms split env (mergeArg cat newCat) atomsToDo atomsDone + Nothing -> substAtoms split env cat atomsToDo (atom : atomsDone) + +atomLookup split env (AVar x) = lookup x env +atomLookup split env (ACon con) = splitableFun split (constr2name con) + + +---------------------------------------------------------------------- +-- splitable categories (finite, no dependencies) +-- they should also be used as some dependency + +type Splitable = (Assoc Cat [Cat], Assoc Name Cat) + +splitableCat :: Splitable -> Cat -> Maybe [Cat] +splitableCat = lookupAssoc . fst + +splitableFun :: Splitable -> Name -> Maybe Cat +splitableFun = lookupAssoc . snd + +calcSplitable :: [Rule] -> Splitable +calcSplitable rules = (listAssoc splitableCats, listAssoc splitableFuns) + where splitableCats = tracePrt "splitableCats" (prtSep " ") $ + groupPairs $ nubsort + [ (cat, mergeFun fun cat) | (cat, fun) <- constantCats ] + + splitableFuns = tracePrt "splitableFuns" (prtSep " ") $ + nubsort + [ (fun, mergeFun fun cat) | (cat, fun) <- constantCats ] + + constantCats = tracePrt "constantCats" (prtSep " ") $ + [ (cat, fun) | + Rule fun (cat :@ [], []) _ <- rules, + dependentConstants ?= cat ] + + dependentConstants = listSet $ + tracePrt "dep consts" prt $ + dependentCats <\\> funCats + + funCats = tracePrt "fun cats" prt $ + nubsort [ cat | Rule _ (cat :@ _, decls) _ <- rules, + not (null decls) ] + + dependentCats = tracePrt "dep cats" prt $ + nubsort [ cat | Rule _ (cat :@ [], []) _ <- rules ] + + +---------------------------------------------------------------------- +-- utilities + +-- mergeing categories +mergeCats :: String -> String -> String -> Cat -> Cat -> Cat +mergeCats before middle after (IC cat) (IC arg) + = IC (before ++ cat ++ middle ++ arg ++ after) + +mergeFun, mergeArg :: Cat -> Cat -> Cat +mergeFun = mergeCats "{" ":" "}" +mergeArg = mergeCats "" "" "" + + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG.hs b/src/GF/OldParsing/ConvertGFCtoMCFG.hs new file mode 100644 index 000000000..1a9bc1a75 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG.hs @@ -0,0 +1,34 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All different conversions from GFC to MCFG +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG + (convertGrammar) where + +import GFC (CanonGrammar) +import GF.OldParsing.GrammarTypes +import Ident (Ident(..)) +import Option +import GF.System.Tracing + +import qualified GF.OldParsing.ConvertGFCtoMCFG.Old as Old +import qualified GF.OldParsing.ConvertGFCtoMCFG.Nondet as Nondet +import qualified GF.OldParsing.ConvertGFCtoMCFG.Strict as Strict +import qualified GF.OldParsing.ConvertGFCtoMCFG.Coercions as Coerce + +convertGrammar :: String -> (CanonGrammar, Ident) -> MCFGrammar +convertGrammar "nondet" = Coerce.addCoercions . Nondet.convertGrammar +convertGrammar "strict" = Strict.convertGrammar +convertGrammar "old" = Old.convertGrammar + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs new file mode 100644 index 000000000..650f8b646 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Coercions.hs @@ -0,0 +1,71 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Coercions +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Adding coercion functions to a MCFG if necessary. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Coercions (addCoercions) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import qualified Ident +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList +import List (groupBy) -- , transpose) + +---------------------------------------------------------------------- + +addCoercions :: MCFGrammar -> MCFGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule head args lins _ <- rules, + let lbls = [ lbl | Lin lbl _ <- lins ] ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "#coercions total" (prt . length) $ + concat $ + tracePrt "#coercions per cat" (prtList . map length) $ + combineCoercions + (groupBy sameCatFst allHeadSet) + (groupBy sameCat allArgSet) + sameCatFst a b = sameCat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (mainCat $ fst $ head heads) (mainCat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args = [ Rule arg [head] lins coercionName | + (head@(MCFCat _ headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@(MCFCat _ argCns) <- args, + argCns `subset` headCns ] + + +coercionName = Ident.IW + +mainCat (MCFCat c _) = c + +sameCat mc1 mc2 = mainCat mc1 == mainCat mc2 + + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs new file mode 100644 index 000000000..d27e240bc --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Nondet.hs @@ -0,0 +1,281 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Nondet +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Nondet (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, Ident) + +convertGrammar :: Env -- ^ the canonical grammar, together with the selected language + -> MCFGrammar -- ^ the resulting MCF grammar +convertGrammar gram = trace2 "language" (prt (snd gram)) $ + trace2 "modules" (prtSep " " modnames) $ + tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion undefined + where Gr modules = grammar2canon (fst gram) + modnames = uncurry M.allExtends gram + conversion = member modules >>= convertModule + convertModule (Mod (MTCnc modname _) _ _ _ defs) + | modname `elem` modnames = member defs >>= convertDef gram + convertModule _ = failure + +convertDef :: Env -> Def -> CnvMonad MCFRule +convertDef env (CncDFun fun (CIQ _ cat) args term _) + | trace2 "converting function" (prt fun) True + = do let iCat : iArgs = map initialMCat (cat : map catOfArg args) + writeState (iCat, iArgs, []) + convertTerm env cat term + (newCat, newArgs, linRec) <- readState + let newTerm = map (instLin newArgs) linRec + return (Rule newCat newArgs newTerm fun) +convertDef _ _ = failure + +instLin newArgs (Lin lbl lin) = Lin lbl (map instSym lin) + where instSym = mapSymbol instCat id + instCat (_, lbl, arg) = (newArgs !! arg, lbl, arg) + +convertTerm :: Env -> Cat -> Term -> CnvMonad () +convertTerm env cat term = do rterm <- simplTerm env term + let ctype = lookupCType env cat + reduceT env ctype rterm emptyPath + +------------------------------------------------------------ + +type CnvMonad a = BacktrackM CMRule a + +type CMRule = (MCFCat, [MCFCat], LinRec) +type LinRec = [Lin Cat Path Tokn] + +initialMCat :: Cat -> MCFCat +initialMCat cat = MCFCat cat [] + +---------------------------------------------------------------------- + +simplTerm :: Env -> Term -> CnvMonad STerm +simplTerm env = simplifyTerm + where + simplifyTerm :: Term -> CnvMonad STerm + simplifyTerm (Arg (A cat nr)) = return (SArg (fromInteger nr) cat emptyPath) + simplifyTerm (Con con terms) = liftM (SCon con) $ mapM simplifyTerm terms + simplifyTerm (R record) = liftM SRec $ mapM simplifyAssign record + simplifyTerm (P term lbl) = liftM (+. lbl) $ simplifyTerm term + simplifyTerm (T ct table) = liftM STbl $ sequence $ concatMap simplifyCase table + simplifyTerm (V ct terms) + = liftM STbl $ sequence [ liftM ((,) pat) (simplifyTerm term) | + (pat, term) <- zip (groundTerms env ct) terms ] + simplifyTerm (S term sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + STbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm env ssel + return (sterm +! sel') + simplifyTerm (FV terms) = liftM SVariants $ mapM simplifyTerm terms + simplifyTerm (term1 `C` term2) = liftM2 (SConcat) (simplifyTerm term1) (simplifyTerm term2) + simplifyTerm (K tokn) = return $ SToken tokn + simplifyTerm (E) = return $ SEmpty + simplifyTerm x = error $ "simplifyTerm: " ++ show x +-- error constructors: +-- (I CIdent) - from resource +-- (LI Ident) - pattern variable +-- (EInt Integer) - integer + + simplifyAssign :: Assign -> CnvMonad (Label, STerm) + simplifyAssign (Ass lbl term) = liftM ((,) lbl) $ simplifyTerm term + + simplifyCase :: Case -> [CnvMonad (STerm, STerm)] + simplifyCase (Cas pats term) = [ liftM2 (,) (simplifyPattern pat) (simplifyTerm term) | + pat <- pats ] + + simplifyPattern :: Patt -> CnvMonad STerm + simplifyPattern (PC con pats) = liftM (SCon con) $ mapM simplifyPattern pats + simplifyPattern (PW) = return SWildcard + simplifyPattern (PR record) = do record' <- mapM simplifyPattAssign record + case filter (\row -> snd row /= SWildcard) record' of + [] -> return SWildcard + record'' -> return (SRec record') + simplifyPattern x = error $ "simplifyPattern: " ++ show x +-- error constructors: +-- (PV Ident) - pattern variable + + simplifyPattAssign :: PattAssign -> CnvMonad (Label, STerm) + simplifyPattAssign (PAss lbl pat) = liftM ((,) lbl) $ simplifyPattern pat + + +------------------------------------------------------------ +-- reducing simplified terms, collecting mcf rules + +reduceT :: Env -> CType -> STerm -> Path -> CnvMonad () +reduceT env = reduce + where + reduce :: CType -> STerm -> Path -> CnvMonad () + reduce TStr term path = updateLin (path, term) + reduce (Cn _) term path + = do pat <- expandTerm env term + updateHead (path, pat) + reduce ctype (SVariants terms) path + = do term <- member terms + reduce ctype term path + reduce (RecType rtype) term path + = sequence_ [ reduce ctype (term +. lbl) (path ++. lbl) | + Lbg lbl ctype <- rtype ] + reduce (Table _ ctype) (STbl table) path + = sequence_ [ reduce ctype term (path ++! pat) | + (pat, term) <- table ] + reduce (Table ptype vtype) arg@(SArg _ _ _) path + = sequence_ [ reduce vtype (arg +! pat) (path ++! pat) | + pat <- groundTerms env ptype ] + reduce ctype term path = error ("reduce:\n ctype = (" ++ show ctype ++ + ")\n term = (" ++ show term ++ + ")\n path = (" ++ show path ++ ")\n") + + +------------------------------------------------------------ +-- expanding a term to ground terms + +expandTerm :: Env -> STerm -> CnvMonad STerm +expandTerm env arg@(SArg _ _ _) + = do pat <- member $ groundTerms env $ cTypeForArg env arg + pat =?= arg + return pat +expandTerm env (SCon con terms) = liftM (SCon con) $ mapM (expandTerm env) terms +expandTerm env (SRec record) = liftM SRec $ mapM (expandAssign env) record +expandTerm env (SVariants terms) = member terms >>= expandTerm env +expandTerm env term = error $ "expandTerm: " ++ show term + +expandAssign :: Env -> (Label, STerm) -> CnvMonad (Label, STerm) +expandAssign env (lbl, term) = liftM ((,) lbl) $ expandTerm env term + +------------------------------------------------------------ +-- unification of patterns and selection terms + +(=?=) :: STerm -> STerm -> CnvMonad () +SWildcard =?= _ = return () +SRec precord =?= arg@(SArg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= SArg arg _ path = updateArg arg (path, pat) +SCon con pats =?= SCon con' terms = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +SRec precord =?= SRec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +pat =?= term = error $ "(=?=): " ++ show pat ++ " =?= " ++ show term + + +------------------------------------------------------------ +-- updating the mcf rule + +updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins) <- readState + args' <- updateNth (addToMCFCat cn) arg args + writeState (head, args', lins) + +updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins) <- readState + head' <- addToMCFCat cn head + writeState (head', args, lins) + +updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins) <- readState + let lins' = lins ++ map (Lin path) newLins + writeState (head, args, lins') + +term2lins :: STerm -> [[Symbol (Cat, Path, Int) Tokn]] +term2lins (SArg arg cat path) = return [Cat (cat, path, arg)] +term2lins (SToken str) = return [Tok str] +term2lins (SConcat t1 t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (SEmpty) = return [] +term2lins (SVariants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +addToMCFCat :: Constraint -> MCFCat -> CnvMonad MCFCat +addToMCFCat cn (MCFCat cat cns) = liftM (MCFCat cat) $ addConstraint cn cns + +addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + +catOfArg (A aCat _) = aCat +catOfArg (AB aCat _ _) = aCat + +lookupCType :: Env -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (CIQ (snd env) cat) + +groundTerms :: Env -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +cTypeForArg :: Env -> STerm -> CType +cTypeForArg env (SArg nr cat (Path path)) + = follow path $ lookupCType env cat + where follow [] ctype = ctype + follow (Right pat : path) (Table _ ctype) = follow path ctype + follow (Left lbl : path) (RecType rec) + = case [ ctype | Lbg lbl' ctype <- rec, lbl == lbl' ] of + [ctype] -> follow path ctype + err -> error $ "follow: " ++ show rec ++ " . " ++ show lbl ++ + " results in " ++ show err + +term2spattern (R rec) = SRec [ (lbl, term2spattern term) | Ass lbl term <- rec ] +term2spattern (Con con terms) = SCon con $ map term2spattern terms + diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Old.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Old.hs new file mode 100644 index 000000000..d0869c8f5 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Old.hs @@ -0,0 +1,277 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Old +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars. (Old variant) +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Old (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +--import PrintGFC +import qualified PrGrammar as PG + +import Monad (liftM, liftM2, guard) +-- import Maybe (listToMaybe) +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList (nubsort, groupPairs) +import Maybe (listToMaybe) +import List (groupBy, transpose) + +---------------------------------------------------------------------- +-- old style types + +data XMCFCat = XMCFCat Cat [(XPath, Term)] deriving (Eq, Ord, Show) +type XMCFLabel = XPath + +cnvXMCFCat :: XMCFCat -> MCFCat +cnvXMCFCat (XMCFCat cat constrs) = MCFCat cat [ (cnvXPath path, cnvTerm term) | + (path, term) <- constrs ] + +cnvXMCFLabel :: XMCFLabel -> MCFLabel +cnvXMCFLabel = cnvXPath + +cnvXMCFLin :: Lin XMCFCat XMCFLabel Tokn -> Lin MCFCat MCFLabel Tokn +cnvXMCFLin (Lin lbl lin) = Lin (cnvXMCFLabel lbl) $ + map (mapSymbol cnvSym id) lin + where cnvSym (cat, lbl, nr) = (cnvXMCFCat cat, cnvXMCFLabel lbl, nr) + +-- Term -> STerm + +cnvTerm (R rec) = SRec [ (lbl, cnvTerm term) | Ass lbl term <- rec ] +cnvTerm (T _ tbl) = STbl [ (cnvPattern pat, cnvTerm term) | + Cas pats term <- tbl, pat <- pats ] +cnvTerm (Con con terms) = SCon con $ map cnvTerm terms +cnvTerm term + | isArgPath term = cnvArgPath term + +cnvPattern (PR rec) = SRec [ (lbl, cnvPattern term) | PAss lbl term <- rec ] +cnvPattern (PC con pats) = SCon con $ map cnvPattern pats +cnvPattern (PW) = SWildcard + +isArgPath (Arg _) = True +isArgPath (P _ _) = True +isArgPath (S _ _) = True +isArgPath _ = False + +cnvArgPath (Arg (A cat nr)) = SArg (fromInteger nr) cat emptyPath +cnvArgPath (term `P` lbl) = cnvArgPath term +. lbl +cnvArgPath (term `S` sel) = cnvArgPath term +! cnvTerm sel + +-- old style paths + +newtype XPath = XPath [Either Label Term] deriving (Eq, Ord, Show) + +cnvXPath :: XPath -> Path +cnvXPath (XPath path) = Path (map (either Left (Right . cnvTerm)) (reverse path)) + +emptyXPath :: XPath +emptyXPath = XPath [] + +(++..) :: XPath -> Label -> XPath +XPath path ++.. lbl = XPath (Left lbl : path) + +(++!!) :: XPath -> Term -> XPath +XPath path ++!! sel = XPath (Right sel : path) + +---------------------------------------------------------------------- + +-- | combining alg. 1 and alg. 2 from Ljunglöf's PhD thesis +convertGrammar :: (CanonGrammar, Ident) -> MCFGrammar +convertGrammar (gram, lng) = trace2 "language" (prt lng) $ + trace2 "modules" (prtSep " " modnames) $ + trace2 "#lin-terms" (prt (length cncdefs)) $ + tracePrt "#mcf-rules total" (prt.length) $ + concat $ + tracePrt "#mcf-rules per fun" + (\rs -> concat [" "++show n++"="++show (length r) | + (n, r) <- zip [1..] rs]) $ + map (convertDef gram lng) cncdefs + where Gr mods = grammar2canon gram + cncdefs = [ def | Mod (MTCnc modname _) _ _ _ defs <- mods, + modname `elem` modnames, + def@(CncDFun _ _ _ _ _) <- defs ] + modnames = M.allExtends gram lng + + +convertDef :: CanonGrammar -> Ident -> Def -> [MCFRule] +convertDef gram lng (CncDFun fun (CIQ _ cat) args term _) + = [ Rule (cnvXMCFCat newCat) (map cnvXMCFCat newArgs) (map cnvXMCFLin newTerm) fun | + let ctype = lookupCType gram lng cat, + instArgs <- mapM (enumerateInsts gram lng) args, + let instTerm = substitutePaths gram lng instArgs term, + newCat <- emcfCat gram lng cat instTerm, + newArgs <- mapM (extractArg gram lng instArgs) args, + let newTerm = concatMap (extractLin newArgs) $ strPaths gram lng ctype instTerm + ] + + +-- gammalt skräp: +-- mergeArgs = zipWith mergeRec +-- mergeRec (R r1) (R r2) = R (r1 ++ r2) + +extractArg :: CanonGrammar -> Ident -> [Term] -> ArgVar -> [XMCFCat] +extractArg gram lng args (A cat nr) = emcfCat gram lng cat (args !!! nr) + + +emcfCat :: CanonGrammar -> Ident -> Ident -> Term -> [XMCFCat] +emcfCat gram lng cat = map (XMCFCat cat) . parPaths gram lng (lookupCType gram lng cat) + + +extractLin :: [XMCFCat] -> (XPath, Term) -> [Lin XMCFCat XMCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `C` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (E) = [[]] + convertLin (K tok) = [[Tok tok]] + convertLin (FV terms) = concatMap convertLin terms + convertLin term = map (return . Cat) $ flattenTerm emptyXPath term + flattenTerm path (Arg (A _ nr)) = [(args !!! nr, path, fromInteger nr)] + flattenTerm path (term `P` lbl) = flattenTerm (path ++.. lbl) term + flattenTerm path (term `S` sel) = flattenTerm (path ++!! sel) term + flattenTerm path (FV terms) = concatMap (flattenTerm path) terms + flattenTerm path term = error $ "flattenTerm: \n " ++ show path ++ "\n " ++ prt term + + +enumerateInsts :: CanonGrammar -> Ident -> ArgVar -> [Term] +enumerateInsts gram lng arg@(A argCat _) = enumerate (Arg arg) (lookupCType gram lng argCat) + where enumerate path (TStr) = [ path ] + enumerate path (Cn con) = okError $ lookupParamValues gram con + enumerate path (RecType r) + = map R $ sequence [ map (lbl `Ass`) $ + enumerate (path `P` lbl) ctype | + lbl `Lbg` ctype <- r ] + enumerate path (Table s t) + = map (T s) $ sequence [ map ([term2pattern sel] `Cas`) $ + enumerate (path `S` sel) t | + sel <- enumerate (error "enumerate") s ] + + + +termPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, (CType, Term))] +termPaths gr l (TStr) term = [ (emptyXPath, (TStr, term)) ] +termPaths gr l (RecType rtype) (R record) + = [ (path ++.. lbl, value) | + lbl `Ass` term <- record, + let ctype = okError $ maybeErr "termPaths/record" $ lookupLabelling lbl rtype, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (T _ table) + = [ (path ++!! pattern2term pat, value) | + pats `Cas` term <- table, pat <- pats, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (V ptype table) + = [ (path ++!! pat, value) | + (pat, term) <- zip (okError $ allParamValues gr ptype) table, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l ctype (FV terms) + = concatMap (termPaths gr l ctype) terms +termPaths gr l (Cn pc) term = [ (emptyXPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: CanonGrammar -> Ident -> CType -> Term -> [[(XPath, Term)]] +parPaths gr l ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths gr l ctype term ] + +strPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, Term)] +strPaths gr l ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths gr l ctype term ] + + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: CanonGrammar -> Ident -> [Term] -> Term -> Term +substitutePaths gr l arguments trm = subst trm + where subst (con `Con` terms) = con `Con` map subst terms + subst (R record) = R $ map substAss record + subst (term `P` lbl) = subst term `evalP` lbl + subst (T ptype table) = T ptype $ map substCas table + subst (V ptype table) = T ptype [ [term2pattern pat] `Cas` subst term | + (pat, term) <- zip (okError $ allParamValues gr ptype) table ] + subst (term `S` select) = subst term `evalS` subst select + subst (term `C` term') = subst term `C` subst term' + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !!! arg + subst term = term + + substAss (l `Ass` term) = l `Ass` subst term + substCas (p `Cas` term) = p `Cas` subst term + + +evalP (R record) lbl = okError $ maybeErr errStr $ lookupAssign lbl record + where errStr = "evalP: " ++ prt (R record `P` lbl) +evalP (FV terms) lbl = evalFV [ evalP term lbl | term <- terms ] +evalP term lbl = term `P` lbl + +evalS t@(T _ tbl) sel = maybe (t `S` sel) id $ lookupCase sel tbl +evalS (FV terms) sel = evalFV [ term `evalS` sel | term <- terms ] +evalS term (FV sels)= evalFV [ term `evalS` sel | sel <- sels ] +evalS term sel = term `S` sel + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> FV terms + where flattenFV (FV ts) = ts + flattenFV t = [t] + + +---------------------------------------------------------------------- +-- utilities + +-- lookup a CType for an Ident +lookupCType :: CanonGrammar -> Ident -> Ident -> CType +lookupCType gr lng c = errVal defLinType $ lookupLincat gr (CIQ lng c) + +-- lookup a label in a (record / record ctype / table) +lookupAssign :: Label -> [Assign] -> Maybe Term +lookupLabelling :: Label -> [Labelling] -> Maybe CType +lookupCase :: Term -> [Case] -> Maybe Term + +lookupAssign lbl rec = listToMaybe [ term | lbl' `Ass` term <- rec, lbl == lbl' ] +lookupLabelling lbl rtyp = listToMaybe [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] +lookupCase sel tbl = listToMaybe [ term | pats `Cas` term <- tbl, sel `matchesPats` pats ] + +matchesPats :: Term -> [Patt] -> Bool +matchesPats term patterns = or [ term == pattern2term pattern | pattern <- patterns ] + +-- converting between patterns and terms +pattern2term :: Patt -> Term +term2pattern :: Term -> Patt + +pattern2term (con `PC` patterns) = con `Con` map pattern2term patterns +pattern2term (PR record) = R [ lbl `Ass` pattern2term pattern | + lbl `PAss` pattern <- record ] + +term2pattern (con `Con` terms) = con `PC` map term2pattern terms +term2pattern (R record) = PR [ lbl `PAss` term2pattern term | + lbl `Ass` term <- record ] + +-- list lookup for Integers instead of Ints +(!!!) :: [a] -> Integer -> a +xs !!! n = xs !! fromInteger n diff --git a/src/GF/OldParsing/ConvertGFCtoMCFG/Strict.hs b/src/GF/OldParsing/ConvertGFCtoMCFG/Strict.hs new file mode 100644 index 000000000..604fb460b --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoMCFG/Strict.hs @@ -0,0 +1,189 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Strict +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Strict (convertGrammar) where + +import GF.System.Tracing +-- import IOExts (unsafePerformIO) +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, Ident) + +convertGrammar :: Env -- ^ the canonical grammar, together with the selected language + -> MCFGrammar -- ^ the resulting MCF grammar +convertGrammar gram = trace2 "language" (prt (snd gram)) $ + trace2 "modules" (prtSep " " modnames) $ + tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion undefined + where Gr modules = grammar2canon (fst gram) + modnames = uncurry M.allExtends gram + conversion = member modules >>= convertModule + convertModule (Mod (MTCnc modname _) _ _ _ defs) + | modname `elem` modnames = member defs >>= convertDef gram + convertModule _ = failure + +convertDef :: Env -> Def -> CnvMonad MCFRule +convertDef env (CncDFun fun (CIQ _ cat) args term _) + | trace2 "converting function" (prt fun) True + = do let ctype = lookupCType env cat + instArgs <- mapM (enumerateArg env) args + let instTerm = substitutePaths env instArgs term + newCat <- emcfCat env cat instTerm + newArgs <- mapM (extractArg env instArgs) args + let newTerm = strPaths env ctype instTerm >>= extractLin newArgs + return (Rule newCat newArgs newTerm fun) +convertDef _ _ = failure + +------------------------------------------------------------ + +type CnvMonad a = BacktrackM () a + +---------------------------------------------------------------------- +-- strict conversion + +extractArg :: Env -> [STerm] -> ArgVar -> CnvMonad MCFCat +extractArg env args (A cat nr) = emcfCat env cat (args !! fromInteger nr) + +emcfCat :: Env -> Cat -> STerm -> CnvMonad MCFCat +emcfCat env cat term = member $ map (MCFCat cat) $ parPaths env (lookupCType env cat) term + +enumerateArg :: Env -> ArgVar -> CnvMonad STerm +enumerateArg env (A cat nr) = let ctype = lookupCType env cat + in enumerate (SArg (fromInteger nr) cat emptyPath) ctype + where enumerate arg (TStr) = return arg + enumerate arg ctype@(Cn _) = member $ groundTerms env ctype + enumerate arg (RecType rtype) + = liftM SRec $ sequence [ liftM ((,) lbl) $ + enumerate (arg +. lbl) ctype | + lbl `Lbg` ctype <- rtype ] + enumerate arg (Table stype ctype) + = do state <- readState + liftM STbl $ sequence [ liftM ((,) sel) $ + enumerate (arg +! sel) ctype | + sel <- solutions (enumerate err stype) state ] + where err = error "enumerate: parameter type should not be string" + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: Env -> [STerm] -> Term -> STerm +substitutePaths env arguments trm = subst trm + where subst (con `Con` terms) = con `SCon` map subst terms + subst (R record) = SRec [ (lbl, subst term) | lbl `Ass` term <- record ] + subst (term `P` lbl) = subst term +. lbl + subst (T ptype table) = STbl [ (pattern2sterm pat, subst term) | + pats `Cas` term <- table, pat <- pats ] + subst (V ptype table) = STbl [ (pat, subst term) | + (pat, term) <- zip (groundTerms env ptype) table ] + subst (term `S` select) = subst term +! subst select + subst (term `C` term') = subst term `SConcat` subst term' + subst (K str) = SToken str + subst (E) = SEmpty + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !! fromInteger arg + + +termPaths :: Env -> CType -> STerm -> [(Path, (CType, STerm))] +termPaths env (TStr) term = [ (emptyPath, (TStr, term)) ] +termPaths env (RecType rtype) (SRec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let ctype = lookupLabelling lbl rtype, + (path, value) <- termPaths env ctype term ] +termPaths env (Table _ ctype) (STbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths env ctype term ] +termPaths env ctype (SVariants terms) + = terms >>= termPaths env ctype +termPaths env (Cn pc) term = [ (emptyPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: Env -> CType -> STerm -> [[(Path, STerm)]] +parPaths env ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths env ctype term ] + +strPaths :: Env -> CType -> STerm -> [(Path, STerm)] +strPaths env ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths env ctype term ] + +extractLin :: [MCFCat] -> (Path, STerm) -> [Lin MCFCat MCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `SConcat` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (SEmpty) = [[]] + convertLin (SToken tok) = [[Tok tok]] + convertLin (SVariants terms) = concatMap convertLin terms + convertLin (SArg nr _ path) = [[Cat (args !! nr, path, nr)]] + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> SVariants terms + where flattenFV (SVariants ts) = ts + flattenFV t = [t] + +---------------------------------------------------------------------- +-- utilities + +lookupCType :: Env -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (CIQ (snd env) cat) + +lookupLabelling :: Label -> [Labelling] -> CType +lookupLabelling lbl rtyp = case [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] of + [ctyp] -> ctyp + err -> error $ "lookupLabelling:" ++ show err + +groundTerms :: Env -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +term2spattern (R rec) = SRec [ (lbl, term2spattern term) | Ass lbl term <- rec ] +term2spattern (Con con terms) = SCon con $ map term2spattern terms + +pattern2sterm :: Patt -> STerm +pattern2sterm (con `PC` patterns) = con `SCon` map pattern2sterm patterns +pattern2sterm (PR record) = SRec [ (lbl, pattern2sterm pattern) | + lbl `PAss` pattern <- record ] + diff --git a/src/GF/OldParsing/ConvertGFCtoSimple.hs b/src/GF/OldParsing/ConvertGFCtoSimple.hs new file mode 100644 index 000000000..a14fa90b6 --- /dev/null +++ b/src/GF/OldParsing/ConvertGFCtoSimple.hs @@ -0,0 +1,122 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC to SimpleGFC +-- +-- the conversion might fail if the GFC grammar has dependent or higher-order types +----------------------------------------------------------------------------- + +module GF.OldParsing.ConvertGFCtoSimple where + +import qualified AbsGFC as A +import qualified Ident as I +import GF.OldParsing.SimpleGFC + +import GFC +import MkGFC (grammar2canon) +import qualified Look (lookupLin, allParamValues, lookupLincat) +import qualified CMacros (defLinType) +import Operations (err, errVal) +import qualified Modules as M + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +---------------------------------------------------------------------- + +type Env = (CanonGrammar, I.Ident) + +convertGrammar :: Env -> Grammar +convertGrammar gram = trace2 "language" (show (snd gram)) $ + tracePrt "#simple-rules total" (show . length) $ + [ convertAbsFun gram fun typing | + A.Mod (A.MTAbs modname) _ _ _ defs <- modules, + A.AbsDFun fun typing _ <- defs ] + where A.Gr modules = grammar2canon (fst gram) + +convertAbsFun :: Env -> I.Ident -> A.Exp -> Rule +convertAbsFun gram fun aTyping + = -- trace2 "absFun" (show fun) $ + Rule fun sTyping sTerm + where sTyping = convertTyping [] aTyping + sTerm = do lin <- lookupLin gram fun + return (convertTerm gram lin, convertCType gram cType) + cType = lookupCType gram sTyping + +convertTyping :: [Decl] -> A.Exp -> Typing +-- convertTyping env tp | trace2 "typing" (prt env ++ " / " ++ prt tp) False = undefined +convertTyping env (A.EProd x a b) + = convertTyping ((x ::: convertType [] a) : env) b +convertTyping env a = (convertType [] a, reverse env) + +convertType :: [Atom] -> A.Exp -> Type +-- convertType args tp | trace2 "type" (prt args ++ " / " ++ prt tp) False = undefined +convertType args (A.EApp a (A.EAtom at)) = convertType (convertAtom at : args) a +convertType args (A.EAtom at) = convertCat at :@ args + +convertAtom :: A.Atom -> Atom +convertAtom (A.AC con) = ACon con +convertAtom (A.AV var) = AVar var + +convertCat :: A.Atom -> Cat +convertCat (A.AC (A.CIQ _ cat)) = cat +convertCat at = error $ "convertCat: " ++ show at + +convertCType :: Env -> A.CType -> CType +convertCType gram (A.RecType rec) + = RecT [ (lbl, convertCType gram ctype) | A.Lbg lbl ctype <- rec ] +convertCType gram (A.Table ptype vtype) + = TblT (convertCType gram ptype) (convertCType gram vtype) +convertCType gram ct@(A.Cn con) = ConT con $ map (convertTerm gram) $ groundTerms gram ct +convertCType gram (A.TStr) = StrT +convertCType gram (A.TInts n) = error "convertCType: cannot handle 'TInts' constructor" + +convertTerm :: Env -> A.Term -> Term +convertTerm gram (A.Arg arg) = convertArgVar arg +convertTerm gram (A.Con con terms) = con :^ map (convertTerm gram) terms +convertTerm gram (A.LI var) = Var var +convertTerm gram (A.R rec) = Rec [ (lbl, convertTerm gram term) | A.Ass lbl term <- rec ] +convertTerm gram (A.P term lbl) = convertTerm gram term +. lbl +convertTerm gram (A.V ctype terms) = Tbl [ (convertTerm gram pat, convertTerm gram term) | + (pat, term) <- zip (groundTerms gram ctype) terms ] +convertTerm gram (A.T ctype tbl) = Tbl [ (convertPatt pat, convertTerm gram term) | + A.Cas pats term <- tbl, pat <- pats ] +convertTerm gram (A.S term sel) = convertTerm gram term +! convertTerm gram sel +convertTerm gram (A.C term1 term2) = convertTerm gram term1 ?++ convertTerm gram term2 +convertTerm gram (A.FV terms) = Variants (map (convertTerm gram) terms) +convertTerm gram (A.K tok) = Token tok +convertTerm gram (A.E) = Empty +convertTerm gram (A.I con) = error "convertTerm: cannot handle 'I' constructor" +convertTerm gram (A.EInt int) = error "convertTerm: cannot handle 'EInt' constructor" + +convertArgVar :: A.ArgVar -> Term +convertArgVar (A.A cat nr) = Arg (fromInteger nr) cat emptyPath +convertArgVar (A.AB cat bindings nr) = Arg (fromInteger nr) cat emptyPath + +convertPatt (A.PC con pats) = con :^ map convertPatt pats +convertPatt (A.PV x) = Var x +convertPatt (A.PW) = Wildcard +convertPatt (A.PR rec) = Rec [ (lbl, convertPatt pat) | A.PAss lbl pat <- rec ] +convertPatt (A.PI n) = error "convertPatt: cannot handle 'PI' constructor" + +---------------------------------------------------------------------- + +lookupLin gram fun = err fail Just $ + Look.lookupLin (fst gram) (A.CIQ (snd gram) fun) + +--lookupCType :: Env -> Typing -> CType +lookupCType env (cat :@ _, _) = errVal CMacros.defLinType $ + Look.lookupLincat (fst env) (A.CIQ (snd env) cat) + +groundTerms :: Env -> A.CType -> [A.Term] +groundTerms gram ctype = err error id $ + Look.allParamValues (fst gram) ctype + diff --git a/src/GF/OldParsing/ConvertGrammar.hs b/src/GF/OldParsing/ConvertGrammar.hs new file mode 100644 index 000000000..474834081 --- /dev/null +++ b/src/GF/OldParsing/ConvertGrammar.hs @@ -0,0 +1,44 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGrammar +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All (?) grammar conversions which are used in GF +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGrammar + (pInfo, emptyPInfo, + module GF.OldParsing.GrammarTypes + ) where + +import GFC (CanonGrammar) +import MkGFC (grammar2canon) +import GF.OldParsing.GrammarTypes +import Ident (Ident(..)) +import Option +import GF.System.Tracing + +-- import qualified GF.OldParsing.FiniteTypes.Calc as Fin +import qualified GF.OldParsing.ConvertGFCtoMCFG as G2M +import qualified GF.OldParsing.ConvertMCFGtoCFG as M2C +import qualified GF.OldParsing.MCFGrammar as MCFG +import qualified GF.OldParsing.CFGrammar as CFG + +pInfo :: Options -> CanonGrammar -> Ident -> PInfo +pInfo opts canon lng = PInfo mcfg cfg mcfp cfp + where mcfg = G2M.convertGrammar cnv (canon, lng) + cnv = maybe "nondet" id $ getOptVal opts gfcConversion + cfg = M2C.convertGrammar mcfg + mcfp = MCFG.pInfo mcfg + cfp = CFG.pInfo cfg + +emptyPInfo :: PInfo +emptyPInfo = PInfo [] [] (MCFG.pInfo []) (CFG.pInfo []) + diff --git a/src/GF/OldParsing/ConvertMCFGtoCFG.hs b/src/GF/OldParsing/ConvertMCFGtoCFG.hs new file mode 100644 index 000000000..06965994c --- /dev/null +++ b/src/GF/OldParsing/ConvertMCFGtoCFG.hs @@ -0,0 +1,52 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertMCFGtoCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting MCFG grammars to (possibly overgenerating) CFG +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertMCFGtoCFG + (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser + +import Monad +import GF.OldParsing.Utilities +import qualified GF.OldParsing.MCFGrammar as MCFG +import qualified GF.OldParsing.CFGrammar as CFG +import GF.OldParsing.GrammarTypes + +convertGrammar :: MCFGrammar -> CFGrammar +convertGrammar gram = tracePrt "#cf-rules" (prt.length) $ + concatMap convertRule gram + +convertRule :: MCFRule -> [CFRule] +convertRule (MCFG.Rule cat args record name) + = [ CFG.Rule (CFCat cat lbl) rhs (CFName name profile) | + MCFG.Lin lbl lin <- record, + let rhs = map (mapSymbol convertArg id) lin, + let profile = map (argPlaces lin) [0 .. length args-1] + ] + +convertArg (cat, lbl, _arg) = CFCat cat lbl + +argPlaces lin arg = [ place | ((_cat, _lbl, arg'), place) <- + zip (filterCats lin) [0::Int ..], arg == arg' ] + +filterCats syms = [ cat | Cat cat <- syms ] + + + + + + + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG.hs b/src/GF/OldParsing/ConvertSimpleToMCFG.hs new file mode 100644 index 000000000..e111444f9 --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG.hs @@ -0,0 +1,30 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All different conversions from SimpleGFC to MCFG +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertSimpleToMCFG + (convertGrammar) where + +import qualified GF.OldParsing.SimpleGFC as S +--import GF.OldParsing.GrammarTypes + +import qualified GF.OldParsing.ConvertFiniteSimple as Fin +import qualified GF.OldParsing.ConvertSimpleToMCFG.Nondet as Nondet +--import qualified GF.OldParsing.ConvertSimpleToMCFG.Strict as Strict +import qualified GF.OldParsing.ConvertSimpleToMCFG.Coercions as Coerce + +--convertGrammar :: String -> S.Grammar -> MCFGrammar +convertGrammar ('f':'i':'n':'-':cnv) = convertGrammar cnv . Fin.convertGrammar +convertGrammar "nondet" = Coerce.addCoercions . Nondet.convertGrammar +--convertGrammar "strict" = Strict.convertGrammar + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs new file mode 100644 index 000000000..58a39b7f4 --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Coercions.hs @@ -0,0 +1,70 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Adding coercion functions to a MCFG if necessary. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertSimpleToMCFG.Coercions (addCoercions) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import qualified Ident +import GF.OldParsing.Utilities +--import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList +import List (groupBy) -- , transpose) + +---------------------------------------------------------------------- + +--addCoercions :: MCFGrammar -> MCFGrammar +addCoercions rules = coercions ++ rules + where (allHeads, allArgs) = unzip [ ((head, lbls), nubsort args) | + Rule head args lins _ <- rules, + let lbls = [ lbl | Lin lbl _ <- lins ] ] + allHeadSet = nubsort allHeads + allArgSet = union allArgs <\\> map fst allHeadSet + coercions = tracePrt "#coercions total" (prt . length) $ + concat $ + tracePrt "#coercions per cat" (prtList . map length) $ + combineCoercions + (groupBy sameCatFst allHeadSet) + (groupBy sameCat allArgSet) + sameCatFst a b = sameCat (fst a) (fst b) + + +combineCoercions [] _ = [] +combineCoercions _ [] = [] +combineCoercions allHeads'@(heads:allHeads) allArgs'@(args:allArgs) + = case compare (mainCat $ fst $ head heads) (mainCat $ head args) of + LT -> combineCoercions allHeads allArgs' + GT -> combineCoercions allHeads' allArgs + EQ -> makeCoercion heads args : combineCoercions allHeads allArgs + + +makeCoercion heads args = [ Rule arg [head] lins coercionName | + (head@({-MCFCat-}(_, headCns), lbls) <- heads, + let lins = [ Lin lbl [Cat (head, lbl, 0)] | lbl <- lbls ], + arg@({-MCFCat-} (_, argCns) <- args, + argCns `subset` headCns ] + + +coercionName = Ident.IW + +mainCat ({-MCFCat-} (c, _) = c + +sameCat mc1 mc2 = mainCat mc1 == mainCat mc2 + + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs new file mode 100644 index 000000000..da7511eaf --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Nondet.hs @@ -0,0 +1,245 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, nondeterministically. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertSimpleToMCFG.Nondet (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +-- import PrintGFC +-- import qualified PrGrammar as PG + +import Monad +-- import Ident (Ident(..)) +import qualified AbsGFC +-- import GFC +import Look +import Operations +-- import qualified Modules as M +import CMacros (defLinType) +-- import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +-- import GF.OldParsing.GrammarTypes +import GF.Data.SortedList +import qualified GF.OldParsing.MCFGrammar as MCF (Grammar, Rule(..), Lin(..)) +import GF.OldParsing.SimpleGFC +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM + +---------------------------------------------------------------------- + +--convertGrammar :: Grammar -> MCF.Grammar +convertGrammar rules = tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion rules undefined + where conversion = member rules >>= convertRule + +--convertRule :: Rule -> CnvMonad MCF.Rule +convertRule (Rule fun (cat :@ _, decls) (Just (term, ctype))) + = do let args = [ arg | _ ::: (arg :@ _) <- decls ] + writeState (initialMCat cat, map initialMCat args, []) + convertTerm cat term + (newCat, newArgs, linRec) <- readState + let newTerm = map (instLin newArgs) linRec + return (MCF.Rule newCat newArgs newTerm fun) +convertRule _ = failure + +instLin newArgs (MCF.Lin lbl lin) = MCF.Lin lbl (map instSym lin) + where instSym = mapSymbol instCat id + instCat (_, lbl, arg) = (newArgs !! arg, lbl, arg) + +--convertTerm :: Cat -> Term -> CnvMonad () +convertTerm cat term = do rterm <- simplifyTerm term + env <- readEnv + let ctype = lookupCType env cat + reduce ctype rterm emptyPath + +------------------------------------------------------------ + +{- +type CnvMonad a = BacktrackM Grammar CMRule a + +type CMRule = (MCFCat, [MCFCat], LinRec) +type LinRec = [Lin Cat Path Tokn] +-} + +--initialMCat :: Cat -> MCFCat +initialMCat cat = (cat, []) --MCFCat cat [] + +---------------------------------------------------------------------- + +--simplifyTerm :: Term -> CnvMonad STerm +simplifyTerm (con :^ terms) = liftM (con :^) $ mapM simplifyTerm terms +simplifyTerm (Rec record) = liftM Rec $ mapM simplifyAssign record +simplifyTerm (term :. lbl) = liftM (+. lbl) $ simplifyTerm term +simplifyTerm (Tbl table) = Tbl $ mapM simplifyCase table +simplifyTerm (term :! sel) + = do sterm <- simplifyTerm term + ssel <- simplifyTerm sel + case sterm of + Tbl table -> do (pat, val) <- member table + pat =?= ssel + return val + _ -> do sel' <- expandTerm ssel + return (sterm +! sel') +simplifyTerm (Variants terms) = liftM Variants $ mapM simplifyTerm terms +simplifyTerm (term1 :++ term2) = liftM2 (:++) (simplifyTerm term1) (simplifyTerm term2) +simplifyTerm term = return term +-- error constructors: +-- (I CIdent) - from resource +-- (LI Ident) - pattern variable +-- (EInt Integer) - integer + +--simplifyAssign :: Assign -> CnvMonad (Label, STerm) +simplifyAssign (lbl, term) = liftM ((,) lbl) $ simplifyTerm term + +--simplifyCase :: Case -> [CnvMonad (STerm, STerm)] +simplifyCase (pat, term) = liftM2 (,) (simplifyTerm pat) (simplifyTerm term) + + +------------------------------------------------------------ +-- reducing simplified terms, collecting mcf rules + +--reduce :: CType -> STerm -> Path -> CnvMonad () +reduce StrT term path = updateLin (path, term) +reduce (ConT _) term path + = do pat <- expandTerm term + updateHead (path, pat) +reduce ctype (Variants terms) path + = do term <- member terms + reduce ctype term path +reduce (RecT rtype) term path + = sequence_ [ reduce ctype (term +. lbl) (path ++. lbl) | + (lbl, ctype) <- rtype ] +reduce (TblT _ ctype) (Tbl table) path + = sequence_ [ reduce ctype term (path ++! pat) | + (pat, term) <- table ] +reduce (TblT ptype vtype) arg@(Arg _ _ _) path + = do env <- readEnv + sequence_ [ reduce vtype (arg +! pat) (path ++! pat) | + pat <- groundTerms ptype ] +reduce ctype term path = error ("reduce:\n ctype = (" ++ show ctype ++ + ")\n term = (" ++ show term ++ + ")\n path = (" ++ show path ++ ")\n") + + +------------------------------------------------------------ +-- expanding a term to ground terms + +--expandTerm :: STerm -> CnvMonad STerm +expandTerm arg@(Arg _ _ _) + = do env <- readEnv + pat <- member $ groundTerms $ cTypeForArg env arg + pat =?= arg + return pat +expandTerm (con :^ terms) = liftM (con :^) $ mapM expandTerm terms +expandTerm (Rec record) = liftM Rec $ mapM expandAssign record +expandTerm (Variants terms) = member terms >>= expandTerm +expandTerm term = error $ "expandTerm: " ++ show term + +--expandAssign :: (Label, STerm) -> CnvMonad (Label, STerm) +expandAssign (lbl, term) = liftM ((,) lbl) $ expandTerm term + +------------------------------------------------------------ +-- unification of patterns and selection terms + +--(=?=) :: STerm -> STerm -> CnvMonad () +Wildcard =?= _ = return () +Rec precord =?= arg@(Arg _ _ _) = sequence_ [ pat =?= (arg +. lbl) | + (lbl, pat) <- precord ] +pat =?= Arg arg _ path = updateArg arg (path, pat) +(con :^ pats) =?= (con' :^ terms) = do guard (con==con' && length pats==length terms) + sequence_ $ zipWith (=?=) pats terms +Rec precord =?= Rec record = sequence_ [ maybe mzero (pat =?=) mterm | + (lbl, pat) <- precord, + let mterm = lookup lbl record ] +pat =?= term = error $ "(=?=): " ++ show pat ++ " =?= " ++ show term + + +------------------------------------------------------------ +-- updating the mcf rule + +--updateArg :: Int -> Constraint -> CnvMonad () +updateArg arg cn + = do (head, args, lins) <- readState + args' <- updateNth (addToMCFCat cn) arg args + writeState (head, args', lins) + +--updateHead :: Constraint -> CnvMonad () +updateHead cn + = do (head, args, lins) <- readState + head' <- addToMCFCat cn head + writeState (head', args, lins) + +--updateLin :: Constraint -> CnvMonad () +updateLin (path, term) + = do let newLins = term2lins term + (head, args, lins) <- readState + let lins' = lins ++ map (MCF.Lin path) newLins + writeState (head, args, lins') + +--term2lins :: STerm -> [[Symbol (Cat, Path, Int) Tokn]] +term2lins (Arg arg cat path) = return [Cat (cat, path, arg)] +term2lins (Token str) = return [Tok str] +term2lins (t1 :++ t2) = liftM2 (++) (term2lins t1) (term2lins t2) +term2lins (Empty) = return [] +term2lins (Variants terms) = terms >>= term2lins +term2lins term = error $ "term2lins: " ++ show term + +--addToMCFCat :: Constraint -> MCFCat -> CnvMonad MCFCat +addToMCFCat cn ({-MCFCat-} cat, cns) = liftM ({-MCFCat-} (,) cat) $ addConstraint cn cns + +--addConstraint :: Constraint -> [Constraint] -> CnvMonad [Constraint] +addConstraint cn0 (cn : cns) + | fst cn0 > fst cn = liftM (cn:) (addConstraint cn0 cns) + | fst cn0 == fst cn = guard (snd cn0 == snd cn) >> + return (cn : cns) +addConstraint cn0 cns = return (cn0 : cns) + + +---------------------------------------------------------------------- +-- utilities + +updateNth :: Monad m => (a -> m a) -> Int -> [a] -> m [a] +updateNth update 0 (a : as) = liftM (:as) (update a) +updateNth update n (a : as) = liftM (a:) (updateNth update (n-1) as) + +--lookupCType :: GrammarEnv -> Cat -> CType +lookupCType env cat = errVal defLinType $ + lookupLincat (fst env) (AbsGFC.CIQ (snd env) cat) + +--groundTerms :: GrammarEnv -> CType -> [STerm] +groundTerms env ctype = err error (map term2spattern) $ + allParamValues (fst env) ctype + +--cTypeForArg :: GrammarEnv -> STerm -> CType +cTypeForArg env (Arg nr cat (Path path)) + = follow path $ lookupCType env cat + where follow [] ctype = ctype + follow (Right pat : path) (TblT _ ctype) = follow path ctype + follow (Left lbl : path) (RecT rec) + = case [ ctype | (lbl', ctype) <- rec, lbl == lbl' ] of + [ctype] -> follow path ctype + err -> error $ "follow: " ++ show rec ++ " . " ++ show lbl ++ + " results in " ++ show err + +term2spattern (AbsGFC.R rec) = Rec [ (lbl, term2spattern term) | + AbsGFC.Ass lbl term <- rec ] +term2spattern (AbsGFC.Con con terms) = con :^ map term2spattern terms + diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Old.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Old.hs new file mode 100644 index 000000000..88a459625 --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Old.hs @@ -0,0 +1,277 @@ +---------------------------------------------------------------------- +-- | +-- Module : ConvertGFCtoMCFG.Old +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting GFC grammars to MCFG grammars. (Old variant) +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +-- (also, the conversion might fail if the GFC grammar has dependent or higher-order types) +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Old (convertGrammar) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm +--import PrintGFC +import qualified PrGrammar as PG + +import Monad (liftM, liftM2, guard) +-- import Maybe (listToMaybe) +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Rule(..), Lin(..)) +import GF.Data.SortedList (nubsort, groupPairs) +import Maybe (listToMaybe) +import List (groupBy, transpose) + +---------------------------------------------------------------------- +-- old style types + +data XMCFCat = XMCFCat Cat [(XPath, Term)] deriving (Eq, Ord, Show) +type XMCFLabel = XPath + +cnvXMCFCat :: XMCFCat -> MCFCat +cnvXMCFCat (XMCFCat cat constrs) = MCFCat cat [ (cnvXPath path, cnvTerm term) | + (path, term) <- constrs ] + +cnvXMCFLabel :: XMCFLabel -> MCFLabel +cnvXMCFLabel = cnvXPath + +cnvXMCFLin :: Lin XMCFCat XMCFLabel Tokn -> Lin MCFCat MCFLabel Tokn +cnvXMCFLin (Lin lbl lin) = Lin (cnvXMCFLabel lbl) $ + map (mapSymbol cnvSym id) lin + where cnvSym (cat, lbl, nr) = (cnvXMCFCat cat, cnvXMCFLabel lbl, nr) + +-- Term -> STerm + +cnvTerm (R rec) = SRec [ (lbl, cnvTerm term) | Ass lbl term <- rec ] +cnvTerm (T _ tbl) = STbl [ (cnvPattern pat, cnvTerm term) | + Cas pats term <- tbl, pat <- pats ] +cnvTerm (Con con terms) = SCon con $ map cnvTerm terms +cnvTerm term + | isArgPath term = cnvArgPath term + +cnvPattern (PR rec) = SRec [ (lbl, cnvPattern term) | PAss lbl term <- rec ] +cnvPattern (PC con pats) = SCon con $ map cnvPattern pats +cnvPattern (PW) = SWildcard + +isArgPath (Arg _) = True +isArgPath (P _ _) = True +isArgPath (S _ _) = True +isArgPath _ = False + +cnvArgPath (Arg (A cat nr)) = SArg (fromInteger nr) cat emptyPath +cnvArgPath (term `P` lbl) = cnvArgPath term +. lbl +cnvArgPath (term `S` sel) = cnvArgPath term +! cnvTerm sel + +-- old style paths + +newtype XPath = XPath [Either Label Term] deriving (Eq, Ord, Show) + +cnvXPath :: XPath -> Path +cnvXPath (XPath path) = Path (map (either Left (Right . cnvTerm)) (reverse path)) + +emptyXPath :: XPath +emptyXPath = XPath [] + +(++..) :: XPath -> Label -> XPath +XPath path ++.. lbl = XPath (Left lbl : path) + +(++!!) :: XPath -> Term -> XPath +XPath path ++!! sel = XPath (Right sel : path) + +---------------------------------------------------------------------- + +-- | combining alg. 1 and alg. 2 from Ljunglöf's PhD thesis +convertGrammar :: (CanonGrammar, Ident) -> MCFGrammar +convertGrammar (gram, lng) = trace2 "language" (prt lng) $ + trace2 "modules" (prtSep " " modnames) $ + trace2 "#lin-terms" (prt (length cncdefs)) $ + tracePrt "#mcf-rules total" (prt.length) $ + concat $ + tracePrt "#mcf-rules per fun" + (\rs -> concat [" "++show n++"="++show (length r) | + (n, r) <- zip [1..] rs]) $ + map (convertDef gram lng) cncdefs + where Gr mods = grammar2canon gram + cncdefs = [ def | Mod (MTCnc modname _) _ _ _ defs <- mods, + modname `elem` modnames, + def@(CncDFun _ _ _ _ _) <- defs ] + modnames = M.allExtends gram lng + + +convertDef :: CanonGrammar -> Ident -> Def -> [MCFRule] +convertDef gram lng (CncDFun fun (CIQ _ cat) args term _) + = [ Rule (cnvXMCFCat newCat) (map cnvXMCFCat newArgs) (map cnvXMCFLin newTerm) fun | + let ctype = lookupCType gram lng cat, + instArgs <- mapM (enumerateInsts gram lng) args, + let instTerm = substitutePaths gram lng instArgs term, + newCat <- emcfCat gram lng cat instTerm, + newArgs <- mapM (extractArg gram lng instArgs) args, + let newTerm = concatMap (extractLin newArgs) $ strPaths gram lng ctype instTerm + ] + + +-- gammalt skräp: +-- mergeArgs = zipWith mergeRec +-- mergeRec (R r1) (R r2) = R (r1 ++ r2) + +extractArg :: CanonGrammar -> Ident -> [Term] -> ArgVar -> [XMCFCat] +extractArg gram lng args (A cat nr) = emcfCat gram lng cat (args !!! nr) + + +emcfCat :: CanonGrammar -> Ident -> Ident -> Term -> [XMCFCat] +emcfCat gram lng cat = map (XMCFCat cat) . parPaths gram lng (lookupCType gram lng cat) + + +extractLin :: [XMCFCat] -> (XPath, Term) -> [Lin XMCFCat XMCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 `C` t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (E) = [[]] + convertLin (K tok) = [[Tok tok]] + convertLin (FV terms) = concatMap convertLin terms + convertLin term = map (return . Cat) $ flattenTerm emptyXPath term + flattenTerm path (Arg (A _ nr)) = [(args !!! nr, path, fromInteger nr)] + flattenTerm path (term `P` lbl) = flattenTerm (path ++.. lbl) term + flattenTerm path (term `S` sel) = flattenTerm (path ++!! sel) term + flattenTerm path (FV terms) = concatMap (flattenTerm path) terms + flattenTerm path term = error $ "flattenTerm: \n " ++ show path ++ "\n " ++ prt term + + +enumerateInsts :: CanonGrammar -> Ident -> ArgVar -> [Term] +enumerateInsts gram lng arg@(A argCat _) = enumerate (Arg arg) (lookupCType gram lng argCat) + where enumerate path (TStr) = [ path ] + enumerate path (Cn con) = okError $ lookupParamValues gram con + enumerate path (RecType r) + = map R $ sequence [ map (lbl `Ass`) $ + enumerate (path `P` lbl) ctype | + lbl `Lbg` ctype <- r ] + enumerate path (Table s t) + = map (T s) $ sequence [ map ([term2pattern sel] `Cas`) $ + enumerate (path `S` sel) t | + sel <- enumerate (error "enumerate") s ] + + + +termPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, (CType, Term))] +termPaths gr l (TStr) term = [ (emptyXPath, (TStr, term)) ] +termPaths gr l (RecType rtype) (R record) + = [ (path ++.. lbl, value) | + lbl `Ass` term <- record, + let ctype = okError $ maybeErr "termPaths/record" $ lookupLabelling lbl rtype, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (T _ table) + = [ (path ++!! pattern2term pat, value) | + pats `Cas` term <- table, pat <- pats, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l (Table _ ctype) (V ptype table) + = [ (path ++!! pat, value) | + (pat, term) <- zip (okError $ allParamValues gr ptype) table, + (path, value) <- termPaths gr l ctype term ] +termPaths gr l ctype (FV terms) + = concatMap (termPaths gr l ctype) terms +termPaths gr l (Cn pc) term = [ (emptyXPath, (Cn pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +parPaths :: CanonGrammar -> Ident -> CType -> Term -> [[(XPath, Term)]] +parPaths gr l ctype term = mapM (uncurry (map . (,))) (groupPairs paths) + where paths = nubsort [ (path, value) | (path, (Cn _, value)) <- termPaths gr l ctype term ] + +strPaths :: CanonGrammar -> Ident -> CType -> Term -> [(XPath, Term)] +strPaths gr l ctype term = [ (path, evalFV values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (TStr, value)) <- termPaths gr l ctype term ] + + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: CanonGrammar -> Ident -> [Term] -> Term -> Term +substitutePaths gr l arguments trm = subst trm + where subst (con `Con` terms) = con `Con` map subst terms + subst (R record) = R $ map substAss record + subst (term `P` lbl) = subst term `evalP` lbl + subst (T ptype table) = T ptype $ map substCas table + subst (V ptype table) = T ptype [ [term2pattern pat] `Cas` subst term | + (pat, term) <- zip (okError $ allParamValues gr ptype) table ] + subst (term `S` select) = subst term `evalS` subst select + subst (term `C` term') = subst term `C` subst term' + subst (FV terms) = evalFV $ map subst terms + subst (Arg (A _ arg)) = arguments !!! arg + subst term = term + + substAss (l `Ass` term) = l `Ass` subst term + substCas (p `Cas` term) = p `Cas` subst term + + +evalP (R record) lbl = okError $ maybeErr errStr $ lookupAssign lbl record + where errStr = "evalP: " ++ prt (R record `P` lbl) +evalP (FV terms) lbl = evalFV [ evalP term lbl | term <- terms ] +evalP term lbl = term `P` lbl + +evalS t@(T _ tbl) sel = maybe (t `S` sel) id $ lookupCase sel tbl +evalS (FV terms) sel = evalFV [ term `evalS` sel | term <- terms ] +evalS term (FV sels)= evalFV [ term `evalS` sel | sel <- sels ] +evalS term sel = term `S` sel + +evalFV terms0 = case nubsort (concatMap flattenFV terms0) of + [term] -> term + terms -> FV terms + where flattenFV (FV ts) = ts + flattenFV t = [t] + + +---------------------------------------------------------------------- +-- utilities + +-- lookup a CType for an Ident +lookupCType :: CanonGrammar -> Ident -> Ident -> CType +lookupCType gr lng c = errVal defLinType $ lookupLincat gr (CIQ lng c) + +-- lookup a label in a (record / record ctype / table) +lookupAssign :: Label -> [Assign] -> Maybe Term +lookupLabelling :: Label -> [Labelling] -> Maybe CType +lookupCase :: Term -> [Case] -> Maybe Term + +lookupAssign lbl rec = listToMaybe [ term | lbl' `Ass` term <- rec, lbl == lbl' ] +lookupLabelling lbl rtyp = listToMaybe [ ctyp | lbl' `Lbg` ctyp <- rtyp, lbl == lbl' ] +lookupCase sel tbl = listToMaybe [ term | pats `Cas` term <- tbl, sel `matchesPats` pats ] + +matchesPats :: Term -> [Patt] -> Bool +matchesPats term patterns = or [ term == pattern2term pattern | pattern <- patterns ] + +-- converting between patterns and terms +pattern2term :: Patt -> Term +term2pattern :: Term -> Patt + +pattern2term (con `PC` patterns) = con `Con` map pattern2term patterns +pattern2term (PR record) = R [ lbl `Ass` pattern2term pattern | + lbl `PAss` pattern <- record ] + +term2pattern (con `Con` terms) = con `PC` map term2pattern terms +term2pattern (R record) = PR [ lbl `PAss` term2pattern term | + lbl `Ass` term <- record ] + +-- list lookup for Integers instead of Ints +(!!!) :: [a] -> Integer -> a +xs !!! n = xs !! fromInteger n diff --git a/src/GF/OldParsing/ConvertSimpleToMCFG/Strict.hs b/src/GF/OldParsing/ConvertSimpleToMCFG/Strict.hs new file mode 100644 index 000000000..a1be8af4e --- /dev/null +++ b/src/GF/OldParsing/ConvertSimpleToMCFG/Strict.hs @@ -0,0 +1,139 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:56 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Converting SimpleGFC grammars to MCFG grammars, deterministic. +-- +-- the resulting grammars might be /very large/ +-- +-- the conversion is only equivalent if the GFC grammar has a context-free backbone. +----------------------------------------------------------------------------- + + +module GF.OldParsing.ConvertGFCtoMCFG.Strict (convertGrammar) where + +import GF.System.Tracing +import GF.Infra.Print + +import Monad + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.MCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.BacktrackM + +{- +import Ident (Ident(..)) +import AbsGFC +import GFC +import Look +import Operations +import qualified Modules as M +import CMacros (defLinType) +import MkGFC (grammar2canon) +import GF.OldParsing.Utilities +import GF.OldParsing.GrammarTypes +import GF.OldParsing.MCFGrammar (Grammar, Rule(..), Lin(..)) +import GF.Data.SortedList +-- import Maybe (listToMaybe) +import List (groupBy) -- , transpose) + +import GF.Data.BacktrackM +-} + +---------------------------------------------------------------------- + +convertGrammar :: SimpleGrammar -> MGrammar +convertGrammar rules = tracePrt "#mcf-rules total" (prt . length) $ + solutions conversion undefined + where conversion = member rules >>= convertRule + +convertRule :: SimpleRule -> CnvMonad MRule +convertRule (Rule (Abs decl decls fun) (Cnc ctype ctypes (Just term))) + = do let cat : args = map decl2cat (decl : decls) + args_ctypes = zip3 [0..] args ctypes + instArgs <- mapM enumerateArg args_ctypes + let instTerm = substitutePaths instArgs term + newCat <- extractMCat cat ctype instTerm + newArgs <- mapM (extractArg instArgs) args + let newLinRec = strPaths ctype instTerm >>= extractLin newArgs + lintype : lintypes = map (convertLinType emptyPath) (ctype : ctypes) + return $ Rule (Abs newCat newArgs fun) (Cnc lintype lintypes newLinRec) +convertRule _ = failure + +---------------------------------------------------------------------- + +type CnvMonad a = BacktrackM () a + +---------------------------------------------------------------------- +-- strict conversion + +--extractArg :: [Term] -> (Int, Cat, LinType) -> CnvMonad MCat +extractArg args (nr, cat, ctype) = emcfCat cat ctype (args !! nr) + +--emcfCat :: Cat -> LinType -> Term -> CnvMonad MCat +extractMCat cat ctype term = map (MCat cat) $ parPaths ctype term + +--enumerateArg :: (Int, Cat, LinType) -> CnvMonad Term +enumerateArg (nr, cat, ctype) = enumerateTerms (Arg nr cat emptyPath) ctype + +-- Substitute each instantiated parameter path for its instantiation +substitutePaths :: [Term] -> Term -> Term +substitutePaths arguments = subst + where subst (Arg nr _ path) = followPath path (arguments !! nr) + subst (con :^ terms) = con :^ map subst terms + subst (Rec record) = Rec [ (lbl, subst term) | (lbl, term) <- record ] + subst (term :. lbl) = subst term +. lbl + subst (Tbl table) = Tbl [ (pat, subst term) | + (pat, term) <- table ] + subst (term :! select) = subst term +! subst select + subst (term :++ term') = subst term ?++ subst term' + subst (Variants terms) = Variants $ map subst terms + subst term = term + + +--termPaths :: CType -> STerm -> [(Path, (CType, STerm))] +termPaths ctype (Variants terms) = terms >>= termPaths ctype +termPaths (StrT) term = [ (emptyPath, (StrT, term)) ] +termPaths (RecT rtype) (Rec record) + = [ (path ++. lbl, value) | + (lbl, term) <- record, + let Just ctype = lookup lbl rtype, + (path, value) <- termPaths ctype term ] +termPaths (TblT _ ctype) (Tbl table) + = [ (path ++! pat, value) | + (pat, term) <- table, + (path, value) <- termPaths ctype term ] +termPaths (ConT pc _) term = [ (emptyPath, (ConT pc, term)) ] + +{- ^^^ variants are pushed inside (not equivalent -- but see record-variants.txt): +{a=a1; b=b1} | {a=a2; b=b2} ==> {a=a1|a2; b=b1|b2} +[p=>p1;q=>q1] | [p=>p2;q=>q2] ==> [p=>p1|p2;q=>q1|q2] +-} + +--parPaths :: CType -> STerm -> [[(Path, STerm)]] +parPaths ctype term = mapM (uncurry (map . (,))) $ groupPairs $ + nubsort [ (path, value) | + (path, (ConT _, value)) <- termPaths ctype term ] + +--strPaths :: CType -> STerm -> [(Path, STerm)] +strPaths ctype term = [ (path, variants values) | (path, values) <- groupPairs paths ] + where paths = nubsort [ (path, value) | (path, (StrT, value)) <- termPaths ctype term ] + +--extractLin :: [MCFCat] -> (Path, STerm) -> [Lin MCFCat MCFLabel Tokn] +extractLin args (path, term) = map (Lin path) (convertLin term) + where convertLin (t1 :++ t2) = liftM2 (++) (convertLin t1) (convertLin t2) + convertLin (Empty) = [[]] + convertLin (Token tok) = [[Tok tok]] + convertLin (Variants terms) = concatMap convertLin terms + convertLin (Arg nr _ path) = [[Cat (args !! nr, path, nr)]] + diff --git a/src/GF/OldParsing/GCFG.hs b/src/GF/OldParsing/GCFG.hs new file mode 100644 index 000000000..33a710e5d --- /dev/null +++ b/src/GF/OldParsing/GCFG.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplistic GFC format +----------------------------------------------------------------------------- + +module GF.OldParsing.GCFG where + +import GF.Printing.PrintParser + +---------------------------------------------------------------------- + +type Grammar c n l t = [Rule c n l t] +data Rule c n l t = Rule (Abstract c n) (Concrete l t) + deriving (Eq, Ord, Show) + +data Abstract cat name = Abs cat [cat] name + deriving (Eq, Ord, Show) +data Concrete lin term = Cnc lin [lin] term + deriving (Eq, Ord, Show) + +---------------------------------------------------------------------- + +instance (Print c, Print n, Print l, Print t) => Print (Rule n c l t) where + prt (Rule abs cnc) = prt abs ++ " := " ++ prt cnc ++ "\n" + prtList = concatMap prt + +instance (Print c, Print n) => Print (Abstract c n) where + prt (Abs cat args name) = prt name ++ ". " ++ prt cat ++ + ( if null args then "" + else " -> " ++ prtSep " " args ) + +instance (Print l, Print t) => Print (Concrete l t) where + prt (Cnc lcat args term) = prt term ++ " : " ++ prt lcat ++ + ( if null args then "" + else " [ " ++ prtSep " " args ++ " ]" ) diff --git a/src/GF/OldParsing/GeneralChart.hs b/src/GF/OldParsing/GeneralChart.hs new file mode 100644 index 000000000..1d51da025 --- /dev/null +++ b/src/GF/OldParsing/GeneralChart.hs @@ -0,0 +1,86 @@ +---------------------------------------------------------------------- +-- | +-- Module : GeneralChart +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simple implementation of deductive chart parsing +----------------------------------------------------------------------------- + + +module GF.OldParsing.GeneralChart + (-- * Type definition + Chart, + -- * Main functions + chartLookup, + buildChart, + -- * Probably not needed + emptyChart, + chartMember, + chartInsert, + chartList, + addToChart + ) where + +-- import Trace + +import GF.Data.RedBlackSet + +-- main functions + +chartLookup :: (Ord item, Ord key) => Chart item key -> key -> [item] +buildChart :: (Ord item, Ord key) => (item -> key) -> + [Chart item key -> item -> [item]] -> [item] -> [item] + +buildChart keyof rules axioms = chartList (addItems axioms emptyChart) + where addItems [] = id + addItems (item:items) = addItems items . addItem item + + -- addItem item | trace ("+ "++show item++"\n") False = undefined + addItem item = addToChart item (keyof item) + (\chart -> foldr (consequence item) chart rules) + + consequence item rule chart = addItems (rule chart item) chart + +-- probably not needed + +emptyChart :: (Ord item, Ord key) => Chart item key +chartMember :: (Ord item, Ord key) => Chart item key -> item -> key -> Bool +chartInsert :: (Ord item, Ord key) => Chart item key -> item -> key -> Maybe (Chart item key) +chartList :: (Ord item, Ord key) => Chart item key -> [item] +addToChart :: (Ord item, Ord key) => item -> key -> (Chart item key -> Chart item key) -> Chart item key -> Chart item key + +addToChart item key after chart = maybe chart after (chartInsert chart item key) + + +-------------------------------------------------------------------------------- +-- key charts as red/black trees + +newtype Chart item key = KC (RedBlackMap key item) + deriving Show + +emptyChart = KC rbmEmpty +chartMember (KC tree) item key = rbmElem key item tree +chartInsert (KC tree) item key = fmap KC (rbmInsert key item tree) +chartLookup (KC tree) key = rbmLookup key tree +chartList (KC tree) = concatMap snd (rbmList tree) +--------------------------------------------------------------------------------} + + +{-------------------------------------------------------------------------------- +-- key charts as unsorted association lists -- OBSOLETE! + +newtype Chart item key = SC [(key, item)] + +emptyChart = SC [] +chartMember (SC chart) item key = (key,item) `elem` chart +chartInsert (SC chart) item key = if (key,item) `elem` chart then Nothing else Just (SC ((key,item):chart)) +chartLookup (SC chart) key = [ item | (key',item) <- chart, key == key' ] +chartList (SC chart) = map snd chart +--------------------------------------------------------------------------------} + diff --git a/src/GF/OldParsing/GrammarTypes.hs b/src/GF/OldParsing/GrammarTypes.hs new file mode 100644 index 000000000..af2832bdf --- /dev/null +++ b/src/GF/OldParsing/GrammarTypes.hs @@ -0,0 +1,148 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- All possible instantiations of different grammar formats used for parsing +-- +-- Plus some helper types and utilities +----------------------------------------------------------------------------- + + +module GF.OldParsing.GrammarTypes + (-- * Main parser information + PInfo(..), + -- * Multiple context-free grammars + MCFGrammar, MCFRule, MCFPInfo, + MCFCat(..), MCFLabel, + Constraint, + -- * Context-free grammars + CFGrammar, CFRule, CFPInfo, + CFProfile, CFName(..), CFCat(..), + -- * Assorted types + Cat, Name, Constr, Label, Tokn, + -- * Simplified terms + STerm(..), (+.), (+!), + -- * Record\/table paths + Path(..), emptyPath, + (++.), (++!) + ) where + +import Ident (Ident(..)) +import AbsGFC +-- import qualified GF.OldParsing.FiniteTypes.Calc as Fin +import qualified GF.OldParsing.CFGrammar as CFG +import qualified GF.OldParsing.MCFGrammar as MCFG +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import qualified GF.OldParsing.ConvertGFCtoSimple + +---------------------------------------------------------------------- + +data PInfo = PInfo { mcfg :: MCFGrammar, + cfg :: CFGrammar, + mcfPInfo :: MCFPInfo, + cfPInfo :: CFPInfo } + +type MCFGrammar = MCFG.Grammar Name MCFCat MCFLabel Tokn +type MCFRule = MCFG.Rule Name MCFCat MCFLabel Tokn +type MCFPInfo = MCFG.PInfo Name MCFCat MCFLabel Tokn + +data MCFCat = MCFCat Cat [Constraint] deriving (Eq, Ord, Show) +type MCFLabel = Path + +type Constraint = (Path, STerm) + +type CFGrammar = CFG.Grammar CFName CFCat Tokn +type CFRule = CFG.Rule CFName CFCat Tokn +type CFPInfo = CFG.PInfo CFName CFCat Tokn + +type CFProfile = [[Int]] +data CFName = CFName Name CFProfile deriving (Eq, Ord, Show) +data CFCat = CFCat MCFCat MCFLabel deriving (Eq, Ord, Show) + +---------------------------------------------------------------------- + +type Cat = Ident +type Name = Ident +type Constr = CIdent + +data STerm = SArg Int Cat Path -- ^ argument variable, the 'Path' is a path + -- pointing into the term + | SCon Constr [STerm] -- ^ constructor + | SRec [(Label, STerm)] -- ^ record + | STbl [(STerm, STerm)] -- ^ table of patterns\/terms + | SVariants [STerm] -- ^ variants + | SConcat STerm STerm -- ^ concatenation + | SToken Tokn -- ^ single token + | SEmpty -- ^ empty string + | SWildcard -- ^ wildcard pattern variable + + -- SRes CIdent -- resource identifier + -- SVar Ident -- bound pattern variable + -- SInt Integer -- integer + deriving (Eq, Ord, Show) + +(+.) :: STerm -> Label -> STerm +SRec record +. lbl = maybe err id $ lookup lbl record + where err = error $ "(+.), label not in record: " ++ show (SRec record) ++ " +. " ++ show lbl +SArg arg cat path +. lbl = SArg arg cat (path ++. lbl) +SVariants terms +. lbl = SVariants $ map (+. lbl) terms +sterm +. lbl = error $ "(+.): " ++ show sterm ++ " +. " ++ show lbl + +(+!) :: STerm -> STerm -> STerm +STbl table +! pat = maybe err id $ lookup pat table + where err = error $ "(+!), pattern not in table: " ++ show (STbl table) ++ " +! " ++ show pat +SArg arg cat path +! pat = SArg arg cat (path ++! pat) +SVariants terms +! pat = SVariants $ map (+! pat) terms +term +! SVariants pats = SVariants $ map (term +!) pats +sterm +! pat = error $ "(+!): " ++ show sterm ++ " +! " ++ show pat + +---------------------------------------------------------------------- + +newtype Path = Path [Either Label STerm] deriving (Eq, Ord, Show) + +emptyPath :: Path +emptyPath = Path [] + +(++.) :: Path -> Label -> Path +Path path ++. lbl = Path (Left lbl : path) + +(++!) :: Path -> STerm -> Path +Path path ++! sel = Path (Right sel : path) + +------------------------------------------------------------ + +instance Print STerm where + prt (SArg n c p) = prt c ++ "@" ++ prt n ++ prt p + prt (SCon c []) = prt c + prt (SCon c ts) = prt c ++ prtList ts + prt (SRec rec) = "{" ++ concat [ prt l ++ "=" ++ prt t ++ ";" | (l,t) <- rec ] ++ "}" + prt (STbl tbl) = "[" ++ concat [ prt p ++ "=>" ++ prt t ++ ";" | (p,t) <- tbl ] ++ "}" + prt (SVariants ts) = "{| " ++ prtSep " | " ts ++ " |}" + prt (SConcat t1 t2) = prt t1 ++ "++" ++ prt t2 + prt (SToken t) = prt t + prt (SEmpty) = "[]" + prt (SWildcard) = "_" + +instance Print MCFCat where + prt (MCFCat cat params) + = prt cat ++ "{" ++ concat [ prt path ++ "=" ++ prt term ++ ";" | + (path, term) <- params ] ++ "}" + +instance Print CFName where + prt (CFName name profile) = prt name ++ prt profile + +instance Print CFCat where + prt (CFCat cat lbl) = prt cat ++ prt lbl + +instance Print Path where + prt (Path path) = concatMap prtEither (reverse path) + where prtEither (Left lbl) = "." ++ prt lbl + prtEither (Right patt) = "!" ++ prt patt diff --git a/src/GF/OldParsing/IncrementalChart.hs b/src/GF/OldParsing/IncrementalChart.hs new file mode 100644 index 000000000..2a941ec84 --- /dev/null +++ b/src/GF/OldParsing/IncrementalChart.hs @@ -0,0 +1,50 @@ +---------------------------------------------------------------------- +-- | +-- Module : IncrementalChart +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:53 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Implementation of /incremental/ deductive parsing, +-- i.e. parsing one word at the time. +----------------------------------------------------------------------------- + + +module GF.OldParsing.IncrementalChart + (-- * Type definitions + IncrementalChart, + -- * Functions + buildChart, + chartList + ) where + +import Array +import GF.Data.SortedList +import GF.Data.Assoc + +buildChart :: (Ord item, Ord key) => (item -> key) -> + (Int -> item -> SList item) -> + (Int -> SList item) -> + (Int, Int) -> IncrementalChart item key + +chartList :: (Ord item, Ord key) => (Int -> item -> edge) -> IncrementalChart item key -> [edge] + +type IncrementalChart item key = Array Int (Assoc key (SList item)) + +---------- + +buildChart keyof rules axioms bounds = finalChartArray + where buildState k = limit (rules k) $ axioms k + finalChartList = map buildState [fst bounds .. snd bounds] + finalChartArray = listArray bounds $ map stateAssoc finalChartList + stateAssoc state = accumAssoc id [ (keyof item, item) | item <- state ] + +chartList combine chart = [ combine k item | + (k, state) <- assocs chart, + item <- concatMap snd $ aAssocs state ] + + diff --git a/src/GF/OldParsing/MCFGrammar.hs b/src/GF/OldParsing/MCFGrammar.hs new file mode 100644 index 000000000..350c574a7 --- /dev/null +++ b/src/GF/OldParsing/MCFGrammar.hs @@ -0,0 +1,206 @@ +---------------------------------------------------------------------- +-- | +-- Module : MCFGrammar +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Definitions of multiple context-free grammars, +-- parser information and chart conversion +----------------------------------------------------------------------------- + +module GF.OldParsing.MCFGrammar + (-- * Type definitions + Grammar, + Rule(..), + Lin(..), + -- * Parser information + MCFParser, + MEdge, + edges2chart, + PInfo, + pInfo, + -- * Ranges + Range(..), + makeRange, + concatRange, + unifyRange, + unionRange, + failRange, + -- * Utilities + select, + updateIndex + ) where + +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +-- parser modules: +import GF.OldParsing.Utilities +import GF.Printing.PrintParser + + + +select :: [a] -> [(a, [a])] +select [] = [] +select (x:xs) = (x,xs) : [ (y,x:ys) | (y,ys) <- select xs ] + +updateIndex :: Functor f => Int -> [a] -> (a -> f a) -> f [a] +updateIndex 0 (a:as) f = fmap (:as) $ f a +updateIndex n (a:as) f = fmap (a:) $ updateIndex (n-1) as f +updateIndex _ _ _ = error "ParserUtils.updateIndex: Index out of range" + + +------------------------------------------------------------ +-- grammar types + +type Grammar n c l t = [Rule n c l t] +data Rule n c l t = Rule c [c] [Lin c l t] n + deriving (Eq, Ord, Show) +data Lin c l t = Lin l [Symbol (c, l, Int) t] + deriving (Eq, Ord, Show) + +-- variants is simply several linearizations with the same label + + +------------------------------------------------------------ +-- parser information + +type PInfo n c l t = Grammar n c l t + +pInfo :: Grammar n c l t -> PInfo n c l t +pInfo = id + +type MCFParser n c l t = PInfo n c l t -> [c] -> Input t -> ParseChart n (MEdge c l) + +type MEdge c l = (c, [(l, Range)]) + +edges2chart :: (Ord n, Ord c, Ord l) => + [(n, MEdge c l, [MEdge c l])] -> ParseChart n (MEdge c l) +edges2chart edges = fmap groupPairs $ accumAssoc id $ + [ (medge, (name, medges)) | (name, medge, medges) <- edges ] + + +------------------------------------------------------------ +-- ranges as sets of int-pairs + +newtype Range = Rng (SList (Int, Int)) deriving (Eq, Ord, Show) + +makeRange :: SList (Int, Int) -> Range +makeRange rho = Rng rho + +concatRange :: Range -> Range -> Range +concatRange (Rng rho) (Rng rho') = Rng $ nubsort [ (i,k) | (i,j) <- rho, (j',k) <- rho', j==j' ] + +unifyRange :: Range -> Range -> Range +unifyRange (Rng rho) (Rng rho') = Rng $ rho <**> rho' + +unionRange :: Range -> Range -> Range +unionRange (Rng rho) (Rng rho') = Rng $ rho <++> rho' + +failRange :: Range +failRange = Rng [] + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print n, Print c, Print l, Print t) => Print (Rule n c l t) where + prt (Rule cat args record name) + = prt name ++ ". " ++ prt cat ++ " -> " ++ prtSep " " args ++ "\n" ++ prt record + prtList = concatMap prt + +instance (Print c, Print l, Print t) => Print (Lin c l t) where + prt (Lin lbl lin) = prt lbl ++ " = " ++ prtSep " " (map (symbol prArg (show.prt)) lin) + where prArg (cat, lbl, arg) = prt cat ++ "@" ++ prt arg ++ "." ++ prt lbl + prtList = prtBeforeAfter "\t" "\n" + +instance Print Range where + prt (Rng rho) = "(" ++ prtSep "|" [ show i ++ "-" ++ show j | (i,j) <- rho ] ++ ")" + +{- +------------------------------------------------------------ +-- items & forests + +data Item n c l = Item n (MEdge c l) [[MEdge c l]] + deriving (Eq, Ord, Show) +type MEdge c l = (c, [Edge l]) + +items2forests :: (Ord n, Ord c, Ord l) => Edge ((c, l) -> Bool) -> [Item n c l] -> [ParseForest n] + +---------- + +items2forests (Edge i0 k0 startCat) items + = concatMap edge2forests $ filter checkEdge $ aElems chart + where edge2forests (cat, []) = [FMeta] + edge2forests edge = filter checkForest $ map item2forest (chart ? edge) + + item2forest (Item name _ children) = FNode name [ forests | edges <- children, + forests <- mapM edge2forests edges ] + + checkEdge (cat, [Edge i k lbl]) = i == i0 && k == k0 && startCat (cat, lbl) + checkEdge _ = False + + checkForest (FNode _ children) = not (null children) + + chart = accumAssoc id [ (edge, item) | item@(Item _ edge _) <- items ] +-} + + +------------------------------------------------------------ +-- grammar checking +{- +--checkGrammar :: (Ord c, Ord l, Print n, Print c, Print l, Print t) => Grammar n c l t -> [String] + +checkGrammar rules + = do rule@(Rule cat rhs record name) <- rules + if null record + then [ "empty linearization record in rule: " ++ prt rule ] + else [ "category does not exist: " ++ prt rcat ++ "\n" ++ + " - in rule: " ++ prt rule | + rcat <- rhs, rcat `notElem` lhsCats ] ++ + do Lin _ lin <- record + Cat (arg, albl) <- lin + if arg<0 || arg>=length rhs + then [ "argument index out of range: " ++ show arg ++ "/" ++ prt albl ++ "\n" ++ + " - in rule: " ++ prt rule ] + else [ "label does not exist: " ++ prt albl ++ "\n" ++ + " - from rule: " ++ prt rule ++ + " - in rule: " ++ prt arule | + arule@(Rule _ acat _ arecord) <- rules, + acat == rhs !! arg, + albl `notElem` [ lbl | Lin lbl _ <- arecord ] ] + where lhsCats = nubsort [ cat | Rule _ cat _ _ <- rules ] +-} + + + + + +{----- +------------------------------------------------------------ +-- simplifications + +splitMRule :: (Ord n, Ord c, Ord l, Ord t) => Grammar n c l t -> Rule n c l t -> [Rule n c l t] +splitMRule rules (Rule name cat args record) = nubsort [ (Rule name cat args splitrec) | + (cat', lbls) <- rhsCats, cat == cat', + let splitrec = [ lin | lin@(Lin lbl _) <- record, lbl `elem` lbls ] ] + where rhsCats = limit rhsC lhsCats + lhsCats = nubsort [ (cat, [lbl]) | Rule _ cat _ record <- rules, Lin lbl _ <- record ] + rhsC (cat, lbls) = nubsort [ (rcat, rlbls) | + Rule _ cat' rhs lins <- rules, cat == cat', + (arg, rcat) <- zip [0..] rhs, + let rlbls = nubsort [ rlbl | Lin lbl lin <- lins, lbl `elem` lbls, + Cat (arg', rlbl) <- lin, arg == arg' ], + not $ null rlbls + ] + + +----} + + + diff --git a/src/GF/OldParsing/ParseCF.hs b/src/GF/OldParsing/ParseCF.hs new file mode 100644 index 000000000..0ed19c786 --- /dev/null +++ b/src/GF/OldParsing/ParseCF.hs @@ -0,0 +1,82 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCF +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Chart parsing of grammars in CF format +----------------------------------------------------------------------------- + +module GF.OldParsing.ParseCF (parse, alternatives) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import GF.Data.SortedList (nubsort) +import GF.Data.Assoc +import qualified CF +import qualified CFIdent as CFI +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import qualified GF.OldParsing.ParseCFG as P + +type Token = CFI.CFTok +type Name = CFI.CFFun +type Category = CFI.CFCat + +alternatives :: [(String, [String])] +alternatives = [ ("gb", ["G","GB","_gen","_genBU"]), + ("gt", ["GT","_genTD"]), + ("ibn", ["","I","B","IB","IBN","_inc","BU","_incBU"]), + ("ibb", ["BB","IBB","BU_BUF","_incBU_BUF"]), + ("ibt", ["BT","IBT","BU_TDF","_incBU_TDF"]), + ("iba", ["BA","IBA","BU_BTF","BU_TBF","_incBU_BTF","_incBU_TBF"]), + ("itn", ["T","IT","ITN","TD","_incTD"]), + ("itb", ["TB","ITB","TD_BUF","_incTD_BUF"]) + ] + +parse :: String -> CF.CF -> Category -> CF.CFParser +parse = buildParser . P.parse + +buildParser :: CFParser Name Category Token -> CF.CF -> Category -> CF.CFParser +buildParser parser cf start tokens = trace "ParseCF" $ + (parseResults, parseInformation) + where parseInformation = prtSep "\n" trees + parseResults = {-take maxTake-} [ (tree2cfTree t, []) | t <- trees ] + theInput = input tokens + edges = tracePrt "#edges" (prt.length) $ + parser pInf [start] theInput + chart = tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + edges2chart theInput $ map (fmap addCategory) edges + forests = tracePrt "#forests" (prt.length) $ + chart2forests chart (const False) $ + uncurry Edge (inputBounds theInput) start + trees = tracePrt "#trees" (prt.length) $ + concatMap forest2trees forests + pInf = pInfo $ cf2grammar cf (nubsort tokens) + + +addCategory (Rule cat rhs name) = Rule cat rhs (name, cat) + +tree2cfTree (TNode (name, cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees)) + +cf2grammar :: CF.CF -> [Token] -> Grammar Name Category Token +cf2grammar cf tokens = [ Rule cat rhs name | + (name, (cat, rhs0)) <- cfRules, + rhs <- mapM item2symbol rhs0 ] + where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++ + CF.rulesOfCF cf + item2symbol (CF.CFNonterm cat) = [Cat cat] + item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens + +-- maxTake :: Int +-- maxTake = 500 +-- maxTake = maxBound + + diff --git a/src/GF/OldParsing/ParseCFG.hs b/src/GF/OldParsing/ParseCFG.hs new file mode 100644 index 000000000..7cba41175 --- /dev/null +++ b/src/GF/OldParsing/ParseCFG.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main parsing module for context-free grammars +----------------------------------------------------------------------------- + + +module GF.OldParsing.ParseCFG (parse) where + +import Char (toLower) +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import qualified GF.OldParsing.ParseCFG.General as PGen +import qualified GF.OldParsing.ParseCFG.Incremental as PInc + + +parse :: (Ord n, Ord c, Ord t, Show t) => + String -> CFParser n c t +parse = decodeParser . map toLower + +decodeParser ['g',s] = PGen.parse (decodeStrategy s) +decodeParser ['i',s,f] = PInc.parse (decodeStrategy s, decodeFilter f) +decodeParser _ = decodeParser "ibn" + +decodeStrategy 'b' = (True, False) +decodeStrategy 't' = (False, True) + +decodeFilter 'a' = (True, True) +decodeFilter 'b' = (True, False) +decodeFilter 't' = (False, True) +decodeFilter 'n' = (False, False) + + + + diff --git a/src/GF/OldParsing/ParseCFG/General.hs b/src/GF/OldParsing/ParseCFG/General.hs new file mode 100644 index 000000000..7ac395ba3 --- /dev/null +++ b/src/GF/OldParsing/ParseCFG/General.hs @@ -0,0 +1,83 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG.General +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:57 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Several implementations of CFG chart parsing +----------------------------------------------------------------------------- + +module GF.OldParsing.ParseCFG.General + (parse, Strategy) where + +import GF.System.Tracing + +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import GF.OldParsing.GeneralChart +import GF.Data.Assoc + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser n c t +parse strategy grammar start = extract . process strategy grammar start + +type Strategy = (Bool, Bool) -- (isBottomup, isTopdown) + +extract :: [Item n (Symbol c t)] -> [Edge (Rule n c t)] +extract edges = + edges' + where edges' = [ Edge j k (Rule cat (reverse found) name) | + Edge j k (Cat cat, found, [], Just name) <- edges ] + +process :: (Ord n, Ord c, Ord t) => Strategy -> PInfo n c t -> + [c] -> Input t -> [Item n (Symbol c t)] +process (isBottomup, isTopdown) grammar start + = trace2 "CFParserGeneral" ((if isBottomup then " BU" else "") ++ + (if isTopdown then " TD" else "")) $ + buildChart keyof [predict, combine] . axioms + where axioms input = initial ++ scan input + + scan input = map (fmap mkEdge) (inputEdges input) + mkEdge tok = (Tok tok, [], [], Nothing) + + -- the combine rule + combine chart (Edge j k (next, _, [], _)) + = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ] + combine chart edge@(Edge _ j (_, _, next:_, _)) + = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ] + + -- initial predictions + initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ] + + -- predictions + predict chart (Edge j k (next, _, [], _)) | isBottomup + = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ] + -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward + predict chart (Edge _ k (_, _, Cat cat:_, _)) + = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ] + predict _ _ = [] + + tdRuleLookup | isTopdown = topdownRules grammar + | isBottomup = emptyLeftcornerRules grammar + +-- internal representation of parse items + +type Item n s = Edge (s, [s], [s], Maybe n) +type IChart n s = Chart (Item n s) (IKey s) +data IKey s = Active s Int + | Passive s Int + deriving (Eq, Ord, Show) + +keyof (Edge _ j (_, _, next:_, _)) = Active next j +keyof (Edge j _ (cat, _, [], _)) = Passive cat j + +forwardTo (Edge i j (cat, found, next:tofind, name)) k = Edge i k (cat, next:found, tofind, name) + +loopingEdge k (Rule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name) + + + diff --git a/src/GF/OldParsing/ParseCFG/Incremental.hs b/src/GF/OldParsing/ParseCFG/Incremental.hs new file mode 100644 index 000000000..882fad26e --- /dev/null +++ b/src/GF/OldParsing/ParseCFG/Incremental.hs @@ -0,0 +1,142 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG.Incremental +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:57 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Incremental chart parsing for context-free grammars +----------------------------------------------------------------------------- + + + +module GF.OldParsing.ParseCFG.Incremental + (parse, Strategy) where + +import GF.System.Tracing +import GF.Printing.PrintParser + +-- haskell modules: +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +import Operations +-- parser modules: +import GF.OldParsing.Utilities +import GF.OldParsing.CFGrammar +import GF.OldParsing.IncrementalChart + + +type Strategy = ((Bool, Bool), (Bool, Bool)) -- (predict:(BU, TD), filter:(BU, TD)) + +parse :: (Ord n, Ord c, Ord t, Show t) => + Strategy -> CFParser n c t +parse ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input = + trace2 "CFParserIncremental" + ((if isPredictBU then "BU-predict " else "") ++ + (if isPredictTD then "TD-predict " else "") ++ + (if isFilterBU then "BU-filter " else "") ++ + (if isFilterTD then "TD-filter " else "")) $ + finalEdges + where finalEdges = [ Edge j k (Rule cat (reverse found) name) | + (k, state) <- + tracePrt "#passiveChart" + (prt . map (length . (?Passive) . snd)) $ + tracePrt "#activeChart" + (prt . map (length . concatMap snd . aAssocs . snd)) $ + assocs finalChart, + Item j (Rule cat _Nil name) found <- state ? Passive ] + + finalChart = buildChart keyof rules axioms $ inputBounds input + + axioms 0 = --tracePrt ("axioms 0") (prtSep "\n") $ + union $ map (tdInfer 0) start + axioms k = --tracePrt ("axioms "++show k) (prtSep "\n") $ + union [ buInfer j k (Tok token) | + (token, js) <- aAssocs (inputTo input ! k), j <- js ] + + rules k (Item j (Rule cat [] _) _) + = buInfer j k (Cat cat) + rules k (Item j rule@(Rule _ (Cat next:_) _) found) + = tdInfer k next <++> + -- hack for empty rules: + [ Item j (forward rule) (Cat next:found) | + emptyCategories grammar ?= next ] + rules _ _ = [] + + buInfer j k next = --tracePrt ("buInfer "++show(j,k)++" "++prt next) (prtSep "\n") $ + buPredict j k next <++> buCombine j k next + tdInfer k next = tdPredict k next + + -- the combine rule + buCombine j k next + | j == k = [] -- hack for empty rules + | otherwise = [ Item i (forward rule) (next:found) | + Item i rule found <- (finalChart ! j) ? Active next ] + + -- kilbury bottom-up prediction + buPredict j k next + = [ Item j rule [next] | isPredictBU, + rule <- map forward $ --tracePrt ("buRules "++prt next) (prtSep "\n") $ + bottomupRules grammar ? next, + buFilter rule k, + tdFilter rule j k ] + + -- top-down prediction + tdPredict k cat + = [ Item k rule [] | isPredictTD || isFilterTD, + rule <- topdownRules grammar ? cat, + buFilter rule k ] <++> + -- hack for empty rules: + [ Item k rule [] | isPredictBU, + rule <- emptyLeftcornerRules grammar ? cat ] + + -- bottom up filtering: input symbol k can begin the given symbol list (first set) + -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!! + buFilter (Rule _ (Cat cat:_) _) k | isFilterBU + = k < snd (inputBounds input) && + hasCommonElements (leftcornerTokens grammar ? cat) + (aElems (inputFrom input ! k)) + buFilter _ _ = True + + -- top down filtering: 'cat' is reachable by an active edge ending in node j < k + tdFilter (Rule cat _ _) j k | isFilterTD && j < k + = (tdFilters ! j) ?= cat + tdFilter _ _ _ = True + + tdFilters = listArray (inputBounds input) $ + map (listSet . limit leftCats . activeCats) [0..] + activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ] + leftCats cat = [ left | Rule _cat (Cat left:_) _ <- topdownRules grammar ? cat ] + + +-- type declarations, items & keys +data Item n c t = Item Int (Rule n c t) [Symbol c t] + deriving (Eq, Ord, Show) + +data IKey c t = Active (Symbol c t) | Passive + deriving (Eq, Ord, Show) + +keyof :: Item n c t -> IKey c t +keyof (Item _ (Rule _ (next:_) _) _) = Active next +keyof (Item _ (Rule _ [] _) _) = Passive + +forward :: Rule n c t -> Rule n c t +forward (Rule cat (_:rest) name) = Rule cat rest name + + +instance (Print n, Print c, Print t) => Print (Item n c t) where + prt (Item k (Rule cat rhs name) syms) + = "<" ++show k++ ": "++prt name++". "++ + prt cat++" -> "++prt rhs++" / "++prt syms++">" + +instance (Print c, Print t) => Print (IKey c t) where + prt (Active sym) = "?" ++ prt sym + prt (Passive) = "!" + + diff --git a/src/GF/OldParsing/ParseGFC.hs b/src/GF/OldParsing/ParseGFC.hs new file mode 100644 index 000000000..ebd4dc782 --- /dev/null +++ b/src/GF/OldParsing/ParseGFC.hs @@ -0,0 +1,177 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseGFC +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- The main parsing module, parsing GFC grammars +-- by translating to simpler formats, such as PMCFG and CFG +---------------------------------------------------------------------- + +module GF.OldParsing.ParseGFC (newParser) where + +import GF.System.Tracing +import GF.Printing.PrintParser +import qualified PrGrammar + +-- Haskell modules +import Monad +-- import Ratio ((%)) +-- GF modules +import qualified Grammar as GF +import Values +import qualified Macros +import qualified Modules as Mods +import qualified AbsGFC +import qualified Ident +import qualified ShellState as SS +import Operations +import GF.Data.SortedList +-- Conversion and parser modules +import GF.Data.Assoc +import GF.OldParsing.Utilities +-- import ConvertGrammar +import GF.OldParsing.GrammarTypes +import qualified GF.OldParsing.MCFGrammar as M +import qualified GF.OldParsing.CFGrammar as C +import qualified GF.OldParsing.ParseMCFG as PM +import qualified GF.OldParsing.ParseCFG as PC +--import MCFRange + +newParser :: String -> SS.StateGrammar -> GF.Cat -> String -> Err [GF.Term] + +-- parsing via MCFG +newParser (m:strategy) gr (_, startCat) inString + | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms + where terms = map (ptree2term abstract) trees + trees = --tracePrt "trees" (prtBefore "\n") $ + tracePrt "#trees" (prt . length) $ + concatMap forest2trees forests + forests = --tracePrt "forests" (prtBefore "\n") $ + tracePrt "#forests" (prt . length) $ + concatMap (chart2forests chart isMeta) finalEdges + isMeta = null . snd + finalEdges = tracePrt "finalEdges" (prtBefore "\n") $ + filter isFinalEdge $ aElems chart +-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) | +-- let (i, j) = inputBounds inTokens, +-- E.Rule cat _ [E.Lin lbl _] _ <- pInf, +-- isStartCat cat ] + isFinalEdge (cat, rows) + = isStartCat cat && + inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ] + chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + PM.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $ + mcfPInfo $ SS.statePInfoOld gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ] + isStartCat (MCFCat cat _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + +-- parsing via CFG +newParser (c:strategy) gr (_, startCat) inString + | c=='c' || c=='C' = trace2 "Parser" "CFG" $ Ok terms + where terms = -- tracePrt "terms" (unlines . map PrGrammar.prt) $ + map (ptree2term abstract) trees + trees = tracePrt "#trees" (prt . length) $ + --tracePrt "trees" (prtSep "\n") $ + concatMap forest2trees forests + forests = tracePrt "$cfForests" (prt) $ -- . length) $ + tracePrt "forests" (unlines . map prt) $ + concatMap convertFromCFForest cfForests + cfForests= tracePrt "cfForests" (unlines . map prt) $ + concatMap (chart2forests chart (const False)) finalEdges + finalEdges = tracePrt "finalChartEdges" prt $ + map (uncurry Edge (inputBounds inTokens)) starters + chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + C.edges2chart inTokens edges + edges = --tracePrt "finalEdges" + --(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $ + tracePrt "#edges" (prt . length) $ + PC.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = cfPInfo $ SS.statePInfoOld gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ map fst $ aAssocs $ C.topdownRules pInf + isStartCat (CFCat (MCFCat cat _) _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + --ifNull (Ident.identC "ABS") last $ + --[i | (i, Mods.ModMod m) <- Mods.modules (SS.grammar gr), Mods.isModAbs m] + +newParser "" gr start inString = newParser "c" gr start inString + +newParser opt gr (_,cat) _ = + Bad ("new-parser '" ++ opt ++ "' not defined yet") + +ptree2term :: Ident.Ident -> ParseTree Name -> GF.Term +ptree2term a (TNode f ts) = Macros.mkApp (Macros.qq (a,f)) (map (ptree2term a) ts) +ptree2term a (TMeta) = GF.Meta (GF.MetaSymb 0) + +---------------------------------------------------------------------- +-- conversion and unification of forests + +convertFromCFForest :: ParseForest CFName -> [ParseForest Name] +convertFromCFForest (FNode (CFName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] + checkProfile forests = unifyManyForests . map (forests !!) + -- foldM unifyForests FMeta . map (forests !!) + +isCoercion Ident.IW = True +isCoercion _ = False + +unifyManyForests :: Eq n => [ParseForest n] -> [ParseForest n] +unifyManyForests [] = [FMeta] +unifyManyForests [f] = [f] +unifyManyForests (f:g:fs) = do h <- unifyForests f g + unifyManyForests (h:fs) + +unifyForests :: Eq n => ParseForest n -> ParseForest n -> [ParseForest n] +unifyForests FMeta forest = [forest] +unifyForests forest FMeta = [forest] +unifyForests (FNode name1 children1) (FNode name2 children2) + = [ FNode name1 children | name1 == name2, not (null children) ] + where children = [ forests | forests1 <- children1, forests2 <- children2, + forests <- zipWithM unifyForests forests1 forests2 ] + + + +{- +---------------------------------------------------------------------- +-- conversion and unification for parse trees instead of forests + +convertFromCFTree :: ParseTree CFName -> [ParseTree Name] +convertFromCFTree (TNode (CFName name profile) children0) + = [ TNode name children | + children1 <- mapM convertFromCFTree children0, + children <- mapM (checkProfile children1) profile ] + where checkProfile trees = unifyManyTrees . map (trees !!) + +unifyManyTrees :: Eq n => [ParseTree n] -> [ParseTree n] +unifyManyTrees [] = [TMeta] +unifyManyTrees [f] = [f] +unifyManyTrees (f:g:fs) = do h <- unifyTrees f g + unifyManyTrees (h:fs) + +unifyTrees TMeta tree = [tree] +unifyTrees tree TMeta = [tree] +unifyTrees (TNode name1 children1) (TNode name2 children2) + = [ TNode name1 children | name1 == name2, + children <- zipWithM unifyTrees children1 children2 ] + +-} + diff --git a/src/GF/OldParsing/ParseMCFG.hs b/src/GF/OldParsing/ParseMCFG.hs new file mode 100644 index 000000000..ad29e5f2f --- /dev/null +++ b/src/GF/OldParsing/ParseMCFG.hs @@ -0,0 +1,37 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main module for MCFG parsing +----------------------------------------------------------------------------- + + +module GF.OldParsing.ParseMCFG (parse) where + +import Char (toLower) +import GF.OldParsing.Utilities +import GF.OldParsing.MCFGrammar +import qualified GF.OldParsing.ParseMCFG.Basic as PBas +import GF.Printing.PrintParser +---- import qualified MCFParserBasic2 as PBas2 -- file not found AR + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + String -> MCFParser n c l t +parse str = decodeParser (map toLower str) + +decodeParser "b" = PBas.parse +---- decodeParser "c" = PBas2.parse +decodeParser _ = decodeParser "b" + + + + diff --git a/src/GF/OldParsing/ParseMCFG/Basic.hs b/src/GF/OldParsing/ParseMCFG/Basic.hs new file mode 100644 index 000000000..7b0d01dde --- /dev/null +++ b/src/GF/OldParsing/ParseMCFG/Basic.hs @@ -0,0 +1,156 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseMCFG.Basic +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:57 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplest possible implementation of MCFG chart parsing +----------------------------------------------------------------------------- + +module GF.OldParsing.ParseMCFG.Basic + (parse) where + +import GF.System.Tracing + +import Ix +import GF.OldParsing.Utilities +import GF.OldParsing.MCFGrammar +import GF.OldParsing.GeneralChart +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Printing.PrintParser + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + MCFParser n c l t +parse grammar start = edges2chart . extract . process grammar + + +extract :: [Item n c l t] -> [(n, MEdge c l, [MEdge c l])] +extract items = tracePrt "#passives" (prt.length) $ + --trace2 "passives" (prtAfter "\n" [ i | i@(PItem _) <- items ]) $ + [ item | PItem item <- items ] + + +process :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + Grammar n c l t -> Input t -> [Item n c l t] +process grammar input = buildChart keyof rules axioms + where axioms = initial + rules = [combine, scan, predict] + + -- axioms + initial = traceItems "axiom" [] $ + [ nextLin name tofind (addNull cat) (map addNull args) | + Rule cat args tofind name <- grammar ] + + addNull a = (a, []) + + -- predict + predict chart i1@(Item name tofind rho (Lin lbl []) (cat, found0) children) + = traceItems "predict" [i1] + [ nextLin name tofind (cat, found) children | + let found = insertRow lbl rho found0 ] + predict _ _ = [] + + -- combine + combine chart active@(Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) + = do passive <- chartLookup chart (Passive cat) + combineItems active passive + combine chart passive@(PItem (_, (cat, _), _)) + = do active <- chartLookup chart (Active cat) + combineItems active passive + combine _ _ = [] + + combineItems i1@(Item name tofind rho0 (Lin lbl (Cat(_,lbl',nr):rest)) found children0) + i2@(PItem (_, found', _)) + = traceItems "combine" [i1,i2] + [ Item name tofind rho (Lin lbl rest) found children | + rho1 <- lookupLbl lbl' found', + let rho = concatRange rho0 rho1, + children <- updateChild nr children0 (snd found') ] + + -- scan + scan chart i1@(Item name tofind rho0 (Lin lbl (Tok tok:rest)) found children) + = traceItems "scan" [i1] + [ Item name tofind rho (Lin lbl rest) found children | + let rho = concatRange rho0 (rangeOfToken tok) ] + scan _ _ = [] + + -- utilities + rangeOfToken tok = makeRange $ inputToken input ? tok + + zeroRange = makeRange $ map (\i -> (i,i)) $ range $ inputBounds input + + nextLin name [] found children = PItem (name, found, children) + nextLin name (lin : tofind) found children + = Item name tofind zeroRange lin found children + +lookupLbl a = map snd . filter (\b -> a == fst b) . snd +updateChild nr children found = updateIndex nr children $ + \child -> if null (snd child) + then [ (fst child, found) ] + else [ child | snd child == found ] + +insertRow lbl rho [] = [(lbl, rho)] +insertRow lbl rho rows'@(row@(lbl', rho') : rows) + = case compare lbl lbl' of + LT -> row : insertRow lbl rho rows + GT -> (lbl, rho) : rows' + EQ -> (lbl, unionRange rho rho') : rows + + +-- internal representation of parse items + +data Item n c l t + = Item n [Lin c l t] -- tofind + Range (Lin c l t) -- current row + (MEdge c l) -- found rows + [MEdge c l] -- found children + | PItem (n, MEdge c l, [MEdge c l]) + deriving (Eq, Ord, Show) + +data IKey c = Passive c | Active c | AnyItem + deriving (Eq, Ord, Show) + +keyof (PItem (_, (cat, _), _)) = Passive cat +keyof (Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) = Active cat +keyof _ = AnyItem + + +-- tracing + +--type TraceItem = Item String String Char String +traceItems :: (Print n, Print l, Print c, Print t) => + String -> [Item n c l t] -> [Item n c l t] -> [Item n c l t] +traceItems rule trigs items + | null items || True = items + | otherwise = trace ("\n" ++ rule ++ ":" ++ + unlines [ "\t" ++ prt i | i <- trigs ] ++ "=>" ++ + unlines [ "\t" ++ prt i | i <- items ]) items + +-- pretty-printing + +instance (Print n, Print c, Print l, Print t) => Print (Item n c l t) where + prt (Item name tofind rho lin (cat, found) children) + = prt name ++ ". " ++ prt cat ++ prtRhs (map fst children) ++ + " { " ++ prt rho ++ prt lin ++ " ; " ++ + concat [ prt lbl ++ "=" ++ prt ln ++ " " | + Lin lbl ln <- tofind ] ++ "; " ++ + concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl, rho) <- found ] ++ "} " ++ + concat [ "[ " ++ concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl,rho) <- child ] ++ "] " | + child <- map snd children ] + prt (PItem (name, edge, edges)) + = prt name ++ ". " ++ prt edge ++ prtRhs edges + +prtRhs [] = "" +prtRhs rhs = " -> " ++ prtSep " " rhs + diff --git a/src/GF/OldParsing/SimpleGFC.hs b/src/GF/OldParsing/SimpleGFC.hs new file mode 100644 index 000000000..456c44685 --- /dev/null +++ b/src/GF/OldParsing/SimpleGFC.hs @@ -0,0 +1,161 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:54 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplistic GFC format +----------------------------------------------------------------------------- + +module GF.OldParsing.SimpleGFC where + +import qualified AbsGFC +import qualified Ident + +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import Operations (ifNull) + +---------------------------------------------------------------------- + +type Name = Ident.Ident +type Cat = Ident.Ident +type Constr = AbsGFC.CIdent +type Var = Ident.Ident +type Token = AbsGFC.Tokn +type Label = AbsGFC.Label + +constr2name :: Constr -> Name +constr2name (AbsGFC.CIQ _ name) = name + +---------------------------------------------------------------------- + +type Grammar = [Rule] +data Rule = Rule Name Typing (Maybe (Term, CType)) + deriving (Eq, Ord, Show) + +type Typing = (Type, [Decl]) + +data Decl = Var ::: Type + deriving (Eq, Ord, Show) +data Type = Cat :@ [Atom] + deriving (Eq, Ord, Show) +data Atom = ACon Constr + | AVar Var + deriving (Eq, Ord, Show) + +data CType = RecT [(Label, CType)] + | TblT CType CType + | ConT Constr [Term] + | StrT + deriving (Eq, Ord, Show) + + +data Term = Arg Int Cat Path -- ^ argument variable, the 'Path' is a path + -- pointing into the term + | Constr :^ [Term] -- ^ constructor + | Rec [(Label, Term)] -- ^ record + | Term :. Label -- ^ record projection + | Tbl [(Term, Term)] -- ^ table of patterns\/terms + | Term :! Term -- ^ table selection + | Variants [Term] -- ^ variants + | Term :++ Term -- ^ concatenation + | Token Token -- ^ single token + | Empty -- ^ empty string + | Wildcard -- ^ wildcard pattern variable + | Var Var -- ^ bound pattern variable + + -- Res CIdent -- resource identifier + -- Int Integer -- integer + deriving (Eq, Ord, Show) + + +---------------------------------------------------------------------- + +(+.) :: Term -> Label -> Term +Variants terms +. lbl = Variants $ map (+. lbl) terms +Rec record +. lbl = maybe err id $ lookup lbl record + where err = error $ "(+.), label not in record: " ++ show (Rec record) ++ " +. " ++ show lbl +Arg arg cat path +. lbl = Arg arg cat (path ++. lbl) +term +. lbl = term :. lbl + +(+!) :: Term -> Term -> Term +Variants terms +! pat = Variants $ map (+! pat) terms +term +! Variants pats = Variants $ map (term +!) pats +Tbl table +! pat = maybe err id $ lookup pat table + where err = error $ "(+!), pattern not in table: " ++ show (Tbl table) ++ " +! " ++ show pat +Arg arg cat path +! pat = Arg arg cat (path ++! pat) +term +! pat = term :! pat + +(?++) :: Term -> Term -> Term +Variants terms ?++ term = Variants $ map (?++ term) terms +term ?++ Variants terms = Variants $ map (term ?++) terms +Empty ?++ term = term +term ?++ Empty = term +term1 ?++ term2 = term1 :++ term2 + +---------------------------------------------------------------------- + +newtype Path = Path [Either Label Term] deriving (Eq, Ord, Show) + +emptyPath :: Path +emptyPath = Path [] + +(++.) :: Path -> Label -> Path +Path path ++. lbl = Path (Left lbl : path) + +(++!) :: Path -> Term -> Path +Path path ++! sel = Path (Right sel : path) + +---------------------------------------------------------------------- + +instance Print Rule where + prt (Rule name (typ, args) term) + = prt name ++ " : " ++ + prtAfter " " args ++ + (if null args then "" else "-> ") ++ + prt typ ++ + maybe "" (\(t,c) -> " := " ++ prt t ++ " : " ++ prt c) term ++ + "\n" + prtList = concatMap prt + +instance Print Decl where + prt (var ::: typ) = "(" ++ prt var ++ ":" ++ prt typ ++ ")" + +instance Print Type where + prt (cat :@ ats) = prt cat ++ prtList ats + +instance Print Atom where + prt (ACon con) = prt con + prt (AVar var) = "?" ++ prt var + +instance Print CType where + prt (RecT rec) = "{" ++ concat [ prt l ++ ":" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}" + prt (TblT t1 t2) = "(" ++ prt t1 ++ " => " ++ prt t2 ++ ")" + prt (ConT t ts) = prt t ++ "(|" ++ prtSep "|" ts ++ "|)" + prt (StrT) = "Str" + +instance Print Term where + prt (Arg n c p) = prt c ++ "@" ++ prt n ++ prt p + prt (c :^ []) = prt c + prt (c :^ ts) = prt c ++ prtList ts + prt (Rec rec) = "{" ++ concat [ prt l ++ "=" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}" + prt (Tbl tbl) = "[" ++ concat [ prt p ++ "=>" ++ prt t ++ "; " | (p,t) <- tbl ] ++ "}" + prt (Variants ts) = "{| " ++ prtSep " | " ts ++ " |}" + prt (t1 :++ t2) = prt t1 ++ "++" ++ prt t2 + prt (Token t) = prt t + prt (Empty) = "[]" + prt (Wildcard) = "_" + prt (term :. lbl) = prt term ++ "." ++ prt lbl + prt (term :! sel) = prt term ++ " ! " ++ prt sel + prt (Var var) = "?" ++ prt var + +instance Print Path where + prt (Path path) = concatMap prtEither (reverse path) + where prtEither (Left lbl) = "." ++ prt lbl + prtEither (Right patt) = "!" ++ prt patt diff --git a/src/GF/OldParsing/Utilities.hs b/src/GF/OldParsing/Utilities.hs new file mode 100644 index 000000000..22d168973 --- /dev/null +++ b/src/GF/OldParsing/Utilities.hs @@ -0,0 +1,188 @@ +---------------------------------------------------------------------- +-- | +-- Module : Parsing.Utilities +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:55 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic type declarations and functions to be used when parsing +----------------------------------------------------------------------------- + + +module GF.OldParsing.Utilities + ( -- * Symbols + Symbol(..), symbol, mapSymbol, + -- * Edges + Edge(..), + -- * Parser input + Input(..), makeInput, input, inputMany, + -- * charts, parse forests & trees + ParseChart, ParseForest(..), ParseTree(..), + chart2forests, forest2trees + ) where + +-- haskell modules: +import Monad +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +-- parsing modules: +import GF.Printing.PrintParser + +------------------------------------------------------------ +-- symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u + +---------- + +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + + +------------------------------------------------------------ +-- edges + +data Edge s = Edge Int Int s + deriving (Eq, Ord, Show) + +instance Functor Edge where + fmap f (Edge i j s) = Edge i j (f s) + + +------------------------------------------------------------ +-- parser input + +data Input t = MkInput { inputEdges :: [Edge t], + inputBounds :: (Int, Int), + inputFrom :: Array Int (Assoc t [Int]), + inputTo :: Array Int (Assoc t [Int]), + inputToken :: Assoc t [(Int, Int)] + } + +makeInput :: Ord t => [Edge t] -> Input t +input :: Ord t => [t] -> Input t +inputMany :: Ord t => [[t]] -> Input t + +---------- + +makeInput inEdges | null inEdges = input [] + | otherwise = MkInput inEdges inBounds inFrom inTo inToken + where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ] + where minmax (a, b) (a', b') = (min a a', max b b') + inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $ + [ (i, [(tok, j)]) | Edge i j tok <- inEdges ] + inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds + [ (j, [(tok, i)]) | Edge i j tok <- inEdges ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +input toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = zipWith3 Edge [0..] [1..] toks + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +inputMany toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ] + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ] + ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ + [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + + +------------------------------------------------------------ +-- charts, parse forests & trees + +type ParseChart n e = Assoc e [(n, [[e]])] + +data ParseForest n = FNode n [[ParseForest n]] | FMeta + deriving (Eq, Ord, Show) + +data ParseTree n = TNode n [ParseTree n] | TMeta + deriving (Eq, Ord, Show) + +chart2forests :: Ord e => ParseChart n e -> (e -> Bool) -> e -> [ParseForest n] + +--filterCoercions :: (n -> Bool) -> ParseForest n -> [ParseForest n] + +forest2trees :: ParseForest n -> [ParseTree n] + +instance Functor ParseTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap f (TMeta) = TMeta + +instance Functor ParseForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap f (FMeta) = FMeta + +---------- + +chart2forests chart isMeta = edge2forests + where item2forest (name, children) = FNode name $ + do edges <- children + mapM edge2forests edges + edge2forests edge + | isMeta edge = [FMeta] + | otherwise = filter checkForest $ map item2forest $ chart ? edge + checkForest (FNode _ children) = not (null children) + +-- filterCoercions _ (FMeta) = [FMeta] +-- filterCoercions isCoercion (FNode s forests) +-- | isCoercion s = do [forest] <- forests ; filterCoercions isCoercion forest +-- | otherwise = FNode s $ do children <- forests ; mapM (filterCoercions isCoercion) + +forest2trees (FNode s forests) = map (TNode s) $ forests >>= mapM forest2trees +forest2trees (FMeta) = [TMeta] + + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow.prt) + prtList = prtSep " " + +simpleShow :: String -> String +simpleShow s = "\"" ++ concatMap mkEsc s ++ "\"" + where + mkEsc :: Char -> String + mkEsc c = case c of + _ | elem c "\\\"" -> '\\' : [c] + '\n' -> "\\n" + '\t' -> "\\t" + _ -> [c] + +instance (Print s) => Print (Edge s) where + prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]" + prtList = prtSep "" + +instance (Print s) => Print (ParseTree s) where + prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}" + prt (TMeta) = "?" + prtList = prtAfter "\n" + +instance (Print s) => Print (ParseForest s) where + prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}" + prt (FMeta) = "?" + prtList = prtAfter "\n" + + diff --git a/src/GF/Parsing/CFG.hs b/src/GF/Parsing/CFG.hs new file mode 100644 index 000000000..6af1de8ac --- /dev/null +++ b/src/GF/Parsing/CFG.hs @@ -0,0 +1,44 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG parsing +----------------------------------------------------------------------------- + +module GF.NewParsing.CFG + (parseCF, module GF.NewParsing.CFG.PInfo) where + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.NewParsing.CFG.PInfo + +import qualified GF.NewParsing.CFG.Incremental as Inc +import qualified GF.NewParsing.CFG.General as Gen + +---------------------------------------------------------------------- +-- parsing + +--parseCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t +parseCF "gb" = Gen.parse bottomup +parseCF "gt" = Gen.parse topdown +parseCF "ib" = Inc.parse (bottomup, noFilter) +parseCF "it" = Inc.parse (topdown, noFilter) +parseCF "ibFT" = Inc.parse (bottomup, topdown) +parseCF "ibFB" = Inc.parse (bottomup, bottomup) +parseCF "ibFTB" = Inc.parse (bottomup, bothFilters) +parseCF "itF" = Inc.parse (topdown, bottomup) +-- default parser: +parseCF _ = parseCF "gb" + +bottomup = (True, False) +topdown = (False, True) +noFilter = (False, False) +bothFilters = (True, True) + + diff --git a/src/GF/Parsing/CFG/General.hs b/src/GF/Parsing/CFG/General.hs new file mode 100644 index 000000000..ea67ec94f --- /dev/null +++ b/src/GF/Parsing/CFG/General.hs @@ -0,0 +1,101 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG parsing with a general chart +----------------------------------------------------------------------------- + +module GF.NewParsing.CFG.General + (parse, Strategy) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.NewParsing.CFG.PInfo +import GF.NewParsing.GeneralChart +import GF.Data.Assoc +import Monad + +--parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t +parse strategy grammar start = extract . + tracePrt "#internal chart" (prt . length . chartList) . + process strategy grammar start + +type Strategy = (Bool, Bool) -- ^ (isBottomup, isTopdown) + +extract :: (Ord n, Ord c, Ord t) => + IChart n (Symbol c t) -> CFChart c n t +extract chart = [ CFRule (Edge j k cat) daughters name | + Edge j k (Cat cat, found, [], Just name) <- chartList chart, + daughters <- path j k (reverse found) ] + where path i k [] = [ [] | i==k ] + path i k (Tok tok : found) + = [ Tok tok : daughters | + daughters <- path (i+1) k found ] + path i k (Cat cat : found) + = [ Cat (Edge i j cat) : daughters | + Edge _i j _cat <- chartLookup chart (Passive (Cat cat) i), + daughters <- path j k found ] + + +process :: (Ord n, Ord c, Ord t) => + Strategy -- ^ (isBottomup, isTopdown) :: (Bool, Bool) + -> CFPInfo c n t -- ^ parser information (= grammar) + -> [c] -- ^ list of starting categories + -> Input t -- ^ input string + -> IChart n (Symbol c t) +process (isBottomup, isTopdown) grammar start + = trace2 "CFParserGeneral" ((if isBottomup then " BU" else "") ++ + (if isTopdown then " TD" else "")) $ + buildChart keyof [predict, combine] . axioms + where axioms input = initial ++ scan input + + scan input = map (fmap mkEdge) (inputEdges input) + mkEdge tok = (Tok tok, [], [], Nothing) + + -- the combine rule + combine chart (Edge j k (next, _, [], _)) + = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ] + combine chart edge@(Edge _ j (_, _, next:_, _)) + = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ] + + -- initial predictions + initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ] + + -- predictions + predict chart (Edge j k (next, _, [], _)) | isBottomup + = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ] + -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward + predict chart (Edge _ k (_, _, Cat cat:_, _)) + = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ] + predict _ _ = [] + + tdRuleLookup | isTopdown = topdownRules grammar + | isBottomup = emptyLeftcornerRules grammar + +-- internal representation of parse items + +type Item n s = Edge (s, [s], [s], Maybe n) +type IChart n s = ParseChart (Item n s) (IKey s) +data IKey s = Active s Int + | Passive s Int + deriving (Eq, Ord, Show) + +keyof (Edge _ j (_, _, next:_, _)) = Active next j +keyof (Edge j _ (cat, _, [], _)) = Passive cat j + +forwardTo (Edge i j (cat, found, next:tofind, name)) k + = Edge i k (cat, next:found, tofind, name) + +loopingEdge k (CFRule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name) + + + diff --git a/src/GF/Parsing/CFG/Incremental.hs b/src/GF/Parsing/CFG/Incremental.hs new file mode 100644 index 000000000..af0f79bf0 --- /dev/null +++ b/src/GF/Parsing/CFG/Incremental.hs @@ -0,0 +1,148 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Incremental chart parsing for CFG +----------------------------------------------------------------------------- + + +module GF.NewParsing.CFG.Incremental + (parse, Strategy) where + +import GF.System.Tracing +import GF.Infra.Print + +import Array + +import Operations +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.NewParsing.CFG.PInfo +import GF.NewParsing.IncrementalChart + + +type Strategy = ((Bool, Bool), (Bool, Bool)) -- ^ (predict:(BU, TD), filter:(BU, TD)) + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser c n t +parse strategy grammar start = extract . + tracePrt "#internal chart" (prt . length . flip chartList const) . + process strategy grammar start + +extract :: (Ord n, Ord c, Ord t) => + IChart c n t -> CFChart c n t +extract finalChart = [ CFRule (Edge j k cat) daughters name | + (k, Item j (CFRule cat [] name) found) <- chartList finalChart (,), + daughters <- path j k (reverse found) ] + where path i k [] = [ [] | i==k ] + path i k (Tok tok : found) + = [ Tok tok : daughters | + daughters <- path (i+1) k found ] + path i k (Cat cat : found) + = [ Cat (Edge i j cat) : daughters | + Item j _ _ <- chartLookup finalChart i (Passive cat), + daughters <- path j k found ] + +process :: (Ord n, Ord c, Ord t) => + Strategy -> CFPInfo c n t -> [c] -> Input t -> IChart c n t +process ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input + = trace2 "CFParserIncremental" ((if isPredictBU then "BU-predict " else "") ++ + (if isPredictTD then "TD-predict " else "") ++ + (if isFilterBU then "BU-filter " else "") ++ + (if isFilterTD then "TD-filter " else "")) $ + finalChart + where finalChart = buildChart keyof rules axioms $ inputBounds input + + axioms 0 = union $ map (tdInfer 0) start + axioms k = union [ buInfer j k (Tok token) | + (token, js) <- aAssocs (inputTo input ! k), j <- js ] + + rules k (Item j (CFRule cat [] _) _) + = buInfer j k (Cat cat) + rules k (Item j rule@(CFRule _ (sym@(Cat next):_) _) found) + = tdInfer k next <++> + -- hack for empty rules: + [ Item j (forward rule) (sym:found) | + emptyCategories grammar ?= next ] + rules _ _ = [] + + buInfer j k next = buPredict j k next <++> buCombine j k next + tdInfer k next = tdPredict k next + + -- the combine rule + buCombine j k next + | j == k = [] -- hack for empty rules, see rules above and tdPredict below + | otherwise = [ Item i (forward rule) (next:found) | + Item i rule found <- (finalChart ! j) ? Active next ] + + -- kilbury bottom-up prediction + buPredict j k next + = [ Item j rule [next] | isPredictBU, + rule <- map forward $ bottomupRules grammar ? next, + buFilter rule k, + tdFilter rule j k ] + + -- top-down prediction + tdPredict k cat + = [ Item k rule [] | isPredictTD || isFilterTD, + rule <- topdownRules grammar ? cat, + buFilter rule k ] <++> + -- hack for empty rules: + [ Item k rule [] | isPredictBU, + rule <- emptyLeftcornerRules grammar ? cat ] + + -- bottom up filtering: input symbol k can begin the given symbol list (first set) + -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!! + buFilter (CFRule _ (Cat cat:_) _) k | isFilterBU + = k < snd (inputBounds input) && + hasCommonElements (leftcornerTokens grammar ? cat) + (aElems (inputFrom input ! k)) + buFilter _ _ = True + + -- top down filtering: 'cat' is reachable by an active edge ending in node j < k + tdFilter (CFRule cat _ _) j k | isFilterTD && j < k + = (tdFilters ! j) ?= cat + tdFilter _ _ _ = True + + tdFilters = listArray (inputBounds input) $ + map (listSet . limit leftCats . activeCats) [0..] + activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ] + leftCats cat = [ left | CFRule _cat (Cat left:_) _ <- topdownRules grammar ? cat ] + + +---------------------------------------------------------------------- +-- type declarations, items & keys + +data Item c n t = Item Int (CFRule c n t) [Symbol c t] + deriving (Eq, Ord, Show) + +data IKey c t = Active (Symbol c t) | Passive c + deriving (Eq, Ord, Show) + +type IChart c n t = IncrementalChart (Item c n t) (IKey c t) + +keyof :: Item c n t -> IKey c t +keyof (Item _ (CFRule _ (next:_) _) _) = Active next +keyof (Item _ (CFRule cat [] _) _) = Passive cat + +forward :: CFRule c n t -> CFRule c n t +forward (CFRule cat (_:rest) name) = CFRule cat rest name + +---------------------------------------------------------------------- + +instance (Print n, Print c, Print t) => Print (Item c n t) where + prt (Item k rule syms) + = "<"++show k++ ": "++ prt rule++" / "++prt syms++">" + +instance (Print c, Print t) => Print (IKey c t) where + prt (Active sym) = "?" ++ prt sym + prt (Passive cat) = "!" ++ prt cat + + diff --git a/src/GF/Parsing/CFG/PInfo.hs b/src/GF/Parsing/CFG/PInfo.hs new file mode 100644 index 000000000..eff0767c1 --- /dev/null +++ b/src/GF/Parsing/CFG/PInfo.hs @@ -0,0 +1,95 @@ +--------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:52 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- CFG parsing, parser information +----------------------------------------------------------------------------- + +module GF.NewParsing.CFG.PInfo where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.CFG +import GF.Data.SortedList +import GF.Data.Assoc + +---------------------------------------------------------------------- +-- type declarations + +type CFParser c n t = CFPInfo c n t + -> [c] -- ^ possible starting categories + -> Input t -- ^ the input tokens + -> CFChart c n t + +------------------------------------------------------------ +-- parser information + +data CFPInfo c n t + = CFPInfo { grammarTokens :: SList t, + nameRules :: Assoc n (SList (CFRule c n t)), + topdownRules :: Assoc c (SList (CFRule c n t)), + bottomupRules :: Assoc (Symbol c t) (SList (CFRule c n t)), + emptyLeftcornerRules :: Assoc c (SList (CFRule c n t)), + emptyCategories :: Set c, + cyclicCategories :: SList c, + -- ^ ONLY FOR DIRECT CYCLIC RULES!!! + leftcornerTokens :: Assoc c (SList t) + -- ^ DOES NOT WORK WITH EMPTY RULES!!! + } + +--buildCFPInfo :: (Ord n, Ord c, Ord t) => CFGrammar c n t -> CFPInfo c n t + +-- this is not permanent... +buildCFPInfo grammar = traceCalcFirst grammar $ + tracePrt "cf parser info" (prt) $ + pInfo' (filter (not . isCyclic) grammar) + +pInfo' grammar = CFPInfo grToks nmRules tdRules buRules elcRules emptyCats cyclicCats leftToks + where grToks = union [ nubsort [ tok | Tok tok <- rhs ] | + CFRule _ rhs _ <- grammar ] + nmRules = accumAssoc id [ (name, rule) | + rule@(CFRule _ _ name) <- grammar ] + tdRules = accumAssoc id [ (cat, rule) | + rule@(CFRule cat _ _) <- grammar ] + buRules = accumAssoc id [ (next, rule) | + rule@(CFRule _ (next:_) _) <- grammar ] + elcRules = accumAssoc id $ limit lc emptyRules + leftToks = accumAssoc id $ limit lc $ + nubsort [ (cat, token) | + CFRule cat (Tok token:_) _ <- grammar ] + lc (left, res) = nubsort [ (cat, res) | + CFRule cat _ _ <- buRules ? Cat left ] + emptyRules = nubsort [ (cat, rule) | + rule@(CFRule cat [] _) <- grammar ] + emptyCats = listSet $ limitEmpties $ map fst emptyRules + limitEmpties es = if es==es' then es else limitEmpties es' + where es' = nubsort [ cat | CFRule cat rhs _ <- grammar, + all (symbol (\e -> e `elem` es) (const False)) rhs ] + cyclicCats = nubsort [ cat | CFRule cat [Cat cat'] _ <- grammar, cat == cat' ] + +isCyclic (CFRule cat [Cat cat'] _) = cat==cat' +isCyclic _ = False + + +---------------------------------------------------------------------- + +instance (Ord n, Ord c, Ord t) => Print (CFPInfo n c t) where + prt pI = "[ tokens=" ++ sl grammarTokens ++ + "; names=" ++ sla nameRules ++ + "; tdCats=" ++ sla topdownRules ++ + "; buCats=" ++ sla bottomupRules ++ + "; elcCats=" ++ sla emptyLeftcornerRules ++ + "; eCats=" ++ sla emptyCategories ++ + "; cCats=" ++ sl cyclicCategories ++ + "; lctokCats=" ++ sla leftcornerTokens ++ + " ]" + where sla f = show $ length $ aElems $ f pI + sl f = show $ length $ f pI diff --git a/src/GF/Parsing/GFC.hs b/src/GF/Parsing/GFC.hs new file mode 100644 index 000000000..11fdbbe04 --- /dev/null +++ b/src/GF/Parsing/GFC.hs @@ -0,0 +1,187 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- The main parsing module, parsing GFC grammars +-- by translating to simpler formats, such as PMCFG and CFG +---------------------------------------------------------------------- + +module GF.NewParsing.GFC + (parse, PInfo(..), buildPInfo) where + +import GF.System.Tracing +import GF.Infra.Print +import qualified PrGrammar + +import Monad + +import qualified Grammar +-- import Values +import qualified Macros +-- import qualified Modules +import qualified AbsGFC +import qualified Ident +import Operations +import CFIdent (CFCat, cfCat2Ident, CFTok, prCFTok) + +import GF.Data.SortedList +import GF.Data.Assoc +import GF.Formalism.Utilities +import GF.Conversion.Types +import GF.Formalism.SimpleGFC +import qualified GF.Formalism.MCFG as M +import qualified GF.Formalism.CFG as C +-- import qualified GF.NewParsing.MCFG as PM +import qualified GF.NewParsing.CFG as PC +--import qualified GF.Conversion.FromGFC as From + +---------------------------------------------------------------------- +-- parsing information + +data PInfo = PInfo { mcfPInfo :: (), -- ^ not implemented yet + cfPInfo :: PC.CFPInfo CCat CName Token } + +buildPInfo :: MGrammar -> CGrammar -> PInfo +buildPInfo mcfg cfg = PInfo { mcfPInfo = (), + cfPInfo = PC.buildCFPInfo cfg } + + +---------------------------------------------------------------------- +-- main parsing function + +parse :: String -- ^ parsing strategy + -> PInfo -- ^ compiled grammars (mcfg and cfg) + -> Ident.Ident -- ^ abstract module name + -> CFCat -- ^ starting category + -> [CFTok] -- ^ input tokens + -> [Grammar.Term] -- ^ resulting GF terms + +-- parsing via CFG +parse (c:strategy) pinfo abs startCat + | c=='c' || c=='C' = map (tree2term abs) . + parseCFG strategy pinfo startCats . + map prCFTok + where startCats = tracePrt "startCats" prt $ + filter isStartCat $ map fst $ aAssocs $ PC.topdownRules $ cfPInfo pinfo + isStartCat (CCat (MCat cat _) _) = cat == cfCat2Ident startCat + +-- default parser +parse strategy pinfo abs start = parse ('c':strategy) pinfo abs start + + +---------------------------------------------------------------------- + +parseCFG :: String -> PInfo -> [CCat] -> [Token] -> [SyntaxTree Name] +parseCFG strategy pInfo startCats inString = trace2 "Parser" "CFG" $ + trees + where trees = tracePrt "#trees" (prt . length) $ + nubsort $ forests >>= forest2trees + -- compactFs >>= forest2trees + + -- compactFs = tracePrt "#compactForests" (prt . length) $ + -- tracePrt "compactForests" (prtBefore "\n") $ + -- compactForests forests + + forests = tracePrt "#forests" (prt . length) $ + cfForests >>= convertFromCFForest + cfForests= tracePrt "#cfForests" (prt . length) $ + chart2forests chart (const False) finalEdges + + finalEdges = tracePrt "finalChartEdges" prt $ + map (uncurry Edge (inputBounds inTokens)) startCats + chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + C.grammar2chart cfChart + cfChart = --tracePrt "finalEdges" + --(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $ + tracePrt "#cfChart" (prt . length) $ + PC.parseCF strategy (cfPInfo pInfo) startCats inTokens + + inTokens = input inString + + +{- +-- parsing via MCFG +newParser (m:strategy) gr (_, startCat) inString + | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms + where terms = map (tree2term abstract) trees + trees = --tracePrt "trees" (prtBefore "\n") $ + tracePrt "#trees" (prt . length) $ + concatMap forest2trees forests + forests = --tracePrt "forests" (prtBefore "\n") $ + tracePrt "#forests" (prt . length) $ + concatMap (chart2forests chart isMeta) finalEdges + isMeta = null . snd + finalEdges = tracePrt "finalEdges" (prtBefore "\n") $ + filter isFinalEdge $ aElems chart +-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) | +-- let (i, j) = inputBounds inTokens, +-- E.Rule cat _ [E.Lin lbl _] _ <- pInf, +-- isStartCat cat ] + isFinalEdge (cat, rows) + = isStartCat cat && + inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ] + chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + PM.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $ + mcfPInfo $ SS.statePInfo gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ] + isStartCat (MCFCat cat _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr +-} + + +---------------------------------------------------------------------- +-- parse trees to GF terms + +tree2term :: Ident.Ident -> SyntaxTree Name -> Grammar.Term +tree2term abs (TNode f ts) = Macros.mkApp (Macros.qq (abs,f)) (map (tree2term abs) ts) +tree2term abs (TMeta) = Macros.mkMeta 0 + + +---------------------------------------------------------------------- +-- conversion and unification of forests + +convertFromCFForest :: SyntaxForest CName -> [SyntaxForest Name] + +-- simplest implementation +convertFromCFForest (FNode (CName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] + +{- +-- more intelligent(?) implementation +convertFromCFForest (FNode (CName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] +-} + +checkProfile forests = unifyManyForests . map (forests !!) + + +---------------------------------------------------------------------- +-- conversion and unification for parse trees instead of forests + +convertFromCFTree :: SyntaxTree CName -> [SyntaxTree Name] +convertFromCFTree (TNode (CName name profile) children0) + = [ TNode name children | + children1 <- mapM convertFromCFTree children0, + children <- mapM (checkProfile children1) profile ] + where checkProfile trees = unifyManyTrees . map (trees !!) + diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs index a46d943c4..9f9743cf1 100644 --- a/src/GF/Shell/ShellCommands.hs +++ b/src/GF/Shell/ShellCommands.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/01 21:24:25 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.27 $ +-- > CVS $Date: 2005/04/11 13:53:38 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.28 $ -- -- The datatype of shell commands and the list of their options. ----------------------------------------------------------------------------- @@ -24,6 +24,7 @@ import Operations import Modules import Char (isDigit) +import Monad (mplus) -- shell commands and their options -- moved to separate module and added option check: AR 27/5/2004 @@ -122,6 +123,8 @@ testValidFlag st co f x = case f of "printer" -> case co of CPrintGrammar -> testInc customGrammarPrinter CPrintMultiGrammar -> testInc customMultiGrammarPrinter + CSetFlag -> testInc customGrammarPrinter `mplus` + testInc customMultiGrammarPrinter "lexer" -> testInc customTokenizer "unlexer" -> testInc customUntokenizer "depth" -> testN @@ -151,6 +154,9 @@ testValidFlag st co f x = case f of optionsOfCommand :: Command -> ([String],[String]) optionsOfCommand co = case co of + CSetFlag -> both "utf8 table struct record all multi" + "cat lang lexer parser number depth rawtrees unlexer optimize path conversion printer" + CImport _ -> both "old v s src retain nocf nocheckcirc cflexer noemit o" "abs cnc res path optimize conversion" CRemoveLanguage _ -> none @@ -159,7 +165,7 @@ optionsOfCommand co = case co of CTransformGrammar _ -> flags "printer" CConvertLatex _ -> none CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer" - CParse -> both "new n ign raw v lines all" "cat lang lexer parser number rawtrees" + CParse -> both "new newer n ign raw v lines all" "cat lang lexer parser number rawtrees" CTranslate _ _ -> opts "cat lexer parser" CGenerateRandom -> flags "cat lang number depth" CGenerateTrees -> both "metas" "depth alts cat lang number" @@ -195,7 +201,6 @@ optionsOfCommand co = case co of _ -> none {- - CSetFlag CSetLocalFlag Language CPrintGlobalOptions CPrintLanguages diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs index 537bce960..d59412ebd 100644 --- a/src/GF/Speech/PrGSL.hs +++ b/src/GF/Speech/PrGSL.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 22:40:04 $ +-- > CVS $Date: 2005/04/11 13:53:38 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.14 $ +-- > CVS $Revision: 1.15 $ -- -- This module prints a CFG as a Nuance GSL 2.0 grammar. -- @@ -19,9 +19,9 @@ module PrGSL (gslPrinter) where import SRG import Ident -import GF.Parsing.CFGrammar -import GF.Parsing.Utilities (Symbol(..)) -import GF.Parsing.GrammarTypes +import GF.OldParsing.CFGrammar +import GF.OldParsing.Utilities (Symbol(..)) +import GF.OldParsing.GrammarTypes import GF.Printing.PrintParser import Option diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs index f6dd7d0c3..9562ff5ac 100644 --- a/src/GF/Speech/PrJSGF.hs +++ b/src/GF/Speech/PrJSGF.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 22:40:05 $ +-- > CVS $Date: 2005/04/11 13:53:39 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.8 $ +-- > CVS $Revision: 1.9 $ -- -- This module prints a CFG as a JSGF grammar. -- @@ -21,9 +21,9 @@ module PrJSGF (jsgfPrinter) where import SRG import Ident -import GF.Parsing.CFGrammar -import GF.Parsing.Utilities (Symbol(..)) -import GF.Parsing.GrammarTypes +import GF.OldParsing.CFGrammar +import GF.OldParsing.Utilities (Symbol(..)) +import GF.OldParsing.GrammarTypes import GF.Printing.PrintParser import Option diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index 1e71d983a..9ec684295 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 22:40:06 $ +-- > CVS $Date: 2005/04/11 13:53:39 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.10 $ +-- > CVS $Revision: 1.11 $ -- -- Representation of, conversion to, and utilities for -- printing of a general Speech Recognition Grammar. @@ -21,9 +21,9 @@ module SRG where import Ident -import GF.Parsing.CFGrammar -import GF.Parsing.Utilities (Symbol(..)) -import GF.Parsing.GrammarTypes +import GF.OldParsing.CFGrammar +import GF.OldParsing.Utilities (Symbol(..)) +import GF.OldParsing.GrammarTypes import GF.Printing.PrintParser import TransformCFG import Option diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index 7c481f5c0..8dd81cb91 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 22:40:06 $ +-- > CVS $Date: 2005/04/11 13:53:39 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.8 $ +-- > CVS $Revision: 1.9 $ -- -- This module does some useful transformations on CFGs. -- @@ -17,9 +17,9 @@ module TransformCFG (makeNice, CFRule_) where import Ident -import GF.Parsing.CFGrammar -import GF.Parsing.Utilities (Symbol(..)) -import GF.Parsing.GrammarTypes +import GF.OldParsing.CFGrammar +import GF.OldParsing.Utilities (Symbol(..)) +import GF.OldParsing.GrammarTypes import GF.Printing.PrintParser import Data.FiniteMap diff --git a/src/GF/System/Tracing.hs b/src/GF/System/Tracing.hs index b092949e8..179ed986d 100644 --- a/src/GF/System/Tracing.hs +++ b/src/GF/System/Tracing.hs @@ -5,16 +5,17 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/29 11:58:46 $ +-- > CVS $Date: 2005/04/11 13:52:57 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- Tracing utilities for debugging purposes. -- If the CPP symbol TRACING is set, then the debugging output is shown. ----------------------------------------------------------------------------- -module GF.System.Tracing (trace, trace2, traceDot, traceCall, tracePrt) where +module GF.System.Tracing + (trace, trace2, traceM, traceCall, tracePrt, traceCalcFirst) where import qualified IOExts @@ -26,8 +27,8 @@ trace :: String -> a -> a -- @{fun: out}@ trace2 :: String -> String -> a -> a --- | emit a dot before(?) calculating the value, for displaying progress -traceDot :: a -> a +-- | monadic version of 'trace2' +traceM :: Monad m => String -> String -> m () -- | show when a value is starting to be calculated (with a '+'), -- and when it is finished (with a '-') @@ -37,20 +38,28 @@ traceCall :: String -> String -> (a -> String) -> a -> a -- @{fun: value}@ tracePrt :: String -> (a -> String) -> a -> a +-- | this is equivalent to 'seq' when tracing, but +-- just skips the first argument otherwise +traceCalcFirst :: a -> b -> b + #if TRACING trace str a = IOExts.trace (bold ++ "{" ++ normal ++ str ++ bold ++ "}" ++ normal) a trace2 fun str a = trace (bold ++ fgcol 1 ++ fun ++ ": " ++ normal ++ str) a -traceDot a = IOExts.unsafePerformIO (putStr ".") `seq` a +traceM fun str = trace2 fun str (return ()) traceCall fun start prt val = trace2 ("+" ++ fun) start $ val `seq` trace2 ("-" ++ fun) (prt val) val tracePrt mod prt val = val `seq` trace2 mod (prt val) val +traceCalcFirst = seq + #else trace _ = id trace2 _ _ = id -traceDot = id +traceM _ _ = return () traceCall _ _ _ = id tracePrt _ _ = id +traceCalcFirst _ = id + #endif diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 727b11950..7e8fe9162 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/31 15:47:43 $ --- > CVS $Author: aarne $ --- > CVS $Revision: 1.50 $ +-- > CVS $Date: 2005/04/11 13:53:39 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.51 $ -- -- A database for customizable GF shell commands. -- @@ -66,17 +66,24 @@ import GrammarToHaskell -- the cf parsing algorithms import ChartParser -- or some other CF Parser -import qualified GF.Parsing.ParseCF as PCF +import qualified GF.OldParsing.ParseCF as PCFOld --import qualified ParseGFCviaCFG as PGFC --import NewChartParser --import NewerChartParser -- grammar conversions -- peb 19/4-04 -- see also customGrammarPrinter -import qualified GF.Parsing.ConvertGrammar as Cnv +import qualified GF.OldParsing.ConvertGrammar as CnvOld import qualified GF.Printing.PrintParser as Prt -import qualified GF.Data.Assoc as Assoc -import qualified GF.Parsing.ConvertFiniteGFC as Fin +--import qualified GF.Data.Assoc as Assoc +--import qualified GF.OldParsing.ConvertFiniteGFC as Fin +--import qualified GF.OldParsing.ConvertGFCtoSimple as Simp +--import qualified GF.OldParsing.ConvertFiniteSimple as FinSimp +--import qualified GF.OldParsing.ConvertSimpleToMCFG as MCFSimp +--import qualified GF.Conversion.GFCtoSimple as G2S +--import qualified GF.Conversion.SimpleToMCFG as S2M +--import GF.Conversion.FromGFC +import qualified GF.Infra.Print as Prt2 import GFC import qualified MkGFC as MC @@ -230,10 +237,10 @@ customGrammarPrinter = ,(strCI "srg", prSRG . stateCF) ,(strCI "gsl", \s -> let opts = stateOptions s name = cncId s - in gslPrinter name opts $ Cnv.cfg $ statePInfo s) + in gslPrinter name opts $ CnvOld.cfg $ statePInfoOld s) ,(strCI "jsgf", \s -> let opts = stateOptions s name = cncId s - in jsgfPrinter name opts $ Cnv.cfg $ statePInfo s) + in jsgfPrinter name opts $ CnvOld.cfg $ statePInfoOld s) ,(strCI "plbnf", prLBNF True) ,(strCI "lbnf", prLBNF False) ,(strCI "bnf", prBNF False) @@ -250,15 +257,37 @@ customGrammarPrinter = -} -- add your own grammar printers here -- grammar conversions, (peb) - ,(strCI "gfc_show", show . grammar2canon . stateGrammarST) - ,(strCI "mcfg", Prt.prt . Cnv.mcfg . statePInfo) - ,(strCI "cfg", Prt.prt . Cnv.cfg . statePInfo) - ,(strCI "mcfg_show", show . Cnv.mcfg . statePInfo) - ,(strCI "cfg_show", show . Cnv.cfg . statePInfo) +-- ,(strCI "gfc_show", show . grammar2canon . stateGrammarST) + ,(strCI "mcfg-old", Prt.prt . CnvOld.mcfg . statePInfoOld) + ,(strCI "cfg-old", Prt.prt . CnvOld.cfg . statePInfoOld) +-- ,(strCI "mcfg_show", show . CnvOld.mcfg . statePInfoOld) +-- ,(strCI "cfg_show", show . CnvOld.cfg . statePInfoOld) -- hack for printing finiteness of grammar categories: - -- ,(strCI "finiteness", Prt.prtAfter "\n" . Assoc.aAssocs . Cnv.fintypes . statePInfo) - ,(strCI "finite", prCanon . Fin.convertGrammar . stateGrammarST) +-- ,(strCI "finiteness", Prt.prtAfter "\n" . Assoc.aAssocs . CnvOld.fintypes . statePInfoOld) +-- ,(strCI "finite", prCanon . Fin.convertGrammar . stateGrammarST) +-- ,(strCI "simpleMCF", (\sg -> Prt.prt $ MCFSimp.convertGrammar "nondet" $ +-- Simp.convertGrammar (stateGrammarST sg, cncId sg))) +-- ,(strCI "simpleGFC", (\sg -> Prt.prt $ Simp.convertGrammar (stateGrammarST sg, cncId sg))) +-- ,(strCI "finiteSimple", (\sg -> Prt.prt $ FinSimp.convertGrammar $ +-- Simp.convertGrammar (stateGrammarST sg, cncId sg))) --- also include printing via grammar2syntax! +-- ,(strCI "g2s", (\sg -> Prt2.prt $ G2S.convertGrammar (stateGrammarST sg, cncId sg))) +-- ,(strCI "g2s2m", (\sg -> Prt2.prt $ S2M.convertGrammar "nondet" $ +-- G2S.convertGrammar (stateGrammarST sg, cncId sg))) + ,(strCI "mcfg", Prt2.prt . stateMCFG) + ,(strCI "cfg", Prt2.prt . stateCFG) +{- + ,(strCI "simple", Prt2.prt . convertToSimple "" . stateGrammarLang) + ,(strCI "mcfg-nondet", Prt2.prt . convertToMCFG "" "nondet" . stateGrammarLang) + ,(strCI "mcfg-strict", Prt2.prt . convertToMCFG "" "strict" . stateGrammarLang) + ,(strCI "cfg-nondet", Prt2.prt . convertToCFG "" "nondet" . stateGrammarLang) + ,(strCI "cfg-strict", Prt2.prt . convertToCFG "" "strict" . stateGrammarLang) + ,(strCI "fin-simple", Prt2.prt . convertToSimple "fin" . stateGrammarLang) + ,(strCI "fin-mcfg-nondet", Prt2.prt . convertToMCFG "fin" "nondet" . stateGrammarLang) + ,(strCI "fin-mcfg-strict", Prt2.prt . convertToMCFG "fin" "strict" . stateGrammarLang) + ,(strCI "fin-cfg-nondet", Prt2.prt . convertToCFG "fin" "nondet" . stateGrammarLang) + ,(strCI "fin-cfg-strict", Prt2.prt . convertToCFG "fin" "strict" . stateGrammarLang) +-} ] customMultiGrammarPrinter = @@ -344,14 +373,14 @@ customStringCommand = customParser = customData "Parsers, selected by option -parser=x" $ [ - (strCI "chart", PCF.parse "ibn" . stateCF) + (strCI "chart", PCFOld.parse "ibn" . stateCF) ,(strCI "old", chartParser . stateCF) ,(strCI "myparser", myParser) -- add your own parsers here ] -- 31/5-04, peb: - ++ [ (strCI ("chart"++name), PCF.parse descr . stateCF) | - (descr, names) <- PCF.alternatives, name <- names ] + ++ [ (strCI ("chart"++name), PCFOld.parse descr . stateCF) | + (descr, names) <- PCFOld.alternatives, name <- names ] customTokenizer = customData "Tokenizers, selected by option -lexer=x" $ diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs index 5c24e4566..ae890b757 100644 --- a/src/GF/UseGrammar/Parsing.hs +++ b/src/GF/UseGrammar/Parsing.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 13:54:45 $ +-- > CVS $Date: 2005/04/11 13:53:39 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.15 $ +-- > CVS $Revision: 1.16 $ -- -- (Description of the module) ----------------------------------------------------------------------------- @@ -35,7 +35,8 @@ import Custom import ShellState import PPrCF (prCFTree) -import qualified GF.Parsing.ParseGFC as N +import qualified GF.OldParsing.ParseGFC as NewOld +import qualified GF.NewParsing.GFC as New import Operations @@ -56,12 +57,20 @@ parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree] parseStringC opts0 sg cat s ---- to test peb's new parser 6/10/2003 +---- (to be obsoleted by "newer" below | oElem newParser opts0 = do let pm = maybe "" id $ getOptVal opts0 useParser -- -parser=pm - gr = stateGrammarST sg ct = cfCat2Cat cat - ts <- checkErr $ N.newParser pm sg ct s -- peb 27/5-04 (changed gr -> sg) - mapM (checkErr . (annotate gr)) ts + ts <- checkErr $ NewOld.newParser pm sg ct s + mapM (checkErr . annotate (stateGrammarST sg)) ts + +---- to test peb's newer parser 7/4-05 + | oElem newerParser opts0 = do + let opts = unionOptions opts0 $ stateOptions sg + pm = maybe "" id $ getOptVal opts0 useParser -- -parser=pm + tok = customOrDefault opts useTokenizer customTokenizer sg + ts <- return $ New.parse pm (pInfo sg) (absId sg) cat (tok s) + mapM (checkErr . annotate (stateGrammarST sg)) ts | otherwise = do let opts = unionOptions opts0 $ stateOptions sg @@ -72,6 +81,7 @@ parseStringC opts0 sg cat s parser = customOrDefault opts useParser customParser sg cat tokens2trms opts sg cn parser (tok s) + tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree] tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info where result = parser toks diff --git a/src/haddock/haddock-script.csh b/src/haddock/haddock-script.csh index a27cbf505..bafb9afef 100644 --- a/src/haddock/haddock-script.csh +++ b/src/haddock/haddock-script.csh @@ -2,8 +2,8 @@ ###################################################################### # Author: Peter Ljunglöf -# Time-stamp: "2005-03-29, 13:55" -# CVS $Date: 2005/03/29 11:58:45 $ +# Time-stamp: "2005-03-29, 14:04" +# CVS $Date: 2005/04/11 13:53:37 $ # CVS $Author: peb $ # # a script for producing documentation through Haddock @@ -16,7 +16,7 @@ set resourcedir = haddock-resources #set dirs = (. api compile grammar infra shell source canonical useGrammar cf newparsing parsers notrace cfgm speech visualization for-hugs for-ghc) -set files = (`find * -name '*.hs' -not -path 'old-stuff/*' -not -path 'for-*' -not -path 'haddock*' -not -name 'Lex[GC]*' -not -name 'Par[GC]*'` $base/for-ghc-nofud/*.hs) +set files = (`find * -name '*.hs' -not -path 'old-stuff/*' -not -path 'for-*' -not -path 'haddock*'` for-ghc-nofud/*.hs) ###################################################################### @@ -29,29 +29,18 @@ rm -r $docdir/* ###################################################################### echo -echo 2. Copying Haskell files to temporary directory ($tempdir) +echo 2. Copying Haskell files to temporary directory: $tempdir rm -r $tempdir foreach f ($files) - echo -- $f + # echo -- $f mkdir -p `dirname $tempdir/$f` - perl -e 's/^#/-- CPP #/' $f > $tempdir/$f + perl -pe 's/^#/-- CPP #/' $f > $tempdir/$f end ###################################################################### -# set rmfiles = {Lex,Par}{CFG,GF,GFC}.hs - -# echo -# echo 2. Removing unnecessary files - -# cd $docdir -# echo -- `ls $rmfiles` -# rm $rmfiles - -###################################################################### - echo echo 3. Invoking Haddock @@ -67,6 +56,7 @@ echo 4. Restructuring to HTML framesets echo -- Substituting for frame targets inside html files mv $docdir/index.html $docdir/index-frame.html foreach f ($docdir/*.html) + # echo -- $f perl -pe 's/ .tempfile mv .tempfile $f end