From cd213f3e56ee54a83927055a1273df83ab4b1f6f Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Mon, 24 Sep 2007 08:12:11 +0000 Subject: [PATCH] remove FTypes module and move all definitions to Formalism.FCFG --- src/GF/Compile/ShellState.hs | 7 ++- src/GF/Conversion/FTypes.hs | 63 -------------------- src/GF/Conversion/GFC.hs | 3 +- src/GF/Conversion/SimpleToFCFG.hs | 5 +- src/GF/FCFG/ToFCFG.hs | 3 +- src/GF/Formalism/FCFG.hs | 96 +++++++++++++++++++++++++++---- src/GF/Parsing/FCFG.hs | 4 +- src/GF/Parsing/FCFG/Active.hs | 16 +++--- src/GF/Parsing/FCFG/PInfo.hs | 30 +++++----- src/GF/Parsing/GFC.hs | 5 +- 10 files changed, 119 insertions(+), 113 deletions(-) delete mode 100644 src/GF/Conversion/FTypes.hs diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs index e9533e1a0..41bcf50c7 100644 --- a/src/GF/Compile/ShellState.hs +++ b/src/GF/Compile/ShellState.hs @@ -44,6 +44,7 @@ import GF.System.Arch (ModTime) import qualified Transfer.InterpreterAPI as T +import GF.Formalism.FCFG import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE import qualified GF.Conversion.GFC as Cnv import qualified GF.Conversion.SimpleToFCFG as FCnv @@ -67,7 +68,7 @@ data ShellState = ShSt { cfs :: [(Ident,CF)] , -- ^ context-free grammars (small, no parameters, very over-generating) abstracts :: [(Ident,[Ident])], -- ^ abstracts and their associated concretes mcfgs :: [(Ident, Cnv.MGrammar)], -- ^ MCFG, converted according to Ljunglöf (2004, ch 3) - fcfgs :: [(Ident, Cnv.FGrammar)], -- ^ FCFG, optimized MCFG by Krasimir Angelov + fcfgs :: [(Ident, FGrammar)], -- ^ FCFG, optimized MCFG by Krasimir Angelov cfgs :: [(Ident, Cnv.CGrammar)], -- ^ CFG, converted from mcfg -- (large, with parameters, no-so overgenerating) pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars) @@ -146,7 +147,7 @@ data StateGrammar = StGr { grammar :: CanonGrammar, cf :: CF, mcfg :: Cnv.MGrammar, - fcfg :: Cnv.FGrammar, + fcfg :: FGrammar, cfg :: Cnv.CGrammar, pInfo :: Prs.PInfo, morpho :: Morpho, @@ -174,7 +175,7 @@ emptyStateGrammar = StGr { stateGrammarST :: StateGrammar -> CanonGrammar stateCF :: StateGrammar -> CF stateMCFG :: StateGrammar -> Cnv.MGrammar -stateFCFG :: StateGrammar -> Cnv.FGrammar +stateFCFG :: StateGrammar -> FGrammar stateCFG :: StateGrammar -> Cnv.CGrammar statePInfo :: StateGrammar -> Prs.PInfo stateMorpho :: StateGrammar -> Morpho diff --git a/src/GF/Conversion/FTypes.hs b/src/GF/Conversion/FTypes.hs deleted file mode 100644 index 9409fc4ee..000000000 --- a/src/GF/Conversion/FTypes.hs +++ /dev/null @@ -1,63 +0,0 @@ -module GF.Conversion.FTypes where - -import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC (CId(..)) - -import GF.Formalism.FCFG -import GF.Formalism.Utilities -import GF.Infra.PrintClass -import GF.Data.Assoc - -import Control.Monad (foldM) -import Data.Array - ----------------------------------------------------------------------- --- * basic (leaf) types - --- ** input tokens - ----- type Token = String ---- inlined in FGrammar and FRule - - ----------------------------------------------------------------------- --- * fast nonerasing MCFG - -type FIndex = Int -type FPath = [FIndex] -type FName = NameProfile AbsGFCC.CId -type FGrammar = FCFGrammar FCat FName String -type FRule = FCFRule FCat FName String -data FCat = FCat {-# UNPACK #-} !Int AbsGFCC.CId [FPath] [(FPath,FIndex)] - -initialFCat :: AbsGFCC.CId -> FCat -initialFCat cat = FCat 0 cat [] [] - -fcatString = FCat (-1) (AbsGFCC.CId "String") [[0]] [] -fcatInt = FCat (-2) (AbsGFCC.CId "Int") [[0]] [] -fcatFloat = FCat (-3) (AbsGFCC.CId "Float") [[0]] [] - -fcat2cid :: FCat -> AbsGFCC.CId -fcat2cid (FCat _ c _ _) = c - -instance Eq FCat where - (FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2 - -instance Ord FCat where - compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2 - -instance Print AbsGFCC.CId where - prt (AbsGFCC.CId s) = s - -isCoercionF :: FName -> Bool -isCoercionF (Name fun [Unify [0]]) = fun == AbsGFCC.CId "_" -isCoercionF _ = False - - ----------------------------------------------------------------------- --- * pretty-printing - -instance Print FCat where - prt (FCat _ (AbsGFCC.CId cat) rcs tcs) = cat ++ "{" ++ - prtSep ";" ([prt path | path <- rcs] ++ - [prt path ++ "=" ++ prt term | (path,term) <- tcs]) - ++ "}" - diff --git a/src/GF/Conversion/GFC.hs b/src/GF/Conversion/GFC.hs index 5abfe17c0..354bdea65 100644 --- a/src/GF/Conversion/GFC.hs +++ b/src/GF/Conversion/GFC.hs @@ -13,7 +13,7 @@ module GF.Conversion.GFC (module GF.Conversion.GFC, - SGrammar, EGrammar, MGrammar, FGrammar, CGrammar) where + SGrammar, EGrammar, MGrammar, CGrammar) where import GF.Infra.Option import GF.Canon.GFC (CanonGrammar) @@ -25,7 +25,6 @@ import GF.Formalism.SimpleGFC (decl2cat) import GF.Formalism.CFG (CFRule(..)) import GF.Formalism.Utilities (symbol, name2fun) import GF.Conversion.Types -import GF.Conversion.FTypes import qualified GF.Conversion.GFCtoSimple as G2S import qualified GF.Conversion.SimpleToFinite as S2Fin diff --git a/src/GF/Conversion/SimpleToFCFG.hs b/src/GF/Conversion/SimpleToFCFG.hs index f5d771298..fc0177900 100644 --- a/src/GF/Conversion/SimpleToFCFG.hs +++ b/src/GF/Conversion/SimpleToFCFG.hs @@ -21,7 +21,6 @@ import Control.Monad import GF.Formalism.Utilities import GF.Formalism.FCFG -import GF.Conversion.FTypes import GF.Canon.GFCC.AbsGFCC import GF.Canon.GFCC.DataGFCC @@ -38,9 +37,7 @@ import Data.Maybe ---------------------------------------------------------------------- -- main conversion function -type FToken = String - -convertGrammar :: GFCC -> [(CId,FCFGrammar FCat FName FToken)] +convertGrammar :: GFCC -> [(CId,FGrammar)] convertGrammar gfcc = [(cncname,convert abs_defs conc) | cncname <- cncnames gfcc, conc <- Map.lookup cncname (concretes gfcc)] where diff --git a/src/GF/FCFG/ToFCFG.hs b/src/GF/FCFG/ToFCFG.hs index 790993487..57e67113d 100644 --- a/src/GF/FCFG/ToFCFG.hs +++ b/src/GF/FCFG/ToFCFG.hs @@ -11,7 +11,6 @@ module GF.FCFG.ToFCFG (printFGrammar) where import GF.Formalism.FCFG import GF.Formalism.SimpleGFC -import GF.Conversion.FTypes import GF.Infra.Ident import qualified GF.FCFG.AbsFCFG as F @@ -31,7 +30,7 @@ import GF.Infra.Print type FToken = String -- this is the main function used -printFGrammar :: FCFGrammar FCat FName FToken -> String +printFGrammar :: FGrammar -> String printFGrammar = undefined {- printTree . fgrammar fgrammar :: FCFGrammar FCat Name FToken -> F.FGrammar diff --git a/src/GF/Formalism/FCFG.hs b/src/GF/Formalism/FCFG.hs index 2fb4b0422..5b8edc434 100644 --- a/src/GF/Formalism/FCFG.hs +++ b/src/GF/Formalism/FCFG.hs @@ -7,32 +7,106 @@ -- Definitions of fast multiple context-free grammars ----------------------------------------------------------------------------- -module GF.Formalism.FCFG where +module GF.Formalism.FCFG + ( + -- * Token + FToken + + -- * Category + , FPath + , FCat(..) + + , initialFCat + , fcatString, fcatInt, fcatFloat + , fcat2cid + + -- * Symbol + , FIndex + , FSymbol(..) + + -- * Name + , FName + , isCoercionF + + -- * Grammar + , FPointPos + , FGrammar + , FRule(..) + ) where import Control.Monad (liftM) import Data.List (groupBy) import Data.Array +import GF.Formalism.Utilities +import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC import GF.Infra.PrintClass ------------------------------------------------------------ --- grammar types +-- Token +type FToken = String -type FLabel = Int + +------------------------------------------------------------ +-- Category +type FPath = [FIndex] +data FCat = FCat {-# UNPACK #-} !Int AbsGFCC.CId [FPath] [(FPath,FIndex)] + +initialFCat :: AbsGFCC.CId -> FCat +initialFCat cat = FCat 0 cat [] [] + +fcatString = FCat (-1) (AbsGFCC.CId "String") [[0]] [] +fcatInt = FCat (-2) (AbsGFCC.CId "Int") [[0]] [] +fcatFloat = FCat (-3) (AbsGFCC.CId "Float") [[0]] [] + +fcat2cid :: FCat -> AbsGFCC.CId +fcat2cid (FCat _ c _ _) = c + +instance Eq FCat where + (FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2 + +instance Ord FCat where + compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2 + + +------------------------------------------------------------ +-- Symbol +type FIndex = Int +data FSymbol + = FSymCat FCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int + | FSymTok FToken + + +------------------------------------------------------------ +-- Name +type FName = NameProfile AbsGFCC.CId + +isCoercionF :: FName -> Bool +isCoercionF (Name fun [Unify [0]]) = fun == AbsGFCC.CId "_" +isCoercionF _ = False + + +------------------------------------------------------------ +-- Grammar +type FGrammar = [FRule] type FPointPos = Int +data FRule = FRule FName [FCat] FCat (Array FIndex (Array FPointPos FSymbol)) -data FSymbol cat tok - = FSymCat cat {-# UNPACK #-} !FLabel {-# UNPACK #-} !Int - | FSymTok tok - -type FCFGrammar cat name tok = [FCFRule cat name tok] -data FCFRule cat name tok = FRule name [cat] cat (Array FLabel (Array FPointPos (FSymbol cat tok))) ------------------------------------------------------------ -- pretty-printing -instance (Print c, Print t) => Print (FSymbol c t) where +instance Print AbsGFCC.CId where + prt (AbsGFCC.CId s) = s + +instance Print FCat where + prt (FCat _ (AbsGFCC.CId cat) rcs tcs) = cat ++ "{" ++ + prtSep ";" ([prt path | path <- rcs] ++ + [prt path ++ "=" ++ prt term | (path,term) <- tcs]) + ++ "}" + +instance Print FSymbol where prt (FSymCat c l n) = "($" ++ prt n ++ "!" ++ prt l ++ ")" prt (FSymTok t) = simpleShow (prt t) where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\"" @@ -43,7 +117,7 @@ instance (Print c, Print t) => Print (FSymbol c t) where mkEsc chr = [chr] prtList = prtSep " " -instance (Print c, Print n, Print t) => Print (FCFRule n c t) where +instance Print FRule where prt (FRule name args res lins) = prt name ++ " : " ++ (if null args then "" else prtSep " " args ++ " -> ") ++ prt res ++ " =\n [" ++ prtSep "\n " ["("++prtSep " " [prt sym | (_,sym) <- assocs syms]++")" | (_,syms) <- assocs lins]++"]" prtList = prtSep "\n" diff --git a/src/GF/Parsing/FCFG.hs b/src/GF/Parsing/FCFG.hs index dfe26d0b3..91b4201b7 100644 --- a/src/GF/Parsing/FCFG.hs +++ b/src/GF/Parsing/FCFG.hs @@ -21,12 +21,12 @@ import GF.Infra.PrintClass ---------------------------------------------------------------------- -- parsing -parseFCF :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t) +parseFCF :: String -> Err (FCFParser) parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs | otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs strategies = words "bottomup topdown" -parseFCF' :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFParser c n t +parseFCF' :: String -> FCFParser parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks diff --git a/src/GF/Parsing/FCFG/Active.hs b/src/GF/Parsing/FCFG/Active.hs index d315ca1cc..fbbf3736d 100644 --- a/src/GF/Parsing/FCFG/Active.hs +++ b/src/GF/Parsing/FCFG/Active.hs @@ -32,7 +32,7 @@ import Data.Array ---------------------------------------------------------------------- -- * parsing -parse :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFParser c n t +parse :: String -> FCFParser parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo where chart = process strategy pinfo toks axioms emptyXChart axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks @@ -42,7 +42,7 @@ isBU s = s=="b" isTD s = s=="t" -- used in prediction -emptyChildren :: RuleId -> FCFPInfo c n t -> SyntaxNode RuleId RangeRec +emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) []) where FRule _ rhs _ _ = allRules pinfo ! ruleid @@ -57,7 +57,7 @@ updateChildren (SNode ruleid recs) i rec = do makeMaxRange (Range _ j) = Range j j makeMaxRange EmptyRange = EmptyRange -process :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFPInfo c n t -> Input t -> [(c,Item)] -> XChart c -> XChart c +process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat process strategy pinfo toks [] chart = chart process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart where @@ -110,7 +110,7 @@ process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks data Item = Active RangeRec Range - {-# UNPACK #-} !FLabel + {-# UNPACK #-} !FIndex {-# UNPACK #-} !FPointPos (SyntaxNode RuleId RangeRec) | Final RangeRec (SyntaxNode RuleId RangeRec) @@ -134,7 +134,7 @@ insertXChart (XChart actives finals) item@(Final _ _) c = lookupXChartAct (XChart actives finals) c = chartLookup actives c lookupXChartFinal (XChart actives finals) c = chartLookup finals c -xchart2syntaxchart :: (Ord c, Ord n, Ord t) => XChart c -> FCFPInfo c n t -> SyntaxChart n (c,RangeRec) +xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec) xchart2syntaxchart (XChart actives finals) pinfo = accumAssoc groupSyntaxNodes $ [ case node of @@ -146,7 +146,7 @@ xchart2syntaxchart (XChart actives finals) pinfo = | (cat, Final found node) <- chartAssocs finals ] -literals :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [(c,Item)] +literals :: FCFPInfo -> Input FToken -> [(FCat,Item)] literals pinfo toks = [let (c,node) = grammarLexer pinfo t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)] @@ -154,7 +154,7 @@ literals pinfo toks = -- Earley -- -- called with all starting categories -initialTD :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [(c,Item)] +initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)] initialTD pinfo starts toks = do cat <- starts ruleid <- topdownRules pinfo ? cat @@ -164,7 +164,7 @@ initialTD pinfo starts toks = ---------------------------------------------------------------------- -- Kilbury -- -initialBU :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [(c,Item)] +initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)] initialBU pinfo toks = do tok <- aElems (inputToken toks) ruleid <- leftcornerTokens pinfo ? tok ++ diff --git a/src/GF/Parsing/FCFG/PInfo.hs b/src/GF/Parsing/FCFG/PInfo.hs index e463cf65a..8a45b651a 100644 --- a/src/GF/Parsing/FCFG/PInfo.hs +++ b/src/GF/Parsing/FCFG/PInfo.hs @@ -23,10 +23,10 @@ import Data.Maybe -- type declarations -- | the list of categories = possible starting categories -type FCFParser c n t = FCFPInfo c n t - -> [c] - -> Input t - -> SyntaxChart n (c,RangeRec) +type FCFParser = FCFPInfo + -> [FCat] + -> Input FToken + -> SyntaxChart FName (FCat,RangeRec) makeFinalEdge cat 0 0 = (cat, [EmptyRange]) makeFinalEdge cat i j = (cat, [makeRange i j]) @@ -36,19 +36,19 @@ makeFinalEdge cat i j = (cat, [makeRange i j]) type RuleId = Int -data FCFPInfo c n t - = FCFPInfo { allRules :: Array RuleId (FCFRule c n t) - , topdownRules :: Assoc c (SList RuleId) +data FCFPInfo + = FCFPInfo { allRules :: Array RuleId FRule + , topdownRules :: Assoc FCat (SList RuleId) -- ^ used in 'GF.Parsing.MCFG.Active' (Earley): - -- , emptyRules :: [RuleId] + -- , emptyRules :: [RuleId] , epsilonRules :: [RuleId] -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , leftcornerCats :: Assoc c (SList RuleId) - , leftcornerTokens :: Assoc t (SList RuleId) + , leftcornerCats :: Assoc FCat (SList RuleId) + , leftcornerTokens :: Assoc FToken (SList RuleId) -- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury): - , grammarCats :: SList c - , grammarToks :: SList t - , grammarLexer :: t -> (c,SyntaxNode RuleId RangeRec) + , grammarCats :: SList FCat + , grammarToks :: SList FToken + , grammarLexer :: FToken -> (FCat,SyntaxNode RuleId RangeRec) } @@ -68,7 +68,7 @@ getLeftCornerCat lins where syms = lins ! 0 -buildFCFPInfo :: (Ord c, Ord n, Ord t) => (t -> (c,SyntaxNode RuleId RangeRec)) -> FCFGrammar c n t -> FCFPInfo c n t +buildFCFPInfo :: (FToken -> (FCat,SyntaxNode RuleId RangeRec)) -> FGrammar -> FCFPInfo buildFCFPInfo lexer grammar = FCFPInfo { allRules = allrules , topdownRules = topdownrules @@ -98,7 +98,7 @@ buildFCFPInfo lexer grammar = ---------------------------------------------------------------------- -- pretty-printing of statistics -instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where +instance Print FCFPInfo where prt pI = "[ allRules=" ++ sl (elems . allRules) ++ "; tdRules=" ++ sla topdownRules ++ -- "; emptyRules=" ++ sl emptyRules ++ diff --git a/src/GF/Parsing/GFC.hs b/src/GF/Parsing/GFC.hs index 6d6c662c0..90ba718c7 100644 --- a/src/GF/Parsing/GFC.hs +++ b/src/GF/Parsing/GFC.hs @@ -32,11 +32,11 @@ import GF.Data.SortedList import GF.Data.Assoc import GF.Formalism.Utilities import GF.Conversion.Types -import GF.Conversion.FTypes import qualified GF.Formalism.GCFG as G import qualified GF.Formalism.SimpleGFC as S import qualified GF.Formalism.MCFG as M +import GF.Formalism.FCFG import qualified GF.Formalism.CFG as C import qualified GF.Parsing.MCFG as PM import qualified GF.Parsing.FCFG as PF @@ -46,12 +46,11 @@ import qualified GF.Parsing.CFG as PC -- parsing information data PInfo = PInfo { mcfPInfo :: MCFPInfo - , fcfPInfo :: FCFPInfo + , fcfPInfo :: PF.FCFPInfo , cfPInfo :: CFPInfo } type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token -type FCFPInfo = PF.FCFPInfo FCat FName Token type CFPInfo = PC.CFPInfo CCat Name Token buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo