remove FTypes module and move all definitions to Formalism.FCFG

This commit is contained in:
kr.angelov
2007-09-24 08:12:11 +00:00
parent 9efddd16ce
commit cd213f3e56
10 changed files with 119 additions and 113 deletions

View File

@@ -44,6 +44,7 @@ import GF.System.Arch (ModTime)
import qualified Transfer.InterpreterAPI as T
import GF.Formalism.FCFG
import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE
import qualified GF.Conversion.GFC as Cnv
import qualified GF.Conversion.SimpleToFCFG as FCnv
@@ -67,7 +68,7 @@ data ShellState = ShSt {
cfs :: [(Ident,CF)] , -- ^ context-free grammars (small, no parameters, very over-generating)
abstracts :: [(Ident,[Ident])], -- ^ abstracts and their associated concretes
mcfgs :: [(Ident, Cnv.MGrammar)], -- ^ MCFG, converted according to Ljunglöf (2004, ch 3)
fcfgs :: [(Ident, Cnv.FGrammar)], -- ^ FCFG, optimized MCFG by Krasimir Angelov
fcfgs :: [(Ident, FGrammar)], -- ^ FCFG, optimized MCFG by Krasimir Angelov
cfgs :: [(Ident, Cnv.CGrammar)], -- ^ CFG, converted from mcfg
-- (large, with parameters, no-so overgenerating)
pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars)
@@ -146,7 +147,7 @@ data StateGrammar = StGr {
grammar :: CanonGrammar,
cf :: CF,
mcfg :: Cnv.MGrammar,
fcfg :: Cnv.FGrammar,
fcfg :: FGrammar,
cfg :: Cnv.CGrammar,
pInfo :: Prs.PInfo,
morpho :: Morpho,
@@ -174,7 +175,7 @@ emptyStateGrammar = StGr {
stateGrammarST :: StateGrammar -> CanonGrammar
stateCF :: StateGrammar -> CF
stateMCFG :: StateGrammar -> Cnv.MGrammar
stateFCFG :: StateGrammar -> Cnv.FGrammar
stateFCFG :: StateGrammar -> FGrammar
stateCFG :: StateGrammar -> Cnv.CGrammar
statePInfo :: StateGrammar -> Prs.PInfo
stateMorpho :: StateGrammar -> Morpho

View File

@@ -1,63 +0,0 @@
module GF.Conversion.FTypes where
import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC (CId(..))
import GF.Formalism.FCFG
import GF.Formalism.Utilities
import GF.Infra.PrintClass
import GF.Data.Assoc
import Control.Monad (foldM)
import Data.Array
----------------------------------------------------------------------
-- * basic (leaf) types
-- ** input tokens
---- type Token = String ---- inlined in FGrammar and FRule
----------------------------------------------------------------------
-- * fast nonerasing MCFG
type FIndex = Int
type FPath = [FIndex]
type FName = NameProfile AbsGFCC.CId
type FGrammar = FCFGrammar FCat FName String
type FRule = FCFRule FCat FName String
data FCat = FCat {-# UNPACK #-} !Int AbsGFCC.CId [FPath] [(FPath,FIndex)]
initialFCat :: AbsGFCC.CId -> FCat
initialFCat cat = FCat 0 cat [] []
fcatString = FCat (-1) (AbsGFCC.CId "String") [[0]] []
fcatInt = FCat (-2) (AbsGFCC.CId "Int") [[0]] []
fcatFloat = FCat (-3) (AbsGFCC.CId "Float") [[0]] []
fcat2cid :: FCat -> AbsGFCC.CId
fcat2cid (FCat _ c _ _) = c
instance Eq FCat where
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
instance Ord FCat where
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
instance Print AbsGFCC.CId where
prt (AbsGFCC.CId s) = s
isCoercionF :: FName -> Bool
isCoercionF (Name fun [Unify [0]]) = fun == AbsGFCC.CId "_"
isCoercionF _ = False
----------------------------------------------------------------------
-- * pretty-printing
instance Print FCat where
prt (FCat _ (AbsGFCC.CId cat) rcs tcs) = cat ++ "{" ++
prtSep ";" ([prt path | path <- rcs] ++
[prt path ++ "=" ++ prt term | (path,term) <- tcs])
++ "}"

View File

@@ -13,7 +13,7 @@
module GF.Conversion.GFC
(module GF.Conversion.GFC,
SGrammar, EGrammar, MGrammar, FGrammar, CGrammar) where
SGrammar, EGrammar, MGrammar, CGrammar) where
import GF.Infra.Option
import GF.Canon.GFC (CanonGrammar)
@@ -25,7 +25,6 @@ import GF.Formalism.SimpleGFC (decl2cat)
import GF.Formalism.CFG (CFRule(..))
import GF.Formalism.Utilities (symbol, name2fun)
import GF.Conversion.Types
import GF.Conversion.FTypes
import qualified GF.Conversion.GFCtoSimple as G2S
import qualified GF.Conversion.SimpleToFinite as S2Fin

View File

@@ -21,7 +21,6 @@ import Control.Monad
import GF.Formalism.Utilities
import GF.Formalism.FCFG
import GF.Conversion.FTypes
import GF.Canon.GFCC.AbsGFCC
import GF.Canon.GFCC.DataGFCC
@@ -38,9 +37,7 @@ import Data.Maybe
----------------------------------------------------------------------
-- main conversion function
type FToken = String
convertGrammar :: GFCC -> [(CId,FCFGrammar FCat FName FToken)]
convertGrammar :: GFCC -> [(CId,FGrammar)]
convertGrammar gfcc = [(cncname,convert abs_defs conc) |
cncname <- cncnames gfcc, conc <- Map.lookup cncname (concretes gfcc)]
where

View File

@@ -11,7 +11,6 @@ module GF.FCFG.ToFCFG (printFGrammar) where
import GF.Formalism.FCFG
import GF.Formalism.SimpleGFC
import GF.Conversion.FTypes
import GF.Infra.Ident
import qualified GF.FCFG.AbsFCFG as F
@@ -31,7 +30,7 @@ import GF.Infra.Print
type FToken = String
-- this is the main function used
printFGrammar :: FCFGrammar FCat FName FToken -> String
printFGrammar :: FGrammar -> String
printFGrammar = undefined {- printTree . fgrammar
fgrammar :: FCFGrammar FCat Name FToken -> F.FGrammar

View File

@@ -7,32 +7,106 @@
-- Definitions of fast multiple context-free grammars
-----------------------------------------------------------------------------
module GF.Formalism.FCFG where
module GF.Formalism.FCFG
(
-- * Token
FToken
-- * Category
, FPath
, FCat(..)
, initialFCat
, fcatString, fcatInt, fcatFloat
, fcat2cid
-- * Symbol
, FIndex
, FSymbol(..)
-- * Name
, FName
, isCoercionF
-- * Grammar
, FPointPos
, FGrammar
, FRule(..)
) where
import Control.Monad (liftM)
import Data.List (groupBy)
import Data.Array
import GF.Formalism.Utilities
import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
import GF.Infra.PrintClass
------------------------------------------------------------
-- grammar types
-- Token
type FToken = String
type FLabel = Int
------------------------------------------------------------
-- Category
type FPath = [FIndex]
data FCat = FCat {-# UNPACK #-} !Int AbsGFCC.CId [FPath] [(FPath,FIndex)]
initialFCat :: AbsGFCC.CId -> FCat
initialFCat cat = FCat 0 cat [] []
fcatString = FCat (-1) (AbsGFCC.CId "String") [[0]] []
fcatInt = FCat (-2) (AbsGFCC.CId "Int") [[0]] []
fcatFloat = FCat (-3) (AbsGFCC.CId "Float") [[0]] []
fcat2cid :: FCat -> AbsGFCC.CId
fcat2cid (FCat _ c _ _) = c
instance Eq FCat where
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
instance Ord FCat where
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
------------------------------------------------------------
-- Symbol
type FIndex = Int
data FSymbol
= FSymCat FCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int
| FSymTok FToken
------------------------------------------------------------
-- Name
type FName = NameProfile AbsGFCC.CId
isCoercionF :: FName -> Bool
isCoercionF (Name fun [Unify [0]]) = fun == AbsGFCC.CId "_"
isCoercionF _ = False
------------------------------------------------------------
-- Grammar
type FGrammar = [FRule]
type FPointPos = Int
data FRule = FRule FName [FCat] FCat (Array FIndex (Array FPointPos FSymbol))
data FSymbol cat tok
= FSymCat cat {-# UNPACK #-} !FLabel {-# UNPACK #-} !Int
| FSymTok tok
type FCFGrammar cat name tok = [FCFRule cat name tok]
data FCFRule cat name tok = FRule name [cat] cat (Array FLabel (Array FPointPos (FSymbol cat tok)))
------------------------------------------------------------
-- pretty-printing
instance (Print c, Print t) => Print (FSymbol c t) where
instance Print AbsGFCC.CId where
prt (AbsGFCC.CId s) = s
instance Print FCat where
prt (FCat _ (AbsGFCC.CId cat) rcs tcs) = cat ++ "{" ++
prtSep ";" ([prt path | path <- rcs] ++
[prt path ++ "=" ++ prt term | (path,term) <- tcs])
++ "}"
instance Print FSymbol where
prt (FSymCat c l n) = "($" ++ prt n ++ "!" ++ prt l ++ ")"
prt (FSymTok t) = simpleShow (prt t)
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
@@ -43,7 +117,7 @@ instance (Print c, Print t) => Print (FSymbol c t) where
mkEsc chr = [chr]
prtList = prtSep " "
instance (Print c, Print n, Print t) => Print (FCFRule n c t) where
instance Print FRule where
prt (FRule name args res lins) = prt name ++ " : " ++ (if null args then "" else prtSep " " args ++ " -> ") ++ prt res ++
" =\n [" ++ prtSep "\n " ["("++prtSep " " [prt sym | (_,sym) <- assocs syms]++")" | (_,syms) <- assocs lins]++"]"
prtList = prtSep "\n"

View File

@@ -21,12 +21,12 @@ import GF.Infra.PrintClass
----------------------------------------------------------------------
-- parsing
parseFCF :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t)
parseFCF :: String -> Err (FCFParser)
parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs
| otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs
strategies = words "bottomup topdown"
parseFCF' :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFParser c n t
parseFCF' :: String -> FCFParser
parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks

View File

@@ -32,7 +32,7 @@ import Data.Array
----------------------------------------------------------------------
-- * parsing
parse :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFParser c n t
parse :: String -> FCFParser
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
where chart = process strategy pinfo toks axioms emptyXChart
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
@@ -42,7 +42,7 @@ isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: RuleId -> FCFPInfo c n t -> SyntaxNode RuleId RangeRec
emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
where
FRule _ rhs _ _ = allRules pinfo ! ruleid
@@ -57,7 +57,7 @@ updateChildren (SNode ruleid recs) i rec = do
makeMaxRange (Range _ j) = Range j j
makeMaxRange EmptyRange = EmptyRange
process :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFPInfo c n t -> Input t -> [(c,Item)] -> XChart c -> XChart c
process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
process strategy pinfo toks [] chart = chart
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
where
@@ -110,7 +110,7 @@ process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks
data Item
= Active RangeRec
Range
{-# UNPACK #-} !FLabel
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
(SyntaxNode RuleId RangeRec)
| Final RangeRec (SyntaxNode RuleId RangeRec)
@@ -134,7 +134,7 @@ insertXChart (XChart actives finals) item@(Final _ _) c =
lookupXChartAct (XChart actives finals) c = chartLookup actives c
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
xchart2syntaxchart :: (Ord c, Ord n, Ord t) => XChart c -> FCFPInfo c n t -> SyntaxChart n (c,RangeRec)
xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec)
xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $
[ case node of
@@ -146,7 +146,7 @@ xchart2syntaxchart (XChart actives finals) pinfo =
| (cat, Final found node) <- chartAssocs finals
]
literals :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [(c,Item)]
literals :: FCFPInfo -> Input FToken -> [(FCat,Item)]
literals pinfo toks =
[let (c,node) = grammarLexer pinfo t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)]
@@ -154,7 +154,7 @@ literals pinfo toks =
-- Earley --
-- called with all starting categories
initialTD :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [(c,Item)]
initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
initialTD pinfo starts toks =
do cat <- starts
ruleid <- topdownRules pinfo ? cat
@@ -164,7 +164,7 @@ initialTD pinfo starts toks =
----------------------------------------------------------------------
-- Kilbury --
initialBU :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [(c,Item)]
initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)]
initialBU pinfo toks =
do tok <- aElems (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok ++

View File

@@ -23,10 +23,10 @@ import Data.Maybe
-- type declarations
-- | the list of categories = possible starting categories
type FCFParser c n t = FCFPInfo c n t
-> [c]
-> Input t
-> SyntaxChart n (c,RangeRec)
type FCFParser = FCFPInfo
-> [FCat]
-> Input FToken
-> SyntaxChart FName (FCat,RangeRec)
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j])
@@ -36,19 +36,19 @@ makeFinalEdge cat i j = (cat, [makeRange i j])
type RuleId = Int
data FCFPInfo c n t
= FCFPInfo { allRules :: Array RuleId (FCFRule c n t)
, topdownRules :: Assoc c (SList RuleId)
data FCFPInfo
= FCFPInfo { allRules :: Array RuleId FRule
, topdownRules :: Assoc FCat (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
-- , emptyRules :: [RuleId]
-- , emptyRules :: [RuleId]
, epsilonRules :: [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc c (SList RuleId)
, leftcornerTokens :: Assoc t (SList RuleId)
, leftcornerCats :: Assoc FCat (SList RuleId)
, leftcornerTokens :: Assoc FToken (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: SList c
, grammarToks :: SList t
, grammarLexer :: t -> (c,SyntaxNode RuleId RangeRec)
, grammarCats :: SList FCat
, grammarToks :: SList FToken
, grammarLexer :: FToken -> (FCat,SyntaxNode RuleId RangeRec)
}
@@ -68,7 +68,7 @@ getLeftCornerCat lins
where
syms = lins ! 0
buildFCFPInfo :: (Ord c, Ord n, Ord t) => (t -> (c,SyntaxNode RuleId RangeRec)) -> FCFGrammar c n t -> FCFPInfo c n t
buildFCFPInfo :: (FToken -> (FCat,SyntaxNode RuleId RangeRec)) -> FGrammar -> FCFPInfo
buildFCFPInfo lexer grammar =
FCFPInfo { allRules = allrules
, topdownRules = topdownrules
@@ -98,7 +98,7 @@ buildFCFPInfo lexer grammar =
----------------------------------------------------------------------
-- pretty-printing of statistics
instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where
instance Print FCFPInfo where
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
"; tdRules=" ++ sla topdownRules ++
-- "; emptyRules=" ++ sl emptyRules ++

View File

@@ -32,11 +32,11 @@ import GF.Data.SortedList
import GF.Data.Assoc
import GF.Formalism.Utilities
import GF.Conversion.Types
import GF.Conversion.FTypes
import qualified GF.Formalism.GCFG as G
import qualified GF.Formalism.SimpleGFC as S
import qualified GF.Formalism.MCFG as M
import GF.Formalism.FCFG
import qualified GF.Formalism.CFG as C
import qualified GF.Parsing.MCFG as PM
import qualified GF.Parsing.FCFG as PF
@@ -46,12 +46,11 @@ import qualified GF.Parsing.CFG as PC
-- parsing information
data PInfo = PInfo { mcfPInfo :: MCFPInfo
, fcfPInfo :: FCFPInfo
, fcfPInfo :: PF.FCFPInfo
, cfPInfo :: CFPInfo
}
type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
type FCFPInfo = PF.FCFPInfo FCat FName Token
type CFPInfo = PC.CFPInfo CCat Name Token
buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo