forked from GitHub/gf-core
remove FTypes module and move all definitions to Formalism.FCFG
This commit is contained in:
@@ -44,6 +44,7 @@ import GF.System.Arch (ModTime)
|
||||
|
||||
import qualified Transfer.InterpreterAPI as T
|
||||
|
||||
import GF.Formalism.FCFG
|
||||
import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE
|
||||
import qualified GF.Conversion.GFC as Cnv
|
||||
import qualified GF.Conversion.SimpleToFCFG as FCnv
|
||||
@@ -67,7 +68,7 @@ data ShellState = ShSt {
|
||||
cfs :: [(Ident,CF)] , -- ^ context-free grammars (small, no parameters, very over-generating)
|
||||
abstracts :: [(Ident,[Ident])], -- ^ abstracts and their associated concretes
|
||||
mcfgs :: [(Ident, Cnv.MGrammar)], -- ^ MCFG, converted according to Ljunglöf (2004, ch 3)
|
||||
fcfgs :: [(Ident, Cnv.FGrammar)], -- ^ FCFG, optimized MCFG by Krasimir Angelov
|
||||
fcfgs :: [(Ident, FGrammar)], -- ^ FCFG, optimized MCFG by Krasimir Angelov
|
||||
cfgs :: [(Ident, Cnv.CGrammar)], -- ^ CFG, converted from mcfg
|
||||
-- (large, with parameters, no-so overgenerating)
|
||||
pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars)
|
||||
@@ -146,7 +147,7 @@ data StateGrammar = StGr {
|
||||
grammar :: CanonGrammar,
|
||||
cf :: CF,
|
||||
mcfg :: Cnv.MGrammar,
|
||||
fcfg :: Cnv.FGrammar,
|
||||
fcfg :: FGrammar,
|
||||
cfg :: Cnv.CGrammar,
|
||||
pInfo :: Prs.PInfo,
|
||||
morpho :: Morpho,
|
||||
@@ -174,7 +175,7 @@ emptyStateGrammar = StGr {
|
||||
stateGrammarST :: StateGrammar -> CanonGrammar
|
||||
stateCF :: StateGrammar -> CF
|
||||
stateMCFG :: StateGrammar -> Cnv.MGrammar
|
||||
stateFCFG :: StateGrammar -> Cnv.FGrammar
|
||||
stateFCFG :: StateGrammar -> FGrammar
|
||||
stateCFG :: StateGrammar -> Cnv.CGrammar
|
||||
statePInfo :: StateGrammar -> Prs.PInfo
|
||||
stateMorpho :: StateGrammar -> Morpho
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
module GF.Conversion.FTypes where
|
||||
|
||||
import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC (CId(..))
|
||||
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.PrintClass
|
||||
import GF.Data.Assoc
|
||||
|
||||
import Control.Monad (foldM)
|
||||
import Data.Array
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * basic (leaf) types
|
||||
|
||||
-- ** input tokens
|
||||
|
||||
---- type Token = String ---- inlined in FGrammar and FRule
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * fast nonerasing MCFG
|
||||
|
||||
type FIndex = Int
|
||||
type FPath = [FIndex]
|
||||
type FName = NameProfile AbsGFCC.CId
|
||||
type FGrammar = FCFGrammar FCat FName String
|
||||
type FRule = FCFRule FCat FName String
|
||||
data FCat = FCat {-# UNPACK #-} !Int AbsGFCC.CId [FPath] [(FPath,FIndex)]
|
||||
|
||||
initialFCat :: AbsGFCC.CId -> FCat
|
||||
initialFCat cat = FCat 0 cat [] []
|
||||
|
||||
fcatString = FCat (-1) (AbsGFCC.CId "String") [[0]] []
|
||||
fcatInt = FCat (-2) (AbsGFCC.CId "Int") [[0]] []
|
||||
fcatFloat = FCat (-3) (AbsGFCC.CId "Float") [[0]] []
|
||||
|
||||
fcat2cid :: FCat -> AbsGFCC.CId
|
||||
fcat2cid (FCat _ c _ _) = c
|
||||
|
||||
instance Eq FCat where
|
||||
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
|
||||
|
||||
instance Ord FCat where
|
||||
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
|
||||
|
||||
instance Print AbsGFCC.CId where
|
||||
prt (AbsGFCC.CId s) = s
|
||||
|
||||
isCoercionF :: FName -> Bool
|
||||
isCoercionF (Name fun [Unify [0]]) = fun == AbsGFCC.CId "_"
|
||||
isCoercionF _ = False
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * pretty-printing
|
||||
|
||||
instance Print FCat where
|
||||
prt (FCat _ (AbsGFCC.CId cat) rcs tcs) = cat ++ "{" ++
|
||||
prtSep ";" ([prt path | path <- rcs] ++
|
||||
[prt path ++ "=" ++ prt term | (path,term) <- tcs])
|
||||
++ "}"
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
module GF.Conversion.GFC
|
||||
(module GF.Conversion.GFC,
|
||||
SGrammar, EGrammar, MGrammar, FGrammar, CGrammar) where
|
||||
SGrammar, EGrammar, MGrammar, CGrammar) where
|
||||
|
||||
import GF.Infra.Option
|
||||
import GF.Canon.GFC (CanonGrammar)
|
||||
@@ -25,7 +25,6 @@ import GF.Formalism.SimpleGFC (decl2cat)
|
||||
import GF.Formalism.CFG (CFRule(..))
|
||||
import GF.Formalism.Utilities (symbol, name2fun)
|
||||
import GF.Conversion.Types
|
||||
import GF.Conversion.FTypes
|
||||
|
||||
import qualified GF.Conversion.GFCtoSimple as G2S
|
||||
import qualified GF.Conversion.SimpleToFinite as S2Fin
|
||||
|
||||
@@ -21,7 +21,6 @@ import Control.Monad
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Conversion.FTypes
|
||||
import GF.Canon.GFCC.AbsGFCC
|
||||
import GF.Canon.GFCC.DataGFCC
|
||||
|
||||
@@ -38,9 +37,7 @@ import Data.Maybe
|
||||
----------------------------------------------------------------------
|
||||
-- main conversion function
|
||||
|
||||
type FToken = String
|
||||
|
||||
convertGrammar :: GFCC -> [(CId,FCFGrammar FCat FName FToken)]
|
||||
convertGrammar :: GFCC -> [(CId,FGrammar)]
|
||||
convertGrammar gfcc = [(cncname,convert abs_defs conc) |
|
||||
cncname <- cncnames gfcc, conc <- Map.lookup cncname (concretes gfcc)]
|
||||
where
|
||||
|
||||
@@ -11,7 +11,6 @@ module GF.FCFG.ToFCFG (printFGrammar) where
|
||||
|
||||
import GF.Formalism.FCFG
|
||||
import GF.Formalism.SimpleGFC
|
||||
import GF.Conversion.FTypes
|
||||
import GF.Infra.Ident
|
||||
import qualified GF.FCFG.AbsFCFG as F
|
||||
|
||||
@@ -31,7 +30,7 @@ import GF.Infra.Print
|
||||
type FToken = String
|
||||
|
||||
-- this is the main function used
|
||||
printFGrammar :: FCFGrammar FCat FName FToken -> String
|
||||
printFGrammar :: FGrammar -> String
|
||||
printFGrammar = undefined {- printTree . fgrammar
|
||||
|
||||
fgrammar :: FCFGrammar FCat Name FToken -> F.FGrammar
|
||||
|
||||
@@ -7,32 +7,106 @@
|
||||
-- Definitions of fast multiple context-free grammars
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Formalism.FCFG where
|
||||
module GF.Formalism.FCFG
|
||||
(
|
||||
-- * Token
|
||||
FToken
|
||||
|
||||
-- * Category
|
||||
, FPath
|
||||
, FCat(..)
|
||||
|
||||
, initialFCat
|
||||
, fcatString, fcatInt, fcatFloat
|
||||
, fcat2cid
|
||||
|
||||
-- * Symbol
|
||||
, FIndex
|
||||
, FSymbol(..)
|
||||
|
||||
-- * Name
|
||||
, FName
|
||||
, isCoercionF
|
||||
|
||||
-- * Grammar
|
||||
, FPointPos
|
||||
, FGrammar
|
||||
, FRule(..)
|
||||
) where
|
||||
|
||||
import Control.Monad (liftM)
|
||||
import Data.List (groupBy)
|
||||
import Data.Array
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import qualified GF.Canon.GFCC.AbsGFCC as AbsGFCC
|
||||
import GF.Infra.PrintClass
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- grammar types
|
||||
-- Token
|
||||
type FToken = String
|
||||
|
||||
type FLabel = Int
|
||||
|
||||
------------------------------------------------------------
|
||||
-- Category
|
||||
type FPath = [FIndex]
|
||||
data FCat = FCat {-# UNPACK #-} !Int AbsGFCC.CId [FPath] [(FPath,FIndex)]
|
||||
|
||||
initialFCat :: AbsGFCC.CId -> FCat
|
||||
initialFCat cat = FCat 0 cat [] []
|
||||
|
||||
fcatString = FCat (-1) (AbsGFCC.CId "String") [[0]] []
|
||||
fcatInt = FCat (-2) (AbsGFCC.CId "Int") [[0]] []
|
||||
fcatFloat = FCat (-3) (AbsGFCC.CId "Float") [[0]] []
|
||||
|
||||
fcat2cid :: FCat -> AbsGFCC.CId
|
||||
fcat2cid (FCat _ c _ _) = c
|
||||
|
||||
instance Eq FCat where
|
||||
(FCat id1 _ _ _) == (FCat id2 _ _ _) = id1 == id2
|
||||
|
||||
instance Ord FCat where
|
||||
compare (FCat id1 _ _ _) (FCat id2 _ _ _) = compare id1 id2
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- Symbol
|
||||
type FIndex = Int
|
||||
data FSymbol
|
||||
= FSymCat FCat {-# UNPACK #-} !FIndex {-# UNPACK #-} !Int
|
||||
| FSymTok FToken
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- Name
|
||||
type FName = NameProfile AbsGFCC.CId
|
||||
|
||||
isCoercionF :: FName -> Bool
|
||||
isCoercionF (Name fun [Unify [0]]) = fun == AbsGFCC.CId "_"
|
||||
isCoercionF _ = False
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- Grammar
|
||||
type FGrammar = [FRule]
|
||||
type FPointPos = Int
|
||||
data FRule = FRule FName [FCat] FCat (Array FIndex (Array FPointPos FSymbol))
|
||||
|
||||
data FSymbol cat tok
|
||||
= FSymCat cat {-# UNPACK #-} !FLabel {-# UNPACK #-} !Int
|
||||
| FSymTok tok
|
||||
|
||||
type FCFGrammar cat name tok = [FCFRule cat name tok]
|
||||
data FCFRule cat name tok = FRule name [cat] cat (Array FLabel (Array FPointPos (FSymbol cat tok)))
|
||||
|
||||
------------------------------------------------------------
|
||||
-- pretty-printing
|
||||
|
||||
instance (Print c, Print t) => Print (FSymbol c t) where
|
||||
instance Print AbsGFCC.CId where
|
||||
prt (AbsGFCC.CId s) = s
|
||||
|
||||
instance Print FCat where
|
||||
prt (FCat _ (AbsGFCC.CId cat) rcs tcs) = cat ++ "{" ++
|
||||
prtSep ";" ([prt path | path <- rcs] ++
|
||||
[prt path ++ "=" ++ prt term | (path,term) <- tcs])
|
||||
++ "}"
|
||||
|
||||
instance Print FSymbol where
|
||||
prt (FSymCat c l n) = "($" ++ prt n ++ "!" ++ prt l ++ ")"
|
||||
prt (FSymTok t) = simpleShow (prt t)
|
||||
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
|
||||
@@ -43,7 +117,7 @@ instance (Print c, Print t) => Print (FSymbol c t) where
|
||||
mkEsc chr = [chr]
|
||||
prtList = prtSep " "
|
||||
|
||||
instance (Print c, Print n, Print t) => Print (FCFRule n c t) where
|
||||
instance Print FRule where
|
||||
prt (FRule name args res lins) = prt name ++ " : " ++ (if null args then "" else prtSep " " args ++ " -> ") ++ prt res ++
|
||||
" =\n [" ++ prtSep "\n " ["("++prtSep " " [prt sym | (_,sym) <- assocs syms]++")" | (_,syms) <- assocs lins]++"]"
|
||||
prtList = prtSep "\n"
|
||||
|
||||
@@ -21,12 +21,12 @@ import GF.Infra.PrintClass
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
|
||||
parseFCF :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> Err (FCFParser c n t)
|
||||
parseFCF :: String -> Err (FCFParser)
|
||||
parseFCF prs | prs `elem` strategies = Ok $ parseFCF' prs
|
||||
| otherwise = Bad $ "FCFG parsing strategy not defined: " ++ prs
|
||||
|
||||
strategies = words "bottomup topdown"
|
||||
|
||||
parseFCF' :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFParser c n t
|
||||
parseFCF' :: String -> FCFParser
|
||||
parseFCF' "bottomup" pinfo starts toks = Active.parse "b" pinfo starts toks
|
||||
parseFCF' "topdown" pinfo starts toks = Active.parse "t" pinfo starts toks
|
||||
|
||||
@@ -32,7 +32,7 @@ import Data.Array
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
parse :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFParser c n t
|
||||
parse :: String -> FCFParser
|
||||
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
|
||||
where chart = process strategy pinfo toks axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
|
||||
@@ -42,7 +42,7 @@ isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: RuleId -> FCFPInfo c n t -> SyntaxNode RuleId RangeRec
|
||||
emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec
|
||||
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
|
||||
where
|
||||
FRule _ rhs _ _ = allRules pinfo ! ruleid
|
||||
@@ -57,7 +57,7 @@ updateChildren (SNode ruleid recs) i rec = do
|
||||
makeMaxRange (Range _ j) = Range j j
|
||||
makeMaxRange EmptyRange = EmptyRange
|
||||
|
||||
process :: (Print c, Ord c, Ord n, Print t, Ord t) => String -> FCFPInfo c n t -> Input t -> [(c,Item)] -> XChart c -> XChart c
|
||||
process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
|
||||
process strategy pinfo toks [] chart = chart
|
||||
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
|
||||
where
|
||||
@@ -110,7 +110,7 @@ process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks
|
||||
data Item
|
||||
= Active RangeRec
|
||||
Range
|
||||
{-# UNPACK #-} !FLabel
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !FPointPos
|
||||
(SyntaxNode RuleId RangeRec)
|
||||
| Final RangeRec (SyntaxNode RuleId RangeRec)
|
||||
@@ -134,7 +134,7 @@ insertXChart (XChart actives finals) item@(Final _ _) c =
|
||||
lookupXChartAct (XChart actives finals) c = chartLookup actives c
|
||||
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
|
||||
|
||||
xchart2syntaxchart :: (Ord c, Ord n, Ord t) => XChart c -> FCFPInfo c n t -> SyntaxChart n (c,RangeRec)
|
||||
xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec)
|
||||
xchart2syntaxchart (XChart actives finals) pinfo =
|
||||
accumAssoc groupSyntaxNodes $
|
||||
[ case node of
|
||||
@@ -146,7 +146,7 @@ xchart2syntaxchart (XChart actives finals) pinfo =
|
||||
| (cat, Final found node) <- chartAssocs finals
|
||||
]
|
||||
|
||||
literals :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [(c,Item)]
|
||||
literals :: FCFPInfo -> Input FToken -> [(FCat,Item)]
|
||||
literals pinfo toks =
|
||||
[let (c,node) = grammarLexer pinfo t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)]
|
||||
|
||||
@@ -154,7 +154,7 @@ literals pinfo toks =
|
||||
-- Earley --
|
||||
|
||||
-- called with all starting categories
|
||||
initialTD :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> [c] -> Input t -> [(c,Item)]
|
||||
initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
|
||||
initialTD pinfo starts toks =
|
||||
do cat <- starts
|
||||
ruleid <- topdownRules pinfo ? cat
|
||||
@@ -164,7 +164,7 @@ initialTD pinfo starts toks =
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
initialBU :: (Ord c, Ord n, Ord t) => FCFPInfo c n t -> Input t -> [(c,Item)]
|
||||
initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)]
|
||||
initialBU pinfo toks =
|
||||
do tok <- aElems (inputToken toks)
|
||||
ruleid <- leftcornerTokens pinfo ? tok ++
|
||||
|
||||
@@ -23,10 +23,10 @@ import Data.Maybe
|
||||
-- type declarations
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
type FCFParser c n t = FCFPInfo c n t
|
||||
-> [c]
|
||||
-> Input t
|
||||
-> SyntaxChart n (c,RangeRec)
|
||||
type FCFParser = FCFPInfo
|
||||
-> [FCat]
|
||||
-> Input FToken
|
||||
-> SyntaxChart FName (FCat,RangeRec)
|
||||
|
||||
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
|
||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
@@ -36,19 +36,19 @@ makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
|
||||
type RuleId = Int
|
||||
|
||||
data FCFPInfo c n t
|
||||
= FCFPInfo { allRules :: Array RuleId (FCFRule c n t)
|
||||
, topdownRules :: Assoc c (SList RuleId)
|
||||
data FCFPInfo
|
||||
= FCFPInfo { allRules :: Array RuleId FRule
|
||||
, topdownRules :: Assoc FCat (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
|
||||
-- , emptyRules :: [RuleId]
|
||||
-- , emptyRules :: [RuleId]
|
||||
, epsilonRules :: [RuleId]
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, leftcornerCats :: Assoc c (SList RuleId)
|
||||
, leftcornerTokens :: Assoc t (SList RuleId)
|
||||
, leftcornerCats :: Assoc FCat (SList RuleId)
|
||||
, leftcornerTokens :: Assoc FToken (SList RuleId)
|
||||
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
|
||||
, grammarCats :: SList c
|
||||
, grammarToks :: SList t
|
||||
, grammarLexer :: t -> (c,SyntaxNode RuleId RangeRec)
|
||||
, grammarCats :: SList FCat
|
||||
, grammarToks :: SList FToken
|
||||
, grammarLexer :: FToken -> (FCat,SyntaxNode RuleId RangeRec)
|
||||
}
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ getLeftCornerCat lins
|
||||
where
|
||||
syms = lins ! 0
|
||||
|
||||
buildFCFPInfo :: (Ord c, Ord n, Ord t) => (t -> (c,SyntaxNode RuleId RangeRec)) -> FCFGrammar c n t -> FCFPInfo c n t
|
||||
buildFCFPInfo :: (FToken -> (FCat,SyntaxNode RuleId RangeRec)) -> FGrammar -> FCFPInfo
|
||||
buildFCFPInfo lexer grammar =
|
||||
FCFPInfo { allRules = allrules
|
||||
, topdownRules = topdownrules
|
||||
@@ -98,7 +98,7 @@ buildFCFPInfo lexer grammar =
|
||||
----------------------------------------------------------------------
|
||||
-- pretty-printing of statistics
|
||||
|
||||
instance (Ord c, Ord n, Ord t) => Print (FCFPInfo c n t) where
|
||||
instance Print FCFPInfo where
|
||||
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
|
||||
"; tdRules=" ++ sla topdownRules ++
|
||||
-- "; emptyRules=" ++ sl emptyRules ++
|
||||
|
||||
@@ -32,11 +32,11 @@ import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Conversion.Types
|
||||
import GF.Conversion.FTypes
|
||||
|
||||
import qualified GF.Formalism.GCFG as G
|
||||
import qualified GF.Formalism.SimpleGFC as S
|
||||
import qualified GF.Formalism.MCFG as M
|
||||
import GF.Formalism.FCFG
|
||||
import qualified GF.Formalism.CFG as C
|
||||
import qualified GF.Parsing.MCFG as PM
|
||||
import qualified GF.Parsing.FCFG as PF
|
||||
@@ -46,12 +46,11 @@ import qualified GF.Parsing.CFG as PC
|
||||
-- parsing information
|
||||
|
||||
data PInfo = PInfo { mcfPInfo :: MCFPInfo
|
||||
, fcfPInfo :: FCFPInfo
|
||||
, fcfPInfo :: PF.FCFPInfo
|
||||
, cfPInfo :: CFPInfo
|
||||
}
|
||||
|
||||
type MCFPInfo = PM.MCFPInfo MCat Name MLabel Token
|
||||
type FCFPInfo = PF.FCFPInfo FCat FName Token
|
||||
type CFPInfo = PC.CFPInfo CCat Name Token
|
||||
|
||||
buildPInfo :: MGrammar -> FGrammar -> CGrammar -> PInfo
|
||||
|
||||
Reference in New Issue
Block a user