forked from GitHub/gf-core
remove the old parsing code and the -erasing=on flag
This commit is contained in:
@@ -74,8 +74,8 @@ module PGF(
|
||||
|
||||
-- ** Word Completion (Incremental Parsing)
|
||||
complete,
|
||||
Incremental.ParseState,
|
||||
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
|
||||
Parse.ParseState,
|
||||
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees,
|
||||
|
||||
-- ** Generation
|
||||
generateRandom, generateAll, generateAllDepth,
|
||||
@@ -105,8 +105,7 @@ import PGF.Expr (Tree)
|
||||
import PGF.Morphology
|
||||
import PGF.Data hiding (functions)
|
||||
import PGF.Binary
|
||||
import qualified PGF.Parsing.FCFG.Active as Active
|
||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||
import qualified PGF.Parse as Parse
|
||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||
|
||||
import GF.Infra.Option
|
||||
@@ -249,13 +248,11 @@ linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
|
||||
parse pgf lang typ s =
|
||||
case Map.lookup lang (concretes pgf) of
|
||||
Just cnc -> case parser cnc of
|
||||
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
|
||||
then Incremental.parse pgf lang typ (words s)
|
||||
else Active.parse "t" pinfo typ (words s)
|
||||
Just pinfo -> Parse.parse pgf lang typ (words s)
|
||||
Nothing -> error ("No parser built for language: " ++ showCId lang)
|
||||
Nothing -> error ("Unknown language: " ++ showCId lang)
|
||||
|
||||
parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
|
||||
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
|
||||
|
||||
canParse pgf cnc = isJust (lookParser pgf cnc)
|
||||
|
||||
@@ -297,12 +294,12 @@ functionType pgf fun =
|
||||
|
||||
complete pgf from typ input =
|
||||
let (ws,prefix) = tokensAndPrefix input
|
||||
state0 = Incremental.initState pgf from typ
|
||||
state0 = Parse.initState pgf from typ
|
||||
in case loop state0 ws of
|
||||
Nothing -> []
|
||||
Just state ->
|
||||
(if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
|
||||
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
|
||||
(if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else [])
|
||||
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
|
||||
where
|
||||
tokensAndPrefix :: String -> ([String],String)
|
||||
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
|
||||
@@ -311,7 +308,7 @@ complete pgf from typ input =
|
||||
where ws = words s
|
||||
|
||||
loop ps [] = Just ps
|
||||
loop ps (t:ts) = case Incremental.nextState ps t of
|
||||
loop ps (t:ts) = case Parse.nextState ps t of
|
||||
Left es -> Nothing
|
||||
Right ps -> loop ps ts
|
||||
|
||||
|
||||
@@ -159,8 +159,8 @@ instance Binary BindType where
|
||||
_ -> decodingError
|
||||
|
||||
instance Binary FFun where
|
||||
put (FFun fun prof lins) = put (fun,prof,lins)
|
||||
get = liftM3 FFun get get get
|
||||
put (FFun fun lins) = put (fun,lins)
|
||||
get = liftM2 FFun get get
|
||||
|
||||
instance Binary FSymbol where
|
||||
put (FSymCat n l) = putWord8 0 >> put (n,l)
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- FCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.BuildParser where
|
||||
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
|
||||
import Data.Array.IArray
|
||||
import Data.Maybe
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Debug.Trace
|
||||
|
||||
|
||||
data ParserInfoEx
|
||||
= ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
|
||||
, leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
|
||||
, leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
|
||||
, grammarToks :: [String]
|
||||
}
|
||||
|
||||
------------------------------------------------------------
|
||||
-- parser information
|
||||
|
||||
getLeftCornerTok pinfo (FFun _ _ lins)
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymKS [tok] -> [tok]
|
||||
_ -> []
|
||||
| otherwise = []
|
||||
where
|
||||
syms = (sequences pinfo) ! (lins ! 0)
|
||||
|
||||
getLeftCornerCat pinfo args (FFun _ _ lins)
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymCat d _ -> let cat = args !! d
|
||||
in case IntMap.lookup cat (productions pinfo) of
|
||||
Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
|
||||
Nothing -> [cat]
|
||||
_ -> []
|
||||
| otherwise = []
|
||||
where
|
||||
syms = (sequences pinfo) ! (lins ! 0)
|
||||
|
||||
buildParserInfo :: ParserInfo -> ParserInfoEx
|
||||
buildParserInfo pinfo =
|
||||
ParserInfoEx { epsilonRules = epsilonrules
|
||||
, leftcornerCats = leftcorncats
|
||||
, leftcornerTokens = leftcorntoks
|
||||
, grammarToks = grammartoks
|
||||
}
|
||||
|
||||
where epsilonrules = [ (ruleid,args,cat)
|
||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
||||
, (FApply ruleid args) <- Set.toList set
|
||||
, let (FFun _ _ lins) = (functions pinfo) ! ruleid
|
||||
, not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
|
||||
leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
|
||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
||||
, (FApply ruleid args) <- Set.toList set
|
||||
, cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
|
||||
leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
|
||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
||||
, (FApply ruleid args) <- Set.toList set
|
||||
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
|
||||
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]
|
||||
@@ -19,13 +19,12 @@ data FSymbol
|
||||
| FSymKS [String]
|
||||
| FSymKP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
type Profile = [Int]
|
||||
data Production
|
||||
= FApply {-# UNPACK #-} !FunId [FCat]
|
||||
| FCoerce {-# UNPACK #-} !FCat
|
||||
| FConst Expr [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||
data FFun = FFun CId {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||
type FSeq = Array FPointPos FSymbol
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
@@ -39,7 +38,7 @@ data ParserInfo
|
||||
, sequences :: Array SeqId FSeq
|
||||
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
|
||||
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
|
||||
, startCats :: Map.Map CId [FCat]
|
||||
, startCats :: Map.Map CId (FCat,FCat)
|
||||
, totalCats :: {-# UNPACK #-} !FCat
|
||||
}
|
||||
|
||||
@@ -71,14 +70,14 @@ ppProduction (fcat,FCoerce arg) =
|
||||
ppProduction (fcat,FConst _ ss) =
|
||||
ppFCat fcat <+> text "->" <+> ppStrs ss
|
||||
|
||||
ppFun (funid,FFun fun _ arr) =
|
||||
ppFun (funid,FFun fun arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
||||
|
||||
ppSeq (seqid,seq) =
|
||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
||||
|
||||
ppStartCat (id,fcats) =
|
||||
ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
|
||||
ppStartCat (id,(start,end)) =
|
||||
ppCId id <+> text ":=" <+> brackets (ppFCat start <+> text ".." <+> ppFCat end)
|
||||
|
||||
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
module PGF.Parsing.FCFG.Incremental
|
||||
module PGF.Parse
|
||||
( ParseState
|
||||
, ErrorState
|
||||
, initState
|
||||
@@ -57,10 +57,10 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
|
||||
initState :: PGF -> Language -> Type -> ParseState
|
||||
initState pgf lang (DTyp _ start _) =
|
||||
let items = do
|
||||
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
|
||||
cat <- maybe [] range (Map.lookup start (startCats pinfo))
|
||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
||||
[] cat (productions pinfo)
|
||||
let FFun fn _ lins = functions pinfo ! funid
|
||||
let FFun fn lins = functions pinfo ! funid
|
||||
(lbl,seqid) <- assocs lins
|
||||
return (Active 0 0 funid seqid args (AK cat lbl))
|
||||
|
||||
@@ -131,7 +131,7 @@ recoveryStates open_types (EState pgf pinfo chart) =
|
||||
}
|
||||
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
|
||||
where
|
||||
type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
|
||||
type2fcats (DTyp _ cat _) = maybe [] range (Map.lookup cat (startCats pinfo))
|
||||
|
||||
complete open_fcats items ac =
|
||||
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
||||
@@ -154,10 +154,10 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
|
||||
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
|
||||
|
||||
exps = do
|
||||
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
|
||||
cat <- maybe [] range (Map.lookup start (startCats pinfo))
|
||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
||||
[] cat (productions pinfo)
|
||||
let FFun fn _ lins = functions pinfo ! funid
|
||||
let FFun fn lins = functions pinfo ! funid
|
||||
lbl <- indices lins
|
||||
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
||||
(fvs,tree) <- go Set.empty 0 (0,fid)
|
||||
@@ -168,7 +168,7 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
|
||||
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
||||
| Set.member fcat rec = mzero
|
||||
| otherwise = foldForest (\funid args trees ->
|
||||
do let FFun fn _ lins = functions pinfo ! funid
|
||||
do let FFun fn lins = functions pinfo ! funid
|
||||
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
|
||||
check_ho_fun fn args
|
||||
`mplus`
|
||||
@@ -250,7 +250,7 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
|
||||
|
||||
rhs funid lbl = unsafeAt lins lbl
|
||||
where
|
||||
FFun _ _ lins = unsafeAt funs funid
|
||||
FFun _ lins = unsafeAt funs funid
|
||||
|
||||
|
||||
updateAt :: Int -> a -> [a] -> [a]
|
||||
@@ -1,205 +0,0 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- MCFG parsing, the active algorithm
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.Parsing.FCFG.Active (parse) where
|
||||
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Utilities
|
||||
import qualified GF.Data.MultiMap as MM
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import PGF.BuildParser
|
||||
|
||||
import Control.Monad (guard)
|
||||
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array.IArray
|
||||
import Debug.Trace
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
type FToken = String
|
||||
|
||||
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
|
||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr]
|
||||
parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees
|
||||
where
|
||||
inTokens = input toks
|
||||
starts = Map.findWithDefault [] start (startCats pinfo)
|
||||
schart = xchart2syntaxchart chart pinfo
|
||||
(i,j) = inputBounds inTokens
|
||||
finalEdges = [makeFinalEdge cat i j | cat <- starts]
|
||||
forests = chart2forests schart (const False) finalEdges
|
||||
filteredForests = forests >>= applyProfileToForest
|
||||
|
||||
pinfoex = buildParserInfo pinfo
|
||||
|
||||
chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
|
||||
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
|
||||
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
|
||||
emptyChildren ruleid args = SNode ruleid (replicate (length args) [])
|
||||
|
||||
|
||||
process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
|
||||
process strategy pinfo pinfoex toks [] chart = chart
|
||||
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
|
||||
where
|
||||
univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
|
||||
| inRange (bounds lin) ppos =
|
||||
case lin ! ppos of
|
||||
FSymCat d r -> let c = args !! d
|
||||
in case recs !! d of
|
||||
[] -> case insertXChart chart item c of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
|
||||
++
|
||||
do guard (isTD strategy)
|
||||
(ruleid,args) <- topdownRules pinfo c
|
||||
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
found' -> let items = do rng <- concatRange rng (found' !! r)
|
||||
return (Active found rng lbl (ppos+1) node args cat)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
FSymKS [tok]
|
||||
-> let items = do t_rng <- inputToken toks ? tok
|
||||
rng' <- concatRange rng t_rng
|
||||
return (Active found rng' lbl (ppos+1) node args cat)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
| otherwise =
|
||||
if inRange (bounds lins) (lbl+1)
|
||||
then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
|
||||
else univRule (Final (reverse (rng:found)) node args cat) chart
|
||||
where
|
||||
(FFun _ _ lins) = functions pinfo ! ruleid
|
||||
lin = sequences pinfo ! (lins ! lbl)
|
||||
univRule item@(Final found' node args cat) chart =
|
||||
case insertXChart chart item cat of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
|
||||
let FFun _ _ lins = functions pinfo ! ruleid
|
||||
FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (Active found rng l (ppos+1) (updateChildren node d found') args c)
|
||||
++
|
||||
do guard (isBU strategy)
|
||||
(ruleid,args,c) <- leftcornerCats pinfoex ? cat
|
||||
let FFun _ _ lins = functions pinfo ! ruleid
|
||||
FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
|
||||
return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
|
||||
|
||||
updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
|
||||
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * XChart
|
||||
|
||||
data Item
|
||||
= Active RangeRec
|
||||
Range
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !FPointPos
|
||||
(SyntaxNode FunId RangeRec)
|
||||
[FCat]
|
||||
FCat
|
||||
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
|
||||
|
||||
emptyXChart :: Ord c => XChart c
|
||||
emptyXChart = XChart MM.empty MM.empty
|
||||
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
|
||||
case MM.insert' c item actives of
|
||||
Nothing -> Nothing
|
||||
Just actives -> Just (XChart actives finals)
|
||||
|
||||
insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
|
||||
case MM.insert' c item finals of
|
||||
Nothing -> Nothing
|
||||
Just finals -> Just (XChart actives finals)
|
||||
|
||||
lookupXChartAct (XChart actives finals) c = actives MM.! c
|
||||
lookupXChartFinal (XChart actives finals) c = finals MM.! c
|
||||
|
||||
xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
|
||||
xchart2syntaxchart (XChart actives finals) pinfo =
|
||||
accumAssoc groupSyntaxNodes $
|
||||
[ case node of
|
||||
SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
|
||||
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
|
||||
SString s -> ((cat,found), SString s)
|
||||
SInt n -> ((cat,found), SInt n)
|
||||
SFloat f -> ((cat,found), SFloat f)
|
||||
| (Final found node rhs cat) <- MM.elems finals
|
||||
]
|
||||
|
||||
literals :: ParserInfoEx -> Input FToken -> [Item]
|
||||
literals pinfoex toks =
|
||||
[let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
|
||||
where
|
||||
lexer t =
|
||||
case reads t of
|
||||
[(n,"")] -> (fcatInt, SInt (n::Integer))
|
||||
_ -> case reads t of
|
||||
[(f,"")] -> (fcatFloat, SFloat (f::Double))
|
||||
_ -> (fcatString,SString t)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
|
||||
-- called with all starting categories
|
||||
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
|
||||
initialTD pinfo starts toks =
|
||||
do cat <- starts
|
||||
(ruleid,args) <- topdownRules pinfo cat
|
||||
return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
|
||||
|
||||
topdownRules pinfo cat = f cat []
|
||||
where
|
||||
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
|
||||
|
||||
g (FApply ruleid args) rules = (ruleid,args) : rules
|
||||
g (FCoerce cat) rules = f cat rules
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
|
||||
initialBU pinfo pinfoex toks =
|
||||
do (tok,rngs) <- aAssocs (inputToken toks)
|
||||
(ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
|
||||
rng <- rngs
|
||||
return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
|
||||
++
|
||||
do (ruleid,args,cat) <- epsilonRules pinfoex
|
||||
let FFun _ _ _ = functions pinfo ! ruleid
|
||||
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)
|
||||
@@ -1,188 +0,0 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/13 12:40:19 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.6 $
|
||||
--
|
||||
-- Basic type declarations and functions for grammar formalisms
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
module PGF.Parsing.FCFG.Utilities where
|
||||
|
||||
import Control.Monad
|
||||
import Data.Array
|
||||
import Data.List (groupBy)
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- ranges as single pairs
|
||||
|
||||
type RangeRec = [Range]
|
||||
|
||||
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
||||
| EmptyRange
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
makeRange :: Int -> Int -> Range
|
||||
makeRange = Range
|
||||
|
||||
concatRange :: Range -> Range -> [Range]
|
||||
concatRange EmptyRange rng = return rng
|
||||
concatRange rng EmptyRange = return rng
|
||||
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
|
||||
|
||||
minRange :: Range -> Int
|
||||
minRange (Range i j) = i
|
||||
|
||||
maxRange :: Range -> Int
|
||||
maxRange (Range i j) = j
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * representaions of input tokens
|
||||
|
||||
data Input t = MkInput { inputBounds :: (Int, Int),
|
||||
inputToken :: Assoc t [Range]
|
||||
}
|
||||
|
||||
input :: Ord t => [t] -> Input t
|
||||
input toks = MkInput inBounds inToken
|
||||
where
|
||||
inBounds = (0, length toks)
|
||||
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]
|
||||
|
||||
inputMany :: Ord t => [[t]] -> Input t
|
||||
inputMany toks = MkInput inBounds inToken
|
||||
where
|
||||
inBounds = (0, length toks)
|
||||
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * representations of syntactical analyses
|
||||
|
||||
-- ** charts as finite maps over edges
|
||||
|
||||
-- | The values of the chart, a list of key-daughters pairs,
|
||||
-- has unique keys. In essence, it is a map from 'n' to daughters.
|
||||
-- The daughters should be a set (not necessarily sorted) of rhs's.
|
||||
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
|
||||
|
||||
data SyntaxNode n e = SMeta
|
||||
| SNode n [e]
|
||||
| SString String
|
||||
| SInt Integer
|
||||
| SFloat Double
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
|
||||
groupSyntaxNodes [] = []
|
||||
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
|
||||
where
|
||||
(ess,xs') = span xs
|
||||
|
||||
span [] = ([],[])
|
||||
span xs@(SNode n es:xs')
|
||||
| n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
|
||||
| otherwise = ([],xs)
|
||||
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
|
||||
groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
|
||||
groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
|
||||
|
||||
-- ** syntax forests
|
||||
|
||||
data SyntaxForest n = FMeta
|
||||
| FNode n [[SyntaxForest n]]
|
||||
-- ^ The outer list should be a set (not necessarily sorted)
|
||||
-- of possible alternatives. Ie. the outer list
|
||||
-- is a disjunctive node, and the inner lists
|
||||
-- are (conjunctive) concatenative nodes
|
||||
| FString String
|
||||
| FInt Integer
|
||||
| FFloat Double
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
instance Functor SyntaxForest where
|
||||
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
|
||||
fmap _ (FString s) = FString s
|
||||
fmap _ (FInt n) = FInt n
|
||||
fmap _ (FFloat f) = FFloat f
|
||||
fmap _ (FMeta) = FMeta
|
||||
|
||||
forestName :: SyntaxForest n -> Maybe n
|
||||
forestName (FNode n _) = Just n
|
||||
forestName _ = Nothing
|
||||
|
||||
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
|
||||
unifyManyForests = foldM unifyForests FMeta
|
||||
|
||||
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
|
||||
-- and all children can be unified
|
||||
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
|
||||
unifyForests FMeta forest = return forest
|
||||
unifyForests forest FMeta = return forest
|
||||
unifyForests (FNode name1 children1) (FNode name2 children2)
|
||||
| name1 == name2 && not (null children) = return $ FNode name1 children
|
||||
where children = [ forests | forests1 <- children1, forests2 <- children2,
|
||||
sameLength forests1 forests2,
|
||||
forests <- zipWithM unifyForests forests1 forests2 ]
|
||||
unifyForests (FString s1) (FString s2)
|
||||
| s1 == s2 = return $ FString s1
|
||||
unifyForests (FInt n1) (FInt n2)
|
||||
| n1 == n2 = return $ FInt n1
|
||||
unifyForests (FFloat f1) (FFloat f2)
|
||||
| f1 == f2 = return $ FFloat f1
|
||||
unifyForests _ _ = fail "forest unification failure"
|
||||
|
||||
|
||||
-- ** conversions between representations
|
||||
|
||||
chart2forests :: (Ord n, Ord e) =>
|
||||
SyntaxChart n e -- ^ The complete chart
|
||||
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
|
||||
-> [e] -- ^ The starting edges
|
||||
-> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together.
|
||||
-- In essence, the result is a map from 'n' to forest daughters
|
||||
chart2forests chart isMeta = concatMap (edge2forests [])
|
||||
where edge2forests edges edge
|
||||
| isMeta edge = [FMeta]
|
||||
| edge `elem` edges = []
|
||||
| otherwise = map (item2forest (edge:edges)) $ chart ? edge
|
||||
item2forest edges (SMeta) = FMeta
|
||||
item2forest edges (SNode name children) =
|
||||
FNode name $ children >>= mapM (edge2forests edges)
|
||||
item2forest edges (SString s) = FString s
|
||||
item2forest edges (SInt n) = FInt n
|
||||
item2forest edges (SFloat f) = FFloat f
|
||||
|
||||
|
||||
applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
|
||||
applyProfileToForest (FNode (fun,profiles) children)
|
||||
| fun == wildCId = concat chForests
|
||||
| otherwise = [ FNode fun chForests | not (null chForests) ]
|
||||
where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
|
||||
forests0 <- children,
|
||||
forests <- mapM applyProfileToForest forests0 ]
|
||||
applyProfileToForest (FString s) = [FString s]
|
||||
applyProfileToForest (FInt n) = [FInt n]
|
||||
applyProfileToForest (FFloat f) = [FFloat f]
|
||||
applyProfileToForest (FMeta) = [FMeta]
|
||||
|
||||
|
||||
forest2trees :: SyntaxForest CId -> [Tree]
|
||||
forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees
|
||||
forest2trees (FString s) = [Lit (LStr s)]
|
||||
forest2trees (FInt n) = [Lit (LInt n)]
|
||||
forest2trees (FFloat f) = [Lit (LFlt f)]
|
||||
forest2trees (FMeta) = [Meta 0]
|
||||
Reference in New Issue
Block a user