1
0
forked from GitHub/gf-core

GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

This commit is contained in:
aarne
2008-05-21 09:26:44 +00:00
parent 915a1de717
commit 055c0d0d5a
536 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,179 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.Active (parse) where
import GF.Data.GeneralDeduction
import GF.Data.Assoc
import GF.Data.SortedList
import GF.Data.Utilities
import GF.Formalism.FCFG
import GF.Formalism.Utilities
import GF.Infra.PrintClass
import GF.Parsing.FCFG.Range
import GF.Parsing.FCFG.PInfo
import Control.Monad (guard)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Array
----------------------------------------------------------------------
-- * parsing
parse :: String -> FCFParser
parse strategy pinfo starts toks = xchart2syntaxchart chart pinfo
where chart = process strategy pinfo toks axioms emptyXChart
axioms | isBU strategy = literals pinfo toks ++ initialBU pinfo toks
| isTD strategy = literals pinfo toks ++ initialTD pinfo starts toks
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: RuleId -> FCFPInfo -> SyntaxNode RuleId RangeRec
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
where
FRule _ rhs _ _ = allRules pinfo ! ruleid
process :: String -> FCFPInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
process strategy pinfo toks [] chart = chart
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
where
univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat c r d -> case recs !! d of
[] -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
++
do guard (isTD strategy)
ruleid <- topdownRules pinfo ? c
return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
in process strategy pinfo toks items chart
found' -> let items = do rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) node)
in process strategy pinfo toks items chart
FSymTok tok -> let items = do (i,j) <- inputToken toks ? tok
rng' <- concatRange rng (makeRange i j)
return (cat, Active found rng' lbl (ppos+1) node)
in process strategy pinfo toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
else univRule cat (Final (reverse (rng:found)) node) chart
where
(FRule fn _ cat lins) = allRules pinfo ! ruleid
lin = lins ! lbl
univRule cat item@(Final found' node) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
let FRule _ _ _ lins = allRules pinfo ! ruleid
FSymCat cat r d = lins ! l ! ppos
rng <- concatRange rng (found' !! r)
return (cat, Active found rng l (ppos+1) (updateChildren node d found'))
++
do guard (isBU strategy)
ruleid <- leftcornerCats pinfo ? cat
let FRule _ _ _ lins = allRules pinfo ! ruleid
FSymCat cat r d = lins ! 0 ! 0
return (cat, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
in process strategy pinfo toks items chart
----------------------------------------------------------------------
-- * XChart
data Item
= Active RangeRec
Range
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
(SyntaxNode RuleId RangeRec)
| Final RangeRec (SyntaxNode RuleId RangeRec)
deriving (Eq, Ord)
data XChart c = XChart !(ParseChart Item c) !(ParseChart Item c)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart emptyChart emptyChart
insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
case chartInsert actives item c of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _) c =
case chartInsert finals item c of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = chartLookup actives c
lookupXChartFinal (XChart actives finals) c = chartLookup finals c
xchart2syntaxchart :: XChart FCat -> FCFPInfo -> SyntaxChart FName (FCat,RangeRec)
xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $
[ case node of
SNode ruleid rrecs -> let FRule fun rhs cat _ = allRules pinfo ! ruleid
in ((cat,found), SNode fun (zip rhs rrecs))
SString s -> ((cat,found), SString s)
SInt n -> ((cat,found), SInt n)
SFloat f -> ((cat,found), SFloat f)
| (cat, Final found node) <- chartAssocs finals
]
literals :: FCFPInfo -> Input FToken -> [(FCat,Item)]
literals pinfo toks =
[let (c,node) = lexer t in (c,Final [makeRange i j] node) | Edge i j t <- inputEdges toks, not (t `elem` grammarToks pinfo)]
where
lexer t =
case reads t of
[(n,"")] -> (fcatInt, SInt (n::Integer))
_ -> case reads t of
[(f,"")] -> (fcatFloat, SFloat (f::Double))
_ -> (fcatString,SString t)
----------------------------------------------------------------------
-- Earley --
-- called with all starting categories
initialTD :: FCFPInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
initialTD pinfo starts toks =
do cat <- starts
ruleid <- topdownRules pinfo ? cat
return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
----------------------------------------------------------------------
-- Kilbury --
initialBU :: FCFPInfo -> Input FToken -> [(FCat,Item)]
initialBU pinfo toks =
do (tok,rngs) <- aAssocs (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok
let FRule _ _ cat _ = allRules pinfo ! ruleid
(i,j) <- rngs
return (cat,Active [] (makeRange i j) 0 1 (emptyChildren ruleid pinfo))
++
do ruleid <- epsilonRules pinfo
let FRule _ _ cat _ = allRules pinfo ! ruleid
return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))

View File

@@ -0,0 +1,107 @@
module GF.Parsing.FCFG.Incremental where
import Data.Array
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
import GF.Data.Assoc
import GF.Data.GeneralDeduction
import GF.Formalism.FCFG
import GF.Formalism.Utilities
import GF.Parsing.FCFG.PInfo
import GF.Parsing.FCFG.Range
import GF.GFCC.CId
import Debug.Trace
initState :: FCFPInfo -> CId -> State
initState pinfo start =
let items = do
starts <- Map.lookup start (startupCats pinfo)
c <- starts
ruleid <- topdownRules pinfo ? c
let (FRule fn args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
return (Active 0 lbl 0 ruleid args cat)
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ args cat _) <- assocs (allRules pinfo)]
max_fid = case IntMap.maxViewWithKey forest of
Just ((fid,_), _) -> fid+1
Nothing -> 0
in process pinfo items (State emptyChart [] emptyChart Map.empty forest max_fid 0)
nextState :: FCFPInfo -> FToken -> State -> State
nextState pinfo t state =
process pinfo (chartLookup (tokens state) t) state{ chart=emptyChart
, charts=chart state : charts state
, tokens=emptyChart
, passive=Map.empty
, currOffset=currOffset state+1
}
getCompletions :: State -> FToken -> [FToken]
getCompletions state w =
[t | t <- chartKeys (tokens state), take (length w) t == w]
process pinfo [] state = state
process pinfo (item@(Active j lbl ppos ruleid args fid0):xitems) state
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat _ r d -> let fid = args !! d
in case chartInsert (chart state) item (fid,r) of
Nothing -> process pinfo xitems state
Just actCat -> let items = do exprs <- IntMap.lookup fid (forest state)
(Passive ruleid args) <- Set.toList exprs
return (Active k r 0 ruleid args fid)
`mplus`
do id <- Map.lookup (fid,r,k) (passive state)
return (Active j lbl (ppos+1) ruleid (updateAt d id args) fid0)
in process pinfo (xitems++items) state{chart=actCat}
FSymTok tok -> case chartInsert (tokens state) (Active j lbl (ppos+1) ruleid args fid0) tok of
Nothing -> process pinfo xitems state
Just actTok -> process pinfo xitems state{tokens=actTok}
| otherwise = case Map.lookup (fid0, lbl, j) (passive state) of
Nothing -> let fid = nextId state
items = do Active j' lbl ppos ruleid args fidc <- chartLookup ((chart state:charts state) !! (k-j)) (fid0,lbl)
let FSymCat _ _ d = rhs ruleid lbl ! ppos
return (Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc)
in process pinfo (xitems++items) state{passive=Map.insert (fid0, lbl, j) fid (passive state)
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest state)
,nextId =nextId state+1
}
Just id -> process pinfo xitems state{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest state)}
where
lin = rhs ruleid lbl
k = currOffset state
rhs ruleid lbl = lins ! lbl
where
(FRule _ _ cat lins) = allRules pinfo ! ruleid
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
data Active
= Active Int FIndex FPointPos RuleId [FCat] FCat
deriving (Eq,Show,Ord)
data Passive
= Passive RuleId [FCat]
deriving (Eq,Ord,Show)
data State
= State
{ chart :: Chart
, charts :: [Chart]
, tokens :: ParseChart Active FToken
, passive :: Map.Map (FCat, FIndex, Int) FCat
, forest :: IntMap.IntMap (Set.Set Passive)
, nextId :: FCat
, currOffset :: Int
}
deriving Show
type Chart = ParseChart Active (FCat, FIndex)

View File

@@ -0,0 +1,121 @@
---------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- FCFG parsing, parser information
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.PInfo where
import GF.Infra.PrintClass
import GF.Formalism.Utilities
import GF.Formalism.FCFG
import GF.Data.SortedList
import GF.Data.Assoc
import GF.Parsing.FCFG.Range
import qualified GF.GFCC.CId as AbsGFCC
import Data.Array
import Data.Maybe
import qualified Data.Map as Map
import qualified Data.Set as Set
import Debug.Trace
----------------------------------------------------------------------
-- type declarations
-- | the list of categories = possible starting categories
type FCFParser = FCFPInfo
-> [FCat]
-> Input FToken
-> SyntaxChart FName (FCat,RangeRec)
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j])
------------------------------------------------------------
-- parser information
type RuleId = Int
data FCFPInfo
= FCFPInfo { allRules :: Array RuleId FRule
, topdownRules :: Assoc FCat (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Earley):
-- , emptyRules :: [RuleId]
, epsilonRules :: [RuleId]
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, leftcornerCats :: Assoc FCat (SList RuleId)
, leftcornerTokens :: Assoc FToken (SList RuleId)
-- ^ used in 'GF.Parsing.MCFG.Active' (Kilbury):
, grammarCats :: SList FCat
, grammarToks :: SList FToken
, startupCats :: Map.Map AbsGFCC.CId [FCat]
}
getLeftCornerTok lins
| inRange (bounds syms) 0 = case syms ! 0 of
FSymTok tok -> Just tok
_ -> Nothing
| otherwise = Nothing
where
syms = lins ! 0
getLeftCornerCat lins
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat c _ _ -> Just c
_ -> Nothing
| otherwise = Nothing
where
syms = lins ! 0
buildFCFPInfo :: FGrammar -> FCFPInfo
buildFCFPInfo (grammar,startup) = -- trace (unlines [prt (x,Set.toList set) | (x,set) <- Map.toList leftcornFilter]) $
FCFPInfo { allRules = allrules
, topdownRules = topdownrules
-- , emptyRules = emptyrules
, epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarCats = grammarcats
, grammarToks = grammartoks
, startupCats = startup
}
where allrules = listArray (0,length grammar-1) grammar
topdownrules = accumAssoc id [(cat, ruleid) | (ruleid, FRule _ _ cat _) <- assocs allrules]
-- emptyrules = [ruleid | (ruleid, FRule _ [] _ _) <- assocs allrules]
epsilonrules = [ ruleid | (ruleid, FRule _ _ _ lins) <- assocs allrules,
not (inRange (bounds (lins ! 0)) 0) ]
leftcorncats = accumAssoc id
[ (fromJust (getLeftCornerCat lins), ruleid) |
(ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerCat lins) ]
leftcorntoks = accumAssoc id
[ (fromJust (getLeftCornerTok lins), ruleid) |
(ruleid, FRule _ _ _ lins) <- assocs allrules, isJust (getLeftCornerTok lins) ]
grammarcats = aElems topdownrules
grammartoks = nubsort [t | (FRule _ _ _ lins) <- grammar, lin <- elems lins, FSymTok t <- elems lin]
fcfPInfoToFGrammar :: FCFPInfo -> FGrammar
fcfPInfoToFGrammar pinfo = (elems (allRules pinfo), startupCats pinfo)
----------------------------------------------------------------------
-- pretty-printing of statistics
instance Print FCFPInfo where
prt pI = "[ allRules=" ++ sl (elems . allRules) ++
"; tdRules=" ++ sla topdownRules ++
-- "; emptyRules=" ++ sl emptyRules ++
"; epsilonRules=" ++ sl epsilonRules ++
"; lcCats=" ++ sla leftcornerCats ++
"; lcTokens=" ++ sla leftcornerTokens ++
"; categories=" ++ sl grammarCats ++
" ]"
where sl f = show $ length $ f pI
sla f = let (as, bs) = unzip $ aAssocs $ f pI
in show (length as) ++ "/" ++ show (length (concat bs))

View File

@@ -0,0 +1,50 @@
---------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- Definitions of ranges, and operations on ranges
-----------------------------------------------------------------------------
module GF.Parsing.FCFG.Range
( RangeRec, Range(..), makeRange, concatRange, rangeEdge, edgeRange, minRange, maxRange,
) where
-- GF modules
import GF.Formalism.Utilities
import GF.Infra.PrintClass
------------------------------------------------------------
-- ranges as single pairs
type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord)
makeRange :: Int -> Int -> Range
makeRange = Range
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
rangeEdge :: a -> Range -> Edge a
rangeEdge a (Range i j) = Edge i j a
edgeRange :: Edge a -> Range
edgeRange (Edge i j _) = Range i j
minRange :: Range -> Int
minRange (Range i j) = i
maxRange :: Range -> Int
maxRange (Range i j) = j
instance Print Range where
prt (Range i j) = "(" ++ show i ++ "-" ++ show j ++ ")"
prt (EmptyRange) = "(?)"