changed names of resource-1.3; added a note on homepage on release

This commit is contained in:
aarne
2008-06-25 16:54:35 +00:00
parent 7d721eb16e
commit c5c6d13546
1729 changed files with 113 additions and 32 deletions

View File

@@ -0,0 +1,189 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG.Active (parse) where
import GF.Data.Assoc
import GF.Data.SortedList
import GF.Data.Utilities
import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Control.Monad (guard)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Array
----------------------------------------------------------------------
-- * parsing
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j])
-- | the list of categories = possible starting categories
parse :: String -> ParserInfo -> CId -> [FToken] -> [Tree]
parse strategy pinfo start toks = nubsort $ filteredForests >>= forest2trees
where
inTokens = input toks
starts = Map.findWithDefault [] start (startupCats pinfo)
schart = xchart2syntaxchart chart pinfo
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- starts]
forests = chart2forests schart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
chart = process strategy pinfo inTokens axioms emptyXChart
axioms | isBU strategy = literals pinfo inTokens ++ initialBU pinfo inTokens
| isTD strategy = literals pinfo inTokens ++ initialTD pinfo starts inTokens
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: RuleId -> ParserInfo -> SyntaxNode RuleId RangeRec
emptyChildren ruleid pinfo = SNode ruleid (replicate (length rhs) [])
where
FRule _ _ rhs _ _ = allRules pinfo ! ruleid
process :: String -> ParserInfo -> Input FToken -> [(FCat,Item)] -> XChart FCat -> XChart FCat
process strategy pinfo toks [] chart = chart
process strategy pinfo toks ((c,item):items) chart = process strategy pinfo toks items $! univRule c item chart
where
univRule cat item@(Active found rng lbl ppos node@(SNode ruleid recs)) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat r d -> let c = args !! d
in case recs !! d of
[] -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do item@(Final found' _) <- lookupXChartFinal chart c
rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)))
++
do guard (isTD strategy)
ruleid <- topdownRules pinfo ? c
return (c, Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))
in process strategy pinfo toks items chart
found' -> let items = do rng <- concatRange rng (found' !! r)
return (c, Active found rng lbl (ppos+1) node)
in process strategy pinfo toks items chart
FSymTok tok -> let items = do t_rng <- inputToken toks ? tok
rng' <- concatRange rng t_rng
return (cat, Active found rng' lbl (ppos+1) node)
in process strategy pinfo toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule cat (Active (rng:found) EmptyRange (lbl+1) 0 node) chart
else univRule cat (Final (reverse (rng:found)) node) chart
where
(FRule _ _ args cat lins) = allRules pinfo ! ruleid
lin = lins ! lbl
univRule cat item@(Final found' node) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _)) <- lookupXChartAct chart cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid
FSymCat r d = lins ! l ! ppos
rng <- concatRange rng (found' !! r)
return (args !! d, Active found rng l (ppos+1) (updateChildren node d found'))
++
do guard (isBU strategy)
ruleid <- leftcornerCats pinfo ? cat
let FRule _ _ args _ lins = allRules pinfo ! ruleid
FSymCat r d = lins ! 0 ! 0
return (args !! d, Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid pinfo) d found'))
updateChildren :: SyntaxNode RuleId RangeRec -> Int -> RangeRec -> SyntaxNode RuleId RangeRec
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
in process strategy pinfo toks items chart
----------------------------------------------------------------------
-- * XChart
data Item
= Active RangeRec
Range
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
(SyntaxNode RuleId RangeRec)
| Final RangeRec (SyntaxNode RuleId RangeRec)
deriving (Eq, Ord)
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart MM.empty MM.empty
insertXChart (XChart actives finals) item@(Active _ _ _ _ _) c =
case MM.insert' c item actives of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _) c =
case MM.insert' c item finals of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = actives MM.! c
lookupXChartFinal (XChart actives finals) c = finals MM.! c
xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $
[ case node of
SNode ruleid rrecs -> let FRule fun prof rhs cat _ = allRules pinfo ! ruleid
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
SString s -> ((cat,found), SString s)
SInt n -> ((cat,found), SInt n)
SFloat f -> ((cat,found), SFloat f)
| (cat, Final found node) <- MM.toList finals
]
literals :: ParserInfo -> Input FToken -> [(FCat,Item)]
literals pinfo toks =
[let (c,node) = lexer t in (c,Final [rng] node) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfo)]
where
lexer t =
case reads t of
[(n,"")] -> (fcatInt, SInt (n::Integer))
_ -> case reads t of
[(f,"")] -> (fcatFloat, SFloat (f::Double))
_ -> (fcatString,SString t)
----------------------------------------------------------------------
-- Earley --
-- called with all starting categories
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [(FCat,Item)]
initialTD pinfo starts toks =
do cat <- starts
ruleid <- topdownRules pinfo ? cat
return (cat,Active [] (Range 0 0) 0 0 (emptyChildren ruleid pinfo))
----------------------------------------------------------------------
-- Kilbury --
initialBU :: ParserInfo -> Input FToken -> [(FCat,Item)]
initialBU pinfo toks =
do (tok,rngs) <- aAssocs (inputToken toks)
ruleid <- leftcornerTokens pinfo ? tok
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
rng <- rngs
return (cat,Active [] rng 0 1 (emptyChildren ruleid pinfo))
++
do ruleid <- epsilonRules pinfo
let FRule _ _ _ cat _ = allRules pinfo ! ruleid
return (cat,Active [] EmptyRange 0 0 (emptyChildren ruleid pinfo))

View File

@@ -0,0 +1,187 @@
{-# LANGUAGE BangPatterns #-}
module PGF.Parsing.FCFG.Incremental
( ParseState
, initState
, nextState
, getCompletions
, extractExps
, parse
) where
import Data.Array
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe)
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
import GF.Data.Assoc
import GF.Data.SortedList
import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Debug.Trace
parse :: ParserInfo -> CId -> [FToken] -> [Tree]
parse pinfo start toks = extractExps (foldl' nextState (initState pinfo start) toks) start
initState :: ParserInfo -> CId -> ParseState
initState pinfo start =
let items = do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
return (Active 0 lbl 0 ruleid args cat)
forest = IntMap.fromListWith Set.union [(cat, Set.singleton (Passive ruleid args)) | (ruleid, FRule _ _ args cat _) <- assocs (allRules pinfo)]
max_fid = case IntMap.maxViewWithKey forest of
Just ((fid,_), _) -> fid+1
Nothing -> 0
in State pinfo
(Chart MM.empty [] Map.empty forest max_fid 0)
(Set.fromList items)
-- | From the current state and the next token
-- 'nextState' computes a new state where the token
-- is consumed and the current position shifted by one.
nextState :: ParseState -> String -> ParseState
nextState (State pinfo chart items) t =
let (items1,chart1) = process add (allRules pinfo) (Set.toList items) (Set.empty,chart)
chart2 = chart1{ active =MM.empty
, actives=active chart1 : actives chart1
, passive=Map.empty
, offset =offset chart1+1
}
in State pinfo chart2 items1
where
add tok item set
| tok == t = Set.insert item set
| otherwise = set
-- | If the next token is not known but only its prefix (possible empty prefix)
-- then the 'getCompletions' function can be used to calculate the possible
-- next words and the consequent states. This is used for word completions in
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (State pinfo chart items) w =
let (map',chart1) = process add (allRules pinfo) (Set.toList items) (MM.empty,chart)
chart2 = chart1{ active =MM.empty
, actives=active chart1 : actives chart1
, passive=Map.empty
, offset =offset chart1+1
}
in fmap (State pinfo chart2) map'
where
add tok item map
| isPrefixOf w tok = fromMaybe map (MM.insert' tok item map)
| otherwise = map
extractExps :: ParseState -> CId -> [Tree]
extractExps (State pinfo chart items) start = exps
where
(_,st) = process (\_ _ -> id) (allRules pinfo) (Set.toList items) ((),chart)
exps = nubsort $ do
c <- Map.findWithDefault [] start (startupCats pinfo)
ruleid <- topdownRules pinfo ? c
let (FRule fn _ args cat lins) = allRules pinfo ! ruleid
lbl <- indices lins
fid <- Map.lookup (PK c lbl 0) (passive st)
go Set.empty fid
go rec fid
| Set.member fid rec = mzero
| otherwise = do set <- IntMap.lookup fid (forest st)
Passive ruleid args <- Set.toList set
let (FRule fn _ _ cat lins) = allRules pinfo ! ruleid
if fn == wildCId
then go (Set.insert fid rec) (head args)
else do args <- mapM (go (Set.insert fid rec)) args
return (Fun fn args)
process fn !rules [] acc_chart = acc_chart
process fn !rules (item:items) acc_chart = univRule item acc_chart
where
univRule (Active j lbl ppos ruleid args fid0) acc_chart@(acc,chart)
| inRange (bounds lin) ppos =
case unsafeAt lin ppos of
FSymCat r d -> let !fid = args !! d
in case MM.insert' (AK fid r) item (active chart) of
Nothing -> process fn rules items $ acc_chart
Just actCat -> (case Map.lookup (PK fid r k) (passive chart) of
Nothing -> id
Just id -> process fn rules [Active j lbl (ppos+1) ruleid (updateAt d id args) fid0]) $
(case IntMap.lookup fid (forest chart) of
Nothing -> id
Just set -> process fn rules (Set.fold (\(Passive ruleid args) -> (:) (Active k r 0 ruleid args fid)) [] set)) $
process fn rules items $
(acc,chart{active=actCat})
FSymTok tok -> process fn rules items $
(fn tok (Active j lbl (ppos+1) ruleid args fid0) acc,chart)
| otherwise = case Map.lookup (PK fid0 lbl j) (passive chart) of
Nothing -> let fid = nextId chart
in process fn rules [Active j' lbl (ppos+1) ruleid (updateAt d fid args) fidc
| Active j' lbl ppos ruleid args fidc <- ((active chart:actives chart) !! (k-j)) MM.! (AK fid0 lbl),
let FSymCat _ d = unsafeAt (rhs ruleid lbl) ppos] $
process fn rules items $
(acc,chart{passive=Map.insert (PK fid0 lbl j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (Passive ruleid args)) (forest chart)
,nextId =nextId chart+1
})
Just id -> process fn rules items $
(acc,chart{forest = IntMap.insertWith Set.union id (Set.singleton (Passive ruleid args)) (forest chart)})
where
!lin = rhs ruleid lbl
!k = offset chart
rhs ruleid lbl = unsafeAt lins lbl
where
(FRule _ _ _ cat lins) = unsafeAt rules ruleid
updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
data Active
= Active {-# UNPACK #-} !Int
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
{-# UNPACK #-} !RuleId
[FCat]
{-# UNPACK #-} !FCat
deriving (Eq,Show,Ord)
data Passive
= Passive {-# UNPACK #-} !RuleId
[FCat]
deriving (Eq,Ord,Show)
data ActiveKey
= AK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
deriving (Eq,Ord,Show)
data PassiveKey
= PK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !Int
deriving (Eq,Ord,Show)
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
data ParseState = State ParserInfo Chart (Set.Set Active)
data Chart
= Chart
{ active :: MM.MultiMap ActiveKey Active
, actives :: [MM.MultiMap ActiveKey Active]
, passive :: Map.Map PassiveKey FCat
, forest :: IntMap.IntMap (Set.Set Passive)
, nextId :: {-# UNPACK #-} !FCat
, offset :: {-# UNPACK #-} !Int
}

View File

@@ -0,0 +1,187 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.6 $
--
-- Basic type declarations and functions for grammar formalisms
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG.Utilities where
import Control.Monad
import Data.Array
import Data.List (groupBy)
import PGF.CId
import PGF.Data
import GF.Data.Assoc
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
------------------------------------------------------------
-- ranges as single pairs
type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord)
makeRange :: Int -> Int -> Range
makeRange = Range
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
minRange :: Range -> Int
minRange (Range i j) = i
maxRange :: Range -> Int
maxRange (Range i j) = j
------------------------------------------------------------
-- * representaions of input tokens
data Input t = MkInput { inputBounds :: (Int, Int),
inputToken :: Assoc t [Range]
}
input :: Ord t => [t] -> Input t
input toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]
inputMany :: Ord t => [[t]] -> Input t
inputMany toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
------------------------------------------------------------
-- * representations of syntactical analyses
-- ** charts as finite maps over edges
-- | The values of the chart, a list of key-daughters pairs,
-- has unique keys. In essence, it is a map from 'n' to daughters.
-- The daughters should be a set (not necessarily sorted) of rhs's.
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
data SyntaxNode n e = SMeta
| SNode n [e]
| SString String
| SInt Integer
| SFloat Double
deriving (Eq,Ord)
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes [] = []
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
where
(ess,xs') = span xs
span [] = ([],[])
span xs@(SNode n es:xs')
| n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
| otherwise = ([],xs)
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
-- ** syntax forests
data SyntaxForest n = FMeta
| FNode n [[SyntaxForest n]]
-- ^ The outer list should be a set (not necessarily sorted)
-- of possible alternatives. Ie. the outer list
-- is a disjunctive node, and the inner lists
-- are (conjunctive) concatenative nodes
| FString String
| FInt Integer
| FFloat Double
deriving (Eq, Ord, Show)
instance Functor SyntaxForest where
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
fmap _ (FString s) = FString s
fmap _ (FInt n) = FInt n
fmap _ (FFloat f) = FFloat f
fmap _ (FMeta) = FMeta
forestName :: SyntaxForest n -> Maybe n
forestName (FNode n _) = Just n
forestName _ = Nothing
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
unifyManyForests = foldM unifyForests FMeta
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
-- and all children can be unified
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
unifyForests FMeta forest = return forest
unifyForests forest FMeta = return forest
unifyForests (FNode name1 children1) (FNode name2 children2)
| name1 == name2 && not (null children) = return $ FNode name1 children
where children = [ forests | forests1 <- children1, forests2 <- children2,
sameLength forests1 forests2,
forests <- zipWithM unifyForests forests1 forests2 ]
unifyForests (FString s1) (FString s2)
| s1 == s2 = return $ FString s1
unifyForests (FInt n1) (FInt n2)
| n1 == n2 = return $ FInt n1
unifyForests (FFloat f1) (FFloat f2)
| f1 == f2 = return $ FFloat f1
unifyForests _ _ = fail "forest unification failure"
-- ** conversions between representations
chart2forests :: (Ord n, Ord e) =>
SyntaxChart n e -- ^ The complete chart
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
-> [e] -- ^ The starting edges
-> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together.
-- In essence, the result is a map from 'n' to forest daughters
chart2forests chart isMeta = concatMap (edge2forests [])
where edge2forests edges edge
| isMeta edge = [FMeta]
| edge `elem` edges = []
| otherwise = map (item2forest (edge:edges)) $ chart ? edge
item2forest edges (SMeta) = FMeta
item2forest edges (SNode name children) =
FNode name $ children >>= mapM (edge2forests edges)
item2forest edges (SString s) = FString s
item2forest edges (SInt n) = FInt n
item2forest edges (SFloat f) = FFloat f
applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
applyProfileToForest (FNode (fun,profiles) children)
| fun == wildCId = concat chForests
| otherwise = [ FNode fun chForests | not (null chForests) ]
where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
forests0 <- children,
forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n) = [FInt n]
applyProfileToForest (FFloat f) = [FFloat f]
applyProfileToForest (FMeta) = [FMeta]
forest2trees :: SyntaxForest CId -> [Tree]
forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees
forest2trees (FString s) = [Lit (LStr s)]
forest2trees (FInt n) = [Lit (LInt n)]
forest2trees (FFloat f) = [Lit (LFlt f)]
forest2trees (FMeta) = [Meta 0]