completely phrase based parser and support for pre {} in PMCFG

This commit is contained in:
krasimir
2009-06-16 11:56:08 +00:00
parent b442cde3bd
commit 8bc8929c59
12 changed files with 147 additions and 68 deletions

View File

@@ -51,6 +51,7 @@ library
PGF.TypeCheck
PGF.Binary
GF.Data.MultiMap
GF.Data.TrieMap
GF.Data.Utilities
GF.Data.SortedList
GF.Data.Assoc
@@ -95,6 +96,7 @@ executable gf
GF.Infra.CompactPrint
GF.Text.UTF8
GF.Data.MultiMap
GF.Data.TrieMap
GF.Data.Utilities
GF.Data.SortedList
GF.Data.Assoc

View File

@@ -129,7 +129,7 @@ lins2js p ls = JS.EArray [JS.EArray [sym2js s | s <- Array.elems (sequences p Ar
sym2js :: FSymbol -> JS.Expr
sym2js (FSymCat n l) = new "ArgProj" [JS.EInt n, JS.EInt l]
sym2js (FSymLit n l) = new "ArgProj" [JS.EInt n, JS.EInt l]
sym2js (FSymTok (KS t)) = new "Terminal" [JS.EStr t]
sym2js (FSymKS [t]) = new "Terminal" [JS.EStr t]
new :: String -> [JS.Expr] -> JS.Expr
new f xs = JS.ENew (JS.Ident f) xs

View File

@@ -158,7 +158,10 @@ translateLin idxArgs ((lbl,syms) : lins) grammarEnv lbl'
| lbl' == lbl = addFSeq grammarEnv (lbl,map instSym syms)
| otherwise = translateLin idxArgs lins grammarEnv lbl'
where
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs) FSymTok
instSym = either (\(lbl, nr, xnr) -> instCat lbl nr xnr 0 idxArgs)
(\t -> case t of
KS s -> FSymKS [s]
KP strs vars -> FSymKP strs vars)
instCat lbl nr xnr nr' ((idx,xargs):idxArgs)
| nr == idx = let (fcat, PFCat _ rcs _) = xargs !! xnr
in FSymCat (nr'+xnr) (index lbl rcs 0)

View File

@@ -213,10 +213,22 @@ addSequences' env (Return v) = let (env1,v1) = addSequences env v
addSequences :: GrammarEnv -> Value [FSymbol] -> (GrammarEnv, Value SeqId)
addSequences env (Rec vs) = let (env1,vs1) = List.mapAccumL addSequences' env vs
in (env1,Rec vs1)
addSequences env (Str lin) = let (env1,seqid) = addFSeq env lin
addSequences env (Str lin) = let (env1,seqid) = addFSeq env (optimizeLin lin)
in (env1,Str seqid)
addSequences env (Con i) = (env,Con i)
optimizeLin [] = []
optimizeLin lin@(FSymKS _ : _) =
let (ts,lin') = getRest lin
in FSymKS ts : optimizeLin lin'
where
getRest (FSymKS ts : lin) = let (ts1,lin') = getRest lin
in (ts++ts1,lin')
getRest lin = ([],lin)
optimizeLin (sym : lin) = sym : optimizeLin lin
convertTerm :: TermMap -> FPath -> Term -> Term -> CnvMonad (Value [FSymbol])
convertTerm cnc_defs sel ctype (V nr) = convertArg ctype nr (reverse sel)
convertTerm cnc_defs sel ctype (C nr) = convertCon ctype nr (reverse sel)
@@ -227,11 +239,8 @@ convertTerm cnc_defs sel ctype (FV vars) = do term <- variants vars
convertTerm cnc_defs sel ctype term
convertTerm cnc_defs sel ctype (S ts) = do vs <- mapM (convertTerm cnc_defs sel ctype) ts
return (Str (concat [s | Str s <- vs]))
--convertTerm cnc_defs sel ctype (K t) = return (Str [FSymTok t])
convertTerm cnc_defs sel ctype (K (KS t)) = return (Str [FSymTok (KS t)])
convertTerm cnc_defs sel ctype (K (KP strs vars)) =
do toks <- variants (strs:[strs' | Alt strs' _ <- vars])
return (Str (map (FSymTok . KS) toks))
convertTerm cnc_defs sel ctype (K (KS t)) = return (Str [FSymKS [t]])
convertTerm cnc_defs sel ctype (K (KP s v))=return (Str [FSymKP s v])
convertTerm cnc_defs sel ctype (F id) = case Map.lookup id cnc_defs of
Just term -> convertTerm cnc_defs sel ctype term
Nothing -> error ("unknown id " ++ prCId id)

55
src/GF/Data/TrieMap.hs Normal file
View File

@@ -0,0 +1,55 @@
module GF.Data.TrieMap
( TrieMap
, empty
, singleton
, lookup
, null
, decompose
, insertWith
, unionWith
) where
import Prelude hiding (lookup, null)
import qualified Data.Map as Map
data TrieMap k v = Tr (Maybe v) (Map.Map k (TrieMap k v))
empty = Tr Nothing Map.empty
singleton :: [k] -> a -> TrieMap k a
singleton [] v = Tr (Just v) Map.empty
singleton (k:ks) v = Tr Nothing (Map.singleton k (singleton ks v))
lookup :: Ord k => [k] -> TrieMap k a -> Maybe a
lookup [] (Tr mb_v m) = mb_v
lookup (k:ks) (Tr mb_v m) = Map.lookup k m >>= lookup ks
null :: TrieMap k v -> Bool
null (Tr Nothing m) = Map.null m
null _ = False
decompose :: TrieMap k v -> (Maybe v, Map.Map k (TrieMap k v))
decompose (Tr mb_v m) = (mb_v,m)
insertWith :: Ord k => (v -> v -> v) -> [k] -> v -> TrieMap k v -> TrieMap k v
insertWith f [] v0 (Tr mb_v m) = case mb_v of
Just v -> Tr (Just (f v0 v)) m
Nothing -> Tr (Just v0 ) m
insertWith f (k:ks) v0 (Tr mb_v m) = case Map.lookup k m of
Nothing -> Tr mb_v (Map.insert k (singleton ks v0) m)
Just tr -> Tr mb_v (Map.insert k (insertWith f ks v0 tr) m)
unionWith :: Ord k => (v -> v -> v) -> TrieMap k v -> TrieMap k v -> TrieMap k v
unionWith f (Tr mb_v1 m1) (Tr mb_v2 m2) =
let mb_v = case (mb_v1,mb_v2) of
(Nothing,Nothing) -> Nothing
(Just v ,Nothing) -> Just v
(Nothing,Just v ) -> Just v
(Just v1,Just v2) -> Just (f v1 v2)
m = Map.unionWith (unionWith f) m1 m2
in Tr mb_v m

View File

@@ -82,17 +82,17 @@ pgfToCFG pgf lang = mkCFG (prCId (lookStartCat pgf)) extCats (startRules ++ conc
FFun f ps rhs = functions pinfo ! funid
mkRhs :: Array FPointPos FSymbol -> [CFSymbol]
mkRhs = map fsymbolToSymbol . Array.elems
mkRhs = concatMap fsymbolToSymbol . Array.elems
containsLiterals :: Array FPointPos FSymbol -> Bool
containsLiterals row = any isLiteralFCat [args!!n | FSymCat n _ <- Array.elems row] ||
not (null [n | FSymLit n _ <- Array.elems row]) -- only this is needed for PMCFG.
-- The first line is for backward compat.
fsymbolToSymbol :: FSymbol -> CFSymbol
fsymbolToSymbol (FSymCat n l) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymLit n l) = NonTerminal (fcatToCat (args!!n) l)
fsymbolToSymbol (FSymTok (KS t)) = Terminal t
fsymbolToSymbol :: FSymbol -> [CFSymbol]
fsymbolToSymbol (FSymCat n l) = [NonTerminal (fcatToCat (args!!n) l)]
fsymbolToSymbol (FSymLit n l) = [NonTerminal (fcatToCat (args!!n) l)]
fsymbolToSymbol (FSymKS ts) = map Terminal ts
fixProfile :: Array FPointPos FSymbol -> Profile -> Profile
fixProfile row = concatMap positions

View File

@@ -156,14 +156,14 @@ instance Binary FFun where
instance Binary FSymbol where
put (FSymCat n l) = putWord8 0 >> put (n,l)
put (FSymLit n l) = putWord8 1 >> put (n,l)
put (FSymTok (KS s)) = putWord8 2 >> put s
put (FSymTok (KP d vs)) = putWord8 3 >> put (d,vs)
put (FSymKS ts) = putWord8 2 >> put ts
put (FSymKP d vs) = putWord8 3 >> put (d,vs)
get = do tag <- getWord8
case tag of
0 -> liftM2 FSymCat get get
1 -> liftM2 FSymLit get get
2 -> liftM (FSymTok . KS) get
3 -> liftM2 (\d vs -> FSymTok (KP d vs)) get get
2 -> liftM FSymKS get
3 -> liftM2 (\d vs -> FSymKP d vs) get get
_ -> decodingError
instance Binary Production where

View File

@@ -35,8 +35,8 @@ data ParserInfoEx
getLeftCornerTok pinfo (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymTok (KS tok) -> [tok]
_ -> []
FSymKS [tok] -> [tok]
_ -> []
| otherwise = []
where
syms = (sequences pinfo) ! (lins ! 0)
@@ -73,4 +73,4 @@ buildParserInfo pinfo =
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin]
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]

View File

@@ -53,7 +53,10 @@ data Term =
| TM String
deriving (Eq,Ord,Show)
data Tokn =
KS String
| KP [String] [Alternative]
deriving (Eq,Ord,Show)
-- merge two GFCCs; fails is differens absnames; priority to second arg

View File

@@ -16,24 +16,20 @@ type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymTok Tokn
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
type Profile = [Int]
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Tree String
| FConst Tree [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Tokn =
KS String
| KP [String] [Alternative]
deriving (Eq,Ord,Show)
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
@@ -70,8 +66,8 @@ ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ s) =
ppFCat fcat <+> text "->" <+> ppStr s
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFun (funid,FFun fun _ arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (text (prCId fun))
@@ -84,14 +80,12 @@ ppStartCat (id,fcats) =
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymTok t) = ppTokn t
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppTokn (KS t) = ppStr t
ppTokn (KP ts alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppStr ts) : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppAlt (Alt ts ps) = hsep (map ppStr ts) <+> char '/' <+> hsep (map ppStr ps)
ppStr s = doubleQuotes (text s)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "String"

View File

@@ -84,7 +84,7 @@ process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo
found' -> let items = do rng <- concatRange rng (found' !! r)
return (Active found rng lbl (ppos+1) node args cat)
in process strategy pinfo pinfoex toks items chart
FSymTok (KS tok)
FSymKS [tok]
-> let items = do t_rng <- inputToken toks ? tok
rng' <- concatRange rng t_rng
return (Active found rng' lbl (ppos+1) node args cat)

View File

@@ -13,6 +13,7 @@ import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe, maybe)
import qualified Data.Map as Map
import qualified GF.Data.TrieMap as TMap
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
@@ -37,26 +38,29 @@ initState pinfo (DTyp _ start _) =
in State pinfo
(Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
(Set.fromList items)
(TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token
-- 'nextState' computes a new state where the token
-- is consumed and the current position shifted by one.
nextState :: ParseState -> String -> Maybe ParseState
nextState (State pinfo chart items) t =
let (items1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = fromMaybe TMap.empty (Map.lookup t map_items)
(acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in if Set.null items1
in if TMap.null acc1
then Nothing
else Just (State pinfo chart2 items1)
else Just (State pinfo chart2 acc1)
where
add (KS tok) item set
| tok == t = Set.insert item set
| otherwise = set
add (tok:toks) item acc
| tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
add _ item acc = acc
-- | If the next token is not known but only its prefix (possible empty prefix)
-- then the 'getCompletions' function can be used to calculate the possible
@@ -64,22 +68,27 @@ nextState (State pinfo chart items) t =
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (State pinfo chart items) w =
let (map',chart1) = process Nothing add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
(acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in fmap (State pinfo chart2) map'
in fmap (State pinfo chart2) acc'
where
add (KS tok) item map
| isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map
| otherwise = map
add (tok:toks) item acc
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
add _ item acc = acc
extractExps :: ParseState -> Type -> [Tree]
extractExps (State pinfo chart items) (DTyp _ start _) = exps
where
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart
(mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
exps = nubsort $ do
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
@@ -138,19 +147,23 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
Just set | Set.member item set -> process mbt fn seqs funs items acc chart
| otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
FSymTok tok -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc
FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
in process mbt fn seqs funs items acc' chart
FSymKP strs vars
-> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
(strs:[strs' | Alt strs' _ <- vars])
in process mbt fn seqs funs items acc' chart
FSymLit d r -> let !fid = args !! d
in case [t | FConst _ t <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
(tok:_) -> let !acc' = fn (KS tok) (Active j (ppos+1) funid seqid args key0) acc
in process mbt fn seqs funs items acc' chart
[] -> case litCatMatch fid mbt of
Just (t,lit) -> let fid' = nextId chart
!acc' = fn (KS t) (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit t)) (forest chart)
,nextId=nextId chart+1
}
Nothing -> process mbt fn seqs funs items acc chart
in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
(toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
in process mbt fn seqs funs items acc' chart
[] -> case litCatMatch fid mbt of
Just (toks,lit) -> let fid' = nextId chart
!acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
,nextId=nextId chart+1
}
Nothing -> process mbt fn seqs funs items acc chart
| otherwise =
case lookupPC (mkPK key0 j) (passive chart) of
Nothing -> let fid = nextId chart
@@ -181,12 +194,12 @@ updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
litCatMatch fcat (Just t)
| fcat == fcatString = Just (t,Lit (LStr t))
| fcat == fcatInt = case reads t of {[(n,"")] -> Just (t,Lit (LInt n));
| fcat == fcatString = Just ([t],Lit (LStr t))
| fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],Lit (LInt n));
_ -> Nothing }
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just (t,Lit (LFlt d));
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],Lit (LFlt d));
_ -> Nothing }
| fcat == fcatVar = Just (t,Var (mkCId t))
| fcat == fcatVar = Just ([t],Var (mkCId t))
litCatMatch _ _ = Nothing
@@ -250,7 +263,7 @@ insertPC key fcat chart = Map.insert key fcat chart
-- Forest
----------------------------------------------------------------
foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> String -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
@@ -258,7 +271,7 @@ foldForest f g b fcat forest =
where
foldProd (FCoerce fcat) b = foldForest f g b fcat forest
foldProd (FApply funid args) b = f funid args b
foldProd (FConst const s) b = g const s b
foldProd (FConst const toks) b = g const toks b
----------------------------------------------------------------
@@ -267,7 +280,7 @@ foldForest f g b fcat forest =
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
data ParseState = State ParserInfo Chart (Set.Set Active)
data ParseState = State ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
data Chart
= Chart