mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-07 02:02:51 -06:00
completely phrase based parser and support for pre {} in PMCFG
This commit is contained in:
@@ -156,14 +156,14 @@ instance Binary FFun where
|
||||
instance Binary FSymbol where
|
||||
put (FSymCat n l) = putWord8 0 >> put (n,l)
|
||||
put (FSymLit n l) = putWord8 1 >> put (n,l)
|
||||
put (FSymTok (KS s)) = putWord8 2 >> put s
|
||||
put (FSymTok (KP d vs)) = putWord8 3 >> put (d,vs)
|
||||
put (FSymKS ts) = putWord8 2 >> put ts
|
||||
put (FSymKP d vs) = putWord8 3 >> put (d,vs)
|
||||
get = do tag <- getWord8
|
||||
case tag of
|
||||
0 -> liftM2 FSymCat get get
|
||||
1 -> liftM2 FSymLit get get
|
||||
2 -> liftM (FSymTok . KS) get
|
||||
3 -> liftM2 (\d vs -> FSymTok (KP d vs)) get get
|
||||
2 -> liftM FSymKS get
|
||||
3 -> liftM2 (\d vs -> FSymKP d vs) get get
|
||||
_ -> decodingError
|
||||
|
||||
instance Binary Production where
|
||||
|
||||
@@ -35,8 +35,8 @@ data ParserInfoEx
|
||||
|
||||
getLeftCornerTok pinfo (FFun _ _ lins)
|
||||
| inRange (bounds syms) 0 = case syms ! 0 of
|
||||
FSymTok (KS tok) -> [tok]
|
||||
_ -> []
|
||||
FSymKS [tok] -> [tok]
|
||||
_ -> []
|
||||
| otherwise = []
|
||||
where
|
||||
syms = (sequences pinfo) ! (lins ! 0)
|
||||
@@ -73,4 +73,4 @@ buildParserInfo pinfo =
|
||||
| (cat,set) <- IntMap.toList (productions pinfo)
|
||||
, (FApply ruleid args) <- Set.toList set
|
||||
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
|
||||
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymTok (KS t) <- elems lin]
|
||||
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]
|
||||
|
||||
@@ -53,7 +53,10 @@ data Term =
|
||||
| TM String
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
data Tokn =
|
||||
KS String
|
||||
| KP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
-- merge two GFCCs; fails is differens absnames; priority to second arg
|
||||
|
||||
@@ -16,24 +16,20 @@ type FPointPos = Int
|
||||
data FSymbol
|
||||
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymTok Tokn
|
||||
| FSymKS [String]
|
||||
| FSymKP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
type Profile = [Int]
|
||||
data Production
|
||||
= FApply {-# UNPACK #-} !FunId [FCat]
|
||||
| FCoerce {-# UNPACK #-} !FCat
|
||||
| FConst Tree String
|
||||
| FConst Tree [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||
type FSeq = Array FPointPos FSymbol
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
|
||||
data Tokn =
|
||||
KS String
|
||||
| KP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
data Alternative =
|
||||
Alt [String] [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
@@ -70,8 +66,8 @@ ppProduction (fcat,FApply funid args) =
|
||||
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
|
||||
ppProduction (fcat,FCoerce arg) =
|
||||
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
|
||||
ppProduction (fcat,FConst _ s) =
|
||||
ppFCat fcat <+> text "->" <+> ppStr s
|
||||
ppProduction (fcat,FConst _ ss) =
|
||||
ppFCat fcat <+> text "->" <+> ppStrs ss
|
||||
|
||||
ppFun (funid,FFun fun _ arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (text (prCId fun))
|
||||
@@ -84,14 +80,12 @@ ppStartCat (id,fcats) =
|
||||
|
||||
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymTok t) = ppTokn t
|
||||
ppSymbol (FSymKS ts) = ppStrs ts
|
||||
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
|
||||
|
||||
ppTokn (KS t) = ppStr t
|
||||
ppTokn (KP ts alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppStr ts) : map ppAlt alts)))
|
||||
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
ppAlt (Alt ts ps) = hsep (map ppStr ts) <+> char '/' <+> hsep (map ppStr ps)
|
||||
|
||||
ppStr s = doubleQuotes (text s)
|
||||
ppStrs ss = doubleQuotes (hsep (map text ss))
|
||||
|
||||
ppFCat fcat
|
||||
| fcat == fcatString = text "String"
|
||||
|
||||
@@ -84,7 +84,7 @@ process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo
|
||||
found' -> let items = do rng <- concatRange rng (found' !! r)
|
||||
return (Active found rng lbl (ppos+1) node args cat)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
FSymTok (KS tok)
|
||||
FSymKS [tok]
|
||||
-> let items = do t_rng <- inputToken toks ? tok
|
||||
rng' <- concatRange rng t_rng
|
||||
return (Active found rng' lbl (ppos+1) node args cat)
|
||||
|
||||
@@ -13,6 +13,7 @@ import Data.Array.Base (unsafeAt)
|
||||
import Data.List (isPrefixOf, foldl')
|
||||
import Data.Maybe (fromMaybe, maybe)
|
||||
import qualified Data.Map as Map
|
||||
import qualified GF.Data.TrieMap as TMap
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Control.Monad
|
||||
@@ -37,26 +38,29 @@ initState pinfo (DTyp _ start _) =
|
||||
|
||||
in State pinfo
|
||||
(Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
|
||||
(Set.fromList items)
|
||||
(TMap.singleton [] (Set.fromList items))
|
||||
|
||||
-- | From the current state and the next token
|
||||
-- 'nextState' computes a new state where the token
|
||||
-- is consumed and the current position shifted by one.
|
||||
nextState :: ParseState -> String -> Maybe ParseState
|
||||
nextState (State pinfo chart items) t =
|
||||
let (items1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) (Set.toList items) Set.empty chart
|
||||
let (mb_agenda,map_items) = TMap.decompose items
|
||||
agenda = maybe [] Set.toList mb_agenda
|
||||
acc = fromMaybe TMap.empty (Map.lookup t map_items)
|
||||
(acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
|
||||
chart2 = chart1{ active =emptyAC
|
||||
, actives=active chart1 : actives chart1
|
||||
, passive=emptyPC
|
||||
, offset =offset chart1+1
|
||||
}
|
||||
in if Set.null items1
|
||||
in if TMap.null acc1
|
||||
then Nothing
|
||||
else Just (State pinfo chart2 items1)
|
||||
else Just (State pinfo chart2 acc1)
|
||||
where
|
||||
add (KS tok) item set
|
||||
| tok == t = Set.insert item set
|
||||
| otherwise = set
|
||||
add (tok:toks) item acc
|
||||
| tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
|
||||
add _ item acc = acc
|
||||
|
||||
-- | If the next token is not known but only its prefix (possible empty prefix)
|
||||
-- then the 'getCompletions' function can be used to calculate the possible
|
||||
@@ -64,22 +68,27 @@ nextState (State pinfo chart items) t =
|
||||
-- the GF interpreter.
|
||||
getCompletions :: ParseState -> String -> Map.Map String ParseState
|
||||
getCompletions (State pinfo chart items) w =
|
||||
let (map',chart1) = process Nothing add (sequences pinfo) (functions pinfo) (Set.toList items) Map.empty chart
|
||||
let (mb_agenda,map_items) = TMap.decompose items
|
||||
agenda = maybe [] Set.toList mb_agenda
|
||||
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
|
||||
(acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
|
||||
chart2 = chart1{ active =emptyAC
|
||||
, actives=active chart1 : actives chart1
|
||||
, passive=emptyPC
|
||||
, offset =offset chart1+1
|
||||
}
|
||||
in fmap (State pinfo chart2) map'
|
||||
in fmap (State pinfo chart2) acc'
|
||||
where
|
||||
add (KS tok) item map
|
||||
| isPrefixOf w tok = Map.insertWith Set.union tok (Set.singleton item) map
|
||||
| otherwise = map
|
||||
add (tok:toks) item acc
|
||||
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
|
||||
add _ item acc = acc
|
||||
|
||||
extractExps :: ParseState -> Type -> [Tree]
|
||||
extractExps (State pinfo chart items) (DTyp _ start _) = exps
|
||||
where
|
||||
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) (Set.toList items) () chart
|
||||
(mb_agenda,acc) = TMap.decompose items
|
||||
agenda = maybe [] Set.toList mb_agenda
|
||||
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
|
||||
|
||||
exps = nubsort $ do
|
||||
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
|
||||
@@ -138,19 +147,23 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
|
||||
Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
|
||||
Just set | Set.member item set -> process mbt fn seqs funs items acc chart
|
||||
| otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
|
||||
FSymTok tok -> let !acc' = fn tok (Active j (ppos+1) funid seqid args key0) acc
|
||||
FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process mbt fn seqs funs items acc' chart
|
||||
FSymKP strs vars
|
||||
-> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
|
||||
(strs:[strs' | Alt strs' _ <- vars])
|
||||
in process mbt fn seqs funs items acc' chart
|
||||
FSymLit d r -> let !fid = args !! d
|
||||
in case [t | FConst _ t <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
|
||||
(tok:_) -> let !acc' = fn (KS tok) (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process mbt fn seqs funs items acc' chart
|
||||
[] -> case litCatMatch fid mbt of
|
||||
Just (t,lit) -> let fid' = nextId chart
|
||||
!acc' = fn (KS t) (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
|
||||
in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit t)) (forest chart)
|
||||
,nextId=nextId chart+1
|
||||
}
|
||||
Nothing -> process mbt fn seqs funs items acc chart
|
||||
in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
|
||||
(toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process mbt fn seqs funs items acc' chart
|
||||
[] -> case litCatMatch fid mbt of
|
||||
Just (toks,lit) -> let fid' = nextId chart
|
||||
!acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
|
||||
in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
|
||||
,nextId=nextId chart+1
|
||||
}
|
||||
Nothing -> process mbt fn seqs funs items acc chart
|
||||
| otherwise =
|
||||
case lookupPC (mkPK key0 j) (passive chart) of
|
||||
Nothing -> let fid = nextId chart
|
||||
@@ -181,12 +194,12 @@ updateAt :: Int -> a -> [a] -> [a]
|
||||
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
|
||||
|
||||
litCatMatch fcat (Just t)
|
||||
| fcat == fcatString = Just (t,Lit (LStr t))
|
||||
| fcat == fcatInt = case reads t of {[(n,"")] -> Just (t,Lit (LInt n));
|
||||
| fcat == fcatString = Just ([t],Lit (LStr t))
|
||||
| fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],Lit (LInt n));
|
||||
_ -> Nothing }
|
||||
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just (t,Lit (LFlt d));
|
||||
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],Lit (LFlt d));
|
||||
_ -> Nothing }
|
||||
| fcat == fcatVar = Just (t,Var (mkCId t))
|
||||
| fcat == fcatVar = Just ([t],Var (mkCId t))
|
||||
litCatMatch _ _ = Nothing
|
||||
|
||||
|
||||
@@ -250,7 +263,7 @@ insertPC key fcat chart = Map.insert key fcat chart
|
||||
-- Forest
|
||||
----------------------------------------------------------------
|
||||
|
||||
foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> String -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
|
||||
foldForest :: (FunId -> [FCat] -> b -> b) -> (Tree -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
|
||||
foldForest f g b fcat forest =
|
||||
case IntMap.lookup fcat forest of
|
||||
Nothing -> b
|
||||
@@ -258,7 +271,7 @@ foldForest f g b fcat forest =
|
||||
where
|
||||
foldProd (FCoerce fcat) b = foldForest f g b fcat forest
|
||||
foldProd (FApply funid args) b = f funid args b
|
||||
foldProd (FConst const s) b = g const s b
|
||||
foldProd (FConst const toks) b = g const toks b
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
@@ -267,7 +280,7 @@ foldForest f g b fcat forest =
|
||||
|
||||
-- | An abstract data type whose values represent
|
||||
-- the current state in an incremental parser.
|
||||
data ParseState = State ParserInfo Chart (Set.Set Active)
|
||||
data ParseState = State ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
|
||||
|
||||
data Chart
|
||||
= Chart
|
||||
|
||||
Reference in New Issue
Block a user