mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 03:32:51 -06:00
store the label names in PMCFG
This commit is contained in:
@@ -37,7 +37,7 @@ import Control.Exception
|
|||||||
|
|
||||||
convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo
|
convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo
|
||||||
convertConcrete opts abs lang cnc = do
|
convertConcrete opts abs lang cnc = do
|
||||||
let env0 = emptyGrammarEnv cnc_defs cat_defs
|
let env0 = emptyGrammarEnv cnc_defs cat_defs params
|
||||||
when (flag optProf opts) $ do
|
when (flag optProf opts) $ do
|
||||||
profileGrammar lang cnc_defs env0 pfrules
|
profileGrammar lang cnc_defs env0 pfrules
|
||||||
let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0
|
let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0
|
||||||
@@ -47,6 +47,7 @@ convertConcrete opts abs lang cnc = do
|
|||||||
abs_defs = Map.assocs (funs abs)
|
abs_defs = Map.assocs (funs abs)
|
||||||
cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
|
cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient"
|
||||||
cat_defs = Map.insert cidVar (S []) (lincats cnc)
|
cat_defs = Map.insert cidVar (S []) (lincats cnc)
|
||||||
|
params = paramlincats cnc
|
||||||
lin_defs = lindefs cnc
|
lin_defs = lindefs cnc
|
||||||
|
|
||||||
pfrules = [
|
pfrules = [
|
||||||
@@ -72,7 +73,7 @@ profileGrammar lang cnc_defs (GrammarEnv last_id catSet seqSet funSet crcSet pro
|
|||||||
mapM_ profileRule pfrules
|
mapM_ profileRule pfrules
|
||||||
hPutStrLn stderr "--------------------------------"
|
hPutStrLn stderr "--------------------------------"
|
||||||
where
|
where
|
||||||
profileCat (cid,(fcat1,fcat2,_)) = do
|
profileCat (cid,(fcat1,fcat2,_,_)) = do
|
||||||
hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1))
|
hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1))
|
||||||
|
|
||||||
profileRule (PFRule fun args res ctypes ctype term) = do
|
profileRule (PFRule fun args res ctypes ctype term) = do
|
||||||
@@ -340,21 +341,21 @@ evalTerm cnc_defs path x = error ("evalTerm ("++show x++")")
|
|||||||
-- GrammarEnv
|
-- GrammarEnv
|
||||||
|
|
||||||
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
|
data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production))
|
||||||
type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int]))
|
type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int],Array FIndex String))
|
||||||
type SeqSet = Map.Map FSeq SeqId
|
type SeqSet = Map.Map FSeq SeqId
|
||||||
type FunSet = Map.Map FFun FunId
|
type FunSet = Map.Map FFun FunId
|
||||||
type CoerceSet= Map.Map [FCat] FCat
|
type CoerceSet= Map.Map [FCat] FCat
|
||||||
|
|
||||||
emptyGrammarEnv cnc_defs lincats =
|
emptyGrammarEnv cnc_defs lincats params =
|
||||||
let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats
|
let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats
|
||||||
in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty
|
in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty
|
||||||
where
|
where
|
||||||
computeCatRange index cat ctype
|
computeCatRange index cat ctype
|
||||||
| cat == cidString = (index, (fcatString,fcatString,[]))
|
| cat == cidString = (index, (fcatString,fcatString,[],listArray (0,0) ["s"]))
|
||||||
| cat == cidInt = (index, (fcatInt, fcatInt, []))
|
| cat == cidInt = (index, (fcatInt, fcatInt, [],listArray (0,0) ["s"]))
|
||||||
| cat == cidFloat = (index, (fcatFloat, fcatFloat, []))
|
| cat == cidFloat = (index, (fcatFloat, fcatFloat, [],listArray (0,0) ["s"]))
|
||||||
| cat == cidVar = (index, (fcatVar, fcatVar, []))
|
| cat == cidVar = (index, (fcatVar, fcatVar, [],listArray (0,0) ["s"]))
|
||||||
| otherwise = (index+size,(index,index+size-1,poly))
|
| otherwise = (index+size,(index,index+size-1, poly,maybe (error "missing params") (mkArray . getLabels []) (Map.lookup cat params)))
|
||||||
where
|
where
|
||||||
(size,poly) = getMultipliers 1 [] ctype
|
(size,poly) = getMultipliers 1 [] ctype
|
||||||
|
|
||||||
@@ -365,6 +366,12 @@ emptyGrammarEnv cnc_defs lincats =
|
|||||||
Just term -> getMultipliers m ms term
|
Just term -> getMultipliers m ms term
|
||||||
Nothing -> error ("unknown identifier: "++showCId id)
|
Nothing -> error ("unknown identifier: "++showCId id)
|
||||||
|
|
||||||
|
getLabels ls (R record) = concat [getLabels (l:ls) t | P (K (KS l)) t <- record]
|
||||||
|
getLabels ls (S [FV ps,t]) = concat [getLabels (l:ls) t | K (KS l) <- ps]
|
||||||
|
getLabels ls (S []) = [unwords (reverse ls)]
|
||||||
|
getLabels ls (FV _) = []
|
||||||
|
getLabels _ t = error (show t)
|
||||||
|
|
||||||
expandHOAS abs_defs cnc_defs lincats lindefs env =
|
expandHOAS abs_defs cnc_defs lincats lindefs env =
|
||||||
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats
|
foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats
|
||||||
where
|
where
|
||||||
@@ -381,10 +388,10 @@ expandHOAS abs_defs cnc_defs lincats lindefs env =
|
|||||||
-- add a range of PMCFG categories for each GF high-order category
|
-- add a range of PMCFG categories for each GF high-order category
|
||||||
add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
|
add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) =
|
||||||
case IntMap.lookup 0 catSet >>= Map.lookup cat of
|
case IntMap.lookup 0 catSet >>= Map.lookup cat of
|
||||||
Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet
|
Just (start,end,ms,lbls) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms,lbls)) catSet
|
||||||
!last_id' = last_id+(end-start)+1
|
!last_id' = last_id+(end-start)+1
|
||||||
in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet)
|
in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet)
|
||||||
Nothing -> env
|
Nothing -> env
|
||||||
|
|
||||||
-- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
|
-- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat
|
||||||
add_hoFun env (n,cat) =
|
add_hoFun env (n,cat) =
|
||||||
@@ -460,11 +467,11 @@ getParserInfo :: GrammarEnv -> ParserInfo
|
|||||||
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
||||||
ParserInfo { functions = mkArray funSet
|
ParserInfo { functions = mkArray funSet
|
||||||
, sequences = mkArray seqSet
|
, sequences = mkArray seqSet
|
||||||
, productions0= productions0
|
, productions0= productions0
|
||||||
, productions = filterProductions productions0
|
, productions = filterProductions productions0
|
||||||
, startCats = maybe Map.empty (Map.map (\(start,end,_) -> (start,end))) (IntMap.lookup 0 catSet)
|
, startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet)
|
||||||
, totalCats = last_id+1
|
, totalCats = last_id+1
|
||||||
}
|
}
|
||||||
where
|
where
|
||||||
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
||||||
|
|
||||||
@@ -474,7 +481,7 @@ getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) =
|
|||||||
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
|
getFCats :: GrammarEnv -> ProtoFCat -> [FCat]
|
||||||
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) =
|
getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) =
|
||||||
case IntMap.lookup n catSet >>= Map.lookup cat of
|
case IntMap.lookup n catSet >>= Map.lookup cat of
|
||||||
Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ())
|
Just (start,end,ms,_) -> reverse (solutions (variants ms tcs start) ())
|
||||||
where
|
where
|
||||||
variants _ [] fcat = return fcat
|
variants _ [] fcat = return fcat
|
||||||
variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
|
variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices
|
||||||
|
|||||||
@@ -96,7 +96,7 @@ parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray
|
|||||||
JS.EObj $ map cats (Map.assocs (startCats p)),
|
JS.EObj $ map cats (Map.assocs (startCats p)),
|
||||||
JS.EInt (totalCats p)]]
|
JS.EInt (totalCats p)]]
|
||||||
where
|
where
|
||||||
cats (c,(start,end)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
|
cats (c,(start,end,_)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start)
|
||||||
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
|
,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)])
|
||||||
|
|
||||||
frule2js :: Production -> JS.Expr
|
frule2js :: Production -> JS.Expr
|
||||||
|
|||||||
@@ -42,8 +42,8 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
|||||||
|
|
||||||
fcatCats :: Map FCat Cat
|
fcatCats :: Map FCat Cat
|
||||||
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
|
fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i)
|
||||||
| (c,fcs) <- Map.toList (startCats pinfo),
|
| (c,(s,e,lbls)) <- Map.toList (startCats pinfo),
|
||||||
(fc,i) <- zip (range fcs) [1..]]
|
(fc,i) <- zip (range (s,e)) [1..]]
|
||||||
|
|
||||||
fcatCat :: FCat -> Cat
|
fcatCat :: FCat -> Cat
|
||||||
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
|
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
|
||||||
@@ -69,8 +69,8 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
|||||||
|
|
||||||
startRules :: [CFRule]
|
startRules :: [CFRule]
|
||||||
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
|
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
|
||||||
| (c,fcs) <- Map.toList (startCats pinfo),
|
| (c,(s,e,lbls)) <- Map.toList (startCats pinfo),
|
||||||
fc <- range fcs, not (isLiteralFCat fc),
|
fc <- range (s,e), not (isLiteralFCat fc),
|
||||||
r <- [0..catLinArity fc-1]]
|
r <- [0..catLinArity fc-1]]
|
||||||
|
|
||||||
fruleToCFRule :: (FCat,Production) -> [CFRule]
|
fruleToCFRule :: (FCat,Production) -> [CFRule]
|
||||||
|
|||||||
@@ -36,11 +36,11 @@ data Alternative =
|
|||||||
data ParserInfo
|
data ParserInfo
|
||||||
= ParserInfo { functions :: Array FunId FFun
|
= ParserInfo { functions :: Array FunId FFun
|
||||||
, sequences :: Array SeqId FSeq
|
, sequences :: Array SeqId FSeq
|
||||||
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
|
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
|
||||||
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
|
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
|
||||||
, startCats :: Map.Map CId (FCat,FCat)
|
, startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names
|
||||||
, totalCats :: {-# UNPACK #-} !FCat
|
, totalCats :: {-# UNPACK #-} !FCat
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fcatString, fcatInt, fcatFloat, fcatVar :: Int
|
fcatString, fcatInt, fcatFloat, fcatVar :: Int
|
||||||
@@ -76,8 +76,9 @@ ppFun (funid,FFun fun arr) =
|
|||||||
ppSeq (seqid,seq) =
|
ppSeq (seqid,seq) =
|
||||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
||||||
|
|
||||||
ppStartCat (id,(start,end)) =
|
ppStartCat (id,(start,end,labels)) =
|
||||||
ppCId id <+> text ":=" <+> brackets (ppFCat start <+> text ".." <+> ppFCat end)
|
ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$
|
||||||
|
text "labels" <+> brackets (vcat (map (text . show) (elems labels))))
|
||||||
|
|
||||||
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||||
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||||
|
|||||||
@@ -56,14 +56,15 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ)
|
|||||||
-- startup category.
|
-- startup category.
|
||||||
initState :: PGF -> Language -> Type -> ParseState
|
initState :: PGF -> Language -> Type -> ParseState
|
||||||
initState pgf lang (DTyp _ start _) =
|
initState pgf lang (DTyp _ start _) =
|
||||||
let items = do
|
let items = case Map.lookup start (startCats pinfo) of
|
||||||
cat <- maybe [] range (Map.lookup start (startCats pinfo))
|
Just (s,e,labels) -> do cat <- range (s,e)
|
||||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
||||||
[] cat (productions pinfo)
|
[] cat (productions pinfo)
|
||||||
let FFun fn lins = functions pinfo ! funid
|
let FFun fn lins = functions pinfo ! funid
|
||||||
(lbl,seqid) <- assocs lins
|
(lbl,seqid) <- assocs lins
|
||||||
return (Active 0 0 funid seqid args (AK cat lbl))
|
return (Active 0 0 funid seqid args (AK cat lbl))
|
||||||
|
Nothing -> mzero
|
||||||
|
|
||||||
pinfo =
|
pinfo =
|
||||||
case lookParser pgf lang of
|
case lookParser pgf lang of
|
||||||
Just pinfo -> pinfo
|
Just pinfo -> pinfo
|
||||||
@@ -131,8 +132,10 @@ recoveryStates open_types (EState pgf pinfo chart) =
|
|||||||
}
|
}
|
||||||
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
|
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
|
||||||
where
|
where
|
||||||
type2fcats (DTyp _ cat _) = maybe [] range (Map.lookup cat (startCats pinfo))
|
type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats pinfo) of
|
||||||
|
Just (s,e,labels) -> range (s,e)
|
||||||
|
Nothing -> []
|
||||||
|
|
||||||
complete open_fcats items ac =
|
complete open_fcats items ac =
|
||||||
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
|
||||||
(:) (Active j' (ppos+1) funid seqid args keyc)))
|
(:) (Active j' (ppos+1) funid seqid args keyc)))
|
||||||
@@ -153,16 +156,15 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
|
|||||||
agenda = maybe [] Set.toList mb_agenda
|
agenda = maybe [] Set.toList mb_agenda
|
||||||
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
|
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
|
||||||
|
|
||||||
exps = do
|
exps =
|
||||||
cat <- maybe [] range (Map.lookup start (startCats pinfo))
|
case Map.lookup start (startCats pinfo) of
|
||||||
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
|
Just (s,e,lbls) -> do cat <- range (s,e)
|
||||||
[] cat (productions pinfo)
|
lbl <- indices lbls
|
||||||
let FFun fn lins = functions pinfo ! funid
|
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
||||||
lbl <- indices lins
|
(fvs,tree) <- go Set.empty 0 (0,fid)
|
||||||
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
guard (Set.null fvs)
|
||||||
(fvs,tree) <- go Set.empty 0 (0,fid)
|
return tree
|
||||||
guard (Set.null fvs)
|
Nothing -> mzero
|
||||||
return tree
|
|
||||||
|
|
||||||
go rec fcat' (d,fcat)
|
go rec fcat' (d,fcat)
|
||||||
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
||||||
|
|||||||
Reference in New Issue
Block a user