1
0
forked from GitHub/gf-core

PGF run-time library: function names in BracketedString (experimental)

+ Make room for function names in the BracketedString data structure.
+ Fill in function names when linearizing an abstract syntax tree to a
  BracketedString.
+ Fill in wildCId when it is not obvious what the function is.
+ Function bracketedLinearize: for compatibility with the other linearization
  functions, return Leaf "" instead of error "cannot linearize".
+ Export flattenBracketedString from module PGF.
+ PGFServce: make function names available in the JSON representation of
  BracketedString.
This commit is contained in:
hallgren
2012-03-18 20:12:26 +00:00
parent 771c1a0ad7
commit 07af8988d3
6 changed files with 53 additions and 48 deletions

View File

@@ -58,7 +58,7 @@ module PGF(
showPrintName,
BracketedString(..), FId, LIndex, Token,
Forest.showBracketedString,
Forest.showBracketedString,flattenBracketedString,
-- ** Parsing
parse, parseAllLang, parseAll, parse_, parseWithRecovery,

View File

@@ -57,9 +57,9 @@ linearizeWithBrackets dp = head . snd . untokn "" . bracketedTokn dp
bracketedTokn :: Maybe Int -> Forest -> BracketedTokn
bracketedTokn dp f@(Forest abs cnc forest root) =
case [computeSeq isTrusted seq (map (render forest) args) | (seq,args) <- root] of
([bs@(Bracket_ _ _ _ _ _)]:_) -> bs
(bss:_) -> Bracket_ wildCId 0 0 [] bss
[] -> Bracket_ wildCId 0 0 [] []
([bs@(Bracket_{})]:_) -> bs
(bss:_) -> Bracket_ wildCId 0 0 wildCId [] bss
[] -> Bracket_ wildCId 0 0 wildCId [] []
where
isTrusted (_,fid) = IntSet.member fid trusted
@@ -67,8 +67,8 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
render forest arg@(PArg hypos fid) =
case IntMap.lookup fid forest >>= Set.maxView of
Just (p,set) -> let (ct,es,(_,lin)) = descend (if Set.null set then forest else IntMap.insert fid set forest) p
in (ct,es,(map getVar hypos,lin))
Just (p,set) -> let (ct,fun,es,(_,lin)) = descend (if Set.null set then forest else IntMap.insert fid set forest) p
in (ct,fun,es,(map getVar hypos,lin))
Nothing -> error ("wrong forest id " ++ show fid)
where
descend forest (PApply funid args) = let (CncFun fun lins) = cncfuns cnc ! funid
@@ -78,9 +78,9 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
Just (DTyp _ cat _,_,_,_) -> cat
largs = map (render forest) args
ltable = mkLinTable cnc isTrusted [] funid largs
in ((cat,fid),either (const []) id $ getAbsTrees f arg Nothing dp,ltable)
in ((cat,fid),wildCId,either (const []) id $ getAbsTrees f arg Nothing dp,ltable)
descend forest (PCoerce fid) = render forest (PArg [] fid)
descend forest (PConst cat e ts) = ((cat,fid),[e],([],listArray (0,0) [[LeafKS ts]]))
descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [[LeafKS ts]]))
getVar (fid,_)
| fid == fidVar = wildCId

View File

@@ -39,7 +39,9 @@ linearizeAllLang pgf t = [(lang,linearize pgf lang t) | lang <- Map.keys (concre
bracketedLinearize :: PGF -> Language -> Tree -> BracketedString
bracketedLinearize pgf lang = head . concat . map (snd . untokn "" . firstLin) . linTree pgf lang
where
head [] = error "cannot linearize"
-- head [] = error "cannot linearize"
head [] = Leaf ""
-- so that linearize = flattenBracketedString . bracketedLinearize
head (bs:bss) = bs
firstLin (_,arr)
@@ -63,7 +65,7 @@ tabularLinearizes pgf lang e = map cnv (linTree pgf lang e)
linTree :: PGF -> Language -> Expr -> [(CncType, Array LIndex BracketedTokn)]
linTree pgf lang e =
nub [(ct,amapWithIndex (\label -> Bracket_ cat fid label es) lin) | (_,(ct@(cat,fid),es,(xs,lin))) <- lin Nothing 0 e [] [] e []]
nub [(ct,amapWithIndex (\label -> Bracket_ cat fid label fun es) lin) | (_,(ct@(cat,fid),fun,es,(xs,lin))) <- lin Nothing 0 e [] [] e []]
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
@@ -76,18 +78,18 @@ linTree pgf lang e =
lin mb_cty n_fid e0 ys xs (EMeta i) es = def mb_cty n_fid e0 ys xs ('?':show i)
lin mb_cty n_fid e0 ys xs (EVar i) _ = def mb_cty n_fid e0 ys xs (showCId ((xs++ys) !! i))
lin mb_cty n_fid e0 ys xs (ELit l) [] = case l of
LStr s -> return (n_fid+1,((cidString,n_fid),[e0],([],ss s)))
LInt n -> return (n_fid+1,((cidInt, n_fid),[e0],([],ss (show n))))
LFlt f -> return (n_fid+1,((cidFloat, n_fid),[e0],([],ss (show f))))
LStr s -> return (n_fid+1,((cidString,n_fid),wildCId,[e0],([],ss s)))
LInt n -> return (n_fid+1,((cidInt, n_fid),wildCId,[e0],([],ss (show n))))
LFlt f -> return (n_fid+1,((cidFloat, n_fid),wildCId,[e0],([],ss (show f))))
ss s = listArray (0,0) [[LeafKS [s]]]
apply :: Maybe CncType -> FId -> Expr -> [CId] -> [CId] -> CId -> [Expr] -> [(FId,(CncType, [Expr], LinTable))]
apply :: Maybe CncType -> FId -> Expr -> [CId] -> [CId] -> CId -> [Expr] -> [(FId,(CncType, CId, [Expr], LinTable))]
apply mb_cty n_fid e0 ys xs f es =
case Map.lookup f lp of
Just prods -> do (funid,(cat,fid),ctys) <- getApps prods
(n_fid,args) <- descend n_fid (zip ctys es)
return (n_fid+1,((cat,n_fid),[e0],mkLinTable cnc (const True) xs funid args))
return (n_fid+1,((cat,n_fid),f,[e0],mkLinTable cnc (const True) xs funid args))
Nothing -> def mb_cty n_fid e0 ys xs ("[" ++ showCId f ++ "]") -- fun without lin
where
getApps prods =
@@ -110,10 +112,10 @@ linTree pgf lang e =
def (Just (cat,fid)) n_fid e0 ys xs s =
case IntMap.lookup fid (lindefs cnc) of
Just funs -> do funid <- funs
let args = [((wildCId, n_fid),[e0],([],ss s))]
return (n_fid+2,((cat,n_fid+1),[e0],mkLinTable cnc (const True) xs funid args))
let args = [((wildCId, n_fid),wildCId,[e0],([],ss s))]
return (n_fid+2,((cat,n_fid+1),wildCId,[e0],mkLinTable cnc (const True) xs funid args))
Nothing
| isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),[e0],(xs,listArray (0,0) [[LeafKS [s]]])))
| isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS [s]]])))
| otherwise -> do PCoerce fid <- maybe [] Set.toList (IntMap.lookup fid (pproductions cnc))
def (Just (cat,fid)) n_fid e0 ys xs s
def Nothing n_fid e0 ys xs s = []

View File

@@ -141,7 +141,7 @@ cidVar = mkCId "__gfVar"
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf Token -- ^ this is the leaf i.e. a single token
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedString]
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
@@ -157,7 +157,7 @@ data BracketedString
data BracketedTokn
= LeafKS [Token]
| LeafKP [Token] [Alternative]
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedTokn] -- Invariant: the list is not empty
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
deriving Eq
type LinTable = ([CId],Array.Array LIndex [BracketedTokn])
@@ -169,12 +169,12 @@ showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket cat fid index _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
ppBracketedString (Bracket cat fid index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | The length of the bracketed string in number of tokens.
lengthBracketedString :: BracketedString -> Int
lengthBracketedString (Leaf _) = 1
lengthBracketedString (Bracket _ _ _ _ bss) = sum (map lengthBracketedString bss)
lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
untokn :: String -> BracketedTokn -> (String,[BracketedString])
untokn nw (LeafKS ts) = (head ts,map Leaf ts)
@@ -185,18 +185,18 @@ untokn nw (LeafKP d vs) = let ts = sel d vs nw
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
v:_ -> v
_ -> d
untokn nw (Bracket_ cat fid index es bss) =
untokn nw (Bracket_ cat fid index fun es bss) =
let (nw',bss') = mapAccumR untokn nw bss
in (nw',[Bracket cat fid index es (concat bss')])
in (nw',[Bracket cat fid index fun es (concat bss')])
type CncType = (CId, FId) -- concrete type is the abstract type (the category) + the forest id
mkLinTable :: Concr -> (CncType -> Bool) -> [CId] -> FunId -> [(CncType,[Expr],LinTable)] -> LinTable
mkLinTable :: Concr -> (CncType -> Bool) -> [CId] -> FunId -> [(CncType,CId,[Expr],LinTable)] -> LinTable
mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq filter (elems (sequences cnc ! seqid)) args | seqid <- elems lins])
where
(CncFun _ lins) = cncfuns cnc ! funid
computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,[Expr],LinTable)] -> [BracketedTokn]
computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,CId,[Expr],LinTable)] -> [BracketedTokn]
computeSeq filter seq args = concatMap compute seq
where
compute (SymCat d r) = getArg d r
@@ -207,16 +207,16 @@ computeSeq filter seq args = concatMap compute seq
getArg d r
| not (null arg_lin) &&
filter ct = [Bracket_ cat fid r es arg_lin]
filter ct = [Bracket_ cat fid r fun es arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
(ct@(cat,fid),es,(xs,lin)) = args !! d
(ct@(cat,fid),fun,es,(xs,lin)) = args !! d
getVar d r = [LeafKS [showCId (xs !! r)]]
where
(ct,es,(xs,lin)) = args !! d
(ct,fun,es,(xs,lin)) = args !! d
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
flattenBracketedString (Bracket _ _ _ _ _ bss) = concatMap flattenBracketedString bss

View File

@@ -194,12 +194,12 @@ graphvizBracketedString = render . lin2tree
getLeaves level parent bs =
case bs of
Leaf w -> [(level-1,parent,w)]
Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
Bracket _ fid i _ _ bss -> concatMap (getLeaves (level+1) fid) bss
getInterns level [] = []
getInterns level nodes =
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _ _) <- nodes] :
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ _ children) <- nodes, child <- children]
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
@@ -247,7 +247,7 @@ genPreAlignment pgf langs = lin2align . linsBracketed
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
Bracket _ fid _ _ _ bss -> concatMap (getLeaves fid) bss
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)

View File

@@ -169,8 +169,8 @@ doTranslate pgf input mcat mfrom mto =
["translations".=
[makeObj ["tree".=tree,
"linearizations".=
[makeObj ["to".=to, "text".=output]
| (to,output) <- linearizeAndBind pgf mto tree]]
[makeObj ["to".=to, "text".=text, "brackets".=bs]
| (to,text,bs)<- linearizeAndBind pgf mto tree]]
| tree <- trees]]
PGF.ParseIncomplete -> ["incomplete".=True]
PGF.ParseFailed n -> ["parseFailed".=n]
@@ -197,7 +197,7 @@ doTranslateGroup pgf input mcat mfrom mto =
where
groupResults = Map.toList . foldr more Map.empty . start . collect
where
collect tls = [(t,(l,s)) | (t,ls) <- tls, (l,s) <- ls, notDisamb l]
collect tls = [(t,(l,s)) | (t,ls) <- tls, (l,s,_) <- ls, notDisamb l]
start ls = [(l,[([t],s)]) | (t,(l,s)) <- ls]
more (l,s) = Map.insertWith (\ [([t],x)] xs -> insertAlt t x xs) l s
@@ -260,8 +260,8 @@ doComplete pgf input mcat mfrom mlimit = showJSON
doLinearize :: PGF -> PGF.Tree -> Maybe PGF.Language -> JSValue
doLinearize pgf tree mto = showJSON
[makeObj ["to".=PGF.showLanguage to, "text".=text]
| (to,text) <- linearize' pgf mto tree]
[makeObj ["to".=to, "text".=text,"brackets".=bs]
| (to,text,bs) <- linearize' pgf mto tree]
doLinearizes :: PGF -> PGF.Tree -> Maybe PGF.Language -> JSValue
doLinearizes pgf tree mto = showJSON
@@ -275,8 +275,8 @@ doRandom pgf mcat mdepth mlimit mto =
return $ showJSON
[makeObj ["tree".=PGF.showExpr [] tree,
"linearizations".=
[makeObj ["to".=PGF.showLanguage to, "text".=text]
| (to,text) <- linearize' pgf mto tree]]
[makeObj ["to".=to, "text".=text]
| (to,text,bs) <- linearize' pgf mto tree]]
| tree <- limit trees]
where cat = fromMaybe (PGF.startCat pgf) mcat
limit = take (fromMaybe 1 mlimit)
@@ -286,8 +286,8 @@ doGenerate :: PGF -> Maybe PGF.Type -> Maybe Int -> Maybe Int -> Maybe PGF.Langu
doGenerate pgf mcat mdepth mlimit mto =
showJSON [makeObj ["tree".=PGF.showExpr [] tree,
"linearizations".=
[makeObj ["to".=PGF.showLanguage to, "text".=text]
| (to,text) <- linearize' pgf mto tree]]
[makeObj ["to".=to, "text".=text]
| (to,text,bs) <- linearize' pgf mto tree]]
| tree <- limit trees]
where
trees = PGF.generateAllDepth pgf cat (Just depth)
@@ -455,8 +455,8 @@ instance JSON PGF.Expr where
instance JSON PGF.BracketedString where
readJSON x = return (PGF.Leaf "")
showJSON (PGF.Bracket cat fid index _ bs) =
makeObj ["cat".=cat, "fid".=fid, "index".=index, "children".=bs]
showJSON (PGF.Bracket cat fid index fun _ bs) =
makeObj ["cat".=cat, "fid".=fid, "index".=index, "fun".=fun, "children".=bs]
showJSON (PGF.Leaf s) = makeObj ["token".=s]
-- * PGF utilities
@@ -494,9 +494,11 @@ complete' pgf from typ mlimit input =
Left es -> (ps,w:ws)
Right ps -> loop ps ws
linearize' :: PGF -> Maybe PGF.Language -> PGF.Tree -> [(PGF.Language,String)]
linearize' :: PGF -> Maybe PGF.Language -> PGF.Tree -> [(PGF.Language,String,PGF.BracketedString)]
linearize' pgf mto tree =
[(to,PGF.linearize pgf to (transfer to tree)) | to<-langs]
[(to,s,bs) | to<-langs,
let bs = PGF.bracketedLinearize pgf to (transfer to tree)
s = unwords $ PGF.flattenBracketedString bs]
where
langs = maybe (PGF.languages pgf) (:[]) mto
@@ -512,7 +514,8 @@ linearizes' pgf mto tree =
langs = maybe (PGF.languages pgf) (:[]) mto
lins to = nub . concatMap (map snd) . PGF.tabularLinearizes pgf to
linearizeAndBind pgf mto t = [(la, binds s) | (la,s) <- linearize' pgf mto t]
linearizeAndBind pgf mto t =
[(la, binds s,bs) | (la,s,bs) <- linearize' pgf mto t]
where
binds = unwords . bs . words
bs ws = case ws of