forked from GitHub/gf-core
PGF run-time library: function names in BracketedString (experimental)
+ Make room for function names in the BracketedString data structure. + Fill in function names when linearizing an abstract syntax tree to a BracketedString. + Fill in wildCId when it is not obvious what the function is. + Function bracketedLinearize: for compatibility with the other linearization functions, return Leaf "" instead of error "cannot linearize". + Export flattenBracketedString from module PGF. + PGFServce: make function names available in the JSON representation of BracketedString.
This commit is contained in:
@@ -58,7 +58,7 @@ module PGF(
|
||||
showPrintName,
|
||||
|
||||
BracketedString(..), FId, LIndex, Token,
|
||||
Forest.showBracketedString,
|
||||
Forest.showBracketedString,flattenBracketedString,
|
||||
|
||||
-- ** Parsing
|
||||
parse, parseAllLang, parseAll, parse_, parseWithRecovery,
|
||||
|
||||
@@ -57,9 +57,9 @@ linearizeWithBrackets dp = head . snd . untokn "" . bracketedTokn dp
|
||||
bracketedTokn :: Maybe Int -> Forest -> BracketedTokn
|
||||
bracketedTokn dp f@(Forest abs cnc forest root) =
|
||||
case [computeSeq isTrusted seq (map (render forest) args) | (seq,args) <- root] of
|
||||
([bs@(Bracket_ _ _ _ _ _)]:_) -> bs
|
||||
(bss:_) -> Bracket_ wildCId 0 0 [] bss
|
||||
[] -> Bracket_ wildCId 0 0 [] []
|
||||
([bs@(Bracket_{})]:_) -> bs
|
||||
(bss:_) -> Bracket_ wildCId 0 0 wildCId [] bss
|
||||
[] -> Bracket_ wildCId 0 0 wildCId [] []
|
||||
where
|
||||
isTrusted (_,fid) = IntSet.member fid trusted
|
||||
|
||||
@@ -67,8 +67,8 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
|
||||
|
||||
render forest arg@(PArg hypos fid) =
|
||||
case IntMap.lookup fid forest >>= Set.maxView of
|
||||
Just (p,set) -> let (ct,es,(_,lin)) = descend (if Set.null set then forest else IntMap.insert fid set forest) p
|
||||
in (ct,es,(map getVar hypos,lin))
|
||||
Just (p,set) -> let (ct,fun,es,(_,lin)) = descend (if Set.null set then forest else IntMap.insert fid set forest) p
|
||||
in (ct,fun,es,(map getVar hypos,lin))
|
||||
Nothing -> error ("wrong forest id " ++ show fid)
|
||||
where
|
||||
descend forest (PApply funid args) = let (CncFun fun lins) = cncfuns cnc ! funid
|
||||
@@ -78,9 +78,9 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
|
||||
Just (DTyp _ cat _,_,_,_) -> cat
|
||||
largs = map (render forest) args
|
||||
ltable = mkLinTable cnc isTrusted [] funid largs
|
||||
in ((cat,fid),either (const []) id $ getAbsTrees f arg Nothing dp,ltable)
|
||||
in ((cat,fid),wildCId,either (const []) id $ getAbsTrees f arg Nothing dp,ltable)
|
||||
descend forest (PCoerce fid) = render forest (PArg [] fid)
|
||||
descend forest (PConst cat e ts) = ((cat,fid),[e],([],listArray (0,0) [[LeafKS ts]]))
|
||||
descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [[LeafKS ts]]))
|
||||
|
||||
getVar (fid,_)
|
||||
| fid == fidVar = wildCId
|
||||
|
||||
@@ -39,7 +39,9 @@ linearizeAllLang pgf t = [(lang,linearize pgf lang t) | lang <- Map.keys (concre
|
||||
bracketedLinearize :: PGF -> Language -> Tree -> BracketedString
|
||||
bracketedLinearize pgf lang = head . concat . map (snd . untokn "" . firstLin) . linTree pgf lang
|
||||
where
|
||||
head [] = error "cannot linearize"
|
||||
-- head [] = error "cannot linearize"
|
||||
head [] = Leaf ""
|
||||
-- so that linearize = flattenBracketedString . bracketedLinearize
|
||||
head (bs:bss) = bs
|
||||
|
||||
firstLin (_,arr)
|
||||
@@ -63,7 +65,7 @@ tabularLinearizes pgf lang e = map cnv (linTree pgf lang e)
|
||||
|
||||
linTree :: PGF -> Language -> Expr -> [(CncType, Array LIndex BracketedTokn)]
|
||||
linTree pgf lang e =
|
||||
nub [(ct,amapWithIndex (\label -> Bracket_ cat fid label es) lin) | (_,(ct@(cat,fid),es,(xs,lin))) <- lin Nothing 0 e [] [] e []]
|
||||
nub [(ct,amapWithIndex (\label -> Bracket_ cat fid label fun es) lin) | (_,(ct@(cat,fid),fun,es,(xs,lin))) <- lin Nothing 0 e [] [] e []]
|
||||
where
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
lp = lproductions cnc
|
||||
@@ -76,18 +78,18 @@ linTree pgf lang e =
|
||||
lin mb_cty n_fid e0 ys xs (EMeta i) es = def mb_cty n_fid e0 ys xs ('?':show i)
|
||||
lin mb_cty n_fid e0 ys xs (EVar i) _ = def mb_cty n_fid e0 ys xs (showCId ((xs++ys) !! i))
|
||||
lin mb_cty n_fid e0 ys xs (ELit l) [] = case l of
|
||||
LStr s -> return (n_fid+1,((cidString,n_fid),[e0],([],ss s)))
|
||||
LInt n -> return (n_fid+1,((cidInt, n_fid),[e0],([],ss (show n))))
|
||||
LFlt f -> return (n_fid+1,((cidFloat, n_fid),[e0],([],ss (show f))))
|
||||
LStr s -> return (n_fid+1,((cidString,n_fid),wildCId,[e0],([],ss s)))
|
||||
LInt n -> return (n_fid+1,((cidInt, n_fid),wildCId,[e0],([],ss (show n))))
|
||||
LFlt f -> return (n_fid+1,((cidFloat, n_fid),wildCId,[e0],([],ss (show f))))
|
||||
|
||||
ss s = listArray (0,0) [[LeafKS [s]]]
|
||||
|
||||
apply :: Maybe CncType -> FId -> Expr -> [CId] -> [CId] -> CId -> [Expr] -> [(FId,(CncType, [Expr], LinTable))]
|
||||
apply :: Maybe CncType -> FId -> Expr -> [CId] -> [CId] -> CId -> [Expr] -> [(FId,(CncType, CId, [Expr], LinTable))]
|
||||
apply mb_cty n_fid e0 ys xs f es =
|
||||
case Map.lookup f lp of
|
||||
Just prods -> do (funid,(cat,fid),ctys) <- getApps prods
|
||||
(n_fid,args) <- descend n_fid (zip ctys es)
|
||||
return (n_fid+1,((cat,n_fid),[e0],mkLinTable cnc (const True) xs funid args))
|
||||
return (n_fid+1,((cat,n_fid),f,[e0],mkLinTable cnc (const True) xs funid args))
|
||||
Nothing -> def mb_cty n_fid e0 ys xs ("[" ++ showCId f ++ "]") -- fun without lin
|
||||
where
|
||||
getApps prods =
|
||||
@@ -110,10 +112,10 @@ linTree pgf lang e =
|
||||
def (Just (cat,fid)) n_fid e0 ys xs s =
|
||||
case IntMap.lookup fid (lindefs cnc) of
|
||||
Just funs -> do funid <- funs
|
||||
let args = [((wildCId, n_fid),[e0],([],ss s))]
|
||||
return (n_fid+2,((cat,n_fid+1),[e0],mkLinTable cnc (const True) xs funid args))
|
||||
let args = [((wildCId, n_fid),wildCId,[e0],([],ss s))]
|
||||
return (n_fid+2,((cat,n_fid+1),wildCId,[e0],mkLinTable cnc (const True) xs funid args))
|
||||
Nothing
|
||||
| isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),[e0],(xs,listArray (0,0) [[LeafKS [s]]])))
|
||||
| isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS [s]]])))
|
||||
| otherwise -> do PCoerce fid <- maybe [] Set.toList (IntMap.lookup fid (pproductions cnc))
|
||||
def (Just (cat,fid)) n_fid e0 ys xs s
|
||||
def Nothing n_fid e0 ys xs s = []
|
||||
|
||||
@@ -141,7 +141,7 @@ cidVar = mkCId "__gfVar"
|
||||
-- mark the beginning and the end of each constituent.
|
||||
data BracketedString
|
||||
= Leaf Token -- ^ this is the leaf i.e. a single token
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedString]
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedString]
|
||||
-- ^ this is a bracket. The 'CId' is the category of
|
||||
-- the phrase. The 'FId' is an unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
@@ -157,7 +157,7 @@ data BracketedString
|
||||
data BracketedTokn
|
||||
= LeafKS [Token]
|
||||
| LeafKP [Token] [Alternative]
|
||||
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex [Expr] [BracketedTokn] -- Invariant: the list is not empty
|
||||
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
|
||||
deriving Eq
|
||||
|
||||
type LinTable = ([CId],Array.Array LIndex [BracketedTokn])
|
||||
@@ -169,12 +169,12 @@ showBracketedString :: BracketedString -> String
|
||||
showBracketedString = render . ppBracketedString
|
||||
|
||||
ppBracketedString (Leaf t) = text t
|
||||
ppBracketedString (Bracket cat fid index _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
ppBracketedString (Bracket cat fid index _ _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
|
||||
-- | The length of the bracketed string in number of tokens.
|
||||
lengthBracketedString :: BracketedString -> Int
|
||||
lengthBracketedString (Leaf _) = 1
|
||||
lengthBracketedString (Bracket _ _ _ _ bss) = sum (map lengthBracketedString bss)
|
||||
lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
|
||||
|
||||
untokn :: String -> BracketedTokn -> (String,[BracketedString])
|
||||
untokn nw (LeafKS ts) = (head ts,map Leaf ts)
|
||||
@@ -185,18 +185,18 @@ untokn nw (LeafKP d vs) = let ts = sel d vs nw
|
||||
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
untokn nw (Bracket_ cat fid index es bss) =
|
||||
untokn nw (Bracket_ cat fid index fun es bss) =
|
||||
let (nw',bss') = mapAccumR untokn nw bss
|
||||
in (nw',[Bracket cat fid index es (concat bss')])
|
||||
in (nw',[Bracket cat fid index fun es (concat bss')])
|
||||
|
||||
type CncType = (CId, FId) -- concrete type is the abstract type (the category) + the forest id
|
||||
|
||||
mkLinTable :: Concr -> (CncType -> Bool) -> [CId] -> FunId -> [(CncType,[Expr],LinTable)] -> LinTable
|
||||
mkLinTable :: Concr -> (CncType -> Bool) -> [CId] -> FunId -> [(CncType,CId,[Expr],LinTable)] -> LinTable
|
||||
mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq filter (elems (sequences cnc ! seqid)) args | seqid <- elems lins])
|
||||
where
|
||||
(CncFun _ lins) = cncfuns cnc ! funid
|
||||
|
||||
computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,[Expr],LinTable)] -> [BracketedTokn]
|
||||
computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,CId,[Expr],LinTable)] -> [BracketedTokn]
|
||||
computeSeq filter seq args = concatMap compute seq
|
||||
where
|
||||
compute (SymCat d r) = getArg d r
|
||||
@@ -207,16 +207,16 @@ computeSeq filter seq args = concatMap compute seq
|
||||
|
||||
getArg d r
|
||||
| not (null arg_lin) &&
|
||||
filter ct = [Bracket_ cat fid r es arg_lin]
|
||||
filter ct = [Bracket_ cat fid r fun es arg_lin]
|
||||
| otherwise = arg_lin
|
||||
where
|
||||
arg_lin = lin ! r
|
||||
(ct@(cat,fid),es,(xs,lin)) = args !! d
|
||||
(ct@(cat,fid),fun,es,(xs,lin)) = args !! d
|
||||
|
||||
getVar d r = [LeafKS [showCId (xs !! r)]]
|
||||
where
|
||||
(ct,es,(xs,lin)) = args !! d
|
||||
(ct,fun,es,(xs,lin)) = args !! d
|
||||
|
||||
flattenBracketedString :: BracketedString -> [String]
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
flattenBracketedString (Bracket _ _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
|
||||
@@ -194,12 +194,12 @@ graphvizBracketedString = render . lin2tree
|
||||
getLeaves level parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(level-1,parent,w)]
|
||||
Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
|
||||
Bracket _ fid i _ _ bss -> concatMap (getLeaves (level+1) fid) bss
|
||||
|
||||
getInterns level [] = []
|
||||
getInterns level nodes =
|
||||
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
|
||||
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
|
||||
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _ _) <- nodes] :
|
||||
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ _ children) <- nodes, child <- children]
|
||||
|
||||
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
||||
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
|
||||
@@ -247,7 +247,7 @@ genPreAlignment pgf langs = lin2align . linsBracketed
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
||||
Bracket _ fid _ _ _ bss -> concatMap (getLeaves fid) bss
|
||||
|
||||
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
|
||||
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
|
||||
|
||||
@@ -169,8 +169,8 @@ doTranslate pgf input mcat mfrom mto =
|
||||
["translations".=
|
||||
[makeObj ["tree".=tree,
|
||||
"linearizations".=
|
||||
[makeObj ["to".=to, "text".=output]
|
||||
| (to,output) <- linearizeAndBind pgf mto tree]]
|
||||
[makeObj ["to".=to, "text".=text, "brackets".=bs]
|
||||
| (to,text,bs)<- linearizeAndBind pgf mto tree]]
|
||||
| tree <- trees]]
|
||||
PGF.ParseIncomplete -> ["incomplete".=True]
|
||||
PGF.ParseFailed n -> ["parseFailed".=n]
|
||||
@@ -197,7 +197,7 @@ doTranslateGroup pgf input mcat mfrom mto =
|
||||
where
|
||||
groupResults = Map.toList . foldr more Map.empty . start . collect
|
||||
where
|
||||
collect tls = [(t,(l,s)) | (t,ls) <- tls, (l,s) <- ls, notDisamb l]
|
||||
collect tls = [(t,(l,s)) | (t,ls) <- tls, (l,s,_) <- ls, notDisamb l]
|
||||
start ls = [(l,[([t],s)]) | (t,(l,s)) <- ls]
|
||||
more (l,s) = Map.insertWith (\ [([t],x)] xs -> insertAlt t x xs) l s
|
||||
|
||||
@@ -260,8 +260,8 @@ doComplete pgf input mcat mfrom mlimit = showJSON
|
||||
|
||||
doLinearize :: PGF -> PGF.Tree -> Maybe PGF.Language -> JSValue
|
||||
doLinearize pgf tree mto = showJSON
|
||||
[makeObj ["to".=PGF.showLanguage to, "text".=text]
|
||||
| (to,text) <- linearize' pgf mto tree]
|
||||
[makeObj ["to".=to, "text".=text,"brackets".=bs]
|
||||
| (to,text,bs) <- linearize' pgf mto tree]
|
||||
|
||||
doLinearizes :: PGF -> PGF.Tree -> Maybe PGF.Language -> JSValue
|
||||
doLinearizes pgf tree mto = showJSON
|
||||
@@ -275,8 +275,8 @@ doRandom pgf mcat mdepth mlimit mto =
|
||||
return $ showJSON
|
||||
[makeObj ["tree".=PGF.showExpr [] tree,
|
||||
"linearizations".=
|
||||
[makeObj ["to".=PGF.showLanguage to, "text".=text]
|
||||
| (to,text) <- linearize' pgf mto tree]]
|
||||
[makeObj ["to".=to, "text".=text]
|
||||
| (to,text,bs) <- linearize' pgf mto tree]]
|
||||
| tree <- limit trees]
|
||||
where cat = fromMaybe (PGF.startCat pgf) mcat
|
||||
limit = take (fromMaybe 1 mlimit)
|
||||
@@ -286,8 +286,8 @@ doGenerate :: PGF -> Maybe PGF.Type -> Maybe Int -> Maybe Int -> Maybe PGF.Langu
|
||||
doGenerate pgf mcat mdepth mlimit mto =
|
||||
showJSON [makeObj ["tree".=PGF.showExpr [] tree,
|
||||
"linearizations".=
|
||||
[makeObj ["to".=PGF.showLanguage to, "text".=text]
|
||||
| (to,text) <- linearize' pgf mto tree]]
|
||||
[makeObj ["to".=to, "text".=text]
|
||||
| (to,text,bs) <- linearize' pgf mto tree]]
|
||||
| tree <- limit trees]
|
||||
where
|
||||
trees = PGF.generateAllDepth pgf cat (Just depth)
|
||||
@@ -455,8 +455,8 @@ instance JSON PGF.Expr where
|
||||
|
||||
instance JSON PGF.BracketedString where
|
||||
readJSON x = return (PGF.Leaf "")
|
||||
showJSON (PGF.Bracket cat fid index _ bs) =
|
||||
makeObj ["cat".=cat, "fid".=fid, "index".=index, "children".=bs]
|
||||
showJSON (PGF.Bracket cat fid index fun _ bs) =
|
||||
makeObj ["cat".=cat, "fid".=fid, "index".=index, "fun".=fun, "children".=bs]
|
||||
showJSON (PGF.Leaf s) = makeObj ["token".=s]
|
||||
|
||||
-- * PGF utilities
|
||||
@@ -494,9 +494,11 @@ complete' pgf from typ mlimit input =
|
||||
Left es -> (ps,w:ws)
|
||||
Right ps -> loop ps ws
|
||||
|
||||
linearize' :: PGF -> Maybe PGF.Language -> PGF.Tree -> [(PGF.Language,String)]
|
||||
linearize' :: PGF -> Maybe PGF.Language -> PGF.Tree -> [(PGF.Language,String,PGF.BracketedString)]
|
||||
linearize' pgf mto tree =
|
||||
[(to,PGF.linearize pgf to (transfer to tree)) | to<-langs]
|
||||
[(to,s,bs) | to<-langs,
|
||||
let bs = PGF.bracketedLinearize pgf to (transfer to tree)
|
||||
s = unwords $ PGF.flattenBracketedString bs]
|
||||
where
|
||||
langs = maybe (PGF.languages pgf) (:[]) mto
|
||||
|
||||
@@ -512,7 +514,8 @@ linearizes' pgf mto tree =
|
||||
langs = maybe (PGF.languages pgf) (:[]) mto
|
||||
lins to = nub . concatMap (map snd) . PGF.tabularLinearizes pgf to
|
||||
|
||||
linearizeAndBind pgf mto t = [(la, binds s) | (la,s) <- linearize' pgf mto t]
|
||||
linearizeAndBind pgf mto t =
|
||||
[(la, binds s,bs) | (la,s,bs) <- linearize' pgf mto t]
|
||||
where
|
||||
binds = unwords . bs . words
|
||||
bs ws = case ws of
|
||||
|
||||
Reference in New Issue
Block a user