vd command now reads local concrete configurations to deal with syncat words; TODO: dissolve clustered multiwords bound with +

This commit is contained in:
aarne
2017-04-06 11:55:21 +00:00
parent 20a038719f
commit ee2f3d085e
4 changed files with 81 additions and 21 deletions

View File

@@ -545,15 +545,20 @@ pgfCommands = Map.fromList [
"See also 'vp -showdep' for another visualization of dependencies."
],
exec = getEnv $ \ opts arg (Env pgf mos) -> do
let absname = abstractName pgf
let es = toExprs arg
let debug = isOpt "v" opts
let file = valStrOpts "file" "" opts
let abslabels = valStrOpts "abslabels" (valStrOpts "file" "" opts) opts
let cnclabels = valStrOpts "cnclabels" "" opts
let outp = valStrOpts "output" "dot" opts
mlab <- case file of
mlab <- case abslabels of
"" -> return Nothing
_ -> (Just . getDepLabels) `fmap` restricted (readFile file)
_ -> (Just . getDepLabels) `fmap` restricted (readFile abslabels)
mclab <- case cnclabels of
"" -> return Nothing
_ -> (Just . getCncDepLabels) `fmap` restricted (readFile cnclabels)
let lang = optLang pgf opts
let grphs = map (graphvizDependencyTree outp debug mlab Nothing pgf lang) es
let grphs = map (graphvizDependencyTree outp debug mlab mclab pgf lang) es
if isOpt "conll2latex" opts
then return $ fromString $ conlls2latexDoc $ stanzas $ unlines $ toStrings arg
else if isFlag "view" opts && valStrOpts "output" "" opts == "latex"
@@ -568,10 +573,9 @@ pgfCommands = Map.fromList [
else return $ fromString $ unlines $ intersperse "" grphs,
examples = [
mkEx "gr | vd -- generate a tree and show dependency tree in .dot",
mkEx "gr | vd -view=open -- generate a tree and display dependency tree on a Mac",
mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree on a Mac",
mkEx "gr -number=1000 | vd -file=dep.labels -output=conll -- generate training treebank",
mkEx "gr -number=100 | vd -file=dep.labels -output=malt_input -- generate test sentences",
mkEx "gr | vd -view=open -- generate a tree and display dependency tree on with Mac's 'open'",
mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree with Mac's 'open'",
mkEx "gr -number=1000 | vd -abslabels=Lang.labels -cnclabels=LangSwe.labels -output=conll -- generate a random treebank",
mkEx "rf -file=ex.conll | vd -conll2latex | wf -file=ex.tex -- convert conll file to latex"
],
options = [
@@ -579,11 +583,13 @@ pgfCommands = Map.fromList [
("conll2latex", "convert conll to latex")
],
flags = [
("file","configuration file for labels, format per line 'fun label*'"),
("format","format of the visualization file using dot (default \"png\")"),
("output","output format of graph source (dot (default), malt_input, conll)"),
("view","program to open the resulting file (default \"open\")"),
("lang","the language of analysis")
("abslabels","abstract configuration file for labels, format per line 'fun label*'"),
("cnclabels","concrete configuration file for labels, format per line 'fun {words|*} pos label head'"),
("file", "same as abslabels (abstract configuration file)"),
("format", "format of the visualization file using dot (default \"png\")"),
("output", "output format of graph source (latex, conll, dot (default but deprecated))"),
("view", "program to open the resulting graph file (default \"open\")"),
("lang", "the language of analysis")
]
}),

View File

@@ -132,6 +132,7 @@ module PGF(
conlls2latexDoc,
-- extra:
Labels, getDepLabels,
CncLabels, getCncDepLabels,
-- * Probabilities
Probabilities,

View File

@@ -17,6 +17,7 @@ module PGF.VisualizeTree
, graphvizParseTreeDep
, graphvizDependencyTree
, Labels, getDepLabels
, CncLabels, getCncDepLabels
, graphvizBracketedString
, graphvizAlignment
, gizaAlignment
@@ -33,7 +34,7 @@ import PGF.Macros (lookValCat, BracketedString(..))
import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap
import Data.List (intersperse,nub,mapAccumL,find)
import Data.List (intersperse,nub,mapAccumL,find,groupBy)
--import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Text.PrettyPrint
@@ -119,17 +120,17 @@ type Labels = Map.Map CId [String]
graphvizDependencyTree
:: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
-> Bool -- ^ Include extra information (debug)
-> Maybe Labels -- ^ Label information obtained with 'getDepLabels'
-> unused -- ^ not used (was: @Maybe String@)
-> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
-> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
-> PGF
-> CId -- ^ The language of analysis
-> Tree
-> String -- ^ Rendered output in the specified format
graphvizDependencyTree format debug mlab ms pgf lang t =
graphvizDependencyTree format debug mlab mclab pgf lang t =
case format of
"latex" -> render . ppLaTeX $ conll2latex' conll
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
"conll" -> render $ vcat (map (hcat . intersperse (char '\t') ) wnodes)
"conll" -> printCoNLL conll
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> render $ text "digraph {" $$
@@ -140,7 +141,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t =
vcat links) $$
text "}"
where
conll = (map.map) render wnodes
conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
conll0 = (map.map) render wnodes
nodes = map mkNode leaves
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
@@ -185,7 +187,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t =
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
labels = maybe Map.empty id mlab
labels = maybe Map.empty id mlab
clabels = maybe [] id mclab
posCat cat = case Map.lookup cat labels of
Just [p] -> mkCId p
@@ -737,3 +740,53 @@ ppSVG svg =
'<' -> "&lt;"++r
'>' -> "&gt;"++r
_ -> c:r
----------------------------------
-- concrete syntax annotations (local) on top of conll
-- examples of annotations:
-- UseComp {"not"} PART neg head
-- UseComp {*} AUX cop head
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
-- (fun, word -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL labels conll = map fixc conll where
fixc row = case row of
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
_ -> row
_ -> row
look (fun,word) = case lookup fun labels of
Just relabel -> relabel word
_ -> Nothing
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
getCncDepLabels :: String -> CncLabels
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
--- choose is for compatibility with the general notation
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
analyse line = case break (=='{') line of
(beg,_:ws) -> case break (=='}') ws of
(toks,_:target) -> case (words beg, words target) of
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
_ -> []
_ -> []
_ -> []
merge rules@((fun,_):_) = (fun, \tok ->
case lookup tok (map snd rules) of
Just new -> return new
_ -> lookup "*" (map snd rules)
)
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
printCoNLL :: CoNLL -> String
printCoNLL = unlines . map (concat . intersperse "\t")

View File

@@ -794,7 +794,7 @@ parseTree pgf lang opts tree = PGF.graphvizParseTree pgf lang opts tree
doDepTree lc path pgf fmt lang tree =
do (_,lbls) <- liftIO $ getLabels lc path pgf
let vis = PGF.graphvizDependencyTree fmt False (Just lbls) () pgf lang tree
let vis = PGF.graphvizDependencyTree fmt False (Just lbls) Nothing pgf lang tree ---- TODO: CncLabels
if fmt `elem` ["png","gif","gv"]
then outputGraphviz vis
else if fmt=="svg"