vd command now reads local concrete configurations to deal with syncat words; TODO: dissolve clustered multiwords bound with +

This commit is contained in:
aarne
2017-04-06 11:55:21 +00:00
parent 20a038719f
commit ee2f3d085e
4 changed files with 81 additions and 21 deletions

View File

@@ -545,15 +545,20 @@ pgfCommands = Map.fromList [
"See also 'vp -showdep' for another visualization of dependencies." "See also 'vp -showdep' for another visualization of dependencies."
], ],
exec = getEnv $ \ opts arg (Env pgf mos) -> do exec = getEnv $ \ opts arg (Env pgf mos) -> do
let absname = abstractName pgf
let es = toExprs arg let es = toExprs arg
let debug = isOpt "v" opts let debug = isOpt "v" opts
let file = valStrOpts "file" "" opts let abslabels = valStrOpts "abslabels" (valStrOpts "file" "" opts) opts
let cnclabels = valStrOpts "cnclabels" "" opts
let outp = valStrOpts "output" "dot" opts let outp = valStrOpts "output" "dot" opts
mlab <- case file of mlab <- case abslabels of
"" -> return Nothing "" -> return Nothing
_ -> (Just . getDepLabels) `fmap` restricted (readFile file) _ -> (Just . getDepLabels) `fmap` restricted (readFile abslabels)
mclab <- case cnclabels of
"" -> return Nothing
_ -> (Just . getCncDepLabels) `fmap` restricted (readFile cnclabels)
let lang = optLang pgf opts let lang = optLang pgf opts
let grphs = map (graphvizDependencyTree outp debug mlab Nothing pgf lang) es let grphs = map (graphvizDependencyTree outp debug mlab mclab pgf lang) es
if isOpt "conll2latex" opts if isOpt "conll2latex" opts
then return $ fromString $ conlls2latexDoc $ stanzas $ unlines $ toStrings arg then return $ fromString $ conlls2latexDoc $ stanzas $ unlines $ toStrings arg
else if isFlag "view" opts && valStrOpts "output" "" opts == "latex" else if isFlag "view" opts && valStrOpts "output" "" opts == "latex"
@@ -568,10 +573,9 @@ pgfCommands = Map.fromList [
else return $ fromString $ unlines $ intersperse "" grphs, else return $ fromString $ unlines $ intersperse "" grphs,
examples = [ examples = [
mkEx "gr | vd -- generate a tree and show dependency tree in .dot", mkEx "gr | vd -- generate a tree and show dependency tree in .dot",
mkEx "gr | vd -view=open -- generate a tree and display dependency tree on a Mac", mkEx "gr | vd -view=open -- generate a tree and display dependency tree on with Mac's 'open'",
mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree on a Mac", mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree with Mac's 'open'",
mkEx "gr -number=1000 | vd -file=dep.labels -output=conll -- generate training treebank", mkEx "gr -number=1000 | vd -abslabels=Lang.labels -cnclabels=LangSwe.labels -output=conll -- generate a random treebank",
mkEx "gr -number=100 | vd -file=dep.labels -output=malt_input -- generate test sentences",
mkEx "rf -file=ex.conll | vd -conll2latex | wf -file=ex.tex -- convert conll file to latex" mkEx "rf -file=ex.conll | vd -conll2latex | wf -file=ex.tex -- convert conll file to latex"
], ],
options = [ options = [
@@ -579,11 +583,13 @@ pgfCommands = Map.fromList [
("conll2latex", "convert conll to latex") ("conll2latex", "convert conll to latex")
], ],
flags = [ flags = [
("file","configuration file for labels, format per line 'fun label*'"), ("abslabels","abstract configuration file for labels, format per line 'fun label*'"),
("format","format of the visualization file using dot (default \"png\")"), ("cnclabels","concrete configuration file for labels, format per line 'fun {words|*} pos label head'"),
("output","output format of graph source (dot (default), malt_input, conll)"), ("file", "same as abslabels (abstract configuration file)"),
("view","program to open the resulting file (default \"open\")"), ("format", "format of the visualization file using dot (default \"png\")"),
("lang","the language of analysis") ("output", "output format of graph source (latex, conll, dot (default but deprecated))"),
("view", "program to open the resulting graph file (default \"open\")"),
("lang", "the language of analysis")
] ]
}), }),

View File

@@ -132,6 +132,7 @@ module PGF(
conlls2latexDoc, conlls2latexDoc,
-- extra: -- extra:
Labels, getDepLabels, Labels, getDepLabels,
CncLabels, getCncDepLabels,
-- * Probabilities -- * Probabilities
Probabilities, Probabilities,

View File

@@ -17,6 +17,7 @@ module PGF.VisualizeTree
, graphvizParseTreeDep , graphvizParseTreeDep
, graphvizDependencyTree , graphvizDependencyTree
, Labels, getDepLabels , Labels, getDepLabels
, CncLabels, getCncDepLabels
, graphvizBracketedString , graphvizBracketedString
, graphvizAlignment , graphvizAlignment
, gizaAlignment , gizaAlignment
@@ -33,7 +34,7 @@ import PGF.Macros (lookValCat, BracketedString(..))
import qualified Data.Map as Map import qualified Data.Map as Map
--import qualified Data.IntMap as IntMap --import qualified Data.IntMap as IntMap
import Data.List (intersperse,nub,mapAccumL,find) import Data.List (intersperse,nub,mapAccumL,find,groupBy)
--import Data.Char (isDigit) --import Data.Char (isDigit)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Text.PrettyPrint import Text.PrettyPrint
@@ -119,17 +120,17 @@ type Labels = Map.Map CId [String]
graphvizDependencyTree graphvizDependencyTree
:: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@ :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
-> Bool -- ^ Include extra information (debug) -> Bool -- ^ Include extra information (debug)
-> Maybe Labels -- ^ Label information obtained with 'getDepLabels' -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
-> unused -- ^ not used (was: @Maybe String@) -> Maybe CncLabels -- ^ concrete label information obtained with 'getCncDepLabels' (was: unused (was: @Maybe String@))
-> PGF -> PGF
-> CId -- ^ The language of analysis -> CId -- ^ The language of analysis
-> Tree -> Tree
-> String -- ^ Rendered output in the specified format -> String -- ^ Rendered output in the specified format
graphvizDependencyTree format debug mlab ms pgf lang t = graphvizDependencyTree format debug mlab mclab pgf lang t =
case format of case format of
"latex" -> render . ppLaTeX $ conll2latex' conll "latex" -> render . ppLaTeX $ conll2latex' conll
"svg" -> render . ppSVG . toSVG $ conll2latex' conll "svg" -> render . ppSVG . toSVG $ conll2latex' conll
"conll" -> render $ vcat (map (hcat . intersperse (char '\t') ) wnodes) "conll" -> printCoNLL conll
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes) "malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) "malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> render $ text "digraph {" $$ _ -> render $ text "digraph {" $$
@@ -140,7 +141,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t =
vcat links) $$ vcat links) $$
text "}" text "}"
where where
conll = (map.map) render wnodes conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
conll0 = (map.map) render wnodes
nodes = map mkNode leaves nodes = map mkNode leaves
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves] links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
@@ -185,7 +187,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t =
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;" mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
labels = maybe Map.empty id mlab labels = maybe Map.empty id mlab
clabels = maybe [] id mclab
posCat cat = case Map.lookup cat labels of posCat cat = case Map.lookup cat labels of
Just [p] -> mkCId p Just [p] -> mkCId p
@@ -737,3 +740,53 @@ ppSVG svg =
'<' -> "&lt;"++r '<' -> "&lt;"++r
'>' -> "&gt;"++r '>' -> "&gt;"++r
_ -> c:r _ -> c:r
----------------------------------
-- Concrete syntax annotations (local) on top of CoNLL.
-- Examples of annotation lines:
--   UseComp {"not"} PART neg head
--   UseComp {*} AUX cop head
--
-- A value of this type is an association list keyed by function name. Each
-- entry maps a word to an optional triple (pos, label, target):
--   * pos    is a function applied to the row's existing pos value, so a rule
--            can either overwrite it or leave it as it is;
--   * label  is the dependency label to write into the row;
--   * target is either the literal "head" (the row keeps its current head) or
--            the label of another row with the same head, whose id then
--            becomes this row's new head (see fixCoNLL).
-- Nothing means that no rule applies to the word.
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
-- (fun, word -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article
-- Rewrite the rows of a CoNLL table according to concrete-syntax label rules.
--
-- A row is only touched when it has at least eight fields and the rule set
-- contains an entry for the row's function (third field) that yields a result
-- for the row's word (second field). In that case
--   * the fourth field (pos) is passed through the rule's pos function,
--   * the eighth field (label) is replaced by the rule's label,
--   * the seventh field (head) is kept when the rule's target is "head";
--     otherwise it becomes the id of the first row of the *unmodified* table
--     that carries the target label and has the same head, and is kept as it
--     is when there is no such row.
-- Every other row is returned unchanged.
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL labels conll = [adjust row | row <- conll]
  where
    adjust row@(ident : word : fun : pos : cat : x_ : hd : _oldLabel : rest) =
      -- find the rules of this function, then ask them about this word
      case lookup fun labels >>= ($ word) of
        Nothing -> row
        Just (repos, newLabel, target) ->
          ident : word : fun : repos pos : cat : x_ : newHead hd target : newLabel : rest
    adjust row = row

    -- "head" is the marker for "do not re-attach this word"
    newHead hd "head" = hd
    newHead hd target = fromMaybe hd (lookup (target, hd) siblings)

    -- (label, head) of every well-formed row of the input table, with its id
    siblings = [((lbl, hd), ident) | ident : _ : _ : _ : _ : _ : hd : lbl : _ <- conll]
-- Parse the text of a concrete-syntax label configuration into CncLabels.
--
-- Only lines that contain '{' and no '(' are read; all other lines are
-- ignored. An accepted line has the shape
--
--   fun {tok, tok, ...} [pos] label target
--
-- where the tokens may be double-quoted and comma-separated, and the token *
-- acts as the fallback for every word that is not listed explicitly. When
-- pos is omitted, the resulting rule leaves the existing pos value unchanged.
-- Lines that do not fit this shape contribute no rules.
--
-- All rules for one function end up in a single entry, in the order in which
-- they occur in the text, even when they are not on consecutive lines.
-- (Grouping only neighbouring lines would produce several entries for the
-- same function, and a lookup by function name would then find just the
-- first of them.) The entries of the result are ordered by function name.
getCncDepLabels :: String -> CncLabels
getCncDepLabels src =
  [ (fun, resolve rules)
  | (fun, rules) <- Map.toList (Map.fromListWith (flip (++)) parsed)
  ]
  where
    -- one singleton rule list per (function, token) pair; fromListWith hands
    -- the newer value to its combining function first, hence the flip to keep
    -- the rules of a function in file order
    parsed =
      [ (fun, [rule])
      | line <- lines src
      , choose line
      , (fun, rule) <- analyse line
      ]

    --- choose is for compatibility with the general notation:
    --- ignoring non-local (with "(") and abstract (without "{") rules
    choose line = notElem '(' line && elem '{' line

    analyse line = case break (== '{') line of
      (beg, _ : ws) -> case break (== '}') ws of
        (toks, _ : target) -> case (words beg, words target) of
          (fun : _, [label, j])      -> [(fun, (tok, (id, label, j))) | tok <- getToks toks]
          (fun : _, [pos, label, j]) -> [(fun, (tok, (const pos, label, j))) | tok <- getToks toks]
          _ -> []
        _ -> []
      _ -> []

    -- an exact token match wins; otherwise fall back to the "*" rule, if any
    resolve rules tok = case lookup tok rules of
      Just new -> Just new
      Nothing  -> lookup "*" rules

    -- quotes and commas only separate tokens, they are not part of them
    getToks = words . map (\c -> if elem c "\"," then ' ' else c)
-- Render a CoNLL table as text: the fields of a row are joined with tab
-- characters and every row, including the last one, is ended by a newline.
printCoNLL :: CoNLL -> String
printCoNLL rows = concatMap renderRow rows
  where
    renderRow fields = concat (intersperse "\t" fields) ++ "\n"

View File

@@ -794,7 +794,7 @@ parseTree pgf lang opts tree = PGF.graphvizParseTree pgf lang opts tree
doDepTree lc path pgf fmt lang tree = doDepTree lc path pgf fmt lang tree =
do (_,lbls) <- liftIO $ getLabels lc path pgf do (_,lbls) <- liftIO $ getLabels lc path pgf
let vis = PGF.graphvizDependencyTree fmt False (Just lbls) () pgf lang tree let vis = PGF.graphvizDependencyTree fmt False (Just lbls) Nothing pgf lang tree ---- TODO: CncLabels
if fmt `elem` ["png","gif","gv"] if fmt `elem` ["png","gif","gv"]
then outputGraphviz vis then outputGraphviz vis
else if fmt=="svg" else if fmt=="svg"