From ee2f3d085ea1e3ca943f78e71be518fd2f0b16cc Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 6 Apr 2017 11:55:21 +0000 Subject: [PATCH] vd command now reads local concrete configurations to deal with syncat words; TODO: dissolve clustered multiwords bound with + --- src/compiler/GF/Command/Commands.hs | 32 ++++++----- src/runtime/haskell/PGF.hs | 1 + src/runtime/haskell/PGF/VisualizeTree.hs | 67 +++++++++++++++++++++--- src/server/PGFService.hs | 2 +- 4 files changed, 81 insertions(+), 21 deletions(-) diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index ea62ba69a..e36326f6a 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -545,15 +545,20 @@ pgfCommands = Map.fromList [ "See also 'vp -showdep' for another visualization of dependencies." ], exec = getEnv $ \ opts arg (Env pgf mos) -> do + let absname = abstractName pgf let es = toExprs arg let debug = isOpt "v" opts - let file = valStrOpts "file" "" opts + let abslabels = valStrOpts "abslabels" (valStrOpts "file" "" opts) opts + let cnclabels = valStrOpts "cnclabels" "" opts let outp = valStrOpts "output" "dot" opts - mlab <- case file of + mlab <- case abslabels of "" -> return Nothing - _ -> (Just . getDepLabels) `fmap` restricted (readFile file) + _ -> (Just . getDepLabels) `fmap` restricted (readFile abslabels) + mclab <- case cnclabels of + "" -> return Nothing + _ -> (Just . getCncDepLabels) `fmap` restricted (readFile cnclabels) let lang = optLang pgf opts - let grphs = map (graphvizDependencyTree outp debug mlab Nothing pgf lang) es + let grphs = map (graphvizDependencyTree outp debug mlab mclab pgf lang) es if isOpt "conll2latex" opts then return $ fromString $ conlls2latexDoc $ stanzas $ unlines $ toStrings arg else if isFlag "view" opts && valStrOpts "output" "" opts == "latex" @@ -568,10 +573,9 @@ pgfCommands = Map.fromList [ else return $ fromString $ unlines $ intersperse "" grphs, examples = [ mkEx "gr | vd -- generate a tree and show dependency tree in .dot", - mkEx "gr | vd -view=open -- generate a tree and display dependency tree on a Mac", - mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree on a Mac", - mkEx "gr -number=1000 | vd -file=dep.labels -output=conll -- generate training treebank", - mkEx "gr -number=100 | vd -file=dep.labels -output=malt_input -- generate test sentences", + mkEx "gr | vd -view=open -- generate a tree and display dependency tree on with Mac's 'open'", + mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree with Mac's 'open'", + mkEx "gr -number=1000 | vd -abslabels=Lang.labels -cnclabels=LangSwe.labels -output=conll -- generate a random treebank", mkEx "rf -file=ex.conll | vd -conll2latex | wf -file=ex.tex -- convert conll file to latex" ], options = [ @@ -579,11 +583,13 @@ pgfCommands = Map.fromList [ ("conll2latex", "convert conll to latex") ], flags = [ - ("file","configuration file for labels, format per line 'fun label*'"), - ("format","format of the visualization file using dot (default \"png\")"), - ("output","output format of graph source (dot (default), malt_input, conll)"), - ("view","program to open the resulting file (default \"open\")"), - ("lang","the language of analysis") + ("abslabels","abstract configuration file for labels, format per line 'fun label*'"), + ("cnclabels","concrete configuration file for labels, format per line 'fun {words|*} pos label head'"), + ("file", "same as abslabels (abstract configuration file)"), + ("format", "format of the visualization file using dot (default \"png\")"), + ("output", "output format of graph source (latex, conll, dot (default but deprecated))"), + ("view", "program to open the resulting graph file (default \"open\")"), + ("lang", "the language of analysis") ] }), diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 293aec0fd..8eb215f9d 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -132,6 +132,7 @@ module PGF( conlls2latexDoc, -- extra: Labels, getDepLabels, + CncLabels, getCncDepLabels, -- * Probabilities Probabilities, diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 7c7fa2dca..862a34de8 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -17,6 +17,7 @@ module PGF.VisualizeTree , graphvizParseTreeDep , graphvizDependencyTree , Labels, getDepLabels + , CncLabels, getCncDepLabels , graphvizBracketedString , graphvizAlignment , gizaAlignment @@ -33,7 +34,7 @@ import PGF.Macros (lookValCat, BracketedString(..)) import qualified Data.Map as Map --import qualified Data.IntMap as IntMap -import Data.List (intersperse,nub,mapAccumL,find) +import Data.List (intersperse,nub,mapAccumL,find,groupBy) --import Data.Char (isDigit) import Data.Maybe (fromMaybe) import Text.PrettyPrint @@ -119,17 +120,17 @@ type Labels = Map.Map CId [String] graphvizDependencyTree :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@ -> Bool -- ^ Include extra information (debug) - -> Maybe Labels -- ^ Label information obtained with 'getDepLabels' - -> unused -- ^ not used (was: @Maybe String@) + -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels' + -> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@)) -> PGF -> CId -- ^ The language of analysis -> Tree -> String -- ^ Rendered output in the specified format -graphvizDependencyTree format debug mlab ms pgf lang t = +graphvizDependencyTree format debug mlab mclab pgf lang t = case format of "latex" -> render . ppLaTeX $ conll2latex' conll "svg" -> render . ppSVG . toSVG $ conll2latex' conll - "conll" -> render $ vcat (map (hcat . intersperse (char '\t') ) wnodes) + "conll" -> printCoNLL conll "malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes) "malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) _ -> render $ text "digraph {" $$ @@ -140,7 +141,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t = vcat links) $$ text "}" where - conll = (map.map) render wnodes + conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab + conll0 = (map.map) render wnodes nodes = map mkNode leaves links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves] @@ -185,7 +187,8 @@ graphvizDependencyTree format debug mlab ms pgf lang t = mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;" - labels = maybe Map.empty id mlab + labels = maybe Map.empty id mlab + clabels = maybe [] id mclab posCat cat = case Map.lookup cat labels of Just [p] -> mkCId p @@ -737,3 +740,53 @@ ppSVG svg = '<' -> "<"++r '>' -> ">"++r _ -> c:r + + +---------------------------------- +-- concrete syntax annotations (local) on top of conll +-- examples of annotations: +-- UseComp {"not"} PART neg head +-- UseComp {*} AUX cop head + +type CncLabels = [(String, String -> Maybe (String -> String,String,String))] +-- (fun, word -> (pos,label,target)) +-- the pos can remain unchanged, as in the current notation in the article + +fixCoNLL :: CncLabels -> CoNLL -> CoNLL +fixCoNLL labels conll = map fixc conll where + fixc row = case row of + (i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of + Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs) + Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs) + _ -> row + _ -> row + + look (fun,word) = case lookup fun labels of + Just relabel -> relabel word + _ -> Nothing + + getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll] + +getCncDepLabels :: String -> CncLabels +getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where + --- choose is for compatibility with the general notation + choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules + + analyse line = case break (=='{') line of + (beg,_:ws) -> case break (=='}') ws of + (toks,_:target) -> case (words beg, words target) of + (fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks] + (fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks] + _ -> [] + _ -> [] + _ -> [] + merge rules@((fun,_):_) = (fun, \tok -> + case lookup tok (map snd rules) of + Just new -> return new + _ -> lookup "*" (map snd rules) + ) + getToks = words . map (\c -> if elem c "\"," then ' ' else c) + +printCoNLL :: CoNLL -> String +printCoNLL = unlines . map (concat . intersperse "\t") + diff --git a/src/server/PGFService.hs b/src/server/PGFService.hs index d84f2b3c2..f12ad75fb 100644 --- a/src/server/PGFService.hs +++ b/src/server/PGFService.hs @@ -794,7 +794,7 @@ parseTree pgf lang opts tree = PGF.graphvizParseTree pgf lang opts tree doDepTree lc path pgf fmt lang tree = do (_,lbls) <- liftIO $ getLabels lc path pgf - let vis = PGF.graphvizDependencyTree fmt False (Just lbls) () pgf lang tree + let vis = PGF.graphvizDependencyTree fmt False (Just lbls) Nothing pgf lang tree ---- TODO: CncLabels if fmt `elem` ["png","gif","gv"] then outputGraphviz vis else if fmt=="svg"