PGF.VisualizeTree: add SVG renderering of word dependency trees

This was done by introducing an intermediate representation for the LaTeX
pictures produced by the LaTeX renderer and providing a new backend that
outputs SVG instead of LaTeX.
This commit is contained in:
hallgren
2016-06-08 17:49:15 +00:00
parent 5b491b7312
commit 8dfdee1e9a

View File

@@ -127,7 +127,8 @@ graphvizDependencyTree
-> String -- ^ Rendered output in the specified format
graphvizDependencyTree format debug mlab ms pgf lang t =
case format of
"latex" -> conll2latex $ render $ vcat (map (hcat . intersperse (char '\t') ) wnodes)
"latex" -> render . ppLaTeX $ conll2latex' conll
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
"conll" -> render $ vcat (map (hcat . intersperse (char '\t') ) wnodes)
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
@@ -139,6 +140,7 @@ graphvizDependencyTree format debug mlab ms pgf lang t =
vcat links) $$
text "}"
where
conll = (map.map) render wnodes
nodes = map mkNode leaves
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
@@ -490,10 +492,19 @@ tag i
-- visualization with latex output. AR Nov 2015
conlls2latexDoc :: [String] -> String
conlls2latexDoc = latexDoc . intersperse "\n\\vspace{4mm}\n" . map conll2latex . filter (not . null)
conlls2latexDoc =
render .
latexDoc .
vcat .
intersperse (text "" $+$ app "vspace" (text "4mm")) .
map conll2latex .
filter (not . null)
conll2latex :: String -> String
conll2latex = unlines . dep2latex . conll2dep
conll2latex :: String -> Doc
conll2latex = ppLaTeX . conll2latex' . parseCoNLL
conll2latex' :: CoNLL -> [LaTeX]
conll2latex' = dep2latex . conll2dep'
data Dep = Dep {
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
@@ -517,11 +528,11 @@ arcbase = 30.0 -- arcs start and end 40u above the bottom
arcfactor r = r * 600 -- reduction of arc size from word distance
xyratio = 3 -- width/height ratio of arcs
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> String
putArc frwld height x y label = unlines [oval,arrowhead,labelling] where
oval = put ctr arcbase ("\\oval(" ++ show wdth ++ "," ++ show hght ++ ")[t]")
arrowhead = put endp (arcbase + 5) (app "vector(0,-1)" "5") -- downgoing arrow 5u above the arc base
labelling = put (labelstart ctr) (labelheight (hght/2)) (small label)
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
putArc frwld height x y label = [oval,arrowhead,labelling] where
oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base
labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
dxy = wdist frwld x y -- distance between words, >>= 20.0
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
hdxy = dxy / 2 -- half the distance
@@ -532,17 +543,16 @@ putArc frwld height x y label = unlines [oval,arrowhead,labelling] where
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
rwld = 0.5 ----
dep2latex :: Dep -> [String]
dep2latex :: Dep -> [LaTeX]
dep2latex d =
comment (unwords (map fst (tokens d)))
: app "setlength{\\unitlength}" (show defaultUnit ++ "mm")
: ("\\begin{picture}(" ++ show width ++ "," ++ show height ++ ")")
: [put (wpos rwld i) 0 w | (i,w) <- zip [0..] (map fst (tokens d))] -- words
++ [put (wpos rwld i) 15 (small w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
++ [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
++ [put (wpos rwld (root d) + 15) height (app "vector(0,-1)" (show (fromIntegral height-arcbase)))]
++ [put (wpos rwld (root d) + 20) (height - 10) (small "ROOT")]
++ ["\\end{picture}"]
[Comment (unwords (map fst (tokens d))),
Picture defaultUnit (width,height) (
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
)]
where
wld i = wordLength d i -- >= 20.0
rwld i = (wld i) / defaultWordLength -- >= 1.0
@@ -551,11 +561,18 @@ dep2latex d =
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
[] -> 0
uvs -> 1 + maximum [depth u v | (u,v) <- uvs]
width = round (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + round spaceLength * ((length (tokens d)) - 1)
height = 40 + 20 * round (maximum [aheight x y | ((x,y),_) <- deps d])
width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
height = 40 + 20 * {-round-} (maximum [aheight x y | ((x,y),_) <- deps d])
conll2dep :: String -> Dep
conll2dep str = Dep {
type CoNLL = [[String]]
parseCoNLL :: String -> CoNLL
parseCoNLL = map words . lines
--conll2dep :: String -> Dep
--conll2dep = conll2dep' . parseCoNLL
conll2dep' :: CoNLL -> Dep
conll2dep' ls = Dep {
wordLength = wld
, tokens = toks
, deps = dps
@@ -563,23 +580,154 @@ conll2dep str = Dep {
}
where
wld i = maximum [charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]]
ls = map words (lines str)
toks = [(w,c) | _:w:_:c:_ <- ls]
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
-- latex formatting
app macro arg = "\\" ++ macro ++ "{" ++ arg ++ "}"
put x y obj = app ("put(" ++ show x ++ "," ++ show y ++ ")") obj
small w = "{\\tiny " ++ w ++ "}"
comment s = "%% " ++ s
-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
latexDoc :: [String] -> String
latexDoc body = unlines $
"\\documentclass{article}"
: "\\usepackage[utf8]{inputenc}"
: "\\begin{document}"
: body
++ ["\\end{document}"]
-- We render both LaTeX and SVG from this intermediate representation of
-- LaTeX pictures.
data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
data DrawingCommand = Put Position Object
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
type UnitLengthMM = Double
type Size = (Double,Double)
type Position = (Double,Double)
type Length = Double
-- * latex formatting
ppLaTeX = vcat . map ppLaTeX1
where
ppLaTeX1 el =
case el of
Comment s -> comment s
Picture unit size cmds ->
app "setlength{\\unitlength}" (text (show unit ++ "mm"))
$$ hang (app "begin" (text "picture")<>text (show size)) 2
(vcat (map ppDrawingCommand cmds))
$$ app "end" (text "picture")
$$ text ""
ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
ppObject obj =
case obj of
Text s -> text s
TinyText s -> small (text s)
OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
ArrowDown len -> app "vector(0,-1)" (text (show len))
put p@(_,_) = app ("put" ++ show p)
small w = text "{\\tiny" <+> w <> text "}"
comment s = text "%%" <+> text s -- line break show follow
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
latexDoc :: Doc -> Doc
latexDoc body =
vcat [text "\\documentclass{article}",
text "\\usepackage[utf8]{inputenc}",
text "\\begin{document}",
body,
text "\\end{document}"]
-- | Render LaTeX pictures as SVG
toSVG = concatMap toSVG1
where
toSVG1 el =
case el of
Comment s -> []
Picture unit size@(w,h) cmds ->
[Elem "svg" ["width".=x1,"height".=y0+5,
("viewBox",unwords (map show [0,0,x1,y0+5])),
("version","1.1"),
("xmlns","http://www.w3.org/2000/svg")]
(concatMap draw cmds)]
where
draw (Put pos obj) = objectSVG pos obj
objectSVG pos obj =
case obj of
Text s -> [text 16 pos s]
TinyText s -> [text 10 pos s]
OvalTop size -> [ovalTop pos size]
ArrowDown len -> arrowDown pos len
text h (x,y) s =
Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h]
[CharData s]
ovalTop (x,y) (w,h) =
Elem "path" [("d",path),("stroke","black"),("fill","none")] []
where
x1 = x-w/2
x2 = min x (x1+r)
x3 = max x (x4-r)
x4 = x+w/2
y1 = y
y2 = y+r
r = h/2
sx = show . xc
sy = show . yc
path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2,
"L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1])
arrowDown (x,y) len =
[Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2,
("stroke","black")] [],
Elem "path" [("d",unwords arrowhead)] []]
where
x2 = xc x
y2 = yc (y-len)
arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6]
xc x = num x+5
yc y = y0-num y
x1 = num w+10
y0 = num h+20
num x = round (scale*x)
scale = unit*5
infix 0 .=
n.=v = (n,show v)
-- * SVG is XML
data SVG = CharData String | Elem TagName Attrs [SVG]
type TagName = String
type Attrs = [(String,String)]
ppSVG svg =
vcat [text "<?xml version=\"1.0\" standalone=\"no\"?>",
text "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"",
text "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">",
text "",
vcat (map ppSVG1 svg)] -- It should be a single <svg> element...
where
ppSVG1 svg1 =
case svg1 of
CharData s -> text (encode s)
Elem tag attrs [] ->
text "<"<>text tag<>cat (map attr attrs) <> text "/>"
Elem tag attrs svg ->
cat [text "<"<>text tag<>cat (map attr attrs) <> text ">",
nest 2 (cat (map ppSVG1 svg)),
text "</"<>text tag<>text ">"]
attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\""
encode s = foldr encodeEntity "" s
encodeEntity = encodeEntity' (const False)
encodeEntity' esc c r =
case c of
'&' -> "&amp;"++r
'<' -> "&lt;"++r
'>' -> "&gt;"++r
_ -> c:r