diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 4ba605432..d274b7300 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -127,7 +127,8 @@ graphvizDependencyTree -> String -- ^ Rendered output in the specified format graphvizDependencyTree format debug mlab ms pgf lang t = case format of - "latex" -> conll2latex $ render $ vcat (map (hcat . intersperse (char '\t') ) wnodes) + "latex" -> render . ppLaTeX $ conll2latex' conll + "svg" -> render . ppSVG . toSVG $ conll2latex' conll "conll" -> render $ vcat (map (hcat . intersperse (char '\t') ) wnodes) "malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes) "malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) @@ -139,6 +140,7 @@ graphvizDependencyTree format debug mlab ms pgf lang t = vcat links) $$ text "}" where + conll = (map.map) render wnodes nodes = map mkNode leaves links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves] @@ -490,10 +492,19 @@ tag i -- visualization with latex output. AR Nov 2015 conlls2latexDoc :: [String] -> String -conlls2latexDoc = latexDoc . intersperse "\n\\vspace{4mm}\n" . map conll2latex . filter (not . null) +conlls2latexDoc = + render . + latexDoc . + vcat . + intersperse (text "" $+$ app "vspace" (text "4mm")) . + map conll2latex . + filter (not . null) -conll2latex :: String -> String -conll2latex = unlines . dep2latex . conll2dep +conll2latex :: String -> Doc +conll2latex = ppLaTeX . conll2latex' . parseCoNLL + +conll2latex' :: CoNLL -> [LaTeX] +conll2latex' = dep2latex . conll2dep' data Dep = Dep { wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0) @@ -517,11 +528,11 @@ arcbase = 30.0 -- arcs start and end 40u above the bottom arcfactor r = r * 600 -- reduction of arc size from word distance xyratio = 3 -- width/height ratio of arcs -putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> String -putArc frwld height x y label = unlines [oval,arrowhead,labelling] where - oval = put ctr arcbase ("\\oval(" ++ show wdth ++ "," ++ show hght ++ ")[t]") - arrowhead = put endp (arcbase + 5) (app "vector(0,-1)" "5") -- downgoing arrow 5u above the arc base - labelling = put (labelstart ctr) (labelheight (hght/2)) (small label) +putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand] +putArc frwld height x y label = [oval,arrowhead,labelling] where + oval = Put (ctr,arcbase) (OvalTop (wdth,hght)) + arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base + labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label) dxy = wdist frwld x y -- distance between words, >>= 20.0 ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length hdxy = dxy / 2 -- half the distance @@ -532,17 +543,16 @@ putArc frwld height x y label = unlines [oval,arrowhead,labelling] where endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow rwld = 0.5 ---- -dep2latex :: Dep -> [String] +dep2latex :: Dep -> [LaTeX] dep2latex d = - comment (unwords (map fst (tokens d))) - : app "setlength{\\unitlength}" (show defaultUnit ++ "mm") - : ("\\begin{picture}(" ++ show width ++ "," ++ show height ++ ")") - : [put (wpos rwld i) 0 w | (i,w) <- zip [0..] (map fst (tokens d))] -- words - ++ [put (wpos rwld i) 15 (small w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom - ++ [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels - ++ [put (wpos rwld (root d) + 15) height (app "vector(0,-1)" (show (fromIntegral height-arcbase)))] - ++ [put (wpos rwld (root d) + 20) (height - 10) (small "ROOT")] - ++ ["\\end{picture}"] + [Comment (unwords (map fst (tokens d))), + Picture defaultUnit (width,height) ( + [Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words + ++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom + ++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels + ++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))] + ++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")] + )] where wld i = wordLength d i -- >= 20.0 rwld i = (wld i) / defaultWordLength -- >= 1.0 @@ -551,11 +561,18 @@ dep2latex d = depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted [] -> 0 uvs -> 1 + maximum [depth u v | (u,v) <- uvs] - width = round (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + round spaceLength * ((length (tokens d)) - 1) - height = 40 + 20 * round (maximum [aheight x y | ((x,y),_) <- deps d]) + width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1) + height = 40 + 20 * {-round-} (maximum [aheight x y | ((x,y),_) <- deps d]) -conll2dep :: String -> Dep -conll2dep str = Dep { +type CoNLL = [[String]] +parseCoNLL :: String -> CoNLL +parseCoNLL = map words . lines + +--conll2dep :: String -> Dep +--conll2dep = conll2dep' . parseCoNLL + +conll2dep' :: CoNLL -> Dep +conll2dep' ls = Dep { wordLength = wld , tokens = toks , deps = dps @@ -563,23 +580,154 @@ conll2dep str = Dep { } where wld i = maximum [charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]] - ls = map words (lines str) toks = [(w,c) | _:w:_:c:_ <- ls] dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"] - maxdist = maximum [abs (x-y) | ((x,y),_) <- dps] + --maxdist = maximum [abs (x-y) | ((x,y),_) <- dps] --- latex formatting -app macro arg = "\\" ++ macro ++ "{" ++ arg ++ "}" -put x y obj = app ("put(" ++ show x ++ "," ++ show y ++ ")") obj -small w = "{\\tiny " ++ w ++ "}" -comment s = "%% " ++ s +-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture) -latexDoc :: [String] -> String -latexDoc body = unlines $ - "\\documentclass{article}" - : "\\usepackage[utf8]{inputenc}" - : "\\begin{document}" - : body - ++ ["\\end{document}"] +-- We render both LaTeX and SVG from this intermediate representation of +-- LaTeX pictures. +data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand] +data DrawingCommand = Put Position Object +data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length + +type UnitLengthMM = Double +type Size = (Double,Double) +type Position = (Double,Double) +type Length = Double + + +-- * latex formatting +ppLaTeX = vcat . map ppLaTeX1 + where + ppLaTeX1 el = + case el of + Comment s -> comment s + Picture unit size cmds -> + app "setlength{\\unitlength}" (text (show unit ++ "mm")) + $$ hang (app "begin" (text "picture")<>text (show size)) 2 + (vcat (map ppDrawingCommand cmds)) + $$ app "end" (text "picture") + $$ text "" + + ppDrawingCommand (Put pos obj) = put pos (ppObject obj) + + ppObject obj = + case obj of + Text s -> text s + TinyText s -> small (text s) + OvalTop size -> text "\\oval" <> text (show size) <> text "[t]" + ArrowDown len -> app "vector(0,-1)" (text (show len)) + + put p@(_,_) = app ("put" ++ show p) + small w = text "{\\tiny" <+> w <> text "}" + comment s = text "%%" <+> text s -- line break show follow + +app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}" + + +latexDoc :: Doc -> Doc +latexDoc body = + vcat [text "\\documentclass{article}", + text "\\usepackage[utf8]{inputenc}", + text "\\begin{document}", + body, + text "\\end{document}"] + +-- | Render LaTeX pictures as SVG +toSVG = concatMap toSVG1 + where + toSVG1 el = + case el of + Comment s -> [] + Picture unit size@(w,h) cmds -> + [Elem "svg" ["width".=x1,"height".=y0+5, + ("viewBox",unwords (map show [0,0,x1,y0+5])), + ("version","1.1"), + ("xmlns","http://www.w3.org/2000/svg")] + (concatMap draw cmds)] + where + draw (Put pos obj) = objectSVG pos obj + + objectSVG pos obj = + case obj of + Text s -> [text 16 pos s] + TinyText s -> [text 10 pos s] + OvalTop size -> [ovalTop pos size] + ArrowDown len -> arrowDown pos len + + text h (x,y) s = + Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h] + [CharData s] + + ovalTop (x,y) (w,h) = + Elem "path" [("d",path),("stroke","black"),("fill","none")] [] + where + x1 = x-w/2 + x2 = min x (x1+r) + x3 = max x (x4-r) + x4 = x+w/2 + y1 = y + y2 = y+r + r = h/2 + sx = show . xc + sy = show . yc + path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2, + "L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1]) + + arrowDown (x,y) len = + [Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2, + ("stroke","black")] [], + Elem "path" [("d",unwords arrowhead)] []] + where + x2 = xc x + y2 = yc (y-len) + arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6] + + xc x = num x+5 + yc y = y0-num y + x1 = num w+10 + y0 = num h+20 + num x = round (scale*x) + scale = unit*5 + + infix 0 .= + n.=v = (n,show v) + +-- * SVG is XML + +data SVG = CharData String | Elem TagName Attrs [SVG] +type TagName = String +type Attrs = [(String,String)] + +ppSVG svg = + vcat [text "", + text "", + text "", + vcat (map ppSVG1 svg)] -- It should be a single element... + where + ppSVG1 svg1 = + case svg1 of + CharData s -> text (encode s) + Elem tag attrs [] -> + text "<"<>text tag<>cat (map attr attrs) <> text "/>" + Elem tag attrs svg -> + cat [text "<"<>text tag<>cat (map attr attrs) <> text ">", + nest 2 (cat (map ppSVG1 svg)), + text "text tag<>text ">"] + + attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\"" + + encode s = foldr encodeEntity "" s + + encodeEntity = encodeEntity' (const False) + encodeEntity' esc c r = + case c of + '&' -> "&"++r + '<' -> "<"++r + '>' -> ">"++r + _ -> c:r