mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 11:19:32 -06:00
PGF.VisualizeTree: add SVG renderering of word dependency trees
This was done by introducing an intermediate representation for the LaTeX pictures produced by the LaTeX renderer and providing a new backend that outputs SVG instead of LaTeX.
This commit is contained in:
@@ -127,7 +127,8 @@ graphvizDependencyTree
|
|||||||
-> String -- ^ Rendered output in the specified format
|
-> String -- ^ Rendered output in the specified format
|
||||||
graphvizDependencyTree format debug mlab ms pgf lang t =
|
graphvizDependencyTree format debug mlab ms pgf lang t =
|
||||||
case format of
|
case format of
|
||||||
"latex" -> conll2latex $ render $ vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
"latex" -> render . ppLaTeX $ conll2latex' conll
|
||||||
|
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
|
||||||
"conll" -> render $ vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
"conll" -> render $ vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
||||||
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
|
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
|
||||||
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
||||||
@@ -139,6 +140,7 @@ graphvizDependencyTree format debug mlab ms pgf lang t =
|
|||||||
vcat links) $$
|
vcat links) $$
|
||||||
text "}"
|
text "}"
|
||||||
where
|
where
|
||||||
|
conll = (map.map) render wnodes
|
||||||
nodes = map mkNode leaves
|
nodes = map mkNode leaves
|
||||||
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
||||||
|
|
||||||
@@ -490,10 +492,19 @@ tag i
|
|||||||
-- visualization with latex output. AR Nov 2015
|
-- visualization with latex output. AR Nov 2015
|
||||||
|
|
||||||
conlls2latexDoc :: [String] -> String
|
conlls2latexDoc :: [String] -> String
|
||||||
conlls2latexDoc = latexDoc . intersperse "\n\\vspace{4mm}\n" . map conll2latex . filter (not . null)
|
conlls2latexDoc =
|
||||||
|
render .
|
||||||
|
latexDoc .
|
||||||
|
vcat .
|
||||||
|
intersperse (text "" $+$ app "vspace" (text "4mm")) .
|
||||||
|
map conll2latex .
|
||||||
|
filter (not . null)
|
||||||
|
|
||||||
conll2latex :: String -> String
|
conll2latex :: String -> Doc
|
||||||
conll2latex = unlines . dep2latex . conll2dep
|
conll2latex = ppLaTeX . conll2latex' . parseCoNLL
|
||||||
|
|
||||||
|
conll2latex' :: CoNLL -> [LaTeX]
|
||||||
|
conll2latex' = dep2latex . conll2dep'
|
||||||
|
|
||||||
data Dep = Dep {
|
data Dep = Dep {
|
||||||
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
|
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
|
||||||
@@ -517,11 +528,11 @@ arcbase = 30.0 -- arcs start and end 40u above the bottom
|
|||||||
arcfactor r = r * 600 -- reduction of arc size from word distance
|
arcfactor r = r * 600 -- reduction of arc size from word distance
|
||||||
xyratio = 3 -- width/height ratio of arcs
|
xyratio = 3 -- width/height ratio of arcs
|
||||||
|
|
||||||
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> String
|
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
|
||||||
putArc frwld height x y label = unlines [oval,arrowhead,labelling] where
|
putArc frwld height x y label = [oval,arrowhead,labelling] where
|
||||||
oval = put ctr arcbase ("\\oval(" ++ show wdth ++ "," ++ show hght ++ ")[t]")
|
oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
|
||||||
arrowhead = put endp (arcbase + 5) (app "vector(0,-1)" "5") -- downgoing arrow 5u above the arc base
|
arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base
|
||||||
labelling = put (labelstart ctr) (labelheight (hght/2)) (small label)
|
labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
|
||||||
dxy = wdist frwld x y -- distance between words, >>= 20.0
|
dxy = wdist frwld x y -- distance between words, >>= 20.0
|
||||||
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
|
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
|
||||||
hdxy = dxy / 2 -- half the distance
|
hdxy = dxy / 2 -- half the distance
|
||||||
@@ -532,17 +543,16 @@ putArc frwld height x y label = unlines [oval,arrowhead,labelling] where
|
|||||||
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
|
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
|
||||||
rwld = 0.5 ----
|
rwld = 0.5 ----
|
||||||
|
|
||||||
dep2latex :: Dep -> [String]
|
dep2latex :: Dep -> [LaTeX]
|
||||||
dep2latex d =
|
dep2latex d =
|
||||||
comment (unwords (map fst (tokens d)))
|
[Comment (unwords (map fst (tokens d))),
|
||||||
: app "setlength{\\unitlength}" (show defaultUnit ++ "mm")
|
Picture defaultUnit (width,height) (
|
||||||
: ("\\begin{picture}(" ++ show width ++ "," ++ show height ++ ")")
|
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
|
||||||
: [put (wpos rwld i) 0 w | (i,w) <- zip [0..] (map fst (tokens d))] -- words
|
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
|
||||||
++ [put (wpos rwld i) 15 (small w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
|
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
|
||||||
++ [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
|
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
|
||||||
++ [put (wpos rwld (root d) + 15) height (app "vector(0,-1)" (show (fromIntegral height-arcbase)))]
|
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
|
||||||
++ [put (wpos rwld (root d) + 20) (height - 10) (small "ROOT")]
|
)]
|
||||||
++ ["\\end{picture}"]
|
|
||||||
where
|
where
|
||||||
wld i = wordLength d i -- >= 20.0
|
wld i = wordLength d i -- >= 20.0
|
||||||
rwld i = (wld i) / defaultWordLength -- >= 1.0
|
rwld i = (wld i) / defaultWordLength -- >= 1.0
|
||||||
@@ -551,11 +561,18 @@ dep2latex d =
|
|||||||
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
|
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
|
||||||
[] -> 0
|
[] -> 0
|
||||||
uvs -> 1 + maximum [depth u v | (u,v) <- uvs]
|
uvs -> 1 + maximum [depth u v | (u,v) <- uvs]
|
||||||
width = round (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + round spaceLength * ((length (tokens d)) - 1)
|
width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
|
||||||
height = 40 + 20 * round (maximum [aheight x y | ((x,y),_) <- deps d])
|
height = 40 + 20 * {-round-} (maximum [aheight x y | ((x,y),_) <- deps d])
|
||||||
|
|
||||||
conll2dep :: String -> Dep
|
type CoNLL = [[String]]
|
||||||
conll2dep str = Dep {
|
parseCoNLL :: String -> CoNLL
|
||||||
|
parseCoNLL = map words . lines
|
||||||
|
|
||||||
|
--conll2dep :: String -> Dep
|
||||||
|
--conll2dep = conll2dep' . parseCoNLL
|
||||||
|
|
||||||
|
conll2dep' :: CoNLL -> Dep
|
||||||
|
conll2dep' ls = Dep {
|
||||||
wordLength = wld
|
wordLength = wld
|
||||||
, tokens = toks
|
, tokens = toks
|
||||||
, deps = dps
|
, deps = dps
|
||||||
@@ -563,23 +580,154 @@ conll2dep str = Dep {
|
|||||||
}
|
}
|
||||||
where
|
where
|
||||||
wld i = maximum [charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]]
|
wld i = maximum [charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]]
|
||||||
ls = map words (lines str)
|
|
||||||
toks = [(w,c) | _:w:_:c:_ <- ls]
|
toks = [(w,c) | _:w:_:c:_ <- ls]
|
||||||
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
|
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
|
||||||
maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
|
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
|
||||||
|
|
||||||
-- latex formatting
|
|
||||||
|
|
||||||
app macro arg = "\\" ++ macro ++ "{" ++ arg ++ "}"
|
-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
|
||||||
put x y obj = app ("put(" ++ show x ++ "," ++ show y ++ ")") obj
|
|
||||||
small w = "{\\tiny " ++ w ++ "}"
|
|
||||||
comment s = "%% " ++ s
|
|
||||||
|
|
||||||
latexDoc :: [String] -> String
|
-- We render both LaTeX and SVG from this intermediate representation of
|
||||||
latexDoc body = unlines $
|
-- LaTeX pictures.
|
||||||
"\\documentclass{article}"
|
|
||||||
: "\\usepackage[utf8]{inputenc}"
|
|
||||||
: "\\begin{document}"
|
|
||||||
: body
|
|
||||||
++ ["\\end{document}"]
|
|
||||||
|
|
||||||
|
data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
|
||||||
|
data DrawingCommand = Put Position Object
|
||||||
|
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
|
||||||
|
|
||||||
|
type UnitLengthMM = Double
|
||||||
|
type Size = (Double,Double)
|
||||||
|
type Position = (Double,Double)
|
||||||
|
type Length = Double
|
||||||
|
|
||||||
|
|
||||||
|
-- * latex formatting
|
||||||
|
ppLaTeX = vcat . map ppLaTeX1
|
||||||
|
where
|
||||||
|
ppLaTeX1 el =
|
||||||
|
case el of
|
||||||
|
Comment s -> comment s
|
||||||
|
Picture unit size cmds ->
|
||||||
|
app "setlength{\\unitlength}" (text (show unit ++ "mm"))
|
||||||
|
$$ hang (app "begin" (text "picture")<>text (show size)) 2
|
||||||
|
(vcat (map ppDrawingCommand cmds))
|
||||||
|
$$ app "end" (text "picture")
|
||||||
|
$$ text ""
|
||||||
|
|
||||||
|
ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
|
||||||
|
|
||||||
|
ppObject obj =
|
||||||
|
case obj of
|
||||||
|
Text s -> text s
|
||||||
|
TinyText s -> small (text s)
|
||||||
|
OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
|
||||||
|
ArrowDown len -> app "vector(0,-1)" (text (show len))
|
||||||
|
|
||||||
|
put p@(_,_) = app ("put" ++ show p)
|
||||||
|
small w = text "{\\tiny" <+> w <> text "}"
|
||||||
|
comment s = text "%%" <+> text s -- line break show follow
|
||||||
|
|
||||||
|
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
|
||||||
|
|
||||||
|
|
||||||
|
latexDoc :: Doc -> Doc
|
||||||
|
latexDoc body =
|
||||||
|
vcat [text "\\documentclass{article}",
|
||||||
|
text "\\usepackage[utf8]{inputenc}",
|
||||||
|
text "\\begin{document}",
|
||||||
|
body,
|
||||||
|
text "\\end{document}"]
|
||||||
|
|
||||||
|
-- | Render LaTeX pictures as SVG
|
||||||
|
toSVG = concatMap toSVG1
|
||||||
|
where
|
||||||
|
toSVG1 el =
|
||||||
|
case el of
|
||||||
|
Comment s -> []
|
||||||
|
Picture unit size@(w,h) cmds ->
|
||||||
|
[Elem "svg" ["width".=x1,"height".=y0+5,
|
||||||
|
("viewBox",unwords (map show [0,0,x1,y0+5])),
|
||||||
|
("version","1.1"),
|
||||||
|
("xmlns","http://www.w3.org/2000/svg")]
|
||||||
|
(concatMap draw cmds)]
|
||||||
|
where
|
||||||
|
draw (Put pos obj) = objectSVG pos obj
|
||||||
|
|
||||||
|
objectSVG pos obj =
|
||||||
|
case obj of
|
||||||
|
Text s -> [text 16 pos s]
|
||||||
|
TinyText s -> [text 10 pos s]
|
||||||
|
OvalTop size -> [ovalTop pos size]
|
||||||
|
ArrowDown len -> arrowDown pos len
|
||||||
|
|
||||||
|
text h (x,y) s =
|
||||||
|
Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h]
|
||||||
|
[CharData s]
|
||||||
|
|
||||||
|
ovalTop (x,y) (w,h) =
|
||||||
|
Elem "path" [("d",path),("stroke","black"),("fill","none")] []
|
||||||
|
where
|
||||||
|
x1 = x-w/2
|
||||||
|
x2 = min x (x1+r)
|
||||||
|
x3 = max x (x4-r)
|
||||||
|
x4 = x+w/2
|
||||||
|
y1 = y
|
||||||
|
y2 = y+r
|
||||||
|
r = h/2
|
||||||
|
sx = show . xc
|
||||||
|
sy = show . yc
|
||||||
|
path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2,
|
||||||
|
"L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1])
|
||||||
|
|
||||||
|
arrowDown (x,y) len =
|
||||||
|
[Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2,
|
||||||
|
("stroke","black")] [],
|
||||||
|
Elem "path" [("d",unwords arrowhead)] []]
|
||||||
|
where
|
||||||
|
x2 = xc x
|
||||||
|
y2 = yc (y-len)
|
||||||
|
arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6]
|
||||||
|
|
||||||
|
xc x = num x+5
|
||||||
|
yc y = y0-num y
|
||||||
|
x1 = num w+10
|
||||||
|
y0 = num h+20
|
||||||
|
num x = round (scale*x)
|
||||||
|
scale = unit*5
|
||||||
|
|
||||||
|
infix 0 .=
|
||||||
|
n.=v = (n,show v)
|
||||||
|
|
||||||
|
-- * SVG is XML
|
||||||
|
|
||||||
|
data SVG = CharData String | Elem TagName Attrs [SVG]
|
||||||
|
type TagName = String
|
||||||
|
type Attrs = [(String,String)]
|
||||||
|
|
||||||
|
ppSVG svg =
|
||||||
|
vcat [text "<?xml version=\"1.0\" standalone=\"no\"?>",
|
||||||
|
text "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"",
|
||||||
|
text "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">",
|
||||||
|
text "",
|
||||||
|
vcat (map ppSVG1 svg)] -- It should be a single <svg> element...
|
||||||
|
where
|
||||||
|
ppSVG1 svg1 =
|
||||||
|
case svg1 of
|
||||||
|
CharData s -> text (encode s)
|
||||||
|
Elem tag attrs [] ->
|
||||||
|
text "<"<>text tag<>cat (map attr attrs) <> text "/>"
|
||||||
|
Elem tag attrs svg ->
|
||||||
|
cat [text "<"<>text tag<>cat (map attr attrs) <> text ">",
|
||||||
|
nest 2 (cat (map ppSVG1 svg)),
|
||||||
|
text "</"<>text tag<>text ">"]
|
||||||
|
|
||||||
|
attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\""
|
||||||
|
|
||||||
|
encode s = foldr encodeEntity "" s
|
||||||
|
|
||||||
|
encodeEntity = encodeEntity' (const False)
|
||||||
|
encodeEntity' esc c r =
|
||||||
|
case c of
|
||||||
|
'&' -> "&"++r
|
||||||
|
'<' -> "<"++r
|
||||||
|
'>' -> ">"++r
|
||||||
|
_ -> c:r
|
||||||
|
|||||||
Reference in New Issue
Block a user