From 9335a2997c5784d36e65d019ce39409a5bde5a95 Mon Sep 17 00:00:00 2001 From: aarne Date: Wed, 18 Nov 2015 08:12:26 +0000 Subject: [PATCH] reduced label size and optimized arc length in vd -output=latex --- src/runtime/haskell/PGF/VisualizeTree.hs | 88 +++++++++++------------- 1 file changed, 39 insertions(+), 49 deletions(-) diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 7e3ac1ae6..2aaa84bc1 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -475,7 +475,6 @@ tag i -- visualization with latex output. AR Nov 2015 --- convert a set of CoNLL annotated dependency trees into LaTeX pictures conlls2latexDoc :: [String] -> String conlls2latexDoc = latexDoc . intersperse "\n\\vspace{4mm}\n" . map conll2latex @@ -489,14 +488,19 @@ data Dep = Dep { , root :: Int -- root word position , pictureSize :: (Int,Int) -- width = #words*wordlength } - -defaultWordLength :: Double -defaultWordLength = 20.0 -- the minimum fixed width word length -defaultUnit :: Double -defaultUnit = 0.2 -- 0.2 millimetres +-- some general measures +defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units +defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres + +wsize rwld = 100 * rwld -- word length, units +wpos rwld i = fromIntegral i * wsize rwld -- start position of the i'th word +wdist rwld x y = wsize rwld * fromIntegral (abs (x-y)) -- distance between words x and y +labelheight h = h/2 + arcbase + 5 -- label just above arc; 25 would put it just below +labelstart c = c - 15.0 -- label starts 15u left of arc centre +arcbase = 30.0 -- arcs start and end 40u above the bottom +arcfactor r = r * 1500 -- reduction of arc size from word distance -comment s = "%% " ++ s dep2latex :: Dep -> [String] dep2latex d = @@ -504,37 +508,52 @@ dep2latex d = : app "setlength{\\unitlength}" (show defaultUnit ++ "mm") : ("\\begin{picture}(" ++ show width ++ "," ++ show height ++ ")") : [put (wpos rwld i) 0 w | (i,w) <- zip [0..] (map fst (tokens d))] -- words - ++ [put (wpos rwld i) 15 w | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom + ++ [put (wpos rwld i) 15 (small w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom ++ [putArc rwld x y label | ((x,y),label) <- deps d] -- arcs and labels - ++ [put (wpos rwld (root d) + 10) height (app "vector(0,-1)" (show (height-40)))] - ++ [put (wpos rwld (root d) + 15) (height - 10) "ROOT"] + ++ [put (wpos rwld (root d) + 15) height (app "vector(0,-1)" (show (fromIntegral height-arcbase)))] + ++ [put (wpos rwld (root d) + 20) (height - 10) (small "ROOT")] ++ ["\\end{picture}"] where (width,height) = case pictureSize d of (w,h) -> (w, h) wld = wordLength d -- >= 20.0 rwld = wld / defaultWordLength -- >= 1.0 --- some general measures -wsize rwld = 100 * rwld -- word length, units -wpos rwld i = fromIntegral i * wsize rwld -- start position of the i'th word -wdist rwld x y = wsize rwld * fromIntegral (abs (x-y)) -- distance between words x and y -labelheight h = h/2 + 45 -- label just above arc; 25 would put it just below -labelstart c = c - 20.0 -- label starts 20u left of arc centre -arcbase = 40.0 -- arcs start and end 40u above the bottom - putArc :: Double -> Int -> Int -> String -> String putArc rwld x y label = unlines [oval,arrowhead,labelling] where oval = put ctr arcbase ("\\oval(" ++ show wdth ++ "," ++ show hght ++ ")[t]") arrowhead = put endp (arcbase + 5) (app "vector(0,-1)" "5") -- downgoing arrow 5u above the arc base - labelling = put (labelstart ctr) (labelheight hght) label + labelling = put (labelstart ctr) (labelheight hght) (small label) dxy = wdist rwld x y -- distance between words, >>= 20.0 hdxy = dxy / 2 -- half the distance - wdth = dxy - 3000/dxy -- longer arcs are less wide in proportion + wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are less wide in proportion hght = hdxy / rwld -- arc height is independent of word length begp = min x y -- begin position of oval ctr = wpos rwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow +conll2dep :: String -> Dep +conll2dep str = Dep { + wordLength = wld + , tokens = toks + , deps = dps + , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1] + , pictureSize = (round (wsize rwld * fromIntegral (length ls)), 60 + 25*maxdist) -- highest arc + 60u + } + where + wld = maximum [2 * fromIntegral (length w) | w <- map fst toks ++ map snd toks] + ls = map words (lines str) + toks = [(w,c) | _:w:_:c:_ <- ls] + dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"] + rwld = wld / defaultWordLength + maxdist = maximum [abs (x-y) | ((x,y),_) <- dps] + +-- latex formatting + +app macro arg = "\\" ++ macro ++ "{" ++ arg ++ "}" +put x y obj = app ("put(" ++ show x ++ "," ++ show y ++ ")") obj +small w = "{\\tiny " ++ w ++ "}" +comment s = "%% " ++ s + latexDoc :: [String] -> String latexDoc body = unlines $ "\\documentclass{article}" @@ -543,32 +562,3 @@ latexDoc body = unlines $ : body ++ ["\\end{document}"] --- initialize with just the words, at optimal constant distances -string2dep :: String -> Dep -string2dep s = Dep { - wordLength = max defaultWordLength (maximum [2 * fromIntegral (length w) | w <- ws]) - , tokens = zip ws (repeat "WORD") - , deps = [] - , root = 0 - , pictureSize = (100*length ws, (100*length ws) `div` 2) - } - where ws = words s - -app macro arg = "\\" ++ macro ++ "{" ++ arg ++ "}" -put x y obj = app ("put(" ++ show x ++ "," ++ show y ++ ")") obj - -conll2dep :: String -> Dep -conll2dep str = dep0 { - tokens = toks - , deps = dps - , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1] - , pictureSize = (rwld*100*length ls, (20 + 50*(maximum [abs (x-y) | ((x,y),_) <- dps]))) - } - where - dep0 = string2dep sentence - ls = map words (lines str) - sentence = unwords (map fst toks) - toks = [(w,c) | _:w:_:c:_ <- ls] - dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"] - rwld = round (wordLength dep0 / defaultWordLength) -