vd -conll2latex now converts CoNLL to LaTeX directly, without going through GF trees; it is provided as a service to the dependency parser community.

This commit is contained in:
aarne
2015-11-23 10:43:03 +00:00
parent 1d80db4919
commit 0a38e137b6
3 changed files with 28 additions and 17 deletions

View File

@@ -551,25 +551,29 @@ pgfCommands = Map.fromList [
_ -> (Just . getDepLabels . lines) `fmap` restricted (readFile file) _ -> (Just . getDepLabels . lines) `fmap` restricted (readFile file)
let lang = optLang pgf opts let lang = optLang pgf opts
let grphs = map (graphvizDependencyTree outp debug mlab Nothing pgf lang) es let grphs = map (graphvizDependencyTree outp debug mlab Nothing pgf lang) es
if isFlag "view" opts && valStrOpts "output" "" opts == "latex" if isOpt "conll2latex" opts
then do then return $ fromString $ conlls2latexDoc $ stanzas $ unlines $ toStrings es
let view = optViewGraph opts else if isFlag "view" opts && valStrOpts "output" "" opts == "latex"
viewLatex view "_grphd_" grphs
else if isFlag "view" opts || isFlag "format" opts
then do then do
let view = optViewGraph opts let view = optViewGraph opts
let format = optViewFormat opts viewLatex view "_grphd_" grphs
viewGraphviz view format "_grphd_" grphs else if isFlag "view" opts || isFlag "format" opts
else return $ fromString $ unlines grphs, then do
let view = optViewGraph opts
let format = optViewFormat opts
viewGraphviz view format "_grphd_" grphs
else return $ fromString $ unlines $ intersperse "" grphs,
examples = [ examples = [
mkEx "gr | vd -- generate a tree and show dependency tree in .dot", mkEx "gr | vd -- generate a tree and show dependency tree in .dot",
mkEx "gr | vd -view=open -- generate a tree and display dependency tree on a Mac", mkEx "gr | vd -view=open -- generate a tree and display dependency tree on a Mac",
mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree on a Mac", mkEx "gr | vd -view=open -output=latex -- generate a tree and display latex dependency tree on a Mac",
mkEx "gr -number=1000 | vd -file=dep.labels -output=conll -- generate training treebank", mkEx "gr -number=1000 | vd -file=dep.labels -output=conll -- generate training treebank",
mkEx "gr -number=100 | vd -file=dep.labels -output=malt_input -- generate test sentences" mkEx "gr -number=100 | vd -file=dep.labels -output=malt_input -- generate test sentences",
mkEx "rf -file=ex.conll | vd -conll2latex | wf -file=ex.tex -- convert conll file to latex"
], ],
options = [ options = [
("v","show extra information") ("v","show extra information"),
("conll2latex", "convert conll to latex")
], ],
flags = [ flags = [
("file","configuration file for labels, format per line 'fun label*'"), ("file","configuration file for labels, format per line 'fun label*'"),
@@ -990,3 +994,8 @@ latexDoc body = unlines $
spaces = intersperse "\\vspace{6mm}" spaces = intersperse "\\vspace{6mm}"
---- also reduce the size for long sentences ---- also reduce the size for long sentences
-- | Split a text into stanzas: maximal runs of lines delimited by blank lines.
-- Each stanza is returned re-joined with 'unlines', so it ends in a newline.
--
-- A line counts as blank when it is empty or consists only of spaces, tabs
-- or carriage returns. The latter matters for CRLF-encoded input, where
-- 'lines' leaves a lone "\r" on every visually empty line; comparing with ""
-- alone would then never find a separator.
--
-- Consecutive blank lines (and empty input) still yield empty stanzas, as
-- before; callers that do not want them should filter them out.
stanzas :: String -> [String]
stanzas = map unlines . chop . lines
  where
    -- Separator test: empty or whitespace-only line.
    isBlank = all (`elem` " \t\r")

    -- Cut the line list at every separator, dropping the separator itself.
    chop ls = case break isBlank ls of
      (ls1, [])      -> [ls1]
      (ls1, _ : ls2) -> ls1 : chop ls2

View File

@@ -129,6 +129,7 @@ module PGF(
gizaAlignment, gizaAlignment,
GraphvizOptions(..), GraphvizOptions(..),
graphvizDefaults, graphvizDefaults,
conlls2latexDoc,
-- extra: -- extra:
getDepLabels, getDepLabels,

View File

@@ -20,6 +20,7 @@ module PGF.VisualizeTree
, graphvizAlignment , graphvizAlignment
, gizaAlignment , gizaAlignment
, getDepLabels , getDepLabels
, conlls2latexDoc
) where ) where
import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId, import PGF.CId (wildCId,showCId,ppCId,mkCId) --CId,pCId,
@@ -476,7 +477,7 @@ tag i
-- visualization with latex output. AR Nov 2015 -- visualization with latex output. AR Nov 2015
conlls2latexDoc :: [String] -> String conlls2latexDoc :: [String] -> String
conlls2latexDoc = latexDoc . intersperse "\n\\vspace{4mm}\n" . map conll2latex conlls2latexDoc = latexDoc . intersperse "\n\\vspace{4mm}\n" . map conll2latex . filter (not . null)
conll2latex :: String -> String conll2latex :: String -> String
conll2latex = unlines . dep2latex . conll2dep conll2latex = unlines . dep2latex . conll2dep
@@ -496,11 +497,11 @@ defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres
wsize rwld = 100 * rwld -- word length, units wsize rwld = 100 * rwld -- word length, units
wpos rwld i = fromIntegral i * wsize rwld -- start position of the i'th word wpos rwld i = fromIntegral i * wsize rwld -- start position of the i'th word
wdist rwld x y = wsize rwld * fromIntegral (abs (x-y)) -- distance between words x and y wdist rwld x y = wsize rwld * fromIntegral (abs (x-y)) -- distance between words x and y
labelheight h = h/2 + arcbase + 5 -- label just above arc; 25 would put it just below labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below
labelstart c = c - 15.0 -- label starts 15u left of arc centre labelstart c = c - 15.0 -- label starts 15u left of arc centre
arcbase = 30.0 -- arcs start and end 40u above the bottom arcbase = 30.0 -- arcs start and end 40u above the bottom
arcfactor r = r * 1500 -- reduction of arc size from word distance arcfactor r = r * 1500 -- reduction of arc size from word distance
xyratio = 3 -- width/height ratio of arcs
dep2latex :: Dep -> [String] dep2latex :: Dep -> [String]
dep2latex d = dep2latex d =
@@ -522,11 +523,11 @@ putArc :: Double -> Int -> Int -> String -> String
putArc rwld x y label = unlines [oval,arrowhead,labelling] where putArc rwld x y label = unlines [oval,arrowhead,labelling] where
oval = put ctr arcbase ("\\oval(" ++ show wdth ++ "," ++ show hght ++ ")[t]") oval = put ctr arcbase ("\\oval(" ++ show wdth ++ "," ++ show hght ++ ")[t]")
arrowhead = put endp (arcbase + 5) (app "vector(0,-1)" "5") -- downgoing arrow 5u above the arc base arrowhead = put endp (arcbase + 5) (app "vector(0,-1)" "5") -- downgoing arrow 5u above the arc base
labelling = put (labelstart ctr) (labelheight hght) (small label) labelling = put (labelstart ctr) (labelheight (hght/2)) (small label)
dxy = wdist rwld x y -- distance between words, >>= 20.0 dxy = wdist rwld x y -- distance between words, >>= 20.0
hdxy = dxy / 2 -- half the distance hdxy = dxy / 2 -- half the distance
wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are less wide in proportion wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are less wide in proportion
hght = hdxy / rwld -- arc height is independent of word length hght = dxy / (xyratio * rwld) -- arc height is independent of word length
begp = min x y -- begin position of oval begp = min x y -- begin position of oval
ctr = wpos rwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval ctr = wpos rwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
@@ -537,7 +538,7 @@ conll2dep str = Dep {
, tokens = toks , tokens = toks
, deps = dps , deps = dps
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1] , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
, pictureSize = (round (wsize rwld * fromIntegral (length ls)), 60 + 25*maxdist) -- highest arc + 60u , pictureSize = (round (wsize rwld * fromIntegral (length ls)), 60 + 16*maxdist) -- highest arc + 60u
} }
where where
wld = maximum [2 * fromIntegral (length w) | w <- map fst toks ++ map snd toks] wld = maximum [2 * fromIntegral (length w) | w <- map fst toks ++ map snd toks]