1
0
forked from GitHub/gf-core

Allow comments in dependency configs and fix CoNLL output

This commit is contained in:
Prasanth Kolachina
2018-12-17 12:02:13 +01:00
parent eb22112178
commit 61c1510620

View File

@@ -236,10 +236,18 @@ graphvizDependencyTree format debug mlab mclab pgf lang t =
root_lbl = "ROOT"
unspec = text "_"
-- auxiliaries for UD conversion PK 15/12/2018
-- | Cut a line at its first @--@: everything from the first two
-- consecutive dashes onwards is discarded, while single dashes are kept.
-- The text before the marker is returned unchanged (including any
-- trailing spaces).
rmcomments :: String -> String
rmcomments line = case break (== '-') line of
  -- two dashes in a row start a comment: keep only what precedes them
  (before, '-':'-':_) -> before
  -- a lone dash is ordinary text: keep it and continue after it
  (before, '-':rest)  -> before ++ '-' : rmcomments rest
  -- no dash left in the remainder of the line
  (before, _)         -> before
-- | Prepare lines obtained from a configuration file for labels for
-- use with 'graphvizDependencyTree'. Format per line /fun/ /label/@*@.
-- Each line is first passed through 'rmcomments', so anything from @--@
-- to the end of the line is ignored. Lines that contain no word after
-- that (blank or comment-only lines) are skipped.
getDepLabels :: String -> Labels
getDepLabels s = Map.fromList [(mkCId f,ls) | f:ls <- map (words . rmcomments) (lines s)]
-- the old function, without dependencies
graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String
@@ -784,6 +792,7 @@ getCncDepLabels =
sortBy (comparing fst) .
concatMap analyse .
filter choose .
map rmcomments .
lines
where
--- choose is for compatibility with the general notation
@@ -806,6 +815,7 @@ getCncDepLabels =
toks s = case lex s of [(t,"")] -> [t] ; [(t,cc)] -> t:toks cc ; _ -> []
unquote s = case s of '"':cc@(_:_) | last cc == '"' -> init cc ; _ -> s
-- | Render a CoNLL table as text: the fields of each row are joined with
-- tabs and the rows are joined with newlines. No newline follows the last
-- row; a trailing one led to two empty lines between sentences
-- (PK 17/12/2018). Joining with 'intersperse' instead of @init . unlines@
-- gives the same text for non-empty tables and yields the empty string,
-- rather than an exception, for an empty table.
printCoNLL :: CoNLL -> String
printCoNLL = concat . intersperse "\n" . map (concat . intersperse "\t")