From e32e9147034b564b2de1b0a54be714c98a0daf9b Mon Sep 17 00:00:00 2001 From: aarne Date: Tue, 16 Dec 2008 13:56:23 +0000 Subject: [PATCH] datatype for bracketed texts, and improved word alignment --- src/PGF/Linearize.hs | 3 +- src/PGF/VisualizeTree.hs | 69 +++++++++++++++++++++++++--------------- 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/src/PGF/Linearize.hs b/src/PGF/Linearize.hs index 36f639053..c15bbd105 100644 --- a/src/PGF/Linearize.hs +++ b/src/PGF/Linearize.hs @@ -146,11 +146,10 @@ linTreeMark pgf lang = lin [] R ts -> R $ map (mark p) ts FV ts -> R $ map (mark p) ts S ts -> S $ bracket p ts - K s -> S $ bracketw p [t] + K s -> S $ bracket p [t] W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts] _ -> t -- otherwise in normal form bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"] - bracketw p ts = [kks ("{"++show p)] ++ ts ++ [kks "}"] -- for easy word alignment sub p i = p ++ [i] diff --git a/src/PGF/VisualizeTree.hs b/src/PGF/VisualizeTree.hs index 401b30b96..943e79efb 100644 --- a/src/PGF/VisualizeTree.hs +++ b/src/PGF/VisualizeTree.hs @@ -16,6 +16,7 @@ ----------------------------------------------------------------------------- module PGF.VisualizeTree ( visualizeTrees, alignLinearize + ,PosText(..),readPosText ) where import PGF.CId (prCId) @@ -23,7 +24,9 @@ import PGF.Data import PGF.Linearize import PGF.Macros (lookValCat) -import Data.List (intersperse) +import Data.List (intersperse,nub) +import Data.Char (isDigit) +import qualified Text.ParserCombinators.ReadP as RP visualizeTrees :: PGF -> (Bool,Bool) -> [Tree] -> String visualizeTrees pgf funscats = unlines . map (prGraph False . tree2graph pgf funscats) @@ -65,31 +68,12 @@ lin2graph ss = prelude ++ nodes ++ links prelude = ["rankdir=LR ;", "node [shape = record] ;"] - -- the plain string, with syncategorematic words included - strings = filter (flip notElem "{[()]}" . head) . words - - -- find all lexicalized words - lins :: String -> [(String,String)] - lins [] = [] - lins s = let (s1, s2) = if null s then ([],[]) else span (/='{') s in - let (s21,s22) = if null s2 then ([],[]) else span (/='}') (tail s2) in - if null s21 then lins s22 else wlink s21 : lins s22 - - -- separate a word to the link (1,2,3) and the word itself - wlink :: String -> (String,String) - wlink s = let (s1, s2) = span (/=']') s in - (tail s1, unwords (words (init (drop 1 s2)))) - - -- to merge in syncat words - slins i s = merge (strings s) (lins s) where - merge ws cs = case (ws,cs) of - (w:ws2,(m,c):cs2) | w==c -> (m,c) : merge ws2 cs2 - (w:ws2,_ ) -> ("w" ++ show i,w) : merge ws2 cs - _ -> [] - - -- make all marks unique to deal with discontinuities nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,m),w) | (j,(m,w)) <- zip [0..] (slins i s)]) | (i,s) <- zip [0..] ss] + nlins = [(i, [((0,showp p),unw ws) | (p,ws) <- ws]) | + (i,ws) <- zip [0..] (map (wlins . readPosText) ss)] + + unw = concat . intersperse "\\ " -- space escape in graphviz + showp = init . tail . show nodes = map mkStruct nlins @@ -105,7 +89,7 @@ lin2graph ss = prelude ++ nodes ++ links tag s = "<" ++ s ++ ">" - links = concatMap mkEdge (init nlins) + links = nub $ concatMap mkEdge (init nlins) mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] @@ -113,6 +97,39 @@ lin2graph ss = prelude ++ nodes ++ links edge i v w = struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" +wlins :: PosText -> [([Int],[String])] +wlins pt = case pt of + T p pts -> concatMap (lins p) pts + M ws -> if null ws then [] else [([],ws)] + where + lins p pt = case pt of + T q pts -> concatMap (lins q) pts + M ws -> if null ws then [] else [(p,ws)] + +data PosText = + T [Int] [PosText] + | M [String] + deriving Show + +readPosText :: String -> PosText +readPosText = fst . head . (RP.readP_to_S pPosText) where + pPosText = do + RP.char '(' >> RP.skipSpaces + p <- pPos + RP.skipSpaces + ts <- RP.many pPosText + RP.char ')' >> RP.skipSpaces + return (T p ts) + RP.<++ do + ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') + return (M ws) + pPos = do + RP.char '[' >> RP.skipSpaces + is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',') + RP.char ']' >> RP.skipSpaces + return (map read is) + + {- digraph{ rankdir ="LR" ;