From e32e9147034b564b2de1b0a54be714c98a0daf9b Mon Sep 17 00:00:00 2001
From: aarne <aarne@cs.chalmers.se>
Date: Tue, 16 Dec 2008 13:56:23 +0000
Subject: [PATCH] datatype for bracketed texts, and improved word alignment

---
 src/PGF/Linearize.hs     |  3 +-
 src/PGF/VisualizeTree.hs | 69 +++++++++++++++++++++++++---------------
 2 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/src/PGF/Linearize.hs b/src/PGF/Linearize.hs
index 36f639053..c15bbd105 100644
--- a/src/PGF/Linearize.hs
+++ b/src/PGF/Linearize.hs
@@ -146,11 +146,10 @@ linTreeMark pgf lang = lin []
       R  ts -> R $ map (mark p) ts
       FV ts -> R $ map (mark p) ts
       S  ts -> S $ bracket p ts
-      K  s  -> S $ bracketw p [t]
+      K  s  -> S $ bracket p [t]
       W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts]
       _     -> t
       -- otherwise in normal form
 
     bracket  p ts = [kks ("("++show p)] ++ ts ++ [kks ")"]
-    bracketw p ts = [kks ("{"++show p)] ++ ts ++ [kks "}"] -- for easy word alignment
     sub p i = p ++ [i]
diff --git a/src/PGF/VisualizeTree.hs b/src/PGF/VisualizeTree.hs
index 401b30b96..943e79efb 100644
--- a/src/PGF/VisualizeTree.hs
+++ b/src/PGF/VisualizeTree.hs
@@ -16,6 +16,7 @@
 -----------------------------------------------------------------------------
 
 module PGF.VisualizeTree ( visualizeTrees, alignLinearize
+  ,PosText(..),readPosText
 			) where
 
 import PGF.CId (prCId)
@@ -23,7 +24,9 @@ import PGF.Data
 import PGF.Linearize
 import PGF.Macros (lookValCat)
 
-import Data.List (intersperse)
+import Data.List (intersperse,nub)
+import Data.Char (isDigit)
+import qualified Text.ParserCombinators.ReadP as RP
 
 visualizeTrees :: PGF -> (Bool,Bool) -> [Tree] -> String
 visualizeTrees pgf funscats = unlines . map (prGraph False . tree2graph pgf funscats)
@@ -65,31 +68,12 @@ lin2graph ss = prelude ++ nodes ++ links
 
   prelude = ["rankdir=LR ;", "node [shape = record] ;"]
 
-  -- the plain string, with syncategorematic words included
-  strings = filter (flip notElem "{[()]}" . head) . words
-
-  -- find all lexicalized words
-  lins :: String -> [(String,String)]
-  lins [] = []
-  lins s = let (s1, s2)  = if null s  then ([],[]) else span (/='{') s in 
-           let (s21,s22) = if null s2 then ([],[]) else span (/='}') (tail s2) in
-           if null s21 then lins s22 else wlink s21 : lins s22
-
-  -- separate a word to the link (1,2,3) and the word itself
-  wlink :: String -> (String,String)
-  wlink s = let (s1, s2)  = span (/=']') s in 
-            (tail s1, unwords (words (init (drop 1 s2))))
-
-  -- to merge in syncat words
-  slins i s = merge (strings s) (lins s) where
-    merge ws cs = case (ws,cs) of
-      (w:ws2,(m,c):cs2) | w==c -> (m,c) : merge ws2 cs2
-      (w:ws2,_        )        -> ("w" ++ show i,w) : merge ws2 cs
-      _                        -> []
-
-  -- make all marks unique to deal with discontinuities
   nlins :: [(Int,[((Int,String),String)])]
-  nlins = [(i, [((j,m),w) | (j,(m,w)) <- zip [0..] (slins i s)]) | (i,s) <- zip [0..] ss]
+  nlins = [(i, [((0,showp p),unw ws) | (p,ws) <- ws]) | 
+                                  (i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
+
+  unw = concat . intersperse "\\ "  -- space escape in graphviz
+  showp = init . tail . show
 
   nodes = map mkStruct nlins
 
@@ -105,7 +89,7 @@ lin2graph ss = prelude ++ nodes ++ links
 
   tag s = "<" ++ s ++ ">"
 
-  links = concatMap mkEdge (init nlins)
+  links = nub $ concatMap mkEdge (init nlins)
 
   mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
     [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
@@ -113,6 +97,39 @@ lin2graph ss = prelude ++ nodes ++ links
   edge i v w = 
     struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
 
+wlins :: PosText -> [([Int],[String])]
+wlins pt = case pt of
+  T p pts -> concatMap (lins p) pts
+  M ws -> if null ws then [] else [([],ws)]
+ where
+  lins p pt = case pt of
+    T q pts -> concatMap (lins q) pts
+    M ws -> if null ws then [] else [(p,ws)]
+
+data PosText = 
+   T [Int] [PosText]
+ | M [String]
+  deriving Show
+
+readPosText :: String -> PosText
+readPosText = fst . head . (RP.readP_to_S pPosText) where
+  pPosText = do
+    RP.char '(' >> RP.skipSpaces
+    p  <- pPos  
+    RP.skipSpaces
+    ts <- RP.many pPosText
+    RP.char ')' >> RP.skipSpaces
+    return (T p ts)
+   RP.<++ do
+    ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') 
+    return (M ws) 
+  pPos = do
+    RP.char '[' >> RP.skipSpaces
+    is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
+    RP.char ']' >> RP.skipSpaces
+    return (map read is)
+
+
 {-
 digraph{
 rankdir ="LR" ;