From 10675e29cac9149c18fbf61903e52468426a0ea1 Mon Sep 17 00:00:00 2001 From: aarne Date: Mon, 15 Dec 2008 10:33:53 +0000 Subject: [PATCH] visualization of word alignment based on bracketing (command aw); does not work for syncategorematic words yet --- src/GF/Command/Commands.hs | 35 ++++++++++++++++++ src/PGF/Linearize.hs | 5 ++- src/PGF/VisualizeTree.hs | 75 +++++++++++++++++++++++++++++++++++++- 3 files changed, 112 insertions(+), 3 deletions(-) diff --git a/src/GF/Command/Commands.hs b/src/GF/Command/Commands.hs index a78fa0fac..5674f1107 100644 --- a/src/GF/Command/Commands.hs +++ b/src/GF/Command/Commands.hs @@ -104,6 +104,41 @@ allCommands cod env@(pgf, mos) = Map.fromList [ "gt | l | ? wc -- generate, linearize, word-count" ] }), + + ("aw", emptyCommandInfo { + longname = "align_words", + synopsis = "show word alignments between languages graphically", + explanation = unlines [ + "Prints a set of strings in the .dot format (the graphviz format).", + "The graph can be saved in a file by the wf command as usual.", + "If the -view flag is defined, the graph is saved in a temporary file", + "which is processed by graphviz and displayed by the program indicated", + "by the flag. The target format is postscript, unless overridden by the", + "flag -format." + ], + exec = \opts ts -> do + let grph = if null ts then [] else alignLinearize pgf (head ts) + if isFlag "view" opts || isFlag "format" opts then do + let file s = "_grph." ++ s + let view = optViewGraph opts ++ " " + let format = optViewFormat opts + writeFile (file "dot") (enc grph) + system $ "dot -T" ++ format ++ " " ++ file "dot" ++ " > " ++ file format ++ + " ; " ++ view ++ file format + return void + else return $ fromString grph, + examples = [ + "gr | aw -- generate a tree and show word alignment as graph script", + "gr | vt -view=\"open\" -- generate a tree and display alignment on a Mac" + ], + options = [ + ], + flags = [ + ("format","format of the visualization file (default \"ps\")"), + ("view","program to open the resulting file (default \"gv\")") + ] + }), + ("cc", emptyCommandInfo { longname = "compute_concrete", syntax = "cc (-all | -table | -unqual)? TERM", diff --git a/src/PGF/Linearize.hs b/src/PGF/Linearize.hs index 3b0c42597..36f639053 100644 --- a/src/PGF/Linearize.hs +++ b/src/PGF/Linearize.hs @@ -146,10 +146,11 @@ linTreeMark pgf lang = lin [] R ts -> R $ map (mark p) ts FV ts -> R $ map (mark p) ts S ts -> S $ bracket p ts - K s -> S $ bracket p [t] + K s -> S $ bracketw p [t] W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts] _ -> t -- otherwise in normal form - bracket p ts = [kks ("["++show p)] ++ ts ++ [kks "]"] + bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"] + bracketw p ts = [kks ("{"++show p)] ++ ts ++ [kks "}"] -- for easy word alignment sub p i = p ++ [i] diff --git a/src/PGF/VisualizeTree.hs b/src/PGF/VisualizeTree.hs index 0219dcbde..a15e380d6 100644 --- a/src/PGF/VisualizeTree.hs +++ b/src/PGF/VisualizeTree.hs @@ -15,13 +15,16 @@ -- instead of rolling its own. ----------------------------------------------------------------------------- -module PGF.VisualizeTree ( visualizeTrees +module PGF.VisualizeTree ( visualizeTrees, alignLinearize ) where import PGF.CId (prCId) import PGF.Data +import PGF.Linearize import PGF.Macros (lookValCat) +import Data.List (intersperse) + visualizeTrees :: PGF -> (Bool,Bool) -> [Tree] -> String visualizeTrees pgf funscats = unlines . map (prGraph False . tree2graph pgf funscats) @@ -46,3 +49,73 @@ tree2graph pgf (funs,cats) = prf [] where prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where graph = if digr then "digraph" else "graph" + + +-- word alignments from Linearize.linearizesMark +-- words are chunks like {[0,1,1,0] old} + +alignLinearize :: PGF -> Tree -> String +alignLinearize pgf = prGraph True . lin2graph . linsMark where + linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)] + +lin2graph :: [String] -> [String] +lin2graph ss = prelude ++ nodes ++ links + + where + + prelude = ["rankdir=LR ;", "node [shape = record] ;"] + + -- find all words + lins :: String -> [(String,String)] + lins [] = [] + lins s = let (s1, s2) = if null s then ([],[]) else span (/='{') s in + let (s21,s22) = if null s2 then ([],[]) else span (/='}') (tail s2) in + if null s21 then lins s22 else wlink s21 : lins s22 + + -- separate a word to the link (1,2,3) and the word itself + wlink :: String -> (String,String) + wlink s = let (s1, s2) = span (/=']') s in + (tail s1, init (drop 1 s2)) + + -- make all marks unique to deal with discontinuities + nlins :: [(Int,[((Int,String),String)])] + nlins = [(i, [((j,m),w) | (j,(m,w)) <- zip [0..] (lins s)]) | (i,s) <- zip [0..] ss] + + nodes = map mkStruct nlins + + mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" + + fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) + + struct i = "struct" ++ show i + + mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n + + uncommas = map (\c -> if c==',' then 'c' else c) + + tag s = "<" ++ s ++ ">" + + links = concatMap mkEdge (init nlins) + + mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list + [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] + + edge i v w = + struct i ++ ":" ++ mark v ++ " -> " ++ struct (i+1) ++ ":" ++ mark w ++ " ;" + +{- +digraph{ +rankdir ="LR" ; +node [shape = record] ; + +struct1 [label = " this| very| intelligent| man"] ; +struct2 [label = " cet| homme| tres| intelligent| ci"] ; + +struct1:f0 -> struct2:f0 ; +struct1:f1 -> struct2:f2 ; +struct1:f2 -> struct2:f3 ; +struct1:f3 -> struct2:f1 ; +struct1:f0 -> struct2:f4 ; +} +-} +