From cd69929b4d64a2435250c3cee30bc7d664dc1f9a Mon Sep 17 00:00:00 2001 From: "ramona.enache" Date: Sun, 31 Oct 2010 13:39:01 +0000 Subject: [PATCH] added giza Alignments with command ga and merged the rendering algorithm for graphviz and giza alignments --- src/compiler/GF/Command/Commands.hs | 22 ++++ src/runtime/haskell/PGF.hs | 3 +- src/runtime/haskell/PGF/VisualizeTree.hs | 155 ++++++++++++++++------- 3 files changed, 136 insertions(+), 44 deletions(-) diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index b10d35ec7..7989078c2 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -172,6 +172,28 @@ allCommands env@(pgf, mos) = Map.fromList [ ("view","program to open the resulting file (default \"open\")") ] }), + ("ga", emptyCommandInfo { + longname = "giza_alignment", + synopsis = "show the giza alignment between 2 languages", + explanation = unlines [ + "Prints a set of alignments in the .txt format.", + "The graph can be saved in a file by the wf command as usual." + ], + exec = \opts es -> do + let giz = map (gizaAlignment pgf (head $ languages pgf, head $ tail $ languages pgf)) es + let lsrc = unlines $ map (\(x,_,_) -> x) giz + let ltrg = unlines $ map (\(_,x,_) -> x) giz + let align = unlines $ map (\(_,_,x) -> x) giz + let grph = if null es then [] else lsrc ++ "\n--end_source--\n\n"++ltrg++"\n-end_target--\n\n"++align + return $ fromString grph, + examples = [ + "gr | ga -- generate a tree and show giza alignments" + ], + options = [ + ], + flags = [ + ] + }), ("cc", emptyCommandInfo { longname = "compute_concrete", diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 9165f01ef..42ef8aaff 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -116,7 +116,8 @@ module PGF( graphvizDependencyTree, graphvizBracketedString, graphvizAlignment, - + gizaAlignment, + -- * Probabilities Probabilities, mkProbabilities, diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 916681d93..c054e1e78 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -21,6 +21,7 @@ module PGF.VisualizeTree , graphvizDependencyTree , graphvizBracketedString , graphvizAlignment + , gizaAlignment , getDepLabels ) where @@ -211,55 +212,122 @@ graphvizBracketedString = render . lin2tree fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs]) -graphvizAlignment :: PGF -> [Language] -> Expr -> String -graphvizAlignment pgf langs = render . lin2graph . linsBracketed - where - linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs] +type Rel = (Int,[Int]) +-- possibly needs changes after clearing about many-to-many on this level - lin2graph :: [BracketedString] -> Doc - lin2graph bss = - text "digraph {" $$ +type IndexedSeq = (Int,[String]) +type LangSeq = [IndexedSeq] + +data PreAlign = PreAlign [LangSeq] [[Rel]] + deriving Show +-- alignment structure for a phrase in 2 languages, along with the +-- many-to-many relations + + +genPreAlignment :: PGF -> [Language] -> Expr -> PreAlign +genPreAlignment pgf langs = lin2align . linsBracketed + where + linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs] + + lin2align :: [BracketedString] -> PreAlign + lin2align bss = PreAlign langSeqs langRels + where + (langSeqs,langRels) = mkLayers leaves + nil = -1 + leaves = map (groupAndIndexIt 0 . getLeaves nil) bss + + groupAndIndexIt id [] = [] + groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws + in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 + where + collect pws@((p1,w):pws1) + | p == p1 = let (ws,pws2) = collect pws1 + in (w:ws,pws2) + collect pws = ([],pws) + + getLeaves parent bs = + case bs of + Leaf w -> [(parent,w)] + Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss + + mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest) + in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest) + mkLayers [cs] = ([fields cs], []) + mkLayers _ = ([],[]) + + mkLinks cs (p0,id0,_) = (id0,indices) + where + indices = [id1 | (p1,id1,_) <- cs, p1 == p0] + + fields cs = [(id, [w]) | (_,id,w) <- cs] + + +-- we assume we have 2 languages - source and target +gizaAlignment :: PGF -> (Language,Language) -> Expr -> (String,String,String) +gizaAlignment pgf (l1,l2) e = let PreAlign [rl1,rl2] rels = genPreAlignment pgf [l1,l2] e + in + (unwords (map showIndSeq rl1), unwords (concat $ map snd rl2), + unwords $ words $ showRels rl2 (concat rels)) + + +showIndSeq (_,l) = let ww = map words l + w_ = map (intersperse "_") ww + in + concat $ concat w_ + +showRels inds2 [] = [] +showRels inds2 ((ind,is):rest) = + let lOffs = computeOffset inds2 0 + ltemp = [(i,getOffsetIndex i lOffs) | i <- is] + lcurr = concat $ map (\(offset,ncomp) -> [show ind ++ "-" ++ show (-1 + offset + ii) ++ " "| ii <- [1..ncomp]]) (map snd ltemp) + lrest = showRels inds2 rest + in + (unwords lcurr) ++ lrest + + + + + + + +getOffsetIndex i lst = let ll = filter (\(x,_) -> x == i) lst + in + snd $ head ll + +computeOffset [] transp = [] +computeOffset ((i,l):rest) transp = let nw = (length $ words $ concat l) + in (i,(transp,nw)) : (computeOffset rest (transp + nw)) + + + +-- alignment in the Graphviz format from the intermediate structure +-- same effect as the old direct function +graphvizAlignment :: PGF -> [Language] -> Expr -> String +graphvizAlignment pgf langs exp = + render (text "digraph {" $$ space $$ nest 2 (text "rankdir=LR ;" $$ text "node [shape = record] ;" $$ space $$ - mkLayers 0 leaves) $$ - text "}" - where - nil = -1 - - leaves = map (groupAndIndexIt 0 . getLeaves nil) bss - - groupAndIndexIt id [] = [] - groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws - in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 - where - collect pws@((p1,w):pws1) - | p == p1 = let (ws,pws2) = collect pws1 - in (w:ws,pws2) - collect pws = ([],pws) - - getLeaves parent bs = - case bs of - Leaf w -> [(parent,w)] - Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss - - mkLayers l [] = empty - mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ - (case css of - (ncs:_) -> vcat (map (mkLinks l ncs) cs) - _ -> empty) $$ - mkLayers (l+1) css - - mkLinks l cs (p0,id0,_) = - vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+> - text "->" <+> - struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices) - where - indices = [id1 | (p1,id1,_) <- cs, p1 == p0] - - fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs]) + renderList 0 lrels rrels) $$ + text "}") + where + (PreAlign lrels rrels) = genPreAlignment pgf langs exp + + + renderList ii (l:ls) (r:rs) = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" $$ + (case ls of + [] -> empty + _ -> vcat [struct ii <> colon <> tag id0 + <> colon <> char 'e' <+> text "->" <+> struct (ii+1) + <> colon <> tag id1 <> colon <> char 'w' <+> semi + | (id0,ids) <- r, id1 <- ids] $$ renderList (ii + 1) ls rs) + renderList ii [] _ = empty + renderList ii [l] [] = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" + + fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (id,ws) <- cs, w <- ws]) + -- auxiliaries for graphviz syntax struct l = text ("struct" ++ show l) @@ -269,6 +337,7 @@ tag i | otherwise = char 'n' <> int i + -------------------------------------------------------------------- -- The linearization code bellow is needed just in order to -- produce the dependency tree. Unfortunately the bracketed string