forked from GitHub/gf-core
added giza Alignments with command ga and merged the rendering algorithm for graphviz and giza alignments
This commit is contained in:
@@ -172,6 +172,28 @@ allCommands env@(pgf, mos) = Map.fromList [
|
|||||||
("view","program to open the resulting file (default \"open\")")
|
("view","program to open the resulting file (default \"open\")")
|
||||||
]
|
]
|
||||||
}),
|
}),
|
||||||
|
("ga", emptyCommandInfo {
|
||||||
|
longname = "giza_alignment",
|
||||||
|
synopsis = "show the giza alignment between 2 languages",
|
||||||
|
explanation = unlines [
|
||||||
|
"Prints a set of alignments in the .txt format.",
|
||||||
|
"The graph can be saved in a file by the wf command as usual."
|
||||||
|
],
|
||||||
|
exec = \opts es -> do
|
||||||
|
let giz = map (gizaAlignment pgf (head $ languages pgf, head $ tail $ languages pgf)) es
|
||||||
|
let lsrc = unlines $ map (\(x,_,_) -> x) giz
|
||||||
|
let ltrg = unlines $ map (\(_,x,_) -> x) giz
|
||||||
|
let align = unlines $ map (\(_,_,x) -> x) giz
|
||||||
|
let grph = if null es then [] else lsrc ++ "\n--end_source--\n\n"++ltrg++"\n-end_target--\n\n"++align
|
||||||
|
return $ fromString grph,
|
||||||
|
examples = [
|
||||||
|
"gr | ga -- generate a tree and show giza alignments"
|
||||||
|
],
|
||||||
|
options = [
|
||||||
|
],
|
||||||
|
flags = [
|
||||||
|
]
|
||||||
|
}),
|
||||||
|
|
||||||
("cc", emptyCommandInfo {
|
("cc", emptyCommandInfo {
|
||||||
longname = "compute_concrete",
|
longname = "compute_concrete",
|
||||||
|
|||||||
@@ -116,7 +116,8 @@ module PGF(
|
|||||||
graphvizDependencyTree,
|
graphvizDependencyTree,
|
||||||
graphvizBracketedString,
|
graphvizBracketedString,
|
||||||
graphvizAlignment,
|
graphvizAlignment,
|
||||||
|
gizaAlignment,
|
||||||
|
|
||||||
-- * Probabilities
|
-- * Probabilities
|
||||||
Probabilities,
|
Probabilities,
|
||||||
mkProbabilities,
|
mkProbabilities,
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ module PGF.VisualizeTree
|
|||||||
, graphvizDependencyTree
|
, graphvizDependencyTree
|
||||||
, graphvizBracketedString
|
, graphvizBracketedString
|
||||||
, graphvizAlignment
|
, graphvizAlignment
|
||||||
|
, gizaAlignment
|
||||||
, getDepLabels
|
, getDepLabels
|
||||||
) where
|
) where
|
||||||
|
|
||||||
@@ -211,55 +212,122 @@ graphvizBracketedString = render . lin2tree
|
|||||||
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
|
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
|
||||||
|
|
||||||
|
|
||||||
graphvizAlignment :: PGF -> [Language] -> Expr -> String
|
type Rel = (Int,[Int])
|
||||||
graphvizAlignment pgf langs = render . lin2graph . linsBracketed
|
-- possibly needs changes after clearing about many-to-many on this level
|
||||||
where
|
|
||||||
linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
|
|
||||||
|
|
||||||
lin2graph :: [BracketedString] -> Doc
|
type IndexedSeq = (Int,[String])
|
||||||
lin2graph bss =
|
type LangSeq = [IndexedSeq]
|
||||||
text "digraph {" $$
|
|
||||||
|
data PreAlign = PreAlign [LangSeq] [[Rel]]
|
||||||
|
deriving Show
|
||||||
|
-- alignment structure for a phrase in 2 languages, along with the
|
||||||
|
-- many-to-many relations
|
||||||
|
|
||||||
|
|
||||||
|
genPreAlignment :: PGF -> [Language] -> Expr -> PreAlign
|
||||||
|
genPreAlignment pgf langs = lin2align . linsBracketed
|
||||||
|
where
|
||||||
|
linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
|
||||||
|
|
||||||
|
lin2align :: [BracketedString] -> PreAlign
|
||||||
|
lin2align bss = PreAlign langSeqs langRels
|
||||||
|
where
|
||||||
|
(langSeqs,langRels) = mkLayers leaves
|
||||||
|
nil = -1
|
||||||
|
leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
|
||||||
|
|
||||||
|
groupAndIndexIt id [] = []
|
||||||
|
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
||||||
|
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
||||||
|
where
|
||||||
|
collect pws@((p1,w):pws1)
|
||||||
|
| p == p1 = let (ws,pws2) = collect pws1
|
||||||
|
in (w:ws,pws2)
|
||||||
|
collect pws = ([],pws)
|
||||||
|
|
||||||
|
getLeaves parent bs =
|
||||||
|
case bs of
|
||||||
|
Leaf w -> [(parent,w)]
|
||||||
|
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
||||||
|
|
||||||
|
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
|
||||||
|
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
|
||||||
|
mkLayers [cs] = ([fields cs], [])
|
||||||
|
mkLayers _ = ([],[])
|
||||||
|
|
||||||
|
mkLinks cs (p0,id0,_) = (id0,indices)
|
||||||
|
where
|
||||||
|
indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
|
||||||
|
|
||||||
|
fields cs = [(id, [w]) | (_,id,w) <- cs]
|
||||||
|
|
||||||
|
|
||||||
|
-- we assume we have 2 languages - source and target
|
||||||
|
gizaAlignment :: PGF -> (Language,Language) -> Expr -> (String,String,String)
|
||||||
|
gizaAlignment pgf (l1,l2) e = let PreAlign [rl1,rl2] rels = genPreAlignment pgf [l1,l2] e
|
||||||
|
in
|
||||||
|
(unwords (map showIndSeq rl1), unwords (concat $ map snd rl2),
|
||||||
|
unwords $ words $ showRels rl2 (concat rels))
|
||||||
|
|
||||||
|
|
||||||
|
showIndSeq (_,l) = let ww = map words l
|
||||||
|
w_ = map (intersperse "_") ww
|
||||||
|
in
|
||||||
|
concat $ concat w_
|
||||||
|
|
||||||
|
showRels inds2 [] = []
|
||||||
|
showRels inds2 ((ind,is):rest) =
|
||||||
|
let lOffs = computeOffset inds2 0
|
||||||
|
ltemp = [(i,getOffsetIndex i lOffs) | i <- is]
|
||||||
|
lcurr = concat $ map (\(offset,ncomp) -> [show ind ++ "-" ++ show (-1 + offset + ii) ++ " "| ii <- [1..ncomp]]) (map snd ltemp)
|
||||||
|
lrest = showRels inds2 rest
|
||||||
|
in
|
||||||
|
(unwords lcurr) ++ lrest
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
getOffsetIndex i lst = let ll = filter (\(x,_) -> x == i) lst
|
||||||
|
in
|
||||||
|
snd $ head ll
|
||||||
|
|
||||||
|
computeOffset [] transp = []
|
||||||
|
computeOffset ((i,l):rest) transp = let nw = (length $ words $ concat l)
|
||||||
|
in (i,(transp,nw)) : (computeOffset rest (transp + nw))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- alignment in the Graphviz format from the intermediate structure
|
||||||
|
-- same effect as the old direct function
|
||||||
|
graphvizAlignment :: PGF -> [Language] -> Expr -> String
|
||||||
|
graphvizAlignment pgf langs exp =
|
||||||
|
render (text "digraph {" $$
|
||||||
space $$
|
space $$
|
||||||
nest 2 (text "rankdir=LR ;" $$
|
nest 2 (text "rankdir=LR ;" $$
|
||||||
text "node [shape = record] ;" $$
|
text "node [shape = record] ;" $$
|
||||||
space $$
|
space $$
|
||||||
mkLayers 0 leaves) $$
|
renderList 0 lrels rrels) $$
|
||||||
text "}"
|
text "}")
|
||||||
where
|
where
|
||||||
nil = -1
|
(PreAlign lrels rrels) = genPreAlignment pgf langs exp
|
||||||
|
|
||||||
leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
|
|
||||||
|
renderList ii (l:ls) (r:rs) = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" $$
|
||||||
groupAndIndexIt id [] = []
|
(case ls of
|
||||||
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
[] -> empty
|
||||||
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
_ -> vcat [struct ii <> colon <> tag id0
|
||||||
where
|
<> colon <> char 'e' <+> text "->" <+> struct (ii+1)
|
||||||
collect pws@((p1,w):pws1)
|
<> colon <> tag id1 <> colon <> char 'w' <+> semi
|
||||||
| p == p1 = let (ws,pws2) = collect pws1
|
| (id0,ids) <- r, id1 <- ids] $$ renderList (ii + 1) ls rs)
|
||||||
in (w:ws,pws2)
|
renderList ii [] _ = empty
|
||||||
collect pws = ([],pws)
|
renderList ii [l] [] = struct ii <> text "[label = \"" <> fields l <> text "\"] ;"
|
||||||
|
|
||||||
getLeaves parent bs =
|
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (id,ws) <- cs, w <- ws])
|
||||||
case bs of
|
|
||||||
Leaf w -> [(parent,w)]
|
|
||||||
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
|
||||||
|
|
||||||
mkLayers l [] = empty
|
|
||||||
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
|
||||||
(case css of
|
|
||||||
(ncs:_) -> vcat (map (mkLinks l ncs) cs)
|
|
||||||
_ -> empty) $$
|
|
||||||
mkLayers (l+1) css
|
|
||||||
|
|
||||||
mkLinks l cs (p0,id0,_) =
|
|
||||||
vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+>
|
|
||||||
text "->" <+>
|
|
||||||
struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices)
|
|
||||||
where
|
|
||||||
indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
|
|
||||||
|
|
||||||
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- auxiliaries for graphviz syntax
|
-- auxiliaries for graphviz syntax
|
||||||
struct l = text ("struct" ++ show l)
|
struct l = text ("struct" ++ show l)
|
||||||
@@ -269,6 +337,7 @@ tag i
|
|||||||
| otherwise = char 'n' <> int i
|
| otherwise = char 'n' <> int i
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------------
|
--------------------------------------------------------------------
|
||||||
-- The linearization code bellow is needed just in order to
|
-- The linearization code bellow is needed just in order to
|
||||||
-- produce the dependency tree. Unfortunately the bracketed string
|
-- produce the dependency tree. Unfortunately the bracketed string
|
||||||
|
|||||||
Reference in New Issue
Block a user