forked from GitHub/gf-core
548 lines
21 KiB
Haskell
548 lines
21 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : VisualizeTree
|
|
-- Maintainer : AR
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date:
|
|
-- > CVS $Author:
|
|
-- > CVS $Revision:
|
|
--
|
|
-- Print a graph of an abstract syntax tree in Graphviz DOT format
|
|
-- Based on BB's VisualizeGrammar
|
|
-- FIXME: change this to use GF.Visualization.Graphviz,
|
|
-- instead of rolling its own.
|
|
-----------------------------------------------------------------------------
|
|
|
|
module PGF.VisualizeTree
|
|
( graphvizAbstractTree
|
|
, graphvizParseTree
|
|
, graphvizDependencyTree
|
|
, graphvizBracketedString
|
|
, graphvizAlignment
|
|
, gizaAlignment
|
|
, getDepLabels
|
|
) where
|
|
|
|
import PGF.CId (CId,showCId,ppCId,pCId,mkCId)
|
|
import PGF.Data
|
|
import PGF.Expr (showExpr, Tree)
|
|
import PGF.Linearize
|
|
import PGF.Macros (lookValCat, lookMap,
|
|
BracketedString(..), BracketedTokn(..), flattenBracketedString)
|
|
|
|
import qualified Data.Map as Map
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
|
|
import Data.Char (isDigit)
|
|
import Data.Maybe (fromMaybe)
|
|
import Text.PrettyPrint
|
|
|
|
import Data.Array.IArray
|
|
import Control.Monad
|
|
import qualified Data.Set as Set
|
|
import qualified Text.ParserCombinators.ReadP as RP
|
|
|
|
|
|
-- | Renders abstract syntax tree in Graphviz format
|
|
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
|
|
graphvizAbstractTree pgf (funs,cats) = render . tree2graph
|
|
where
|
|
tree2graph t =
|
|
text "graph {" $$
|
|
ppGraph [] [] 0 t $$
|
|
text "}"
|
|
|
|
getAbs xs (EAbs _ x e) = getAbs (x:xs) e
|
|
getAbs xs (ETyped e _) = getAbs xs e
|
|
getAbs xs (EImplArg e) = getAbs xs e
|
|
getAbs xs e = (xs,e)
|
|
|
|
getApp (EApp x y) es = getApp x (y:es)
|
|
getApp (ETyped e _) es = getApp e es
|
|
getApp (EImplArg e) es = getApp e es
|
|
getApp e es = (e,es)
|
|
|
|
getLbl scope (EFun f) = let fun = if funs then ppCId f else empty
|
|
cat = if cats then ppCId (lookValCat pgf f) else empty
|
|
sep = if funs && cats then colon else empty
|
|
in fun <+> sep <+> cat
|
|
getLbl scope (ELit l) = text (escapeStr (render (ppLit l)))
|
|
getLbl scope (EMeta i) = ppMeta i
|
|
getLbl scope (EVar i) = ppCId (scope !! i)
|
|
getLbl scope (ETyped e _) = getLbl scope e
|
|
getLbl scope (EImplArg e) = getLbl scope e
|
|
|
|
ppGraph scope ps i e0 =
|
|
let (xs, e1) = getAbs [] e0
|
|
(e2,args) = getApp e1 []
|
|
binds = if null xs
|
|
then empty
|
|
else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->"
|
|
(lbl,eargs) = case e2 of
|
|
EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node
|
|
_ -> (getLbl scope' e2, args)
|
|
scope' = xs ++ scope
|
|
in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$
|
|
(if null ps
|
|
then empty
|
|
else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$
|
|
vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs)
|
|
|
|
ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps))
|
|
|
|
escapeStr [] = []
|
|
escapeStr ('\\':cs) = '\\':'\\':escapeStr cs
|
|
escapeStr ('"' :cs) = '\\':'"' :escapeStr cs
|
|
escapeStr (c :cs) = c :escapeStr cs
|
|
|
|
|
|
type Labels = Map.Map CId [String]
|
|
|
|
{- This is an attempt to build the dependency tree from the bracketed string.
|
|
Unfortunately it doesn't quite work. See the actual implementation at
|
|
the end of this module.
|
|
|
|
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
|
|
graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
|
case format of
|
|
"malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
|
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
|
_ -> text "digraph {" $$
|
|
space $$
|
|
nest 2 (text "rankdir=LR ;" $$
|
|
text "node [shape = plaintext] ;" $$
|
|
vcat nodes $$
|
|
vcat links) $$
|
|
text "}"
|
|
where
|
|
nodes = map mkNode leaves
|
|
links = map mkLink [(fid, fromMaybe nil (lookup fid deps)) | (fid,_,w) <- tail leaves]
|
|
wnodes = undefined
|
|
|
|
nil = -1
|
|
|
|
bs = bracketedLinearize pgf lang t
|
|
|
|
leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
|
|
deps = getDeps nil [bs]
|
|
|
|
groupAndIndexIt id [] = []
|
|
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
|
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
|
where
|
|
collect pws@((p1,w):pws1)
|
|
| p == p1 = let (ws,pws2) = collect pws1
|
|
in (w:ws,pws2)
|
|
collect pws = ([],pws)
|
|
|
|
getLeaves parent bs =
|
|
case bs of
|
|
Leaf w -> [(parent,w)]
|
|
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
|
|
|
getDeps out_head bss =
|
|
case selectHead (children bss) of
|
|
Just ((head, bss'), deps) -> concat (descend out_head head bss' : [descend (headOf head bss') fid bss | (fid,bss) <- IntMap.toList deps])
|
|
Nothing -> []
|
|
where
|
|
descend head fid bss = (fid,head) : getDeps head bss
|
|
|
|
headOf head bss
|
|
| null [() | Leaf _ <- bss] =
|
|
case selectHead (children bss) of
|
|
Just ((head, bss), deps) -> headOf head bss
|
|
Nothing -> head
|
|
| otherwise = head
|
|
|
|
children bss = IntMap.fromListWith (++) [(fid,bss) | Bracket _ fid _ _ bss <- bss]
|
|
|
|
selectHead children = IntMap.maxViewWithKey children
|
|
|
|
mkNode (p,i,w) =
|
|
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
|
|
|
mkLink (x,y) = tag y <+> text "->" <+> tag x -- ++ " [label = \"" ++ l ++ "\"] ;"
|
|
-}
|
|
|
|
getDepLabels :: [String] -> Labels
|
|
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
|
|
|
|
|
graphvizParseTree :: PGF -> Language -> Tree -> String
|
|
graphvizParseTree pgf lang = graphvizBracketedString . bracketedLinearize pgf lang
|
|
|
|
graphvizBracketedString :: BracketedString -> String
|
|
graphvizBracketedString = render . lin2tree
|
|
where
|
|
lin2tree bs =
|
|
text "graph {" $$
|
|
space $$
|
|
nest 2 (text "rankdir=BU ;" $$
|
|
text "node [shape = record, color = white] ;" $$
|
|
space $$
|
|
vcat (nodes bs)) $$
|
|
text "}"
|
|
where
|
|
nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves])
|
|
|
|
nil = -1
|
|
|
|
leaves = getLeaves 0 nil bs
|
|
interns = getInterns 0 [(nil,bs)]
|
|
|
|
getLeaves level parent bs =
|
|
case bs of
|
|
Leaf w -> [(level-1,parent,w)]
|
|
Bracket _ fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
|
|
|
|
getInterns level [] = []
|
|
getInterns level nodes =
|
|
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket cat fid _ _ _) <- nodes] :
|
|
getInterns (level+1) [(fid,child) | (_,Bracket _ fid _ _ children) <- nodes, child <- children]
|
|
|
|
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
|
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
|
|
link pl pid l id
|
|
| pl < 0 = empty
|
|
| otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+>
|
|
text "--" <+>
|
|
struct l <> colon <> tag id <> colon <> char 'n' <+> semi
|
|
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
|
|
|
|
|
|
type Rel = (Int,[Int])
|
|
-- possibly needs changes after clearing about many-to-many on this level
|
|
|
|
type IndexedSeq = (Int,[String])
|
|
type LangSeq = [IndexedSeq]
|
|
|
|
data PreAlign = PreAlign [LangSeq] [[Rel]]
|
|
deriving Show
|
|
-- alignment structure for a phrase in 2 languages, along with the
|
|
-- many-to-many relations
|
|
|
|
|
|
genPreAlignment :: PGF -> [Language] -> Expr -> PreAlign
|
|
genPreAlignment pgf langs = lin2align . linsBracketed
|
|
where
|
|
linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
|
|
|
|
lin2align :: [BracketedString] -> PreAlign
|
|
lin2align bss = PreAlign langSeqs langRels
|
|
where
|
|
(langSeqs,langRels) = mkLayers leaves
|
|
nil = -1
|
|
leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
|
|
|
|
groupAndIndexIt id [] = []
|
|
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
|
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
|
where
|
|
collect pws@((p1,w):pws1)
|
|
| p == p1 = let (ws,pws2) = collect pws1
|
|
in (w:ws,pws2)
|
|
collect pws = ([],pws)
|
|
|
|
getLeaves parent bs =
|
|
case bs of
|
|
Leaf w -> [(parent,w)]
|
|
Bracket _ fid _ _ bss -> concatMap (getLeaves fid) bss
|
|
|
|
mkLayers (cs:css:rest) = let (lrest, rrest) = mkLayers (css:rest)
|
|
in ((fields cs) : lrest, (map (mkLinks css) cs) : rrest)
|
|
mkLayers [cs] = ([fields cs], [])
|
|
mkLayers _ = ([],[])
|
|
|
|
mkLinks cs (p0,id0,_) = (id0,indices)
|
|
where
|
|
indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
|
|
|
|
fields cs = [(id, [w]) | (_,id,w) <- cs]
|
|
|
|
|
|
-- we assume we have 2 languages - source and target
|
|
gizaAlignment :: PGF -> (Language,Language) -> Expr -> (String,String,String)
|
|
gizaAlignment pgf (l1,l2) e = let PreAlign [rl1,rl2] rels = genPreAlignment pgf [l1,l2] e
|
|
in
|
|
(unwords (map showIndSeq rl1), unwords (concat $ map snd rl2),
|
|
unwords $ words $ showRels rl2 (concat rels))
|
|
|
|
|
|
showIndSeq (_,l) = let ww = map words l
|
|
w_ = map (intersperse "_") ww
|
|
in
|
|
concat $ concat w_
|
|
|
|
showRels inds2 [] = []
|
|
showRels inds2 ((ind,is):rest) =
|
|
let lOffs = computeOffset inds2 0
|
|
ltemp = [(i,getOffsetIndex i lOffs) | i <- is]
|
|
lcurr = concat $ map (\(offset,ncomp) -> [show ind ++ "-" ++ show (-1 + offset + ii) ++ " "| ii <- [1..ncomp]]) (map snd ltemp)
|
|
lrest = showRels inds2 rest
|
|
in
|
|
(unwords lcurr) ++ lrest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
getOffsetIndex i lst = let ll = filter (\(x,_) -> x == i) lst
|
|
in
|
|
snd $ head ll
|
|
|
|
computeOffset [] transp = []
|
|
computeOffset ((i,l):rest) transp = let nw = (length $ words $ concat l)
|
|
in (i,(transp,nw)) : (computeOffset rest (transp + nw))
|
|
|
|
|
|
|
|
-- alignment in the Graphviz format from the intermediate structure
|
|
-- same effect as the old direct function
|
|
graphvizAlignment :: PGF -> [Language] -> Expr -> String
|
|
graphvizAlignment pgf langs exp =
|
|
render (text "digraph {" $$
|
|
space $$
|
|
nest 2 (text "rankdir=LR ;" $$
|
|
text "node [shape = record] ;" $$
|
|
space $$
|
|
renderList 0 lrels rrels) $$
|
|
text "}")
|
|
where
|
|
(PreAlign lrels rrels) = genPreAlignment pgf langs exp
|
|
|
|
|
|
renderList ii (l:ls) (r:rs) = struct ii <> text "[label = \"" <> fields l <> text "\"] ;" $$
|
|
(case ls of
|
|
[] -> empty
|
|
_ -> vcat [struct ii <> colon <> tag id0
|
|
<> colon <> char 'e' <+> text "->" <+> struct (ii+1)
|
|
<> colon <> tag id1 <> colon <> char 'w' <+> semi
|
|
| (id0,ids) <- r, id1 <- ids] $$ renderList (ii + 1) ls rs)
|
|
renderList ii [] _ = empty
|
|
renderList ii [l] [] = struct ii <> text "[label = \"" <> fields l <> text "\"] ;"
|
|
|
|
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (id,ws) <- cs, w <- ws])
|
|
|
|
|
|
|
|
-- auxiliaries for graphviz syntax
|
|
struct l = text ("struct" ++ show l)
|
|
tbrackets d = char '<' <> d <> char '>'
|
|
tag i
|
|
| i < 0 = char 'r' <> int (negate i)
|
|
| otherwise = char 'n' <> int i
|
|
|
|
|
|
|
|
--------------------------------------------------------------------
|
|
-- The linearization code bellow is needed just in order to
|
|
-- produce the dependency tree. Unfortunately the bracketed string
|
|
-- doesn't give us an easy way to find which part of the string
|
|
-- corresponds to which argument of the parent function.
|
|
--
|
|
-- Uuuuugly!!! I hope that this code will be removed one day.
|
|
|
|
type LinTable = Array LIndex [BracketedTokn]
|
|
|
|
|
|
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
|
|
linTree pgf lang mark e = lin0 [] [] [] Nothing e
|
|
where
|
|
cnc = lookMap (error "no lang") lang (concretes pgf)
|
|
lp = lproductions cnc
|
|
|
|
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
|
|
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
|
|
lin0 path xs ys mb_fid e = lin path ys mb_fid e []
|
|
|
|
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
|
|
lin path xs mb_fid (ELit l) [] = case l of
|
|
LStr s -> return (mark Nothing path (ss s))
|
|
LInt n -> return (mark Nothing path (ss (show n)))
|
|
LFlt f -> return (mark Nothing path (ss (show f)))
|
|
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
|
|
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
|
|
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
|
|
|
|
ss s = listArray (0,0) [[LeafKS [s]]]
|
|
|
|
apply path xs mb_fid f es =
|
|
case Map.lookup f lp of
|
|
Just prods -> case lookupProds mb_fid prods of
|
|
Just set -> do prod <- Set.toList set
|
|
case prod of
|
|
PApply funid fids -> do guard (length fids == length es)
|
|
args <- sequence (zipWith3 (\i (PArg _ fid) e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
|
|
let (CncFun _ lins) = cncfuns cnc ! funid
|
|
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
|
|
PCoerce fid -> apply path xs (Just fid) f es
|
|
Nothing -> mzero
|
|
where
|
|
lookupProds (Just fid) prods = IntMap.lookup fid prods
|
|
lookupProds Nothing prods = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
|
|
|
|
sub i path = i:path
|
|
|
|
isApp (PApply _ _) = True
|
|
isApp _ = False
|
|
|
|
computeSeq seqid args = concatMap compute (elems seq)
|
|
where
|
|
seq = sequences cnc ! seqid
|
|
|
|
compute (SymCat d r) = (args !! d) ! r
|
|
compute (SymLit d r) = (args !! d) ! r
|
|
compute (SymKS ts) = [LeafKS ts]
|
|
compute (SymKP ts alts) = [LeafKP ts alts]
|
|
|
|
untokn :: [BracketedTokn] -> [String]
|
|
untokn ts = case ts of
|
|
LeafKP d _ : [] -> d
|
|
LeafKP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
|
|
LeafKS s : ws -> s ++ untokn ws
|
|
[] -> []
|
|
where
|
|
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
|
|
v:_ -> v
|
|
_ -> d
|
|
|
|
|
|
-- show bracketed markup with references to tree structure
|
|
markLinearizes :: PGF -> CId -> Expr -> [String]
|
|
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
|
|
where
|
|
mark mb_f path lint = amap (bracket mb_f path) lint
|
|
|
|
bracket Nothing path ts = [LeafKS ["("++show (reverse path)]] ++ ts ++ [LeafKS [")"]]
|
|
bracket (Just f) path ts = [LeafKS ["(("++showCId f++","++show (reverse path)++")"]] ++ ts ++ [LeafKS [")"]]
|
|
|
|
|
|
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
|
|
graphvizDependencyTree format debug mlab ms pgf lang tr = case format of
|
|
"malt" -> unlines (lin2dep format)
|
|
"malt_input" -> unlines (lin2dep format)
|
|
_ -> concat $ map (++"\n") $ ["digraph {\n"] ++ lin2dep format ++ ["}"]
|
|
where
|
|
|
|
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
|
|
case format of
|
|
"malt" -> map (concat . intersperse "\t") wnodes
|
|
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
|
|
_ -> prelude ++ nodes ++ links
|
|
|
|
ifd s = if debug then s else []
|
|
|
|
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang tr
|
|
---- use Just str if you have str to match against
|
|
|
|
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
|
|
|
|
nodes = map mkNode nodeWords
|
|
mkNode (i,((_,p),ss)) =
|
|
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
|
|
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
|
|
((Just f,p),w) <- wlins pot]
|
|
|
|
links = map mkLink thelinks
|
|
thelinks = [(word y, x, label tr y x) |
|
|
(_,((f,x),_)) <- tail nodeWords,
|
|
let y = dominant x]
|
|
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
|
|
node = show . show
|
|
|
|
dominant x = case x of
|
|
[] -> x
|
|
_ | not (x == hx) -> hx
|
|
_ -> dominant (init x)
|
|
where
|
|
hx = headArg (init x) tr x
|
|
|
|
headArg x0 tr x = case (unApp tr,x) of
|
|
(Just (f,[]),[_]) -> x0 ---- ??
|
|
(Just (f,ts),[_]) -> x0 ++ [getHead (length ts - 1) f]
|
|
(Just (f,ts),i:y) -> headArg x0 (ts !! i) y
|
|
_ -> x0 ----
|
|
|
|
label tr y x = case span (uncurry (==)) (zip y x) of
|
|
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
|
|
_ -> "" ----
|
|
|
|
funAt tr x = case (unApp tr,x) of
|
|
(Just (f,_) ,[]) -> f
|
|
(Just (f,ts),i:y) -> funAt (ts !! i) y
|
|
_ -> mkCId (render (ppExpr 0 [] tr)) ----
|
|
|
|
word x = if elem x sortedNodes then x else
|
|
let x' = headArg x tr (x ++[0]) in
|
|
if x' == x then [] else word x'
|
|
|
|
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
|
|
|
|
labels = maybe Map.empty id mlab
|
|
getHead i f = case Map.lookup f labels of
|
|
Just ls -> length $ takeWhile (/= "head") ls
|
|
_ -> i
|
|
getLabel i f = case Map.lookup f labels of
|
|
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
|
|
_ -> showCId f ++ "#" ++ show i
|
|
|
|
-- to generate CoNLL format for MaltParser
|
|
nodeMap :: Map.Map [Int] Int
|
|
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
|
|
|
|
arcMap :: Map.Map [Int] ([Int],String)
|
|
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
|
|
|
|
lookDomLab p = case Map.lookup p arcMap of
|
|
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
|
|
_ -> (0,rootlabel)
|
|
|
|
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
|
|
(i, ((fun,p),ws)) <- tail nodeWords,
|
|
let pos = showCId $ lookValCat pgf fun,
|
|
let morph = unspec,
|
|
let (dom,lab) = lookDomLab p
|
|
]
|
|
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
|
|
unspec = "_"
|
|
rootlabel = "ROOT"
|
|
|
|
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
|
|
wlins pt = case pt of
|
|
T p pts -> concatMap (lins p) pts
|
|
M ws -> if null ws then [] else [((Nothing,[]),ws)]
|
|
where
|
|
lins p pt = case pt of
|
|
T q pts -> concatMap (lins q) pts
|
|
M ws -> if null ws then [] else [(p,ws)]
|
|
|
|
data PosText =
|
|
T (Maybe CId,[Int]) [PosText]
|
|
| M [String]
|
|
deriving Show
|
|
|
|
readPosText :: String -> PosText
|
|
readPosText = fst . head . (RP.readP_to_S pPosText) where
|
|
pPosText = do
|
|
RP.char '(' >> RP.skipSpaces
|
|
p <- pPos
|
|
RP.skipSpaces
|
|
ts <- RP.many pPosText
|
|
RP.char ')' >> RP.skipSpaces
|
|
return (T p ts)
|
|
RP.<++ do
|
|
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
|
|
return (M ws)
|
|
pPos = do
|
|
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
|
|
RP.<++ (return Nothing)
|
|
RP.char '[' >> RP.skipSpaces
|
|
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
|
|
RP.char ']' >> RP.skipSpaces
|
|
RP.char ')' RP.<++ return ' '
|
|
return (fun,map read is)
|