first incarnation of the bracketed string API

This commit is contained in:
krasimir
2010-04-30 14:36:06 +00:00
parent 7a4cb3c271
commit 8460598801
12 changed files with 632 additions and 468 deletions

View File

@@ -42,6 +42,7 @@ library
PGF.VisualizeTree
PGF.Printer
PGF.Probabilistic
PGF.Forest
GF.Data.TrieMap
GF.Data.Utilities
GF.Data.SortedList

View File

@@ -22,6 +22,7 @@ import PGF.Morphology
import PGF.Printer
import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
import PGF.Generate (generateRandomFrom) ----
import PGF.Tree (Tree(Fun), expr2tree, tree2expr)
import GF.Compile.Export
import GF.Compile.ExampleBased
import GF.Infra.Option (noOptions, readOutputFormat)
@@ -150,7 +151,7 @@ allCommands env@(pgf, mos) = Map.fromList [
"flag -format."
],
exec = \opts es -> do
let grph = if null es then [] else graphvizAlignment pgf (head es)
let grph = if null es then [] else graphvizAlignment pgf (languages pgf) (head es)
if isFlag "view" opts || isFlag "format" opts then do
let file s = "_grph." ++ s
let view = optViewGraph opts
@@ -481,11 +482,14 @@ allCommands env@(pgf, mos) = Map.fromList [
"will accept unknown adjectives, nouns and verbs with the resource grammar."
],
exec = \opts ts ->
returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts,
return $ fromParse opts ts $ concatMap (par opts) $ toStrings ts,
flags = [
("cat","target category of parsing"),
("lang","the languages of parsing (comma-separated, no spaces)"),
("openclass","list of open-class categories for robust parsing")
],
options = [
("bracket","prints the bracketed string from the parser")
]
}),
("pg", emptyCommandInfo { -----
@@ -893,8 +897,8 @@ allCommands env@(pgf, mos) = Map.fromList [
]
where
par opts s = case optOpenTypes opts of
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts]
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
[] -> [parse pgf lang (optType opts) s | lang <- optLangs opts]
open_typs -> [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
void = ([],[])
@@ -918,9 +922,17 @@ allCommands env@(pgf, mos) = Map.fromList [
map (map (unl . snd)) . tabularLinearizes pgf lang
_ | isOpt "table" opts -> unlines . concat . intersperse [[]] .
map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang
_ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang
_ | isOpt "bracket" opts -> showBracketedString . bracketedLinearize pgf lang
_ -> unl . linearize pgf lang
-- replace each non-atomic constructor with mkC, where C is the val cat
tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
t2m t = case t of
Fun cid [] -> t
Fun cid ts -> Fun (mk cid) (map t2m ts)
_ -> t
mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ----
getUnlex opts lang = case words (valStrOpts "unlexer" "" opts) of
@@ -991,14 +1003,22 @@ allCommands env@(pgf, mos) = Map.fromList [
toStrings = map showAsString
toString = unwords . toStrings
fromParse opts ts parses
| isOpt "bracket" opts = case catMaybes bss of
[] -> ([], "no brackets found")
bss -> ([], unlines $ map showBracketedString bss)
| otherwise = case ts of
[] -> ([], "no trees found" ++
missingWordMsg (optMorpho opts) (concatMap words (toStrings ts))
)
_ -> fromExprs ts
where
(prs,bss) = unzip parses
ts = [t | ParseResult ts <- prs, t <- ts]
returnFromExprs es = return $ case es of
[] -> ([], "no trees found")
_ -> fromExprs es
returnFromExprsPar opts ts es = return $ case es of
[] -> ([], "no trees found" ++
missingWordMsg (optMorpho opts) (concatMap words (toStrings ts))
)
_ -> fromExprs es
prGrammar opts
| isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf

View File

@@ -41,17 +41,20 @@ convertFile conf src file = do
convEx (cat,ex) = do
appn "("
let typ = maybe (error "no valid cat") id $ readType cat
let ts = rank $ parse pgf lang typ ex
ws <- case ts of
[] -> do
ws <- case fst (parse pgf lang typ ex) of
ParseFailed _ -> do
let ws = morphoMissing morpho (words ex)
appv ("WARNING: cannot parse example " ++ ex)
case ws of
[] -> return ()
_ -> appv (" missing words: " ++ unwords ws)
return ws
t:tt -> appv ("WARNING: ambiguous example " ++ ex) >>
appn t >> mapM_ (appn . (" --- " ++)) tt >> return []
return ws
TypeError _ _ ->
return []
ParseResult ts ->
case rank ts of
(t:tt) -> appv ("WARNING: ambiguous example " ++ ex) >>
appn t >> mapM_ (appn . (" --- " ++)) tt >> return []
appn ")"
return ws
rank ts = case probs conf of

View File

@@ -46,7 +46,10 @@ translationList mex mprobs pgf ig og typ number = do
return $ map mkOne $ ts
where
mkOne t = (norml (linearize pgf ig t), map (norml . linearize pgf og) (homonyms t))
homonyms = nub . parse pgf ig typ . linearize pgf ig
homonyms t =
case (fst . parse pgf ig typ . linearize pgf ig) t of
ParseResult ts -> ts
_ -> []
morphologyList ::
Maybe Expr -> Maybe Probabilities ->

View File

@@ -1,7 +1,7 @@
-------------------------------------------------
-- |
-- Module : PGF
-- Maintainer : Aarne Ranta
-- Maintainer : Krasimir Angelov
-- Stability : stable
-- Portability : portable
--
@@ -50,9 +50,12 @@ module PGF(
-- * Operations
-- ** Linearization
linearize, linearizeAllLang, linearizeAll,
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
groupResults, -- lins of trees by language, removing duplicates
showPrintName,
BracketedString(..), FId, LIndex,
Forest.showBracketedString,
-- ** Parsing
parse, parseWithRecovery, parseAllLang, parseAll,
@@ -73,10 +76,11 @@ module PGF(
checkType, checkExpr, inferExpr,
TcError(..), ppTcError,
-- ** Word Completion (Incremental Parsing)
-- ** Low level parsing API
complete,
Parse.ParseState,
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees,
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates,
Parse.ParseResult(..), Parse.getParseResult,
-- ** Generation
generateRandom, generateAll, generateAllDepth,
@@ -90,6 +94,7 @@ module PGF(
graphvizAbstractTree,
graphvizParseTree,
graphvizDependencyTree,
graphvizBracketedString,
graphvizAlignment,
-- * Browsing
@@ -107,6 +112,7 @@ import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data
import PGF.Binary
import qualified PGF.Forest as Forest
import qualified PGF.Parse as Parse
import GF.Data.Utilities (replace)
@@ -131,34 +137,18 @@ import Text.PrettyPrint
-- > $ gf -make <grammar file name>
readPGF :: FilePath -> IO PGF
-- | Linearizes given expression as string in the language
linearize :: PGF -> Language -> Tree -> String
-- | Tries to parse the given string in the specified language
-- and to produce abstract syntax expression. An empty
-- list is returned if the parsing is not successful. The list may also
-- contain more than one element if the grammar is ambiguous.
-- Throws an exception if the given language cannot be used
-- for parsing, see 'canParse'.
parse :: PGF -> Language -> Type -> String -> [Tree]
-- and to produce abstract syntax expression.
parse :: PGF -> Language -> Type -> String -> (Parse.ParseResult,Maybe BracketedString)
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
-- | The same as 'linearizeAllLang' but does not return
-- the language.
linearizeAll :: PGF -> Tree -> [String]
-- | Linearizes given expression as string in all languages
-- available in the grammar.
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
-- | This is an experimental function. Use it on your own risk
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseResult,Maybe BracketedString)
-- | The same as 'parseAllLang' but does not return
-- the language.
parseAll :: PGF -> Type -> String -> [[Tree]]
-- | Tries to parse the given string with all available languages.
-- Languages which cannot be used for parsing (see 'canParse')
-- are ignored.
-- The returned list contains pairs of language
-- and list of abstract syntax expressions
-- (this is a list, since grammars can be ambiguous).
@@ -227,8 +217,6 @@ complete :: PGF -> Language -> Type -> String
readPGF f = decodeFile f
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
parse pgf lang typ s =
case Map.lookup lang (concretes pgf) of
Just cnc -> Parse.parse pgf lang typ (words s)
@@ -236,10 +224,6 @@ parse pgf lang typ s =
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t =
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
groupResults :: [[(Language,String)]] -> [(Language,[String])]
groupResults = Map.toList . foldr more Map.empty . start . concat
where
@@ -250,7 +234,7 @@ groupResults = Map.toList . foldr more Map.empty . start . concat
parseAll mgr typ = map snd . parseAllLang mgr typ
parseAllLang mgr typ s =
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)]
[(lang,ts) | lang <- languages mgr, (Parse.ParseResult ts,_) <- [parse mgr lang typ s], not (null ts)]
generateRandom pgf cat = do
gen <- newStdGen
@@ -280,14 +264,18 @@ functionType pgf fun =
Nothing -> Nothing
complete pgf from typ input =
let (ws,prefix) = tokensAndPrefix input
state0 = Parse.initState pgf from typ
in case loop state0 ws of
Nothing -> []
Just state ->
(if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else [])
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
let (ws,prefix) = tokensAndPrefix input
state0 = Parse.initState pgf from typ
in case loop state0 ws of
Nothing -> []
Just state -> (if null prefix && isSuccessful state then [unwords ws ++ " "] else [])
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
where
isSuccessful state =
case Parse.getParseResult state typ of
(Parse.ParseResult ts, _) -> not (null ts)
_ -> False
tokensAndPrefix :: String -> ([String],String)
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
| null ws = ([],"")

View File

@@ -56,7 +56,7 @@ data Symbol
data Production
= PApply {-# UNPACK #-} !FunId [FId]
| PCoerce {-# UNPACK #-} !FId
| PConst Expr [String]
| PConst CId Expr [String]
deriving (Eq,Ord,Show)
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
@@ -86,7 +86,6 @@ data Tokn =
| KP [String] [Alternative]
deriving (Eq,Ord,Show)
-- merge two PGFs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF

View File

@@ -0,0 +1,101 @@
-------------------------------------------------
-- |
-- Module : PGF
-- Maintainer : Krasimir Angelov
-- Stability : stable
-- Portability : portable
--
-- Forest is a compact representation of a set
-- of parse trees. This let us to efficiently
-- represent local ambiguities
--
-------------------------------------------------
module PGF.Forest( Forest(..)
, BracketedString, showBracketedString
, linearizeWithBrackets
) where
import PGF.CId
import PGF.Data
import PGF.Macros
import Data.List
import Data.Array.IArray
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.IntSet as IntSet
import qualified Data.IntMap as IntMap
import Control.Monad
data Forest
= Forest
{ abstr :: Abstr
, concr :: Concr
, forest :: IntMap.IntMap (Set.Set Production)
, root :: {-# UNPACK #-} !FId
, label :: {-# UNPACK #-} !LIndex
}
--------------------------------------------------------------------
-- Rendering of bracketed strings
--------------------------------------------------------------------
linearizeWithBrackets :: Forest -> BracketedString
linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
---------------------------------------------------------------
-- Internally we have to do everything with Tokn first because
-- we must handle the pre {...} construction.
--
bracketedTokn :: Forest -> BracketedTokn
bracketedTokn (Forest abs cnc forest root label) =
let (fid,cat,lin) = render IntMap.empty root
in Bracket_ fid label cat (lin ! label)
where
trusted = trustedSpots IntSet.empty root
render parents fid =
case (IntMap.lookup fid parents) `mplus` (fmap Set.toList $ IntMap.lookup fid forest) of
Just (p:ps) -> descend (IntMap.insert fid ps parents) p
Nothing -> error ("wrong forest id " ++ show fid)
where
descend parents (PApply funid args) = let (CncFun fun lins) = cncfuns cnc ! funid
Just (DTyp _ cat _,_,_) = Map.lookup fun (funs abs)
largs = map (render parents) args
in (fid,cat,listArray (bounds lins) [computeSeq seqid largs | seqid <- elems lins])
descend parents (PCoerce fid) = render parents fid
descend parents (PConst cat _ ts) = (fid,cat,listArray (0,0) [[LeafKS ts]])
trustedSpots parents fid
| IntSet.member fid parents
= IntSet.empty
| otherwise = IntSet.insert fid $
case IntMap.lookup fid forest of
Just prods -> foldl1 IntSet.intersection [descend prod | prod <- Set.toList prods]
Nothing -> IntSet.empty
where
parents' = IntSet.insert fid parents
descend (PApply funid args) = IntSet.unions (map (trustedSpots parents') args)
descend (PCoerce fid) = trustedSpots parents' fid
descend (PConst c e _) = IntSet.empty
computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn]
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
compute (SymCat d r) = getArg d r
compute (SymLit d r) = getArg d r
compute (SymKS ts) = [LeafKS ts]
compute (SymKP ts alts) = [LeafKP ts alts]
getArg d r
| not (null arg_lin) &&
IntSet.member fid trusted
= [Bracket_ fid r cat arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
(fid,cat,lin) = args !! d

View File

@@ -1,8 +1,15 @@
module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
module PGF.Linearize
( linearize
, linearizeAll
, linearizeAllLang
, bracketedLinearize
, tabularLinearizes
) where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Expr
import Data.Array.IArray
import Data.List
import Control.Monad
@@ -10,85 +17,33 @@ import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
-- linearization and computation of concrete PGF Terms
--------------------------------------------------------------------
-- The API
--------------------------------------------------------------------
type LinTable = Array LIndex [Tokn]
-- | Linearizes given expression as string in the language
linearize :: PGF -> Language -> Tree -> String
linearize pgf lang = concat . take 1 . map (unwords . concatMap flattenBracketedString . snd . untokn "" . (!0)) . linTree pgf lang
linearizes :: PGF -> CId -> Expr -> [String]
linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint)
-- | The same as 'linearizeAllLang' but does not return
-- the language.
linearizeAll :: PGF -> Tree -> [String]
linearizeAll pgf = map snd . linearizeAllLang pgf
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
linTree pgf lang mark e = lin0 [] [] [] Nothing e
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
-- | Linearizes given expression as string in all languages
-- available in the grammar.
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
linearizeAllLang pgf t = [(lang,linearize pgf lang t) | lang <- Map.keys (concretes pgf)]
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
| otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
-- | Linearizes given expression as a bracketed string in the language
bracketedLinearize :: PGF -> Language -> Tree -> BracketedString
bracketedLinearize pgf lang = head . concat . map (snd . untokn "" . (!0)) . linTree pgf lang
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
lin path xs mb_fid (ELit l) [] = case l of
LStr s -> return (mark Nothing path (ss s))
LInt n -> return (mark Nothing path (ss (show n)))
LFlt f -> return (mark Nothing path (ss (show f)))
lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
ss s = listArray (0,0) [[KS s]]
apply path xs mb_fid f es =
case Map.lookup f lp of
Just prods -> case lookupProds mb_fid prods of
Just set -> do prod <- Set.toList set
case prod of
PApply funid fids -> do guard (length fids == length es)
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
let (CncFun _ lins) = cncfuns cnc ! funid
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
PCoerce fid -> apply path xs (Just fid) f es
Nothing -> mzero
Nothing -> apply path xs mb_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
where
lookupProds (Just fid) prods = IntMap.lookup fid prods
lookupProds Nothing prods
| f == _B || f == _V = Nothing
| otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
sub i path
| f == _B || f == _V = path
| otherwise = i:path
isApp (PApply _ _) = True
isApp _ = False
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
compute (SymCat d r) = (args !! d) ! r
compute (SymLit d r) = (args !! d) ! r
compute (SymKS ts) = map KS ts
compute (SymKP ts alts) = [KP ts alts]
untokn :: [Tokn] -> [String]
untokn ts = case ts of
KP d _ : [] -> d
KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
KS s : ws -> s : untokn ws
[] -> []
where
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d
-- create a table from labels+params to variants
-- | Creates a table from feature name to linearization.
-- The outher list encodes the variations
tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]]
tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e)
tabularLinearizes pgf lang e = map (zip lbls . map (unwords . concatMap flattenBracketedString . snd . untokn "") . elems)
(linTree pgf lang e)
where
lbls = case unApp e of
Just (f,_) -> let cat = valCat (lookType pgf f)
@@ -97,12 +52,77 @@ tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (
Nothing -> error "No labels"
Nothing -> error "Not function application"
--------------------------------------------------------------------
-- Implementation
--------------------------------------------------------------------
-- show bracketed markup with references to tree structure
markLinearizes :: PGF -> CId -> Expr -> [String]
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn]
linTree pgf lang e =
[amapWithIndex (\label -> Bracket_ fid label cat) lin | (_,(fid,cat,lin)) <- lin0 [] [] Nothing 0 e]
where
mark mb_f path lint = amap (bracket mb_f path) lint
cnc = lookMap (error "no lang") lang (concretes pgf)
lp = lproductions cnc
lin0 xs ys mb_fid n_fid (EAbs _ x e) = lin0 (showCId x:xs) ys mb_fid n_fid e
lin0 xs ys mb_fid n_fid (ETyped e _) = lin0 xs ys mb_fid n_fid e
lin0 xs ys mb_fid n_fid e | null xs = lin ys mb_fid n_fid e []
| otherwise = apply (xs ++ ys) mb_fid n_fid _B (e:[ELit (LStr x) | x <- xs])
bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]
lin xs mb_fid n_fid (EApp e1 e2) es = lin xs mb_fid n_fid e1 (e2:es)
lin xs mb_fid n_fid (ELit l) [] = case l of
LStr s -> return (n_fid+1,(n_fid,cidString,ss s))
LInt n -> return (n_fid+1,(n_fid,cidInt ,ss (show n)))
LFlt f -> return (n_fid+1,(n_fid,cidFloat ,ss (show f)))
lin xs mb_fid n_fid (EMeta i) es = apply xs mb_fid n_fid _V (ELit (LStr ('?':show i)):es)
lin xs mb_fid n_fid (EFun f) es = apply xs mb_fid n_fid f es
lin xs mb_fid n_fid (EVar i) es = apply xs mb_fid n_fid _V (ELit (LStr (xs !! i)) :es)
lin xs mb_fid n_fid (ETyped e _) es = lin xs mb_fid n_fid e es
lin xs mb_fid n_fid (EImplArg e) es = lin xs mb_fid n_fid e es
ss s = listArray (0,0) [[LeafKS [s]]]
apply :: [String] -> Maybe FId -> FId -> CId -> [Expr] -> [(FId,(FId, CId, LinTable))]
apply xs mb_fid n_fid f es =
case Map.lookup f lp of
Just prods -> do prod <- lookupProds mb_fid prods
case prod of
PApply funid fids -> do guard (length fids == length es)
(n_fid,args) <- descend n_fid (zip fids es)
let (CncFun fun lins) = cncfuns cnc ! funid
Just (DTyp _ cat _,_,_) = Map.lookup fun (funs (abstract pgf))
return (n_fid+1,(n_fid,cat,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
PCoerce fid -> apply xs (Just fid) n_fid f es
Nothing -> apply xs mb_fid n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
where
lookupProds (Just fid) prods = maybe [] Set.toList (IntMap.lookup fid prods)
lookupProds Nothing prods
| f == _B || f == _V = []
| otherwise = [prod | (fid,set) <- IntMap.toList prods, prod <- Set.toList set]
descend n_fid [] = return (n_fid,[])
descend n_fid ((fid,e):fes) = do (n_fid,xx) <- lin0 [] xs (Just fid) n_fid e
(n_fid,xxs) <- descend n_fid fes
return (n_fid,xx:xxs)
isApp (PApply _ _) = True
isApp _ = False
computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn]
computeSeq seqid args = concatMap compute (elems seq)
where
seq = sequences cnc ! seqid
compute (SymCat d r) = getArg d r
compute (SymLit d r) = getArg d r
compute (SymKS ts) = [LeafKS ts]
compute (SymKP ts alts) = [LeafKP ts alts]
getArg d r
| not (null arg_lin) = [Bracket_ fid r cat arg_lin]
| otherwise = arg_lin
where
arg_lin = lin ! r
(fid,cat,lin) = args !! d
amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2
amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr))

View File

@@ -11,6 +11,7 @@ import qualified Data.Array as Array
import Data.Maybe
import Data.List
import GF.Data.Utilities(sortNub)
import Text.PrettyPrint
-- operations for manipulating PGF grammars and objects
@@ -202,3 +203,56 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
Nothing -> []
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
-- Utilities for doing linearization
-- | BracketedString represents a sentence that is linearized
-- as usual but we also want to retain the ''brackets'' that
-- mark the beginning and the end of each constituent.
data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token
| Bracket {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] -- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
-- is also unique for every bracket. When there are discontinuous
-- phrases then the identifiers are unique for every phrase but
-- not for every bracket since the bracket represents a constituent.
-- The different constituents could still be distinguished by using
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
-- then the constituent indices will be the same for all brackets
-- that represents the same constituent.
data BracketedTokn
= LeafKS [String]
| LeafKP [String] [Alternative]
| Bracket_ {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedTokn] -- Invariant: the list is not empty
type LinTable = Array.Array LIndex [BracketedTokn]
-- | Renders the bracketed string as string where
-- the brackets are shown as @(S ...)@ where
-- @S@ is the category.
showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t
ppBracketedString (Bracket fcat index cat bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
untokn :: String -> BracketedTokn -> (String,[BracketedString])
untokn nw (LeafKS ts) = (head ts,map Leaf ts)
untokn nw (LeafKP d vs) = let ts = sel d vs nw
in (head ts,map Leaf ts)
where
sel d vs nw =
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
v:_ -> v
_ -> d
untokn nw (Bracket_ fid index cat bss) =
let (nw',bss') = mapAccumR untokn nw bss
in (nw',[Bracket fid index cat (concat bss')])
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w) = [w]
flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss

View File

@@ -6,7 +6,7 @@ module PGF.Parse
, nextState
, getCompletions
, recoveryStates
, extractTrees
, ParseResult(..), getParseResult
, parse
, parseWithRecovery
) where
@@ -14,7 +14,7 @@ module PGF.Parse
import Data.Array.IArray
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe, maybe)
import Data.Maybe (fromMaybe, maybe, maybeToList)
import qualified Data.Map as Map
import qualified GF.Data.TrieMap as TMap
import qualified Data.IntMap as IntMap
@@ -27,26 +27,35 @@ import PGF.Data
import PGF.Expr(Tree)
import PGF.Macros
import PGF.TypeCheck
import Debug.Trace
import PGF.Forest(Forest(Forest), linearizeWithBrackets)
parse :: PGF -> Language -> Type -> [String] -> [Tree]
parse pgf lang typ toks = loop (initState pgf lang typ) toks
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseResult
= ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed.
| TypeError FId [TcError] -- ^ The parsing was successful but none of the trees is type correct.
-- The forest id ('FId') points to the bracketed string from the parser
-- where the type checking failed. More than one error is returned
-- if there are many analizes for some phrase but they all are not type correct.
| ParseResult [Tree] -- ^ If the parsing was successful we get a list of abstract syntax trees. The list should be non-empty.
parse :: PGF -> Language -> Type -> [String] -> (ParseResult,Maybe BracketedString)
parse pgf lang typ toks = loop 0 (initState pgf lang typ) toks
where
loop ps [] = extractTrees ps typ
loop ps (t:ts) = case nextState ps t of
Left es -> []
Right ps -> loop ps ts
loop i ps [] = getParseResult ps typ
loop i ps (t:ts) = case nextState ps t of
Left es -> (ParseFailed i,Nothing)
Right ps -> loop (i+1) ps ts
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> (ParseResult,Maybe BracketedString)
parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
where
accept ps [] = extractTrees ps typ
accept ps [] = getParseResult ps typ
accept ps (t:ts) =
case nextState ps t of
Right ps -> accept ps ts
Left es -> skip (recoveryStates open_typs es) ts
skip ps_map [] = extractTrees (fst ps_map) typ
skip ps_map [] = getParseResult (fst ps_map) typ
skip ps_map (t:ts) =
case Map.lookup t (snd ps_map) of
Just ps -> accept ps ts
@@ -145,23 +154,31 @@ recoveryStates open_types (EState pgf cnc chart) =
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
extractTrees :: ParseState -> Type -> [Tree]
extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
getParseResult :: ParseState -> Type -> (ParseResult,Maybe BracketedString)
getParseResult (PState pgf cnc chart items) ty@(DTyp _ start _) =
let mb_bs = case roots of
((root,lbl):_) -> Just $ linearizeWithBrackets $ Forest (abstract pgf) cnc (forest st) root lbl
_ -> Nothing
exps = nubsort $ do
(fid,lbl) <- roots
(fvs,e) <- go Set.empty 0 (0,fid)
guard (Set.null fvs)
Right e1 <- [checkExpr pgf e ty]
return e1
in (ParseResult exps,mb_bs)
where
(mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences cnc) (cncfuns cnc) agenda () chart
exps =
case Map.lookup start (cnccats cnc) of
Just (CncCat s e lbls) -> do cat <- range (s,e)
lbl <- indices lbls
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
(fvs,tree) <- go Set.empty 0 (0,fid)
guard (Set.null fvs)
return tree
Nothing -> mzero
roots = case Map.lookup start (cnccats cnc) of
Just (CncCat s e lbls) -> do cat <- range (s,e)
lbl <- indices lbls
fid <- maybeToList (lookupPC (PK cat lbl 0) (passive st))
return (fid,lbl)
Nothing -> mzero
go rec fcat' (d,fcat)
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
@@ -189,6 +206,7 @@ extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
freeVar (EFun v) = Set.singleton v
freeVar _ = Set.empty
process mbt fn !seqs !funs [] acc chart = (acc,chart)
process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
| inRange (bounds lin) ppos =
@@ -218,15 +236,15 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
Nothing -> fid
Just fid -> fid
in case [ts | PConst _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
(toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart
[] -> case litCatMatch fid mbt of
Just (toks,lit)
Just (cat,lit,toks)
-> let fid' = nextId chart
!acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart)
,forest =IntMap.insert fid' (Set.singleton (PConst lit toks)) (forest chart)
,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart)
,nextId =nextId chart+1
}
Nothing -> process mbt fn seqs funs items acc chart
@@ -260,12 +278,12 @@ updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
litCatMatch fcat (Just t)
| fcat == fcatString = Just ([t],ELit (LStr t))
| fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
| fcat == fcatString = Just (cidString,ELit (LStr t),[t])
| fcat == fcatInt = case reads t of {[(n,"")] -> Just (cidInt,ELit (LInt n),[t]);
_ -> Nothing }
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just (cidFloat,ELit (LFlt d),[t]);
_ -> Nothing }
| fcat == fcatVar = Just ([t],EFun (mkCId t))
| fcat == fcatVar = Just (cidVar,EFun (mkCId t),[t])
litCatMatch _ _ = Nothing
@@ -341,9 +359,9 @@ foldForest f g b fcat forest =
Nothing -> b
Just set -> Set.fold foldProd b set
where
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b
foldProd (PConst const toks) b = g const toks b
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
foldProd (PApply funid args) b = f funid args b
foldProd (PConst _ const toks) b = g const toks b
----------------------------------------------------------------

View File

@@ -58,7 +58,7 @@ ppProduction (fcat,PApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,PCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,PConst _ ss) =
ppProduction (fcat,PConst _ _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppCncFun (funid,CncFun fun arr) =

View File

@@ -15,339 +15,296 @@
-- instead of rolling its own.
-----------------------------------------------------------------------------
module PGF.VisualizeTree ( graphvizAbstractTree
, graphvizParseTree
, graphvizDependencyTree
, graphvizAlignment
, tree2mk
, getDepLabels
, PosText(..), readPosText
module PGF.VisualizeTree
( graphvizAbstractTree
, graphvizParseTree
, graphvizDependencyTree
, graphvizBracketedString
, graphvizAlignment
, getDepLabels
) where
import PGF.CId (CId,showCId,pCId,mkCId)
import PGF.CId (CId,showCId,ppCId,mkCId)
import PGF.Data
import PGF.Tree
import PGF.Expr (showExpr)
import PGF.Expr (showExpr, Tree)
import PGF.Linearize
import PGF.Macros (lookValCat)
import PGF.Macros (lookValCat, BracketedString(..), flattenBracketedString)
import qualified Data.Map as Map
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
import Data.Char (isDigit)
import qualified Text.ParserCombinators.ReadP as RP
import Text.PrettyPrint
import Debug.Trace
-- | Renders abstract syntax tree in Graphviz format
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
graphvizAbstractTree pgf (funs,cats) = render . tree2graph
where
tree2graph t =
text "graph {" $$
ppGraph [] [] 0 t $$
text "}"
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String
graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree
getAbs xs (EAbs _ x e) = getAbs (x:xs) e
getAbs xs (ETyped e _) = getAbs xs e
getAbs xs e = (xs,e)
getApp (EApp x y) es = getApp x (y:es)
getApp (ETyped e _) es = getApp e es
getApp e es = (e,es)
tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String]
tree2graph pgf (funs,cats) = prf [] where
prf ps t = let (nod,lab) = prn ps t in
(nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") :
case t of
Fun cid trees ->
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
Abs xs (Fun cid trees) ->
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
_ -> []
prn ps t = case t of
Fun cid _ ->
let
fun = if funs then showCId cid else ""
cat = if cats then prCat cid else ""
colon = if funs && cats then " : " else ""
lab = "\"" ++ fun ++ colon ++ cat ++ "\""
in (show(show (ps :: [Int])),lab)
Abs bs tree ->
let fun = case tree of
Fun cid _ -> Fun cid []
_ -> tree
in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"")
_ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"")
pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];"
arr = " -- " -- if digr then " -> " else " -- "
prCat = showCId . lookValCat pgf
esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts
getLbl scope (EFun f) = let fun = if funs then ppCId f else empty
cat = if cats then ppCId (lookValCat pgf f) else empty
sep = if funs && cats then colon else empty
in fun <+> sep <+> cat
getLbl scope (ELit l) = text (escapeStr (render (ppLit l)))
getLbl scope (EMeta i) = ppMeta i
getLbl scope (EVar i) = ppCId (scope !! i)
getLbl scope (ETyped e _) = getLbl scope e
getLbl scope (EImplArg e) = getLbl scope e
prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
graph = if digr then "digraph" else "graph"
ppGraph scope ps i e0 =
let (xs, e1) = getAbs [] e0
(e2,args) = getApp e1 []
binds = if null xs
then empty
else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->"
(lbl,eargs) = case e2 of
EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node
_ -> (getLbl scope' e2, args)
scope' = xs ++ scope
in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$
(if null ps
then empty
else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$
vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs)
ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps))
escapeStr [] = []
escapeStr ('\\':cs) = '\\':'\\':escapeStr cs
escapeStr ('"' :cs) = '\\':'"' :escapeStr cs
escapeStr (c :cs) = c :escapeStr cs
-- replace each non-atomic constructor with mkC, where C is the val cat
tree2mk :: PGF -> Expr -> String
tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
t2m t = case t of
Fun cid [] -> t
Fun cid ts -> Fun (mk cid) (map t2m ts)
_ -> t
mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
-- dependency trees from Linearize.linearizeMark
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
"malt" -> unlines (lin2dep format)
"malt_input" -> unlines (lin2dep format)
_ -> prGraph True (lin2dep format)
where
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
case format of
"malt" -> map (concat . intersperse "\t") wnodes
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
_ -> prelude ++ nodes ++ links
ifd s = if debug then s else []
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
nodes = map mkNode nodeWords
mkNode (i,((_,p),ss)) =
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
links = map mkLink thelinks
thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
node = show . show
dominant x = case x of
[] -> x
_ | not (x == hx) -> hx
_ -> dominant (init x)
where
hx = headArg (init x) tr x
headArg x0 tr x = case (tr,x) of
(Fun f [],[_]) -> x0 ---- ??
(Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
(Fun f ts,i:y) -> headArg x0 (ts !! i) y
_ -> x0 ----
label tr y x = case span (uncurry (==)) (zip y x) of
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
_ -> "" ----
funAt tr x = case (tr,x) of
(Fun f _ ,[]) -> f
(Fun f ts,i:y) -> funAt (ts !! i) y
_ -> mkCId (prTree tr) ----
word x = if elem x sortedNodes then x else
let x' = headArg x tr (x ++[0]) in
if x' == x then [] else word x'
tr = expr2tree exp
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
labels = maybe Map.empty id mlab
getHead i f = case Map.lookup f labels of
Just ls -> length $ takeWhile (/= "head") ls
_ -> i
getLabel i f = case Map.lookup f labels of
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
-- to generate CoNLL format for MaltParser
nodeMap :: Map.Map [Int] Int
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
arcMap :: Map.Map [Int] ([Int],String)
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
lookDomLab p = case Map.lookup p arcMap of
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
_ -> (0,rootlabel)
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
(i, ((fun,p),ws)) <- tail nodeWords,
let pos = showCId $ lookValCat pgf fun,
let morph = unspec,
let (dom,lab) = lookDomLab p
]
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
unspec = "_"
rootlabel = "ROOT"
type Labels = Map.Map CId [String]
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
graphvizDependencyTree format debug mlab ms pgf lang t = render $
case format of
"malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
_ -> text "digraph {" $$
space $$
nest 2 (text "rankdir=LR ;" $$
text "node [shape = plaintext] ;" $$
vcat nodes $$
links) $$
text "}"
where
nodes = map mkNode leaves
links = empty
wnodes = undefined
nil = -1
bs = bracketedLinearize pgf lang t
leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
groupAndIndexIt id [] = []
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
where
collect pws@((p1,w):pws1)
| p == p1 = let (ws,pws2) = collect pws1
in (w:ws,pws2)
collect pws = ([],pws)
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket fid _ _ bss -> concatMap (getLeaves fid) bss
mkNode (p,i,w) =
tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
{-
ifd s = if debug then s else []
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
nodes = map mkNode nodeWords
mkNode (i,((_,p),ss)) =
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
links = map mkLink thelinks
thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
node = show . show
dominant x = case x of
[] -> x
_ | not (x == hx) -> hx
_ -> dominant (init x)
where
hx = headArg (init x) tr x
headArg x0 tr x = case (tr,x) of
(Fun f [],[_]) -> x0 ---- ??
(Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
(Fun f ts,i:y) -> headArg x0 (ts !! i) y
_ -> x0 ----
label tr y x = case span (uncurry (==)) (zip y x) of
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
_ -> "" ----
funAt tr x = case (tr,x) of
(Fun f _ ,[]) -> f
(Fun f ts,i:y) -> funAt (ts !! i) y
_ -> mkCId (prTree tr) ----
word x = if elem x sortedNodes then x else
let x' = headArg x tr (x ++[0]) in
if x' == x then [] else word x'
tr = expr2tree exp
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
labels = maybe Map.empty id mlab
getHead i f = case Map.lookup f labels of
Just ls -> length $ takeWhile (/= "head") ls
_ -> i
getLabel i f = case Map.lookup f labels of
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
-- to generate CoNLL format for MaltParser
nodeMap :: Map.Map [Int] Int
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
arcMap :: Map.Map [Int] ([Int],String)
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
lookDomLab p = case Map.lookup p arcMap of
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
_ -> (0,rootlabel)
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
(i, ((fun,p),ws)) <- tail nodeWords,
let pos = showCId $ lookValCat pgf fun,
let morph = unspec,
let (dom,lab) = lookDomLab p
]
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
unspec = "_"
rootlabel = "ROOT"
-}
getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
-- parse trees from Linearize.linearizeMark
---- nubrec and domins are quadratic, but could be (n log n)
graphvizParseTree :: PGF -> Language -> Tree -> String
graphvizParseTree pgf lang = graphvizBracketedString . bracketedLinearize pgf lang
graphvizParseTree :: PGF -> CId -> Expr -> String
graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where
graphvizBracketedString :: BracketedString -> String
graphvizBracketedString = render . lin2tree
where
lin2tree bs =
text "graph {" $$
space $$
nest 2 (text "rankdir=BU ;" $$
text "node [shape = record, color = white] ;" $$
space $$
vcat (nodes bs)) $$
text "}"
where
nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves])
lin2tree pgf s = prelude ++ nodes ++ links where
nil = -1
leaves = getLeaves 0 nil bs
interns = getInterns 0 [(nil,bs)]
prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"]
getLeaves level parent bs =
case bs of
Leaf w -> [(level-1,parent,w)]
Bracket fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
nodeRecs = zip [0..]
(nub (filter (not . null) (nlins [postext] ++ [leaves postext])))
nlins pts =
nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] :
concatMap nlins [ts | T _ ts <- pts]
leaves pt = [(p++[j],s) | (j,(p,s)) <-
zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]]
getInterns level [] = []
getInterns level nodes =
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket fid _ cat _) <- nodes] :
getInterns (level+1) [(fid,child) | (_,Bracket fid _ _ children) <- nodes, child <- children]
nubrec es rs = case rs of
r:rr -> let r' = filter (not . flip elem es) (nub r)
in r' : nubrec (r' ++ es) rr
_ -> rs
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
link pl pid l id
| pl < 0 = empty
| otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+>
text "--" <+>
struct l <> colon <> tag id <> colon <> char 'n' <+> semi
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
nodes = map mkStruct nodeRecs
mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;"
cat = showCId . lookValCat pgf
fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs])
struct i = "struct" ++ show i
links = map mkEdge domins
domins = nub [((i,x),(j,y)) |
(i,xs) <- nodeRecs, (j,ys) <- nodeRecs,
x <- xs, y <- ys, dominates x y]
dominates (p,x) (q,y) = not (null q) && p == init q
mkEdge ((i,x),(j,y)) =
struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++
struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;"
postext = readPosText s
-- auxiliaries for graphviz syntax
struct i = "struct" ++ show i
mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n
uncommas = map (\c -> if c==',' then 'c' else c)
tag s = "<" ++ s ++ ">"
showp = init . tail . show
mtag = tag . ('n':) . uncommas
struct l = text ("struct" ++ show l)
tbrackets d = char '<' <> d <> char '>'
tag i = char 'n' <> int i
-- word alignments from Linearize.markLinearize
-- words are chunks like {[0,1,1,0] old}
graphvizAlignment :: PGF -> Expr -> String
graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- Map.keys (concretes pgf)]
graphvizAlignment :: PGF -> [Language] -> Expr -> String
graphvizAlignment pgf langs = render . lin2graph . linsBracketed
where
linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
lin2graph :: [String] -> [String]
lin2graph ss = -- trace (show ss) $
prelude ++ nodes ++ links
lin2graph :: [BracketedString] -> Doc
lin2graph bss =
text "digraph {" $$
space $$
nest 2 (text "rankdir=LR ;" $$
text "node [shape = record] ;" $$
space $$
mkLayers 0 leaves) $$
text "}"
where
nil = -1
where
leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
prelude = ["rankdir=LR ;", "node [shape = record] ;"]
groupAndIndexIt id [] = []
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
where
collect pws@((p1,w):pws1)
| p == p1 = let (ws,pws2) = collect pws1
in (w:ws,pws2)
collect pws = ([],pws)
nlins :: [(Int,[((Int,String),String)])]
nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) |
(i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
getLeaves parent bs =
case bs of
Leaf w -> [(parent,w)]
Bracket fid _ _ bss -> concatMap (getLeaves fid) bss
unw = concat . intersperse "\\ " -- space escape in graphviz
mkLayers l [] = empty
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
(case css of
(ncs:_) -> vcat (map (mkLinks l ncs) cs)
_ -> empty) $$
mkLayers (l+1) css
nodes = map mkStruct nlins
mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
links = nub $ concatMap mkEdge (init nlins)
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
[edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
edge i v w =
struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
{-
alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double)
alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where
linsMark t =
[s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)]
mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double)
mkStat =
mkAlign :: [String] -> [(String,String)]
mkAlign ss =
nlins :: [(Int,[((Int,String),String)])]
nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) |
(i,vs) <- zip [0..] (map (wlins . readPosText) ss)]
nodes = map mkStruct nlins
mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
links = nub $ concatMap mkEdge (init nlins)
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
[edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
edge i v w =
struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
-}
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
wlins pt = case pt of
T p pts -> concatMap (lins p) pts
M ws -> if null ws then [] else [((Nothing,[]),ws)]
where
lins p pt = case pt of
T q pts -> concatMap (lins q) pts
M ws -> if null ws then [] else [(p,ws)]
data PosText =
T (Maybe CId,[Int]) [PosText]
| M [String]
deriving Show
readPosText :: String -> PosText
readPosText = fst . head . (RP.readP_to_S pPosText) where
pPosText = do
RP.char '(' >> RP.skipSpaces
p <- pPos
RP.skipSpaces
ts <- RP.many pPosText
RP.char ')' >> RP.skipSpaces
return (T p ts)
RP.<++ do
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
return (M ws)
pPos = do
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
RP.<++ (return Nothing)
RP.char '[' >> RP.skipSpaces
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
RP.char ']' >> RP.skipSpaces
RP.char ')' RP.<++ return ' '
return (fun,map read is)
{-
digraph{
rankdir ="LR" ;
node [shape = record] ;
struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ;
struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ;
struct1:f0 -> struct2:f0 ;
struct1:f1 -> struct2:f2 ;
struct1:f2 -> struct2:f3 ;
struct1:f3 -> struct2:f1 ;
struct1:f0 -> struct2:f4 ;
}
-}
mkLinks l cs (p0,id0,_) =
vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+>
text "->" <+>
struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices)
where
indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs])