mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
first incarnation of the bracketed string API
This commit is contained in:
1
gf.cabal
1
gf.cabal
@@ -42,6 +42,7 @@ library
|
||||
PGF.VisualizeTree
|
||||
PGF.Printer
|
||||
PGF.Probabilistic
|
||||
PGF.Forest
|
||||
GF.Data.TrieMap
|
||||
GF.Data.Utilities
|
||||
GF.Data.SortedList
|
||||
|
||||
@@ -22,6 +22,7 @@ import PGF.Morphology
|
||||
import PGF.Printer
|
||||
import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities)
|
||||
import PGF.Generate (generateRandomFrom) ----
|
||||
import PGF.Tree (Tree(Fun), expr2tree, tree2expr)
|
||||
import GF.Compile.Export
|
||||
import GF.Compile.ExampleBased
|
||||
import GF.Infra.Option (noOptions, readOutputFormat)
|
||||
@@ -150,7 +151,7 @@ allCommands env@(pgf, mos) = Map.fromList [
|
||||
"flag -format."
|
||||
],
|
||||
exec = \opts es -> do
|
||||
let grph = if null es then [] else graphvizAlignment pgf (head es)
|
||||
let grph = if null es then [] else graphvizAlignment pgf (languages pgf) (head es)
|
||||
if isFlag "view" opts || isFlag "format" opts then do
|
||||
let file s = "_grph." ++ s
|
||||
let view = optViewGraph opts
|
||||
@@ -481,11 +482,14 @@ allCommands env@(pgf, mos) = Map.fromList [
|
||||
"will accept unknown adjectives, nouns and verbs with the resource grammar."
|
||||
],
|
||||
exec = \opts ts ->
|
||||
returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts,
|
||||
return $ fromParse opts ts $ concatMap (par opts) $ toStrings ts,
|
||||
flags = [
|
||||
("cat","target category of parsing"),
|
||||
("lang","the languages of parsing (comma-separated, no spaces)"),
|
||||
("openclass","list of open-class categories for robust parsing")
|
||||
],
|
||||
options = [
|
||||
("bracket","prints the bracketed string from the parser")
|
||||
]
|
||||
}),
|
||||
("pg", emptyCommandInfo { -----
|
||||
@@ -893,8 +897,8 @@ allCommands env@(pgf, mos) = Map.fromList [
|
||||
]
|
||||
where
|
||||
par opts s = case optOpenTypes opts of
|
||||
[] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts]
|
||||
open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
|
||||
[] -> [parse pgf lang (optType opts) s | lang <- optLangs opts]
|
||||
open_typs -> [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts]
|
||||
|
||||
void = ([],[])
|
||||
|
||||
@@ -918,9 +922,17 @@ allCommands env@(pgf, mos) = Map.fromList [
|
||||
map (map (unl . snd)) . tabularLinearizes pgf lang
|
||||
_ | isOpt "table" opts -> unlines . concat . intersperse [[]] .
|
||||
map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang
|
||||
_ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang
|
||||
_ | isOpt "bracket" opts -> showBracketedString . bracketedLinearize pgf lang
|
||||
_ -> unl . linearize pgf lang
|
||||
|
||||
-- replace each non-atomic constructor with mkC, where C is the val cat
|
||||
tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
|
||||
t2m t = case t of
|
||||
Fun cid [] -> t
|
||||
Fun cid ts -> Fun (mk cid) (map t2m ts)
|
||||
_ -> t
|
||||
mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
|
||||
|
||||
unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ----
|
||||
|
||||
getUnlex opts lang = case words (valStrOpts "unlexer" "" opts) of
|
||||
@@ -991,14 +1003,22 @@ allCommands env@(pgf, mos) = Map.fromList [
|
||||
toStrings = map showAsString
|
||||
toString = unwords . toStrings
|
||||
|
||||
fromParse opts ts parses
|
||||
| isOpt "bracket" opts = case catMaybes bss of
|
||||
[] -> ([], "no brackets found")
|
||||
bss -> ([], unlines $ map showBracketedString bss)
|
||||
| otherwise = case ts of
|
||||
[] -> ([], "no trees found" ++
|
||||
missingWordMsg (optMorpho opts) (concatMap words (toStrings ts))
|
||||
)
|
||||
_ -> fromExprs ts
|
||||
where
|
||||
(prs,bss) = unzip parses
|
||||
ts = [t | ParseResult ts <- prs, t <- ts]
|
||||
|
||||
returnFromExprs es = return $ case es of
|
||||
[] -> ([], "no trees found")
|
||||
_ -> fromExprs es
|
||||
returnFromExprsPar opts ts es = return $ case es of
|
||||
[] -> ([], "no trees found" ++
|
||||
missingWordMsg (optMorpho opts) (concatMap words (toStrings ts))
|
||||
)
|
||||
_ -> fromExprs es
|
||||
|
||||
prGrammar opts
|
||||
| isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf
|
||||
|
||||
@@ -41,17 +41,20 @@ convertFile conf src file = do
|
||||
convEx (cat,ex) = do
|
||||
appn "("
|
||||
let typ = maybe (error "no valid cat") id $ readType cat
|
||||
let ts = rank $ parse pgf lang typ ex
|
||||
ws <- case ts of
|
||||
[] -> do
|
||||
ws <- case fst (parse pgf lang typ ex) of
|
||||
ParseFailed _ -> do
|
||||
let ws = morphoMissing morpho (words ex)
|
||||
appv ("WARNING: cannot parse example " ++ ex)
|
||||
case ws of
|
||||
[] -> return ()
|
||||
_ -> appv (" missing words: " ++ unwords ws)
|
||||
return ws
|
||||
t:tt -> appv ("WARNING: ambiguous example " ++ ex) >>
|
||||
appn t >> mapM_ (appn . (" --- " ++)) tt >> return []
|
||||
return ws
|
||||
TypeError _ _ ->
|
||||
return []
|
||||
ParseResult ts ->
|
||||
case rank ts of
|
||||
(t:tt) -> appv ("WARNING: ambiguous example " ++ ex) >>
|
||||
appn t >> mapM_ (appn . (" --- " ++)) tt >> return []
|
||||
appn ")"
|
||||
return ws
|
||||
rank ts = case probs conf of
|
||||
|
||||
@@ -46,7 +46,10 @@ translationList mex mprobs pgf ig og typ number = do
|
||||
return $ map mkOne $ ts
|
||||
where
|
||||
mkOne t = (norml (linearize pgf ig t), map (norml . linearize pgf og) (homonyms t))
|
||||
homonyms = nub . parse pgf ig typ . linearize pgf ig
|
||||
homonyms t =
|
||||
case (fst . parse pgf ig typ . linearize pgf ig) t of
|
||||
ParseResult ts -> ts
|
||||
_ -> []
|
||||
|
||||
morphologyList ::
|
||||
Maybe Expr -> Maybe Probabilities ->
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
-------------------------------------------------
|
||||
-- |
|
||||
-- Module : PGF
|
||||
-- Maintainer : Aarne Ranta
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : stable
|
||||
-- Portability : portable
|
||||
--
|
||||
@@ -50,9 +50,12 @@ module PGF(
|
||||
|
||||
-- * Operations
|
||||
-- ** Linearization
|
||||
linearize, linearizeAllLang, linearizeAll,
|
||||
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
|
||||
groupResults, -- lins of trees by language, removing duplicates
|
||||
showPrintName,
|
||||
|
||||
BracketedString(..), FId, LIndex,
|
||||
Forest.showBracketedString,
|
||||
|
||||
-- ** Parsing
|
||||
parse, parseWithRecovery, parseAllLang, parseAll,
|
||||
@@ -73,10 +76,11 @@ module PGF(
|
||||
checkType, checkExpr, inferExpr,
|
||||
TcError(..), ppTcError,
|
||||
|
||||
-- ** Word Completion (Incremental Parsing)
|
||||
-- ** Low level parsing API
|
||||
complete,
|
||||
Parse.ParseState,
|
||||
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees,
|
||||
Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates,
|
||||
Parse.ParseResult(..), Parse.getParseResult,
|
||||
|
||||
-- ** Generation
|
||||
generateRandom, generateAll, generateAllDepth,
|
||||
@@ -90,6 +94,7 @@ module PGF(
|
||||
graphvizAbstractTree,
|
||||
graphvizParseTree,
|
||||
graphvizDependencyTree,
|
||||
graphvizBracketedString,
|
||||
graphvizAlignment,
|
||||
|
||||
-- * Browsing
|
||||
@@ -107,6 +112,7 @@ import PGF.Expr (Tree)
|
||||
import PGF.Morphology
|
||||
import PGF.Data
|
||||
import PGF.Binary
|
||||
import qualified PGF.Forest as Forest
|
||||
import qualified PGF.Parse as Parse
|
||||
|
||||
import GF.Data.Utilities (replace)
|
||||
@@ -131,34 +137,18 @@ import Text.PrettyPrint
|
||||
-- > $ gf -make <grammar file name>
|
||||
readPGF :: FilePath -> IO PGF
|
||||
|
||||
-- | Linearizes given expression as string in the language
|
||||
linearize :: PGF -> Language -> Tree -> String
|
||||
|
||||
-- | Tries to parse the given string in the specified language
|
||||
-- and to produce abstract syntax expression. An empty
|
||||
-- list is returned if the parsing is not successful. The list may also
|
||||
-- contain more than one element if the grammar is ambiguous.
|
||||
-- Throws an exception if the given language cannot be used
|
||||
-- for parsing, see 'canParse'.
|
||||
parse :: PGF -> Language -> Type -> String -> [Tree]
|
||||
-- and to produce abstract syntax expression.
|
||||
parse :: PGF -> Language -> Type -> String -> (Parse.ParseResult,Maybe BracketedString)
|
||||
|
||||
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
|
||||
|
||||
-- | The same as 'linearizeAllLang' but does not return
|
||||
-- the language.
|
||||
linearizeAll :: PGF -> Tree -> [String]
|
||||
|
||||
-- | Linearizes given expression as string in all languages
|
||||
-- available in the grammar.
|
||||
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
|
||||
-- | This is an experimental function. Use it on your own risk
|
||||
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseResult,Maybe BracketedString)
|
||||
|
||||
-- | The same as 'parseAllLang' but does not return
|
||||
-- the language.
|
||||
parseAll :: PGF -> Type -> String -> [[Tree]]
|
||||
|
||||
-- | Tries to parse the given string with all available languages.
|
||||
-- Languages which cannot be used for parsing (see 'canParse')
|
||||
-- are ignored.
|
||||
-- The returned list contains pairs of language
|
||||
-- and list of abstract syntax expressions
|
||||
-- (this is a list, since grammars can be ambiguous).
|
||||
@@ -227,8 +217,6 @@ complete :: PGF -> Language -> Type -> String
|
||||
|
||||
readPGF f = decodeFile f
|
||||
|
||||
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
|
||||
|
||||
parse pgf lang typ s =
|
||||
case Map.lookup lang (concretes pgf) of
|
||||
Just cnc -> Parse.parse pgf lang typ (words s)
|
||||
@@ -236,10 +224,6 @@ parse pgf lang typ s =
|
||||
|
||||
parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s)
|
||||
|
||||
linearizeAll mgr = map snd . linearizeAllLang mgr
|
||||
linearizeAllLang mgr t =
|
||||
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
|
||||
|
||||
groupResults :: [[(Language,String)]] -> [(Language,[String])]
|
||||
groupResults = Map.toList . foldr more Map.empty . start . concat
|
||||
where
|
||||
@@ -250,7 +234,7 @@ groupResults = Map.toList . foldr more Map.empty . start . concat
|
||||
parseAll mgr typ = map snd . parseAllLang mgr typ
|
||||
|
||||
parseAllLang mgr typ s =
|
||||
[(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)]
|
||||
[(lang,ts) | lang <- languages mgr, (Parse.ParseResult ts,_) <- [parse mgr lang typ s], not (null ts)]
|
||||
|
||||
generateRandom pgf cat = do
|
||||
gen <- newStdGen
|
||||
@@ -280,14 +264,18 @@ functionType pgf fun =
|
||||
Nothing -> Nothing
|
||||
|
||||
complete pgf from typ input =
|
||||
let (ws,prefix) = tokensAndPrefix input
|
||||
state0 = Parse.initState pgf from typ
|
||||
in case loop state0 ws of
|
||||
Nothing -> []
|
||||
Just state ->
|
||||
(if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else [])
|
||||
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
|
||||
let (ws,prefix) = tokensAndPrefix input
|
||||
state0 = Parse.initState pgf from typ
|
||||
in case loop state0 ws of
|
||||
Nothing -> []
|
||||
Just state -> (if null prefix && isSuccessful state then [unwords ws ++ " "] else [])
|
||||
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)]
|
||||
where
|
||||
isSuccessful state =
|
||||
case Parse.getParseResult state typ of
|
||||
(Parse.ParseResult ts, _) -> not (null ts)
|
||||
_ -> False
|
||||
|
||||
tokensAndPrefix :: String -> ([String],String)
|
||||
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
|
||||
| null ws = ([],"")
|
||||
|
||||
@@ -56,7 +56,7 @@ data Symbol
|
||||
data Production
|
||||
= PApply {-# UNPACK #-} !FunId [FId]
|
||||
| PCoerce {-# UNPACK #-} !FId
|
||||
| PConst Expr [String]
|
||||
| PConst CId Expr [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String)
|
||||
data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show)
|
||||
@@ -86,7 +86,6 @@ data Tokn =
|
||||
| KP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
-- merge two PGFs; fails is differens absnames; priority to second arg
|
||||
|
||||
unionPGF :: PGF -> PGF -> PGF
|
||||
|
||||
101
src/runtime/haskell/PGF/Forest.hs
Normal file
101
src/runtime/haskell/PGF/Forest.hs
Normal file
@@ -0,0 +1,101 @@
|
||||
-------------------------------------------------
|
||||
-- |
|
||||
-- Module : PGF
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : stable
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Forest is a compact representation of a set
|
||||
-- of parse trees. This let us to efficiently
|
||||
-- represent local ambiguities
|
||||
--
|
||||
-------------------------------------------------
|
||||
|
||||
module PGF.Forest( Forest(..)
|
||||
, BracketedString, showBracketedString
|
||||
, linearizeWithBrackets
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import Data.List
|
||||
import Data.Array.IArray
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntSet as IntSet
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Control.Monad
|
||||
|
||||
data Forest
|
||||
= Forest
|
||||
{ abstr :: Abstr
|
||||
, concr :: Concr
|
||||
, forest :: IntMap.IntMap (Set.Set Production)
|
||||
, root :: {-# UNPACK #-} !FId
|
||||
, label :: {-# UNPACK #-} !LIndex
|
||||
}
|
||||
|
||||
--------------------------------------------------------------------
|
||||
-- Rendering of bracketed strings
|
||||
--------------------------------------------------------------------
|
||||
|
||||
linearizeWithBrackets :: Forest -> BracketedString
|
||||
linearizeWithBrackets = head . snd . untokn "" . bracketedTokn
|
||||
|
||||
---------------------------------------------------------------
|
||||
-- Internally we have to do everything with Tokn first because
|
||||
-- we must handle the pre {...} construction.
|
||||
--
|
||||
|
||||
bracketedTokn :: Forest -> BracketedTokn
|
||||
bracketedTokn (Forest abs cnc forest root label) =
|
||||
let (fid,cat,lin) = render IntMap.empty root
|
||||
in Bracket_ fid label cat (lin ! label)
|
||||
where
|
||||
trusted = trustedSpots IntSet.empty root
|
||||
|
||||
render parents fid =
|
||||
case (IntMap.lookup fid parents) `mplus` (fmap Set.toList $ IntMap.lookup fid forest) of
|
||||
Just (p:ps) -> descend (IntMap.insert fid ps parents) p
|
||||
Nothing -> error ("wrong forest id " ++ show fid)
|
||||
where
|
||||
descend parents (PApply funid args) = let (CncFun fun lins) = cncfuns cnc ! funid
|
||||
Just (DTyp _ cat _,_,_) = Map.lookup fun (funs abs)
|
||||
largs = map (render parents) args
|
||||
in (fid,cat,listArray (bounds lins) [computeSeq seqid largs | seqid <- elems lins])
|
||||
descend parents (PCoerce fid) = render parents fid
|
||||
descend parents (PConst cat _ ts) = (fid,cat,listArray (0,0) [[LeafKS ts]])
|
||||
|
||||
trustedSpots parents fid
|
||||
| IntSet.member fid parents
|
||||
= IntSet.empty
|
||||
| otherwise = IntSet.insert fid $
|
||||
case IntMap.lookup fid forest of
|
||||
Just prods -> foldl1 IntSet.intersection [descend prod | prod <- Set.toList prods]
|
||||
Nothing -> IntSet.empty
|
||||
where
|
||||
parents' = IntSet.insert fid parents
|
||||
|
||||
descend (PApply funid args) = IntSet.unions (map (trustedSpots parents') args)
|
||||
descend (PCoerce fid) = trustedSpots parents' fid
|
||||
descend (PConst c e _) = IntSet.empty
|
||||
|
||||
computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn]
|
||||
computeSeq seqid args = concatMap compute (elems seq)
|
||||
where
|
||||
seq = sequences cnc ! seqid
|
||||
|
||||
compute (SymCat d r) = getArg d r
|
||||
compute (SymLit d r) = getArg d r
|
||||
compute (SymKS ts) = [LeafKS ts]
|
||||
compute (SymKP ts alts) = [LeafKP ts alts]
|
||||
|
||||
getArg d r
|
||||
| not (null arg_lin) &&
|
||||
IntSet.member fid trusted
|
||||
= [Bracket_ fid r cat arg_lin]
|
||||
| otherwise = arg_lin
|
||||
where
|
||||
arg_lin = lin ! r
|
||||
(fid,cat,lin) = args !! d
|
||||
@@ -1,8 +1,15 @@
|
||||
module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where
|
||||
module PGF.Linearize
|
||||
( linearize
|
||||
, linearizeAll
|
||||
, linearizeAllLang
|
||||
, bracketedLinearize
|
||||
, tabularLinearizes
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.Expr
|
||||
import Data.Array.IArray
|
||||
import Data.List
|
||||
import Control.Monad
|
||||
@@ -10,85 +17,33 @@ import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
|
||||
-- linearization and computation of concrete PGF Terms
|
||||
--------------------------------------------------------------------
|
||||
-- The API
|
||||
--------------------------------------------------------------------
|
||||
|
||||
type LinTable = Array LIndex [Tokn]
|
||||
-- | Linearizes given expression as string in the language
|
||||
linearize :: PGF -> Language -> Tree -> String
|
||||
linearize pgf lang = concat . take 1 . map (unwords . concatMap flattenBracketedString . snd . untokn "" . (!0)) . linTree pgf lang
|
||||
|
||||
linearizes :: PGF -> CId -> Expr -> [String]
|
||||
linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint)
|
||||
-- | The same as 'linearizeAllLang' but does not return
|
||||
-- the language.
|
||||
linearizeAll :: PGF -> Tree -> [String]
|
||||
linearizeAll pgf = map snd . linearizeAllLang pgf
|
||||
|
||||
linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable]
|
||||
linTree pgf lang mark e = lin0 [] [] [] Nothing e
|
||||
where
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
lp = lproductions cnc
|
||||
-- | Linearizes given expression as string in all languages
|
||||
-- available in the grammar.
|
||||
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
|
||||
linearizeAllLang pgf t = [(lang,linearize pgf lang t) | lang <- Map.keys (concretes pgf)]
|
||||
|
||||
lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e
|
||||
lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e
|
||||
lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e []
|
||||
| otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs])
|
||||
-- | Linearizes given expression as a bracketed string in the language
|
||||
bracketedLinearize :: PGF -> Language -> Tree -> BracketedString
|
||||
bracketedLinearize pgf lang = head . concat . map (snd . untokn "" . (!0)) . linTree pgf lang
|
||||
|
||||
lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es)
|
||||
lin path xs mb_fid (ELit l) [] = case l of
|
||||
LStr s -> return (mark Nothing path (ss s))
|
||||
LInt n -> return (mark Nothing path (ss (show n)))
|
||||
LFlt f -> return (mark Nothing path (ss (show f)))
|
||||
lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es)
|
||||
lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es)
|
||||
lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es)
|
||||
lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es
|
||||
lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es
|
||||
|
||||
ss s = listArray (0,0) [[KS s]]
|
||||
|
||||
apply path xs mb_fid f es =
|
||||
case Map.lookup f lp of
|
||||
Just prods -> case lookupProds mb_fid prods of
|
||||
Just set -> do prod <- Set.toList set
|
||||
case prod of
|
||||
PApply funid fids -> do guard (length fids == length es)
|
||||
args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es)
|
||||
let (CncFun _ lins) = cncfuns cnc ! funid
|
||||
return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])
|
||||
PCoerce fid -> apply path xs (Just fid) f es
|
||||
Nothing -> mzero
|
||||
Nothing -> apply path xs mb_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
|
||||
where
|
||||
lookupProds (Just fid) prods = IntMap.lookup fid prods
|
||||
lookupProds Nothing prods
|
||||
| f == _B || f == _V = Nothing
|
||||
| otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods)))
|
||||
|
||||
sub i path
|
||||
| f == _B || f == _V = path
|
||||
| otherwise = i:path
|
||||
|
||||
isApp (PApply _ _) = True
|
||||
isApp _ = False
|
||||
|
||||
computeSeq seqid args = concatMap compute (elems seq)
|
||||
where
|
||||
seq = sequences cnc ! seqid
|
||||
|
||||
compute (SymCat d r) = (args !! d) ! r
|
||||
compute (SymLit d r) = (args !! d) ! r
|
||||
compute (SymKS ts) = map KS ts
|
||||
compute (SymKP ts alts) = [KP ts alts]
|
||||
|
||||
untokn :: [Tokn] -> [String]
|
||||
untokn ts = case ts of
|
||||
KP d _ : [] -> d
|
||||
KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
|
||||
KS s : ws -> s : untokn ws
|
||||
[] -> []
|
||||
where
|
||||
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
|
||||
-- create a table from labels+params to variants
|
||||
-- | Creates a table from feature name to linearization.
|
||||
-- The outher list encodes the variations
|
||||
tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]]
|
||||
tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e)
|
||||
tabularLinearizes pgf lang e = map (zip lbls . map (unwords . concatMap flattenBracketedString . snd . untokn "") . elems)
|
||||
(linTree pgf lang e)
|
||||
where
|
||||
lbls = case unApp e of
|
||||
Just (f,_) -> let cat = valCat (lookType pgf f)
|
||||
@@ -97,12 +52,77 @@ tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (
|
||||
Nothing -> error "No labels"
|
||||
Nothing -> error "Not function application"
|
||||
|
||||
--------------------------------------------------------------------
|
||||
-- Implementation
|
||||
--------------------------------------------------------------------
|
||||
|
||||
-- show bracketed markup with references to tree structure
|
||||
markLinearizes :: PGF -> CId -> Expr -> [String]
|
||||
markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark
|
||||
linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn]
|
||||
linTree pgf lang e =
|
||||
[amapWithIndex (\label -> Bracket_ fid label cat) lin | (_,(fid,cat,lin)) <- lin0 [] [] Nothing 0 e]
|
||||
where
|
||||
mark mb_f path lint = amap (bracket mb_f path) lint
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
lp = lproductions cnc
|
||||
|
||||
lin0 xs ys mb_fid n_fid (EAbs _ x e) = lin0 (showCId x:xs) ys mb_fid n_fid e
|
||||
lin0 xs ys mb_fid n_fid (ETyped e _) = lin0 xs ys mb_fid n_fid e
|
||||
lin0 xs ys mb_fid n_fid e | null xs = lin ys mb_fid n_fid e []
|
||||
| otherwise = apply (xs ++ ys) mb_fid n_fid _B (e:[ELit (LStr x) | x <- xs])
|
||||
|
||||
bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"]
|
||||
bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"]
|
||||
lin xs mb_fid n_fid (EApp e1 e2) es = lin xs mb_fid n_fid e1 (e2:es)
|
||||
lin xs mb_fid n_fid (ELit l) [] = case l of
|
||||
LStr s -> return (n_fid+1,(n_fid,cidString,ss s))
|
||||
LInt n -> return (n_fid+1,(n_fid,cidInt ,ss (show n)))
|
||||
LFlt f -> return (n_fid+1,(n_fid,cidFloat ,ss (show f)))
|
||||
lin xs mb_fid n_fid (EMeta i) es = apply xs mb_fid n_fid _V (ELit (LStr ('?':show i)):es)
|
||||
lin xs mb_fid n_fid (EFun f) es = apply xs mb_fid n_fid f es
|
||||
lin xs mb_fid n_fid (EVar i) es = apply xs mb_fid n_fid _V (ELit (LStr (xs !! i)) :es)
|
||||
lin xs mb_fid n_fid (ETyped e _) es = lin xs mb_fid n_fid e es
|
||||
lin xs mb_fid n_fid (EImplArg e) es = lin xs mb_fid n_fid e es
|
||||
|
||||
ss s = listArray (0,0) [[LeafKS [s]]]
|
||||
|
||||
apply :: [String] -> Maybe FId -> FId -> CId -> [Expr] -> [(FId,(FId, CId, LinTable))]
|
||||
apply xs mb_fid n_fid f es =
|
||||
case Map.lookup f lp of
|
||||
Just prods -> do prod <- lookupProds mb_fid prods
|
||||
case prod of
|
||||
PApply funid fids -> do guard (length fids == length es)
|
||||
(n_fid,args) <- descend n_fid (zip fids es)
|
||||
let (CncFun fun lins) = cncfuns cnc ! funid
|
||||
Just (DTyp _ cat _,_,_) = Map.lookup fun (funs (abstract pgf))
|
||||
return (n_fid+1,(n_fid,cat,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]))
|
||||
PCoerce fid -> apply xs (Just fid) n_fid f es
|
||||
Nothing -> apply xs mb_fid n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin
|
||||
where
|
||||
lookupProds (Just fid) prods = maybe [] Set.toList (IntMap.lookup fid prods)
|
||||
lookupProds Nothing prods
|
||||
| f == _B || f == _V = []
|
||||
| otherwise = [prod | (fid,set) <- IntMap.toList prods, prod <- Set.toList set]
|
||||
|
||||
descend n_fid [] = return (n_fid,[])
|
||||
descend n_fid ((fid,e):fes) = do (n_fid,xx) <- lin0 [] xs (Just fid) n_fid e
|
||||
(n_fid,xxs) <- descend n_fid fes
|
||||
return (n_fid,xx:xxs)
|
||||
|
||||
isApp (PApply _ _) = True
|
||||
isApp _ = False
|
||||
|
||||
computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn]
|
||||
computeSeq seqid args = concatMap compute (elems seq)
|
||||
where
|
||||
seq = sequences cnc ! seqid
|
||||
|
||||
compute (SymCat d r) = getArg d r
|
||||
compute (SymLit d r) = getArg d r
|
||||
compute (SymKS ts) = [LeafKS ts]
|
||||
compute (SymKP ts alts) = [LeafKP ts alts]
|
||||
|
||||
getArg d r
|
||||
| not (null arg_lin) = [Bracket_ fid r cat arg_lin]
|
||||
| otherwise = arg_lin
|
||||
where
|
||||
arg_lin = lin ! r
|
||||
(fid,cat,lin) = args !! d
|
||||
|
||||
amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2
|
||||
amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr))
|
||||
|
||||
@@ -11,6 +11,7 @@ import qualified Data.Array as Array
|
||||
import Data.Maybe
|
||||
import Data.List
|
||||
import GF.Data.Utilities(sortNub)
|
||||
import Text.PrettyPrint
|
||||
|
||||
-- operations for manipulating PGF grammars and objects
|
||||
|
||||
@@ -202,3 +203,56 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg
|
||||
getFunctions (PCoerce fid) = case IntMap.lookup fid productions of
|
||||
Nothing -> []
|
||||
Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod]
|
||||
|
||||
|
||||
-- Utilities for doing linearization
|
||||
|
||||
-- | BracketedString represents a sentence that is linearized
|
||||
-- as usual but we also want to retain the ''brackets'' that
|
||||
-- mark the beginning and the end of each constituent.
|
||||
data BracketedString
|
||||
= Leaf String -- ^ this is the leaf i.e. a single token
|
||||
| Bracket {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] -- ^ this is a bracket. The 'CId' is the category of
|
||||
-- the phrase. The 'FId' is an unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
-- i.e. without discontinuous constituents this identifier
|
||||
-- is also unique for every bracket. When there are discontinuous
|
||||
-- phrases then the identifiers are unique for every phrase but
|
||||
-- not for every bracket since the bracket represents a constituent.
|
||||
-- The different constituents could still be distinguished by using
|
||||
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
|
||||
-- then the constituent indices will be the same for all brackets
|
||||
-- that represents the same constituent.
|
||||
|
||||
data BracketedTokn
|
||||
= LeafKS [String]
|
||||
| LeafKP [String] [Alternative]
|
||||
| Bracket_ {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedTokn] -- Invariant: the list is not empty
|
||||
|
||||
type LinTable = Array.Array LIndex [BracketedTokn]
|
||||
|
||||
-- | Renders the bracketed string as string where
|
||||
-- the brackets are shown as @(S ...)@ where
|
||||
-- @S@ is the category.
|
||||
showBracketedString :: BracketedString -> String
|
||||
showBracketedString = render . ppBracketedString
|
||||
|
||||
ppBracketedString (Leaf t) = text t
|
||||
ppBracketedString (Bracket fcat index cat bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss))
|
||||
|
||||
untokn :: String -> BracketedTokn -> (String,[BracketedString])
|
||||
untokn nw (LeafKS ts) = (head ts,map Leaf ts)
|
||||
untokn nw (LeafKP d vs) = let ts = sel d vs nw
|
||||
in (head ts,map Leaf ts)
|
||||
where
|
||||
sel d vs nw =
|
||||
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
untokn nw (Bracket_ fid index cat bss) =
|
||||
let (nw',bss') = mapAccumR untokn nw bss
|
||||
in (nw',[Bracket fid index cat (concat bss')])
|
||||
|
||||
flattenBracketedString :: BracketedString -> [String]
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
|
||||
@@ -6,7 +6,7 @@ module PGF.Parse
|
||||
, nextState
|
||||
, getCompletions
|
||||
, recoveryStates
|
||||
, extractTrees
|
||||
, ParseResult(..), getParseResult
|
||||
, parse
|
||||
, parseWithRecovery
|
||||
) where
|
||||
@@ -14,7 +14,7 @@ module PGF.Parse
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Base (unsafeAt)
|
||||
import Data.List (isPrefixOf, foldl')
|
||||
import Data.Maybe (fromMaybe, maybe)
|
||||
import Data.Maybe (fromMaybe, maybe, maybeToList)
|
||||
import qualified Data.Map as Map
|
||||
import qualified GF.Data.TrieMap as TMap
|
||||
import qualified Data.IntMap as IntMap
|
||||
@@ -27,26 +27,35 @@ import PGF.Data
|
||||
import PGF.Expr(Tree)
|
||||
import PGF.Macros
|
||||
import PGF.TypeCheck
|
||||
import Debug.Trace
|
||||
import PGF.Forest(Forest(Forest), linearizeWithBrackets)
|
||||
|
||||
parse :: PGF -> Language -> Type -> [String] -> [Tree]
|
||||
parse pgf lang typ toks = loop (initState pgf lang typ) toks
|
||||
-- | This data type encodes the different outcomes which you could get from the parser.
|
||||
data ParseResult
|
||||
= ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed.
|
||||
| TypeError FId [TcError] -- ^ The parsing was successful but none of the trees is type correct.
|
||||
-- The forest id ('FId') points to the bracketed string from the parser
|
||||
-- where the type checking failed. More than one error is returned
|
||||
-- if there are many analizes for some phrase but they all are not type correct.
|
||||
| ParseResult [Tree] -- ^ If the parsing was successful we get a list of abstract syntax trees. The list should be non-empty.
|
||||
|
||||
parse :: PGF -> Language -> Type -> [String] -> (ParseResult,Maybe BracketedString)
|
||||
parse pgf lang typ toks = loop 0 (initState pgf lang typ) toks
|
||||
where
|
||||
loop ps [] = extractTrees ps typ
|
||||
loop ps (t:ts) = case nextState ps t of
|
||||
Left es -> []
|
||||
Right ps -> loop ps ts
|
||||
loop i ps [] = getParseResult ps typ
|
||||
loop i ps (t:ts) = case nextState ps t of
|
||||
Left es -> (ParseFailed i,Nothing)
|
||||
Right ps -> loop (i+1) ps ts
|
||||
|
||||
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
|
||||
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> (ParseResult,Maybe BracketedString)
|
||||
parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
|
||||
where
|
||||
accept ps [] = extractTrees ps typ
|
||||
accept ps [] = getParseResult ps typ
|
||||
accept ps (t:ts) =
|
||||
case nextState ps t of
|
||||
Right ps -> accept ps ts
|
||||
Left es -> skip (recoveryStates open_typs es) ts
|
||||
|
||||
skip ps_map [] = extractTrees (fst ps_map) typ
|
||||
skip ps_map [] = getParseResult (fst ps_map) typ
|
||||
skip ps_map (t:ts) =
|
||||
case Map.lookup t (snd ps_map) of
|
||||
Just ps -> accept ps ts
|
||||
@@ -145,23 +154,31 @@ recoveryStates open_types (EState pgf cnc chart) =
|
||||
-- that spans the whole input consumed so far. The trees are also
|
||||
-- limited by the category specified, which is usually
|
||||
-- the same as the startup category.
|
||||
extractTrees :: ParseState -> Type -> [Tree]
|
||||
extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
|
||||
nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
|
||||
getParseResult :: ParseState -> Type -> (ParseResult,Maybe BracketedString)
|
||||
getParseResult (PState pgf cnc chart items) ty@(DTyp _ start _) =
|
||||
let mb_bs = case roots of
|
||||
((root,lbl):_) -> Just $ linearizeWithBrackets $ Forest (abstract pgf) cnc (forest st) root lbl
|
||||
_ -> Nothing
|
||||
|
||||
exps = nubsort $ do
|
||||
(fid,lbl) <- roots
|
||||
(fvs,e) <- go Set.empty 0 (0,fid)
|
||||
guard (Set.null fvs)
|
||||
Right e1 <- [checkExpr pgf e ty]
|
||||
return e1
|
||||
|
||||
in (ParseResult exps,mb_bs)
|
||||
where
|
||||
(mb_agenda,acc) = TMap.decompose items
|
||||
agenda = maybe [] Set.toList mb_agenda
|
||||
(_,st) = process Nothing (\_ _ -> id) (sequences cnc) (cncfuns cnc) agenda () chart
|
||||
|
||||
exps =
|
||||
case Map.lookup start (cnccats cnc) of
|
||||
Just (CncCat s e lbls) -> do cat <- range (s,e)
|
||||
lbl <- indices lbls
|
||||
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
|
||||
(fvs,tree) <- go Set.empty 0 (0,fid)
|
||||
guard (Set.null fvs)
|
||||
return tree
|
||||
Nothing -> mzero
|
||||
roots = case Map.lookup start (cnccats cnc) of
|
||||
Just (CncCat s e lbls) -> do cat <- range (s,e)
|
||||
lbl <- indices lbls
|
||||
fid <- maybeToList (lookupPC (PK cat lbl 0) (passive st))
|
||||
return (fid,lbl)
|
||||
Nothing -> mzero
|
||||
|
||||
go rec fcat' (d,fcat)
|
||||
| fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
|
||||
@@ -189,6 +206,7 @@ extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) =
|
||||
freeVar (EFun v) = Set.singleton v
|
||||
freeVar _ = Set.empty
|
||||
|
||||
|
||||
process mbt fn !seqs !funs [] acc chart = (acc,chart)
|
||||
process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
|
||||
| inRange (bounds lin) ppos =
|
||||
@@ -218,15 +236,15 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac
|
||||
Nothing -> fid
|
||||
Just fid -> fid
|
||||
|
||||
in case [ts | PConst _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
|
||||
in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of
|
||||
(toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
|
||||
in process mbt fn seqs funs items acc' chart
|
||||
[] -> case litCatMatch fid mbt of
|
||||
Just (toks,lit)
|
||||
Just (cat,lit,toks)
|
||||
-> let fid' = nextId chart
|
||||
!acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
|
||||
in process mbt fn seqs funs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart)
|
||||
,forest =IntMap.insert fid' (Set.singleton (PConst lit toks)) (forest chart)
|
||||
,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart)
|
||||
,nextId =nextId chart+1
|
||||
}
|
||||
Nothing -> process mbt fn seqs funs items acc chart
|
||||
@@ -260,12 +278,12 @@ updateAt :: Int -> a -> [a] -> [a]
|
||||
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
|
||||
|
||||
litCatMatch fcat (Just t)
|
||||
| fcat == fcatString = Just ([t],ELit (LStr t))
|
||||
| fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
|
||||
| fcat == fcatString = Just (cidString,ELit (LStr t),[t])
|
||||
| fcat == fcatInt = case reads t of {[(n,"")] -> Just (cidInt,ELit (LInt n),[t]);
|
||||
_ -> Nothing }
|
||||
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
|
||||
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just (cidFloat,ELit (LFlt d),[t]);
|
||||
_ -> Nothing }
|
||||
| fcat == fcatVar = Just ([t],EFun (mkCId t))
|
||||
| fcat == fcatVar = Just (cidVar,EFun (mkCId t),[t])
|
||||
litCatMatch _ _ = Nothing
|
||||
|
||||
|
||||
@@ -341,9 +359,9 @@ foldForest f g b fcat forest =
|
||||
Nothing -> b
|
||||
Just set -> Set.fold foldProd b set
|
||||
where
|
||||
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
|
||||
foldProd (PApply funid args) b = f funid args b
|
||||
foldProd (PConst const toks) b = g const toks b
|
||||
foldProd (PCoerce fcat) b = foldForest f g b fcat forest
|
||||
foldProd (PApply funid args) b = f funid args b
|
||||
foldProd (PConst _ const toks) b = g const toks b
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
|
||||
@@ -58,7 +58,7 @@ ppProduction (fcat,PApply funid args) =
|
||||
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
|
||||
ppProduction (fcat,PCoerce arg) =
|
||||
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
|
||||
ppProduction (fcat,PConst _ ss) =
|
||||
ppProduction (fcat,PConst _ _ ss) =
|
||||
ppFCat fcat <+> text "->" <+> ppStrs ss
|
||||
|
||||
ppCncFun (funid,CncFun fun arr) =
|
||||
|
||||
@@ -15,339 +15,296 @@
|
||||
-- instead of rolling its own.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.VisualizeTree ( graphvizAbstractTree
|
||||
, graphvizParseTree
|
||||
, graphvizDependencyTree
|
||||
, graphvizAlignment
|
||||
, tree2mk
|
||||
, getDepLabels
|
||||
, PosText(..), readPosText
|
||||
module PGF.VisualizeTree
|
||||
( graphvizAbstractTree
|
||||
, graphvizParseTree
|
||||
, graphvizDependencyTree
|
||||
, graphvizBracketedString
|
||||
, graphvizAlignment
|
||||
, getDepLabels
|
||||
) where
|
||||
|
||||
import PGF.CId (CId,showCId,pCId,mkCId)
|
||||
import PGF.CId (CId,showCId,ppCId,mkCId)
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import PGF.Expr (showExpr)
|
||||
import PGF.Expr (showExpr, Tree)
|
||||
import PGF.Linearize
|
||||
import PGF.Macros (lookValCat)
|
||||
import PGF.Macros (lookValCat, BracketedString(..), flattenBracketedString)
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
|
||||
import Data.Char (isDigit)
|
||||
import qualified Text.ParserCombinators.ReadP as RP
|
||||
import Text.PrettyPrint
|
||||
|
||||
import Debug.Trace
|
||||
-- | Renders abstract syntax tree in Graphviz format
|
||||
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String
|
||||
graphvizAbstractTree pgf (funs,cats) = render . tree2graph
|
||||
where
|
||||
tree2graph t =
|
||||
text "graph {" $$
|
||||
ppGraph [] [] 0 t $$
|
||||
text "}"
|
||||
|
||||
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String
|
||||
graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree
|
||||
getAbs xs (EAbs _ x e) = getAbs (x:xs) e
|
||||
getAbs xs (ETyped e _) = getAbs xs e
|
||||
getAbs xs e = (xs,e)
|
||||
|
||||
getApp (EApp x y) es = getApp x (y:es)
|
||||
getApp (ETyped e _) es = getApp e es
|
||||
getApp e es = (e,es)
|
||||
|
||||
tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String]
|
||||
tree2graph pgf (funs,cats) = prf [] where
|
||||
prf ps t = let (nod,lab) = prn ps t in
|
||||
(nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") :
|
||||
case t of
|
||||
Fun cid trees ->
|
||||
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
|
||||
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
|
||||
Abs xs (Fun cid trees) ->
|
||||
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
|
||||
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
|
||||
_ -> []
|
||||
prn ps t = case t of
|
||||
Fun cid _ ->
|
||||
let
|
||||
fun = if funs then showCId cid else ""
|
||||
cat = if cats then prCat cid else ""
|
||||
colon = if funs && cats then " : " else ""
|
||||
lab = "\"" ++ fun ++ colon ++ cat ++ "\""
|
||||
in (show(show (ps :: [Int])),lab)
|
||||
Abs bs tree ->
|
||||
let fun = case tree of
|
||||
Fun cid _ -> Fun cid []
|
||||
_ -> tree
|
||||
in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"")
|
||||
_ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"")
|
||||
pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];"
|
||||
arr = " -- " -- if digr then " -> " else " -- "
|
||||
prCat = showCId . lookValCat pgf
|
||||
esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts
|
||||
getLbl scope (EFun f) = let fun = if funs then ppCId f else empty
|
||||
cat = if cats then ppCId (lookValCat pgf f) else empty
|
||||
sep = if funs && cats then colon else empty
|
||||
in fun <+> sep <+> cat
|
||||
getLbl scope (ELit l) = text (escapeStr (render (ppLit l)))
|
||||
getLbl scope (EMeta i) = ppMeta i
|
||||
getLbl scope (EVar i) = ppCId (scope !! i)
|
||||
getLbl scope (ETyped e _) = getLbl scope e
|
||||
getLbl scope (EImplArg e) = getLbl scope e
|
||||
|
||||
prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
|
||||
graph = if digr then "digraph" else "graph"
|
||||
ppGraph scope ps i e0 =
|
||||
let (xs, e1) = getAbs [] e0
|
||||
(e2,args) = getApp e1 []
|
||||
binds = if null xs
|
||||
then empty
|
||||
else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->"
|
||||
(lbl,eargs) = case e2 of
|
||||
EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node
|
||||
_ -> (getLbl scope' e2, args)
|
||||
scope' = xs ++ scope
|
||||
in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$
|
||||
(if null ps
|
||||
then empty
|
||||
else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$
|
||||
vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs)
|
||||
|
||||
ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps))
|
||||
|
||||
escapeStr [] = []
|
||||
escapeStr ('\\':cs) = '\\':'\\':escapeStr cs
|
||||
escapeStr ('"' :cs) = '\\':'"' :escapeStr cs
|
||||
escapeStr (c :cs) = c :escapeStr cs
|
||||
|
||||
-- replace each non-atomic constructor with mkC, where C is the val cat
|
||||
tree2mk :: PGF -> Expr -> String
|
||||
tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
|
||||
t2m t = case t of
|
||||
Fun cid [] -> t
|
||||
Fun cid ts -> Fun (mk cid) (map t2m ts)
|
||||
_ -> t
|
||||
mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
|
||||
|
||||
-- dependency trees from Linearize.linearizeMark
|
||||
|
||||
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
|
||||
graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
|
||||
"malt" -> unlines (lin2dep format)
|
||||
"malt_input" -> unlines (lin2dep format)
|
||||
_ -> prGraph True (lin2dep format)
|
||||
|
||||
where
|
||||
|
||||
lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $
|
||||
case format of
|
||||
"malt" -> map (concat . intersperse "\t") wnodes
|
||||
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
|
||||
_ -> prelude ++ nodes ++ links
|
||||
|
||||
ifd s = if debug then s else []
|
||||
|
||||
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
|
||||
---- use Just str if you have str to match against
|
||||
|
||||
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
|
||||
|
||||
nodes = map mkNode nodeWords
|
||||
mkNode (i,((_,p),ss)) =
|
||||
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
|
||||
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
|
||||
((Just f,p),w) <- wlins pot]
|
||||
|
||||
links = map mkLink thelinks
|
||||
thelinks = [(word y, x, label tr y x) |
|
||||
(_,((f,x),_)) <- tail nodeWords,
|
||||
let y = dominant x]
|
||||
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
|
||||
node = show . show
|
||||
|
||||
dominant x = case x of
|
||||
[] -> x
|
||||
_ | not (x == hx) -> hx
|
||||
_ -> dominant (init x)
|
||||
where
|
||||
hx = headArg (init x) tr x
|
||||
|
||||
headArg x0 tr x = case (tr,x) of
|
||||
(Fun f [],[_]) -> x0 ---- ??
|
||||
(Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
|
||||
(Fun f ts,i:y) -> headArg x0 (ts !! i) y
|
||||
_ -> x0 ----
|
||||
|
||||
label tr y x = case span (uncurry (==)) (zip y x) of
|
||||
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
|
||||
_ -> "" ----
|
||||
|
||||
funAt tr x = case (tr,x) of
|
||||
(Fun f _ ,[]) -> f
|
||||
(Fun f ts,i:y) -> funAt (ts !! i) y
|
||||
_ -> mkCId (prTree tr) ----
|
||||
|
||||
word x = if elem x sortedNodes then x else
|
||||
let x' = headArg x tr (x ++[0]) in
|
||||
if x' == x then [] else word x'
|
||||
|
||||
tr = expr2tree exp
|
||||
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
|
||||
|
||||
labels = maybe Map.empty id mlab
|
||||
getHead i f = case Map.lookup f labels of
|
||||
Just ls -> length $ takeWhile (/= "head") ls
|
||||
_ -> i
|
||||
getLabel i f = case Map.lookup f labels of
|
||||
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
|
||||
_ -> showCId f ++ "#" ++ show i
|
||||
|
||||
-- to generate CoNLL format for MaltParser
|
||||
nodeMap :: Map.Map [Int] Int
|
||||
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
|
||||
|
||||
arcMap :: Map.Map [Int] ([Int],String)
|
||||
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
|
||||
|
||||
lookDomLab p = case Map.lookup p arcMap of
|
||||
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
|
||||
_ -> (0,rootlabel)
|
||||
|
||||
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
|
||||
(i, ((fun,p),ws)) <- tail nodeWords,
|
||||
let pos = showCId $ lookValCat pgf fun,
|
||||
let morph = unspec,
|
||||
let (dom,lab) = lookDomLab p
|
||||
]
|
||||
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
|
||||
unspec = "_"
|
||||
rootlabel = "ROOT"
|
||||
|
||||
type Labels = Map.Map CId [String]
|
||||
|
||||
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String
|
||||
graphvizDependencyTree format debug mlab ms pgf lang t = render $
|
||||
case format of
|
||||
"malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes)
|
||||
"malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
||||
_ -> text "digraph {" $$
|
||||
space $$
|
||||
nest 2 (text "rankdir=LR ;" $$
|
||||
text "node [shape = plaintext] ;" $$
|
||||
vcat nodes $$
|
||||
links) $$
|
||||
text "}"
|
||||
where
|
||||
nodes = map mkNode leaves
|
||||
links = empty
|
||||
wnodes = undefined
|
||||
|
||||
nil = -1
|
||||
|
||||
bs = bracketedLinearize pgf lang t
|
||||
|
||||
leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs
|
||||
|
||||
groupAndIndexIt id [] = []
|
||||
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
||||
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
||||
where
|
||||
collect pws@((p1,w):pws1)
|
||||
| p == p1 = let (ws,pws2) = collect pws1
|
||||
in (w:ws,pws2)
|
||||
collect pws = ([],pws)
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket fid _ _ bss -> concatMap (getLeaves fid) bss
|
||||
|
||||
mkNode (p,i,w) =
|
||||
tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;"
|
||||
|
||||
{-
|
||||
ifd s = if debug then s else []
|
||||
|
||||
pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp
|
||||
|
||||
nodes = map mkNode nodeWords
|
||||
mkNode (i,((_,p),ss)) =
|
||||
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
|
||||
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
|
||||
((Just f,p),w) <- wlins pot]
|
||||
|
||||
links = map mkLink thelinks
|
||||
thelinks = [(word y, x, label tr y x) |
|
||||
(_,((f,x),_)) <- tail nodeWords,
|
||||
let y = dominant x]
|
||||
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
|
||||
node = show . show
|
||||
|
||||
dominant x = case x of
|
||||
[] -> x
|
||||
_ | not (x == hx) -> hx
|
||||
_ -> dominant (init x)
|
||||
where
|
||||
hx = headArg (init x) tr x
|
||||
|
||||
headArg x0 tr x = case (tr,x) of
|
||||
(Fun f [],[_]) -> x0 ---- ??
|
||||
(Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
|
||||
(Fun f ts,i:y) -> headArg x0 (ts !! i) y
|
||||
_ -> x0 ----
|
||||
|
||||
label tr y x = case span (uncurry (==)) (zip y x) of
|
||||
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
|
||||
_ -> "" ----
|
||||
|
||||
funAt tr x = case (tr,x) of
|
||||
(Fun f _ ,[]) -> f
|
||||
(Fun f ts,i:y) -> funAt (ts !! i) y
|
||||
_ -> mkCId (prTree tr) ----
|
||||
|
||||
word x = if elem x sortedNodes then x else
|
||||
let x' = headArg x tr (x ++[0]) in
|
||||
if x' == x then [] else word x'
|
||||
|
||||
tr = expr2tree exp
|
||||
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
|
||||
|
||||
labels = maybe Map.empty id mlab
|
||||
getHead i f = case Map.lookup f labels of
|
||||
Just ls -> length $ takeWhile (/= "head") ls
|
||||
_ -> i
|
||||
getLabel i f = case Map.lookup f labels of
|
||||
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
|
||||
_ -> showCId f ++ "#" ++ show i
|
||||
|
||||
-- to generate CoNLL format for MaltParser
|
||||
nodeMap :: Map.Map [Int] Int
|
||||
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
|
||||
|
||||
arcMap :: Map.Map [Int] ([Int],String)
|
||||
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
|
||||
|
||||
lookDomLab p = case Map.lookup p arcMap of
|
||||
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
|
||||
_ -> (0,rootlabel)
|
||||
|
||||
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
|
||||
(i, ((fun,p),ws)) <- tail nodeWords,
|
||||
let pos = showCId $ lookValCat pgf fun,
|
||||
let morph = unspec,
|
||||
let (dom,lab) = lookDomLab p
|
||||
]
|
||||
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
|
||||
unspec = "_"
|
||||
rootlabel = "ROOT"
|
||||
-}
|
||||
|
||||
|
||||
getDepLabels :: [String] -> Labels
|
||||
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
|
||||
|
||||
|
||||
-- parse trees from Linearize.linearizeMark
|
||||
---- nubrec and domins are quadratic, but could be (n log n)
|
||||
graphvizParseTree :: PGF -> Language -> Tree -> String
|
||||
graphvizParseTree pgf lang = graphvizBracketedString . bracketedLinearize pgf lang
|
||||
|
||||
graphvizParseTree :: PGF -> CId -> Expr -> String
|
||||
graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where
|
||||
graphvizBracketedString :: BracketedString -> String
|
||||
graphvizBracketedString = render . lin2tree
|
||||
where
|
||||
lin2tree bs =
|
||||
text "graph {" $$
|
||||
space $$
|
||||
nest 2 (text "rankdir=BU ;" $$
|
||||
text "node [shape = record, color = white] ;" $$
|
||||
space $$
|
||||
vcat (nodes bs)) $$
|
||||
text "}"
|
||||
where
|
||||
nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves])
|
||||
|
||||
lin2tree pgf s = prelude ++ nodes ++ links where
|
||||
nil = -1
|
||||
|
||||
leaves = getLeaves 0 nil bs
|
||||
interns = getInterns 0 [(nil,bs)]
|
||||
|
||||
prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"]
|
||||
getLeaves level parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(level-1,parent,w)]
|
||||
Bracket fid i _ bss -> concatMap (getLeaves (level+1) fid) bss
|
||||
|
||||
nodeRecs = zip [0..]
|
||||
(nub (filter (not . null) (nlins [postext] ++ [leaves postext])))
|
||||
nlins pts =
|
||||
nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] :
|
||||
concatMap nlins [ts | T _ ts <- pts]
|
||||
leaves pt = [(p++[j],s) | (j,(p,s)) <-
|
||||
zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]]
|
||||
getInterns level [] = []
|
||||
getInterns level nodes =
|
||||
nub [(level-1,parent,fid,showCId cat) | (parent,Bracket fid _ cat _) <- nodes] :
|
||||
getInterns (level+1) [(fid,child) | (_,Bracket fid _ _ children) <- nodes, child <- children]
|
||||
|
||||
nubrec es rs = case rs of
|
||||
r:rr -> let r' = filter (not . flip elem es) (nub r)
|
||||
in r' : nubrec (r' ++ es) rr
|
||||
_ -> rs
|
||||
mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
||||
vcat [link pl pid l id | (pl,pid,id,_) <- cs]
|
||||
link pl pid l id
|
||||
| pl < 0 = empty
|
||||
| otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+>
|
||||
text "--" <+>
|
||||
struct l <> colon <> tag id <> colon <> char 'n' <+> semi
|
||||
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs])
|
||||
|
||||
nodes = map mkStruct nodeRecs
|
||||
|
||||
mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;"
|
||||
cat = showCId . lookValCat pgf
|
||||
fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs])
|
||||
struct i = "struct" ++ show i
|
||||
|
||||
links = map mkEdge domins
|
||||
domins = nub [((i,x),(j,y)) |
|
||||
(i,xs) <- nodeRecs, (j,ys) <- nodeRecs,
|
||||
x <- xs, y <- ys, dominates x y]
|
||||
dominates (p,x) (q,y) = not (null q) && p == init q
|
||||
mkEdge ((i,x),(j,y)) =
|
||||
struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++
|
||||
struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;"
|
||||
|
||||
postext = readPosText s
|
||||
|
||||
-- auxiliaries for graphviz syntax
|
||||
struct i = "struct" ++ show i
|
||||
mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n
|
||||
uncommas = map (\c -> if c==',' then 'c' else c)
|
||||
tag s = "<" ++ s ++ ">"
|
||||
showp = init . tail . show
|
||||
mtag = tag . ('n':) . uncommas
|
||||
struct l = text ("struct" ++ show l)
|
||||
tbrackets d = char '<' <> d <> char '>'
|
||||
tag i = char 'n' <> int i
|
||||
|
||||
-- word alignments from Linearize.markLinearize
|
||||
-- words are chunks like {[0,1,1,0] old}
|
||||
|
||||
graphvizAlignment :: PGF -> Expr -> String
|
||||
graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
|
||||
linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- Map.keys (concretes pgf)]
|
||||
graphvizAlignment :: PGF -> [Language] -> Expr -> String
|
||||
graphvizAlignment pgf langs = render . lin2graph . linsBracketed
|
||||
where
|
||||
linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs]
|
||||
|
||||
lin2graph :: [String] -> [String]
|
||||
lin2graph ss = -- trace (show ss) $
|
||||
prelude ++ nodes ++ links
|
||||
lin2graph :: [BracketedString] -> Doc
|
||||
lin2graph bss =
|
||||
text "digraph {" $$
|
||||
space $$
|
||||
nest 2 (text "rankdir=LR ;" $$
|
||||
text "node [shape = record] ;" $$
|
||||
space $$
|
||||
mkLayers 0 leaves) $$
|
||||
text "}"
|
||||
where
|
||||
nil = -1
|
||||
|
||||
where
|
||||
leaves = map (groupAndIndexIt 0 . getLeaves nil) bss
|
||||
|
||||
prelude = ["rankdir=LR ;", "node [shape = record] ;"]
|
||||
groupAndIndexIt id [] = []
|
||||
groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
||||
in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
||||
where
|
||||
collect pws@((p1,w):pws1)
|
||||
| p == p1 = let (ws,pws2) = collect pws1
|
||||
in (w:ws,pws2)
|
||||
collect pws = ([],pws)
|
||||
|
||||
nlins :: [(Int,[((Int,String),String)])]
|
||||
nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) |
|
||||
(i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket fid _ _ bss -> concatMap (getLeaves fid) bss
|
||||
|
||||
unw = concat . intersperse "\\ " -- space escape in graphviz
|
||||
mkLayers l [] = empty
|
||||
mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$
|
||||
(case css of
|
||||
(ncs:_) -> vcat (map (mkLinks l ncs) cs)
|
||||
_ -> empty) $$
|
||||
mkLayers (l+1) css
|
||||
|
||||
nodes = map mkStruct nlins
|
||||
|
||||
mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
|
||||
|
||||
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
|
||||
|
||||
links = nub $ concatMap mkEdge (init nlins)
|
||||
|
||||
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
|
||||
[edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
|
||||
|
||||
edge i v w =
|
||||
struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
|
||||
{-
|
||||
alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double)
|
||||
alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where
|
||||
linsMark t =
|
||||
[s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)]
|
||||
|
||||
mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double)
|
||||
mkStat =
|
||||
|
||||
mkAlign :: [String] -> [(String,String)]
|
||||
mkAlign ss =
|
||||
|
||||
nlins :: [(Int,[((Int,String),String)])]
|
||||
nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) |
|
||||
(i,vs) <- zip [0..] (map (wlins . readPosText) ss)]
|
||||
|
||||
nodes = map mkStruct nlins
|
||||
|
||||
mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
|
||||
|
||||
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
|
||||
|
||||
links = nub $ concatMap mkEdge (init nlins)
|
||||
|
||||
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
|
||||
[edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
|
||||
|
||||
edge i v w =
|
||||
struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
|
||||
-}
|
||||
|
||||
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
|
||||
wlins pt = case pt of
|
||||
T p pts -> concatMap (lins p) pts
|
||||
M ws -> if null ws then [] else [((Nothing,[]),ws)]
|
||||
where
|
||||
lins p pt = case pt of
|
||||
T q pts -> concatMap (lins q) pts
|
||||
M ws -> if null ws then [] else [(p,ws)]
|
||||
|
||||
data PosText =
|
||||
T (Maybe CId,[Int]) [PosText]
|
||||
| M [String]
|
||||
deriving Show
|
||||
|
||||
readPosText :: String -> PosText
|
||||
readPosText = fst . head . (RP.readP_to_S pPosText) where
|
||||
pPosText = do
|
||||
RP.char '(' >> RP.skipSpaces
|
||||
p <- pPos
|
||||
RP.skipSpaces
|
||||
ts <- RP.many pPosText
|
||||
RP.char ')' >> RP.skipSpaces
|
||||
return (T p ts)
|
||||
RP.<++ do
|
||||
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
|
||||
return (M ws)
|
||||
pPos = do
|
||||
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
|
||||
RP.<++ (return Nothing)
|
||||
RP.char '[' >> RP.skipSpaces
|
||||
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
|
||||
RP.char ']' >> RP.skipSpaces
|
||||
RP.char ')' RP.<++ return ' '
|
||||
return (fun,map read is)
|
||||
|
||||
|
||||
{-
|
||||
digraph{
|
||||
rankdir ="LR" ;
|
||||
node [shape = record] ;
|
||||
|
||||
struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ;
|
||||
struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ;
|
||||
|
||||
struct1:f0 -> struct2:f0 ;
|
||||
struct1:f1 -> struct2:f2 ;
|
||||
struct1:f2 -> struct2:f3 ;
|
||||
struct1:f3 -> struct2:f1 ;
|
||||
struct1:f0 -> struct2:f4 ;
|
||||
}
|
||||
-}
|
||||
mkLinks l cs (p0,id0,_) =
|
||||
vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+>
|
||||
text "->" <+>
|
||||
struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices)
|
||||
where
|
||||
indices = [id1 | (p1,id1,_) <- cs, p1 == p0]
|
||||
|
||||
fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs])
|
||||
|
||||
Reference in New Issue
Block a user