From 8460598801b644f323db0b7d7ca879e3acb9215b Mon Sep 17 00:00:00 2001 From: krasimir Date: Fri, 30 Apr 2010 14:36:06 +0000 Subject: [PATCH] first incarnation of the bracketed string API --- gf.cabal | 1 + src/compiler/GF/Command/Commands.hs | 40 +- src/compiler/GF/Compile/ExampleBased.hs | 15 +- src/compiler/GF/Quiz.hs | 5 +- src/runtime/haskell/PGF.hs | 64 ++- src/runtime/haskell/PGF/Data.hs | 3 +- src/runtime/haskell/PGF/Forest.hs | 101 +++++ src/runtime/haskell/PGF/Linearize.hs | 180 ++++---- src/runtime/haskell/PGF/Macros.hs | 54 +++ src/runtime/haskell/PGF/Parse.hs | 86 ++-- src/runtime/haskell/PGF/Printer.hs | 2 +- src/runtime/haskell/PGF/VisualizeTree.hs | 549 +++++++++++------------ 12 files changed, 632 insertions(+), 468 deletions(-) create mode 100644 src/runtime/haskell/PGF/Forest.hs diff --git a/gf.cabal b/gf.cabal index 22dfb9fcc..a5532dd0d 100644 --- a/gf.cabal +++ b/gf.cabal @@ -42,6 +42,7 @@ library PGF.VisualizeTree PGF.Printer PGF.Probabilistic + PGF.Forest GF.Data.TrieMap GF.Data.Utilities GF.Data.SortedList diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 0273b82eb..f89e497ad 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -22,6 +22,7 @@ import PGF.Morphology import PGF.Printer import PGF.Probabilistic -- (getProbsFromFile,prProbabilities,defaultProbabilities) import PGF.Generate (generateRandomFrom) ---- +import PGF.Tree (Tree(Fun), expr2tree, tree2expr) import GF.Compile.Export import GF.Compile.ExampleBased import GF.Infra.Option (noOptions, readOutputFormat) @@ -150,7 +151,7 @@ allCommands env@(pgf, mos) = Map.fromList [ "flag -format." ], exec = \opts es -> do - let grph = if null es then [] else graphvizAlignment pgf (head es) + let grph = if null es then [] else graphvizAlignment pgf (languages pgf) (head es) if isFlag "view" opts || isFlag "format" opts then do let file s = "_grph." ++ s let view = optViewGraph opts @@ -481,11 +482,14 @@ allCommands env@(pgf, mos) = Map.fromList [ "will accept unknown adjectives, nouns and verbs with the resource grammar." ], exec = \opts ts -> - returnFromExprsPar opts ts $ concatMap (par opts) $ toStrings ts, + return $ fromParse opts ts $ concatMap (par opts) $ toStrings ts, flags = [ ("cat","target category of parsing"), ("lang","the languages of parsing (comma-separated, no spaces)"), ("openclass","list of open-class categories for robust parsing") + ], + options = [ + ("bracket","prints the bracketed string from the parser") ] }), ("pg", emptyCommandInfo { ----- @@ -893,8 +897,8 @@ allCommands env@(pgf, mos) = Map.fromList [ ] where par opts s = case optOpenTypes opts of - [] -> concat [parse pgf lang (optType opts) s | lang <- optLangs opts] - open_typs -> concat [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts] + [] -> [parse pgf lang (optType opts) s | lang <- optLangs opts] + open_typs -> [parseWithRecovery pgf lang (optType opts) open_typs s | lang <- optLangs opts] void = ([],[]) @@ -918,9 +922,17 @@ allCommands env@(pgf, mos) = Map.fromList [ map (map (unl . snd)) . tabularLinearizes pgf lang _ | isOpt "table" opts -> unlines . concat . intersperse [[]] . map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang - _ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang + _ | isOpt "bracket" opts -> showBracketedString . bracketedLinearize pgf lang _ -> unl . linearize pgf lang + -- replace each non-atomic constructor with mkC, where C is the val cat + tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where + t2m t = case t of + Fun cid [] -> t + Fun cid ts -> Fun (mk cid) (map t2m ts) + _ -> t + mk = mkCId . ("mk" ++) . showCId . lookValCat pgf + unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ---- getUnlex opts lang = case words (valStrOpts "unlexer" "" opts) of @@ -991,14 +1003,22 @@ allCommands env@(pgf, mos) = Map.fromList [ toStrings = map showAsString toString = unwords . toStrings + fromParse opts ts parses + | isOpt "bracket" opts = case catMaybes bss of + [] -> ([], "no brackets found") + bss -> ([], unlines $ map showBracketedString bss) + | otherwise = case ts of + [] -> ([], "no trees found" ++ + missingWordMsg (optMorpho opts) (concatMap words (toStrings ts)) + ) + _ -> fromExprs ts + where + (prs,bss) = unzip parses + ts = [t | ParseResult ts <- prs, t <- ts] + returnFromExprs es = return $ case es of [] -> ([], "no trees found") _ -> fromExprs es - returnFromExprsPar opts ts es = return $ case es of - [] -> ([], "no trees found" ++ - missingWordMsg (optMorpho opts) (concatMap words (toStrings ts)) - ) - _ -> fromExprs es prGrammar opts | isOpt "cats" opts = return $ fromString $ unwords $ map showCId $ categories pgf diff --git a/src/compiler/GF/Compile/ExampleBased.hs b/src/compiler/GF/Compile/ExampleBased.hs index 983f38869..f197722ba 100644 --- a/src/compiler/GF/Compile/ExampleBased.hs +++ b/src/compiler/GF/Compile/ExampleBased.hs @@ -41,17 +41,20 @@ convertFile conf src file = do convEx (cat,ex) = do appn "(" let typ = maybe (error "no valid cat") id $ readType cat - let ts = rank $ parse pgf lang typ ex - ws <- case ts of - [] -> do + ws <- case fst (parse pgf lang typ ex) of + ParseFailed _ -> do let ws = morphoMissing morpho (words ex) appv ("WARNING: cannot parse example " ++ ex) case ws of [] -> return () _ -> appv (" missing words: " ++ unwords ws) - return ws - t:tt -> appv ("WARNING: ambiguous example " ++ ex) >> - appn t >> mapM_ (appn . (" --- " ++)) tt >> return [] + return ws + TypeError _ _ -> + return [] + ParseResult ts -> + case rank ts of + (t:tt) -> appv ("WARNING: ambiguous example " ++ ex) >> + appn t >> mapM_ (appn . (" --- " ++)) tt >> return [] appn ")" return ws rank ts = case probs conf of diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs index 4a4caafc8..2a9b28ccb 100644 --- a/src/compiler/GF/Quiz.hs +++ b/src/compiler/GF/Quiz.hs @@ -46,7 +46,10 @@ translationList mex mprobs pgf ig og typ number = do return $ map mkOne $ ts where mkOne t = (norml (linearize pgf ig t), map (norml . linearize pgf og) (homonyms t)) - homonyms = nub . parse pgf ig typ . linearize pgf ig + homonyms t = + case (fst . parse pgf ig typ . linearize pgf ig) t of + ParseResult ts -> ts + _ -> [] morphologyList :: Maybe Expr -> Maybe Probabilities -> diff --git a/src/runtime/haskell/PGF.hs b/src/runtime/haskell/PGF.hs index 9ab24b100..f2235cf37 100644 --- a/src/runtime/haskell/PGF.hs +++ b/src/runtime/haskell/PGF.hs @@ -1,7 +1,7 @@ ------------------------------------------------- -- | -- Module : PGF --- Maintainer : Aarne Ranta +-- Maintainer : Krasimir Angelov -- Stability : stable -- Portability : portable -- @@ -50,9 +50,12 @@ module PGF( -- * Operations -- ** Linearization - linearize, linearizeAllLang, linearizeAll, + linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes, groupResults, -- lins of trees by language, removing duplicates showPrintName, + + BracketedString(..), FId, LIndex, + Forest.showBracketedString, -- ** Parsing parse, parseWithRecovery, parseAllLang, parseAll, @@ -73,10 +76,11 @@ module PGF( checkType, checkExpr, inferExpr, TcError(..), ppTcError, - -- ** Word Completion (Incremental Parsing) + -- ** Low level parsing API complete, Parse.ParseState, - Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, Parse.extractTrees, + Parse.initState, Parse.nextState, Parse.getCompletions, Parse.recoveryStates, + Parse.ParseResult(..), Parse.getParseResult, -- ** Generation generateRandom, generateAll, generateAllDepth, @@ -90,6 +94,7 @@ module PGF( graphvizAbstractTree, graphvizParseTree, graphvizDependencyTree, + graphvizBracketedString, graphvizAlignment, -- * Browsing @@ -107,6 +112,7 @@ import PGF.Expr (Tree) import PGF.Morphology import PGF.Data import PGF.Binary +import qualified PGF.Forest as Forest import qualified PGF.Parse as Parse import GF.Data.Utilities (replace) @@ -131,34 +137,18 @@ import Text.PrettyPrint -- > $ gf -make readPGF :: FilePath -> IO PGF --- | Linearizes given expression as string in the language -linearize :: PGF -> Language -> Tree -> String - -- | Tries to parse the given string in the specified language --- and to produce abstract syntax expression. An empty --- list is returned if the parsing is not successful. The list may also --- contain more than one element if the grammar is ambiguous. --- Throws an exception if the given language cannot be used --- for parsing, see 'canParse'. -parse :: PGF -> Language -> Type -> String -> [Tree] +-- and to produce abstract syntax expression. +parse :: PGF -> Language -> Type -> String -> (Parse.ParseResult,Maybe BracketedString) -parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree] - --- | The same as 'linearizeAllLang' but does not return --- the language. -linearizeAll :: PGF -> Tree -> [String] - --- | Linearizes given expression as string in all languages --- available in the grammar. -linearizeAllLang :: PGF -> Tree -> [(Language,String)] +-- | This is an experimental function. Use it on your own risk +parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> (Parse.ParseResult,Maybe BracketedString) -- | The same as 'parseAllLang' but does not return -- the language. parseAll :: PGF -> Type -> String -> [[Tree]] -- | Tries to parse the given string with all available languages. --- Languages which cannot be used for parsing (see 'canParse') --- are ignored. -- The returned list contains pairs of language -- and list of abstract syntax expressions -- (this is a list, since grammars can be ambiguous). @@ -227,8 +217,6 @@ complete :: PGF -> Language -> Type -> String readPGF f = decodeFile f -linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang - parse pgf lang typ s = case Map.lookup lang (concretes pgf) of Just cnc -> Parse.parse pgf lang typ (words s) @@ -236,10 +224,6 @@ parse pgf lang typ s = parseWithRecovery pgf lang typ open_typs s = Parse.parseWithRecovery pgf lang typ open_typs (words s) -linearizeAll mgr = map snd . linearizeAllLang mgr -linearizeAllLang mgr t = - [(lang,PGF.linearize mgr lang t) | lang <- languages mgr] - groupResults :: [[(Language,String)]] -> [(Language,[String])] groupResults = Map.toList . foldr more Map.empty . start . concat where @@ -250,7 +234,7 @@ groupResults = Map.toList . foldr more Map.empty . start . concat parseAll mgr typ = map snd . parseAllLang mgr typ parseAllLang mgr typ s = - [(lang,ts) | lang <- languages mgr, let ts = parse mgr lang typ s, not (null ts)] + [(lang,ts) | lang <- languages mgr, (Parse.ParseResult ts,_) <- [parse mgr lang typ s], not (null ts)] generateRandom pgf cat = do gen <- newStdGen @@ -280,14 +264,18 @@ functionType pgf fun = Nothing -> Nothing complete pgf from typ input = - let (ws,prefix) = tokensAndPrefix input - state0 = Parse.initState pgf from typ - in case loop state0 ws of - Nothing -> [] - Just state -> - (if null prefix && not (null (Parse.extractTrees state typ)) then [unwords ws ++ " "] else []) - ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)] + let (ws,prefix) = tokensAndPrefix input + state0 = Parse.initState pgf from typ + in case loop state0 ws of + Nothing -> [] + Just state -> (if null prefix && isSuccessful state then [unwords ws ++ " "] else []) + ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Parse.getCompletions state prefix)] where + isSuccessful state = + case Parse.getParseResult state typ of + (Parse.ParseResult ts, _) -> not (null ts) + _ -> False + tokensAndPrefix :: String -> ([String],String) tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "") | null ws = ([],"") diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index b604317fc..7623a05f3 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -56,7 +56,7 @@ data Symbol data Production = PApply {-# UNPACK #-} !FunId [FId] | PCoerce {-# UNPACK #-} !FId - | PConst Expr [String] + | PConst CId Expr [String] deriving (Eq,Ord,Show) data CncCat = CncCat {-# UNPACK #-} !FId {-# UNPACK #-} !FId {-# UNPACK #-} !(Array LIndex String) data CncFun = CncFun CId {-# UNPACK #-} !(UArray LIndex SeqId) deriving (Eq,Ord,Show) @@ -86,7 +86,6 @@ data Tokn = | KP [String] [Alternative] deriving (Eq,Ord,Show) - -- merge two PGFs; fails is differens absnames; priority to second arg unionPGF :: PGF -> PGF -> PGF diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs new file mode 100644 index 000000000..3dd996aa6 --- /dev/null +++ b/src/runtime/haskell/PGF/Forest.hs @@ -0,0 +1,101 @@ +------------------------------------------------- +-- | +-- Module : PGF +-- Maintainer : Krasimir Angelov +-- Stability : stable +-- Portability : portable +-- +-- Forest is a compact representation of a set +-- of parse trees. This let us to efficiently +-- represent local ambiguities +-- +------------------------------------------------- + +module PGF.Forest( Forest(..) + , BracketedString, showBracketedString + , linearizeWithBrackets + ) where + +import PGF.CId +import PGF.Data +import PGF.Macros +import Data.List +import Data.Array.IArray +import qualified Data.Set as Set +import qualified Data.Map as Map +import qualified Data.IntSet as IntSet +import qualified Data.IntMap as IntMap +import Control.Monad + +data Forest + = Forest + { abstr :: Abstr + , concr :: Concr + , forest :: IntMap.IntMap (Set.Set Production) + , root :: {-# UNPACK #-} !FId + , label :: {-# UNPACK #-} !LIndex + } + +-------------------------------------------------------------------- +-- Rendering of bracketed strings +-------------------------------------------------------------------- + +linearizeWithBrackets :: Forest -> BracketedString +linearizeWithBrackets = head . snd . untokn "" . bracketedTokn + +--------------------------------------------------------------- +-- Internally we have to do everything with Tokn first because +-- we must handle the pre {...} construction. +-- + +bracketedTokn :: Forest -> BracketedTokn +bracketedTokn (Forest abs cnc forest root label) = + let (fid,cat,lin) = render IntMap.empty root + in Bracket_ fid label cat (lin ! label) + where + trusted = trustedSpots IntSet.empty root + + render parents fid = + case (IntMap.lookup fid parents) `mplus` (fmap Set.toList $ IntMap.lookup fid forest) of + Just (p:ps) -> descend (IntMap.insert fid ps parents) p + Nothing -> error ("wrong forest id " ++ show fid) + where + descend parents (PApply funid args) = let (CncFun fun lins) = cncfuns cnc ! funid + Just (DTyp _ cat _,_,_) = Map.lookup fun (funs abs) + largs = map (render parents) args + in (fid,cat,listArray (bounds lins) [computeSeq seqid largs | seqid <- elems lins]) + descend parents (PCoerce fid) = render parents fid + descend parents (PConst cat _ ts) = (fid,cat,listArray (0,0) [[LeafKS ts]]) + + trustedSpots parents fid + | IntSet.member fid parents + = IntSet.empty + | otherwise = IntSet.insert fid $ + case IntMap.lookup fid forest of + Just prods -> foldl1 IntSet.intersection [descend prod | prod <- Set.toList prods] + Nothing -> IntSet.empty + where + parents' = IntSet.insert fid parents + + descend (PApply funid args) = IntSet.unions (map (trustedSpots parents') args) + descend (PCoerce fid) = trustedSpots parents' fid + descend (PConst c e _) = IntSet.empty + + computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn] + computeSeq seqid args = concatMap compute (elems seq) + where + seq = sequences cnc ! seqid + + compute (SymCat d r) = getArg d r + compute (SymLit d r) = getArg d r + compute (SymKS ts) = [LeafKS ts] + compute (SymKP ts alts) = [LeafKP ts alts] + + getArg d r + | not (null arg_lin) && + IntSet.member fid trusted + = [Bracket_ fid r cat arg_lin] + | otherwise = arg_lin + where + arg_lin = lin ! r + (fid,cat,lin) = args !! d diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index 058d8281f..4a399f5e9 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -1,8 +1,15 @@ -module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where +module PGF.Linearize + ( linearize + , linearizeAll + , linearizeAllLang + , bracketedLinearize + , tabularLinearizes + ) where import PGF.CId import PGF.Data import PGF.Macros +import PGF.Expr import Data.Array.IArray import Data.List import Control.Monad @@ -10,85 +17,33 @@ import qualified Data.Map as Map import qualified Data.IntMap as IntMap import qualified Data.Set as Set --- linearization and computation of concrete PGF Terms +-------------------------------------------------------------------- +-- The API +-------------------------------------------------------------------- -type LinTable = Array LIndex [Tokn] +-- | Linearizes given expression as string in the language +linearize :: PGF -> Language -> Tree -> String +linearize pgf lang = concat . take 1 . map (unwords . concatMap flattenBracketedString . snd . untokn "" . (!0)) . linTree pgf lang -linearizes :: PGF -> CId -> Expr -> [String] -linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint) +-- | The same as 'linearizeAllLang' but does not return +-- the language. +linearizeAll :: PGF -> Tree -> [String] +linearizeAll pgf = map snd . linearizeAllLang pgf -linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable] -linTree pgf lang mark e = lin0 [] [] [] Nothing e - where - cnc = lookMap (error "no lang") lang (concretes pgf) - lp = lproductions cnc +-- | Linearizes given expression as string in all languages +-- available in the grammar. +linearizeAllLang :: PGF -> Tree -> [(Language,String)] +linearizeAllLang pgf t = [(lang,linearize pgf lang t) | lang <- Map.keys (concretes pgf)] - lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e - lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e - lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e [] - | otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs]) +-- | Linearizes given expression as a bracketed string in the language +bracketedLinearize :: PGF -> Language -> Tree -> BracketedString +bracketedLinearize pgf lang = head . concat . map (snd . untokn "" . (!0)) . linTree pgf lang - lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es) - lin path xs mb_fid (ELit l) [] = case l of - LStr s -> return (mark Nothing path (ss s)) - LInt n -> return (mark Nothing path (ss (show n))) - LFlt f -> return (mark Nothing path (ss (show f))) - lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es) - lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es) - lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es) - lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es - lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es - - ss s = listArray (0,0) [[KS s]] - - apply path xs mb_fid f es = - case Map.lookup f lp of - Just prods -> case lookupProds mb_fid prods of - Just set -> do prod <- Set.toList set - case prod of - PApply funid fids -> do guard (length fids == length es) - args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es) - let (CncFun _ lins) = cncfuns cnc ! funid - return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]) - PCoerce fid -> apply path xs (Just fid) f es - Nothing -> mzero - Nothing -> apply path xs mb_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin - where - lookupProds (Just fid) prods = IntMap.lookup fid prods - lookupProds Nothing prods - | f == _B || f == _V = Nothing - | otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods))) - - sub i path - | f == _B || f == _V = path - | otherwise = i:path - - isApp (PApply _ _) = True - isApp _ = False - - computeSeq seqid args = concatMap compute (elems seq) - where - seq = sequences cnc ! seqid - - compute (SymCat d r) = (args !! d) ! r - compute (SymLit d r) = (args !! d) ! r - compute (SymKS ts) = map KS ts - compute (SymKP ts alts) = [KP ts alts] - -untokn :: [Tokn] -> [String] -untokn ts = case ts of - KP d _ : [] -> d - KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss - KS s : ws -> s : untokn ws - [] -> [] - where - sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of - v:_ -> v - _ -> d - --- create a table from labels+params to variants +-- | Creates a table from feature name to linearization. +-- The outher list encodes the variations tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]] -tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e) +tabularLinearizes pgf lang e = map (zip lbls . map (unwords . concatMap flattenBracketedString . snd . untokn "") . elems) + (linTree pgf lang e) where lbls = case unApp e of Just (f,_) -> let cat = valCat (lookType pgf f) @@ -97,12 +52,77 @@ tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) ( Nothing -> error "No labels" Nothing -> error "Not function application" +-------------------------------------------------------------------- +-- Implementation +-------------------------------------------------------------------- --- show bracketed markup with references to tree structure -markLinearizes :: PGF -> CId -> Expr -> [String] -markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark +linTree :: PGF -> Language -> Expr -> [Array LIndex BracketedTokn] +linTree pgf lang e = + [amapWithIndex (\label -> Bracket_ fid label cat) lin | (_,(fid,cat,lin)) <- lin0 [] [] Nothing 0 e] where - mark mb_f path lint = amap (bracket mb_f path) lint + cnc = lookMap (error "no lang") lang (concretes pgf) + lp = lproductions cnc + + lin0 xs ys mb_fid n_fid (EAbs _ x e) = lin0 (showCId x:xs) ys mb_fid n_fid e + lin0 xs ys mb_fid n_fid (ETyped e _) = lin0 xs ys mb_fid n_fid e + lin0 xs ys mb_fid n_fid e | null xs = lin ys mb_fid n_fid e [] + | otherwise = apply (xs ++ ys) mb_fid n_fid _B (e:[ELit (LStr x) | x <- xs]) - bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"] - bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"] + lin xs mb_fid n_fid (EApp e1 e2) es = lin xs mb_fid n_fid e1 (e2:es) + lin xs mb_fid n_fid (ELit l) [] = case l of + LStr s -> return (n_fid+1,(n_fid,cidString,ss s)) + LInt n -> return (n_fid+1,(n_fid,cidInt ,ss (show n))) + LFlt f -> return (n_fid+1,(n_fid,cidFloat ,ss (show f))) + lin xs mb_fid n_fid (EMeta i) es = apply xs mb_fid n_fid _V (ELit (LStr ('?':show i)):es) + lin xs mb_fid n_fid (EFun f) es = apply xs mb_fid n_fid f es + lin xs mb_fid n_fid (EVar i) es = apply xs mb_fid n_fid _V (ELit (LStr (xs !! i)) :es) + lin xs mb_fid n_fid (ETyped e _) es = lin xs mb_fid n_fid e es + lin xs mb_fid n_fid (EImplArg e) es = lin xs mb_fid n_fid e es + + ss s = listArray (0,0) [[LeafKS [s]]] + + apply :: [String] -> Maybe FId -> FId -> CId -> [Expr] -> [(FId,(FId, CId, LinTable))] + apply xs mb_fid n_fid f es = + case Map.lookup f lp of + Just prods -> do prod <- lookupProds mb_fid prods + case prod of + PApply funid fids -> do guard (length fids == length es) + (n_fid,args) <- descend n_fid (zip fids es) + let (CncFun fun lins) = cncfuns cnc ! funid + Just (DTyp _ cat _,_,_) = Map.lookup fun (funs (abstract pgf)) + return (n_fid+1,(n_fid,cat,listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins])) + PCoerce fid -> apply xs (Just fid) n_fid f es + Nothing -> apply xs mb_fid n_fid _V [ELit (LStr ("[" ++ showCId f ++ "]"))] -- fun without lin + where + lookupProds (Just fid) prods = maybe [] Set.toList (IntMap.lookup fid prods) + lookupProds Nothing prods + | f == _B || f == _V = [] + | otherwise = [prod | (fid,set) <- IntMap.toList prods, prod <- Set.toList set] + + descend n_fid [] = return (n_fid,[]) + descend n_fid ((fid,e):fes) = do (n_fid,xx) <- lin0 [] xs (Just fid) n_fid e + (n_fid,xxs) <- descend n_fid fes + return (n_fid,xx:xxs) + + isApp (PApply _ _) = True + isApp _ = False + + computeSeq :: SeqId -> [(FId,CId,LinTable)] -> [BracketedTokn] + computeSeq seqid args = concatMap compute (elems seq) + where + seq = sequences cnc ! seqid + + compute (SymCat d r) = getArg d r + compute (SymLit d r) = getArg d r + compute (SymKS ts) = [LeafKS ts] + compute (SymKP ts alts) = [LeafKP ts alts] + + getArg d r + | not (null arg_lin) = [Bracket_ fid r cat arg_lin] + | otherwise = arg_lin + where + arg_lin = lin ! r + (fid,cat,lin) = args !! d + +amapWithIndex :: (IArray a e1, IArray a e2, Ix i) => (i -> e1 -> e2) -> a i e1 -> a i e2 +amapWithIndex f arr = listArray (bounds arr) (map (uncurry f) (assocs arr)) diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 8886bc696..1b563fc48 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -11,6 +11,7 @@ import qualified Data.Array as Array import Data.Maybe import Data.List import GF.Data.Utilities(sortNub) +import Text.PrettyPrint -- operations for manipulating PGF grammars and objects @@ -202,3 +203,56 @@ updateProductionIndices pgf = pgf{ concretes = fmap updateConcrete (concretes pg getFunctions (PCoerce fid) = case IntMap.lookup fid productions of Nothing -> [] Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod] + + +-- Utilities for doing linearization + +-- | BracketedString represents a sentence that is linearized +-- as usual but we also want to retain the ''brackets'' that +-- mark the beginning and the end of each constituent. +data BracketedString + = Leaf String -- ^ this is the leaf i.e. a single token + | Bracket {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] -- ^ this is a bracket. The 'CId' is the category of + -- the phrase. The 'FId' is an unique identifier for + -- every phrase in the sentence. For context-free grammars + -- i.e. without discontinuous constituents this identifier + -- is also unique for every bracket. When there are discontinuous + -- phrases then the identifiers are unique for every phrase but + -- not for every bracket since the bracket represents a constituent. + -- The different constituents could still be distinguished by using + -- the constituent index i.e. 'LIndex'. If the grammar is reduplicating + -- then the constituent indices will be the same for all brackets + -- that represents the same constituent. + +data BracketedTokn + = LeafKS [String] + | LeafKP [String] [Alternative] + | Bracket_ {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedTokn] -- Invariant: the list is not empty + +type LinTable = Array.Array LIndex [BracketedTokn] + +-- | Renders the bracketed string as string where +-- the brackets are shown as @(S ...)@ where +-- @S@ is the category. +showBracketedString :: BracketedString -> String +showBracketedString = render . ppBracketedString + +ppBracketedString (Leaf t) = text t +ppBracketedString (Bracket fcat index cat bss) = parens (ppCId cat <+> hsep (map ppBracketedString bss)) + +untokn :: String -> BracketedTokn -> (String,[BracketedString]) +untokn nw (LeafKS ts) = (head ts,map Leaf ts) +untokn nw (LeafKP d vs) = let ts = sel d vs nw + in (head ts,map Leaf ts) + where + sel d vs nw = + case [v | Alt v cs <- vs, any (\c -> isPrefixOf c nw) cs] of + v:_ -> v + _ -> d +untokn nw (Bracket_ fid index cat bss) = + let (nw',bss') = mapAccumR untokn nw bss + in (nw',[Bracket fid index cat (concat bss')]) + +flattenBracketedString :: BracketedString -> [String] +flattenBracketedString (Leaf w) = [w] +flattenBracketedString (Bracket _ _ _ bss) = concatMap flattenBracketedString bss diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index ab5c91f02..85fc026aa 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -6,7 +6,7 @@ module PGF.Parse , nextState , getCompletions , recoveryStates - , extractTrees + , ParseResult(..), getParseResult , parse , parseWithRecovery ) where @@ -14,7 +14,7 @@ module PGF.Parse import Data.Array.IArray import Data.Array.Base (unsafeAt) import Data.List (isPrefixOf, foldl') -import Data.Maybe (fromMaybe, maybe) +import Data.Maybe (fromMaybe, maybe, maybeToList) import qualified Data.Map as Map import qualified GF.Data.TrieMap as TMap import qualified Data.IntMap as IntMap @@ -27,26 +27,35 @@ import PGF.Data import PGF.Expr(Tree) import PGF.Macros import PGF.TypeCheck -import Debug.Trace +import PGF.Forest(Forest(Forest), linearizeWithBrackets) -parse :: PGF -> Language -> Type -> [String] -> [Tree] -parse pgf lang typ toks = loop (initState pgf lang typ) toks +-- | This data type encodes the different outcomes which you could get from the parser. +data ParseResult + = ParseFailed Int -- ^ The integer is the position in number of tokens where the parser failed. + | TypeError FId [TcError] -- ^ The parsing was successful but none of the trees is type correct. + -- The forest id ('FId') points to the bracketed string from the parser + -- where the type checking failed. More than one error is returned + -- if there are many analizes for some phrase but they all are not type correct. + | ParseResult [Tree] -- ^ If the parsing was successful we get a list of abstract syntax trees. The list should be non-empty. + +parse :: PGF -> Language -> Type -> [String] -> (ParseResult,Maybe BracketedString) +parse pgf lang typ toks = loop 0 (initState pgf lang typ) toks where - loop ps [] = extractTrees ps typ - loop ps (t:ts) = case nextState ps t of - Left es -> [] - Right ps -> loop ps ts + loop i ps [] = getParseResult ps typ + loop i ps (t:ts) = case nextState ps t of + Left es -> (ParseFailed i,Nothing) + Right ps -> loop (i+1) ps ts -parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree] +parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> (ParseResult,Maybe BracketedString) parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks where - accept ps [] = extractTrees ps typ + accept ps [] = getParseResult ps typ accept ps (t:ts) = case nextState ps t of Right ps -> accept ps ts Left es -> skip (recoveryStates open_typs es) ts - skip ps_map [] = extractTrees (fst ps_map) typ + skip ps_map [] = getParseResult (fst ps_map) typ skip ps_map (t:ts) = case Map.lookup t (snd ps_map) of Just ps -> accept ps ts @@ -145,23 +154,31 @@ recoveryStates open_types (EState pgf cnc chart) = -- that spans the whole input consumed so far. The trees are also -- limited by the category specified, which is usually -- the same as the startup category. -extractTrees :: ParseState -> Type -> [Tree] -extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) = - nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]] +getParseResult :: ParseState -> Type -> (ParseResult,Maybe BracketedString) +getParseResult (PState pgf cnc chart items) ty@(DTyp _ start _) = + let mb_bs = case roots of + ((root,lbl):_) -> Just $ linearizeWithBrackets $ Forest (abstract pgf) cnc (forest st) root lbl + _ -> Nothing + + exps = nubsort $ do + (fid,lbl) <- roots + (fvs,e) <- go Set.empty 0 (0,fid) + guard (Set.null fvs) + Right e1 <- [checkExpr pgf e ty] + return e1 + + in (ParseResult exps,mb_bs) where (mb_agenda,acc) = TMap.decompose items agenda = maybe [] Set.toList mb_agenda (_,st) = process Nothing (\_ _ -> id) (sequences cnc) (cncfuns cnc) agenda () chart - exps = - case Map.lookup start (cnccats cnc) of - Just (CncCat s e lbls) -> do cat <- range (s,e) - lbl <- indices lbls - Just fid <- [lookupPC (PK cat lbl 0) (passive st)] - (fvs,tree) <- go Set.empty 0 (0,fid) - guard (Set.null fvs) - return tree - Nothing -> mzero + roots = case Map.lookup start (cnccats cnc) of + Just (CncCat s e lbls) -> do cat <- range (s,e) + lbl <- indices lbls + fid <- maybeToList (lookupPC (PK cat lbl 0) (passive st)) + return (fid,lbl) + Nothing -> mzero go rec fcat' (d,fcat) | fcat < totalCats cnc = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments @@ -189,6 +206,7 @@ extractTrees (PState pgf cnc chart items) ty@(DTyp _ start _) = freeVar (EFun v) = Set.singleton v freeVar _ = Set.empty + process mbt fn !seqs !funs [] acc chart = (acc,chart) process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart | inRange (bounds lin) ppos = @@ -218,15 +236,15 @@ process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) ac Nothing -> fid Just fid -> fid - in case [ts | PConst _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of + in case [ts | PConst _ _ ts <- maybe [] Set.toList (IntMap.lookup fid' (forest chart))] of (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc in process mbt fn seqs funs items acc' chart [] -> case litCatMatch fid mbt of - Just (toks,lit) + Just (cat,lit,toks) -> let fid' = nextId chart !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc in process mbt fn seqs funs items acc' chart{passive=insertPC (mkPK key k) fid' (passive chart) - ,forest =IntMap.insert fid' (Set.singleton (PConst lit toks)) (forest chart) + ,forest =IntMap.insert fid' (Set.singleton (PConst cat lit toks)) (forest chart) ,nextId =nextId chart+1 } Nothing -> process mbt fn seqs funs items acc chart @@ -260,12 +278,12 @@ updateAt :: Int -> a -> [a] -> [a] updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs] litCatMatch fcat (Just t) - | fcat == fcatString = Just ([t],ELit (LStr t)) - | fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n)); + | fcat == fcatString = Just (cidString,ELit (LStr t),[t]) + | fcat == fcatInt = case reads t of {[(n,"")] -> Just (cidInt,ELit (LInt n),[t]); _ -> Nothing } - | fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d)); + | fcat == fcatFloat = case reads t of {[(d,"")] -> Just (cidFloat,ELit (LFlt d),[t]); _ -> Nothing } - | fcat == fcatVar = Just ([t],EFun (mkCId t)) + | fcat == fcatVar = Just (cidVar,EFun (mkCId t),[t]) litCatMatch _ _ = Nothing @@ -341,9 +359,9 @@ foldForest f g b fcat forest = Nothing -> b Just set -> Set.fold foldProd b set where - foldProd (PCoerce fcat) b = foldForest f g b fcat forest - foldProd (PApply funid args) b = f funid args b - foldProd (PConst const toks) b = g const toks b + foldProd (PCoerce fcat) b = foldForest f g b fcat forest + foldProd (PApply funid args) b = f funid args b + foldProd (PConst _ const toks) b = g const toks b ---------------------------------------------------------------- diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index 1d9928304..fa6c5e549 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -58,7 +58,7 @@ ppProduction (fcat,PApply funid args) = ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args))) ppProduction (fcat,PCoerce arg) = ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg) -ppProduction (fcat,PConst _ ss) = +ppProduction (fcat,PConst _ _ ss) = ppFCat fcat <+> text "->" <+> ppStrs ss ppCncFun (funid,CncFun fun arr) = diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index e58791d4d..3075e7a86 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -15,339 +15,296 @@ -- instead of rolling its own. ----------------------------------------------------------------------------- -module PGF.VisualizeTree ( graphvizAbstractTree - , graphvizParseTree - , graphvizDependencyTree - , graphvizAlignment - , tree2mk - , getDepLabels - , PosText(..), readPosText +module PGF.VisualizeTree + ( graphvizAbstractTree + , graphvizParseTree + , graphvizDependencyTree + , graphvizBracketedString + , graphvizAlignment + , getDepLabels ) where -import PGF.CId (CId,showCId,pCId,mkCId) +import PGF.CId (CId,showCId,ppCId,mkCId) import PGF.Data -import PGF.Tree -import PGF.Expr (showExpr) +import PGF.Expr (showExpr, Tree) import PGF.Linearize -import PGF.Macros (lookValCat) +import PGF.Macros (lookValCat, BracketedString(..), flattenBracketedString) import qualified Data.Map as Map import Data.List (intersperse,nub,isPrefixOf,sort,sortBy) import Data.Char (isDigit) -import qualified Text.ParserCombinators.ReadP as RP +import Text.PrettyPrint -import Debug.Trace +-- | Renders abstract syntax tree in Graphviz format +graphvizAbstractTree :: PGF -> (Bool,Bool) -> Tree -> String +graphvizAbstractTree pgf (funs,cats) = render . tree2graph + where + tree2graph t = + text "graph {" $$ + ppGraph [] [] 0 t $$ + text "}" -graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String -graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree + getAbs xs (EAbs _ x e) = getAbs (x:xs) e + getAbs xs (ETyped e _) = getAbs xs e + getAbs xs e = (xs,e) + + getApp (EApp x y) es = getApp x (y:es) + getApp (ETyped e _) es = getApp e es + getApp e es = (e,es) -tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String] -tree2graph pgf (funs,cats) = prf [] where - prf ps t = let (nod,lab) = prn ps t in - (nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") : - case t of - Fun cid trees -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - Abs xs (Fun cid trees) -> - [ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++ - concat [prf (j:ps) t | (j,t) <- zip [0..] trees] - _ -> [] - prn ps t = case t of - Fun cid _ -> - let - fun = if funs then showCId cid else "" - cat = if cats then prCat cid else "" - colon = if funs && cats then " : " else "" - lab = "\"" ++ fun ++ colon ++ cat ++ "\"" - in (show(show (ps :: [Int])),lab) - Abs bs tree -> - let fun = case tree of - Fun cid _ -> Fun cid [] - _ -> tree - in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"") - _ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"") - pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];" - arr = " -- " -- if digr then " -> " else " -- " - prCat = showCId . lookValCat pgf - esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts + getLbl scope (EFun f) = let fun = if funs then ppCId f else empty + cat = if cats then ppCId (lookValCat pgf f) else empty + sep = if funs && cats then colon else empty + in fun <+> sep <+> cat + getLbl scope (ELit l) = text (escapeStr (render (ppLit l))) + getLbl scope (EMeta i) = ppMeta i + getLbl scope (EVar i) = ppCId (scope !! i) + getLbl scope (ETyped e _) = getLbl scope e + getLbl scope (EImplArg e) = getLbl scope e -prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where - graph = if digr then "digraph" else "graph" + ppGraph scope ps i e0 = + let (xs, e1) = getAbs [] e0 + (e2,args) = getApp e1 [] + binds = if null xs + then empty + else text "\\\\" <> hcat (punctuate comma (map ppCId xs)) <+> text "->" + (lbl,eargs) = case e2 of + EAbs _ _ _ -> (char '@', e2:args) -- eta-redexes are rendered with artificial "@" node + _ -> (getLbl scope' e2, args) + scope' = xs ++ scope + in ppNode (i:ps) <> text "[label =" <+> doubleQuotes (binds <+> lbl) <> text ", style = \"solid\", shape = \"plaintext\"] ;" $$ + (if null ps + then empty + else ppNode ps <+> text "--" <+> ppNode (i:ps) <+> text "[style = \"solid\"];") $$ + vcat (zipWith (ppGraph scope' (i:ps)) [0..] eargs) + ppNode ps = char 'n' <> hcat (punctuate (char '_') (map int ps)) + + escapeStr [] = [] + escapeStr ('\\':cs) = '\\':'\\':escapeStr cs + escapeStr ('"' :cs) = '\\':'"' :escapeStr cs + escapeStr (c :cs) = c :escapeStr cs --- replace each non-atomic constructor with mkC, where C is the val cat -tree2mk :: PGF -> Expr -> String -tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where - t2m t = case t of - Fun cid [] -> t - Fun cid ts -> Fun (mk cid) (map t2m ts) - _ -> t - mk = mkCId . ("mk" ++) . showCId . lookValCat pgf - --- dependency trees from Linearize.linearizeMark - -graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String -graphvizDependencyTree format debug mlab ms pgf lang exp = case format of - "malt" -> unlines (lin2dep format) - "malt_input" -> unlines (lin2dep format) - _ -> prGraph True (lin2dep format) - - where - - lin2dep format = -- trace (ifd (show sortedNodes ++ show nodeWords)) $ - case format of - "malt" -> map (concat . intersperse "\t") wnodes - "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes - _ -> prelude ++ nodes ++ links - - ifd s = if debug then s else [] - - pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp - ---- use Just str if you have str to match against - - prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] - - nodes = map mkNode nodeWords - mkNode (i,((_,p),ss)) = - node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" - nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| - ((Just f,p),w) <- wlins pot] - - links = map mkLink thelinks - thelinks = [(word y, x, label tr y x) | - (_,((f,x),_)) <- tail nodeWords, - let y = dominant x] - mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" - node = show . show - - dominant x = case x of - [] -> x - _ | not (x == hx) -> hx - _ -> dominant (init x) - where - hx = headArg (init x) tr x - - headArg x0 tr x = case (tr,x) of - (Fun f [],[_]) -> x0 ---- ?? - (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f] - (Fun f ts,i:y) -> headArg x0 (ts !! i) y - _ -> x0 ---- - - label tr y x = case span (uncurry (==)) (zip y x) of - (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) - _ -> "" ---- - - funAt tr x = case (tr,x) of - (Fun f _ ,[]) -> f - (Fun f ts,i:y) -> funAt (ts !! i) y - _ -> mkCId (prTree tr) ---- - - word x = if elem x sortedNodes then x else - let x' = headArg x tr (x ++[0]) in - if x' == x then [] else word x' - - tr = expr2tree exp - sortedNodes = [p | (_,((_,p),_)) <- nodeWords] - - labels = maybe Map.empty id mlab - getHead i f = case Map.lookup f labels of - Just ls -> length $ takeWhile (/= "head") ls - _ -> i - getLabel i f = case Map.lookup f labels of - Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i - _ -> showCId f ++ "#" ++ show i - --- to generate CoNLL format for MaltParser - nodeMap :: Map.Map [Int] Int - nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] - - arcMap :: Map.Map [Int] ([Int],String) - arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] - - lookDomLab p = case Map.lookup p arcMap of - Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) - _ -> (0,rootlabel) - - wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | - (i, ((fun,p),ws)) <- tail nodeWords, - let pos = showCId $ lookValCat pgf fun, - let morph = unspec, - let (dom,lab) = lookDomLab p - ] - maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 - unspec = "_" - rootlabel = "ROOT" type Labels = Map.Map CId [String] +graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String +graphvizDependencyTree format debug mlab ms pgf lang t = render $ + case format of + "malt" -> vcat (map (hcat . intersperse (char '\t') ) wnodes) + "malt_input" -> vcat (map (hcat . intersperse (char '\t') . take 6) wnodes) + _ -> text "digraph {" $$ + space $$ + nest 2 (text "rankdir=LR ;" $$ + text "node [shape = plaintext] ;" $$ + vcat nodes $$ + links) $$ + text "}" + where + nodes = map mkNode leaves + links = empty + wnodes = undefined + + nil = -1 + + bs = bracketedLinearize pgf lang t + + leaves = (nil,0,"ROOT") : (groupAndIndexIt 1 . getLeaves nil) bs + + groupAndIndexIt id [] = [] + groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws + in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 + where + collect pws@((p1,w):pws1) + | p == p1 = let (ws,pws2) = collect pws1 + in (w:ws,pws2) + collect pws = ([],pws) + + getLeaves parent bs = + case bs of + Leaf w -> [(parent,w)] + Bracket fid _ _ bss -> concatMap (getLeaves fid) bss + + mkNode (p,i,w) = + tag p <> text " [label = " <> doubleQuotes (int i <> char '.' <+> text w) <> text "] ;" + +{- + ifd s = if debug then s else [] + + pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp + + nodes = map mkNode nodeWords + mkNode (i,((_,p),ss)) = + node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;" + nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)| + ((Just f,p),w) <- wlins pot] + + links = map mkLink thelinks + thelinks = [(word y, x, label tr y x) | + (_,((f,x),_)) <- tail nodeWords, + let y = dominant x] + mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;" + node = show . show + + dominant x = case x of + [] -> x + _ | not (x == hx) -> hx + _ -> dominant (init x) + where + hx = headArg (init x) tr x + + headArg x0 tr x = case (tr,x) of + (Fun f [],[_]) -> x0 ---- ?? + (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f] + (Fun f ts,i:y) -> headArg x0 (ts !! i) y + _ -> x0 ---- + + label tr y x = case span (uncurry (==)) (zip y x) of + (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys)) + _ -> "" ---- + + funAt tr x = case (tr,x) of + (Fun f _ ,[]) -> f + (Fun f ts,i:y) -> funAt (ts !! i) y + _ -> mkCId (prTree tr) ---- + + word x = if elem x sortedNodes then x else + let x' = headArg x tr (x ++[0]) in + if x' == x then [] else word x' + + tr = expr2tree exp + sortedNodes = [p | (_,((_,p),_)) <- nodeWords] + + labels = maybe Map.empty id mlab + getHead i f = case Map.lookup f labels of + Just ls -> length $ takeWhile (/= "head") ls + _ -> i + getLabel i f = case Map.lookup f labels of + Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i + _ -> showCId f ++ "#" ++ show i + + -- to generate CoNLL format for MaltParser + nodeMap :: Map.Map [Int] Int + nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords] + + arcMap :: Map.Map [Int] ([Int],String) + arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks] + + lookDomLab p = case Map.lookup p arcMap of + Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l) + _ -> (0,rootlabel) + + wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] | + (i, ((fun,p),ws)) <- tail nodeWords, + let pos = showCId $ lookValCat pgf fun, + let morph = unspec, + let (dom,lab) = lookDomLab p + ] + maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2 + unspec = "_" + rootlabel = "ROOT" +-} + + getDepLabels :: [String] -> Labels getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] --- parse trees from Linearize.linearizeMark ----- nubrec and domins are quadratic, but could be (n log n) +graphvizParseTree :: PGF -> Language -> Tree -> String +graphvizParseTree pgf lang = graphvizBracketedString . bracketedLinearize pgf lang -graphvizParseTree :: PGF -> CId -> Expr -> String -graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where +graphvizBracketedString :: BracketedString -> String +graphvizBracketedString = render . lin2tree + where + lin2tree bs = + text "graph {" $$ + space $$ + nest 2 (text "rankdir=BU ;" $$ + text "node [shape = record, color = white] ;" $$ + space $$ + vcat (nodes bs)) $$ + text "}" + where + nodes bs = zipWith mkStruct [0..] (interns ++ [zipWith (\i (l,p,w) -> (l,p,i,w)) [99990..] leaves]) -lin2tree pgf s = prelude ++ nodes ++ links where + nil = -1 + + leaves = getLeaves 0 nil bs + interns = getInterns 0 [(nil,bs)] - prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"] + getLeaves level parent bs = + case bs of + Leaf w -> [(level-1,parent,w)] + Bracket fid i _ bss -> concatMap (getLeaves (level+1) fid) bss - nodeRecs = zip [0..] - (nub (filter (not . null) (nlins [postext] ++ [leaves postext]))) - nlins pts = - nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] : - concatMap nlins [ts | T _ ts <- pts] - leaves pt = [(p++[j],s) | (j,(p,s)) <- - zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]] + getInterns level [] = [] + getInterns level nodes = + nub [(level-1,parent,fid,showCId cat) | (parent,Bracket fid _ cat _) <- nodes] : + getInterns (level+1) [(fid,child) | (_,Bracket fid _ _ children) <- nodes, child <- children] - nubrec es rs = case rs of - r:rr -> let r' = filter (not . flip elem es) (nub r) - in r' : nubrec (r' ++ es) rr - _ -> rs + mkStruct l cs = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ + vcat [link pl pid l id | (pl,pid,id,_) <- cs] + link pl pid l id + | pl < 0 = empty + | otherwise = struct pl <> colon <> tag pid <> colon <> char 's' <+> + text "--" <+> + struct l <> colon <> tag id <> colon <> char 'n' <+> semi + fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text c | (_,_,id,c) <- cs]) - nodes = map mkStruct nodeRecs - - mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;" - cat = showCId . lookValCat pgf - fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs]) - struct i = "struct" ++ show i - - links = map mkEdge domins - domins = nub [((i,x),(j,y)) | - (i,xs) <- nodeRecs, (j,ys) <- nodeRecs, - x <- xs, y <- ys, dominates x y] - dominates (p,x) (q,y) = not (null q) && p == init q - mkEdge ((i,x),(j,y)) = - struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++ - struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;" - - postext = readPosText s -- auxiliaries for graphviz syntax -struct i = "struct" ++ show i -mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n -uncommas = map (\c -> if c==',' then 'c' else c) -tag s = "<" ++ s ++ ">" -showp = init . tail . show -mtag = tag . ('n':) . uncommas +struct l = text ("struct" ++ show l) +tbrackets d = char '<' <> d <> char '>' +tag i = char 'n' <> int i -- word alignments from Linearize.markLinearize -- words are chunks like {[0,1,1,0] old} -graphvizAlignment :: PGF -> Expr -> String -graphvizAlignment pgf = prGraph True . lin2graph . linsMark where - linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- Map.keys (concretes pgf)] +graphvizAlignment :: PGF -> [Language] -> Expr -> String +graphvizAlignment pgf langs = render . lin2graph . linsBracketed + where + linsBracketed t = [bracketedLinearize pgf lang t | lang <- langs] -lin2graph :: [String] -> [String] -lin2graph ss = -- trace (show ss) $ - prelude ++ nodes ++ links + lin2graph :: [BracketedString] -> Doc + lin2graph bss = + text "digraph {" $$ + space $$ + nest 2 (text "rankdir=LR ;" $$ + text "node [shape = record] ;" $$ + space $$ + mkLayers 0 leaves) $$ + text "}" + where + nil = -1 - where + leaves = map (groupAndIndexIt 0 . getLeaves nil) bss - prelude = ["rankdir=LR ;", "node [shape = record] ;"] + groupAndIndexIt id [] = [] + groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws + in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1 + where + collect pws@((p1,w):pws1) + | p == p1 = let (ws,pws2) = collect pws1 + in (w:ws,pws2) + collect pws = ([],pws) - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) | - (i,ws) <- zip [0..] (map (wlins . readPosText) ss)] + getLeaves parent bs = + case bs of + Leaf w -> [(parent,w)] + Bracket fid _ _ bss -> concatMap (getLeaves fid) bss - unw = concat . intersperse "\\ " -- space escape in graphviz + mkLayers l [] = empty + mkLayers l (cs:css) = struct l <> text "[label = \"" <> fields cs <> text "\"] ;" $$ + (case css of + (ncs:_) -> vcat (map (mkLinks l ncs) cs) + _ -> empty) $$ + mkLayers (l+1) css - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" -{- -alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double) -alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where - linsMark t = - [s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)] - - mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double) - mkStat = - - mkAlign :: [String] -> [(String,String)] - mkAlign ss = - - nlins :: [(Int,[((Int,String),String)])] - nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) | - (i,vs) <- zip [0..] (map (wlins . readPosText) ss)] - - nodes = map mkStruct nlins - - mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;" - - fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws]) - - links = nub $ concatMap mkEdge (init nlins) - - mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list - [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q] - - edge i v w = - struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;" --} - -wlins :: PosText -> [((Maybe CId,[Int]),[String])] -wlins pt = case pt of - T p pts -> concatMap (lins p) pts - M ws -> if null ws then [] else [((Nothing,[]),ws)] - where - lins p pt = case pt of - T q pts -> concatMap (lins q) pts - M ws -> if null ws then [] else [(p,ws)] - -data PosText = - T (Maybe CId,[Int]) [PosText] - | M [String] - deriving Show - -readPosText :: String -> PosText -readPosText = fst . head . (RP.readP_to_S pPosText) where - pPosText = do - RP.char '(' >> RP.skipSpaces - p <- pPos - RP.skipSpaces - ts <- RP.many pPosText - RP.char ')' >> RP.skipSpaces - return (T p ts) - RP.<++ do - ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ') - return (M ws) - pPos = do - fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f)) - RP.<++ (return Nothing) - RP.char '[' >> RP.skipSpaces - is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',') - RP.char ']' >> RP.skipSpaces - RP.char ')' RP.<++ return ' ' - return (fun,map read is) - - -{- -digraph{ -rankdir ="LR" ; -node [shape = record] ; - -struct1 [label = " this| very| intelligent| man"] ; -struct2 [label = " cet| homme| tres| intelligent| ci"] ; - -struct1:f0 -> struct2:f0 ; -struct1:f1 -> struct2:f2 ; -struct1:f2 -> struct2:f3 ; -struct1:f3 -> struct2:f1 ; -struct1:f0 -> struct2:f4 ; -} --} + mkLinks l cs (p0,id0,_) = + vcat (map (\id1 -> struct l <> colon <> tag id0 <> colon <> char 'e' <+> + text "->" <+> + struct (l+1) <> colon <> tag id1 <> colon <> char 'w' <+> semi) indices) + where + indices = [id1 | (p1,id1,_) <- cs, p1 == p0] + fields cs = hsep (intersperse (char '|') [tbrackets (tag id) <> text w | (_,id,w) <- cs])