From af13bae2dfb9adaa7c4aa273961fc09cc7ba1b7a Mon Sep 17 00:00:00 2001 From: krasimir Date: Sun, 17 Jan 2010 17:05:21 +0000 Subject: [PATCH] now the linearization is completely based on PMCFG --- GF.cabal | 1 - src/compiler/GF/Command/Commands.hs | 13 +- src/compiler/GF/Compile/GeneratePMCFG.hs | 37 ++-- src/compiler/GF/Compile/GrammarToPGF.hs | 32 +++- src/compiler/GF/Compile/PGFPretty.hs | 1 + src/compiler/GF/Compile/PGFtoJS.hs | 2 +- src/compiler/GF/Quiz.hs | 8 +- src/compiler/GF/Speech/PGFToCFG.hs | 4 +- src/compiler/GF/Speech/VoiceXML.hs | 1 - src/runtime/haskell/PGF/Binary.hs | 19 +- src/runtime/haskell/PGF/Linearize.hs | 222 +++++++++-------------- src/runtime/haskell/PGF/Macros.hs | 72 +++++++- src/runtime/haskell/PGF/Morphology.hs | 24 ++- src/runtime/haskell/PGF/PMCFG.hs | 32 +--- src/runtime/haskell/PGF/Parse.hs | 4 +- src/runtime/haskell/PGF/ShowLinearize.hs | 114 ------------ src/runtime/haskell/PGF/VisualizeTree.hs | 10 +- 17 files changed, 250 insertions(+), 346 deletions(-) delete mode 100644 src/runtime/haskell/PGF/ShowLinearize.hs diff --git a/GF.cabal b/GF.cabal index a542349d0..d1d02f50a 100644 --- a/GF.cabal +++ b/GF.cabal @@ -48,7 +48,6 @@ library PGF.TypeCheck PGF.Binary PGF.Morphology - PGF.ShowLinearize PGF.VisualizeTree GF.Data.TrieMap GF.Data.Utilities diff --git a/src/compiler/GF/Command/Commands.hs b/src/compiler/GF/Command/Commands.hs index 3647d2e14..addf9b94a 100644 --- a/src/compiler/GF/Command/Commands.hs +++ b/src/compiler/GF/Command/Commands.hs @@ -14,7 +14,7 @@ module GF.Command.Commands ( import PGF import PGF.CId -import PGF.ShowLinearize +import PGF.Linearize import PGF.VisualizeTree import PGF.Macros import PGF.Data ---- @@ -344,9 +344,7 @@ allCommands cod env@(pgf, mos) = Map.fromList [ ("all","show all forms and variants"), ("bracket","show tree structure with brackets and paths to nodes"), ("multi","linearize to all languages (default)"), - ("record","show source-code-like record"), ("table","show all forms labelled by parameters"), - ("term", "show PGF term"), ("treebank","show the tree and tag linearizations with language names") ] ++ stringOpOptions, flags = [ @@ -797,11 +795,9 @@ allCommands cod env@(pgf, mos) = Map.fromList [ linear :: [Option] -> CId -> Expr -> String linear opts lang = let unl = unlex opts lang in case opts of - _ | isOpt "all" opts -> allLinearize unl pgf lang - _ | isOpt "table" opts -> tableLinearize unl pgf lang - _ | isOpt "term" opts -> termLinearize pgf lang - _ | isOpt "record" opts -> recordLinearize pgf lang - _ | isOpt "bracket" opts -> markLinearize pgf lang + _ | isOpt "all" opts -> unlines . concat . intersperse [[]] . map (map (unl . snd)) . tabularLinearizes pgf lang + _ | isOpt "table" opts -> unlines . concat . intersperse [[]] . map (map (\(p,v) -> p+++":"+++unl v)) . tabularLinearizes pgf lang + _ | isOpt "bracket" opts -> unlines . markLinearizes pgf lang _ -> unl . linearize pgf lang unlex opts lang = stringOps Nothing (getUnlex opts lang ++ map prOpt opts) ---- @@ -957,4 +953,3 @@ prMorphoAnalysis (w,lps) = morphoMissing :: Morpho -> [String] -> [String] morphoMissing mo ws = [w | w <- ws, null (lookupMorpho mo w)] - diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index d7bc39e7c..e6e3fdc79 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -168,10 +168,7 @@ choices nr path = BM (\c s -> let (args,_) = s | otherwise = c : addConstraint path0 index0 tcs mkRecord :: [BranchM (Value a)] -> BranchM (Value a) -mkRecord xs = BM (\c -> go xs (c . Rec)) - where - go [] c s = c [] s - go (BM m:fs) c s = go fs (\bs s -> c (m (\v s -> Return v) s : bs) s) s +mkRecord xs = BM (\c -> foldl (\c (BM m) bs s -> c (m (\v s -> Return v) s : bs) s) (c . Rec) xs []) ---------------------------------------------------------------------- @@ -202,7 +199,7 @@ protoFCat cnc_defs (n,cat) ctype = _ -> error $ "Not a record: " ++ show ctype | otherwise = ctype - loop path rcs tcs (R record) = List.foldl' (\(rcs,tcs) (index,term) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) + loop path rcs tcs (R record) = List.foldr (\(index,term) (rcs,tcs) -> loop (index:path) rcs tcs term) (rcs,tcs) (zip [0..] record) loop path rcs tcs (C i) = ( rcs,(path,[0..i]):tcs) loop path rcs tcs (S _) = (path:rcs, tcs) loop path rcs tcs (F id) = case Map.lookup id cnc_defs of @@ -229,7 +226,7 @@ go' (Variant bs) path ss = do b <- member bs go' (Return v) path ss = go v path ss go :: Value SeqId -> FPath -> [SeqId] -> BacktrackM Env [SeqId] -go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (zip [0..] xs) +go (Rec xs) path ss = foldM (\ss (lbl,b) -> go' b (lbl:path) ss) ss (reverse (zip [0..] xs)) go (Str seqid) path ss = return (seqid : ss) go (Con i) path ss = restrictHead path i >> return ss @@ -350,7 +347,7 @@ emptyGrammarEnv cnc_defs lincats params = where (size,poly) = getMultipliers 1 [] ctype - getMultipliers m ms (R record) = foldl (\(m,ms) t -> getMultipliers m ms t) (m,ms) record + getMultipliers m ms (R record) = foldr (\t (m,ms) -> getMultipliers m ms t) (m,ms) record getMultipliers m ms (S _) = (m,ms) getMultipliers m ms (C max_index) = (m*(max_index+1),m : ms) getMultipliers m ms (F id) = case Map.lookup id cnc_defs of @@ -364,17 +361,11 @@ emptyGrammarEnv cnc_defs lincats params = getLabels _ t = error (show t) expandHOAS abs_defs cnc_defs lincats lindefs env = - foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats + foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) (Map.keys lincats) where hoTypes :: [(Int,CId)] hoTypes = sortNub [(n,c) | (_,(ty,_,_)) <- abs_defs , (n,c) <- fst (typeSkeleton ty), n > 0] - - hoCats :: [CId] - hoCats = sortNub [c | (_,(ty,_,_)) <- abs_defs - , h <- case ty of {DTyp hyps val _ -> hyps} - , let ty = typeOfHypo h - , c <- fst (catSkeleton ty)] -- add a range of PMCFG categories for each GF high-order category add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = @@ -386,8 +377,7 @@ expandHOAS abs_defs cnc_defs lincats lindefs env = -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat add_hoFun env (n,cat) = - let linRec = reverse $ - [[FSymCat 0 i] | (l,i) <- case arg of {PFCat _ _ rcs _ -> zip rcs [0..]}] ++ + let linRec = [[FSymCat 0 i] | i <- case arg of {PFCat _ _ rcs _ -> [0..length rcs-1]}] ++ [[FSymLit i 0] | i <- [1..n]] (env1,lins) = List.mapAccumL addFSeq env linRec newLinRec = mkArray lins @@ -405,13 +395,10 @@ expandHOAS abs_defs cnc_defs lincats lindefs env = -- add one PMCFG function for each high-order category: _V : Var -> Cat add_varFun env cat = - convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) + case Map.lookup cat lindefs of + Nothing -> env + Just lindef -> convertRule cnc_defs env (PFRule _V [(0,cidVar)] (0,cat) [arg] res lindef) where - lindef = - case Map.lookup cat lindefs of - Nothing -> error $ "No lindef for " ++ showCId cat - Just def -> def - arg = case Map.lookup cidVar lincats of Nothing -> error $ "No lincat for " ++ showCId cat @@ -455,15 +442,15 @@ getParserInfo :: GrammarEnv -> ParserInfo getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = ParserInfo { functions = mkArray funSet , sequences = mkArray seqSet - , productions0= productions0 - , productions = filterProductions productions0 + , productions = IntMap.union prodSet coercions + , pproductions = IntMap.empty + , lproductions = Map.empty , startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet) , totalCats = last_id+1 } where mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] - productions0 = IntMap.union prodSet coercions coercions = IntMap.fromList [(fcat,Set.fromList (map FCoerce sub_fcats)) | (sub_fcats,fcat) <- Map.toList crcSet] getFCats :: GrammarEnv -> ProtoFCat -> [FCat] diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index c015eff01..31c768045 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -5,7 +5,7 @@ import GF.Compile.Export import GF.Compile.GeneratePMCFG import PGF.CId -import PGF.Linearize(realize) +import PGF.Macros(updateProductionIndices) import qualified PGF.Macros as CM import qualified PGF.Data as C import qualified PGF.Data as D @@ -46,7 +46,7 @@ mkCanon2gfcc opts cnc gr = -- Adds parsers for all concretes addParsers :: Options -> D.PGF -> IO D.PGF addParsers opts pgf = do cncs <- sequence [conv lang cnc | (lang,cnc) <- Map.toList (D.concretes pgf)] - return pgf { D.concretes = Map.fromList cncs } + return $ updateProductionIndices $ pgf { D.concretes = Map.fromList cncs } where conv lang cnc = do pinfo <- convertConcrete opts (D.abstract pgf) lang cnc return (lang,cnc { D.parser = Just pinfo }) @@ -586,3 +586,31 @@ requiredCanModules isSingle gr c = nub $ filter notReuse ops ++ exts where notReuse i = errVal True $ do m <- M.lookupModule gr i return $ M.isModRes m -- to exclude reused Cnc and Abs from required + + +realize :: C.Term -> String +realize = concat . take 1 . realizes + +realizes :: C.Term -> [String] +realizes = map (unwords . untokn) . realizest + +realizest :: C.Term -> [[C.Tokn]] +realizest trm = case trm of + C.R ts -> realizest (ts !! 0) + C.S ss -> map concat $ combinations $ map realizest ss + C.K t -> [[t]] + C.W s t -> [[C.KS (s ++ r)] | [C.KS r] <- realizest t] + C.FV ts -> concatMap realizest ts + C.TM s -> [[C.KS s]] + _ -> [[C.KS $ "REALIZE_ERROR " ++ show trm]] ---- debug + +untokn :: [C.Tokn] -> [String] +untokn ts = case ts of + C.KP d _ : [] -> d + C.KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss + C.KS s : ws -> s : untokn ws + [] -> [] + where + sel d vs w = case [v | C.Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of + v:_ -> v + _ -> d diff --git a/src/compiler/GF/Compile/PGFPretty.hs b/src/compiler/GF/Compile/PGFPretty.hs index 679714db5..706081999 100644 --- a/src/compiler/GF/Compile/PGFPretty.hs +++ b/src/compiler/GF/Compile/PGFPretty.hs @@ -60,6 +60,7 @@ prCnc abstr name c = prAll prLinCat (lincats c) $$ prAll prLin (lins (expand c)) pr p (P t1 t2) = prec p 3 (pr 3 t1 <> text "!" <> pr 3 t2) pr p (S ts) = prec p 2 (hsep (punctuate (text " ++") (map (pr 2) ts))) pr p (K (KS t)) = doubleQuotes (text t) + pr p (K _) = empty pr p (V i) = text ("argv_" ++ show (i+1)) pr p (C i) = text (show (i+1)) pr p (FV ts) = prec p 1 (hsep (punctuate (text " |") (map (pr 1) ts))) diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index 0cec4121d..67d18809a 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -90,7 +90,7 @@ children = JS.Ident "cs" -- Parser parser2js :: ParserInfo -> [JS.Expr] -parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions0 p)], +parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray (map frule2js (Set.toList set))) | (cat,set) <- IntMap.toList (productions p)], JS.EArray $ (map ffun2js (Array.elems (functions p))), JS.EArray $ (map seq2js (Array.elems (sequences p))), JS.EObj $ map cats (Map.assocs (startCats p)), diff --git a/src/compiler/GF/Quiz.hs b/src/compiler/GF/Quiz.hs index 52d9dee6b..d0353ff79 100644 --- a/src/compiler/GF/Quiz.hs +++ b/src/compiler/GF/Quiz.hs @@ -19,7 +19,7 @@ module GF.Quiz ( ) where import PGF -import PGF.ShowLinearize +import PGF.Linearize import GF.Data.Operations import GF.Infra.UseIO import GF.Infra.Option @@ -51,11 +51,11 @@ morphologyList :: PGF -> Language -> Type -> Int -> IO [(String,[String])] morphologyList pgf ig typ number = do ts <- generateRandom pgf typ >>= return . take (max 1 number) gen <- newStdGen - let ss = map (tabularLinearize pgf ig) ts + let ss = map (tabularLinearizes pgf ig) ts let size = length (head ss) let forms = take number $ randomRs (0,size-1) gen - return [(head (snd (head pws)) +++ par, ws) | - (pws,i) <- zip ss forms, let (par,ws) = pws !! i] + return [(snd (head pws0) +++ fst (pws0 !! i), ws) | + (pwss@(pws0:_),i) <- zip ss forms, let ws = map (\pws -> snd (pws !! i)) pwss] -- | compare answer to the list of right answers, increase score and give feedback mkAnswer :: Encoding -> [String] -> String -> (Integer, String) diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index bd27deadf..4ac430704 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -37,7 +37,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co pinfo = fromMaybe (error "pgfToCFG: No parser.") (lookParser pgf lang) rules :: [(FCat,Production)] - rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.productions pinfo) + rules = [(fcat,prod) | (fcat,set) <- IntMap.toList (PGF.pproductions pinfo) , prod <- Set.toList set] fcatCats :: Map FCat Cat @@ -58,7 +58,7 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co topdownRules cat = f cat [] where - f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo)) + f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (pproductions pinfo)) g (FApply funid args) rules = (functions pinfo ! funid,args) : rules g (FCoerce cat) rules = f cat rules diff --git a/src/compiler/GF/Speech/VoiceXML.hs b/src/compiler/GF/Speech/VoiceXML.hs index fb25d6a1e..d3939931e 100644 --- a/src/compiler/GF/Speech/VoiceXML.hs +++ b/src/compiler/GF/Speech/VoiceXML.hs @@ -16,7 +16,6 @@ import GF.Speech.SRG (getSpeechLanguage) import PGF.CId import PGF.Data import PGF.Macros -import PGF.Linearize (showPrintName) import Control.Monad (liftM) import Data.List (isPrefixOf, find, intersperse) diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs index 7d5db73af..a9a6a78dc 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -2,6 +2,7 @@ module PGF.Binary where import PGF.CId import PGF.Data +import PGF.Macros import Data.Binary import Data.Binary.Put import Data.Binary.Get @@ -28,10 +29,11 @@ instance Binary PGF where gflags <- get abstract <- get concretes <- get - return (PGF{ absname=absname, cncnames=cncnames - , gflags=gflags - , abstract=abstract, concretes=concretes - }) + return $ updateProductionIndices $ + (PGF{ absname=absname, cncnames=cncnames + , gflags=gflags + , abstract=abstract, concretes=concretes + }) instance Binary CId where put (CId bs) = put bs @@ -185,15 +187,16 @@ instance Binary Production where _ -> decodingError instance Binary ParserInfo where - put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p) + put p = put (functions p, sequences p, productions p, totalCats p, startCats p) get = do functions <- get sequences <- get - productions0<- get + productions <- get totalCats <- get startCats <- get return (ParserInfo{functions=functions,sequences=sequences - ,productions0=productions0 - ,productions =filterProductions productions0 + ,productions = productions + ,pproductions = IntMap.empty + ,lproductions = Map.empty ,totalCats=totalCats,startCats=startCats}) decodingError = fail "This PGF file was compiled with different version of GF" diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index de3daf11d..9058cba61 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -1,38 +1,81 @@ -{-# LANGUAGE ParallelListComp #-} -module PGF.Linearize - (linearizes,showPrintName,realize,realizes,linTree, linTreeMark,linearizesMark) where +module PGF.Linearize(linearizes,markLinearizes,tabularLinearizes) where import PGF.CId import PGF.Data import PGF.Macros -import PGF.Tree - +import Data.Maybe (fromJust) +import Data.Array.IArray +import Data.List import Control.Monad import qualified Data.Map as Map -import Data.List - -import Debug.Trace +import qualified Data.IntMap as IntMap +import qualified Data.Set as Set -- linearization and computation of concrete PGF Terms +type LinTable = Array FIndex [Tokn] + linearizes :: PGF -> CId -> Expr -> [String] -linearizes pgf lang = realizes . linTree pgf lang +linearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang (\_ _ lint -> lint) -realize :: Term -> String -realize = concat . take 1 . realizes +linTree :: PGF -> Language -> (Maybe CId -> [Int] -> LinTable -> LinTable) -> Expr -> [LinTable] +linTree pgf lang mark e = lin0 [] [] [] Nothing e + where + cnc = lookMap (error "no lang") lang (concretes pgf) + pinfo = fromJust (parser cnc) + lp = lproductions pinfo -realizes :: Term -> [String] -realizes = map (unwords . untokn) . realizest + lin0 path xs ys mb_fid (EAbs _ x e) = lin0 path (showCId x:xs) ys mb_fid e + lin0 path xs ys mb_fid (ETyped e _) = lin0 path xs ys mb_fid e + lin0 path xs ys mb_fid e | null xs = lin path ys mb_fid e [] + | otherwise = apply path (xs ++ ys) mb_fid _B (e:[ELit (LStr x) | x <- xs]) -realizest :: Term -> [[Tokn]] -realizest trm = case trm of - R ts -> realizest (ts !! 0) - S ss -> map concat $ combinations $ map realizest ss - K t -> [[t]] - W s t -> [[KS (s ++ r)] | [KS r] <- realizest t] - FV ts -> concatMap realizest ts - TM s -> [[KS s]] - _ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug + lin path xs mb_fid (EApp e1 e2) es = lin path xs mb_fid e1 (e2:es) + lin path xs mb_fid (ELit l) [] = case l of + LStr s -> return (mark Nothing path (ss s)) + LInt n -> return (mark Nothing path (ss (show n))) + LFlt f -> return (mark Nothing path (ss (show f))) + lin path xs mb_fid (EMeta i) es = apply path xs mb_fid _V (ELit (LStr ('?':show i)):es) + lin path xs mb_fid (EFun f) es = map (mark (Just f) path) (apply path xs mb_fid f es) + lin path xs mb_fid (EVar i) es = apply path xs mb_fid _V (ELit (LStr (xs !! i)) :es) + lin path xs mb_fid (ETyped e _) es = lin path xs mb_fid e es + lin path xs mb_fid (EImplArg e) es = lin path xs mb_fid e es + + ss s = listArray (0,0) [[KS s]] + + apply path xs mb_fid f es = + case Map.lookup f lp of + Just prods -> case lookupProds mb_fid prods of + Just set -> do prod <- Set.toList set + case prod of + FApply funid fids -> do guard (length fids == length es) + args <- sequence (zipWith3 (\i fid e -> lin0 (sub i path) [] xs (Just fid) e) [0..] fids es) + let (FFun _ lins) = functions pinfo ! funid + return (listArray (bounds lins) [computeSeq seqid args | seqid <- elems lins]) + FCoerce fid -> apply path xs (Just fid) f es + Nothing -> mzero + Nothing -> apply path xs mb_fid _V [ELit (LStr "?")] -- function without linearization + where + lookupProds (Just fid) prods = IntMap.lookup fid prods + lookupProds Nothing prods + | f == _B || f == _V = Nothing + | otherwise = Just (Set.filter isApp (Set.unions (IntMap.elems prods))) + + sub i path + | f == _B || f == _V = path + | otherwise = i:path + + isApp (FApply _ _) = True + isApp _ = False + + computeSeq seqid args = concatMap compute (elems seq) + where + seq = sequences pinfo ! seqid + + compute (FSymCat d r) = (args !! d) ! r + compute (FSymLit d r) = (args !! d) ! r + compute (FSymKS ts) = map KS ts + compute (FSymKP ts alts) = [KP ts alts] untokn :: [Tokn] -> [String] untokn ts = case ts of @@ -45,126 +88,23 @@ untokn ts = case ts of v:_ -> v _ -> d --- Lifts all variants to the top level (except those in macros). -liftVariants :: Term -> [Term] -liftVariants = f +-- create a table from labels+params to variants +tabularLinearizes :: PGF -> CId -> Expr -> [[(String,String)]] +tabularLinearizes pgf lang e = map (zip lbls . map (unwords . untokn) . elems) (linTree pgf lang (\_ _ lint -> lint) e) where - f (R ts) = liftM R $ mapM f ts - f (P t1 t2) = liftM2 P (f t1) (f t2) - f (S ts) = liftM S $ mapM f ts - f (FV ts) = ts >>= f - f (W s t) = liftM (W s) $ f t - f t = return t + lbls = case unApp e of + Just (f,_) -> let cat = valCat (lookType pgf f) + in case parser (lookConcr pgf lang) >>= Map.lookup cat . startCats of + Just (_,_,lbls) -> elems lbls + Nothing -> error "No labels" + Nothing -> error "Not function application" -linTree :: PGF -> CId -> Expr -> Term -linTree pgf lang e = lin (expr2tree e) Nothing + +-- show bracketed markup with references to tree structure +markLinearizes :: PGF -> CId -> Expr -> [String] +markLinearizes pgf lang = map (unwords . untokn . (! 0)) . linTree pgf lang mark where - cnc = lookMap (error "no lang") lang (concretes pgf) + mark mb_f path lint = amap (bracket mb_f path) lint - lin (Abs xs e ) mty = case lin e Nothing of - R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) - TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) - lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of - Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps] - in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants] - Nothing -> tm0 - lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted - lin (Lit (LInt i)) mty = R [kks (show i)] - lin (Lit (LFlt d)) mty = R [kks (show d)] - lin (Var x) mty = case mty of - Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc)) - Nothing -> TM (showCId x) - lin (Meta i) mty = case mty of - Just (DTyp _ cat _) -> compute pgf lang [K (KS ("?" ++ show i))] (lookMap tm0 cat (lindefs cnc)) - Nothing -> TM (show i) - -variants :: [Term] -> Term -variants ts = case ts of - [t] -> t - _ -> FV ts - -unvariants :: Term -> [Term] -unvariants t = case t of - FV ts -> ts - _ -> [t] - -compute :: PGF -> CId -> [Term] -> Term -> Term -compute pgf lang args = comp where - comp trm = case trm of - P r p -> proj (comp r) (comp p) - W s t -> W s (comp t) - R ts -> R $ map comp ts - V i -> idx args i -- already computed - F c -> comp $ look c -- not computed (if contains argvar) - FV ts -> FV $ map comp ts - S ts -> S $ filter (/= S []) $ map comp ts - _ -> trm - - look = lookOper pgf lang - - idx xs i = if i > length xs - 1 - then trace - ("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0 - else xs !! i - - proj r p = case (r,p) of - (_, FV ts) -> FV $ map (proj r) ts - (FV ts, _ ) -> FV $ map (\t -> proj t p) ts - (W s t, _) -> kks (s ++ getString (proj t p)) - _ -> comp $ getField r (getIndex p) - - getString t = case t of - K (KS s) -> s - _ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR" - - getIndex t = case t of - C i -> i - TM _ -> 0 -- default value for parameter - _ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666 - - getField t i = case t of - R rs -> idx rs i - TM s -> TM s - _ -> error ("ERROR in grammar compiler: field from " ++ show t) t - ---------- --- markup with tree positions - -linearizesMark :: PGF -> CId -> Expr -> [String] -linearizesMark pgf lang = realizes . linTreeMark pgf lang - -linTreeMark :: PGF -> CId -> Expr -> Term -linTreeMark pgf lang = lin [] . expr2tree - where - lin p (Abs xs e ) = case lin p e of - R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs) - TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs) - lin p (Fun fun es) = - let argVariants = - mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es) - in variants [mark (fun,p) $ compute pgf lang args $ look fun | - args <- argVariants] - lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted - lin p (Lit (LInt i)) = mark p $ R [kks (show i)] - lin p (Lit (LFlt d)) = mark p $ R [kks (show d)] - lin p (Var x) = mark p $ TM (showCId x) - lin p (Meta i) = mark p $ TM (show i) - - look = lookLin pgf lang - - mark :: Show a => a -> Term -> Term - mark p t = case t of - R ts -> R $ map (mark p) ts - FV ts -> R $ map (mark p) ts - S ts -> S $ bracket p ts - K s -> S $ bracket p [t] - W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts] - _ -> t - -- otherwise in normal form - - bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"] - sub p i = p ++ [i] - --- | Show the printname of function or category -showPrintName :: PGF -> Language -> CId -> String -showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf + bracket Nothing path ts = [KS ("("++show (reverse path))] ++ ts ++ [KS ")"] + bracket (Just f) path ts = [KS ("(("++showCId f++","++show (reverse path)++")")] ++ ts ++ [KS ")"] diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 81f946211..bf6252f2a 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -3,10 +3,14 @@ module PGF.Macros where import PGF.CId import PGF.Data import Control.Monad -import qualified Data.Map as Map -import qualified Data.Array as Array +import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.IntMap as IntMap +import qualified Data.IntSet as IntSet +import qualified Data.Array as Array import Data.Maybe import Data.List +import GF.Data.Utilities(sortNub) -- operations for manipulating PGF grammars and objects @@ -122,6 +126,10 @@ contextLength :: Type -> Int contextLength ty = case ty of DTyp hyps _ _ -> length hyps +-- | Show the printname of function or category +showPrintName :: PGF -> Language -> CId -> String +showPrintName pgf lang id = lookMap "?" id $ printnames $ lookMap (error "no lang") lang $ concretes pgf + term0 :: CId -> Term term0 = TM . showCId @@ -151,3 +159,63 @@ cidVar = mkCId "__gfVar" _B = mkCId "__gfB" _V = mkCId "__gfV" + +updateProductionIndices :: PGF -> PGF +updateProductionIndices pgf = pgf{concretes = fmap updateConcrete (concretes pgf)} + where + updateConcrete cnc = + case parser cnc of + Nothing -> cnc + Just pinfo -> let prods0 = filterProductions (productions pinfo) + p_prods = parseIndex pinfo prods0 + l_prods = linIndex pinfo prods0 + in cnc{parser = Just pinfo{pproductions = p_prods, lproductions = l_prods}} + + filterProductions prods0 + | IntMap.size prods == IntMap.size prods0 = prods + | otherwise = filterProductions prods + where + prods = IntMap.mapMaybe (filterProdSet prods0) prods0 + + filterProdSet prods set0 + | Set.null set = Nothing + | otherwise = Just set + where + set = Set.filter (filterRule prods) set0 + + filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args + filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods + filterRule prods _ = True + + parseIndex pinfo = IntMap.mapMaybeWithKey filterProdSet + where + filterProdSet fid prods + | fid `IntSet.member` ho_fids = Just prods + | otherwise = let prods' = Set.filter (not . is_ho_prod) prods + in if Set.null prods' + then Nothing + else Just prods' + + is_ho_prod (FApply _ [fid]) | fid == fcatVar = True + is_ho_prod _ = False + + ho_fids :: IntSet.IntSet + ho_fids = IntSet.fromList [fid | cat <- ho_cats + , fid <- maybe [] (\(s,e,_) -> [s..e]) (Map.lookup cat (startCats pinfo))] + + ho_cats :: [CId] + ho_cats = sortNub [c | (ty,_,_) <- Map.elems (funs (abstract pgf)) + , h <- case ty of {DTyp hyps val _ -> hyps} + , let ty = typeOfHypo h + , c <- fst (catSkeleton ty)] + + linIndex pinfo productions = + Map.fromListWith (IntMap.unionWith Set.union) + [(fun,IntMap.singleton res (Set.singleton prod)) | (res,prods) <- IntMap.toList productions + , prod <- Set.toList prods + , fun <- getFunctions prod] + where + getFunctions (FApply funid args) = let FFun fun _ = functions pinfo Array.! funid in [fun] + getFunctions (FCoerce fid) = case IntMap.lookup fid productions of + Nothing -> [] + Just prods -> [fun | prod <- Set.toList prods, fun <- getFunctions prod] \ No newline at end of file diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs index 9eee71a97..be786ebbb 100644 --- a/src/runtime/haskell/PGF/Morphology.hs +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -2,11 +2,13 @@ module PGF.Morphology(Lemma,Analysis,Morpho, buildMorpho, lookupMorpho,fullFormLexicon) where -import PGF.ShowLinearize (collectWords) -import PGF.Data import PGF.CId +import PGF.Data import qualified Data.Map as Map +import qualified Data.Set as Set +import qualified Data.IntMap as IntMap +import Data.Array.IArray import Data.List (intersperse) -- these 4 definitions depend on the datastructure used @@ -17,7 +19,23 @@ type Analysis = String newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)]) buildMorpho :: PGF -> Language -> Morpho -buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang)) +buildMorpho pgf lang = Morpho $ + case Map.lookup lang (concretes pgf) >>= parser of + Just pinfo -> collectWords pinfo + Nothing -> Map.empty + +collectWords pinfo = Map.fromListWith (++) + [(t, [(fun,lbls ! l)]) | (s,e,lbls) <- Map.elems (startCats pinfo) + , fid <- [s..e] + , FApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (pproductions pinfo)) + , let FFun fun lins = functions pinfo ! funid + , (l,seqid) <- assocs lins + , sym <- elems (sequences pinfo ! seqid) + , t <- sym2tokns sym] + where + sym2tokns (FSymKS ts) = ts + sym2tokns (FSymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts] + sym2tokns _ = [] lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo diff --git a/src/runtime/haskell/PGF/PMCFG.hs b/src/runtime/haskell/PGF/PMCFG.hs index abf7e4380..0ef0e3295 100644 --- a/src/runtime/haskell/PGF/PMCFG.hs +++ b/src/runtime/haskell/PGF/PMCFG.hs @@ -34,12 +34,13 @@ data Alternative = deriving (Eq,Ord,Show) data ParserInfo - = ParserInfo { functions :: Array FunId FFun - , sequences :: Array SeqId FSeq - , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file - , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions - , startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names - , totalCats :: {-# UNPACK #-} !FCat + = ParserInfo { functions :: Array FunId FFun + , sequences :: Array SeqId FSeq + , productions :: IntMap.IntMap (Set.Set Production) -- the original productions loaded from the PGF file + , pproductions :: IntMap.IntMap (Set.Set Production) -- productions needed for parsing + , lproductions :: Map.Map CId (IntMap.IntMap (Set.Set Production)) -- productions needed for linearization + , startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names + , totalCats :: {-# UNPACK #-} !FCat } @@ -98,22 +99,3 @@ ppFCat fcat ppFunId funid = char 'F' <> int funid ppSeqId seqid = char 'S' <> int seqid - - -filterProductions = closure - where - closure prods0 - | IntMap.size prods == IntMap.size prods0 = prods - | otherwise = closure prods - where - prods = IntMap.mapMaybe (filterProdSet prods0) prods0 - - filterProdSet prods set0 - | Set.null set = Nothing - | otherwise = Just set - where - set = Set.filter (filterRule prods) set0 - - filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args - filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods - filterRule prods _ = True diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index e9936233c..5a4ccc719 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -59,7 +59,7 @@ initState pgf lang (DTyp _ start _) = let items = case Map.lookup start (startCats pinfo) of Just (s,e,labels) -> do cat <- range (s,e) (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] cat (productions pinfo) + [] cat (pproductions pinfo) let FFun fn lins = functions pinfo ! funid (lbl,seqid) <- assocs lins return (Active 0 0 funid seqid args (AK cat lbl)) @@ -72,7 +72,7 @@ initState pgf lang (DTyp _ start _) = in PState pgf pinfo - (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0) + (Chart emptyAC [] emptyPC (pproductions pinfo) (totalCats pinfo) 0) (TMap.singleton [] (Set.fromList items)) -- | From the current state and the next token diff --git a/src/runtime/haskell/PGF/ShowLinearize.hs b/src/runtime/haskell/PGF/ShowLinearize.hs deleted file mode 100644 index fa4de86c8..000000000 --- a/src/runtime/haskell/PGF/ShowLinearize.hs +++ /dev/null @@ -1,114 +0,0 @@ -module PGF.ShowLinearize ( - collectWords, - tableLinearize, - recordLinearize, - termLinearize, - tabularLinearize, - allLinearize, - markLinearize - ) where - -import PGF.CId -import PGF.Data -import PGF.Tree -import PGF.Macros -import PGF.Linearize - -import GF.Data.Operations -import Data.List -import qualified Data.Map as Map - --- printing linearizations in different ways with source parameters - --- internal representation, only used internally in this module -data Record = - RR [(String,Record)] - | RT [(String,Record)] - | RFV [Record] - | RS String - | RCon String - -prRecord :: Record -> String -prRecord = prr where - prr t = case t of - RR fs -> concat $ - "{" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"] - RT fs -> concat $ - "table {" : - (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"] - RFV ts -> concat $ - "variants {" : (intersperse ";" (map prr ts)) ++ ["}"] - RS s -> prQuotedString s - RCon s -> s - --- uses the encoding of record types in PGF.paramlincat -mkRecord :: Term -> Term -> Record -mkRecord typ trm = case (typ,trm) of - (_, FV ts) -> RFV $ map (mkRecord typ) ts - (R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts] - (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts] - (_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts]) - (FV ps, C i) -> RCon $ str $ ps !! i - (S [], _) -> case realizes trm of - [s] -> RS s - ss -> RFV $ map RS ss - _ -> RS $ show trm ---- printTree trm - where - str = realize - --- show all branches, without labels and params -allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String -allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where - pr (p,vs) = unlines vs - --- show all branches, with labels and params -tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String -tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where - pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs)) - --- create a table from labels+params to variants -tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])] -tabularLinearize pgf lang = branches . recLinearize pgf lang where - branches r = case r of - RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t] - RFV rs -> concatMap branches rs - RS s -> [([], [s])] - RCon _ -> [] - --- show record in GF-source-like syntax -recordLinearize :: PGF -> CId -> Expr -> String -recordLinearize pgf lang = prRecord . recLinearize pgf lang - --- create a GF-like record, forming the basis of all functions above -recLinearize :: PGF -> CId -> Expr -> Record -recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where - typ = case expr2tree tree of - Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f - --- show PGF term -termLinearize :: PGF -> CId -> Expr -> String -termLinearize pgf lang = show . linTree pgf lang - --- show bracketed markup with references to tree structure -markLinearize :: PGF -> CId -> Expr -> String -markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang - - --- for Morphology: word, lemma, tags -collectWords :: PGF -> Language -> [(String, [(CId,String)])] -collectWords pgf lang = - concatMap collOne - [(f,c,length xs) | (f,(DTyp xs c _,_,_)) <- Map.toList $ funs $ abstract pgf] - where - collOne (f,c,i) = - fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888)))) - fromRec f v r = case r of - RR rs -> concat [fromRec f v t | (_,t) <- rs] - RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs] - RFV rs -> concatMap (fromRec f v) rs - RS s -> [(w,[(f,unwords (reverse v))]) | w <- words s, w /= "?888"] --- --- RS s -> [(s,[(f,unwords (reverse v))])] - RCon c -> [] ---- inherent - diff --git a/src/runtime/haskell/PGF/VisualizeTree.hs b/src/runtime/haskell/PGF/VisualizeTree.hs index 429551f54..8e9b28740 100644 --- a/src/runtime/haskell/PGF/VisualizeTree.hs +++ b/src/runtime/haskell/PGF/VisualizeTree.hs @@ -102,7 +102,7 @@ graphvizDependencyTree format debug mlab ms pgf lang exp = case format of ifd s = if debug then s else [] - pot = readPosText $ head $ linearizesMark pgf lang exp + pot = readPosText $ concat $ take 1 $ markLinearizes pgf lang exp ---- use Just str if you have str to match against prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"] @@ -188,9 +188,7 @@ getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss] ---- nubrec and domins are quadratic, but could be (n log n) graphvizParseTree :: PGF -> CId -> Expr -> String -graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where - linMark = head . linearizesMark pgf lang - ---- use Just str if you have str to match against +graphvizParseTree pgf lang = prGraph False . lin2tree pgf . concat . take 1 . markLinearizes pgf lang where lin2tree pgf s = trace s $ prelude ++ nodes ++ links where @@ -235,12 +233,12 @@ tag s = "<" ++ s ++ ">" showp = init . tail . show mtag = tag . ('n':) . uncommas --- word alignments from Linearize.linearizesMark +-- word alignments from Linearize.markLinearize -- words are chunks like {[0,1,1,0] old} graphvizAlignment :: PGF -> Expr -> String graphvizAlignment pgf = prGraph True . lin2graph . linsMark where - linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)] + linsMark t = [concat (take 1 (markLinearizes pgf la t)) | la <- cncnames pgf] lin2graph :: [String] -> [String] lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links