From ee3ce9694c134d697bf3fa6b8910c7d863f84dab Mon Sep 17 00:00:00 2001 From: krasimir Date: Tue, 5 Jan 2010 08:35:33 +0000 Subject: [PATCH] store the label names in PMCFG --- src/compiler/GF/Compile/GeneratePMCFG.hs | 45 ++++++++++++++---------- src/compiler/GF/Compile/PGFtoJS.hs | 2 +- src/compiler/GF/Speech/PGFToCFG.hs | 8 ++--- src/runtime/haskell/PGF/PMCFG.hs | 15 ++++---- src/runtime/haskell/PGF/Parse.hs | 42 +++++++++++----------- 5 files changed, 61 insertions(+), 51 deletions(-) diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index fed2521e1..8b815008e 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -37,7 +37,7 @@ import Control.Exception convertConcrete :: Options -> Abstr -> CId -> Concr -> IO ParserInfo convertConcrete opts abs lang cnc = do - let env0 = emptyGrammarEnv cnc_defs cat_defs + let env0 = emptyGrammarEnv cnc_defs cat_defs params when (flag optProf opts) $ do profileGrammar lang cnc_defs env0 pfrules let env1 = expandHOAS abs_defs cnc_defs cat_defs lin_defs env0 @@ -47,6 +47,7 @@ convertConcrete opts abs lang cnc = do abs_defs = Map.assocs (funs abs) cnc_defs = Map.union (opers cnc) (lins cnc) -- "union big+small most efficient" cat_defs = Map.insert cidVar (S []) (lincats cnc) + params = paramlincats cnc lin_defs = lindefs cnc pfrules = [ @@ -72,7 +73,7 @@ profileGrammar lang cnc_defs (GrammarEnv last_id catSet seqSet funSet crcSet pro mapM_ profileRule pfrules hPutStrLn stderr "--------------------------------" where - profileCat (cid,(fcat1,fcat2,_)) = do + profileCat (cid,(fcat1,fcat2,_,_)) = do hPutStrLn stderr (lformat 23 cid ++ rformat 9 (fcat2-fcat1+1)) profileRule (PFRule fun args res ctypes ctype term) = do @@ -340,21 +341,21 @@ evalTerm cnc_defs path x = error ("evalTerm ("++show x++")") -- GrammarEnv data GrammarEnv = GrammarEnv {-# UNPACK #-} !Int CatSet SeqSet FunSet CoerceSet (IntMap.IntMap (Set.Set Production)) -type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int])) +type CatSet = IntMap.IntMap (Map.Map CId (FCat,FCat,[Int],Array FIndex String)) type SeqSet = Map.Map FSeq SeqId type FunSet = Map.Map FFun FunId type CoerceSet= Map.Map [FCat] FCat -emptyGrammarEnv cnc_defs lincats = +emptyGrammarEnv cnc_defs lincats params = let (last_id,catSet) = Map.mapAccumWithKey computeCatRange 0 lincats in GrammarEnv last_id (IntMap.singleton 0 catSet) Map.empty Map.empty Map.empty IntMap.empty where computeCatRange index cat ctype - | cat == cidString = (index, (fcatString,fcatString,[])) - | cat == cidInt = (index, (fcatInt, fcatInt, [])) - | cat == cidFloat = (index, (fcatFloat, fcatFloat, [])) - | cat == cidVar = (index, (fcatVar, fcatVar, [])) - | otherwise = (index+size,(index,index+size-1,poly)) + | cat == cidString = (index, (fcatString,fcatString,[],listArray (0,0) ["s"])) + | cat == cidInt = (index, (fcatInt, fcatInt, [],listArray (0,0) ["s"])) + | cat == cidFloat = (index, (fcatFloat, fcatFloat, [],listArray (0,0) ["s"])) + | cat == cidVar = (index, (fcatVar, fcatVar, [],listArray (0,0) ["s"])) + | otherwise = (index+size,(index,index+size-1, poly,maybe (error "missing params") (mkArray . getLabels []) (Map.lookup cat params))) where (size,poly) = getMultipliers 1 [] ctype @@ -365,6 +366,12 @@ emptyGrammarEnv cnc_defs lincats = Just term -> getMultipliers m ms term Nothing -> error ("unknown identifier: "++showCId id) + getLabels ls (R record) = concat [getLabels (l:ls) t | P (K (KS l)) t <- record] + getLabels ls (S [FV ps,t]) = concat [getLabels (l:ls) t | K (KS l) <- ps] + getLabels ls (S []) = [unwords (reverse ls)] + getLabels ls (FV _) = [] + getLabels _ t = error (show t) + expandHOAS abs_defs cnc_defs lincats lindefs env = foldl add_varFun (foldl (\env ncat -> add_hoFun (add_hoCat env ncat) ncat) env hoTypes) hoCats where @@ -381,10 +388,10 @@ expandHOAS abs_defs cnc_defs lincats lindefs env = -- add a range of PMCFG categories for each GF high-order category add_hoCat env@(GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (n,cat) = case IntMap.lookup 0 catSet >>= Map.lookup cat of - Just (start,end,ms) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms)) catSet - !last_id' = last_id+(end-start)+1 - in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet) - Nothing -> env + Just (start,end,ms,lbls) -> let !catSet' = IntMap.insertWith Map.union n (Map.singleton cat (last_id,last_id+(end-start),ms,lbls)) catSet + !last_id' = last_id+(end-start)+1 + in (GrammarEnv last_id' catSet' seqSet funSet crcSet prodSet) + Nothing -> env -- add one PMCFG function for each high-order type: _B : Cat -> Var -> ... -> Var -> HoCat add_hoFun env (n,cat) = @@ -460,11 +467,11 @@ getParserInfo :: GrammarEnv -> ParserInfo getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = ParserInfo { functions = mkArray funSet , sequences = mkArray seqSet - , productions0= productions0 - , productions = filterProductions productions0 - , startCats = maybe Map.empty (Map.map (\(start,end,_) -> (start,end))) (IntMap.lookup 0 catSet) - , totalCats = last_id+1 - } + , productions0= productions0 + , productions = filterProductions productions0 + , startCats = maybe Map.empty (Map.map (\(start,end,_,lbls) -> (start,end,lbls))) (IntMap.lookup 0 catSet) + , totalCats = last_id+1 + } where mkArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map] @@ -474,7 +481,7 @@ getParserInfo (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) = getFCats :: GrammarEnv -> ProtoFCat -> [FCat] getFCats (GrammarEnv last_id catSet seqSet funSet crcSet prodSet) (PFCat n cat rcs tcs) = case IntMap.lookup n catSet >>= Map.lookup cat of - Just (start,end,ms) -> reverse (solutions (variants ms tcs start) ()) + Just (start,end,ms,_) -> reverse (solutions (variants ms tcs start) ()) where variants _ [] fcat = return fcat variants (m:ms) ((_,indices) : tcs) fcat = do index <- member indices diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index 01be44e8c..0cec4121d 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -96,7 +96,7 @@ parser2js p = [new "Parser" [JS.EObj $ [JS.Prop (JS.IntPropName cat) (JS.EArray JS.EObj $ map cats (Map.assocs (startCats p)), JS.EInt (totalCats p)]] where - cats (c,(start,end)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start) + cats (c,(start,end,_)) = JS.Prop (JS.IdentPropName (JS.Ident (showCId c))) (JS.EObj [JS.Prop (JS.IdentPropName (JS.Ident "s")) (JS.EInt start) ,JS.Prop (JS.IdentPropName (JS.Ident "e")) (JS.EInt end)]) frule2js :: Production -> JS.Expr diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index a9bb20ef6..bd27deadf 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -42,8 +42,8 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co fcatCats :: Map FCat Cat fcatCats = Map.fromList [(fc, showCId c ++ "_" ++ show i) - | (c,fcs) <- Map.toList (startCats pinfo), - (fc,i) <- zip (range fcs) [1..]] + | (c,(s,e,lbls)) <- Map.toList (startCats pinfo), + (fc,i) <- zip (range (s,e)) [1..]] fcatCat :: FCat -> Cat fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats @@ -69,8 +69,8 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co startRules :: [CFRule] startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) - | (c,fcs) <- Map.toList (startCats pinfo), - fc <- range fcs, not (isLiteralFCat fc), + | (c,(s,e,lbls)) <- Map.toList (startCats pinfo), + fc <- range (s,e), not (isLiteralFCat fc), r <- [0..catLinArity fc-1]] fruleToCFRule :: (FCat,Production) -> [CFRule] diff --git a/src/runtime/haskell/PGF/PMCFG.hs b/src/runtime/haskell/PGF/PMCFG.hs index b9303aeb8..abf7e4380 100644 --- a/src/runtime/haskell/PGF/PMCFG.hs +++ b/src/runtime/haskell/PGF/PMCFG.hs @@ -36,11 +36,11 @@ data Alternative = data ParserInfo = ParserInfo { functions :: Array FunId FFun , sequences :: Array SeqId FSeq - , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file - , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions - , startCats :: Map.Map CId (FCat,FCat) - , totalCats :: {-# UNPACK #-} !FCat - } + , productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file + , productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions + , startCats :: Map.Map CId (FCat,FCat,Array FIndex String) -- for every category - start/end FCat and a list of label names + , totalCats :: {-# UNPACK #-} !FCat + } fcatString, fcatInt, fcatFloat, fcatVar :: Int @@ -76,8 +76,9 @@ ppFun (funid,FFun fun arr) = ppSeq (seqid,seq) = ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq)) -ppStartCat (id,(start,end)) = - ppCId id <+> text ":=" <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) +ppStartCat (id,(start,end,labels)) = + ppCId id <+> text ":=" <+> (text "range " <+> brackets (ppFCat start <+> text ".." <+> ppFCat end) $$ + text "labels" <+> brackets (vcat (map (text . show) (elems labels)))) ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>' ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>' diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index 44ff525b4..6de7f29a8 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -56,14 +56,15 @@ parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) -- startup category. initState :: PGF -> Language -> Type -> ParseState initState pgf lang (DTyp _ start _) = - let items = do - cat <- maybe [] range (Map.lookup start (startCats pinfo)) - (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] cat (productions pinfo) - let FFun fn lins = functions pinfo ! funid - (lbl,seqid) <- assocs lins - return (Active 0 0 funid seqid args (AK cat lbl)) - + let items = case Map.lookup start (startCats pinfo) of + Just (s,e,labels) -> do cat <- range (s,e) + (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) + [] cat (productions pinfo) + let FFun fn lins = functions pinfo ! funid + (lbl,seqid) <- assocs lins + return (Active 0 0 funid seqid args (AK cat lbl)) + Nothing -> mzero + pinfo = case lookParser pgf lang of Just pinfo -> pinfo @@ -131,8 +132,10 @@ recoveryStates open_types (EState pgf pinfo chart) = } in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc) where - type2fcats (DTyp _ cat _) = maybe [] range (Map.lookup cat (startCats pinfo)) - + type2fcats (DTyp _ cat _) = case Map.lookup cat (startCats pinfo) of + Just (s,e,labels) -> range (s,e) + Nothing -> [] + complete open_fcats items ac = foldl (Set.fold (\(Active j' ppos funid seqid args keyc) -> (:) (Active j' (ppos+1) funid seqid args keyc))) @@ -153,16 +156,15 @@ extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) = agenda = maybe [] Set.toList mb_agenda (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart - exps = do - cat <- maybe [] range (Map.lookup start (startCats pinfo)) - (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args) - [] cat (productions pinfo) - let FFun fn lins = functions pinfo ! funid - lbl <- indices lins - Just fid <- [lookupPC (PK cat lbl 0) (passive st)] - (fvs,tree) <- go Set.empty 0 (0,fid) - guard (Set.null fvs) - return tree + exps = + case Map.lookup start (startCats pinfo) of + Just (s,e,lbls) -> do cat <- range (s,e) + lbl <- indices lbls + Just fid <- [lookupPC (PK cat lbl 0) (passive st)] + (fvs,tree) <- go Set.empty 0 (0,fid) + guard (Set.null fvs) + return tree + Nothing -> mzero go rec fcat' (d,fcat) | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments