From 4922ab6cc495087f40399746f616cd88590fd884 Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Tue, 12 Feb 2013 10:53:13 +0000 Subject: [PATCH] now the beam size for the statistical parser can be configured by using the flag beam_size in the top-level concrete module --- src/compiler/GF/Compile/GrammarToPGF.hs | 6 +++--- src/compiler/GF/Grammar/Parser.y | 3 +++ src/compiler/GF/Infra/Option.hs | 8 ++++++++ src/runtime/c/pgf/parser.c | 17 ++++++++++++++++- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/compiler/GF/Compile/GrammarToPGF.hs b/src/compiler/GF/Compile/GrammarToPGF.hs index cc560ca1c..e10a2b13b 100644 --- a/src/compiler/GF/Compile/GrammarToPGF.hs +++ b/src/compiler/GF/Compile/GrammarToPGF.hs @@ -54,8 +54,8 @@ mkCanon2pgf opts gr am = do [((cPredefAbs,c), AbsCat (Just (L NoLoc []))) | c <- [cFloat,cInt,cString]] ++ Look.allOrigInfos gr am - flags = Map.fromList [(mkCId f,C.LStr x) | (f,x) <- optionsPGF aflags] - + flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF aflags] + funs = Map.fromList [(i2i f, (mkType [] ty, mkArrity ma, mkDef pty, 0, addr)) | ((m,f),AbsFun (Just (L _ ty)) ma pty _,addr) <- adefs] @@ -74,7 +74,7 @@ mkCanon2pgf opts gr am = do ([((cPredefAbs,c), CncCat (Just (L NoLoc GM.defLinType)) Nothing Nothing Nothing) | c <- [cInt,cFloat,cString]] ++ Look.allOrigInfos gr cm) - let flags = Map.fromList [(mkCId f,C.LStr x) | (f,x) <- optionsPGF cflags] + let flags = Map.fromList [(mkCId f,if f == "beam_size" then C.LFlt (read x) else C.LStr x) | (f,x) <- optionsPGF cflags] !(!fid_cnt1,!cnccats) = genCncCats gr am cm cdefs !(!fid_cnt2,!productions,!lindefs,!sequences,!cncfuns) diff --git a/src/compiler/GF/Grammar/Parser.y b/src/compiler/GF/Grammar/Parser.y index 680724fcc..f8e0d3b46 100644 --- a/src/compiler/GF/Grammar/Parser.y +++ b/src/compiler/GF/Grammar/Parser.y @@ -277,6 +277,9 @@ FlagDef : Posn Ident '=' Ident Posn {% case parseModuleOptions ["--" ++ showIdent $2 ++ "=" ++ showIdent $4] of Ok x -> return x Bad msg -> failLoc $1 msg } + | Posn Ident '=' Double Posn {% case parseModuleOptions ["--" ++ showIdent $2 ++ "=" ++ show $4] of + Ok x -> return x + Bad msg -> failLoc $1 msg } ListDataConstr :: { [Ident] } ListDataConstr diff --git a/src/compiler/GF/Infra/Option.hs b/src/compiler/GF/Infra/Option.hs index d07a96112..a1e69d027 100644 --- a/src/compiler/GF/Infra/Option.hs +++ b/src/compiler/GF/Infra/Option.hs @@ -172,6 +172,7 @@ data Flags = Flags { optWarnings :: [Warning], optDump :: [Dump], optTagsOnly :: Bool, + optBeamSize :: Maybe Double, optNewComp :: Bool } deriving (Show) @@ -216,6 +217,7 @@ optionsPGF :: Options -> [(String,String)] optionsPGF opts = maybe [] (\x -> [("language",x)]) (flag optSpeechLanguage opts) ++ maybe [] (\x -> [("startcat",x)]) (flag optStartCat opts) + ++ maybe [] (\x -> [("beam_size",show x)]) (flag optBeamSize opts) -- Option manipulation @@ -272,6 +274,7 @@ defaultFlags = Flags { optWarnings = [], optDump = [], optTagsOnly = False, + optBeamSize = Nothing, optNewComp = #ifdef NEW_COMP True @@ -357,6 +360,7 @@ optDescr = Option [] ["stem"] (onOff (toggleOptimize OptStem) True) "Perform stem-suffix analysis (default on).", Option [] ["cse"] (onOff (toggleOptimize OptCSE) True) "Perform common sub-expression elimination (default on).", Option [] ["cfg"] (ReqArg cfgTransform "TRANS") "Enable or disable specific CFG transformations. TRANS = merge, no-merge, bottomup, no-bottomup, ...", + Option [] ["beam_size"] (ReqArg readDouble "SIZE") "Set the beam size for statistical parsing", Option [] ["new-comp"] (NoArg (set $ \o -> o{optNewComp = True})) "Use the new experimental compiler.", Option [] ["old-comp"] (NoArg (set $ \o -> o{optNewComp = False})) "Use old trusty compiler.", dumpOption "source" Source, @@ -431,6 +435,10 @@ optDescr = Nothing -> fail $ "Unknown CFG transformation: " ++ x' ++ " Known: " ++ show (map fst cfgTransformNames) + readDouble x = case reads x of + [(d,"")] -> set $ \o -> o { optBeamSize = Just d } + _ -> fail "A floating point number is expected" + dumpOption s d = Option [] ["dump-"++s] (NoArg (set $ \o -> o { optDump = Dump d:optDump o})) ("Dump output of the " ++ s ++ " phase.") set = return . Options diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 6f02ed730..80de54ce8 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -1522,6 +1522,21 @@ pgf_parsing_proceed(PgfParseState* state) { return true; } +static prob_t +pgf_parsing_default_beam_size(PgfConcr* concr) +{ + GuPool* tmp_pool = gu_new_pool(); + PgfCId flag_name = gu_str_string("beam_size", tmp_pool); + PgfLiteral lit = gu_map_get(concr->cflags, &flag_name, PgfLiteral); + + if (gu_variant_is_null(lit)) + return 0; + + GuVariantInfo pi = gu_variant_open(lit); + gu_assert (pi.tag == PGF_LITERAL_FLT); + return ((PgfLiteralFlt*) pi.data)->val; +} + static PgfParsing* pgf_new_parsing(PgfConcr* concr, GuPool* pool) { @@ -1538,7 +1553,7 @@ pgf_new_parsing(PgfConcr* concr, GuPool* pool) ps->prod_full_count = 0; #endif ps->free_item = NULL; - ps->beam_size = 0.95; + ps->beam_size = pgf_parsing_default_beam_size(concr); PgfExprMeta *expr_meta = gu_new_variant(PGF_EXPR_META,