the content of ParseEngAbs3.probs is now merged with ParseEngAbs.probs. The later is now retrained. Once the grammar is compiled with the .probs file now it doesn't need anything more to do robust parsing. The robustness itself is controlled by the flags 'heuristic_search_factor', 'meta_prob' and 'meta_token_prob' in ParseEngAbs.gf

This commit is contained in:
kr.angelov
2013-11-06 10:21:46 +00:00
parent 84ef5fa5fa
commit 2483dc7728
27 changed files with 65052 additions and 65106 deletions

View File

@@ -40,13 +40,13 @@ instance Binary CId where
instance Binary Abstr where
put abs = put (aflags abs,
fmap (\(w,x,y,z,_) -> (w,x,y,z)) (funs abs),
fmap (\(x,y,_) -> (x,y)) (cats abs))
fmap (\(x,y,z,_) -> (x,y,z)) (cats abs))
get = do aflags <- get
funs <- get
cats <- get
return (Abstr{ aflags=aflags
, funs=fmap (\(w,x,y,z) -> (w,x,y,z,0)) funs
, cats=fmap (\(x,y) -> (x,y,0)) cats
, cats=fmap (\(x,y,z) -> (x,y,z,0)) cats
, code=BS.empty
})

View File

@@ -29,10 +29,10 @@ data PGF = PGF {
data Abstr = Abstr {
aflags :: Map.Map CId Literal, -- ^ value of a flag
funs :: Map.Map CId (Type,Int,Maybe [Equation],Double,BCAddr), -- ^ type, arrity and definition of function + probability
cats :: Map.Map CId ([Hypo],[(Double, CId)],BCAddr), -- ^ 1. context of a category
-- ^ 2. functions of a category. The order in the list is important,
-- this is the order in which the type singatures are given in the source.
-- The termination of the exhaustive generation might depend on this.
cats :: Map.Map CId ([Hypo],[(Double, CId)],Double,BCAddr), -- ^ 1. context of a category
-- 2. functions of a category. The functions are stored
-- in decreasing probability order.
-- 3. probability
code :: BS.ByteString
}

View File

@@ -67,7 +67,7 @@ functionsToCat :: PGF -> CId -> [(CId,Type)]
functionsToCat pgf cat =
[(f,ty) | (_,f) <- fs, Just (ty,_,_,_,_) <- [Map.lookup f $ funs $ abstract pgf]]
where
(_,fs,_) = lookMap ([],[],0) cat $ cats $ abstract pgf
(_,fs,_,_) = lookMap ([],[],0,0) cat $ cats $ abstract pgf
-- | List of functions that lack linearizations in the given language.
missingLins :: PGF -> Language -> [CId]
@@ -82,7 +82,7 @@ restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf {
abstract = abstr {
funs = Map.filterWithKey (\c _ -> cond c) (funs abstr),
cats = Map.map (\(hyps,fs,addr) -> (hyps,filter (cond . snd) fs,addr)) (cats abstr)
cats = Map.map (\(hyps,fs,p,addr) -> (hyps,filter (cond . snd) fs,p,addr)) (cats abstr)
}
} ---- restrict concrs also, might be needed
where

View File

@@ -26,8 +26,8 @@ ppAbs name a = text "abstract" <+> ppCId name <+> char '{' $$
ppFlag :: CId -> Literal -> Doc
ppFlag flag value = text "flag" <+> ppCId flag <+> char '=' <+> ppLit value <+> char ';'
ppCat :: CId -> ([Hypo],[(Double,CId)],BCAddr) -> Doc
ppCat c (hyps,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';'
ppCat :: CId -> ([Hypo],[(Double,CId)],Double,BCAddr) -> Doc
ppCat c (hyps,_,_,_) = text "cat" <+> ppCId c <+> hsep (snd (mapAccumL (ppHypo 4) [] hyps)) <+> char ';'
ppFun :: CId -> (Type,Int,Maybe [Equation],Double,BCAddr) -> Doc
ppFun f (t,_,Just eqs,_,_) = text "fun" <+> ppCId f <+> colon <+> ppType 0 [] t <+> char ';' $$

View File

@@ -24,13 +24,14 @@ import Data.Maybe (fromMaybe) --, fromJust
-- the probabilities for the different functions in a grammar.
data Probabilities = Probs {
funProbs :: Map.Map CId Double,
catProbs :: Map.Map CId [(Double, CId)]
catProbs :: Map.Map CId (Double, [(Double, CId)])
}
-- | Renders the probability structure as string
showProbabilities :: Probabilities -> String
showProbabilities = unlines . map pr . Map.toList . funProbs where
pr (f,d) = showCId f ++ "\t" ++ show d
showProbabilities = unlines . concatMap prProb . Map.toList . catProbs where
prProb (c,(p,fns)) = pr (p,c) : map pr fns
pr (p,f) = showCId f ++ "\t" ++ show p
-- | Reads the probabilities from a file.
-- This should be a text file where on every line
@@ -50,8 +51,12 @@ readProbabilitiesFromFile file pgf = do
-- for the result category.
mkProbabilities :: PGF -> Map.Map CId Double -> Probabilities
mkProbabilities pgf probs =
let funs1 = Map.fromList [(f,p) | (_,cf) <- Map.toList cats1, (p,f) <- cf]
cats1 = Map.map (\(_,fs,_) -> sortBy cmpProb (fill fs)) (cats (abstract pgf))
let funs1 = Map.fromList [(f,p) | (_,(_,fns)) <- Map.toList cats1, (p,f) <- fns]
cats1 = Map.mapWithKey (\c (_,fns,_,_) ->
let p' = fromMaybe 0 (Map.lookup c probs)
fns' = sortBy cmpProb (fill fns)
in (p', fns'))
(cats (abstract pgf))
in Probs funs1 cats1
where
cmpProb (p1,_) (p2,_) = compare p2 p1
@@ -71,15 +76,15 @@ defaultProbabilities pgf = mkProbabilities pgf Map.empty
getProbabilities :: PGF -> Probabilities
getProbabilities pgf = Probs {
funProbs = Map.map (\(_,_,_,p,_) -> p) (funs (abstract pgf)),
catProbs = Map.map (\(_,fns,_) -> fns) (cats (abstract pgf))
funProbs = Map.map (\(_,_,_,p,_) -> p ) (funs (abstract pgf)),
catProbs = Map.map (\(_,fns,p,_) -> (p,fns)) (cats (abstract pgf))
}
setProbabilities :: Probabilities -> PGF -> PGF
setProbabilities probs pgf = pgf {
abstract = (abstract pgf) {
funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df,p,addr)) (funs (abstract pgf)) (funProbs probs),
cats = mapUnionWith (\(hypos,_,addr) fns -> (hypos,fns,addr)) (cats (abstract pgf)) (catProbs probs)
funs = mapUnionWith (\(ty,a,df,_,addr) p -> (ty,a,df, p,addr)) (funs (abstract pgf)) (funProbs probs),
cats = mapUnionWith (\(hypos,_,_,addr) (p,fns) -> (hypos,fns,p,addr)) (cats (abstract pgf)) (catProbs probs)
}}
where
mapUnionWith f map1 map2 =
@@ -102,7 +107,7 @@ rankTreesByProbs pgf ts = sortBy (\ (_,p) (_,q) -> compare q p)
mkProbDefs :: PGF -> ([[CId]],[(CId,Type,[Equation])])
mkProbDefs pgf =
let cs = [(c,hyps,fns) | (c,(hyps0,fs,_)) <- Map.toList (cats (abstract pgf)),
let cs = [(c,hyps,fns) | (c,(hyps0,fs,_,_)) <- Map.toList (cats (abstract pgf)),
not (elem c [cidString,cidInt,cidFloat]),
let hyps = zipWith (\(bt,_,ty) n -> (bt,mkCId ('v':show n),ty))
hyps0

View File

@@ -121,8 +121,8 @@ runTcM abstr f ms s = unTcM f abstr (\x ms s cp b -> let (es,xs) = cp b
lookupCatHyps :: CId -> TcM s [Hypo]
lookupCatHyps cat = TcM (\abstr k h ms -> case Map.lookup cat (cats abstr) of
Just (hyps,_,_) -> k hyps ms
Nothing -> h (UnknownCat cat))
Just (hyps,_,_,_) -> k hyps ms
Nothing -> h (UnknownCat cat))
lookupFunType :: CId -> TcM s Type
lookupFunType fun = TcM (\abstr k h ms -> case Map.lookup fun (funs abstr) of
@@ -143,8 +143,8 @@ typeGenerators scope cat = fmap normalize (liftM2 (++) x y)
| cat == cidString = return [(1.0,ELit (LStr "Foo"),TTyp [] (DTyp [] cat []))]
| otherwise = TcM (\abstr k h ms ->
case Map.lookup cat (cats abstr) of
Just (_,fns,_) -> unTcM (mapM helper fns) abstr k h ms
Nothing -> h (UnknownCat cat))
Just (_,fns,_,_) -> unTcM (mapM helper fns) abstr k h ms
Nothing -> h (UnknownCat cat))
helper (p,fn) = do
ty <- lookupFunType fn