forked from GitHub/gf-core
126 lines
5.1 KiB
Haskell
126 lines
5.1 KiB
Haskell
import PGF
|
|
import qualified Data.Map as Map
|
|
import Data.Maybe
|
|
import Data.List
|
|
|
|
main = do
|
|
pgf <- readPGF "ParseEngAbs.pgf"
|
|
ls <- fmap (filterExprs . zip [1..] . lines) $ readFile "log4.txt"
|
|
putStrLn ""
|
|
putStrLn ("trees: "++show (length ls))
|
|
let stats = foldl' (collectStats pgf)
|
|
(initStats pgf)
|
|
[(n,fromMaybe (error l) (readExpr (toQ l)),Just (mkCId "Phr"),Nothing) | (n,l) <- ls]
|
|
|
|
putStrLn ("coverage: "++show (coverage stats))
|
|
|
|
putStrLn ("Writing ParseEngAbs.probs...")
|
|
writeFile "ParseEngAbs.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- uprobs pgf stats])
|
|
|
|
putStrLn ("Writing ParseEngAbs2.probs...")
|
|
writeFile "ParseEngAbs2.probs" (unlines [show cat1 ++ "\t" ++ show cat2 ++ "\t" ++ show p | (cat1,cat2,p) <- mprobs pgf stats])
|
|
|
|
putStrLn ("Writing global.probs...")
|
|
writeFile "global.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- gprobs pgf stats])
|
|
|
|
putStrLn ("Writing categories.probs...")
|
|
writeFile "categories.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- cprobs pgf stats])
|
|
where
|
|
toQ [] = []
|
|
toQ ('[':cs) = let (xs,']':ys) = break (==']') cs
|
|
in toQ ('?' : ys)
|
|
toQ ('?':cs) = 'Q' : toQ cs
|
|
toQ (c:cs) = c : toQ cs
|
|
|
|
filterExprs [] = []
|
|
filterExprs ((n,l):ls)
|
|
| null l = filterExprs ls
|
|
| elem (head l) "+#*" = (n,drop 2 l) : filterExprs ls
|
|
| otherwise = filterExprs ls
|
|
|
|
initStats pgf =
|
|
(Map.fromListWith (+)
|
|
([(f,1) | f <- functions pgf] ++
|
|
[(cat pgf f,1) | f <- functions pgf])
|
|
,Map.empty
|
|
,0
|
|
)
|
|
|
|
collectStats pgf (ustats,bstats,count) (n,e,mb_cat1,mb_cat2) =
|
|
case unApp e of
|
|
Just (f,args) -> let fcat2 = cat2 pgf f n e
|
|
fcat = fromMaybe (cat2 pgf f n e) mb_cat1
|
|
cf = fromMaybe 0 (Map.lookup f ustats)
|
|
cc = fromMaybe 0 (Map.lookup fcat ustats)
|
|
in if isJust mb_cat1 && f /= mkCId "Q" && fcat /= fcat2
|
|
then error (show n ++ ": " ++ showExpr [] e)
|
|
else
|
|
cf `seq` cc `seq` bstats `seq` count `seq`
|
|
foldl' (collectStats pgf)
|
|
(Map.insert f (cf+1) (Map.insert fcat (cc+1) ustats)
|
|
,(if null args
|
|
then Map.insertWith (+) (fcat,wildCId) 1
|
|
else id)
|
|
(maybe bstats (\cat2 -> Map.insertWith (+) (cat2,fcat) 1 bstats) mb_cat2)
|
|
,count+1
|
|
)
|
|
(zipWith3 (\e mb_cat1 mb_cat2 -> (n,e,mb_cat1,mb_cat2)) args (argCats f) (repeat (Just fcat)))
|
|
Nothing -> case unStr e of
|
|
Just _ -> (ustats,bstats,count+1)
|
|
Nothing -> error ("collectStats ("++showExpr [] e++")")
|
|
where
|
|
argCats f =
|
|
case fmap unType (functionType pgf f) of
|
|
Just (arg_tys,_,_) -> let tyCat (_,_,ty) = let (_,cat,_) = unType ty in Just cat
|
|
in map tyCat arg_tys
|
|
Nothing -> repeat Nothing
|
|
|
|
coverage (ustats,bstats,count) =
|
|
let c = fromMaybe 0 (Map.lookup (mkCId "Q") ustats)
|
|
in (fromIntegral (count - c) / fromIntegral count) * 100
|
|
|
|
uprobs pgf (ustats,bstats,count) =
|
|
[toProb f (cat pgf f) | f <- functions pgf]
|
|
where
|
|
toProb f cat =
|
|
let count = fromMaybe 0 (Map.lookup f ustats)
|
|
cat_mass = fromMaybe 0 (Map.lookup cat ustats)
|
|
in (f, fromIntegral count / fromIntegral cat_mass :: Double)
|
|
|
|
mprobs pgf (ustats,bstats,count) =
|
|
concat [toProb cat | cat <- categories pgf]
|
|
where
|
|
toProb cat =
|
|
let mass = sum [count | ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
|
|
cat_count = fromMaybe 0 (Map.lookup cat ustats)
|
|
fun_count = sum [fromMaybe 0 (Map.lookup f ustats) | f <- functionsByCat pgf cat]
|
|
in (cat,mkCId "*",if cat_count == 0 then 0 else fromIntegral (cat_count - fun_count) / fromIntegral cat_count) :
|
|
[(cat1,cat2,fromIntegral count / fromIntegral mass)
|
|
| ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
|
|
|
|
gprobs pgf (ustats,bstats,count) =
|
|
sortBy (\x y -> compare (snd y) (snd x)) [toProb f | f <- functions pgf]
|
|
where
|
|
toProb f =
|
|
let fcount = fromMaybe 0 (Map.lookup f ustats)
|
|
in (f, fromIntegral fcount / fromIntegral count :: Double)
|
|
|
|
cprobs pgf (ustats,bstats,count) =
|
|
sortBy (\x y -> compare (snd y) (snd x)) [toProb c | c <- categories pgf]
|
|
where
|
|
mass = sum [fromMaybe 0 (Map.lookup c ustats) | c <- categories pgf]
|
|
|
|
toProb c =
|
|
let fcount = fromMaybe 0 (Map.lookup c ustats)
|
|
in (c, fromIntegral fcount / fromIntegral mass :: Double)
|
|
|
|
cat pgf f =
|
|
case fmap unType (functionType pgf f) of
|
|
Just (_,cat,_) -> cat
|
|
Nothing -> error ("Unknown function "++showCId f)
|
|
|
|
cat2 pgf f n e =
|
|
case fmap unType (functionType pgf f) of
|
|
Just (_,cat,_) -> cat
|
|
Nothing -> error (show n ++ ": Unknown function "++showCId f++" in "++showExpr [] e)
|