mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
101 lines
3.6 KiB
Haskell
101 lines
3.6 KiB
Haskell
import PGF
|
|
import qualified Data.Map as Map
|
|
import Data.Maybe
|
|
import Data.List
|
|
|
|
grammar_name = "ParseEngAbs.pgf"
|
|
|
|
treebank_name = "log4.txt"
|
|
|
|
chunk_cats = map mkCId
|
|
["A", "AP", "AdA", "AdV", "Adv", "CN", "Cl", "ClSlash", "Conj", "Det",
|
|
"IAdv", "IP", "N", "NP", "Num", "Ord", "Predet", "Prep", "Pron", "QS",
|
|
"Quant", "RP", "RS", "S", "Subj", "V", "V2", "VPS", "VS"]
|
|
|
|
main = do
|
|
pgf <- readPGF grammar_name
|
|
ls <- fmap (filterExprs . zip [1..] . lines) $ readFile treebank_name
|
|
putStrLn ""
|
|
putStrLn ("trees: "++show (length ls))
|
|
let (_,cat,_) = unType (startCat pgf)
|
|
stats = foldl' (collectStats pgf)
|
|
(initStats pgf)
|
|
[(n,fromMaybe (error l) (readExpr (toQ l)),Just cat) | (n,l) <- ls]
|
|
|
|
putStrLn ("coverage: "++show (coverage stats))
|
|
|
|
putStrLn ("Writing ParseEngAbs.probs...")
|
|
writeFile "ParseEngAbs.probs" (unlines [show f ++ "\t" ++ show p | (f,p) <- probs pgf stats])
|
|
where
|
|
toQ [] = []
|
|
toQ ('?':cs) = 'Q' : toQ cs
|
|
toQ (c:cs) = c : toQ cs
|
|
|
|
filterExprs [] = []
|
|
filterExprs ((n,l):ls)
|
|
| null l = filterExprs ls
|
|
| elem (head l) "+#*" = (n,drop 2 l) : filterExprs ls
|
|
| otherwise = filterExprs ls
|
|
|
|
initStats pgf =
|
|
(Map.fromListWith (+)
|
|
([(f,1) | f <- functions pgf] ++
|
|
[(cat pgf f,1) | f <- functions pgf])
|
|
,0
|
|
)
|
|
|
|
collectStats pgf (ustats,count) (n,e,mb_cat1) =
|
|
case unApp e of
|
|
Just (f,args) -> let fcat2 = cat2 pgf f n e
|
|
fcat = fromMaybe fcat2 mb_cat1
|
|
cf = fromMaybe 0 (Map.lookup f ustats)
|
|
cc = fromMaybe 0 (Map.lookup fcat ustats)
|
|
in if isJust mb_cat1 && f /= mkCId "Q" && fcat /= fcat2
|
|
then error (show n ++ ": " ++ showExpr [] e)
|
|
else
|
|
cf `seq` cc `seq` count `seq`
|
|
foldl' (collectStats pgf)
|
|
(Map.insert f (cf+1) (Map.insert fcat (cc+1) ustats)
|
|
,count+1
|
|
)
|
|
(zipWith (\e mb_cat1 -> (n,e,mb_cat1)) args (argCats f))
|
|
Nothing -> case unStr e of
|
|
Just _ -> (ustats,count+1)
|
|
Nothing -> error ("collectStats ("++showExpr [] e++")")
|
|
where
|
|
argCats f =
|
|
case fmap unType (functionType pgf f) of
|
|
Just (arg_tys,_,_) -> let tyCat (_,_,ty) = let (_,cat,_) = unType ty in Just cat
|
|
in map tyCat arg_tys
|
|
Nothing -> repeat Nothing
|
|
|
|
coverage (stats,gcount) =
|
|
let c = fromMaybe 0 (Map.lookup (mkCId "Q") stats)
|
|
in (fromIntegral (gcount - c) / fromIntegral gcount) * 100
|
|
|
|
probs pgf (stats,gcount) =
|
|
[toFProb f (cat pgf f) | f <- functions pgf] ++
|
|
[toCProb c | c <- chunk_cats]
|
|
where
|
|
toFProb f cat =
|
|
let count = fromMaybe 0 (Map.lookup f stats)
|
|
cat_mass = fromMaybe 0 (Map.lookup cat stats)
|
|
in (f, fromIntegral count / fromIntegral cat_mass :: Double)
|
|
|
|
toCProb c =
|
|
let ccount = fromMaybe 0 (Map.lookup c stats)
|
|
in (c, fromIntegral ccount / fromIntegral chunk_mass :: Double)
|
|
|
|
chunk_mass =
|
|
sum [fromMaybe 0 (Map.lookup c stats) | c <- chunk_cats]
|
|
|
|
cat pgf f =
|
|
case fmap unType (functionType pgf f) of
|
|
Just (_,cat,_) -> cat
|
|
Nothing -> error ("Unknown function "++showCId f)
|
|
|
|
cat2 pgf f n e =
|
|
case fmap unType (functionType pgf f) of
|
|
Just (_,cat,_) -> cat
|
|
Nothing -> error (show n ++ ": Unknown function "++showCId f++" in "++showExpr [] e)
|