Files
gf-core/examples/PennTreebank/training.hs
2012-06-12 10:05:17 +00:00

76 lines
2.8 KiB
Haskell

import PGF
import qualified Data.Map as Map
import Data.Maybe
import Data.List
main = do
pgf <- readPGF "ParseEngAbs.pgf"
ls <- fmap lines $ readFile "log.txt"
let stats = foldl' (collectStats pgf)
(initStats pgf)
[(fromMaybe (error l) (readExpr (toQ l)),Just (mkCId "Phr"),Nothing) | l <- ls]
mapM_ putStrLn [show f ++ "\t" ++ show p | (f,p) <- uprobs pgf stats]
mapM_ putStrLn [show cat1 ++ "\t" ++ show cat2 ++ "\t" ++ show p | (cat1,cat2,p) <- bprobs pgf stats]
where
toQ [] = []
toQ ('[':cs) = let (xs,']':ys) = break (==']') cs
in toQ ('?' : ys)
toQ ('?':cs) = 'Q' : toQ cs
toQ (c:cs) = c : toQ cs
initStats pgf =
(Map.fromListWith (+)
([(f,1) | f <- functions pgf] ++
[(cat pgf f,1) | f <- functions pgf])
,Map.empty
)
collectStats pgf (ustats,bstats) (e,mb_cat1,mb_cat2) =
case unApp e of
Just (f,args) -> let fcat = fromMaybe (cat2 pgf f e) mb_cat1
cf = fromMaybe 0 (Map.lookup f ustats)
cc = fromMaybe 0 (Map.lookup fcat ustats)
in cf `seq` cc `seq` bstats `seq`
foldl' (collectStats pgf)
(Map.insert f (cf+1) (Map.insert fcat (cc+1) ustats)
,(if null args
then Map.insertWith (+) (fcat,wildCId) 1
else id)
(maybe bstats (\cat2 -> Map.insertWith (+) (cat2,fcat) 1 bstats) mb_cat2)
)
(zip3 args (argCats f) (repeat (Just fcat)))
Nothing -> (ustats,bstats)
where
argCats f =
case fmap unType (functionType pgf f) of
Just (arg_tys,_,_) -> let tyCat (_,_,ty) = let (_,cat,_) = unType ty in Just cat
in map tyCat arg_tys
Nothing -> repeat Nothing
uprobs pgf (ustats,bstats) =
[toProb f (cat pgf f) | f <- functions pgf]
where
toProb f cat =
let count = fromMaybe 0 (Map.lookup f ustats)
cat_mass = fromMaybe 0 (Map.lookup cat ustats)
in (f, fromIntegral count / fromIntegral cat_mass :: Double)
bprobs pgf (ustats,bstats) =
concat [toProb cat | cat <- categories pgf]
where
toProb cat =
let mass = sum [count | ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
in [(cat1,cat2,fromIntegral count / fromIntegral mass)
| ((cat1,cat2),count) <- Map.toList bstats, cat1==cat]
cat pgf f =
case fmap unType (functionType pgf f) of
Just (_,cat,_) -> cat
Nothing -> error ("Unknown function "++showCId f)
cat2 pgf f e =
case fmap unType (functionType pgf f) of
Just (_,cat,_) -> cat
Nothing -> error ("Unknown function "++showCId f++show e)