probabilistic model for the new syntax

This commit is contained in:
aarne
2014-02-27 17:34:53 +00:00
parent 678732a7f1
commit 599b5689c3
4 changed files with 65280 additions and 6 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -20,8 +20,13 @@ counts t = case t of
"UttVP" -> ["UttPrVPI","InfVP_none"]
"UseRCl" -> []
"TTAnt" -> []
"RelCl" -> ["RelCl_none"]
"RelVP" -> ["RelVP_none"]
"RelSlash" -> ["RelSlash_none"]
"PastPartRS" -> ["PastPartAP_none"]
"PresPartRS" -> ["PresPartAP_none"]
"ComparAdvAdjS" -> ["ComparAdvAdjS_none"] ----
"SubjS" -> ["AdvSubjS"] ----
"ConjS" -> ["UseClC_none"]
"PredVPS" -> ["UseCl_none","PredVP_none","UseVPC_none"]
"BaseVPS" -> ["StartVPC_none"]
@@ -51,13 +56,23 @@ counts t = case t of
"CompQS" -> ["UseQ_none"]
"CompS" -> ["UseS_none"]
"SlashV2a" -> ["UseV_np","LiftV2"]
"ComplSlashPartLast" -> ["ComplV2_none"]
"ComplVPIVV" -> ["ComplVV_none","UseV_v","LiftVV","InfVP_none"]
"MkVPI" -> []
"ConjVPI" -> ["UseVPC_none"]
"PassVPSlash" -> ["PassUseV_none","LiftV2"] ---- can be other V's
"PassAgentVPSlash" -> ["AgentPassUseV_none","LiftV2"] ---- can be other V's
"SlashV2S" -> ["SlashV2S_none","UseV_np_s","LiftV2S"]
"SlashV2Q" -> ["SlashV2Q_none","UseV_np_q","LiftV2Q"]
"SlashV2A" -> ["SlashV2A_none","UseV_np_a","LiftV2A"]
"SlashV2V" -> ["SlashV2V_none","UseV_np_v","LiftV2V","InfVP_none"]
"SlashVV" -> ["ComplVV_np","UseV_v","LiftVV","InfVP_none"]
"SlashVP" -> ["PredVP_np"]
"SlashPrep" -> ["AdvCl_np","LiftPrep"]
"SlashVS" -> [] ----
"AdvSlash" -> [] ----
"UseCl" -> []
"UseQCl" -> []
"UseSlash" -> []
t -> [t]

View File

@@ -37,14 +37,18 @@ on t = case t of
-- RS
GUseRCl (GTTAnt t a) p (GRelVP rp vp) -> GRelVP_none rp (onVP t a p vp)
GUseRCl (GTTAnt t a) p (GRelSlash rp cls) -> GRelSlash_none rp (onClSlash t a p cls)
GUseRCl (GTTAnt t a) p (GRelCl cl) -> GRelCl_none (onCl t a p cl)
GPastPartRS _ _ _ -> error "PastPartRS : Ant -> Pol -> VPSlash -> RS"
GPresPartRS _ _ _ -> error "PresPartRS : Ant -> Pol -> VPSlash -> RS"
-- NP
-- Adv
GComparAdvAdjS cadv a s -> error "GComparAdvAdjS cadv a (onS s)"
GSubjS subj s -> error "GSubjS subj s"
GSubjS subj s -> error "AdvSubjS subj s"
-- AP
---- SentAP : AP -> SC -> AP
_ -> composOp on t
@@ -63,8 +67,6 @@ mkClC conj cls = foldr GContClC_none (GStartClC_none conj cl1 cl2) cls2
where
(cls2,[cl1,cl2]) = splitAt (length cls - 2) cls
onVPS2VP :: Tree GVPS_ -> Tree GPrVP_none_
onVPS2VP vps = case vps of
GMkVPS (GTTAnt t a) p vp -> onVP t a p vp
@@ -88,6 +90,8 @@ getAdvs :: GVP -> ([GPrAdv_none],GVP)
getAdvs vp = case vp of
GAdvVP vp1 adv -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdv adv],vp2)
GAdVVP adv vp1 -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdV adv],vp2)
GExtAdvVP vp1 adv -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdv adv],vp2) ---- as a variant
_ -> ([],vp)
appAdvCl :: [GPrAdv_none] -> GPrCl_none -> GPrCl_none
@@ -118,9 +122,8 @@ onVP t a p vp = case vp of
GCompS s -> GUseS_none a t p (onS2Cl s)
GCompQS qs -> GUseQ_none a t p (onQS2QCl qs)
GCompVP ant pol vp -> GUseVP_none a t p (GInfVP_none (onVP GTPres ant pol vp)) -- !!
---- ComplSlashPartLast : VPSlash -> NP -> VP ;
---- ComplVPIVV : VV -> VPI -> VP ;
---- ExtAdvVP : VP -> Adv -> VP ;
GComplSlashPartLast vps np -> GComplV2_none (onVPSlash t a p vps) np ---- as a variant
GComplVPIVV vv vpi -> GComplVV_none (GUseV_v a t p (GLiftVV vv)) (GInfVP_none (onVPI2VP vpi))
GPassVPSlash vps -> onVPSlashPass t a p vps
GPassAgentVPSlash vps np -> onVPSlashPassAgent t a p vps np
---- ProgrVP : VP -> VP ;
@@ -128,6 +131,11 @@ onVP t a p vp = case vp of
---- SelfAdVVP : VP -> VP ;
---- SelfAdvVP : VP -> VP ;
onVPI2VP :: Tree GVPI_ -> Tree GPrVP_none_
onVPI2VP vpi = case vpi of
GMkVPI vp -> onVP GTPres GASimul GPPos vp
GConjVPI conj (GListVPI vs) -> GUseVPC_none (mkVPC conj [onVPI2VP v | v <- vs])
onVPSlash :: GTense -> GAnt -> GPol -> Tree GVPSlash_ -> Tree GPrVP_np_
onVPSlash t a p vps = case vps of
GSlashV2a v2 -> GUseV_np a t p (GLiftV2 v2)
@@ -156,7 +164,11 @@ onVPSlashPassAgent t a p vps np = case vps of
onClSlash :: GTense -> GAnt -> GPol -> Tree GClSlash_ -> Tree GPrCl_np_
onClSlash t a p cls = case cls of
GSlashVP np vps -> GPredVP_np np (onVPSlash t a p vps)
GSlashPrep cl prep -> GAdvCl_np (GLiftPrep prep) (onCl t a p cl)
---- GSlashVS np vs sslash ->
---- GAdvSlash cls adv -> GAdvCl_none (GLiftAdv adv) (onClSlash t a p cls)
---- UseSlash : Temp -> Pol -> ClSlash -> SSlash ;
onS2Cl :: Tree GS_ -> Tree GPrCl_none_
onS2Cl s = case s of

View File

@@ -0,0 +1,31 @@
import Data.List
import qualified Data.Map
main = do
ws <- readFile "new-trees.txt" >>= return . words
let freqs ws = Data.Map.fromListWith (+) [(w,1) | w <- ws]
let freqmap = freqs ws
fs <- readFile "all-ndfuns.txt" >>= return . lines
let catf ws = case ws of f:ty -> (f,last(init ty))
let catmap = Data.Map.fromList [catf (words f) | f <- fs]
let allist = [(f,(c,Data.Map.lookup f freqmap)) | (f,c) <- Data.Map.assocs catmap]
let catlist = Data.List.sortBy (\(f,(c,_)) (_,(k,_)) -> compare c k) allist
let gcatlist = Data.List.groupBy (\(f,(c,_)) (_,(k,_)) -> c==k) catlist
let fcatfreqs fcs = let cat = fst (snd (head fcs)) in let tot = sum [maybe 0 id mn | (f,(c,mn)) <- fcs] in [(f,maybe 0 id mn, cat, tot) | (f,(c,mn)) <- fcs]
let fcatfreqlist = map fcatfreqs gcatlist
let relprobs cat = [(f, (fromIntegral i+1 :: Double) / (fromIntegral tot :: Double)) | (f,i,c,t) <- cat, let tot = t + length cat]
-- writeFile "allFunFreqs.txt" $ unlines $ [unwords [f,show i,c,show t] | (f,i,c,t) <- concat fcatfreqlist]
writeFile "NDPredTrans.probs" $ unlines $ [unwords [f,show n] | (f,n) <- concatMap relprobs fcatfreqlist]