mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
probabilistic model for the new syntax
This commit is contained in:
65216
lib/src/experimental/NDPredTrans.probs
Normal file
65216
lib/src/experimental/NDPredTrans.probs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -20,8 +20,13 @@ counts t = case t of
|
||||
"UttVP" -> ["UttPrVPI","InfVP_none"]
|
||||
"UseRCl" -> []
|
||||
"TTAnt" -> []
|
||||
"RelCl" -> ["RelCl_none"]
|
||||
"RelVP" -> ["RelVP_none"]
|
||||
"RelSlash" -> ["RelSlash_none"]
|
||||
"PastPartRS" -> ["PastPartAP_none"]
|
||||
"PresPartRS" -> ["PresPartAP_none"]
|
||||
"ComparAdvAdjS" -> ["ComparAdvAdjS_none"] ----
|
||||
"SubjS" -> ["AdvSubjS"] ----
|
||||
"ConjS" -> ["UseClC_none"]
|
||||
"PredVPS" -> ["UseCl_none","PredVP_none","UseVPC_none"]
|
||||
"BaseVPS" -> ["StartVPC_none"]
|
||||
@@ -51,13 +56,23 @@ counts t = case t of
|
||||
"CompQS" -> ["UseQ_none"]
|
||||
"CompS" -> ["UseS_none"]
|
||||
"SlashV2a" -> ["UseV_np","LiftV2"]
|
||||
"ComplSlashPartLast" -> ["ComplV2_none"]
|
||||
"ComplVPIVV" -> ["ComplVV_none","UseV_v","LiftVV","InfVP_none"]
|
||||
"MkVPI" -> []
|
||||
"ConjVPI" -> ["UseVPC_none"]
|
||||
"PassVPSlash" -> ["PassUseV_none","LiftV2"] ---- can be other V's
|
||||
"PassAgentVPSlash" -> ["AgentPassUseV_none","LiftV2"] ---- can be other V's
|
||||
"SlashV2S" -> ["SlashV2S_none","UseV_np_s","LiftV2S"]
|
||||
"SlashV2Q" -> ["SlashV2Q_none","UseV_np_q","LiftV2Q"]
|
||||
"SlashV2A" -> ["SlashV2A_none","UseV_np_a","LiftV2A"]
|
||||
"SlashV2V" -> ["SlashV2V_none","UseV_np_v","LiftV2V","InfVP_none"]
|
||||
"SlashVV" -> ["ComplVV_np","UseV_v","LiftVV","InfVP_none"]
|
||||
"SlashVP" -> ["PredVP_np"]
|
||||
"SlashPrep" -> ["AdvCl_np","LiftPrep"]
|
||||
"SlashVS" -> [] ----
|
||||
"AdvSlash" -> [] ----
|
||||
"UseCl" -> []
|
||||
"UseQCl" -> []
|
||||
"UseSlash" -> []
|
||||
|
||||
t -> [t]
|
||||
|
||||
@@ -37,14 +37,18 @@ on t = case t of
|
||||
-- RS
|
||||
GUseRCl (GTTAnt t a) p (GRelVP rp vp) -> GRelVP_none rp (onVP t a p vp)
|
||||
GUseRCl (GTTAnt t a) p (GRelSlash rp cls) -> GRelSlash_none rp (onClSlash t a p cls)
|
||||
GUseRCl (GTTAnt t a) p (GRelCl cl) -> GRelCl_none (onCl t a p cl)
|
||||
GPastPartRS _ _ _ -> error "PastPartRS : Ant -> Pol -> VPSlash -> RS"
|
||||
GPresPartRS _ _ _ -> error "PresPartRS : Ant -> Pol -> VPSlash -> RS"
|
||||
|
||||
-- NP
|
||||
|
||||
-- Adv
|
||||
GComparAdvAdjS cadv a s -> error "GComparAdvAdjS cadv a (onS s)"
|
||||
GSubjS subj s -> error "GSubjS subj s"
|
||||
GSubjS subj s -> error "AdvSubjS subj s"
|
||||
|
||||
-- AP
|
||||
---- SentAP : AP -> SC -> AP
|
||||
|
||||
_ -> composOp on t
|
||||
|
||||
@@ -63,8 +67,6 @@ mkClC conj cls = foldr GContClC_none (GStartClC_none conj cl1 cl2) cls2
|
||||
where
|
||||
(cls2,[cl1,cl2]) = splitAt (length cls - 2) cls
|
||||
|
||||
|
||||
|
||||
onVPS2VP :: Tree GVPS_ -> Tree GPrVP_none_
|
||||
onVPS2VP vps = case vps of
|
||||
GMkVPS (GTTAnt t a) p vp -> onVP t a p vp
|
||||
@@ -88,6 +90,8 @@ getAdvs :: GVP -> ([GPrAdv_none],GVP)
|
||||
getAdvs vp = case vp of
|
||||
GAdvVP vp1 adv -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdv adv],vp2)
|
||||
GAdVVP adv vp1 -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdV adv],vp2)
|
||||
GExtAdvVP vp1 adv -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdv adv],vp2) ---- as a variant
|
||||
|
||||
_ -> ([],vp)
|
||||
|
||||
appAdvCl :: [GPrAdv_none] -> GPrCl_none -> GPrCl_none
|
||||
@@ -118,9 +122,8 @@ onVP t a p vp = case vp of
|
||||
GCompS s -> GUseS_none a t p (onS2Cl s)
|
||||
GCompQS qs -> GUseQ_none a t p (onQS2QCl qs)
|
||||
GCompVP ant pol vp -> GUseVP_none a t p (GInfVP_none (onVP GTPres ant pol vp)) -- !!
|
||||
---- ComplSlashPartLast : VPSlash -> NP -> VP ;
|
||||
---- ComplVPIVV : VV -> VPI -> VP ;
|
||||
---- ExtAdvVP : VP -> Adv -> VP ;
|
||||
GComplSlashPartLast vps np -> GComplV2_none (onVPSlash t a p vps) np ---- as a variant
|
||||
GComplVPIVV vv vpi -> GComplVV_none (GUseV_v a t p (GLiftVV vv)) (GInfVP_none (onVPI2VP vpi))
|
||||
GPassVPSlash vps -> onVPSlashPass t a p vps
|
||||
GPassAgentVPSlash vps np -> onVPSlashPassAgent t a p vps np
|
||||
---- ProgrVP : VP -> VP ;
|
||||
@@ -128,6 +131,11 @@ onVP t a p vp = case vp of
|
||||
---- SelfAdVVP : VP -> VP ;
|
||||
---- SelfAdvVP : VP -> VP ;
|
||||
|
||||
onVPI2VP :: Tree GVPI_ -> Tree GPrVP_none_
|
||||
onVPI2VP vpi = case vpi of
|
||||
GMkVPI vp -> onVP GTPres GASimul GPPos vp
|
||||
GConjVPI conj (GListVPI vs) -> GUseVPC_none (mkVPC conj [onVPI2VP v | v <- vs])
|
||||
|
||||
onVPSlash :: GTense -> GAnt -> GPol -> Tree GVPSlash_ -> Tree GPrVP_np_
|
||||
onVPSlash t a p vps = case vps of
|
||||
GSlashV2a v2 -> GUseV_np a t p (GLiftV2 v2)
|
||||
@@ -156,7 +164,11 @@ onVPSlashPassAgent t a p vps np = case vps of
|
||||
onClSlash :: GTense -> GAnt -> GPol -> Tree GClSlash_ -> Tree GPrCl_np_
|
||||
onClSlash t a p cls = case cls of
|
||||
GSlashVP np vps -> GPredVP_np np (onVPSlash t a p vps)
|
||||
GSlashPrep cl prep -> GAdvCl_np (GLiftPrep prep) (onCl t a p cl)
|
||||
---- GSlashVS np vs sslash ->
|
||||
---- GAdvSlash cls adv -> GAdvCl_none (GLiftAdv adv) (onClSlash t a p cls)
|
||||
|
||||
---- UseSlash : Temp -> Pol -> ClSlash -> SSlash ;
|
||||
|
||||
onS2Cl :: Tree GS_ -> Tree GPrCl_none_
|
||||
onS2Cl s = case s of
|
||||
|
||||
31
lib/src/experimental/transfer/TrainFreq.hs
Normal file
31
lib/src/experimental/transfer/TrainFreq.hs
Normal file
@@ -0,0 +1,31 @@
|
||||
import Data.List
|
||||
import qualified Data.Map
|
||||
|
||||
main = do
|
||||
|
||||
ws <- readFile "new-trees.txt" >>= return . words
|
||||
|
||||
let freqs ws = Data.Map.fromListWith (+) [(w,1) | w <- ws]
|
||||
|
||||
let freqmap = freqs ws
|
||||
|
||||
fs <- readFile "all-ndfuns.txt" >>= return . lines
|
||||
|
||||
let catf ws = case ws of f:ty -> (f,last(init ty))
|
||||
|
||||
let catmap = Data.Map.fromList [catf (words f) | f <- fs]
|
||||
|
||||
let allist = [(f,(c,Data.Map.lookup f freqmap)) | (f,c) <- Data.Map.assocs catmap]
|
||||
|
||||
let catlist = Data.List.sortBy (\(f,(c,_)) (_,(k,_)) -> compare c k) allist
|
||||
|
||||
let gcatlist = Data.List.groupBy (\(f,(c,_)) (_,(k,_)) -> c==k) catlist
|
||||
|
||||
let fcatfreqs fcs = let cat = fst (snd (head fcs)) in let tot = sum [maybe 0 id mn | (f,(c,mn)) <- fcs] in [(f,maybe 0 id mn, cat, tot) | (f,(c,mn)) <- fcs]
|
||||
|
||||
let fcatfreqlist = map fcatfreqs gcatlist
|
||||
|
||||
let relprobs cat = [(f, (fromIntegral i+1 :: Double) / (fromIntegral tot :: Double)) | (f,i,c,t) <- cat, let tot = t + length cat]
|
||||
|
||||
-- writeFile "allFunFreqs.txt" $ unlines $ [unwords [f,show i,c,show t] | (f,i,c,t) <- concat fcatfreqlist]
|
||||
writeFile "NDPredTrans.probs" $ unlines $ [unwords [f,show n] | (f,n) <- concatMap relprobs fcatfreqlist]
|
||||
Reference in New Issue
Block a user