mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-27 03:38:55 -06:00
probabilistic model for the new syntax
This commit is contained in:
65216
lib/src/experimental/NDPredTrans.probs
Normal file
65216
lib/src/experimental/NDPredTrans.probs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -20,8 +20,13 @@ counts t = case t of
|
|||||||
"UttVP" -> ["UttPrVPI","InfVP_none"]
|
"UttVP" -> ["UttPrVPI","InfVP_none"]
|
||||||
"UseRCl" -> []
|
"UseRCl" -> []
|
||||||
"TTAnt" -> []
|
"TTAnt" -> []
|
||||||
|
"RelCl" -> ["RelCl_none"]
|
||||||
"RelVP" -> ["RelVP_none"]
|
"RelVP" -> ["RelVP_none"]
|
||||||
"RelSlash" -> ["RelSlash_none"]
|
"RelSlash" -> ["RelSlash_none"]
|
||||||
|
"PastPartRS" -> ["PastPartAP_none"]
|
||||||
|
"PresPartRS" -> ["PresPartAP_none"]
|
||||||
|
"ComparAdvAdjS" -> ["ComparAdvAdjS_none"] ----
|
||||||
|
"SubjS" -> ["AdvSubjS"] ----
|
||||||
"ConjS" -> ["UseClC_none"]
|
"ConjS" -> ["UseClC_none"]
|
||||||
"PredVPS" -> ["UseCl_none","PredVP_none","UseVPC_none"]
|
"PredVPS" -> ["UseCl_none","PredVP_none","UseVPC_none"]
|
||||||
"BaseVPS" -> ["StartVPC_none"]
|
"BaseVPS" -> ["StartVPC_none"]
|
||||||
@@ -51,13 +56,23 @@ counts t = case t of
|
|||||||
"CompQS" -> ["UseQ_none"]
|
"CompQS" -> ["UseQ_none"]
|
||||||
"CompS" -> ["UseS_none"]
|
"CompS" -> ["UseS_none"]
|
||||||
"SlashV2a" -> ["UseV_np","LiftV2"]
|
"SlashV2a" -> ["UseV_np","LiftV2"]
|
||||||
|
"ComplSlashPartLast" -> ["ComplV2_none"]
|
||||||
|
"ComplVPIVV" -> ["ComplVV_none","UseV_v","LiftVV","InfVP_none"]
|
||||||
|
"MkVPI" -> []
|
||||||
|
"ConjVPI" -> ["UseVPC_none"]
|
||||||
|
"PassVPSlash" -> ["PassUseV_none","LiftV2"] ---- can be other V's
|
||||||
|
"PassAgentVPSlash" -> ["AgentPassUseV_none","LiftV2"] ---- can be other V's
|
||||||
"SlashV2S" -> ["SlashV2S_none","UseV_np_s","LiftV2S"]
|
"SlashV2S" -> ["SlashV2S_none","UseV_np_s","LiftV2S"]
|
||||||
"SlashV2Q" -> ["SlashV2Q_none","UseV_np_q","LiftV2Q"]
|
"SlashV2Q" -> ["SlashV2Q_none","UseV_np_q","LiftV2Q"]
|
||||||
"SlashV2A" -> ["SlashV2A_none","UseV_np_a","LiftV2A"]
|
"SlashV2A" -> ["SlashV2A_none","UseV_np_a","LiftV2A"]
|
||||||
"SlashV2V" -> ["SlashV2V_none","UseV_np_v","LiftV2V","InfVP_none"]
|
"SlashV2V" -> ["SlashV2V_none","UseV_np_v","LiftV2V","InfVP_none"]
|
||||||
"SlashVV" -> ["ComplVV_np","UseV_v","LiftVV","InfVP_none"]
|
"SlashVV" -> ["ComplVV_np","UseV_v","LiftVV","InfVP_none"]
|
||||||
"SlashVP" -> ["PredVP_np"]
|
"SlashVP" -> ["PredVP_np"]
|
||||||
|
"SlashPrep" -> ["AdvCl_np","LiftPrep"]
|
||||||
|
"SlashVS" -> [] ----
|
||||||
|
"AdvSlash" -> [] ----
|
||||||
"UseCl" -> []
|
"UseCl" -> []
|
||||||
"UseQCl" -> []
|
"UseQCl" -> []
|
||||||
|
"UseSlash" -> []
|
||||||
|
|
||||||
t -> [t]
|
t -> [t]
|
||||||
|
|||||||
@@ -37,14 +37,18 @@ on t = case t of
|
|||||||
-- RS
|
-- RS
|
||||||
GUseRCl (GTTAnt t a) p (GRelVP rp vp) -> GRelVP_none rp (onVP t a p vp)
|
GUseRCl (GTTAnt t a) p (GRelVP rp vp) -> GRelVP_none rp (onVP t a p vp)
|
||||||
GUseRCl (GTTAnt t a) p (GRelSlash rp cls) -> GRelSlash_none rp (onClSlash t a p cls)
|
GUseRCl (GTTAnt t a) p (GRelSlash rp cls) -> GRelSlash_none rp (onClSlash t a p cls)
|
||||||
|
GUseRCl (GTTAnt t a) p (GRelCl cl) -> GRelCl_none (onCl t a p cl)
|
||||||
|
GPastPartRS _ _ _ -> error "PastPartRS : Ant -> Pol -> VPSlash -> RS"
|
||||||
|
GPresPartRS _ _ _ -> error "PresPartRS : Ant -> Pol -> VPSlash -> RS"
|
||||||
|
|
||||||
-- NP
|
-- NP
|
||||||
|
|
||||||
-- Adv
|
-- Adv
|
||||||
GComparAdvAdjS cadv a s -> error "GComparAdvAdjS cadv a (onS s)"
|
GComparAdvAdjS cadv a s -> error "GComparAdvAdjS cadv a (onS s)"
|
||||||
GSubjS subj s -> error "GSubjS subj s"
|
GSubjS subj s -> error "AdvSubjS subj s"
|
||||||
|
|
||||||
-- AP
|
-- AP
|
||||||
|
---- SentAP : AP -> SC -> AP
|
||||||
|
|
||||||
_ -> composOp on t
|
_ -> composOp on t
|
||||||
|
|
||||||
@@ -63,8 +67,6 @@ mkClC conj cls = foldr GContClC_none (GStartClC_none conj cl1 cl2) cls2
|
|||||||
where
|
where
|
||||||
(cls2,[cl1,cl2]) = splitAt (length cls - 2) cls
|
(cls2,[cl1,cl2]) = splitAt (length cls - 2) cls
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
onVPS2VP :: Tree GVPS_ -> Tree GPrVP_none_
|
onVPS2VP :: Tree GVPS_ -> Tree GPrVP_none_
|
||||||
onVPS2VP vps = case vps of
|
onVPS2VP vps = case vps of
|
||||||
GMkVPS (GTTAnt t a) p vp -> onVP t a p vp
|
GMkVPS (GTTAnt t a) p vp -> onVP t a p vp
|
||||||
@@ -88,6 +90,8 @@ getAdvs :: GVP -> ([GPrAdv_none],GVP)
|
|||||||
getAdvs vp = case vp of
|
getAdvs vp = case vp of
|
||||||
GAdvVP vp1 adv -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdv adv],vp2)
|
GAdvVP vp1 adv -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdv adv],vp2)
|
||||||
GAdVVP adv vp1 -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdV adv],vp2)
|
GAdVVP adv vp1 -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdV adv],vp2)
|
||||||
|
GExtAdvVP vp1 adv -> let (advs,vp2) = getAdvs vp1 in (advs ++ [GLiftAdv adv],vp2) ---- as a variant
|
||||||
|
|
||||||
_ -> ([],vp)
|
_ -> ([],vp)
|
||||||
|
|
||||||
appAdvCl :: [GPrAdv_none] -> GPrCl_none -> GPrCl_none
|
appAdvCl :: [GPrAdv_none] -> GPrCl_none -> GPrCl_none
|
||||||
@@ -118,9 +122,8 @@ onVP t a p vp = case vp of
|
|||||||
GCompS s -> GUseS_none a t p (onS2Cl s)
|
GCompS s -> GUseS_none a t p (onS2Cl s)
|
||||||
GCompQS qs -> GUseQ_none a t p (onQS2QCl qs)
|
GCompQS qs -> GUseQ_none a t p (onQS2QCl qs)
|
||||||
GCompVP ant pol vp -> GUseVP_none a t p (GInfVP_none (onVP GTPres ant pol vp)) -- !!
|
GCompVP ant pol vp -> GUseVP_none a t p (GInfVP_none (onVP GTPres ant pol vp)) -- !!
|
||||||
---- ComplSlashPartLast : VPSlash -> NP -> VP ;
|
GComplSlashPartLast vps np -> GComplV2_none (onVPSlash t a p vps) np ---- as a variant
|
||||||
---- ComplVPIVV : VV -> VPI -> VP ;
|
GComplVPIVV vv vpi -> GComplVV_none (GUseV_v a t p (GLiftVV vv)) (GInfVP_none (onVPI2VP vpi))
|
||||||
---- ExtAdvVP : VP -> Adv -> VP ;
|
|
||||||
GPassVPSlash vps -> onVPSlashPass t a p vps
|
GPassVPSlash vps -> onVPSlashPass t a p vps
|
||||||
GPassAgentVPSlash vps np -> onVPSlashPassAgent t a p vps np
|
GPassAgentVPSlash vps np -> onVPSlashPassAgent t a p vps np
|
||||||
---- ProgrVP : VP -> VP ;
|
---- ProgrVP : VP -> VP ;
|
||||||
@@ -128,6 +131,11 @@ onVP t a p vp = case vp of
|
|||||||
---- SelfAdVVP : VP -> VP ;
|
---- SelfAdVVP : VP -> VP ;
|
||||||
---- SelfAdvVP : VP -> VP ;
|
---- SelfAdvVP : VP -> VP ;
|
||||||
|
|
||||||
|
onVPI2VP :: Tree GVPI_ -> Tree GPrVP_none_
|
||||||
|
onVPI2VP vpi = case vpi of
|
||||||
|
GMkVPI vp -> onVP GTPres GASimul GPPos vp
|
||||||
|
GConjVPI conj (GListVPI vs) -> GUseVPC_none (mkVPC conj [onVPI2VP v | v <- vs])
|
||||||
|
|
||||||
onVPSlash :: GTense -> GAnt -> GPol -> Tree GVPSlash_ -> Tree GPrVP_np_
|
onVPSlash :: GTense -> GAnt -> GPol -> Tree GVPSlash_ -> Tree GPrVP_np_
|
||||||
onVPSlash t a p vps = case vps of
|
onVPSlash t a p vps = case vps of
|
||||||
GSlashV2a v2 -> GUseV_np a t p (GLiftV2 v2)
|
GSlashV2a v2 -> GUseV_np a t p (GLiftV2 v2)
|
||||||
@@ -156,7 +164,11 @@ onVPSlashPassAgent t a p vps np = case vps of
|
|||||||
onClSlash :: GTense -> GAnt -> GPol -> Tree GClSlash_ -> Tree GPrCl_np_
|
onClSlash :: GTense -> GAnt -> GPol -> Tree GClSlash_ -> Tree GPrCl_np_
|
||||||
onClSlash t a p cls = case cls of
|
onClSlash t a p cls = case cls of
|
||||||
GSlashVP np vps -> GPredVP_np np (onVPSlash t a p vps)
|
GSlashVP np vps -> GPredVP_np np (onVPSlash t a p vps)
|
||||||
|
GSlashPrep cl prep -> GAdvCl_np (GLiftPrep prep) (onCl t a p cl)
|
||||||
|
---- GSlashVS np vs sslash ->
|
||||||
|
---- GAdvSlash cls adv -> GAdvCl_none (GLiftAdv adv) (onClSlash t a p cls)
|
||||||
|
|
||||||
|
---- UseSlash : Temp -> Pol -> ClSlash -> SSlash ;
|
||||||
|
|
||||||
onS2Cl :: Tree GS_ -> Tree GPrCl_none_
|
onS2Cl :: Tree GS_ -> Tree GPrCl_none_
|
||||||
onS2Cl s = case s of
|
onS2Cl s = case s of
|
||||||
|
|||||||
31
lib/src/experimental/transfer/TrainFreq.hs
Normal file
31
lib/src/experimental/transfer/TrainFreq.hs
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import Data.List
|
||||||
|
import qualified Data.Map
|
||||||
|
|
||||||
|
main = do
|
||||||
|
|
||||||
|
ws <- readFile "new-trees.txt" >>= return . words
|
||||||
|
|
||||||
|
let freqs ws = Data.Map.fromListWith (+) [(w,1) | w <- ws]
|
||||||
|
|
||||||
|
let freqmap = freqs ws
|
||||||
|
|
||||||
|
fs <- readFile "all-ndfuns.txt" >>= return . lines
|
||||||
|
|
||||||
|
let catf ws = case ws of f:ty -> (f,last(init ty))
|
||||||
|
|
||||||
|
let catmap = Data.Map.fromList [catf (words f) | f <- fs]
|
||||||
|
|
||||||
|
let allist = [(f,(c,Data.Map.lookup f freqmap)) | (f,c) <- Data.Map.assocs catmap]
|
||||||
|
|
||||||
|
let catlist = Data.List.sortBy (\(f,(c,_)) (_,(k,_)) -> compare c k) allist
|
||||||
|
|
||||||
|
let gcatlist = Data.List.groupBy (\(f,(c,_)) (_,(k,_)) -> c==k) catlist
|
||||||
|
|
||||||
|
let fcatfreqs fcs = let cat = fst (snd (head fcs)) in let tot = sum [maybe 0 id mn | (f,(c,mn)) <- fcs] in [(f,maybe 0 id mn, cat, tot) | (f,(c,mn)) <- fcs]
|
||||||
|
|
||||||
|
let fcatfreqlist = map fcatfreqs gcatlist
|
||||||
|
|
||||||
|
let relprobs cat = [(f, (fromIntegral i+1 :: Double) / (fromIntegral tot :: Double)) | (f,i,c,t) <- cat, let tot = t + length cat]
|
||||||
|
|
||||||
|
-- writeFile "allFunFreqs.txt" $ unlines $ [unwords [f,show i,c,show t] | (f,i,c,t) <- concat fcatfreqlist]
|
||||||
|
writeFile "NDPredTrans.probs" $ unlines $ [unwords [f,show n] | (f,n) <- concatMap relprobs fcatfreqlist]
|
||||||
Reference in New Issue
Block a user