mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-24 02:12:50 -06:00
file SenseSplit.hs to create baseline linearizations for split senses by just using the old unsplit functions; this is better than failure at runtime. Applied to DictionarySwe.
This commit is contained in:
@@ -3772,6 +3772,8 @@ lin assist_N = mkN "assist" "assister" ; -- comment=2
|
|||||||
lin assist_V = mkV "assisterar" | mkV "hjälper" ; -- status=guess
|
lin assist_V = mkV "assisterar" | mkV "hjälper" ; -- status=guess
|
||||||
lin assist_V2 = mkV2 (mkV "assisterar") | mkV2 (mkV "hjälper") | mkV2 (mkV "bistå" "bistod" "bistått") ; -- status=guess
|
lin assist_V2 = mkV2 (mkV "assisterar") | mkV2 (mkV "hjälper") | mkV2 (mkV "bistå" "bistod" "bistått") ; -- status=guess
|
||||||
lin assistance_N = mkN "assist" "assister" ; -- comment=4
|
lin assistance_N = mkN "assist" "assister" ; -- comment=4
|
||||||
|
lin assistantFem_N = mkN "assistent" "assistenter" ; -- status=guess ---- sense to be split
|
||||||
|
lin assistantMasc_N = mkN "assistent" "assistenter" ; -- status=guess ---- sense to be split
|
||||||
lin assistant_A = mkA "assisterande" | mkA "biträdande" ; -- status=guess
|
lin assistant_A = mkA "assisterande" | mkA "biträdande" ; -- status=guess
|
||||||
lin assistant_N = mkN "assistent" "assistenter" ; -- status=guess
|
lin assistant_N = mkN "assistent" "assistenter" ; -- status=guess
|
||||||
lin assistive_A = variants {} ; -- status=guess
|
lin assistive_A = variants {} ; -- status=guess
|
||||||
@@ -5786,6 +5788,8 @@ lin betrayal_N = mkN "svek" neutrum ;
|
|||||||
lin betrayer_N = (mkN "angivare" utrum) | mkN "förrädare" | (mkN "svikare" utrum) ; -- status=guess status=guess status=guess
|
lin betrayer_N = (mkN "angivare" utrum) | mkN "förrädare" | (mkN "svikare" utrum) ; -- status=guess status=guess status=guess
|
||||||
lin betroth_V2 = mkV2 (mkV "förlova") | mkV2 (mkV "trolovar") ; -- status=guess, src=wikt status=guess, src=wikt
|
lin betroth_V2 = mkV2 (mkV "förlova") | mkV2 (mkV "trolovar") ; -- status=guess, src=wikt status=guess, src=wikt
|
||||||
lin betrothal_N = mkN "trolovning" ; -- status=guess
|
lin betrothal_N = mkN "trolovning" ; -- status=guess
|
||||||
|
lin betrothedFem_N = mkN "förlovad" | mkN "trolovad" | mkN "fästman" | mkN "fästmö" ; -- status=guess status=guess status=guess status=guess ---- sense to be split
|
||||||
|
lin betrothedMasc_N = mkN "förlovad" | mkN "trolovad" | mkN "fästman" | mkN "fästmö" ; -- status=guess status=guess status=guess status=guess ---- sense to be split
|
||||||
lin betrothed_N = mkN "förlovad" | mkN "trolovad" | mkN "fästman" | mkN "fästmö" ; -- status=guess status=guess status=guess status=guess
|
lin betrothed_N = mkN "förlovad" | mkN "trolovad" | mkN "fästman" | mkN "fästmö" ; -- status=guess status=guess status=guess status=guess
|
||||||
lin betsy_PN = mkPN "Betsy" ; -- src=eng status=guess
|
lin betsy_PN = mkPN "Betsy" ; -- src=eng status=guess
|
||||||
lin better_A = mkA "bra" "bra" "bra" "bättre" "bäst" ; -- status=guess
|
lin better_A = mkA "bra" "bra" "bra" "bättre" "bäst" ; -- status=guess
|
||||||
@@ -8895,6 +8899,8 @@ lin caption_N = mkN "bildtext" "bildtexter" ; -- comment=3
|
|||||||
lin captious_A = variants {} ; -- status=guess
|
lin captious_A = variants {} ; -- status=guess
|
||||||
lin captivate_V2 = variants {} ; -- status=guess
|
lin captivate_V2 = variants {} ; -- status=guess
|
||||||
lin captivation_N = mkN "hänförelse" utrum ; -- status=guess
|
lin captivation_N = mkN "hänförelse" utrum ; -- status=guess
|
||||||
|
lin captiveFem_N = mkN "fånge" ; -- status=guess ---- sense to be split
|
||||||
|
lin captiveMasc_N = mkN "fånge" ; -- status=guess ---- sense to be split
|
||||||
lin captive_A = variants {} ; -- status=guess
|
lin captive_A = variants {} ; -- status=guess
|
||||||
lin captive_N = mkN "fånge" ; -- status=guess
|
lin captive_N = mkN "fånge" ; -- status=guess
|
||||||
lin captivity_N = mkN "fångenskap" ; -- status=guess
|
lin captivity_N = mkN "fångenskap" ; -- status=guess
|
||||||
@@ -14865,6 +14871,8 @@ lin debate_V2 = dirV2 (partV (mkV "funderar")"ut") ; -- status=guess
|
|||||||
lin debater_N = mkN "debattör" "debattörer" ; -- status=guess
|
lin debater_N = mkN "debattör" "debattörer" ; -- status=guess
|
||||||
lin debauch_N = mkN "orgie" "orgier" ; -- status=guess
|
lin debauch_N = mkN "orgie" "orgier" ; -- status=guess
|
||||||
lin debauch_V2 = variants {} ; -- status=guess
|
lin debauch_V2 = variants {} ; -- status=guess
|
||||||
|
lin debaucheeFem_N = variants {} ; -- status=guess ---- sense to be split
|
||||||
|
lin debaucheeMasc_N = variants {} ; -- status=guess ---- sense to be split
|
||||||
lin debauchee_N = variants {} ; -- status=guess
|
lin debauchee_N = variants {} ; -- status=guess
|
||||||
lin debauchery_N = mkN "sedeslöshet" ; -- status=guess
|
lin debauchery_N = mkN "sedeslöshet" ; -- status=guess
|
||||||
lin debbie_PN = mkPN "Debbie" ; -- src=eng status=guess
|
lin debbie_PN = mkPN "Debbie" ; -- src=eng status=guess
|
||||||
@@ -24573,8 +24581,8 @@ lin greece_PN = mkPN "Grekland" neutrum;
|
|||||||
lin greed_N = mkN "glupskhet" ; -- comment=3
|
lin greed_N = mkN "glupskhet" ; -- comment=3
|
||||||
lin greediness_N = variants {} ; -- status=guess
|
lin greediness_N = variants {} ; -- status=guess
|
||||||
lin greedy_A = mkA "lysten" "lystet" ; -- comment=5
|
lin greedy_A = mkA "lysten" "lystet" ; -- comment=5
|
||||||
lin greekMasc_N = mkN "grek";
|
|
||||||
lin greekFem_N = mkN "grekiska" ;
|
lin greekFem_N = mkN "grekiska" ;
|
||||||
|
lin greekMasc_N = mkN "grek" ;
|
||||||
lin greek_A = mkA "grekisk" ;
|
lin greek_A = mkA "grekisk" ;
|
||||||
lin green_A = L.green_A ;
|
lin green_A = L.green_A ;
|
||||||
lin green_N = mkN "grönska" ;
|
lin green_N = mkN "grönska" ;
|
||||||
@@ -30648,6 +30656,7 @@ lin keurboom_N = variants {} ; -- status=guess
|
|||||||
lin kevin_PN = mkPN "Kevin" ; -- src=eng status=guess
|
lin kevin_PN = mkPN "Kevin" ; -- src=eng status=guess
|
||||||
lin key_1_N = mkN "nyckel" ;
|
lin key_1_N = mkN "nyckel" ;
|
||||||
lin key_2_N = mkN "tonart" "tonarter" ;
|
lin key_2_N = mkN "tonart" "tonarter" ;
|
||||||
|
lin key_3_N = mkN "nyckel" ; ---- sense to be split
|
||||||
lin key_A = mkA "huvudsaklig" ; ---- cat
|
lin key_A = mkA "huvudsaklig" ; ---- cat
|
||||||
lin key_N = mkN "nyckel" ;
|
lin key_N = mkN "nyckel" ;
|
||||||
lin key_V2 = variants {} ; -- status=guess
|
lin key_V2 = variants {} ; -- status=guess
|
||||||
@@ -34251,6 +34260,10 @@ lin matchmaking_N = variants {} ; -- status=guess
|
|||||||
lin matchstick_N = mkN "tändsticka" ;
|
lin matchstick_N = mkN "tändsticka" ;
|
||||||
lin matchweed_N = variants {} ; -- status=guess
|
lin matchweed_N = variants {} ; -- status=guess
|
||||||
lin matchwood_N = mkN "tändsticksträ" ; -- src=google
|
lin matchwood_N = mkN "tändsticksträ" ; -- src=google
|
||||||
|
lin mateFem_1_N = variants {} ; ---- sense to be split
|
||||||
|
lin mateFem_2_N = variants {} ; ---- sense to be split
|
||||||
|
lin mateMasc_1_N = variants {} ; ---- sense to be split
|
||||||
|
lin mateMasc_2_N = variants {} ; ---- sense to be split
|
||||||
lin mate_1_N = mkN "kompis" ; -- comment=8
|
lin mate_1_N = mkN "kompis" ; -- comment=8
|
||||||
lin mate_2_N = variants {} ; --
|
lin mate_2_N = variants {} ; --
|
||||||
lin mate_N = mkN "kompis" ; -- comment=8
|
lin mate_N = mkN "kompis" ; -- comment=8
|
||||||
@@ -40171,6 +40184,8 @@ lin pampas_grass_N = mkN "pampasgräs" neutrum ;
|
|||||||
lin pamper_V2 = mkV2 (mkV "daltar") ; -- status=guess, src=wikt
|
lin pamper_V2 = mkV2 (mkV "daltar") ; -- status=guess, src=wikt
|
||||||
lin pamperer_N = variants {} ; -- status=guess
|
lin pamperer_N = variants {} ; -- status=guess
|
||||||
lin pamphlet_N = mkN "broschyr" "broschyrer" ; -- status=guess
|
lin pamphlet_N = mkN "broschyr" "broschyrer" ; -- status=guess
|
||||||
|
lin pamphleteerFem_N = variants {} ; -- status=guess ---- sense to be split
|
||||||
|
lin pamphleteerMasc_N = variants {} ; -- status=guess ---- sense to be split
|
||||||
lin pamphleteer_N = variants {} ; -- status=guess
|
lin pamphleteer_N = variants {} ; -- status=guess
|
||||||
lin pamplona_PN = mkPN "Pamplona" neutrum ; -- src=geonames status=guess
|
lin pamplona_PN = mkPN "Pamplona" neutrum ; -- src=geonames status=guess
|
||||||
lin pan_N = mkN "schimpans" "schimpanser" | mkN "vågskål" ; -- SaldoWN -- comment=7
|
lin pan_N = mkN "schimpans" "schimpanser" | mkN "vågskål" ; -- SaldoWN -- comment=7
|
||||||
@@ -43519,8 +43534,8 @@ lin practice_V2 = mkV2 (mkV "öva") | mkV2 (mkV "träna") ; -- status=guess, src
|
|||||||
lin practician_N = mkN "praktiker" "praktikern" "praktiker" "praktikerna" ;
|
lin practician_N = mkN "praktiker" "praktikern" "praktiker" "praktikerna" ;
|
||||||
lin practise_V = mkV "övar" ; -- comment=8
|
lin practise_V = mkV "övar" ; -- comment=8
|
||||||
lin practise_V2 = variants {} ; -- mkV "övar" ; -- comment=8
|
lin practise_V2 = variants {} ; -- mkV "övar" ; -- comment=8
|
||||||
lin practitionerMasc_N = mkN "utövare" "utövare" ; -- status=guess
|
|
||||||
lin practitionerFem_N = mkN "utövare" "utövare" ; -- status=guess
|
lin practitionerFem_N = mkN "utövare" "utövare" ; -- status=guess
|
||||||
|
lin practitionerMasc_N = mkN "utövare" "utövare" ; -- status=guess
|
||||||
lin praenomen_N = variants {} ; -- status=guess
|
lin praenomen_N = variants {} ; -- status=guess
|
||||||
lin praesidium_N = mkN "presidium" "presidiet" "presidier" "presidierna" ;
|
lin praesidium_N = mkN "presidium" "presidiet" "presidier" "presidierna" ;
|
||||||
lin praetor_N = variants {} ; -- status=guess
|
lin praetor_N = variants {} ; -- status=guess
|
||||||
@@ -58544,8 +58559,8 @@ lin trainband_N = variants {} ; -- status=guess
|
|||||||
lin trainbandsman_N = variants {} ; -- status=guess
|
lin trainbandsman_N = variants {} ; -- status=guess
|
||||||
lin trainbearer_N = variants {} ; -- status=guess
|
lin trainbearer_N = variants {} ; -- status=guess
|
||||||
lin trained_A = variants {} ; --
|
lin trained_A = variants {} ; --
|
||||||
lin traineeMasc_N = mkN "praktikant" "praktikanter" ; -- comment=5
|
|
||||||
lin traineeFem_N = mkN "praktikant" "praktikanter" ; -- comment=5
|
lin traineeFem_N = mkN "praktikant" "praktikanter" ; -- comment=5
|
||||||
|
lin traineeMasc_N = mkN "praktikant" "praktikanter" ; -- comment=5
|
||||||
lin traineeship_N = mkN "praktisering" ; -- status=guess
|
lin traineeship_N = mkN "praktisering" ; -- status=guess
|
||||||
lin trainer_N = mkN "tränare" utrum | mkN "tränare" utrum ; -- SaldoWN -- comment=3
|
lin trainer_N = mkN "tränare" utrum | mkN "tränare" utrum ; -- SaldoWN -- comment=3
|
||||||
lin training_N = mkN "träning" | mkN "utbildning" ;
|
lin training_N = mkN "träning" | mkN "utbildning" ;
|
||||||
@@ -60151,6 +60166,8 @@ lin understanding_N = mkN "förståelse" utrum | mkN "förståelse" "förståels
|
|||||||
lin understate_V2 = variants {} ; -- status=guess
|
lin understate_V2 = variants {} ; -- status=guess
|
||||||
lin understatement_N = mkN "understatement" neutrum | mkN "understatement" neutrum ; -- SaldoWN
|
lin understatement_N = mkN "understatement" neutrum | mkN "understatement" neutrum ; -- SaldoWN
|
||||||
lin understock_V2 = variants {} ; -- status=guess
|
lin understock_V2 = variants {} ; -- status=guess
|
||||||
|
lin understudyFem_N = mkN "inhoppare" utrum ; -- status=guess ---- sense to be split
|
||||||
|
lin understudyMasc_N = mkN "inhoppare" utrum ; -- status=guess ---- sense to be split
|
||||||
lin understudy_N = mkN "inhoppare" utrum ; -- status=guess
|
lin understudy_N = mkN "inhoppare" utrum ; -- status=guess
|
||||||
lin understudy_V2 = variants {} ; -- status=guess
|
lin understudy_V2 = variants {} ; -- status=guess
|
||||||
lin undersurface_N = mkN "underyta" ; -- src=google
|
lin undersurface_N = mkN "underyta" ; -- src=google
|
||||||
@@ -63020,6 +63037,8 @@ lin weathervane_N = mkN "vindflöjel" ;
|
|||||||
lin weave_N = mkN "väv" ; -- status=guess
|
lin weave_N = mkN "väv" ; -- status=guess
|
||||||
lin weave_V = mkV "väver" ; -- status=guess
|
lin weave_V = mkV "väver" ; -- status=guess
|
||||||
lin weave_V2 = mkV2 (mkV "väva") ; -- status=guess, src=wikt
|
lin weave_V2 = mkV2 (mkV "väva") ; -- status=guess, src=wikt
|
||||||
|
lin weaverFem_N = mkN "vävare" utrum ; -- status=guess ---- sense to be split
|
||||||
|
lin weaverMasc_N = mkN "vävare" utrum ; -- status=guess ---- sense to be split
|
||||||
lin weaver_N = mkN "vävare" utrum ; -- status=guess
|
lin weaver_N = mkN "vävare" utrum ; -- status=guess
|
||||||
lin weaverbird_N = mkN "vävarfågel" ;
|
lin weaverbird_N = mkN "vävarfågel" ;
|
||||||
lin weaving_N = mkN "vävning" ;
|
lin weaving_N = mkN "vävning" ;
|
||||||
@@ -64528,7 +64547,6 @@ lin zymoid_A = variants {} ; -- status=guess
|
|||||||
lin zymology_N = variants {} ; -- status=guess
|
lin zymology_N = variants {} ; -- status=guess
|
||||||
lin zymosis_N = variants {} ; -- status=guess
|
lin zymosis_N = variants {} ; -- status=guess
|
||||||
lin zymotic_A = variants {} ; -- status=guess
|
lin zymotic_A = variants {} ; -- status=guess
|
||||||
|
|
||||||
oper OP_by_Prep : Prep = mkPrep "genom" ;
|
oper OP_by_Prep : Prep = mkPrep "genom" ;
|
||||||
oper OP_for_Prep : Prep = S.for_Prep ;
|
oper OP_for_Prep : Prep = S.for_Prep ;
|
||||||
oper OP_on_Prep : Prep = S.on_Prep ;
|
oper OP_on_Prep : Prep = S.on_Prep ;
|
||||||
@@ -64835,5 +64853,4 @@ oper OP_shrug_V2 : V2 = mkV2 (mkV (mkV "rycka") "på axlarna") ; -- status=guess
|
|||||||
oper OP_shut_V2 : V2 = mkV2 (mkV "stänga") ; -- status=guess, src=wikt
|
oper OP_shut_V2 : V2 = mkV2 (mkV "stänga") ; -- status=guess, src=wikt
|
||||||
oper OP_take_V2 : V2 = mkV2 I.taga_V ;
|
oper OP_take_V2 : V2 = mkV2 I.taga_V ;
|
||||||
oper OP_urge_V2 : V2 = mkV2 (mkV "uppmanar") ; -- status=guess, src=wikt
|
oper OP_urge_V2 : V2 = mkV2 (mkV "uppmanar") ; -- status=guess, src=wikt
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -49,6 +49,7 @@ mergeDict old new pref comm file = do
|
|||||||
olds1 <- readFile old >>= return . lines
|
olds1 <- readFile old >>= return . lines
|
||||||
news1 <- readFile new >>= return . lines
|
news1 <- readFile new >>= return . lines
|
||||||
let (preamble,olds2) = break ((== ["lin"]) . take 1 . words) olds1
|
let (preamble,olds2) = break ((== ["lin"]) . take 1 . words) olds1
|
||||||
|
let lastopers = [l | l@('o':'p':'e':'r':_) <- olds2]
|
||||||
let olds = [mkRule 0 (w:ws) | w:ws <- map words olds2, w == "lin"]
|
let olds = [mkRule 0 (w:ws) | w:ws <- map words olds2, w == "lin"]
|
||||||
let news = [mkRule 1 (w:ws) | w:ws <- map words news1, w == "lin"]
|
let news = [mkRule 1 (w:ws) | w:ws <- map words news1, w == "lin"]
|
||||||
let lins = sort $ olds ++ news
|
let lins = sort $ olds ++ news
|
||||||
@@ -56,6 +57,7 @@ mergeDict old new pref comm file = do
|
|||||||
let lins2 = map (mergeRule pref comm) linss
|
let lins2 = map (mergeRule pref comm) linss
|
||||||
writeFile file $ unlines preamble
|
writeFile file $ unlines preamble
|
||||||
appendFile file $ unlines $ map prRule lins2
|
appendFile file $ unlines $ map prRule lins2
|
||||||
|
appendFile file $ unlines $ lastopers
|
||||||
appendFile file "}"
|
appendFile file "}"
|
||||||
|
|
||||||
data Rule = R {fun :: String, priority :: Int, lins :: [[String]], comment :: [String]} -- fun, variants, comment
|
data Rule = R {fun :: String, priority :: Int, lins :: [[String]], comment :: [String]} -- fun, variants, comment
|
||||||
|
|||||||
64
lib/src/translator/SenseSplit.hs
Normal file
64
lib/src/translator/SenseSplit.hs
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
-- if a sense is split in the abstract syntax, create baseline implementations by copying the old lin rules
|
||||||
|
--
|
||||||
|
-- usage: writeSplitsFile "Dictionary.gf" "DictionarySwe.gf"
|
||||||
|
--
|
||||||
|
-- then open MergeDict.hs and do: mergeDict "DictionarySwe.gf" "tmp/splitDictionarySwe.gf" POld Nothing "tmp/DictionarySwe.gf"
|
||||||
|
--
|
||||||
|
-- AR 8 June 2015
|
||||||
|
|
||||||
|
import Data.Char
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Set as S
|
||||||
|
|
||||||
|
-- | Name of a GF function, e.g. @greekMasc_N@ or @key_2_N@.
type Fun = String

-- | Category part of a function name: the text after its last underscore.
type Cat = String

-- | Sense marker of a split function: @"Masc"@, @"Fem"@, or a one-digit index.
type Sense = String

-- | One @lin@ rule, kept as a single line of GF source text.
type Rule = String

-- | Decompose a function name into (base word, optional sense marker, category).
--
-- The name is reversed so that the category (everything after the last
-- underscore) can be cut off with 'break'; the sense marker, if any, is then
-- recognised as a reversed prefix of the remainder:
--
-- > analyseFun "greekMasc_N" == ("greek", Just "Masc", "N")
-- > analyseFun "key_3_N"     == ("key",   Just "3",    "N")
-- > analyseFun "key_N"       == ("key",   Nothing,     "N")
--
-- Only one marker is removed: for a name such as @mateFem_1_N@ the numeric
-- index is stripped and @Fem@ stays part of the base word. Only single-digit
-- indices are recognised.
analyseFun :: Fun -> (Fun, Maybe Sense, Cat)
analyseFun name = case break (== '_') (reverse name) of
  (tac, '_' : 'c' : 's' : 'a' : 'M' : w) -> (reverse w, Just "Masc", reverse tac)
  (tac, '_' : 'm' : 'e' : 'F' : w) -> (reverse w, Just "Fem", reverse tac)
  (tac, '_' : d : '_' : w) | isDigit d -> (reverse w, Just [d], reverse tac)
  (tac, '_' : w) -> (reverse w, Nothing, reverse tac)
  _ -> (name, Nothing, "") ---- should not happen: name without underscore

-- | Inverse of 'analyseFun': rebuild a function name from its parts.
--
-- Gender markers are glued directly to the base word (@greek@ + @Masc@),
-- any other marker is treated as an index and attached with an underscore
-- (@key@ + @_3@); the category always follows after a final underscore.
mkFun :: (Fun, Maybe Sense, Cat) -> Fun
mkFun (f, ms, c) = f ++ marker ++ "_" ++ c
  where
    marker = case ms of
      -- The guard must inspect the matched sense @g@. Testing the marker
      -- being defined here would make the binding depend on itself and
      -- never terminate for any 'Just' sense.
      Just g | g `elem` ["Masc", "Fem"] -> g
      Just i -> "_" ++ i -- integer index
      Nothing -> ""
|
||||||
|
|
||||||
|
-- | Name of the unsplit function that a (possibly split) function derives
-- from: the sense marker found by 'analyseFun' is dropped and the name is
-- reassembled from base word and category alone.
unsplitFun :: Fun -> Fun
unsplitFun f = mkFun (base, Nothing, cat)
  where
    (base, _, cat) = analyseFun f
|
||||||
|
|
||||||
|
-- | True exactly when 'analyseFun' finds a sense marker in the name.
isSplitFun :: Fun -> Bool
isSplitFun f = case analyseFun f of
  (_, Just _, _) -> True
  _ -> False
|
||||||
|
|
||||||
|
|
||||||
|
-- | Collect the split functions declared in an abstract syntax file.
--
-- A line counts as a declaration when its first word is @fun@; the second
-- word is taken as the function name and kept if 'isSplitFun' accepts it.
-- Names are returned in file order.
allSplitFuns :: FilePath -> IO [Fun]
allSplitFuns absfile = do
  contents <- readFile absfile
  return [f | "fun" : f : _ <- map words (lines contents), isSplitFun f]
|
||||||
|
|
||||||
|
-- | Produce baseline @lin@ rules for the split functions that have no rule
-- of their own in the given concrete syntax file.
--
-- For each such function the right-hand side of its unsplit counterpart
-- (see 'unsplitFun') is copied and tagged @---- sense to be split@; if the
-- counterpart has no rule either, an empty @variants {}@ rule is emitted.
-- Functions that already have a @lin@ rule in the file yield nothing.
baselineLinSplitFuns :: [Fun] -> FilePath -> IO [Rule]
baselineLinSplitFuns funs cncfile = do
  contents <- readFile cncfile
  let wanted = S.fromList (funs ++ map unsplitFun funs)
      -- Right-hand sides (from "=" onwards) of the relevant rules, keyed by
      -- function name; whitespace is normalised by words/unwords.
      bodies =
        M.fromList
          [ (f, unwords ws)
          | "lin" : f : ws <- map words (lines contents)
          , S.member f wanted
          ]
      -- Nothing when the function is already linearized, otherwise the
      -- body of the baseline rule to generate.
      baseline f
        | M.member f bodies = Nothing
        | otherwise = Just $ case M.lookup (unsplitFun f) bodies of
            Just body -> body ++ " ---- sense to be split"
            Nothing -> "= variants {} ; ---- sense to be split"
  return [unwords ["lin", f, body] | f <- funs, Just body <- [baseline f]]
|
||||||
|
|
||||||
|
-- | Entry point: read the split functions from the abstract syntax file,
-- build their baseline rules from the concrete syntax file, and write them,
-- one rule per line, to @tmp/split@ followed by the concrete file name.
--
-- NOTE(review): the output path is formed by plain concatenation, so this
-- presumably expects a bare file name for the concrete syntax and an
-- existing @tmp@ directory - confirm against how it is invoked.
writeSplitsFile :: FilePath -> FilePath -> IO ()
writeSplitsFile abs cnc = do
  rules <- allSplitFuns abs >>= \fs -> baselineLinSplitFuns fs cnc
  writeFile ("tmp/split" ++ cnc) (unlines rules)
|
||||||
|
|
||||||
Reference in New Issue
Block a user