forked from GitHub/gf-core
file SenseSplits.hs to create baseline linearizations for split senses by just using the old unsplit functions; this is better than failure at runtime. Applied to DictionarySwe.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -49,6 +49,7 @@ mergeDict old new pref comm file = do
|
||||
olds1 <- readFile old >>= return . lines
|
||||
news1 <- readFile new >>= return . lines
|
||||
let (preamble,olds2) = break ((== ["lin"]) . take 1 . words) olds1
|
||||
let lastopers = [l | l@('o':'p':'e':'r':_) <- olds2]
|
||||
let olds = [mkRule 0 (w:ws) | w:ws <- map words olds2, w == "lin"]
|
||||
let news = [mkRule 1 (w:ws) | w:ws <- map words news1, w == "lin"]
|
||||
let lins = sort $ olds ++ news
|
||||
@@ -56,6 +57,7 @@ mergeDict old new pref comm file = do
|
||||
let lins2 = map (mergeRule pref comm) linss
|
||||
writeFile file $ unlines preamble
|
||||
appendFile file $ unlines $ map prRule lins2
|
||||
appendFile file $ unlines $ lastopers
|
||||
appendFile file "}"
|
||||
|
||||
data Rule = R {fun :: String, priority :: Int, lins :: [[String]], comment :: [String]} -- fun, variants, comment
|
||||
|
||||
64
lib/src/translator/SenseSplit.hs
Normal file
64
lib/src/translator/SenseSplit.hs
Normal file
@@ -0,0 +1,64 @@
|
||||
-- if a sense is split in the abstract syntax, create baseline implementations by copying the old lin rules
|
||||
--
|
||||
-- usage: writeSplitsFile "Dictionary.gf" "DictionarySwe.gf"
|
||||
--
|
||||
-- then open MergeDict.hs and do: mergeDict "DictionarySwe.gf" "tmp/splitDictionarySwe.gf" POld Nothing "tmp/DictionarySwe.gf"
|
||||
--
|
||||
-- AR 8 June 2015
|
||||
|
||||
import Data.Char
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
|
||||
type Fun = String
|
||||
type Cat = String
|
||||
type Sense = String
|
||||
type Rule = String
|
||||
|
||||
analyseFun :: Fun -> (Fun, Maybe Sense, Cat)
|
||||
analyseFun = split . reverse where
|
||||
split nuf = case break (=='_') nuf of
|
||||
(tac, '_':'c':'s':'a':'M':w) -> (reverse w, Just "Masc", reverse tac)
|
||||
(tac, '_':'m':'e':'F' :w) -> (reverse w, Just "Fem", reverse tac)
|
||||
(tac, '_':d:'_' :w) | isDigit d -> (reverse w, Just [d], reverse tac)
|
||||
(tac, '_':w) -> (reverse w, Nothing, reverse tac)
|
||||
_ -> (reverse nuf, Nothing, "") ---- should not happen
|
||||
|
||||
mkFun :: (Fun, Maybe Sense, Cat) -> Fun
|
||||
mkFun (f,ms,c) = f ++ s ++ "_" ++ c where
|
||||
s = case ms of
|
||||
Just g | elem s ["Masc","Fem"] -> g
|
||||
Just i -> "_" ++ i -- integer index
|
||||
_ -> ""
|
||||
|
||||
unsplitFun :: Fun -> Fun
|
||||
unsplitFun f = let (w,_,c) = analyseFun f in mkFun (w,Nothing,c)
|
||||
|
||||
isSplitFun :: Fun -> Bool
|
||||
isSplitFun f = case analyseFun f of
|
||||
(_,ms,_) -> maybe False (const True) ms
|
||||
|
||||
|
||||
allSplitFuns :: FilePath -> IO [Fun]
|
||||
allSplitFuns absfile = do
|
||||
ls <- readFile absfile >>= return . lines
|
||||
return [f | "fun":f:_ <- map words ls, isSplitFun f]
|
||||
|
||||
baselineLinSplitFuns :: [Fun] -> FilePath -> IO [Rule]
|
||||
baselineLinSplitFuns funs cncfile = do
|
||||
let funset = S.fromList (funs ++ map unsplitFun funs)
|
||||
ls <- readFile cncfile >>= return . lines
|
||||
let lmap = M.fromList [(f,unwords ws) | "lin":f:ws <- map words ls, S.member f funset]
|
||||
let look f = case M.lookup f lmap of
|
||||
Just l -> (l,False)
|
||||
_ -> case M.lookup (unsplitFun f) lmap of
|
||||
Just l -> (l ++ " ---- sense to be split", True)
|
||||
_ -> ("= variants {} ; ---- sense to be split",True)
|
||||
return [unwords ["lin",f,l] | f <- funs, let (l,notYet) = look f, notYet]
|
||||
|
||||
writeSplitsFile :: FilePath -> FilePath -> IO ()
|
||||
writeSplitsFile abs cnc = do
|
||||
fs <- allSplitFuns abs
|
||||
rs <- baselineLinSplitFuns fs cnc
|
||||
writeFile ("tmp/split"++cnc) (unlines rs)
|
||||
|
||||
Reference in New Issue
Block a user