1
0
forked from GitHub/gf-core
Files
gf-core/lib/src/translator/SenseSplit.hs

65 lines
2.3 KiB
Haskell

-- if a sense is split in the abstract syntax, create baseline implementations by copying the old lin rules
--
-- usage: writeSplitsFile "Dictionary.gf" "DictionarySwe.gf"
--
-- then open MergeDict.hs and do: mergeDict "DictionarySwe.gf" "tmp/splitDictionarySwe.gf" POld Nothing "tmp/DictionarySwe.gf"
--
-- AR 8 June 2015
import Data.Char
import qualified Data.Map as M
import qualified Data.Set as S
type Fun = String
type Cat = String
type Sense = String
type Rule = String
analyseFun :: Fun -> (Fun, Maybe Sense, Cat)
analyseFun = split . reverse where
split nuf = case break (=='_') nuf of
(tac, '_':'c':'s':'a':'M':w) -> (reverse w, Just "Masc", reverse tac)
(tac, '_':'m':'e':'F' :w) -> (reverse w, Just "Fem", reverse tac)
(tac, '_':d:'_' :w) | isDigit d -> (reverse w, Just [d], reverse tac)
(tac, '_':w) -> (reverse w, Nothing, reverse tac)
_ -> (reverse nuf, Nothing, "") ---- should not happen
mkFun :: (Fun, Maybe Sense, Cat) -> Fun
mkFun (f,ms,c) = f ++ s ++ "_" ++ c where
s = case ms of
Just g | elem s ["Masc","Fem"] -> g
Just i -> "_" ++ i -- integer index
_ -> ""
unsplitFun :: Fun -> Fun
unsplitFun f = let (w,_,c) = analyseFun f in mkFun (w,Nothing,c)
isSplitFun :: Fun -> Bool
isSplitFun f = case analyseFun f of
(_,ms,_) -> maybe False (const True) ms
allSplitFuns :: FilePath -> IO [Fun]
allSplitFuns absfile = do
ls <- readFile absfile >>= return . lines
return [f | "fun":f:_ <- map words ls, isSplitFun f]
baselineLinSplitFuns :: [Fun] -> FilePath -> IO [Rule]
baselineLinSplitFuns funs cncfile = do
let funset = S.fromList (funs ++ map unsplitFun funs)
ls <- readFile cncfile >>= return . lines
let lmap = M.fromList [(f,unwords ws) | "lin":f:ws <- map words ls, S.member f funset]
let look f = case M.lookup f lmap of
Just l -> (l,False)
_ -> case M.lookup (unsplitFun f) lmap of
Just l -> (l ++ " ---- sense to be split", True)
_ -> ("= variants {} ; ---- sense to be split",True)
return [unwords ["lin",f,l] | f <- funs, let (l,notYet) = look f, notYet]
writeSplitsFile :: FilePath -> FilePath -> IO ()
writeSplitsFile abs cnc = do
fs <- allSplitFuns abs
rs <- baselineLinSplitFuns fs cnc
writeFile ("tmp/split"++cnc) (unlines rs)