file SenseSplits.hs to create baseline linearizations for split senses by just using the old unsplit functions; this is better than failure at runtime. Applied to DictionarySwe.

2015-06-08 08:47:32 +00:00
parent a128373dce
commit 19707b3efd
3 changed files with 5645 additions and 5562 deletions
--- a/lib/src/translator/DictionarySwe.gf
+++ b/lib/src/translator/DictionarySwe.gf
--- a/lib/src/translator/MergeDict.hs
+++ b/lib/src/translator/MergeDict.hs
@@ -49,6 +49,7 @@ mergeDict old new pref comm file = do
  olds1 <- readFile old >>= return . lines
  news1 <- readFile new >>= return . lines
  let (preamble,olds2) = break ((== ["lin"]) . take 1 . words) olds1
+  let lastopers = [l | l@('o':'p':'e':'r':_) <- olds2] 
  let olds = [mkRule 0 (w:ws) | w:ws <- map words olds2, w == "lin"]
  let news = [mkRule 1 (w:ws) | w:ws <- map words news1, w == "lin"]
  let lins = sort $ olds ++ news
@@ -56,6 +57,7 @@ mergeDict old new pref comm file = do
  let lins2 = map (mergeRule pref comm) linss
  writeFile file $ unlines preamble
  appendFile file $ unlines $ map prRule lins2
+  appendFile file $ unlines $ lastopers
  appendFile file "}"

 data Rule = R {fun :: String, priority :: Int, lins :: [[String]], comment :: [String]} -- fun, variants, comment
--- a/lib/src/translator/SenseSplit.hs
+++ b/lib/src/translator/SenseSplit.hs
@@ -0,0 +1,64 @@
+-- if a sense is split in the abstract syntax, create baseline implementations by copying the old lin rules
+--
+-- usage: writeSplitsFile "Dictionary.gf" "DictionarySwe.gf"
+--
+-- then open MergeDict.hs and do: mergeDict "DictionarySwe.gf" "tmp/splitDictionarySwe.gf" POld Nothing "tmp/DictionarySwe.gf"
+--
+-- AR 8 June 2015
+
+import Data.Char
+import qualified Data.Map as M
+import qualified Data.Set as S
+
+type Fun = String
+type Cat = String
+type Sense = String
+type Rule = String
+
+analyseFun :: Fun -> (Fun, Maybe Sense, Cat)
+analyseFun = split . reverse where
+  split nuf = case break (=='_') nuf of
+    (tac, '_':'c':'s':'a':'M':w)    -> (reverse w, Just "Masc", reverse tac)
+    (tac, '_':'m':'e':'F'    :w)    -> (reverse w, Just "Fem",  reverse tac)
+    (tac, '_':d:'_' :w) | isDigit d -> (reverse w, Just [d],    reverse tac)
+    (tac, '_':w)                    -> (reverse w, Nothing,     reverse tac)
+    _ -> (reverse nuf, Nothing, "") ---- should not happen
+
+mkFun :: (Fun, Maybe Sense, Cat) -> Fun
+mkFun (f,ms,c) = f ++ s ++ "_" ++ c where
+  s = case ms of
+    Just g | elem s ["Masc","Fem"] -> g
+    Just i -> "_" ++ i  -- integer index
+    _ -> ""
+
+unsplitFun :: Fun -> Fun
+unsplitFun f = let (w,_,c) = analyseFun f in mkFun (w,Nothing,c)
+
+isSplitFun :: Fun -> Bool
+isSplitFun f = case analyseFun f of
+  (_,ms,_) -> maybe False (const True) ms
+
+
+allSplitFuns :: FilePath -> IO [Fun]
+allSplitFuns absfile = do
+  ls <- readFile absfile >>= return . lines
+  return [f | "fun":f:_ <- map words ls, isSplitFun f]
+
+baselineLinSplitFuns :: [Fun] -> FilePath -> IO [Rule]
+baselineLinSplitFuns funs cncfile = do
+  let funset = S.fromList (funs ++ map unsplitFun funs)
+  ls <- readFile cncfile >>= return . lines
+  let lmap = M.fromList [(f,unwords ws) | "lin":f:ws <- map words ls, S.member f funset]
+  let look f = case M.lookup f lmap of
+        Just l -> (l,False)
+        _ -> case M.lookup (unsplitFun f) lmap of
+          Just l -> (l ++ " ---- sense to be split", True)
+          _ -> ("= variants {} ; ---- sense to be split",True)
+  return [unwords ["lin",f,l] | f <- funs, let (l,notYet) = look f, notYet] 
+
+writeSplitsFile :: FilePath -> FilePath -> IO ()
+writeSplitsFile abs cnc = do
+  fs <- allSplitFuns abs
+  rs <- baselineLinSplitFuns fs cnc
+  writeFile ("tmp/split"++cnc) (unlines rs)
+