From dec8e7661636eb138ac3d0d5cfaad9b148efcb60 Mon Sep 17 00:00:00 2001 From: aarne Date: Wed, 28 Jan 2004 12:42:20 +0000 Subject: [PATCH] Unicode. Batch transl into HTML. --- src/GF.hs | 2 +- src/GF/API.hs | 5 ++++ src/GF/Compile/Update.hs | 7 +++++- src/GF/Shell.hs | 2 +- src/GF/Shell/Commands.hs | 6 +++-- src/GF/Source/SourceToGrammar.hs | 2 +- src/GF/Text/Arabic.hs | 3 ++- src/GF/Text/Hebrew.hs | 3 ++- src/GF/Text/Unicode.hs | 19 +++++++++++--- src/GF/Translate/GFT.hs | 43 ++++++++++++++++++++++++++++++++ src/Makefile | 2 ++ src/Today.hs | 2 +- 12 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 src/GF/Translate/GFT.hs diff --git a/src/GF.hs b/src/GF.hs index c153b55b6..af75126b2 100644 --- a/src/GF.hs +++ b/src/GF.hs @@ -24,7 +24,7 @@ main = do let (os,fs) = getOptions "-" xs java = oElem forJava os isNew = oElem newParser os ---- temporary hack to have two parallel GUIs - putStrLn $ if java then encodeUTF8 welcomeMsg else welcomeMsg + putStrLnFlush $ if java then encodeUTF8 welcomeMsg else welcomeMsg st <- case fs of _ -> useIOE emptyShellState $ foldM (shellStateFromFiles os) emptyShellState fs --- _ -> return emptyShellState diff --git a/src/GF/API.hs b/src/GF/API.hs index 7053a1b67..7c708c933 100644 --- a/src/GF/API.hs +++ b/src/GF/API.hs @@ -304,3 +304,8 @@ optEncodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of Just "utf8" -> id _ -> encodeUTF8 +optDecodeUTF8 :: GFGrammar -> String -> String +optDecodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of + Just "utf8" -> decodeUTF8 + _ -> id + diff --git a/src/GF/Compile/Update.hs b/src/GF/Compile/Update.hs index 4eb4849ef..ae8dc4aac 100644 --- a/src/GF/Compile/Update.hs +++ b/src/GF/Compile/Update.hs @@ -48,8 +48,13 @@ unifyAnyInfo c i j = errIn ("combining information for" +++ prt c) $ case (i,j) liftM3 CncCat (unifPerhaps mc1 mc2) (unifPerhaps mf1 mf2) (unifPerhaps mp1 mp2) (CncFun m mt1 md1, CncFun _ mt2 md2) -> liftM2 (CncFun m) (unifPerhaps mt1 mt2) (unifPerhaps md1 md2) ---- adding defs +-- for bw compatibility with unspecified printnames in old GF + (CncFun Nothing Nope (Yes pr),_) -> + unifyAnyInfo c (CncCat Nope Nope (Yes pr)) j + (_,CncFun Nothing Nope (Yes pr)) -> + unifyAnyInfo c i (CncCat Nope Nope (Yes pr)) - _ -> Bad $ "cannot unify information for" +++ show i + _ -> Bad $ "cannot unify informations in" +++ show i +++ "and" +++ show j --- these auxiliaries should be somewhere else since they don't use the info types diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs index dba4e1823..352f220d9 100644 --- a/src/GF/Shell.hs +++ b/src/GF/Shell.hs @@ -13,7 +13,7 @@ import API import IOGrammar import Compile ---- import GFTex ------import TeachYourself -- also a subshell +---- import TeachYourself -- also a subshell import ShellState import Option diff --git a/src/GF/Shell/Commands.hs b/src/GF/Shell/Commands.hs index e1c0736ab..32c496893 100644 --- a/src/GF/Shell/Commands.hs +++ b/src/GF/Shell/Commands.hs @@ -35,6 +35,7 @@ import Custom import qualified Ident as I import Option import Str (sstr) ---- +import UTF8 ---- import Random (mkStdGen, newStdGen) import Monad (liftM2, foldM) @@ -398,7 +399,8 @@ displaySStateIn env state = (tree',msg,menu) where ---- the Boolean is a temporary hack to have two parallel GUIs displaySStateJavaX :: Bool -> CEnv -> SState -> String -displaySStateJavaX isNew env state = unlines $ tagXML "gfedit" $ concat [ +displaySStateJavaX isNew env state = encodeUTF8 $ mkUnicode $ + unlines $ tagXML "gfedit" $ concat [ tagXML "linearizations" (concat [tagAttrXML "lin" ("lang", prLanguage lang) ss | (lang,ss) <- lins]), tagXML "tree" tree, @@ -414,7 +416,7 @@ displaySStateJavaX isNew env state = unlines $ tagXML "gfedit" $ concat [ opts = addOptions (optsSState state) -- state opts override (addOption (markLin mark) (globalOptions env)) lin (n,gr) = (n, map uni $ linearizeState noWrap opts gr zipper) where - uni = {- optEncodeUTF8 gr . -} mkUnicode + uni = optDecodeUTF8 gr exp = prprTree $ loc2tree zipper zipper = stateSState state linAll = map lin lgrs diff --git a/src/GF/Source/SourceToGrammar.hs b/src/GF/Source/SourceToGrammar.hs index c01d06c9b..5e085b199 100644 --- a/src/GF/Source/SourceToGrammar.hs +++ b/src/GF/Source/SourceToGrammar.hs @@ -250,7 +250,7 @@ transCncDef x = case x of DefPrintFun defs -> do defs' <- liftM concat $ mapM transPrintDef defs returnl [(f, G.CncFun Nothing nope (yes e)) | (f,e) <- defs'] - DefPrintOld defs -> do -- a guess, for backward compatibility + DefPrintOld defs -> do --- a guess, for backward compatibility defs' <- liftM concat $ mapM transPrintDef defs returnl [(f, G.CncFun Nothing nope (yes e)) | (f,e) <- defs'] DefFlag defs -> liftM Right $ mapM transFlagDef defs diff --git a/src/GF/Text/Arabic.hs b/src/GF/Text/Arabic.hs index 6df79c4a9..6882176eb 100644 --- a/src/GF/Text/Arabic.hs +++ b/src/GF/Text/Arabic.hs @@ -1,7 +1,8 @@ module Arabic where mkArabic :: String -> String -mkArabic = reverse . unwords . (map mkArabicWord) . words +mkArabic = unwords . (map mkArabicWord) . words +----mkArabic = reverse . unwords . (map mkArabicWord) . words --- reverse : assumes everything's on same line type ArabicChar = Char diff --git a/src/GF/Text/Hebrew.hs b/src/GF/Text/Hebrew.hs index b5a827518..5c163fbb8 100644 --- a/src/GF/Text/Hebrew.hs +++ b/src/GF/Text/Hebrew.hs @@ -1,7 +1,8 @@ module Hebrew where mkHebrew :: String -> String -mkHebrew = reverse . mkHebrewWord +mkHebrew = mkHebrewWord +----mkHebrew = reverse . mkHebrewWord --- reverse : assumes everything's on same line type HebrewChar = Char diff --git a/src/GF/Text/Unicode.hs b/src/GF/Text/Unicode.hs index 197759213..4d7da0c26 100644 --- a/src/GF/Text/Unicode.hs +++ b/src/GF/Text/Unicode.hs @@ -14,15 +14,17 @@ import ExtendedArabic (mkArabic0600) import ExtendedArabic (mkExtendedArabic) import ExtraDiacritics (mkExtraDiacritics) +import Char + -- ad hoc Unicode conversions from different alphabets -- AR 12/4/2000, 18/9/2001, 30/5/2002, 26/1/2004 mkUnicode s = case s of - '/':'/':cs -> mkGreek unic ++ mkUnicode rest + '/':'/':cs -> treat [] mkGreek unic ++ mkUnicode rest '/':'+':cs -> mkHebrew unic ++ mkUnicode rest '/':'-':cs -> mkArabic unic ++ mkUnicode rest - '/':'_':cs -> mkRussian unic ++ mkUnicode rest + '/':'_':cs -> treat [] mkRussian unic ++ mkUnicode rest '/':'*':cs -> mkRusKOI8 unic ++ mkUnicode rest '/':'E':cs -> mkEthiopic unic ++ mkUnicode rest '/':'T':cs -> mkTamil unic ++ mkUnicode rest @@ -36,8 +38,19 @@ mkUnicode s = case s of c:cs -> c:mkUnicode cs _ -> s where - (unic,rest) = remClosing [] $ drop 2 s + (unic,rest) = remClosing [] $ dropWhile isSpace $ drop 2 s remClosing u s = case s of c:'/':s | elem c "/+-_*ETC&LJ6AX" -> (reverse u, s) --- end need not match c:cs -> remClosing (c:u) cs _ -> (reverse u,[]) -- forgiving missing end + + -- don't convert XML tags --- assumes <> always means XML tags + treat old mk s = case s of + '<':cs -> mk (reverse old) ++ '<':noTreat cs + c:cs -> treat (c:old) mk cs + _ -> mk (reverse old) + where + noTreat s = case s of + '>':cs -> '>' : treat [] mk cs + c:cs -> c : noTreat cs + _ -> s diff --git a/src/GF/Translate/GFT.hs b/src/GF/Translate/GFT.hs new file mode 100644 index 000000000..0dd42c6d2 --- /dev/null +++ b/src/GF/Translate/GFT.hs @@ -0,0 +1,43 @@ +module Main where + +import ShellState +import GetGFC +import API + +import Unicode +import UTF8 +import UseIO +import Option +import Modules (emptyMGrammar) ---- +import Operations + +import System +import List + + +main :: IO () +main = do + file:_ <- getArgs + let opts = noOptions + can <- useIOE (error "no grammar file") $ getCanonGrammar file + st <- err error return $ + grammar2shellState opts (can, emptyMGrammar) + let grs = allStateGrammars st + let cat = firstCatOpts opts (firstStateGrammar st) + +---- interact (doTranslate grs cat) + s <- getLine + putStrLnFlush $ doTranslate grs cat s + +doTranslate grs cat s = + let ss = [l +++ ":" +++ s | (l,s) <- zip (map (prIdent . cncId) grs) + (translateBetweenAll grs cat s)] + in mkHTML ss + +mkHTML = unlines . htmlDoc . intersperse "

" . map (encodeUTF8 . mkUnicode) . sort + +htmlDoc ss = "":metaHead:"": ss ++ ["",""] + +metaHead = + "" + diff --git a/src/Makefile b/src/Makefile index 5f739e040..069a34e51 100644 --- a/src/Makefile +++ b/src/Makefile @@ -9,6 +9,8 @@ all: make today ; make ghc ghc: $(GHMAKE) $(GHCFLAGS) $(GHCINCLUDE) $(GHCFUDFLAG) --make GF.hs -o gf2+ ; strip gf2+ ; mv gf2+ ../bin/ +gft: + $(GHMAKE) $(GHCFLAGS) $(GHCINCLUDENOFUD) -itranslate --make translate/GFT.hs -o gft ; strip gft ; mv gft ../bin/ nofud: $(GHMAKE) $(GHCFLAGS) $(GHCINCLUDENOFUD) --make GF.hs -o gf2n ; strip gf2n ; mv gf2n ../bin/ windows: diff --git a/src/Today.hs b/src/Today.hs index 2ade42c90..281ea68a1 100644 --- a/src/Today.hs +++ b/src/Today.hs @@ -1 +1 @@ -module Today where today = "Mon Jan 26 10:15:46 CET 2004" +module Today where today = "Wed Jan 28 14:24:20 CET 2004"