From 5792a86afb836359b2f1818c0a10c160b51473cb Mon Sep 17 00:00:00 2001 From: aarne Date: Mon, 26 Jan 2004 08:39:52 +0000 Subject: [PATCH] Improved unicode output. --- src/GF/Compile/Optimize.hs | 10 ++++++++- src/GF/Shell/Commands.hs | 2 +- src/GF/Shell/JGF.hs | 2 +- src/GF/Text/Hebrew.hs | 6 +++-- src/GF/Text/Unicode.hs | 45 +++++++++++++++++++------------------- src/Today.hs | 2 +- 6 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/GF/Compile/Optimize.hs b/src/GF/Compile/Optimize.hs index ceec2c1b6..bb54df050 100644 --- a/src/GF/Compile/Optimize.hs +++ b/src/GF/Compile/Optimize.hs @@ -178,7 +178,7 @@ evalPrintname gr c ppr lin = case ppr of Yes pr -> comp pr _ -> case lin of - Yes t -> return $ K $ prt $ oneBranch t ---- stringFromTerm + Yes t -> return $ K $ clean $ prt $ oneBranch t ---- stringFromTerm _ -> return $ K $ prt c ---- where comp = computeConcrete gr @@ -193,3 +193,11 @@ evalPrintname gr c ppr lin = P x _ -> oneBranch x Alts (d,_) -> oneBranch d _ -> t + + --- very unclean cleaner + clean s = case s of + '+':'+':' ':cs -> clean cs + '"':cs -> clean cs + c:cs -> c: clean cs + _ -> s + diff --git a/src/GF/Shell/Commands.hs b/src/GF/Shell/Commands.hs index 00d8d176b..e1c0736ab 100644 --- a/src/GF/Shell/Commands.hs +++ b/src/GF/Shell/Commands.hs @@ -414,7 +414,7 @@ displaySStateJavaX isNew env state = unlines $ tagXML "gfedit" $ concat [ opts = addOptions (optsSState state) -- state opts override (addOption (markLin mark) (globalOptions env)) lin (n,gr) = (n, map uni $ linearizeState noWrap opts gr zipper) where - uni = optEncodeUTF8 gr . mkUnicode + uni = {- optEncodeUTF8 gr . -} mkUnicode exp = prprTree $ loc2tree zipper zipper = stateSState state linAll = map lin lgrs diff --git a/src/GF/Shell/JGF.hs b/src/GF/Shell/JGF.hs index 309ee40c9..86a7d77fb 100644 --- a/src/GF/Shell/JGF.hs +++ b/src/GF/Shell/JGF.hs @@ -56,5 +56,5 @@ initEditMsgJavaX env = encodeUTF8 $ unlines $ tagXML "gfinit" $ concat [tagAttrXML "language" ("file",file) [prLanguage lang] | (file,lang) <- zip (allGrammarFileNames env) (allLanguages env)] -initAndEditMsgJavaX isNew env state = +initAndEditMsgJavaX isNew env state = encodeUTF8 $ initEditMsgJavaX env ++++ displaySStateJavaX isNew env state diff --git a/src/GF/Text/Hebrew.hs b/src/GF/Text/Hebrew.hs index abd2855b8..b5a827518 100644 --- a/src/GF/Text/Hebrew.hs +++ b/src/GF/Text/Hebrew.hs @@ -1,13 +1,15 @@ module Hebrew where mkHebrew :: String -> String -mkHebrew = mkHebrewWord +mkHebrew = reverse . mkHebrewWord --- reverse : assumes everything's on same line type HebrewChar = Char -- HH 031103 added code for spooling the markup --- removed reverse, words, unwords (seemed obsolete and come out wrong on the screen) +-- removed reverse, words, unwords +-- (seemed obsolete and come out wrong on the screen) +-- AR 26/1/2004 put reverse back - needed in Fudgets (but not in Java?) mkHebrewWord :: String -> [HebrewChar] -- mkHebrewWord = map mkHebrewChar diff --git a/src/GF/Text/Unicode.hs b/src/GF/Text/Unicode.hs index 4fef4a93e..197759213 100644 --- a/src/GF/Text/Unicode.hs +++ b/src/GF/Text/Unicode.hs @@ -16,27 +16,28 @@ import ExtraDiacritics (mkExtraDiacritics) -- ad hoc Unicode conversions from different alphabets --- AR 12/4/2000, 18/9/2001, 30/5/2002 +-- AR 12/4/2000, 18/9/2001, 30/5/2002, 26/1/2004 mkUnicode s = case s of - '/':'/':cs -> mkGreek (remClosing cs) - '/':'+':cs -> mkHebrew (remClosing cs) - '/':'-':cs -> mkArabic (remClosing cs) - '/':'_':cs -> mkRussian (remClosing cs) - '/':'*':cs -> mkRusKOI8 (remClosing cs) - '/':'E':cs -> mkEthiopic (remClosing cs) - '/':'T':cs -> mkTamil (remClosing cs) - '/':'C':cs -> mkOCSCyrillic (remClosing cs) - '/':'&':cs -> mkDevanagari (remClosing cs) - '/':'L':cs -> mkLatinASupplement (remClosing cs) - '/':'J':cs -> mkJapanese (remClosing cs) - '/':'6':cs -> mkArabic0600 (remClosing cs) - '/':'A':cs -> mkExtendedArabic (remClosing cs) - '/':'X':cs -> mkExtraDiacritics (remClosing cs) - _ -> s - -remClosing cs - | lcs > 1 && last cs == '/' = take (lcs-2) cs - | otherwise = cs - where lcs = length cs - + '/':'/':cs -> mkGreek unic ++ mkUnicode rest + '/':'+':cs -> mkHebrew unic ++ mkUnicode rest + '/':'-':cs -> mkArabic unic ++ mkUnicode rest + '/':'_':cs -> mkRussian unic ++ mkUnicode rest + '/':'*':cs -> mkRusKOI8 unic ++ mkUnicode rest + '/':'E':cs -> mkEthiopic unic ++ mkUnicode rest + '/':'T':cs -> mkTamil unic ++ mkUnicode rest + '/':'C':cs -> mkOCSCyrillic unic ++ mkUnicode rest + '/':'&':cs -> mkDevanagari unic ++ mkUnicode rest + '/':'L':cs -> mkLatinASupplement unic ++ mkUnicode rest + '/':'J':cs -> mkJapanese unic ++ mkUnicode rest + '/':'6':cs -> mkArabic0600 unic ++ mkUnicode rest + '/':'A':cs -> mkExtendedArabic unic ++ mkUnicode rest + '/':'X':cs -> mkExtraDiacritics unic ++ mkUnicode rest + c:cs -> c:mkUnicode cs + _ -> s + where + (unic,rest) = remClosing [] $ drop 2 s + remClosing u s = case s of + c:'/':s | elem c "/+-_*ETC&LJ6AX" -> (reverse u, s) --- end need not match + c:cs -> remClosing (c:u) cs + _ -> (reverse u,[]) -- forgiving missing end diff --git a/src/Today.hs b/src/Today.hs index c87aac992..2ade42c90 100644 --- a/src/Today.hs +++ b/src/Today.hs @@ -1 +1 @@ -module Today where today = "Wed Jan 21 15:14:24 CET 2004" +module Today where today = "Mon Jan 26 10:15:46 CET 2004"