Unicode. Batch transl into HTML.

This commit is contained in:
aarne
2004-01-28 12:42:20 +00:00
parent fdda9fac0a
commit dec8e76616
12 changed files with 84 additions and 12 deletions

View File

@@ -24,7 +24,7 @@ main = do
let (os,fs) = getOptions "-" xs
java = oElem forJava os
isNew = oElem newParser os ---- temporary hack to have two parallel GUIs
putStrLn $ if java then encodeUTF8 welcomeMsg else welcomeMsg
putStrLnFlush $ if java then encodeUTF8 welcomeMsg else welcomeMsg
st <- case fs of
_ -> useIOE emptyShellState $ foldM (shellStateFromFiles os) emptyShellState fs
--- _ -> return emptyShellState

View File

@@ -304,3 +304,8 @@ optEncodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of
Just "utf8" -> id
_ -> encodeUTF8
optDecodeUTF8 :: GFGrammar -> String -> String
optDecodeUTF8 gr = case getOptVal (stateOptions gr) uniCoding of
Just "utf8" -> decodeUTF8
_ -> id

View File

@@ -48,8 +48,13 @@ unifyAnyInfo c i j = errIn ("combining information for" +++ prt c) $ case (i,j)
liftM3 CncCat (unifPerhaps mc1 mc2) (unifPerhaps mf1 mf2) (unifPerhaps mp1 mp2)
(CncFun m mt1 md1, CncFun _ mt2 md2) ->
liftM2 (CncFun m) (unifPerhaps mt1 mt2) (unifPerhaps md1 md2) ---- adding defs
-- for bw compatibility with unspecified printnames in old GF
(CncFun Nothing Nope (Yes pr),_) ->
unifyAnyInfo c (CncCat Nope Nope (Yes pr)) j
(_,CncFun Nothing Nope (Yes pr)) ->
unifyAnyInfo c i (CncCat Nope Nope (Yes pr))
_ -> Bad $ "cannot unify information for" +++ show i
_ -> Bad $ "cannot unify informations in" +++ show i +++ "and" +++ show j
--- these auxiliaries should be somewhere else since they don't use the info types

View File

@@ -13,7 +13,7 @@ import API
import IOGrammar
import Compile
---- import GFTex
-----import TeachYourself -- also a subshell
---- import TeachYourself -- also a subshell
import ShellState
import Option

View File

@@ -35,6 +35,7 @@ import Custom
import qualified Ident as I
import Option
import Str (sstr) ----
import UTF8 ----
import Random (mkStdGen, newStdGen)
import Monad (liftM2, foldM)
@@ -398,7 +399,8 @@ displaySStateIn env state = (tree',msg,menu) where
---- the Boolean is a temporary hack to have two parallel GUIs
displaySStateJavaX :: Bool -> CEnv -> SState -> String
displaySStateJavaX isNew env state = unlines $ tagXML "gfedit" $ concat [
displaySStateJavaX isNew env state = encodeUTF8 $ mkUnicode $
unlines $ tagXML "gfedit" $ concat [
tagXML "linearizations" (concat
[tagAttrXML "lin" ("lang", prLanguage lang) ss | (lang,ss) <- lins]),
tagXML "tree" tree,
@@ -414,7 +416,7 @@ displaySStateJavaX isNew env state = unlines $ tagXML "gfedit" $ concat [
opts = addOptions (optsSState state) -- state opts override
(addOption (markLin mark) (globalOptions env))
lin (n,gr) = (n, map uni $ linearizeState noWrap opts gr zipper) where
uni = {- optEncodeUTF8 gr . -} mkUnicode
uni = optDecodeUTF8 gr
exp = prprTree $ loc2tree zipper
zipper = stateSState state
linAll = map lin lgrs

View File

@@ -250,7 +250,7 @@ transCncDef x = case x of
DefPrintFun defs -> do
defs' <- liftM concat $ mapM transPrintDef defs
returnl [(f, G.CncFun Nothing nope (yes e)) | (f,e) <- defs']
DefPrintOld defs -> do -- a guess, for backward compatibility
DefPrintOld defs -> do --- a guess, for backward compatibility
defs' <- liftM concat $ mapM transPrintDef defs
returnl [(f, G.CncFun Nothing nope (yes e)) | (f,e) <- defs']
DefFlag defs -> liftM Right $ mapM transFlagDef defs

View File

@@ -1,7 +1,8 @@
module Arabic where
mkArabic :: String -> String
mkArabic = reverse . unwords . (map mkArabicWord) . words
mkArabic = unwords . (map mkArabicWord) . words
----mkArabic = reverse . unwords . (map mkArabicWord) . words
--- reverse : assumes everything's on same line
type ArabicChar = Char

View File

@@ -1,7 +1,8 @@
module Hebrew where
mkHebrew :: String -> String
mkHebrew = reverse . mkHebrewWord
mkHebrew = mkHebrewWord
----mkHebrew = reverse . mkHebrewWord
--- reverse : assumes everything's on same line
type HebrewChar = Char

View File

@@ -14,15 +14,17 @@ import ExtendedArabic (mkArabic0600)
import ExtendedArabic (mkExtendedArabic)
import ExtraDiacritics (mkExtraDiacritics)
import Char
-- ad hoc Unicode conversions from different alphabets
-- AR 12/4/2000, 18/9/2001, 30/5/2002, 26/1/2004
mkUnicode s = case s of
'/':'/':cs -> mkGreek unic ++ mkUnicode rest
'/':'/':cs -> treat [] mkGreek unic ++ mkUnicode rest
'/':'+':cs -> mkHebrew unic ++ mkUnicode rest
'/':'-':cs -> mkArabic unic ++ mkUnicode rest
'/':'_':cs -> mkRussian unic ++ mkUnicode rest
'/':'_':cs -> treat [] mkRussian unic ++ mkUnicode rest
'/':'*':cs -> mkRusKOI8 unic ++ mkUnicode rest
'/':'E':cs -> mkEthiopic unic ++ mkUnicode rest
'/':'T':cs -> mkTamil unic ++ mkUnicode rest
@@ -36,8 +38,19 @@ mkUnicode s = case s of
c:cs -> c:mkUnicode cs
_ -> s
where
(unic,rest) = remClosing [] $ drop 2 s
(unic,rest) = remClosing [] $ dropWhile isSpace $ drop 2 s
remClosing u s = case s of
c:'/':s | elem c "/+-_*ETC&LJ6AX" -> (reverse u, s) --- end need not match
c:cs -> remClosing (c:u) cs
_ -> (reverse u,[]) -- forgiving missing end
-- don't convert XML tags --- assumes <> always means XML tags
treat old mk s = case s of
'<':cs -> mk (reverse old) ++ '<':noTreat cs
c:cs -> treat (c:old) mk cs
_ -> mk (reverse old)
where
noTreat s = case s of
'>':cs -> '>' : treat [] mk cs
c:cs -> c : noTreat cs
_ -> s

43
src/GF/Translate/GFT.hs Normal file
View File

@@ -0,0 +1,43 @@
module Main where
import ShellState
import GetGFC
import API
import Unicode
import UTF8
import UseIO
import Option
import Modules (emptyMGrammar) ----
import Operations
import System
import List
main :: IO ()
main = do
file:_ <- getArgs
let opts = noOptions
can <- useIOE (error "no grammar file") $ getCanonGrammar file
st <- err error return $
grammar2shellState opts (can, emptyMGrammar)
let grs = allStateGrammars st
let cat = firstCatOpts opts (firstStateGrammar st)
---- interact (doTranslate grs cat)
s <- getLine
putStrLnFlush $ doTranslate grs cat s
doTranslate grs cat s =
let ss = [l +++ ":" +++ s | (l,s) <- zip (map (prIdent . cncId) grs)
(translateBetweenAll grs cat s)]
in mkHTML ss
mkHTML = unlines . htmlDoc . intersperse "<p>" . map (encodeUTF8 . mkUnicode) . sort
htmlDoc ss = "<html>":metaHead:"<body>": ss ++ ["</body>","</html>"]
metaHead =
"<HEAD><META http-equiv=Content-Type content=\"text/html; charset=utf-8\"></HEAD>"

View File

@@ -9,6 +9,8 @@ all:
make today ; make ghc
ghc:
$(GHMAKE) $(GHCFLAGS) $(GHCINCLUDE) $(GHCFUDFLAG) --make GF.hs -o gf2+ ; strip gf2+ ; mv gf2+ ../bin/
gft:
$(GHMAKE) $(GHCFLAGS) $(GHCINCLUDENOFUD) -itranslate --make translate/GFT.hs -o gft ; strip gft ; mv gft ../bin/
nofud:
$(GHMAKE) $(GHCFLAGS) $(GHCINCLUDENOFUD) --make GF.hs -o gf2n ; strip gf2n ; mv gf2n ../bin/
windows:

View File

@@ -1 +1 @@
module Today where today = "Mon Jan 26 10:15:46 CET 2004"
module Today where today = "Wed Jan 28 14:24:20 CET 2004"