diff --git a/src/GF.hs b/src/GF.hs index 8cbc45f68..5672b9150 100644 --- a/src/GF.hs +++ b/src/GF.hs @@ -125,7 +125,7 @@ welcomeMsgLib = do welcomeMsg lib = "Welcome to " ++ authorMsg ++++ - "If ä and ö (umlaut letters) look strange, see 'h -coding'." ++ + "If \228 and \246 (umlaut letters) look strange, see 'h -coding'." ++ "\n" ++ lib ++ "\n\nType 'h' for help, and 'h [Command] for more detailed help.\n" @@ -133,9 +133,9 @@ authorMsg = unlines [ "Grammatical Framework, Version " ++ version, "Compiled " ++ today, "Copyright (c)", - "Björn Bringert, Håkan Burden, Hans-Joachim Daniels, Markus Forsberg", - "Thomas Hallgren, Harald Hammarström, Kristofer Johannisson,", - "Janna Khegai, Peter Ljunglöf, Petri Mäenpää, and", + "Bj\246rn Bringert, H\229kan Burden, Hans-Joachim Daniels, Markus Forsberg", + "Thomas Hallgren, Harald Hammarstr\246m, Kristofer Johannisson,", + "Janna Khegai, Peter Ljungl\246f, Petri M\228enp\228\228, and", "Aarne Ranta, 1998-2006, under GNU General Public License (GPL)", "Bug reports to aarne@cs.chalmers.se" ] diff --git a/src/GF/Data/Glue.hs b/src/GF/Data/Glue.hs index b3a766418..4f276222b 100644 --- a/src/GF/Data/Glue.hs +++ b/src/GF/Data/Glue.hs @@ -26,5 +26,5 @@ decomposeSimple t s = do else return $ concat [intersperse "&+" ws | ws <- ss] exTrie = tcompile (zip ws ws) where - ws = words "ett två tre tjugo trettio hundra tusen" + ws = words "ett tv\229 tre tjugo trettio hundra tusen" diff --git a/src/GF/Data/Map.hs b/src/GF/Data/Map.hs index ea27826f6..c86c9ab55 100644 --- a/src/GF/Data/Map.hs +++ b/src/GF/Data/Map.hs @@ -39,7 +39,7 @@ empty = emptyTree -- | lookup operator. (!) :: Ord key => Map key el -> key -> Maybe el -fm ! e = lookupTree e fm +(!) fm e = lookupTree e fm -- | lookupMany operator. 
(!+) :: Ord key => Map key el -> [key] -> [Maybe el] diff --git a/src/GF/Data/Parsers.hs b/src/GF/Data/Parsers.hs index 7333510f8..d9920c3d2 100644 --- a/src/GF/Data/Parsers.hs +++ b/src/GF/Data/Parsers.hs @@ -170,7 +170,7 @@ pIdent = pLetter ... longestOfMany pAlphaPlusChar *** uncurry (:) pLetter, pDigit :: Parser Char Char pLetter = satisfy (`elem` (['A'..'Z'] ++ ['a'..'z'] ++ - ['À' .. 'Û'] ++ ['à' .. 'û'])) -- no such in Char + ['\192' .. '\255'])) -- no such in Char pDigit = satisfy isDigit pLetters :: Parser Char String diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs index 39706e9d8..16603b3ff 100644 --- a/src/GF/Shell/HelpFile.hs +++ b/src/GF/Shell/HelpFile.hs @@ -225,7 +225,7 @@ txtHelpFile = "\n -parser use this parsing strategy" ++ "\n -number return this many results at most" ++ "\n examples:" ++ - "\n p -cat=S -mcfg \"jag är gammal\" -- parse an S with the MCFG" ++ + "\n p -cat=S -mcfg \"jag \228r gammal\" -- parse an S with the MCFG" ++ "\n rf examples.txt | p -lines -- parse each non-empty line of the file" ++ "\n" ++ "\nat, apply_transfer: at (Module.Fun | Fun)" ++ @@ -690,7 +690,7 @@ txtHelpFile = "\n -mark=java show tree structure with XML tags (used in gfeditor)" ++ "\n" ++ "\n-coding, Some grammars are in UTF-8, some in isolatin-1." ++ - "\n If the letters ä (a-umlaut) and ö (u-umlaut) look strange, either" ++ + "\n If the letters \228 (a-umlaut) and \246 (o-umlaut) look strange, either" ++ "\n change your terminal to isolatin-1, or rewrite the grammar with" ++ "\n 'pg -utf8'." ++ "\n" ++ diff --git a/src/GF/Text/Devanagari.hs b/src/GF/Text/Devanagari.hs index 6bd23149f..bf4343cd0 100644 --- a/src/GF/Text/Devanagari.hs +++ b/src/GF/Text/Devanagari.hs @@ -82,7 +82,7 @@ digraphToUnicode (c1, c2) = case lookup (c1, c2) cc of Just c' -> c' ; _ -> c2 where cc = zip allDevanagariCodes allDevanagari -digraphedDevanagari = " ~ M ;__ AA: II: UU:RoLoEvE~ EE:AvA~ OAU kkH ggHNG ccH jjH ñ TTH DDH N ttH ddH nn. ppH bbH m y rr. l LL. 
v ç S s h____ .-Sa: ii: uu:ror:eve~ eaiava~ oau ^____OM | -dddu______ Q X G zD.RH fy.R:L:mrmR#I#d#0#1#2#3#4#5#6#7#8#9#o" +digraphedDevanagari = " ~ M ;__ AA: II: UU:RoLoEvE~ EE:AvA~ OAU kkH ggHNG ccH jjH \241 TTH DDH N ttH ddH nn. ppH bbH m y rr. l LL. v \231 S s h____ .-Sa: ii: uu:ror:eve~ eaiava~ oau ^____OM | -dddu______ Q X G zD.RH fy.R:L:mrmR#I#d#0#1#2#3#4#5#6#7#8#9#o" allDevanagariCodes :: [(Char, Char)] allDevanagariCodes = mkPairs digraphedDevanagari diff --git a/src/GF/Text/Ethiopic.hs b/src/GF/Text/Ethiopic.hs index 26ae4b933..81abbf719 100644 --- a/src/GF/Text/Ethiopic.hs +++ b/src/GF/Text/Ethiopic.hs @@ -38,18 +38,18 @@ spoolMarkup s = case s of '>' : cs -> ('>', -1) : adHocToDigraphWord cs c1 : cs -> (c1, -1) : spoolMarkup cs -isVowel x = elem x "AäuiïaeoI" +isVowel x = elem x "A\228ui\239aeoI" vowelOrder :: Char -> Int vowelOrder x = case x of 'A' -> 0 - 'ä' -> 0 + '\228' -> 0 -- ä 'u' -> 1 'i' -> 2 'a' -> 3 'e' -> 4 'I' -> 5 - 'ï' -> 5 + '\239' -> 5 -- ï 'o' -> 6 c -> 5 -- vowelless diff --git a/src/GF/Text/LatinASupplement.hs b/src/GF/Text/LatinASupplement.hs index de6e170ac..f42423c91 100644 --- a/src/GF/Text/LatinASupplement.hs +++ b/src/GF/Text/LatinASupplement.hs @@ -32,7 +32,7 @@ mkLatinASupplementWord str = case str of -- Turkish 'g' : '%' : cs -> toEnum 0x011f : mkLatinASupplementWord cs 'I' : cs -> toEnum 0x0131 : mkLatinASupplementWord cs - 'c' : ',' : cs -> 'ç' : mkLatinASupplementWord cs + 'c' : ',' : cs -> toEnum 0x00e7 : mkLatinASupplementWord cs -- Polish 'e' : ',' : cs -> toEnum 0x0119 : mkLatinASupplementWord cs 'a' : ',' : cs -> toEnum 0x0105 : mkLatinASupplementWord cs diff --git a/src/GF/Text/OCSCyrillic.hs b/src/GF/Text/OCSCyrillic.hs index 7bbd1a773..0d4696944 100644 --- a/src/GF/Text/OCSCyrillic.hs +++ b/src/GF/Text/OCSCyrillic.hs @@ -21,7 +21,7 @@ mkOCSCyrillicWord str = case str of [] -> [] ' ' : cs -> ' ' : mkOCSCyrillicWord cs '<' : cs -> '<' : spoolMarkup cs - 'ä' : cs -> toEnum 0x0463 : mkOCSCyrillicWord cs + '\228' : cs 
-> toEnum 0x0463 : mkOCSCyrillicWord cs -- ä 'j' : 'e' : '~' : cs -> toEnum 0x0469 : mkOCSCyrillicWord cs 'j' : 'o' : '~' : cs -> toEnum 0x046d : mkOCSCyrillicWord cs 'j' : 'e' : cs -> toEnum 0x0465 : mkOCSCyrillicWord cs diff --git a/src/GF/Text/Russian.hs b/src/GF/Text/Russian.hs index 5e49d2fbb..c4f1bfd89 100644 --- a/src/GF/Text/Russian.hs +++ b/src/GF/Text/Russian.hs @@ -34,10 +34,21 @@ mkRussianChar chars c = case lookup c cc of Just c' -> c' ; _ -> c where cc = zip chars allRussian +allRussianCodes :: [Char] allRussianCodes = - "ÅåABVGDEXZIJKLMNOPRSTUFHCQW£}!*ÖYÄabvgdexzijklmnoprstufhcqw#01'öyä" + -- changed to Ints to work with Haskell compilers e.g. GHC 6.5 CVS + -- which expect source files to be in UTF-8 + -- /bringert 2006-05-19 + -- "ÅåABVGDEXZIJKLMNOPRSTUFHCQW£}!*ÖYÄabvgdexzijklmnoprstufhcqw#01'öyä" + map toEnum [197,229,65,66,86,71,68,69,88,90,73,74,75,76,77,78,79,80,82,83,84,85,70,72,67,81,87,163,125,33,42,214,89,196,97,98,118,103,100,101,120,122,105,106,107,108,109,110,111,112,114,115,116,117,102,104,99,113,119,35,48,49,39,246,121,228] + +allRussianKOI8 :: [Char] allRussianKOI8 = - "^@áâ÷çäåöúéêëìíîïðòóôõæèãþûýøùÿüàñÁÂ×ÇÄÅÖÚÉÊËÌÍÎÏÐÒÓÔÕÆÈÃÞÛÝØÙßÜÀÑ" + -- changed to Ints to work with Haskell compilers e.g. GHC 6.5 CVS + -- which expect source files to be in UTF-8 + -- /bringert 2006-05-19 + -- "^@áâ÷çäåöúéêëìíîïðòóôõæèãþûýøùÿüàñÁÂ×ÇÄÅÖÚÉÊËÌÍÎÏÐÒÓÔÕÆÈÃÞÛÝØÙßÜÀÑ" + map toEnum [94,64,225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,242,243,244,245,230,232,227,254,251,253,248,249,255,252,224,241,193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,210,211,212,213,198,200,195,222,219,221,216,217,223,220,192,209] allRussian :: String allRussian = (map toEnum (0x0401:0x0451:[0x0410 .. 
0x044f])) -- Ëë in odd places diff --git a/src/GF/Text/Tamil.hs b/src/GF/Text/Tamil.hs index e938247c0..8ee171acf 100644 --- a/src/GF/Text/Tamil.hs +++ b/src/GF/Text/Tamil.hs @@ -73,5 +73,5 @@ allTamilCodes = mkPairs digraphedTamil allTamil :: String allTamil = (map toEnum [0x0b85 .. 0x0bfa]) -digraphedTamil = " AA: II: UU:______ EE:AI__ OO:AU k______ G c__ j__ ñ T______ N t______ V n p______ m y r l L M v__ s S h________a: ii: uu:______ ee:ai__ oo:au .__________________ :______________________________#1#2#3#4#5#6#7#8#9^1^2^3=d=m=y=d=c==ru##" +digraphedTamil = " AA: II: UU:______ EE:AI__ OO:AU k______ G c__ j__ \241 T______ N t______ V n p______ m y r l L M v__ s S h________a: ii: uu:______ ee:ai__ oo:au .__________________ :______________________________#1#2#3#4#5#6#7#8#9^1^2^3=d=m=y=d=c==ru##" diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs index a2a69f70c..b55355c20 100644 --- a/src/GF/Text/Text.hs +++ b/src/GF/Text/Text.hs @@ -82,7 +82,7 @@ formatAsTextGen tag para = unwords . format . cap . words where major = flip elem (map singleton ".!?") minor = flip elem (map singleton ",:;)") openp = all (flip elem "(") - spanish = all (flip elem "¡¿") + spanish = all (flip elem "\161\191") formatAsCode :: String -> String formatAsCode = rend 0 . words where @@ -125,10 +125,10 @@ performBindsFinnish :: String -> String performBindsFinnish = performBindsOpt vowelHarmony where vowelHarmony w p = if any (flip elem "aouAOU") w then p else map toFront p toFront c = case c of - 'A' -> 'Ä' - 'O' -> 'Ö' - 'a' -> 'ä' - 'o' -> 'ö' + 'A' -> '\196' + 'O' -> '\214' + 'a' -> '\228' + 'o' -> '\246' _ -> c unStringLit :: String -> String diff --git a/src/HelpFile b/src/HelpFile index 3b3a443ff..df785bda2 100644 --- a/src/HelpFile +++ b/src/HelpFile @@ -661,7 +661,7 @@ q, quit: q -mark=java show tree structure with XML tags (used in gfeditor) -coding, Some grammars are in UTF-8, some in isolatin-1. 
- If the letters ä (a-umlaut) and ö (u-umlaut) look strange, either + If the letters ä (a-umlaut) and ö (o-umlaut) look strange, either change your terminal to isolatin-1, or rewrite the grammar with 'pg -utf8'. diff --git a/src/tools/MkHelpFile.hs b/src/tools/MkHelpFile.hs index 45e9d302a..a0fafa918 100644 --- a/src/tools/MkHelpFile.hs +++ b/src/tools/MkHelpFile.hs @@ -38,6 +38,7 @@ mkOne s = " \"" ++ pref s ++ (escs s) ++ "\" ++" pref _ = "\\n" --- escs [] = [] escs (c:cs) | elem c "\"\\" = '\\':c:escs cs + | fromEnum c > 127 = "\\" ++ show (fromEnum c) ++ (if any (`elem` ['0'..'9']) (take 1 cs) then "\\&" else "") ++ escs cs -- "\&" keeps a following digit out of the numeric escape escs (c:cs) = c:escs cs helpHeader = unlines [