mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-23 01:52:50 -06:00
cp1251 coding ; trying to recognize the coding flag in grammar
This commit is contained in:
@@ -60,4 +60,8 @@ isFlag o opts = elem o [x | OFlag x _ <- opts]
|
|||||||
-- Renders an option as a string: an OOpt yields the name it carries.
-- NOTE(review): OOpt is the only constructor matched in the equation shown here,
-- while OFlag is also matched on option lists elsewhere; confirm prOpt is not
-- partial when it is mapped over options that may contain flags.
prOpt :: Option -> String
|
prOpt :: Option -> String
|
||||||
prOpt (OOpt i) = i ----
|
prOpt (OOpt i) = i ----
|
||||||
|
|
||||||
|
-- Builds an option from its name by wrapping the string in OOpt.
mkOpt :: String -> Option
|
||||||
|
mkOpt = OOpt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -32,6 +32,8 @@ import Data.Maybe
|
|||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import System.Cmd
|
import System.Cmd
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
type CommandOutput = ([Tree],String) ---- errors, etc
|
type CommandOutput = ([Tree],String) ---- errors, etc
|
||||||
|
|
||||||
data CommandInfo = CommandInfo {
|
data CommandInfo = CommandInfo {
|
||||||
@@ -343,7 +345,7 @@ allCommands pgf = Map.fromList [
|
|||||||
"ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal",
|
"ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal",
|
||||||
"ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal"
|
"ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal"
|
||||||
],
|
],
|
||||||
exec = \opts -> return . fromString . stringOps opts . toString,
|
exec = \opts -> return . fromString . stringOps (map prOpt opts) . toString,
|
||||||
options = stringOpOptions
|
options = stringOpOptions
|
||||||
}),
|
}),
|
||||||
("q", emptyCommandInfo {
|
("q", emptyCommandInfo {
|
||||||
@@ -497,11 +499,18 @@ allCommands pgf = Map.fromList [
|
|||||||
(abstractName pgf ++ ": " ++ showTree t) :
|
(abstractName pgf ++ ": " ++ showTree t) :
|
||||||
[lang ++ ": " ++ linear opts lang t | lang <- optLangs opts]
|
[lang ++ ": " ++ linear opts lang t | lang <- optLangs opts]
|
||||||
|
|
||||||
unlex opts lang = stringOps (exceptUTF8 opts) where
|
-- logic of coding in unlexing:
|
||||||
exceptUTF8 = if isUTF8 then filter ((/="to_UTF8") . prOpt) else id
|
-- - If lang has no coding flag, or -to_utf8 is not in opts, just opts are used.
|
||||||
isUTF8 = case lookFlag pgf lang "coding" of
|
-- - If lang has flag coding=utf8, -to_utf8 is ignored.
|
||||||
Just "utf8" -> True
|
-- - If lang has coding=other, and -to_utf8 is in opts, from_other is applied first.
|
||||||
_ -> False
|
|
||||||
|
-- Computes the list of string-operation names (optsC) for lang and passes it to stringOps:
--   coding flag is "utf8"                 -> the option names with "to_utf8" removed
--   some other coding and to_utf8 is set  -> "from_<coding>" placed at the head of the option
--                                            names, with existing occurrences of it removed
--   any other situation                   -> the option names as given
-- The disabled trace call would print optsC if re-enabled.
unlex opts lang = {- trace (unwords optsC) $ -} stringOps optsC where
|
||||||
|
optsC = case lookFlag pgf lang "coding" of
|
||||||
|
Just "utf8" -> filter (/="to_utf8") $ map prOpt opts
|
||||||
|
Just other | isOpt "to_utf8" opts ->
|
||||||
|
let cod = ("from_" ++ other)
|
||||||
|
in cod : filter (/=cod) (map prOpt opts)
|
||||||
|
_ -> map prOpt opts
|
||||||
|
|
||||||
optRestricted opts = restrictPGF (hasLin pgf (mkCId (optLang opts))) pgf
|
optRestricted opts = restrictPGF (hasLin pgf (mkCId (optLang opts))) pgf
|
||||||
|
|
||||||
@@ -536,7 +545,7 @@ allCommands pgf = Map.fromList [
|
|||||||
[lookupMorpho (buildMorpho pgf (mkCId la)) s | la <- optLangs opts]
|
[lookupMorpho (buildMorpho pgf (mkCId la)) s | la <- optLangs opts]
|
||||||
|
|
||||||
-- ps -f -g s returns g (f s)
|
-- ps -f -g s returns g (f s)
|
||||||
-- Applies the named string operations to s in list order (the first one listed is
-- applied first, because the list is reversed before the right fold).
-- A name for which stringOp returns Nothing leaves the string unchanged.
stringOps opts s = foldr app s (reverse (map prOpt opts)) where
|
stringOps opts s = foldr app s (reverse opts) where
|
||||||
app f = maybe id id (stringOp f)
|
app f = maybe id id (stringOp f)
|
||||||
|
|
||||||
stringOpOptions = [
|
stringOpOptions = [
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ data Verbosity = Quiet | Normal | Verbose | Debug
|
|||||||
data Phase = Preproc | Convert | Compile | Link
|
data Phase = Preproc | Convert | Compile | Link
|
||||||
deriving (Show,Eq,Ord)
|
deriving (Show,Eq,Ord)
|
||||||
|
|
||||||
-- Character encodings; each constructor is paired with its textual name in `encodings`.
data Encoding = UTF_8 | ISO_8859_1
|
data Encoding = UTF_8 | ISO_8859_1 | CP_1251
|
||||||
deriving (Show,Eq,Ord)
|
deriving (Show,Eq,Ord)
|
||||||
|
|
||||||
data OutputFormat = FmtPGF
|
data OutputFormat = FmtPGF
|
||||||
@@ -469,7 +469,9 @@ optimizationPackages =
|
|||||||
-- Association list from encoding names to their Encoding constructors.
encodings :: [(String,Encoding)]
|
encodings :: [(String,Encoding)]
|
||||||
encodings =
|
encodings =
|
||||||
[("utf8", UTF_8),
|
[("utf8", UTF_8),
|
||||||
("latin1", ISO_8859_1)]
|
("cp1251", CP_1251),
|
||||||
|
("latin1", ISO_8859_1)
|
||||||
|
]
|
||||||
|
|
||||||
-- Reverse lookup in a name/value table: returns the name of the first pair in xs
-- whose value equals z, or the literal string "lookupShow" when no pair matches.
lookupShow :: Eq a => [(String,a)] -> a -> String
|
lookupShow :: Eq a => [(String,a)] -> a -> String
|
||||||
lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs]
|
lookupShow xs z = fromMaybe "lookupShow" $ lookup z [(y,x) | (x,y) <- xs]
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ stringOp name = case name of
|
|||||||
"to_html" -> Just wrapHTML
|
"to_html" -> Just wrapHTML
|
||||||
"to_utf8" -> Just encodeUTF8
|
"to_utf8" -> Just encodeUTF8
|
||||||
"from_utf8" -> Just decodeUTF8
|
"from_utf8" -> Just decodeUTF8
|
||||||
|
"to_cp1251" -> Just encodeCP1251
|
||||||
|
"from_cp1251" -> Just decodeCP1251
|
||||||
_ -> transliterate name
|
_ -> transliterate name
|
||||||
|
|
||||||
appLexer :: (String -> [String]) -> String -> String
|
appLexer :: (String -> [String]) -> String -> String
|
||||||
@@ -97,3 +99,17 @@ isPunct = flip elem ".?!,:;"
|
|||||||
-- True for any bracket character: ( ) [ ] { }
isParen = flip elem "()[]{}"
|
isParen = flip elem "()[]{}"
|
||||||
-- True for a closing bracket character: ) ] }
isClosing = flip elem ")]}"
|
isClosing = flip elem ")]}"
|
||||||
|
|
||||||
|
|
||||||
|
-- might be in a file of its own: Windows Cyrillic, used in Bulgarian resource
|
||||||
|
|
||||||
|
-- Maps every character of the input: characters '\192'..'\255' are shifted up by 848,
-- giving code points 1040..1103; every other character is returned unchanged.
-- NOTE(review): characters below '\192' pass through as-is, so any CP1251-specific
-- characters in that region are not converted; confirm this is enough for the
-- languages that use this coding.
decodeCP1251 = map convert where
|
||||||
|
convert c
|
||||||
|
| c >= '\192' && c <= '\255' = chr (ord c + 848)
|
||||||
|
| otherwise = c
|
||||||
|
|
||||||
|
-- Maps every character of the input: code points 1040..1103 are shifted down by 848,
-- giving characters '\192'..'\255'; every other character is returned unchanged.
-- This is the inverse of the shift performed by decodeCP1251 on that range.
encodeCP1251 = map convert where
|
||||||
|
convert c
|
||||||
|
| oc >= 1040 && oc <= 1103 = chr (oc - 848)
|
||||||
|
| otherwise = c
|
||||||
|
where oc = ord c
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user