mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
cp1251 coding ; trying to recognize the coding flag in grammar
This commit is contained in:
@@ -60,4 +60,8 @@ isFlag o opts = elem o [x | OFlag x _ <- opts]
|
||||
-- | Render an option as the plain name used to select a string operation.
--
-- The original definition only matched 'OOpt', so mapping it over an option
-- list that also contained a flag failed with a pattern-match error. Flags
-- are now rendered by their name; the flag's value is dropped.
--
-- NOTE(review): assumes the first field of 'OFlag' is a String name, as its
-- use in 'isFlag' suggests -- confirm against the 'Option' declaration.
prOpt :: Option -> String
prOpt o = case o of
  OOpt i    -> i
  OFlag f _ -> f ----
|
||||
|
||||
-- | Wrap a bare option name in the 'OOpt' constructor.
mkOpt :: String -> Option
mkOpt name = OOpt name
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -32,6 +32,8 @@ import Data.Maybe
|
||||
import qualified Data.Map as Map
|
||||
import System.Cmd
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
type CommandOutput = ([Tree],String) ---- errors, etc
|
||||
|
||||
data CommandInfo = CommandInfo {
|
||||
@@ -343,7 +345,7 @@ allCommands pgf = Map.fromList [
|
||||
"ps -from_utf8 \"jag ?r h?r\" | p -- parser in LangSwe in UTF8 terminal",
|
||||
"ps -to_devanagari -to_utf8 \"A-p\" -- show Devanagari in UTF8 terminal"
|
||||
],
|
||||
exec = \opts -> return . fromString . stringOps opts . toString,
|
||||
exec = \opts -> return . fromString . stringOps (map prOpt opts) . toString,
|
||||
options = stringOpOptions
|
||||
}),
|
||||
("q", emptyCommandInfo {
|
||||
@@ -497,11 +499,18 @@ allCommands pgf = Map.fromList [
|
||||
(abstractName pgf ++ ": " ++ showTree t) :
|
||||
[lang ++ ": " ++ linear opts lang t | lang <- optLangs opts]
|
||||
|
||||
unlex opts lang = stringOps (exceptUTF8 opts) where
|
||||
exceptUTF8 = if isUTF8 then filter ((/="to_UTF8") . prOpt) else id
|
||||
isUTF8 = case lookFlag pgf lang "coding" of
|
||||
Just "utf8" -> True
|
||||
_ -> False
|
||||
-- logic of coding in unlexing:
|
||||
-- - If lang has no coding flag, or -to_utf8 is not in opts, just opts are used.
|
||||
-- - If lang has flag coding=utf8, -to_utf8 is ignored.
|
||||
-- - If lang has coding=other, and -to_utf8 is in opts, from_other is applied first.
|
||||
|
||||
unlex opts lang = {- trace (unwords optsC) $ -} stringOps optsC where
|
||||
optsC = case lookFlag pgf lang "coding" of
|
||||
Just "utf8" -> filter (/="to_utf8") $ map prOpt opts
|
||||
Just other | isOpt "to_utf8" opts ->
|
||||
let cod = ("from_" ++ other)
|
||||
in cod : filter (/=cod) (map prOpt opts)
|
||||
_ -> map prOpt opts
|
||||
|
||||
optRestricted opts = restrictPGF (hasLin pgf (mkCId (optLang opts))) pgf
|
||||
|
||||
@@ -536,7 +545,7 @@ allCommands pgf = Map.fromList [
|
||||
[lookupMorpho (buildMorpho pgf (mkCId la)) s | la <- optLangs opts]
|
||||
|
||||
-- ps -f -g s returns g (f s)
|
||||
stringOps opts s = foldr app s (reverse (map prOpt opts)) where
|
||||
stringOps opts s = foldr app s (reverse opts) where
|
||||
app f = maybe id id (stringOp f)
|
||||
|
||||
stringOpOptions = [
|
||||
|
||||
@@ -76,7 +76,7 @@ data Verbosity = Quiet | Normal | Verbose | Debug
|
||||
data Phase = Preproc | Convert | Compile | Link
|
||||
deriving (Show,Eq,Ord)
|
||||
|
||||
data Encoding = UTF_8 | ISO_8859_1
|
||||
data Encoding = UTF_8 | ISO_8859_1 | CP_1251
|
||||
deriving (Show,Eq,Ord)
|
||||
|
||||
data OutputFormat = FmtPGF
|
||||
@@ -469,7 +469,9 @@ optimizationPackages =
|
||||
encodings :: [(String,Encoding)]
|
||||
encodings =
|
||||
[("utf8", UTF_8),
|
||||
("latin1", ISO_8859_1)]
|
||||
("cp1251", CP_1251),
|
||||
("latin1", ISO_8859_1)
|
||||
]
|
||||
|
||||
-- | Reverse lookup in a name/value table: return the name paired with the
-- first entry whose value equals the given one, or the literal string
-- @"lookupShow"@ when no entry matches.
lookupShow :: Eq a => [(String,a)] -> a -> String
lookupShow table wanted =
  case [name | (name, value) <- table, value == wanted] of
    name : _ -> name
    []       -> "lookupShow"
|
||||
@@ -542,4 +544,4 @@ instance Functor OptDescr where
|
||||
instance Functor ArgDescr where
|
||||
fmap f (NoArg x) = NoArg (f x)
|
||||
fmap f (ReqArg g s) = ReqArg (f . g) s
|
||||
fmap f (OptArg g s) = OptArg (f . g) s
|
||||
fmap f (OptArg g s) = OptArg (f . g) s
|
||||
|
||||
@@ -24,6 +24,8 @@ stringOp name = case name of
|
||||
"to_html" -> Just wrapHTML
|
||||
"to_utf8" -> Just encodeUTF8
|
||||
"from_utf8" -> Just decodeUTF8
|
||||
"to_cp1251" -> Just encodeCP1251
|
||||
"from_cp1251" -> Just decodeCP1251
|
||||
_ -> transliterate name
|
||||
|
||||
appLexer :: (String -> [String]) -> String -> String
|
||||
@@ -97,3 +99,17 @@ isPunct = flip elem ".?!,:;"
|
||||
-- | True for any round, square or curly bracket, opening or closing.
isParen c = c `elem` "()[]{}"
|
||||
-- | True for a closing round, square or curly bracket.
isClosing c = c `elem` ")]}"
|
||||
|
||||
|
||||
-- might be in a file of its own: Windows Cyrillic, used in Bulgarian resource
|
||||
|
||||
-- | Decode a string whose characters carry CP1251 (Windows Cyrillic) byte
-- values into the corresponding Unicode characters.
--
-- Bytes 0xC0-0xFF form the contiguous block U+0410-U+044F, reached by adding
-- 848. The letters IO sit outside that block in CP1251 (0xA8 and 0xB8) and
-- are mapped individually to U+0401 and U+0451. Every other character,
-- including the remaining 0x80-0xBF code points, is passed through unchanged.
decodeCP1251 :: String -> String
decodeCP1251 = map convert where
  convert c
    | c >= '\192' && c <= '\255' = chr (ord c + 848)
    | c == '\168'                = '\1025' -- 0xA8: capital IO, U+0401
    | c == '\184'                = '\1105' -- 0xB8: small io, U+0451
    | otherwise                  = c
|
||||
|
||||
-- | Encode Unicode Cyrillic characters as characters carrying their CP1251
-- (Windows Cyrillic) byte values.
--
-- U+0410-U+044F is the contiguous block 0xC0-0xFF, reached by subtracting
-- 848. The letters IO (U+0401, U+0451) lie outside that block and are mapped
-- individually to 0xA8 and 0xB8. Every other character is passed through
-- unchanged.
encodeCP1251 :: String -> String
encodeCP1251 = map convert where
  convert c
    | oc >= 1040 && oc <= 1103 = chr (oc - 848)
    | oc == 1025               = '\168' -- capital IO, U+0401 -> 0xA8
    | oc == 1105               = '\184' -- small io, U+0451 -> 0xB8
    | otherwise                = c
    where oc = ord c
|
||||
|
||||
|
||||
Reference in New Issue
Block a user