GF.Compile.ReadFiles: a simpler and faster way to convert from Latin-1 (the "CP1252" coding) to UTF-8

This commit is contained in:
hallgren
2014-08-19 14:27:15 +00:00
parent 7dee933802
commit d2e326b182

View File

@@ -232,12 +232,15 @@ toUTF8 opts0 raw =
coding = getEncoding $ opts0 `addOptions` opts
utf8 <- if coding=="UTF-8"
then return raw
else lift $ do --ePutStrLn $ "toUTF8 from "++coding
enc <- mkTextEncoding coding
-- decodeUnicodeIO uses a lot of stack space,
-- so we need to split the file into smaller pieces
ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
return $ UTF8.fromString (unlines ls)
else if coding=="CP1252" -- Latin1
then return . UTF8.fromString $ BS.unpack raw -- faster
else lift $
do --ePutStrLn $ "toUTF8 from "++coding
enc <- mkTextEncoding coding
-- decodeUnicodeIO uses a lot of stack space,
-- so we need to split the file into smaller pieces
ls <- mapM (decodeUnicodeIO enc) (BS.lines raw)
return $ UTF8.fromString (unlines ls)
return (given,utf8)
--lift io = ioe (fmap Ok io `catch` (return . Bad . show))