mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
many of the uses of peekCString and withCString in the Haskell binding were incorrect since they encode the string in the system locale while the C runtime is always using UTF8
This commit is contained in:
@@ -99,7 +99,7 @@ languages p =
|
||||
getLanguages :: IORef (Map.Map String Concr) -> MapItorCallback
|
||||
getLanguages ref itor key value exn = do
|
||||
langs <- readIORef ref
|
||||
name <- peekCString (castPtr key)
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
concr <- fmap (\ptr -> Concr ptr p) $ peek (castPtr value)
|
||||
writeIORef ref $! Map.insert name concr langs
|
||||
|
||||
@@ -108,18 +108,18 @@ generateAll p cat =
|
||||
unsafePerformIO $
|
||||
do genPl <- gu_new_pool
|
||||
exprPl <- gu_new_pool
|
||||
enum <- withCString cat $ \cat -> do
|
||||
exn <- gu_new_exn genPl
|
||||
pgf_generate_all (pgf p) cat exn genPl exprPl
|
||||
cat <- newUtf8CString cat genPl
|
||||
exn <- gu_new_exn genPl
|
||||
enum <- pgf_generate_all (pgf p) cat exn genPl exprPl
|
||||
genFPl <- newForeignPtr gu_pool_finalizer genPl
|
||||
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
fromPgfExprEnum enum genFPl (p,exprFPl)
|
||||
|
||||
abstractName :: PGF -> AbsName
|
||||
abstractName p = unsafePerformIO (peekCString =<< pgf_abstract_name (pgf p))
|
||||
abstractName p = unsafePerformIO (peekUtf8CString =<< pgf_abstract_name (pgf p))
|
||||
|
||||
startCat :: PGF -> Cat
|
||||
startCat p = unsafePerformIO (peekCString =<< pgf_start_cat (pgf p))
|
||||
startCat p = unsafePerformIO (peekUtf8CString =<< pgf_start_cat (pgf p))
|
||||
|
||||
loadConcr :: Concr -> FilePath -> IO ()
|
||||
loadConcr c fpath =
|
||||
@@ -146,12 +146,13 @@ unloadConcr c = pgf_concrete_unload (concr c)
|
||||
functionType :: PGF -> CId -> Type
|
||||
functionType p fn =
|
||||
unsafePerformIO $
|
||||
withCString fn $ \c_fn -> do
|
||||
withGuPool $ \tmpPl -> do
|
||||
c_fn <- newUtf8CString fn tmpPl
|
||||
c_type <- pgf_function_type (pgf p) c_fn
|
||||
peekType c_type
|
||||
where
|
||||
peekType c_type = do
|
||||
cid <- (#peek PgfType, cid) c_type >>= peekCString
|
||||
cid <- (#peek PgfType, cid) c_type >>= peekUtf8CString
|
||||
c_hypos <- (#peek PgfType, hypos) c_type
|
||||
n_hypos <- (#peek GuSeq, len) c_hypos
|
||||
hs <- peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
|
||||
@@ -161,7 +162,7 @@ functionType p fn =
|
||||
|
||||
peekHypos :: Ptr a -> Int -> Int -> IO [Hypo]
|
||||
peekHypos c_hypo i n
|
||||
| i < n = do cid <- (#peek PgfHypo, cid) c_hypo >>= peekCString
|
||||
| i < n = do cid <- (#peek PgfHypo, cid) c_hypo >>= peekUtf8CString
|
||||
ty <- (#peek PgfHypo, type) c_hypo >>= peekType
|
||||
bt <- fmap toBindType ((#peek PgfHypo, bind_type) c_hypo)
|
||||
hs <- peekHypos (plusPtr c_hypo (#size PgfHypo)) (i+1) n
|
||||
@@ -190,7 +191,7 @@ graphvizAbstractTree p e =
|
||||
exn <- gu_new_exn tmpPl
|
||||
pgf_graphviz_abstract_tree (pgf p) (expr e) out exn
|
||||
s <- gu_string_buf_freeze sb tmpPl
|
||||
peekCString s
|
||||
peekUtf8CString s
|
||||
|
||||
|
||||
graphvizParseTree :: Concr -> Expr -> String
|
||||
@@ -201,7 +202,7 @@ graphvizParseTree c e =
|
||||
exn <- gu_new_exn tmpPl
|
||||
pgf_graphviz_parse_tree (concr c) (expr e) out exn
|
||||
s <- gu_string_buf_freeze sb tmpPl
|
||||
peekCString s
|
||||
peekUtf8CString s
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
-- Functions using Concr
|
||||
@@ -210,15 +211,17 @@ graphvizParseTree c e =
|
||||
type MorphoAnalysis = (Fun,String,Float)
|
||||
|
||||
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
|
||||
lookupMorpho (Concr concr master) sent = unsafePerformIO $
|
||||
do ref <- newIORef []
|
||||
allocaBytes (#size PgfMorphoCallback) $ \cback ->
|
||||
do fptr <- wrapLookupMorphoCallback (getAnalysis ref)
|
||||
(#poke PgfMorphoCallback, callback) cback fptr
|
||||
withCString sent $ \c_sent ->
|
||||
pgf_lookup_morpho concr c_sent cback nullPtr
|
||||
freeHaskellFunPtr fptr
|
||||
readIORef ref
|
||||
lookupMorpho (Concr concr master) sent =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl -> do
|
||||
ref <- newIORef []
|
||||
cback <- gu_malloc tmpPl (#size PgfMorphoCallback)
|
||||
fptr <- wrapLookupMorphoCallback (getAnalysis ref)
|
||||
(#poke PgfMorphoCallback, callback) cback fptr
|
||||
c_sent <- newUtf8CString sent tmpPl
|
||||
pgf_lookup_morpho concr c_sent cback nullPtr
|
||||
freeHaskellFunPtr fptr
|
||||
readIORef ref
|
||||
|
||||
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
|
||||
fullFormLexicon lang =
|
||||
@@ -237,7 +240,7 @@ fullFormLexicon lang =
|
||||
if ffEntry == nullPtr
|
||||
then do finalizeForeignPtr fpl
|
||||
return []
|
||||
else do tok <- peekCString =<< pgf_fullform_get_string ffEntry
|
||||
else do tok <- peekUtf8CString =<< pgf_fullform_get_string ffEntry
|
||||
ref <- newIORef []
|
||||
allocaBytes (#size PgfMorphoCallback) $ \cback ->
|
||||
do fptr <- wrapLookupMorphoCallback (getAnalysis ref)
|
||||
@@ -250,8 +253,8 @@ fullFormLexicon lang =
|
||||
getAnalysis :: IORef [MorphoAnalysis] -> LookupMorphoCallback
|
||||
getAnalysis ref self c_lemma c_anal prob exn = do
|
||||
ans <- readIORef ref
|
||||
lemma <- peekCString c_lemma
|
||||
anal <- peekCString c_anal
|
||||
lemma <- peekUtf8CString c_lemma
|
||||
anal <- peekUtf8CString c_anal
|
||||
writeIORef ref ((lemma, anal, prob):ans)
|
||||
|
||||
parse :: Concr -> Cat -> String -> Either String [(Expr,Float)]
|
||||
@@ -274,26 +277,26 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
-> Either String [(Expr,Float)]
|
||||
parseWithHeuristics lang cat sent heuristic callbacks =
|
||||
unsafePerformIO $
|
||||
do parsePl <- gu_new_pool
|
||||
exprPl <- gu_new_pool
|
||||
do exprPl <- gu_new_pool
|
||||
parsePl <- gu_new_pool
|
||||
exn <- gu_new_exn parsePl
|
||||
enum <- withCString cat $ \cat ->
|
||||
withCString sent $ \sent -> do
|
||||
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
|
||||
pgf_parse_with_heuristics (concr lang) cat sent heuristic callbacks_map exn parsePl exprPl
|
||||
cat <- newUtf8CString cat parsePl
|
||||
sent <- newUtf8CString sent parsePl
|
||||
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
|
||||
enum <- pgf_parse_with_heuristics (concr lang) cat sent heuristic callbacks_map exn parsePl exprPl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
if is_parse_error
|
||||
then do c_tok <- (#peek GuExn, data.data) exn
|
||||
tok <- peekCString c_tok
|
||||
tok <- peekUtf8CString c_tok
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return (Left tok)
|
||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekCString c_msg
|
||||
msg <- peekUtf8CString c_msg
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
throwIO (PGFError msg)
|
||||
@@ -308,15 +311,15 @@ parseWithHeuristics lang cat sent heuristic callbacks =
|
||||
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> String -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
|
||||
mkCallbacksMap concr callbacks pool = do
|
||||
callbacks_map <- pgf_new_callbacks_map concr pool
|
||||
forM_ callbacks $ \(cat,match) ->
|
||||
withCString cat $ \ccat -> do
|
||||
match <- wrapLiteralMatchCallback (match_callback match)
|
||||
predict <- wrapLiteralPredictCallback predict_callback
|
||||
hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool
|
||||
forM_ callbacks $ \(cat,match) -> do
|
||||
ccat <- newUtf8CString cat pool
|
||||
match <- wrapLiteralMatchCallback (match_callback match)
|
||||
predict <- wrapLiteralPredictCallback predict_callback
|
||||
hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool
|
||||
return callbacks_map
|
||||
where
|
||||
match_callback match _ clin_idx csentence poffset out_pool = do
|
||||
sentence <- peekCString csentence
|
||||
sentence <- peekUtf8CString csentence
|
||||
coffset <- peek poffset
|
||||
case match (fromIntegral clin_idx) sentence (fromIntegral coffset) of
|
||||
Nothing -> return nullPtr
|
||||
@@ -361,26 +364,26 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
do parsePl <- gu_new_pool
|
||||
exprPl <- gu_new_pool
|
||||
exn <- gu_new_exn parsePl
|
||||
enum <- withCString cat $ \cat ->
|
||||
withCString sent $ \sent -> do
|
||||
predictPtr <- maybe (return nullFunPtr) (wrapOracleCallback . oracleWrapper) predict
|
||||
completePtr <- maybe (return nullFunPtr) (wrapOracleCallback . oracleWrapper) complete
|
||||
literalPtr <- maybe (return nullFunPtr) (wrapOracleLiteralCallback . oracleLiteralWrapper) literal
|
||||
cback <- hspgf_new_oracle_callback sent predictPtr completePtr literalPtr parsePl
|
||||
pgf_parse_with_oracle (concr lang) cat sent cback exn parsePl exprPl
|
||||
cat <- newUtf8CString cat parsePl
|
||||
sent <- newUtf8CString sent parsePl
|
||||
predictPtr <- maybe (return nullFunPtr) (wrapOracleCallback . oracleWrapper) predict
|
||||
completePtr <- maybe (return nullFunPtr) (wrapOracleCallback . oracleWrapper) complete
|
||||
literalPtr <- maybe (return nullFunPtr) (wrapOracleLiteralCallback . oracleLiteralWrapper) literal
|
||||
cback <- hspgf_new_oracle_callback sent predictPtr completePtr literalPtr parsePl
|
||||
enum <- pgf_parse_with_oracle (concr lang) cat sent cback exn parsePl exprPl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
if is_parse_error
|
||||
then do c_tok <- (#peek GuExn, data.data) exn
|
||||
tok <- peekCString c_tok
|
||||
tok <- peekUtf8CString c_tok
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return (Left tok)
|
||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekCString c_msg
|
||||
msg <- peekUtf8CString c_msg
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
throwIO (PGFError msg)
|
||||
@@ -393,13 +396,13 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
return (Right exprs)
|
||||
where
|
||||
oracleWrapper oracle catPtr lblPtr offset = do
|
||||
cat <- peekCString catPtr
|
||||
lbl <- peekCString lblPtr
|
||||
cat <- peekUtf8CString catPtr
|
||||
lbl <- peekUtf8CString lblPtr
|
||||
return (oracle cat lbl (fromIntegral offset))
|
||||
|
||||
oracleLiteralWrapper oracle catPtr lblPtr poffset out_pool = do
|
||||
cat <- peekCString catPtr
|
||||
lbl <- peekCString lblPtr
|
||||
cat <- peekUtf8CString catPtr
|
||||
lbl <- peekUtf8CString lblPtr
|
||||
offset <- peek poffset
|
||||
case oracle cat lbl (fromIntegral offset) of
|
||||
Just (e,prob,offset) ->
|
||||
@@ -425,7 +428,8 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
|
||||
hasLinearization :: Concr -> Fun -> Bool
|
||||
hasLinearization lang id = unsafePerformIO $
|
||||
withCString id (pgf_has_linearization (concr lang))
|
||||
withGuPool $ \pl ->
|
||||
newUtf8CString id pl >>= pgf_has_linearization (concr lang)
|
||||
|
||||
linearize :: Concr -> Expr -> String
|
||||
linearize lang e = unsafePerformIO $
|
||||
@@ -441,11 +445,11 @@ linearize lang e = unsafePerformIO $
|
||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekCString c_msg
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
else do lin <- gu_string_buf_freeze sb pl
|
||||
peekCString lin
|
||||
peekUtf8CString lin
|
||||
|
||||
linearizeAll :: Concr -> Expr -> [String]
|
||||
linearizeAll lang e = unsafePerformIO $
|
||||
@@ -473,7 +477,7 @@ linearizeAll lang e = unsafePerformIO $
|
||||
then collect cts exn pl
|
||||
else throwExn exn pl
|
||||
else do lin <- gu_string_buf_freeze sb tmpPl
|
||||
s <- peekCString lin
|
||||
s <- peekUtf8CString lin
|
||||
ss <- unsafeInterleaveIO (collect cts exn pl)
|
||||
return (s:ss)
|
||||
|
||||
@@ -481,7 +485,7 @@ linearizeAll lang e = unsafePerformIO $
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekCString c_msg
|
||||
msg <- peekUtf8CString c_msg
|
||||
gu_pool_free pl
|
||||
throwIO (PGFError msg)
|
||||
else do gu_pool_free pl
|
||||
@@ -500,7 +504,7 @@ alignWords lang e = unsafePerformIO $
|
||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekCString c_msg
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
else do len <- (#peek GuSeq, len) seq
|
||||
@@ -510,7 +514,7 @@ alignWords lang e = unsafePerformIO $
|
||||
peekAlignmentPhrase :: Ptr () -> IO (String, [Int])
|
||||
peekAlignmentPhrase ptr = do
|
||||
c_phrase <- (#peek PgfAlignmentPhrase, phrase) ptr
|
||||
phrase <- peekCString c_phrase
|
||||
phrase <- peekUtf8CString c_phrase
|
||||
n_fids <- (#peek PgfAlignmentPhrase, n_fids) ptr
|
||||
(fids :: [CInt]) <- peekArray (fromIntegral (n_fids :: CInt)) (ptr `plusPtr` (#offset PgfAlignmentPhrase, fids))
|
||||
return (phrase, map fromIntegral fids)
|
||||
@@ -532,7 +536,7 @@ functions p =
|
||||
getFunctions :: IORef [String] -> MapItorCallback
|
||||
getFunctions ref itor key value exn = do
|
||||
names <- readIORef ref
|
||||
name <- peekCString (castPtr key)
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
writeIORef ref $! (name : names)
|
||||
|
||||
categories :: PGF -> [Cat]
|
||||
|
||||
@@ -2,12 +2,13 @@
|
||||
|
||||
module PGF2.FFI where
|
||||
|
||||
import Foreign ( alloca, poke )
|
||||
import Foreign.C
|
||||
--import Foreign.C.String
|
||||
import Foreign.Ptr
|
||||
import Foreign.ForeignPtr
|
||||
import Control.Exception
|
||||
import GHC.Ptr
|
||||
import Data.Int(Int32)
|
||||
|
||||
data PGF = PGF {pgf :: Ptr PgfPGF, pgfMaster :: ForeignPtr GuPool}
|
||||
data Concr = Concr {concr :: Ptr PgfConcr, concrMaster :: PGF}
|
||||
@@ -72,10 +73,16 @@ foreign import ccall "gu/file.h gu_file_in"
|
||||
|
||||
foreign import ccall "gu/enum.h gu_enum_next"
|
||||
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
|
||||
|
||||
|
||||
foreign import ccall "gu/string.h gu_string_buf_freeze"
|
||||
gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString
|
||||
|
||||
foreign import ccall unsafe "gu/utf8.h gu_utf8_decode"
|
||||
gu_utf8_decode :: Ptr CString -> IO Int32
|
||||
|
||||
foreign import ccall unsafe "gu/utf8.h gu_utf8_encode"
|
||||
gu_utf8_encode :: Int32 -> Ptr CString -> IO ()
|
||||
|
||||
withGuPool :: (Ptr GuPool -> IO a) -> IO a
|
||||
withGuPool f = bracket gu_new_pool gu_pool_free f
|
||||
|
||||
@@ -85,6 +92,34 @@ newOut pool =
|
||||
out <- gu_string_buf_out sb
|
||||
return (sb,out)
|
||||
|
||||
peekUtf8CString :: CString -> IO String
|
||||
peekUtf8CString ptr =
|
||||
alloca $ \pptr ->
|
||||
poke pptr ptr >> decode pptr
|
||||
where
|
||||
decode pptr = do
|
||||
x <- gu_utf8_decode pptr
|
||||
if x == 0
|
||||
then return []
|
||||
else do cs <- decode pptr
|
||||
return (((toEnum . fromEnum) x) : cs)
|
||||
|
||||
newUtf8CString :: String -> Ptr GuPool -> IO CString
|
||||
newUtf8CString s pool = do
|
||||
-- An UTF8 character takes up to 6 bytes. We allocate enough
|
||||
-- memory for the worst case. This is wasteful but those
|
||||
-- strings are usually allocated only temporary.
|
||||
ptr <- gu_malloc pool (fromIntegral (length s * 6+1))
|
||||
alloca $ \pptr ->
|
||||
poke pptr ptr >> encode s pptr
|
||||
return ptr
|
||||
where
|
||||
encode [] pptr = do
|
||||
gu_utf8_encode 0 pptr
|
||||
encode (c:cs) pptr = do
|
||||
gu_utf8_encode ((toEnum . fromEnum) c) pptr
|
||||
encode cs pptr
|
||||
|
||||
------------------------------------------------------------------
|
||||
-- libpgf API
|
||||
|
||||
|
||||
Reference in New Issue
Block a user