mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-20 00:22:51 -06:00
the parser in the C runtime can now detect incomplete sentences just like the parser in the Haskell runtime. This is also reflected in all bindings.
This commit is contained in:
@@ -63,7 +63,7 @@ module PGF2 (-- * PGF
|
||||
|
||||
alignWords,
|
||||
-- ** Parsing
|
||||
parse, parseWithHeuristics,
|
||||
ParseOutput(..), parse, parseWithHeuristics,
|
||||
-- ** Sentence Lookup
|
||||
lookupSentence,
|
||||
-- ** Generation
|
||||
@@ -480,7 +480,15 @@ getAnalysis ref self c_lemma c_anal prob exn = do
|
||||
anal <- peekUtf8CString c_anal
|
||||
writeIORef ref ((lemma, anal, prob):ans)
|
||||
|
||||
parse :: Concr -> Type -> String -> Either String [(Expr,Float)]
|
||||
-- | This data type encodes the different outcomes which you could get from the parser.
|
||||
data ParseOutput
|
||||
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
|
||||
-- The string is the token where the parser have failed.
|
||||
| ParseOk [(Expr,Float)] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
|
||||
-- The list should be non-empty.
|
||||
| ParseIncomplete -- ^ The sentence is not complete.
|
||||
|
||||
parse :: Concr -> Type -> String -> ParseOutput
|
||||
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
|
||||
|
||||
parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
@@ -497,7 +505,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
-- the input sentence; the current offset in the sentence.
|
||||
-- If a literal has been recognized then the output should
|
||||
-- be Just (expr,probability,end_offset)
|
||||
-> Either String [(Expr,Float)]
|
||||
-> ParseOutput
|
||||
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||
unsafePerformIO $
|
||||
do exprPl <- gu_new_pool
|
||||
@@ -510,11 +518,19 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||
if failed
|
||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
if is_parse_error
|
||||
then do c_tok <- (#peek GuExn, data.data) exn
|
||||
tok <- peekUtf8CString c_tok
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return (Left tok)
|
||||
then do c_err <- (#peek GuExn, data.data) exn
|
||||
c_incomplete <- (#peek PgfParseError, incomplete) c_err
|
||||
if (c_incomplete :: CInt) == 0
|
||||
then do c_offset <- (#peek PgfParseError, offset) c_err
|
||||
token_ptr <- (#peek PgfParseError, token_ptr) c_err
|
||||
token_len <- (#peek PgfParseError, token_len) c_err
|
||||
tok <- peekUtf8CStringLen token_ptr token_len
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
|
||||
else do gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return ParseIncomplete
|
||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
@@ -528,7 +544,7 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
|
||||
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
|
||||
return (Right exprs)
|
||||
return (ParseOk exprs)
|
||||
|
||||
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
|
||||
mkCallbacksMap concr callbacks pool = do
|
||||
@@ -595,7 +611,7 @@ parseWithOracle :: Concr -- ^ the language with which we parse
|
||||
-> Cat -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> Oracle
|
||||
-> Either String [(Expr,Float)]
|
||||
-> ParseOutput
|
||||
parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
unsafePerformIO $
|
||||
do parsePl <- gu_new_pool
|
||||
@@ -612,11 +628,19 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
if failed
|
||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
if is_parse_error
|
||||
then do c_tok <- (#peek GuExn, data.data) exn
|
||||
tok <- peekUtf8CString c_tok
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return (Left tok)
|
||||
then do c_err <- (#peek GuExn, data.data) exn
|
||||
c_incomplete <- (#peek PgfParseError, incomplete) c_err
|
||||
if (c_incomplete :: CInt) == 0
|
||||
then do c_offset <- (#peek PgfParseError, offset) c_err
|
||||
token_ptr <- (#peek PgfParseError, token_ptr) c_err
|
||||
token_len <- (#peek PgfParseError, token_len) c_err
|
||||
tok <- peekUtf8CStringLen token_ptr token_len
|
||||
gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
|
||||
else do gu_pool_free parsePl
|
||||
gu_pool_free exprPl
|
||||
return ParseIncomplete
|
||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
@@ -630,7 +654,7 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
|
||||
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
|
||||
return (Right exprs)
|
||||
return (ParseOk exprs)
|
||||
where
|
||||
oracleWrapper oracle catPtr lblPtr offset = do
|
||||
cat <- peekUtf8CString catPtr
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
module PGF2.FFI where
|
||||
|
||||
import Foreign ( alloca, poke )
|
||||
import Foreign ( alloca, peek, poke )
|
||||
import Foreign.C
|
||||
import Foreign.Ptr
|
||||
import Foreign.ForeignPtr
|
||||
@@ -116,6 +116,19 @@ peekUtf8CString ptr =
|
||||
else do cs <- decode pptr
|
||||
return (((toEnum . fromEnum) x) : cs)
|
||||
|
||||
peekUtf8CStringLen :: CString -> CInt -> IO String
|
||||
peekUtf8CStringLen ptr len =
|
||||
alloca $ \pptr ->
|
||||
poke pptr ptr >> decode pptr (ptr `plusPtr` fromIntegral len)
|
||||
where
|
||||
decode pptr end = do
|
||||
ptr <- peek pptr
|
||||
if ptr >= end
|
||||
then return []
|
||||
else do x <- gu_utf8_decode pptr
|
||||
cs <- decode pptr end
|
||||
return (((toEnum . fromEnum) x) : cs)
|
||||
|
||||
newUtf8CString :: String -> Ptr GuPool -> IO CString
|
||||
newUtf8CString s pool = do
|
||||
-- An UTF8 character takes up to 6 bytes. We allocate enough
|
||||
|
||||
@@ -37,18 +37,18 @@ execute cmd =
|
||||
P lang s -> do pgf <- gets fst
|
||||
c <- getConcr' pgf lang
|
||||
case parse c (startCat pgf) s of
|
||||
Left tok -> do put (pgf,[])
|
||||
putln ("Parse error: "++tok)
|
||||
Right ts -> do put (pgf,map show ts)
|
||||
pop
|
||||
ParseFailed _ tok -> do put (pgf,[])
|
||||
putln ("Parse error: "++tok)
|
||||
ParseOk ts -> do put (pgf,map show ts)
|
||||
pop
|
||||
T from to s -> do pgf <- gets fst
|
||||
cfrom <- getConcr' pgf from
|
||||
cto <- getConcr' pgf to
|
||||
case parse cfrom (startCat pgf) s of
|
||||
Left tok -> do put (pgf,[])
|
||||
putln ("Parse error: "++tok)
|
||||
Right ts -> do put (pgf,map (linearize cto.fst) ts)
|
||||
pop
|
||||
ParseFailed _ tok -> do put (pgf,[])
|
||||
putln ("Parse error: "++tok)
|
||||
ParseOk ts -> do put (pgf,map (linearize cto.fst) ts)
|
||||
pop
|
||||
I path -> do pgf <- liftIO (readPGF path)
|
||||
putln . unwords . M.keys $ languages pgf
|
||||
put (pgf,[])
|
||||
|
||||
Reference in New Issue
Block a user