mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-24 03:52:50 -06:00
The parser in the C runtime can now detect incomplete sentences just like the parser in the Haskell runtime. This is also reflected in all bindings.
This commit is contained in:
@@ -154,12 +154,12 @@ or by calling __next__ if you are using Python 3:
|
|||||||
</pre>
|
</pre>
|
||||||
</span>
|
</span>
|
||||||
<span class="haskell">
|
<span class="haskell">
|
||||||
This gives you a result of type <tt>Either String [(Expr, Float)]</tt>.
|
This gives you a result of type <tt>ParseOutput</tt>.
|
||||||
If the result is <tt>Left</tt> then the parser has failed and you will
|
If the result is <tt>ParseFailed</tt> then the parser has failed and you will
|
||||||
get the token where the parser got stuck. If the parsing was successful
|
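get the offset and the token where the parser got stuck. If the result is <tt>ParseIncomplete</tt> then the input ended before a complete sentence was found. If the parsing was successful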
|
||||||
then you get a potentially infinite list of parse results:
|
then you get <tt>ParseOk</tt> with a potentially infinite list of parse results:
|
||||||
<pre class="haskell">
|
<pre class="haskell">
|
||||||
Prelude PGF2> let Right ((e,p):rest) = res
|
Prelude PGF2> let ParseOk ((e,p):rest) = res
|
||||||
</pre>
|
</pre>
|
||||||
</span>
|
</span>
|
||||||
<span class="java">
|
<span class="java">
|
||||||
|
|||||||
@@ -636,10 +636,12 @@ pgfCommands = Map.fromList [
|
|||||||
cncs = optConcs env opts
|
cncs = optConcs env opts
|
||||||
parsed rs = Piped (Exprs ts,unlines msgs)
|
parsed rs = Piped (Exprs ts,unlines msgs)
|
||||||
where
|
where
|
||||||
ts = [hsExpr t|Right ts<-rs,(t,p)<-takeOptNum opts ts]
|
ts = [hsExpr t|ParseOk ts<-rs,(t,p)<-takeOptNum opts ts]
|
||||||
msgs = concatMap (either err ok) rs
|
msgs = concatMap mkMsg rs
|
||||||
err msg = ["Parse failed: "++msg]
|
|
||||||
ok = map (PGF2.showExpr [] . fst).takeOptNum opts
|
mkMsg (ParseOk ts) = (map (PGF2.showExpr [] . fst).takeOptNum opts) ts
|
||||||
|
mkMsg (ParseFailed _ tok) = ["Parse failed: "++tok]
|
||||||
|
mkMsg (ParseIncomplete) = ["The sentence is incomplete"]
|
||||||
|
|
||||||
optLins env opts ts = case opts of
|
optLins env opts ts = case opts of
|
||||||
_ | isOpt "groups" opts ->
|
_ | isOpt "groups" opts ->
|
||||||
|
|||||||
@@ -2139,30 +2139,37 @@ pgf_parse_result_enum_next(GuEnum* self, void* to, GuPool* pool)
|
|||||||
*(PgfExprProb**)to = pgf_parse_result_next(ps);
|
*(PgfExprProb**)to = pgf_parse_result_next(ps);
|
||||||
}
|
}
|
||||||
|
|
||||||
static GuString
|
static PgfParseError*
|
||||||
pgf_parsing_last_token(PgfParsing* ps, GuPool* pool)
|
pgf_parsing_new_exception(PgfParsing* ps, GuPool* pool)
|
||||||
{
|
{
|
||||||
if (ps->before == NULL)
|
const uint8_t* p = (uint8_t*) ps->sentence;
|
||||||
return "";
|
const uint8_t* end = p + (ps->before ? ps->before->end_offset : 0);
|
||||||
|
|
||||||
const uint8_t* start = (uint8_t*) ps->sentence;
|
PgfParseError* err = gu_new(PgfParseError, pool);
|
||||||
const uint8_t* end = (uint8_t*) ps->sentence + ps->before->end_offset;
|
err->incomplete= (*end == 0);
|
||||||
|
err->offset = 0;
|
||||||
|
err->token_ptr = (char*) p;
|
||||||
|
|
||||||
const uint8_t* p = start;
|
|
||||||
while (p < end) {
|
while (p < end) {
|
||||||
if (gu_ucs_is_space(gu_utf8_decode(&p))) {
|
if (gu_ucs_is_space(gu_utf8_decode(&p))) {
|
||||||
start = p;
|
err->token_ptr = (char*) p;
|
||||||
}
|
}
|
||||||
|
err->offset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err->incomplete) {
|
||||||
|
err->token_ptr = NULL;
|
||||||
|
err->token_len = 0;
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (*p && !gu_ucs_is_space(gu_utf8_decode(&p))) {
|
while (*p && !gu_ucs_is_space(gu_utf8_decode(&p))) {
|
||||||
end = p;
|
end = p;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* tok = gu_malloc(pool, end-start+1);
|
err->token_len = ((char*)end)-err->token_ptr;
|
||||||
memcpy(tok, start, (end-start));
|
|
||||||
tok[end-start] = 0;
|
return err;
|
||||||
return tok;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PGF_API GuEnum*
|
PGF_API GuEnum*
|
||||||
@@ -2204,7 +2211,7 @@ pgf_parse_with_heuristics(PgfConcr* concr, PgfType* typ, GuString sentence,
|
|||||||
while (gu_buf_length(ps->expr_queue) == 0) {
|
while (gu_buf_length(ps->expr_queue) == 0) {
|
||||||
if (!pgf_parsing_proceed(ps)) {
|
if (!pgf_parsing_proceed(ps)) {
|
||||||
GuExnData* exn = gu_raise(err, PgfParseError);
|
GuExnData* exn = gu_raise(err, PgfParseError);
|
||||||
exn->data = (void*) pgf_parsing_last_token(ps, exn->pool);
|
exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2249,7 +2256,7 @@ pgf_parse_with_oracle(PgfConcr* concr, PgfType* typ,
|
|||||||
while (gu_buf_length(ps->expr_queue) == 0) {
|
while (gu_buf_length(ps->expr_queue) == 0) {
|
||||||
if (!pgf_parsing_proceed(ps)) {
|
if (!pgf_parsing_proceed(ps)) {
|
||||||
GuExnData* exn = gu_raise(err, PgfParseError);
|
GuExnData* exn = gu_raise(err, PgfParseError);
|
||||||
exn->data = (void*) pgf_parsing_last_token(ps, exn->pool);
|
exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2312,7 +2319,7 @@ pgf_complete(PgfConcr* concr, PgfType* type, GuString sentence,
|
|||||||
while (ps->before->end_offset < len) {
|
while (ps->before->end_offset < len) {
|
||||||
if (!pgf_parsing_proceed(ps)) {
|
if (!pgf_parsing_proceed(ps)) {
|
||||||
GuExnData* exn = gu_raise(err, PgfParseError);
|
GuExnData* exn = gu_raise(err, PgfParseError);
|
||||||
exn->data = (void*) pgf_parsing_last_token(ps, exn->pool);
|
exn->data = (void*) pgf_parsing_new_exception(ps, exn->pool);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -122,6 +122,13 @@ PGF_API_DECL PgfExprEnum*
|
|||||||
pgf_generate_all(PgfPGF* pgf, PgfType* ty,
|
pgf_generate_all(PgfPGF* pgf, PgfType* ty,
|
||||||
GuExn* err, GuPool* pool, GuPool* out_pool);
|
GuExn* err, GuPool* pool, GuPool* out_pool);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int incomplete; // non-zero if the sentence is incomplete, 0 otherwise
|
||||||
|
size_t offset;
|
||||||
|
const char* token_ptr;
|
||||||
|
size_t token_len;
|
||||||
|
} PgfParseError;
|
||||||
|
|
||||||
PGF_API_DECL PgfExprEnum*
|
PGF_API_DECL PgfExprEnum*
|
||||||
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
|
pgf_parse(PgfConcr* concr, PgfType* typ, GuString sentence,
|
||||||
GuExn* err, GuPool* pool, GuPool* out_pool);
|
GuExn* err, GuPool* pool, GuPool* out_pool);
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ module PGF2 (-- * PGF
|
|||||||
|
|
||||||
alignWords,
|
alignWords,
|
||||||
-- ** Parsing
|
-- ** Parsing
|
||||||
parse, parseWithHeuristics,
|
ParseOutput(..), parse, parseWithHeuristics,
|
||||||
-- ** Sentence Lookup
|
-- ** Sentence Lookup
|
||||||
lookupSentence,
|
lookupSentence,
|
||||||
-- ** Generation
|
-- ** Generation
|
||||||
@@ -480,7 +480,15 @@ getAnalysis ref self c_lemma c_anal prob exn = do
|
|||||||
anal <- peekUtf8CString c_anal
|
anal <- peekUtf8CString c_anal
|
||||||
writeIORef ref ((lemma, anal, prob):ans)
|
writeIORef ref ((lemma, anal, prob):ans)
|
||||||
|
|
||||||
parse :: Concr -> Type -> String -> Either String [(Expr,Float)]
|
-- | This data type encodes the different outcomes which you could get from the parser.
|
||||||
|
data ParseOutput
|
||||||
|
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
|
||||||
|
-- The string is the token where the parser has failed.
|
||||||
|
| ParseOk [(Expr,Float)] -- ^ If the parsing and the type checking are successful we get a list of abstract syntax trees.
|
||||||
|
-- The list should be non-empty.
|
||||||
|
| ParseIncomplete -- ^ The sentence is not complete.
|
||||||
|
|
||||||
|
parse :: Concr -> Type -> String -> ParseOutput
|
||||||
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
|
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
|
||||||
|
|
||||||
parseWithHeuristics :: Concr -- ^ the language with which we parse
|
parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||||
@@ -497,7 +505,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
|
|||||||
-- the input sentence; the current offset in the sentence.
|
-- the input sentence; the current offset in the sentence.
|
||||||
-- If a literal has been recognized then the output should
|
-- If a literal has been recognized then the output should
|
||||||
-- be Just (expr,probability,end_offset)
|
-- be Just (expr,probability,end_offset)
|
||||||
-> Either String [(Expr,Float)]
|
-> ParseOutput
|
||||||
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||||
unsafePerformIO $
|
unsafePerformIO $
|
||||||
do exprPl <- gu_new_pool
|
do exprPl <- gu_new_pool
|
||||||
@@ -510,11 +518,19 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
|||||||
if failed
|
if failed
|
||||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||||
if is_parse_error
|
if is_parse_error
|
||||||
then do c_tok <- (#peek GuExn, data.data) exn
|
then do c_err <- (#peek GuExn, data.data) exn
|
||||||
tok <- peekUtf8CString c_tok
|
c_incomplete <- (#peek PgfParseError, incomplete) c_err
|
||||||
gu_pool_free parsePl
|
if (c_incomplete :: CInt) == 0
|
||||||
gu_pool_free exprPl
|
then do c_offset <- (#peek PgfParseError, offset) c_err
|
||||||
return (Left tok)
|
token_ptr <- (#peek PgfParseError, token_ptr) c_err
|
||||||
|
token_len <- (#peek PgfParseError, token_len) c_err
|
||||||
|
tok <- peekUtf8CStringLen token_ptr token_len
|
||||||
|
gu_pool_free parsePl
|
||||||
|
gu_pool_free exprPl
|
||||||
|
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
|
||||||
|
else do gu_pool_free parsePl
|
||||||
|
gu_pool_free exprPl
|
||||||
|
return ParseIncomplete
|
||||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||||
if is_exn
|
if is_exn
|
||||||
then do c_msg <- (#peek GuExn, data.data) exn
|
then do c_msg <- (#peek GuExn, data.data) exn
|
||||||
@@ -528,7 +544,7 @@ parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
|||||||
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
|
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
|
||||||
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||||
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
|
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
|
||||||
return (Right exprs)
|
return (ParseOk exprs)
|
||||||
|
|
||||||
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
|
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
|
||||||
mkCallbacksMap concr callbacks pool = do
|
mkCallbacksMap concr callbacks pool = do
|
||||||
@@ -595,7 +611,7 @@ parseWithOracle :: Concr -- ^ the language with which we parse
|
|||||||
-> Cat -- ^ the start category
|
-> Cat -- ^ the start category
|
||||||
-> String -- ^ the input sentence
|
-> String -- ^ the input sentence
|
||||||
-> Oracle
|
-> Oracle
|
||||||
-> Either String [(Expr,Float)]
|
-> ParseOutput
|
||||||
parseWithOracle lang cat sent (predict,complete,literal) =
|
parseWithOracle lang cat sent (predict,complete,literal) =
|
||||||
unsafePerformIO $
|
unsafePerformIO $
|
||||||
do parsePl <- gu_new_pool
|
do parsePl <- gu_new_pool
|
||||||
@@ -612,11 +628,19 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
|||||||
if failed
|
if failed
|
||||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||||
if is_parse_error
|
if is_parse_error
|
||||||
then do c_tok <- (#peek GuExn, data.data) exn
|
then do c_err <- (#peek GuExn, data.data) exn
|
||||||
tok <- peekUtf8CString c_tok
|
c_incomplete <- (#peek PgfParseError, incomplete) c_err
|
||||||
gu_pool_free parsePl
|
if (c_incomplete :: CInt) == 0
|
||||||
gu_pool_free exprPl
|
then do c_offset <- (#peek PgfParseError, offset) c_err
|
||||||
return (Left tok)
|
token_ptr <- (#peek PgfParseError, token_ptr) c_err
|
||||||
|
token_len <- (#peek PgfParseError, token_len) c_err
|
||||||
|
tok <- peekUtf8CStringLen token_ptr token_len
|
||||||
|
gu_pool_free parsePl
|
||||||
|
gu_pool_free exprPl
|
||||||
|
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
|
||||||
|
else do gu_pool_free parsePl
|
||||||
|
gu_pool_free exprPl
|
||||||
|
return ParseIncomplete
|
||||||
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
else do is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||||
if is_exn
|
if is_exn
|
||||||
then do c_msg <- (#peek GuExn, data.data) exn
|
then do c_msg <- (#peek GuExn, data.data) exn
|
||||||
@@ -630,7 +654,7 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
|||||||
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
|
else do parseFPl <- newForeignPtr gu_pool_finalizer parsePl
|
||||||
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||||
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
|
exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl)
|
||||||
return (Right exprs)
|
return (ParseOk exprs)
|
||||||
where
|
where
|
||||||
oracleWrapper oracle catPtr lblPtr offset = do
|
oracleWrapper oracle catPtr lblPtr offset = do
|
||||||
cat <- peekUtf8CString catPtr
|
cat <- peekUtf8CString catPtr
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
module PGF2.FFI where
|
module PGF2.FFI where
|
||||||
|
|
||||||
import Foreign ( alloca, poke )
|
import Foreign ( alloca, peek, poke )
|
||||||
import Foreign.C
|
import Foreign.C
|
||||||
import Foreign.Ptr
|
import Foreign.Ptr
|
||||||
import Foreign.ForeignPtr
|
import Foreign.ForeignPtr
|
||||||
@@ -116,6 +116,19 @@ peekUtf8CString ptr =
|
|||||||
else do cs <- decode pptr
|
else do cs <- decode pptr
|
||||||
return (((toEnum . fromEnum) x) : cs)
|
return (((toEnum . fromEnum) x) : cs)
|
||||||
|
|
||||||
|
peekUtf8CStringLen :: CString -> CInt -> IO String
|
||||||
|
peekUtf8CStringLen ptr len =
|
||||||
|
alloca $ \pptr ->
|
||||||
|
poke pptr ptr >> decode pptr (ptr `plusPtr` fromIntegral len)
|
||||||
|
where
|
||||||
|
decode pptr end = do
|
||||||
|
ptr <- peek pptr
|
||||||
|
if ptr >= end
|
||||||
|
then return []
|
||||||
|
else do x <- gu_utf8_decode pptr
|
||||||
|
cs <- decode pptr end
|
||||||
|
return (((toEnum . fromEnum) x) : cs)
|
||||||
|
|
||||||
newUtf8CString :: String -> Ptr GuPool -> IO CString
|
newUtf8CString :: String -> Ptr GuPool -> IO CString
|
||||||
newUtf8CString s pool = do
|
newUtf8CString s pool = do
|
||||||
-- An UTF8 character takes up to 6 bytes. We allocate enough
|
-- An UTF8 character takes up to 6 bytes. We allocate enough
|
||||||
|
|||||||
@@ -37,18 +37,18 @@ execute cmd =
|
|||||||
P lang s -> do pgf <- gets fst
|
P lang s -> do pgf <- gets fst
|
||||||
c <- getConcr' pgf lang
|
c <- getConcr' pgf lang
|
||||||
case parse c (startCat pgf) s of
|
case parse c (startCat pgf) s of
|
||||||
Left tok -> do put (pgf,[])
|
ParseFailed _ tok -> do put (pgf,[])
|
||||||
putln ("Parse error: "++tok)
|
putln ("Parse error: "++tok)
|
||||||
Right ts -> do put (pgf,map show ts)
|
ParseOk ts -> do put (pgf,map show ts)
|
||||||
pop
|
pop
|
||||||
T from to s -> do pgf <- gets fst
|
T from to s -> do pgf <- gets fst
|
||||||
cfrom <- getConcr' pgf from
|
cfrom <- getConcr' pgf from
|
||||||
cto <- getConcr' pgf to
|
cto <- getConcr' pgf to
|
||||||
case parse cfrom (startCat pgf) s of
|
case parse cfrom (startCat pgf) s of
|
||||||
Left tok -> do put (pgf,[])
|
ParseFailed _ tok -> do put (pgf,[])
|
||||||
putln ("Parse error: "++tok)
|
putln ("Parse error: "++tok)
|
||||||
Right ts -> do put (pgf,map (linearize cto.fst) ts)
|
ParseOk ts -> do put (pgf,map (linearize cto.fst) ts)
|
||||||
pop
|
pop
|
||||||
I path -> do pgf <- liftIO (readPGF path)
|
I path -> do pgf <- liftIO (readPGF path)
|
||||||
putln . unwords . M.keys $ languages pgf
|
putln . unwords . M.keys $ languages pgf
|
||||||
put (pgf,[])
|
put (pgf,[])
|
||||||
|
|||||||
@@ -34,10 +34,8 @@ gu2j_string(JNIEnv *env, GuString s) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
JPGF_INTERNAL jstring
|
JPGF_INTERNAL jstring
|
||||||
gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf) {
|
gu2j_string_len(JNIEnv *env, const char* s, size_t len) {
|
||||||
const char* s = gu_string_buf_data(sbuf);
|
|
||||||
const char* utf8 = s;
|
const char* utf8 = s;
|
||||||
size_t len = gu_string_buf_length(sbuf);
|
|
||||||
|
|
||||||
jchar* utf16 = alloca(len*sizeof(jchar));
|
jchar* utf16 = alloca(len*sizeof(jchar));
|
||||||
jchar* dst = utf16;
|
jchar* dst = utf16;
|
||||||
@@ -56,6 +54,11 @@ gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf) {
|
|||||||
return (*env)->NewString(env, utf16, dst-utf16);
|
return (*env)->NewString(env, utf16, dst-utf16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JPGF_INTERNAL jstring
|
||||||
|
gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf) {
|
||||||
|
return gu2j_string_len(env, gu_string_buf_data(sbuf), gu_string_buf_length(sbuf));
|
||||||
|
}
|
||||||
|
|
||||||
JPGF_INTERNAL GuString
|
JPGF_INTERNAL GuString
|
||||||
j2gu_string(JNIEnv *env, jstring s, GuPool* pool) {
|
j2gu_string(JNIEnv *env, jstring s, GuPool* pool) {
|
||||||
GuString str = (*env)->GetStringUTFChars(env, s, 0);
|
GuString str = (*env)->GetStringUTFChars(env, s, 0);
|
||||||
|
|||||||
@@ -20,6 +20,9 @@
|
|||||||
JPGF_INTERNAL_DECL jstring
|
JPGF_INTERNAL_DECL jstring
|
||||||
gu2j_string(JNIEnv *env, GuString s);
|
gu2j_string(JNIEnv *env, GuString s);
|
||||||
|
|
||||||
|
JPGF_INTERNAL_DECL jstring
|
||||||
|
gu2j_string_len(JNIEnv *env, const char* s, size_t len);
|
||||||
|
|
||||||
JPGF_INTERNAL_DECL jstring
|
JPGF_INTERNAL_DECL jstring
|
||||||
gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf);
|
gu2j_string_buf(JNIEnv *env, GuStringBuf* sbuf);
|
||||||
|
|
||||||
|
|||||||
@@ -591,6 +591,30 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba
|
|||||||
j2gu_string(env, jcat, pool), &callback->callback);
|
j2gu_string(env, jcat, pool), &callback->callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
throw_parse_error(JNIEnv *env, PgfParseError* err)
|
||||||
|
{
|
||||||
|
jstring jtoken;
|
||||||
|
if (err->incomplete)
|
||||||
|
jtoken = NULL;
|
||||||
|
else {
|
||||||
|
jtoken = gu2j_string_len(env, err->token_ptr, err->token_len);
|
||||||
|
if (!jtoken)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
jclass exception_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/ParseError");
|
||||||
|
if (!exception_class)
|
||||||
|
return;
|
||||||
|
jmethodID constrId = (*env)->GetMethodID(env, exception_class, "<init>", "(Ljava/lang/String;IZ)V");
|
||||||
|
if (!constrId)
|
||||||
|
return;
|
||||||
|
jobject exception = (*env)->NewObject(env, exception_class, constrId, jtoken, err->offset, err->incomplete);
|
||||||
|
if (!exception)
|
||||||
|
return;
|
||||||
|
(*env)->Throw(env, exception);
|
||||||
|
}
|
||||||
|
|
||||||
JNIEXPORT jobject JNICALL
|
JNIEXPORT jobject JNICALL
|
||||||
Java_org_grammaticalframework_pgf_Parser_parseWithHeuristics
|
Java_org_grammaticalframework_pgf_Parser_parseWithHeuristics
|
||||||
(JNIEnv* env, jclass clazz, jobject jconcr, jstring jstartCat, jstring js, jdouble heuristics, jlong callbacksRef, jobject jpool)
|
(JNIEnv* env, jclass clazz, jobject jconcr, jstring jstartCat, jstring js, jdouble heuristics, jlong callbacksRef, jobject jpool)
|
||||||
@@ -615,8 +639,7 @@ Java_org_grammaticalframework_pgf_Parser_parseWithHeuristics
|
|||||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||||
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", msg);
|
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", msg);
|
||||||
} else if (gu_exn_caught(parse_err, PgfParseError)) {
|
} else if (gu_exn_caught(parse_err, PgfParseError)) {
|
||||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
throw_parse_error(env, (PgfParseError*) gu_exn_caught_data(parse_err));
|
||||||
throw_string_exception(env, "org/grammaticalframework/pgf/ParseError", tok);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
gu_pool_free(out_pool);
|
gu_pool_free(out_pool);
|
||||||
@@ -656,8 +679,7 @@ Java_org_grammaticalframework_pgf_Completer_complete(JNIEnv* env, jclass clazz,
|
|||||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||||
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", msg);
|
throw_string_exception(env, "org/grammaticalframework/pgf/PGFError", msg);
|
||||||
} else if (gu_exn_caught(parse_err, PgfParseError)) {
|
} else if (gu_exn_caught(parse_err, PgfParseError)) {
|
||||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
throw_parse_error(env, (PgfParseError*) gu_exn_caught_data(parse_err));
|
||||||
throw_string_exception(env, "org/grammaticalframework/pgf/ParseError", tok);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
gu_pool_free(pool);
|
gu_pool_free(pool);
|
||||||
|
|||||||
@@ -4,11 +4,26 @@ package org.grammaticalframework.pgf;
|
|||||||
public class ParseError extends Exception {
|
public class ParseError extends Exception {
|
||||||
private static final long serialVersionUID = -6086991674218306569L;
|
private static final long serialVersionUID = -6086991674218306569L;
|
||||||
|
|
||||||
public ParseError(String token) {
|
private String token;
|
||||||
super(token);
|
private int offset;
|
||||||
|
private boolean incomplete;
|
||||||
|
|
||||||
|
public ParseError(String token, int offset, boolean incomplete) {
|
||||||
|
super(incomplete ? "The sentence is incomplete" : "Unexpected token: \""+token+"\"");
|
||||||
|
this.token = token;
|
||||||
|
this.offset = offset;
|
||||||
|
this.incomplete = incomplete;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getToken() {
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getOffset() {
|
||||||
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getToken() {
|
public boolean isIncomplete() {
|
||||||
return getMessage();
|
return incomplete;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1545,13 +1545,28 @@ Concr_parse(ConcrObject* self, PyObject *args, PyObject *keywds)
|
|||||||
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
GuString msg = (GuString) gu_exn_caught_data(parse_err);
|
||||||
PyErr_SetString(PGFError, msg);
|
PyErr_SetString(PGFError, msg);
|
||||||
} else if (gu_exn_caught(parse_err, PgfParseError)) {
|
} else if (gu_exn_caught(parse_err, PgfParseError)) {
|
||||||
GuString tok = (GuString) gu_exn_caught_data(parse_err);
|
PgfParseError* err = (PgfParseError*) gu_exn_caught_data(parse_err);
|
||||||
PyObject* py_tok = PyString_FromString(tok);
|
PyObject* py_offset = PyInt_FromLong(err->offset);
|
||||||
PyObject_SetAttrString(ParseError, "token", py_tok);
|
if (err->incomplete) {
|
||||||
PyErr_Format(ParseError, "Unexpected token: \"%s\"", tok);
|
PyObject_SetAttrString(ParseError, "incomplete", Py_True);
|
||||||
Py_DECREF(py_tok);
|
PyObject_SetAttrString(ParseError, "offset", py_offset);
|
||||||
|
PyErr_Format(ParseError, "The sentence is incomplete");
|
||||||
|
} else {
|
||||||
|
PyObject* py_tok = PyString_FromStringAndSize(err->token_ptr,
|
||||||
|
err->token_len);
|
||||||
|
PyObject_SetAttrString(ParseError, "incomplete", Py_False);
|
||||||
|
PyObject_SetAttrString(ParseError, "offset", py_offset);
|
||||||
|
PyObject_SetAttrString(ParseError, "token", py_tok);
|
||||||
|
#if PY_MAJOR_VERSION >= 3
|
||||||
|
PyErr_Format(ParseError, "Unexpected token: \"%U\"", py_tok);
|
||||||
|
#else
|
||||||
|
PyErr_Format(ParseError, "Unexpected token: \"%s\"", PyString_AsString(py_tok));
|
||||||
|
#endif
|
||||||
|
Py_DECREF(py_tok);
|
||||||
|
}
|
||||||
|
Py_DECREF(py_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_DECREF(pyres);
|
Py_DECREF(pyres);
|
||||||
pyres = NULL;
|
pyres = NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -191,10 +191,11 @@ cpgfMain qsem command (t,(pgf,pc)) =
|
|||||||
|
|
||||||
-- Without caching parse results:
|
-- Without caching parse results:
|
||||||
parse' start mlimit ((from,concr),input) =
|
parse' start mlimit ((from,concr),input) =
|
||||||
return $ maybe id take mlimit . drop start # cparse
|
case C.parseWithHeuristics concr cat input (-1) callbacks of
|
||||||
|
C.ParseOk ts -> return (Right (maybe id take mlimit (drop start ts)))
|
||||||
|
C.ParseFailed _ tok -> return (Left tok)
|
||||||
|
C.ParseIncomplete -> return (Left "")
|
||||||
where
|
where
|
||||||
--cparse = C.parse concr cat input
|
|
||||||
cparse = C.parseWithHeuristics concr cat input (-1) callbacks
|
|
||||||
callbacks = maybe [] cb $ lookup (C.abstractName pgf) C.literalCallbacks
|
callbacks = maybe [] cb $ lookup (C.abstractName pgf) C.literalCallbacks
|
||||||
cb fs = [(cat,f pgf (from,concr) input)|(cat,f)<-fs]
|
cb fs = [(cat,f pgf (from,concr) input)|(cat,f)<-fs]
|
||||||
{-
|
{-
|
||||||
@@ -277,8 +278,9 @@ cpgfMain qsem command (t,(pgf,pc)) =
|
|||||||
| isUpper c -> toLower c : cs
|
| isUpper c -> toLower c : cs
|
||||||
s -> s
|
s -> s
|
||||||
|
|
||||||
parse1 = either (const Nothing) (fmap fst . listToMaybe) .
|
parse1 s = case C.parse concr cat s of
|
||||||
C.parse concr cat
|
C.ParseOk ((t,_):ts) -> Just t
|
||||||
|
_ -> Nothing
|
||||||
morph w = listToMaybe
|
morph w = listToMaybe
|
||||||
[t | (f,a,p)<-C.lookupMorpho concr w,
|
[t | (f,a,p)<-C.lookupMorpho concr w,
|
||||||
t<-maybeToList (C.readExpr f)]
|
t<-maybeToList (C.readExpr f)]
|
||||||
|
|||||||
Reference in New Issue
Block a user