The APIs for callbacks and the bracketed strings now use a string for the analysis instead of an integer. This is now consistent with lookupMorpho and friends.

This commit is contained in:
krangelov
2020-03-05 11:58:21 +01:00
parent 3133900125
commit f22bd70585
19 changed files with 104 additions and 98 deletions

View File

@@ -142,14 +142,14 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
} }
static void static void
pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs); PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
gu_buf_push(alin->parent_stack, int, fid); gu_buf_push(alin->parent_stack, int, fid);
} }
static void static void
pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs); PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs);
gu_buf_pop(alin->parent_stack, int); gu_buf_pop(alin->parent_stack, int);

View File

@@ -155,7 +155,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
} }
static void static void
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);

View File

@@ -606,7 +606,7 @@ typedef struct {
PgfLzrCachedTag tag; PgfLzrCachedTag tag;
PgfCId cat; PgfCId cat;
int fid; int fid;
int lin_idx; GuString ann;
PgfCId fun; PgfCId fun;
} PgfLzrCached; } PgfLzrCached;
@@ -644,7 +644,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
cache->lzr->funcs, cache->lzr->funcs,
event->cat, event->cat,
event->fid, event->fid,
event->lin_idx, event->ann,
event->fun); event->fun);
} }
break; break;
@@ -654,7 +654,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form)
cache->lzr->funcs, cache->lzr->funcs,
event->cat, event->cat,
event->fid, event->fid,
event->lin_idx, event->ann,
event->fun); event->fun);
} }
break; break;
@@ -709,27 +709,27 @@ found:
} }
static void static void
pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs); PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
PgfLzrCached* event = gu_buf_extend(cache->events); PgfLzrCached* event = gu_buf_extend(cache->events);
event->tag = PGF_CACHED_BEGIN; event->tag = PGF_CACHED_BEGIN;
event->cat = cat; event->cat = cat;
event->fid = fid; event->fid = fid;
event->lin_idx = lin_idx; event->ann = ann;
event->fun = fun; event->fun = fun;
} }
static void static void
pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs); PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs);
PgfLzrCached* event = gu_buf_extend(cache->events); PgfLzrCached* event = gu_buf_extend(cache->events);
event->tag = PGF_CACHED_END; event->tag = PGF_CACHED_END;
event->cat = cat; event->cat = cat;
event->fid = fid; event->fid = fid;
event->lin_idx = lin_idx; event->ann = ann;
event->fun = fun; event->fun = fun;
} }
static void static void
@@ -918,7 +918,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) { if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) {
(*lzr->funcs)->begin_phrase(lzr->funcs, (*lzr->funcs)->begin_phrase(lzr->funcs,
fapp->ccat->cnccat->abscat->name, fapp->ccat->cnccat->abscat->name,
fapp->fid, lin_idx, fapp->fid, fapp->ccat->cnccat->labels[lin_idx],
fapp->abs_id); fapp->abs_id);
} }
@@ -928,7 +928,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) { if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) {
(*lzr->funcs)->end_phrase(lzr->funcs, (*lzr->funcs)->end_phrase(lzr->funcs,
fapp->ccat->cnccat->abscat->name, fapp->ccat->cnccat->abscat->name,
fapp->fid, lin_idx, fapp->fid, fapp->ccat->cnccat->labels[lin_idx],
fapp->abs_id); fapp->abs_id);
} }
break; break;
@@ -957,7 +957,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->begin_phrase && flit->fid >= 0) { if ((*lzr->funcs)->begin_phrase && flit->fid >= 0) {
(*lzr->funcs)->begin_phrase(lzr->funcs, (*lzr->funcs)->begin_phrase(lzr->funcs,
cat, flit->fid, 0, cat, flit->fid, "s",
""); "");
} }
@@ -989,7 +989,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx)
if ((*lzr->funcs)->end_phrase && flit->fid >= 0) { if ((*lzr->funcs)->end_phrase && flit->fid >= 0) {
(*lzr->funcs)->end_phrase(lzr->funcs, (*lzr->funcs)->end_phrase(lzr->funcs,
cat, flit->fid, 0, cat, flit->fid, "s",
""); "");
} }

View File

@@ -84,10 +84,10 @@ struct PgfLinFuncs
void (*symbol_token)(PgfLinFuncs** self, PgfToken tok); void (*symbol_token)(PgfLinFuncs** self, PgfToken tok);
/// Begin phrase /// Begin phrase
void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun); void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun);
/// End phrase /// End phrase
void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun); void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun);
/// handling nonExist /// handling nonExist
void (*symbol_ne)(PgfLinFuncs** self); void (*symbol_ne)(PgfLinFuncs** self);

View File

@@ -6,11 +6,12 @@
static PgfExprProb* static PgfExprProb*
pgf_match_string_lit(PgfLiteralCallback* self, PgfConcr* concr, pgf_match_string_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {
gu_assert(lin_idx == 0); if (strcmp("ann","s") != 0)
return NULL;
const uint8_t* buf = (uint8_t*) (sentence + *poffset); const uint8_t* buf = (uint8_t*) (sentence + *poffset);
const uint8_t* p = buf; const uint8_t* p = buf;
@@ -51,7 +52,7 @@ pgf_predict_empty_next(GuEnum* self, void* to, GuPool* pool)
static GuEnum* static GuEnum*
pgf_predict_empty(PgfLiteralCallback* self, PgfConcr* concr, pgf_predict_empty(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString prefix, GuString prefix,
GuPool *out_pool) GuPool *out_pool)
{ {
@@ -67,11 +68,12 @@ static PgfLiteralCallback pgf_string_literal_callback =
static PgfExprProb* static PgfExprProb*
pgf_match_int_lit(PgfLiteralCallback* self, PgfConcr* concr, pgf_match_int_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {
gu_assert(lin_idx == 0); if (strcmp("ann","s") != 0)
return NULL;
const uint8_t* buf = (uint8_t*) (sentence + *poffset); const uint8_t* buf = (uint8_t*) (sentence + *poffset);
const uint8_t* p = buf; const uint8_t* p = buf;
@@ -121,11 +123,12 @@ static PgfLiteralCallback pgf_int_literal_callback =
static PgfExprProb* static PgfExprProb*
pgf_match_float_lit(PgfLiteralCallback* self, PgfConcr* concr, pgf_match_float_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {
gu_assert(lin_idx == 0); if (strcmp("ann","s") != 0)
return NULL;
const uint8_t* buf = (uint8_t*) (sentence + *poffset); const uint8_t* buf = (uint8_t*) (sentence + *poffset);
const uint8_t* p = buf; const uint8_t* p = buf;
@@ -226,11 +229,11 @@ pgf_match_name_morpho_callback(PgfMorphoCallback* self_,
static PgfExprProb* static PgfExprProb*
pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr, pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {
if (lin_idx != 0) if (strcmp("ann","s") != 0)
return NULL; return NULL;
GuPool* tmp_pool = gu_local_pool(); GuPool* tmp_pool = gu_local_pool();
@@ -349,7 +352,7 @@ pgf_match_unknown_morpho_callback(PgfMorphoCallback* self_,
static PgfExprProb* static PgfExprProb*
pgf_match_unknown_lit(PgfLiteralCallback* self, PgfConcr* concr, pgf_match_unknown_lit(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {

View File

@@ -876,7 +876,7 @@ pgf_lookup_symbol_token(PgfLinFuncs** self, PgfToken token)
} }
static void static void
pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId funname) pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId funname)
{ {
PgfLookupState* st = gu_container(self, PgfLookupState, funcs); PgfLookupState* st = gu_container(self, PgfLookupState, funcs);
@@ -890,7 +890,7 @@ pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex,
} }
static void static void
pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfLookupState* st = gu_container(self, PgfLookupState, funcs); PgfLookupState* st = gu_container(self, PgfLookupState, funcs);
st->curr_absfun = NULL; st->curr_absfun = NULL;

View File

@@ -9,7 +9,7 @@
#include <math.h> #include <math.h>
#include <stdlib.h> #include <stdlib.h>
//#define PGF_PARSER_DEBUG #define PGF_PARSER_DEBUG
//#define PGF_COUNTS_DEBUG //#define PGF_COUNTS_DEBUG
//#define PGF_RESULT_DEBUG //#define PGF_RESULT_DEBUG
@@ -1370,7 +1370,7 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
if (callback != NULL) { if (callback != NULL) {
ep = callback->match(callback, ps->concr, ep = callback->match(callback, ps->concr,
slit->r, parg->ccat->cnccat->labels[slit->r],
ps->sentence, &offset, ps->sentence, &offset,
ps->out_pool); ps->out_pool);
} }
@@ -1480,6 +1480,7 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym)
} }
case PGF_SYMBOL_CAPIT: case PGF_SYMBOL_CAPIT:
case PGF_SYMBOL_ALL_CAPIT: { case PGF_SYMBOL_ALL_CAPIT: {
printf("PGF_SYMBOL_CAPIT\n");
pgf_item_advance(item, ps->pool); pgf_item_advance(item, ps->pool);
pgf_parsing_symbol(ps, item, item->curr_sym); pgf_parsing_symbol(ps, item, item->curr_sym);
break; break;

View File

@@ -6,7 +6,7 @@
typedef struct { typedef struct {
int start, end; int start, end;
PgfCId cat; PgfCId cat;
size_t lin_idx; GuString ann;
} PgfPhrase; } PgfPhrase;
typedef struct { typedef struct {
@@ -46,14 +46,14 @@ pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
} }
static void static void
pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_index, PgfCId fun) pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
gu_buf_push(state->marks, int, state->pos); gu_buf_push(state->marks, int, state->pos);
} }
static void static void
pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
@@ -65,7 +65,7 @@ pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin
phrase->start = start; phrase->start = start;
phrase->end = end; phrase->end = end;
phrase->cat = cat; phrase->cat = cat;
phrase->lin_idx = lin_idx; phrase->ann = ann;
gu_buf_push(state->phrases, PgfPhrase*, phrase); gu_buf_push(state->phrases, PgfPhrase*, phrase);
} }
} }
@@ -85,7 +85,7 @@ pgf_metrics_symbol_bind(PgfLinFuncs** funcs)
} }
static void static void
pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
@@ -100,7 +100,7 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin
if (phrase->start == start && if (phrase->start == start &&
phrase->end == end && phrase->end == end &&
strcmp(phrase->cat, cat) == 0 && strcmp(phrase->cat, cat) == 0 &&
phrase->lin_idx == lin_idx) { strcmp(phrase->ann, ann) == 0) {
state->matches++; state->matches++;
break; break;
} }

View File

@@ -254,11 +254,11 @@ typedef struct PgfLiteralCallback PgfLiteralCallback;
struct PgfLiteralCallback { struct PgfLiteralCallback {
PgfExprProb* (*match)(PgfLiteralCallback* self, PgfConcr* concr, PgfExprProb* (*match)(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool); GuPool *out_pool);
GuEnum* (*predict)(PgfLiteralCallback* self, PgfConcr* concr, GuEnum* (*predict)(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString prefix, GuString prefix,
GuPool *out_pool); GuPool *out_pool);
}; };

View File

@@ -60,7 +60,7 @@ module PGF2 (-- * PGF
-- ** Linearization -- ** Linearization
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll, linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString, FId, BracketedString(..), showBracketedString, flattenBracketedString,
printName, printName,
alignWords, alignWords,
@@ -589,7 +589,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
-- A negative value tells the parser -- A negative value tells the parser
-- to lookup up the default from -- to lookup up the default from
-- the grammar flags -- the grammar flags
-> [(Cat, Int -> Int -> Maybe (Expr,Float,Int))] -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories. -- ^ a list of callbacks for literal categories.
-- The arguments of the callback are: -- The arguments of the callback are:
-- the index of the constituent for the literal category; -- the index of the constituent for the literal category;
@@ -645,7 +645,7 @@ parseToChart :: Concr -- ^ the language with which we parse
-- A negative value tells the parser -- A negative value tells the parser
-- to lookup up the default from -- to lookup up the default from
-- the grammar flags -- the grammar flags
-> [(Cat, Int -> Int -> Maybe (Expr,Float,Int))] -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories. -- ^ a list of callbacks for literal categories.
-- The arguments of the callback are: -- The arguments of the callback are:
-- the index of the constituent for the literal category; -- the index of the constituent for the literal category;
@@ -761,7 +761,7 @@ parseToChart lang (Type ctype touchType) sent heuristic callbacks roots =
f <- (#peek PgfParseRange, field) ptr >>= peekCString f <- (#peek PgfParseRange, field) ptr >>= peekCString
return ((fromIntegral :: CSizeT -> Int) s, (fromIntegral :: CSizeT -> Int) e, f) return ((fromIntegral :: CSizeT -> Int) s, (fromIntegral :: CSizeT -> Int) e, f)
mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap) mkCallbacksMap :: Ptr PgfConcr -> [(String, String -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap)
mkCallbacksMap concr callbacks pool = do mkCallbacksMap concr callbacks pool = do
callbacks_map <- pgf_new_callbacks_map concr pool callbacks_map <- pgf_new_callbacks_map concr pool
forM_ callbacks $ \(cat,match) -> do forM_ callbacks $ \(cat,match) -> do
@@ -771,9 +771,10 @@ mkCallbacksMap concr callbacks pool = do
hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool
return callbacks_map return callbacks_map
where where
match_callback match clin_idx poffset out_pool = do match_callback match c_ann poffset out_pool = do
coffset <- peek poffset coffset <- peek poffset
case match (fromIntegral clin_idx) (fromIntegral coffset) of ann <- peekUtf8CString c_ann
case match ann (fromIntegral coffset) of
Nothing -> return nullPtr Nothing -> return nullPtr
Just (e,prob,offset') -> do poke poffset (fromIntegral offset') Just (e,prob,offset') -> do poke poffset (fromIntegral offset')
@@ -1032,15 +1033,13 @@ tabularLinearizeAll lang e = unsafePerformIO $
throwIO (PGFError msg) throwIO (PGFError msg)
else do throwIO (PGFError "The abstract tree cannot be linearized") else do throwIO (PGFError "The abstract tree cannot be linearized")
type LIndex = Int
-- | BracketedString represents a sentence that is linearized -- | BracketedString represents a sentence that is linearized
-- as usual but we also want to retain the ''brackets'' that -- as usual but we also want to retain the ''brackets'' that
-- mark the beginning and the end of each constituent. -- mark the beginning and the end of each constituent.
data BracketedString data BracketedString
= Leaf String -- ^ this is the leaf i.e. a single token = Leaf String -- ^ this is the leaf i.e. a single token
| BIND -- ^ the surrounding tokens must be bound together | BIND -- ^ the surrounding tokens must be bound together
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} String CId [BracketedString]
-- ^ this is a bracket. The 'CId' is the category of -- ^ this is a bracket. The 'CId' is the category of
-- the phrase. The 'FId' is an unique identifier for -- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars -- every phrase in the sentence. For context-free grammars
@@ -1049,7 +1048,7 @@ data BracketedString
-- phrases then the identifiers are unique for every phrase but -- phrases then the identifiers are unique for every phrase but
-- not for every bracket since the bracket represents a constituent. -- not for every bracket since the bracket represents a constituent.
-- The different constituents could still be distinguished by using -- The different constituents could still be distinguished by using
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating -- the analysis string. If the grammar is reduplicating
-- then the constituent indices will be the same for all brackets -- then the constituent indices will be the same for all brackets
-- that represents the same constituent. -- that represents the same constituent.
-- The second 'CId' is the name of the abstract function that generated -- The second 'CId' is the name of the abstract function that generated
@@ -1063,7 +1062,7 @@ showBracketedString = render . ppBracketedString
ppBracketedString (Leaf t) = text t ppBracketedString (Leaf t) = text t
ppBracketedString BIND = text "&+" ppBracketedString BIND = text "&+"
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
-- | Extracts the sequence of tokens from the bracketed string -- | Extracts the sequence of tokens from the bracketed string
flattenBracketedString :: BracketedString -> [String] flattenBracketedString :: BracketedString -> [String]
@@ -1161,19 +1160,19 @@ withBracketLinFuncs ref exn f =
token <- peekUtf8CString c_token token <- peekUtf8CString c_token
writeIORef ref (stack,Leaf token : bs) writeIORef ref (stack,Leaf token : bs)
begin_phrase ref _ c_cat c_fid c_lindex c_fun = do begin_phrase ref _ c_cat c_fid c_ann c_fun = do
(stack,bs) <- readIORef ref (stack,bs) <- readIORef ref
writeIORef ref (bs:stack,[]) writeIORef ref (bs:stack,[])
end_phrase ref _ c_cat c_fid c_lindex c_fun = do end_phrase ref _ c_cat c_fid c_ann c_fun = do
(bs':stack,bs) <- readIORef ref (bs':stack,bs) <- readIORef ref
if null bs if null bs
then writeIORef ref (stack, bs') then writeIORef ref (stack, bs')
else do cat <- peekUtf8CString c_cat else do cat <- peekUtf8CString c_cat
let fid = fromIntegral c_fid let fid = fromIntegral c_fid
let lindex = fromIntegral c_lindex ann <- peekUtf8CString c_ann
fun <- peekUtf8CString c_fun fun <- peekUtf8CString c_fun
writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs') writeIORef ref (stack, Bracket cat fid ann fun (reverse bs) : bs')
symbol_ne exn _ = do symbol_ne exn _ = do
gu_exn_raise exn gu_exn_type_PgfLinNonExist gu_exn_raise exn gu_exn_type_PgfLinNonExist

View File

@@ -350,7 +350,7 @@ foreign import ccall "pgf/pgf.h pgf_lzr_get_table"
pgf_lzr_get_table :: Ptr PgfConcr -> Ptr PgfCncTree -> Ptr CSizeT -> Ptr (Ptr CString) -> IO () pgf_lzr_get_table :: Ptr PgfConcr -> Ptr PgfCncTree -> Ptr CSizeT -> Ptr (Ptr CString) -> IO ()
type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO () type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO ()
type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CSizeT -> CString -> IO () type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CString -> CString -> IO ()
type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO () type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
type BindCallback = Ptr (Ptr PgfLinFuncs) -> IO () type BindCallback = Ptr (Ptr PgfLinFuncs) -> IO ()
type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO () type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO ()
@@ -388,12 +388,12 @@ foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics"
foreign import ccall "pgf/pgf.h pgf_lookup_sentence" foreign import ccall "pgf/pgf.h pgf_lookup_sentence"
pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
type LiteralMatchCallback = CSizeT -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb) type LiteralMatchCallback = CString -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb)
foreign import ccall "wrapper" foreign import ccall "wrapper"
wrapLiteralMatchCallback :: LiteralMatchCallback -> IO (FunPtr LiteralMatchCallback) wrapLiteralMatchCallback :: LiteralMatchCallback -> IO (FunPtr LiteralMatchCallback)
type LiteralPredictCallback = CSizeT -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb) type LiteralPredictCallback = CString -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb)
foreign import ccall "wrapper" foreign import ccall "wrapper"
wrapLiteralPredictCallback :: LiteralPredictCallback -> IO (FunPtr LiteralPredictCallback) wrapLiteralPredictCallback :: LiteralPredictCallback -> IO (FunPtr LiteralPredictCallback)

View File

@@ -35,7 +35,8 @@ import Control.Exception(Exception,throwIO)
import Control.Monad(foldM) import Control.Monad(foldM)
import qualified Data.Map as Map import qualified Data.Map as Map
type Token = String type Token = String
type LIndex = Int
data Symbol data Symbol
= SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex = SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
| SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex | SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex

View File

@@ -4,7 +4,7 @@
typedef struct { typedef struct {
PgfLiteralCallback callback; PgfLiteralCallback callback;
PgfExprProb* (*match)(size_t lin_idx, size_t* poffset, PgfExprProb* (*match)(GuString ann, size_t* poffset,
GuPool *out_pool); GuPool *out_pool);
GuFinalizer fin; GuFinalizer fin;
} HSPgfLiteralCallback; } HSPgfLiteralCallback;
@@ -37,7 +37,7 @@ hspgf_hs2offset(GuString sentence, size_t hs_offset)
static PgfExprProb* static PgfExprProb*
hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr, hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {
@@ -46,7 +46,7 @@ hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr,
size_t hs_offset = size_t hs_offset =
hspgf_offset2hs(sentence, *poffset); hspgf_offset2hs(sentence, *poffset);
PgfExprProb* ep = PgfExprProb* ep =
callback->match(lin_idx, &hs_offset, out_pool); callback->match(ann, &hs_offset, out_pool);
*poffset = hspgf_hs2offset(sentence, hs_offset); *poffset = hspgf_hs2offset(sentence, hs_offset);
return ep; return ep;

View File

@@ -456,7 +456,7 @@ typedef struct {
static PgfExprProb* static PgfExprProb*
jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {
@@ -465,8 +465,9 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
JNIEnv *env; JNIEnv *env;
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL); (*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
size_t joffset = gu2j_string_offset(sentence, *poffset); jstring jann = gu2j_string(env, ann);
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, joffset); size_t joffset = gu2j_string_offset(sentence, *poffset);
jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, jann, joffset);
if (result == NULL) if (result == NULL)
return NULL; return NULL;
@@ -534,7 +535,7 @@ jpgf_token_prob_enum_fin(GuFinalizer* self)
static GuEnum* static GuEnum*
jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr, jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString prefix, GuString prefix,
GuPool *out_pool) GuPool *out_pool)
{ {
@@ -543,8 +544,9 @@ jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
JNIEnv *env; JNIEnv *env;
(*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL); (*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL);
jstring jann = gu2j_string(env, ann);
jstring jprefix = gu2j_string(env, prefix); jstring jprefix = gu2j_string(env, prefix);
jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, callback->predict_methodId, lin_idx, jprefix); jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, callback->predict_methodId, jann, jprefix);
if (jiterator == NULL) if (jiterator == NULL)
return NULL; return NULL;
@@ -582,8 +584,8 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba
callback->fin.fn = jpgf_literal_callback_fin; callback->fin.fn = jpgf_literal_callback_fin;
jclass callback_class = (*env)->GetObjectClass(env, jcallback); jclass callback_class = (*env)->GetObjectClass(env, jcallback);
callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(II)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;"); callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(Ljava/lang/String;I)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;");
callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(ILjava/lang/String;)Ljava/util/Iterator;"); callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(Ljava/lang/String;Ljava/lang/String;)Ljava/util/Iterator;");
gu_pool_finally(pool, &callback->fin); gu_pool_finally(pool, &callback->fin);
@@ -964,7 +966,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
} }
static void static void
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
@@ -973,7 +975,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
} }
static void static void
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
JNIEnv* env = state->env; JNIEnv* env = state->env;
@@ -998,7 +1000,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
jcat, jcat,
jfun, jfun,
fid, fid,
lindex, ann,
jchildren); jchildren);
(*env)->DeleteLocalRef(env, jchildren); (*env)->DeleteLocalRef(env, jchildren);
@@ -1051,7 +1053,7 @@ Java_org_grammaticalframework_pgf_Concr_bracketedLinearize(JNIEnv* env, jobject
jclass bracket_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Bracket"); jclass bracket_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Bracket");
if (!bracket_class) if (!bracket_class)
return NULL; return NULL;
jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;II[Ljava/lang/Object;)V"); jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;[Ljava/lang/Object;)V");
if (!bracket_constrId) if (!bracket_constrId)
return NULL; return NULL;

View File

@@ -14,18 +14,18 @@ public class Bracket {
* where they all will have the same id */ * where they all will have the same id */
public final int fid; public final int fid;
public final int lindex; public final String ann;
/** The children of the bracket. Every element is either a string /** The children of the bracket. Every element is either a string
* if this is a leaf in the parse tree, or a {@link Bracket} object. * if this is a leaf in the parse tree, or a {@link Bracket} object.
*/ */
public final Object[] children; public final Object[] children;
public Bracket(String cat, String fun, int fid, int lindex, Object[] children) { public Bracket(String cat, String fun, int fid, String ann, Object[] children) {
this.cat = cat; this.cat = cat;
this.fun = fun; this.fun = fun;
this.fid = fid; this.fid = fid;
this.lindex = lindex; this.ann = ann;
this.children = children; this.children = children;
} }
} }

View File

@@ -3,9 +3,9 @@ package org.grammaticalframework.pgf;
import java.util.Iterator; import java.util.Iterator;
public interface LiteralCallback { public interface LiteralCallback {
public CallbackResult match(int lin_idx, int start_offset); public CallbackResult match(String ann, int start_offset);
public Iterator<TokenProb> predict(int lin_idx, String prefix); public Iterator<TokenProb> predict(String ann, String prefix);
public static class CallbackResult { public static class CallbackResult {
private ExprProb ep; private ExprProb ep;

View File

@@ -19,7 +19,7 @@ public class NercLiteralCallback implements LiteralCallback {
this.sentence = sentence; this.sentence = sentence;
} }
public CallbackResult match(int lin_idx, int offset) { public CallbackResult match(String ann, int offset) {
StringBuilder sbuilder = new StringBuilder(); StringBuilder sbuilder = new StringBuilder();
int i = 0; int i = 0;
@@ -83,7 +83,7 @@ public class NercLiteralCallback implements LiteralCallback {
return null; return null;
} }
public Iterator<TokenProb> predict(int lin_idx, String prefix) { public Iterator<TokenProb> predict(String ann, String prefix) {
return Collections.<TokenProb>emptyList().iterator(); return Collections.<TokenProb>emptyList().iterator();
} }
} }

View File

@@ -15,7 +15,7 @@ public class UnknownLiteralCallback implements LiteralCallback {
this.sentence = sentence; this.sentence = sentence;
} }
public CallbackResult match(int lin_idx, int offset) { public CallbackResult match(String ann, int offset) {
if (offset < sentence.length() && if (offset < sentence.length() &&
!Character.isUpperCase(sentence.charAt(offset))) { !Character.isUpperCase(sentence.charAt(offset))) {
int start_offset = offset; int start_offset = offset;
@@ -35,7 +35,7 @@ public class UnknownLiteralCallback implements LiteralCallback {
return null; return null;
} }
public Iterator<TokenProb> predict(int lin_idx, String prefix) { public Iterator<TokenProb> predict(String ann, String prefix) {
return Collections.<TokenProb>emptyList().iterator(); return Collections.<TokenProb>emptyList().iterator();
} }
} }

View File

@@ -1382,7 +1382,7 @@ unicode_to_utf8_offset(GuString sentence, size_t chars)
static PgfExprProb* static PgfExprProb*
pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString sentence, size_t* poffset, GuString sentence, size_t* poffset,
GuPool *out_pool) GuPool *out_pool)
{ {
@@ -1390,8 +1390,8 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
gu_container(self, PyPgfLiteralCallback, callback); gu_container(self, PyPgfLiteralCallback, callback);
PyObject* result = PyObject* result =
PyObject_CallFunction(callback->pycallback, "ii", PyObject_CallFunction(callback->pycallback, "si",
lin_idx, ann,
#if PY_MAJOR_VERSION >= 3 #if PY_MAJOR_VERSION >= 3
utf8_to_unicode_offset(sentence, *poffset) utf8_to_unicode_offset(sentence, *poffset)
#else #else
@@ -1460,7 +1460,7 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr,
static GuEnum* static GuEnum*
pypgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr, pypgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr,
size_t lin_idx, GuString ann,
GuString prefix, GuString prefix,
GuPool *out_pool) GuPool *out_pool)
{ {
@@ -1983,7 +1983,7 @@ typedef struct {
PyObject_HEAD PyObject_HEAD
PyObject* cat; PyObject* cat;
int fid; int fid;
int lindex; PyObject* ann;
PyObject* fun; PyObject* fun;
PyObject* children; PyObject* children;
} BracketObject; } BracketObject;
@@ -2058,8 +2058,8 @@ static PyMemberDef Bracket_members[] = {
"the abstract function for this bracket"}, "the abstract function for this bracket"},
{"fid", T_INT, offsetof(BracketObject, fid), 0, {"fid", T_INT, offsetof(BracketObject, fid), 0,
"an id which identifies this bracket in the bracketed string. If there are discontinuous phrases this id will be shared for all brackets belonging to the same phrase."}, "an id which identifies this bracket in the bracketed string. If there are discontinuous phrases this id will be shared for all brackets belonging to the same phrase."},
{"lindex", T_INT, offsetof(BracketObject, lindex), 0, {"ann", T_OBJECT_EX, offsetof(BracketObject, ann), 0,
"the constituent index"}, "the analysis of the constituent"},
{"children", T_OBJECT_EX, offsetof(BracketObject, children), 0, {"children", T_OBJECT_EX, offsetof(BracketObject, children), 0,
"a list with the children of this bracket"}, "a list with the children of this bracket"},
{NULL} /* Sentinel */ {NULL} /* Sentinel */
@@ -2124,7 +2124,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
} }
static void static void
pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
@@ -2133,7 +2133,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li
} }
static void static void
pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun)
{ {
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
@@ -2145,7 +2145,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind
if (bracket != NULL) { if (bracket != NULL) {
bracket->cat = PyString_FromString(cat); bracket->cat = PyString_FromString(cat);
bracket->fid = fid; bracket->fid = fid;
bracket->lindex = lindex; bracket->ann = PyString_FromString(ann);
bracket->fun = PyString_FromString(fun); bracket->fun = PyString_FromString(fun);
bracket->children = state->list; bracket->children = state->list;
PyList_Append(parent, (PyObject*) bracket); PyList_Append(parent, (PyObject*) bracket);