diff --git a/src/runtime/c/pgf/aligner.c b/src/runtime/c/pgf/aligner.c index 53209bb4c..e75743d99 100644 --- a/src/runtime/c/pgf/aligner.c +++ b/src/runtime/c/pgf/aligner.c @@ -142,14 +142,14 @@ pgf_aligner_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) } static void -pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) +pgf_aligner_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs); gu_buf_push(alin->parent_stack, int, fid); } static void -pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) +pgf_aligner_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfAlignerLin* alin = gu_container(funcs, PgfAlignerLin, funcs); gu_buf_pop(alin->parent_stack, int); diff --git a/src/runtime/c/pgf/graphviz.c b/src/runtime/c/pgf/graphviz.c index 66e203dbc..a404ed009 100644 --- a/src/runtime/c/pgf/graphviz.c +++ b/src/runtime/c/pgf/graphviz.c @@ -155,7 +155,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) } static void -pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) +pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); diff --git a/src/runtime/c/pgf/linearizer.c b/src/runtime/c/pgf/linearizer.c index 12b047b13..a7ca6e764 100644 --- a/src/runtime/c/pgf/linearizer.c +++ b/src/runtime/c/pgf/linearizer.c @@ -606,7 +606,7 @@ typedef struct { PgfLzrCachedTag tag; PgfCId cat; int fid; - int lin_idx; + GuString ann; PgfCId fun; } PgfLzrCached; @@ -644,7 +644,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, PgfSymbols* form) cache->lzr->funcs, event->cat, event->fid, - event->lin_idx, + event->ann, event->fun); } break; @@ -654,7 +654,7 @@ pgf_lzr_cache_flush(PgfLzrCache* cache, 
PgfSymbols* form) cache->lzr->funcs, event->cat, event->fid, - event->lin_idx, + event->ann, event->fun); } break; @@ -709,27 +709,27 @@ found: } static void -pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) +pgf_lzr_cache_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs); PgfLzrCached* event = gu_buf_extend(cache->events); - event->tag = PGF_CACHED_BEGIN; - event->cat = cat; - event->fid = fid; - event->lin_idx = lin_idx; - event->fun = fun; + event->tag = PGF_CACHED_BEGIN; + event->cat = cat; + event->fid = fid; + event->ann = ann; + event->fun = fun; } static void -pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) +pgf_lzr_cache_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfLzrCache* cache = gu_container(funcs, PgfLzrCache, funcs); PgfLzrCached* event = gu_buf_extend(cache->events); - event->tag = PGF_CACHED_END; - event->cat = cat; - event->fid = fid; - event->lin_idx = lin_idx; - event->fun = fun; + event->tag = PGF_CACHED_END; + event->cat = cat; + event->fid = fid; + event->ann = ann; + event->fun = fun; } static void @@ -918,7 +918,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) if ((*lzr->funcs)->begin_phrase && fapp->ccat != NULL) { (*lzr->funcs)->begin_phrase(lzr->funcs, fapp->ccat->cnccat->abscat->name, - fapp->fid, lin_idx, + fapp->fid, fapp->ccat->cnccat->labels[lin_idx], fapp->abs_id); } @@ -928,7 +928,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) if ((*lzr->funcs)->end_phrase && fapp->ccat != NULL) { (*lzr->funcs)->end_phrase(lzr->funcs, fapp->ccat->cnccat->abscat->name, - fapp->fid, lin_idx, + fapp->fid, fapp->ccat->cnccat->labels[lin_idx], fapp->abs_id); } break; @@ -957,7 +957,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) if 
((*lzr->funcs)->begin_phrase && flit->fid >= 0) { (*lzr->funcs)->begin_phrase(lzr->funcs, - cat, flit->fid, 0, + cat, flit->fid, "s", ""); } @@ -989,7 +989,7 @@ pgf_lzr_linearize_tree(PgfLzr* lzr, PgfCncTree ctree, size_t lin_idx) if ((*lzr->funcs)->end_phrase && flit->fid >= 0) { (*lzr->funcs)->end_phrase(lzr->funcs, - cat, flit->fid, 0, + cat, flit->fid, "s", ""); } diff --git a/src/runtime/c/pgf/linearizer.h b/src/runtime/c/pgf/linearizer.h index 790dd5800..8a0562913 100644 --- a/src/runtime/c/pgf/linearizer.h +++ b/src/runtime/c/pgf/linearizer.h @@ -84,10 +84,10 @@ struct PgfLinFuncs void (*symbol_token)(PgfLinFuncs** self, PgfToken tok); /// Begin phrase - void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun); + void (*begin_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun); /// End phrase - void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun); + void (*end_phrase)(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun); /// handling nonExist void (*symbol_ne)(PgfLinFuncs** self); diff --git a/src/runtime/c/pgf/literals.c b/src/runtime/c/pgf/literals.c index a76b8ae77..a3116810a 100644 --- a/src/runtime/c/pgf/literals.c +++ b/src/runtime/c/pgf/literals.c @@ -6,11 +6,12 @@ static PgfExprProb* pgf_match_string_lit(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { - gu_assert(lin_idx == 0); + if (strcmp(ann,"s") != 0) + return NULL; const uint8_t* buf = (uint8_t*) (sentence + *poffset); const uint8_t* p = buf; @@ -51,7 +52,7 @@ pgf_predict_empty_next(GuEnum* self, void* to, GuPool* pool) static GuEnum* pgf_predict_empty(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString prefix, GuPool *out_pool) { @@ -67,11 +68,12 @@ static PgfLiteralCallback pgf_string_literal_callback = static PgfExprProb* pgf_match_int_lit(PgfLiteralCallback* self, 
PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { - gu_assert(lin_idx == 0); + if (strcmp(ann,"s") != 0) + return NULL; const uint8_t* buf = (uint8_t*) (sentence + *poffset); const uint8_t* p = buf; @@ -121,11 +123,12 @@ static PgfLiteralCallback pgf_int_literal_callback = static PgfExprProb* pgf_match_float_lit(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { - gu_assert(lin_idx == 0); + if (strcmp(ann,"s") != 0) + return NULL; const uint8_t* buf = (uint8_t*) (sentence + *poffset); const uint8_t* p = buf; @@ -226,11 +229,11 @@ pgf_match_name_morpho_callback(PgfMorphoCallback* self_, static PgfExprProb* pgf_match_name_lit(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { - if (lin_idx != 0) + if (strcmp(ann,"s") != 0) return NULL; GuPool* tmp_pool = gu_local_pool(); @@ -349,7 +352,7 @@ pgf_match_unknown_morpho_callback(PgfMorphoCallback* self_, static PgfExprProb* pgf_match_unknown_lit(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { diff --git a/src/runtime/c/pgf/lookup.c b/src/runtime/c/pgf/lookup.c index 5918275c1..2ec385ee8 100644 --- a/src/runtime/c/pgf/lookup.c +++ b/src/runtime/c/pgf/lookup.c @@ -876,7 +876,7 @@ pgf_lookup_symbol_token(PgfLinFuncs** self, PgfToken token) } static void -pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId funname) +pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId funname) { PgfLookupState* st = gu_container(self, PgfLookupState, funcs); @@ -890,7 +890,7 @@ pgf_lookup_begin_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, } static void -pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, size_t lindex, PgfCId fun) 
+pgf_lookup_end_phrase(PgfLinFuncs** self, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfLookupState* st = gu_container(self, PgfLookupState, funcs); st->curr_absfun = NULL; diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index ea5228bc1..5646becfc 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -1370,7 +1370,7 @@ pgf_parsing_symbol(PgfParsing* ps, PgfItem* item, PgfSymbol sym) if (callback != NULL) { ep = callback->match(callback, ps->concr, - slit->r, + parg->ccat->cnccat->labels[slit->r], ps->sentence, &offset, ps->out_pool); } diff --git a/src/runtime/c/pgf/parseval.c b/src/runtime/c/pgf/parseval.c index 2882f7643..501430fda 100644 --- a/src/runtime/c/pgf/parseval.c +++ b/src/runtime/c/pgf/parseval.c @@ -6,7 +6,7 @@ typedef struct { int start, end; PgfCId cat; - size_t lin_idx; + GuString ann; } PgfPhrase; typedef struct { @@ -46,14 +46,14 @@ pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) } static void -pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_index, PgfCId fun) +pgf_metrics_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); gu_buf_push(state->marks, int, state->pos); } static void -pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) +pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, 
funcs); @@ -65,7 +65,7 @@ pgf_metrics_lzn_end_phrase1(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin phrase->start = start; phrase->end = end; phrase->cat = cat; - phrase->lin_idx = lin_idx; + phrase->ann = ann; gu_buf_push(state->phrases, PgfPhrase*, phrase); } } @@ -85,7 +85,7 @@ pgf_metrics_symbol_bind(PgfLinFuncs** funcs) } static void -pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin_idx, PgfCId fun) +pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); @@ -100,7 +100,7 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lin if (phrase->start == start && phrase->end == end && strcmp(phrase->cat, cat) == 0 && - phrase->lin_idx == lin_idx) { + strcmp(phrase->ann, ann) == 0) { state->matches++; break; } diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index b40284a42..c0a64f01d 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -254,11 +254,11 @@ typedef struct PgfLiteralCallback PgfLiteralCallback; struct PgfLiteralCallback { PgfExprProb* (*match)(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool); GuEnum* (*predict)(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString prefix, GuPool *out_pool); }; diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index 9ef325343..fd7580c3b 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -60,7 +60,7 @@ module PGF2 (-- * PGF -- ** Linearization linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll, - FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString, + FId, BracketedString(..), showBracketedString, flattenBracketedString, printName, alignWords, @@ -589,7 
+589,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse -- A negative value tells the parser -- to lookup up the default from -- the grammar flags - -> [(Cat, Int -> Int -> Maybe (Expr,Float,Int))] + -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))] -- ^ a list of callbacks for literal categories. -- The arguments of the callback are: -- the index of the constituent for the literal category; @@ -645,7 +645,7 @@ parseToChart :: Concr -- ^ the language with which we parse -- A negative value tells the parser -- to lookup up the default from -- the grammar flags - -> [(Cat, Int -> Int -> Maybe (Expr,Float,Int))] + -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))] -- ^ a list of callbacks for literal categories. -- The arguments of the callback are: -- the index of the constituent for the literal category; @@ -761,7 +761,7 @@ parseToChart lang (Type ctype touchType) sent heuristic callbacks roots = f <- (#peek PgfParseRange, field) ptr >>= peekCString return ((fromIntegral :: CSizeT -> Int) s, (fromIntegral :: CSizeT -> Int) e, f) -mkCallbacksMap :: Ptr PgfConcr -> [(String, Int -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap) +mkCallbacksMap :: Ptr PgfConcr -> [(String, String -> Int -> Maybe (Expr,Float,Int))] -> Ptr GuPool -> IO (Ptr PgfCallbacksMap) mkCallbacksMap concr callbacks pool = do callbacks_map <- pgf_new_callbacks_map concr pool forM_ callbacks $ \(cat,match) -> do @@ -771,9 +771,10 @@ mkCallbacksMap concr callbacks pool = do hspgf_callbacks_map_add_literal concr callbacks_map ccat match predict pool return callbacks_map where - match_callback match clin_idx poffset out_pool = do + match_callback match c_ann poffset out_pool = do coffset <- peek poffset - case match (fromIntegral clin_idx) (fromIntegral coffset) of + ann <- peekUtf8CString c_ann + case match ann (fromIntegral coffset) of Nothing -> return nullPtr Just (e,prob,offset') -> do poke poffset (fromIntegral offset') @@ -1032,15 +1033,13 @@ 
tabularLinearizeAll lang e = unsafePerformIO $ throwIO (PGFError msg) else do throwIO (PGFError "The abstract tree cannot be linearized") -type LIndex = Int - -- | BracketedString represents a sentence that is linearized -- as usual but we also want to retain the ''brackets'' that -- mark the beginning and the end of each constituent. data BracketedString = Leaf String -- ^ this is the leaf i.e. a single token | BIND -- ^ the surrounding tokens must be bound together - | Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString] + | Bracket CId {-# UNPACK #-} !FId String CId [BracketedString] -- ^ this is a bracket. The 'CId' is the category of -- the phrase. The 'FId' is an unique identifier for -- every phrase in the sentence. For context-free grammars @@ -1049,7 +1048,7 @@ data BracketedString -- phrases then the identifiers are unique for every phrase but -- not for every bracket since the bracket represents a constituent. -- The different constituents could still be distinguished by using - -- the constituent index i.e. 'LIndex'. If the grammar is reduplicating + -- the analysis string. If the grammar is reduplicating -- then the constituent indices will be the same for all brackets -- that represents the same constituent. -- The second 'CId' is the name of the abstract function that generated @@ -1063,7 +1062,7 @@ showBracketedString = render . 
ppBracketedString ppBracketedString (Leaf t) = text t ppBracketedString BIND = text "&+" -ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) +ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss)) -- | Extracts the sequence of tokens from the bracketed string flattenBracketedString :: BracketedString -> [String] @@ -1161,19 +1160,19 @@ withBracketLinFuncs ref exn f = token <- peekUtf8CString c_token writeIORef ref (stack,Leaf token : bs) - begin_phrase ref _ c_cat c_fid c_lindex c_fun = do + begin_phrase ref _ c_cat c_fid c_ann c_fun = do (stack,bs) <- readIORef ref writeIORef ref (bs:stack,[]) - end_phrase ref _ c_cat c_fid c_lindex c_fun = do + end_phrase ref _ c_cat c_fid c_ann c_fun = do (bs':stack,bs) <- readIORef ref if null bs then writeIORef ref (stack, bs') else do cat <- peekUtf8CString c_cat let fid = fromIntegral c_fid - let lindex = fromIntegral c_lindex + ann <- peekUtf8CString c_ann fun <- peekUtf8CString c_fun - writeIORef ref (stack, Bracket cat fid lindex fun (reverse bs) : bs') + writeIORef ref (stack, Bracket cat fid ann fun (reverse bs) : bs') symbol_ne exn _ = do gu_exn_raise exn gu_exn_type_PgfLinNonExist diff --git a/src/runtime/haskell-bind/PGF2/FFI.hsc b/src/runtime/haskell-bind/PGF2/FFI.hsc index 673c5c877..2db9577a0 100644 --- a/src/runtime/haskell-bind/PGF2/FFI.hsc +++ b/src/runtime/haskell-bind/PGF2/FFI.hsc @@ -350,7 +350,7 @@ foreign import ccall "pgf/pgf.h pgf_lzr_get_table" pgf_lzr_get_table :: Ptr PgfConcr -> Ptr PgfCncTree -> Ptr CSizeT -> Ptr (Ptr CString) -> IO () type SymbolTokenCallback = Ptr (Ptr PgfLinFuncs) -> CString -> IO () -type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CSizeT -> CString -> IO () +type PhraseCallback = Ptr (Ptr PgfLinFuncs) -> CString -> CInt -> CString -> CString -> IO () type NonExistCallback = Ptr (Ptr PgfLinFuncs) -> IO () type BindCallback = 
Ptr (Ptr PgfLinFuncs) -> IO () type MetaCallback = Ptr (Ptr PgfLinFuncs) -> CInt -> IO () @@ -388,12 +388,12 @@ foreign import ccall "pgf/pgf.h pgf_parse_with_heuristics" foreign import ccall "pgf/pgf.h pgf_lookup_sentence" pgf_lookup_sentence :: Ptr PgfConcr -> PgfType -> CString -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) -type LiteralMatchCallback = CSizeT -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb) +type LiteralMatchCallback = CString -> Ptr CSizeT -> Ptr GuPool -> IO (Ptr PgfExprProb) foreign import ccall "wrapper" wrapLiteralMatchCallback :: LiteralMatchCallback -> IO (FunPtr LiteralMatchCallback) -type LiteralPredictCallback = CSizeT -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb) +type LiteralPredictCallback = CString -> CString -> Ptr GuPool -> IO (Ptr PgfExprProb) foreign import ccall "wrapper" wrapLiteralPredictCallback :: LiteralPredictCallback -> IO (FunPtr LiteralPredictCallback) diff --git a/src/runtime/haskell-bind/PGF2/Internal.hsc b/src/runtime/haskell-bind/PGF2/Internal.hsc index ed894a361..7230c7d92 100644 --- a/src/runtime/haskell-bind/PGF2/Internal.hsc +++ b/src/runtime/haskell-bind/PGF2/Internal.hsc @@ -35,7 +35,8 @@ import Control.Exception(Exception,throwIO) import Control.Monad(foldM) import qualified Data.Map as Map -type Token = String +type Token = String +type LIndex = Int data Symbol = SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex | SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex diff --git a/src/runtime/haskell-bind/utils.c b/src/runtime/haskell-bind/utils.c index 91d62ea56..bee94083e 100644 --- a/src/runtime/haskell-bind/utils.c +++ b/src/runtime/haskell-bind/utils.c @@ -4,7 +4,7 @@ typedef struct { PgfLiteralCallback callback; - PgfExprProb* (*match)(size_t lin_idx, size_t* poffset, + PgfExprProb* (*match)(GuString ann, size_t* poffset, GuPool *out_pool); GuFinalizer fin; } HSPgfLiteralCallback; @@ -37,7 +37,7 @@ hspgf_hs2offset(GuString sentence, size_t hs_offset) static PgfExprProb* 
hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { @@ -46,7 +46,7 @@ hspgf_match_callback(PgfLiteralCallback* self, PgfConcr* concr, size_t hs_offset = hspgf_offset2hs(sentence, *poffset); PgfExprProb* ep = - callback->match(lin_idx, &hs_offset, out_pool); + callback->match(ann, &hs_offset, out_pool); *poffset = hspgf_hs2offset(sentence, hs_offset); return ep; diff --git a/src/runtime/java/jpgf.c b/src/runtime/java/jpgf.c index bdfdc8e8c..966762222 100644 --- a/src/runtime/java/jpgf.c +++ b/src/runtime/java/jpgf.c @@ -456,7 +456,7 @@ typedef struct { static PgfExprProb* jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { @@ -465,8 +465,9 @@ jpgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, JNIEnv *env; (*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL); - size_t joffset = gu2j_string_offset(sentence, *poffset); - jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, lin_idx, joffset); + jstring jann = gu2j_string(env, ann); + size_t joffset = gu2j_string_offset(sentence, *poffset); + jobject result = (*env)->CallObjectMethod(env, callback->jcallback, callback->match_methodId, jann, joffset); if (result == NULL) return NULL; @@ -534,7 +535,7 @@ jpgf_token_prob_enum_fin(GuFinalizer* self) static GuEnum* jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString prefix, GuPool *out_pool) { @@ -543,8 +544,9 @@ jpgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr, JNIEnv *env; (*cachedJVM)->AttachCurrentThread(cachedJVM, (void**)&env, NULL); + jstring jann = gu2j_string(env, ann); jstring jprefix = gu2j_string(env, prefix); - jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, 
callback->predict_methodId, lin_idx, jprefix); + jobject jiterator = (*env)->CallObjectMethod(env, callback->jcallback, callback->predict_methodId, jann, jprefix); if (jiterator == NULL) return NULL; @@ -582,8 +584,8 @@ JNIEXPORT void JNICALL Java_org_grammaticalframework_pgf_Parser_addLiteralCallba callback->fin.fn = jpgf_literal_callback_fin; jclass callback_class = (*env)->GetObjectClass(env, jcallback); - callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(II)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;"); - callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(ILjava/lang/String;)Ljava/util/Iterator;"); + callback->match_methodId = (*env)->GetMethodID(env, callback_class, "match", "(Ljava/lang/String;I)Lorg/grammaticalframework/pgf/LiteralCallback$CallbackResult;"); + callback->predict_methodId = (*env)->GetMethodID(env, callback_class, "predict", "(Ljava/lang/String;Ljava/lang/String;)Ljava/util/Iterator;"); gu_pool_finally(pool, &callback->fin); @@ -964,7 +966,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) } static void -pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) +pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); @@ -973,7 +975,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li } static void -pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) +pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); JNIEnv* env = state->env; @@ -998,7 +1000,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind jcat, jfun, fid, - lindex, + gu2j_string(env, ann), jchildren); 
(*env)->DeleteLocalRef(env, jchildren); @@ -1051,7 +1053,7 @@ Java_org_grammaticalframework_pgf_Concr_bracketedLinearize(JNIEnv* env, jobject jclass bracket_class = (*env)->FindClass(env, "org/grammaticalframework/pgf/Bracket"); if (!bracket_class) return NULL; - jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;II[Ljava/lang/Object;)V"); + jmethodID bracket_constrId = (*env)->GetMethodID(env, bracket_class, "<init>", "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/String;[Ljava/lang/Object;)V"); if (!bracket_constrId) return NULL; diff --git a/src/runtime/java/org/grammaticalframework/pgf/Bracket.java b/src/runtime/java/org/grammaticalframework/pgf/Bracket.java index 40fb744ea..e2e12fe19 100644 --- a/src/runtime/java/org/grammaticalframework/pgf/Bracket.java +++ b/src/runtime/java/org/grammaticalframework/pgf/Bracket.java @@ -14,18 +14,18 @@ public class Bracket { * where they all will have the same id */ public final int fid; - public final int lindex; + public final String ann; /** The children of the bracket. Every element is either a string * if this is a leaf in the parse tree, or a {@link Bracket} object. 
*/ public final Object[] children; - public Bracket(String cat, String fun, int fid, int lindex, Object[] children) { + public Bracket(String cat, String fun, int fid, String ann, Object[] children) { this.cat = cat; this.fun = fun; this.fid = fid; - this.lindex = lindex; + this.ann = ann; this.children = children; } } diff --git a/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java b/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java index 6c6b657e5..0d146159a 100644 --- a/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java +++ b/src/runtime/java/org/grammaticalframework/pgf/LiteralCallback.java @@ -3,9 +3,9 @@ package org.grammaticalframework.pgf; import java.util.Iterator; public interface LiteralCallback { - public CallbackResult match(int lin_idx, int start_offset); + public CallbackResult match(String ann, int start_offset); - public Iterator predict(int lin_idx, String prefix); + public Iterator predict(String ann, String prefix); public static class CallbackResult { private ExprProb ep; diff --git a/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java b/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java index f5375a70a..a49dcc218 100644 --- a/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java +++ b/src/runtime/java/org/grammaticalframework/pgf/NercLiteralCallback.java @@ -19,7 +19,7 @@ public class NercLiteralCallback implements LiteralCallback { this.sentence = sentence; } - public CallbackResult match(int lin_idx, int offset) { + public CallbackResult match(String ann, int offset) { StringBuilder sbuilder = new StringBuilder(); int i = 0; @@ -83,7 +83,7 @@ public class NercLiteralCallback implements LiteralCallback { return null; } - public Iterator predict(int lin_idx, String prefix) { + public Iterator predict(String ann, String prefix) { return Collections.emptyList().iterator(); } } diff --git 
a/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java b/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java index d8e865db7..2fb03bb6e 100644 --- a/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java +++ b/src/runtime/java/org/grammaticalframework/pgf/UnknownLiteralCallback.java @@ -15,7 +15,7 @@ public class UnknownLiteralCallback implements LiteralCallback { this.sentence = sentence; } - public CallbackResult match(int lin_idx, int offset) { + public CallbackResult match(String ann, int offset) { if (offset < sentence.length() && !Character.isUpperCase(sentence.charAt(offset))) { int start_offset = offset; @@ -35,7 +35,7 @@ public class UnknownLiteralCallback implements LiteralCallback { return null; } - public Iterator predict(int lin_idx, String prefix) { + public Iterator predict(String ann, String prefix) { return Collections.emptyList().iterator(); } } diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index ed575544a..8d249d45a 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -1382,7 +1382,7 @@ unicode_to_utf8_offset(GuString sentence, size_t chars) static PgfExprProb* pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString sentence, size_t* poffset, GuPool *out_pool) { @@ -1390,8 +1390,8 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, gu_container(self, PyPgfLiteralCallback, callback); PyObject* result = - PyObject_CallFunction(callback->pycallback, "ii", - lin_idx, + PyObject_CallFunction(callback->pycallback, "si", + ann, #if PY_MAJOR_VERSION >= 3 utf8_to_unicode_offset(sentence, *poffset) #else @@ -1460,7 +1460,7 @@ pypgf_literal_callback_match(PgfLiteralCallback* self, PgfConcr* concr, static GuEnum* pypgf_literal_callback_predict(PgfLiteralCallback* self, PgfConcr* concr, - size_t lin_idx, + GuString ann, GuString prefix, GuPool *out_pool) { @@ -1983,7 
+1983,7 @@ typedef struct { PyObject_HEAD PyObject* cat; int fid; - int lindex; + PyObject* ann; PyObject* fun; PyObject* children; } BracketObject; @@ -2058,8 +2058,8 @@ static PyMemberDef Bracket_members[] = { "the abstract function for this bracket"}, {"fid", T_INT, offsetof(BracketObject, fid), 0, "an id which identifies this bracket in the bracketed string. If there are discontinuous phrases this id will be shared for all brackets belonging to the same phrase."}, - {"lindex", T_INT, offsetof(BracketObject, lindex), 0, - "the constituent index"}, + {"ann", T_OBJECT_EX, offsetof(BracketObject, ann), 0, + "the analysis of the constituent"}, {"children", T_OBJECT_EX, offsetof(BracketObject, children), 0, "a list with the children of this bracket"}, {NULL} /* Sentinel */ @@ -2124,7 +2124,7 @@ pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) } static void -pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) +pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); @@ -2133,7 +2133,7 @@ pgf_bracket_lzn_begin_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t li } static void -pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lindex, PgfCId fun) +pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, GuString ann, PgfCId fun) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); @@ -2145,7 +2145,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, size_t lind if (bracket != NULL) { bracket->cat = PyString_FromString(cat); bracket->fid = fid; - bracket->lindex = lindex; + bracket->ann = PyString_FromString(ann); bracket->fun = PyString_FromString(fun); bracket->children = state->list; PyList_Append(parent, (PyObject*) bracket);