partial implementation for fullFormLexicon

This commit is contained in:
Krasimir Angelov
2022-01-11 15:07:03 +01:00
parent c119349479
commit 78b462c607
5 changed files with 49 additions and 1 deletions

View File

@@ -1003,6 +1003,31 @@ PgfText *pgf_print_sequence_internal(size_t seq_id, object o)
return printer.get_text();
}
// Render a sequence as plain text.
//
// The tokens of the sequence are written out in order, separated by a
// single space. The result is only produced when every symbol in the
// sequence is a plain token (PgfSymbolKS); as soon as any other kind of
// symbol is met the function gives up and returns NULL.
PGF_API
PgfText *pgf_sequence_get_text_internal(object o)
{
    ref<PgfSequence> sequence = o;
    PgfPrinter out(NULL,0,NULL);

    for (size_t idx = 0; idx < sequence->syms.len; idx++) {
        // Separator goes between tokens, never before the first one.
        if (idx != 0)
            out.puts(" ");

        PgfSymbol symbol = *vector_elem(&sequence->syms, idx);

        // Anything that is not a plain token cannot be rendered as text.
        if (ref<PgfSymbol>::get_tag(symbol) != PgfSymbolKS::tag)
            return NULL;

        auto token_sym = ref<PgfSymbolKS>::untagged(symbol);
        out.puts(&token_sym->token);
    }

    return out.get_text();
}
PGF_API_DECL
void pgf_release_phrasetable_ids(PgfPhrasetableIds *seq_ids)
{

View File

@@ -428,6 +428,9 @@ PgfText *pgf_print_lin_internal(PgfPhrasetableIds *seq_ids, object o, size_t i);
PGF_API_DECL
PgfText *pgf_print_sequence_internal(size_t seq_id, object o);
// Returns the text of the sequence `o`: its tokens in order, separated
// by single spaces. Returns NULL if the sequence contains any symbol
// other than a plain token (PgfSymbolKS).
PGF_API_DECL
PgfText *pgf_sequence_get_text_internal(object o);
PGF_API_DECL
void pgf_release_phrasetable_ids(PgfPhrasetableIds *seq_ids);

View File

@@ -33,6 +33,8 @@ public:
PgfPrinter(PgfPrintContext *context, int priority,
PgfMarshaller *marshaller);
PgfPrinter() { free(res); }
// Push a new variable in the printing context. If the name
// collides with an existing variable, the variable is renamed
// by adding a number.

View File

@@ -568,7 +568,23 @@ unk w [] | any (not . isPunctuation) w = True
unk _ _ = False
-- | Enumerate the word forms of a concrete syntax.
--
-- This is a partial implementation: every sequence for which
-- @pgf_sequence_get_text_internal@ yields a non-null text contributes
-- one entry, and the list of analyses for each entry is still empty.
-- Entries are returned in the order in which the sequences were visited.
--
-- NOTE(review): 'unsafePerformIO' is used on the assumption that
-- iterating over the sequences of a fixed revision is observably pure
-- - confirm.
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon c = unsafePerformIO $ do
  ref <- newIORef []
  allocaBytes (#size PgfSequenceItor) $ \itor ->
    bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr ->
      -- The bound pointer gets its own name so that it does not shadow
      -- the 'c_revision' accessor applied on this very line.
      withForeignPtr (c_revision c) $ \c_rev -> do
        (#poke PgfSequenceItor, fn) itor fptr
        seq_ids <- withPgfExn "fullFormLexicon" (pgf_iter_sequences (c_db c) c_rev itor)
        pgf_release_phrasetable_ids seq_ids
  -- Entries were consed onto the front, so undo the reversal.
  fmap reverse (readIORef ref)
  where
    -- Callback invoked for each sequence: fetch its text (released with
    -- 'free' afterwards) and, if there is one, record it as a lemma.
    getSequences ref itor seq_id val exn =
      bracket (pgf_sequence_get_text_internal val) free $ \c_text ->
        if c_text == nullPtr
          then return ()
          else do lemma <- peekText c_text
                  modifyIORef ref ((lemma, []) :)
-- | This data type encodes the different outcomes which you could get from the parser.
data ParseOutput a

View File

@@ -132,6 +132,8 @@ foreign import ccall pgf_print_lin_internal :: Ptr PgfPhrasetableIds -> Ptr () -
foreign import ccall pgf_print_sequence_internal :: CSize -> Ptr () -> IO (Ptr PgfText)
-- Text of a sequence (tokens separated by single spaces), or a null
-- pointer when the sequence is not made up of plain tokens only.
foreign import ccall pgf_sequence_get_text_internal :: Ptr () -> IO (Ptr PgfText)
foreign import ccall pgf_release_phrasetable_ids :: Ptr PgfPhrasetableIds -> IO ()
type ItorCallback = Ptr PgfItor -> Ptr PgfText -> Ptr () -> Ptr PgfExn -> IO ()