Python binding: add a parsing function that accepts a list of tokens directly.

This allows defining a tokenizer in Python (or using an existing one, for instance from NLTK).
This commit is contained in:
gregoire.detrez
2013-01-24 13:31:34 +00:00
parent e7db50b9bd
commit 0aae4702ed
3 changed files with 101 additions and 0 deletions

View File

@@ -227,6 +227,33 @@ pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
return pgf_parse_result(state, pool);
}
/* Parse a sentence given as a pre-tokenized array instead of a PgfLexer.
 *
 * concr  - concrete grammar to parse with
 * cat    - start category identifier
 * tokens - array of NUL-terminated token strings
 * len    - number of entries in tokens
 * pool   - memory pool used for all allocations
 *
 * Returns an enumeration of the resulting syntax trees, or NULL if the
 * parser could not be initialized or the input is rejected at some token.
 */
GuEnum*
pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char **tokens, int len, GuPool* pool)
{
	/* Set up the parser for the requested start category. */
	PgfParseState* state = pgf_parser_init_state(concr, cat, 0, pool);
	if (state == NULL)
		return NULL;

	/* Feed the tokens one by one; a NULL state means the parser
	 * rejected the input at that point. */
	for (int i = 0; i < len; i++) {
		PgfToken tok = gu_str_string(tokens[i], pool);
		state = pgf_parser_next_state(state, tok, pool);
		if (state == NULL)
			return NULL;
	}

	/* Enumerate the syntax trees produced for the full input. */
	return pgf_parse_result(state, pool);
}
void
pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
{

View File

@@ -115,6 +115,9 @@ pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
/* Parse a sentence of category `cat`, reading tokens from `lexer`.
 * Returns an enumeration of syntax trees, or NULL on failure. */
PgfExprEnum*
pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
/* Same as pgf_parse, but takes a pre-tokenized input: an array of `len`
 * NUL-terminated token strings. Intended for bindings (e.g. Python) that
 * supply their own tokenizer. Returns NULL on failure.
 * NOTE(review): the definition in the .c file is declared as returning
 * GuEnum* — presumably PgfExprEnum is a typedef of GuEnum; confirm. */
PgfExprEnum*
pgf_parse_tokens(PgfConcr* concr, PgfCId cat, char* tokens[], int len, GuPool* pool);
/* Randomly generate expressions of category `cat` from the abstract
 * grammar, returned as an enumeration. */
PgfExprEnum*
pgf_generate(PgfPGF* pgf, PgfCId cat, GuPool* pool);