a top-level API for parsing in the C runtime

2026-06-03 23:28:55 -06:00 · 2012-12-13 14:44:33 +00:00
parent 60942c440a
commit 2ba632dc9f
10 changed files with 138 additions and 91 deletions
--- a/src/runtime/c/pgf/data.c
+++ b/src/runtime/c/pgf/data.c
@@ -1,5 +1,4 @@
 #include "data.h"
-#include "expr.h"
 #include <gu/type.h>
 #include <gu/variant.h>
 #include <gu/assert.h>
--- a/src/runtime/c/pgf/data.h
+++ b/src/runtime/c/pgf/data.h
@@ -27,7 +27,6 @@
 #include <gu/type.h>
 #include <gu/seq.h>
 #include <pgf/pgf.h>
-#include <pgf/expr.h>

 typedef struct PgfCCat PgfCCat;
 typedef PgfCCat* PgfCCatId;
@@ -123,15 +122,6 @@ struct PgfPGF {

 extern GU_DECLARE_TYPE(PgfPGF, struct);

-typedef float prob_t;
-
-typedef struct {
-	prob_t prob;
-	PgfExpr expr;
-} PgfExprProb;
-
-extern GU_DECLARE_TYPE(PgfExprProb, struct);
-
 struct PgfFunDecl {
 	PgfType* type;
 	int arity;
--- a/src/runtime/c/pgf/expr.c
+++ b/src/runtime/c/pgf/expr.c
@@ -1,4 +1,4 @@
-#include "expr.h"
+#include "pgf.h"
 #include <gu/intern.h>
 #include <gu/assert.h>
 #include <ctype.h>
--- a/src/runtime/c/pgf/expr.h
+++ b/src/runtime/c/pgf/expr.h
@@ -5,7 +5,6 @@
 #include <gu/write.h>
 #include <gu/variant.h>
 #include <gu/seq.h>
-#include <pgf/pgf.h>

 /// Abstract syntax trees
 /// @file
@@ -125,6 +124,15 @@ typedef struct {
 	PgfExpr expr;
 } PgfExprImplArg;

+typedef float prob_t;
+
+typedef struct { // An expression paired with a prob_t value.
+	prob_t prob;  // NOTE(review): presumably the probability of expr -- confirm
+	PgfExpr expr; // the expression this entry carries
+} PgfExprProb;
+
+extern GU_DECLARE_TYPE(PgfExprProb, struct);
+
 int
 pgf_expr_arity(PgfExpr expr);

--- a/src/runtime/c/pgf/lexer.c
+++ b/src/runtime/c/pgf/lexer.c
@@ -1,11 +1,13 @@
 #include <gu/list.h>
-#include <pgf/lexer.h>
+#include <pgf/pgf.h>
 #include <pgf/data.h>
 #include <wctype.h>

 struct PgfLexer {
 	GuReader* rdr;
+	GuPool* pool; // pool handed to pgf_new_lexer; read tokens are frozen into it
 	GuUCS ucs;
+	PgfToken tok; // most recently read token; gu_empty_string before the first read
 };

 PgfLexer*
@@ -13,17 +15,17 @@ pgf_new_lexer(GuReader *rdr, GuPool *pool)
 {
 	PgfLexer* lexer = gu_new(PgfLexer, pool);
 	lexer->rdr = rdr;
+	lexer->pool = pool; // kept so tokens can later be frozen into the same pool
 	lexer->ucs = ' ';
+	lexer->tok = gu_empty_string; // no token has been read yet
 	return lexer;
 }

 PgfToken
-pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
+pgf_lexer_read_token(PgfLexer *lexer, GuExn* err)
 {
 	GuPool* tmp_pool = gu_new_pool();

-	PgfToken tok;
-
 	GuStringBuf* buf = gu_string_buf(tmp_pool);
 	GuWriter* wtr = gu_string_buf_writer(buf);

@@ -109,8 +111,14 @@ pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool)
 	}

 stop:
-	tok = gu_string_buf_freeze(buf, pool);
+	lexer->tok = gu_string_buf_freeze(buf, lexer->pool);

 	gu_pool_free(tmp_pool);
-	return tok;
+	return lexer->tok;
+}
+
+PgfToken
+pgf_lexer_current_token(PgfLexer *lexer)
+{
+	return lexer->tok; // last token stored by pgf_lexer_read_token (gu_empty_string if none yet)
 }
--- a/src/runtime/c/pgf/lexer.h
+++ b/src/runtime/c/pgf/lexer.h
@@ -2,7 +2,9 @@
 #define PGF_LEXER_H_

 #include <gu/read.h>
-#include <pgf/data.h>
+
+/// A single lexical token			      
+typedef GuString PgfToken;

 typedef struct PgfLexer PgfLexer;

@@ -10,6 +12,9 @@ PgfLexer*
 pgf_new_lexer(GuReader *rdr, GuPool *pool);

 PgfToken
-pgf_lexer_next_token(PgfLexer *lexer, GuExn* err, GuPool *pool);
+pgf_lexer_read_token(PgfLexer *lexer, GuExn* err);
+
+PgfToken
+pgf_lexer_current_token(PgfLexer *lexer);

 #endif // PGF_LEXER_H_
--- a/src/runtime/c/pgf/pgf.c
+++ b/src/runtime/c/pgf/pgf.c
@@ -2,8 +2,12 @@
 #include <pgf/data.h>
 #include <pgf/expr.h>
 #include <pgf/reader.h>
+#include <pgf/linearize.h>
+#include <pgf/parser.h>
+#include <pgf/lexer.h>
 #include <gu/file.h>
 #include <gu/string.h>
+#include <gu/enum.h>
 #include <stdio.h>
 #include <math.h>

@@ -167,3 +171,73 @@ pgf_print_name(PgfConcr* concr, PgfCId id)
 		name = id;
 	return name;
 }
+
+void
+pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err)
+{ // Writes the linearization of the first concrete tree obtained for expr to wtr.
+	GuPool* tmp_pool = gu_local_pool(); // scratch pool, released before returning
+	
+	GuEnum* cts = 
+		pgf_lzr_concretize(concr, expr, tmp_pool);
+	PgfCncTree ctree = gu_next(cts, PgfCncTree, tmp_pool); // only the first enumerated tree is used
+	if (!gu_variant_is_null(ctree)) { // nothing is written when no concrete tree is produced
+		pgf_lzr_linearize_simple(concr, ctree, 0, wtr, err);
+	}
+
+	gu_pool_free(tmp_pool);
+}
+
+GuEnum*
+pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
+{ // Feeds every token from lexer to the parser; returns the result enumeration, or NULL on failure.
+	// Begin parsing a sentence of the specified category
+	PgfParseState* state =
+		pgf_parser_init_state(concr, cat, 0, pool);
+	if (state == NULL) { // parsing could not start; pgf_parse has not read any token yet
+		return NULL;
+	}
+
+	// Tokenization: keep reading tokens until the lexer raises an exception
+	GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool); // NOTE(review): appears to catch every exception kind, so a read error would look like end of input -- confirm
+	PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
+	while (!gu_exn_is_raised(lex_err)) {
+		// feed the token to get a new parse state
+		state = pgf_parser_next_state(state, tok, pool);
+		if (state == NULL) { // token rejected; it stays available via pgf_lexer_current_token(lexer)
+			return NULL;
+		}
+
+		tok = pgf_lexer_read_token(lexer, lex_err);
+	}
+
+	// Now begin enumerating the resulting syntax trees
+	return pgf_parse_result(state, pool);
+}
+
+void
+pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool)
+{ // Feeds every token from lexer to the parser, then hands the final state to pgf_parse_print_chunks.
+	// Begin parsing a sentence of the specified category
+	PgfParseState* state =
+		pgf_parser_init_state(concr, cat, 0, pool);
+	if (state == NULL) { // parsing could not start: print an empty line to stdout and give up
+		printf("\n");
+		return;
+	}
+
+	// Tokenization: keep reading tokens until the lexer raises an exception
+	GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), pool); // NOTE(review): appears to catch every exception kind, so a read error would look like end of input -- confirm
+	PgfToken tok = pgf_lexer_read_token(lexer, lex_err);
+	while (!gu_exn_is_raised(lex_err)) {
+		// feed the token to get a new parse state
+		state = pgf_parser_next_state(state, tok, pool);
+		if (state == NULL) { // token rejected: print an empty line to stdout and give up
+			printf("\n");
+			return;
+		}
+
+		tok = pgf_lexer_read_token(lexer, lex_err);
+	}
+
+	pgf_parse_print_chunks(state);
+}
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -28,6 +28,7 @@
 #include <gu/exn.h>
 #include <gu/mem.h>
 #include <gu/map.h>
+#include <gu/enum.h>
 #include <gu/string.h>


@@ -37,19 +38,21 @@ extern GU_DECLARE_TYPE(PgfCId, typedef);

 extern GU_DECLARE_TYPE(PgfExn, abstract);

-
-/// A single lexical token			      
-typedef GuString PgfToken;
-
 /// @name PGF Grammar objects
 /// @{

 typedef struct PgfPGF PgfPGF;
+extern GU_DECLARE_TYPE(PgfPGF, struct);
+
 typedef struct PgfConcr PgfConcr;
+extern GU_DECLARE_TYPE(PgfConcr, struct);
+

 /**< A representation of a PGF grammar. 
 */

+#include <pgf/expr.h>
+#include <pgf/lexer.h>

 PgfPGF*
 pgf_read(const char* fpath,
@@ -103,8 +106,16 @@ pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
 GuString
 pgf_print_name(PgfConcr*, PgfCId id);

-#include <gu/type.h>
-extern GU_DECLARE_TYPE(PgfPGF, struct);
+void
+pgf_linearize(PgfConcr* concr, PgfExpr expr, GuWriter* wtr, GuExn* err);
+
+GuEnum*
+pgf_parse(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
+
+// An experimental function; please do not use it.
+void
+pgf_print_chunks(PgfConcr* concr, PgfCId cat, PgfLexer *lexer, GuPool* pool);
+

 /// @}

--- a/src/runtime/c/utils/pgf-chunk.c
+++ b/src/runtime/c/utils/pgf-chunk.c
@@ -70,14 +70,6 @@ int main(int argc, char* argv[]) {
 	pgf_parser_add_literal(from_concr, gu_str_string("Symb", pool),
 	                       &pgf_nerc_literal_callback);

-	// Create an output stream for stdout
-	GuOut* out = gu_file_out(stdout, pool);
-
-	// Locale-encoding writers are currently unsupported
-	// GuWriter* wtr = gu_locale_writer(out, pool);
-	// Use a writer with hard-coded utf-8 encoding for now.
-	GuWriter* wtr = gu_new_utf8_writer(out, pool);
-
 	// We will keep the latest results in the 'ppool' and
 	// we will iterate over them by using 'result'.
 	GuPool* ppool = NULL;
@@ -103,42 +95,15 @@ int main(int argc, char* argv[]) {
 		// sentence, so our memory usage doesn't increase over time.
 		ppool = gu_new_pool();

-		// Begin parsing a sentence of the specified category
-		PgfParseState* state =
-			pgf_parser_init_state(from_concr, cat, 0, ppool);
-		if (state == NULL) {
-			fprintf(stderr, "Couldn't begin parsing\n");
-			status = EXIT_FAILURE;
-			break;
-		}
-		
 		GuReader *rdr =
 			gu_string_reader(gu_str_string(line, ppool), ppool);
 		PgfLexer *lexer =
 			pgf_new_lexer(rdr, ppool);

-		// Tokenization
-		GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool);
-		PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool);
-		while (!gu_exn_is_raised(lex_err)) {
-			// feed the token to get a new parse state
-			state = pgf_parser_next_state(state, tok, ppool);
-			if (!state) {
-				gu_puts("Unexpected token: \"", wtr, err);
-				gu_string_write(tok, wtr, err);
-				gu_puts("\"\n", wtr, err);
-				goto fail_parse;
-			}
-			
-			tok = pgf_lexer_next_token(lexer, lex_err, ppool);
-		}
-
-		pgf_parse_print_chunks(state);
-		continue;
-	fail_parse:
+		pgf_print_chunks(from_concr, cat, lexer, ppool);
+		
 		// Free all resources allocated during parsing and linearization
 		gu_pool_free(ppool);
-		ppool = NULL;
 	}
 fail_concr:
 fail:
--- a/src/runtime/c/utils/pgf-translate.c
+++ b/src/runtime/c/utils/pgf-translate.c
@@ -9,7 +9,6 @@
 #include <pgf/lexer.h>
 #include <pgf/literals.h>
 #include <pgf/linearize.h>
-#include <pgf/expr.h>
 #include <pgf/edsl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -160,40 +159,29 @@ int main(int argc, char* argv[]) {
 		// sentence, so our memory usage doesn't increase over time.
 		ppool = gu_new_pool();

-		clock_t start = clock();
-
-		// Begin parsing a sentence of the specified category
-		PgfParseState* state =
-			pgf_parser_init_state(from_concr, cat, 0, ppool);
-		if (state == NULL) {
-			fprintf(stderr, "Couldn't begin parsing\n");
-			status = EXIT_FAILURE;
-			break;
-		}
-		
 		GuReader *rdr =
 			gu_string_reader(gu_str_string(line, ppool), ppool);
 		PgfLexer *lexer =
 			pgf_new_lexer(rdr, ppool);

-		// Tokenization
-		GuExn* lex_err = gu_new_exn(NULL, gu_kind(type), ppool);
-		PgfToken tok = pgf_lexer_next_token(lexer, lex_err, ppool);
-		while (!gu_exn_is_raised(lex_err)) {
-			// feed the token to get a new parse state
-			state = pgf_parser_next_state(state, tok, ppool);
-			if (!state) {
+		clock_t start = clock();
+
+		GuEnum* result =
+			pgf_parse(from_concr, cat, lexer, ppool);
+		if (result == NULL) {
+			PgfToken tok =
+				pgf_lexer_current_token(lexer);
+
+			if (gu_string_eq(tok, gu_empty_string))
+				gu_puts("Couldn't begin parsing", wtr, err);
+			else {
 				gu_puts("Unexpected token: \"", wtr, err);
 				gu_string_write(tok, wtr, err);
 				gu_puts("\"\n", wtr, err);
-				goto fail_parse;
 			}
-			
-			tok = pgf_lexer_next_token(lexer, lex_err, ppool);
-		}

-		// Now begin enumerating the resulting syntax trees
-		result = pgf_parse_result(state, ppool);
+			goto fail_parse;
+		}

 		PgfExprProb* ep = gu_next(result, PgfExprProb*, ppool);

@@ -201,8 +189,7 @@ int main(int argc, char* argv[]) {
 		double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC;
 		printf("%.2f sec\n", cpu_time_used);

-		// The enumerator will return a null variant at the
-		// end of the results.
+		// The enumerator will return null at the end of the results.
 		if (ep == NULL) {
 			goto fail_parse;
 		}