started an official API to the C runtime

This commit is contained in:
kr.angelov
2012-12-12 11:25:58 +00:00
parent 5e091d2e3d
commit aa13090b66
15 changed files with 329 additions and 173 deletions

View File

@@ -42,7 +42,6 @@ guinclude_HEADERS = \
pgfincludedir=$(includedir)/pgf
pgfinclude_HEADERS = \
pgf/data.h \
pgf/expr.h \
pgf/linearize.h \
pgf/parser.h \
@@ -109,9 +108,12 @@ libpgf_la_SOURCES = \
pgf/lexer.h \
pgf/literals.c \
pgf/literals.h \
pgf/reader.h \
pgf/reader.c \
pgf/linearize.c \
pgf/printer.c
pgf/printer.c \
pgf/pgf.c \
pgf/pgf.h
bin_PROGRAMS = \
utils/pgf2yaml \

View File

@@ -1,8 +1,6 @@
#ifndef GU_SYSDEPS_H_
#define GU_SYSDEPS_H_
#include <config.h>
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
# define GU_GNUC
#endif

View File

@@ -1,6 +1,6 @@
#include <gu/ucs.h>
#include <gu/assert.h>
#include <config.h>
#include "config.h"
GU_DEFINE_TYPE(GuUCSExn, abstract, _);
@@ -131,5 +131,16 @@ gu_ucs_to_str(const GuUCS* ubuf, size_t len, char* cbuf, GuExn* err)
extern inline bool
gu_ucs_valid(GuUCS ucs);
extern inline GuUCS
gu_char_ucs(char c);
// Convert a character of the execution character set into its UCS code
// point.
//
// gu_require/gu_ensure are presumably the library's pre-/post-condition
// assertion macros (TODO confirm in gu/assert.h): the input must satisfy
// gu_char_is_valid(c), and the result is checked to be below 0x80.
GuUCS
gu_char_ucs(char c)
{
gu_require(gu_char_is_valid(c));
#ifdef CHAR_ASCII
// With CHAR_ASCII defined, the char value is used directly as the code point.
GuUCS u = (GuUCS) c;
#else
// Otherwise the code point is looked up in a table defined in another
// translation unit, indexed by the character's unsigned value.
extern const uint8_t gu_ucs_ascii_reverse_[CHAR_MAX];
GuUCS u = gu_ucs_ascii_reverse_[(unsigned char) c];
#endif
gu_ensure(u < 0x80);
return u;
}

View File

@@ -5,7 +5,6 @@
#include <gu/exn.h>
#include <gu/assert.h>
#if defined(__STDC_ISO_10646__) && WCHAR_MAX >= 0x10FFFF
#include <wchar.h>
#define GU_UCS_WCHAR
@@ -25,19 +24,8 @@ gu_ucs_valid(GuUCS ucs)
return ucs >= 0 && ucs <= GU_UCS_MAX;
}
inline GuUCS
gu_char_ucs(char c)
{
gu_require(gu_char_is_valid(c));
#ifdef CHAR_ASCII
GuUCS u = (GuUCS) c;
#else
extern const uint8_t gu_ucs_ascii_reverse_[CHAR_MAX];
GuUCS u = gu_ucs_ascii_reverse_[(unsigned char) c];
#endif
gu_ensure(u < 0x80);
return u;
}
GuUCS
gu_char_ucs(char c);
char
gu_ucs_char(GuUCS uc, GuExn* err);

View File

@@ -1,6 +1,6 @@
#include <gu/assert.h>
#include <gu/utf8.h>
#include <config.h>
#include "config.h"
GuUCS
gu_utf8_decode(const uint8_t** src_inout)
@@ -73,7 +73,6 @@ fail:
return 0;
}
size_t
gu_advance_utf8(GuUCS ucs, uint8_t* buf)
{
@@ -105,6 +104,19 @@ gu_in_utf8_char_(GuIn* in, GuExn* err)
return gu_ucs_char(gu_in_utf8(in, err), err);
}
// Read one character from a UTF-8 input stream and return it as a `char`.
//
// When CHAR_ASCII is defined, a next byte in the range [0, 0x80) is consumed
// and returned directly. Every other case (including a negative peek
// result) is delegated to gu_in_utf8_char_().
char
gu_in_utf8_char(GuIn* in, GuExn* err)
{
#ifdef CHAR_ASCII
    int next_byte = gu_in_peek_u8(in);
    if (0 <= next_byte && next_byte < 0x80) {
        gu_in_consume(in, 1);
        return (char) next_byte;
    }
#endif
    return gu_in_utf8_char_(in, err);
}
void
gu_out_utf8_long_(GuUCS ucs, GuOut* out, GuExn* err)
{
@@ -210,11 +222,17 @@ void gu_str_out_utf8_(const char* str, GuOut* out, GuExn* err)
#endif
extern inline void
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err);
extern inline GuUCS
gu_in_utf8(GuIn* in, GuExn* err);
extern inline char
gu_in_utf8_char(GuIn* in, GuExn* err);
// Write the NUL-terminated string `str` to `out` as UTF-8.
//
// When CHAR_ASCII is defined, the bytes of `str` are written unchanged with
// gu_out_bytes(). Otherwise the work is delegated to gu_str_out_utf8_().
void
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err)
{
#ifdef CHAR_ASCII
    const uint8_t* raw_bytes = (const uint8_t*) str;
    size_t n_bytes = strlen(str);
    gu_out_bytes(out, raw_bytes, n_bytes, err);
#else
    extern void
    gu_str_out_utf8_(const char* str, GuOut* out, GuExn* err);

    gu_str_out_utf8_(str, out, err);
#endif
}

View File

@@ -18,19 +18,8 @@ gu_in_utf8(GuIn* in, GuExn* err)
}
inline char
gu_in_utf8_char(GuIn* in, GuExn* err)
{
#ifdef CHAR_ASCII
int i = gu_in_peek_u8(in);
if (i >= 0 && i < 0x80) {
gu_in_consume(in, 1);
return (char) i;
}
#endif
extern char gu_in_utf8_char_(GuIn* in, GuExn* err);
return gu_in_utf8_char_(in, err);
}
char
gu_in_utf8_char(GuIn* in, GuExn* err);
void
gu_out_utf8_long_(GuUCS ucs, GuOut* out, GuExn* err);
@@ -52,16 +41,7 @@ gu_utf32_out_utf8(const GuUCS* src, size_t len, GuOut* out, GuExn* err);
GuUCS
gu_utf8_decode(const uint8_t** utf8);
inline void
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err)
{
#ifdef CHAR_ASCII
gu_out_bytes(out, (const uint8_t*) str, strlen(str), err);
#else
extern void
gu_str_out_utf8_(const char* str, GuOut* out, GuExn* err);
gu_str_out_utf8_(str, out, err);
#endif
}
void
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err);
#endif // GU_UTF8_H_

View File

@@ -217,7 +217,7 @@ GU_DEFINE_TYPE(
&gu_null_struct))));
GU_DEFINE_TYPE(
PgfPrintNames, PgfCIdMap, gu_type(GuString), NULL);
PgfPrintNames, PgfCIdMap, gu_type(GuString), &gu_empty_string);
GU_DEFINE_TYPE(
PgfConcr, struct,

169
src/runtime/c/pgf/pgf.c Normal file
View File

@@ -0,0 +1,169 @@
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/expr.h>
#include <pgf/reader.h>
#include <gu/file.h>
#include <gu/string.h>
#include <stdio.h>
#include <math.h>
GU_DEFINE_TYPE(PgfExn, abstract, _);
// Read a PGF grammar from the file at `fpath`.
//
// fpath: path of the grammar file to open.
// pool:  pool handed to the reader for the resulting grammar object.
// err:   exception frame; receives an errno-based exception when the file
//        cannot be opened, and any exception raised while reading.
//
// Returns the grammar, or NULL when the file could not be opened or `err`
// holds an exception after reading.
PgfPGF*
pgf_read(const char* fpath,
         GuPool* pool, GuExn* err)
{
    // PGF is a binary format, so open in binary mode to avoid newline
    // translation on platforms that distinguish text and binary streams.
    FILE* infile = fopen(fpath, "rb");
    if (infile == NULL) {
        gu_raise_errno(err);
        return NULL;
    }

    GuPool* tmp_pool = gu_new_pool();

    // Create an input stream from the input file
    GuIn* in = gu_file_in(infile, tmp_pool);

    PgfReader* rdr = pgf_new_reader(in, pool, tmp_pool, err);
    PgfPGF* pgf = pgf_read_new(rdr, gu_type(PgfPGF), pool, NULL);

    // Release the stream and reader (both live in tmp_pool) before closing
    // the file underneath them. The file was opened here, so it must be
    // closed here; previously the handle leaked on every call.
    gu_pool_free(tmp_pool);
    fclose(infile);

    gu_return_on_exn(err, NULL);
    return pgf;
}
// Load meta/child probabilities for abstract categories from a text file.
//
// Each record is three whitespace-separated fields: a category name, a
// second field and a probability. The stored value is -log(probability).
// The second field selects the destination:
//   "*"   -> the first category's meta_prob
//   "_"   -> the first category's meta_token_prob
//   other -> a category name; the value is stored in the first category's
//            meta_child_probs map (created in `pool` on first use), keyed
//            by the second category.
//
// Reading stops at the first record that does not yield all three fields.
// An errno-based exception is raised on `err` when the file cannot be
// opened, and a PgfExn when a category name is not found in the grammar.
void
pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
                          GuPool* pool, GuExn* err)
{
    FILE* probs_file = fopen(fpath, "r");
    if (probs_file == NULL) {
        gu_raise_errno(err);
        return;
    }

    GuPool* tmp_pool = gu_new_pool();

    char parent_name[21];
    char child_name[21];
    prob_t prob;

    while (fscanf(probs_file, "%20s\t%20s\t%f",
                  parent_name, child_name, &prob) >= 3) {
        prob = - log(prob);

        GuString parent_key = gu_str_string(parent_name, tmp_pool);
        PgfCat* parent =
            gu_map_get(pgf->abstract.cats, &parent_key, PgfCat*);
        if (parent == NULL) {
            gu_raise(err, PgfExn);
            break;
        }

        if (strcmp(child_name, "*") == 0) {
            parent->meta_prob = prob;
            continue;
        }
        if (strcmp(child_name, "_") == 0) {
            parent->meta_token_prob = prob;
            continue;
        }

        GuString child_key = gu_str_string(child_name, tmp_pool);
        PgfCat* child =
            gu_map_get(pgf->abstract.cats, &child_key, PgfCat*);
        if (child == NULL) {
            gu_raise(err, PgfExn);
            break;
        }

        if (parent->meta_child_probs == NULL) {
            parent->meta_child_probs =
                gu_map_type_new(PgfMetaChildMap, pool);
        }
        gu_map_put(parent->meta_child_probs, child, prob_t, prob);
    }

    // Shared cleanup for normal termination and for the error exits above.
    gu_pool_free(tmp_pool);
    fclose(probs_file);
}
// Return the grammar's `absname` field.
GuString
pgf_abstract_name(PgfPGF* pgf)
{
    GuString abstract_name = pgf->absname;
    return abstract_name;
}
// Iterate over the entries of pgf->concretes (the map pgf_get_language
// looks names up in) by handing the caller's iterator `fn` and exception
// frame `err` to gu_map_iter.
void
pgf_iter_languages(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
{
gu_map_iter(pgf->concretes, fn, err);
}
// Look up the entry named `lang` in pgf->concretes and return the stored
// PgfConcr pointer. Callers in this code base treat a NULL result as
// "unknown language".
PgfConcr*
pgf_get_language(PgfPGF* pgf, PgfCId lang)
{
    PgfConcr* concr = gu_map_get(pgf->concretes, &lang, PgfConcr*);
    return concr;
}
// Iterate over the entries of pgf->abstract.cats by handing the caller's
// iterator `fn` and exception frame `err` to gu_map_iter.
void
pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
{
gu_map_iter(pgf->abstract.cats, fn, err);
}
// Return the grammar's start category.
//
// The abstract flag "startcat" is looked up in pgf->abstract.aflags. When
// it is present and holds a string literal, that string is returned as
// stored in the grammar. In every other case the default "S" is returned,
// allocated in `pool`.
PgfCId
pgf_start_cat(PgfPGF* pgf, GuPool* pool)
{
    GuPool* tmp_pool = gu_local_pool();

    GuString s = gu_str_string("startcat", tmp_pool);
    PgfLiteral lit =
        gu_map_get(pgf->abstract.aflags, &s, PgfLiteral);

    // The key string is needed only for the lookup above. `lit` refers to
    // data stored in the grammar rather than tmp_pool, so the local pool
    // can be released here; previously it leaked on every return path.
    gu_pool_free(tmp_pool);

    if (!gu_variant_is_null(lit)) {
        GuVariantInfo i = gu_variant_open(lit);
        if (i.tag == PGF_LITERAL_STR) {
            PgfLiteralStr* lstr = (PgfLiteralStr*) i.data;
            return lstr->val;
        }
    }

    return gu_str_string("S", pool);
}
// Iterate over the entries of pgf->abstract.funs by handing the caller's
// iterator `fn` and exception frame `err` to gu_map_iter.
void
pgf_iter_functions(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
{
gu_map_iter(pgf->abstract.funs, fn, err);
}
// Invoke the iterator `fn` once for each function recorded for the
// category `catname`.
//
// The callback receives a pointer to the entry's `fun` field as the key
// and NULL as the value. Raises PgfExn on `err` when the category is not
// found. Iteration stops as soon as `err` holds an exception.
void
pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
                          GuMapItor* fn, GuExn* err)
{
    PgfCat* cat = gu_map_get(pgf->abstract.cats, &catname, PgfCat*);
    if (cat == NULL) {
        gu_raise(err, PgfExn);
        return;
    }

    size_t idx = 0;
    while (idx < cat->n_functions) {
        fn->fn(fn, &cat->functions[idx].fun, NULL, err);
        if (!gu_ok(err))
            break;
        idx++;
    }
}
// Return the print name stored for `id` in concr->printnames. When the
// stored value equals gu_empty_string, `id` itself is returned instead.
GuString
pgf_print_name(PgfConcr* concr, PgfCId id)
{
    PgfCId print_name = gu_map_get(concr->printnames, &id, PgfCId);
    return gu_string_eq(print_name, gu_empty_string) ? id : print_name;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright 2010 University of Helsinki.
* Copyright 2010 University of Gothenburg.
*
* This file is part of libpgf.
*
@@ -27,7 +27,7 @@
#include <gu/exn.h>
#include <gu/mem.h>
#include <gu/in.h>
#include <gu/map.h>
#include <gu/string.h>
@@ -35,20 +35,25 @@ typedef GuString PgfCId;
extern GU_DECLARE_TYPE(PgfCId, typedef);
extern GU_DECLARE_TYPE(PgfExn, abstract);
/// A single lexical token
typedef GuString PgfToken;
typedef GuString PgfToken;
/// @name PGF Grammar objects
/// @{
typedef struct PgfPGF PgfPGF;
typedef struct PgfConcr PgfConcr;
/**< A representation of a PGF grammar.
*/
PgfPGF*
pgf_read(GuIn* in, GuPool* pool, GuExn* err);
pgf_read(const char* fpath,
GuPool* pool, GuExn* err);
/**< Read a grammar from a PGF file.
*
@@ -69,10 +74,34 @@ pgf_read(GuIn* in, GuPool* pool, GuExn* err);
*/
bool
pgf_load_meta_child_probs(PgfPGF*, const char* fpath, GuPool* pool);
void
pgf_load_meta_child_probs(PgfPGF*, const char* fpath,
GuPool* pool, GuExn* err);
typedef struct PgfConcr PgfConcr;
GuString
pgf_abstract_name(PgfPGF*);
void
pgf_iter_languages(PgfPGF*, GuMapItor*, GuExn* err);
PgfConcr*
pgf_get_language(PgfPGF*, PgfCId lang);
void
pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err);
PgfCId
pgf_start_cat(PgfPGF* pgf, GuPool* pool);
void
pgf_iter_functions(PgfPGF* pgf, GuMapItor* fn, GuExn* err);
void
pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
GuMapItor* fn, GuExn* err);
GuString
pgf_print_name(PgfConcr*, PgfCId id);
#include <gu/type.h>
extern GU_DECLARE_TYPE(PgfPGF, struct);

View File

@@ -20,6 +20,7 @@
#include "data.h"
#include "expr.h"
#include "literals.h"
#include "reader.h"
#include <gu/defs.h>
#include <gu/map.h>
#include <gu/seq.h>
@@ -40,8 +41,6 @@
// PgfReader
//
typedef struct PgfReader PgfReader;
struct PgfReader {
GuIn* in;
GuExn* err;
@@ -132,7 +131,7 @@ struct PgfReadNewFn {
size_t* size_out);
};
static void*
void*
pgf_read_new(PgfReader* rdr, GuType* type, GuPool* pool, size_t* size_out)
{
size_t size = 0;
@@ -884,7 +883,7 @@ pgf_read_new_table = GU_TYPETABLE(
PGF_READ_NEW(PgfConcr)
);
static PgfReader*
PgfReader*
pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
{
PgfReader* rdr = gu_new(PgfReader, tmp_pool);
@@ -900,65 +899,3 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
return rdr;
}
PgfPGF*
pgf_read(GuIn* in, GuPool* pool, GuExn* err)
{
GuPool* tmp_pool = gu_new_pool();
PgfReader* rdr = pgf_new_reader(in, pool, tmp_pool, err);
PgfPGF* pgf = pgf_read_new(rdr, gu_type(PgfPGF), pool, NULL);
gu_pool_free(tmp_pool);
gu_return_on_exn(err, NULL);
return pgf;
}
bool
pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, GuPool* pool)
{
FILE *fp = fopen(fpath, "r");
if (!fp)
return false;
GuPool* tmp_pool = gu_new_pool();
for (;;) {
char cat1_s[21];
char cat2_s[21];
prob_t prob;
if (fscanf(fp, "%20s\t%20s\t%f", cat1_s, cat2_s, &prob) < 3)
break;
prob = - log(prob);
GuString cat1 = gu_str_string(cat1_s, tmp_pool);
PgfCat* abscat1 =
gu_map_get(pgf->abstract.cats, &cat1, PgfCat*);
if (abscat1 == NULL)
return false;
if (strcmp(cat2_s, "*") == 0) {
abscat1->meta_prob = prob;
} else if (strcmp(cat2_s, "_") == 0) {
abscat1->meta_token_prob = prob;
} else {
GuString cat2 = gu_str_string(cat2_s, tmp_pool);
PgfCat* abscat2 = gu_map_get(pgf->abstract.cats, &cat2, PgfCat*);
if (abscat2 == NULL)
return false;
if (abscat1->meta_child_probs == NULL) {
abscat1->meta_child_probs =
gu_map_type_new(PgfMetaChildMap, pool);
}
gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob);
}
}
gu_pool_free(tmp_pool);
fclose(fp);
return true;
}

View File

@@ -0,0 +1,35 @@
/*
* Copyright 2012 University of Gothenburg.
*
* This file is part of libpgf.
*
* Libpgf is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at your
* option) any later version.
*
* Libpgf is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libpgf. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef READER_H_
#define READER_H_
#include <gu/exn.h>
#include <gu/mem.h>
#include <gu/in.h>
typedef struct PgfReader PgfReader;
PgfReader*
pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err);
void*
pgf_read_new(PgfReader* rdr, GuType* type, GuPool* pool, size_t* size_out);
#endif // READER_H_

View File

@@ -8,7 +8,6 @@
#include <gu/enum.h>
#include <gu/file.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/parser.h>
#include <pgf/lexer.h>
#include <pgf/literals.h>
@@ -39,38 +38,28 @@ int main(int argc, char* argv[]) {
GuString from_lang = gu_str_string(argv[3], pool);
FILE* infile = fopen(filename, "r");
if (infile == NULL) {
fprintf(stderr, "couldn't open %s\n", filename);
status = EXIT_FAILURE;
goto fail;
}
// Create an input stream from the input file
GuIn* in = gu_file_in(infile, pool);
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
// Read the PGF grammar.
PgfPGF* pgf = pgf_read(in, pool, err);
PgfPGF* pgf = pgf_read(filename, pool, err);
// If an error occurred, it shows in the exception frame
if (!gu_ok(err)) {
fprintf(stderr, "Reading PGF failed\n");
status = EXIT_FAILURE;
goto fail_read;
goto fail;
}
if (!pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool)) {
pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool, err);
if (!gu_ok(err)) {
fprintf(stderr, "Loading meta child probs failed\n");
status = EXIT_FAILURE;
goto fail_read;
goto fail;
}
// Look up the source and destination concrete categories
PgfConcr* from_concr =
gu_map_get(pgf->concretes, &from_lang, PgfConcr*);
PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
if (!from_concr) {
fprintf(stderr, "Unknown language\n");
status = EXIT_FAILURE;
@@ -152,8 +141,6 @@ int main(int argc, char* argv[]) {
ppool = NULL;
}
fail_concr:
fail_read:
fclose(infile);
fail:
gu_pool_free(pool);
return status;

View File

@@ -5,13 +5,24 @@
#include <gu/file.h>
#include <gu/utf8.h>
#include <locale.h>
#include <stdlib.h>
GU_DECLARE_TYPE(PgfAbstr, struct);
int main(void) {
int main(int argc, char* argv[]) {
// Set the character locale, so we can produce proper output.
setlocale(LC_CTYPE, "");
// Exactly one argument (the PGF file) is required: argv[0] is the program
// name and argv[1], read right below, is the grammar path, so argc must
// be 2. The previous `argc != 1` test rejected the correct invocation and
// let a no-argument run go on to read the NULL argv[1].
if (argc != 2) {
    fprintf(stderr, "usage: %s pgf\n", argv[0]);
    return EXIT_FAILURE;
}
char* filename = argv[1];
GuPool* pool = gu_new_pool();
GuExn* err = gu_exn(NULL, type, pool);
GuIn* in = gu_file_in(stdin, pool);
PgfPGF* pgf = pgf_read(in, pool, err);
PgfPGF* pgf = pgf_read(filename, pool, err);
int status = 0;
if (!gu_ok(err)) {
fprintf(stderr, "Reading PGF failed\n");

View File

@@ -5,7 +5,6 @@
#include <gu/enum.h>
#include <gu/file.h>
#include <pgf/pgf.h>
#include <pgf/data.h>
#include <pgf/parser.h>
#include <pgf/lexer.h>
#include <pgf/literals.h>
@@ -53,7 +52,7 @@ int main(int argc, char* argv[]) {
GuPool* pool = gu_new_pool();
int status = EXIT_SUCCESS;
if (argc != 5) {
fprintf(stderr, "usage: %s pgf [.]cat from_lang to_lang\n", argv[0]);
fprintf(stderr, "usage: %s pgf cat from_lang to_lang\n", argv[0]);
status = EXIT_FAILURE;
goto fail;
}
@@ -64,40 +63,29 @@ int main(int argc, char* argv[]) {
GuString from_lang = gu_str_string(argv[3], pool);
GuString to_lang = gu_str_string(argv[4], pool);
FILE* infile = fopen(filename, "r");
if (infile == NULL) {
fprintf(stderr, "couldn't open %s\n", filename);
status = EXIT_FAILURE;
goto fail;
}
// Create an input stream from the input file
GuIn* in = gu_file_in(infile, pool);
// Create an exception frame that catches all errors.
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
// Read the PGF grammar.
PgfPGF* pgf = pgf_read(in, pool, err);
PgfPGF* pgf = pgf_read(filename, pool, err);
// If an error occurred, it shows in the exception frame
if (!gu_ok(err)) {
fprintf(stderr, "Reading PGF failed\n");
status = EXIT_FAILURE;
goto fail_read;
goto fail;
}
if (!pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool)) {
pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool, err);
if (!gu_ok(err)) {
fprintf(stderr, "Loading meta child probs failed\n");
status = EXIT_FAILURE;
goto fail_read;
goto fail;
}
// Look up the source and destination concrete categories
PgfConcr* from_concr =
gu_map_get(pgf->concretes, &from_lang, PgfConcr*);
PgfConcr* to_concr =
gu_map_get(pgf->concretes, &to_lang, PgfConcr*);
PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
PgfConcr* to_concr = pgf_get_language(pgf, to_lang);
if (!from_concr || !to_concr) {
fprintf(stderr, "Unknown language\n");
status = EXIT_FAILURE;
@@ -229,8 +217,6 @@ int main(int argc, char* argv[]) {
result = NULL;
}
fail_concr:
fail_read:
fclose(infile);
fail:
gu_pool_free(pool);
return status;

View File

@@ -4,11 +4,16 @@
#include <gu/file.h>
#include <gu/utf8.h>
int main(void) {
int main(int argc, char* argv[]) {
// Exactly one argument (the PGF file) is required: argv[0] is the program
// name and argv[1], read right below, is the grammar path, so argc must
// be 2. The previous `argc != 1` test rejected the correct invocation and
// let a no-argument run go on to read the NULL argv[1].
if (argc != 2) {
    fprintf(stderr, "usage: %s pgf\n", argv[0]);
    return 1;
}
char* filename = argv[1];
GuPool* pool = gu_new_pool();
GuExn* err = gu_exn(NULL, type, pool);
GuIn* in = gu_file_in(stdin, pool);
PgfPGF* pgf = pgf_read(in, pool, err);
PgfPGF* pgf = pgf_read(filename, pool, err);
int status = 0;
if (!gu_ok(err)) {
fprintf(stderr, "Reading PGF failed\n");