forked from GitHub/gf-core
started an official API to the C runtime
This commit is contained in:
@@ -42,7 +42,6 @@ guinclude_HEADERS = \
|
|||||||
|
|
||||||
pgfincludedir=$(includedir)/pgf
|
pgfincludedir=$(includedir)/pgf
|
||||||
pgfinclude_HEADERS = \
|
pgfinclude_HEADERS = \
|
||||||
pgf/data.h \
|
|
||||||
pgf/expr.h \
|
pgf/expr.h \
|
||||||
pgf/linearize.h \
|
pgf/linearize.h \
|
||||||
pgf/parser.h \
|
pgf/parser.h \
|
||||||
@@ -109,9 +108,12 @@ libpgf_la_SOURCES = \
|
|||||||
pgf/lexer.h \
|
pgf/lexer.h \
|
||||||
pgf/literals.c \
|
pgf/literals.c \
|
||||||
pgf/literals.h \
|
pgf/literals.h \
|
||||||
|
pgf/reader.h \
|
||||||
pgf/reader.c \
|
pgf/reader.c \
|
||||||
pgf/linearize.c \
|
pgf/linearize.c \
|
||||||
pgf/printer.c
|
pgf/printer.c \
|
||||||
|
pgf/pgf.c \
|
||||||
|
pgf/pgf.h
|
||||||
|
|
||||||
bin_PROGRAMS = \
|
bin_PROGRAMS = \
|
||||||
utils/pgf2yaml \
|
utils/pgf2yaml \
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
#ifndef GU_SYSDEPS_H_
|
#ifndef GU_SYSDEPS_H_
|
||||||
#define GU_SYSDEPS_H_
|
#define GU_SYSDEPS_H_
|
||||||
|
|
||||||
#include <config.h>
|
|
||||||
|
|
||||||
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||||
# define GU_GNUC
|
# define GU_GNUC
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#include <gu/ucs.h>
|
#include <gu/ucs.h>
|
||||||
#include <gu/assert.h>
|
#include <gu/assert.h>
|
||||||
#include <config.h>
|
#include "config.h"
|
||||||
|
|
||||||
GU_DEFINE_TYPE(GuUCSExn, abstract, _);
|
GU_DEFINE_TYPE(GuUCSExn, abstract, _);
|
||||||
|
|
||||||
@@ -131,5 +131,16 @@ gu_ucs_to_str(const GuUCS* ubuf, size_t len, char* cbuf, GuExn* err)
|
|||||||
extern inline bool
|
extern inline bool
|
||||||
gu_ucs_valid(GuUCS ucs);
|
gu_ucs_valid(GuUCS ucs);
|
||||||
|
|
||||||
extern inline GuUCS
|
GuUCS
|
||||||
gu_char_ucs(char c);
|
gu_char_ucs(char c)
|
||||||
|
{
|
||||||
|
gu_require(gu_char_is_valid(c));
|
||||||
|
#ifdef CHAR_ASCII
|
||||||
|
GuUCS u = (GuUCS) c;
|
||||||
|
#else
|
||||||
|
extern const uint8_t gu_ucs_ascii_reverse_[CHAR_MAX];
|
||||||
|
GuUCS u = gu_ucs_ascii_reverse_[(unsigned char) c];
|
||||||
|
#endif
|
||||||
|
gu_ensure(u < 0x80);
|
||||||
|
return u;
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
#include <gu/exn.h>
|
#include <gu/exn.h>
|
||||||
#include <gu/assert.h>
|
#include <gu/assert.h>
|
||||||
|
|
||||||
|
|
||||||
#if defined(__STDC_ISO_10646__) && WCHAR_MAX >= 0x10FFFF
|
#if defined(__STDC_ISO_10646__) && WCHAR_MAX >= 0x10FFFF
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
#define GU_UCS_WCHAR
|
#define GU_UCS_WCHAR
|
||||||
@@ -25,19 +24,8 @@ gu_ucs_valid(GuUCS ucs)
|
|||||||
return ucs >= 0 && ucs <= GU_UCS_MAX;
|
return ucs >= 0 && ucs <= GU_UCS_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline GuUCS
|
GuUCS
|
||||||
gu_char_ucs(char c)
|
gu_char_ucs(char c);
|
||||||
{
|
|
||||||
gu_require(gu_char_is_valid(c));
|
|
||||||
#ifdef CHAR_ASCII
|
|
||||||
GuUCS u = (GuUCS) c;
|
|
||||||
#else
|
|
||||||
extern const uint8_t gu_ucs_ascii_reverse_[CHAR_MAX];
|
|
||||||
GuUCS u = gu_ucs_ascii_reverse_[(unsigned char) c];
|
|
||||||
#endif
|
|
||||||
gu_ensure(u < 0x80);
|
|
||||||
return u;
|
|
||||||
}
|
|
||||||
|
|
||||||
char
|
char
|
||||||
gu_ucs_char(GuUCS uc, GuExn* err);
|
gu_ucs_char(GuUCS uc, GuExn* err);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#include <gu/assert.h>
|
#include <gu/assert.h>
|
||||||
#include <gu/utf8.h>
|
#include <gu/utf8.h>
|
||||||
#include <config.h>
|
#include "config.h"
|
||||||
|
|
||||||
GuUCS
|
GuUCS
|
||||||
gu_utf8_decode(const uint8_t** src_inout)
|
gu_utf8_decode(const uint8_t** src_inout)
|
||||||
@@ -73,7 +73,6 @@ fail:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
gu_advance_utf8(GuUCS ucs, uint8_t* buf)
|
gu_advance_utf8(GuUCS ucs, uint8_t* buf)
|
||||||
{
|
{
|
||||||
@@ -105,6 +104,19 @@ gu_in_utf8_char_(GuIn* in, GuExn* err)
|
|||||||
return gu_ucs_char(gu_in_utf8(in, err), err);
|
return gu_ucs_char(gu_in_utf8(in, err), err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char
|
||||||
|
gu_in_utf8_char(GuIn* in, GuExn* err)
|
||||||
|
{
|
||||||
|
#ifdef CHAR_ASCII
|
||||||
|
int i = gu_in_peek_u8(in);
|
||||||
|
if (i >= 0 && i < 0x80) {
|
||||||
|
gu_in_consume(in, 1);
|
||||||
|
return (char) i;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return gu_in_utf8_char_(in, err);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
gu_out_utf8_long_(GuUCS ucs, GuOut* out, GuExn* err)
|
gu_out_utf8_long_(GuUCS ucs, GuOut* out, GuExn* err)
|
||||||
{
|
{
|
||||||
@@ -210,11 +222,17 @@ void gu_str_out_utf8_(const char* str, GuOut* out, GuExn* err)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern inline void
|
|
||||||
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err);
|
|
||||||
|
|
||||||
extern inline GuUCS
|
extern inline GuUCS
|
||||||
gu_in_utf8(GuIn* in, GuExn* err);
|
gu_in_utf8(GuIn* in, GuExn* err);
|
||||||
|
|
||||||
extern inline char
|
void
|
||||||
gu_in_utf8_char(GuIn* in, GuExn* err);
|
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err)
|
||||||
|
{
|
||||||
|
#ifdef CHAR_ASCII
|
||||||
|
gu_out_bytes(out, (const uint8_t*) str, strlen(str), err);
|
||||||
|
#else
|
||||||
|
extern void
|
||||||
|
gu_str_out_utf8_(const char* str, GuOut* out, GuExn* err);
|
||||||
|
gu_str_out_utf8_(str, out, err);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|||||||
@@ -18,19 +18,8 @@ gu_in_utf8(GuIn* in, GuExn* err)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline char
|
char
|
||||||
gu_in_utf8_char(GuIn* in, GuExn* err)
|
gu_in_utf8_char(GuIn* in, GuExn* err);
|
||||||
{
|
|
||||||
#ifdef CHAR_ASCII
|
|
||||||
int i = gu_in_peek_u8(in);
|
|
||||||
if (i >= 0 && i < 0x80) {
|
|
||||||
gu_in_consume(in, 1);
|
|
||||||
return (char) i;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
extern char gu_in_utf8_char_(GuIn* in, GuExn* err);
|
|
||||||
return gu_in_utf8_char_(in, err);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
gu_out_utf8_long_(GuUCS ucs, GuOut* out, GuExn* err);
|
gu_out_utf8_long_(GuUCS ucs, GuOut* out, GuExn* err);
|
||||||
@@ -52,16 +41,7 @@ gu_utf32_out_utf8(const GuUCS* src, size_t len, GuOut* out, GuExn* err);
|
|||||||
GuUCS
|
GuUCS
|
||||||
gu_utf8_decode(const uint8_t** utf8);
|
gu_utf8_decode(const uint8_t** utf8);
|
||||||
|
|
||||||
inline void
|
void
|
||||||
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err)
|
gu_str_out_utf8(const char* str, GuOut* out, GuExn* err);
|
||||||
{
|
|
||||||
#ifdef CHAR_ASCII
|
|
||||||
gu_out_bytes(out, (const uint8_t*) str, strlen(str), err);
|
|
||||||
#else
|
|
||||||
extern void
|
|
||||||
gu_str_out_utf8_(const char* str, GuOut* out, GuExn* err);
|
|
||||||
gu_str_out_utf8_(str, out, err);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // GU_UTF8_H_
|
#endif // GU_UTF8_H_
|
||||||
|
|||||||
@@ -217,7 +217,7 @@ GU_DEFINE_TYPE(
|
|||||||
&gu_null_struct))));
|
&gu_null_struct))));
|
||||||
|
|
||||||
GU_DEFINE_TYPE(
|
GU_DEFINE_TYPE(
|
||||||
PgfPrintNames, PgfCIdMap, gu_type(GuString), NULL);
|
PgfPrintNames, PgfCIdMap, gu_type(GuString), &gu_empty_string);
|
||||||
|
|
||||||
GU_DEFINE_TYPE(
|
GU_DEFINE_TYPE(
|
||||||
PgfConcr, struct,
|
PgfConcr, struct,
|
||||||
|
|||||||
169
src/runtime/c/pgf/pgf.c
Normal file
169
src/runtime/c/pgf/pgf.c
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
#include <pgf/pgf.h>
|
||||||
|
#include <pgf/data.h>
|
||||||
|
#include <pgf/expr.h>
|
||||||
|
#include <pgf/reader.h>
|
||||||
|
#include <gu/file.h>
|
||||||
|
#include <gu/string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
GU_DEFINE_TYPE(PgfExn, abstract, _);
|
||||||
|
|
||||||
|
PgfPGF*
|
||||||
|
pgf_read(const char* fpath,
|
||||||
|
GuPool* pool, GuExn* err)
|
||||||
|
{
|
||||||
|
FILE* infile = fopen(fpath, "r");
|
||||||
|
if (infile == NULL) {
|
||||||
|
gu_raise_errno(err);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
|
||||||
|
// Create an input stream from the input file
|
||||||
|
GuIn* in = gu_file_in(infile, tmp_pool);
|
||||||
|
|
||||||
|
PgfReader* rdr = pgf_new_reader(in, pool, tmp_pool, err);
|
||||||
|
PgfPGF* pgf = pgf_read_new(rdr, gu_type(PgfPGF), pool, NULL);
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
gu_return_on_exn(err, NULL);
|
||||||
|
return pgf;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath,
|
||||||
|
GuPool* pool, GuExn* err)
|
||||||
|
{
|
||||||
|
FILE *fp = fopen(fpath, "r");
|
||||||
|
if (!fp) {
|
||||||
|
gu_raise_errno(err);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
GuPool* tmp_pool = gu_new_pool();
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
char cat1_s[21];
|
||||||
|
char cat2_s[21];
|
||||||
|
prob_t prob;
|
||||||
|
|
||||||
|
if (fscanf(fp, "%20s\t%20s\t%f", cat1_s, cat2_s, &prob) < 3)
|
||||||
|
break;
|
||||||
|
|
||||||
|
prob = - log(prob);
|
||||||
|
|
||||||
|
GuString cat1 = gu_str_string(cat1_s, tmp_pool);
|
||||||
|
PgfCat* abscat1 =
|
||||||
|
gu_map_get(pgf->abstract.cats, &cat1, PgfCat*);
|
||||||
|
if (abscat1 == NULL) {
|
||||||
|
gu_raise(err, PgfExn);
|
||||||
|
goto close;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(cat2_s, "*") == 0) {
|
||||||
|
abscat1->meta_prob = prob;
|
||||||
|
} else if (strcmp(cat2_s, "_") == 0) {
|
||||||
|
abscat1->meta_token_prob = prob;
|
||||||
|
} else {
|
||||||
|
GuString cat2 = gu_str_string(cat2_s, tmp_pool);
|
||||||
|
PgfCat* abscat2 = gu_map_get(pgf->abstract.cats, &cat2, PgfCat*);
|
||||||
|
if (abscat2 == NULL) {
|
||||||
|
gu_raise(err, PgfExn);
|
||||||
|
goto close;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (abscat1->meta_child_probs == NULL) {
|
||||||
|
abscat1->meta_child_probs =
|
||||||
|
gu_map_type_new(PgfMetaChildMap, pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close:
|
||||||
|
gu_pool_free(tmp_pool);
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
GuString
|
||||||
|
pgf_abstract_name(PgfPGF* pgf)
|
||||||
|
{
|
||||||
|
return pgf->absname;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_languages(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
|
||||||
|
{
|
||||||
|
gu_map_iter(pgf->concretes, fn, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
PgfConcr*
|
||||||
|
pgf_get_language(PgfPGF* pgf, PgfCId lang)
|
||||||
|
{
|
||||||
|
return gu_map_get(pgf->concretes, &lang, PgfConcr*);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
|
||||||
|
{
|
||||||
|
gu_map_iter(pgf->abstract.cats, fn, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
PgfCId
|
||||||
|
pgf_start_cat(PgfPGF* pgf, GuPool* pool)
|
||||||
|
{
|
||||||
|
GuPool* tmp_pool = gu_local_pool();
|
||||||
|
|
||||||
|
GuString s = gu_str_string("startcat", tmp_pool);
|
||||||
|
PgfLiteral lit =
|
||||||
|
gu_map_get(pgf->abstract.aflags, &s, PgfLiteral);
|
||||||
|
|
||||||
|
if (gu_variant_is_null(lit))
|
||||||
|
return gu_str_string("S", pool);
|
||||||
|
|
||||||
|
GuVariantInfo i = gu_variant_open(lit);
|
||||||
|
switch (i.tag) {
|
||||||
|
case PGF_LITERAL_STR: {
|
||||||
|
PgfLiteralStr *lstr = (PgfLiteralStr *) i.data;
|
||||||
|
return lstr->val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return gu_str_string("S", pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_functions(PgfPGF* pgf, GuMapItor* fn, GuExn* err)
|
||||||
|
{
|
||||||
|
gu_map_iter(pgf->abstract.funs, fn, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
|
||||||
|
GuMapItor* fn, GuExn* err)
|
||||||
|
{
|
||||||
|
PgfCat* abscat =
|
||||||
|
gu_map_get(pgf->abstract.cats, &catname, PgfCat*);
|
||||||
|
if (abscat == NULL) {
|
||||||
|
gu_raise(err, PgfExn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < abscat->n_functions; i++) {
|
||||||
|
fn->fn(fn, &abscat->functions[i].fun, NULL, err);
|
||||||
|
if (!gu_ok(err))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GuString
|
||||||
|
pgf_print_name(PgfConcr* concr, PgfCId id)
|
||||||
|
{
|
||||||
|
PgfCId name =
|
||||||
|
gu_map_get(concr->printnames, &id, PgfCId);
|
||||||
|
if (gu_string_eq(name, gu_empty_string))
|
||||||
|
name = id;
|
||||||
|
return name;
|
||||||
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright 2010 University of Helsinki.
|
* Copyright 2010 University of Gothenburg.
|
||||||
*
|
*
|
||||||
* This file is part of libpgf.
|
* This file is part of libpgf.
|
||||||
*
|
*
|
||||||
@@ -27,7 +27,7 @@
|
|||||||
|
|
||||||
#include <gu/exn.h>
|
#include <gu/exn.h>
|
||||||
#include <gu/mem.h>
|
#include <gu/mem.h>
|
||||||
#include <gu/in.h>
|
#include <gu/map.h>
|
||||||
#include <gu/string.h>
|
#include <gu/string.h>
|
||||||
|
|
||||||
|
|
||||||
@@ -35,20 +35,25 @@ typedef GuString PgfCId;
|
|||||||
extern GU_DECLARE_TYPE(PgfCId, typedef);
|
extern GU_DECLARE_TYPE(PgfCId, typedef);
|
||||||
|
|
||||||
|
|
||||||
|
extern GU_DECLARE_TYPE(PgfExn, abstract);
|
||||||
|
|
||||||
|
|
||||||
/// A single lexical token
|
/// A single lexical token
|
||||||
typedef GuString PgfToken;
|
typedef GuString PgfToken;
|
||||||
|
|
||||||
/// @name PGF Grammar objects
|
/// @name PGF Grammar objects
|
||||||
/// @{
|
/// @{
|
||||||
|
|
||||||
typedef struct PgfPGF PgfPGF;
|
typedef struct PgfPGF PgfPGF;
|
||||||
|
typedef struct PgfConcr PgfConcr;
|
||||||
|
|
||||||
/**< A representation of a PGF grammar.
|
/**< A representation of a PGF grammar.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
PgfPGF*
|
PgfPGF*
|
||||||
pgf_read(GuIn* in, GuPool* pool, GuExn* err);
|
pgf_read(const char* fpath,
|
||||||
|
GuPool* pool, GuExn* err);
|
||||||
|
|
||||||
/**< Read a grammar from a PGF file.
|
/**< Read a grammar from a PGF file.
|
||||||
*
|
*
|
||||||
@@ -69,10 +74,34 @@ pgf_read(GuIn* in, GuPool* pool, GuExn* err);
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
bool
|
void
|
||||||
pgf_load_meta_child_probs(PgfPGF*, const char* fpath, GuPool* pool);
|
pgf_load_meta_child_probs(PgfPGF*, const char* fpath,
|
||||||
|
GuPool* pool, GuExn* err);
|
||||||
|
|
||||||
typedef struct PgfConcr PgfConcr;
|
GuString
|
||||||
|
pgf_abstract_name(PgfPGF*);
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_languages(PgfPGF*, GuMapItor*, GuExn* err);
|
||||||
|
|
||||||
|
PgfConcr*
|
||||||
|
pgf_get_language(PgfPGF*, PgfCId lang);
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_categories(PgfPGF* pgf, GuMapItor* fn, GuExn* err);
|
||||||
|
|
||||||
|
PgfCId
|
||||||
|
pgf_start_cat(PgfPGF* pgf, GuPool* pool);
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_functions(PgfPGF* pgf, GuMapItor* fn, GuExn* err);
|
||||||
|
|
||||||
|
void
|
||||||
|
pgf_iter_functions_by_cat(PgfPGF* pgf, PgfCId catname,
|
||||||
|
GuMapItor* fn, GuExn* err);
|
||||||
|
|
||||||
|
GuString
|
||||||
|
pgf_print_name(PgfConcr*, PgfCId id);
|
||||||
|
|
||||||
#include <gu/type.h>
|
#include <gu/type.h>
|
||||||
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
extern GU_DECLARE_TYPE(PgfPGF, struct);
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
#include "data.h"
|
#include "data.h"
|
||||||
#include "expr.h"
|
#include "expr.h"
|
||||||
#include "literals.h"
|
#include "literals.h"
|
||||||
|
#include "reader.h"
|
||||||
#include <gu/defs.h>
|
#include <gu/defs.h>
|
||||||
#include <gu/map.h>
|
#include <gu/map.h>
|
||||||
#include <gu/seq.h>
|
#include <gu/seq.h>
|
||||||
@@ -40,8 +41,6 @@
|
|||||||
// PgfReader
|
// PgfReader
|
||||||
//
|
//
|
||||||
|
|
||||||
typedef struct PgfReader PgfReader;
|
|
||||||
|
|
||||||
struct PgfReader {
|
struct PgfReader {
|
||||||
GuIn* in;
|
GuIn* in;
|
||||||
GuExn* err;
|
GuExn* err;
|
||||||
@@ -132,7 +131,7 @@ struct PgfReadNewFn {
|
|||||||
size_t* size_out);
|
size_t* size_out);
|
||||||
};
|
};
|
||||||
|
|
||||||
static void*
|
void*
|
||||||
pgf_read_new(PgfReader* rdr, GuType* type, GuPool* pool, size_t* size_out)
|
pgf_read_new(PgfReader* rdr, GuType* type, GuPool* pool, size_t* size_out)
|
||||||
{
|
{
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
@@ -884,7 +883,7 @@ pgf_read_new_table = GU_TYPETABLE(
|
|||||||
PGF_READ_NEW(PgfConcr)
|
PGF_READ_NEW(PgfConcr)
|
||||||
);
|
);
|
||||||
|
|
||||||
static PgfReader*
|
PgfReader*
|
||||||
pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
|
pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
|
||||||
{
|
{
|
||||||
PgfReader* rdr = gu_new(PgfReader, tmp_pool);
|
PgfReader* rdr = gu_new(PgfReader, tmp_pool);
|
||||||
@@ -900,65 +899,3 @@ pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err)
|
|||||||
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
|
rdr->read_new_map = gu_new_type_map(&pgf_read_new_table, tmp_pool);
|
||||||
return rdr;
|
return rdr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PgfPGF*
|
|
||||||
pgf_read(GuIn* in, GuPool* pool, GuExn* err)
|
|
||||||
{
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
|
||||||
PgfReader* rdr = pgf_new_reader(in, pool, tmp_pool, err);
|
|
||||||
PgfPGF* pgf = pgf_read_new(rdr, gu_type(PgfPGF), pool, NULL);
|
|
||||||
gu_pool_free(tmp_pool);
|
|
||||||
gu_return_on_exn(err, NULL);
|
|
||||||
return pgf;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
pgf_load_meta_child_probs(PgfPGF* pgf, const char* fpath, GuPool* pool)
|
|
||||||
{
|
|
||||||
FILE *fp = fopen(fpath, "r");
|
|
||||||
if (!fp)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
GuPool* tmp_pool = gu_new_pool();
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
char cat1_s[21];
|
|
||||||
char cat2_s[21];
|
|
||||||
prob_t prob;
|
|
||||||
|
|
||||||
if (fscanf(fp, "%20s\t%20s\t%f", cat1_s, cat2_s, &prob) < 3)
|
|
||||||
break;
|
|
||||||
|
|
||||||
prob = - log(prob);
|
|
||||||
|
|
||||||
GuString cat1 = gu_str_string(cat1_s, tmp_pool);
|
|
||||||
PgfCat* abscat1 =
|
|
||||||
gu_map_get(pgf->abstract.cats, &cat1, PgfCat*);
|
|
||||||
if (abscat1 == NULL)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (strcmp(cat2_s, "*") == 0) {
|
|
||||||
abscat1->meta_prob = prob;
|
|
||||||
} else if (strcmp(cat2_s, "_") == 0) {
|
|
||||||
abscat1->meta_token_prob = prob;
|
|
||||||
} else {
|
|
||||||
GuString cat2 = gu_str_string(cat2_s, tmp_pool);
|
|
||||||
PgfCat* abscat2 = gu_map_get(pgf->abstract.cats, &cat2, PgfCat*);
|
|
||||||
if (abscat2 == NULL)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (abscat1->meta_child_probs == NULL) {
|
|
||||||
abscat1->meta_child_probs =
|
|
||||||
gu_map_type_new(PgfMetaChildMap, pool);
|
|
||||||
}
|
|
||||||
|
|
||||||
gu_map_put(abscat1->meta_child_probs, abscat2, prob_t, prob);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
gu_pool_free(tmp_pool);
|
|
||||||
|
|
||||||
fclose(fp);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|||||||
35
src/runtime/c/pgf/reader.h
Normal file
35
src/runtime/c/pgf/reader.h
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2012 University of Gothenburg.
|
||||||
|
*
|
||||||
|
* This file is part of libpgf.
|
||||||
|
*
|
||||||
|
* Libpgf is free software: you can redistribute it and/or modify it under
|
||||||
|
* the terms of the GNU Lesser General Public License as published by the
|
||||||
|
* Free Software Foundation, either version 3 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* Libpgf is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||||
|
* License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with libpgf. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef READER_H_
|
||||||
|
#define READER_H_
|
||||||
|
|
||||||
|
#include <gu/exn.h>
|
||||||
|
#include <gu/mem.h>
|
||||||
|
#include <gu/in.h>
|
||||||
|
|
||||||
|
typedef struct PgfReader PgfReader;
|
||||||
|
|
||||||
|
PgfReader*
|
||||||
|
pgf_new_reader(GuIn* in, GuPool* opool, GuPool* tmp_pool, GuExn* err);
|
||||||
|
|
||||||
|
void*
|
||||||
|
pgf_read_new(PgfReader* rdr, GuType* type, GuPool* pool, size_t* size_out);
|
||||||
|
|
||||||
|
#endif // READER_H_
|
||||||
@@ -8,7 +8,6 @@
|
|||||||
#include <gu/enum.h>
|
#include <gu/enum.h>
|
||||||
#include <gu/file.h>
|
#include <gu/file.h>
|
||||||
#include <pgf/pgf.h>
|
#include <pgf/pgf.h>
|
||||||
#include <pgf/data.h>
|
|
||||||
#include <pgf/parser.h>
|
#include <pgf/parser.h>
|
||||||
#include <pgf/lexer.h>
|
#include <pgf/lexer.h>
|
||||||
#include <pgf/literals.h>
|
#include <pgf/literals.h>
|
||||||
@@ -39,38 +38,28 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
GuString from_lang = gu_str_string(argv[3], pool);
|
GuString from_lang = gu_str_string(argv[3], pool);
|
||||||
|
|
||||||
FILE* infile = fopen(filename, "r");
|
|
||||||
if (infile == NULL) {
|
|
||||||
fprintf(stderr, "couldn't open %s\n", filename);
|
|
||||||
status = EXIT_FAILURE;
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create an input stream from the input file
|
|
||||||
GuIn* in = gu_file_in(infile, pool);
|
|
||||||
|
|
||||||
// Create an exception frame that catches all errors.
|
// Create an exception frame that catches all errors.
|
||||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
|
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||||
|
|
||||||
// Read the PGF grammar.
|
// Read the PGF grammar.
|
||||||
PgfPGF* pgf = pgf_read(in, pool, err);
|
PgfPGF* pgf = pgf_read(filename, pool, err);
|
||||||
|
|
||||||
// If an error occured, it shows in the exception frame
|
// If an error occured, it shows in the exception frame
|
||||||
if (!gu_ok(err)) {
|
if (!gu_ok(err)) {
|
||||||
fprintf(stderr, "Reading PGF failed\n");
|
fprintf(stderr, "Reading PGF failed\n");
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
goto fail_read;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool)) {
|
pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool, err);
|
||||||
|
if (!gu_ok(err)) {
|
||||||
fprintf(stderr, "Loading meta child probs failed\n");
|
fprintf(stderr, "Loading meta child probs failed\n");
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
goto fail_read;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look up the source and destination concrete categories
|
// Look up the source and destination concrete categories
|
||||||
PgfConcr* from_concr =
|
PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
|
||||||
gu_map_get(pgf->concretes, &from_lang, PgfConcr*);
|
|
||||||
if (!from_concr) {
|
if (!from_concr) {
|
||||||
fprintf(stderr, "Unknown language\n");
|
fprintf(stderr, "Unknown language\n");
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
@@ -152,8 +141,6 @@ int main(int argc, char* argv[]) {
|
|||||||
ppool = NULL;
|
ppool = NULL;
|
||||||
}
|
}
|
||||||
fail_concr:
|
fail_concr:
|
||||||
fail_read:
|
|
||||||
fclose(infile);
|
|
||||||
fail:
|
fail:
|
||||||
gu_pool_free(pool);
|
gu_pool_free(pool);
|
||||||
return status;
|
return status;
|
||||||
|
|||||||
@@ -5,13 +5,24 @@
|
|||||||
#include <gu/file.h>
|
#include <gu/file.h>
|
||||||
#include <gu/utf8.h>
|
#include <gu/utf8.h>
|
||||||
|
|
||||||
|
#include <locale.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
GU_DECLARE_TYPE(PgfAbstr, struct);
|
GU_DECLARE_TYPE(PgfAbstr, struct);
|
||||||
|
|
||||||
int main(void) {
|
int main(int argc, char* argv[]) {
|
||||||
|
// Set the character locale, so we can produce proper output.
|
||||||
|
setlocale(LC_CTYPE, "");
|
||||||
|
|
||||||
|
if (argc != 1) {
|
||||||
|
fprintf(stderr, "usage: %s pgf\n", argv[0]);
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
char* filename = argv[1];
|
||||||
|
|
||||||
GuPool* pool = gu_new_pool();
|
GuPool* pool = gu_new_pool();
|
||||||
GuExn* err = gu_exn(NULL, type, pool);
|
GuExn* err = gu_exn(NULL, type, pool);
|
||||||
GuIn* in = gu_file_in(stdin, pool);
|
PgfPGF* pgf = pgf_read(filename, pool, err);
|
||||||
PgfPGF* pgf = pgf_read(in, pool, err);
|
|
||||||
int status = 0;
|
int status = 0;
|
||||||
if (!gu_ok(err)) {
|
if (!gu_ok(err)) {
|
||||||
fprintf(stderr, "Reading PGF failed\n");
|
fprintf(stderr, "Reading PGF failed\n");
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
#include <gu/enum.h>
|
#include <gu/enum.h>
|
||||||
#include <gu/file.h>
|
#include <gu/file.h>
|
||||||
#include <pgf/pgf.h>
|
#include <pgf/pgf.h>
|
||||||
#include <pgf/data.h>
|
|
||||||
#include <pgf/parser.h>
|
#include <pgf/parser.h>
|
||||||
#include <pgf/lexer.h>
|
#include <pgf/lexer.h>
|
||||||
#include <pgf/literals.h>
|
#include <pgf/literals.h>
|
||||||
@@ -53,7 +52,7 @@ int main(int argc, char* argv[]) {
|
|||||||
GuPool* pool = gu_new_pool();
|
GuPool* pool = gu_new_pool();
|
||||||
int status = EXIT_SUCCESS;
|
int status = EXIT_SUCCESS;
|
||||||
if (argc != 5) {
|
if (argc != 5) {
|
||||||
fprintf(stderr, "usage: %s pgf [.]cat from_lang to_lang\n", argv[0]);
|
fprintf(stderr, "usage: %s pgf cat from_lang to_lang\n", argv[0]);
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
@@ -64,40 +63,29 @@ int main(int argc, char* argv[]) {
|
|||||||
GuString from_lang = gu_str_string(argv[3], pool);
|
GuString from_lang = gu_str_string(argv[3], pool);
|
||||||
GuString to_lang = gu_str_string(argv[4], pool);
|
GuString to_lang = gu_str_string(argv[4], pool);
|
||||||
|
|
||||||
FILE* infile = fopen(filename, "r");
|
|
||||||
if (infile == NULL) {
|
|
||||||
fprintf(stderr, "couldn't open %s\n", filename);
|
|
||||||
status = EXIT_FAILURE;
|
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create an input stream from the input file
|
|
||||||
GuIn* in = gu_file_in(infile, pool);
|
|
||||||
|
|
||||||
// Create an exception frame that catches all errors.
|
// Create an exception frame that catches all errors.
|
||||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
|
GuExn* err = gu_new_exn(NULL, gu_kind(type), pool);
|
||||||
|
|
||||||
// Read the PGF grammar.
|
// Read the PGF grammar.
|
||||||
PgfPGF* pgf = pgf_read(in, pool, err);
|
PgfPGF* pgf = pgf_read(filename, pool, err);
|
||||||
|
|
||||||
// If an error occured, it shows in the exception frame
|
// If an error occured, it shows in the exception frame
|
||||||
if (!gu_ok(err)) {
|
if (!gu_ok(err)) {
|
||||||
fprintf(stderr, "Reading PGF failed\n");
|
fprintf(stderr, "Reading PGF failed\n");
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
goto fail_read;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool)) {
|
pgf_load_meta_child_probs(pgf, "../../../treebanks/PennTreebank/ParseEngAbs3.probs", pool, err);
|
||||||
|
if (!gu_ok(err)) {
|
||||||
fprintf(stderr, "Loading meta child probs failed\n");
|
fprintf(stderr, "Loading meta child probs failed\n");
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
goto fail_read;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look up the source and destination concrete categories
|
// Look up the source and destination concrete categories
|
||||||
PgfConcr* from_concr =
|
PgfConcr* from_concr = pgf_get_language(pgf, from_lang);
|
||||||
gu_map_get(pgf->concretes, &from_lang, PgfConcr*);
|
PgfConcr* to_concr = pgf_get_language(pgf, to_lang);
|
||||||
PgfConcr* to_concr =
|
|
||||||
gu_map_get(pgf->concretes, &to_lang, PgfConcr*);
|
|
||||||
if (!from_concr || !to_concr) {
|
if (!from_concr || !to_concr) {
|
||||||
fprintf(stderr, "Unknown language\n");
|
fprintf(stderr, "Unknown language\n");
|
||||||
status = EXIT_FAILURE;
|
status = EXIT_FAILURE;
|
||||||
@@ -229,8 +217,6 @@ int main(int argc, char* argv[]) {
|
|||||||
result = NULL;
|
result = NULL;
|
||||||
}
|
}
|
||||||
fail_concr:
|
fail_concr:
|
||||||
fail_read:
|
|
||||||
fclose(infile);
|
|
||||||
fail:
|
fail:
|
||||||
gu_pool_free(pool);
|
gu_pool_free(pool);
|
||||||
return status;
|
return status;
|
||||||
|
|||||||
@@ -4,11 +4,16 @@
|
|||||||
#include <gu/file.h>
|
#include <gu/file.h>
|
||||||
#include <gu/utf8.h>
|
#include <gu/utf8.h>
|
||||||
|
|
||||||
int main(void) {
|
int main(int argc, char* argv[]) {
|
||||||
|
if (argc != 1) {
|
||||||
|
fprintf(stderr, "usage: %s pgf\n", argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
char* filename = argv[1];
|
||||||
|
|
||||||
GuPool* pool = gu_new_pool();
|
GuPool* pool = gu_new_pool();
|
||||||
GuExn* err = gu_exn(NULL, type, pool);
|
GuExn* err = gu_exn(NULL, type, pool);
|
||||||
GuIn* in = gu_file_in(stdin, pool);
|
PgfPGF* pgf = pgf_read(filename, pool, err);
|
||||||
PgfPGF* pgf = pgf_read(in, pool, err);
|
|
||||||
int status = 0;
|
int status = 0;
|
||||||
if (!gu_ok(err)) {
|
if (!gu_ok(err)) {
|
||||||
fprintf(stderr, "Reading PGF failed\n");
|
fprintf(stderr, "Reading PGF failed\n");
|
||||||
|
|||||||
Reference in New Issue
Block a user