1
0
forked from GitHub/gf-core

added the linref construction in GF. The PGF version number is now bumped

This commit is contained in:
kr.angelov
2013-10-30 12:53:36 +00:00
parent 122c40bb3b
commit 042243f08a
28 changed files with 267 additions and 102 deletions

View File

@@ -299,6 +299,7 @@ typedef struct {
struct PgfCCat {
PgfCncCat* cnccat;
PgfCncFuns* lindefs;
PgfCncFuns* linrefs;
size_t n_synprods;
PgfProductionSeq* prods;
float viterbi_prob;

View File

@@ -611,6 +611,8 @@ pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
{
PgfCCat* cat = gu_new_flex(ps->pool, PgfCCat, fin, 1);
cat->cnccat = conts->ccat->cnccat;
cat->lindefs = conts->ccat->lindefs;
cat->linrefs = conts->ccat->linrefs;
cat->viterbi_prob = viterbi_prob;
cat->fid = ps->max_fid++;
cat->conts = conts;

View File

@@ -164,9 +164,33 @@ pgf_print_lindefs(GuMapItor* fn, const void* key, void* value,
}
}
/*
 * Map-iteration callback that prints the linrefs of one concrete category.
 *
 * fn    - the iterator; it is cast to PgfPrintFn to reach the output stream.
 * key   - points to the int id (fid) of the category.
 * value - points to the PgfCCat* being visited.
 * err   - exception frame handed on to every output call.
 *
 * Categories without linrefs produce no output.  Otherwise one line is
 * written: the function ids as "F<funid>" separated by single spaces,
 * followed by " -> C<fid>" and a newline.
 */
static void
pgf_print_linrefs(GuMapItor* fn, const void* key, void* value,
                  GuExn* err)
{
	PgfCCat* cat = *((PgfCCat**) value);
	if (cat->linrefs == NULL) {
		/* nothing to report for this category */
		return;
	}

	GuOut* sink = ((PgfPrintFn*) fn)->out;
	int cat_id = *((int *) key);

	gu_puts(" ",sink,err);

	size_t count = gu_seq_length(cat->linrefs);
	for (size_t idx = 0; idx < count; idx++) {
		/* a separator goes before every entry except the first */
		if (idx != 0) {
			gu_putc(' ', sink, err);
		}

		PgfCncFun* ref = gu_seq_get(cat->linrefs, PgfCncFun*, idx);
		gu_printf(sink,err,"F%d",ref->funid);
	}

	gu_printf(sink,err," -> C%d\n",cat_id);
}
static void
pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
GuOut *out, GuExn *err)
GuOut *out, GuExn *err)
{
gu_printf(out,err," F%d := (", cncfun->funid);
@@ -321,6 +345,10 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
PgfPrintFn clo3 = { { pgf_print_lindefs }, out };
gu_map_iter(concr->ccats, &clo3.fn, err);
gu_puts(" linrefs\n", out, err);
PgfPrintFn clo4 = { { pgf_print_linrefs }, out };
gu_map_iter(concr->ccats, &clo4.fn, err);
gu_puts(" lin\n", out, err);
size_t n_funs = gu_seq_length(concr->cncfuns);
for (size_t i = 0; i < n_funs; i++) {
@@ -338,8 +366,8 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
}
gu_puts(" categories\n", out, err);
PgfPrintFn clo4 = { { pgf_print_cnccat }, out };
gu_map_iter(concr->cnccats, &clo4.fn, err);
PgfPrintFn clo5 = { { pgf_print_cnccat }, out };
gu_map_iter(concr->cnccats, &clo5.fn, err);
gu_puts("}\n", out, err);
}

View File

@@ -830,6 +830,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
ccat = gu_new(PgfCCat, rdr->opool);
ccat->cnccat = NULL;
ccat->lindefs = NULL;
ccat->linrefs = NULL;
ccat->n_synprods = 0;
ccat->prods = NULL;
ccat->viterbi_prob = 0;
@@ -858,7 +859,7 @@ pgf_read_funid(PgfReader* rdr, PgfConcr* concr)
}
static void
pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr)
pgf_read_lindefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
{
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, );
@@ -872,11 +873,33 @@ pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr)
ccat->lindefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
for (size_t j = 0; j < n_funs; j++) {
PgfCncFun* fun = pgf_read_funid(rdr, concr);
fun->absfun = abs_lin_fun;
gu_seq_set(ccat->lindefs, PgfCncFun*, j, fun);
}
}
}
/*
 * Reads the linref table of a concrete syntax from the PGF stream.
 *
 * The table is a length-prefixed list of entries.  Each entry consists of
 * a category id followed by a length-prefixed list of function ids.  For
 * every entry the listed functions are stored in the category's `linrefs`
 * sequence (allocated from rdr->opool), and each of those functions gets
 * its `absfun` field set to `abs_lin_fun`.
 *
 * rdr         - the reader; failures are reported through rdr->err.
 * abs_lin_fun - abstract function assigned to every linref function.
 * concr       - the concrete syntax being loaded.
 *
 * Reading stops at the first failed read.  The exception is left pending
 * in rdr->err for the caller to inspect.
 */
static void
pgf_read_linrefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
{
	size_t len = pgf_read_len(rdr);
	gu_return_on_exn(rdr->err, );

	for (size_t i = 0; i < len; i++) {
		PgfCCat* ccat = pgf_read_fid(rdr, concr);
		/* do not keep reading once the category id could not be read */
		gu_return_on_exn(rdr->err, );

		size_t n_funs = pgf_read_len(rdr);
		gu_return_on_exn(rdr->err, );

		ccat->linrefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
		for (size_t j = 0; j < n_funs; j++) {
			PgfCncFun* fun = pgf_read_funid(rdr, concr);
			/*
			 * The result of a failed read must not be dereferenced:
			 * bail out before touching `fun` if an exception is pending.
			 */
			gu_return_on_exn(rdr->err, );

			fun->absfun = abs_lin_fun;
			gu_seq_set(ccat->linrefs, PgfCncFun*, j, fun);
		}
	}
}
static void
pgf_read_parg(PgfReader* rdr, PgfConcr* concr, PgfPArg* parg)
{
@@ -1000,6 +1023,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
ccat = gu_new(PgfCCat, rdr->opool);
ccat->cnccat = NULL;
ccat->lindefs = NULL;
ccat->linrefs = NULL;
ccat->n_synprods = 0;
ccat->prods = NULL;
ccat->viterbi_prob = 0;
@@ -1123,7 +1147,7 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
}
static PgfConcr*
pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun)
{
PgfConcr* concr = gu_new(PgfConcr, rdr->opool);
@@ -1153,7 +1177,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool);
concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool);
concr->coerce_idx = gu_map_type_new(PgfCncOverloadMap, rdr->opool);
pgf_read_lindefs(rdr, concr);
pgf_read_lindefs(rdr, abs_lin_fun, concr);
pgf_read_linrefs(rdr, abs_lin_fun, concr);
pgf_read_ccats(rdr, concr);
concr->cnccats = pgf_read_cnccats(rdr, abstr, concr);
concr->callbacks = pgf_new_callbacks_map(concr, rdr->opool);
@@ -1177,10 +1202,21 @@ pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr)
size_t len = pgf_read_len(rdr);
gu_return_on_exn(rdr->err, NULL);
PgfAbsFun* abs_lin_fun = gu_new(PgfAbsFun, rdr->opool);
abs_lin_fun->name = "_";
abs_lin_fun->type = gu_new(PgfType, rdr->opool);
abs_lin_fun->type->hypos = NULL;
abs_lin_fun->type->cid = "_";
abs_lin_fun->type->n_exprs = 0;
abs_lin_fun->arity = 0;
abs_lin_fun->defns = NULL;
abs_lin_fun->ep.prob = INFINITY;
abs_lin_fun->ep.expr = gu_null_variant;
for (size_t i = 0; i < len; i++) {
PgfConcr* concr = pgf_read_concrete(rdr, abstr);
PgfConcr* concr = pgf_read_concrete(rdr, abstr, abs_lin_fun);
gu_return_on_exn(rdr->err, NULL);
gu_map_put(concretes, concr->name, PgfConcr*, concr);
}

View File

@@ -32,7 +32,7 @@ module PGF(
showType, readType,
mkType, mkHypo, mkDepHypo, mkImplHypo,
unType,
categories, startCat,
categories, categoryContext, startCat,
-- * Functions
functions, functionsByCat, functionType, missingLins,
@@ -221,6 +221,8 @@ abstractName :: PGF -> Language
-- with the \'cat\' keyword.
categories :: PGF -> [CId]
-- | Looks up a category in the abstract syntax of the grammar and
-- returns the hypotheses stored for it, or 'Nothing' if the grammar
-- has no category with that name.
categoryContext :: PGF -> CId -> Maybe [Hypo]
-- | The start category is defined in the grammar with
-- the \'startcat\' flag. This is usually the sentence category
-- but it is not necessary. Despite that there is a start category
@@ -279,6 +281,11 @@ languageCode pgf lang =
-- The category names are exactly the keys of the abstract syntax's
-- category map, returned in ascending key order (the same order that
-- Map.toList yields them in).
categories pgf = Map.keys (cats (abstract pgf))
-- Runs in the Maybe monad: a failed lookup short-circuits to Nothing,
-- otherwise the first component of the stored triple is returned.
categoryContext pgf cat = do
  (hypos,_,_) <- Map.lookup cat (cats (abstract pgf))
  return hypos
-- Wraps the category returned by lookStartCat in a 'DTyp' whose first
-- and last list arguments are both empty (presumably: no hypotheses
-- and no argument expressions -- confirm against the DTyp definition).
startCat pgf = DTyp [] (lookStartCat pgf) []
-- The function names are the keys of the abstract syntax's function map.
functions pgf = Map.keys (funs (abstract pgf))

View File

@@ -14,7 +14,7 @@ import qualified Data.Set as Set
import Control.Monad
pgfMajorVersion, pgfMinorVersion :: Word16
(pgfMajorVersion, pgfMinorVersion) = (1,0)
(pgfMajorVersion, pgfMinorVersion) = (2,0)
instance Binary PGF where
put pgf = do putWord16be pgfMajorVersion
@@ -56,6 +56,7 @@ instance Binary Concr where
putArray2 (sequences cnc)
putArray (cncfuns cnc)
put (lindefs cnc)
put (linrefs cnc)
put (productions cnc)
put (cnccats cnc)
put (totalCats cnc)
@@ -64,11 +65,13 @@ instance Binary Concr where
sequences <- getArray2
cncfuns <- getArray
lindefs <- get
linrefs <- get
productions <- get
cnccats <- get
totalCats <- get
return (Concr{ cflags=cflags, printnames=printnames
, sequences=sequences, cncfuns=cncfuns, lindefs=lindefs
, sequences=sequences, cncfuns=cncfuns
, lindefs=lindefs, linrefs=linrefs
, productions=productions
, pproductions = IntMap.empty
, lproductions = Map.empty

View File

@@ -41,6 +41,7 @@ data Concr = Concr {
printnames :: Map.Map CId String, -- printname of a cat or a fun
cncfuns :: Array FunId CncFun,
lindefs :: IntMap.IntMap [FunId],
linrefs :: IntMap.IntMap [FunId],
sequences :: Array SeqId Sequence,
productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing

View File

@@ -47,7 +47,9 @@ ppCnc name cnc =
text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$
text "lindefs" $$
nest 2 (vcat (map ppLinDef (IntMap.toList (lindefs cnc)))) $$
nest 2 (vcat (map ppFunList (IntMap.toList (lindefs cnc)))) $$
text "linrefs" $$
nest 2 (vcat (map ppFunList (IntMap.toList (linrefs cnc)))) $$
text "lin" $$
nest 2 (vcat (map ppCncFun (assocs (cncfuns cnc)))) $$
text "sequences" $$
@@ -73,7 +75,7 @@ ppProduction (fid,PConst _ _ ss) =
-- Renders one concrete function as
--   <funid> := (<seqid>,<seqid>,...) [<abstract function name>]
ppCncFun (funid,CncFun fun arr) =
  ppFunId funid <+> text ":=" <+> seqIds <+> brackets (ppCId fun)
  where
    -- the sequence ids of the function, comma-separated, in parentheses
    seqIds = parens (hcat (punctuate comma (map ppSeqId (elems arr))))
ppLinDef (fid,funids) =
ppFunList (fid,funids) =
ppFId fid <+> text "->" <+> hcat (punctuate comma (map ppFunId funids))
ppSeq (seqid,seq) =