forked from GitHub/gf-core
Added the linref construction in GF. The PGF version number is now bumped.
This commit is contained in:
@@ -299,6 +299,7 @@ typedef struct {
|
||||
struct PgfCCat {
|
||||
PgfCncCat* cnccat;
|
||||
PgfCncFuns* lindefs;
|
||||
PgfCncFuns* linrefs;
|
||||
size_t n_synprods;
|
||||
PgfProductionSeq* prods;
|
||||
float viterbi_prob;
|
||||
|
||||
@@ -611,6 +611,8 @@ pgf_parsing_create_completed(PgfParsing* ps, PgfParseState* state,
|
||||
{
|
||||
PgfCCat* cat = gu_new_flex(ps->pool, PgfCCat, fin, 1);
|
||||
cat->cnccat = conts->ccat->cnccat;
|
||||
cat->lindefs = conts->ccat->lindefs;
|
||||
cat->linrefs = conts->ccat->linrefs;
|
||||
cat->viterbi_prob = viterbi_prob;
|
||||
cat->fid = ps->max_fid++;
|
||||
cat->conts = conts;
|
||||
|
||||
@@ -164,9 +164,33 @@ pgf_print_lindefs(GuMapItor* fn, const void* key, void* value,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_linrefs(GuMapItor* fn, const void* key, void* value,
|
||||
GuExn* err)
|
||||
{
|
||||
PgfPrintFn* clo = (PgfPrintFn*) fn;
|
||||
int fid = *((int *) key);
|
||||
PgfCCat* ccat = *((PgfCCat**) value);
|
||||
GuOut *out = clo->out;
|
||||
|
||||
if (ccat->linrefs != NULL) {
|
||||
gu_puts(" ",out,err);
|
||||
|
||||
size_t n_linrefs = gu_seq_length(ccat->linrefs);
|
||||
for (size_t i = 0; i < n_linrefs; i++) {
|
||||
if (i > 0) gu_putc(' ', out, err);
|
||||
|
||||
PgfCncFun* fun = gu_seq_get(ccat->linrefs, PgfCncFun*, i);
|
||||
gu_printf(out,err,"F%d",fun->funid);
|
||||
}
|
||||
|
||||
gu_printf(out,err," -> C%d\n",fid);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
|
||||
GuOut *out, GuExn *err)
|
||||
GuOut *out, GuExn *err)
|
||||
{
|
||||
gu_printf(out,err," F%d := (", cncfun->funid);
|
||||
|
||||
@@ -321,6 +345,10 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
|
||||
PgfPrintFn clo3 = { { pgf_print_lindefs }, out };
|
||||
gu_map_iter(concr->ccats, &clo3.fn, err);
|
||||
|
||||
gu_puts(" linrefs\n", out, err);
|
||||
PgfPrintFn clo4 = { { pgf_print_linrefs }, out };
|
||||
gu_map_iter(concr->ccats, &clo4.fn, err);
|
||||
|
||||
gu_puts(" lin\n", out, err);
|
||||
size_t n_funs = gu_seq_length(concr->cncfuns);
|
||||
for (size_t i = 0; i < n_funs; i++) {
|
||||
@@ -338,8 +366,8 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
|
||||
}
|
||||
|
||||
gu_puts(" categories\n", out, err);
|
||||
PgfPrintFn clo4 = { { pgf_print_cnccat }, out };
|
||||
gu_map_iter(concr->cnccats, &clo4.fn, err);
|
||||
PgfPrintFn clo5 = { { pgf_print_cnccat }, out };
|
||||
gu_map_iter(concr->cnccats, &clo5.fn, err);
|
||||
|
||||
gu_puts("}\n", out, err);
|
||||
}
|
||||
|
||||
@@ -830,6 +830,7 @@ pgf_read_fid(PgfReader* rdr, PgfConcr* concr)
|
||||
ccat = gu_new(PgfCCat, rdr->opool);
|
||||
ccat->cnccat = NULL;
|
||||
ccat->lindefs = NULL;
|
||||
ccat->linrefs = NULL;
|
||||
ccat->n_synprods = 0;
|
||||
ccat->prods = NULL;
|
||||
ccat->viterbi_prob = 0;
|
||||
@@ -858,7 +859,7 @@ pgf_read_funid(PgfReader* rdr, PgfConcr* concr)
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr)
|
||||
pgf_read_lindefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
|
||||
{
|
||||
size_t len = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, );
|
||||
@@ -872,11 +873,33 @@ pgf_read_lindefs(PgfReader* rdr, PgfConcr* concr)
|
||||
ccat->lindefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
|
||||
for (size_t j = 0; j < n_funs; j++) {
|
||||
PgfCncFun* fun = pgf_read_funid(rdr, concr);
|
||||
fun->absfun = abs_lin_fun;
|
||||
gu_seq_set(ccat->lindefs, PgfCncFun*, j, fun);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_read_linrefs(PgfReader* rdr, PgfAbsFun* abs_lin_fun, PgfConcr* concr)
|
||||
{
|
||||
size_t len = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, );
|
||||
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfCCat* ccat = pgf_read_fid(rdr, concr);
|
||||
|
||||
size_t n_funs = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, );
|
||||
|
||||
ccat->linrefs = gu_new_seq(PgfCncFun*, n_funs, rdr->opool);
|
||||
for (size_t j = 0; j < n_funs; j++) {
|
||||
PgfCncFun* fun = pgf_read_funid(rdr, concr);
|
||||
fun->absfun = abs_lin_fun;
|
||||
gu_seq_set(ccat->linrefs, PgfCncFun*, j, fun);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_read_parg(PgfReader* rdr, PgfConcr* concr, PgfPArg* parg)
|
||||
{
|
||||
@@ -1000,6 +1023,7 @@ pgf_read_cnccat(PgfReader* rdr, PgfAbstr* abstr, PgfConcr* concr, PgfCId name)
|
||||
ccat = gu_new(PgfCCat, rdr->opool);
|
||||
ccat->cnccat = NULL;
|
||||
ccat->lindefs = NULL;
|
||||
ccat->linrefs = NULL;
|
||||
ccat->n_synprods = 0;
|
||||
ccat->prods = NULL;
|
||||
ccat->viterbi_prob = 0;
|
||||
@@ -1123,7 +1147,7 @@ pgf_read_ccat_cb(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
}
|
||||
|
||||
static PgfConcr*
|
||||
pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
|
||||
pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr, PgfAbsFun* abs_lin_fun)
|
||||
{
|
||||
PgfConcr* concr = gu_new(PgfConcr, rdr->opool);
|
||||
|
||||
@@ -1153,7 +1177,8 @@ pgf_read_concrete(PgfReader* rdr, PgfAbstr* abstr)
|
||||
gu_new_int_map(PgfCCat*, &gu_null_struct, rdr->opool);
|
||||
concr->fun_indices = gu_map_type_new(PgfCncFunOverloadMap, rdr->opool);
|
||||
concr->coerce_idx = gu_map_type_new(PgfCncOverloadMap, rdr->opool);
|
||||
pgf_read_lindefs(rdr, concr);
|
||||
pgf_read_lindefs(rdr, abs_lin_fun, concr);
|
||||
pgf_read_linrefs(rdr, abs_lin_fun, concr);
|
||||
pgf_read_ccats(rdr, concr);
|
||||
concr->cnccats = pgf_read_cnccats(rdr, abstr, concr);
|
||||
concr->callbacks = pgf_new_callbacks_map(concr, rdr->opool);
|
||||
@@ -1177,10 +1202,21 @@ pgf_read_concretes(PgfReader* rdr, PgfAbstr* abstr)
|
||||
size_t len = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
PgfAbsFun* abs_lin_fun = gu_new(PgfAbsFun, rdr->opool);
|
||||
abs_lin_fun->name = "_";
|
||||
abs_lin_fun->type = gu_new(PgfType, rdr->opool);
|
||||
abs_lin_fun->type->hypos = NULL;
|
||||
abs_lin_fun->type->cid = "_";
|
||||
abs_lin_fun->type->n_exprs = 0;
|
||||
abs_lin_fun->arity = 0;
|
||||
abs_lin_fun->defns = NULL;
|
||||
abs_lin_fun->ep.prob = INFINITY;
|
||||
abs_lin_fun->ep.expr = gu_null_variant;
|
||||
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfConcr* concr = pgf_read_concrete(rdr, abstr);
|
||||
PgfConcr* concr = pgf_read_concrete(rdr, abstr, abs_lin_fun);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
|
||||
gu_map_put(concretes, concr->name, PgfConcr*, concr);
|
||||
}
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ module PGF(
|
||||
showType, readType,
|
||||
mkType, mkHypo, mkDepHypo, mkImplHypo,
|
||||
unType,
|
||||
categories, startCat,
|
||||
categories, categoryContext, startCat,
|
||||
|
||||
-- * Functions
|
||||
functions, functionsByCat, functionType, missingLins,
|
||||
@@ -221,6 +221,8 @@ abstractName :: PGF -> Language
|
||||
-- with the \'cat\' keyword.
|
||||
categories :: PGF -> [CId]
|
||||
|
||||
-- | The context (list of hypotheses) recorded for the given category
-- in the abstract syntax, or 'Nothing' if the grammar has no category
-- with that name.
categoryContext :: PGF -> CId -> Maybe [Hypo]
|
||||
|
||||
-- | The start category is defined in the grammar with
|
||||
-- the \'startcat\' flag. This is usually the sentence category
|
||||
-- but it is not necessary. Despite that there is a start category
|
||||
@@ -279,6 +281,11 @@ languageCode pgf lang =
|
||||
|
||||
-- The names of all categories in the abstract syntax, in the key order
-- of the underlying map.
categories pgf = Map.keys (cats (abstract pgf))
|
||||
|
||||
-- Looks the category up among the abstract categories and keeps only
-- the hypotheses from the stored triple; an unknown category yields
-- Nothing.
categoryContext pgf cat = do
  (hypos,_,_) <- Map.lookup cat (cats (abstract pgf))
  return hypos
|
||||
|
||||
startCat pgf = DTyp [] (lookStartCat pgf) []
|
||||
|
||||
functions pgf = Map.keys (funs (abstract pgf))
|
||||
|
||||
@@ -14,7 +14,7 @@ import qualified Data.Set as Set
|
||||
import Control.Monad
|
||||
|
||||
pgfMajorVersion, pgfMinorVersion :: Word16
|
||||
(pgfMajorVersion, pgfMinorVersion) = (1,0)
|
||||
(pgfMajorVersion, pgfMinorVersion) = (2,0)
|
||||
|
||||
instance Binary PGF where
|
||||
put pgf = do putWord16be pgfMajorVersion
|
||||
@@ -56,6 +56,7 @@ instance Binary Concr where
|
||||
putArray2 (sequences cnc)
|
||||
putArray (cncfuns cnc)
|
||||
put (lindefs cnc)
|
||||
put (linrefs cnc)
|
||||
put (productions cnc)
|
||||
put (cnccats cnc)
|
||||
put (totalCats cnc)
|
||||
@@ -64,11 +65,13 @@ instance Binary Concr where
|
||||
sequences <- getArray2
|
||||
cncfuns <- getArray
|
||||
lindefs <- get
|
||||
linrefs <- get
|
||||
productions <- get
|
||||
cnccats <- get
|
||||
totalCats <- get
|
||||
return (Concr{ cflags=cflags, printnames=printnames
|
||||
, sequences=sequences, cncfuns=cncfuns, lindefs=lindefs
|
||||
, sequences=sequences, cncfuns=cncfuns
|
||||
, lindefs=lindefs, linrefs=linrefs
|
||||
, productions=productions
|
||||
, pproductions = IntMap.empty
|
||||
, lproductions = Map.empty
|
||||
|
||||
@@ -41,6 +41,7 @@ data Concr = Concr {
|
||||
printnames :: Map.Map CId String, -- printname of a cat or a fun
|
||||
cncfuns :: Array FunId CncFun,
|
||||
lindefs :: IntMap.IntMap [FunId],
|
||||
linrefs :: IntMap.IntMap [FunId],
|
||||
sequences :: Array SeqId Sequence,
|
||||
productions :: IntMap.IntMap (Set.Set Production), -- the original productions loaded from the PGF file
|
||||
pproductions :: IntMap.IntMap (Set.Set Production), -- productions needed for parsing
|
||||
|
||||
@@ -47,7 +47,9 @@ ppCnc name cnc =
|
||||
text "productions" $$
|
||||
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions cnc), prod <- Set.toList set]) $$
|
||||
text "lindefs" $$
|
||||
nest 2 (vcat (map ppLinDef (IntMap.toList (lindefs cnc)))) $$
|
||||
nest 2 (vcat (map ppFunList (IntMap.toList (lindefs cnc)))) $$
|
||||
text "linrefs" $$
|
||||
nest 2 (vcat (map ppFunList (IntMap.toList (linrefs cnc)))) $$
|
||||
text "lin" $$
|
||||
nest 2 (vcat (map ppCncFun (assocs (cncfuns cnc)))) $$
|
||||
text "sequences" $$
|
||||
@@ -73,7 +75,7 @@ ppProduction (fid,PConst _ _ ss) =
|
||||
ppCncFun (funid,CncFun fun arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
||||
|
||||
ppLinDef (fid,funids) =
|
||||
ppFunList (fid,funids) =
|
||||
ppFId fid <+> text "->" <+> hcat (punctuate comma (map ppFunId funids))
|
||||
|
||||
ppSeq (seqid,seq) =
|
||||
|
||||
Reference in New Issue
Block a user