forked from GitHub/gf-core
A major refactoring of the C and the Haskell runtimes. Note: this is an incompatible change in the PGF format!
The following are the outcomes:
- Predef.nonExist is fully supported by both the Haskell and the C runtimes
- Predef.BIND is now an internal compiler-defined token. For now
it behaves just as usual for the Haskell runtime, i.e. it generates &+.
However, the special treatment will let us handle it properly in
the C runtime.
- This required a major change in the PGF format since both
nonExist and BIND may appear inside 'pre' and this was not supported
before.
This commit is contained in:
@@ -84,6 +84,8 @@ primitives = Map.fromList
|
||||
[(Explicit,varL,typeType),(Explicit,identW,mkFunType [typeStr] typeStr),(Explicit,identW,Vr varL)] (Vr varL) []))) Nothing)
|
||||
, (cNonExist , ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
, (cBIND , ResOper (Just (noLoc (mkProd -- Str
|
||||
[] typeStr []))) Nothing)
|
||||
]
|
||||
where
|
||||
fun from to = oper (mkFunType from to)
|
||||
|
||||
@@ -78,7 +78,7 @@ predefList =
|
||||
(cError,Error),
|
||||
-- Canonical values:
|
||||
(cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int),
|
||||
(cInts,Ints),(cNonExist,NonExist)]
|
||||
(cInts,Ints),(cNonExist,NonExist),(cBIND,BIND)]
|
||||
--- add more functions!!!
|
||||
|
||||
delta f vs =
|
||||
@@ -106,6 +106,7 @@ delta f vs =
|
||||
PFalse -> canonical
|
||||
PTrue -> canonical
|
||||
NonExist-> canonical
|
||||
BIND -> canonical
|
||||
where
|
||||
canonical = delay
|
||||
delay = return (VApp f vs) -- wrong number of arguments
|
||||
|
||||
@@ -51,5 +51,5 @@ data Predefined = Drop | Take | Tk | Dp | EqStr | Occur | Occurs | ToUpper
|
||||
{- | Show | Read | ToStr | MapStr | EqVal -}
|
||||
| Error
|
||||
-- Canonical values below:
|
||||
| PBool | PFalse | PTrue | Int | Ints | NonExist
|
||||
| PBool | PFalse | PTrue | Int | Ints | NonExist | BIND
|
||||
deriving (Show,Eq,Ord,Ix,Bounded,Enum)
|
||||
|
||||
@@ -14,7 +14,7 @@ module GF.Compile.GeneratePMCFG
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data(Alternative(..),CncCat(..),Symbol(..),fidVar)
|
||||
import PGF.Data(CncCat(..),Symbol(..),fidVar)
|
||||
|
||||
import GF.Infra.Option
|
||||
import GF.Grammar hiding (Env, mkRecord, mkTable)
|
||||
@@ -376,30 +376,24 @@ convertTerm opts sel ctype (FV vars) = do term <- variants vars
|
||||
convertTerm opts sel ctype (C t1 t2) = do v1 <- convertTerm opts sel ctype t1
|
||||
v2 <- convertTerm opts sel ctype t2
|
||||
return (CStr (concat [s | CStr s <- [v1,v2]]))
|
||||
convertTerm opts sel ctype (K t) = return (CStr [SymKS [t]])
|
||||
convertTerm opts sel ctype (K t) = return (CStr [SymKS t])
|
||||
convertTerm opts sel ctype Empty = return (CStr [])
|
||||
convertTerm opts sel ctype (Alts s alts)
|
||||
= return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]])
|
||||
where
|
||||
strings (K s) = [s]
|
||||
strings (C u v) = strings u ++ strings v
|
||||
strings (Strs ss) = concatMap strings ss
|
||||
strings (EPatt p) = getPatts p
|
||||
strings Empty = [""]
|
||||
strings t = bug $ "strings "++show t
|
||||
|
||||
getPatts p =
|
||||
case p of
|
||||
PAlt a b -> getPatts a ++ getPatts b
|
||||
PString s -> [s]
|
||||
PSeq a b -> [s ++ t | s <- getPatts a, t <- getPatts b]
|
||||
_ -> ppbug $ hang (text "not valid pattern in pre expression:")
|
||||
4
|
||||
(ppPatt Unqualified 0 p)
|
||||
convertTerm opts sel ctype (Alts s alts)= do CStr s <- convertTerm opts CNil ctype s
|
||||
alts <- forM alts $ \(u,Strs ps) -> do
|
||||
CStr u <- convertTerm opts CNil ctype u
|
||||
ps <- mapM (convertTerm opts CNil ctype) ps
|
||||
return (u,map unSym ps)
|
||||
return (CStr [SymKP s alts])
|
||||
where
|
||||
unSym (CStr []) = ""
|
||||
unSym (CStr [SymKS t]) = t
|
||||
unSym _ = ppbug $ hang (text "invalid prefix in pre expression:") 4 (ppU 0 (Alts s alts))
|
||||
|
||||
convertTerm opts sel ctype (Q (m,f))
|
||||
| m == cPredef &&
|
||||
f == cNonExist = return (CStr [SymNE])
|
||||
| m == cPredef &&
|
||||
f == cBIND = return (CStr [SymBIND])
|
||||
|
||||
convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2))
|
||||
| l `elem` map fst rs2 = convertTerm opts sel ctype t2
|
||||
@@ -492,7 +486,7 @@ addSequencesV seqs (CRec vs) = let !(seqs1,vs1) = mapAccumL' (\seqs (lbl,b) ->
|
||||
addSequencesV seqs (CTbl pt vs)=let !(seqs1,vs1) = mapAccumL' (\seqs (trm,b) -> let !(seqs',b') = addSequencesB seqs b
|
||||
in (seqs',(trm,b'))) seqs vs
|
||||
in (seqs1,CTbl pt vs1)
|
||||
addSequencesV seqs (CStr lin) = let !(seqs1,seqid) = addSequence seqs (optimizeLin lin)
|
||||
addSequencesV seqs (CStr lin) = let !(seqs1,seqid) = addSequence seqs lin
|
||||
in (seqs1,CStr seqid)
|
||||
addSequencesV seqs (CPar i) = (seqs,CPar i)
|
||||
|
||||
@@ -502,16 +496,6 @@ mapAccumL' f s (x:xs) = (s'',y:ys)
|
||||
where !(s', y ) = f s x
|
||||
!(s'',ys) = mapAccumL' f s' xs
|
||||
|
||||
-- Merge every maximal run of adjacent 'SymKS' symbols in a linearization
-- into a single 'SymKS' whose token list is the concatenation of the
-- run's token lists. Any other symbol is passed through unchanged and
-- terminates the current run.
optimizeLin [] = []
|
||||
-- The sequence starts with a SymKS: gather the whole run, then continue
-- with whatever follows it.
optimizeLin lin@(SymKS _ : _) =
|
||||
let (ts,lin') = getRest lin
|
||||
in SymKS ts : optimizeLin lin'
|
||||
where
|
||||
-- getRest returns the concatenated tokens of the leading SymKS run
-- together with the remainder of the sequence.
getRest (SymKS ts : lin) = let (ts1,lin') = getRest lin
|
||||
in (ts++ts1,lin')
|
||||
getRest lin = ([],lin)
|
||||
-- Any non-SymKS symbol is kept as is.
optimizeLin (sym : lin) = sym : optimizeLin lin
|
||||
|
||||
addSequence :: SeqSet -> [Symbol] -> (SeqSet,SeqId)
|
||||
addSequence seqs lst =
|
||||
case Map.lookup seq seqs of
|
||||
@@ -629,4 +613,4 @@ mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) <- Map.toList map]
|
||||
bug msg = ppbug (text msg)
|
||||
ppbug = error . render . hang (text "Internal error in GeneratePMCFG:") 4
|
||||
|
||||
ppU = ppTerm Unqualified
|
||||
ppU = ppTerm Unqualified
|
||||
|
||||
@@ -85,10 +85,12 @@ sym2js :: Symbol -> JS.Expr
|
||||
sym2js (SymCat n l) = new "SymCat" [JS.EInt n, JS.EInt l]
|
||||
sym2js (SymLit n l) = new "SymLit" [JS.EInt n, JS.EInt l]
|
||||
sym2js (SymVar n l) = new "SymVar" [JS.EInt n, JS.EInt l]
|
||||
sym2js (SymKS ts) = new "SymKS" (map JS.EStr ts)
|
||||
sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map JS.EStr ts), JS.EArray (map alt2js alts)]
|
||||
sym2js (SymKS t) = new "SymKS" [JS.EStr t]
|
||||
sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)]
|
||||
sym2js SymNE = new "SymNE" []
|
||||
sym2js SymBIND = new "SymKS" [JS.EStr "&+"]
|
||||
|
||||
alt2js (Alt ps ts) = new "Alt" [JS.EArray (map JS.EStr ps), JS.EArray (map JS.EStr ts)]
|
||||
alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)]
|
||||
|
||||
new :: String -> [JS.Expr] -> JS.Expr
|
||||
new f xs = JS.ENew (JS.Ident f) xs
|
||||
|
||||
@@ -136,9 +136,9 @@ instance PLPrint Symbol where
|
||||
plp (SymCat n l) = plOper ":" (show n) (show l)
|
||||
plp (SymLit n l) = plTerm "lit" [show n, show l]
|
||||
plp (SymVar n l) = plTerm "var" [show n, show l]
|
||||
plp (SymKS ts) = prTList "," (map plAtom ts)
|
||||
plp (SymKP ts alts) = plTerm "pre" [plList (map plAtom ts), plList (map plAlt alts)]
|
||||
where plAlt (Alt ps ts) = plOper "/" (plList (map plAtom ps)) (plList (map plAtom ts))
|
||||
plp (SymKS t) = plAtom t
|
||||
plp (SymKP ts alts) = plTerm "pre" [plList (map plp ts), plList (map plAlt alts)]
|
||||
where plAlt (ps,ts) = plOper "/" (plList (map plp ps)) (plList (map plAtom ts))
|
||||
|
||||
class PLPrint a where
|
||||
plp :: a -> String
|
||||
|
||||
@@ -75,9 +75,9 @@ pySymbol :: Symbol -> String
|
||||
pySymbol (SymCat n l) = pyTuple 0 show [n, l]
|
||||
pySymbol (SymLit n l) = pyDict 0 pyStr id [("lit", pyTuple 0 show [n, l])]
|
||||
pySymbol (SymVar n l) = pyDict 0 pyStr id [("var", pyTuple 0 show [n, l])]
|
||||
pySymbol (SymKS ts) = prTList "," (map pyStr ts)
|
||||
pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pyStr ts), ("alts", pyList 0 alt2py alts)]
|
||||
where alt2py (Alt ps ts) = pyTuple 0 (pyList 0 pyStr) [ps, ts]
|
||||
pySymbol (SymKS t) = pyStr t
|
||||
pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pySymbol ts), ("alts", pyList 0 alt2py alts)]
|
||||
where alt2py (ps,ts) = pyTuple 0 (pyList 0 pyStr) [map pySymbol ps, ts]
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- python helpers
|
||||
|
||||
@@ -30,6 +30,7 @@ cErrorType = identS "Error"
|
||||
cOverload = identS "overload"
|
||||
cUndefinedType = identS "UndefinedType"
|
||||
cNonExist = identS "nonExist"
|
||||
cBIND = identS "BIND"
|
||||
|
||||
isPredefCat :: Ident -> Bool
|
||||
isPredefCat c = elem c [cInt,cString,cFloat]
|
||||
|
||||
@@ -91,8 +91,8 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
|
||||
|
||||
symbolToCFSymbol :: Symbol -> [CFSymbol]
|
||||
symbolToCFSymbol (SymCat n l) = [let PArg _ fid = args!!n in NonTerminal (fcatToCat fid l)]
|
||||
symbolToCFSymbol (SymKS ts) = map Terminal ts
|
||||
symbolToCFSymbol (SymKP ts as) = map Terminal $ ts
|
||||
symbolToCFSymbol (SymKS t) = [Terminal t]
|
||||
symbolToCFSymbol (SymKP syms as) = concatMap symbolToCFSymbol syms
|
||||
---- ++ [t | Alt ss _ <- as, t <- ss]
|
||||
---- should be alternatives in [[CFSymbol]]
|
||||
---- AR 3/6/2010
|
||||
|
||||
@@ -131,8 +131,11 @@ extern GU_DECLARE_TYPE(PgfCncCat, abstract);
|
||||
bool
|
||||
pgf_tokens_equal(PgfTokens* t1, PgfTokens* t2);
|
||||
|
||||
typedef GuSeq PgfSequence; // -> PgfSymbol
|
||||
typedef GuSeq PgfSequences;
|
||||
|
||||
typedef struct {
|
||||
PgfTokens* form;
|
||||
PgfSequence* form;
|
||||
/**< The form of this variant as a sequence of symbols. */
|
||||
|
||||
GuStrings* prefixes;
|
||||
@@ -175,7 +178,8 @@ typedef enum {
|
||||
PGF_SYMBOL_VAR,
|
||||
PGF_SYMBOL_KS,
|
||||
PGF_SYMBOL_KP,
|
||||
PGF_SYMBOL_NE
|
||||
PGF_SYMBOL_NE,
|
||||
PGF_SYMBOL_BIND
|
||||
} PgfSymbolTag;
|
||||
|
||||
typedef struct {
|
||||
@@ -186,14 +190,14 @@ typedef struct {
|
||||
typedef PgfSymbolIdx PgfSymbolCat, PgfSymbolLit, PgfSymbolVar;
|
||||
|
||||
typedef struct {
|
||||
PgfTokens* tokens;
|
||||
PgfToken token;
|
||||
} PgfSymbolKS;
|
||||
|
||||
typedef struct PgfSymbolKP
|
||||
/** A prefix-dependent symbol. The form that this symbol takes
|
||||
* depends on the form of a prefix of the following symbol. */
|
||||
{
|
||||
PgfTokens* default_form;
|
||||
PgfSequence* default_form;
|
||||
/**< Default form that this symbol takes if none of the
|
||||
* variant forms is triggered. */
|
||||
|
||||
@@ -206,8 +210,8 @@ typedef struct PgfSymbolKP
|
||||
typedef struct {
|
||||
} PgfSymbolNE;
|
||||
|
||||
typedef GuSeq PgfSequence; // -> PgfSymbol
|
||||
typedef GuSeq PgfSequences;
|
||||
typedef struct {
|
||||
} PgfSymbolBIND;
|
||||
|
||||
typedef struct {
|
||||
PgfAbsFun* absfun;
|
||||
|
||||
@@ -116,18 +116,15 @@ typedef struct {
|
||||
} PgfBracketLznState;
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks)
|
||||
pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
size_t len = gu_seq_length(toks);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfParseNode* node = gu_new(PgfParseNode, state->pool);
|
||||
node->id = 100000 + gu_buf_length(state->leaves);
|
||||
node->parent = state->parent;
|
||||
node->label = gu_seq_get(toks, PgfToken, i);
|
||||
gu_buf_push(state->leaves, PgfParseNode*, node);
|
||||
}
|
||||
PgfParseNode* node = gu_new(PgfParseNode, state->pool);
|
||||
node->id = 100000 + gu_buf_length(state->leaves);
|
||||
node->parent = state->parent;
|
||||
node->label = tok;
|
||||
gu_buf_push(state->leaves, PgfParseNode*, node);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -214,7 +211,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_bracket_lin_funcs = {
|
||||
.symbol_tokens = pgf_bracket_lzn_symbol_tokens,
|
||||
.symbol_token = pgf_bracket_lzn_symbol_token,
|
||||
.expr_literal = pgf_bracket_lzn_expr_literal,
|
||||
.begin_phrase = pgf_bracket_lzn_begin_phrase,
|
||||
.end_phrase = pgf_bracket_lzn_end_phrase
|
||||
|
||||
@@ -453,6 +453,50 @@ pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool)
|
||||
return &lzn->en;
|
||||
}
|
||||
|
||||
/**
 * Linearize one symbol sequence of a function application.
 *
 * Walks the symbols of `seq` in order and dispatches on each symbol's tag:
 *  - CAT / VAR / LIT: the symbol's `d` field selects an argument of `fapp`,
 *    which is linearized via pgf_lzr_linearize() with `r` as the
 *    linearization index;
 *  - KS: the symbol's single token is handed to the `symbol_token`
 *    callback, when that callback is set;
 *  - KP: only the default form is linearized, by recursing into it
 *    (the alternative forms are not consulted, see the TODO below);
 *  - NE: produces no output.
 * Any other tag reaches gu_impossible().
 *
 * @param concr  passed through to pgf_lzr_linearize() and to recursive calls
 * @param fapp   application node whose `args` the CAT/VAR/LIT symbols index
 * @param seq    the sequence of PgfSymbol values to linearize
 * @param fnsp   pointer to the callback table that receives the output
 */
void
|
||||
pgf_lzr_linearize_sequence(PgfConcr* concr,
|
||||
PgfCncTreeApp* fapp, PgfSequence* seq,
|
||||
PgfLinFuncs** fnsp)
|
||||
{
|
||||
size_t nsyms = gu_seq_length(seq);
|
||||
PgfSymbol* syms = gu_seq_data(seq);
|
||||
for (size_t i = 0; i < nsyms; i++) {
|
||||
PgfSymbol sym = syms[i];
|
||||
GuVariantInfo sym_i = gu_variant_open(sym);
|
||||
switch (sym_i.tag) {
|
||||
// All three index-style symbols are read through PgfSymbolIdx.
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_VAR:
|
||||
case PGF_SYMBOL_LIT: {
|
||||
PgfSymbolIdx* sidx = sym_i.data;
|
||||
// The argument index must be within the application's arguments.
gu_assert((unsigned) sidx->d < fapp->n_args);
|
||||
|
|
||||
PgfCncTree argf = fapp->args[sidx->d];
|
||||
pgf_lzr_linearize(concr, argf, sidx->r, fnsp);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* ks = sym_i.data;
|
||||
// The token callback is optional; skip the token when it is unset.
if ((*fnsp)->symbol_token) {
|
||||
(*fnsp)->symbol_token(fnsp, ks->token);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
// TODO: correct prefix-dependencies
|
||||
PgfSymbolKP* kp = sym_i.data;
|
||||
// The default form is itself a symbol sequence, hence the recursion.
pgf_lzr_linearize_sequence(concr, fapp, kp->default_form, fnsp);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_NE: {
|
||||
// Nothing to be done here
|
||||
break;
|
||||
}
|
||||
// NOTE(review): PGF_SYMBOL_BIND is a member of PgfSymbolTag but has no
// case here, so it would end up in gu_impossible() -- confirm whether
// such symbols can reach this function.
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp)
|
||||
{
|
||||
@@ -472,47 +516,9 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs
|
||||
}
|
||||
|
||||
gu_require(lin_idx < fun->n_lins);
|
||||
PgfSequence* seq = fun->lins[lin_idx];
|
||||
size_t nsyms = gu_seq_length(seq);
|
||||
PgfSymbol* syms = gu_seq_data(seq);
|
||||
for (size_t i = 0; i < nsyms; i++) {
|
||||
PgfSymbol sym = syms[i];
|
||||
GuVariantInfo sym_i = gu_variant_open(sym);
|
||||
switch (sym_i.tag) {
|
||||
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_VAR:
|
||||
case PGF_SYMBOL_LIT: {
|
||||
PgfSymbolIdx* sidx = sym_i.data;
|
||||
gu_assert((unsigned) sidx->d < fapp->n_args);
|
||||
|
||||
PgfCncTree argf = fapp->args[sidx->d];
|
||||
pgf_lzr_linearize(concr, argf, sidx->r, fnsp);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* ks = sym_i.data;
|
||||
if (fns->symbol_tokens) {
|
||||
fns->symbol_tokens(fnsp, ks->tokens);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
// TODO: correct prefix-dependencies
|
||||
PgfSymbolKP* kp = sym_i.data;
|
||||
if (fns->symbol_tokens) {
|
||||
fns->symbol_tokens(fnsp,
|
||||
kp->default_form);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_NE: {
|
||||
// Nothing to be done here
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
PgfSequence* seq = fun->lins[lin_idx];
|
||||
pgf_lzr_linearize_sequence(concr, fapp, seq, fnsp);
|
||||
|
||||
if (fns->end_phrase) {
|
||||
fns->end_phrase(fnsp,
|
||||
@@ -572,22 +578,18 @@ struct PgfSimpleLin {
|
||||
};
|
||||
|
||||
static void
|
||||
pgf_file_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks)
|
||||
pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
{
|
||||
PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs);
|
||||
if (!gu_ok(flin->err)) {
|
||||
return;
|
||||
}
|
||||
size_t len = gu_seq_length(toks);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
if (flin->n_tokens > 0)
|
||||
gu_putc(' ', flin->out, flin->err);
|
||||
if (flin->n_tokens > 0)
|
||||
gu_putc(' ', flin->out, flin->err);
|
||||
|
||||
PgfToken tok = gu_seq_get(toks, PgfToken, i);
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
|
||||
flin->n_tokens++;
|
||||
}
|
||||
gu_string_write(tok, flin->out, flin->err);
|
||||
|
||||
flin->n_tokens++;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -626,10 +628,10 @@ pgf_file_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit)
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_file_lin_funcs = {
|
||||
.symbol_tokens = pgf_file_lzn_symbol_tokens,
|
||||
.expr_literal = pgf_file_lzn_expr_literal,
|
||||
.begin_phrase = NULL,
|
||||
.end_phrase = NULL,
|
||||
.symbol_token = pgf_file_lzn_symbol_token,
|
||||
.expr_literal = pgf_file_lzn_expr_literal,
|
||||
.begin_phrase = NULL,
|
||||
.end_phrase = NULL,
|
||||
};
|
||||
|
||||
void
|
||||
|
||||
@@ -51,7 +51,7 @@ typedef struct PgfLinFuncs PgfLinFuncs;
|
||||
struct PgfLinFuncs
|
||||
{
|
||||
/// Output tokens
|
||||
void (*symbol_tokens)(PgfLinFuncs** self, PgfTokens* toks);
|
||||
void (*symbol_token)(PgfLinFuncs** self, PgfToken tok);
|
||||
|
||||
/// Output literal
|
||||
void (*expr_literal)(PgfLinFuncs** self, PgfLiteral lit);
|
||||
|
||||
@@ -42,7 +42,7 @@ pgf_match_string_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
gu_new_variant(PGF_LITERAL_STR,
|
||||
PgfLiteralStr,
|
||||
&expr_lit->lit, pool);
|
||||
lit_str->val = gu_seq_get(sks->tokens, PgfToken, 0);
|
||||
lit_str->val = sks->token;
|
||||
|
||||
*out_ep = ep;
|
||||
accepted = false;
|
||||
@@ -80,10 +80,9 @@ pgf_match_int_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
} else if (n_syms == 1) {
|
||||
PgfSymbolKS* sks =
|
||||
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
|
||||
PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0);
|
||||
|
||||
int val;
|
||||
if (!gu_string_to_int(tok, &val)) {
|
||||
if (!gu_string_to_int(sks->token, &val)) {
|
||||
*out_ep = NULL;
|
||||
} else {
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
@@ -137,10 +136,9 @@ pgf_match_float_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
} else if (n_syms == 1) {
|
||||
PgfSymbolKS* sks =
|
||||
gu_variant_data(gu_seq_get(seq, PgfSymbol, 0));
|
||||
PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0);
|
||||
|
||||
double val;
|
||||
if (!gu_string_to_double(tok, &val)) {
|
||||
if (!gu_string_to_double(sks->token, &val)) {
|
||||
*out_ep = NULL;
|
||||
} else {
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
@@ -209,9 +207,8 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok,
|
||||
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
|
||||
gu_assert(gu_variant_tag(sym) == PGF_SYMBOL_KS);
|
||||
PgfSymbolKS* sks = gu_variant_data(sym);
|
||||
PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0);
|
||||
|
||||
gu_string_write(tok, out, err);
|
||||
gu_string_write(sks->token, out, err);
|
||||
}
|
||||
|
||||
PgfExprProb* ep = gu_new(PgfExprProb, pool);
|
||||
|
||||
@@ -133,8 +133,8 @@ struct PgfItem {
|
||||
PgfPArgs* args;
|
||||
PgfSymbol curr_sym;
|
||||
uint16_t seq_idx;
|
||||
uint8_t tok_idx;
|
||||
uint8_t alt;
|
||||
uint8_t alt_idx; // position in the pre alternative
|
||||
uint8_t alt; // the number of the alternative
|
||||
prob_t inside_prob;
|
||||
};
|
||||
|
||||
@@ -694,7 +694,7 @@ pgf_new_item(PgfItemConts* conts, PgfProduction prod,
|
||||
item->prod = prod;
|
||||
item->curr_sym = gu_null_variant;
|
||||
item->seq_idx = 0;
|
||||
item->tok_idx = 0;
|
||||
item->alt_idx = 0;
|
||||
item->alt = 0;
|
||||
|
||||
conts->ref_count++;
|
||||
@@ -758,8 +758,12 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat,
|
||||
static void
|
||||
pgf_item_advance(PgfItem* item, GuPool* pool)
|
||||
{
|
||||
item->seq_idx++;
|
||||
pgf_item_set_curr_symbol(item, pool);
|
||||
if (GU_LIKELY(item->alt == 0)) {
|
||||
item->seq_idx++;
|
||||
pgf_item_set_curr_symbol(item, pool);
|
||||
}
|
||||
else
|
||||
item->alt_idx++;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1133,8 +1137,7 @@ pgf_parsing_meta_scan(PgfParseState* before, PgfParseState* after,
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&item->curr_sym, after->ps->pool);
|
||||
*((PgfSymbol*)(sks+1)) = prev;
|
||||
sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool);
|
||||
gu_seq_set(sks->tokens, PgfToken, 0, tok);
|
||||
sks->token = tok;
|
||||
|
||||
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
|
||||
}
|
||||
@@ -1218,76 +1221,54 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
|
||||
case PGF_SYMBOL_KS: {
|
||||
if (after != NULL) {
|
||||
PgfSymbolKS* sks = gu_variant_data(sym);
|
||||
gu_assert(item->tok_idx < gu_seq_length(sks->tokens));
|
||||
PgfToken tok =
|
||||
gu_seq_get(sks->tokens, PgfToken, item->tok_idx++);
|
||||
if (item->tok_idx == gu_seq_length(sks->tokens)) {
|
||||
item->tok_idx = 0;
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
pgf_parsing_add_transition(before, after, sks->token, item);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
if (after != NULL) {
|
||||
PgfSymbolKP* skp = gu_variant_data(sym);
|
||||
size_t idx = item->tok_idx;
|
||||
uint8_t alt = item->alt;
|
||||
gu_assert(idx < gu_seq_length(skp->default_form));
|
||||
if (idx == 0) {
|
||||
PgfToken tok;
|
||||
|
||||
PgfSymbol sym;
|
||||
if (item->alt == 0) {
|
||||
PgfItem* new_item;
|
||||
|
||||
tok = gu_seq_get(skp->default_form, PgfToken, 0);
|
||||
|
||||
new_item = pgf_item_copy(item, after->ps->pool, after->ps);
|
||||
new_item->tok_idx++;
|
||||
if (new_item->tok_idx == gu_seq_length(skp->default_form)) {
|
||||
new_item->tok_idx = 0;
|
||||
pgf_item_advance(new_item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, new_item);
|
||||
new_item->alt = 1;
|
||||
new_item->alt_idx = 0;
|
||||
sym = gu_seq_get(skp->default_form, PgfSymbol, new_item->alt_idx);
|
||||
pgf_parsing_symbol(before, after, new_item, sym);
|
||||
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
// XXX: do nubbing properly
|
||||
PgfTokens* toks = skp->forms[i].form;
|
||||
PgfTokens* toks2 = skp->default_form;
|
||||
bool skip = pgf_tokens_equal(toks, toks2);
|
||||
PgfSequence* syms = skp->forms[i].form;
|
||||
PgfSequence* syms2 = skp->default_form;
|
||||
bool skip = false; /*pgf_tokens_equal(toks, toks2);
|
||||
for (size_t j = 0; j < i; j++) {
|
||||
PgfTokens* toks2 = skp->forms[j].form;
|
||||
skip |= pgf_tokens_equal(toks, toks2);
|
||||
}
|
||||
}*/
|
||||
if (!skip) {
|
||||
tok = gu_seq_get(toks, PgfToken, 0);
|
||||
new_item = pgf_item_copy(item, after->ps->pool, after->ps);
|
||||
new_item->tok_idx++;
|
||||
new_item->alt = i;
|
||||
if (new_item->tok_idx == gu_seq_length(toks)) {
|
||||
new_item->tok_idx = 0;
|
||||
pgf_item_advance(new_item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, new_item);
|
||||
new_item->alt = i+2;
|
||||
new_item->alt_idx = 0;
|
||||
sym = gu_seq_get(syms, PgfSymbol, new_item->alt_idx);
|
||||
pgf_parsing_symbol(before, after, new_item, sym);
|
||||
}
|
||||
}
|
||||
} else if (alt == 0) {
|
||||
PgfToken tok =
|
||||
gu_seq_get(skp->default_form, PgfToken, idx);
|
||||
item->tok_idx++;
|
||||
if (item->tok_idx == gu_seq_length(skp->default_form)) {
|
||||
item->tok_idx = 0;
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
} else {
|
||||
gu_assert(alt <= skp->n_forms);
|
||||
PgfTokens* toks = skp->forms[alt - 1].form;
|
||||
PgfToken tok = gu_seq_get(toks, PgfToken, idx);
|
||||
item->tok_idx++;
|
||||
if (item->tok_idx == gu_seq_length(toks)) {
|
||||
item->tok_idx = 0;
|
||||
PgfSequence* syms =
|
||||
(item->alt == 1) ? skp->default_form :
|
||||
skp->forms[item->alt-2].form;
|
||||
|
||||
if (item->alt_idx < gu_seq_length(syms)) {
|
||||
sym = gu_seq_get(syms, PgfSymbol, item->alt_idx);
|
||||
pgf_parsing_symbol(before, after, item, sym);
|
||||
} else {
|
||||
item->alt = 0;
|
||||
pgf_item_advance(item, after->ps->pool);
|
||||
gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item);
|
||||
}
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -1357,7 +1338,7 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after,
|
||||
// XXX TODO proper support
|
||||
break;
|
||||
case PGF_SYMBOL_NE: {
|
||||
// Nothing to be done here
|
||||
pgf_item_free(before, after, item);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -1450,8 +1431,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item)
|
||||
gu_alignof(PgfSymbolKS),
|
||||
&item->curr_sym, after->ps->pool);
|
||||
*((PgfSymbol*)(sks+1)) = prev;
|
||||
sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool);
|
||||
gu_seq_set(sks->tokens, PgfToken, 0, tok);
|
||||
sks->token = tok;
|
||||
|
||||
item->seq_idx++;
|
||||
pgf_parsing_add_transition(before, after, tok, item);
|
||||
@@ -1755,9 +1735,7 @@ typedef struct {
|
||||
} PgfPrefixTokenState;
|
||||
|
||||
static GuString
|
||||
pgf_get_tokens(PgfSequence* seq,
|
||||
uint16_t seq_idx, uint8_t tok_idx,
|
||||
GuPool* pool)
|
||||
pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool)
|
||||
{
|
||||
GuPool* tmp_pool = gu_new_pool();
|
||||
GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool);
|
||||
@@ -1773,17 +1751,7 @@ pgf_get_tokens(PgfSequence* seq,
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* symks = i.data;
|
||||
size_t len = gu_seq_length(symks->tokens);
|
||||
for (size_t i = tok_idx; i < len; i++) {
|
||||
if (i > 0) {
|
||||
gu_putc(' ', out, err);
|
||||
}
|
||||
|
||||
PgfToken tok = gu_seq_get(symks->tokens, PgfToken, i);
|
||||
gu_string_write(tok, out, err);
|
||||
}
|
||||
|
||||
tok_idx = 0;
|
||||
gu_string_write(symks->token, out, err);
|
||||
}
|
||||
default:
|
||||
goto end;
|
||||
@@ -1809,18 +1777,9 @@ pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item)
|
||||
PgfSequence* seq;
|
||||
pgf_item_sequence(item, &lin_idx, &seq, ts->pool);
|
||||
|
||||
uint16_t seq_idx = item->seq_idx;
|
||||
uint8_t tok_idx = item->tok_idx;
|
||||
|
||||
// go one token back
|
||||
if (tok_idx > 0)
|
||||
tok_idx--;
|
||||
else
|
||||
seq_idx--;
|
||||
|
||||
ts->tp = gu_new(PgfTokenProb, ts->pool);
|
||||
ts->tp->tok =
|
||||
pgf_get_tokens(seq, seq_idx, tok_idx, ts->pool);
|
||||
pgf_get_tokens(seq, item->seq_idx-1, ts->pool);
|
||||
ts->tp->cat = item->conts->ccat->cnccat->abscat->name;
|
||||
ts->tp->prob = item->inside_prob+item->conts->outside_prob;
|
||||
}
|
||||
@@ -2346,17 +2305,15 @@ pgf_morpho_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* symks = i.data;
|
||||
size_t len = gu_seq_length(symks->tokens);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
if (pos >= gu_seq_length(clo->tokens))
|
||||
goto cont;
|
||||
|
||||
if (pos >= gu_seq_length(clo->tokens))
|
||||
goto cont;
|
||||
|
||||
PgfToken tok1 = gu_seq_get(symks->tokens, PgfToken, i);
|
||||
PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++);
|
||||
|
||||
if (!gu_string_eq(tok1, tok2))
|
||||
goto cont;
|
||||
}
|
||||
PgfToken tok1 = symks->token;
|
||||
PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++);
|
||||
|
||||
if (!gu_string_eq(tok1, tok2))
|
||||
goto cont;
|
||||
}
|
||||
default:
|
||||
continue;
|
||||
@@ -2443,7 +2400,7 @@ pgf_fullform_iter(GuMapItor* fn, const void* key, void* value, GuExn* err)
|
||||
PgfProductionApply* papp = i.data;
|
||||
|
||||
PgfSequence* seq = papp->fun->lins[cfc.lin_idx];
|
||||
GuString tokens = pgf_get_tokens(seq, 0, 0, st->pool);
|
||||
GuString tokens = pgf_get_tokens(seq, 0, st->pool);
|
||||
|
||||
// create a new production index with keys that
|
||||
// are multiword units
|
||||
@@ -2531,12 +2488,10 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry,
|
||||
|
||||
static void
|
||||
pgf_parser_index_token(PgfConcr* concr,
|
||||
PgfTokens* tokens,
|
||||
PgfToken tok,
|
||||
PgfCCat* ccat, size_t lin_idx, PgfProduction prod,
|
||||
GuPool *pool)
|
||||
{
|
||||
PgfToken tok = gu_seq_get(tokens, PgfToken, 0);
|
||||
|
||||
PgfProductionIdx* set =
|
||||
gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*);
|
||||
if (set == NULL) {
|
||||
@@ -2570,6 +2525,47 @@ pgf_parser_index_epsilon(PgfConcr* concr,
|
||||
gu_buf_push(prods, PgfProduction, prod);
|
||||
}
|
||||
|
||||
/*
 * Index production `prod` under the token(s) that symbol `sym` can begin
 * with.
 *
 *  - KS: the symbol's token is passed to pgf_parser_index_token() together
 *    with `ccat`, `lin_idx`, `prod` and `pool`.
 *  - KP: the function recurses on the first symbol of the default form and
 *    on the first symbol of each of the `n_forms` alternative forms.
 *  - CAT / LIT / NE / VAR: nothing is indexed.
 * Any other tag reaches gu_impossible().
 *
 * NOTE(review): PGF_SYMBOL_BIND is a member of PgfSymbolTag but is not
 * listed below, so it would end up in gu_impossible() -- confirm whether
 * such a symbol can appear first in a sequence.
 */
static void
|
||||
pgf_parser_index_symbol(PgfConcr* concr, PgfSymbol sym,
|
||||
PgfCCat* ccat, size_t lin_idx, PgfProduction prod,
|
||||
GuPool *pool)
|
||||
{
|
||||
GuVariantInfo i = gu_variant_open(sym);
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = i.data;
|
||||
pgf_parser_index_token(concr,
|
||||
sks->token,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSymbolKP* skp = i.data;
|
||||
// This local `sym` shadows the function parameter of the same name.
// NOTE(review): element 0 is read without checking the length of the
// form -- confirm that default and alternative forms are never empty.
PgfSymbol sym =
|
||||
gu_seq_get(skp->default_form, PgfSymbol, 0);
|
||||
pgf_parser_index_symbol(concr, sym,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
// The loop counter `i` shadows the GuVariantInfo `i` declared above.
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
sym = gu_seq_get(skp->forms[i].form, PgfSymbol, 0);
|
||||
pgf_parser_index_symbol(concr, sym,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_LIT:
|
||||
case PGF_SYMBOL_NE:
|
||||
case PGF_SYMBOL_VAR:
|
||||
// Nothing to be done here
|
||||
break;
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pgf_parser_index(PgfConcr* concr,
|
||||
PgfCCat* ccat, PgfProduction prod,
|
||||
@@ -2586,39 +2582,9 @@ pgf_parser_index(PgfConcr* concr,
|
||||
|
||||
PgfSequence* seq = papp->fun->lins[lin_idx];
|
||||
if (gu_seq_length(seq) > 0) {
|
||||
GuVariantInfo i = gu_variant_open(gu_seq_get(seq, PgfSymbol, 0));
|
||||
switch (i.tag) {
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = i.data;
|
||||
pgf_parser_index_token(concr,
|
||||
sks->tokens,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSymbolKP* skp = i.data;
|
||||
pgf_parser_index_token(concr,
|
||||
skp->default_form,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
pgf_parser_index_token(concr,
|
||||
skp->forms[i].form,
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_CAT:
|
||||
case PGF_SYMBOL_LIT:
|
||||
case PGF_SYMBOL_NE:
|
||||
case PGF_SYMBOL_VAR:
|
||||
// Nothing to be done here
|
||||
break;
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
pgf_parser_index_symbol(concr, gu_seq_get(seq, PgfSymbol, 0),
|
||||
ccat, lin_idx, prod,
|
||||
pool);
|
||||
} else {
|
||||
pgf_parser_index_epsilon(concr,
|
||||
ccat, lin_idx, prod,
|
||||
|
||||
@@ -19,19 +19,14 @@ typedef struct {
|
||||
} PgfMetricsLznState;
|
||||
|
||||
static void
|
||||
pgf_metrics_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks)
|
||||
pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
{
|
||||
PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs);
|
||||
|
||||
size_t len = gu_seq_length(toks);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfToken tok = gu_seq_get(toks, PgfToken, i);
|
||||
|
||||
if (state->ps != NULL)
|
||||
state->ps = pgf_parser_next_state(state->ps, tok);
|
||||
|
||||
if (state->ps != NULL)
|
||||
state->ps = pgf_parser_next_state(state->ps, tok);
|
||||
|
||||
state->pos++;
|
||||
}
|
||||
state->pos++;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -128,17 +123,17 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_metrics_lin_funcs1 = {
|
||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase1
|
||||
.symbol_token = pgf_metrics_lzn_symbol_token,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase1
|
||||
};
|
||||
|
||||
static PgfLinFuncs pgf_metrics_lin_funcs2 = {
|
||||
.symbol_tokens = pgf_metrics_lzn_symbol_tokens,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase2
|
||||
.symbol_token = pgf_metrics_lzn_symbol_token,
|
||||
.expr_literal = pgf_metrics_lzn_expr_literal,
|
||||
.begin_phrase = pgf_metrics_lzn_begin_phrase,
|
||||
.end_phrase = pgf_metrics_lzn_end_phrase2
|
||||
};
|
||||
|
||||
bool
|
||||
|
||||
@@ -196,19 +196,16 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences,
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_tokens(PgfTokens* tokens, GuOut *out, GuExn *err)
|
||||
pgf_print_token(PgfToken tok, GuOut *out, GuExn *err)
|
||||
{
|
||||
gu_putc('"', out, err);
|
||||
size_t n_toks = gu_seq_length(tokens);
|
||||
for (size_t i = 0; i < n_toks; i++) {
|
||||
if (i > 0) gu_putc(' ', out, err);
|
||||
|
||||
PgfToken tok = gu_seq_get(tokens, PgfToken, i);
|
||||
gu_string_write(tok, out, err);
|
||||
}
|
||||
gu_string_write(tok, out, err);
|
||||
gu_putc('"', out, err);
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err);
|
||||
|
||||
void
|
||||
pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
{
|
||||
@@ -220,18 +217,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
}
|
||||
case PGF_SYMBOL_KS: {
|
||||
PgfSymbolKS* sks = gu_variant_data(sym);
|
||||
pgf_print_tokens(sks->tokens, out, err);
|
||||
pgf_print_token(sks->token, out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfSymbolKP* skp = gu_variant_data(sym);
|
||||
|
||||
gu_puts("pre {", out, err);
|
||||
pgf_print_tokens(skp->default_form, out, err);
|
||||
pgf_print_sequence(skp->default_form, out, err);
|
||||
|
||||
for (size_t i = 0; i < skp->n_forms; i++) {
|
||||
gu_puts("; ", out, err);
|
||||
pgf_print_tokens(skp->forms[i].form, out, err);
|
||||
pgf_print_sequence(skp->forms[i].form, out, err);
|
||||
gu_puts(" / ", out, err);
|
||||
|
||||
size_t n_prefixes = gu_seq_length(skp->forms[i].prefixes);
|
||||
@@ -262,16 +259,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err)
|
||||
gu_puts("nonExist", out, err);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_BIND: {
|
||||
gu_puts("BIND", out, err);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
gu_impossible();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pgf_print_sequence(size_t seqid, PgfSequence* seq, GuOut *out, GuExn *err)
|
||||
pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err)
|
||||
{
|
||||
gu_printf(out,err," S%d := ", seqid);
|
||||
|
||||
int n_syms = gu_seq_length(seq);
|
||||
for (int i = 0; i < n_syms; i++) {
|
||||
if (i > 0) gu_putc(' ', out, err);
|
||||
@@ -279,8 +278,6 @@ pgf_print_sequence(size_t seqid, PgfSequence* seq, GuOut *out, GuExn *err)
|
||||
PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i);
|
||||
pgf_print_symbol(sym, out, err);
|
||||
}
|
||||
|
||||
gu_putc('\n', out, err);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -342,7 +339,10 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr,
|
||||
size_t n_seqs = gu_seq_length(concr->sequences);
|
||||
for (size_t i = 0; i < n_seqs; i++) {
|
||||
PgfSequence* seq = gu_seq_get(concr->sequences, PgfSequence*, i);
|
||||
pgf_print_sequence(i, seq, out, err);
|
||||
|
||||
gu_printf(out,err," S%d := ", i);
|
||||
pgf_print_sequence(seq, out, err);
|
||||
gu_putc('\n', out, err);
|
||||
}
|
||||
|
||||
gu_puts(" categories\n", out, err);
|
||||
|
||||
@@ -586,27 +586,13 @@ pgf_read_printnames(PgfReader* rdr)
|
||||
return printnames;
|
||||
}
|
||||
|
||||
static PgfTokens*
|
||||
pgf_read_tokens(PgfReader* rdr)
|
||||
{
|
||||
size_t len = pgf_read_len(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
PgfTokens* tokens = gu_new_seq(PgfToken, len, rdr->opool);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfToken token = pgf_read_string(rdr);
|
||||
gu_return_on_exn(rdr->err, NULL);
|
||||
|
||||
gu_seq_set(tokens, PgfToken, i, token);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
static PgfSequence*
|
||||
pgf_read_sequence(PgfReader* rdr);
|
||||
|
||||
static void
|
||||
pgf_read_alternative(PgfReader* rdr, PgfAlternative* alt)
|
||||
{
|
||||
alt->form = pgf_read_tokens(rdr);
|
||||
alt->form = pgf_read_sequence(rdr);
|
||||
gu_return_on_exn(rdr->err,);
|
||||
|
||||
size_t n_prefixes = pgf_read_len(rdr);
|
||||
@@ -672,12 +658,12 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
gu_new_variant(PGF_SYMBOL_KS,
|
||||
PgfSymbolKS,
|
||||
&sym, rdr->opool);
|
||||
sym_ks->tokens = pgf_read_tokens(rdr);
|
||||
sym_ks->token = pgf_read_string(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_KP: {
|
||||
PgfTokens* default_form = pgf_read_tokens(rdr);
|
||||
PgfSequence* default_form = pgf_read_sequence(rdr);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
|
||||
size_t n_forms = pgf_read_len(rdr);
|
||||
@@ -703,6 +689,13 @@ pgf_read_symbol(PgfReader* rdr)
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
case PGF_SYMBOL_BIND: {
|
||||
gu_new_variant(PGF_SYMBOL_BIND,
|
||||
PgfSymbolBIND,
|
||||
&sym, rdr->opool);
|
||||
gu_return_on_exn(rdr->err, gu_null_variant);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
pgf_read_tag_error(rdr);
|
||||
}
|
||||
|
||||
@@ -76,10 +76,6 @@ instance Binary Concr where
|
||||
, cnccats=cnccats, totalCats=totalCats
|
||||
})
|
||||
|
||||
instance Binary Alternative where
|
||||
put (Alt v x) = put (v,x)
|
||||
get = liftM2 Alt get get
|
||||
|
||||
instance Binary Expr where
|
||||
put (EAbs b x exp) = putWord8 0 >> put (b,x,exp)
|
||||
put (EApp e1 e2) = putWord8 1 >> put (e1,e2)
|
||||
@@ -153,6 +149,7 @@ instance Binary Symbol where
|
||||
put (SymKS ts) = putWord8 3 >> put ts
|
||||
put (SymKP d vs) = putWord8 4 >> put (d,vs)
|
||||
put SymNE = putWord8 5
|
||||
put SymBIND = putWord8 6
|
||||
get = do tag <- getWord8
|
||||
case tag of
|
||||
0 -> liftM2 SymCat get get
|
||||
@@ -161,6 +158,7 @@ instance Binary Symbol where
|
||||
3 -> liftM SymKS get
|
||||
4 -> liftM2 (\d vs -> SymKP d vs) get get
|
||||
5 -> return SymNE
|
||||
6 -> return SymBIND
|
||||
_ -> decodingError
|
||||
|
||||
instance Binary PArg where
|
||||
|
||||
@@ -58,9 +58,10 @@ data Symbol
|
||||
= SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
||||
| SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex
|
||||
| SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int
|
||||
| SymKS [Token]
|
||||
| SymKP [Token] [Alternative]
|
||||
| SymKS Token
|
||||
| SymNE -- non exist
|
||||
| SymBIND -- the special BIND token
|
||||
| SymKP [Symbol] [([Symbol],[String])]
|
||||
deriving (Eq,Ord,Show)
|
||||
data Production
|
||||
= PApply {-# UNPACK #-} !FunId [PArg]
|
||||
@@ -75,10 +76,6 @@ type FunId = Int
|
||||
type SeqId = Int
|
||||
type BCAddr = Int
|
||||
|
||||
data Alternative =
|
||||
Alt [Token] [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
|
||||
-- merge two PGFs; fails is differens absnames; priority to second arg
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ bracketedTokn dp f@(Forest abs cnc forest root) =
|
||||
ltable = mkLinTable cnc isTrusted [] funid largs
|
||||
in ((cat,fid),wildCId,either (const []) id $ getAbsTrees f arg Nothing dp,ltable)
|
||||
descend forest (PCoerce fid) = render forest (PArg [] fid)
|
||||
descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [[LeafKS ts]]))
|
||||
descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [map LeafKS ts]))
|
||||
|
||||
getVar (fid,_)
|
||||
| fid == fidVar = wildCId
|
||||
|
||||
@@ -82,7 +82,7 @@ linTree pgf lang e =
|
||||
LInt n -> return (n_fid+1,((cidInt, n_fid),wildCId,[e0],([],ss (show n))))
|
||||
LFlt f -> return (n_fid+1,((cidFloat, n_fid),wildCId,[e0],([],ss (show f))))
|
||||
|
||||
ss s = listArray (0,0) [[LeafKS [s]]]
|
||||
ss s = listArray (0,0) [[LeafKS s]]
|
||||
|
||||
apply :: Maybe CncType -> FId -> Expr -> [CId] -> [CId] -> CId -> [Expr] -> [(FId,(CncType, CId, [Expr], LinTable))]
|
||||
apply mb_cty n_fid e0 ys xs f es =
|
||||
@@ -115,7 +115,7 @@ linTree pgf lang e =
|
||||
let args = [((wildCId, n_fid),wildCId,[e0],([],ss s))]
|
||||
return (n_fid+2,((cat,n_fid+1),wildCId,[e0],mkLinTable cnc (const True) xs funid args))
|
||||
Nothing
|
||||
| isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS [s]]])))
|
||||
| isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS s]])))
|
||||
| otherwise -> do PCoerce fid <- maybe [] Set.toList (IntMap.lookup fid (pproductions cnc))
|
||||
def (Just (cat,fid)) n_fid e0 ys xs s
|
||||
def Nothing n_fid e0 ys xs s = []
|
||||
|
||||
@@ -156,9 +156,11 @@ data BracketedString
|
||||
-- that represents the same constituent.
|
||||
|
||||
data BracketedTokn
|
||||
= LeafKS [Token]
|
||||
| LeafKP [Token] [Alternative]
|
||||
| Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
|
||||
= Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty
|
||||
| LeafKS Token
|
||||
| LeafNE
|
||||
| LeafBIND
|
||||
| LeafKP [BracketedTokn] [([BracketedTokn],[String])]
|
||||
deriving Eq
|
||||
|
||||
type LinTable = ([CId],Array.Array LIndex [BracketedTokn])
|
||||
@@ -178,21 +180,30 @@ lengthBracketedString (Leaf _) = 1
|
||||
lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss)
|
||||
|
||||
untokn :: Maybe String -> BracketedTokn -> (Maybe String,[BracketedString])
|
||||
untokn nw (LeafKS ts) = (has_tok nw ts,map Leaf ts)
|
||||
untokn nw (LeafKP d vs) = let ts = filter (not . null) (sel d vs nw)
|
||||
in (has_tok nw ts,map Leaf ts)
|
||||
where
|
||||
sel d vs Nothing = d
|
||||
sel d vs (Just w) =
|
||||
case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
untokn nw (Bracket_ cat fid index fun es bss) =
|
||||
let (nw',bss') = mapAccumR untokn nw bss
|
||||
in (nw',[Bracket cat fid index fun es (concat bss')])
|
||||
|
||||
has_tok nw [] = nw
|
||||
has_tok nw (t:ts) = Just t
|
||||
untokn nw bs =
|
||||
case untokn nw bs of
|
||||
(nw,Nothing ) -> (nw,[] )
|
||||
(nw,Just bss) -> (nw,bss)
|
||||
where
|
||||
untokn nw (Bracket_ cat fid index fun es bss) =
|
||||
let (nw',bss') = mapAccumR untokn nw bss
|
||||
in case sequence bss' of
|
||||
Just bss -> (nw',Just [Bracket cat fid index fun es (concat bss)])
|
||||
Nothing -> (Nothing, Nothing)
|
||||
untokn nw (LeafKS t)
|
||||
| null t = (nw,Just [])
|
||||
| otherwise = (Just t,Just [Leaf t])
|
||||
untokn nw LeafNE = (Nothing, Nothing)
|
||||
untokn nw (LeafKP d vs) = let (nw',bss') = mapAccumR untokn nw (sel d vs nw)
|
||||
in case sequence bss' of
|
||||
Just bss -> (nw',Just (concat bss))
|
||||
Nothing -> (Nothing, Nothing)
|
||||
where
|
||||
sel d vs Nothing = d
|
||||
sel d vs (Just w) =
|
||||
case [v | (v,cs) <- vs, any (\c -> isPrefixOf c w) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
|
||||
type CncType = (CId, FId) -- concrete type is the abstract type (the category) + the forest id
|
||||
|
||||
@@ -204,11 +215,13 @@ mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq fi
|
||||
computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,CId,[Expr],LinTable)] -> [BracketedTokn]
|
||||
computeSeq filter seq args = concatMap compute seq
|
||||
where
|
||||
compute (SymCat d r) = getArg d r
|
||||
compute (SymLit d r) = getArg d r
|
||||
compute (SymVar d r) = getVar d r
|
||||
compute (SymKS ts) = [LeafKS ts]
|
||||
compute (SymKP ts alts) = [LeafKP ts alts]
|
||||
compute (SymCat d r) = getArg d r
|
||||
compute (SymLit d r) = getArg d r
|
||||
compute (SymVar d r) = getVar d r
|
||||
compute (SymKS t) = [LeafKS t]
|
||||
compute SymNE = [LeafNE]
|
||||
compute SymBIND = [LeafKS "&+"]
|
||||
compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]]
|
||||
|
||||
getArg d r
|
||||
| not (null arg_lin) &&
|
||||
@@ -218,7 +231,7 @@ computeSeq filter seq args = concatMap compute seq
|
||||
arg_lin = lin ! r
|
||||
(ct@(cat,fid),fun,es,(xs,lin)) = args !! d
|
||||
|
||||
getVar d r = [LeafKS [showCId (xs !! r)]]
|
||||
getVar d r = [LeafKS (showCId (xs !! r))]
|
||||
where
|
||||
(ct,fun,es,(xs,lin)) = args !! d
|
||||
|
||||
|
||||
@@ -36,8 +36,8 @@ collectWords pinfo = Map.fromListWith (++)
|
||||
, sym <- elems (sequences pinfo ! seqid)
|
||||
, t <- sym2tokns sym]
|
||||
where
|
||||
sym2tokns (SymKS ts) = ts
|
||||
sym2tokns (SymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts]
|
||||
sym2tokns (SymKS t) = [t]
|
||||
sym2tokns (SymKP ts alts) = concat (map sym2tokns ts ++ [sym2tokns sym | (syms,ps) <- alts, sym <- syms])
|
||||
sym2tokns _ = []
|
||||
|
||||
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
|
||||
|
||||
@@ -221,9 +221,13 @@ splitLexicalRules cnc p_prods =
|
||||
|
||||
wf ts = (ts,IntSet.singleton funid)
|
||||
|
||||
seq2prefix [] = TrieMap.fromList [wf []]
|
||||
seq2prefix (SymKS ts :syms) = TrieMap.fromList [wf ts]
|
||||
seq2prefix (SymKP ts alts:syms) = TrieMap.fromList (wf ts : [wf ts | Alt ts ps <- alts])
|
||||
seq2prefix [] = TrieMap.fromList [wf []]
|
||||
seq2prefix (SymKS t :syms) = TrieMap.fromList [wf [t]]
|
||||
seq2prefix (SymKP syms0 alts:syms) = TrieMap.unionsWith IntSet.union
|
||||
(seq2prefix (syms0++syms) :
|
||||
[seq2prefix (syms1 ++ syms) | (syms1,ps) <- alts])
|
||||
seq2prefix (SymNE :syms) = TrieMap.empty
|
||||
seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]]
|
||||
|
||||
updateConcrete abs cnc =
|
||||
let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc)
|
||||
|
||||
@@ -244,14 +244,12 @@ getParseOutput (PState abs cnc chart cnt) ty@(DTyp _ start _) dp =
|
||||
flit _ = Nothing
|
||||
ftok toks = TrieMap.unionWith Set.union (TrieMap.compose Nothing toks)
|
||||
|
||||
cutAt ppos toks seqid =
|
||||
cutAt ppos toks seqid =
|
||||
let seq = unsafeAt (sequences cnc) seqid
|
||||
init = take (ppos-1) (elems seq)
|
||||
tail = case unsafeAt seq (ppos-1) of
|
||||
SymKS ts -> let ts' = reverse (drop (length toks) (reverse ts))
|
||||
in if null ts' then [] else [SymKS ts']
|
||||
SymKP ts _ -> let ts' = reverse (drop (length toks) (reverse ts))
|
||||
in if null ts' then [] else [SymKS ts']
|
||||
SymKS t -> drop (length toks) [SymKS t]
|
||||
SymKP ts _ -> reverse (drop (length toks) (reverse ts))
|
||||
sym -> []
|
||||
in init ++ tail
|
||||
|
||||
@@ -307,11 +305,18 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha
|
||||
Nothing -> process flit ftok cnc items4 acc' chart{active=insertAC key (Set.singleton item,new_sc) (active chart)}
|
||||
Just (set,sc) | Set.member item set -> process flit ftok cnc items acc chart
|
||||
| otherwise -> process flit ftok cnc items2 acc chart{active=insertAC key (Set.insert item set,IntMap.unionWith Set.union new_sc sc) (active chart)}
|
||||
SymKS toks -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid args key0) acc
|
||||
SymKS tok -> let !acc' = ftok_ [tok] (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process flit ftok cnc items acc' chart
|
||||
SymKP strs vars
|
||||
-> let !acc' = foldl (\acc toks -> ftok_ toks (Active j (ppos+1) funid seqid args key0) acc) acc
|
||||
(strs:[strs' | Alt strs' _ <- vars])
|
||||
SymNE -> process flit ftok cnc items acc chart
|
||||
SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc
|
||||
in process flit ftok cnc items acc' chart
|
||||
SymKP syms vars
|
||||
-> let to_tok (SymKS t) = [t]
|
||||
to_tok SymBIND = ["&+"]
|
||||
to_tok _ = []
|
||||
|
||||
!acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc
|
||||
(syms:[syms' | (syms',_) <- vars])
|
||||
in process flit ftok cnc items acc' chart
|
||||
SymLit d r -> let PArg hypos fid = args !! d
|
||||
key = AK fid r
|
||||
|
||||
@@ -89,10 +89,12 @@ ppPrintName (id,name) =
|
||||
ppSymbol (SymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (SymLit d r) = char '{' <> int d <> comma <> int r <> char '}'
|
||||
ppSymbol (SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char '>'
|
||||
ppSymbol (SymKS ts) = ppStrs ts
|
||||
ppSymbol (SymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
|
||||
ppSymbol (SymKS t) = doubleQuotes (text t)
|
||||
ppSymbol SymNE = text "nonExist"
|
||||
ppSymbol SymBIND = text "BIND"
|
||||
ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts)))
|
||||
|
||||
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
ppStrs ss = doubleQuotes (hsep (map text ss))
|
||||
|
||||
|
||||
@@ -1518,17 +1518,13 @@ typedef struct {
|
||||
} PgfBracketLznState;
|
||||
|
||||
static void
|
||||
pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks)
|
||||
pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok)
|
||||
{
|
||||
PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs);
|
||||
|
||||
size_t len = gu_seq_length(toks);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
PgfToken tok = gu_seq_get(toks, PgfToken, i);
|
||||
PyObject* str = gu2py_string(tok);
|
||||
PyList_Append(state->list, str);
|
||||
Py_DECREF(str);
|
||||
}
|
||||
PyObject* str = gu2py_string(tok);
|
||||
PyList_Append(state->list, str);
|
||||
Py_DECREF(str);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1600,7 +1596,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex,
|
||||
}
|
||||
|
||||
static PgfLinFuncs pgf_bracket_lin_funcs = {
|
||||
.symbol_tokens = pgf_bracket_lzn_symbol_tokens,
|
||||
.symbol_token = pgf_bracket_lzn_symbol_token,
|
||||
.expr_literal = pgf_bracket_lzn_expr_literal,
|
||||
.begin_phrase = pgf_bracket_lzn_begin_phrase,
|
||||
.end_phrase = pgf_bracket_lzn_end_phrase
|
||||
|
||||
Reference in New Issue
Block a user