From 426bc49a52b4efa0ef0129d713842d8c9abdf0ff Mon Sep 17 00:00:00 2001 From: "kr.angelov" Date: Fri, 27 Sep 2013 15:09:48 +0000 Subject: [PATCH] a major refactoring in the C and the Haskell runtimes. Note incompatible change in the PGF format!!! The following are the outcomes: - Predef.nonExist is fully supported by both the Haskell and the C runtimes - Predef.BIND is now an internal compiler-defined token. For now it behaves just as usual for the Haskell runtime, i.e. it generates &+. However, the special treatment will let us handle it properly in the C runtime. - This required a major change in the PGF format since both nonExist and BIND may appear inside 'pre' and this was not supported before. --- .../GF/Compile/Compute/AppPredefined.hs | 2 + src/compiler/GF/Compile/Compute/Predef.hs | 3 +- src/compiler/GF/Compile/Compute/Value.hs | 2 +- src/compiler/GF/Compile/GeneratePMCFG.hs | 48 ++-- src/compiler/GF/Compile/PGFtoJS.hs | 8 +- src/compiler/GF/Compile/PGFtoProlog.hs | 6 +- src/compiler/GF/Compile/PGFtoPython.hs | 6 +- src/compiler/GF/Grammar/Predef.hs | 1 + src/compiler/GF/Speech/PGFToCFG.hs | 4 +- src/runtime/c/pgf/data.h | 16 +- src/runtime/c/pgf/graphviz.c | 17 +- src/runtime/c/pgf/linearizer.c | 110 ++++----- src/runtime/c/pgf/linearizer.h | 2 +- src/runtime/c/pgf/literals.c | 11 +- src/runtime/c/pgf/parser.c | 228 ++++++++---------- src/runtime/c/pgf/parseval.c | 31 +-- src/runtime/c/pgf/printer.c | 34 +-- src/runtime/c/pgf/reader.c | 31 +-- src/runtime/haskell/PGF/Binary.hs | 6 +- src/runtime/haskell/PGF/Data.hs | 9 +- src/runtime/haskell/PGF/Forest.hs | 2 +- src/runtime/haskell/PGF/Linearize.hs | 4 +- src/runtime/haskell/PGF/Macros.hs | 61 +++-- src/runtime/haskell/PGF/Morphology.hs | 4 +- src/runtime/haskell/PGF/Optimize.hs | 10 +- src/runtime/haskell/PGF/Parse.hs | 23 +- src/runtime/haskell/PGF/Printer.hs | 8 +- src/runtime/python/pypgf.c | 14 +- 28 files changed, 330 insertions(+), 371 deletions(-) diff --git 
a/src/compiler/GF/Compile/Compute/AppPredefined.hs b/src/compiler/GF/Compile/Compute/AppPredefined.hs index 869052e0a..861a74a89 100644 --- a/src/compiler/GF/Compile/Compute/AppPredefined.hs +++ b/src/compiler/GF/Compile/Compute/AppPredefined.hs @@ -84,6 +84,8 @@ primitives = Map.fromList [(Explicit,varL,typeType),(Explicit,identW,mkFunType [typeStr] typeStr),(Explicit,identW,Vr varL)] (Vr varL) []))) Nothing) , (cNonExist , ResOper (Just (noLoc (mkProd -- Str [] typeStr []))) Nothing) + , (cBIND , ResOper (Just (noLoc (mkProd -- Str + [] typeStr []))) Nothing) ] where fun from to = oper (mkFunType from to) diff --git a/src/compiler/GF/Compile/Compute/Predef.hs b/src/compiler/GF/Compile/Compute/Predef.hs index 11c4002b8..b8b7f7c77 100644 --- a/src/compiler/GF/Compile/Compute/Predef.hs +++ b/src/compiler/GF/Compile/Compute/Predef.hs @@ -78,7 +78,7 @@ predefList = (cError,Error), -- Canonical values: (cPBool,PBool),(cPFalse,PFalse),(cPTrue,PTrue),(cInt,Int), - (cInts,Ints),(cNonExist,NonExist)] + (cInts,Ints),(cNonExist,NonExist),(cBIND,BIND)] --- add more functions!!! 
delta f vs = @@ -106,6 +106,7 @@ delta f vs = PFalse -> canonical PTrue -> canonical NonExist-> canonical + BIND -> canonical where canonical = delay delay = return (VApp f vs) -- wrong number of arguments diff --git a/src/compiler/GF/Compile/Compute/Value.hs b/src/compiler/GF/Compile/Compute/Value.hs index 7dbaaa193..e72b06778 100644 --- a/src/compiler/GF/Compile/Compute/Value.hs +++ b/src/compiler/GF/Compile/Compute/Value.hs @@ -51,5 +51,5 @@ data Predefined = Drop | Take | Tk | Dp | EqStr | Occur | Occurs | ToUpper {- | Show | Read | ToStr | MapStr | EqVal -} | Error -- Canonical values below: - | PBool | PFalse | PTrue | Int | Ints | NonExist + | PBool | PFalse | PTrue | Int | Ints | NonExist | BIND deriving (Show,Eq,Ord,Ix,Bounded,Enum) diff --git a/src/compiler/GF/Compile/GeneratePMCFG.hs b/src/compiler/GF/Compile/GeneratePMCFG.hs index 0afa2bd49..9642110bc 100644 --- a/src/compiler/GF/Compile/GeneratePMCFG.hs +++ b/src/compiler/GF/Compile/GeneratePMCFG.hs @@ -14,7 +14,7 @@ module GF.Compile.GeneratePMCFG ) where import PGF.CId -import PGF.Data(Alternative(..),CncCat(..),Symbol(..),fidVar) +import PGF.Data(CncCat(..),Symbol(..),fidVar) import GF.Infra.Option import GF.Grammar hiding (Env, mkRecord, mkTable) @@ -376,30 +376,24 @@ convertTerm opts sel ctype (FV vars) = do term <- variants vars convertTerm opts sel ctype (C t1 t2) = do v1 <- convertTerm opts sel ctype t1 v2 <- convertTerm opts sel ctype t2 return (CStr (concat [s | CStr s <- [v1,v2]])) -convertTerm opts sel ctype (K t) = return (CStr [SymKS [t]]) +convertTerm opts sel ctype (K t) = return (CStr [SymKS t]) convertTerm opts sel ctype Empty = return (CStr []) -convertTerm opts sel ctype (Alts s alts) - = return (CStr [SymKP (strings s) [Alt (strings u) (strings v) | (u,v) <- alts]]) - where - strings (K s) = [s] - strings (C u v) = strings u ++ strings v - strings (Strs ss) = concatMap strings ss - strings (EPatt p) = getPatts p - strings Empty = [""] - strings t = bug $ "strings "++show t - - 
getPatts p = - case p of - PAlt a b -> getPatts a ++ getPatts b - PString s -> [s] - PSeq a b -> [s ++ t | s <- getPatts a, t <- getPatts b] - _ -> ppbug $ hang (text "not valid pattern in pre expression:") - 4 - (ppPatt Unqualified 0 p) +convertTerm opts sel ctype (Alts s alts)= do CStr s <- convertTerm opts CNil ctype s + alts <- forM alts $ \(u,Strs ps) -> do + CStr u <- convertTerm opts CNil ctype u + ps <- mapM (convertTerm opts CNil ctype) ps + return (u,map unSym ps) + return (CStr [SymKP s alts]) + where + unSym (CStr []) = "" + unSym (CStr [SymKS t]) = t + unSym _ = ppbug $ hang (text "invalid prefix in pre expression:") 4 (ppU 0 (Alts s alts)) convertTerm opts sel ctype (Q (m,f)) | m == cPredef && f == cNonExist = return (CStr [SymNE]) + | m == cPredef && + f == cBIND = return (CStr [SymBIND]) convertTerm opts sel@(CProj l _) ctype (ExtR t1 t2@(R rs2)) | l `elem` map fst rs2 = convertTerm opts sel ctype t2 @@ -492,7 +486,7 @@ addSequencesV seqs (CRec vs) = let !(seqs1,vs1) = mapAccumL' (\seqs (lbl,b) -> addSequencesV seqs (CTbl pt vs)=let !(seqs1,vs1) = mapAccumL' (\seqs (trm,b) -> let !(seqs',b') = addSequencesB seqs b in (seqs',(trm,b'))) seqs vs in (seqs1,CTbl pt vs1) -addSequencesV seqs (CStr lin) = let !(seqs1,seqid) = addSequence seqs (optimizeLin lin) +addSequencesV seqs (CStr lin) = let !(seqs1,seqid) = addSequence seqs lin in (seqs1,CStr seqid) addSequencesV seqs (CPar i) = (seqs,CPar i) @@ -502,16 +496,6 @@ mapAccumL' f s (x:xs) = (s'',y:ys) where !(s', y ) = f s x !(s'',ys) = mapAccumL' f s' xs -optimizeLin [] = [] -optimizeLin lin@(SymKS _ : _) = - let (ts,lin') = getRest lin - in SymKS ts : optimizeLin lin' - where - getRest (SymKS ts : lin) = let (ts1,lin') = getRest lin - in (ts++ts1,lin') - getRest lin = ([],lin) -optimizeLin (sym : lin) = sym : optimizeLin lin - addSequence :: SeqSet -> [Symbol] -> (SeqSet,SeqId) addSequence seqs lst = case Map.lookup seq seqs of @@ -629,4 +613,4 @@ mkSetArray map = array (0,Map.size map-1) [(v,k) | (k,v) 
<- Map.toList map] bug msg = ppbug (text msg) ppbug = error . render . hang (text "Internal error in GeneratePMCFG:") 4 -ppU = ppTerm Unqualified \ No newline at end of file +ppU = ppTerm Unqualified diff --git a/src/compiler/GF/Compile/PGFtoJS.hs b/src/compiler/GF/Compile/PGFtoJS.hs index b7b3d5545..5cb01fac4 100644 --- a/src/compiler/GF/Compile/PGFtoJS.hs +++ b/src/compiler/GF/Compile/PGFtoJS.hs @@ -85,10 +85,12 @@ sym2js :: Symbol -> JS.Expr sym2js (SymCat n l) = new "SymCat" [JS.EInt n, JS.EInt l] sym2js (SymLit n l) = new "SymLit" [JS.EInt n, JS.EInt l] sym2js (SymVar n l) = new "SymVar" [JS.EInt n, JS.EInt l] -sym2js (SymKS ts) = new "SymKS" (map JS.EStr ts) -sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map JS.EStr ts), JS.EArray (map alt2js alts)] +sym2js (SymKS t) = new "SymKS" [JS.EStr t] +sym2js (SymKP ts alts) = new "SymKP" [JS.EArray (map sym2js ts), JS.EArray (map alt2js alts)] +sym2js SymNE = new "SymNE" [] +sym2js SymBIND = new "SymKS" [JS.EStr "&+"] -alt2js (Alt ps ts) = new "Alt" [JS.EArray (map JS.EStr ps), JS.EArray (map JS.EStr ts)] +alt2js (ps,ts) = new "Alt" [JS.EArray (map sym2js ps), JS.EArray (map JS.EStr ts)] new :: String -> [JS.Expr] -> JS.Expr new f xs = JS.ENew (JS.Ident f) xs diff --git a/src/compiler/GF/Compile/PGFtoProlog.hs b/src/compiler/GF/Compile/PGFtoProlog.hs index de50d86d1..02993ac65 100644 --- a/src/compiler/GF/Compile/PGFtoProlog.hs +++ b/src/compiler/GF/Compile/PGFtoProlog.hs @@ -136,9 +136,9 @@ instance PLPrint Symbol where plp (SymCat n l) = plOper ":" (show n) (show l) plp (SymLit n l) = plTerm "lit" [show n, show l] plp (SymVar n l) = plTerm "var" [show n, show l] - plp (SymKS ts) = prTList "," (map plAtom ts) - plp (SymKP ts alts) = plTerm "pre" [plList (map plAtom ts), plList (map plAlt alts)] - where plAlt (Alt ps ts) = plOper "/" (plList (map plAtom ps)) (plList (map plAtom ts)) + plp (SymKS t) = plAtom t + plp (SymKP ts alts) = plTerm "pre" [plList (map plp ts), plList (map plAlt alts)] + where plAlt (ps,ts) 
= plOper "/" (plList (map plp ps)) (plList (map plAtom ts)) class PLPrint a where plp :: a -> String diff --git a/src/compiler/GF/Compile/PGFtoPython.hs b/src/compiler/GF/Compile/PGFtoPython.hs index a4268b714..1877f8d70 100644 --- a/src/compiler/GF/Compile/PGFtoPython.hs +++ b/src/compiler/GF/Compile/PGFtoPython.hs @@ -75,9 +75,9 @@ pySymbol :: Symbol -> String pySymbol (SymCat n l) = pyTuple 0 show [n, l] pySymbol (SymLit n l) = pyDict 0 pyStr id [("lit", pyTuple 0 show [n, l])] pySymbol (SymVar n l) = pyDict 0 pyStr id [("var", pyTuple 0 show [n, l])] -pySymbol (SymKS ts) = prTList "," (map pyStr ts) -pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pyStr ts), ("alts", pyList 0 alt2py alts)] - where alt2py (Alt ps ts) = pyTuple 0 (pyList 0 pyStr) [ps, ts] +pySymbol (SymKS t) = pyStr t +pySymbol (SymKP ts alts) = pyDict 0 pyStr id [("pre", pyList 0 pySymbol ts), ("alts", pyList 0 alt2py alts)] + where alt2py (ps,ts) = pyTuple 0 (pyList 0 pyStr) [map pySymbol ps, ts] ---------------------------------------------------------------------- -- python helpers diff --git a/src/compiler/GF/Grammar/Predef.hs b/src/compiler/GF/Grammar/Predef.hs index 8bee8dcb5..b814dd110 100644 --- a/src/compiler/GF/Grammar/Predef.hs +++ b/src/compiler/GF/Grammar/Predef.hs @@ -30,6 +30,7 @@ cErrorType = identS "Error" cOverload = identS "overload" cUndefinedType = identS "UndefinedType" cNonExist = identS "nonExist" +cBIND = identS "BIND" isPredefCat :: Ident -> Bool isPredefCat c = elem c [cInt,cString,cFloat] diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index 163f02537..39c5b2a32 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -91,8 +91,8 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co symbolToCFSymbol :: Symbol -> [CFSymbol] symbolToCFSymbol (SymCat n l) = [let PArg _ fid = args!!n in NonTerminal (fcatToCat fid l)] - symbolToCFSymbol (SymKS ts) = map 
Terminal ts - symbolToCFSymbol (SymKP ts as) = map Terminal $ ts + symbolToCFSymbol (SymKS t) = [Terminal t] + symbolToCFSymbol (SymKP syms as) = concatMap symbolToCFSymbol syms ---- ++ [t | Alt ss _ <- as, t <- ss] ---- should be alternatives in [[CFSymbol]] ---- AR 3/6/2010 diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 7717f89f9..2d7fc450e 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -131,8 +131,11 @@ extern GU_DECLARE_TYPE(PgfCncCat, abstract); bool pgf_tokens_equal(PgfTokens* t1, PgfTokens* t2); +typedef GuSeq PgfSequence; // -> PgfSymbol +typedef GuSeq PgfSequences; + typedef struct { - PgfTokens* form; + PgfSequence* form; /**< The form of this variant as a list of tokens. */ GuStrings* prefixes; @@ -175,7 +178,8 @@ typedef enum { PGF_SYMBOL_VAR, PGF_SYMBOL_KS, PGF_SYMBOL_KP, - PGF_SYMBOL_NE + PGF_SYMBOL_NE, + PGF_SYMBOL_BIND } PgfSymbolTag; typedef struct { @@ -186,14 +190,14 @@ typedef struct { typedef PgfSymbolIdx PgfSymbolCat, PgfSymbolLit, PgfSymbolVar; typedef struct { - PgfTokens* tokens; + PgfToken token; } PgfSymbolKS; typedef struct PgfSymbolKP /** A prefix-dependent symbol. The form that this symbol takes * depends on the form of a prefix of the following symbol. */ { - PgfTokens* default_form; + PgfSequence* default_form; /**< Default form that this symbol takes if none of of the * variant forms is triggered. 
*/ @@ -206,8 +210,8 @@ typedef struct PgfSymbolKP typedef struct { } PgfSymbolNE; -typedef GuSeq PgfSequence; // -> PgfSymbol -typedef GuSeq PgfSequences; +typedef struct { +} PgfSymbolBIND; typedef struct { PgfAbsFun* absfun; diff --git a/src/runtime/c/pgf/graphviz.c b/src/runtime/c/pgf/graphviz.c index 5190d2fee..1003c4e8e 100644 --- a/src/runtime/c/pgf/graphviz.c +++ b/src/runtime/c/pgf/graphviz.c @@ -116,18 +116,15 @@ typedef struct { } PgfBracketLznState; static void -pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - PgfParseNode* node = gu_new(PgfParseNode, state->pool); - node->id = 100000 + gu_buf_length(state->leaves); - node->parent = state->parent; - node->label = gu_seq_get(toks, PgfToken, i); - gu_buf_push(state->leaves, PgfParseNode*, node); - } + PgfParseNode* node = gu_new(PgfParseNode, state->pool); + node->id = 100000 + gu_buf_length(state->leaves); + node->parent = state->parent; + node->label = tok; + gu_buf_push(state->leaves, PgfParseNode*, node); } static void @@ -214,7 +211,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, } static PgfLinFuncs pgf_bracket_lin_funcs = { - .symbol_tokens = pgf_bracket_lzn_symbol_tokens, + .symbol_token = pgf_bracket_lzn_symbol_token, .expr_literal = pgf_bracket_lzn_expr_literal, .begin_phrase = pgf_bracket_lzn_begin_phrase, .end_phrase = pgf_bracket_lzn_end_phrase diff --git a/src/runtime/c/pgf/linearizer.c b/src/runtime/c/pgf/linearizer.c index 0a29db824..6a3eb8c9d 100644 --- a/src/runtime/c/pgf/linearizer.c +++ b/src/runtime/c/pgf/linearizer.c @@ -453,6 +453,50 @@ pgf_lzr_concretize(PgfConcr* concr, PgfExpr expr, GuPool* pool) return &lzn->en; } +void +pgf_lzr_linearize_sequence(PgfConcr* concr, + PgfCncTreeApp* fapp, PgfSequence* seq, + 
PgfLinFuncs** fnsp) +{ + size_t nsyms = gu_seq_length(seq); + PgfSymbol* syms = gu_seq_data(seq); + for (size_t i = 0; i < nsyms; i++) { + PgfSymbol sym = syms[i]; + GuVariantInfo sym_i = gu_variant_open(sym); + switch (sym_i.tag) { + case PGF_SYMBOL_CAT: + case PGF_SYMBOL_VAR: + case PGF_SYMBOL_LIT: { + PgfSymbolIdx* sidx = sym_i.data; + gu_assert((unsigned) sidx->d < fapp->n_args); + + PgfCncTree argf = fapp->args[sidx->d]; + pgf_lzr_linearize(concr, argf, sidx->r, fnsp); + break; + } + case PGF_SYMBOL_KS: { + PgfSymbolKS* ks = sym_i.data; + if ((*fnsp)->symbol_token) { + (*fnsp)->symbol_token(fnsp, ks->token); + } + break; + } + case PGF_SYMBOL_KP: { + // TODO: correct prefix-dependencies + PgfSymbolKP* kp = sym_i.data; + pgf_lzr_linearize_sequence(concr, fapp, kp->default_form, fnsp); + break; + } + case PGF_SYMBOL_NE: { + // Nothing to be done here + break; + } + default: + gu_impossible(); + } + } +} + void pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs** fnsp) { @@ -472,47 +516,9 @@ pgf_lzr_linearize(PgfConcr* concr, PgfCncTree ctree, size_t lin_idx, PgfLinFuncs } gu_require(lin_idx < fun->n_lins); - PgfSequence* seq = fun->lins[lin_idx]; - size_t nsyms = gu_seq_length(seq); - PgfSymbol* syms = gu_seq_data(seq); - for (size_t i = 0; i < nsyms; i++) { - PgfSymbol sym = syms[i]; - GuVariantInfo sym_i = gu_variant_open(sym); - switch (sym_i.tag) { - case PGF_SYMBOL_CAT: - case PGF_SYMBOL_VAR: - case PGF_SYMBOL_LIT: { - PgfSymbolIdx* sidx = sym_i.data; - gu_assert((unsigned) sidx->d < fapp->n_args); - PgfCncTree argf = fapp->args[sidx->d]; - pgf_lzr_linearize(concr, argf, sidx->r, fnsp); - break; - } - case PGF_SYMBOL_KS: { - PgfSymbolKS* ks = sym_i.data; - if (fns->symbol_tokens) { - fns->symbol_tokens(fnsp, ks->tokens); - } - break; - } - case PGF_SYMBOL_KP: { - // TODO: correct prefix-dependencies - PgfSymbolKP* kp = sym_i.data; - if (fns->symbol_tokens) { - fns->symbol_tokens(fnsp, - kp->default_form); - } - break; - } - 
case PGF_SYMBOL_NE: { - // Nothing to be done here - break; - } - default: - gu_impossible(); - } - } + PgfSequence* seq = fun->lins[lin_idx]; + pgf_lzr_linearize_sequence(concr, fapp, seq, fnsp); if (fns->end_phrase) { fns->end_phrase(fnsp, @@ -572,22 +578,18 @@ struct PgfSimpleLin { }; static void -pgf_file_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_file_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfSimpleLin* flin = gu_container(funcs, PgfSimpleLin, funcs); if (!gu_ok(flin->err)) { return; } - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - if (flin->n_tokens > 0) - gu_putc(' ', flin->out, flin->err); + if (flin->n_tokens > 0) + gu_putc(' ', flin->out, flin->err); - PgfToken tok = gu_seq_get(toks, PgfToken, i); - gu_string_write(tok, flin->out, flin->err); - - flin->n_tokens++; - } + gu_string_write(tok, flin->out, flin->err); + + flin->n_tokens++; } static void @@ -626,10 +628,10 @@ pgf_file_lzn_expr_literal(PgfLinFuncs** funcs, PgfLiteral lit) } static PgfLinFuncs pgf_file_lin_funcs = { - .symbol_tokens = pgf_file_lzn_symbol_tokens, - .expr_literal = pgf_file_lzn_expr_literal, - .begin_phrase = NULL, - .end_phrase = NULL, + .symbol_token = pgf_file_lzn_symbol_token, + .expr_literal = pgf_file_lzn_expr_literal, + .begin_phrase = NULL, + .end_phrase = NULL, }; void diff --git a/src/runtime/c/pgf/linearizer.h b/src/runtime/c/pgf/linearizer.h index bd143c1c2..ea240dc32 100644 --- a/src/runtime/c/pgf/linearizer.h +++ b/src/runtime/c/pgf/linearizer.h @@ -51,7 +51,7 @@ typedef struct PgfLinFuncs PgfLinFuncs; struct PgfLinFuncs { /// Output tokens - void (*symbol_tokens)(PgfLinFuncs** self, PgfTokens* toks); + void (*symbol_token)(PgfLinFuncs** self, PgfToken tok); /// Output literal void (*expr_literal)(PgfLinFuncs** self, PgfLiteral lit); diff --git a/src/runtime/c/pgf/literals.c b/src/runtime/c/pgf/literals.c index a11097781..7e0c664c6 100644 --- a/src/runtime/c/pgf/literals.c +++ b/src/runtime/c/pgf/literals.c 
@@ -42,7 +42,7 @@ pgf_match_string_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, gu_new_variant(PGF_LITERAL_STR, PgfLiteralStr, &expr_lit->lit, pool); - lit_str->val = gu_seq_get(sks->tokens, PgfToken, 0); + lit_str->val = sks->token; *out_ep = ep; accepted = false; @@ -80,10 +80,9 @@ pgf_match_int_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, } else if (n_syms == 1) { PgfSymbolKS* sks = gu_variant_data(gu_seq_get(seq, PgfSymbol, 0)); - PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0); int val; - if (!gu_string_to_int(tok, &val)) { + if (!gu_string_to_int(sks->token, &val)) { *out_ep = NULL; } else { PgfExprProb* ep = gu_new(PgfExprProb, pool); @@ -137,10 +136,9 @@ pgf_match_float_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, } else if (n_syms == 1) { PgfSymbolKS* sks = gu_variant_data(gu_seq_get(seq, PgfSymbol, 0)); - PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0); double val; - if (!gu_string_to_double(tok, &val)) { + if (!gu_string_to_double(sks->token, &val)) { *out_ep = NULL; } else { PgfExprProb* ep = gu_new(PgfExprProb, pool); @@ -209,9 +207,8 @@ pgf_match_name_lit(PgfConcr* concr, PgfItem* item, PgfToken tok, PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i); gu_assert(gu_variant_tag(sym) == PGF_SYMBOL_KS); PgfSymbolKS* sks = gu_variant_data(sym); - PgfToken tok = gu_seq_get(sks->tokens, PgfToken, 0); - gu_string_write(tok, out, err); + gu_string_write(sks->token, out, err); } PgfExprProb* ep = gu_new(PgfExprProb, pool); diff --git a/src/runtime/c/pgf/parser.c b/src/runtime/c/pgf/parser.c index 92c689fae..188672dd3 100644 --- a/src/runtime/c/pgf/parser.c +++ b/src/runtime/c/pgf/parser.c @@ -133,8 +133,8 @@ struct PgfItem { PgfPArgs* args; PgfSymbol curr_sym; uint16_t seq_idx; - uint8_t tok_idx; - uint8_t alt; + uint8_t alt_idx; // position in the pre alternative + uint8_t alt; // the number of the alternative prob_t inside_prob; }; @@ -694,7 +694,7 @@ pgf_new_item(PgfItemConts* conts, PgfProduction prod, item->prod = prod; item->curr_sym = 
gu_null_variant; item->seq_idx = 0; - item->tok_idx = 0; + item->alt_idx = 0; item->alt = 0; conts->ref_count++; @@ -758,8 +758,12 @@ pgf_item_update_arg(PgfItem* item, size_t d, PgfCCat *new_ccat, static void pgf_item_advance(PgfItem* item, GuPool* pool) { - item->seq_idx++; - pgf_item_set_curr_symbol(item, pool); + if (GU_LIKELY(item->alt == 0)) { + item->seq_idx++; + pgf_item_set_curr_symbol(item, pool); + } + else + item->alt_idx++; } static void @@ -1133,8 +1137,7 @@ pgf_parsing_meta_scan(PgfParseState* before, PgfParseState* after, gu_alignof(PgfSymbolKS), &item->curr_sym, after->ps->pool); *((PgfSymbol*)(sks+1)) = prev; - sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool); - gu_seq_set(sks->tokens, PgfToken, 0, tok); + sks->token = tok; gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item); } @@ -1218,76 +1221,54 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after, case PGF_SYMBOL_KS: { if (after != NULL) { PgfSymbolKS* sks = gu_variant_data(sym); - gu_assert(item->tok_idx < gu_seq_length(sks->tokens)); - PgfToken tok = - gu_seq_get(sks->tokens, PgfToken, item->tok_idx++); - if (item->tok_idx == gu_seq_length(sks->tokens)) { - item->tok_idx = 0; - pgf_item_advance(item, after->ps->pool); - } - pgf_parsing_add_transition(before, after, tok, item); + pgf_item_advance(item, after->ps->pool); + pgf_parsing_add_transition(before, after, sks->token, item); } break; } case PGF_SYMBOL_KP: { if (after != NULL) { PgfSymbolKP* skp = gu_variant_data(sym); - size_t idx = item->tok_idx; - uint8_t alt = item->alt; - gu_assert(idx < gu_seq_length(skp->default_form)); - if (idx == 0) { - PgfToken tok; + + PgfSymbol sym; + if (item->alt == 0) { PgfItem* new_item; - - tok = gu_seq_get(skp->default_form, PgfToken, 0); + new_item = pgf_item_copy(item, after->ps->pool, after->ps); - new_item->tok_idx++; - if (new_item->tok_idx == gu_seq_length(skp->default_form)) { - new_item->tok_idx = 0; - pgf_item_advance(new_item, after->ps->pool); - } - 
pgf_parsing_add_transition(before, after, tok, new_item); + new_item->alt = 1; + new_item->alt_idx = 0; + sym = gu_seq_get(skp->default_form, PgfSymbol, new_item->alt_idx); + pgf_parsing_symbol(before, after, new_item, sym); for (size_t i = 0; i < skp->n_forms; i++) { - // XXX: do nubbing properly - PgfTokens* toks = skp->forms[i].form; - PgfTokens* toks2 = skp->default_form; - bool skip = pgf_tokens_equal(toks, toks2); + PgfSequence* syms = skp->forms[i].form; + PgfSequence* syms2 = skp->default_form; + bool skip = false; /*pgf_tokens_equal(toks, toks2); for (size_t j = 0; j < i; j++) { PgfTokens* toks2 = skp->forms[j].form; skip |= pgf_tokens_equal(toks, toks2); - } + }*/ if (!skip) { - tok = gu_seq_get(toks, PgfToken, 0); new_item = pgf_item_copy(item, after->ps->pool, after->ps); - new_item->tok_idx++; - new_item->alt = i; - if (new_item->tok_idx == gu_seq_length(toks)) { - new_item->tok_idx = 0; - pgf_item_advance(new_item, after->ps->pool); - } - pgf_parsing_add_transition(before, after, tok, new_item); + new_item->alt = i+2; + new_item->alt_idx = 0; + sym = gu_seq_get(syms, PgfSymbol, new_item->alt_idx); + pgf_parsing_symbol(before, after, new_item, sym); } } - } else if (alt == 0) { - PgfToken tok = - gu_seq_get(skp->default_form, PgfToken, idx); - item->tok_idx++; - if (item->tok_idx == gu_seq_length(skp->default_form)) { - item->tok_idx = 0; - pgf_item_advance(item, after->ps->pool); - } - pgf_parsing_add_transition(before, after, tok, item); } else { - gu_assert(alt <= skp->n_forms); - PgfTokens* toks = skp->forms[alt - 1].form; - PgfToken tok = gu_seq_get(toks, PgfToken, idx); - item->tok_idx++; - if (item->tok_idx == gu_seq_length(toks)) { - item->tok_idx = 0; + PgfSequence* syms = + (item->alt == 1) ? 
skp->default_form : + skp->forms[item->alt-2].form; + + if (item->alt_idx < gu_seq_length(syms)) { + sym = gu_seq_get(syms, PgfSymbol, item->alt_idx); + pgf_parsing_symbol(before, after, item, sym); + } else { + item->alt = 0; pgf_item_advance(item, after->ps->pool); + gu_buf_heap_push(before->agenda, &pgf_item_prob_order, &item); } - pgf_parsing_add_transition(before, after, tok, item); } } break; @@ -1357,7 +1338,7 @@ pgf_parsing_symbol(PgfParseState* before, PgfParseState* after, // XXX TODO proper support break; case PGF_SYMBOL_NE: { - // Nothing to be done here + pgf_item_free(before, after, item); break; } default: @@ -1450,8 +1431,7 @@ pgf_parsing_item(PgfParseState* before, PgfParseState* after, PgfItem* item) gu_alignof(PgfSymbolKS), &item->curr_sym, after->ps->pool); *((PgfSymbol*)(sks+1)) = prev; - sks->tokens = gu_new_seq(PgfToken, 1, after->ps->pool); - gu_seq_set(sks->tokens, PgfToken, 0, tok); + sks->token = tok; item->seq_idx++; pgf_parsing_add_transition(before, after, tok, item); @@ -1755,9 +1735,7 @@ typedef struct { } PgfPrefixTokenState; static GuString -pgf_get_tokens(PgfSequence* seq, - uint16_t seq_idx, uint8_t tok_idx, - GuPool* pool) +pgf_get_tokens(PgfSequence* seq, uint16_t seq_idx, GuPool* pool) { GuPool* tmp_pool = gu_new_pool(); GuExn* err = gu_new_exn(NULL, gu_kind(type), tmp_pool); @@ -1773,17 +1751,7 @@ pgf_get_tokens(PgfSequence* seq, switch (i.tag) { case PGF_SYMBOL_KS: { PgfSymbolKS* symks = i.data; - size_t len = gu_seq_length(symks->tokens); - for (size_t i = tok_idx; i < len; i++) { - if (i > 0) { - gu_putc(' ', out, err); - } - - PgfToken tok = gu_seq_get(symks->tokens, PgfToken, i); - gu_string_write(tok, out, err); - } - - tok_idx = 0; + gu_string_write(symks->token, out, err); } default: goto end; @@ -1809,18 +1777,9 @@ pgf_prefix_match_token(PgfTokenState* ts0, PgfToken tok, PgfItem* item) PgfSequence* seq; pgf_item_sequence(item, &lin_idx, &seq, ts->pool); - uint16_t seq_idx = item->seq_idx; - uint8_t tok_idx = 
item->tok_idx; - - // go one token back - if (tok_idx > 0) - tok_idx--; - else - seq_idx--; - ts->tp = gu_new(PgfTokenProb, ts->pool); ts->tp->tok = - pgf_get_tokens(seq, seq_idx, tok_idx, ts->pool); + pgf_get_tokens(seq, item->seq_idx-1, ts->pool); ts->tp->cat = item->conts->ccat->cnccat->abscat->name; ts->tp->prob = item->inside_prob+item->conts->outside_prob; } @@ -2346,17 +2305,15 @@ pgf_morpho_iter(GuMapItor* fn, const void* key, void* value, GuExn* err) switch (i.tag) { case PGF_SYMBOL_KS: { PgfSymbolKS* symks = i.data; - size_t len = gu_seq_length(symks->tokens); - for (size_t i = 0; i < len; i++) { - if (pos >= gu_seq_length(clo->tokens)) - goto cont; + + if (pos >= gu_seq_length(clo->tokens)) + goto cont; - PgfToken tok1 = gu_seq_get(symks->tokens, PgfToken, i); - PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++); - - if (!gu_string_eq(tok1, tok2)) - goto cont; - } + PgfToken tok1 = symks->token; + PgfToken tok2 = gu_seq_get(clo->tokens, PgfToken, pos++); + + if (!gu_string_eq(tok1, tok2)) + goto cont; } default: continue; @@ -2443,7 +2400,7 @@ pgf_fullform_iter(GuMapItor* fn, const void* key, void* value, GuExn* err) PgfProductionApply* papp = i.data; PgfSequence* seq = papp->fun->lins[cfc.lin_idx]; - GuString tokens = pgf_get_tokens(seq, 0, 0, st->pool); + GuString tokens = pgf_get_tokens(seq, 0, st->pool); // create a new production index with keys that // are multiword units @@ -2531,12 +2488,10 @@ pgf_fullform_get_analyses(PgfFullFormEntry* entry, static void pgf_parser_index_token(PgfConcr* concr, - PgfTokens* tokens, + PgfToken tok, PgfCCat* ccat, size_t lin_idx, PgfProduction prod, GuPool *pool) { - PgfToken tok = gu_seq_get(tokens, PgfToken, 0); - PgfProductionIdx* set = gu_map_get(concr->leftcorner_tok_idx, &tok, PgfProductionIdx*); if (set == NULL) { @@ -2570,6 +2525,47 @@ pgf_parser_index_epsilon(PgfConcr* concr, gu_buf_push(prods, PgfProduction, prod); } +static void +pgf_parser_index_symbol(PgfConcr* concr, PgfSymbol sym, + PgfCCat* 
ccat, size_t lin_idx, PgfProduction prod, + GuPool *pool) +{ + GuVariantInfo i = gu_variant_open(sym); + switch (i.tag) { + case PGF_SYMBOL_KS: { + PgfSymbolKS* sks = i.data; + pgf_parser_index_token(concr, + sks->token, + ccat, lin_idx, prod, + pool); + break; + } + case PGF_SYMBOL_KP: { + PgfSymbolKP* skp = i.data; + PgfSymbol sym = + gu_seq_get(skp->default_form, PgfSymbol, 0); + pgf_parser_index_symbol(concr, sym, + ccat, lin_idx, prod, + pool); + for (size_t i = 0; i < skp->n_forms; i++) { + sym = gu_seq_get(skp->forms[i].form, PgfSymbol, 0); + pgf_parser_index_symbol(concr, sym, + ccat, lin_idx, prod, + pool); + } + break; + } + case PGF_SYMBOL_CAT: + case PGF_SYMBOL_LIT: + case PGF_SYMBOL_NE: + case PGF_SYMBOL_VAR: + // Nothing to be done here + break; + default: + gu_impossible(); + } +} + void pgf_parser_index(PgfConcr* concr, PgfCCat* ccat, PgfProduction prod, @@ -2586,39 +2582,9 @@ pgf_parser_index(PgfConcr* concr, PgfSequence* seq = papp->fun->lins[lin_idx]; if (gu_seq_length(seq) > 0) { - GuVariantInfo i = gu_variant_open(gu_seq_get(seq, PgfSymbol, 0)); - switch (i.tag) { - case PGF_SYMBOL_KS: { - PgfSymbolKS* sks = i.data; - pgf_parser_index_token(concr, - sks->tokens, - ccat, lin_idx, prod, - pool); - break; - } - case PGF_SYMBOL_KP: { - PgfSymbolKP* skp = i.data; - pgf_parser_index_token(concr, - skp->default_form, - ccat, lin_idx, prod, - pool); - for (size_t i = 0; i < skp->n_forms; i++) { - pgf_parser_index_token(concr, - skp->forms[i].form, - ccat, lin_idx, prod, - pool); - } - break; - } - case PGF_SYMBOL_CAT: - case PGF_SYMBOL_LIT: - case PGF_SYMBOL_NE: - case PGF_SYMBOL_VAR: - // Nothing to be done here - break; - default: - gu_impossible(); - } + pgf_parser_index_symbol(concr, gu_seq_get(seq, PgfSymbol, 0), + ccat, lin_idx, prod, + pool); } else { pgf_parser_index_epsilon(concr, ccat, lin_idx, prod, diff --git a/src/runtime/c/pgf/parseval.c b/src/runtime/c/pgf/parseval.c index 84d93b346..8b38d252d 100644 --- a/src/runtime/c/pgf/parseval.c 
+++ b/src/runtime/c/pgf/parseval.c @@ -19,19 +19,14 @@ typedef struct { } PgfMetricsLznState; static void -pgf_metrics_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_metrics_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfMetricsLznState* state = gu_container(funcs, PgfMetricsLznState, funcs); - - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - PgfToken tok = gu_seq_get(toks, PgfToken, i); - - if (state->ps != NULL) - state->ps = pgf_parser_next_state(state->ps, tok); + + if (state->ps != NULL) + state->ps = pgf_parser_next_state(state->ps, tok); - state->pos++; - } + state->pos++; } static void @@ -128,17 +123,17 @@ pgf_metrics_lzn_end_phrase2(PgfLinFuncs** funcs, PgfCId cat, int fid, int lin_id } static PgfLinFuncs pgf_metrics_lin_funcs1 = { - .symbol_tokens = pgf_metrics_lzn_symbol_tokens, - .expr_literal = pgf_metrics_lzn_expr_literal, - .begin_phrase = pgf_metrics_lzn_begin_phrase, - .end_phrase = pgf_metrics_lzn_end_phrase1 + .symbol_token = pgf_metrics_lzn_symbol_token, + .expr_literal = pgf_metrics_lzn_expr_literal, + .begin_phrase = pgf_metrics_lzn_begin_phrase, + .end_phrase = pgf_metrics_lzn_end_phrase1 }; static PgfLinFuncs pgf_metrics_lin_funcs2 = { - .symbol_tokens = pgf_metrics_lzn_symbol_tokens, - .expr_literal = pgf_metrics_lzn_expr_literal, - .begin_phrase = pgf_metrics_lzn_begin_phrase, - .end_phrase = pgf_metrics_lzn_end_phrase2 + .symbol_token = pgf_metrics_lzn_symbol_token, + .expr_literal = pgf_metrics_lzn_expr_literal, + .begin_phrase = pgf_metrics_lzn_begin_phrase, + .end_phrase = pgf_metrics_lzn_end_phrase2 }; bool diff --git a/src/runtime/c/pgf/printer.c b/src/runtime/c/pgf/printer.c index 79aac5afd..82ce0bf45 100644 --- a/src/runtime/c/pgf/printer.c +++ b/src/runtime/c/pgf/printer.c @@ -196,19 +196,16 @@ pgf_print_cncfun(PgfCncFun *cncfun, PgfSequences* sequences, } static void -pgf_print_tokens(PgfTokens* tokens, GuOut *out, GuExn *err) +pgf_print_token(PgfToken tok, GuOut *out, GuExn 
*err) { gu_putc('"', out, err); - size_t n_toks = gu_seq_length(tokens); - for (size_t i = 0; i < n_toks; i++) { - if (i > 0) gu_putc(' ', out, err); - - PgfToken tok = gu_seq_get(tokens, PgfToken, i); - gu_string_write(tok, out, err); - } + gu_string_write(tok, out, err); gu_putc('"', out, err); } +static void +pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err); + void pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) { @@ -220,18 +217,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) } case PGF_SYMBOL_KS: { PgfSymbolKS* sks = gu_variant_data(sym); - pgf_print_tokens(sks->tokens, out, err); + pgf_print_token(sks->token, out, err); break; } case PGF_SYMBOL_KP: { PgfSymbolKP* skp = gu_variant_data(sym); gu_puts("pre {", out, err); - pgf_print_tokens(skp->default_form, out, err); + pgf_print_sequence(skp->default_form, out, err); for (size_t i = 0; i < skp->n_forms; i++) { gu_puts("; ", out, err); - pgf_print_tokens(skp->forms[i].form, out, err); + pgf_print_sequence(skp->forms[i].form, out, err); gu_puts(" / ", out, err); size_t n_prefixes = gu_seq_length(skp->forms[i].prefixes); @@ -262,16 +259,18 @@ pgf_print_symbol(PgfSymbol sym, GuOut *out, GuExn *err) gu_puts("nonExist", out, err); break; } + case PGF_SYMBOL_BIND: { + gu_puts("BIND", out, err); + break; + } default: gu_impossible(); } } static void -pgf_print_sequence(size_t seqid, PgfSequence* seq, GuOut *out, GuExn *err) +pgf_print_sequence(PgfSequence* seq, GuOut *out, GuExn *err) { - gu_printf(out,err," S%d := ", seqid); - int n_syms = gu_seq_length(seq); for (int i = 0; i < n_syms; i++) { if (i > 0) gu_putc(' ', out, err); @@ -279,8 +278,6 @@ pgf_print_sequence(size_t seqid, PgfSequence* seq, GuOut *out, GuExn *err) PgfSymbol sym = gu_seq_get(seq, PgfSymbol, i); pgf_print_symbol(sym, out, err); } - - gu_putc('\n', out, err); } static void @@ -342,7 +339,10 @@ pgf_print_concrete(PgfCId cncname, PgfConcr* concr, size_t n_seqs = gu_seq_length(concr->sequences); for (size_t i = 0; i 
< n_seqs; i++) { PgfSequence* seq = gu_seq_get(concr->sequences, PgfSequence*, i); - pgf_print_sequence(i, seq, out, err); + + gu_printf(out,err," S%d := ", i); + pgf_print_sequence(seq, out, err); + gu_putc('\n', out, err); } gu_puts(" categories\n", out, err); diff --git a/src/runtime/c/pgf/reader.c b/src/runtime/c/pgf/reader.c index 95b5a4c04..ebc5050e4 100644 --- a/src/runtime/c/pgf/reader.c +++ b/src/runtime/c/pgf/reader.c @@ -586,27 +586,13 @@ pgf_read_printnames(PgfReader* rdr) return printnames; } -static PgfTokens* -pgf_read_tokens(PgfReader* rdr) -{ - size_t len = pgf_read_len(rdr); - gu_return_on_exn(rdr->err, NULL); - - PgfTokens* tokens = gu_new_seq(PgfToken, len, rdr->opool); - for (size_t i = 0; i < len; i++) { - PgfToken token = pgf_read_string(rdr); - gu_return_on_exn(rdr->err, NULL); - - gu_seq_set(tokens, PgfToken, i, token); - } - - return tokens; -} +static PgfSequence* +pgf_read_sequence(PgfReader* rdr); static void pgf_read_alternative(PgfReader* rdr, PgfAlternative* alt) { - alt->form = pgf_read_tokens(rdr); + alt->form = pgf_read_sequence(rdr); gu_return_on_exn(rdr->err,); size_t n_prefixes = pgf_read_len(rdr); @@ -672,12 +658,12 @@ pgf_read_symbol(PgfReader* rdr) gu_new_variant(PGF_SYMBOL_KS, PgfSymbolKS, &sym, rdr->opool); - sym_ks->tokens = pgf_read_tokens(rdr); + sym_ks->token = pgf_read_string(rdr); gu_return_on_exn(rdr->err, gu_null_variant); break; } case PGF_SYMBOL_KP: { - PgfTokens* default_form = pgf_read_tokens(rdr); + PgfSequence* default_form = pgf_read_sequence(rdr); gu_return_on_exn(rdr->err, gu_null_variant); size_t n_forms = pgf_read_len(rdr); @@ -703,6 +689,13 @@ pgf_read_symbol(PgfReader* rdr) gu_return_on_exn(rdr->err, gu_null_variant); break; } + case PGF_SYMBOL_BIND: { + gu_new_variant(PGF_SYMBOL_BIND, + PgfSymbolBIND, + &sym, rdr->opool); + gu_return_on_exn(rdr->err, gu_null_variant); + break; + } default: pgf_read_tag_error(rdr); } diff --git a/src/runtime/haskell/PGF/Binary.hs b/src/runtime/haskell/PGF/Binary.hs 
index 865f98417..3c9dcc265 100644 --- a/src/runtime/haskell/PGF/Binary.hs +++ b/src/runtime/haskell/PGF/Binary.hs @@ -76,10 +76,6 @@ instance Binary Concr where , cnccats=cnccats, totalCats=totalCats }) -instance Binary Alternative where - put (Alt v x) = put (v,x) - get = liftM2 Alt get get - instance Binary Expr where put (EAbs b x exp) = putWord8 0 >> put (b,x,exp) put (EApp e1 e2) = putWord8 1 >> put (e1,e2) @@ -153,6 +149,7 @@ instance Binary Symbol where put (SymKS ts) = putWord8 3 >> put ts put (SymKP d vs) = putWord8 4 >> put (d,vs) put SymNE = putWord8 5 + put SymBIND = putWord8 6 get = do tag <- getWord8 case tag of 0 -> liftM2 SymCat get get @@ -161,6 +158,7 @@ instance Binary Symbol where 3 -> liftM SymKS get 4 -> liftM2 (\d vs -> SymKP d vs) get get 5 -> return SymNE + 6 -> return SymBIND _ -> decodingError instance Binary PArg where diff --git a/src/runtime/haskell/PGF/Data.hs b/src/runtime/haskell/PGF/Data.hs index 58ced6a1e..e86b02778 100644 --- a/src/runtime/haskell/PGF/Data.hs +++ b/src/runtime/haskell/PGF/Data.hs @@ -58,9 +58,10 @@ data Symbol = SymCat {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex | SymLit {-# UNPACK #-} !Int {-# UNPACK #-} !LIndex | SymVar {-# UNPACK #-} !Int {-# UNPACK #-} !Int - | SymKS [Token] - | SymKP [Token] [Alternative] + | SymKS Token | SymNE -- non exist + | SymBIND -- the special BIND token + | SymKP [Symbol] [([Symbol],[String])] deriving (Eq,Ord,Show) data Production = PApply {-# UNPACK #-} !FunId [PArg] @@ -75,10 +76,6 @@ type FunId = Int type SeqId = Int type BCAddr = Int -data Alternative = - Alt [Token] [String] - deriving (Eq,Ord,Show) - -- merge two PGFs; fails is differens absnames; priority to second arg diff --git a/src/runtime/haskell/PGF/Forest.hs b/src/runtime/haskell/PGF/Forest.hs index 9c47583ad..e6e3c1136 100644 --- a/src/runtime/haskell/PGF/Forest.hs +++ b/src/runtime/haskell/PGF/Forest.hs @@ -80,7 +80,7 @@ bracketedTokn dp f@(Forest abs cnc forest root) = ltable = mkLinTable cnc isTrusted [] funid 
largs in ((cat,fid),wildCId,either (const []) id $ getAbsTrees f arg Nothing dp,ltable) descend forest (PCoerce fid) = render forest (PArg [] fid) - descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [[LeafKS ts]])) + descend forest (PConst cat e ts) = ((cat,fid),wildCId,[e],([],listArray (0,0) [map LeafKS ts])) getVar (fid,_) | fid == fidVar = wildCId diff --git a/src/runtime/haskell/PGF/Linearize.hs b/src/runtime/haskell/PGF/Linearize.hs index cf4c78193..7ff7d9c7a 100644 --- a/src/runtime/haskell/PGF/Linearize.hs +++ b/src/runtime/haskell/PGF/Linearize.hs @@ -82,7 +82,7 @@ linTree pgf lang e = LInt n -> return (n_fid+1,((cidInt, n_fid),wildCId,[e0],([],ss (show n)))) LFlt f -> return (n_fid+1,((cidFloat, n_fid),wildCId,[e0],([],ss (show f)))) - ss s = listArray (0,0) [[LeafKS [s]]] + ss s = listArray (0,0) [[LeafKS s]] apply :: Maybe CncType -> FId -> Expr -> [CId] -> [CId] -> CId -> [Expr] -> [(FId,(CncType, CId, [Expr], LinTable))] apply mb_cty n_fid e0 ys xs f es = @@ -115,7 +115,7 @@ linTree pgf lang e = let args = [((wildCId, n_fid),wildCId,[e0],([],ss s))] return (n_fid+2,((cat,n_fid+1),wildCId,[e0],mkLinTable cnc (const True) xs funid args)) Nothing - | isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS [s]]]))) + | isPredefFId fid -> return (n_fid+2,((cat,n_fid+1),wildCId,[e0],(xs,listArray (0,0) [[LeafKS s]]))) | otherwise -> do PCoerce fid <- maybe [] Set.toList (IntMap.lookup fid (pproductions cnc)) def (Just (cat,fid)) n_fid e0 ys xs s def Nothing n_fid e0 ys xs s = [] diff --git a/src/runtime/haskell/PGF/Macros.hs b/src/runtime/haskell/PGF/Macros.hs index 2497d53ae..ffec9279f 100644 --- a/src/runtime/haskell/PGF/Macros.hs +++ b/src/runtime/haskell/PGF/Macros.hs @@ -156,9 +156,11 @@ data BracketedString -- that represents the same constituent. 
data BracketedTokn - = LeafKS [Token] - | LeafKP [Token] [Alternative] - | Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty + = Bracket_ CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [Expr] [BracketedTokn] -- Invariant: the list is not empty + | LeafKS Token + | LeafNE + | LeafBIND + | LeafKP [BracketedTokn] [([BracketedTokn],[String])] deriving Eq type LinTable = ([CId],Array.Array LIndex [BracketedTokn]) @@ -178,21 +180,30 @@ lengthBracketedString (Leaf _) = 1 lengthBracketedString (Bracket _ _ _ _ _ bss) = sum (map lengthBracketedString bss) untokn :: Maybe String -> BracketedTokn -> (Maybe String,[BracketedString]) -untokn nw (LeafKS ts) = (has_tok nw ts,map Leaf ts) -untokn nw (LeafKP d vs) = let ts = filter (not . null) (sel d vs nw) - in (has_tok nw ts,map Leaf ts) - where - sel d vs Nothing = d - sel d vs (Just w) = - case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of - v:_ -> v - _ -> d -untokn nw (Bracket_ cat fid index fun es bss) = - let (nw',bss') = mapAccumR untokn nw bss - in (nw',[Bracket cat fid index fun es (concat bss')]) - -has_tok nw [] = nw -has_tok nw (t:ts) = Just t +untokn nw bs = + case untokn nw bs of + (nw,Nothing ) -> (nw,[] ) + (nw,Just bss) -> (nw,bss) + where + untokn nw (Bracket_ cat fid index fun es bss) = + let (nw',bss') = mapAccumR untokn nw bss + in case sequence bss' of + Just bss -> (nw',Just [Bracket cat fid index fun es (concat bss)]) + Nothing -> (Nothing, Nothing) + untokn nw (LeafKS t) + | null t = (nw,Just []) + | otherwise = (Just t,Just [Leaf t]) + untokn nw LeafNE = (Nothing, Nothing) + untokn nw (LeafKP d vs) = let (nw',bss') = mapAccumR untokn nw (sel d vs nw) + in case sequence bss' of + Just bss -> (nw',Just (concat bss)) + Nothing -> (Nothing, Nothing) + where + sel d vs Nothing = d + sel d vs (Just w) = + case [v | (v,cs) <- vs, any (\c -> isPrefixOf c w) cs] of + v:_ -> v + _ -> d type CncType = (CId, FId) -- concrete type is 
the abstract type (the category) + the forest id @@ -204,11 +215,13 @@ mkLinTable cnc filter xs funid args = (xs,listArray (bounds lins) [computeSeq fi computeSeq :: (CncType -> Bool) -> [Symbol] -> [(CncType,CId,[Expr],LinTable)] -> [BracketedTokn] computeSeq filter seq args = concatMap compute seq where - compute (SymCat d r) = getArg d r - compute (SymLit d r) = getArg d r - compute (SymVar d r) = getVar d r - compute (SymKS ts) = [LeafKS ts] - compute (SymKP ts alts) = [LeafKP ts alts] + compute (SymCat d r) = getArg d r + compute (SymLit d r) = getArg d r + compute (SymVar d r) = getVar d r + compute (SymKS t) = [LeafKS t] + compute SymNE = [LeafNE] + compute SymBIND = [LeafKS "&+"] + compute (SymKP syms alts) = [LeafKP (concatMap compute syms) [(concatMap compute syms,cs) | (syms,cs) <- alts]] getArg d r | not (null arg_lin) && @@ -218,7 +231,7 @@ computeSeq filter seq args = concatMap compute seq arg_lin = lin ! r (ct@(cat,fid),fun,es,(xs,lin)) = args !! d - getVar d r = [LeafKS [showCId (xs !! r)]] + getVar d r = [LeafKS (showCId (xs !! r))] where (ct,fun,es,(xs,lin)) = args !! d diff --git a/src/runtime/haskell/PGF/Morphology.hs b/src/runtime/haskell/PGF/Morphology.hs index 2f8fdecc2..894b64dfb 100644 --- a/src/runtime/haskell/PGF/Morphology.hs +++ b/src/runtime/haskell/PGF/Morphology.hs @@ -36,8 +36,8 @@ collectWords pinfo = Map.fromListWith (++) , sym <- elems (sequences pinfo ! 
seqid) , t <- sym2tokns sym] where - sym2tokns (SymKS ts) = ts - sym2tokns (SymKP ts alts) = ts ++ [t | Alt ts ps <- alts, t <- ts] + sym2tokns (SymKS t) = [t] + sym2tokns (SymKP ts alts) = concat (map sym2tokns ts ++ [sym2tokns sym | (syms,ps) <- alts, sym <- syms]) sym2tokns _ = [] lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)] diff --git a/src/runtime/haskell/PGF/Optimize.hs b/src/runtime/haskell/PGF/Optimize.hs index a339c9add..f04a8b04c 100644 --- a/src/runtime/haskell/PGF/Optimize.hs +++ b/src/runtime/haskell/PGF/Optimize.hs @@ -221,9 +221,13 @@ splitLexicalRules cnc p_prods = wf ts = (ts,IntSet.singleton funid) - seq2prefix [] = TrieMap.fromList [wf []] - seq2prefix (SymKS ts :syms) = TrieMap.fromList [wf ts] - seq2prefix (SymKP ts alts:syms) = TrieMap.fromList (wf ts : [wf ts | Alt ts ps <- alts]) + seq2prefix [] = TrieMap.fromList [wf []] + seq2prefix (SymKS t :syms) = TrieMap.fromList [wf [t]] + seq2prefix (SymKP syms0 alts:syms) = TrieMap.unionsWith IntSet.union + (seq2prefix (syms0++syms) : + [seq2prefix (syms1 ++ syms) | (syms1,ps) <- alts]) + seq2prefix (SymNE :syms) = TrieMap.empty + seq2prefix (SymBIND :syms) = TrieMap.fromList [wf ["&+"]] updateConcrete abs cnc = let p_prods0 = filterProductions IntMap.empty IntSet.empty (productions cnc) diff --git a/src/runtime/haskell/PGF/Parse.hs b/src/runtime/haskell/PGF/Parse.hs index 7057db3bc..e50f8436e 100644 --- a/src/runtime/haskell/PGF/Parse.hs +++ b/src/runtime/haskell/PGF/Parse.hs @@ -244,14 +244,12 @@ getParseOutput (PState abs cnc chart cnt) ty@(DTyp _ start _) dp = flit _ = Nothing ftok toks = TrieMap.unionWith Set.union (TrieMap.compose Nothing toks) - cutAt ppos toks seqid = + cutAt ppos toks seqid = let seq = unsafeAt (sequences cnc) seqid init = take (ppos-1) (elems seq) tail = case unsafeAt seq (ppos-1) of - SymKS ts -> let ts' = reverse (drop (length toks) (reverse ts)) - in if null ts' then [] else [SymKS ts'] - SymKP ts _ -> let ts' = reverse (drop (length toks) (reverse ts)) - in 
if null ts' then [] else [SymKS ts'] + SymKS t -> drop (length toks) [SymKS t] + SymKP ts _ -> reverse (drop (length toks) (reverse ts)) sym -> [] in init ++ tail @@ -307,11 +305,18 @@ process flit ftok cnc (item@(Active j ppos funid seqid args key0):items) acc cha Nothing -> process flit ftok cnc items4 acc' chart{active=insertAC key (Set.singleton item,new_sc) (active chart)} Just (set,sc) | Set.member item set -> process flit ftok cnc items acc chart | otherwise -> process flit ftok cnc items2 acc chart{active=insertAC key (Set.insert item set,IntMap.unionWith Set.union new_sc sc) (active chart)} - SymKS toks -> let !acc' = ftok_ toks (Active j (ppos+1) funid seqid args key0) acc + SymKS tok -> let !acc' = ftok_ [tok] (Active j (ppos+1) funid seqid args key0) acc in process flit ftok cnc items acc' chart - SymKP strs vars - -> let !acc' = foldl (\acc toks -> ftok_ toks (Active j (ppos+1) funid seqid args key0) acc) acc - (strs:[strs' | Alt strs' _ <- vars]) + SymNE -> process flit ftok cnc items acc chart + SymBIND -> let !acc' = ftok_ ["&+"] (Active j (ppos+1) funid seqid args key0) acc + in process flit ftok cnc items acc' chart + SymKP syms vars + -> let to_tok (SymKS t) = [t] + to_tok SymBIND = ["&+"] + to_tok _ = [] + + !acc' = foldl (\acc syms -> ftok_ (concatMap to_tok syms) (Active j (ppos+1) funid seqid args key0) acc) acc + (syms:[syms' | (syms',_) <- vars]) in process flit ftok cnc items acc' chart SymLit d r -> let PArg hypos fid = args !! 
d key = AK fid r diff --git a/src/runtime/haskell/PGF/Printer.hs b/src/runtime/haskell/PGF/Printer.hs index c0529b116..9385e81c4 100644 --- a/src/runtime/haskell/PGF/Printer.hs +++ b/src/runtime/haskell/PGF/Printer.hs @@ -89,10 +89,12 @@ ppPrintName (id,name) = ppSymbol (SymCat d r) = char '<' <> int d <> comma <> int r <> char '>' ppSymbol (SymLit d r) = char '{' <> int d <> comma <> int r <> char '}' ppSymbol (SymVar d r) = char '<' <> int d <> comma <> char '$' <> int r <> char '>' -ppSymbol (SymKS ts) = ppStrs ts -ppSymbol (SymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts))) +ppSymbol (SymKS t) = doubleQuotes (text t) +ppSymbol SymNE = text "nonExist" +ppSymbol SymBIND = text "BIND" +ppSymbol (SymKP syms alts) = text "pre" <+> braces (hsep (punctuate semi (hsep (map ppSymbol syms) : map ppAlt alts))) -ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps) +ppAlt (syms,ps) = hsep (map ppSymbol syms) <+> char '/' <+> hsep (map (doubleQuotes . 
text) ps) ppStrs ss = doubleQuotes (hsep (map text ss)) diff --git a/src/runtime/python/pypgf.c b/src/runtime/python/pypgf.c index 27655166b..b5ff53af0 100644 --- a/src/runtime/python/pypgf.c +++ b/src/runtime/python/pypgf.c @@ -1518,17 +1518,13 @@ typedef struct { } PgfBracketLznState; static void -pgf_bracket_lzn_symbol_tokens(PgfLinFuncs** funcs, PgfTokens* toks) +pgf_bracket_lzn_symbol_token(PgfLinFuncs** funcs, PgfToken tok) { PgfBracketLznState* state = gu_container(funcs, PgfBracketLznState, funcs); - size_t len = gu_seq_length(toks); - for (size_t i = 0; i < len; i++) { - PgfToken tok = gu_seq_get(toks, PgfToken, i); - PyObject* str = gu2py_string(tok); - PyList_Append(state->list, str); - Py_DECREF(str); - } + PyObject* str = gu2py_string(tok); + PyList_Append(state->list, str); + Py_DECREF(str); } static void @@ -1600,7 +1596,7 @@ pgf_bracket_lzn_end_phrase(PgfLinFuncs** funcs, PgfCId cat, int fid, int lindex, } static PgfLinFuncs pgf_bracket_lin_funcs = { - .symbol_tokens = pgf_bracket_lzn_symbol_tokens, + .symbol_token = pgf_bracket_lzn_symbol_token, .expr_literal = pgf_bracket_lzn_expr_literal, .begin_phrase = pgf_bracket_lzn_begin_phrase, .end_phrase = pgf_bracket_lzn_end_phrase