From baf78528d308585d2735f2f1e56b9edd7ed7c916 Mon Sep 17 00:00:00 2001 From: krangelov Date: Fri, 3 Dec 2021 09:44:03 +0100 Subject: [PATCH] implement bracketedLinearize --- src/runtime/c/pgf/linearizer.cxx | 147 ++++++++++++++++--------------- src/runtime/c/pgf/linearizer.h | 9 +- src/runtime/c/pgf/pgf.cxx | 20 +++++ src/runtime/c/pgf/pgf.h | 38 ++++++-- src/runtime/haskell/PGF2.hsc | 57 +++++++++++- src/runtime/haskell/PGF2/FFI.hsc | 11 +++ 6 files changed, 202 insertions(+), 80 deletions(-) diff --git a/src/runtime/c/pgf/linearizer.cxx b/src/runtime/c/pgf/linearizer.cxx index 66f2c65a8..42d2050f3 100644 --- a/src/runtime/c/pgf/linearizer.cxx +++ b/src/runtime/c/pgf/linearizer.cxx @@ -2,26 +2,22 @@ #include "printer.h" #include "linearizer.h" -PgfLinearizer::TreeNode::TreeNode(PgfLinearizer *linearizer, ref lin, PgfText *lit) { - this->next = NULL; +PgfLinearizer::TreeNode::TreeNode(PgfLinearizer *linearizer, ref lin, PgfText *lit) +{ + this->next = linearizer->root; this->next_arg = NULL; this->args = linearizer->args; - this->lin = lin; + this->fid = 0; this->literal = lit; + this->lin = lin; this->lin_index = 0; this->value = 0; this->var_count = 0; this->var_values= NULL; - if (linearizer->first == NULL) { - linearizer->first = this; - linearizer->root = this; - } else { - linearizer->root->next = this; - linearizer->root = this; - } + linearizer->root= this; } size_t PgfLinearizer::TreeNode::eval_param(PgfLParam *param) @@ -46,6 +42,8 @@ PgfLinearizer::PgfLinearizer(ref concr, PgfMarshaller *m) { this->root = NULL; this->first = NULL; this->args = NULL; + this->capit = false; + this->allcapit = false; }; PgfLinearizer::~PgfLinearizer() @@ -150,6 +148,22 @@ bool PgfLinearizer::resolve() return true; } +void PgfLinearizer::reverse_and_label() +{ + // Reverse the list of nodes and label them with fid; + int fid = 0; + TreeNode *node = root; + while (node != NULL) { + TreeNode *tmp = node->next; + + node->fid = fid++; + node->next = first; + + first = node; + node = tmp; + } +} + void PgfLinearizer::linearize(PgfLinearizationOutputIface *out, TreeNode *node, ref> syms) { ref> hypos = node->lin->absfun->type->hypos; @@ -172,9 +186,15 @@ void PgfLinearizer::linearize(PgfLinearizationOutputIface *out, TreeNode *node, size_t lindex = node->eval_param(&sym_cat->r); PgfText *cat = &vector_elem(hypos, sym_cat->d)->type->name; - out->begin_phrase(cat, 0, NULL, &node->lin->name); + PgfText *field = NULL; + ref lincat = namespace_lookup(concr->lincats, cat); + if (lincat != 0) { + field = &(**vector_elem(lincat->fields, lindex)); + } + + out->begin_phrase(cat, arg->fid, field, &arg->lin->name); linearize(out, arg, lindex); - out->end_phrase(cat, 0, NULL, &node->lin->name); + out->end_phrase(cat, arg->fid, field, &arg->lin->name); break; } case PgfSymbolLit::tag: { @@ -202,7 +222,49 @@ void PgfLinearizer::linearize(PgfLinearizationOutputIface *out, TreeNode *node, } case PgfSymbolKS::tag: { auto sym_ks = ref::untagged(sym); - out->symbol_token(&sym_ks->token); + + if (capit) { + PgfText *cap = (PgfText *) alloca(sizeof(PgfText)+sym_ks->token.size+6); + + const uint8_t *p = (const uint8_t *) sym_ks->token.text; + const uint8_t *end = p + sym_ks->token.size; + + uint8_t *q = (uint8_t *) cap->text; + + uint32_t ucs = pgf_utf8_decode(&p); + ucs = pgf_utf8_to_upper(ucs); + pgf_utf8_encode(ucs,&q); + + memcpy(q, p, (end - p)+1); + q += (end - p); + + cap->size = q - (uint8_t *) cap->text; + out->symbol_token(cap); + + capit = false; + } else if (allcapit) { + PgfText *cap = (PgfText *) alloca(sizeof(PgfText)+sym_ks->token.size*6); + + const uint8_t *p = (const uint8_t *) sym_ks->token.text; + const uint8_t *end = p + sym_ks->token.size; + + uint8_t *q = (uint8_t *) cap->text; + + while (p != end) { + uint32_t ucs = pgf_utf8_decode(&p); + ucs = pgf_utf8_to_upper(ucs); + pgf_utf8_encode(ucs,&q); + } + + cap->size = q - (uint8_t *) cap->text; + *q = 0; + + out->symbol_token(cap); + + allcapit = false; + } else { + out->symbol_token(&sym_ks->token); + } break; } case PgfSymbolKP::tag: { @@ -223,10 +285,10 @@ void PgfLinearizer::linearize(PgfLinearizationOutputIface *out, TreeNode *node, // Nothing to do break; case PgfSymbolCAPIT::tag: - out->symbol_capit(); + capit = true; break; case PgfSymbolALLCAPIT::tag: - out->symbol_allcapit(); + allcapit = true; break; } } @@ -325,8 +387,6 @@ PgfLinearizationOutput::PgfLinearizationOutput() : printer(NULL,0,NULL) { bind = true; nonexist = false; - capit = false; - allcapit = false; } PgfText *PgfLinearizationOutput::get_text() @@ -345,48 +405,7 @@ void PgfLinearizationOutput::symbol_token(PgfText *tok) } bind = false; - if (capit) { - PgfText *cap = (PgfText *) alloca(sizeof(PgfText)+tok->size+6); - - const uint8_t *p = (const uint8_t *) tok->text; - const uint8_t *end = p + tok->size; - - uint8_t *q = (uint8_t *) cap->text; - - uint32_t ucs = pgf_utf8_decode(&p); - ucs = pgf_utf8_to_upper(ucs); - pgf_utf8_encode(ucs,&q); - - memcpy(q, p, (end - p)+1); - q += (end - p); - - cap->size = q - (uint8_t *) cap->text; - printer.puts(cap); - - capit = false; - } else if (allcapit) { - PgfText *cap = (PgfText *) alloca(sizeof(PgfText)+tok->size*6); - - const uint8_t *p = (const uint8_t *) tok->text; - const uint8_t *end = p + tok->size; - - uint8_t *q = (uint8_t *) cap->text; - - while (p != end) { - uint32_t ucs = pgf_utf8_decode(&p); - ucs = pgf_utf8_to_upper(ucs); - pgf_utf8_encode(ucs,&q); - } - - cap->size = q - (uint8_t *) cap->text; - *q = 0; - - printer.puts(cap); - - allcapit = false; - } else { - printer.puts(tok); - } + printer.puts(tok); } void PgfLinearizationOutput::begin_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun) @@ -407,16 +426,6 @@ void PgfLinearizationOutput::symbol_bind() bind = true; } -void PgfLinearizationOutput::symbol_capit() -{ - capit = true; -} - -void PgfLinearizationOutput::symbol_allcapit() -{ - allcapit = true; -} - void PgfLinearizationOutput::symbol_meta(PgfMetaId id) { printer.nprintf(32, "?%d", id); diff --git a/src/runtime/c/pgf/linearizer.h b/src/runtime/c/pgf/linearizer.h index e1ac38a4e..7cac25cb1 100644 --- a/src/runtime/c/pgf/linearizer.h +++ b/src/runtime/c/pgf/linearizer.h @@ -5,8 +5,6 @@ class PGF_INTERNAL_DECL PgfLinearizationOutput : public PgfLinearizationOutputIf PgfPrinter printer; bool bind; bool nonexist; - bool capit; - bool allcapit; public: PgfLinearizationOutput(); @@ -18,8 +16,6 @@ public: virtual void end_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun); virtual void symbol_ne(); virtual void symbol_bind(); - virtual void symbol_capit(); - virtual void symbol_allcapit(); virtual void symbol_meta(PgfMetaId id); }; @@ -32,6 +28,7 @@ class PGF_INTERNAL_DECL PgfLinearizer : public PgfUnmarshaller { TreeNode *next_arg; TreeNode *args; + int fid; PgfText *literal; // != NULL if literal ref lin; // != 0 if function size_t lin_index; @@ -49,6 +46,9 @@ class PGF_INTERNAL_DECL PgfLinearizer : public PgfUnmarshaller { TreeNode *first; TreeNode *args; + bool capit; + bool allcapit; + void linearize(PgfLinearizationOutputIface *out, TreeNode *node, ref> syms); void linearize(PgfLinearizationOutputIface *out, TreeNode *node, size_t lindex); @@ -56,6 +56,7 @@ public: PgfLinearizer(ref concr, PgfMarshaller *m); bool resolve(); + void reverse_and_label(); void linearize(PgfLinearizationOutputIface *out) { linearize(out, root, 0); } diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx index 91fa38e91..08204a9bb 100644 --- a/src/runtime/c/pgf/pgf.cxx +++ b/src/runtime/c/pgf/pgf.cxx @@ -1860,6 +1860,7 @@ PgfText *pgf_linearize(PgfDB *db, PgfConcrRevision revision, PgfLinearizationOutput out; PgfLinearizer linearizer(concr, m); m->match_expr(&linearizer, expr); + linearizer.reverse_and_label(); if (linearizer.resolve()) { linearizer.linearize(&out); return out.get_text(); @@ -1869,6 +1870,25 @@ PgfText *pgf_linearize(PgfDB *db, PgfConcrRevision revision, return NULL; } +PGF_API_DECL +void pgf_bracketed_linearize(PgfDB *db, PgfConcrRevision revision, + PgfExpr expr, PgfMarshaller *m, + PgfLinearizationOutputIface *out, + PgfExn* err) +{ + PGF_API_BEGIN { + DB_scope scope(db, READER_SCOPE); + + ref concr = PgfDB::revision2concr(revision); + PgfLinearizer linearizer(concr, m); + m->match_expr(&linearizer, expr); + linearizer.reverse_and_label(); + if (linearizer.resolve()) { + linearizer.linearize(out); + } + } PGF_API_END +} + PGF_API PgfLiteral pgf_get_global_flag(PgfDB *db, PgfRevision revision, PgfText *name, diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h index bc56c0188..ad7ab0243 100644 --- a/src/runtime/c/pgf/pgf.h +++ b/src/runtime/c/pgf/pgf.h @@ -587,16 +587,36 @@ struct PgfLinearizationOutputIface /// token binding virtual void symbol_bind()=0; - /// capitalization - virtual void symbol_capit()=0; - - /// capitalization - virtual void symbol_allcapit()=0; - /// meta variable virtual void symbol_meta(PgfMetaId id)=0; }; #else +typedef struct PgfLinearizationOutputIface PgfLinearizationOutputIface; +typedef struct PgfLinearizationOutputIfaceVtbl PgfLinearizationOutputIfaceVtbl; +struct PgfLinearizationOutputIfaceVtbl +{ + /// Output tokens + void (*symbol_token)(PgfLinearizationOutputIface *this, PgfText *tok); + + /// Begin phrase + void (*begin_phrase)(PgfLinearizationOutputIface *this, PgfText *cat, int fid, PgfText *ann, PgfText *fun); + + /// End phrase + void (*end_phrase)(PgfLinearizationOutputIface *this, PgfText *cat, int fid, PgfText *ann, PgfText *fun); + + /// handling nonExist + void (*symbol_ne)(PgfLinearizationOutputIface *this); + + /// token binding + void (*symbol_bind)(PgfLinearizationOutputIface *this); + + /// meta variable + void (*symbol_meta)(PgfLinearizationOutputIface *this, PgfMetaId id); +}; +struct PgfLinearizationOutputIface +{ + PgfLinearizationOutputIfaceVtbl *vtbl; +}; #endif PGF_API_DECL @@ -604,6 +624,12 @@ PgfText *pgf_linearize(PgfDB *db, PgfConcrRevision revision, PgfExpr expr, PgfMarshaller *m, PgfExn* err); +PGF_API_DECL +void pgf_bracketed_linearize(PgfDB *db, PgfConcrRevision revision, + PgfExpr expr, PgfMarshaller *m, + PgfLinearizationOutputIface *out, + PgfExn* err); + PGF_API_DECL PgfLiteral pgf_get_global_flag(PgfDB *db, PgfRevision revision, PgfText *name, diff --git a/src/runtime/haskell/PGF2.hsc b/src/runtime/haskell/PGF2.hsc index 382ab0ff1..89d9897ba 100644 --- a/src/runtime/haskell/PGF2.hsc +++ b/src/runtime/haskell/PGF2.hsc @@ -642,7 +642,62 @@ flattenBracketedString (Leaf w) = [w] flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss bracketedLinearize :: Concr -> Expr -> [BracketedString] -bracketedLinearize = error "TODO: bracketedLinearize" +bracketedLinearize c e = unsafePerformIO $ do + ref <- newIORef (False,[],[]) + (withForeignPtr (c_revision c) $ \c_revision -> + bracket (newStablePtr e) freeStablePtr $ \c_e -> + withForeignPtr marshaller $ \m -> + allocaBytes (#size PgfLinearizationOutputIface) $ \c_out -> + allocaBytes (#size PgfLinearizationOutputIfaceVtbl) $ \vtbl -> + bracket (wrapSymbol1 (symbol_token ref)) freeHaskellFunPtr $ \c_symbol_token -> + bracket (wrapSymbol2 (begin_phrase ref)) freeHaskellFunPtr $ \c_begin_phrase -> + bracket (wrapSymbol2 (end_phrase ref)) freeHaskellFunPtr $ \c_end_phrase -> + bracket (wrapSymbol0 (symbol_bind ref)) freeHaskellFunPtr $ \c_symbol_bind -> + bracket (wrapSymbol0 (symbol_ne ref)) freeHaskellFunPtr $ \c_symbol_ne -> + bracket (wrapSymbol3 (symbol_meta ref)) freeHaskellFunPtr $ \c_symbol_meta -> do + (#poke PgfLinearizationOutputIfaceVtbl, symbol_token) vtbl c_symbol_token + (#poke PgfLinearizationOutputIfaceVtbl, begin_phrase) vtbl c_begin_phrase + (#poke PgfLinearizationOutputIfaceVtbl, end_phrase) vtbl c_end_phrase + (#poke PgfLinearizationOutputIfaceVtbl, symbol_bind) vtbl c_symbol_bind + (#poke PgfLinearizationOutputIfaceVtbl, symbol_ne) vtbl c_symbol_ne + (#poke PgfLinearizationOutputIfaceVtbl, symbol_meta) vtbl c_symbol_meta + (#poke PgfLinearizationOutputIface, vtbl) c_out vtbl + withPgfExn "bracketedLinearize" (pgf_bracketed_linearize (c_db c) c_revision c_e m c_out)) + (ne,_,bs) <- readIORef ref + (if ne + then return [] + else return (reverse bs)) + where + symbol_token ref _ c_text = do + (ne,stack,bs) <- readIORef ref + token <- peekText c_text + writeIORef ref (ne,stack,Leaf token : bs) + + begin_phrase ref _ c_cat c_fid c_ann c_fun = do + (ne,stack,bs) <- readIORef ref + writeIORef ref (ne,bs:stack,[]) + + end_phrase ref _ c_cat c_fid c_ann c_fun = do + (ne,bs':stack,bs) <- readIORef ref + if null bs + then writeIORef ref (ne,stack, bs') + else do cat <- peekText c_cat + let fid = fromIntegral c_fid + ann <- peekText c_ann + fun <- peekText c_fun + writeIORef ref (ne,stack,Bracket cat fid ann fun (reverse bs) : bs') + + symbol_bind ref _ = do + (ne,stack,bs) <- readIORef ref + writeIORef ref (ne,stack,BIND : bs) + + symbol_ne ref _ = do + (ne,stack,bs) <- readIORef ref + writeIORef ref (True,[],[]) + + symbol_meta ref _ meta_id = do + (ne,stack,bs) <- readIORef ref + writeIORef ref (ne,stack,Leaf "?" : bs) bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]] bracketedLinearizeAll = error "TODO: bracketedLinearizeAll" diff --git a/src/runtime/haskell/PGF2/FFI.hsc b/src/runtime/haskell/PGF2/FFI.hsc index a068f459c..d5b1b2676 100644 --- a/src/runtime/haskell/PGF2/FFI.hsc +++ b/src/runtime/haskell/PGF2/FFI.hsc @@ -43,6 +43,7 @@ data PgfMarshaller data PgfUnmarshaller data PgfBuildLinIface data PgfLinBuilderIface +data PgfLinearizationOutputIface type Wrapper a = a -> IO (FunPtr a) type Dynamic a = FunPtr a -> a @@ -199,6 +200,16 @@ foreign import ccall pgf_has_linearization :: Ptr PgfDB -> Ptr Concr -> Ptr PgfT foreign import ccall pgf_linearize :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfMarshaller -> Ptr PgfExn -> IO (Ptr PgfText) +foreign import ccall pgf_bracketed_linearize :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfMarshaller -> Ptr PgfLinearizationOutputIface -> Ptr PgfExn -> IO () + +foreign import ccall "wrapper" wrapSymbol0 :: Wrapper (Ptr PgfLinearizationOutputIface -> IO ()) + +foreign import ccall "wrapper" wrapSymbol1 :: Wrapper (Ptr PgfLinearizationOutputIface -> Ptr PgfText -> IO ()) + +foreign import ccall "wrapper" wrapSymbol2 :: Wrapper (Ptr PgfLinearizationOutputIface -> Ptr PgfText -> CInt -> Ptr PgfText -> Ptr PgfText -> IO ()) + +foreign import ccall "wrapper" wrapSymbol3 :: Wrapper (Ptr PgfLinearizationOutputIface -> CInt -> IO ()) + foreign import ccall pgf_get_global_flag :: Ptr PgfDB -> Ptr PGF -> Ptr PgfText -> Ptr PgfUnmarshaller -> Ptr PgfExn -> IO (StablePtr Literal) foreign import ccall pgf_set_global_flag :: Ptr PgfDB -> Ptr PGF -> Ptr PgfText -> StablePtr Literal -> Ptr PgfMarshaller -> Ptr PgfExn -> IO ()