From 3e7d80bf308e6f07af40650cf8f1507e2e217370 Mon Sep 17 00:00:00 2001 From: krangelov Date: Fri, 27 Aug 2021 14:44:42 +0200 Subject: [PATCH] reading & showing unicode identifiers --- src/runtime/c/pgf/expr.cxx | 7 ++- src/runtime/c/pgf/expr.h | 6 +++ src/runtime/c/pgf/printer.cxx | 68 +++++++++++++++++++++++++++--- src/runtime/c/pgf/printer.h | 2 + src/runtime/haskell/tests/basic.hs | 4 ++ 5 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/runtime/c/pgf/expr.cxx b/src/runtime/c/pgf/expr.cxx index 5a9057bdb..a54729f37 100644 --- a/src/runtime/c/pgf/expr.cxx +++ b/src/runtime/c/pgf/expr.cxx @@ -160,7 +160,7 @@ bool PgfExprParser::eof() return (token_tag == PGF_TOKEN_EOF); } -static bool +PGF_INTERNAL bool pgf_is_ident_first(uint32_t ucs) { return (ucs == '_') || @@ -169,7 +169,7 @@ pgf_is_ident_first(uint32_t ucs) (ucs >= 192 && ucs <= 255 && ucs != 247 && ucs != 215); } -static bool +PGF_INTERNAL bool pgf_is_ident_rest(uint32_t ucs) { return (ucs == '_') || @@ -211,6 +211,9 @@ void PgfExprParser::str_char() case '"': putc('\"'); break; + case '\'': + putc('\''); + break; case 'n': putc('\n'); break; diff --git a/src/runtime/c/pgf/expr.h b/src/runtime/c/pgf/expr.h index a80826e80..d75a83810 100644 --- a/src/runtime/c/pgf/expr.h +++ b/src/runtime/c/pgf/expr.h @@ -108,6 +108,12 @@ typedef struct PgfBind { PgfText var; } PgfBind; +PGF_INTERNAL_DECL bool +pgf_is_ident_first(uint32_t ucs); + +PGF_INTERNAL_DECL bool +pgf_is_ident_rest(uint32_t ucs); + class PGF_INTERNAL_DECL PgfExprParser { enum PGF_TOKEN_TAG { PGF_TOKEN_LPAR, diff --git a/src/runtime/c/pgf/printer.cxx b/src/runtime/c/pgf/printer.cxx index c6ae7f09b..4cb3b6738 100644 --- a/src/runtime/c/pgf/printer.cxx +++ b/src/runtime/c/pgf/printer.cxx @@ -209,14 +209,71 @@ PgfExpr PgfPrinter::efun(PgfText *name) { flush_lambdas(); - puts(name); + bool normal_name = true; + + const uint8_t* start = (uint8_t*) name->text; + const uint8_t* end = start + name->size; + + const uint8_t* s = start; + while (s < end) { + uint32_t ucs = pgf_utf8_decode(&s); + + if (!((s == (uint8_t*) start) + ? pgf_is_ident_first(ucs) + : pgf_is_ident_rest (ucs))) { + normal_name = false; + break; + } + } + + if (normal_name) { + puts(name); + } else { + PgfText *charbuf = (PgfText *) alloca(sizeof(PgfText)+7); + + puts("'"); + while (start < end) { + const uint8_t* s = start; + uint32_t ucs = pgf_utf8_decode(&s); + + switch (ucs) { + case '\\': + puts("\\\\"); + break; + case '\'': + puts("\\'"); + break; + case '\n': + puts("\\n"); + break; + case '\r': + puts("\\r"); + break; + case '\b': + puts("\\b"); + break; + case '\t': + puts("\\t"); + break; + case '\0': + puts("\\0"); + break; + default: + charbuf->size = s-start; + memcpy(charbuf->text, start, charbuf->size); + charbuf->text[charbuf->size] = 0; + puts(charbuf); + } + start = s; + } + puts("'"); + } + return 0; } PgfExpr PgfPrinter::evar(int index) { - flush_lambdas(); - PgfPrintContext *var = ctxt; for (int i = 0; i < index; i++) { if (var == NULL) @@ -224,9 +281,10 @@ PgfExpr PgfPrinter::evar(int index) var = var->next; } if (var == NULL) { + flush_lambdas(); nprintf(4, "#%d", index); } else { - puts(&var->name); + efun(&var->name); } return 0; } @@ -353,7 +411,7 @@ PgfType PgfPrinter::dtyp(int n_hypos, PgfTypeHypo *hypos, puts(" -> "); } - puts(cat); + efun(cat); for (int i = 0; i < n_exprs; i++) { puts(" "); diff --git a/src/runtime/c/pgf/printer.h b/src/runtime/c/pgf/printer.h index 378c27492..7a7e67987 100644 --- a/src/runtime/c/pgf/printer.h +++ b/src/runtime/c/pgf/printer.h @@ -46,6 +46,8 @@ public: // it will be allocated automatically. void nprintf(size_t buf_size, const char *format, ...) __attribute__ ((format (printf, 3, 4))); + void print_name(PgfText *name); + PgfText *get_text(); virtual PgfExpr eabs(PgfBindType btype, PgfText *name, PgfExpr body); diff --git a/src/runtime/haskell/tests/basic.hs b/src/runtime/haskell/tests/basic.hs index ce8d6ad17..83e02c0f5 100644 --- a/src/runtime/haskell/tests/basic.hs +++ b/src/runtime/haskell/tests/basic.hs @@ -67,6 +67,10 @@ main = do ,TestCase (assertEqual "fresh variables 3" "\\v,v1,v2->v1" (showExpr [] (EAbs Explicit "v" (EAbs Explicit "v" (EAbs Explicit "v" (EVar 1)))))) ,TestCase (assertBool "large integer 1" (null [n | n <- ns, showExpr [] (ELit (LInt n)) /= show n])) ,TestCase (assertBool "large integer 2" (null [n | n <- ns, readExpr (show n) /= Just (ELit (LInt n))])) + ,TestCase (assertEqual "unicode names 1" (Just "'абв'") (fmap (showExpr []) (readExpr "'абв'"))) + ,TestCase (assertEqual "unicode names 2" (Just "ab") (fmap (showExpr []) (readExpr "'ab'"))) + ,TestCase (assertEqual "unicode names 3" (Just "a'b") (fmap (showExpr []) (readExpr "'a\\'b'"))) + ,TestCase (assertEqual "unicode names 4" (Just "'а\\'б'") (fmap (showExpr []) (readExpr "'а\\'б'"))) ] testLoadFailure fpath = do