From 7d9349271b4e46bc1fb4017c5a40689cdfd9188f Mon Sep 17 00:00:00 2001
From: "jordi.saludes"
Date: Sat, 5 Jun 2010 11:57:55 +0000
Subject: [PATCH] Added gf-lexing.* to c-bindings.

---
 contrib/c-bindings/build-gfctest.sh |   2 +-
 contrib/c-bindings/gf_lexing.c      | 298 ++++++++++++++++++++++++++++
 contrib/c-bindings/gf_lexing.h      |  31 +++
 contrib/c-bindings/gfctest.c        |   8 +-
 4 files changed, 334 insertions(+), 5 deletions(-)
 create mode 100644 contrib/c-bindings/gf_lexing.c
 create mode 100644 contrib/c-bindings/gf_lexing.h

diff --git a/contrib/c-bindings/build-gfctest.sh b/contrib/c-bindings/build-gfctest.sh
index f3d48f225..4a61ea289 100644
--- a/contrib/c-bindings/build-gfctest.sh
+++ b/contrib/c-bindings/build-gfctest.sh
@@ -20,4 +20,4 @@ src=../../src
 import=-i$src/runtime/haskell:$src/compiler
 $gf --make ../../examples/tutorial/embedded/QueryEng.gf &&
 ghc $import --make -fglasgow-exts -O2 -no-hs-main $* -c PGFFFI.hs &&
-ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c PGFFFI.hs -o gfctest # gf_lexing.c
+ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c gf_lexing.c PGFFFI.hs -o gfctest
diff --git a/contrib/c-bindings/gf_lexing.c b/contrib/c-bindings/gf_lexing.c
new file mode 100644
index 000000000..21353ec8b
--- /dev/null
+++ b/contrib/c-bindings/gf_lexing.c
@@ -0,0 +1,298 @@
+/* GF C Bindings
+   Copyright (C) 2010 Kevin Kofler
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "gf_lexing.h"
+#include <ctype.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* A lexer splits a string into a NULL-terminated array of malloc'ed tokens. */
+typedef char **(*GF_Lexer)(const char *str);
+/* An unlexer joins a NULL-terminated token array into a malloc'ed string. */
+typedef char *(*GF_Unlexer)(char **arr);
+
+/* Frees every string of the NULL-terminated array p, then the array itself. */
+static inline void freev(char **p)
+{
+  char **q = p;
+  while (*q)
+    free(*(q++));
+  free(p);
+}
+
+/* Splits str at runs of characters for which isSep returns nonzero. Leading
+   and trailing separators are skipped, so no token is ever empty. Returns a
+   NULL-terminated array of malloc'ed strings, to be released with freev. */
+static char **split(const char *str, int (*isSep)(int))
+{
+  unsigned char *buf = (unsigned char *) strdup(str);
+  unsigned char *p = buf, *q;
+  char **result, **r;
+  size_t count = 0u;
+  while (isSep(*p)) p++;
+  q = p;
+  if (*p) count++;
+  /* Overwrite every separator with NUL and count the tokens. */
+  while (*p) {
+    if (isSep(*p)) {
+      *(p++) = 0;
+      while (isSep(*p)) *(p++) = 0;
+      if (*p) count++;
+    } else p++;
+  }
+  r = result = malloc((count+1)*sizeof(char *));
+  if (count) while (1) {
+    *(r++) = strdup((char *) q);
+    if (!--count) break;
+    while (*q) q++; /* skip the token just copied */
+    while (!*q) q++; /* skip the NULs up to the next token */
+  }
+  *r = NULL;
+  free(buf); /* the scratch copy used to be leaked */
+  return result;
+}
+
+/* Separator predicate for lines. */
+static int isNewline(int c)
+{
+  return c == '\n';
+}
+
+/* Splits str into whitespace-separated words. */
+static char **words(const char *str)
+{
+  return split(str, isspace);
+}
+
+/* Splits str into non-empty lines. */
+static char **lines(const char *str)
+{
+  return split(str, isNewline);
+}
+
+/* Concatenates the strings of the NULL-terminated array arr, putting sep
+   between consecutive strings. Returns a malloc'ed string, which is empty
+   if arr is empty. */
+static char *join(char **arr, char sep)
+{
+  size_t len = 0u;
+  char **p = arr, *result, *r;
+  while (*p)
+    len += strlen(*(p++)) + 1u;
+  if (!len) return calloc(1, 1);
+  r = result = malloc(len);
+  p = arr;
+  while (1) {
+    size_t l = strlen(*p);
+    strcpy(r, *(p++));
+    if (!*p) break;
+    r += l;
+    *(r++) = sep;
+  }
+  return result;
+}
+
+/* Joins the strings of arr with single spaces. */
+static char *unwords(char **arr)
+{
+  return join(arr, ' ');
+}
+
+/* Joins the strings of arr with newlines. */
+static char *unlines(char **arr)
+{
+  return join(arr, '\n');
+}
+
+/* Applies the lexer f to str, drops empty tokens and joins the remaining
+   ones with single spaces. Returns a malloc'ed string. */
+static char *appLexer(GF_Lexer f, const char *str)
+{
+  char **arr = f(str), **p = arr, **w = arr, *result;
+  while (*p) {
+    if (**p) *(w++) = *p;
+    else free(*p); /* a dropped token is no longer reachable through arr */
+    p++;
+  }
+  *w = NULL;
+  result = unwords(arr);
+  freev(arr);
+  return result;
+}
+
+/* Applies the unlexer f to the words of every line of str and joins the
+   resulting lines with newlines. Returns a malloc'ed string. */
+static char *appUnlexer(GF_Unlexer f, const char *str)
+{
+  char **arr = lines(str), **p = arr, *result;
+  while (*p) {
+    char **warr = words(*p);
+    free(*p);
+    *(p++) = f(warr);
+    freev(warr);
+  }
+  result = unlines(arr);
+  freev(arr);
+  return result;
+}
+
+static inline int isPunct(char c)
+{
+  return c && strchr(".?!,:;", c);
+}
+
+static inline int isMajorPunct(char c)
+{
+  return c && strchr(".?!", c);
+}
+
+static inline int isMinorPunct(char c)
+{
+  return c && strchr(",:;", c);
+}
+
+/* Returns a malloc'ed one-character string holding c. */
+static char *charToStr(char c)
+{
+  char *result = malloc(2), *p = result;
+  *(p++) = c;
+  *p = 0;
+  return result;
+}
+
+/* Lexer producing one token per non-whitespace character of str. */
+static char **lexChars(const char *str)
+{
+  char **result = malloc((strlen(str)+1)*sizeof(char *)), **r = result;
+  const char *p = str;
+  while (*p) {
+    /* <ctype.h> functions need an unsigned char value */
+    if (!isspace((unsigned char) *p)) *(r++) = charToStr(*p);
+    p++;
+  }
+  *r = NULL;
+  return result;
+}
+
+/* Lexer for running text: each of the characters .?!,:; becomes a token of
+   its own, and the first word of str and of every sentence (i.e. after '.',
+   '?' or '!') has its initial letter lowercased. */
+static char **lexText(const char *str)
+{
+  char **result = malloc((strlen(str)+1)*sizeof(char *)), **r = result;
+  const char *p = str;
+  int uncap = 1;
+  while (*p) {
+    if (isMajorPunct(*p)) {
+      *(r++) = charToStr(*(p++));
+      uncap = 1;
+    } else if (isMinorPunct(*p)) {
+      *(r++) = charToStr(*(p++));
+      uncap = 0;
+    } else if (isspace((unsigned char) *p)) {
+      /* Whitespace must leave uncap alone: otherwise the blank after a
+         full stop would cancel the lowercasing of the next sentence. */
+      p++;
+    } else {
+      const char *q = p;
+      char *word;
+      size_t l;
+      while (*p && !isspace((unsigned char) *p) && !isPunct(*p)) p++;
+      l = p - q;
+      word = malloc(l + 1);
+      memcpy(word, q, l);
+      word[l] = 0;
+      if (uncap) *word = tolower((unsigned char) *word);
+      *(r++) = word;
+      uncap = 0;
+    }
+  }
+  *r = NULL;
+  return result;
+}
+
+/* Unlexer for running text: joins the tokens of arr with single spaces,
+   attaches single-character punctuation tokens to the preceding word, strips
+   a pair of enclosing double quotes from a token and capitalizes the first
+   word of the text and of every sentence. Returns a malloc'ed string. */
+static char *unlexText(char **arr)
+{
+  size_t len = 0u;
+  char **p = arr, *result, *r;
+  int cap = 1;
+  while (*p)
+    len += strlen(*(p++)) + 1u;
+  if (!len) return calloc(1, 1);
+  r = result = malloc(len);
+  p = arr;
+  while (1) {
+    size_t l = strlen(*p);
+    char *word = *(p++);
+    /* Strip both quotes; l >= 2 keeps a lone '"' intact. */
+    if (l >= 2 && *word == '"' && word[l-1] == '"') word++, l -= 2;
+    memcpy(r, word, l);
+    if (cap && l) *r = toupper((unsigned char) *r);
+    r += l;
+    if (!*p) break;
+    if (isPunct(**p) && !(*p)[1]) {
+      *(r++) = **p;
+      if (!p[1]) break;
+      cap = isMajorPunct(**(p++));
+    } else cap = 0;
+    *(r++) = ' ';
+  }
+  *r = 0; /* neither break above leaves a terminator behind */
+  return result;
+}
+
+static char *stringop_chars(const char *str)
+{
+  return appLexer(lexChars, str);
+}
+
+static char *stringop_lextext(const char *str)
+{
+  return appLexer(lexText, str);
+}
+
+static char *stringop_words(const char *str)
+{
+  return appLexer(words, str);
+}
+
+static char *stringop_unlextext(const char *str)
+{
+  return appUnlexer(unlexText, str);
+}
+
+static char *stringop_unwords(const char *str)
+{
+  return appUnlexer(unwords, str);
+}
+
+/* Maps the operation name op to its implementation, or to NULL if op is not
+   one of "chars", "lextext", "words", "unlextext" and "unwords". */
+GF_StringOp gf_stringOp(const char *op)
+{
+  if (!strcmp(op, "chars")) return stringop_chars;
+  if (!strcmp(op, "lextext")) return stringop_lextext;
+  if (!strcmp(op, "words")) return stringop_words;
+  if (!strcmp(op, "unlextext")) return stringop_unlextext;
+  if (!strcmp(op, "unwords")) return stringop_unwords;
+  return NULL;
+}
diff --git a/contrib/c-bindings/gf_lexing.h b/contrib/c-bindings/gf_lexing.h
new file mode 100644
index 000000000..262cff8f2
--- /dev/null
+++ b/contrib/c-bindings/gf_lexing.h
@@ -0,0 +1,31 @@
+/* GF C Bindings
+   Copyright (C) 2010 Kevin Kofler
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef GF_LEXING_H
+#define GF_LEXING_H
+
+/* Function pointer type which applies a string operation to str, which is
+   assumed to be non-NULL.
+   The resulting string can be assumed to be non-NULL and must be freed using
+   free. */
+typedef char *(*GF_StringOp)(const char *str);
+
+/* Returns a GF_StringOp applying the operation op if available, otherwise
+   NULL. op is assumed to be non-NULL. The GF_StringOp MUST NOT be freed. */
+GF_StringOp gf_stringOp(const char *op);
+
+#endif /* GF_LEXING_H */
diff --git a/contrib/c-bindings/gfctest.c b/contrib/c-bindings/gfctest.c
index 4c3b5602f..ab0da52fc 100644
--- a/contrib/c-bindings/gfctest.c
+++ b/contrib/c-bindings/gfctest.c
@@ -18,7 +18,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "pgf.h"
-// #include "gf_lexing.h"
+#include "gf_lexing.h"
 
 int main(int argc, char *argv[])
 {
@@ -27,10 +27,10 @@ int main(int argc, char *argv[])
   GF_PGF pgf = gf_readPGF("Query.pgf");
   GF_Language lang = gf_readLanguage("QueryEng");
   GF_Type cat = gf_startCat(pgf);
-//  char *lexed = gf_stringOp("lextext", "Is 2 prime");
-  char *lexed = "is 23 odd";
+  char *lexed = gf_stringOp("lextext")("Is 2 prime");
+  // char *lexed = "is 23 odd";
   GF_Tree *result = gf_parse(pgf, lang, cat, lexed);
-  //free(lexed);
+  free(lexed);
   GF_Tree *p = result;
   if (*p) {
     do {