diff --git a/contrib/c-bindings/build-gfctest.sh b/contrib/c-bindings/build-gfctest.sh
index f3d48f225..4a61ea289 100644
--- a/contrib/c-bindings/build-gfctest.sh
+++ b/contrib/c-bindings/build-gfctest.sh
@@ -20,4 +20,4 @@ src=../../src
import=-i$src/runtime/haskell:$src/compiler
$gf --make ../../examples/tutorial/embedded/QueryEng.gf &&
ghc $import --make -fglasgow-exts -O2 -no-hs-main $* -c PGFFFI.hs &&
-ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c PGFFFI.hs -o gfctest # gf_lexing.c
+ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c gf_lexing.c PGFFFI.hs -o gfctest
diff --git a/contrib/c-bindings/gf_lexing.c b/contrib/c-bindings/gf_lexing.c
new file mode 100644
index 000000000..21353ec8b
--- /dev/null
+++ b/contrib/c-bindings/gf_lexing.c
@@ -0,0 +1,286 @@
+/* GF C Bindings
+ Copyright (C) 2010 Kevin Kofler
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "gf_lexing.h"
+#include <ctype.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* A lexer splits str into tokens. Its result is passed to freev() by
+   appLexer, so it must be a heap-allocated, NULL-terminated array whose
+   elements are individually heap-allocated strings. */
+typedef char **(*GF_Lexer)(const char *str);
+/* An unlexer renders a NULL-terminated token array as a single string. The
+   caller (appUnlexer) keeps ownership of arr and later frees the returned
+   string with free(), so the result must be heap-allocated. */
+typedef char *(*GF_Unlexer)(char **arr);
+
+/* Releases a NULL-terminated array of heap-allocated strings: first every
+   element up to (not including) the terminating NULL, then the array itself.
+   p must not be NULL. */
+static inline void freev(char **p)
+{
+  size_t i;
+  for (i = 0u; p[i] != NULL; i++)
+    free(p[i]);
+  free(p);
+}
+
+/* Splits str at runs of whitespace (as classified by isspace).
+   Leading and trailing whitespace is ignored, so no token is ever empty.
+   Returns a newly allocated, NULL-terminated array of newly allocated
+   strings; release it with freev().
+   The input is scanned in place: no scratch copy is made, so nothing is
+   leaked (the previous implementation never freed its strdup'd buffer). */
+static char **words(const char *str)
+{
+  /* <ctype.h> functions require values in the unsigned char range */
+  const unsigned char *s = (const unsigned char *) str;
+  size_t count = 0u, i;
+  char **result, **r;
+  /* Pass 1: count the tokens so the array can be sized exactly */
+  for (i = 0u; s[i]; ) {
+    while (isspace(s[i])) i++;      /* isspace(0) is false: stops at the end */
+    if (!s[i]) break;
+    count++;
+    while (s[i] && !isspace(s[i])) i++;
+  }
+  r = result = malloc((count + 1u) * sizeof *result);
+  /* Pass 2: copy each token into its own allocation */
+  for (i = 0u; s[i]; ) {
+    size_t start, l;
+    while (isspace(s[i])) i++;
+    if (!s[i]) break;
+    start = i;
+    while (s[i] && !isspace(s[i])) i++;
+    l = i - start;
+    *r = malloc(l + 1u);
+    memcpy(*r, str + start, l);
+    (*r)[l] = 0;
+    r++;
+  }
+  *r = NULL;
+  return result;
+}
+
+/* Joins the strings of the NULL-terminated array arr, separated by single
+   spaces. Returns a newly allocated string to be released with free(); an
+   empty array yields an empty string. */
+static char *unwords(char **arr)
+{
+  size_t total = 0u, i;
+  char *joined, *out;
+  /* Each element needs its characters plus one byte (separator or NUL) */
+  for (i = 0u; arr[i]; i++)
+    total += strlen(arr[i]) + 1u;
+  if (total == 0u)
+    return calloc(1, 1);
+  out = joined = malloc(total);
+  i = 0u;
+  do {
+    size_t n = strlen(arr[i]);
+    memcpy(out, arr[i], n);
+    out += n;
+    /* A space between elements, the terminator after the last one */
+    *(out++) = arr[i + 1u] ? ' ' : '\0';
+  } while (arr[++i]);
+  return joined;
+}
+
+/* Splits str at runs of '\n'. Leading and trailing newlines are ignored and
+   consecutive newlines count as one separator, so no line is ever empty.
+   Returns a newly allocated, NULL-terminated array of newly allocated
+   strings; release it with freev().
+   The input is scanned in place: no scratch copy is made, so nothing is
+   leaked (the previous implementation never freed its strdup'd buffer). */
+static char **lines(const char *str)
+{
+  const char *p;
+  char **result, **r;
+  size_t count = 0u;
+  /* Pass 1: count the non-empty lines so the array can be sized exactly */
+  for (p = str; *p; ) {
+    if (*p == '\n') {
+      p++;
+      continue;
+    }
+    count++;
+    while (*p && *p != '\n') p++;
+  }
+  r = result = malloc((count + 1u) * sizeof *result);
+  /* Pass 2: copy each line into its own allocation */
+  for (p = str; *p; ) {
+    const char *start;
+    size_t l;
+    if (*p == '\n') {
+      p++;
+      continue;
+    }
+    start = p;
+    while (*p && *p != '\n') p++;
+    l = (size_t) (p - start);
+    *r = malloc(l + 1u);
+    memcpy(*r, start, l);
+    (*r)[l] = 0;
+    r++;
+  }
+  *r = NULL;
+  return result;
+}
+
+/* Joins the strings of the NULL-terminated array arr, separated by single
+   newlines (no newline after the last element). Returns a newly allocated
+   string to be released with free(); an empty array yields an empty string. */
+static char *unlines(char **arr)
+{
+  size_t needed = 0u, idx;
+  char *text, *dst;
+  /* Each element needs its characters plus one byte (separator or NUL) */
+  for (idx = 0u; arr[idx] != NULL; idx++)
+    needed += strlen(arr[idx]) + 1u;
+  if (needed == 0u)
+    return calloc(1, 1);
+  text = malloc(needed);
+  dst = text;
+  for (idx = 0u; arr[idx] != NULL; idx++) {
+    size_t n = strlen(arr[idx]);
+    if (idx > 0u)
+      *(dst++) = '\n';
+    memcpy(dst, arr[idx], n);
+    dst += n;
+  }
+  *dst = '\0';
+  return text;
+}
+
+/* Applies the lexer f to str, discards empty tokens and joins the remaining
+   ones with single spaces (via unwords).
+   Returns a newly allocated string to be released with free().
+   Empty tokens are freed as they are dropped; previously their pointers were
+   simply overwritten or left behind the new terminator, which leaked them
+   because freev() stops at the first NULL. */
+static char *appLexer(GF_Lexer f, const char *str)
+{
+  char **arr = f(str), **src, **dst, *result;
+  /* Compact the array in place: dst trails src whenever a token is dropped */
+  for (src = dst = arr; *src; src++) {
+    if (**src)
+      *(dst++) = *src;
+    else
+      free(*src);
+  }
+  *dst = NULL;
+  result = unwords(arr);
+  freev(arr);
+  return result;
+}
+
+/* Applies the unlexer f line by line: str is split into lines, each line is
+   split into words, f renders the words back into a string, and the rendered
+   lines are joined with newlines (via unlines).
+   Returns a newly allocated string to be released with free(). */
+static char *appUnlexer(GF_Unlexer f, const char *str)
+{
+  char **lineArr = lines(str);
+  char *joined;
+  size_t i;
+  for (i = 0u; lineArr[i] != NULL; i++) {
+    char **tokens = words(lineArr[i]);
+    /* The raw line is no longer needed once it has been tokenized; its slot
+       is reused for the rendered text, which freev() releases below */
+    free(lineArr[i]);
+    lineArr[i] = f(tokens);
+    freev(tokens);
+  }
+  joined = unlines(lineArr);
+  freev(lineArr);
+  return joined;
+}
+
+/* Returns 1 if c is one of the punctuation characters . ? ! , : ; and 0
+   otherwise (including for the NUL character). */
+static inline int isPunct(char c)
+{
+  switch (c) {
+  case '.':
+  case '?':
+  case '!':
+  case ',':
+  case ':':
+  case ';':
+    return 1;
+  default:
+    return 0;
+  }
+}
+
+/* Returns 1 if c is one of the major punctuation characters . ? ! and 0
+   otherwise (including for the NUL character). */
+static inline int isMajorPunct(char c)
+{
+  return c == '.' || c == '?' || c == '!';
+}
+
+/* Returns 1 if c is one of the minor punctuation characters , : ; and 0
+   otherwise (including for the NUL character). */
+static inline int isMinorPunct(char c)
+{
+  if (c == ',')
+    return 1;
+  if (c == ':')
+    return 1;
+  if (c == ';')
+    return 1;
+  return 0;
+}
+
+/* Returns a newly allocated one-character string holding c; release it with
+   free(). */
+static char *charToStr(char c)
+{
+  char *s = malloc(2);
+  s[0] = c;
+  s[1] = '\0';
+  return s;
+}
+
+/* Lexer producing one single-character token per non-whitespace character
+   of str.
+   Returns a newly allocated, NULL-terminated array of newly allocated
+   strings; release it with freev(). */
+static char **lexChars(const char *str)
+{
+  /* At most one token per input character, plus the terminating NULL */
+  char **result = malloc((strlen(str) + 1u) * sizeof *result), **r = result;
+  const char *p;
+  for (p = str; *p; p++) {
+    /* <ctype.h> functions are undefined for negative arguments other than
+       EOF, so bytes above 0x7F must go through unsigned char */
+    if (!isspace((unsigned char) *p))
+      *(r++) = charToStr(*p);
+  }
+  *r = NULL;
+  return result;
+}
+
+/* Text lexer: splits str into words and single-character punctuation tokens
+   (. ? ! , : ;), dropping whitespace. The first character of the first word
+   of the text, and of the first word following major punctuation (. ? !),
+   is converted to lower case.
+   Returns a newly allocated, NULL-terminated array of newly allocated
+   strings; release it with freev().
+   Changes from the previous implementation:
+   - whitespace no longer cancels a pending lower-casing, so the word after
+     ". " (or after leading blanks) is uncapitalized as intended; before,
+     this only worked when the word touched the punctuation directly;
+   - characters are passed to <ctype.h> functions as unsigned char, which
+     avoids undefined behavior for bytes above 0x7F. */
+static char **lexText(const char *str)
+{
+  /* Every token consumes at least one input character, so strlen(str)
+     slots plus the terminating NULL are always enough */
+  char **result = malloc((strlen(str) + 1u) * sizeof *result), **r = result;
+  const char *p = str;
+  int uncap = 1;   /* nonzero: the next word starts a sentence */
+  while (*p) {
+    if (isMajorPunct(*p)) {
+      *(r++) = charToStr(*(p++));
+      uncap = 1;
+    } else if (isMinorPunct(*p)) {
+      *(r++) = charToStr(*(p++));
+      uncap = 0;
+    } else if (isspace((unsigned char) *p)) {
+      p++;         /* skipped; uncap deliberately left untouched */
+    } else {
+      const char *q = p;
+      char *word;
+      size_t l;
+      while (*p && !isspace((unsigned char) *p) && !isPunct(*p)) p++;
+      l = (size_t) (p - q);
+      word = malloc(l + 1u);
+      memcpy(word, q, l);
+      word[l] = 0;
+      if (uncap) *word = (char) tolower((unsigned char) *word);
+      *(r++) = word;
+      uncap = 0;
+    }
+  }
+  *r = NULL;
+  return result;
+}
+
+/* Text unlexer: joins the tokens of the NULL-terminated array arr into
+   running text. Tokens are separated by single spaces, except that a
+   single-character punctuation token (. ? ! , : ;) is attached directly to
+   the preceding token. The first token, and each token following major
+   punctuation (. ? !), gets its first character converted to upper case.
+   A token enclosed in double quotes is emitted without the quotes.
+   Returns a newly allocated, NUL-terminated string to be released with
+   free(); an empty array yields an empty string.
+   Fixes over the previous implementation:
+   - the result is now always NUL-terminated (neither exit path wrote a
+     terminator, so callers ran strlen() off the end of the buffer);
+   - both quotes of a quoted token are removed (only the opening one was),
+     and a lone '"' is no longer treated as a quoted token;
+   - toupper() gets an unsigned char and is skipped for empty tokens, which
+     previously read an uninitialized byte. */
+static char *unlexText(char **arr)
+{
+  size_t len = 0u;
+  char **p, *result, *r;
+  int cap = 1;
+  /* Upper bound: every token plus one byte for its separator or the NUL */
+  for (p = arr; *p; p++)
+    len += strlen(*p) + 1u;
+  if (!len) return calloc(1, 1);
+  r = result = malloc(len);
+  p = arr;
+  while (1) {
+    const char *word = *(p++);
+    size_t l = strlen(word);
+    if (l >= 2u && word[0] == '"' && word[l-1] == '"') {
+      word++;
+      l -= 2u;
+    }
+    memcpy(r, word, l);
+    if (cap && l) *r = (char) toupper((unsigned char) *r);
+    r += l;
+    if (!*p) break;
+    if (isPunct(**p) && !(*p)[1]) {
+      /* Attach the punctuation token without a preceding space */
+      *(r++) = **p;
+      if (!p[1]) break;
+      cap = isMajorPunct(**(p++));
+    } else cap = 0;
+    *(r++) = ' ';
+  }
+  *r = 0;
+  return result;
+}
+
+/* "chars" operation: lexes str with lexChars and joins the resulting tokens
+   with single spaces. The result must be released with free(). */
+static char *stringop_chars(const char *str)
+{
+  char *lexed = appLexer(lexChars, str);
+  return lexed;
+}
+
+/* "lextext" operation: lexes str with lexText and joins the resulting tokens
+   with single spaces. The result must be released with free(). */
+static char *stringop_lextext(const char *str)
+{
+  char *lexed = appLexer(lexText, str);
+  return lexed;
+}
+
+/* "words" operation: splits str at whitespace with words and joins the
+   tokens with single spaces. The result must be released with free(). */
+static char *stringop_words(const char *str)
+{
+  char *lexed = appLexer(words, str);
+  return lexed;
+}
+
+/* "unlextext" operation: applies unlexText to the words of every line of
+   str. The result must be released with free(). */
+static char *stringop_unlextext(const char *str)
+{
+  char *unlexed = appUnlexer(unlexText, str);
+  return unlexed;
+}
+
+/* "unwords" operation: applies unwords to the words of every line of str.
+   The result must be released with free(). */
+static char *stringop_unwords(const char *str)
+{
+  char *unlexed = appUnlexer(unwords, str);
+  return unlexed;
+}
+
+/* Looks up the string operation called op ("chars", "lextext", "words",
+   "unlextext" or "unwords"). Returns the matching function, or NULL if op
+   names no known operation. op must not be NULL. */
+GF_StringOp gf_stringOp(const char *op)
+{
+  static const struct {
+    const char *name;
+    GF_StringOp fn;
+  } table[] = {
+    { "chars",     stringop_chars },
+    { "lextext",   stringop_lextext },
+    { "words",     stringop_words },
+    { "unlextext", stringop_unlextext },
+    { "unwords",   stringop_unwords }
+  };
+  size_t i;
+  for (i = 0u; i < sizeof table / sizeof table[0]; i++) {
+    if (strcmp(op, table[i].name) == 0)
+      return table[i].fn;
+  }
+  return NULL;
+}
diff --git a/contrib/c-bindings/gf_lexing.h b/contrib/c-bindings/gf_lexing.h
new file mode 100644
index 000000000..262cff8f2
--- /dev/null
+++ b/contrib/c-bindings/gf_lexing.h
@@ -0,0 +1,26 @@
+/* GF C Bindings
+ Copyright (C) 2010 Kevin Kofler
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef GF_LEXING_H
+#define GF_LEXING_H
+
+/* Function pointer type which applies a string operation to str, which is
+   assumed to be non-NULL.
+   The resulting string can be assumed to be non-NULL and must be freed using
+   free. */
+typedef char *(*GF_StringOp)(const char *str);
+
+/* Returns a GF_StringOp applying the operation op if available, otherwise
+   NULL. op is assumed to be non-NULL. The GF_StringOp MUST NOT be freed. */
+GF_StringOp gf_stringOp(const char *op);
+
+/* The guard makes repeated inclusion harmless. */
+#endif /* GF_LEXING_H */
\ No newline at end of file
diff --git a/contrib/c-bindings/gfctest.c b/contrib/c-bindings/gfctest.c
index 4c3b5602f..ab0da52fc 100644
--- a/contrib/c-bindings/gfctest.c
+++ b/contrib/c-bindings/gfctest.c
@@ -18,7 +18,7 @@
#include
#include
#include "pgf.h"
-// #include "gf_lexing.h"
+#include "gf_lexing.h"
int main(int argc, char *argv[])
{
@@ -27,10 +27,10 @@ int main(int argc, char *argv[])
GF_PGF pgf = gf_readPGF("Query.pgf");
GF_Language lang = gf_readLanguage("QueryEng");
GF_Type cat = gf_startCat(pgf);
-// char *lexed = gf_stringOp("lextext", "Is 2 prime");
- char *lexed = "is 23 odd";
+ char *lexed = gf_stringOp("lextext")("Is 2 prime");
+ // char *lexed = "is 23 odd";
GF_Tree *result = gf_parse(pgf, lang, cat, lexed);
- //free(lexed);
+ free(lexed);
GF_Tree *p = result;
if (*p) {
do {