forked from GitHub/gf-core
Added gf-lexing.* to c-bindings.
This commit is contained in:
@@ -20,4 +20,4 @@ src=../../src
|
||||
import=-i$src/runtime/haskell:$src/compiler
|
||||
$gf --make ../../examples/tutorial/embedded/QueryEng.gf &&
|
||||
ghc $import --make -fglasgow-exts -O2 -no-hs-main $* -c PGFFFI.hs &&
|
||||
ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c PGFFFI.hs -o gfctest # gf_lexing.c
|
||||
ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c gf_lexing.c PGFFFI.hs -o gfctest
|
||||
|
||||
286
contrib/c-bindings/gf_lexing.c
Normal file
286
contrib/c-bindings/gf_lexing.c
Normal file
@@ -0,0 +1,286 @@
|
||||
/* GF C Bindings
|
||||
Copyright (C) 2010 Kevin Kofler
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "gf_lexing.h"
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
typedef char **(*GF_Lexer)(const char *str);
|
||||
typedef char *(*GF_Unlexer)(char **arr);
|
||||
|
||||
static inline void freev(char **p)
|
||||
{
|
||||
char **q = p;
|
||||
while (*q)
|
||||
free(*(q++));
|
||||
free(p);
|
||||
}
|
||||
|
||||
static char **words(const char *str)
|
||||
{
|
||||
unsigned char *buf = (unsigned char *) strdup(str);
|
||||
unsigned char *p = buf, *q;
|
||||
char **result, **r;
|
||||
size_t count = 0u;
|
||||
while (isspace(*p)) p++;
|
||||
q = p;
|
||||
if (*p) count++;
|
||||
while (*p) {
|
||||
if (isspace(*p)) {
|
||||
*(p++) = 0;
|
||||
while (isspace(*p)) *(p++) = 0;
|
||||
if (*p) count++;
|
||||
} else p++;
|
||||
}
|
||||
r = result = malloc((count+1)*sizeof(char *));
|
||||
if (count) while (1) {
|
||||
*(r++) = strdup((char *) q);
|
||||
if (!--count) break;
|
||||
while (*q) q++;
|
||||
while (!*q) q++;
|
||||
}
|
||||
*r = NULL;
|
||||
return result;
|
||||
}
|
||||
|
||||
static char *unwords(char **arr)
|
||||
{
|
||||
size_t len = 0u;
|
||||
char **p = arr, *result, *r;
|
||||
while (*p)
|
||||
len += strlen(*(p++)) + 1u;
|
||||
if (!len) return calloc(1, 1);
|
||||
r = result = malloc(len);
|
||||
p = arr;
|
||||
while (1) {
|
||||
size_t l = strlen(*p);
|
||||
strcpy(r, *(p++));
|
||||
if (!*p) break;
|
||||
r += l;
|
||||
*(r++) = ' ';
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static char **lines(const char *str)
|
||||
{
|
||||
unsigned char *buf = (unsigned char *) strdup(str);
|
||||
unsigned char *p = buf, *q;
|
||||
char **result, **r;
|
||||
size_t count = 0u;
|
||||
while (*p == '\n') p++;
|
||||
q = p;
|
||||
if (*p) count++;
|
||||
while (*p) {
|
||||
if (*p == '\n') {
|
||||
*(p++) = 0;
|
||||
while (*p == '\n') *(p++) = 0;
|
||||
if (*p) count++;
|
||||
} else p++;
|
||||
}
|
||||
r = result = malloc((count+1)*sizeof(char *));
|
||||
if (count) while (1) {
|
||||
*(r++) = strdup((char *) q);
|
||||
if (!--count) break;
|
||||
while (*q) q++;
|
||||
while (!*q) q++;
|
||||
}
|
||||
*r = NULL;
|
||||
return result;
|
||||
}
|
||||
|
||||
static char *unlines(char **arr)
|
||||
{
|
||||
size_t len = 0u;
|
||||
char **p = arr, *result, *r;
|
||||
while (*p)
|
||||
len += strlen(*(p++)) + 1u;
|
||||
if (!len) return calloc(1, 1);
|
||||
r = result = malloc(len);
|
||||
p = arr;
|
||||
while (1) {
|
||||
size_t l = strlen(*p);
|
||||
strcpy(r, *(p++));
|
||||
if (!*p) break;
|
||||
r += l;
|
||||
*(r++) = '\n';
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static char *appLexer(GF_Lexer f, const char *str)
|
||||
{
|
||||
char **arr = f(str), **p = arr, *result;
|
||||
int ofs = 0;
|
||||
while (*p && **p) p++;
|
||||
while (*p) {
|
||||
if (**p) p[-ofs] = *p; else ofs++;
|
||||
p++;
|
||||
}
|
||||
p[-ofs] = NULL;
|
||||
result = unwords(arr);
|
||||
freev(arr);
|
||||
return result;
|
||||
}
|
||||
|
||||
static char *appUnlexer(GF_Unlexer f, const char *str)
|
||||
{
|
||||
char **arr = lines(str), **p = arr, *result;
|
||||
while (*p) {
|
||||
char **warr = words(*p);
|
||||
free(*p);
|
||||
*(p++) = f(warr);
|
||||
freev(warr);
|
||||
}
|
||||
result = unlines(arr);
|
||||
freev(arr);
|
||||
return result;
|
||||
}
|
||||
|
||||
static inline int isPunct(char c)
|
||||
{
|
||||
return c && strchr(".?!,:;", c);
|
||||
}
|
||||
|
||||
static inline int isMajorPunct(char c)
|
||||
{
|
||||
return c && strchr(".?!", c);
|
||||
}
|
||||
|
||||
static inline int isMinorPunct(char c)
|
||||
{
|
||||
return c && strchr(",:;", c);
|
||||
}
|
||||
|
||||
static char *charToStr(char c)
|
||||
{
|
||||
char *result = malloc(2), *p = result;
|
||||
*(p++) = c;
|
||||
*p = 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
static char **lexChars(const char *str)
|
||||
{
|
||||
char **result = malloc((strlen(str)+1)*sizeof(char *)), **r = result;
|
||||
const char *p = str;
|
||||
while (*p) {
|
||||
if (!isspace(*p)) *(r++) = charToStr(*p);
|
||||
p++;
|
||||
}
|
||||
*r = NULL;
|
||||
return result;
|
||||
}
|
||||
|
||||
static char **lexText(const char *str)
|
||||
{
|
||||
char **result = malloc((strlen(str)+1)*sizeof(char *)), **r = result;
|
||||
const char *p = str;
|
||||
int uncap = 1;
|
||||
while (*p) {
|
||||
if (isMajorPunct(*p)) {
|
||||
*(r++) = charToStr(*(p++));
|
||||
uncap = 1;
|
||||
} else if (isMinorPunct(*p)) {
|
||||
*(r++) = charToStr(*(p++));
|
||||
uncap = 0;
|
||||
} else if (isspace(*p)) {
|
||||
p++;
|
||||
uncap = 0;
|
||||
} else {
|
||||
const char *q = p;
|
||||
char *word;
|
||||
size_t l;
|
||||
while (*p && !isspace(*p) && !isPunct(*p)) p++;
|
||||
l = p - q;
|
||||
word = malloc(l + 1);
|
||||
strncpy(word, q, l);
|
||||
word[l] = 0;
|
||||
if (uncap) *word = tolower(*word);
|
||||
*(r++) = word;
|
||||
uncap = 0;
|
||||
}
|
||||
}
|
||||
*r = NULL;
|
||||
return result;
|
||||
}
|
||||
|
||||
static char *unlexText(char **arr)
|
||||
{
|
||||
size_t len = 0u;
|
||||
char **p = arr, *result, *r;
|
||||
int cap = 1;
|
||||
while (*p)
|
||||
len += strlen(*(p++)) + 1u;
|
||||
if (!len) return calloc(1, 1);
|
||||
r = result = malloc(len);
|
||||
p = arr;
|
||||
while (1) {
|
||||
size_t l = strlen(*p);
|
||||
char *word = *(p++);
|
||||
if (*word == '"' && word[l-1] == '"') word++, l--;
|
||||
strncpy(r, word, l);
|
||||
if (cap) *r = toupper(*r);
|
||||
if (!*p) break;
|
||||
r += l;
|
||||
if (isPunct(**p) && !(*p)[1]) {
|
||||
*(r++) = **p;
|
||||
if (!p[1]) break;
|
||||
cap = isMajorPunct(**(p++));
|
||||
} else cap = 0;
|
||||
*(r++) = ' ';
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
static char *stringop_chars(const char *str)
|
||||
{
|
||||
return appLexer(lexChars, str);
|
||||
}
|
||||
|
||||
static char *stringop_lextext(const char *str)
|
||||
{
|
||||
return appLexer(lexText, str);
|
||||
}
|
||||
|
||||
static char *stringop_words(const char *str)
|
||||
{
|
||||
return appLexer(words, str);
|
||||
}
|
||||
|
||||
static char *stringop_unlextext(const char *str)
|
||||
{
|
||||
return appUnlexer(unlexText, str);
|
||||
}
|
||||
|
||||
static char *stringop_unwords(const char *str)
|
||||
{
|
||||
return appUnlexer(unwords, str);
|
||||
}
|
||||
|
||||
GF_StringOp gf_stringOp(const char *op)
|
||||
{
|
||||
if (!strcmp(op, "chars")) return stringop_chars;
|
||||
if (!strcmp(op, "lextext")) return stringop_lextext;
|
||||
if (!strcmp(op, "words")) return stringop_words;
|
||||
if (!strcmp(op, "unlextext")) return stringop_unlextext;
|
||||
if (!strcmp(op, "unwords")) return stringop_unwords;
|
||||
return NULL;
|
||||
}
|
||||
26
contrib/c-bindings/gf_lexing.h
Normal file
26
contrib/c-bindings/gf_lexing.h
Normal file
@@ -0,0 +1,26 @@
|
||||
/* GF C Bindings
|
||||
Copyright (C) 2010 Kevin Kofler
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* Function pointer type which applies a string operation to str, which is
|
||||
assumed to be non-NULL.
|
||||
The resulting string can be assumed to be non-NULL and must be freed using
|
||||
free. */
|
||||
typedef char *(*GF_StringOp)(const char *str);
|
||||
|
||||
/* Returns a GF_StringOp applying the operation op if available, otherwise
|
||||
NULL. op is assumed to be non-NULL. The GF_StringOp MUST NOT be freed. */
|
||||
GF_StringOp gf_stringOp(const char *op);
|
||||
@@ -18,7 +18,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "pgf.h"
|
||||
// #include "gf_lexing.h"
|
||||
#include "gf_lexing.h"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
@@ -27,10 +27,10 @@ int main(int argc, char *argv[])
|
||||
GF_PGF pgf = gf_readPGF("Query.pgf");
|
||||
GF_Language lang = gf_readLanguage("QueryEng");
|
||||
GF_Type cat = gf_startCat(pgf);
|
||||
// char *lexed = gf_stringOp("lextext", "Is 2 prime");
|
||||
char *lexed = "is 23 odd";
|
||||
char *lexed = gf_stringOp("lextext")("Is 2 prime");
|
||||
// char *lexed = "is 23 odd";
|
||||
GF_Tree *result = gf_parse(pgf, lang, cat, lexed);
|
||||
//free(lexed);
|
||||
free(lexed);
|
||||
GF_Tree *p = result;
|
||||
if (*p) {
|
||||
do {
|
||||
|
||||
Reference in New Issue
Block a user