1
0
forked from GitHub/gf-core

Added gf-lexing.* to c-bindings.

This commit is contained in:
jordi.saludes
2010-06-05 11:57:55 +00:00
parent 4061674fae
commit 7d9349271b
4 changed files with 317 additions and 5 deletions

View File

@@ -20,4 +20,4 @@ src=../../src
import=-i$src/runtime/haskell:$src/compiler
$gf --make ../../examples/tutorial/embedded/QueryEng.gf &&
ghc $import --make -fglasgow-exts -O2 -no-hs-main $* -c PGFFFI.hs &&
ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c PGFFFI.hs -o gfctest # gf_lexing.c
ghc $import --make -fglasgow-exts -O2 -no-hs-main $* gfctest.c gf_lexing.c PGFFFI.hs -o gfctest

View File

@@ -0,0 +1,286 @@
/* GF C Bindings
Copyright (C) 2010 Kevin Kofler
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "gf_lexing.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* A lexer: turns str into a NULL-terminated vector of token strings.
   appLexer releases the returned vector and its elements with free(), so
   both must be heap-allocated. */
typedef char **(*GF_Lexer)(const char *str);
/* An unlexer: joins the NULL-terminated token vector arr into one string.
   appUnlexer releases the returned string with free(), so it must be
   heap-allocated; arr itself stays owned by the caller. */
typedef char *(*GF_Unlexer)(char **arr);
/* Releases a NULL-terminated vector of heap-allocated strings: every element
   up to the terminator first, then the vector itself. p must not be NULL. */
static inline void freev(char **p)
{
    size_t i;

    for (i = 0; p[i] != NULL; i++)
        free(p[i]);
    free(p);
}
/* Splits str into its whitespace-separated words.
   Leading, trailing and repeated whitespace is ignored, so no empty word is
   ever produced.
   Returns a newly allocated, NULL-terminated vector of newly allocated
   strings (release each element and then the vector with free()), or NULL if
   memory runs out.
   The input is scanned in place; no scratch copy of str is made, so nothing
   is left behind to leak. */
static char **words(const char *str)
{
    const unsigned char *s = (const unsigned char *) str;
    const unsigned char *p;
    char **result, **r;
    size_t count = 0u;

    /* Pass 1: count the words so the vector can be sized exactly. */
    p = s;
    while (*p) {
        while (isspace(*p)) p++;
        if (!*p) break;
        count++;
        while (*p && !isspace(*p)) p++;
    }

    r = result = malloc((count + 1) * sizeof *result);
    if (!result) return NULL;

    /* Pass 2: copy each word into its own allocation. */
    p = s;
    while (*p) {
        const unsigned char *start;
        size_t l;

        while (isspace(*p)) p++;
        if (!*p) break;
        start = p;
        while (*p && !isspace(*p)) p++;
        l = (size_t) (p - start);

        *r = malloc(l + 1);
        if (!*r) {
            /* Undo the partial result instead of returning a short vector. */
            while (r != result) free(*--r);
            free(result);
            return NULL;
        }
        memcpy(*r, start, l);
        (*r)[l] = 0;
        r++;
    }
    *r = NULL;
    return result;
}
/* Joins the NULL-terminated string vector arr into one newly allocated
   string, with a single space between neighbouring elements.
   An empty vector gives an empty string. The caller frees the result. */
static char *unwords(char **arr)
{
    size_t total = 0u;
    size_t i;
    char *out, *dst;

    /* One extra byte per element: a separator, or the final terminator. */
    for (i = 0; arr[i] != NULL; i++)
        total += strlen(arr[i]) + 1u;
    if (total == 0u)
        return calloc(1, 1);

    out = malloc(total);
    dst = out;
    for (i = 0; ; i++) {
        size_t n = strlen(arr[i]);

        /* strcpy also writes the terminator, so the last element ends the string. */
        strcpy(dst, arr[i]);
        if (arr[i + 1] == NULL)
            break;
        dst += n;
        *dst++ = ' ';
    }
    return out;
}
/* Splits str into its non-empty lines, using '\n' as the only separator.
   Leading, trailing and repeated newlines are ignored, so no empty line is
   ever produced.
   Returns a newly allocated, NULL-terminated vector of newly allocated
   strings (release each element and then the vector with free()), or NULL if
   memory runs out.
   The input is scanned in place; no scratch copy of str is made, so nothing
   is left behind to leak. */
static char **lines(const char *str)
{
    const char *p;
    char **result, **r;
    size_t count = 0u;

    /* Pass 1: count the lines so the vector can be sized exactly. */
    p = str;
    while (*p) {
        while (*p == '\n') p++;
        if (!*p) break;
        count++;
        while (*p && *p != '\n') p++;
    }

    r = result = malloc((count + 1) * sizeof *result);
    if (!result) return NULL;

    /* Pass 2: copy each line into its own allocation. */
    p = str;
    while (*p) {
        const char *start;
        size_t l;

        while (*p == '\n') p++;
        if (!*p) break;
        start = p;
        while (*p && *p != '\n') p++;
        l = (size_t) (p - start);

        *r = malloc(l + 1);
        if (!*r) {
            /* Undo the partial result instead of returning a short vector. */
            while (r != result) free(*--r);
            free(result);
            return NULL;
        }
        memcpy(*r, start, l);
        (*r)[l] = 0;
        r++;
    }
    *r = NULL;
    return result;
}
/* Joins the NULL-terminated string vector arr into one newly allocated
   string, with a single '\n' between neighbouring elements (none after the
   last one). An empty vector gives an empty string. The caller frees the
   result. */
static char *unlines(char **arr)
{
    size_t total = 0u;
    size_t i;
    char *out, *dst;

    /* One extra byte per element: a separator, or the final terminator. */
    for (i = 0; arr[i] != NULL; i++)
        total += strlen(arr[i]) + 1u;
    if (total == 0u)
        return calloc(1, 1);

    out = malloc(total);
    dst = out;
    for (i = 0; ; i++) {
        size_t n = strlen(arr[i]);

        /* strcpy also writes the terminator, so the last element ends the string. */
        strcpy(dst, arr[i]);
        if (arr[i + 1] == NULL)
            break;
        dst += n;
        *dst++ = '\n';
    }
    return out;
}
/* Runs the lexer f on str and returns its tokens joined by single spaces
   (via unwords) as a newly allocated string that the caller frees.
   Empty tokens are dropped before joining. */
static char *appLexer(GF_Lexer f, const char *str)
{
    char **arr = f(str);
    char **src, **dst;
    char *result;

    /* Compact the vector in place. dst never runs ahead of src, so no slot
       is overwritten before it has been read. Dropped empty tokens are freed
       here: after compaction they are no longer reachable from the vector,
       so freev() could not release them. */
    for (src = dst = arr; *src; src++) {
        if (**src)
            *(dst++) = *src;
        else
            free(*src);
    }
    *dst = NULL;

    result = unwords(arr);
    freev(arr);
    return result;
}
/* Applies the unlexer f to every line of str separately: each line is split
   into words, f joins them again, and the per-line results are glued back
   together with unlines. Returns a newly allocated string the caller frees. */
static char *appUnlexer(GF_Unlexer f, const char *str)
{
    char **line_vec = lines(str);
    char **cur;
    char *joined;

    for (cur = line_vec; *cur != NULL; cur++) {
        char **tokens = words(*cur);

        /* The raw line is replaced in its slot by the unlexed version. */
        free(*cur);
        *cur = f(tokens);
        freev(tokens);
    }

    joined = unlines(line_vec);
    freev(line_vec);
    return joined;
}
/* Returns 1 if c is one of the punctuation characters . ? ! , : ; and 0
   otherwise. The NUL character is never punctuation (strchr would otherwise
   match the string terminator). */
static inline int isPunct(char c)
{
    if (c == '\0')
        return 0;
    return strchr(".?!,:;", c) != NULL;
}
/* Returns 1 if c is sentence-final punctuation (. ? !) and 0 otherwise,
   including for the NUL character. */
static inline int isMajorPunct(char c)
{
    switch (c) {
    case '.':
    case '?':
    case '!':
        return 1;
    default:
        return 0;
    }
}
/* Returns 1 if c is sentence-internal punctuation (, : ;) and 0 otherwise,
   including for the NUL character. */
static inline int isMinorPunct(char c)
{
    return c == ',' || c == ':' || c == ';';
}
/* Returns a newly allocated one-character string holding c; the caller
   frees it. */
static char *charToStr(char c)
{
    char *s = malloc(2);

    s[0] = c;
    s[1] = '\0';
    return s;
}
/* Lexes str into one single-character token per non-whitespace byte.
   Returns a newly allocated, NULL-terminated vector of newly allocated
   strings; the caller frees each element and then the vector. */
static char **lexChars(const char *str)
{
    /* At most one token per input byte, plus the NULL terminator. */
    char **result = malloc((strlen(str) + 1) * sizeof *result), **r = result;
    const char *p;

    for (p = str; *p; p++) {
        /* <ctype.h> functions require a value representable as unsigned
           char; a plain char may be negative for non-ASCII bytes. */
        if (!isspace((unsigned char) *p))
            *(r++) = charToStr(*p);
    }
    *r = NULL;
    return result;
}
/* Lexes running text into tokens.
   - Each punctuation character (. ? ! , : ;) becomes a token of its own.
   - Whitespace separates tokens and is discarded.
   - Every other maximal run of characters becomes a word token.
   The first word of the text and the first word following major punctuation
   (. ? !) get their initial character lower-cased.
   Returns a newly allocated, NULL-terminated vector of newly allocated
   strings; the caller frees each element and then the vector. */
static char **lexText(const char *str)
{
    /* At most one token per input byte, plus the NULL terminator. */
    char **result = malloc((strlen(str) + 1) * sizeof *result), **r = result;
    const char *p = str;
    int uncap = 1;

    while (*p) {
        if (isMajorPunct(*p)) {
            *(r++) = charToStr(*(p++));
            uncap = 1;
        } else if (isMinorPunct(*p)) {
            *(r++) = charToStr(*(p++));
            uncap = 0;
        } else if (isspace((unsigned char) *p)) {
            /* Whitespace must leave uncap alone: the blank that normally
               follows a full stop (or precedes the first word) would
               otherwise stop the next word from being lower-cased. */
            p++;
        } else {
            const char *q = p;
            char *word;
            size_t l;

            /* <ctype.h> functions need an unsigned char value; a plain char
               may be negative for non-ASCII bytes. */
            while (*p && !isspace((unsigned char) *p) && !isPunct(*p)) p++;
            l = (size_t) (p - q);
            word = malloc(l + 1);
            memcpy(word, q, l);
            word[l] = 0;
            if (uncap) *word = (char) tolower((unsigned char) *word);
            *(r++) = word;
            uncap = 0;
        }
    }
    *r = NULL;
    return result;
}
/* Joins the NULL-terminated token vector arr back into running text.
   - Tokens are separated by a single space, except that a one-character
     punctuation token (. ? ! , : ;) is attached directly to the token
     before it.
   - The first token, and the token following major punctuation (. ? !),
     get their initial character upper-cased.
   - A token wrapped in double quotes ("...") is emitted without the quotes.
   Returns a newly allocated, NUL-terminated string that the caller frees;
   an empty vector gives an empty string. Returns NULL if memory runs out. */
static char *unlexText(char **arr)
{
    size_t len = 0u;
    char **p = arr, *result, *r;
    int cap = 1;

    /* Upper bound: every token needs its length plus one byte, for either a
       separator or the final terminator. */
    while (*p)
        len += strlen(*(p++)) + 1u;
    if (!len) return calloc(1, 1);
    r = result = malloc(len);
    if (!result) return NULL;
    p = arr;
    while (1) {
        const char *word = *(p++);
        size_t l = strlen(word);

        /* Strip BOTH surrounding quotes. A lone '"' (l == 1) is not a quoted
           token and is kept as it is. */
        if (l >= 2 && word[0] == '"' && word[l - 1] == '"') {
            word++;
            l -= 2;
        }
        memcpy(r, word, l);
        /* Capitalise only when a character was actually written; <ctype.h>
           functions need an unsigned char value. */
        if (cap && l) *r = (char) toupper((unsigned char) *r);
        r += l;
        if (!*p) break;
        if (isPunct(**p) && !(*p)[1]) {
            /* Attach the punctuation token without a preceding space. */
            *(r++) = **p;
            if (!p[1]) break;
            cap = isMajorPunct(**(p++));
        } else cap = 0;
        *(r++) = ' ';
    }
    /* memcpy does not terminate the string, so do it explicitly. */
    *r = 0;
    return result;
}
/* The functions below are the concrete string operations handed out by
   gf_stringOp. Each one takes a string and returns a newly allocated string
   that the caller frees. */

/* "chars": lexes str with lexChars and joins the tokens with spaces. */
static char *stringop_chars(const char *str)
{
return appLexer(lexChars, str);
}
/* "lextext": lexes str with lexText and joins the tokens with spaces. */
static char *stringop_lextext(const char *str)
{
return appLexer(lexText, str);
}
/* "words": splits str with words and joins the tokens with single spaces. */
static char *stringop_words(const char *str)
{
return appLexer(words, str);
}
/* "unlextext": applies unlexText to the words of each line of str. */
static char *stringop_unlextext(const char *str)
{
return appUnlexer(unlexText, str);
}
/* "unwords": applies unwords to the words of each line of str. */
static char *stringop_unwords(const char *str)
{
return appUnlexer(unwords, str);
}
/* Looks up the string operation called op ("chars", "lextext", "words",
   "unlextext" or "unwords"; the match is exact and case-sensitive).
   Returns the matching function, or NULL if op names no known operation.
   op must not be NULL. */
GF_StringOp gf_stringOp(const char *op)
{
    static const struct {
        const char *name;
        GF_StringOp fn;
    } ops[] = {
        { "chars",     stringop_chars },
        { "lextext",   stringop_lextext },
        { "words",     stringop_words },
        { "unlextext", stringop_unlextext },
        { "unwords",   stringop_unwords },
    };
    size_t i;

    for (i = 0; i < sizeof ops / sizeof ops[0]; i++) {
        if (strcmp(op, ops[i].name) == 0)
            return ops[i].fn;
    }
    return NULL;
}

View File

@@ -0,0 +1,26 @@
/* GF C Bindings
Copyright (C) 2010 Kevin Kofler
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GF_LEXING_H
#define GF_LEXING_H

/* Function pointer type which applies a string operation to str, which is
assumed to be non-NULL.
The resulting string can be assumed to be non-NULL and must be freed using
free. */
typedef char *(*GF_StringOp)(const char *str);
/* Returns a GF_StringOp applying the operation op if available, otherwise
NULL. op is assumed to be non-NULL. The GF_StringOp MUST NOT be freed. */
GF_StringOp gf_stringOp(const char *op);

#endif /* GF_LEXING_H */

View File

@@ -18,7 +18,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "pgf.h"
// #include "gf_lexing.h"
#include "gf_lexing.h"
int main(int argc, char *argv[])
{
@@ -27,10 +27,10 @@ int main(int argc, char *argv[])
GF_PGF pgf = gf_readPGF("Query.pgf");
GF_Language lang = gf_readLanguage("QueryEng");
GF_Type cat = gf_startCat(pgf);
// char *lexed = gf_stringOp("lextext", "Is 2 prime");
char *lexed = "is 23 odd";
char *lexed = gf_stringOp("lextext")("Is 2 prime");
// char *lexed = "is 23 odd";
GF_Tree *result = gf_parse(pgf, lang, cat, lexed);
//free(lexed);
free(lexed);
GF_Tree *p = result;
if (*p) {
do {