1
0
forked from GitHub/gf-core

reorganize the directories under src, and rescue the JavaScript interpreter from deprecated

This commit is contained in:
krasimir
2009-12-13 18:50:29 +00:00
parent 15305efa5a
commit c92f9d1c0c
189 changed files with 2 additions and 2 deletions

19
src/runtime/c/Makefile Normal file
View File

@@ -0,0 +1,19 @@
CC = gcc
CFLAGS += -O2 -W -Wall
.PHONY: all clean
all: libgfcc.a
libgfcc.a: gfcc-tree.o gfcc-term.o
ar r $@ $^
gfcc-tree.o: gfcc-tree.c gfcc-tree.h
$(CC) $(CFLAGS) -c -o $@ $<
gfcc-term.o: gfcc-term.c gfcc-term.h
$(CC) $(CFLAGS) -c -o $@ $<
clean:
-rm -f libgfcc.a
-rm -f *.o

203
src/runtime/c/gfcc-term.c Normal file
View File

@@ -0,0 +1,203 @@
#include "gfcc-term.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
static void *buffer = NULL;
static size_t current;
extern void term_alloc_pool(size_t size) {
if (buffer == NULL)
buffer = malloc(size);
current = 0;
}
extern void term_free_pool() {
if (buffer != NULL)
free(buffer);
buffer = NULL;
}
extern void *term_alloc(size_t size) {
void *off = buffer + current;
current += size;
return off;
}
static inline Term *create_term(TermType type, int n) {
Term *t = (Term*)term_alloc(sizeof(Term) + n * sizeof(Term *));
t->type = type;
t->value.size = n; /* FIXME: hack! */
return t;
}
extern Term *term_array(int n, ...) {
Term *t = create_term(TERM_ARRAY, n);
va_list ap;
int i;
va_start(ap, n);
for (i = 0; i < n; i++) {
term_set_child(t, i, va_arg(ap, Term *));
}
va_end(ap);
return t;
}
extern Term *term_seq(int n, ...) {
Term *t = create_term(TERM_SEQUENCE, n);
va_list ap;
int i;
va_start(ap, n);
for (i = 0; i < n; i++) {
term_set_child(t, i, va_arg(ap, Term *));
}
va_end(ap);
return t;
}
extern Term *term_variants(int n, ...) {
Term *t = create_term(TERM_VARIANTS, n);
va_list ap;
int i;
va_start(ap, n);
for (i = 0; i < n; i++) {
term_set_child(t, i, va_arg(ap, Term *));
}
va_end(ap);
return t;
}
extern Term *term_glue(int n, ...) {
Term *t = create_term(TERM_GLUE, n);
va_list ap;
int i;
va_start(ap, n);
for (i = 0; i < n; i++) {
term_set_child(t, i, va_arg(ap, Term *));
}
va_end(ap);
return t;
}
extern Term *term_rp(Term *t1, Term *t2) {
Term *t = create_term(TERM_RECORD_PARAM, 2);
term_set_child(t, 0, t1);
term_set_child(t, 1, t2);
return t;
}
extern Term *term_suffix(const char *pref, Term *suf) {
Term *t = create_term(TERM_SUFFIX_TABLE, 2);
term_set_child(t,0,term_str(pref));
term_set_child(t,1,suf);
return t;
}
extern Term *term_str(const char *s) {
Term *t = create_term(TERM_STRING, 0);
t->value.string_value = s;
return t;
}
extern Term *term_int(int i) {
Term *t = create_term(TERM_INTEGER,0);
t->value.integer_value = i;
return t;
}
extern Term *term_meta() {
return create_term(TERM_META, 0);
}
extern Term *term_sel_int(Term *t, int i) {
switch (t->type) {
case TERM_ARRAY:
return term_get_child(t,i);
case TERM_SUFFIX_TABLE:
return term_glue(2,
term_get_child(t,0),
term_sel_int(term_get_child(t,1),i));
case TERM_META:
return t;
default:
fprintf(stderr,"Error: term_sel_int %d %d\n", t->type, i);
exit(1);
return NULL;
}
}
extern Term *term_sel(Term *t1, Term *t2) {
switch (t2->type) {
case TERM_INTEGER:
return term_sel_int(t1, t2->value.integer_value);
case TERM_RECORD_PARAM:
return term_sel(t1,term_get_child(t2,0));
case TERM_META:
return term_sel_int(t1,0);
default:
fprintf(stderr,"Error: term_sel %d %d\n", t1->type, t2->type);
exit(1);
return 0;
}
}
static void term_print_sep(FILE *stream, Term *t, const char *sep) {
int n = t->value.size;
int i;
for (i = 0; i < n; i++) {
term_print(stream, term_get_child(t,i));
if (i < n-1) {
fputs(sep, stream);
}
}
}
extern void term_print(FILE *stream, Term *t) {
switch (t->type) {
case TERM_ARRAY:
term_print(stream, term_get_child(t,0));
break;
case TERM_SEQUENCE:
term_print_sep(stream, t, " ");
break;
case TERM_VARIANTS:
term_print_sep(stream, t, "/");
break;
case TERM_GLUE:
term_print_sep(stream, t, "");
break;
case TERM_RECORD_PARAM:
term_print(stream, term_get_child(t,0));
break;
case TERM_SUFFIX_TABLE:
term_print(stream, term_get_child(t,0));
term_print(stream, term_get_child(t,1));
break;
case TERM_META:
fputs("?", stream);
break;
case TERM_STRING:
fputs(t->value.string_value, stream);
break;
case TERM_INTEGER:
fprintf(stream, "%d", t->value.integer_value);
break;
default:
fprintf(stderr,"Error: term_print %d\n", t->type);
exit(1);
}
}

65
src/runtime/c/gfcc-term.h Normal file
View File

@@ -0,0 +1,65 @@
#ifndef GFCC_TERM_H
#define GFCC_TERM_H
#include <stdio.h>
typedef enum {
/* size = variable */
TERM_ARRAY,
TERM_SEQUENCE,
TERM_VARIANTS,
TERM_GLUE,
/* size = 2 */
TERM_RECORD_PARAM,
TERM_SUFFIX_TABLE,
/* size = 0 */
TERM_META,
TERM_STRING,
TERM_INTEGER
} TermType;
struct Term_ {
TermType type;
union {
const char *string_value;
int integer_value;
int size;
} value;
struct Term_ *args[0];
};
typedef struct Term_ Term;
static inline Term *term_get_child(Term *t, int n) {
return t->args[n];
}
static inline void term_set_child(Term *t, int n, Term *c) {
t->args[n] = c;
}
extern void term_alloc_pool(size_t size);
extern void term_free_pool();
extern void *term_alloc(size_t size);
extern Term *term_array(int n, ...);
extern Term *term_seq(int n, ...);
extern Term *term_variants(int n, ...);
extern Term *term_glue(int n, ...);
extern Term *term_rp(Term *t1, Term *t2);
extern Term *term_suffix(const char *pref, Term *suf);
extern Term *term_str(const char *s);
extern Term *term_int(int i);
extern Term *term_meta();
extern Term *term_sel_int(Term *t, int i);
extern Term *term_sel(Term *t1, Term *t2);
extern void term_print(FILE *stream, Term *t);
#endif

61
src/runtime/c/gfcc-tree.c Normal file
View File

@@ -0,0 +1,61 @@
#include "gfcc-tree.h"
#include <stdlib.h>
extern int arity(Tree *t) {
switch (t->type) {
case ATOM_STRING:
case ATOM_INTEGER:
case ATOM_DOUBLE:
case ATOM_META:
return 0;
default:
return t->value.size;
}
}
static Tree *create_tree(atom_type c, int n) {
Tree *t = (Tree *)malloc(sizeof(Tree) + n * sizeof(Tree *));
t->type = c;
return t;
}
extern Tree *tree_string(const char *s) {
Tree *t = create_tree(ATOM_STRING, 0);
t->value.string_value = s;
return t;
}
extern Tree *tree_integer(int i) {
Tree *t = create_tree(ATOM_INTEGER, 0);
t->value.integer_value = i;
return t;
}
extern Tree *tree_double(double d) {
Tree *t = create_tree(ATOM_DOUBLE, 0);
t->value.double_value = d;
return t;
}
extern Tree *tree_meta() {
return create_tree(ATOM_META, 0);
}
extern Tree *tree_fun(atom_type f, int n) {
Tree *t = create_tree(f, n);
t->value.size = n;
return t;
}
extern void tree_free(Tree *t) {
int n = arity(t);
int i;
for (i = 0; i < n; i++) {
tree_free(tree_get_child(t,i));
}
free(t);
}

49
src/runtime/c/gfcc-tree.h Normal file
View File

@@ -0,0 +1,49 @@
#ifndef GFCC_TREE_H
#define GFCC_TREE_H
typedef enum {
ATOM_STRING,
ATOM_INTEGER,
ATOM_DOUBLE,
ATOM_META,
ATOM_FIRST_FUN
} atom_type;
struct Tree_{
atom_type type;
union {
const char *string_value;
int integer_value;
double double_value;
int size;
} value;
struct Tree_ *args[0];
};
typedef struct Tree_ Tree;
static inline Tree *tree_get_child(Tree *t, int n) {
return t->args[n];
}
static inline void tree_set_child(Tree *t, int n, Tree *a) {
t->args[n] = a;
}
extern int arity(Tree *t);
extern Tree *tree_string(const char *s);
extern Tree *tree_integer(int i);
extern Tree *tree_double(double d);
extern Tree *tree_meta();
extern Tree *tree_fun(atom_type f, int n);
extern void tree_free(Tree *t);
#endif

View File

@@ -0,0 +1,791 @@
{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : unstable
-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
--
-- Binary serialisation of Haskell values to and from lazy ByteStrings.
-- The Binary library provides methods for encoding Haskell values as
-- streams of bytes directly in memory. The resulting @ByteString@ can
-- then be written to disk, sent over the network, or futher processed
-- (for example, compressed with gzip).
--
-- The 'Binary' package is notable in that it provides both pure, and
-- high performance serialisation.
--
-- Values are always encoded in network order (big endian) form, and
-- encoded data should be portable across machine endianess, word size,
-- or compiler version. For example, data encoded using the Binary class
-- could be written from GHC, and read back in Hugs.
--
-----------------------------------------------------------------------------
module Data.Binary (
-- * The Binary class
Binary(..)
-- $example
-- * The Get and Put monads
, Get
, Put
-- * Useful helpers for writing instances
, putWord8
, getWord8
-- * Binary serialisation
, encode -- :: Binary a => a -> ByteString
, decode -- :: Binary a => ByteString -> a
-- * IO functions for serialisation
, encodeFile -- :: Binary a => FilePath -> a -> IO ()
, decodeFile -- :: Binary a => FilePath -> IO a
-- Lazy put and get
-- , lazyPut
-- , lazyGet
, module Data.Word -- useful
) where
#include "MachDeps.h"
import Data.Word
import Data.Binary.Put
import Data.Binary.Get
import Control.Monad
import Control.Exception
import Foreign
import System.IO
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as L
import Data.Char (chr,ord)
import Data.List (unfoldr)
-- And needed for the instances:
import qualified Data.ByteString as B
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import qualified Data.IntSet as IntSet
import qualified Data.Ratio as R
import qualified Data.Tree as T
import Data.Array.Unboxed
--
-- This isn't available in older Hugs or older GHC
--
#if __GLASGOW_HASKELL__ >= 606
import qualified Data.Sequence as Seq
import qualified Data.Foldable as Fold
#endif
------------------------------------------------------------------------
-- | The @Binary@ class provides 'put' and 'get', methods to encode and
-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
-- Show classes for textual representation of Haskell types, and is
-- suitable for serialising Haskell values to disk, over the network.
--
-- For parsing and generating simple external binary formats (e.g. C
-- structures), Binary may be used, but in general is not suitable
-- for complex protocols. Instead use the Put and Get primitives
-- directly.
--
-- Instances of Binary should satisfy the following property:
--
-- > decode . encode == id
--
-- That is, the 'get' and 'put' methods should be the inverse of each
-- other. A range of instances are provided for basic Haskell types.
--
class Binary t where
-- | Encode a value in the Put monad.
put :: t -> Put
-- | Decode a value in the Get monad
get :: Get t
-- $example
-- To serialise a custom type, an instance of Binary for that type is
-- required. For example, suppose we have a data structure:
--
-- > data Exp = IntE Int
-- > | OpE String Exp Exp
-- > deriving Show
--
-- We can encode values of this type into bytestrings using the
-- following instance, which proceeds by recursively breaking down the
-- structure to serialise:
--
-- > instance Binary Exp where
-- > put (IntE i) = do put (0 :: Word8)
-- > put i
-- > put (OpE s e1 e2) = do put (1 :: Word8)
-- > put s
-- > put e1
-- > put e2
-- >
-- > get = do t <- get :: Get Word8
-- > case t of
-- > 0 -> do i <- get
-- > return (IntE i)
-- > 1 -> do s <- get
-- > e1 <- get
-- > e2 <- get
-- > return (OpE s e1 e2)
--
-- Note how we write an initial tag byte to indicate each variant of the
-- data type.
--
-- We can simplify the writing of 'get' instances using monadic
-- combinators:
--
-- > get = do tag <- getWord8
-- > case tag of
-- > 0 -> liftM IntE get
-- > 1 -> liftM3 OpE get get get
--
-- The generation of Binary instances has been automated by a script
-- using Scrap Your Boilerplate generics. Use the script here:
-- <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
--
-- To derive the instance for a type, load this script into GHCi, and
-- bring your type into scope. Your type can then have its Binary
-- instances derived as follows:
--
-- > $ ghci -fglasgow-exts BinaryDerive.hs
-- > *BinaryDerive> :l Example.hs
-- > *Main> deriveM (undefined :: Drinks)
-- >
-- > instance Binary Main.Drinks where
-- > put (Beer a) = putWord8 0 >> put a
-- > put Coffee = putWord8 1
-- > put Tea = putWord8 2
-- > put EnergyDrink = putWord8 3
-- > put Water = putWord8 4
-- > put Wine = putWord8 5
-- > put Whisky = putWord8 6
-- > get = do
-- > tag_ <- getWord8
-- > case tag_ of
-- > 0 -> get >>= \a -> return (Beer a)
-- > 1 -> return Coffee
-- > 2 -> return Tea
-- > 3 -> return EnergyDrink
-- > 4 -> return Water
-- > 5 -> return Wine
-- > 6 -> return Whisky
-- >
--
-- To serialise this to a bytestring, we use 'encode', which packs the
-- data structure into a binary format, in a lazy bytestring
--
-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
-- > > let v = encode e
--
-- Where 'v' is a binary encoded data structure. To reconstruct the
-- original data, we use 'decode'
--
-- > > decode v :: Exp
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
--
-- The lazy ByteString that results from 'encode' can be written to
-- disk, and read from disk using Data.ByteString.Lazy IO functions,
-- such as hPutStr or writeFile:
--
-- > > writeFile "/tmp/exp.txt" (encode e)
--
-- And read back with:
--
-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
--
-- We can also directly serialise a value to and from a Handle, or a file:
--
-- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
--
-- And write a value to disk
--
-- > > encodeFile "/tmp/a.txt" v
--
------------------------------------------------------------------------
-- Wrappers to run the underlying monad
-- | Encode a value using binary serialisation to a lazy ByteString.
--
encode :: Binary a => a -> ByteString
encode = runPut . put
{-# INLINE encode #-}
-- | Decode a value from a lazy ByteString, reconstructing the original structure.
--
decode :: Binary a => ByteString -> a
decode = runGet get
------------------------------------------------------------------------
-- Convenience IO operations
-- | Lazily serialise a value to a file
--
-- This is just a convenience function, it's defined simply as:
--
-- > encodeFile f = B.writeFile f . encode
--
-- So for example if you wanted to compress as well, you could use:
--
-- > B.writeFile f . compress . encode
--
encodeFile :: Binary a => FilePath -> a -> IO ()
encodeFile f v = L.writeFile f (encode v)
-- | Lazily reconstruct a value previously written to a file.
--
-- This is just a convenience function, it's defined simply as:
--
-- > decodeFile f = return . decode =<< B.readFile f
--
-- So for example if you wanted to decompress as well, you could use:
--
-- > return . decode . decompress =<< B.readFile f
--
decodeFile :: Binary a => FilePath -> IO a
decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
s <- L.hGetContents h
evaluate $ runGet get s
-- needs bytestring 0.9.1.x to work
------------------------------------------------------------------------
-- Lazy put and get
-- lazyPut :: (Binary a) => a -> Put
-- lazyPut a = put (encode a)
-- lazyGet :: (Binary a) => Get a
-- lazyGet = fmap decode get
------------------------------------------------------------------------
-- Simple instances
-- The () type need never be written to disk: values of singleton type
-- can be reconstructed from the type alone
instance Binary () where
put () = return ()
get = return ()
-- Bools are encoded as a byte in the range 0 .. 1
instance Binary Bool where
put = putWord8 . fromIntegral . fromEnum
get = liftM (toEnum . fromIntegral) getWord8
-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
instance Binary Ordering where
put = putWord8 . fromIntegral . fromEnum
get = liftM (toEnum . fromIntegral) getWord8
------------------------------------------------------------------------
-- Words and Ints
-- Words8s are written as bytes
instance Binary Word8 where
put = putWord8
get = getWord8
-- Words16s are written as 2 bytes in big-endian (network) order
instance Binary Word16 where
put = putWord16be
get = getWord16be
-- Words32s are written as 4 bytes in big-endian (network) order
instance Binary Word32 where
put = putWord32be
get = getWord32be
-- Words64s are written as 8 bytes in big-endian (network) order
instance Binary Word64 where
put = putWord64be
get = getWord64be
-- Int8s are written as a single byte.
instance Binary Int8 where
put i = put (fromIntegral i :: Word8)
get = liftM fromIntegral (get :: Get Word8)
-- Int16s are written as a 2 bytes in big endian format
instance Binary Int16 where
put i = put (fromIntegral i :: Word16)
get = liftM fromIntegral (get :: Get Word16)
-- Int32s are written as a 4 bytes in big endian format
instance Binary Int32 where
put i = put (fromIntegral i :: Word32)
get = liftM fromIntegral (get :: Get Word32)
-- Int64s are written as a 4 bytes in big endian format
instance Binary Int64 where
put i = put (fromIntegral i :: Word64)
get = liftM fromIntegral (get :: Get Word64)
------------------------------------------------------------------------
-- Words are written as sequence of bytes. The last bit of each
-- byte indicates whether there are more bytes to be read
instance Binary Word where
put i | i <= 0x7f = do put a
| i <= 0x3fff = do put (a .|. 0x80)
put b
| i <= 0x1fffff = do put (a .|. 0x80)
put (b .|. 0x80)
put c
| i <= 0xfffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put d
#if WORD_SIZE_IN_BITS < 64
| otherwise = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put e
#else
| i <= 0x7ffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put e
| i <= 0x3ffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put f
| i <= 0x1ffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put g
| i <= 0xffffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put h
| i <= 0xffffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put h
| i <= 0x7fffffffffffffff = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put (h .|. 0x80)
put j
| otherwise = do put (a .|. 0x80)
put (b .|. 0x80)
put (c .|. 0x80)
put (d .|. 0x80)
put (e .|. 0x80)
put (f .|. 0x80)
put (g .|. 0x80)
put (h .|. 0x80)
put (j .|. 0x80)
put k
#endif
where
a = fromIntegral ( i .&. 0x7f) :: Word8
b = fromIntegral (shiftR i 7 .&. 0x7f) :: Word8
c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8
get = do i <- getWord8
(if i <= 0x7f
then return (fromIntegral i)
else do n <- get
return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
-- Int has the same representation as Word
instance Binary Int where
put i = put (fromIntegral i :: Word)
get = liftM fromIntegral (get :: Get Word)
------------------------------------------------------------------------
--
-- Portable, and pretty efficient, serialisation of Integer
--
-- Fixed-size type for a subset of Integer
type SmallInt = Int32
-- Integers are encoded in two ways: if they fit inside a SmallInt,
-- they're written as a byte tag, and that value. If the Integer value
-- is too large to fit in a SmallInt, it is written as a byte array,
-- along with a sign and length field.
instance Binary Integer where
{-# INLINE put #-}
put n | n >= lo && n <= hi = do
putWord8 0
put (fromIntegral n :: SmallInt) -- fast path
where
lo = fromIntegral (minBound :: SmallInt) :: Integer
hi = fromIntegral (maxBound :: SmallInt) :: Integer
put n = do
putWord8 1
put sign
put (unroll (abs n)) -- unroll the bytes
where
sign = fromIntegral (signum n) :: Word8
{-# INLINE get #-}
get = do
tag <- get :: Get Word8
case tag of
0 -> liftM fromIntegral (get :: Get SmallInt)
_ -> do sign <- get
bytes <- get
let v = roll bytes
return $! if sign == (1 :: Word8) then v else - v
--
-- Fold and unfold an Integer to and from a list of its bytes
--
unroll :: Integer -> [Word8]
unroll = unfoldr step
where
step 0 = Nothing
step i = Just (fromIntegral i, i `shiftR` 8)
roll :: [Word8] -> Integer
roll = foldr unstep 0
where
unstep b a = a `shiftL` 8 .|. fromIntegral b
{-
--
-- An efficient, raw serialisation for Integer (GHC only)
--
-- TODO This instance is not architecture portable. GMP stores numbers as
-- arrays of machine sized words, so the byte format is not portable across
-- architectures with different endianess and word size.
import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
import GHC.Base hiding (ord, chr)
import GHC.Prim
import GHC.Ptr (Ptr(..))
import GHC.IOBase (IO(..))
instance Binary Integer where
put (S# i) = putWord8 0 >> put (I# i)
put (J# s ba) = do
putWord8 1
put (I# s)
put (BA ba)
get = do
b <- getWord8
case b of
0 -> do (I# i#) <- get
return (S# i#)
_ -> do (I# s#) <- get
(BA a#) <- get
return (J# s# a#)
instance Binary ByteArray where
-- Pretty safe.
put (BA ba) =
let sz = sizeofByteArray# ba -- (primitive) in *bytes*
addr = byteArrayContents# ba
bs = unsafePackAddress (I# sz) addr
in put bs -- write as a ByteString. easy, yay!
-- Pretty scary. Should be quick though
get = do
(fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
assert (off == 0) $ return $ unsafePerformIO $ do
(MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
freezeByteArray arr
-- wrapper for ByteArray#
data ByteArray = BA {-# UNPACK #-} !ByteArray#
data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
newByteArray :: Int# -> IO MBA
newByteArray sz = IO $ \s ->
case newPinnedByteArray# sz s of { (# s', arr #) ->
(# s', MBA arr #) }
freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
freezeByteArray arr = IO $ \s ->
case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
(# s', BA arr' #) }
-}
instance (Binary a,Integral a) => Binary (R.Ratio a) where
put r = put (R.numerator r) >> put (R.denominator r)
get = liftM2 (R.%) get get
------------------------------------------------------------------------
-- Char is serialised as UTF-8
instance Binary Char where
put a | c <= 0x7f = put (fromIntegral c :: Word8)
| c <= 0x7ff = do put (0xc0 .|. y)
put (0x80 .|. z)
| c <= 0xffff = do put (0xe0 .|. x)
put (0x80 .|. y)
put (0x80 .|. z)
| c <= 0x10ffff = do put (0xf0 .|. w)
put (0x80 .|. x)
put (0x80 .|. y)
put (0x80 .|. z)
| otherwise = error "Not a valid Unicode code point"
where
c = ord a
z, y, x, w :: Word8
z = fromIntegral (c .&. 0x3f)
y = fromIntegral (shiftR c 6 .&. 0x3f)
x = fromIntegral (shiftR c 12 .&. 0x3f)
w = fromIntegral (shiftR c 18 .&. 0x7)
get = do
let getByte = liftM (fromIntegral :: Word8 -> Int) get
shiftL6 = flip shiftL 6 :: Int -> Int
w <- getByte
r <- case () of
_ | w < 0x80 -> return w
| w < 0xe0 -> do
x <- liftM (xor 0x80) getByte
return (x .|. shiftL6 (xor 0xc0 w))
| w < 0xf0 -> do
x <- liftM (xor 0x80) getByte
y <- liftM (xor 0x80) getByte
return (y .|. shiftL6 (x .|. shiftL6
(xor 0xe0 w)))
| otherwise -> do
x <- liftM (xor 0x80) getByte
y <- liftM (xor 0x80) getByte
z <- liftM (xor 0x80) getByte
return (z .|. shiftL6 (y .|. shiftL6
(x .|. shiftL6 (xor 0xf0 w))))
return $! chr r
------------------------------------------------------------------------
-- Instances for the first few tuples
instance (Binary a, Binary b) => Binary (a,b) where
put (a,b) = put a >> put b
get = liftM2 (,) get get
instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
put (a,b,c) = put a >> put b >> put c
get = liftM3 (,,) get get get
instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
put (a,b,c,d) = put a >> put b >> put c >> put d
get = liftM4 (,,,) get get get get
instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
put (a,b,c,d,e) = put a >> put b >> put c >> put d >> put e
get = liftM5 (,,,,) get get get get get
--
-- and now just recurse:
--
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
=> Binary (a,b,c,d,e,f) where
put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
get = do (a,(b,c,d,e,f)) <- get ; return (a,b,c,d,e,f)
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
=> Binary (a,b,c,d,e,f,g) where
put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
get = do (a,(b,c,d,e,f,g)) <- get ; return (a,b,c,d,e,f,g)
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
Binary f, Binary g, Binary h)
=> Binary (a,b,c,d,e,f,g,h) where
put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
get = do (a,(b,c,d,e,f,g,h)) <- get ; return (a,b,c,d,e,f,g,h)
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
Binary f, Binary g, Binary h, Binary i)
=> Binary (a,b,c,d,e,f,g,h,i) where
put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
get = do (a,(b,c,d,e,f,g,h,i)) <- get ; return (a,b,c,d,e,f,g,h,i)
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
Binary f, Binary g, Binary h, Binary i, Binary j)
=> Binary (a,b,c,d,e,f,g,h,i,j) where
put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
get = do (a,(b,c,d,e,f,g,h,i,j)) <- get ; return (a,b,c,d,e,f,g,h,i,j)
------------------------------------------------------------------------
-- Container types
instance Binary a => Binary [a] where
put l = put (length l) >> mapM_ put l
get = do n <- get :: Get Int
xs <- replicateM n get
return xs
instance (Binary a) => Binary (Maybe a) where
put Nothing = putWord8 0
put (Just x) = putWord8 1 >> put x
get = do
w <- getWord8
case w of
0 -> return Nothing
_ -> liftM Just get
instance (Binary a, Binary b) => Binary (Either a b) where
put (Left a) = putWord8 0 >> put a
put (Right b) = putWord8 1 >> put b
get = do
w <- getWord8
case w of
0 -> liftM Left get
_ -> liftM Right get
------------------------------------------------------------------------
-- ByteStrings (have specially efficient instances)
instance Binary B.ByteString where
put bs = do put (B.length bs)
putByteString bs
get = get >>= getByteString
--
-- Using old versions of fps, this is a type synonym, and non portable
--
-- Requires 'flexible instances'
--
instance Binary ByteString where
put bs = do put (fromIntegral (L.length bs) :: Int)
putLazyByteString bs
get = get >>= getLazyByteString
------------------------------------------------------------------------
-- Maps and Sets
instance (Ord a, Binary a) => Binary (Set.Set a) where
put s = put (Set.size s) >> mapM_ put (Set.toAscList s)
get = liftM Set.fromDistinctAscList get
instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
put m = put (Map.size m) >> mapM_ put (Map.toAscList m)
get = liftM Map.fromDistinctAscList get
instance Binary IntSet.IntSet where
put s = put (IntSet.size s) >> mapM_ put (IntSet.toAscList s)
get = liftM IntSet.fromDistinctAscList get
instance (Binary e) => Binary (IntMap.IntMap e) where
put m = put (IntMap.size m) >> mapM_ put (IntMap.toAscList m)
get = liftM IntMap.fromDistinctAscList get
------------------------------------------------------------------------
-- Queues and Sequences
#if __GLASGOW_HASKELL__ >= 606
--
-- This is valid Hugs, but you need the most recent Hugs
--
instance (Binary e) => Binary (Seq.Seq e) where
put s = put (Seq.length s) >> Fold.mapM_ put s
get = do n <- get :: Get Int
rep Seq.empty n get
where rep xs 0 _ = return $! xs
rep xs n g = xs `seq` n `seq` do
x <- g
rep (xs Seq.|> x) (n-1) g
#endif
------------------------------------------------------------------------
-- Floating point
instance Binary Double where
put d = put (decodeFloat d)
get = liftM2 encodeFloat get get
instance Binary Float where
put f = put (decodeFloat f)
get = liftM2 encodeFloat get get
------------------------------------------------------------------------
-- Trees
instance (Binary e) => Binary (T.Tree e) where
put (T.Node r s) = put r >> put s
get = liftM2 T.Node get get
------------------------------------------------------------------------
-- Arrays
instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
put a = do
put (bounds a)
put (rangeSize $ bounds a) -- write the length
mapM_ put (elems a) -- now the elems.
get = do
bs <- get
n <- get -- read the length
xs <- replicateM n get -- now the elems.
return (listArray bs xs)
--
-- The IArray UArray e constraint is non portable. Requires flexible instances
--
instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
put a = do
put (bounds a)
put (rangeSize $ bounds a) -- now write the length
mapM_ put (elems a)
get = do
bs <- get
n <- get
xs <- replicateM n get
return (listArray bs xs)

View File

@@ -0,0 +1,426 @@
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fglasgow-exts #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Builder
-- Copyright : Lennart Kolmodin, Ross Paterson
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC
--
-- Efficient construction of lazy bytestrings.
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Builder (
-- * The Builder type
Builder
, toLazyByteString
-- * Constructing Builders
, empty
, singleton
, append
, fromByteString -- :: S.ByteString -> Builder
, fromLazyByteString -- :: L.ByteString -> Builder
-- * Flushing the buffer state
, flush
-- * Derived Builders
-- ** Big-endian writes
, putWord16be -- :: Word16 -> Builder
, putWord32be -- :: Word32 -> Builder
, putWord64be -- :: Word64 -> Builder
-- ** Little-endian writes
, putWord16le -- :: Word16 -> Builder
, putWord32le -- :: Word32 -> Builder
, putWord64le -- :: Word64 -> Builder
-- ** Host-endian, unaligned writes
, putWordhost -- :: Word -> Builder
, putWord16host -- :: Word16 -> Builder
, putWord32host -- :: Word32 -> Builder
, putWord64host -- :: Word64 -> Builder
) where
import Foreign
import Data.Monoid
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import Data.ByteString.Base (inlinePerformIO)
import qualified Data.ByteString.Base as S
#else
import Data.ByteString.Internal (inlinePerformIO)
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString.Lazy.Internal as L
#endif
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word (Word32(..),Word16(..),Word64(..))
#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
import GHC.Word (uncheckedShiftRL64#)
#endif
#endif
------------------------------------------------------------------------
-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
-- There are several functions for constructing 'Builder's, but only one
-- to inspect them: to extract any data, you have to turn them into lazy
-- 'L.ByteString's using 'toLazyByteString'.
--
-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
-- arrays piece by piece. As each buffer is filled, it is \'popped\'
-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
-- All this is hidden from the user of the 'Builder'.
newtype Builder = Builder {
-- Invariant (from Data.ByteString.Lazy):
-- The lists include no null ByteStrings.
runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
}
instance Monoid Builder where
mempty = empty
{-# INLINE mempty #-}
mappend = append
{-# INLINE mappend #-}
------------------------------------------------------------------------
-- | /O(1)./ The empty Builder, satisfying
--
-- * @'toLazyByteString' 'empty' = 'L.empty'@
--
empty :: Builder
empty = Builder id
{-# INLINE empty #-}
-- | /O(1)./ A Builder taking a single byte, satisfying
--
-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
--
singleton :: Word8 -> Builder
singleton = writeN 1 . flip poke
{-# INLINE singleton #-}
------------------------------------------------------------------------
-- | /O(1)./ The concatenation of two Builders, an associative operation
-- with identity 'empty', satisfying
--
-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
--
append :: Builder -> Builder -> Builder
append (Builder f) (Builder g) = Builder (f . g)
{-# INLINE append #-}
-- | /O(1)./ A Builder taking a 'S.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
--
fromByteString :: S.ByteString -> Builder
fromByteString bs
| S.null bs = empty
| otherwise = flush `append` mapBuilder (bs :)
{-# INLINE fromByteString #-}
-- | /O(1)./ A Builder taking a lazy 'L.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
--
fromLazyByteString :: L.ByteString -> Builder
fromLazyByteString bss = flush `append` mapBuilder (L.toChunks bss ++)
{-# INLINE fromLazyByteString #-}
------------------------------------------------------------------------
-- Our internal buffer type
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
{-# UNPACK #-} !Int -- offset
{-# UNPACK #-} !Int -- used bytes
{-# UNPACK #-} !Int -- length left
------------------------------------------------------------------------
-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
-- The construction work takes place if and when the relevant part of
-- the lazy 'L.ByteString' is demanded.
--
toLazyByteString :: Builder -> L.ByteString
toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
buf <- newBuffer defaultSize
return (runBuilder (m `append` flush) (const []) buf)
-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
-- yielding a new chunk in the result lazy 'L.ByteString'.
flush :: Builder
flush = Builder $ \ k buf@(Buffer p o u l) ->
if u == 0
then k buf
else S.PS p o u : k (Buffer p (o+u) 0 l)
------------------------------------------------------------------------
--
-- copied from Data.ByteString.Lazy
--
defaultSize :: Int
defaultSize = 32 * k - overhead
where k = 1024
overhead = 2 * sizeOf (undefined :: Int)
------------------------------------------------------------------------
-- | Sequence an IO operation on the buffer
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
buf' <- f buf
return (k buf')
{-# INLINE unsafeLiftIO #-}
-- | Get the size of the buffer
withSize :: (Int -> Builder) -> Builder
withSize f = Builder $ \ k buf@(Buffer _ _ _ l) ->
runBuilder (f l) k buf
-- | Map the resulting list of bytestrings.
mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
mapBuilder f = Builder (f .)
------------------------------------------------------------------------
-- | Ensure that there are at least @n@ many bytes available.
ensureFree :: Int -> Builder
ensureFree n = n `seq` withSize $ \ l ->
if n <= l then empty else
flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
{-# INLINE ensureFree #-}
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- bytes into the memory.
writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
{-# INLINE writeN #-}
writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
writeNBuffer n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBuffer #-}
newBuffer :: Int -> IO Buffer
newBuffer size = do
fp <- S.mallocByteString size
return $! Buffer fp 0 0 size
{-# INLINE newBuffer #-}
------------------------------------------------------------------------
-- Aligned, host order writes of storable values
-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
-- storable values into the memory.
writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
writeNbytes n f = ensureFree n `append` unsafeLiftIO (writeNBufferBytes n f)
{-# INLINE writeNbytes #-}
writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
writeNBufferBytes n f (Buffer fp o u l) = do
withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBufferBytes #-}
------------------------------------------------------------------------
--
-- We rely on the fromIntegral to do the right masking for us.
-- The inlining here is critical, and can be worth 4x performance
--
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Builder
putWord16be w = writeN 2 $ \p -> do
poke p (fromIntegral (shiftr_w16 w 8) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (w) :: Word8)
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Builder
putWord16le w = writeN 2 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
{-# INLINE putWord16le #-}
-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Builder
putWord32be w = writeN 4 $ \p -> do
poke p (fromIntegral (shiftr_w32 w 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (w) :: Word8)
{-# INLINE putWord32be #-}
--
-- a data type to tag Put/Check. writes construct these which are then
-- inlined and flattened. matching Checks will be more robust with rules.
--
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Builder
putWord32le w = writeN 4 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 w 24) :: Word8)
{-# INLINE putWord32le #-}
-- on a little endian machine:
-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
--
-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
-- Word32, and write that
--
putWord64be w =
let a = fromIntegral (shiftr_w64 w 32) :: Word32
b = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (b) :: Word8)
#else
putWord64be w = writeN 8 $ \p -> do
poke p (fromIntegral (shiftr_w64 w 56) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (w) :: Word8)
#endif
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
putWord64le w =
let b = fromIntegral (shiftr_w64 w 32) :: Word32
a = fromIntegral w :: Word32
in writeN 8 $ \p -> do
poke (p) (fromIntegral (a) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (b) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 8) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
#else
putWord64le w = writeN 8 $ \p -> do
poke p (fromIntegral (w) :: Word8)
poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 8) :: Word8)
poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
#endif
{-# INLINE putWord64le #-}
-- on a little endian machine:
-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
------------------------------------------------------------------------
-- Unaligned, word size ops
-- | /O(1)./ A Builder taking a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Builder
putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
{-# INLINE putWordhost #-}
-- | Write a Word16 in native host order and host endianness.
-- 2 bytes will be written, unaligned.
putWord16host :: Word16 -> Builder
putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
{-# INLINE putWord16host #-}
-- | Write a Word32 in native host order and host endianness.
-- 4 bytes will be written, unaligned.
putWord32host :: Word32 -> Builder
putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
{-# INLINE putWord32host #-}
-- | Write a Word64 in native host order.
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- 8 bytes will be written, unaligned.
putWord64host :: Word64 -> Builder
putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
{-# INLINE putWord64host #-}
------------------------------------------------------------------------
-- Unchecked shifts
{-# INLINE shiftr_w16 #-}
shiftr_w16 :: Word16 -> Int -> Word16
{-# INLINE shiftr_w32 #-}
shiftr_w32 :: Word32 -> Int -> Word32
{-# INLINE shiftr_w64 #-}
shiftr_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftRL64"
uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
#endif
#else
shiftr_w16 = shiftR
shiftr_w32 = shiftR
shiftr_w64 = shiftR
#endif

View File

@@ -0,0 +1,544 @@
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fglasgow-exts #-}
-- for unboxed shifts
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Get
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : experimental
-- Portability : portable to Hugs and GHC.
--
-- The Get monad. A monad for efficiently building structures from
-- encoded lazy ByteStrings
--
-----------------------------------------------------------------------------
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
#include "MachDeps.h"
#endif
module Data.Binary.Get (
-- * The Get type
Get
, runGet
, runGetState
-- * Parsing
, skip
, uncheckedSkip
, lookAhead
, lookAheadM
, lookAheadE
, uncheckedLookAhead
-- * Utility
, bytesRead
, getBytes
, remaining
, isEmpty
-- * Parsing particular types
, getWord8
-- ** ByteStrings
, getByteString
, getLazyByteString
, getLazyByteStringNul
, getRemainingLazyByteString
-- ** Big-endian reads
, getWord16be
, getWord32be
, getWord64be
-- ** Little-endian reads
, getWord16le
, getWord32le
, getWord64le
-- ** Host-endian, unaligned reads
, getWordhost
, getWord16host
, getWord32host
, getWord64host
) where
import Control.Monad (when,liftM,ap)
import Control.Monad.Fix
import Data.Maybe (isNothing)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
#ifdef BYTESTRING_IN_BASE
import qualified Data.ByteString.Base as B
#else
import qualified Data.ByteString.Internal as B
import qualified Data.ByteString.Lazy.Internal as L
#endif
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative (Applicative(..))
#endif
import Foreign
-- used by splitAtST
import Control.Monad.ST
import Data.STRef
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
import GHC.Base
import GHC.Word
import GHC.Int
#endif
-- | The parse state
data S = S {-# UNPACK #-} !B.ByteString -- current chunk
L.ByteString -- the rest of the input
{-# UNPACK #-} !Int64 -- bytes read
-- | The Get monad is just a State monad carrying around the input ByteString
newtype Get a = Get { unGet :: S -> (a, S) }
instance Functor Get where
fmap f m = Get (\s -> case unGet m s of
(a, s') -> (f a, s'))
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative Get where
pure = return
(<*>) = ap
#endif
instance Monad Get where
return a = Get (\s -> (a, s))
{-# INLINE return #-}
m >>= k = Get (\s -> case unGet m s of
(a, s') -> unGet (k a) s')
{-# INLINE (>>=) #-}
fail = failDesc
instance MonadFix Get where
mfix f = Get (\s -> let (a,s') = unGet (f a) s
in (a,s'))
------------------------------------------------------------------------
get :: Get S
get = Get (\s -> (s, s))
put :: S -> Get ()
put s = Get (\_ -> ((), s))
------------------------------------------------------------------------
--
-- dons, GHC 6.10: explicit inlining disabled, was killing performance.
-- Without it, GHC seems to do just fine. And we get similar
-- performance with 6.8.2 anyway.
--
initState :: L.ByteString -> S
initState xs = mkState xs 0
{- INLINE initState -}
{-
initState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty 0
(x:xs') -> S x (B.LPS xs') 0
-}
#ifndef BYTESTRING_IN_BASE
mkState :: L.ByteString -> Int64 -> S
mkState l = case l of
L.Empty -> S B.empty L.empty
L.Chunk x xs -> S x xs
{- INLINE mkState -}
#else
mkState :: L.ByteString -> Int64 -> S
mkState (B.LPS xs) =
case xs of
[] -> S B.empty L.empty
(x:xs') -> S x (B.LPS xs')
#endif
-- | Run the Get monad applies a 'get'-based parser on the input ByteString
runGet :: Get a -> L.ByteString -> a
runGet m str = case unGet m (initState str) of (a, _) -> a
-- | Run the Get monad applies a 'get'-based parser on the input
-- ByteString. Additional to the result of get it returns the number of
-- consumed bytes and the rest of the input.
runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
runGetState m str off =
case unGet m (mkState str off) of
(a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
------------------------------------------------------------------------
failDesc :: String -> Get a
failDesc err = do
S _ _ bytes <- get
Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get ()
skip n = readN (fromIntegral n) (const ())
-- | Skip ahead @n@ bytes. No error if there isn't enough bytes.
uncheckedSkip :: Int64 -> Get ()
uncheckedSkip n = do
S s ss bytes <- get
if fromIntegral (B.length s) >= n
then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
else do
let rest = L.drop (n - fromIntegral (B.length s)) ss
put $! mkState rest (bytes + n)
-- | Run @ga@, but return without consuming its input.
-- Fails if @ga@ fails.
lookAhead :: Get a -> Get a
lookAhead ga = do
s <- get
a <- ga
put s
return a
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
-- Fails if @gma@ fails.
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
lookAheadM gma = do
s <- get
ma <- gma
when (isNothing ma) $
put s
return ma
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
-- Fails if @gea@ fails.
lookAheadE :: Get (Either a b) -> Get (Either a b)
lookAheadE gea = do
s <- get
ea <- gea
case ea of
Left _ -> put s
_ -> return ()
return ea
-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
uncheckedLookAhead :: Int64 -> Get L.ByteString
uncheckedLookAhead n = do
S s ss _ <- get
if n <= fromIntegral (B.length s)
then return (L.fromChunks [B.take (fromIntegral n) s])
else return $ L.take n (s `join` ss)
------------------------------------------------------------------------
-- Utility
-- | Get the total number of bytes read to this point.
bytesRead :: Get Int64
bytesRead = do
S _ _ b <- get
return b
-- | Get the number of remaining unparsed bytes.
-- Useful for checking whether all input has been consumed.
-- Note that this forces the rest of the input.
remaining :: Get Int64
remaining = do
S s ss _ <- get
return (fromIntegral (B.length s) + L.length ss)
-- | Test whether all input has been consumed,
-- i.e. there are no remaining unparsed bytes.
isEmpty :: Get Bool
isEmpty = do
S s ss _ <- get
return (B.null s && L.null ss)
------------------------------------------------------------------------
-- Utility with ByteStrings
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input.
getByteString :: Int -> Get B.ByteString
getByteString n = readN n id
{-# INLINE getByteString #-}
-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
-- @n@ bytes are left in the input.
getLazyByteString :: Int64 -> Get L.ByteString
getLazyByteString n = do
S s ss bytes <- get
let big = s `join` ss
case splitAtST n big of
(consume, rest) -> do put $ mkState rest (bytes + n)
return consume
{-# INLINE getLazyByteString #-}
-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
-- if it reaches the end of input without hitting a NUL.
getLazyByteStringNul :: Get L.ByteString
getLazyByteStringNul = do
S s ss bytes <- get
let big = s `join` ss
(consume, t) = L.break (== 0) big
(h, rest) = L.splitAt 1 t
if L.null h
then fail "too few bytes"
else do
put $ mkState rest (bytes + L.length consume + 1)
return consume
{-# INLINE getLazyByteStringNul #-}
-- | Get the remaining bytes as a lazy ByteString
getRemainingLazyByteString :: Get L.ByteString
getRemainingLazyByteString = do
S s ss _ <- get
return (s `join` ss)
------------------------------------------------------------------------
-- Helpers
-- | Pull @n@ bytes from the input, as a strict ByteString.
getBytes :: Int -> Get B.ByteString
getBytes n = do
S s ss bytes <- get
if n <= B.length s
then do let (consume,rest) = B.splitAt n s
put $! S rest ss (bytes + fromIntegral n)
return $! consume
else
case L.splitAt (fromIntegral n) (s `join` ss) of
(consuming, rest) ->
do let now = B.concat . L.toChunks $ consuming
put $! mkState rest (bytes + fromIntegral n)
-- forces the next chunk before this one is returned
if (B.length now < n)
then
fail "too few bytes"
else
return now
{- INLINE getBytes -}
-- ^ important
#ifndef BYTESTRING_IN_BASE
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb lb
| B.null bb = lb
| otherwise = L.Chunk bb lb
#else
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb (B.LPS lb)
| B.null bb = B.LPS lb
| otherwise = B.LPS (bb:lb)
#endif
-- don't use L.append, it's strict in it's second argument :/
{- INLINE join -}
-- | Split a ByteString. If the first result is consumed before the --
-- second, this runs in constant heap space.
--
-- You must force the returned tuple for that to work, e.g.
--
-- > case splitAtST n xs of
-- > (ys,zs) -> consume ys ... consume zs
--
splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
splitAtST i ps | i <= 0 = (L.empty, ps)
#ifndef BYTESTRING_IN_BASE
splitAtST i ps = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (xs, ys))
where
first r 0 xs@(L.Chunk _ _) = writeSTRef r xs >> return L.Empty
first r _ L.Empty = writeSTRef r L.Empty >> return L.Empty
first r n (L.Chunk x xs)
| n < l = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
| otherwise = do writeSTRef r (L.drop (n - l) xs)
liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#else
splitAtST i (B.LPS ps) = runST (
do r <- newSTRef undefined
xs <- first r i ps
ys <- unsafeInterleaveST (readSTRef r)
return (B.LPS xs, B.LPS ys))
where first r 0 xs = writeSTRef r xs >> return []
first r _ [] = writeSTRef r [] >> return []
first r n (x:xs)
| n < l = do writeSTRef r (B.drop (fromIntegral n) x : xs)
return [B.take (fromIntegral n) x]
| otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)
where l = fromIntegral (B.length x)
#endif
{- INLINE splitAtST -}
-- Pull n bytes from the input, and apply a parser to those bytes,
-- yielding a value. If less than @n@ bytes are available, fail with an
-- error. This wraps @getBytes@.
readN :: Int -> (B.ByteString -> a) -> Get a
readN n f = fmap f $ getBytes n
{- INLINE readN -}
-- ^ important
------------------------------------------------------------------------
-- Primtives
-- helper, get a raw Ptr onto a strict ByteString copied out of the
-- underlying lazy byteString. So many indirections from the raw parser
-- state that my head hurts...
getPtr :: Storable a => Int -> Get a
getPtr n = do
(fp,o,_) <- readN n B.toForeignPtr
return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
{- INLINE getPtr -}
------------------------------------------------------------------------
-- | Read a Word8 from the monad state
getWord8 :: Get Word8
getWord8 = getPtr (sizeOf (undefined :: Word8))
{- INLINE getWord8 -}
-- | Read a Word16 in big endian format
getWord16be :: Get Word16
getWord16be = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 1))
{- INLINE getWord16be -}
-- | Read a Word16 in little endian format
getWord16le :: Get Word16
getWord16le = do
s <- readN 2 id
return $! (fromIntegral (s `B.index` 1) `shiftl_w16` 8) .|.
(fromIntegral (s `B.index` 0) )
{- INLINE getWord16le -}
-- | Read a Word32 in big endian format
getWord32be :: Get Word32
getWord32be = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 3) )
{- INLINE getWord32be -}
-- | Read a Word32 in little endian format
getWord32le :: Get Word32
getWord32le = do
s <- readN 4 id
return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w32` 8) .|.
(fromIntegral (s `B.index` 0) )
{- INLINE getWord32le -}
-- | Read a Word64 in big endian format
getWord64be :: Get Word64
getWord64be = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 7) )
{- INLINE getWord64be -}
-- | Read a Word64 in little endian format
getWord64le :: Get Word64
getWord64le = do
s <- readN 8 id
return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
(fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
(fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
(fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
(fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
(fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
(fromIntegral (s `B.index` 1) `shiftl_w64` 8) .|.
(fromIntegral (s `B.index` 0) )
{- INLINE getWord64le -}
------------------------------------------------------------------------
-- Host-endian reads
-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get Word
getWordhost = getPtr (sizeOf (undefined :: Word))
{- INLINE getWordhost -}
-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
{- INLINE getWord16host -}
-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get Word32
getWord32host = getPtr (sizeOf (undefined :: Word32))
{- INLINE getWord32host -}
-- | /O(1)./ Read a Word64 in native host order and host endianess.
getWord64host :: Get Word64
getWord64host = getPtr (sizeOf (undefined :: Word64))
{- INLINE getWord64host -}
------------------------------------------------------------------------
-- Unchecked shifts
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)
#if WORD_SIZE_IN_BITS < 64
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)
#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftL64"
uncheckedShiftL64# :: Word64# -> Int# -> Word64#
#endif
#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif
#else
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif

View File

@@ -0,0 +1,216 @@
{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module : Data.Binary.Put
-- Copyright : Lennart Kolmodin
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
-- Stability : stable
-- Portability : Portable to Hugs and GHC. Requires MPTCs
--
-- The Put monad. A monad for efficiently constructing lazy bytestrings.
--
-----------------------------------------------------------------------------
module Data.Binary.Put (
-- * The Put type
Put
, PutM(..)
, runPut
, runPutM
, putBuilder
, execPut
-- * Flushing the implicit parse state
, flush
-- * Primitives
, putWord8
, putByteString
, putLazyByteString
-- * Big-endian primitives
, putWord16be
, putWord32be
, putWord64be
-- * Little-endian primitives
, putWord16le
, putWord32le
, putWord64le
-- * Host-endian, unaligned writes
, putWordhost -- :: Word -> Put
, putWord16host -- :: Word16 -> Put
, putWord32host -- :: Word32 -> Put
, putWord64host -- :: Word64 -> Put
) where
import Data.Monoid
import Data.Binary.Builder (Builder, toLazyByteString)
import qualified Data.Binary.Builder as B
import Data.Word
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
#ifdef APPLICATIVE_IN_BASE
import Control.Applicative
#endif
------------------------------------------------------------------------
-- XXX Strict in buffer only.
data PairS a = PairS a {-# UNPACK #-}!Builder
sndS :: PairS a -> Builder
sndS (PairS _ b) = b
-- | The PutM type. A Writer monad over the efficient Builder monoid.
newtype PutM a = Put { unPut :: PairS a }
-- | Put merely lifts Builder into a Writer monad, applied to ().
type Put = PutM ()
instance Functor PutM where
fmap f m = Put $ let PairS a w = unPut m in PairS (f a) w
{-# INLINE fmap #-}
#ifdef APPLICATIVE_IN_BASE
instance Applicative PutM where
pure = return
m <*> k = Put $
let PairS f w = unPut m
PairS x w' = unPut k
in PairS (f x) (w `mappend` w')
#endif
-- Standard Writer monad, with aggressive inlining
instance Monad PutM where
return a = Put $ PairS a mempty
{-# INLINE return #-}
m >>= k = Put $
let PairS a w = unPut m
PairS b w' = unPut (k a)
in PairS b (w `mappend` w')
{-# INLINE (>>=) #-}
m >> k = Put $
let PairS _ w = unPut m
PairS b w' = unPut k
in PairS b (w `mappend` w')
{-# INLINE (>>) #-}
tell :: Builder -> Put
tell b = Put $ PairS () b
{-# INLINE tell #-}
putBuilder :: Builder -> Put
putBuilder = tell
{-# INLINE putBuilder #-}
-- | Run the 'Put' monad
execPut :: PutM a -> Builder
execPut = sndS . unPut
{-# INLINE execPut #-}
-- | Run the 'Put' monad with a serialiser
runPut :: Put -> L.ByteString
runPut = toLazyByteString . sndS . unPut
{-# INLINE runPut #-}
-- | Run the 'Put' monad with a serialiser and get its result
runPutM :: PutM a -> (a, L.ByteString)
runPutM (Put (PairS f s)) = (f, toLazyByteString s)
{-# INLINE runPutM #-}
------------------------------------------------------------------------
-- | Pop the ByteString we have constructed so far, if any, yielding a
-- new chunk in the result ByteString.
flush :: Put
flush = tell B.flush
{-# INLINE flush #-}
-- | Efficiently write a byte into the output buffer
putWord8 :: Word8 -> Put
putWord8 = tell . B.singleton
{-# INLINE putWord8 #-}
-- | An efficient primitive to write a strict ByteString into the output buffer.
-- It flushes the current buffer, and writes the argument into a new chunk.
putByteString :: S.ByteString -> Put
putByteString = tell . B.fromByteString
{-# INLINE putByteString #-}
-- | Write a lazy ByteString efficiently, simply appending the lazy
-- ByteString chunks to the output buffer
putLazyByteString :: L.ByteString -> Put
putLazyByteString = tell . B.fromLazyByteString
{-# INLINE putLazyByteString #-}
-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Put
putWord16be = tell . B.putWord16be
{-# INLINE putWord16be #-}
-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Put
putWord16le = tell . B.putWord16le
{-# INLINE putWord16le #-}
-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Put
putWord32be = tell . B.putWord32be
{-# INLINE putWord32be #-}
-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Put
putWord32le = tell . B.putWord32le
{-# INLINE putWord32le #-}
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Put
putWord64be = tell . B.putWord64be
{-# INLINE putWord64be #-}
-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Put
putWord64le = tell . B.putWord64le
{-# INLINE putWord64le #-}
------------------------------------------------------------------------
-- | /O(1)./ Write a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Put
putWordhost = tell . B.putWordhost
{-# INLINE putWordhost #-}
-- | /O(1)./ Write a Word16 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord16host :: Word16 -> Put
putWord16host = tell . B.putWord16host
{-# INLINE putWord16host #-}
-- | /O(1)./ Write a Word32 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord32host :: Word32 -> Put
putWord32host = tell . B.putWord32host
{-# INLINE putWord32host #-}
-- | /O(1)./ Write a Word64 in native host order
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- For portability issues see @putWordhost@.
putWord64host :: Word64 -> Put
putWord64host = tell . B.putWord64host
{-# INLINE putWord64host #-}

352
src/runtime/haskell/PGF.hs Normal file
View File

@@ -0,0 +1,352 @@
-------------------------------------------------
-- |
-- Module : PGF
-- Maintainer : Aarne Ranta
-- Stability : stable
-- Portability : portable
--
-- This module is an Application Programming Interface to
-- load and interpret grammars compiled in Portable Grammar Format (PGF).
-- The PGF format is produced as a final output from the GF compiler.
-- The API is meant to be used for embedding GF grammars in Haskell
-- programs
-------------------------------------------------
module PGF(
-- * PGF
PGF,
readPGF,
-- * Identifiers
CId, mkCId, wildCId,
showCId, readCId,
-- * Languages
Language,
showLanguage, readLanguage,
languages, abstractName, languageCode,
-- * Types
Type, Hypo,
showType, readType,
mkType, mkHypo, mkDepHypo, mkImplHypo,
categories, startCat,
-- * Functions
functions, functionType,
-- * Expressions & Trees
-- ** Tree
Tree,
-- ** Expr
Expr,
showExpr, readExpr,
mkApp, unApp,
mkStr, unStr,
mkInt, unInt,
mkDouble, unDouble,
mkMeta, isMeta,
-- * Operations
-- ** Linearization
linearize, linearizeAllLang, linearizeAll,
showPrintName,
-- ** Parsing
parse, parseWithRecovery, canParse, parseAllLang, parseAll,
-- ** Evaluation
PGF.compute, paraphrase,
-- ** Type Checking
-- | The type checker in PGF does both type checking and renaming
-- i.e. it verifies that all identifiers are declared and it
-- distinguishes between global function or type indentifiers and
-- variable names. The type checker should always be applied on
-- expressions entered by the user i.e. those produced via functions
-- like 'readType' and 'readExpr' because otherwise unexpected results
-- could appear. All typechecking functions returns updated versions
-- of the input types or expressions because the typechecking could
-- also lead to metavariables instantiations.
checkType, checkExpr, inferExpr,
TcError(..), ppTcError,
-- ** Word Completion (Incremental Parsing)
complete,
Incremental.ParseState,
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
-- ** Generation
generateRandom, generateAll, generateAllDepth,
-- ** Morphological Analysis
Lemma, Analysis, Morpho,
lookupMorpho, buildMorpho,
-- ** Visualizations
graphvizAbstractTree,
graphvizParseTree,
graphvizDependencyTree,
graphvizAlignment,
-- * Browsing
browse
) where
import PGF.CId
import PGF.Linearize
import PGF.Generate
import PGF.TypeCheck
import PGF.Paraphrase
import PGF.VisualizeTree
import PGF.Macros
import PGF.Expr (Tree)
import PGF.Morphology
import PGF.Data hiding (functions)
import PGF.Binary
import qualified PGF.Parsing.FCFG.Active as Active
import qualified PGF.Parsing.FCFG.Incremental as Incremental
import qualified GF.Compile.GeneratePMCFG as PMCFG
import GF.Infra.Option
import GF.Data.Utilities (replace)
import Data.Char
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import Data.Maybe
import Data.Binary
import Data.List(mapAccumL)
import System.Random (newStdGen)
import Control.Monad
import Text.PrettyPrint
---------------------------------------------------
-- Interface
---------------------------------------------------
-- | Reads file in Portable Grammar Format and produces
-- 'PGF' structure. The file is usually produced with:
--
-- > $ gf -make <grammar file name>
readPGF :: FilePath -> IO PGF
-- | Linearizes given expression as string in the language
linearize :: PGF -> Language -> Tree -> String
-- | Tries to parse the given string in the specified language
-- and to produce abstract syntax expression. An empty
-- list is returned if the parsing is not successful. The list may also
-- contain more than one element if the grammar is ambiguous.
-- Throws an exception if the given language cannot be used
-- for parsing, see 'canParse'.
parse :: PGF -> Language -> Type -> String -> [Tree]
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
-- | Checks whether the given language can be used for parsing.
canParse :: PGF -> Language -> Bool
-- | The same as 'linearizeAllLang' but does not return
-- the language.
linearizeAll :: PGF -> Tree -> [String]
-- | Linearizes given expression as string in all languages
-- available in the grammar.
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
-- | Show the printname of a type
showPrintName :: PGF -> Language -> Type -> String
-- | The same as 'parseAllLang' but does not return
-- the language.
parseAll :: PGF -> Type -> String -> [[Tree]]
-- | Tries to parse the given string with all available languages.
-- Languages which cannot be used for parsing (see 'canParse')
-- are ignored.
-- The returned list contains pairs of language
-- and list of abstract syntax expressions
-- (this is a list, since grammars can be ambiguous).
-- Only those languages
-- for which at least one parsing is possible are listed.
parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
-- | The same as 'generateAllDepth' but does not limit
-- the depth in the generation.
generateAll :: PGF -> Type -> [Expr]
-- | Generates an infinite list of random abstract syntax expressions.
-- This is usefull for tree bank generation which after that can be used
-- for grammar testing.
generateRandom :: PGF -> Type -> IO [Expr]
-- | Generates an exhaustive possibly infinite list of
-- abstract syntax expressions. A depth can be specified
-- to limit the search space.
generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
-- | List of all languages available in the given grammar.
languages :: PGF -> [Language]
-- | Gets the RFC 4646 language tag
-- of the language which the given concrete syntax implements,
-- if this is listed in the source grammar.
-- Example language tags include @\"en\"@ for English,
-- and @\"en-UK\"@ for British English.
languageCode :: PGF -> Language -> Maybe String
-- | The abstract language name is the name of the top-level
-- abstract module
abstractName :: PGF -> Language
-- | List of all categories defined in the given grammar.
-- The categories are defined in the abstract syntax
-- with the \'cat\' keyword.
categories :: PGF -> [CId]
-- | The start category is defined in the grammar with
-- the \'startcat\' flag. This is usually the sentence category
-- but it is not necessary. Despite that there is a start category
-- defined you can parse with any category. The start category
-- definition is just for convenience.
startCat :: PGF -> Type
-- | List of all functions defined in the abstract syntax
functions :: PGF -> [CId]
-- | The type of a given function
functionType :: PGF -> CId -> Maybe Type
-- | Complete the last word in the given string. If the input
-- is empty or ends in whitespace, the last word is considred
-- to be the empty string. This means that the completions
-- will be all possible next words.
complete :: PGF -> Language -> Type -> String
-> [String] -- ^ Possible completions,
-- including the given input.
---------------------------------------------------
-- Implementation
---------------------------------------------------
readPGF f = decodeFile f >>= addParsers
-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
addParsers :: PGF -> IO PGF
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
| (lang,cnc) <- Map.toList (concretes pgf)]
return pgf { concretes = Map.fromList cncs }
where
wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
return (lang,cnc { parser = Just pinfo })
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang
parse pgf lang typ s =
case Map.lookup lang (concretes pgf) of
Just cnc -> case parser cnc of
Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
then Incremental.parse pgf lang typ (words s)
else Active.parse "t" pinfo typ (words s)
Nothing -> error ("No parser built for language: " ++ showCId lang)
Nothing -> error ("Unknown language: " ++ showCId lang)
parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)
canParse pgf cnc = isJust (lookParser pgf cnc)
linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t =
[(lang,PGF.linearize mgr lang t) | lang <- languages mgr]
showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c
parseAll mgr typ = map snd . parseAllLang mgr typ
parseAllLang mgr typ s =
[(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
generateRandom pgf cat = do
gen <- newStdGen
return $ genRandom gen pgf cat
generateAll pgf cat = generate pgf cat Nothing
generateAllDepth pgf cat = generate pgf cat
abstractName pgf = absname pgf
languages pgf = cncnames pgf
languageCode pgf lang =
fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language")
categories pgf = [c | (c,hs) <- Map.toList (cats (abstract pgf))]
startCat pgf = DTyp [] (lookStartCat pgf) []
functions pgf = Map.keys (funs (abstract pgf))
functionType pgf fun =
case Map.lookup fun (funs (abstract pgf)) of
Just (ty,_,_) -> Just ty
Nothing -> Nothing
complete pgf from typ input =
let (ws,prefix) = tokensAndPrefix input
state0 = Incremental.initState pgf from typ
in case loop state0 ws of
Nothing -> []
Just state ->
(if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
where
tokensAndPrefix :: String -> ([String],String)
tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
| null ws = ([],"")
| otherwise = (init ws, last ws)
where ws = words s
loop ps [] = Just ps
loop ps (t:ts) = case Incremental.nextState ps t of
Left es -> Nothing
Right ps -> loop ps ts
-- | Converts an expression to normal form
compute :: PGF -> Expr -> Expr
compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 []
browse :: PGF -> CId -> Maybe (String,[CId],[CId])
browse pgf id = fmap (\def -> (def,producers,consumers)) definition
where
definition = case Map.lookup id (funs (abstract pgf)) of
Just (ty,_,eqs) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
if null eqs
then empty
else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
Nothing -> case Map.lookup id (cats (abstract pgf)) of
Just hyps -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)))
Nothing -> Nothing
(producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
where
accum f (ty,_,_) (plist,clist) =
let !plist' = if id `elem` ps then f : plist else plist
!clist' = if id `elem` cs then f : clist else clist
in (plist',clist')
where
(ps,cs) = tyIds ty
tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss)
where
(pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps]
expIds (EAbs _ _ e) ids = expIds e ids
expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids)
expIds (EFun id) ids = id : ids
expIds (ETyped e _) ids = expIds e ids
expIds _ ids = ids

View File

@@ -0,0 +1,199 @@
module PGF.Binary where
import PGF.CId
import PGF.Data
import Data.Binary
import Data.Binary.Put
import Data.Binary.Get
import qualified Data.ByteString as BS
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
pgfMajorVersion, pgfMinorVersion :: Word16
(pgfMajorVersion, pgfMinorVersion) = (1,0)
instance Binary PGF where
put pgf = putWord16be pgfMajorVersion >>
putWord16be pgfMinorVersion >>
put ( absname pgf, cncnames pgf
, gflags pgf
, abstract pgf, concretes pgf
)
get = do v1 <- getWord16be
v2 <- getWord16be
absname <- get
cncnames <- get
gflags <- get
abstract <- get
concretes <- get
return (PGF{ absname=absname, cncnames=cncnames
, gflags=gflags
, abstract=abstract, concretes=concretes
})
instance Binary CId where
put (CId bs) = put bs
get = liftM CId get
instance Binary Abstr where
put abs = put (aflags abs, funs abs, cats abs)
get = do aflags <- get
funs <- get
cats <- get
let catfuns = Map.mapWithKey (\cat _ -> [f | (f, (DTyp _ c _,_,_)) <- Map.toList funs, c==cat]) cats
return (Abstr{ aflags=aflags
, funs=funs, cats=cats
, catfuns=catfuns
})
instance Binary Concr where
put cnc = put ( cflags cnc, lins cnc, opers cnc
, lincats cnc, lindefs cnc
, printnames cnc, paramlincats cnc
, parser cnc
)
get = do cflags <- get
lins <- get
opers <- get
lincats <- get
lindefs <- get
printnames <- get
paramlincats <- get
parser <- get
return (Concr{ cflags=cflags, lins=lins, opers=opers
, lincats=lincats, lindefs=lindefs
, printnames=printnames
, paramlincats=paramlincats
, parser=parser
})
instance Binary Alternative where
put (Alt v x) = put v >> put x
get = liftM2 Alt get get
instance Binary Term where
put (R es) = putWord8 0 >> put es
put (S es) = putWord8 1 >> put es
put (FV es) = putWord8 2 >> put es
put (P e v) = putWord8 3 >> put (e,v)
put (W e v) = putWord8 4 >> put (e,v)
put (C i ) = putWord8 5 >> put i
put (TM i ) = putWord8 6 >> put i
put (F f) = putWord8 7 >> put f
put (V i) = putWord8 8 >> put i
put (K (KS s)) = putWord8 9 >> put s
put (K (KP d vs)) = putWord8 10 >> put (d,vs)
get = do tag <- getWord8
case tag of
0 -> liftM R get
1 -> liftM S get
2 -> liftM FV get
3 -> liftM2 P get get
4 -> liftM2 W get get
5 -> liftM C get
6 -> liftM TM get
7 -> liftM F get
8 -> liftM V get
9 -> liftM (K . KS) get
10 -> liftM2 (\d vs -> K (KP d vs)) get get
_ -> decodingError
instance Binary Expr where
put (EAbs b x exp) = putWord8 0 >> put (b,x,exp)
put (EApp e1 e2) = putWord8 1 >> put (e1,e2)
put (ELit (LStr s)) = putWord8 2 >> put s
put (ELit (LFlt d)) = putWord8 3 >> put d
put (ELit (LInt i)) = putWord8 4 >> put i
put (EMeta i) = putWord8 5 >> put i
put (EFun f) = putWord8 6 >> put f
put (EVar i) = putWord8 7 >> put i
put (ETyped e ty) = putWord8 8 >> put (e,ty)
get = do tag <- getWord8
case tag of
0 -> liftM3 EAbs get get get
1 -> liftM2 EApp get get
2 -> liftM (ELit . LStr) get
3 -> liftM (ELit . LFlt) get
4 -> liftM (ELit . LInt) get
5 -> liftM EMeta get
6 -> liftM EFun get
7 -> liftM EVar get
8 -> liftM2 ETyped get get
_ -> decodingError
instance Binary Patt where
put (PApp f ps) = putWord8 0 >> put (f,ps)
put (PVar x) = putWord8 1 >> put x
put PWild = putWord8 2
put (PLit (LStr s)) = putWord8 3 >> put s
put (PLit (LFlt d)) = putWord8 4 >> put d
put (PLit (LInt i)) = putWord8 5 >> put i
get = do tag <- getWord8
case tag of
0 -> liftM2 PApp get get
1 -> liftM PVar get
2 -> return PWild
3 -> liftM (PLit . LStr) get
4 -> liftM (PLit . LFlt) get
5 -> liftM (PLit . LInt) get
_ -> decodingError
instance Binary Equation where
put (Equ ps e) = put (ps,e)
get = liftM2 Equ get get
instance Binary Type where
put (DTyp hypos cat exps) = put (hypos,cat,exps)
get = liftM3 DTyp get get get
instance Binary BindType where
put Explicit = putWord8 0
put Implicit = putWord8 1
get = do tag <- getWord8
case tag of
0 -> return Explicit
1 -> return Implicit
_ -> decodingError
instance Binary FFun where
put (FFun fun prof lins) = put (fun,prof,lins)
get = liftM3 FFun get get get
instance Binary FSymbol where
put (FSymCat n l) = putWord8 0 >> put (n,l)
put (FSymLit n l) = putWord8 1 >> put (n,l)
put (FSymKS ts) = putWord8 2 >> put ts
put (FSymKP d vs) = putWord8 3 >> put (d,vs)
get = do tag <- getWord8
case tag of
0 -> liftM2 FSymCat get get
1 -> liftM2 FSymLit get get
2 -> liftM FSymKS get
3 -> liftM2 (\d vs -> FSymKP d vs) get get
_ -> decodingError
instance Binary Production where
put (FApply ruleid args) = putWord8 0 >> put (ruleid,args)
put (FCoerce fcat) = putWord8 1 >> put fcat
get = do tag <- getWord8
case tag of
0 -> liftM2 FApply get get
1 -> liftM FCoerce get
_ -> decodingError
instance Binary ParserInfo where
put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
get = do functions <- get
sequences <- get
productions0<- get
totalCats <- get
startCats <- get
return (ParserInfo{functions=functions,sequences=sequences
,productions0=productions0
,productions =filterProductions productions0
,totalCats=totalCats,startCats=startCats})
decodingError = fail "This PGF file was compiled with different version of GF"

View File

@@ -0,0 +1,76 @@
---------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- FCFG parsing, parser information
-----------------------------------------------------------------------------
module PGF.BuildParser where
import GF.Data.SortedList
import GF.Data.Assoc
import PGF.CId
import PGF.Data
import PGF.Parsing.FCFG.Utilities
import Data.Array.IArray
import Data.Maybe
import qualified Data.IntMap as IntMap
import qualified Data.Map as Map
import qualified Data.Set as Set
import Debug.Trace
data ParserInfoEx
= ParserInfoEx { epsilonRules :: [(FunId,[FCat],FCat)]
, leftcornerCats :: Assoc FCat [(FunId,[FCat],FCat)]
, leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)]
, grammarToks :: [String]
}
------------------------------------------------------------
-- parser information
getLeftCornerTok pinfo (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymKS [tok] -> [tok]
_ -> []
| otherwise = []
where
syms = (sequences pinfo) ! (lins ! 0)
getLeftCornerCat pinfo args (FFun _ _ lins)
| inRange (bounds syms) 0 = case syms ! 0 of
FSymCat d _ -> let cat = args !! d
in case IntMap.lookup cat (productions pinfo) of
Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
Nothing -> [cat]
_ -> []
| otherwise = []
where
syms = (sequences pinfo) ! (lins ! 0)
buildParserInfo :: ParserInfo -> ParserInfoEx
buildParserInfo pinfo =
ParserInfoEx { epsilonRules = epsilonrules
, leftcornerCats = leftcorncats
, leftcornerTokens = leftcorntoks
, grammarToks = grammartoks
}
where epsilonrules = [ (ruleid,args,cat)
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, let (FFun _ _ lins) = (functions pinfo) ! ruleid
, not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
| (cat,set) <- IntMap.toList (productions pinfo)
, (FApply ruleid args) <- Set.toList set
, tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]

View File

@@ -0,0 +1,55 @@
module PGF.CId (CId(..),
mkCId, wildCId,
readCId, showCId,
-- utils
pCId, pIdent, ppCId) where
import Control.Monad
import qualified Data.ByteString.Char8 as BS
import Data.Char
import qualified Text.ParserCombinators.ReadP as RP
import qualified Text.PrettyPrint as PP
-- | An abstract data type that represents
-- identifiers for functions and categories in PGF.
newtype CId = CId BS.ByteString deriving (Eq,Ord)
wildCId :: CId
wildCId = CId (BS.singleton '_')
-- | Creates a new identifier from 'String'
mkCId :: String -> CId
mkCId s = CId (BS.pack s)
-- | Reads an identifier from 'String'. The function returns 'Nothing' if the string is not valid identifier.
readCId :: String -> Maybe CId
readCId s = case [x | (x,cs) <- RP.readP_to_S pCId s, all isSpace cs] of
[x] -> Just x
_ -> Nothing
-- | Renders the identifier as 'String'
showCId :: CId -> String
showCId (CId x) = BS.unpack x
instance Show CId where
showsPrec _ = showString . showCId
instance Read CId where
readsPrec _ = RP.readP_to_S pCId
pCId :: RP.ReadP CId
pCId = do s <- pIdent
if s == "_"
then RP.pfail
else return (mkCId s)
pIdent :: RP.ReadP String
pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)
where
isIdentFirst c = c == '_' || isLetter c
isIdentRest c = c == '_' || c == '\'' || isAlphaNum c
ppCId :: CId -> PP.Doc
ppCId = PP.text . showCId

View File

@@ -0,0 +1,173 @@
module PGF.Check (checkPGF) where
import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.ErrM
import qualified Data.Map as Map
import Control.Monad
import Debug.Trace
checkPGF :: PGF -> Err (PGF,Bool)
checkPGF pgf = do
(cs,bs) <- mapM (checkConcrete pgf)
(Map.assocs (concretes pgf)) >>= return . unzip
return (pgf {concretes = Map.fromAscList cs}, and bs)
-- errors are non-fatal; replace with 'fail' to change this
msg s = trace s (return ())
andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool
andMapM f xs = mapM f xs >>= return . and
labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool)
labelBoolErr ms iob = do
(x,b) <- iob
if b then return (x,b) else (msg ms >> return (x,b))
checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool)
checkConcrete pgf (lang,cnc) =
labelBoolErr ("happened in language " ++ showCId lang) $ do
(rs,bs) <- mapM checkl (Map.assocs (lins cnc)) >>= return . unzip
return ((lang,cnc{lins = Map.fromAscList rs}),and bs)
where
checkl = checkLin pgf lang
checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
checkLin pgf lang (f,t) =
labelBoolErr ("happened in function " ++ showCId f) $ do
(t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t
return ((f,t'),b)
inferTerm :: [CType] -> Term -> Err (Term,CType)
inferTerm args trm = case trm of
K _ -> returnt str
C i -> returnt $ ints i
V i -> do
testErr (i < length args) ("too large index " ++ show i)
returnt $ args !! i
S ts -> do
(ts',tys) <- mapM infer ts >>= return . unzip
let tys' = filter (/=str) tys
testErr (null tys')
("expected Str in " ++ show trm ++ " not " ++ unwords (map show tys'))
return (S ts',str)
R ts -> do
(ts',tys) <- mapM infer ts >>= return . unzip
return $ (R ts',tuple tys)
P t u -> do
(t',tt) <- infer t
(u',tu) <- infer u
case tt of
R tys -> case tu of
R vs -> infer $ foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]]
--- R [v] -> infer $ P t v
--- R (v:vs) -> infer $ P (head tys) (R vs)
C i -> do
testErr (i < length tys)
("required more than " ++ show i ++ " fields in " ++ show (R tys))
return (P t' u', tys !! i) -- record: index must be known
_ -> do
let typ = head tys
testErr (all (==typ) tys) ("different types in table " ++ show trm)
return (P t' u', typ) -- table: types must be same
_ -> Bad $ "projection from " ++ show t ++ " : " ++ show tt
FV [] -> returnt tm0 ----
FV (t:ts) -> do
(t',ty) <- infer t
(ts',tys) <- mapM infer ts >>= return . unzip
testErr (all (eqType True ty) tys) ("different types in variants " ++ show trm)
return (FV (t':ts'),ty)
W s r -> infer r
_ -> Bad ("no type inference for " ++ show trm)
where
returnt ty = return (trm,ty)
infer = inferTerm args
checkTerm :: LinType -> Term -> Err (Term,Bool)
checkTerm (args,val) trm = case inferTerm args trm of
Ok (t,ty) -> if eqType False ty val
then return (t,True)
else do
msg ("term: " ++ show trm ++
"\nexpected type: " ++ show val ++
"\ninferred type: " ++ show ty)
return (t,False)
Bad s -> do
msg s
return (trm,False)
-- symmetry in (Ints m == Ints n) is all we can use in variants
eqType :: Bool -> CType -> CType -> Bool
eqType symm inf exp = case (inf,exp) of
(C k, C n) -> if symm then True else k <= n -- only run-time corr.
(R rs,R ts) -> length rs == length ts && and [eqType symm r t | (r,t) <- zip rs ts]
(TM _, _) -> True ---- for variants [] ; not safe
_ -> inf == exp
-- should be in a generic module, but not in the run-time DataGFCC
type CType = Term
type LinType = ([CType],CType)
tuple :: [CType] -> CType
tuple = R
ints :: Int -> CType
ints = C
str :: CType
str = S []
lintype :: PGF -> CId -> CId -> LinType
lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
(cs,c) -> (map vlinc cs, linc c) ---- HOAS
where
linc = lookLincat pgf lang
vlinc (0,c) = linc c
vlinc (i,c) = case linc c of
R ts -> R (ts ++ replicate i str)
inline :: PGF -> CId -> Term -> Term
inline pgf lang t = case t of
F c -> inl $ look c
_ -> composSafeOp inl t
where
inl = inline pgf lang
look = lookLin pgf lang
composOp :: Monad m => (Term -> m Term) -> Term -> m Term
composOp f trm = case trm of
R ts -> liftM R $ mapM f ts
S ts -> liftM S $ mapM f ts
FV ts -> liftM FV $ mapM f ts
P t u -> liftM2 P (f t) (f u)
W s t -> liftM (W s) $ f t
_ -> return trm
composSafeOp :: (Term -> Term) -> Term -> Term
composSafeOp f = maybe undefined id . composOp (return . f)
-- from GF.Data.Oper
maybeErr :: String -> Maybe a -> Err a
maybeErr s = maybe (Bad s) Ok
testErr :: Bool -> String -> Err ()
testErr cond msg = if cond then return () else Bad msg
errVal :: a -> Err a -> a
errVal a = err (const a) id
errIn :: String -> Err a -> Err a
errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return
err :: (String -> b) -> (a -> b) -> Err a -> b
err d f e = case e of
Ok a -> f a
Bad s -> d s

View File

@@ -0,0 +1,95 @@
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where
import PGF.CId
import PGF.Expr hiding (Value, Env, Tree)
import PGF.Type
import PGF.PMCFG
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List
-- internal datatypes for PGF
-- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format.
data PGF = PGF {
absname :: CId ,
cncnames :: [CId] ,
gflags :: Map.Map CId String, -- value of a global flag
abstract :: Abstr ,
concretes :: Map.Map CId Concr
}
data Abstr = Abstr {
aflags :: Map.Map CId String, -- value of a flag
funs :: Map.Map CId (Type,Int,[Equation]), -- type, arrity and definition of function
cats :: Map.Map CId [Hypo], -- context of a cat
catfuns :: Map.Map CId [CId] -- funs to a cat (redundant, for fast lookup)
}
data Concr = Concr {
cflags :: Map.Map CId String, -- value of a flag
lins :: Map.Map CId Term, -- lin of a fun
opers :: Map.Map CId Term, -- oper generated by subex elim
lincats :: Map.Map CId Term, -- lin type of a cat
lindefs :: Map.Map CId Term, -- lin default of a cat
printnames :: Map.Map CId Term, -- printname of a cat or a fun
paramlincats :: Map.Map CId Term, -- lin type of cat, with printable param names
parser :: Maybe ParserInfo -- parser
}
data Term =
R [Term]
| P Term Term
| S [Term]
| K Tokn
| V Int
| C Int
| F CId
| FV [Term]
| W String Term
| TM String
deriving (Eq,Ord,Show)
data Tokn =
KS String
| KP [String] [Alternative]
deriving (Eq,Ord,Show)
-- merge two GFCCs; fails is differens absnames; priority to second arg
unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of
n | n == wildCId -> two -- extending empty grammar
| n == absname two -> one { -- extending grammar with same abstract
concretes = Map.union (concretes two) (concretes one),
cncnames = union (cncnames one) (cncnames two)
}
_ -> one -- abstracts don't match ---- print error msg
emptyPGF :: PGF
emptyPGF = PGF {
absname = wildCId,
cncnames = [] ,
gflags = Map.empty,
abstract = error "empty grammar, no abstract",
concretes = Map.empty
}
-- | This is just a 'CId' with the language name.
-- A language name is the identifier that you write in the
-- top concrete or abstract module in GF after the
-- concrete/abstract keyword. Example:
--
-- > abstract Lang = ...
-- > concrete LangEng of Lang = ...
type Language = CId
readLanguage :: String -> Maybe Language
readLanguage = readCId
showLanguage :: Language -> String
showLanguage = showCId

View File

@@ -0,0 +1,241 @@
module PGF.Editor (
State, -- datatype -- type-annotated possibly open tree with a focus
Dict, -- datatype -- abstract syntax information optimized for editing
Position, -- datatype -- path from top to focus
new, -- :: Type -> State -- create new State
refine, -- :: Dict -> CId -> State -> State -- refine focus with CId
replace, -- :: Dict -> Tree -> State -> State -- replace focus with Tree
delete, -- :: State -> State -- replace focus with ?
goNextMeta, -- :: State -> State -- move focus to next ? node
goNext, -- :: State -> State -- move to next node
goTop, -- :: State -> State -- move focus to the top (=root)
goPosition, -- :: Position -> State -> State -- move focus to given position
mkPosition, -- :: [Int] -> Position -- list of choices (top = [])
showPosition,-- :: Position -> [Int] -- readable position
focusType, -- :: State -> Type -- get the type of focus
stateTree, -- :: State -> Tree -- get the current tree
isMetaFocus, -- :: State -> Bool -- whether focus is ?
allMetas, -- :: State -> [(Position,Type)] -- all ?s and their positions
prState, -- :: State -> String -- print state, focus marked *
refineMenu, -- :: Dict -> State -> [CId] -- get refinement menu
pgf2dict -- :: PGF -> Dict -- create editing Dict from PGF
) where
import PGF.Data
import PGF.CId
import qualified Data.Map as M
import Debug.Trace ----
-- API
new :: Type -> State
new (DTyp _ t _) = etree2state (uETree t)
refine :: Dict -> CId -> State -> State
refine dict f = replaceInState (mkRefinement dict f)
replace :: Dict -> Tree -> State -> State
replace dict t = replaceInState (tree2etree dict t)
delete :: State -> State
delete s = replaceInState (uETree (typ (tree s))) s
goNextMeta :: State -> State
goNextMeta s =
if isComplete s then s
else let s1 = goNext s in if isMetaFocus s1
then s1 else goNextMeta s1
isComplete :: State -> Bool
isComplete s = isc (tree s) where
isc t = case atom t of
AMeta _ -> False
ACon _ -> all isc (children t)
goTop :: State -> State
goTop = navigate (const top)
goPosition :: [Int] -> State -> State
goPosition p s = s{position = p}
mkPosition :: [Int] -> Position
mkPosition = id
refineMenu :: Dict -> State -> [CId]
refineMenu dict s = maybe [] (map fst) $ M.lookup (focusBType s) (refines dict)
focusType :: State -> Type
focusType s = btype2type (focusBType s)
stateTree :: State -> Tree
stateTree = etree2tree . tree
pgf2dict :: PGF -> Dict
pgf2dict pgf = Dict (M.fromAscList fus) refs where
fus = [(f,mkFType ty) | (f,(ty,_)) <- M.toList (funs abs)]
refs = M.fromAscList [(c, fusTo c) | (c,_) <- M.toList (cats abs)]
fusTo c = [(f,ty) | (f,ty@(_,k)) <- fus, k==c] ---- quadratic
mkFType (DTyp hyps c _) = ([k | Hyp _ (DTyp _ k _) <- hyps],c) ----dep types
abs = abstract pgf
etree2tree :: ETree -> Tree
etree2tree t = case atom t of
ACon f -> Fun f (map etree2tree (children t))
AMeta i -> Meta i
tree2etree :: Dict -> Tree -> ETree
tree2etree dict t = case t of
Fun f _ -> annot (look f) t
where
annot (tys,ty) tr = case tr of
Fun f trs -> ETree (ACon f) ty [annt t tr | (t,tr) <- zip tys trs]
Meta i -> ETree (AMeta i) ty []
annt ty tr = case tr of
Fun _ _ -> tree2etree dict tr
Meta _ -> annot ([],ty) tr
look f = maybe undefined id $ M.lookup f (functs dict)
prState :: State -> String
prState s = unlines [replicate i ' ' ++ f | (i,f) <- pr [] (tree s)] where
pr i t =
(ind i,prAtom i (atom t)) : concat [pr (sub j i) c | (j,c) <- zip [0..] (children t)]
prAtom i a = prFocus i ++ case a of
ACon f -> prCId f
AMeta i -> "?" ++ show i
prFocus i = if i == position s then "*" else ""
ind i = 2 * length i
sub j i = i ++ [j]
showPosition :: Position -> [Int]
showPosition = id
allMetas :: State -> [(Position,Type)]
allMetas s = [(reverse p, btype2type ty) | (p,ty) <- metas [] (tree s)] where
metas p t =
(if isMetaAtom (atom t) then [(p,typ t)] else []) ++
concat [metas (i:p) u | (i,u) <- zip [0..] (children t)]
---- Trees and navigation
data ETree = ETree {
atom :: Atom,
typ :: BType,
children :: [ETree]
}
deriving Show
data Atom =
ACon CId
| AMeta Int
deriving Show
btype2type :: BType -> Type
btype2type t = DTyp [] t []
uETree :: BType -> ETree
uETree ty = ETree (AMeta 0) ty []
data State = State {
position :: Position,
tree :: ETree
}
deriving Show
type Position = [Int]
top :: Position
top = []
up :: Position -> Position
up p = case p of
_:_ -> init p
_ -> p
down :: Position -> Position
down = (++[0])
left :: Position -> Position
left p = case p of
_:_ | last p > 0 -> init p ++ [last p - 1]
_ -> top
right :: Position -> Position
right p = case p of
_:_ -> init p ++ [last p + 1]
_ -> top
etree2state :: ETree -> State
etree2state = State top
doInState :: (ETree -> ETree) -> State -> State
doInState f s = s{tree = change (position s) (tree s)} where
change p t = case p of
[] -> f t
n:ns -> let (ts1,t0:ts2) = splitAt n (children t) in
t{children = ts1 ++ [change ns t0] ++ ts2}
subtree :: Position -> ETree -> ETree
subtree p t = case p of
[] -> t
n:ns -> subtree ns (children t !! n)
focus :: State -> ETree
focus s = subtree (position s) (tree s)
focusBType :: State -> BType
focusBType s = typ (focus s)
navigate :: (Position -> Position) -> State -> State
navigate p s = s{position = p (position s)}
-- p is a fix-point aspect of state change
untilFix :: Eq a => (State -> a) -> (State -> Bool) -> (State -> State) -> State -> State
untilFix p b f s =
if b s
then s
else let fs = f s in if p fs == p s
then s
else untilFix p b f fs
untilPosition :: (State -> Bool) -> (State -> State) -> State -> State
untilPosition = untilFix position
goNext :: State -> State
goNext s = case focus s of
st | not (null (children st)) -> navigate down s
_ -> findSister s
where
findSister s = case s of
s' | null (position s') -> s'
s' | hasYoungerSisters s' -> navigate right s'
s' -> findSister (navigate up s')
hasYoungerSisters s = case position s of
p@(_:_) -> length (children (focus (navigate up s))) > last p + 1
_ -> False
isMetaFocus :: State -> Bool
isMetaFocus s = isMetaAtom (atom (focus s))
isMetaAtom :: Atom -> Bool
isMetaAtom a = case a of
AMeta _ -> True
_ -> False
replaceInState :: ETree -> State -> State
replaceInState t = doInState (const t)
-------
type BType = CId ----dep types
type FType = ([BType],BType) ----dep types
data Dict = Dict {
functs :: M.Map CId FType,
refines :: M.Map BType [(CId,FType)]
}
mkRefinement :: Dict -> CId -> ETree
mkRefinement dict f = ETree (ACon f) val (map uETree args) where
(args,val) = maybe undefined id $ M.lookup f (functs dict)

View File

@@ -0,0 +1,355 @@
module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..),
readExpr, showExpr, pExpr, pBinds, ppExpr, ppPatt,
mkApp, unApp,
mkStr, unStr,
mkInt, unInt,
mkDouble, unDouble,
mkMeta, isMeta,
normalForm,
-- needed in the typechecker
Value(..), Env, Funs, eval, apply,
MetaId,
-- helpers
pMeta,pStr,pArg,pLit,freshName,ppMeta,ppLit,ppParens
) where
import PGF.CId
import PGF.Type
import Data.Char
import Data.Maybe
import Data.List as List
import Data.Map as Map hiding (showTree)
import Control.Monad
import qualified Text.PrettyPrint as PP
import qualified Text.ParserCombinators.ReadP as RP
data Literal =
LStr String -- ^ string constant
| LInt Integer -- ^ integer constant
| LFlt Double -- ^ floating point constant
deriving (Eq,Ord,Show)
type MetaId = Int
data BindType =
Explicit
| Implicit
deriving (Eq,Ord,Show)
-- | Tree is the abstract syntax representation of a given sentence
-- in some concrete syntax. Technically 'Tree' is a type synonym
-- of 'Expr'.
type Tree = Expr
-- | An expression in the abstract syntax of the grammar. It could be
-- both parameter of a dependent type or an abstract syntax tree for
-- for some sentence.
data Expr =
EAbs BindType CId Expr -- ^ lambda abstraction
| EApp Expr Expr -- ^ application
| ELit Literal -- ^ literal
| EMeta {-# UNPACK #-} !MetaId -- ^ meta variable
| EFun CId -- ^ function or data constructor
| EVar {-# UNPACK #-} !Int -- ^ variable with de Bruijn index
| ETyped Expr Type -- ^ local type signature
| EImplArg Expr -- ^ implicit argument in expression
deriving (Eq,Ord,Show)
-- | The pattern is used to define equations in the abstract syntax of the grammar.
data Patt =
PApp CId [Patt] -- ^ application. The identifier should be constructor i.e. defined with 'data'
| PLit Literal -- ^ literal
| PVar CId -- ^ variable
| PWild -- ^ wildcard
| PImplArg Patt -- ^ implicit argument in pattern
deriving (Eq,Ord)
-- | The equation is used to define lambda function as a sequence
-- of equations with pattern matching. The list of 'Expr' represents
-- the patterns and the second 'Expr' is the function body for this
-- equation.
data Equation =
Equ [Patt] Expr
deriving (Eq,Ord)
-- | parses 'String' as an expression
readExpr :: String -> Maybe Expr
readExpr s = case [x | (x,cs) <- RP.readP_to_S pExpr s, all isSpace cs] of
[x] -> Just x
_ -> Nothing
-- | renders expression as 'String'. The list
-- of identifiers is the list of all free variables
-- in the expression in order reverse to the order
-- of binding.
showExpr :: [CId] -> Expr -> String
showExpr vars = PP.render . ppExpr 0 vars
instance Read Expr where
readsPrec _ = RP.readP_to_S pExpr
-- | Constructs an expression by applying a function to a list of expressions
mkApp :: CId -> [Expr] -> Expr
mkApp f es = foldl EApp (EFun f) es
-- | Decomposes an expression into application of function
unApp :: Expr -> Maybe (CId,[Expr])
unApp = extract []
where
extract es (EFun f) = Just (f,es)
extract es (EApp e1 e2) = extract (e2:es) e1
extract es _ = Nothing
-- | Constructs an expression from string literal
mkStr :: String -> Expr
mkStr s = ELit (LStr s)
-- | Decomposes an expression into string literal
unStr :: Expr -> Maybe String
unStr (ELit (LStr s)) = Just s
unStr _ = Nothing
-- | Constructs an expression from integer literal
mkInt :: Integer -> Expr
mkInt i = ELit (LInt i)
-- | Decomposes an expression into integer literal
unInt :: Expr -> Maybe Integer
unInt (ELit (LInt i)) = Just i
unInt _ = Nothing
-- | Constructs an expression from real number literal
mkDouble :: Double -> Expr
mkDouble f = ELit (LFlt f)
-- | Decomposes an expression into real number literal
unDouble :: Expr -> Maybe Double
unDouble (ELit (LFlt f)) = Just f
unDouble _ = Nothing
-- | Constructs an expression which is meta variable
mkMeta :: Expr
mkMeta = EMeta 0
-- | Checks whether an expression is a meta variable
isMeta :: Expr -> Bool
isMeta (EMeta _) = True
isMeta _ = False
-----------------------------------------------------
-- Parsing
-----------------------------------------------------
pExpr :: RP.ReadP Expr
pExpr = RP.skipSpaces >> (pAbs RP.<++ pTerm)
where
pTerm = do f <- pFactor
RP.skipSpaces
as <- RP.sepBy pArg RP.skipSpaces
return (foldl EApp f as)
pAbs = do xs <- RP.between (RP.char '\\') (RP.skipSpaces >> RP.string "->") pBinds
e <- pExpr
return (foldr (\(b,x) e -> EAbs b x e) e xs)
pBinds :: RP.ReadP [(BindType,CId)]
pBinds = do xss <- RP.sepBy1 (RP.skipSpaces >> pBind) (RP.skipSpaces >> RP.char ',')
return (concat xss)
where
pCIdOrWild = pCId `mplus` (RP.char '_' >> return wildCId)
pBind =
do x <- pCIdOrWild
return [(Explicit,x)]
`mplus`
RP.between (RP.char '{')
(RP.skipSpaces >> RP.char '}')
(RP.sepBy1 (RP.skipSpaces >> pCIdOrWild >>= \id -> return (Implicit,id)) (RP.skipSpaces >> RP.char ','))
pArg = fmap EImplArg (RP.between (RP.char '{') (RP.char '}') pExpr)
RP.<++
pFactor
pFactor = fmap EFun pCId
RP.<++ fmap ELit pLit
RP.<++ fmap EMeta pMeta
RP.<++ RP.between (RP.char '(') (RP.char ')') pExpr
RP.<++ RP.between (RP.char '<') (RP.char '>') pTyped
pTyped = do RP.skipSpaces
e <- pExpr
RP.skipSpaces
RP.char ':'
RP.skipSpaces
ty <- pType
return (ETyped e ty)
pMeta = do RP.char '?'
return 0
pLit :: RP.ReadP Literal
pLit = pNum RP.<++ liftM LStr pStr
pNum = do x <- RP.munch1 isDigit
((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (LFlt (read (x++"."++y))))
RP.<++
(return (LInt (read x))))
pStr = RP.char '"' >> (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"'))
where
pEsc = RP.char '\\' >> RP.get
-----------------------------------------------------
-- Printing
-----------------------------------------------------
ppExpr :: Int -> [CId] -> Expr -> PP.Doc
ppExpr d scope (EAbs b x e) = let (bs,xs,e1) = getVars [] [] (EAbs b x e)
in ppParens (d > 1) (PP.char '\\' PP.<>
PP.hsep (PP.punctuate PP.comma (reverse (List.zipWith ppBind bs xs))) PP.<+>
PP.text "->" PP.<+>
ppExpr 1 (xs++scope) e1)
where
getVars bs xs (EAbs b x e) = getVars (b:bs) ((freshName x xs):xs) e
getVars bs xs e = (bs,xs,e)
ppExpr d scope (EApp e1 e2) = ppParens (d > 3) ((ppExpr 3 scope e1) PP.<+> (ppExpr 4 scope e2))
ppExpr d scope (ELit l) = ppLit l
ppExpr d scope (EMeta n) = ppMeta n
ppExpr d scope (EFun f) = ppCId f
ppExpr d scope (EVar i) = ppCId (scope !! i)
ppExpr d scope (ETyped e ty)= PP.char '<' PP.<> ppExpr 0 scope e PP.<+> PP.colon PP.<+> ppType 0 scope ty PP.<> PP.char '>'
ppExpr d scope (EImplArg e) = PP.braces (ppExpr 0 scope e)
ppPatt :: Int -> [CId] -> Patt -> ([CId],PP.Doc)
ppPatt d scope (PApp f ps) = let (scope',ds) = mapAccumL (ppPatt 2) scope ps
in (scope',ppParens (not (List.null ps) && d > 1) (ppCId f PP.<+> PP.hsep ds))
ppPatt d scope (PLit l) = (scope,ppLit l)
ppPatt d scope (PVar f) = (f:scope,ppCId f)
ppPatt d scope PWild = (scope,PP.char '_')
ppPatt d scope (PImplArg p) = let (scope',d) = ppPatt 0 scope p
in (scope',PP.braces d)
ppBind Explicit x = ppCId x
ppBind Implicit x = PP.braces (ppCId x)
ppLit (LStr s) = PP.text (show s)
ppLit (LInt n) = PP.integer n
ppLit (LFlt d) = PP.double d
ppMeta :: MetaId -> PP.Doc
ppMeta n
| n == 0 = PP.char '?'
| otherwise = PP.char '?' PP.<> PP.int n
ppParens True = PP.parens
ppParens False = id
freshName :: CId -> [CId] -> CId
freshName x xs0 = loop 1 x
where
xs = wildCId : xs0
loop i y
| elem y xs = loop (i+1) (mkCId (show x++show i))
| otherwise = y
-----------------------------------------------------
-- Computation
-----------------------------------------------------
-- | Compute an expression to normal form
normalForm :: Funs -> Int -> Env -> Expr -> Expr
normalForm funs k env e = value2expr k (eval funs env e)
where
value2expr i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr i) vs)
value2expr i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr i) vs)
value2expr i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr i) vs)
value2expr i (VSusp j env vs k) = value2expr i (k (VGen j vs))
value2expr i (VLit l) = ELit l
value2expr i (VClosure env (EAbs b x e)) = EAbs b x (value2expr (i+1) (eval funs ((VGen i []):env) e))
value2expr i (VImplArg v) = EImplArg (value2expr i v)
data Value
= VApp CId [Value]
| VLit Literal
| VMeta {-# UNPACK #-} !MetaId Env [Value]
| VSusp {-# UNPACK #-} !MetaId Env [Value] (Value -> Value)
| VGen {-# UNPACK #-} !Int [Value]
| VClosure Env Expr
| VImplArg Value
type Funs = Map.Map CId (Type,Int,[Equation]) -- type and def of a fun
type Env = [Value]
eval :: Funs -> Env -> Expr -> Value
eval funs env (EVar i) = env !! i
eval funs env (EFun f) = case Map.lookup f funs of
Just (_,a,eqs) -> if a == 0
then case eqs of
Equ [] e : _ -> eval funs [] e
_ -> VApp f []
else VApp f []
Nothing -> error ("unknown function "++showCId f)
eval funs env (EApp e1 e2) = apply funs env e1 [eval funs env e2]
eval funs env (EAbs b x e) = VClosure env (EAbs b x e)
eval funs env (EMeta i) = VMeta i env []
eval funs env (ELit l) = VLit l
eval funs env (ETyped e _) = eval funs env e
eval funs env (EImplArg e) = VImplArg (eval funs env e)
apply :: Funs -> Env -> Expr -> [Value] -> Value
apply funs env e [] = eval funs env e
apply funs env (EVar i) vs = applyValue funs (env !! i) vs
apply funs env (EFun f) vs = case Map.lookup f funs of
Just (_,a,eqs) -> if a <= length vs
then let (as,vs') = splitAt a vs
in match funs f eqs as vs'
else VApp f vs
Nothing -> error ("unknown function "++showCId f)
apply funs env (EApp e1 e2) vs = apply funs env e1 (eval funs env e2 : vs)
apply funs env (EAbs _ x e) (v:vs) = apply funs (v:env) e vs
apply funs env (EMeta i) vs = VMeta i env vs
apply funs env (ELit l) vs = error "literal of function type"
apply funs env (ETyped e _) vs = apply funs env e vs
apply funs env (EImplArg _) vs = error "implicit argument in function position"
applyValue funs v [] = v
applyValue funs (VApp f vs0) vs = apply funs [] (EFun f) (vs0++vs)
applyValue funs (VLit _) vs = error "literal of function type"
applyValue funs (VMeta i env vs0) vs = VMeta i env (vs0++vs)
applyValue funs (VGen i vs0) vs = VGen i (vs0++vs)
applyValue funs (VSusp i env vs0 k) vs = VSusp i env vs0 (\v -> applyValue funs (k v) vs)
applyValue funs (VClosure env (EAbs b x e)) (v:vs) = apply funs (v:env) e vs
applyValue funs (VImplArg _) vs = error "implicit argument in function position"
-----------------------------------------------------
-- Pattern matching
-----------------------------------------------------
match :: Funs -> CId -> [Equation] -> [Value] -> [Value] -> Value
match funs f eqs as0 vs0 =
case eqs of
[] -> VApp f (as0++vs0)
(Equ ps res):eqs -> tryMatches eqs ps as0 res []
where
tryMatches eqs [] [] res env = apply funs env res vs0
tryMatches eqs (p:ps) (a:as) res env = tryMatch p a env
where
tryMatch (PVar x ) (v ) env = tryMatches eqs ps as res (v:env)
tryMatch (PWild ) (_ ) env = tryMatches eqs ps as res env
tryMatch (p ) (VMeta i envi vs ) env = VSusp i envi vs (\v -> tryMatch p v env)
tryMatch (p ) (VGen i vs ) env = VApp f (as0++vs0)
tryMatch (p ) (VSusp i envi vs k) env = VSusp i envi vs (\v -> tryMatch p (k v) env)
tryMatch (PApp f1 ps1) (VApp f2 vs2 ) env | f1 == f2 = tryMatches eqs (ps1++ps) (vs2++as) res env
tryMatch (PLit l1 ) (VLit l2 ) env | l1 == l2 = tryMatches eqs ps as res env
tryMatch (PImplArg p ) (VImplArg v ) env = tryMatch p v env
tryMatch _ _ env = match funs f eqs as0 vs0

View File

@@ -0,0 +1,28 @@
module PGF.Expr where
import PGF.CId
import qualified Text.PrettyPrint as PP
import qualified Text.ParserCombinators.ReadP as RP
data Expr
instance Eq Expr
instance Ord Expr
instance Show Expr
data BindType = Explicit | Implicit
instance Eq BindType
instance Ord BindType
instance Show BindType
pArg :: RP.ReadP Expr
pBinds :: RP.ReadP [(BindType,CId)]
ppExpr :: Int -> [CId] -> Expr -> PP.Doc
freshName :: CId -> [CId] -> CId
ppParens :: Bool -> PP.Doc -> PP.Doc

View File

@@ -0,0 +1,66 @@
module PGF.Generate where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.TypeCheck
import qualified Data.Map as M
import System.Random
-- generate an infinite list of trees exhaustively
generate :: PGF -> Type -> Maybe Int -> [Expr]
generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of
Left _ -> False
Right _ -> True )
(concatMap (\i -> gener i cat) depths)
where
gener 0 c = [EFun f | (f, ([],_)) <- fns c]
gener i c = [
tr |
(f, (cs,_)) <- fns c,
let alts = map (gener (i-1)) cs,
ts <- combinations alts,
let tr = foldl EApp (EFun f) ts,
depth tr >= i
]
fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat pgf c]
depths = maybe [0 ..] (\d -> [0..d]) dp
-- generate an infinite list of trees randomly
genRandom :: StdGen -> PGF -> Type -> [Expr]
genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of
Left _ -> False
Right _ -> True )
(genTrees (randomRs (0.0, 1.0 :: Double) gen) cat)
where
timeout = 47 -- give up
genTrees ds0 cat =
let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds
(t,k) = genTree ds cat
in (if k>timeout then id else (t:))
(genTrees ds2 cat) -- else (drop k ds)
genTree rs = gett rs where
gett ds cid | cid == cidString = (ELit (LStr "foo"), 1)
gett ds cid | cid == cidInt = (ELit (LInt 12345), 1)
gett ds cid | cid == cidFloat = (ELit (LFlt 12345), 1)
gett [] _ = (ELit (LStr "TIMEOUT"), 1) ----
gett ds cat = case fns cat of
[] -> (EMeta 0,1)
fs -> let
d:ds2 = ds
(f,args) = getf d fs
(ts,k) = getts ds2 args
in (foldl EApp (EFun f) ts, k+1)
getf d fs = let lg = (length fs) in
fs !! (floor (d * fromIntegral lg))
getts ds cats = case cats of
c:cs -> let
(t, k) = gett ds c
(ts,ks) = getts (drop k ds) cs
in (t:ts, k + ks)
_ -> ([],0)
fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat pgf cat]

View File

@@ -0,0 +1,166 @@
{-# LANGUAGE ParallelListComp #-}
module PGF.Linearize
(linearizes,realize,realizes,linTree, linTreeMark,linearizesMark) where
import PGF.CId
import PGF.Data
import PGF.Macros
import PGF.Tree
import Control.Monad
import qualified Data.Map as Map
import Data.List
import Debug.Trace
-- linearization and computation of concrete PGF Terms
linearizes :: PGF -> CId -> Expr -> [String]
linearizes pgf lang = realizes . linTree pgf lang
realize :: Term -> String
realize = concat . take 1 . realizes
realizes :: Term -> [String]
realizes = map (unwords . untokn) . realizest
realizest :: Term -> [[Tokn]]
realizest trm = case trm of
R ts -> realizest (ts !! 0)
S ss -> map concat $ combinations $ map realizest ss
K t -> [[t]]
W s t -> [[KS (s ++ r)] | [KS r] <- realizest t]
FV ts -> concatMap realizest ts
TM s -> [[KS s]]
_ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
untokn :: [Tokn] -> [String]
untokn ts = case ts of
KP d _ : [] -> d
KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
KS s : ws -> s : untokn ws
[] -> []
where
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
v:_ -> v
_ -> d
-- Lifts all variants to the top level (except those in macros).
liftVariants :: Term -> [Term]
liftVariants = f
where
f (R ts) = liftM R $ mapM f ts
f (P t1 t2) = liftM2 P (f t1) (f t2)
f (S ts) = liftM S $ mapM f ts
f (FV ts) = ts >>= f
f (W s t) = liftM (W s) $ f t
f t = return t
linTree :: PGF -> CId -> Expr -> Term
linTree pgf lang e = lin (expr2tree e) Nothing
where
cnc = lookMap (error "no lang") lang (concretes pgf)
lin (Abs xs e ) mty = case lin e Nothing of
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of
Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps]
in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants]
Nothing -> tm0
lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted
lin (Lit (LInt i)) mty = R [kks (show i)]
lin (Lit (LFlt d)) mty = R [kks (show d)]
lin (Var x) mty = case mty of
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (showCId x)
lin (Meta i) mty = case mty of
Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc))
Nothing -> TM (show i)
variants :: [Term] -> Term
variants ts = case ts of
[t] -> t
_ -> FV ts
unvariants :: Term -> [Term]
unvariants t = case t of
FV ts -> ts
_ -> [t]
compute :: PGF -> CId -> [Term] -> Term -> Term
compute pgf lang args = comp where
comp trm = case trm of
P r p -> proj (comp r) (comp p)
W s t -> W s (comp t)
R ts -> R $ map comp ts
V i -> idx args i -- already computed
F c -> comp $ look c -- not computed (if contains argvar)
FV ts -> FV $ map comp ts
S ts -> S $ filter (/= S []) $ map comp ts
_ -> trm
look = lookOper pgf lang
idx xs i = if i > length xs - 1
then trace
("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0
else xs !! i
proj r p = case (r,p) of
(_, FV ts) -> FV $ map (proj r) ts
(FV ts, _ ) -> FV $ map (\t -> proj t p) ts
(W s t, _) -> kks (s ++ getString (proj t p))
_ -> comp $ getField r (getIndex p)
getString t = case t of
K (KS s) -> s
_ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
getIndex t = case t of
C i -> i
TM _ -> 0 -- default value for parameter
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666
getField t i = case t of
R rs -> idx rs i
TM s -> TM s
_ -> error ("ERROR in grammar compiler: field from " ++ show t) t
---------
-- markup with tree positions
linearizesMark :: PGF -> CId -> Expr -> [String]
linearizesMark pgf lang = realizes . linTreeMark pgf lang
linTreeMark :: PGF -> CId -> Expr -> Term
linTreeMark pgf lang = lin [] . expr2tree
where
lin p (Abs xs e ) = case lin p e of
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
lin p (Fun fun es) =
let argVariants =
mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
in variants [mark (fun,p) $ compute pgf lang args $ look fun |
args <- argVariants]
lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted
lin p (Lit (LInt i)) = mark p $ R [kks (show i)]
lin p (Lit (LFlt d)) = mark p $ R [kks (show d)]
lin p (Var x) = mark p $ TM (showCId x)
lin p (Meta i) = mark p $ TM (show i)
look = lookLin pgf lang
mark :: Show a => a -> Term -> Term
mark p t = case t of
R ts -> R $ map (mark p) ts
FV ts -> R $ map (mark p) ts
S ts -> S $ bracket p ts
K s -> S $ bracket p [t]
W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts]
_ -> t
-- otherwise in normal form
bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"]
sub p i = p ++ [i]

View File

@@ -0,0 +1,154 @@
module PGF.Macros where
import PGF.CId
import PGF.Data
import Control.Monad
import qualified Data.Map as Map
import qualified Data.Array as Array
import Data.Maybe
import Data.List
-- operations for manipulating PGF grammars and objects
mapConcretes :: (Concr -> Concr) -> PGF -> PGF
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
lookLin :: PGF -> CId -> CId -> Term
lookLin pgf lang fun =
lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf
lookOper :: PGF -> CId -> CId -> Term
lookOper pgf lang fun =
lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf
lookLincat :: PGF -> CId -> CId -> Term
lookLincat pgf lang fun =
lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf
lookParamLincat :: PGF -> CId -> CId -> Term
lookParamLincat pgf lang fun =
lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf
lookPrintName :: PGF -> CId -> CId -> Term
lookPrintName pgf lang fun =
lookMap tm0 fun $ printnames $ lookMap (error "no lang") lang $ concretes pgf
lookType :: PGF -> CId -> Type
lookType pgf f =
case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of
(ty,_,_) -> ty
lookDef :: PGF -> CId -> [Equation]
lookDef pgf f =
case lookMap (error $ "lookDef " ++ show f) f (funs (abstract pgf)) of
(_,a,eqs) -> eqs
isData :: PGF -> CId -> Bool
isData pgf f =
case Map.lookup f (funs (abstract pgf)) of
Just (_,_,[]) -> True -- the encoding of data constrs
_ -> False
lookValCat :: PGF -> CId -> CId
lookValCat pgf = valCat . lookType pgf
lookParser :: PGF -> CId -> Maybe ParserInfo
lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
lookStartCat :: PGF -> CId
lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
[gflags pgf, aflags (abstract pgf)]
lookGlobalFlag :: PGF -> CId -> String
lookGlobalFlag pgf f =
lookMap "?" f (gflags pgf)
lookAbsFlag :: PGF -> CId -> String
lookAbsFlag pgf f =
lookMap "?" f (aflags (abstract pgf))
lookConcr :: PGF -> CId -> Concr
lookConcr pgf cnc =
lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf
lookConcrFlag :: PGF -> CId -> CId -> Maybe String
lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang
functionsToCat :: PGF -> CId -> [(CId,Type)]
functionsToCat pgf cat =
[(f,ty) | f <- fs, Just (ty,_,_) <- [Map.lookup f $ funs $ abstract pgf]]
where
fs = lookMap [] cat $ catfuns $ abstract pgf
missingLins :: PGF -> CId -> [CId]
missingLins pgf lang = [c | c <- fs, not (hasl c)] where
fs = Map.keys $ funs $ abstract pgf
hasl = hasLin pgf lang
hasLin :: PGF -> CId -> CId -> Bool
hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang
restrictPGF :: (CId -> Bool) -> PGF -> PGF
restrictPGF cond pgf = pgf {
abstract = abstr {
funs = restrict $ funs $ abstr,
cats = restrict $ cats $ abstr
}
} ---- restrict concrs also, might be needed
where
restrict = Map.filterWithKey (\c _ -> cond c)
abstr = abstract pgf
depth :: Expr -> Int
depth (EAbs _ _ t) = depth t
depth (EApp e1 e2) = max (depth e1) (depth e2) + 1
depth _ = 1
cftype :: [CId] -> CId -> Type
cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val []
typeOfHypo :: Hypo -> Type
typeOfHypo (_,_,ty) = ty
catSkeleton :: Type -> ([CId],CId)
catSkeleton ty = case ty of
DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val)
typeSkeleton :: Type -> ([(Int,CId)],CId)
typeSkeleton ty = case ty of
DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val)
valCat :: Type -> CId
valCat ty = case ty of
DTyp _ val _ -> val
contextLength :: Type -> Int
contextLength ty = case ty of
DTyp hyps _ _ -> length hyps
term0 :: CId -> Term
term0 = TM . showCId
tm0 :: Term
tm0 = TM "?"
kks :: String -> Term
kks = K . KS
-- lookup with default value
lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a
lookMap d c m = Map.findWithDefault d c m
--- from Operations
combinations :: [[a]] -> [[a]]
combinations t = case t of
[] -> [[]]
aa:uu -> [a:u | a <- aa, u <- combinations uu]
isLiteralCat :: CId -> Bool
isLiteralCat = (`elem` [cidString, cidFloat, cidInt, cidVar])
cidString = mkCId "String"
cidInt = mkCId "Int"
cidFloat = mkCId "Float"
cidVar = mkCId "__gfVar"

View File

@@ -0,0 +1,26 @@
module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,
lookupMorpho,fullFormLexicon) where
import PGF.ShowLinearize (collectWords)
import PGF.Data
import PGF.CId
import qualified Data.Map as Map
import Data.List (intersperse)
-- these 4 definitions depend on the datastructure used
type Lemma = CId
type Analysis = String
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang))
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon (Morpho mo) = Map.toList mo

View File

@@ -0,0 +1,119 @@
module PGF.PMCFG where
import PGF.CId
import PGF.Expr
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.Array.Unboxed
import Text.PrettyPrint
type FCat = Int
type FIndex = Int
type FPointPos = Int
data FSymbol
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
| FSymKS [String]
| FSymKP [String] [Alternative]
deriving (Eq,Ord,Show)
type Profile = [Int]
data Production
= FApply {-# UNPACK #-} !FunId [FCat]
| FCoerce {-# UNPACK #-} !FCat
| FConst Expr [String]
deriving (Eq,Ord,Show)
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
type FSeq = Array FPointPos FSymbol
type FunId = Int
type SeqId = Int
data Alternative =
Alt [String] [String]
deriving (Eq,Ord,Show)
data ParserInfo
= ParserInfo { functions :: Array FunId FFun
, sequences :: Array SeqId FSeq
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
, startCats :: Map.Map CId [FCat]
, totalCats :: {-# UNPACK #-} !FCat
}
fcatString, fcatInt, fcatFloat, fcatVar :: Int
fcatString = (-1)
fcatInt = (-2)
fcatFloat = (-3)
fcatVar = (-4)
isLiteralFCat :: FCat -> Bool
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
ppPMCFG :: ParserInfo -> Doc
ppPMCFG pinfo =
text "productions" $$
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
text "functions" $$
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
text "sequences" $$
nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
text "startcats" $$
nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
ppProduction (fcat,FApply funid args) =
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
ppProduction (fcat,FCoerce arg) =
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
ppProduction (fcat,FConst _ ss) =
ppFCat fcat <+> text "->" <+> ppStrs ss
ppFun (funid,FFun fun _ arr) =
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
ppSeq (seqid,seq) =
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
ppStartCat (id,fcats) =
ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
ppSymbol (FSymKS ts) = ppStrs ts
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
ppStrs ss = doubleQuotes (hsep (map text ss))
ppFCat fcat
| fcat == fcatString = text "CString"
| fcat == fcatInt = text "CInt"
| fcat == fcatFloat = text "CFloat"
| fcat == fcatVar = text "CVar"
| otherwise = char 'C' <> int fcat
ppFunId funid = char 'F' <> int funid
ppSeqId seqid = char 'S' <> int seqid
filterProductions = closure
where
closure prods0
| IntMap.size prods == IntMap.size prods0 = prods
| otherwise = closure prods
where
prods = IntMap.mapMaybe (filterProdSet prods0) prods0
filterProdSet prods set0
| Set.null set = Nothing
| otherwise = Just set
where
set = Set.filter (filterRule prods) set0
filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
filterRule prods _ = True

View File

@@ -0,0 +1,112 @@
----------------------------------------------------------------------
-- |
-- Module : Paraphrase
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- Generate parapharases with def definitions.
-----------------------------------------------------------------------------
module PGF.Paraphrase (
paraphrase,
paraphraseN
) where
import PGF.Data
import PGF.Tree
import PGF.Macros (lookDef,isData)
import PGF.CId
import Data.List (nub,sort,group)
import qualified Data.Map as Map
import Debug.Trace ----
paraphrase :: PGF -> Expr -> [Expr]
paraphrase pgf = nub . paraphraseN 2 pgf
paraphraseN :: Int -> PGF -> Expr -> [Expr]
paraphraseN i pgf = map tree2expr . paraphraseN' i pgf . expr2tree
paraphraseN' :: Int -> PGF -> Tree -> [Tree]
paraphraseN' 0 _ t = [t]
paraphraseN' i pgf t =
step i t ++ [Fun g ts' | Fun g ts <- step (i-1) t, ts' <- sequence (map par ts)]
where
par = paraphraseN' (i-1) pgf
step 0 t = [t]
step i t = let stept = step (i-1) t in stept ++ concat [def u | u <- stept]
def = fromDef pgf
fromDef :: PGF -> Tree -> [Tree]
fromDef pgf t@(Fun f ts) = defDown t ++ defUp t where
defDown t = [subst g u | let equ = equsFrom f, (u,g) <- match equ ts, trequ "U" f equ]
defUp t = [subst g u | equ <- equsTo f, (u,g) <- match [equ] ts, trequ "D" f equ]
equsFrom f = [(ps,d) | Just equs <- [lookup f equss], (Fun _ ps,d) <- equs]
equsTo f = [c | (_,equs) <- equss, c <- casesTo f equs]
casesTo f equs =
[(ps,p) | (p,d@(Fun g ps)) <- equs, g==f,
isClosed d || (length equs == 1 && isLinear d)]
equss = [(f,[(Fun f (map patt2tree ps), expr2tree d) | (Equ ps d) <- eqs]) |
(f,(_,_,eqs)) <- Map.assocs (funs (abstract pgf)), not (null eqs)]
trequ s f e = True ----trace (s ++ ": " ++ show f ++ " " ++ show e) True
subst :: Subst -> Tree -> Tree
subst g e = case e of
Fun f ts -> Fun f (map substg ts)
Var x -> maybe e id $ lookup x g
_ -> e
where
substg = subst g
type Subst = [(CId,Tree)]
-- this applies to pattern, hence don't need to consider abstractions
isClosed :: Tree -> Bool
isClosed t = case t of
Fun _ ts -> all isClosed ts
Var _ -> False
_ -> True
-- this applies to pattern, hence don't need to consider abstractions
isLinear :: Tree -> Bool
isLinear = nodup . vars where
vars t = case t of
Fun _ ts -> concatMap vars ts
Var x -> [x]
_ -> []
nodup = all ((<2) . length) . group . sort
match :: [([Tree],Tree)] -> [Tree] -> [(Tree, Subst)]
match cases terms = case cases of
[] -> []
(patts,_):_ | length patts /= length terms -> []
(patts,val):cc -> case mapM tryMatch (zip patts terms) of
Just substs -> return (val, concat substs)
_ -> match cc terms
where
tryMatch (p,t) = case (p, t) of
(Var x, _) | notMeta t -> return [(x,t)]
(Fun p pp, Fun f tt) | p == f && length pp == length tt -> do
matches <- mapM tryMatch (zip pp tt)
return (concat matches)
_ -> if p==t then return [] else Nothing
notMeta e = case e of
Meta _ -> False
Fun f ts -> all notMeta ts
_ -> True
-- | Converts a pattern to tree.
patt2tree :: Patt -> Tree
patt2tree (PApp f ps) = Fun f (map patt2tree ps)
patt2tree (PLit l) = Lit l
patt2tree (PVar x) = Var x
patt2tree PWild = Meta 0

View File

@@ -0,0 +1,205 @@
----------------------------------------------------------------------
-- |
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- MCFG parsing, the active algorithm
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG.Active (parse) where
import GF.Data.Assoc
import GF.Data.SortedList
import GF.Data.Utilities
import qualified GF.Data.MultiMap as MM
import PGF.CId
import PGF.Data
import PGF.Tree
import PGF.Parsing.FCFG.Utilities
import PGF.BuildParser
import Control.Monad (guard)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Data.Array.IArray
import Debug.Trace
----------------------------------------------------------------------
-- * parsing
type FToken = String
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
makeFinalEdge cat i j = (cat, [makeRange i j])
-- | the list of categories = possible starting categories
parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr]
parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees
where
inTokens = input toks
starts = Map.findWithDefault [] start (startCats pinfo)
schart = xchart2syntaxchart chart pinfo
(i,j) = inputBounds inTokens
finalEdges = [makeFinalEdge cat i j | cat <- starts]
forests = chart2forests schart (const False) finalEdges
filteredForests = forests >>= applyProfileToForest
pinfoex = buildParserInfo pinfo
chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
isBU s = s=="b"
isTD s = s=="t"
-- used in prediction
emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
emptyChildren ruleid args = SNode ruleid (replicate (length args) [])
process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
process strategy pinfo pinfoex toks [] chart = chart
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
where
univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
| inRange (bounds lin) ppos =
case lin ! ppos of
FSymCat d r -> let c = args !! d
in case recs !! d of
[] -> case insertXChart chart item c of
Nothing -> chart
Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
rng <- concatRange rng (found' !! r)
return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
++
do guard (isTD strategy)
(ruleid,args) <- topdownRules pinfo c
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
in process strategy pinfo pinfoex toks items chart
found' -> let items = do rng <- concatRange rng (found' !! r)
return (Active found rng lbl (ppos+1) node args cat)
in process strategy pinfo pinfoex toks items chart
FSymKS [tok]
-> let items = do t_rng <- inputToken toks ? tok
rng' <- concatRange rng t_rng
return (Active found rng' lbl (ppos+1) node args cat)
in process strategy pinfo pinfoex toks items chart
| otherwise =
if inRange (bounds lins) (lbl+1)
then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
else univRule (Final (reverse (rng:found)) node args cat) chart
where
(FFun _ _ lins) = functions pinfo ! ruleid
lin = sequences pinfo ! (lins ! lbl)
univRule item@(Final found' node args cat) chart =
case insertXChart chart item cat of
Nothing -> chart
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
let FFun _ _ lins = functions pinfo ! ruleid
FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
rng <- concatRange rng (found' !! r)
return (Active found rng l (ppos+1) (updateChildren node d found') args c)
++
do guard (isBU strategy)
(ruleid,args,c) <- leftcornerCats pinfoex ? cat
let FFun _ _ lins = functions pinfo ! ruleid
FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
in process strategy pinfo pinfoex toks items chart
----------------------------------------------------------------------
-- * XChart
data Item
= Active RangeRec
Range
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !FPointPos
(SyntaxNode FunId RangeRec)
[FCat]
FCat
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
deriving (Eq, Ord, Show)
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
emptyXChart :: Ord c => XChart c
emptyXChart = XChart MM.empty MM.empty
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
case MM.insert' c item actives of
Nothing -> Nothing
Just actives -> Just (XChart actives finals)
insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
case MM.insert' c item finals of
Nothing -> Nothing
Just finals -> Just (XChart actives finals)
lookupXChartAct (XChart actives finals) c = actives MM.! c
lookupXChartFinal (XChart actives finals) c = finals MM.! c
xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
xchart2syntaxchart (XChart actives finals) pinfo =
accumAssoc groupSyntaxNodes $
[ case node of
SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
SString s -> ((cat,found), SString s)
SInt n -> ((cat,found), SInt n)
SFloat f -> ((cat,found), SFloat f)
| (Final found node rhs cat) <- MM.elems finals
]
literals :: ParserInfoEx -> Input FToken -> [Item]
literals pinfoex toks =
[let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
where
lexer t =
case reads t of
[(n,"")] -> (fcatInt, SInt (n::Integer))
_ -> case reads t of
[(f,"")] -> (fcatFloat, SFloat (f::Double))
_ -> (fcatString,SString t)
----------------------------------------------------------------------
-- Earley --
-- called with all starting categories
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
initialTD pinfo starts toks =
do cat <- starts
(ruleid,args) <- topdownRules pinfo cat
return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
topdownRules pinfo cat = f cat []
where
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
g (FApply ruleid args) rules = (ruleid,args) : rules
g (FCoerce cat) rules = f cat rules
----------------------------------------------------------------------
-- Kilbury --
initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
initialBU pinfo pinfoex toks =
do (tok,rngs) <- aAssocs (inputToken toks)
(ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
rng <- rngs
return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
++
do (ruleid,args,cat) <- epsilonRules pinfoex
let FFun _ _ _ = functions pinfo ! ruleid
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)

View File

@@ -0,0 +1,371 @@
{-# LANGUAGE BangPatterns #-}
module PGF.Parsing.FCFG.Incremental
( ParseState
, ErrorState
, initState
, nextState
, getCompletions
, recoveryStates
, extractTrees
, parse
, parseWithRecovery
) where
import Data.Array.IArray
import Data.Array.Base (unsafeAt)
import Data.List (isPrefixOf, foldl')
import Data.Maybe (fromMaybe, maybe)
import qualified Data.Map as Map
import qualified GF.Data.TrieMap as TMap
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Control.Monad
import GF.Data.SortedList
import PGF.CId
import PGF.Data
import PGF.Expr(Tree)
import PGF.Macros
import PGF.TypeCheck
import Debug.Trace
parse :: PGF -> Language -> Type -> [String] -> [Tree]
parse pgf lang typ toks = loop (initState pgf lang typ) toks
where
loop ps [] = extractTrees ps typ
loop ps (t:ts) = case nextState ps t of
Left es -> []
Right ps -> loop ps ts
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
where
accept ps [] = extractTrees ps typ
accept ps (t:ts) =
case nextState ps t of
Right ps -> accept ps ts
Left es -> skip (recoveryStates open_typs es) ts
skip ps_map [] = extractTrees (fst ps_map) typ
skip ps_map (t:ts) =
case Map.lookup t (snd ps_map) of
Just ps -> accept ps ts
Nothing -> skip ps_map ts
-- | Creates an initial parsing state for a given language and
-- startup category.
initState :: PGF -> Language -> Type -> ParseState
initState pgf lang (DTyp _ start _) =
let items = do
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (productions pinfo)
let FFun fn _ lins = functions pinfo ! funid
(lbl,seqid) <- assocs lins
return (Active 0 0 funid seqid args (AK cat lbl))
pinfo =
case lookParser pgf lang of
Just pinfo -> pinfo
_ -> error ("Unknown language: " ++ showCId lang)
in PState pgf
pinfo
(Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
(TMap.singleton [] (Set.fromList items))
-- | From the current state and the next token
-- 'nextState' computes a new state, where the token
-- is consumed and the current position is shifted by one.
-- If the new token cannot be accepted then an error state
-- is returned.
nextState :: ParseState -> String -> Either ErrorState ParseState
nextState (PState pgf pinfo chart items) t =
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = fromMaybe TMap.empty (Map.lookup t map_items)
(acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in if TMap.null acc1
then Left (EState pgf pinfo chart2)
else Right (PState pgf pinfo chart2 acc1)
where
add (tok:toks) item acc
| tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
add _ item acc = acc
-- | If the next token is not known but only its prefix (possible empty prefix)
-- then the 'getCompletions' function can be used to calculate the possible
-- next words and the consequent states. This is used for word completions in
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (PState pgf pinfo chart items) w =
let (mb_agenda,map_items) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
(acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in fmap (PState pgf pinfo chart2) acc'
where
add (tok:toks) item acc
| isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
add _ item acc = acc
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
recoveryStates open_types (EState pgf pinfo chart) =
let open_fcats = concatMap type2fcats open_types
agenda = foldl (complete open_fcats) [] (actives chart)
(acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
chart2 = chart1{ active =emptyAC
, actives=active chart1 : actives chart1
, passive=emptyPC
, offset =offset chart1+1
}
in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
where
type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))
complete open_fcats items ac =
foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
(:) (Active j' (ppos+1) funid seqid args keyc)))
items
[set | fcat <- open_fcats, set <- lookupACByFCat fcat ac]
add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
-- | This function extracts the list of all completed parse trees
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
extractTrees :: ParseState -> Type -> [Tree]
extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
where
(mb_agenda,acc) = TMap.decompose items
agenda = maybe [] Set.toList mb_agenda
(_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart
exps = do
cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
(funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
[] cat (productions pinfo)
let FFun fn _ lins = functions pinfo ! funid
lbl <- indices lins
Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
(fvs,tree) <- go Set.empty 0 (0,fid)
guard (Set.null fvs)
return tree
go rec fcat' (d,fcat)
| fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
| Set.member fcat rec = mzero
| otherwise = foldForest (\funid args trees ->
do let FFun fn _ lins = functions pinfo ! funid
args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
check_ho_fun fn args
`mplus`
trees)
(\const _ trees ->
return (freeVar const,const)
`mplus`
trees)
[] fcat (forest st)
check_ho_fun fun args
| fun == _V = return (head args)
| fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
| otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)
mkVar (EFun v) = v
mkVar (EMeta _) = wildCId
freeVar (EFun v) = Set.singleton v
freeVar _ = Set.empty
_B = mkCId "_B"
_V = mkCId "_V"
process mbt fn !seqs !funs [] acc chart = (acc,chart)
process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
| inRange (bounds lin) ppos =
case unsafeAt lin ppos of
FSymCat d r -> let !fid = args !! d
key = AK fid r
items2 = case lookupPC (mkPK key k) (passive chart) of
Nothing -> items
Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items)
(\_ _ items -> items)
items2 fid (forest chart)
in case lookupAC key (active chart) of
Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
Just set | Set.member item set -> process mbt fn seqs funs items acc chart
| otherwise -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
in process mbt fn seqs funs items acc' chart
FSymKP strs vars
-> let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
(strs:[strs' | Alt strs' _ <- vars])
in process mbt fn seqs funs items acc' chart
FSymLit d r -> let !fid = args !! d
in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
(toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
in process mbt fn seqs funs items acc' chart
[] -> case litCatMatch fid mbt of
Just (toks,lit) -> let fid' = nextId chart
!acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
,nextId=nextId chart+1
}
Nothing -> process mbt fn seqs funs items acc chart
| otherwise =
case lookupPC (mkPK key0 j) (passive chart) of
Nothing -> let fid = nextId chart
items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
Nothing -> items
Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
,nextId =nextId chart+1
}
Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
where
!lin = unsafeAt seqs seqid
!k = offset chart
mkPK (AK fid lbl) j = PK fid lbl j
rhs funid lbl = unsafeAt lins lbl
where
FFun _ _ lins = unsafeAt funs funid
updateAt :: Int -> a -> [a] -> [a]
updateAt nr x xs = [if i == nr then x else y | (i,y) <- zip [0..] xs]
litCatMatch fcat (Just t)
| fcat == fcatString = Just ([t],ELit (LStr t))
| fcat == fcatInt = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
_ -> Nothing }
| fcat == fcatFloat = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
_ -> Nothing }
| fcat == fcatVar = Just ([t],EFun (mkCId t))
litCatMatch _ _ = Nothing
----------------------------------------------------------------
-- Active Chart
----------------------------------------------------------------
data Active
= Active {-# UNPACK #-} !Int
{-# UNPACK #-} !FPointPos
{-# UNPACK #-} !FunId
{-# UNPACK #-} !SeqId
[FCat]
{-# UNPACK #-} !ActiveKey
deriving (Eq,Show,Ord)
data ActiveKey
= AK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
deriving (Eq,Ord,Show)
type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))
emptyAC :: ActiveChart
emptyAC = IntMap.empty
lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l
lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active]
lookupACByFCat fcat chart =
case IntMap.lookup fcat chart of
Nothing -> []
Just map -> IntMap.elems map
labelsAC :: FCat -> ActiveChart -> [FIndex]
labelsAC fcat chart =
case IntMap.lookup fcat chart of
Nothing -> []
Just map -> IntMap.keys map
insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
----------------------------------------------------------------
-- Passive Chart
----------------------------------------------------------------
data PassiveKey
= PK {-# UNPACK #-} !FCat
{-# UNPACK #-} !FIndex
{-# UNPACK #-} !Int
deriving (Eq,Ord,Show)
type PassiveChart = Map.Map PassiveKey FCat
emptyPC :: PassiveChart
emptyPC = Map.empty
lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
lookupPC key chart = Map.lookup key chart
insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
insertPC key fcat chart = Map.insert key fcat chart
----------------------------------------------------------------
-- Forest
----------------------------------------------------------------
foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
case IntMap.lookup fcat forest of
Nothing -> b
Just set -> Set.fold foldProd b set
where
foldProd (FCoerce fcat) b = foldForest f g b fcat forest
foldProd (FApply funid args) b = f funid args b
foldProd (FConst const toks) b = g const toks b
----------------------------------------------------------------
-- Parse State
----------------------------------------------------------------
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))
data Chart
= Chart
{ active :: ActiveChart
, actives :: [ActiveChart]
, passive :: PassiveChart
, forest :: IntMap.IntMap (Set.Set Production)
, nextId :: {-# UNPACK #-} !FCat
, offset :: {-# UNPACK #-} !Int
}
deriving Show
----------------------------------------------------------------
-- Error State
----------------------------------------------------------------
-- | An abstract data type whose values represent
-- the state in an incremental parser after an error.
data ErrorState = EState PGF ParserInfo Chart

View File

@@ -0,0 +1,188 @@
----------------------------------------------------------------------
-- |
-- Maintainer : PL
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/13 12:40:19 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.6 $
--
-- Basic type declarations and functions for grammar formalisms
-----------------------------------------------------------------------------
module PGF.Parsing.FCFG.Utilities where
import Control.Monad
import Data.Array
import Data.List (groupBy)
import PGF.CId
import PGF.Data
import PGF.Tree
import GF.Data.Assoc
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
------------------------------------------------------------
-- ranges as single pairs
type RangeRec = [Range]
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
| EmptyRange
deriving (Eq, Ord, Show)
makeRange :: Int -> Int -> Range
makeRange = Range
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng = return rng
concatRange rng EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']
minRange :: Range -> Int
minRange (Range i j) = i
maxRange :: Range -> Int
maxRange (Range i j) = j
------------------------------------------------------------
-- * representaions of input tokens
data Input t = MkInput { inputBounds :: (Int, Int),
inputToken :: Assoc t [Range]
}
input :: Ord t => [t] -> Input t
input toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]
inputMany :: Ord t => [[t]] -> Input t
inputMany toks = MkInput inBounds inToken
where
inBounds = (0, length toks)
inToken = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
------------------------------------------------------------
-- * representations of syntactical analyses
-- ** charts as finite maps over edges
-- | The values of the chart, a list of key-daughters pairs,
-- has unique keys. In essence, it is a map from 'n' to daughters.
-- The daughters should be a set (not necessarily sorted) of rhs's.
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
data SyntaxNode n e = SMeta
| SNode n [e]
| SString String
| SInt Integer
| SFloat Double
deriving (Eq,Ord,Show)
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes [] = []
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
where
(ess,xs') = span xs
span [] = ([],[])
span xs@(SNode n es:xs')
| n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
| otherwise = ([],xs)
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
-- ** syntax forests
data SyntaxForest n = FMeta
| FNode n [[SyntaxForest n]]
-- ^ The outer list should be a set (not necessarily sorted)
-- of possible alternatives. Ie. the outer list
-- is a disjunctive node, and the inner lists
-- are (conjunctive) concatenative nodes
| FString String
| FInt Integer
| FFloat Double
deriving (Eq, Ord, Show)
instance Functor SyntaxForest where
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
fmap _ (FString s) = FString s
fmap _ (FInt n) = FInt n
fmap _ (FFloat f) = FFloat f
fmap _ (FMeta) = FMeta
forestName :: SyntaxForest n -> Maybe n
forestName (FNode n _) = Just n
forestName _ = Nothing
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
unifyManyForests = foldM unifyForests FMeta
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
-- and all children can be unified
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
unifyForests FMeta forest = return forest
unifyForests forest FMeta = return forest
unifyForests (FNode name1 children1) (FNode name2 children2)
| name1 == name2 && not (null children) = return $ FNode name1 children
where children = [ forests | forests1 <- children1, forests2 <- children2,
sameLength forests1 forests2,
forests <- zipWithM unifyForests forests1 forests2 ]
unifyForests (FString s1) (FString s2)
| s1 == s2 = return $ FString s1
unifyForests (FInt n1) (FInt n2)
| n1 == n2 = return $ FInt n1
unifyForests (FFloat f1) (FFloat f2)
| f1 == f2 = return $ FFloat f1
unifyForests _ _ = fail "forest unification failure"
-- ** conversions between representations
chart2forests :: (Ord n, Ord e) =>
SyntaxChart n e -- ^ The complete chart
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
-> [e] -- ^ The starting edges
-> [SyntaxForest n] -- ^ The result has unique keys, ie. all 'n' are joined together.
-- In essence, the result is a map from 'n' to forest daughters
chart2forests chart isMeta = concatMap (edge2forests [])
where edge2forests edges edge
| isMeta edge = [FMeta]
| edge `elem` edges = []
| otherwise = map (item2forest (edge:edges)) $ chart ? edge
item2forest edges (SMeta) = FMeta
item2forest edges (SNode name children) =
FNode name $ children >>= mapM (edge2forests edges)
item2forest edges (SString s) = FString s
item2forest edges (SInt n) = FInt n
item2forest edges (SFloat f) = FFloat f
applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
applyProfileToForest (FNode (fun,profiles) children)
| fun == wildCId = concat chForests
| otherwise = [ FNode fun chForests | not (null chForests) ]
where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
forests0 <- children,
forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n) = [FInt n]
applyProfileToForest (FFloat f) = [FFloat f]
applyProfileToForest (FMeta) = [FMeta]
forest2trees :: SyntaxForest CId -> [Tree]
forest2trees (FNode n forests) = map (Fun n) $ forests >>= mapM forest2trees
forest2trees (FString s) = [Lit (LStr s)]
forest2trees (FInt n) = [Lit (LInt n)]
forest2trees (FFloat f) = [Lit (LFlt f)]
forest2trees (FMeta) = [Meta 0]

View File

@@ -0,0 +1,113 @@
module PGF.ShowLinearize (
collectWords,
tableLinearize,
recordLinearize,
termLinearize,
tabularLinearize,
allLinearize,
markLinearize
) where
import PGF.CId
import PGF.Data
import PGF.Tree
import PGF.Macros
import PGF.Linearize
import GF.Data.Operations
import Data.List
import qualified Data.Map as Map
-- printing linearizations in different ways with source parameters
-- internal representation, only used internally in this module
data Record =
RR [(String,Record)]
| RT [(String,Record)]
| RFV [Record]
| RS String
| RCon String
prRecord :: Record -> String
prRecord = prr where
prr t = case t of
RR fs -> concat $
"{" :
(intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"]
RT fs -> concat $
"table {" :
(intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"]
RFV ts -> concat $
"variants {" : (intersperse ";" (map prr ts)) ++ ["}"]
RS s -> prQuotedString s
RCon s -> s
-- uses the encoding of record types in PGF.paramlincat
mkRecord :: Term -> Term -> Record
mkRecord typ trm = case (typ,trm) of
(_, FV ts) -> RFV $ map (mkRecord typ) ts
(R rs, R ts) -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts]
(S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts]
(_,W s (R ts)) -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts])
(FV ps, C i) -> RCon $ str $ ps !! i
(S [], _) -> case realizes trm of
[s] -> RS s
ss -> RFV $ map RS ss
_ -> RS $ show trm ---- printTree trm
where
str = realize
-- show all branches, without labels and params
allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
allLinearize unlex pgf lang = concat . map (unlex . pr) . tabularLinearize pgf lang where
pr (p,vs) = unlines vs
-- show all branches, with labels and params
tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where
pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs))
-- create a table from labels+params to variants
tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])]
tabularLinearize pgf lang = branches . recLinearize pgf lang where
branches r = case r of
RR fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
RT fs -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
RFV rs -> concatMap branches rs
RS s -> [([], [s])]
RCon _ -> []
-- show record in GF-source-like syntax
recordLinearize :: PGF -> CId -> Expr -> String
recordLinearize pgf lang = prRecord . recLinearize pgf lang
-- create a GF-like record, forming the basis of all functions above
recLinearize :: PGF -> CId -> Expr -> Record
recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where
typ = case expr2tree tree of
Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f
-- show PGF term
termLinearize :: PGF -> CId -> Expr -> String
termLinearize pgf lang = show . linTree pgf lang
-- show bracketed markup with references to tree structure
markLinearize :: PGF -> CId -> Expr -> String
markLinearize pgf lang = concat . take 1 . linearizesMark pgf lang
-- for Morphology: word, lemma, tags
collectWords :: PGF -> Language -> [(String, [(CId,String)])]
collectWords pgf lang =
concatMap collOne
[(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf]
where
collOne (f,c,i) =
fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
fromRec f v r = case r of
RR rs -> concat [fromRec f v t | (_,t) <- rs]
RT rs -> concat [fromRec f (p:v) t | (p,t) <- rs]
RFV rs -> concatMap (fromRec f v) rs
RS s -> [(s,[(f,unwords (reverse v))])]
RCon c -> [] ---- inherent

View File

@@ -0,0 +1,71 @@
module PGF.Tree
( Tree(..),
tree2expr, expr2tree,
prTree
) where
import PGF.CId
import PGF.Expr hiding (Tree)
import Data.Char
import Data.List as List
import Control.Monad
import qualified Text.PrettyPrint as PP
import qualified Text.ParserCombinators.ReadP as RP
-- | The tree is an evaluated expression in the abstract syntax
-- of the grammar. The type is especially restricted to not
-- allow unapplied lambda abstractions. The tree is used directly
-- from the linearizer and is produced directly from the parser.
data Tree =
Abs [(BindType,CId)] Tree -- ^ lambda abstraction. The list of variables is non-empty
| Var CId -- ^ variable
| Fun CId [Tree] -- ^ function application
| Lit Literal -- ^ literal
| Meta {-# UNPACK #-} !MetaId -- ^ meta variable
deriving (Eq, Ord)
-----------------------------------------------------
-- Conversion Expr <-> Tree
-----------------------------------------------------
-- | Converts a tree to expression. The conversion
-- is always total, every tree is a valid expression.
tree2expr :: Tree -> Expr
tree2expr = tree2expr []
where
tree2expr ys (Fun x ts) = foldl EApp (EFun x) (List.map (tree2expr ys) ts)
tree2expr ys (Lit l) = ELit l
tree2expr ys (Meta n) = EMeta n
tree2expr ys (Abs xs t) = foldr (\(b,x) e -> EAbs b x e) (tree2expr (List.map snd (reverse xs)++ys) t) xs
tree2expr ys (Var x) = case List.lookup x (zip ys [0..]) of
Just i -> EVar i
Nothing -> error "unknown variable"
-- | Converts an expression to tree. The conversion is only partial.
-- Variables and meta variables of function type and beta redexes are not allowed.
expr2tree :: Expr -> Tree
expr2tree e = abs [] [] e
where
abs ys xs (EAbs b x e) = abs ys ((b,x):xs) e
abs ys xs (ETyped e _) = abs ys xs e
abs ys xs e = case xs of
[] -> app ys [] e
xs -> Abs (reverse xs) (app (map snd xs++ys) [] e)
app xs as (EApp e1 e2) = app xs ((abs xs [] e2) : as) e1
app xs as (ELit l)
| List.null as = Lit l
| otherwise = error "literal of function type encountered"
app xs as (EMeta n)
| List.null as = Meta n
| otherwise = error "meta variables of function type are not allowed in trees"
app xs as (EAbs _ x e) = error "beta redexes are not allowed in trees"
app xs as (EVar i) = Var (xs !! i)
app xs as (EFun f) = Fun f as
app xs as (ETyped e _) = app xs as e
prTree :: Tree -> String
prTree = showExpr [] . tree2expr

View File

@@ -0,0 +1,103 @@
module PGF.Type ( Type(..), Hypo,
readType, showType,
mkType, mkHypo, mkDepHypo, mkImplHypo,
pType, ppType, ppHypo ) where
import PGF.CId
import {-# SOURCE #-} PGF.Expr
import Data.Char
import Data.List
import qualified Text.PrettyPrint as PP
import qualified Text.ParserCombinators.ReadP as RP
import Control.Monad
-- | To read a type from a 'String', use 'readType'.
data Type =
DTyp [Hypo] CId [Expr]
deriving (Eq,Ord,Show)
-- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
type Hypo = (BindType,CId,Type)
-- | Reads a 'Type' from a 'String'.
readType :: String -> Maybe Type
readType s = case [x | (x,cs) <- RP.readP_to_S pType s, all isSpace cs] of
[x] -> Just x
_ -> Nothing
-- | renders type as 'String'. The list
-- of identifiers is the list of all free variables
-- in the expression in order reverse to the order
-- of binding.
showType :: [CId] -> Type -> String
showType vars = PP.render . ppType 0 vars
-- | creates a type from list of hypothesises, category and
-- list of arguments for the category. The operation
-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
-- @h_1 -> ... -> h_n -> C e_1 ... e_m@
mkType :: [Hypo] -> CId -> [Expr] -> Type
mkType hyps cat args = DTyp hyps cat args
-- | creates hypothesis for non-dependent type i.e. A
mkHypo :: Type -> Hypo
mkHypo ty = (Explicit,wildCId,ty)
-- | creates hypothesis for dependent type i.e. (x : A)
mkDepHypo :: CId -> Type -> Hypo
mkDepHypo x ty = (Explicit,x,ty)
-- | creates hypothesis for dependent type with implicit argument i.e. ({x} : A)
mkImplHypo :: CId -> Type -> Hypo
mkImplHypo x ty = (Implicit,x,ty)
pType :: RP.ReadP Type
pType = do
RP.skipSpaces
hyps <- RP.sepBy (pHypo >>= \h -> RP.skipSpaces >> RP.string "->" >> return h) RP.skipSpaces
RP.skipSpaces
(cat,args) <- pAtom
return (DTyp (concat hyps) cat args)
where
pHypo =
do (cat,args) <- pAtom
return [(Explicit,wildCId,DTyp [] cat args)]
RP.<++
(RP.between (RP.char '(') (RP.char ')') $ do
xs <- RP.option [(Explicit,wildCId)] $ do
xs <- pBinds
RP.skipSpaces
RP.char ':'
return xs
ty <- pType
return [(b,v,ty) | (b,v) <- xs])
RP.<++
(RP.between (RP.char '{') (RP.char '}') $ do
vs <- RP.sepBy1 (RP.skipSpaces >> pCId) (RP.skipSpaces >> RP.char ',')
RP.skipSpaces
RP.char ':'
ty <- pType
return [(Implicit,v,ty) | v <- vs])
pAtom = do
cat <- pCId
RP.skipSpaces
args <- RP.sepBy pArg RP.skipSpaces
return (cat, args)
ppType :: Int -> [CId] -> Type -> PP.Doc
ppType d scope (DTyp hyps cat args)
| null hyps = ppRes scope cat args
| otherwise = let (scope',hdocs) = mapAccumL ppHypo scope hyps
in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope' cat args) hdocs)
where
ppRes scope cat es = ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es)
ppHypo scope (Explicit,x,typ) = if x == wildCId
then (scope,ppType 1 scope typ)
else let y = freshName x scope
in (y:scope,PP.parens (ppCId y PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
ppHypo scope (Implicit,x,typ) = if x == wildCId
then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
else let y = freshName x scope
in (y:scope,PP.parens (PP.braces (ppCId y) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))

View File

@@ -0,0 +1,524 @@
----------------------------------------------------------------------
-- |
-- Module : PGF.TypeCheck
-- Maintainer : Krasimir Angelov
-- Stability : (stable)
-- Portability : (portable)
--
-- Type checking in abstract syntax with dependent types.
-- The type checker also performs renaming and checking for unknown
-- functions. The variable references are replaced by de Bruijn indices.
--
-----------------------------------------------------------------------------
module PGF.TypeCheck (checkType, checkExpr, inferExpr,
ppTcError, TcError(..)
) where
import PGF.Data
import PGF.Expr
import PGF.Macros (typeOfHypo)
import PGF.CId
import Data.Map as Map
import Data.IntMap as IntMap
import Data.Maybe as Maybe
import Data.List as List
import Control.Monad
import Text.PrettyPrint
-----------------------------------------------------
-- The Scope
-----------------------------------------------------
data TType = TTyp Env Type
newtype Scope = Scope [(CId,TType)]
emptyScope = Scope []
addScopedVar :: CId -> TType -> Scope -> Scope
addScopedVar x tty (Scope gamma) = Scope ((x,tty):gamma)
-- | returns the type and the De Bruijn index of a local variable
lookupVar :: CId -> Scope -> Maybe (Int,TType)
lookupVar x (Scope gamma) = listToMaybe [(i,tty) | ((y,tty),i) <- zip gamma [0..], x == y]
-- | returns the type and the name of a local variable
getVar :: Int -> Scope -> (CId,TType)
getVar i (Scope gamma) = gamma !! i
scopeEnv :: Scope -> Env
scopeEnv (Scope gamma) = let n = length gamma
in [VGen (n-i-1) [] | i <- [0..n-1]]
scopeVars :: Scope -> [CId]
scopeVars (Scope gamma) = List.map fst gamma
scopeSize :: Scope -> Int
scopeSize (Scope gamma) = length gamma
-----------------------------------------------------
-- The Monad
-----------------------------------------------------
type MetaStore = IntMap MetaValue
data MetaValue
= MUnbound Scope [Expr -> TcM ()]
| MBound Expr
| MGuarded Expr [Expr -> TcM ()] {-# UNPACK #-} !Int -- the Int is the number of constraints that have to be solved
-- to unlock this meta variable
newtype TcM a = TcM {unTcM :: Abstr -> MetaId -> MetaStore -> TcResult a}
data TcResult a
= Ok {-# UNPACK #-} !MetaId MetaStore a
| Fail TcError
instance Monad TcM where
return x = TcM (\abstr metaid ms -> Ok metaid ms x)
f >>= g = TcM (\abstr metaid ms -> case unTcM f abstr metaid ms of
Ok metaid ms x -> unTcM (g x) abstr metaid ms
Fail e -> Fail e)
instance Functor TcM where
fmap f x = TcM (\abstr metaid ms -> case unTcM x abstr metaid ms of
Ok metaid ms x -> Ok metaid ms (f x)
Fail e -> Fail e)
lookupCatHyps :: CId -> TcM [Hypo]
lookupCatHyps cat = TcM (\abstr metaid ms -> case Map.lookup cat (cats abstr) of
Just hyps -> Ok metaid ms hyps
Nothing -> Fail (UnknownCat cat))
lookupFunType :: CId -> TcM TType
lookupFunType fun = TcM (\abstr metaid ms -> case Map.lookup fun (funs abstr) of
Just (ty,_,_) -> Ok metaid ms (TTyp [] ty)
Nothing -> Fail (UnknownFun fun))
newMeta :: Scope -> TcM MetaId
newMeta scope = TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MUnbound scope []) ms) metaid)
newGuardedMeta :: Scope -> Expr -> TcM MetaId
newGuardedMeta scope e = getFuns >>= \funs -> TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MGuarded e [] 0) ms) metaid)
getMeta :: MetaId -> TcM MetaValue
getMeta i = TcM (\abstr metaid ms -> Ok metaid ms $! case IntMap.lookup i ms of
Just mv -> mv)
setMeta :: MetaId -> MetaValue -> TcM ()
setMeta i mv = TcM (\abstr metaid ms -> Ok metaid (IntMap.insert i mv ms) ())
tcError :: TcError -> TcM a
tcError e = TcM (\abstr metaid ms -> Fail e)
getFuns :: TcM Funs
getFuns = TcM (\abstr metaid ms -> Ok metaid ms (funs abstr))
addConstraint :: MetaId -> MetaId -> Env -> [Value] -> (Value -> TcM ()) -> TcM ()
addConstraint i j env vs c = do
funs <- getFuns
mv <- getMeta j
case mv of
MUnbound scope cs -> addRef >> setMeta j (MUnbound scope ((\e -> release >> c (apply funs env e vs)) : cs))
MBound e -> c (apply funs env e vs)
MGuarded e cs x | x == 0 -> c (apply funs env e vs)
| otherwise -> addRef >> setMeta j (MGuarded e ((\e -> release >> c (apply funs env e vs)) : cs) x)
where
addRef = TcM (\abstr metaid ms -> case IntMap.lookup i ms of
Just (MGuarded e cs x) -> Ok metaid (IntMap.insert i (MGuarded e cs (x+1)) ms) ())
release = TcM (\abstr metaid ms -> case IntMap.lookup i ms of
Just (MGuarded e cs x) -> if x == 1
then unTcM (sequence_ [c e | c <- cs]) abstr metaid (IntMap.insert i (MGuarded e [] 0) ms)
else Ok metaid (IntMap.insert i (MGuarded e cs (x-1)) ms) ())
-----------------------------------------------------
-- Type errors
-----------------------------------------------------
-- | If an error occurs in the typechecking phase
-- the type checker returns not a plain text error message
-- but a 'TcError' structure which describes the error.
data TcError
= UnknownCat CId -- ^ Unknown category name was found.
| UnknownFun CId -- ^ Unknown function name was found.
| WrongCatArgs [CId] Type CId Int Int -- ^ A category was applied to wrong number of arguments.
-- The first integer is the number of expected arguments and
-- the second the number of given arguments.
-- The @[CId]@ argument is the list of free variables
-- in the type. It should be used for the 'showType' function.
| TypeMismatch [CId] Expr Type Type -- ^ The expression is not of the expected type.
-- The first type is the expected type, while
-- the second is the inferred. The @[CId]@ argument is the list
-- of free variables in both the expression and the type.
-- It should be used for the 'showType' and 'showExpr' functions.
| NotFunType [CId] Expr Type -- ^ Something that is not of function type was applied to an argument.
| CannotInferType [CId] Expr -- ^ It is not possible to infer the type of an expression.
| UnresolvedMetaVars [CId] Expr [MetaId] -- ^ Some metavariables have to be instantiated in order to complete the typechecking.
| UnexpectedImplArg [CId] Expr -- ^ Implicit argument was passed where the type doesn't allow it
-- | Renders the type checking error to a document. See 'Text.PrettyPrint'.
ppTcError :: TcError -> Doc
ppTcError (UnknownCat cat) = text "Category" <+> ppCId cat <+> text "is not in scope"
ppTcError (UnknownFun fun) = text "Function" <+> ppCId fun <+> text "is not in scope"
ppTcError (WrongCatArgs xs ty cat m n) = text "Category" <+> ppCId cat <+> text "should have" <+> int m <+> text "argument(s), but has been given" <+> int n $$
text "In the type:" <+> ppType 0 xs ty
ppTcError (TypeMismatch xs e ty1 ty2) = text "Couldn't match expected type" <+> ppType 0 xs ty1 $$
text " against inferred type" <+> ppType 0 xs ty2 $$
text "In the expression:" <+> ppExpr 0 xs e
ppTcError (NotFunType xs e ty) = text "A function type is expected for the expression" <+> ppExpr 0 xs e <+> text "instead of type" <+> ppType 0 xs ty
ppTcError (CannotInferType xs e) = text "Cannot infer the type of expression" <+> ppExpr 0 xs e
ppTcError (UnresolvedMetaVars xs e ms) = text "Meta variable(s)" <+> fsep (List.map ppMeta ms) <+> text "should be resolved" $$
text "in the expression:" <+> ppExpr 0 xs e
ppTcError (UnexpectedImplArg xs e) = braces (ppExpr 0 xs e) <+> text "is implicit argument but not implicit argument is expected here"
-----------------------------------------------------
-- checkType
-----------------------------------------------------
-- | Check whether a given type is consistent with the abstract
-- syntax of the grammar.
checkType :: PGF -> Type -> Either TcError Type
checkType pgf ty =
case unTcM (tcType emptyScope ty >>= refineType) (abstract pgf) 0 IntMap.empty of
Ok _ ms ty -> Right ty
Fail err -> Left err
tcType :: Scope -> Type -> TcM Type
tcType scope ty@(DTyp hyps cat es) = do
(scope,hyps) <- tcHypos scope hyps
c_hyps <- lookupCatHyps cat
let m = length es
n = length [ty | (Explicit,x,ty) <- c_hyps]
(delta,es) <- tcCatArgs scope es [] c_hyps ty n m
return (DTyp hyps cat es)
tcHypos :: Scope -> [Hypo] -> TcM (Scope,[Hypo])
tcHypos scope [] = return (scope,[])
tcHypos scope (h:hs) = do
(scope,h ) <- tcHypo scope h
(scope,hs) <- tcHypos scope hs
return (scope,h:hs)
tcHypo :: Scope -> Hypo -> TcM (Scope,Hypo)
tcHypo scope (b,x,ty) = do
ty <- tcType scope ty
if x == wildCId
then return (scope,(b,x,ty))
else return (addScopedVar x (TTyp (scopeEnv scope) ty) scope,(b,x,ty))
tcCatArgs scope [] delta [] ty0 n m = return (delta,[])
tcCatArgs scope (EImplArg e:es) delta ((Explicit,x,ty):hs) ty0 n m = tcError (UnexpectedImplArg (scopeVars scope) e)
tcCatArgs scope (EImplArg e:es) delta ((Implicit,x,ty):hs) ty0 n m = do
e <- tcExpr scope e (TTyp delta ty)
funs <- getFuns
(delta,es) <- if x == wildCId
then tcCatArgs scope es delta hs ty0 n m
else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m
return (delta,EImplArg e:es)
tcCatArgs scope es delta ((Implicit,x,ty):hs) ty0 n m = do
i <- newMeta scope
(delta,es) <- if x == wildCId
then tcCatArgs scope es delta hs ty0 n m
else tcCatArgs scope es (VMeta i (scopeEnv scope) [] : delta) hs ty0 n m
return (delta,EImplArg (EMeta i) : es)
tcCatArgs scope (e:es) delta ((Explicit,x,ty):hs) ty0 n m = do
e <- tcExpr scope e (TTyp delta ty)
funs <- getFuns
(delta,es) <- if x == wildCId
then tcCatArgs scope es delta hs ty0 n m
else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m
return (delta,e:es)
tcCatArgs scope _ delta _ ty0@(DTyp _ cat _) n m = do
tcError (WrongCatArgs (scopeVars scope) ty0 cat n m)
-----------------------------------------------------
-- checkExpr
-----------------------------------------------------
-- | Checks an expression against a specified type.
checkExpr :: PGF -> Expr -> Type -> Either TcError Expr
checkExpr pgf e ty =
case unTcM (do e <- tcExpr emptyScope e (TTyp [] ty)
e <- refineExpr e
checkResolvedMetaStore emptyScope e
return e) (abstract pgf) 0 IntMap.empty of
Ok _ ms e -> Right e
Fail err -> Left err
tcExpr :: Scope -> Expr -> TType -> TcM Expr
tcExpr scope e0@(EAbs Implicit x e) tty =
case tty of
TTyp delta (DTyp ((Implicit,y,ty):hs) c es) -> do e <- if y == wildCId
then tcExpr (addScopedVar x (TTyp delta ty) scope)
e (TTyp delta (DTyp hs c es))
else tcExpr (addScopedVar x (TTyp delta ty) scope)
e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
return (EAbs Implicit x e)
_ -> do ty <- evalType (scopeSize scope) tty
tcError (NotFunType (scopeVars scope) e0 ty)
tcExpr scope e0 (TTyp delta (DTyp ((Implicit,y,ty):hs) c es)) = do
e0 <- if y == wildCId
then tcExpr (addScopedVar wildCId (TTyp delta ty) scope)
e0 (TTyp delta (DTyp hs c es))
else tcExpr (addScopedVar wildCId (TTyp delta ty) scope)
e0 (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
return (EAbs Implicit wildCId e0)
tcExpr scope e0@(EAbs Explicit x e) tty =
case tty of
TTyp delta (DTyp ((Explicit,y,ty):hs) c es) -> do e <- if y == wildCId
then tcExpr (addScopedVar x (TTyp delta ty) scope)
e (TTyp delta (DTyp hs c es))
else tcExpr (addScopedVar x (TTyp delta ty) scope)
e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
return (EAbs Explicit x e)
_ -> do ty <- evalType (scopeSize scope) tty
tcError (NotFunType (scopeVars scope) e0 ty)
tcExpr scope (EMeta _) tty = do
i <- newMeta scope
return (EMeta i)
tcExpr scope e0 tty = do
(e0,tty0) <- infExpr scope e0
i <- newGuardedMeta scope e0
eqType scope (scopeSize scope) i tty tty0
return (EMeta i)
-----------------------------------------------------
-- inferExpr
-----------------------------------------------------
-- | Tries to infer the type of a given expression. Note that
-- even if the expression is type correct it is not always
-- possible to infer its type in the GF type system.
-- In this case the function returns the 'CannotInferType' error.
inferExpr :: PGF -> Expr -> Either TcError (Expr,Type)
inferExpr pgf e =
case unTcM (do (e,tty) <- infExpr emptyScope e
e <- refineExpr e
checkResolvedMetaStore emptyScope e
ty <- evalType 0 tty
return (e,ty)) (abstract pgf) 1 IntMap.empty of
Ok _ ms (e,ty) -> Right (e,ty)
Fail err -> Left err
infExpr :: Scope -> Expr -> TcM (Expr,TType)
infExpr scope e0@(EApp e1 e2) = do
(e1,TTyp delta ty) <- infExpr scope e1
(e0,delta,ty) <- tcArg scope e1 e2 delta ty
return (e0,TTyp delta ty)
infExpr scope e0@(EFun x) = do
case lookupVar x scope of
Just (i,tty) -> return (EVar i,tty)
Nothing -> do tty <- lookupFunType x
return (e0,tty)
infExpr scope e0@(EVar i) = do
return (e0,snd (getVar i scope))
infExpr scope e0@(ELit l) = do
let cat = case l of
LStr _ -> mkCId "String"
LInt _ -> mkCId "Int"
LFlt _ -> mkCId "Float"
return (e0,TTyp [] (DTyp [] cat []))
infExpr scope (ETyped e ty) = do
ty <- tcType scope ty
e <- tcExpr scope e (TTyp (scopeEnv scope) ty)
return (ETyped e ty,TTyp (scopeEnv scope) ty)
infExpr scope (EImplArg e) = do
(e,tty) <- infExpr scope e
return (EImplArg e,tty)
infExpr scope e = tcError (CannotInferType (scopeVars scope) e)
tcArg scope e1 e2 delta ty0@(DTyp [] c es) = do
ty1 <- evalType (scopeSize scope) (TTyp delta ty0)
tcError (NotFunType (scopeVars scope) e1 ty1)
tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = tcError (UnexpectedImplArg (scopeVars scope) e2)
tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do
e2 <- tcExpr scope e2 (TTyp delta ty)
funs <- getFuns
if x == wildCId
then return (EApp e1 (EImplArg e2), delta,DTyp hs c es)
else return (EApp e1 (EImplArg e2),eval funs (scopeEnv scope) e2:delta,DTyp hs c es)
tcArg scope e1 e2 delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = do
e2 <- tcExpr scope e2 (TTyp delta ty)
funs <- getFuns
if x == wildCId
then return (EApp e1 e2, delta,DTyp hs c es)
else return (EApp e1 e2,eval funs (scopeEnv scope) e2:delta,DTyp hs c es)
tcArg scope e1 e2 delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do
i <- newMeta scope
if x == wildCId
then tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 delta (DTyp hs c es)
else tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 (VMeta i (scopeEnv scope) [] : delta) (DTyp hs c es)
-----------------------------------------------------
-- eqType
-----------------------------------------------------
eqType :: Scope -> Int -> MetaId -> TType -> TType -> TcM ()
eqType scope k i0 tty1@(TTyp delta1 ty1@(DTyp hyps1 cat1 es1)) tty2@(TTyp delta2 ty2@(DTyp hyps2 cat2 es2))
| cat1 == cat2 = do (k,delta1,delta2) <- eqHyps k delta1 hyps1 delta2 hyps2
sequence_ [eqExpr k delta1 e1 delta2 e2 | (e1,e2) <- zip es1 es2]
| otherwise = raiseTypeMatchError
where
raiseTypeMatchError = do ty1 <- evalType k tty1
ty2 <- evalType k tty2
e <- refineExpr (EMeta i0)
tcError (TypeMismatch (scopeVars scope) e ty1 ty2)
eqHyps :: Int -> Env -> [Hypo] -> Env -> [Hypo] -> TcM (Int,Env,Env)
eqHyps k delta1 [] delta2 [] =
return (k,delta1,delta2)
eqHyps k delta1 ((_,x,ty1) : h1s) delta2 ((_,y,ty2) : h2s) = do
eqType scope k i0 (TTyp delta1 ty1) (TTyp delta2 ty2)
if x == wildCId && y == wildCId
then eqHyps k delta1 h1s delta2 h2s
else if x /= wildCId && y /= wildCId
then eqHyps (k+1) ((VGen k []):delta1) h1s ((VGen k []):delta2) h2s
else raiseTypeMatchError
eqHyps k delta1 h1s delta2 h2s = raiseTypeMatchError
eqExpr :: Int -> Env -> Expr -> Env -> Expr -> TcM ()
eqExpr k env1 e1 env2 e2 = do
funs <- getFuns
eqValue k (eval funs env1 e1) (eval funs env2 e2)
eqValue :: Int -> Value -> Value -> TcM ()
eqValue k v1 v2 = do
v1 <- deRef v1
v2 <- deRef v2
eqValue' k v1 v2
deRef v@(VMeta i env vs) = do
mv <- getMeta i
funs <- getFuns
case mv of
MBound e -> deRef (apply funs env e vs)
MGuarded e _ x | x == 0 -> deRef (apply funs env e vs)
| otherwise -> return v
MUnbound _ _ -> return v
deRef v = return v
eqValue' k (VSusp i env vs1 c) v2 = addConstraint i0 i env vs1 (\v1 -> eqValue k (c v1) v2)
eqValue' k v1 (VSusp i env vs2 c) = addConstraint i0 i env vs2 (\v2 -> eqValue k v1 (c v2))
eqValue' k (VMeta i env1 vs1) (VMeta j env2 vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2
eqValue' k (VMeta i env1 vs1) v2 = do (MUnbound scopei cs) <- getMeta i
e2 <- mkLam i scopei env1 vs1 v2
setMeta i (MBound e2)
sequence_ [c e2 | c <- cs]
eqValue' k v1 (VMeta i env2 vs2) = do (MUnbound scopei cs) <- getMeta i
e1 <- mkLam i scopei env2 vs2 v1
setMeta i (MBound e1)
sequence_ [c e1 | c <- cs]
eqValue' k (VApp f1 vs1) (VApp f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2
eqValue' k (VLit l1) (VLit l2 ) | l1 == l2 = return ()
eqValue' k (VGen i vs1) (VGen j vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2
eqValue' k (VClosure env1 (EAbs _ x1 e1)) (VClosure env2 (EAbs _ x2 e2)) = let v = VGen k []
in eqExpr (k+1) (v:env1) e1 (v:env2) e2
eqValue' k v1 v2 = raiseTypeMatchError
mkLam i scope env vs0 v = do
let k = scopeSize scope
vs = reverse (take k env) ++ vs0
xs = nub [i | VGen i [] <- vs]
if length vs == length xs
then return ()
else raiseTypeMatchError
v <- occurCheck i k xs v
funs <- getFuns
return (addLam vs0 (value2expr funs (length xs) v))
where
addLam [] e = e
addLam (v:vs) e = EAbs Explicit var (addLam vs e)
var = mkCId "v"
occurCheck i0 k xs (VApp f vs) = do vs <- mapM (occurCheck i0 k xs) vs
return (VApp f vs)
occurCheck i0 k xs (VLit l) = return (VLit l)
occurCheck i0 k xs (VMeta i env vs) = do if i == i0
then raiseTypeMatchError
else return ()
mv <- getMeta i
funs <- getFuns
case mv of
MBound e -> occurCheck i0 k xs (apply funs env e vs)
MGuarded e _ _ -> occurCheck i0 k xs (apply funs env e vs)
MUnbound scopei _ | scopeSize scopei > k -> raiseTypeMatchError
| otherwise -> do vs <- mapM (occurCheck i0 k xs) vs
return (VMeta i env vs)
occurCheck i0 k xs (VSusp i env vs cnt) = do addConstraint i0 i env vs (\v -> occurCheck i0 k xs (cnt v) >> return ())
return (VSusp i env vs cnt)
occurCheck i0 k xs (VGen i vs) = case List.findIndex (==i) xs of
Just i -> do vs <- mapM (occurCheck i0 k xs) vs
return (VGen i vs)
Nothing -> raiseTypeMatchError
occurCheck i0 k xs (VClosure env e) = do env <- mapM (occurCheck i0 k xs) env
return (VClosure env e)
-----------------------------------------------------------
-- check for meta variables that still have to be resolved
-----------------------------------------------------------
checkResolvedMetaStore :: Scope -> Expr -> TcM ()
checkResolvedMetaStore scope e = TcM (\abstr metaid ms ->
let xs = [i | (i,mv) <- IntMap.toList ms, not (isResolved mv)]
in if List.null xs
then Ok metaid ms ()
else Fail (UnresolvedMetaVars (scopeVars scope) e xs))
where
isResolved (MUnbound _ []) = True
isResolved (MGuarded _ _ _) = True
isResolved (MBound _) = True
isResolved _ = False
-----------------------------------------------------
-- evalType
-----------------------------------------------------
evalType :: Int -> TType -> TcM Type
evalType k (TTyp delta ty) = do funs <- getFuns
refineType (evalTy funs k delta ty)
where
evalTy sig k delta (DTyp hyps cat es) =
let ((k1,delta1),hyps1) = mapAccumL (evalHypo sig) (k,delta) hyps
in DTyp hyps1 cat (List.map (normalForm sig k1 delta1) es)
evalHypo sig (k,delta) (b,x,ty) =
if x == wildCId
then ((k, delta),(b,x,evalTy sig k delta ty))
else ((k+1,(VGen k []):delta),(b,x,evalTy sig k delta ty))
-----------------------------------------------------
-- refinement
-----------------------------------------------------
refineExpr :: Expr -> TcM Expr
refineExpr e = TcM (\abstr metaid ms -> Ok metaid ms (refineExpr_ ms e))
refineExpr_ ms e = refine e
where
refine (EAbs b x e) = EAbs b x (refine e)
refine (EApp e1 e2) = EApp (refine e1) (refine e2)
refine (ELit l) = ELit l
refine (EMeta i) = case IntMap.lookup i ms of
Just (MBound e ) -> refine e
Just (MGuarded e _ _) -> refine e
_ -> EMeta i
refine (EFun f) = EFun f
refine (EVar i) = EVar i
refine (ETyped e ty) = ETyped (refine e) (refineType_ ms ty)
refine (EImplArg e) = EImplArg (refine e)
refineType :: Type -> TcM Type
refineType ty = TcM (\abstr metaid ms -> Ok metaid ms (refineType_ ms ty))
refineType_ ms (DTyp hyps cat es) = DTyp [(b,x,refineType_ ms ty) | (b,x,ty) <- hyps] cat (List.map (refineExpr_ ms) es)
value2expr sig i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr sig i) vs)
value2expr sig i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr sig i) vs)
value2expr sig i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr sig i) vs)
value2expr sig i (VSusp j env vs k) = value2expr sig i (k (VGen j vs))
value2expr sig i (VLit l) = ELit l
value2expr sig i (VClosure env (EAbs b x e)) = EAbs b x (value2expr sig (i+1) (eval sig ((VGen i []):env) e))

View File

@@ -0,0 +1,353 @@
----------------------------------------------------------------------
-- |
-- Module : VisualizeTree
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date:
-- > CVS $Author:
-- > CVS $Revision:
--
-- Print a graph of an abstract syntax tree in Graphviz DOT format
-- Based on BB's VisualizeGrammar
-- FIXME: change this to use GF.Visualization.Graphviz,
-- instead of rolling its own.
-----------------------------------------------------------------------------
module PGF.VisualizeTree ( graphvizAbstractTree
, graphvizParseTree
, graphvizDependencyTree
, graphvizAlignment
, tree2mk
, getDepLabels
, PosText(..), readPosText
) where
import PGF.CId (CId,showCId,pCId,mkCId)
import PGF.Data
import PGF.Tree
import PGF.Expr (showExpr)
import PGF.Linearize
import PGF.Macros (lookValCat)
import qualified Data.Map as Map
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
import Data.Char (isDigit)
import qualified Text.ParserCombinators.ReadP as RP
import Debug.Trace
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String
graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree
tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String]
tree2graph pgf (funs,cats) = prf [] where
prf ps t = let (nod,lab) = prn ps t in
(nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") :
case t of
Fun cid trees ->
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
Abs xs (Fun cid trees) ->
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
_ -> []
prn ps t = case t of
Fun cid _ ->
let
fun = if funs then showCId cid else ""
cat = if cats then prCat cid else ""
colon = if funs && cats then " : " else ""
lab = "\"" ++ fun ++ colon ++ cat ++ "\""
in (show(show (ps :: [Int])),lab)
Abs bs tree ->
let fun = case tree of
Fun cid _ -> Fun cid []
_ -> tree
in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"")
_ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"")
pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];"
arr = " -- " -- if digr then " -> " else " -- "
prCat = showCId . lookValCat pgf
esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts
prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
graph = if digr then "digraph" else "graph"
-- replace each non-atomic constructor with mkC, where C is the val cat
tree2mk :: PGF -> Expr -> String
tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
t2m t = case t of
Fun cid [] -> t
Fun cid ts -> Fun (mk cid) (map t2m ts)
_ -> t
mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
-- dependency trees from Linearize.linearizeMark
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
"malt" -> unlines (lin2dep format)
"malt_input" -> unlines (lin2dep format)
_ -> prGraph True (lin2dep format)
where
lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of
"malt" -> map (concat . intersperse "\t") wnodes
"malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
_ -> prelude ++ nodes ++ links
ifd s = if debug then s else []
pot = readPosText $ head $ linearizesMark pgf lang exp
---- use Just str if you have str to match against
prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]
nodes = map mkNode nodeWords
mkNode (i,((_,p),ss)) =
node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
((Just f,p),w) <- wlins pot]
links = map mkLink thelinks
thelinks = [(word y, x, label tr y x) |
(_,((f,x),_)) <- tail nodeWords,
let y = dominant x]
mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
node = show . show
dominant x = case x of
[] -> x
_ | not (x == hx) -> hx
_ -> dominant (init x)
where
hx = headArg (init x) tr x
headArg x0 tr x = case (tr,x) of
(Fun f [],[_]) -> x0 ---- ??
(Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
(Fun f ts,i:y) -> headArg x0 (ts !! i) y
_ -> x0 ----
label tr y x = case span (uncurry (==)) (zip y x) of
(xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
_ -> "" ----
funAt tr x = case (tr,x) of
(Fun f _ ,[]) -> f
(Fun f ts,i:y) -> funAt (ts !! i) y
_ -> mkCId (prTree tr) ----
word x = if elem x sortedNodes then x else
let x' = headArg x tr (x ++[0]) in
if x' == x then [] else word x'
tr = expr2tree exp
sortedNodes = [p | (_,((_,p),_)) <- nodeWords]
labels = maybe Map.empty id mlab
getHead i f = case Map.lookup f labels of
Just ls -> length $ takeWhile (/= "head") ls
_ -> i
getLabel i f = case Map.lookup f labels of
Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
_ -> showCId f ++ "#" ++ show i
-- to generate CoNLL format for MaltParser
nodeMap :: Map.Map [Int] Int
nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]
arcMap :: Map.Map [Int] ([Int],String)
arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]
lookDomLab p = case Map.lookup p arcMap of
Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
_ -> (0,rootlabel)
wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
(i, ((fun,p),ws)) <- tail nodeWords,
let pos = showCId $ lookValCat pgf fun,
let morph = unspec,
let (dom,lab) = lookDomLab p
]
maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
unspec = "_"
rootlabel = "ROOT"
type Labels = Map.Map CId [String]
getDepLabels :: [String] -> Labels
getDepLabels ss = Map.fromList [(mkCId f,ls) | f:ls <- map words ss]
-- parse trees from Linearize.linearizeMark
---- nubrec and domins are quadratic, but could be (n log n)
graphvizParseTree :: PGF -> CId -> Expr -> String
graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where
linMark = head . linearizesMark pgf lang
---- use Just str if you have str to match against
lin2tree pgf s = trace s $ prelude ++ nodes ++ links where
prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"]
nodeRecs = zip [0..]
(nub (filter (not . null) (nlins [postext] ++ [leaves postext])))
nlins pts =
nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] :
concatMap nlins [ts | T _ ts <- pts]
leaves pt = [(p++[j],s) | (j,(p,s)) <-
zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]]
nubrec es rs = case rs of
r:rr -> let r' = filter (not . flip elem es) (nub r)
in r' : nubrec (r' ++ es) rr
_ -> rs
nodes = map mkStruct nodeRecs
mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;"
cat = showCId . lookValCat pgf
fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs])
struct i = "struct" ++ show i
links = map mkEdge domins
domins = nub [((i,x),(j,y)) |
(i,xs) <- nodeRecs, (j,ys) <- nodeRecs,
x <- xs, y <- ys, dominates x y]
dominates (p,x) (q,y) = not (null q) && p == init q
mkEdge ((i,x),(j,y)) =
struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++
struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;"
postext = readPosText s
-- auxiliaries for graphviz syntax
struct i = "struct" ++ show i
mark (j,n) = "n" ++ show j ++ "a" ++ uncommas n
uncommas = map (\c -> if c==',' then 'c' else c)
tag s = "<" ++ s ++ ">"
showp = init . tail . show
mtag = tag . ('n':) . uncommas
-- word alignments from Linearize.linearizesMark
-- words are chunks like {[0,1,1,0] old}
graphvizAlignment :: PGF -> Expr -> String
graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)]
lin2graph :: [String] -> [String]
lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links
where
prelude = ["rankdir=LR ;", "node [shape = record] ;"]
nlins :: [(Int,[((Int,String),String)])]
nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) |
(i,ws) <- zip [0..] (map (wlins . readPosText) ss)]
unw = concat . intersperse "\\ " -- space escape in graphviz
nodes = map mkStruct nlins
mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
links = nub $ concatMap mkEdge (init nlins)
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
[edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
edge i v w =
struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
{-
alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double)
alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where
linsMark t =
[s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)]
mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double)
mkStat =
mkAlign :: [String] -> [(String,String)]
mkAlign ss =
nlins :: [(Int,[((Int,String),String)])]
nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) |
(i,vs) <- zip [0..] (map (wlins . readPosText) ss)]
nodes = map mkStruct nlins
mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
links = nub $ concatMap mkEdge (init nlins)
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
[edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
edge i v w =
struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
-}
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
wlins pt = case pt of
T p pts -> concatMap (lins p) pts
M ws -> if null ws then [] else [((Nothing,[]),ws)]
where
lins p pt = case pt of
T q pts -> concatMap (lins q) pts
M ws -> if null ws then [] else [(p,ws)]
data PosText =
T (Maybe CId,[Int]) [PosText]
| M [String]
deriving Show
readPosText :: String -> PosText
readPosText = fst . head . (RP.readP_to_S pPosText) where
pPosText = do
RP.char '(' >> RP.skipSpaces
p <- pPos
RP.skipSpaces
ts <- RP.many pPosText
RP.char ')' >> RP.skipSpaces
return (T p ts)
RP.<++ do
ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
return (M ws)
pPos = do
fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
RP.<++ (return Nothing)
RP.char '[' >> RP.skipSpaces
is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
RP.char ']' >> RP.skipSpaces
RP.char ')' RP.<++ return ' '
return (fun,map read is)
{-
digraph{
rankdir ="LR" ;
node [shape = record] ;
struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ;
struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ;
struct1:f0 -> struct2:f0 ;
struct1:f1 -> struct2:f2 ;
struct1:f2 -> struct2:f3 ;
struct1:f3 -> struct2:f1 ;
struct1:f0 -> struct2:f4 ;
}
-}

View File

@@ -0,0 +1,17 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<link rel="stylesheet" type="text/css" href="style.css" />
<script type="text/javascript" src="gflib.js"></script>
<script type="text/javascript" src="editorGrammar.js"></script>
<script type="text/javascript" src="grammar.js"></script>
<script type="text/javascript" src="gfjseditor.js"></script>
<title>Web-based Syntax Editor</title>
</head>
<body onload="mkEditor('editor', Food)" onkeydown="hotKeys(event)">
<div id="editor">
</div>
</body>
</html>

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 161 B

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,54 @@
/* Output */
function sayText(text) {
document.voice_output_text = text;
activateForm("voice_output");
}
/* XHTML+Voice Utilities */
function activateForm(formid) {
var form = document.getElementById(formid);
var e = document.createEvent("UIEvents");
e.initEvent("DOMActivate","true","true");
form.dispatchEvent(e);
}
/* DOM utilities */
/* Gets the head element of the document. */
function getHeadElement() {
var hs = document.getElementsByTagName("head");
if (hs.length == 0) {
var head = document.createElement("head");
document.documentElement.insertBefore(head, document.documentElement.firstChild);
return head;
} else {
return hs[0];
}
}
/* Gets the body element of the document. */
function getBodyElement() {
var bs = document.getElementsByTagName("body");
if (bs.length == 0) {
var body = document.createElement("body");
document.documentElement.appendChild(body);
return body;
} else {
return bs[0];
}
}
/* Removes all the children of a node */
function removeChildren(node) {
while (node.hasChildNodes()) {
node.removeChild(node.firstChild);
}
}
function setText(node, text) {
removeChildren(node);
node.appendChild(document.createTextNode(text));
}

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 201 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 229 B

View File

@@ -0,0 +1,241 @@
body {
font-family:arial,helvetica,sans-serif;
font-size:12px;
background-color: white;
}
#wrapper {
width:740px;
height:520px;
margin:auto 50px;
border:1px solid gray;
padding:10px;
}
#absFrame {
width:250px;
height:250px;
padding:10px;
border:1px solid gray;
float:left;
white-space: nowrap;
}
#conFrame {
width:436px;
height:250px;
margin-left:10px;
padding:10px;
border:1px solid gray;
float:left;
white-space: normal;
overflow:auto;
}
#actFrame {
width:250px;
height:170px;
margin-top:10px;
padding:10px;
border:1px solid gray;
float:left;
overflow:auto;
}
#refFrame {
width:436px;
height:170px;
margin-left:10px;
margin-top:10px;
padding:10px;
border:1px solid gray;
float:left;
overflow:auto;
}
#messageFrame {
width:506px;
height:15px;
margin-top:10px;
margin-right:10px;
padding:10px;
border:1px solid gray;
float:left;
overflow:hidden;
}
#clipboardFrame {
width:180px;
height:15px;
margin-top:10px;
padding:10px;
border:1px solid gray;
float:left;
overflow:auto;
}
#tree {
left: -10px;
top: -10px;
width: 250px;
height: 250px;
margin: 0px;
padding: 10px;
overflow: auto;
}
ul {
position: relative;
list-style: none;
margin-left: 20px;
padding: 0px;
}
li {
position: relative;
}
img.tree-menu {
margin-right: 5px;
}
a.tree:link, a.tree:visited, a.tree:active {
color: black;
background-color: white;
text-decoration: none;
margin-right:10px;
}
a.tree:hover {
color: blue;
background-color: white;
text-decoration: underline;
margin-right:10px;
}
a.treeSelected:link, a.treeSelected:visited, a.treeSelected:active {
color: white;
background-color: #3366CC;
text-decoration: none;
margin-right:10px;
}
a.treeSelected:hover {
color: white;
background-color: #3366CC;
text-decoration: underline;
margin-right:10px;
}
a.treeGray:link, a.treeGray:visited, a.treeGray:active {
color: silver;
background-color: white;
text-decoration: none;
margin-right:10px;
}
a.treeGray:hover {
color: silver;
background-color: white;
text-decoration: none;
margin-right:10px;
}
table.action, table.refinement, table.wrapper, table.tree, table.language {
margin: 0px;
padding: 0px;
border-style: none;
border-collapse: collapse;
border-spacing: 0px;
}
tr.selected {
color: white;
background-color: #3366CC;
}
tr.unavailable, tr.closed {
color: silver;
background-color: white;
}
tr.unavailable:hover {
color: silver;
background-color: #3366CC;
}
tr.action, tr.refinement, tr.wrapper, tr.tree {
color: black;
background-color: white;
}
tr.action:hover, tr.refinement:hover, tr.wrapper:hover, tr.tree:hover {
color: white;
background-color: #3366CC;
}
td.action {
width: 220px;
margin: 0px;
padding: 0px;
}
td.refinement, td.wrapper, td.tree {
width: 515px;
margin: 0px;
padding: 0px;
}
td.hotKey {
width: 30px;
margin: 0px;
padding: 0px;
text-align: right;
}
td.language {
color: black;
background-color: white;
margin: 1px;
padding: 1px;
}
td.language:hover {
color: blue;
background-color: white;
text-decoration: underline;
margin: 1px;
padding: 1px;
}
td.selected {
color: white;
background-color: #3366CC;
margin: 1px;
padding: 1px;
}
td.selected:hover {
color: white;
background-color: #3366CC;
text-decoration: underline;
margin: 1px;
padding: 1px;
}
p {
margin-bottom: 40px;
}
span.normal {
color: black;
background-color: white;
text-decoration: none;
}
span.selected {
color: white;
background-color: #3366CC;
text-decoration: none;
}

View File

@@ -0,0 +1,54 @@
body {
color: black;
background-color: white;
}
dl {
}
dt {
margin: 0;
padding: 0;
}
dl dd {
margin: 0;
padding: 0;
}
dl.fromLang dt {
display: none;
}
dl.toLang {
border-width: 1px 0 0 0;
border-style: solid;
border-color: #c0c0c0;
}
dl.toLang dt {
color: #c0c0c0;
display: block;
float: left;
width: 5em;
}
dl.toLang dd {
border-width: 0 0 1px 0;
border-style: solid;
border-color: #c0c0c0;
}
ul {
margin: 0;
padding: 0;
}
li {
list-style-type: none;
margin: 0;
padding: 0;
}

View File

@@ -0,0 +1,48 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<link rel="stylesheet" type="text/css" href="translator.css" />
<script type="text/javascript" src="gflib.js"></script>
<script type="text/javascript" src="grammar.js"></script>
<script type="text/javascript" src="translator.js"></script>
<script type="text/javascript">
/* CHANGE ME */
var grammar = Food;
function updateTranslation () {
var input = document.getElementById('inputText').value;
var fromLang = document.getElementById('fromLang').value;
var toLang = document.getElementById('toLang').value;
var output = document.getElementById('output');
var translation = grammar.translate(input, fromLang, toLang);
removeChildren(output);
output.appendChild(formatTranslation(translation));
}
function populateLangs () {
var f = document.getElementById('fromLang');
var t = document.getElementById('toLang');
for (var c in grammar.concretes) {
addOption(f, c, c);
addOption(t, c, c);
}
}
</script>
<title>Web-based GF Translator</title>
</head>
<body onload="populateLangs(Food, 'fromLang', 'toLang')">
<form id="translate">
<p>
<input type="text" name="inputText" id="inputText" value="this cheese is warm" size="50" />
</p>
<p>
From: <select name="fromLang" id="fromLang" onchange=""><option value="">Any language</option></select>
To: <select name="toLang" id="toLang"><option value="">All languages</option></select>
<input type="button" value="Translate" onclick="updateTranslation()" />
</p>
</form>
<div id="output"></div>
</body>
</html>

View File

@@ -0,0 +1,51 @@
function formatTranslation (outputs) {
var dl1 = document.createElement("dl");
dl1.className = "fromLang";
for (var fromLang in outputs) {
var ul = document.createElement("ul");
addDefinition(dl1, document.createTextNode(fromLang), ul);
for (var i in outputs[fromLang]) {
var dl2 = document.createElement("dl");
dl2.className = "toLang";
for (var toLang in outputs[fromLang][i]) {
addDefinition(dl2, document.createTextNode(toLang), document.createTextNode(outputs[fromLang][i][toLang]));
}
addItem(ul, dl2);
}
}
return dl1;
}
/* DOM utilities for specific tags */
function addDefinition (dl, t, d) {
var dt = document.createElement("dt");
dt.appendChild(t);
dl.appendChild(dt);
var dd = document.createElement("dd");
dd.appendChild(d);
dl.appendChild(dd);
}
function addItem (ul, i) {
var li = document.createElement("li");
li.appendChild(i);
ul.appendChild(li);
}
function addOption (select, value, content) {
var option = document.createElement("option");
option.value = value;
option.appendChild(document.createTextNode(content));
select.appendChild(option);
}
/* General DOM utilities */
/* Removes all the children of a node */
function removeChildren(node) {
while (node.hasChildNodes()) {
node.removeChild(node.firstChild);
}
}