mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-19 09:49:33 -06:00
reorganize the directories under src, and rescue the JavaScript interpreter from deprecated
This commit is contained in:
791
src/runtime/haskell/Data/Binary.hs
Normal file
791
src/runtime/haskell/Data/Binary.hs
Normal file
@@ -0,0 +1,791 @@
|
||||
{-# LANGUAGE CPP, FlexibleInstances, FlexibleContexts #-}
|
||||
-----------------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Data.Binary
|
||||
-- Copyright : Lennart Kolmodin
|
||||
-- License : BSD3-style (see LICENSE)
|
||||
--
|
||||
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||
-- Stability : unstable
|
||||
-- Portability : portable to Hugs and GHC. Requires the FFI and some flexible instances
|
||||
--
|
||||
-- Binary serialisation of Haskell values to and from lazy ByteStrings.
|
||||
-- The Binary library provides methods for encoding Haskell values as
|
||||
-- streams of bytes directly in memory. The resulting @ByteString@ can
|
||||
-- then be written to disk, sent over the network, or futher processed
|
||||
-- (for example, compressed with gzip).
|
||||
--
|
||||
-- The 'Binary' package is notable in that it provides both pure, and
|
||||
-- high performance serialisation.
|
||||
--
|
||||
-- Values are always encoded in network order (big endian) form, and
|
||||
-- encoded data should be portable across machine endianess, word size,
|
||||
-- or compiler version. For example, data encoded using the Binary class
|
||||
-- could be written from GHC, and read back in Hugs.
|
||||
--
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module Data.Binary (
|
||||
|
||||
-- * The Binary class
|
||||
Binary(..)
|
||||
|
||||
-- $example
|
||||
|
||||
-- * The Get and Put monads
|
||||
, Get
|
||||
, Put
|
||||
|
||||
-- * Useful helpers for writing instances
|
||||
, putWord8
|
||||
, getWord8
|
||||
|
||||
-- * Binary serialisation
|
||||
, encode -- :: Binary a => a -> ByteString
|
||||
, decode -- :: Binary a => ByteString -> a
|
||||
|
||||
-- * IO functions for serialisation
|
||||
, encodeFile -- :: Binary a => FilePath -> a -> IO ()
|
||||
, decodeFile -- :: Binary a => FilePath -> IO a
|
||||
|
||||
-- Lazy put and get
|
||||
-- , lazyPut
|
||||
-- , lazyGet
|
||||
|
||||
, module Data.Word -- useful
|
||||
|
||||
) where
|
||||
|
||||
#include "MachDeps.h"
|
||||
|
||||
import Data.Word
|
||||
|
||||
import Data.Binary.Put
|
||||
import Data.Binary.Get
|
||||
|
||||
import Control.Monad
|
||||
import Control.Exception
|
||||
import Foreign
|
||||
import System.IO
|
||||
|
||||
import Data.ByteString.Lazy (ByteString)
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
import Data.Char (chr,ord)
|
||||
import Data.List (unfoldr)
|
||||
|
||||
-- And needed for the instances:
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.IntSet as IntSet
|
||||
import qualified Data.Ratio as R
|
||||
|
||||
import qualified Data.Tree as T
|
||||
|
||||
import Data.Array.Unboxed
|
||||
|
||||
--
|
||||
-- This isn't available in older Hugs or older GHC
|
||||
--
|
||||
#if __GLASGOW_HASKELL__ >= 606
|
||||
import qualified Data.Sequence as Seq
|
||||
import qualified Data.Foldable as Fold
|
||||
#endif
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | The @Binary@ class provides 'put' and 'get', methods to encode and
|
||||
-- decode a Haskell value to a lazy ByteString. It mirrors the Read and
|
||||
-- Show classes for textual representation of Haskell types, and is
|
||||
-- suitable for serialising Haskell values to disk, over the network.
|
||||
--
|
||||
-- For parsing and generating simple external binary formats (e.g. C
|
||||
-- structures), Binary may be used, but in general is not suitable
|
||||
-- for complex protocols. Instead use the Put and Get primitives
|
||||
-- directly.
|
||||
--
|
||||
-- Instances of Binary should satisfy the following property:
|
||||
--
|
||||
-- > decode . encode == id
|
||||
--
|
||||
-- That is, the 'get' and 'put' methods should be the inverse of each
|
||||
-- other. A range of instances are provided for basic Haskell types.
|
||||
--
|
||||
class Binary t where
    -- | Encode a value in the Put monad.
    put :: t -> Put
    -- | Decode a value in the Get monad.
    get :: Get t
|
||||
|
||||
-- $example
|
||||
-- To serialise a custom type, an instance of Binary for that type is
|
||||
-- required. For example, suppose we have a data structure:
|
||||
--
|
||||
-- > data Exp = IntE Int
|
||||
-- > | OpE String Exp Exp
|
||||
-- > deriving Show
|
||||
--
|
||||
-- We can encode values of this type into bytestrings using the
|
||||
-- following instance, which proceeds by recursively breaking down the
|
||||
-- structure to serialise:
|
||||
--
|
||||
-- > instance Binary Exp where
|
||||
-- > put (IntE i) = do put (0 :: Word8)
|
||||
-- > put i
|
||||
-- > put (OpE s e1 e2) = do put (1 :: Word8)
|
||||
-- > put s
|
||||
-- > put e1
|
||||
-- > put e2
|
||||
-- >
|
||||
-- > get = do t <- get :: Get Word8
|
||||
-- > case t of
|
||||
-- > 0 -> do i <- get
|
||||
-- > return (IntE i)
|
||||
-- > 1 -> do s <- get
|
||||
-- > e1 <- get
|
||||
-- > e2 <- get
|
||||
-- > return (OpE s e1 e2)
|
||||
--
|
||||
-- Note how we write an initial tag byte to indicate each variant of the
|
||||
-- data type.
|
||||
--
|
||||
-- We can simplify the writing of 'get' instances using monadic
|
||||
-- combinators:
|
||||
--
|
||||
-- > get = do tag <- getWord8
|
||||
-- > case tag of
|
||||
-- > 0 -> liftM IntE get
|
||||
-- > 1 -> liftM3 OpE get get get
|
||||
--
|
||||
-- The generation of Binary instances has been automated by a script
|
||||
-- using Scrap Your Boilerplate generics. Use the script here:
|
||||
-- <http://darcs.haskell.org/binary/tools/derive/BinaryDerive.hs>.
|
||||
--
|
||||
-- To derive the instance for a type, load this script into GHCi, and
|
||||
-- bring your type into scope. Your type can then have its Binary
|
||||
-- instances derived as follows:
|
||||
--
|
||||
-- > $ ghci -fglasgow-exts BinaryDerive.hs
|
||||
-- > *BinaryDerive> :l Example.hs
|
||||
-- > *Main> deriveM (undefined :: Drinks)
|
||||
-- >
|
||||
-- > instance Binary Main.Drinks where
|
||||
-- > put (Beer a) = putWord8 0 >> put a
|
||||
-- > put Coffee = putWord8 1
|
||||
-- > put Tea = putWord8 2
|
||||
-- > put EnergyDrink = putWord8 3
|
||||
-- > put Water = putWord8 4
|
||||
-- > put Wine = putWord8 5
|
||||
-- > put Whisky = putWord8 6
|
||||
-- > get = do
|
||||
-- > tag_ <- getWord8
|
||||
-- > case tag_ of
|
||||
-- > 0 -> get >>= \a -> return (Beer a)
|
||||
-- > 1 -> return Coffee
|
||||
-- > 2 -> return Tea
|
||||
-- > 3 -> return EnergyDrink
|
||||
-- > 4 -> return Water
|
||||
-- > 5 -> return Wine
|
||||
-- > 6 -> return Whisky
|
||||
-- >
|
||||
--
|
||||
-- To serialise this to a bytestring, we use 'encode', which packs the
|
||||
-- data structure into a binary format, in a lazy bytestring
|
||||
--
|
||||
-- > > let e = OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||
-- > > let v = encode e
|
||||
--
|
||||
-- Where 'v' is a binary encoded data structure. To reconstruct the
|
||||
-- original data, we use 'decode'
|
||||
--
|
||||
-- > > decode v :: Exp
|
||||
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||
--
|
||||
-- The lazy ByteString that results from 'encode' can be written to
|
||||
-- disk, and read from disk using Data.ByteString.Lazy IO functions,
|
||||
-- such as hPutStr or writeFile:
|
||||
--
|
||||
-- > > writeFile "/tmp/exp.txt" (encode e)
|
||||
--
|
||||
-- And read back with:
|
||||
--
|
||||
-- > > readFile "/tmp/exp.txt" >>= return . decode :: IO Exp
|
||||
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||
--
|
||||
-- We can also directly serialise a value to and from a Handle, or a file:
|
||||
--
|
||||
-- > > v <- decodeFile "/tmp/exp.txt" :: IO Exp
|
||||
-- > OpE "*" (IntE 7) (OpE "/" (IntE 4) (IntE 2))
|
||||
--
|
||||
-- And write a value to disk
|
||||
--
|
||||
-- > > encodeFile "/tmp/a.txt" v
|
||||
--
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Wrappers to run the underlying monad
|
||||
|
||||
-- | Encode a value using binary serialisation to a lazy ByteString.
--
encode :: Binary a => a -> ByteString
encode x = runPut (put x)
{-# INLINE encode #-}
|
||||
|
||||
-- | Decode a value from a lazy ByteString, reconstructing the original structure.
--
decode :: Binary a => ByteString -> a
decode = runGet get
-- INLINE added for consistency with 'encode', which is already inlined.
{-# INLINE decode #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Convenience IO operations
|
||||
|
||||
-- | Lazily serialise a value to a file
--
-- This is just a convenience function, it's defined simply as:
--
-- > encodeFile f = L.writeFile f . encode
--
-- So for example if you wanted to compress as well, you could use:
--
-- > L.writeFile f . compress . encode
--
encodeFile :: Binary a => FilePath -> a -> IO ()
encodeFile f v = L.writeFile f (encode v)
|
||||
|
||||
-- | Lazily reconstruct a value previously written to a file.
--
-- This is just a convenience function, it's defined simply as:
--
-- > decodeFile f = return . decode =<< B.readFile f
--
-- So for example if you wanted to decompress as well, you could use:
--
-- > return . decode . decompress =<< B.readFile f
--
decodeFile :: Binary a => FilePath -> IO a
decodeFile f = bracket (openBinaryFile f ReadMode) hClose $ \h -> do
    s <- L.hGetContents h
    -- NOTE(review): 'evaluate' forces only to WHNF, while 'bracket'
    -- closes the handle as soon as this action returns. Presumably
    -- 'runGet' consumes enough of the lazy input before returning —
    -- verify against the Data.Binary.Get implementation.
    evaluate $ runGet get s
|
||||
|
||||
-- needs bytestring 0.9.1.x to work
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Lazy put and get
|
||||
|
||||
-- lazyPut :: (Binary a) => a -> Put
|
||||
-- lazyPut a = put (encode a)
|
||||
|
||||
-- lazyGet :: (Binary a) => Get a
|
||||
-- lazyGet = fmap decode get
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Simple instances
|
||||
|
||||
-- The () type need never be written to disk: values of singleton type
-- can be reconstructed from the type alone
instance Binary () where
    put () = return ()   -- writes zero bytes
    get    = return ()   -- reads zero bytes
|
||||
|
||||
-- Bools are encoded as a byte in the range 0 .. 1
instance Binary Bool where
    put b = putWord8 (fromIntegral (fromEnum b))
    get = do w <- getWord8
             return (toEnum (fromIntegral w))
|
||||
|
||||
-- Values of type 'Ordering' are encoded as a byte in the range 0 .. 2
instance Binary Ordering where
    put o = putWord8 (fromIntegral (fromEnum o))
    get = do w <- getWord8
             return (toEnum (fromIntegral w))
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Words and Ints
|
||||
|
||||
-- Word8s are written as single bytes
instance Binary Word8 where
    put = putWord8
    get = getWord8
|
||||
|
||||
-- Word16s are written as 2 bytes in big-endian (network) order
instance Binary Word16 where
    put = putWord16be
    get = getWord16be
|
||||
|
||||
-- Word32s are written as 4 bytes in big-endian (network) order
instance Binary Word32 where
    put = putWord32be
    get = getWord32be
|
||||
|
||||
-- Word64s are written as 8 bytes in big-endian (network) order
instance Binary Word64 where
    put = putWord64be
    get = getWord64be
|
||||
|
||||
-- Int8s are written as a single byte, via the Word8 encoding.
instance Binary Int8 where
    put i = put (fromIntegral i :: Word8)
    get = do w <- get :: Get Word8
             return (fromIntegral w)
|
||||
|
||||
-- Int16s are written as 2 bytes in big endian format
instance Binary Int16 where
    put i = put (fromIntegral i :: Word16)
    get = do w <- get :: Get Word16
             return (fromIntegral w)
|
||||
|
||||
-- Int32s are written as 4 bytes in big endian format
instance Binary Int32 where
    put i = put (fromIntegral i :: Word32)
    get = do w <- get :: Get Word32
             return (fromIntegral w)
|
||||
|
||||
-- Int64s are written as 8 bytes in big endian format
|
||||
instance Binary Int64 where
    put i = put (fromIntegral i :: Word64)
    get = do w <- get :: Get Word64
             return (fromIntegral w)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- Words are written as a sequence of 7-bit groups, least significant
-- first; the high bit of each byte indicates whether more bytes follow.
instance Binary Word where
    put i | i <= 0x7f = do put a
          | i <= 0x3fff = do put (a .|. 0x80)
                             put b
          | i <= 0x1fffff = do put (a .|. 0x80)
                               put (b .|. 0x80)
                               put c
          | i <= 0xfffffff = do put (a .|. 0x80)
                                put (b .|. 0x80)
                                put (c .|. 0x80)
                                put d
#if WORD_SIZE_IN_BITS < 64
          | otherwise = do put (a .|. 0x80)
                           put (b .|. 0x80)
                           put (c .|. 0x80)
                           put (d .|. 0x80)
                           put e
#else
          | i <= 0x7ffffffff = do put (a .|. 0x80)
                                  put (b .|. 0x80)
                                  put (c .|. 0x80)
                                  put (d .|. 0x80)
                                  put e
          | i <= 0x3ffffffffff = do put (a .|. 0x80)
                                    put (b .|. 0x80)
                                    put (c .|. 0x80)
                                    put (d .|. 0x80)
                                    put (e .|. 0x80)
                                    put f
          | i <= 0x1ffffffffffff = do put (a .|. 0x80)
                                      put (b .|. 0x80)
                                      put (c .|. 0x80)
                                      put (d .|. 0x80)
                                      put (e .|. 0x80)
                                      put (f .|. 0x80)
                                      put g
          -- NOTE: this 56-bit guard appeared twice in the original;
          -- the second, identical copy was unreachable and is removed.
          | i <= 0xffffffffffffff = do put (a .|. 0x80)
                                       put (b .|. 0x80)
                                       put (c .|. 0x80)
                                       put (d .|. 0x80)
                                       put (e .|. 0x80)
                                       put (f .|. 0x80)
                                       put (g .|. 0x80)
                                       put h
          | i <= 0x7fffffffffffffff = do put (a .|. 0x80)
                                         put (b .|. 0x80)
                                         put (c .|. 0x80)
                                         put (d .|. 0x80)
                                         put (e .|. 0x80)
                                         put (f .|. 0x80)
                                         put (g .|. 0x80)
                                         put (h .|. 0x80)
                                         put j
          | otherwise = do put (a .|. 0x80)
                           put (b .|. 0x80)
                           put (c .|. 0x80)
                           put (d .|. 0x80)
                           put (e .|. 0x80)
                           put (f .|. 0x80)
                           put (g .|. 0x80)
                           put (h .|. 0x80)
                           put (j .|. 0x80)
                           put k
#endif
        where
          -- Successive 7-bit groups of i, least significant first.
          a = fromIntegral ( i          .&. 0x7f) :: Word8
          b = fromIntegral (shiftR i 7  .&. 0x7f) :: Word8
          c = fromIntegral (shiftR i 14 .&. 0x7f) :: Word8
          d = fromIntegral (shiftR i 21 .&. 0x7f) :: Word8
          e = fromIntegral (shiftR i 28 .&. 0x7f) :: Word8
          f = fromIntegral (shiftR i 35 .&. 0x7f) :: Word8
          g = fromIntegral (shiftR i 42 .&. 0x7f) :: Word8
          h = fromIntegral (shiftR i 49 .&. 0x7f) :: Word8
          j = fromIntegral (shiftR i 56 .&. 0x7f) :: Word8
          k = fromIntegral (shiftR i 63 .&. 0x7f) :: Word8

    -- The low 7 bits come from this byte; if its high bit is set,
    -- recursively read the remaining higher-order groups.
    get = do i <- getWord8
             (if i <= 0x7f
              then return (fromIntegral i)
              else do n <- get
                      return $ (n `shiftL` 7) .|. (fromIntegral (i .&. 0x7f)))
|
||||
|
||||
-- Int has the same representation as Word
instance Binary Int where
    put i = put (fromIntegral i :: Word)
    get = do w <- get :: Get Word
             return (fromIntegral w)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
--
|
||||
-- Portable, and pretty efficient, serialisation of Integer
|
||||
--
|
||||
|
||||
-- Fixed-size type for a subset of Integer (the tag-0 fast path below)
type SmallInt = Int32
|
||||
|
||||
-- Integers are encoded in two ways: if they fit inside a SmallInt,
|
||||
-- they're written as a byte tag, and that value. If the Integer value
|
||||
-- is too large to fit in a SmallInt, it is written as a byte array,
|
||||
-- along with a sign and length field.
|
||||
|
||||
instance Binary Integer where

    {-# INLINE put #-}
    -- Fast path: the value fits in a SmallInt, so write tag byte 0
    -- followed by the fixed-size value.
    put n | n >= lo && n <= hi = do
        putWord8 0
        put (fromIntegral n :: SmallInt)  -- fast path
      where
        lo = fromIntegral (minBound :: SmallInt) :: Integer
        hi = fromIntegral (maxBound :: SmallInt) :: Integer

    -- General case: tag byte 1, then the sign byte, then the magnitude
    -- as a list of base-256 digits, least significant first.
    put n = do
        putWord8 1
        put sign
        put (unroll (abs n))         -- unroll the bytes
      where
        sign = fromIntegral (signum n) :: Word8

    {-# INLINE get #-}
    get = do
        tag <- get :: Get Word8
        case tag of
            -- tag 0: fixed-size fast path
            0 -> liftM fromIntegral (get :: Get SmallInt)
            -- any other tag: sign byte, then magnitude bytes
            _ -> do sign  <- get
                    bytes <- get
                    let v = roll bytes
                    return $! if sign == (1 :: Word8) then v else - v
|
||||
|
||||
--
|
||||
-- Fold and unfold an Integer to and from a list of its bytes
|
||||
--
|
||||
-- Unfold a (non-negative) Integer into its base-256 digits,
-- least significant byte first.
unroll :: Integer -> [Word8]
unroll 0 = []
unroll i = fromIntegral i : unroll (i `shiftR` 8)
|
||||
|
||||
-- Rebuild an Integer from its base-256 digits,
-- least significant byte first.
roll :: [Word8] -> Integer
roll []     = 0
roll (b:bs) = roll bs `shiftL` 8 .|. fromIntegral b
|
||||
|
||||
{-
|
||||
|
||||
--
|
||||
-- An efficient, raw serialisation for Integer (GHC only)
|
||||
--
|
||||
|
||||
-- TODO This instance is not architecture portable. GMP stores numbers as
|
||||
-- arrays of machine sized words, so the byte format is not portable across
|
||||
-- architectures with different endianess and word size.
|
||||
|
||||
import Data.ByteString.Base (toForeignPtr,unsafePackAddress, memcpy)
|
||||
import GHC.Base hiding (ord, chr)
|
||||
import GHC.Prim
|
||||
import GHC.Ptr (Ptr(..))
|
||||
import GHC.IOBase (IO(..))
|
||||
|
||||
instance Binary Integer where
|
||||
put (S# i) = putWord8 0 >> put (I# i)
|
||||
put (J# s ba) = do
|
||||
putWord8 1
|
||||
put (I# s)
|
||||
put (BA ba)
|
||||
|
||||
get = do
|
||||
b <- getWord8
|
||||
case b of
|
||||
0 -> do (I# i#) <- get
|
||||
return (S# i#)
|
||||
_ -> do (I# s#) <- get
|
||||
(BA a#) <- get
|
||||
return (J# s# a#)
|
||||
|
||||
instance Binary ByteArray where
|
||||
|
||||
-- Pretty safe.
|
||||
put (BA ba) =
|
||||
let sz = sizeofByteArray# ba -- (primitive) in *bytes*
|
||||
addr = byteArrayContents# ba
|
||||
bs = unsafePackAddress (I# sz) addr
|
||||
in put bs -- write as a ByteString. easy, yay!
|
||||
|
||||
-- Pretty scary. Should be quick though
|
||||
get = do
|
||||
(fp, off, n@(I# sz)) <- liftM toForeignPtr get -- so decode a ByteString
|
||||
assert (off == 0) $ return $ unsafePerformIO $ do
|
||||
(MBA arr) <- newByteArray sz -- and copy it into a ByteArray#
|
||||
let to = byteArrayContents# (unsafeCoerce# arr) -- urk, is this safe?
|
||||
withForeignPtr fp $ \from -> memcpy (Ptr to) from (fromIntegral n)
|
||||
freezeByteArray arr
|
||||
|
||||
-- wrapper for ByteArray#
|
||||
data ByteArray = BA {-# UNPACK #-} !ByteArray#
|
||||
data MBA = MBA {-# UNPACK #-} !(MutableByteArray# RealWorld)
|
||||
|
||||
newByteArray :: Int# -> IO MBA
|
||||
newByteArray sz = IO $ \s ->
|
||||
case newPinnedByteArray# sz s of { (# s', arr #) ->
|
||||
(# s', MBA arr #) }
|
||||
|
||||
freezeByteArray :: MutableByteArray# RealWorld -> IO ByteArray
|
||||
freezeByteArray arr = IO $ \s ->
|
||||
case unsafeFreezeByteArray# arr s of { (# s', arr' #) ->
|
||||
(# s', BA arr' #) }
|
||||
|
||||
-}
|
||||
|
||||
-- Rationals are written as numerator followed by denominator.
instance (Binary a,Integral a) => Binary (R.Ratio a) where
    put r = do put (R.numerator r)
               put (R.denominator r)
    get = do n <- get
             d <- get
             return (n R.% d)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- Char is serialised as UTF-8
instance Binary Char where
    -- Write the code point using the standard 1- to 4-byte UTF-8 scheme.
    put a | c <= 0x7f     = put (fromIntegral c :: Word8)
          | c <= 0x7ff    = do put (0xc0 .|. y)
                               put (0x80 .|. z)
          | c <= 0xffff   = do put (0xe0 .|. x)
                               put (0x80 .|. y)
                               put (0x80 .|. z)
          | c <= 0x10ffff = do put (0xf0 .|. w)
                               put (0x80 .|. x)
                               put (0x80 .|. y)
                               put (0x80 .|. z)
          | otherwise = error "Not a valid Unicode code point"
      where
        c = ord a
        -- 6-bit slices of the code point (w is the top 3 bits).
        z, y, x, w :: Word8
        z = fromIntegral (c           .&. 0x3f)
        y = fromIntegral (shiftR c 6  .&. 0x3f)
        x = fromIntegral (shiftR c 12 .&. 0x3f)
        w = fromIntegral (shiftR c 18 .&. 0x7)

    -- Read one UTF-8 encoded code point, dispatching on the lead byte.
    get = do
        let getByte = liftM (fromIntegral :: Word8 -> Int) get
            shiftL6 = flip shiftL 6 :: Int -> Int
        w <- getByte
        r <- case () of
                _ | w < 0x80  -> return w
                  | w < 0xe0  -> do
                        x <- liftM (xor 0x80) getByte
                        return (x .|. shiftL6 (xor 0xc0 w))
                  | w < 0xf0  -> do
                        x <- liftM (xor 0x80) getByte
                        y <- liftM (xor 0x80) getByte
                        return (y .|. shiftL6 (x .|. shiftL6
                                (xor 0xe0 w)))
                  | otherwise -> do
                        x <- liftM (xor 0x80) getByte
                        y <- liftM (xor 0x80) getByte
                        z <- liftM (xor 0x80) getByte
                        return (z .|. shiftL6 (y .|. shiftL6
                                (x .|. shiftL6 (xor 0xf0 w))))
        return $! chr r
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Instances for the first few tuples
|
||||
|
||||
-- Pairs are written as the first component then the second.
instance (Binary a, Binary b) => Binary (a,b) where
    put (a,b) = do put a
                   put b
    get = do a <- get
             b <- get
             return (a,b)
|
||||
|
||||
instance (Binary a, Binary b, Binary c) => Binary (a,b,c) where
    put (a,b,c) = do put a
                     put b
                     put c
    get = do a <- get
             b <- get
             c <- get
             return (a,b,c)
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d) => Binary (a,b,c,d) where
    put (a,b,c,d) = do put a
                       put b
                       put c
                       put d
    get = do a <- get
             b <- get
             c <- get
             d <- get
             return (a,b,c,d)
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e) => Binary (a,b,c,d,e) where
    put (a,b,c,d,e) = do put a
                         put b
                         put c
                         put d
                         put e
    get = do a <- get
             b <- get
             c <- get
             d <- get
             e <- get
             return (a,b,c,d,e)
|
||||
|
||||
--
|
||||
-- and now just recurse:
|
||||
--
|
||||
|
||||
-- Larger tuples are encoded by nesting inside the smaller ones.
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f)
        => Binary (a,b,c,d,e,f) where
    put (a,b,c,d,e,f) = put (a,(b,c,d,e,f))
    get = liftM (\(a,(b,c,d,e,f)) -> (a,b,c,d,e,f)) get
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e, Binary f, Binary g)
        => Binary (a,b,c,d,e,f,g) where
    put (a,b,c,d,e,f,g) = put (a,(b,c,d,e,f,g))
    get = liftM (\(a,(b,c,d,e,f,g)) -> (a,b,c,d,e,f,g)) get
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
          Binary f, Binary g, Binary h)
        => Binary (a,b,c,d,e,f,g,h) where
    put (a,b,c,d,e,f,g,h) = put (a,(b,c,d,e,f,g,h))
    get = liftM (\(a,(b,c,d,e,f,g,h)) -> (a,b,c,d,e,f,g,h)) get
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
          Binary f, Binary g, Binary h, Binary i)
        => Binary (a,b,c,d,e,f,g,h,i) where
    put (a,b,c,d,e,f,g,h,i) = put (a,(b,c,d,e,f,g,h,i))
    get = liftM (\(a,(b,c,d,e,f,g,h,i)) -> (a,b,c,d,e,f,g,h,i)) get
|
||||
|
||||
instance (Binary a, Binary b, Binary c, Binary d, Binary e,
          Binary f, Binary g, Binary h, Binary i, Binary j)
        => Binary (a,b,c,d,e,f,g,h,i,j) where
    put (a,b,c,d,e,f,g,h,i,j) = put (a,(b,c,d,e,f,g,h,i,j))
    get = liftM (\(a,(b,c,d,e,f,g,h,i,j)) -> (a,b,c,d,e,f,g,h,i,j)) get
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Container types
|
||||
|
||||
-- Lists are written with a length prefix followed by the elements.
instance Binary a => Binary [a] where
    put l = do put (length l)
               mapM_ put l
    -- The original bound the result only to immediately 'return' it;
    -- 'replicateM n get' is the same computation directly.
    get = do n <- get :: Get Int
             replicateM n get
|
||||
|
||||
-- Maybe: tag byte 0 for Nothing, 1 followed by the payload for Just.
instance (Binary a) => Binary (Maybe a) where
    put Nothing  = putWord8 0
    put (Just x) = do putWord8 1
                      put x
    get = do
        w <- getWord8
        if w == 0
            then return Nothing
            else liftM Just get
|
||||
|
||||
-- Either: tag byte 0 for Left, 1 for Right, then the payload.
instance (Binary a, Binary b) => Binary (Either a b) where
    put (Left a)  = do putWord8 0
                       put a
    put (Right b) = do putWord8 1
                       put b
    get = do
        w <- getWord8
        if w == 0
            then liftM Left get
            else liftM Right get
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- ByteStrings (have specially efficient instances)
|
||||
|
||||
-- Strict ByteStrings: length prefix, then the raw bytes.
instance Binary B.ByteString where
    put bs = do put (B.length bs)
                putByteString bs
    get = do n <- get
             getByteString n
|
||||
|
||||
--
|
||||
-- Using old versions of fps, this is a type synonym, and non portable
|
||||
--
|
||||
-- Requires 'flexible instances'
|
||||
--
|
||||
instance Binary ByteString where
    put bs = do put (fromIntegral (L.length bs) :: Int)
                putLazyByteString bs
    get = do n <- get
             getLazyByteString n
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Maps and Sets
|
||||
|
||||
-- Sets: size prefix, then the elements in ascending order, so they
-- can be rebuilt without re-sorting.
instance (Ord a, Binary a) => Binary (Set.Set a) where
    put s = do put (Set.size s)
               mapM_ put (Set.toAscList s)
    get = do xs <- get
             return (Set.fromDistinctAscList xs)
|
||||
|
||||
-- Maps: size prefix, then key/value pairs in ascending key order.
instance (Ord k, Binary k, Binary e) => Binary (Map.Map k e) where
    put m = do put (Map.size m)
               mapM_ put (Map.toAscList m)
    get = do kvs <- get
             return (Map.fromDistinctAscList kvs)
|
||||
|
||||
instance Binary IntSet.IntSet where
    put s = do put (IntSet.size s)
               mapM_ put (IntSet.toAscList s)
    get = do xs <- get
             return (IntSet.fromDistinctAscList xs)
|
||||
|
||||
instance (Binary e) => Binary (IntMap.IntMap e) where
    put m = do put (IntMap.size m)
               mapM_ put (IntMap.toAscList m)
    get = do kvs <- get
             return (IntMap.fromDistinctAscList kvs)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Queues and Sequences
|
||||
|
||||
#if __GLASGOW_HASKELL__ >= 606
|
||||
--
|
||||
-- This is valid Hugs, but you need the most recent Hugs
|
||||
--
|
||||
|
||||
-- Sequences: length prefix, then the elements left to right.
instance (Binary e) => Binary (Seq.Seq e) where
    put s = put (Seq.length s) >> Fold.mapM_ put s
    get = do n <- get :: Get Int
             rep Seq.empty n get
      -- Build the sequence strictly, snoc-ing one decoded element at a
      -- time to avoid accumulating thunks in the spine.
      where rep xs 0 _ = return $! xs
            rep xs n g = xs `seq` n `seq` do
                           x <- g
                           rep (xs Seq.|> x) (n-1) g
|
||||
|
||||
#endif
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Floating point
|
||||
|
||||
-- Doubles are serialised via 'decodeFloat' as a mantissa and exponent.
instance Binary Double where
    put d = put (decodeFloat d)
    get = do m <- get
             e <- get
             return (encodeFloat m e)
|
||||
|
||||
-- Floats use the same mantissa/exponent representation as Double.
instance Binary Float where
    put f = put (decodeFloat f)
    get = do m <- get
             e <- get
             return (encodeFloat m e)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Trees
|
||||
|
||||
-- Trees are written as the root label followed by the list of subtrees.
instance (Binary e) => Binary (T.Tree e) where
    put (T.Node r s) = do put r
                          put s
    get = do r <- get
             s <- get
             return (T.Node r s)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Arrays
|
||||
|
||||
-- Boxed arrays: bounds, then an element count, then the elements.
instance (Binary i, Ix i, Binary e) => Binary (Array i e) where
    put a = do
        put (bounds a)
        put (rangeSize (bounds a)) -- write the length
        mapM_ put (elems a)        -- now the elems.
    get = do
        bs <- get
        n  <- get                  -- read the length
        xs <- replicateM n get     -- now the elems.
        return (listArray bs xs)
|
||||
|
||||
--
|
||||
-- The IArray UArray e constraint is non portable. Requires flexible instances
|
||||
--
|
||||
--
-- The IArray UArray e constraint is non portable. Requires flexible instances
--
instance (Binary i, Ix i, Binary e, IArray UArray e) => Binary (UArray i e) where
    put a = do
        put (bounds a)
        put (rangeSize (bounds a)) -- now write the length
        mapM_ put (elems a)
    get = do
        bs <- get
        n  <- get
        xs <- replicateM n get
        return (listArray bs xs)
|
||||
426
src/runtime/haskell/Data/Binary/Builder.hs
Normal file
426
src/runtime/haskell/Data/Binary/Builder.hs
Normal file
@@ -0,0 +1,426 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# OPTIONS_GHC -fglasgow-exts #-}
|
||||
-- for unboxed shifts
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Data.Binary.Builder
|
||||
-- Copyright : Lennart Kolmodin, Ross Paterson
|
||||
-- License : BSD3-style (see LICENSE)
|
||||
--
|
||||
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||
-- Stability : experimental
|
||||
-- Portability : portable to Hugs and GHC
|
||||
--
|
||||
-- Efficient construction of lazy bytestrings.
|
||||
--
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||
#include "MachDeps.h"
|
||||
#endif
|
||||
|
||||
module Data.Binary.Builder (
|
||||
|
||||
-- * The Builder type
|
||||
Builder
|
||||
, toLazyByteString
|
||||
|
||||
-- * Constructing Builders
|
||||
, empty
|
||||
, singleton
|
||||
, append
|
||||
, fromByteString -- :: S.ByteString -> Builder
|
||||
, fromLazyByteString -- :: L.ByteString -> Builder
|
||||
|
||||
-- * Flushing the buffer state
|
||||
, flush
|
||||
|
||||
-- * Derived Builders
|
||||
-- ** Big-endian writes
|
||||
, putWord16be -- :: Word16 -> Builder
|
||||
, putWord32be -- :: Word32 -> Builder
|
||||
, putWord64be -- :: Word64 -> Builder
|
||||
|
||||
-- ** Little-endian writes
|
||||
, putWord16le -- :: Word16 -> Builder
|
||||
, putWord32le -- :: Word32 -> Builder
|
||||
, putWord64le -- :: Word64 -> Builder
|
||||
|
||||
-- ** Host-endian, unaligned writes
|
||||
, putWordhost -- :: Word -> Builder
|
||||
, putWord16host -- :: Word16 -> Builder
|
||||
, putWord32host -- :: Word32 -> Builder
|
||||
, putWord64host -- :: Word64 -> Builder
|
||||
|
||||
) where
|
||||
|
||||
import Foreign
|
||||
import Data.Monoid
|
||||
import Data.Word
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
#ifdef BYTESTRING_IN_BASE
|
||||
import Data.ByteString.Base (inlinePerformIO)
|
||||
import qualified Data.ByteString.Base as S
|
||||
#else
|
||||
import Data.ByteString.Internal (inlinePerformIO)
|
||||
import qualified Data.ByteString.Internal as S
|
||||
import qualified Data.ByteString.Lazy.Internal as L
|
||||
#endif
|
||||
|
||||
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||
import GHC.Base
|
||||
import GHC.Word (Word32(..),Word16(..),Word64(..))
|
||||
|
||||
#if WORD_SIZE_IN_BITS < 64 && __GLASGOW_HASKELL__ >= 608
|
||||
import GHC.Word (uncheckedShiftRL64#)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | A 'Builder' is an efficient way to build lazy 'L.ByteString's.
|
||||
-- There are several functions for constructing 'Builder's, but only one
|
||||
-- to inspect them: to extract any data, you have to turn them into lazy
|
||||
-- 'L.ByteString's using 'toLazyByteString'.
|
||||
--
|
||||
-- Internally, a 'Builder' constructs a lazy 'L.Bytestring' by filling byte
|
||||
-- arrays piece by piece. As each buffer is filled, it is \'popped\'
|
||||
-- off, to become a new chunk of the resulting lazy 'L.ByteString'.
|
||||
-- All this is hidden from the user of the 'Builder'.
|
||||
|
||||
-- A Builder is continuation-passing style: given the "rest of the
-- output" (a function from the final buffer to the remaining chunks)
-- and the current buffer, it yields the complete list of strict chunks.
newtype Builder = Builder {
        -- Invariant (from Data.ByteString.Lazy):
        -- The lists include no null ByteStrings.
        runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
    }
|
||||
|
||||
-- | 'Builder' is a monoid: 'empty' produces no output and 'append'
-- concatenates output, so builders compose with '<>' / 'mconcat'.
instance Monoid Builder where
    mempty = empty
    {-# INLINE mempty #-}
    mappend = append
    {-# INLINE mappend #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | /O(1)./ The Builder that writes nothing; identity for 'append'.
--
-- * @'toLazyByteString' 'empty' = 'L.empty'@
--
empty :: Builder
empty = Builder (\cont -> cont)
{-# INLINE empty #-}
|
||||
|
||||
-- | /O(1)./ A Builder that emits exactly one byte, satisfying
--
-- * @'toLazyByteString' ('singleton' b) = 'L.singleton' b@
--
singleton :: Word8 -> Builder
singleton byte = writeN 1 (\ptr -> poke ptr byte)
{-# INLINE singleton #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | /O(1)./ Concatenate two Builders.  Associative, with 'empty' as
-- the identity, satisfying
--
-- * @'toLazyByteString' ('append' x y) = 'L.append' ('toLazyByteString' x) ('toLazyByteString' y)@
--
append :: Builder -> Builder -> Builder
append (Builder f) (Builder g) = Builder (\cont -> f (g cont))
{-# INLINE append #-}
|
||||
|
||||
-- | /O(1)./ A Builder that emits a strict 'S.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromByteString' bs) = 'L.fromChunks' [bs]@
--
fromByteString :: S.ByteString -> Builder
fromByteString chunk =
    if S.null chunk
       then empty  -- preserve the no-empty-chunks invariant
       else flush `append` mapBuilder (chunk :)
{-# INLINE fromByteString #-}
|
||||
|
||||
-- | /O(1)./ A Builder that emits a lazy 'L.ByteString', satisfying
--
-- * @'toLazyByteString' ('fromLazyByteString' bs) = bs@
--
fromLazyByteString :: L.ByteString -> Builder
fromLazyByteString lbs =
    flush `append` mapBuilder (\rest -> L.toChunks lbs ++ rest)
{-# INLINE fromLazyByteString #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- Our internal buffer type: one mutable byte array plus bookkeeping.
-- The region [offset, offset+used) has been written; "length left"
-- bytes remain free after it.
data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
                     {-# UNPACK #-} !Int                -- offset
                     {-# UNPACK #-} !Int                -- used bytes
                     {-# UNPACK #-} !Int                -- length left
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
-- The construction work takes place if and when the relevant part of
-- the lazy 'L.ByteString' is demanded.
--
-- NOTE(review): relies on 'unsafePerformIO'; the chunk list is produced
-- lazily as the returned ByteString is forced.
toLazyByteString :: Builder -> L.ByteString
toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
    buf <- newBuffer defaultSize
    -- (const []) terminates the chunk list; the trailing 'flush' pops
    -- whatever is still sitting in the final buffer
    return (runBuilder (m `append` flush) (const []) buf)
|
||||
|
||||
-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
-- yielding a new chunk in the result lazy 'L.ByteString'.
flush :: Builder
flush = Builder $ \ k buf@(Buffer p o u l) ->
    if u == 0
        -- nothing written yet: emit no chunk (keeps the invariant that
        -- the chunk list contains no empty ByteStrings)
        then k buf
        -- emit the used region [o, o+u) as a chunk and continue writing
        -- into the same array just after it
        else S.PS p o u : k (Buffer p (o+u) 0 l)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
--
-- copied from Data.ByteString.Lazy
--
-- Default chunk size: just under 32k, leaving room for the allocator's
-- per-block bookkeeping so the whole allocation stays in one block.
defaultSize :: Int
defaultSize = chunk - overhead
  where
    chunk    = 32 * 1024
    overhead = 2 * sizeOf (undefined :: Int)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | Sequence an IO operation on the buffer.
-- 'inlinePerformIO' is unsafe in general; NOTE(review): it appears safe
-- here because each writer only touches the unused tail of its buffer.
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
    buf' <- f buf
    return (k buf')
{-# INLINE unsafeLiftIO #-}
|
||||
|
||||
-- | Hand the number of free bytes in the current buffer to a
-- Builder-producing function.
withSize :: (Int -> Builder) -> Builder
withSize f = Builder $ \ k buf ->
    case buf of
      Buffer _ _ _ bytesLeft -> runBuilder (f bytesLeft) k buf
|
||||
|
||||
-- | Apply a function to the list of chunks produced downstream.
mapBuilder :: ([S.ByteString] -> [S.ByteString]) -> Builder
mapBuilder f = Builder (\cont -> f . cont)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | Ensure that there are at least @n@ many bytes available.
-- If the current buffer is too small, flush it as a chunk and allocate
-- a fresh buffer of at least the default size.
-- The leading `seq` forces @n@ up front so the size comparison does not
-- retain a thunk.
ensureFree :: Int -> Builder
ensureFree n = n `seq` withSize $ \ l ->
    if n <= l then empty else
        flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
{-# INLINE ensureFree #-}
|
||||
|
||||
-- | Reserve @n@ bytes of buffer space, then run @f@ to fill them in.
writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
writeN n f = append (ensureFree n) (unsafeLiftIO (writeNBuffer n f))
{-# INLINE writeN #-}
|
||||
|
||||
-- Run the writer @f@ at the first unused byte of the buffer, then
-- account for the @n@ bytes it wrote.  The caller ('writeN') has
-- already guaranteed that at least @n@ bytes are free.
writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
writeNBuffer n f (Buffer fp o u l) = do
    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
    return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBuffer #-}
|
||||
|
||||
-- Allocate a fresh, empty buffer of the given size (offset 0, 0 bytes
-- used, all bytes free).
newBuffer :: Int -> IO Buffer
newBuffer size = do
    fp <- S.mallocByteString size
    return $! Buffer fp 0 0 size
{-# INLINE newBuffer #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Aligned, host order writes of storable values
|
||||
|
||||
-- | Ensure that @n@ many bytes are available, and then use @f@ to write
-- some storable values into the memory (host order, unaligned).
writeNbytes :: Storable a => Int -> (Ptr a -> IO ()) -> Builder
writeNbytes n f = append (ensureFree n) (unsafeLiftIO (writeNBufferBytes n f))
{-# INLINE writeNbytes #-}
|
||||
|
||||
-- As 'writeNBuffer', but the callback receives a typed pointer so a
-- 'Storable' value can be poked directly.  @n@ must equal the number of
-- bytes the callback actually writes.
writeNBufferBytes :: Storable a => Int -> (Ptr a -> IO ()) -> Buffer -> IO Buffer
writeNBufferBytes n f (Buffer fp o u l) = do
    withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
    return (Buffer fp o (u+n) (l-n))
{-# INLINE writeNBufferBytes #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
--
|
||||
-- We rely on the fromIntegral to do the right masking for us.
|
||||
-- The inlining here is critical, and can be worth 4x performance
|
||||
--
|
||||
|
||||
-- | Write a Word16 in big endian format.
-- The two pokes target distinct offsets, so their order is irrelevant.
putWord16be :: Word16 -> Builder
putWord16be w = writeN 2 writeBytes
  where
    writeBytes p = do
        poke (p `plusPtr` 1) (fromIntegral w                :: Word8)
        poke p               (fromIntegral (shiftr_w16 w 8) :: Word8)
{-# INLINE putWord16be #-}
|
||||
|
||||
-- | Write a Word16 in little endian format.
-- The two pokes target distinct offsets, so their order is irrelevant.
putWord16le :: Word16 -> Builder
putWord16le w = writeN 2 writeBytes
  where
    writeBytes p = do
        poke (p `plusPtr` 1) (fromIntegral (shiftr_w16 w 8) :: Word8)
        poke p               (fromIntegral w                :: Word8)
{-# INLINE putWord16le #-}
|
||||
|
||||
-- putWord16le w16 = writeN 2 (\p -> poke (castPtr p) w16)
|
||||
|
||||
-- | Write a Word32 in big endian format (most significant byte first).
putWord32be :: Word32 -> Builder
putWord32be w = writeN 4 $ \p -> do
    let byteAt off sh =
            poke (p `plusPtr` off) (fromIntegral (shiftr_w32 w sh) :: Word8)
    byteAt 0 24
    byteAt 1 16
    byteAt 2 8
    poke (p `plusPtr` 3) (fromIntegral w :: Word8)
{-# INLINE putWord32be #-}
|
||||
|
||||
--
|
||||
-- a data type to tag Put/Check. writes construct these which are then
|
||||
-- inlined and flattened. matching Checks will be more robust with rules.
|
||||
--
|
||||
|
||||
-- | Write a Word32 in little endian format (least significant byte first).
putWord32le :: Word32 -> Builder
putWord32le w = writeN 4 $ \p -> do
    poke p (fromIntegral w :: Word8)
    let byteAt off sh =
            poke (p `plusPtr` off) (fromIntegral (shiftr_w32 w sh) :: Word8)
    byteAt 1 8
    byteAt 2 16
    byteAt 3 24
{-# INLINE putWord32le #-}
|
||||
|
||||
-- on a little endian machine:
|
||||
-- putWord32le w32 = writeN 4 (\p -> poke (castPtr p) w32)
|
||||
|
||||
-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Builder
#if WORD_SIZE_IN_BITS < 64
--
-- To avoid expensive 64 bit shifts on 32 bit machines, we cast to
-- Word32, and write that
--
putWord64be w =
    let a = fromIntegral (shiftr_w64 w 32) :: Word32   -- high 32 bits
        b = fromIntegral w :: Word32                   -- low 32 bits
    in writeN 8 $ \p -> do
    poke p               (fromIntegral (shiftr_w32 a 24) :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a 16) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a  8) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (a)               :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (shiftr_w32 b 24) :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b 16) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b  8) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (b)               :: Word8)
#else
-- 64-bit host: shift the Word64 directly, most significant byte first.
putWord64be w = writeN 8 $ \p -> do
    poke p               (fromIntegral (shiftr_w64 w 56) :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w 48) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 40) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 32) :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 24) :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 16) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w  8) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (w)               :: Word8)
#endif
{-# INLINE putWord64be #-}

-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Builder

#if WORD_SIZE_IN_BITS < 64
-- Same 32-bit split as putWord64be, bytes emitted in reverse order.
putWord64le w =
    let b = fromIntegral (shiftr_w64 w 32) :: Word32   -- high 32 bits
        a = fromIntegral w :: Word32                   -- low 32 bits
    in writeN 8 $ \p -> do
    poke (p)             (fromIntegral (a)               :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w32 a  8) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w32 a 16) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (shiftr_w32 a 24) :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (b)               :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w32 b  8) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w32 b 16) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (shiftr_w32 b 24) :: Word8)
#else
putWord64le w = writeN 8 $ \p -> do
    poke p               (fromIntegral (w)               :: Word8)
    poke (p `plusPtr` 1) (fromIntegral (shiftr_w64 w  8) :: Word8)
    poke (p `plusPtr` 2) (fromIntegral (shiftr_w64 w 16) :: Word8)
    poke (p `plusPtr` 3) (fromIntegral (shiftr_w64 w 24) :: Word8)
    poke (p `plusPtr` 4) (fromIntegral (shiftr_w64 w 32) :: Word8)
    poke (p `plusPtr` 5) (fromIntegral (shiftr_w64 w 40) :: Word8)
    poke (p `plusPtr` 6) (fromIntegral (shiftr_w64 w 48) :: Word8)
    poke (p `plusPtr` 7) (fromIntegral (shiftr_w64 w 56) :: Word8)
#endif
{-# INLINE putWord64le #-}
|
||||
|
||||
-- on a little endian machine:
|
||||
-- putWord64le w64 = writeN 8 (\p -> poke (castPtr p) w64)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Unaligned, word size ops
|
||||
|
||||
-- | /O(1)./ A Builder taking a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Builder
putWordhost w = writeNbytes (sizeOf (undefined :: Word)) (\p -> poke p w)
{-# INLINE putWordhost #-}

-- | Write a Word16 in native host order and host endianness.
-- 2 bytes will be written, unaligned.
putWord16host :: Word16 -> Builder
putWord16host w16 = writeNbytes (sizeOf (undefined :: Word16)) (\p -> poke p w16)
{-# INLINE putWord16host #-}

-- | Write a Word32 in native host order and host endianness.
-- 4 bytes will be written, unaligned.
putWord32host :: Word32 -> Builder
putWord32host w32 = writeNbytes (sizeOf (undefined :: Word32)) (\p -> poke p w32)
{-# INLINE putWord32host #-}

-- | Write a Word64 in native host order and host endianness.
-- 8 bytes will be written, unaligned (a single Storable poke of the
-- Word64; byte layout follows the host's Storable instance).
putWord64host :: Word64 -> Builder
putWord64host w = writeNbytes (sizeOf (undefined :: Word64)) (\p -> poke p w)
{-# INLINE putWord64host #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Unchecked shifts
|
||||
|
||||
-- Right shifts that skip GHC's shift-amount bounds check; safe because
-- every call site shifts by an in-range constant.
{-# INLINE shiftr_w16 #-}
shiftr_w16 :: Word16 -> Int -> Word16
{-# INLINE shiftr_w32 #-}
shiftr_w32 :: Word32 -> Int -> Word32
{-# INLINE shiftr_w64 #-}
shiftr_w64 :: Word64 -> Int -> Word64

#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftr_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftRL#` i)
shiftr_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftRL#` i)

#if WORD_SIZE_IN_BITS < 64
-- 32-bit host: Word64 needs the dedicated 64-bit primop.
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL64#` i)

#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftRL64"
    uncheckedShiftRL64# :: Word64# -> Int# -> Word64#
#endif

#else
shiftr_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftRL#` i)
#endif

#else
-- Portable fallback (Hugs etc.): ordinary checked shifts.
shiftr_w16 = shiftR
shiftr_w32 = shiftR
shiftr_w64 = shiftR
#endif
|
||||
544
src/runtime/haskell/Data/Binary/Get.hs
Normal file
544
src/runtime/haskell/Data/Binary/Get.hs
Normal file
@@ -0,0 +1,544 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# OPTIONS_GHC -fglasgow-exts #-}
|
||||
-- for unboxed shifts
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Data.Binary.Get
|
||||
-- Copyright : Lennart Kolmodin
|
||||
-- License : BSD3-style (see LICENSE)
|
||||
--
|
||||
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||
-- Stability : experimental
|
||||
-- Portability : portable to Hugs and GHC.
|
||||
--
|
||||
-- The Get monad. A monad for efficiently building structures from
|
||||
-- encoded lazy ByteStrings
|
||||
--
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||
#include "MachDeps.h"
|
||||
#endif
|
||||
|
||||
module Data.Binary.Get (
|
||||
|
||||
-- * The Get type
|
||||
Get
|
||||
, runGet
|
||||
, runGetState
|
||||
|
||||
-- * Parsing
|
||||
, skip
|
||||
, uncheckedSkip
|
||||
, lookAhead
|
||||
, lookAheadM
|
||||
, lookAheadE
|
||||
, uncheckedLookAhead
|
||||
|
||||
-- * Utility
|
||||
, bytesRead
|
||||
, getBytes
|
||||
, remaining
|
||||
, isEmpty
|
||||
|
||||
-- * Parsing particular types
|
||||
, getWord8
|
||||
|
||||
-- ** ByteStrings
|
||||
, getByteString
|
||||
, getLazyByteString
|
||||
, getLazyByteStringNul
|
||||
, getRemainingLazyByteString
|
||||
|
||||
-- ** Big-endian reads
|
||||
, getWord16be
|
||||
, getWord32be
|
||||
, getWord64be
|
||||
|
||||
-- ** Little-endian reads
|
||||
, getWord16le
|
||||
, getWord32le
|
||||
, getWord64le
|
||||
|
||||
-- ** Host-endian, unaligned reads
|
||||
, getWordhost
|
||||
, getWord16host
|
||||
, getWord32host
|
||||
, getWord64host
|
||||
|
||||
) where
|
||||
|
||||
import Control.Monad (when,liftM,ap)
|
||||
import Control.Monad.Fix
|
||||
import Data.Maybe (isNothing)
|
||||
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
#ifdef BYTESTRING_IN_BASE
|
||||
import qualified Data.ByteString.Base as B
|
||||
#else
|
||||
import qualified Data.ByteString.Internal as B
|
||||
import qualified Data.ByteString.Lazy.Internal as L
|
||||
#endif
|
||||
|
||||
#ifdef APPLICATIVE_IN_BASE
|
||||
import Control.Applicative (Applicative(..))
|
||||
#endif
|
||||
|
||||
import Foreign
|
||||
|
||||
-- used by splitAtST
|
||||
import Control.Monad.ST
|
||||
import Data.STRef
|
||||
|
||||
#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
|
||||
import GHC.Base
|
||||
import GHC.Word
|
||||
import GHC.Int
|
||||
#endif
|
||||
|
||||
-- | The parse state: the strict chunk currently being consumed, the
-- not-yet-examined lazy remainder of the input, and a running count of
-- bytes consumed so far.  The middle field is deliberately lazy so the
-- tail of the input is not forced prematurely.
data S = S {-# UNPACK #-} !B.ByteString -- current chunk
           L.ByteString                 -- the rest of the input
           {-# UNPACK #-} !Int64        -- bytes read

-- | The Get monad is just a State monad carrying around the input ByteString
newtype Get a = Get { unGet :: S -> (a, S) }
|
||||
|
||||
instance Functor Get where
    -- map over the result, threading the parse state through unchanged
    fmap f m = Get (\s -> case unGet m s of
                            (a, s') -> (f a, s'))
    {-# INLINE fmap #-}

#ifdef APPLICATIVE_IN_BASE
-- Applicative derived from the Monad instance; guarded by CPP because
-- very old base versions did not ship the class.
instance Applicative Get where
    pure = return
    (<*>) = ap
#endif

instance Monad Get where
    return a = Get (\s -> (a, s))
    {-# INLINE return #-}

    -- state-passing bind: run m, feed its result and new state to k
    m >>= k = Get (\s -> case unGet m s of
                           (a, s') -> unGet (k a) s')
    {-# INLINE (>>=) #-}

    -- pattern-match failure reports the current byte position (throws
    -- via 'error' in failDesc; Get has no recoverable failure channel)
    fail = failDesc

instance MonadFix Get where
    -- value recursion: the eventual result is fed back into f; the
    -- lazy let is essential for the knot-tying to terminate
    mfix f = Get (\s -> let (a,s') = unGet (f a) s
                        in (a,s'))
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | Read the current parser state.
get :: Get S
get = Get $ \st -> (st, st)

-- | Replace the parser state.
put :: S -> Get ()
put st = Get $ \_ -> ((), st)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
--
|
||||
-- dons, GHC 6.10: explicit inlining disabled, was killing performance.
|
||||
-- Without it, GHC seems to do just fine. And we get similar
|
||||
-- performance with 6.8.2 anyway.
|
||||
--
|
||||
|
||||
-- | Initial parse state for a lazy input: nothing consumed yet.
initState :: L.ByteString -> S
initState input = mkState input 0
{- INLINE initState -}
|
||||
|
||||
{-
|
||||
initState (B.LPS xs) =
|
||||
case xs of
|
||||
[] -> S B.empty L.empty 0
|
||||
(x:xs') -> S x (B.LPS xs') 0
|
||||
-}
|
||||
|
||||
#ifndef BYTESTRING_IN_BASE
-- Build a parse state from a lazy ByteString and a bytes-read count,
-- splitting off the first strict chunk (modern bytestring exposes the
-- Chunk/Empty constructors).
mkState :: L.ByteString -> Int64 -> S
mkState l = case l of
    L.Empty      -> S B.empty L.empty
    L.Chunk x xs -> S x xs
{- INLINE mkState -}

#else
-- Same, for the old base-bundled bytestring where a lazy ByteString is
-- the LPS newtype over a list of strict chunks.
mkState :: L.ByteString -> Int64 -> S
mkState (B.LPS xs) =
    case xs of
      []      -> S B.empty L.empty
      (x:xs') -> S x (B.LPS xs')
#endif
|
||||
|
||||
-- | Run a 'Get'-based parser over a lazy ByteString, returning only the
-- parsed value (leftover input and byte count are discarded).
runGet :: Get a -> L.ByteString -> a
runGet m str = fst (unGet m (initState str))
|
||||
|
||||
-- | Like 'runGet', but additionally returns the unconsumed input and
-- the total number of bytes read, with the count starting at @off@.
runGetState :: Get a -> L.ByteString -> Int64 -> (a, L.ByteString, Int64)
runGetState m str off =
    case unGet m (mkState str off) of
      -- the lazy (~) match keeps the final state unforced until the
      -- leftover input or the offset is actually demanded
      (a, ~(S s ss newOff)) -> (a, s `join` ss, newOff)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- Abort parsing with a message that reports the current byte position.
-- NOTE: this throws via 'error'; Get has no recoverable failure channel.
failDesc :: String -> Get a
failDesc err = do
    S _ _ bytes <- get
    Get (error (err ++ ". Failed reading at byte position " ++ show bytes))
|
||||
|
||||
-- | Skip ahead @n@ bytes. Fails if fewer than @n@ bytes are available.
skip :: Int -> Get ()
skip n = readN (fromIntegral n) (\_ -> ())
|
||||
|
||||
-- | Skip ahead @n@ bytes. No error if there aren't enough bytes: the
-- skip is silently clamped at end of input ('L.drop' stops early).
uncheckedSkip :: Int64 -> Get ()
uncheckedSkip n = do
    S s ss bytes <- get
    if fromIntegral (B.length s) >= n
        -- fast path: the skip stays inside the current chunk
        then put (S (B.drop (fromIntegral n) s) ss (bytes + n))
        else do
            -- drop the remainder from the lazy tail and re-chunk
            let rest = L.drop (n - fromIntegral (B.length s)) ss
            put $! mkState rest (bytes + n)
|
||||
|
||||
-- | Run @ga@, but return without consuming its input.
-- Fails if @ga@ fails.
lookAhead :: Get a -> Get a
lookAhead ga =
    get       >>= \saved  ->
    ga        >>= \result ->
    put saved >>
    return result
|
||||
|
||||
-- | Like 'lookAhead', but consume the input if @gma@ returns 'Just _'.
-- Fails if @gma@ fails.
lookAheadM :: Get (Maybe a) -> Get (Maybe a)
lookAheadM gma = do
    saved <- get
    res   <- gma
    case res of
      Nothing -> put saved     -- rewind: leave the input untouched
      Just _  -> return ()     -- keep the input consumed
    return res
|
||||
|
||||
-- | Like 'lookAhead', but consume the input if @gea@ returns 'Right _'.
-- Fails if @gea@ fails.
lookAheadE :: Get (Either a b) -> Get (Either a b)
lookAheadE gea = do
    saved <- get
    res   <- gea
    -- rewind on Left, keep the input consumed on Right
    either (\_ -> put saved) (\_ -> return ()) res
    return res
|
||||
|
||||
-- | Get the next up to @n@ bytes as a lazy ByteString, without consuming them.
-- Returns fewer than @n@ bytes if the input runs out first.
uncheckedLookAhead :: Int64 -> Get L.ByteString
uncheckedLookAhead n = do
    S s ss _ <- get
    if n <= fromIntegral (B.length s)
        -- fast path: the current chunk already covers the request
        then return (L.fromChunks [B.take (fromIntegral n) s])
        else return $ L.take n (s `join` ss)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Utility
|
||||
|
||||
-- | Get the total number of bytes read to this point.
bytesRead :: Get Int64
bytesRead = get >>= \ (S _ _ count) -> return count
|
||||
|
||||
-- | Get the number of remaining unparsed bytes.
-- Useful for checking whether all input has been consumed.
-- Note that this forces the rest of the input.
remaining :: Get Int64
remaining =
    get >>= \ (S cur rest _) ->
    return (fromIntegral (B.length cur) + L.length rest)
|
||||
|
||||
-- | Test whether all input has been consumed,
-- i.e. there are no remaining unparsed bytes.
isEmpty :: Get Bool
isEmpty = get >>= \ (S cur rest _) -> return (B.null cur && L.null rest)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Utility with ByteStrings
|
||||
|
||||
-- | An efficient 'get' method for strict ByteStrings. Fails if fewer
-- than @n@ bytes are left in the input.
getByteString :: Int -> Get B.ByteString
getByteString n = readN n (\bs -> bs)
{-# INLINE getByteString #-}
|
||||
|
||||
-- | An efficient 'get' method for lazy ByteStrings. Does not fail if fewer than
-- @n@ bytes are left in the input.
getLazyByteString :: Int64 -> Get L.ByteString
getLazyByteString n = do
    S s ss bytes <- get
    let big = s `join` ss
    -- splitAtST lets the prefix be consumed in constant heap space
    -- before the suffix (the new parse state) is forced
    case splitAtST n big of
      (consume, rest) -> do put $ mkState rest (bytes + n)
                            return consume
{-# INLINE getLazyByteString #-}
|
||||
|
||||
-- | Get a lazy ByteString that is terminated with a NUL byte. Fails
-- if it reaches the end of input without hitting a NUL.
-- The NUL byte is consumed but not included in the result.
getLazyByteStringNul :: Get L.ByteString
getLazyByteStringNul = do
    S s ss bytes <- get
    let big = s `join` ss
        (consume, t) = L.break (== 0) big   -- everything before the NUL
        (h, rest) = L.splitAt 1 t           -- h is the NUL itself, if any
    -- h is empty exactly when we ran off the end without seeing a NUL
    if L.null h
        then fail "too few bytes"
        else do
            put $ mkState rest (bytes + L.length consume + 1)
            return consume
{-# INLINE getLazyByteStringNul #-}
|
||||
|
||||
-- | Get the remaining bytes as a lazy ByteString.
getRemainingLazyByteString :: Get L.ByteString
getRemainingLazyByteString =
    get >>= \ (S cur rest _) -> return (cur `join` rest)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
-- | Pull @n@ bytes from the input, as a strict ByteString.
-- Fails (via 'fail') when fewer than @n@ bytes remain.
getBytes :: Int -> Get B.ByteString
getBytes n = do
    S s ss bytes <- get
    if n <= B.length s
        -- fast path: the request fits inside the current chunk
        then do let (consume,rest) = B.splitAt n s
                put $! S rest ss (bytes + fromIntegral n)
                return $! consume
        else
            -- slow path: cross chunk boundaries, concatenating the
            -- prefix into one strict chunk and re-chunking the rest
            case L.splitAt (fromIntegral n) (s `join` ss) of
              (consuming, rest) ->
                  do let now = B.concat . L.toChunks $ consuming
                     put $! mkState rest (bytes + fromIntegral n)
                     -- forces the next chunk before this one is returned
                     if (B.length now < n)
                       then
                         fail "too few bytes"
                       else
                         return now
{- INLINE getBytes -}
-- ^ important
|
||||
|
||||
#ifndef BYTESTRING_IN_BASE
-- Prepend a strict chunk onto a lazy ByteString, maintaining the
-- invariant that the chunk list contains no empty ByteStrings.
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb lb
    | B.null bb = lb
    | otherwise = L.Chunk bb lb

#else
-- Same, for the old base-bundled bytestring (LPS chunk-list newtype).
join :: B.ByteString -> L.ByteString -> L.ByteString
join bb (B.LPS lb)
    | B.null bb = B.LPS lb
    | otherwise = B.LPS (bb:lb)
#endif
-- don't use L.append, it's strict in it's second argument :/
{- INLINE join -}
|
||||
|
||||
-- | Split a ByteString at position @i@.  If the first result is
-- consumed before the second, this runs in constant heap space.
--
-- You must force the returned tuple for that to work, e.g.
--
-- > case splitAtST n xs of
-- >    (ys,zs) -> consume ys ... consume zs
--
splitAtST :: Int64 -> L.ByteString -> (L.ByteString, L.ByteString)
splitAtST i ps | i <= 0 = (L.empty, ps)
#ifndef BYTESTRING_IN_BASE
-- The suffix is communicated through an STRef that 'first' writes as it
-- streams out the prefix; reading it back with unsafeInterleaveST keeps
-- the suffix unforced until the prefix has been consumed.
splitAtST i ps = runST (
     do r  <- newSTRef undefined
        xs <- first r i ps
        ys <- unsafeInterleaveST (readSTRef r)
        return (xs, ys))

  where
        -- exactly i bytes delivered: stash the rest, end the prefix
        first r 0 xs@(L.Chunk _ _) = writeSTRef r xs      >> return L.Empty
        -- input exhausted early: both halves end here
        first r _ L.Empty          = writeSTRef r L.Empty >> return L.Empty

        first r n (L.Chunk x xs)
          -- split point inside this chunk
          | n < l     = do writeSTRef r (L.Chunk (B.drop (fromIntegral n) x) xs)
                           return $ L.Chunk (B.take (fromIntegral n) x) L.Empty
          -- whole chunk belongs to the prefix; recurse lazily
          | otherwise = do writeSTRef r (L.drop (n - l) xs)
                           liftM (L.Chunk x) $ unsafeInterleaveST (first r (n - l) xs)

         where l = fromIntegral (B.length x)
#else
-- Same algorithm over the old LPS chunk-list representation.
splitAtST i (B.LPS ps) = runST (
     do r  <- newSTRef undefined
        xs <- first r i ps
        ys <- unsafeInterleaveST (readSTRef r)
        return (B.LPS xs, B.LPS ys))

  where first r 0 xs     = writeSTRef r xs >> return []
        first r _ []     = writeSTRef r [] >> return []
        first r n (x:xs)
          | n < l     = do writeSTRef r (B.drop (fromIntegral n) x : xs)
                           return [B.take (fromIntegral n) x]
          | otherwise = do writeSTRef r (L.toChunks (L.drop (n - l) (B.LPS xs)))
                           fmap (x:) $ unsafeInterleaveST (first r (n - l) xs)

         where l = fromIntegral (B.length x)
#endif
{- INLINE splitAtST -}
|
||||
|
||||
-- Pull @n@ bytes from the input and apply a parser to them, yielding a
-- value.  Fails (via 'getBytes') when fewer than @n@ bytes remain.
readN :: Int -> (B.ByteString -> a) -> Get a
readN n f = getBytes n >>= return . f
{- INLINE readN -}
-- ^ important
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Primtives
|
||||
|
||||
-- helper, get a raw Ptr onto a strict ByteString copied out of the
-- underlying lazy byteString. So many indirections from the raw parser
-- state that my head hurts...
-- NOTE(review): the inlinePerformIO peek appears safe because the
-- ByteString region read from is immutable.
getPtr :: Storable a => Int -> Get a
getPtr n = do
    (fp,o,_) <- readN n B.toForeignPtr
    return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
{- INLINE getPtr -}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | Read a single byte from the input.
getWord8 :: Get Word8
getWord8 = getPtr (sizeOf (undefined :: Word8))
{- INLINE getWord8 -}
|
||||
|
||||
-- | Read a Word16 in big endian format.
getWord16be :: Get Word16
getWord16be = do
    bytes <- readN 2 id
    let hi = fromIntegral (bytes `B.index` 0) `shiftl_w16` 8
        lo = fromIntegral (bytes `B.index` 1)
    return $! hi .|. lo
{- INLINE getWord16be -}
|
||||
|
||||
-- | Read a Word16 in little endian format.
getWord16le :: Get Word16
getWord16le = do
    bytes <- readN 2 id
    let hi = fromIntegral (bytes `B.index` 1) `shiftl_w16` 8
        lo = fromIntegral (bytes `B.index` 0)
    return $! hi .|. lo
{- INLINE getWord16le -}
|
||||
|
||||
-- | Read a Word32 in big endian format.
-- Multi-byte words are assembled by indexing into a strict chunk pulled
-- with 'readN'; shiftl_w32/shiftl_w64 are unchecked shifts (see below).
getWord32be :: Get Word32
getWord32be = do
    s <- readN 4 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w32` 24) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w32` 16) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w32`  8) .|.
              (fromIntegral (s `B.index` 3) )
{- INLINE getWord32be -}

-- | Read a Word32 in little endian format.
getWord32le :: Get Word32
getWord32le = do
    s <- readN 4 id
    return $! (fromIntegral (s `B.index` 3) `shiftl_w32` 24) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w32` 16) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w32`  8) .|.
              (fromIntegral (s `B.index` 0) )
{- INLINE getWord32le -}

-- | Read a Word64 in big endian format.
getWord64be :: Get Word64
getWord64be = do
    s <- readN 8 id
    return $! (fromIntegral (s `B.index` 0) `shiftl_w64` 56) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w64` 48) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w64` 40) .|.
              (fromIntegral (s `B.index` 3) `shiftl_w64` 32) .|.
              (fromIntegral (s `B.index` 4) `shiftl_w64` 24) .|.
              (fromIntegral (s `B.index` 5) `shiftl_w64` 16) .|.
              (fromIntegral (s `B.index` 6) `shiftl_w64`  8) .|.
              (fromIntegral (s `B.index` 7) )
{- INLINE getWord64be -}

-- | Read a Word64 in little endian format.
getWord64le :: Get Word64
getWord64le = do
    s <- readN 8 id
    return $! (fromIntegral (s `B.index` 7) `shiftl_w64` 56) .|.
              (fromIntegral (s `B.index` 6) `shiftl_w64` 48) .|.
              (fromIntegral (s `B.index` 5) `shiftl_w64` 40) .|.
              (fromIntegral (s `B.index` 4) `shiftl_w64` 32) .|.
              (fromIntegral (s `B.index` 3) `shiftl_w64` 24) .|.
              (fromIntegral (s `B.index` 2) `shiftl_w64` 16) .|.
              (fromIntegral (s `B.index` 1) `shiftl_w64`  8) .|.
              (fromIntegral (s `B.index` 0) )
{- INLINE getWord64le -}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Host-endian reads
|
||||
|
||||
-- | /O(1)./ Read a single native machine word. The word is read in
-- host order, host endian form, for the machine you're on. On a 64 bit
-- machine the Word is an 8 byte value, on a 32 bit machine, 4 bytes.
getWordhost :: Get Word
getWordhost = getPtr (sizeOf (undefined :: Word))
{- INLINE getWordhost -}

-- | /O(1)./ Read a 2 byte Word16 in native host order and host endianness.
getWord16host :: Get Word16
getWord16host = getPtr (sizeOf (undefined :: Word16))
{- INLINE getWord16host -}

-- | /O(1)./ Read a Word32 in native host order and host endianness.
getWord32host :: Get Word32
getWord32host = getPtr (sizeOf (undefined :: Word32))
{- INLINE getWord32host -}

-- | /O(1)./ Read a Word64 in native host order and host endianness.
getWord64host :: Get Word64
getWord64host = getPtr (sizeOf (undefined :: Word64))
{- INLINE getWord64host -}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
-- Unchecked shifts
|
||||
|
||||
-- Left shifts that skip GHC's shift-amount bounds check; safe because
-- every call site above shifts by an in-range constant.
shiftl_w16 :: Word16 -> Int -> Word16
shiftl_w32 :: Word32 -> Int -> Word32
shiftl_w64 :: Word64 -> Int -> Word64

#if defined(__GLASGOW_HASKELL__) && !defined(__HADDOCK__)
shiftl_w16 (W16# w) (I# i) = W16# (w `uncheckedShiftL#` i)
shiftl_w32 (W32# w) (I# i) = W32# (w `uncheckedShiftL#` i)

#if WORD_SIZE_IN_BITS < 64
-- 32-bit host: Word64 needs the dedicated 64-bit primop.
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL64#` i)

#if __GLASGOW_HASKELL__ <= 606
-- Exported by GHC.Word in GHC 6.8 and higher
foreign import ccall unsafe "stg_uncheckedShiftL64"
    uncheckedShiftL64# :: Word64# -> Int# -> Word64#
#endif

#else
shiftl_w64 (W64# w) (I# i) = W64# (w `uncheckedShiftL#` i)
#endif

#else
-- Portable fallback (Hugs etc.): ordinary checked shifts.
shiftl_w16 = shiftL
shiftl_w32 = shiftL
shiftl_w64 = shiftL
#endif
|
||||
216
src/runtime/haskell/Data/Binary/Put.hs
Normal file
216
src/runtime/haskell/Data/Binary/Put.hs
Normal file
@@ -0,0 +1,216 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
-----------------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Data.Binary.Put
|
||||
-- Copyright : Lennart Kolmodin
|
||||
-- License : BSD3-style (see LICENSE)
|
||||
--
|
||||
-- Maintainer : Lennart Kolmodin <kolmodin@dtek.chalmers.se>
|
||||
-- Stability : stable
|
||||
-- Portability : Portable to Hugs and GHC. Requires MPTCs
|
||||
--
|
||||
-- The Put monad. A monad for efficiently constructing lazy bytestrings.
|
||||
--
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module Data.Binary.Put (
|
||||
|
||||
-- * The Put type
|
||||
Put
|
||||
, PutM(..)
|
||||
, runPut
|
||||
, runPutM
|
||||
, putBuilder
|
||||
, execPut
|
||||
|
||||
-- * Flushing the implicit parse state
|
||||
, flush
|
||||
|
||||
-- * Primitives
|
||||
, putWord8
|
||||
, putByteString
|
||||
, putLazyByteString
|
||||
|
||||
-- * Big-endian primitives
|
||||
, putWord16be
|
||||
, putWord32be
|
||||
, putWord64be
|
||||
|
||||
-- * Little-endian primitives
|
||||
, putWord16le
|
||||
, putWord32le
|
||||
, putWord64le
|
||||
|
||||
-- * Host-endian, unaligned writes
|
||||
, putWordhost -- :: Word -> Put
|
||||
, putWord16host -- :: Word16 -> Put
|
||||
, putWord32host -- :: Word32 -> Put
|
||||
, putWord64host -- :: Word64 -> Put
|
||||
|
||||
) where
|
||||
|
||||
import Data.Monoid
|
||||
import Data.Binary.Builder (Builder, toLazyByteString)
|
||||
import qualified Data.Binary.Builder as B
|
||||
|
||||
import Data.Word
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
#ifdef APPLICATIVE_IN_BASE
|
||||
import Control.Applicative
|
||||
#endif
|
||||
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- XXX Strict in buffer only.
-- A result value paired with the Builder accumulated so far; the
-- Builder field is strict and unpacked, the result is lazy.
data PairS a = PairS a {-# UNPACK #-}!Builder

-- | Project the accumulated 'Builder' out of a 'PairS'.
sndS :: PairS a -> Builder
sndS (PairS _ b) = b

-- | The PutM type. A Writer monad over the efficient Builder monoid.
newtype PutM a = Put { unPut :: PairS a }

-- | Put merely lifts Builder into a Writer monad, applied to ().
type Put = PutM ()
|
||||
|
||||
-- Map over the result value; the accumulated output is untouched.
-- The lazy let binding is deliberate (no premature forcing of the pair).
instance Functor PutM where
    fmap f p = Put $ let PairS x out = unPut p in PairS (f x) out
    {-# INLINE fmap #-}
|
||||
|
||||
#ifdef APPLICATIVE_IN_BASE
-- Only compiled against base versions that ship Control.Applicative.
-- Combines the two outputs with the Builder monoid, like the Monad
-- instance below.
instance Applicative PutM where
    pure    = return
    m <*> k = Put $
        let PairS f w  = unPut m
            PairS x w' = unPut k
        in PairS (f x) (w `mappend` w')
#endif
|
||||
|
||||
-- Standard Writer monad, with aggressive inlining.  The lazy let
-- bindings are deliberate: they keep the pair unforced so output can be
-- produced incrementally.
instance Monad PutM where
    return x = Put $ PairS x mempty
    {-# INLINE return #-}

    p >>= f = Put $
        let PairS x out  = unPut p
            PairS y out' = unPut (f x)
        in PairS y (out `mappend` out')
    {-# INLINE (>>=) #-}

    p >> q = Put $
        let PairS _ out  = unPut p
            PairS y out' = unPut q
        in PairS y (out `mappend` out')
    {-# INLINE (>>) #-}
|
||||
|
||||
-- | Append a 'Builder' to the output; the primitive all writers below
-- are built on.
tell :: Builder -> Put
tell builder = Put (PairS () builder)
{-# INLINE tell #-}
|
||||
|
||||
-- | Lift a 'Builder' directly into the 'Put' monad.
putBuilder :: Builder -> Put
putBuilder = tell
{-# INLINE putBuilder #-}

-- | Run the 'Put' monad, yielding the accumulated 'Builder'
-- (any result value is discarded)
execPut :: PutM a -> Builder
execPut = sndS . unPut
{-# INLINE execPut #-}

-- | Run the 'Put' monad with a serialiser
runPut :: Put -> L.ByteString
runPut = toLazyByteString . sndS . unPut
{-# INLINE runPut #-}

-- | Run the 'Put' monad with a serialiser and get its result
runPutM :: PutM a -> (a, L.ByteString)
runPutM (Put (PairS f s)) = (f, toLazyByteString s)
{-# INLINE runPutM #-}
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
-- | Pop the ByteString we have constructed so far, if any, yielding a
-- new chunk in the result ByteString.
flush :: Put
flush = tell B.flush
{-# INLINE flush #-}

-- | Efficiently write a byte into the output buffer
putWord8 :: Word8 -> Put
putWord8 = tell . B.singleton
{-# INLINE putWord8 #-}

-- | An efficient primitive to write a strict ByteString into the output buffer.
-- It flushes the current buffer, and writes the argument into a new chunk.
putByteString :: S.ByteString -> Put
putByteString = tell . B.fromByteString
{-# INLINE putByteString #-}

-- | Write a lazy ByteString efficiently, simply appending the lazy
-- ByteString chunks to the output buffer
putLazyByteString :: L.ByteString -> Put
putLazyByteString = tell . B.fromLazyByteString
{-# INLINE putLazyByteString #-}

-- The writers below are thin wrappers over the corresponding
-- Data.Binary.Builder primitives; all the encoding work happens there.

-- | Write a Word16 in big endian format
putWord16be :: Word16 -> Put
putWord16be = tell . B.putWord16be
{-# INLINE putWord16be #-}

-- | Write a Word16 in little endian format
putWord16le :: Word16 -> Put
putWord16le = tell . B.putWord16le
{-# INLINE putWord16le #-}

-- | Write a Word32 in big endian format
putWord32be :: Word32 -> Put
putWord32be = tell . B.putWord32be
{-# INLINE putWord32be #-}

-- | Write a Word32 in little endian format
putWord32le :: Word32 -> Put
putWord32le = tell . B.putWord32le
{-# INLINE putWord32le #-}

-- | Write a Word64 in big endian format
putWord64be :: Word64 -> Put
putWord64be = tell . B.putWord64be
{-# INLINE putWord64be #-}

-- | Write a Word64 in little endian format
putWord64le :: Word64 -> Put
putWord64le = tell . B.putWord64le
{-# INLINE putWord64le #-}

------------------------------------------------------------------------

-- | /O(1)./ Write a single native machine word. The word is
-- written in host order, host endian form, for the machine you're on.
-- On a 64 bit machine the Word is an 8 byte value, on a 32 bit machine,
-- 4 bytes. Values written this way are not portable to
-- different endian or word sized machines, without conversion.
--
putWordhost :: Word -> Put
putWordhost = tell . B.putWordhost
{-# INLINE putWordhost #-}

-- | /O(1)./ Write a Word16 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord16host :: Word16 -> Put
putWord16host = tell . B.putWord16host
{-# INLINE putWord16host #-}

-- | /O(1)./ Write a Word32 in native host order and host endianness.
-- For portability issues see @putWordhost@.
putWord32host :: Word32 -> Put
putWord32host = tell . B.putWord32host
{-# INLINE putWord32host #-}

-- | /O(1)./ Write a Word64 in native host order
-- On a 32 bit machine we write two host order Word32s, in big endian form.
-- For portability issues see @putWordhost@.
putWord64host :: Word64 -> Put
putWord64host = tell . B.putWord64host
{-# INLINE putWord64host #-}
|
||||
352
src/runtime/haskell/PGF.hs
Normal file
352
src/runtime/haskell/PGF.hs
Normal file
@@ -0,0 +1,352 @@
|
||||
-------------------------------------------------
|
||||
-- |
|
||||
-- Module : PGF
|
||||
-- Maintainer : Aarne Ranta
|
||||
-- Stability : stable
|
||||
-- Portability : portable
|
||||
--
|
||||
-- This module is an Application Programming Interface to
|
||||
-- load and interpret grammars compiled in Portable Grammar Format (PGF).
|
||||
-- The PGF format is produced as a final output from the GF compiler.
|
||||
-- The API is meant to be used for embedding GF grammars in Haskell
|
||||
-- programs
|
||||
-------------------------------------------------
|
||||
|
||||
module PGF(
|
||||
-- * PGF
|
||||
PGF,
|
||||
readPGF,
|
||||
|
||||
-- * Identifiers
|
||||
CId, mkCId, wildCId,
|
||||
showCId, readCId,
|
||||
|
||||
-- * Languages
|
||||
Language,
|
||||
showLanguage, readLanguage,
|
||||
languages, abstractName, languageCode,
|
||||
|
||||
-- * Types
|
||||
Type, Hypo,
|
||||
showType, readType,
|
||||
mkType, mkHypo, mkDepHypo, mkImplHypo,
|
||||
categories, startCat,
|
||||
|
||||
-- * Functions
|
||||
functions, functionType,
|
||||
|
||||
-- * Expressions & Trees
|
||||
-- ** Tree
|
||||
Tree,
|
||||
|
||||
-- ** Expr
|
||||
Expr,
|
||||
showExpr, readExpr,
|
||||
mkApp, unApp,
|
||||
mkStr, unStr,
|
||||
mkInt, unInt,
|
||||
mkDouble, unDouble,
|
||||
mkMeta, isMeta,
|
||||
|
||||
-- * Operations
|
||||
-- ** Linearization
|
||||
linearize, linearizeAllLang, linearizeAll,
|
||||
showPrintName,
|
||||
|
||||
-- ** Parsing
|
||||
parse, parseWithRecovery, canParse, parseAllLang, parseAll,
|
||||
|
||||
-- ** Evaluation
|
||||
PGF.compute, paraphrase,
|
||||
|
||||
-- ** Type Checking
|
||||
-- | The type checker in PGF does both type checking and renaming
|
||||
-- i.e. it verifies that all identifiers are declared and it
|
||||
-- distinguishes between global function or type identifiers and
|
||||
-- variable names. The type checker should always be applied on
|
||||
-- expressions entered by the user i.e. those produced via functions
|
||||
-- like 'readType' and 'readExpr' because otherwise unexpected results
|
||||
-- could appear. All typechecking functions return updated versions
|
||||
-- of the input types or expressions because the typechecking could
|
||||
-- also lead to metavariables instantiations.
|
||||
checkType, checkExpr, inferExpr,
|
||||
TcError(..), ppTcError,
|
||||
|
||||
-- ** Word Completion (Incremental Parsing)
|
||||
complete,
|
||||
Incremental.ParseState,
|
||||
Incremental.initState, Incremental.nextState, Incremental.getCompletions, Incremental.recoveryStates, Incremental.extractTrees,
|
||||
|
||||
-- ** Generation
|
||||
generateRandom, generateAll, generateAllDepth,
|
||||
|
||||
-- ** Morphological Analysis
|
||||
Lemma, Analysis, Morpho,
|
||||
lookupMorpho, buildMorpho,
|
||||
|
||||
-- ** Visualizations
|
||||
graphvizAbstractTree,
|
||||
graphvizParseTree,
|
||||
graphvizDependencyTree,
|
||||
graphvizAlignment,
|
||||
|
||||
-- * Browsing
|
||||
browse
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Linearize
|
||||
import PGF.Generate
|
||||
import PGF.TypeCheck
|
||||
import PGF.Paraphrase
|
||||
import PGF.VisualizeTree
|
||||
import PGF.Macros
|
||||
import PGF.Expr (Tree)
|
||||
import PGF.Morphology
|
||||
import PGF.Data hiding (functions)
|
||||
import PGF.Binary
|
||||
import qualified PGF.Parsing.FCFG.Active as Active
|
||||
import qualified PGF.Parsing.FCFG.Incremental as Incremental
|
||||
import qualified GF.Compile.GeneratePMCFG as PMCFG
|
||||
|
||||
import GF.Infra.Option
|
||||
import GF.Data.Utilities (replace)
|
||||
|
||||
import Data.Char
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.Maybe
|
||||
import Data.Binary
|
||||
import Data.List(mapAccumL)
|
||||
import System.Random (newStdGen)
|
||||
import Control.Monad
|
||||
import Text.PrettyPrint
|
||||
|
||||
---------------------------------------------------
|
||||
-- Interface
|
||||
---------------------------------------------------
|
||||
|
||||
-- | Reads file in Portable Grammar Format and produces
|
||||
-- 'PGF' structure. The file is usually produced with:
|
||||
--
|
||||
-- > $ gf -make <grammar file name>
|
||||
readPGF :: FilePath -> IO PGF
|
||||
|
||||
-- | Linearizes given expression as string in the language
|
||||
linearize :: PGF -> Language -> Tree -> String
|
||||
|
||||
-- | Tries to parse the given string in the specified language
|
||||
-- and to produce abstract syntax expression. An empty
|
||||
-- list is returned if the parsing is not successful. The list may also
|
||||
-- contain more than one element if the grammar is ambiguous.
|
||||
-- Throws an exception if the given language cannot be used
|
||||
-- for parsing, see 'canParse'.
|
||||
parse :: PGF -> Language -> Type -> String -> [Tree]
|
||||
|
||||
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> String -> [Tree]
|
||||
|
||||
-- | Checks whether the given language can be used for parsing.
|
||||
canParse :: PGF -> Language -> Bool
|
||||
|
||||
-- | The same as 'linearizeAllLang' but does not return
|
||||
-- the language.
|
||||
linearizeAll :: PGF -> Tree -> [String]
|
||||
|
||||
-- | Linearizes given expression as string in all languages
|
||||
-- available in the grammar.
|
||||
linearizeAllLang :: PGF -> Tree -> [(Language,String)]
|
||||
|
||||
-- | Show the printname of a type
|
||||
showPrintName :: PGF -> Language -> Type -> String
|
||||
|
||||
-- | The same as 'parseAllLang' but does not return
|
||||
-- the language.
|
||||
parseAll :: PGF -> Type -> String -> [[Tree]]
|
||||
|
||||
-- | Tries to parse the given string with all available languages.
|
||||
-- Languages which cannot be used for parsing (see 'canParse')
|
||||
-- are ignored.
|
||||
-- The returned list contains pairs of language
|
||||
-- and list of abstract syntax expressions
|
||||
-- (this is a list, since grammars can be ambiguous).
|
||||
-- Only those languages
|
||||
-- for which at least one parsing is possible are listed.
|
||||
parseAllLang :: PGF -> Type -> String -> [(Language,[Tree])]
|
||||
|
||||
-- | The same as 'generateAllDepth' but does not limit
|
||||
-- the depth in the generation.
|
||||
generateAll :: PGF -> Type -> [Expr]
|
||||
|
||||
-- | Generates an infinite list of random abstract syntax expressions.
|
||||
-- This is useful for treebank generation, which can then be used
|
||||
-- for grammar testing.
|
||||
generateRandom :: PGF -> Type -> IO [Expr]
|
||||
|
||||
-- | Generates an exhaustive possibly infinite list of
|
||||
-- abstract syntax expressions. A depth can be specified
|
||||
-- to limit the search space.
|
||||
generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
|
||||
|
||||
-- | List of all languages available in the given grammar.
|
||||
languages :: PGF -> [Language]
|
||||
|
||||
-- | Gets the RFC 4646 language tag
|
||||
-- of the language which the given concrete syntax implements,
|
||||
-- if this is listed in the source grammar.
|
||||
-- Example language tags include @\"en\"@ for English,
|
||||
-- and @\"en-UK\"@ for British English.
|
||||
languageCode :: PGF -> Language -> Maybe String
|
||||
|
||||
-- | The abstract language name is the name of the top-level
|
||||
-- abstract module
|
||||
abstractName :: PGF -> Language
|
||||
|
||||
-- | List of all categories defined in the given grammar.
|
||||
-- The categories are defined in the abstract syntax
|
||||
-- with the \'cat\' keyword.
|
||||
categories :: PGF -> [CId]
|
||||
|
||||
-- | The start category is defined in the grammar with
|
||||
-- the \'startcat\' flag. This is usually the sentence category
|
||||
-- but it is not necessary. Despite that there is a start category
|
||||
-- defined you can parse with any category. The start category
|
||||
-- definition is just for convenience.
|
||||
startCat :: PGF -> Type
|
||||
|
||||
-- | List of all functions defined in the abstract syntax
|
||||
functions :: PGF -> [CId]
|
||||
|
||||
-- | The type of a given function
|
||||
functionType :: PGF -> CId -> Maybe Type
|
||||
|
||||
-- | Complete the last word in the given string. If the input
|
||||
-- is empty or ends in whitespace, the last word is considered
|
||||
-- to be the empty string. This means that the completions
|
||||
-- will be all possible next words.
|
||||
complete :: PGF -> Language -> Type -> String
|
||||
-> [String] -- ^ Possible completions,
|
||||
-- including the given input.
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
-- Implementation
|
||||
---------------------------------------------------
|
||||
|
||||
-- Decode the binary PGF file and build any on-demand parsers.
readPGF f = decodeFile f >>= addParsers

-- Adds parsers for all concretes that don't have a parser and that have parser=ondemand.
addParsers :: PGF -> IO PGF
addParsers pgf = do cncs <- sequence [if wantsParser cnc then addParser lang cnc else return (lang,cnc)
                                          | (lang,cnc) <- Map.toList (concretes pgf)]
                    return pgf { concretes = Map.fromList cncs }
  where
    -- A concrete wants a parser when it has none yet and its flags say
    -- parser=ondemand.
    wantsParser cnc = isNothing (parser cnc) && Map.lookup (mkCId "parser") (cflags cnc) == Just "ondemand"
    -- Build the PMCFG parser info for one concrete and attach it.
    addParser lang cnc = do pinfo <- PMCFG.convertConcrete noOptions (abstract pgf) lang cnc
                            return (lang,cnc { parser = Just pinfo })
|
||||
|
||||
-- Only the first linearization variant is used.
linearize pgf lang = concat . take 1 . PGF.Linearize.linearizes pgf lang

-- Dispatch on the concrete's flags: the "erasing" flag selects the
-- incremental parser, otherwise the active FCFG parser is used.
-- Calls 'error' for unknown languages or ones without a parser.
parse pgf lang typ s =
  case Map.lookup lang (concretes pgf) of
    Just cnc -> case parser cnc of
                  Just pinfo -> if Map.lookup (mkCId "erasing") (cflags cnc) == Just "on"
                                  then Incremental.parse pgf lang typ (words s)
                                  else Active.parse "t" pinfo typ (words s)
                  Nothing    -> error ("No parser built for language: " ++ showCId lang)
    Nothing  -> error ("Unknown language: " ++ showCId lang)

parseWithRecovery pgf lang typ open_typs s = Incremental.parseWithRecovery pgf lang typ open_typs (words s)

canParse pgf cnc = isJust (lookParser pgf cnc)

linearizeAll mgr = map snd . linearizeAllLang mgr
linearizeAllLang mgr t =
  [(lang,PGF.linearize mgr lang t) | lang <- languages mgr]

showPrintName pgf lang (DTyp _ c _) = realize $ lookPrintName pgf lang c

parseAll mgr typ = map snd . parseAllLang mgr typ

-- Keep only the languages that can parse and that yield at least one tree.
parseAllLang mgr typ s =
  [(lang,ts) | lang <- languages mgr, canParse mgr lang, let ts = parse mgr lang typ s, not (null ts)]
|
||||
|
||||
-- Draw a fresh random generator and delegate to the pure genRandom.
generateRandom pgf cat = do
  gen <- newStdGen
  return $ genRandom gen pgf cat

generateAll pgf cat = generate pgf cat Nothing
-- Partial application: the Maybe Int depth limit is supplied by the caller.
generateAllDepth pgf cat = generate pgf cat

abstractName pgf = absname pgf

languages pgf = cncnames pgf

-- RFC 4646 tags use '-' where GF flag values use '_', hence the replace.
languageCode pgf lang =
    fmap (replace '_' '-') $ lookConcrFlag pgf lang (mkCId "language")
|
||||
|
||||
-- All category names of the abstract syntax.  Map.keys is equivalent to
-- the previous list comprehension over Map.toList (which bound the
-- hypotheses without using them) and matches the style of 'functions'.
categories pgf = Map.keys (cats (abstract pgf))
|
||||
|
||||
-- The start category carries no hypotheses or indices.
startCat pgf = DTyp [] (lookStartCat pgf) []

functions pgf = Map.keys (funs (abstract pgf))

-- Only the type component of the (type, arity?, equations?) triple is
-- exposed here.
functionType pgf fun =
  case Map.lookup fun (funs (abstract pgf)) of
    Just (ty,_,_) -> Just ty
    Nothing       -> Nothing
|
||||
|
||||
-- Complete the last word of the input.  Feeds the already-complete
-- tokens to the incremental parser, then asks it for the possible next
-- words matching the typed prefix.  Returns [] when a token is rejected.
complete pgf from typ input =
  let (ws,prefix) = tokensAndPrefix input
      state0 = Incremental.initState pgf from typ
  in case loop state0 ws of
       Nothing -> []
       Just state ->
         -- If the input is already a complete sentence, offer it as-is.
         (if null prefix && not (null (Incremental.extractTrees state typ)) then [unwords ws ++ " "] else [])
         ++ [unwords (ws++[c]) ++ " " | c <- Map.keys (Incremental.getCompletions state prefix)]
  where
    -- Split the input into complete tokens plus the (possibly empty)
    -- word fragment still being typed.
    tokensAndPrefix :: String -> ([String],String)
    tokensAndPrefix s | not (null s) && isSpace (last s) = (ws, "")
                      | null ws = ([],"")
                      | otherwise = (init ws, last ws)
      where ws = words s

    -- Advance the parse state over all complete tokens; Nothing as soon
    -- as a token is rejected.  (Previously bound the unused error list
    -- and shadowed 'ps', causing compiler warnings.)
    loop ps []     = Just ps
    loop ps (t:ts) = case Incremental.nextState ps t of
                       Left  _   -> Nothing
                       Right ps' -> loop ps' ts
|
||||
|
||||
-- | Converts an expression to normal form
-- (delegates to the evaluator in PGF.Data with an empty environment).
compute :: PGF -> Expr -> Expr
compute pgf = PGF.Data.normalForm (funs (abstract pgf)) 0 []
|
||||
|
||||
-- | Look up an identifier in the abstract syntax: the rendered
-- definition together with two lists of related functions (derived via
-- tyIds below — presumably "producers" mention the id in their result
-- and "consumers" in their arguments; confirm against callers).
-- 'Nothing' when the id is neither a function nor a category.
browse :: PGF -> CId -> Maybe (String,[CId],[CId])
browse pgf id = fmap (\def -> (def,producers,consumers)) definition
  where
    -- Render the "fun ..." (with optional "def" equations) or "cat ..."
    -- source form of the identifier.
    definition = case Map.lookup id (funs (abstract pgf)) of
                   Just (ty,_,eqs) -> Just $ render (text "fun" <+> ppCId id <+> colon <+> ppType 0 [] ty $$
                                                     if null eqs
                                                       then empty
                                                       else text "def" <+> vcat [let (scope,ds) = mapAccumL (ppPatt 9) [] patts
                                                                                 in ppCId id <+> hsep ds <+> char '=' <+> ppExpr 0 scope res | Equ patts res <- eqs])
                   Nothing         -> case Map.lookup id (cats (abstract pgf)) of
                                        Just hyps -> Just $ render (text "cat" <+> ppCId id <+> hsep (snd (mapAccumL ppHypo [] hyps)))
                                        Nothing   -> Nothing

    -- NOTE(review): the bang patterns below require BangPatterns, yet no
    -- pragma is visible in this file — presumably enabled via build
    -- flags; confirm.  Map.foldWithKey is deprecated in newer
    -- containers (foldrWithKey).
    (producers,consumers) = Map.foldWithKey accum ([],[]) (funs (abstract pgf))
      where
        accum f (ty,_,_) (plist,clist) =
          let !plist' = if id `elem` ps then f : plist else plist
              !clist' = if id `elem` cs then f : clist else clist
          in (plist',clist')
          where
            (ps,cs) = tyIds ty

    -- Collect identifiers from a type; the components swap at each
    -- hypothesis level (contravariance of argument positions).
    tyIds (DTyp hyps cat es) = (foldr expIds (cat:concat css) es,concat pss)
      where
        (pss,css) = unzip [tyIds ty | (_,_,ty) <- hyps]

    -- Identifiers occurring in an expression (functions only).
    expIds (EAbs _ _ e) ids = expIds e ids
    expIds (EApp e1 e2) ids = expIds e1 (expIds e2 ids)
    expIds (EFun id)    ids = id : ids
    expIds (ETyped e _) ids = expIds e ids
    expIds _            ids = ids
|
||||
199
src/runtime/haskell/PGF/Binary.hs
Normal file
199
src/runtime/haskell/PGF/Binary.hs
Normal file
@@ -0,0 +1,199 @@
|
||||
module PGF.Binary where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import Data.Binary
|
||||
import Data.Binary.Put
|
||||
import Data.Binary.Get
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Control.Monad
|
||||
|
||||
-- Version stamp written at the front of every serialised PGF file.
pgfMajorVersion, pgfMinorVersion :: Word16
(pgfMajorVersion, pgfMinorVersion) = (1,0)

instance Binary PGF where
  put pgf = putWord16be pgfMajorVersion >>
            putWord16be pgfMinorVersion >>
            put ( absname pgf, cncnames pgf
                , gflags pgf
                , abstract pgf, concretes pgf
                )
  -- NOTE(review): v1/v2 are read but never compared against
  -- pgfMajorVersion/pgfMinorVersion, so an incompatible file is only
  -- caught later (if at all) by 'decodingError' — confirm intent.
  get = do v1 <- getWord16be
           v2 <- getWord16be
           absname <- get
           cncnames <- get
           gflags <- get
           abstract <- get
           concretes <- get
           return (PGF{ absname=absname, cncnames=cncnames
                      , gflags=gflags
                      , abstract=abstract, concretes=concretes
                      })
|
||||
|
||||
-- CId serialises as its underlying ByteString.
instance Binary CId where
  put (CId bs) = put bs
  get = liftM CId get

instance Binary Abstr where
  put abs = put (aflags abs, funs abs, cats abs)
  get = do aflags <- get
           funs <- get
           cats <- get
           -- catfuns is a derived index (functions grouped by result
           -- category); it is rebuilt on load rather than serialised.
           let catfuns = Map.mapWithKey (\cat _ -> [f | (f, (DTyp _ c _,_,_)) <- Map.toList funs, c==cat]) cats
           return (Abstr{ aflags=aflags
                        , funs=funs, cats=cats
                        , catfuns=catfuns
                        })

-- All Concr fields are serialised verbatim, including the (optional)
-- parser tables.
instance Binary Concr where
  put cnc = put ( cflags cnc, lins cnc, opers cnc
                , lincats cnc, lindefs cnc
                , printnames cnc, paramlincats cnc
                , parser cnc
                )
  get = do cflags <- get
           lins <- get
           opers <- get
           lincats <- get
           lindefs <- get
           printnames <- get
           paramlincats <- get
           parser <- get
           return (Concr{ cflags=cflags, lins=lins, opers=opers
                        , lincats=lincats, lindefs=lindefs
                        , printnames=printnames
                        , paramlincats=paramlincats
                        , parser=parser
                        })
|
||||
|
||||
instance Binary Alternative where
  put (Alt v x) = put v >> put x
  get = liftM2 Alt get get

-- Terms are encoded as a one-byte constructor tag followed by the
-- constructor's fields; the tags in 'get' must stay in sync with 'put'.
instance Binary Term where
  put (R es)        = putWord8 0 >> put es
  put (S es)        = putWord8 1 >> put es
  put (FV es)       = putWord8 2 >> put es
  put (P e v)       = putWord8 3 >> put (e,v)
  put (W e v)       = putWord8 4 >> put (e,v)
  put (C i )        = putWord8 5 >> put i
  put (TM i )       = putWord8 6 >> put i
  put (F f)         = putWord8 7 >> put f
  put (V i)         = putWord8 8 >> put i
  put (K (KS s))    = putWord8 9 >> put s
  put (K (KP d vs)) = putWord8 10 >> put (d,vs)

  get = do tag <- getWord8
           case tag of
             0 -> liftM R get
             1 -> liftM S get
             2 -> liftM FV get
             3 -> liftM2 P get get
             4 -> liftM2 W get get
             5 -> liftM C get
             6 -> liftM TM get
             7 -> liftM F get
             8 -> liftM V get
             9 -> liftM (K . KS) get
             10 -> liftM2 (\d vs -> K (KP d vs)) get get
             _ -> decodingError
|
||||
|
||||
-- Expressions: one-byte tag + fields; 'get' mirrors the tags in 'put'.
instance Binary Expr where
  put (EAbs b x exp)  = putWord8 0 >> put (b,x,exp)
  put (EApp e1 e2)    = putWord8 1 >> put (e1,e2)
  put (ELit (LStr s)) = putWord8 2 >> put s
  put (ELit (LFlt d)) = putWord8 3 >> put d
  put (ELit (LInt i)) = putWord8 4 >> put i
  put (EMeta i)       = putWord8 5 >> put i
  put (EFun f)        = putWord8 6 >> put f
  put (EVar i)        = putWord8 7 >> put i
  put (ETyped e ty)   = putWord8 8 >> put (e,ty)
  get = do tag <- getWord8
           case tag of
             0 -> liftM3 EAbs get get get
             1 -> liftM2 EApp get get
             2 -> liftM (ELit . LStr) get
             3 -> liftM (ELit . LFlt) get
             4 -> liftM (ELit . LInt) get
             5 -> liftM EMeta get
             6 -> liftM EFun get
             7 -> liftM EVar get
             8 -> liftM2 ETyped get get
             _ -> decodingError

-- Patterns: same tag-based scheme.
instance Binary Patt where
  put (PApp f ps)     = putWord8 0 >> put (f,ps)
  put (PVar x)        = putWord8 1 >> put x
  put PWild           = putWord8 2
  put (PLit (LStr s)) = putWord8 3 >> put s
  put (PLit (LFlt d)) = putWord8 4 >> put d
  put (PLit (LInt i)) = putWord8 5 >> put i
  get = do tag <- getWord8
           case tag of
             0 -> liftM2 PApp get get
             1 -> liftM PVar get
             2 -> return PWild
             3 -> liftM (PLit . LStr) get
             4 -> liftM (PLit . LFlt) get
             5 -> liftM (PLit . LInt) get
             _ -> decodingError

instance Binary Equation where
  put (Equ ps e) = put (ps,e)
  get = liftM2 Equ get get

instance Binary Type where
  put (DTyp hypos cat exps) = put (hypos,cat,exps)
  get = liftM3 DTyp get get get

instance Binary BindType where
  put Explicit = putWord8 0
  put Implicit = putWord8 1
  get = do tag <- getWord8
           case tag of
             0 -> return Explicit
             1 -> return Implicit
             _ -> decodingError

instance Binary FFun where
  put (FFun fun prof lins) = put (fun,prof,lins)
  get = liftM3 FFun get get get
|
||||
|
||||
-- FCFG symbols: one-byte tag + fields; 'get' mirrors the tags in 'put'.
instance Binary FSymbol where
  put (FSymCat n l) = putWord8 0 >> put (n,l)
  put (FSymLit n l) = putWord8 1 >> put (n,l)
  put (FSymKS ts)   = putWord8 2 >> put ts
  put (FSymKP d vs) = putWord8 3 >> put (d,vs)
  get = do tag <- getWord8
           case tag of
             0 -> liftM2 FSymCat get get
             1 -> liftM2 FSymLit get get
             2 -> liftM FSymKS get
             -- eta-reduced: the former (\d vs -> FSymKP d vs) lambda was
             -- just FSymKP itself
             3 -> liftM2 FSymKP get get
             _ -> decodingError
|
||||
|
||||
instance Binary Production where
  put (FApply ruleid args) = putWord8 0 >> put (ruleid,args)
  put (FCoerce fcat)       = putWord8 1 >> put fcat
  get = do tag <- getWord8
           case tag of
             0 -> liftM2 FApply get get
             1 -> liftM FCoerce get
             _ -> decodingError

instance Binary ParserInfo where
  -- Only productions0 is serialised; the filtered 'productions' view is
  -- recomputed on load via filterProductions.
  put p = put (functions p, sequences p, productions0 p, totalCats p, startCats p)
  get = do functions <- get
           sequences <- get
           productions0<- get
           totalCats <- get
           startCats <- get
           return (ParserInfo{functions=functions,sequences=sequences
                             ,productions0=productions0
                             ,productions =filterProductions productions0
                             ,totalCats=totalCats,startCats=startCats})

-- Raised when a serialised file does not match the format this version
-- of the library writes.
decodingError = fail "This PGF file was compiled with different version of GF"
|
||||
76
src/runtime/haskell/PGF/BuildParser.hs
Normal file
76
src/runtime/haskell/PGF/BuildParser.hs
Normal file
@@ -0,0 +1,76 @@
|
||||
---------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- FCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.BuildParser where
|
||||
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
|
||||
import Data.Array.IArray
|
||||
import Data.Maybe
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import Debug.Trace
|
||||
|
||||
|
||||
-- Extended parser information: precomputed left-corner indexes used to
-- speed up rule prediction.
data ParserInfoEx
  = ParserInfoEx { epsilonRules     :: [(FunId,[FCat],FCat)]            -- rules whose first sequence is empty
                 , leftcornerCats   :: Assoc FCat [(FunId,[FCat],FCat)]   -- rules indexed by left-corner category
                 , leftcornerTokens :: Assoc String [(FunId,[FCat],FCat)] -- rules indexed by left-corner token
                 , grammarToks      :: [String]                           -- all single-token literals in the grammar
                 }

------------------------------------------------------------
-- parser information

-- The left-corner token of a rule: the first symbol of its first
-- linearization sequence, when that symbol is a one-token literal.
getLeftCornerTok pinfo (FFun _ _ lins)
  | inRange (bounds syms) 0 = case syms ! 0 of
                                FSymKS [tok] -> [tok]
                                _ -> []
  | otherwise = []
  where
    syms = (sequences pinfo) ! (lins ! 0)

-- The left-corner category: when the first symbol references an
-- argument category, that category plus everything coercing to it.
-- NOTE(review): 'args !! d' is a partial list index — assumes the
-- FSymCat argument position is always in range; confirm invariant.
getLeftCornerCat pinfo args (FFun _ _ lins)
  | inRange (bounds syms) 0 = case syms ! 0 of
                                FSymCat d _ -> let cat = args !! d
                                               in case IntMap.lookup cat (productions pinfo) of
                                                    Just set -> cat : [cat' | FCoerce cat' <- Set.toList set]
                                                    Nothing  -> [cat]
                                _ -> []
  | otherwise = []
  where
    syms = (sequences pinfo) ! (lins ! 0)
|
||||
|
||||
-- | Precompute the left-corner indexes for a parser table.  Each index
-- is built by one pass over the production map.
buildParserInfo :: ParserInfo -> ParserInfoEx
buildParserInfo pinfo =
  ParserInfoEx { epsilonRules     = epsilonrules
               , leftcornerCats   = leftcorncats
               , leftcornerTokens = leftcorntoks
               , grammarToks      = grammartoks
               }

        -- Rules whose first linearization sequence is empty.
  where epsilonrules = [ (ruleid,args,cat)
                       | (cat,set) <- IntMap.toList (productions pinfo)
                       , (FApply ruleid args) <- Set.toList set
                       , let (FFun _ _ lins) = (functions pinfo) ! ruleid
                       , not (inRange (bounds ((sequences pinfo) ! (lins ! 0))) 0) ]
        -- Rules grouped by their left-corner category.
        leftcorncats = accumAssoc id [ (cat', (ruleid, args, cat))
                                     | (cat,set) <- IntMap.toList (productions pinfo)
                                     , (FApply ruleid args) <- Set.toList set
                                     , cat' <- getLeftCornerCat pinfo args ((functions pinfo) ! ruleid) ]
        -- Rules grouped by their left-corner token.
        leftcorntoks = accumAssoc id [ (tok, (ruleid, args, cat))
                                     | (cat,set) <- IntMap.toList (productions pinfo)
                                     , (FApply ruleid args) <- Set.toList set
                                     , tok <- getLeftCornerTok pinfo ((functions pinfo) ! ruleid) ]
        -- All single-token literals occurring anywhere in the grammar.
        grammartoks = nubsort [t | lin <- elems (sequences pinfo), FSymKS [t] <- elems lin]
|
||||
55
src/runtime/haskell/PGF/CId.hs
Normal file
55
src/runtime/haskell/PGF/CId.hs
Normal file
@@ -0,0 +1,55 @@
|
||||
module PGF.CId (CId(..),
                mkCId, wildCId,
                readCId, showCId,

                -- utils
                pCId, pIdent, ppCId) where

import Control.Monad
import qualified Data.ByteString.Char8 as BS
import Data.Char
import qualified Text.ParserCombinators.ReadP as RP
import qualified Text.PrettyPrint as PP


-- | An abstract data type that represents
-- identifiers for functions and categories in PGF.
newtype CId = CId BS.ByteString deriving (Eq,Ord)

-- | The wildcard identifier, rendered as @\"_\"@.
wildCId :: CId
wildCId = CId (BS.singleton '_')

-- | Creates a new identifier from 'String'
mkCId :: String -> CId
mkCId = CId . BS.pack

-- | Reads an identifier from 'String'. The function returns 'Nothing'
-- if the string is not a valid identifier.
readCId :: String -> Maybe CId
readCId s =
  case [x | (x, rest) <- RP.readP_to_S pCId s, all isSpace rest] of
    [x] -> Just x
    _   -> Nothing

-- | Renders the identifier as 'String'
showCId :: CId -> String
showCId (CId bs) = BS.unpack bs

instance Show CId where
  showsPrec _ = showString . showCId

instance Read CId where
  readsPrec _ = RP.readP_to_S pCId

-- | Parser for identifiers; the wildcard @\"_\"@ alone is rejected.
pCId :: RP.ReadP CId
pCId = do
  s <- pIdent
  if s == "_" then RP.pfail else return (mkCId s)

-- | Parser for raw identifier syntax: a letter or underscore
-- followed by letters, digits, underscores or primes.
pIdent :: RP.ReadP String
pIdent = liftM2 (:) (RP.satisfy isIdentFirst) (RP.munch isIdentRest)
  where
    isIdentFirst c = c == '_' || isLetter c
    isIdentRest  c = c == '_' || c == '\'' || isAlphaNum c

-- | Pretty-prints an identifier.
ppCId :: CId -> PP.Doc
ppCId = PP.text . showCId
|
||||
173
src/runtime/haskell/PGF/Check.hs
Normal file
173
src/runtime/haskell/PGF/Check.hs
Normal file
@@ -0,0 +1,173 @@
|
||||
module PGF.Check (checkPGF) where

import PGF.CId
import PGF.Data
import PGF.Macros
import GF.Data.ErrM

import qualified Data.Map as Map
import Control.Monad
import Debug.Trace

-- | Type-check all linearization rules of all concrete syntaxes.
-- Returns the grammar together with a flag telling whether every
-- rule was well-typed.
checkPGF :: PGF -> Err (PGF,Bool)
checkPGF pgf = do
  checked <- mapM (checkConcrete pgf) (Map.assocs (concretes pgf))
  let (cs,bs) = unzip checked
  return (pgf{concretes = Map.fromAscList cs}, and bs)

-- errors are non-fatal; replace with 'fail' to change this
msg s = trace s (return ())

-- | Monadic 'all': run the action over the list and conjoin the results.
andMapM :: Monad m => (a -> m Bool) -> [a] -> m Bool
andMapM f xs = liftM and (mapM f xs)

-- | Emit the label message whenever the wrapped check reports failure.
labelBoolErr :: String -> Err (x,Bool) -> Err (x,Bool)
labelBoolErr ms iob = do
  (x,b) <- iob
  if b then return (x,b) else msg ms >> return (x,b)

-- | Check every linearization rule of one concrete syntax.
checkConcrete :: PGF -> (CId,Concr) -> Err ((CId,Concr),Bool)
checkConcrete pgf (lang,cnc) =
  labelBoolErr ("happened in language " ++ showCId lang) $ do
    checked <- mapM (checkLin pgf lang) (Map.assocs (lins cnc))
    let (rs,bs) = unzip checked
    return ((lang, cnc{lins = Map.fromAscList rs}), and bs)

-- | Check a single linearization rule against the linearization
-- type of its abstract function.
checkLin :: PGF -> CId -> (CId,Term) -> Err ((CId,Term),Bool)
checkLin pgf lang (f,t) =
  labelBoolErr ("happened in function " ++ showCId f) $ do
    (t',b) <- checkTerm (lintype pgf lang f) t --- $ inline pgf lang t
    return ((f,t'),b)

-- | Infer the type of a term, given the types of the rule's arguments.
inferTerm :: [CType] -> Term -> Err (Term,CType)
inferTerm args trm = case trm of
  K _ -> returnt str                 -- token: type Str
  C i -> returnt (ints i)            -- parameter constant
  V i -> do                          -- argument variable
    testErr (i < length args) ("too large index " ++ show i)
    returnt (args !! i)
  S ts -> do                         -- concatenation: every part must be Str
    (ts',tys) <- liftM unzip (mapM infer ts)
    let nonStr = filter (/= str) tys
    testErr (null nonStr)
      ("expected Str in " ++ show trm ++ " not " ++ unwords (map show nonStr))
    return (S ts', str)
  R ts -> do                         -- record/table
    (ts',tys) <- liftM unzip (mapM infer ts)
    return (R ts', tuple tys)
  P t u -> do                        -- projection/selection
    (t',tt) <- infer t
    (u',tu) <- infer u
    case tt of
      R tys -> case tu of
        -- selecting with a record: expand to iterated projections
        R vs -> infer (foldl P t' [P u' (C i) | i <- [0 .. length vs - 1]])
        --- R [v] -> infer $ P t v
        --- R (v:vs) -> infer $ P (head tys) (R vs)
        C i -> do
          testErr (i < length tys)
            ("required more than " ++ show i ++ " fields in " ++ show (R tys))
          return (P t' u', tys !! i)  -- record: index must be known
        _ -> do
          let typ = head tys
          testErr (all (==typ) tys) ("different types in table " ++ show trm)
          return (P t' u', typ)       -- table: types must be same
      _ -> Bad ("projection from " ++ show t ++ " : " ++ show tt)
  FV [] -> returnt tm0 ----
  FV (t:ts) -> do                    -- variants must all have compatible types
    (t',ty)   <- infer t
    (ts',tys) <- liftM unzip (mapM infer ts)
    testErr (all (eqType True ty) tys) ("different types in variants " ++ show trm)
    return (FV (t':ts'), ty)
  W s r -> infer r                   -- typed as the wrapped body
  _ -> Bad ("no type inference for " ++ show trm)
  where
    returnt ty = return (trm,ty)
    infer = inferTerm args

-- | Check a term against its expected linearization type. Mismatches
-- are reported via 'msg' and only flip the result flag to 'False'.
checkTerm :: LinType -> Term -> Err (Term,Bool)
checkTerm (args,val) trm = case inferTerm args trm of
  Ok (t,ty)
    | eqType False ty val -> return (t,True)
    | otherwise -> do
        msg ("term: " ++ show trm ++
             "\nexpected type: " ++ show val ++
             "\ninferred type: " ++ show ty)
        return (t,False)
  Bad s -> do
    msg s
    return (trm,False)

-- symmetry in (Ints m == Ints n) is all we can use in variants

-- | Compare an inferred type against an expected one; the flag
-- selects the symmetric comparison used for variants.
eqType :: Bool -> CType -> CType -> Bool
eqType symm inf exp = case (inf,exp) of
  (C k, C n)   -> symm || k <= n     -- only run-time corr.
  (R rs, R ts) -> length rs == length ts &&
                  and [eqType symm r t | (r,t) <- zip rs ts]
  (TM _, _)    -> True ---- for variants [] ; not safe
  _            -> inf == exp

-- should be in a generic module, but not in the run-time DataGFCC

type CType   = Term
type LinType = ([CType],CType)

tuple :: [CType] -> CType
tuple = R

ints :: Int -> CType
ints = C

str :: CType
str = S []

-- | The linearization type of a function: the types of its arguments
-- together with the type of its value. ---- HOAS
lintype :: PGF -> CId -> CId -> LinType
lintype pgf lang fun = case typeSkeleton (lookType pgf fun) of
  (cs,c) -> (map vlinc cs, linc c)
  where
    linc = lookLincat pgf lang
    -- a higher-order argument of arity i gets i extra Str fields
    vlinc (0,c) = linc c
    vlinc (i,c) = case linc c of
                    R ts -> R (ts ++ replicate i str)

-- | Expand all function references in a term to their definitions.
inline :: PGF -> CId -> Term -> Term
inline pgf lang t = case t of
  F c -> inl (look c)
  _   -> composSafeOp inl t
  where
    inl  = inline pgf lang
    look = lookLin pgf lang

-- | Apply a monadic transformation to all immediate subterms.
composOp :: Monad m => (Term -> m Term) -> Term -> m Term
composOp f trm = case trm of
  R ts  -> liftM R (mapM f ts)
  S ts  -> liftM S (mapM f ts)
  FV ts -> liftM FV (mapM f ts)
  P t u -> liftM2 P (f t) (f u)
  W s t -> liftM (W s) (f t)
  _     -> return trm

-- | Pure version of 'composOp'.
composSafeOp :: (Term -> Term) -> Term -> Term
composSafeOp f = maybe undefined id . composOp (return . f)

-- from GF.Data.Oper

maybeErr :: String -> Maybe a -> Err a
maybeErr s = maybe (Bad s) Ok

testErr :: Bool -> String -> Err ()
testErr cond msg = if cond then return () else Bad msg

errVal :: a -> Err a -> a
errVal a = err (const a) id

errIn :: String -> Err a -> Err a
errIn msg = err (\s -> Bad (s ++ "\nOCCURRED IN\n" ++ msg)) return

-- | Eliminator for 'Err'.
err :: (String -> b) -> (a -> b) -> Err a -> b
err d f e = case e of
  Ok a  -> f a
  Bad s -> d s
|
||||
95
src/runtime/haskell/PGF/Data.hs
Normal file
95
src/runtime/haskell/PGF/Data.hs
Normal file
@@ -0,0 +1,95 @@
|
||||
module PGF.Data (module PGF.Data, module PGF.Expr, module PGF.Type, module PGF.PMCFG) where

import PGF.CId
import PGF.Expr hiding (Value, Env, Tree)
import PGF.Type
import PGF.PMCFG

import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.List

-- internal datatypes for PGF

-- | An abstract data type representing multilingual grammar
-- in Portable Grammar Format.
data PGF = PGF {
  absname   :: CId ,
  cncnames  :: [CId] ,
  gflags    :: Map.Map CId String,    -- value of a global flag
  abstract  :: Abstr ,
  concretes :: Map.Map CId Concr
  }

-- | The abstract syntax part of a grammar.
data Abstr = Abstr {
  aflags  :: Map.Map CId String,                -- value of a flag
  funs    :: Map.Map CId (Type,Int,[Equation]), -- type, arity and definition of a function
  cats    :: Map.Map CId [Hypo],                -- context of a cat
  catfuns :: Map.Map CId [CId]                  -- funs to a cat (redundant, for fast lookup)
  }

-- | One concrete syntax of a grammar.
data Concr = Concr {
  cflags       :: Map.Map CId String, -- value of a flag
  lins         :: Map.Map CId Term,   -- lin of a fun
  opers        :: Map.Map CId Term,   -- oper generated by subex elim
  lincats      :: Map.Map CId Term,   -- lin type of a cat
  lindefs      :: Map.Map CId Term,   -- lin default of a cat
  printnames   :: Map.Map CId Term,   -- printname of a cat or a fun
  paramlincats :: Map.Map CId Term,   -- lin type of cat, with printable param names
  parser       :: Maybe ParserInfo    -- parser
  }

-- | Linearization terms.
data Term =
   R [Term]      -- record or table
 | P Term Term   -- projection/selection
 | S [Term]      -- sequence (concatenation)
 | K Tokn        -- token
 | V Int         -- argument variable
 | C Int         -- constant (index)
 | F CId         -- reference to another function's linearization
 | FV [Term]     -- free variation
 | W String Term -- NOTE(review): string wrapped around a term -- see PGF.Check
 | TM String     -- metavariable
  deriving (Eq,Ord,Show)

-- | Tokens: plain strings, or prefix-dependent alternatives.
data Tokn =
   KS String
 | KP [String] [Alternative]
  deriving (Eq,Ord,Show)


-- merge two PGFs; fails if the abstract names differ; priority to the second argument

unionPGF :: PGF -> PGF -> PGF
unionPGF one two = case absname one of
  n | n == wildCId     -> two  -- extending empty grammar
    | n == absname two -> one { -- extending grammar with same abstract
        concretes = Map.union (concretes two) (concretes one),
        cncnames  = union (cncnames one) (cncnames two)
       }
  _ -> one  -- abstracts don't match ---- print error msg

emptyPGF :: PGF
emptyPGF = PGF {
  absname   = wildCId,
  cncnames  = [] ,
  gflags    = Map.empty,
  abstract  = error "empty grammar, no abstract",
  concretes = Map.empty
  }

-- | This is just a 'CId' with the language name.
-- A language name is the identifier that you write in the
-- top concrete or abstract module in GF after the
-- concrete/abstract keyword. Example:
--
-- > abstract Lang = ...
-- > concrete LangEng of Lang = ...
type Language = CId

readLanguage :: String -> Maybe Language
readLanguage = readCId

showLanguage :: Language -> String
showLanguage = showCId
|
||||
241
src/runtime/haskell/PGF/Editor.hs
Normal file
241
src/runtime/haskell/PGF/Editor.hs
Normal file
@@ -0,0 +1,241 @@
|
||||
module PGF.Editor (
  State,       -- datatype -- type-annotated possibly open tree with a focus
  Dict,        -- datatype -- abstract syntax information optimized for editing
  Position,    -- datatype -- path from top to focus
  new,         -- :: Type -> State                  -- create new State
  refine,      -- :: Dict -> CId -> State -> State  -- refine focus with CId
  replace,     -- :: Dict -> Tree -> State -> State -- replace focus with Tree
  delete,      -- :: State -> State                 -- replace focus with ?
  goNextMeta,  -- :: State -> State                 -- move focus to next ? node
  goNext,      -- :: State -> State                 -- move to next node
  goTop,       -- :: State -> State                 -- move focus to the top (=root)
  goPosition,  -- :: Position -> State -> State     -- move focus to given position
  mkPosition,  -- :: [Int] -> Position              -- list of choices (top = [])
  showPosition,-- :: Position -> [Int]              -- readable position
  focusType,   -- :: State -> Type                  -- get the type of focus
  stateTree,   -- :: State -> Tree                  -- get the current tree
  isMetaFocus, -- :: State -> Bool                  -- whether focus is ?
  allMetas,    -- :: State -> [(Position,Type)]     -- all ?s and their positions
  prState,     -- :: State -> String                -- print state, focus marked *
  refineMenu,  -- :: Dict -> State -> [CId]         -- get refinement menu
  pgf2dict     -- :: PGF -> Dict                    -- create editing Dict from PGF
  ) where

import PGF.Data
import PGF.CId
import qualified Data.Map as M
import Debug.Trace ----

-- API

-- | Create a fresh state: a single metavariable of the given type.
new :: Type -> State
new (DTyp _ t _) = etree2state (uETree t)

-- | Refine the focused node by applying the given function to fresh
-- metavariables for its arguments.
refine :: Dict -> CId -> State -> State
refine dict f = replaceInState (mkRefinement dict f)

-- | Replace the focused node with a complete tree.
replace :: Dict -> Tree -> State -> State
replace dict t = replaceInState (tree2etree dict t)

-- | Replace the focused node with a fresh metavariable of the
-- focused node's own type.
-- FIX: previously used @typ (tree s)@, i.e. the type of the ROOT,
-- which is wrong whenever the focus is not at the root.
delete :: State -> State
delete s = replaceInState (uETree (focusBType s)) s

-- | Move the focus forward to the next metavariable (wrapping via
-- 'goNext'); identity on complete trees.
goNextMeta :: State -> State
goNextMeta s =
  if isComplete s then s
  else let s1 = goNext s
       in if isMetaFocus s1 then s1 else goNextMeta s1

-- | Whether the tree contains no metavariables.
isComplete :: State -> Bool
isComplete s = isc (tree s) where
  isc t = case atom t of
    AMeta _ -> False
    ACon _  -> all isc (children t)

-- | Move the focus to the root.
goTop :: State -> State
goTop = navigate (const top)

-- | Move the focus to the given position.
goPosition :: [Int] -> State -> State
goPosition p s = s{position = p}

mkPosition :: [Int] -> Position
mkPosition = id

-- | All functions whose value category matches the focus type.
refineMenu :: Dict -> State -> [CId]
refineMenu dict s = maybe [] (map fst) $ M.lookup (focusBType s) (refines dict)

-- | The type of the focused node.
focusType :: State -> Type
focusType s = btype2type (focusBType s)

-- | The current tree, stripped of editing annotations.
stateTree :: State -> Tree
stateTree = etree2tree . tree

-- | Build the editing dictionary from a grammar.
pgf2dict :: PGF -> Dict
pgf2dict pgf = Dict (M.fromAscList fus) refs where
  -- FIX: 'funs' stores (Type, arity, [Equation]) triples
  -- (see Abstr in PGF.Data), so the pattern must match a triple,
  -- not a pair.
  fus  = [(f, mkFType ty) | (f,(ty,_,_)) <- M.toList (funs abs)]
  refs = M.fromAscList [(c, fusTo c) | (c,_) <- M.toList (cats abs)]
  fusTo c = [(f,ty) | (f,ty@(_,k)) <- fus, k==c] ---- quadratic
  mkFType (DTyp hyps c _) = ([k | Hyp _ (DTyp _ k _) <- hyps], c) ----dep types
  abs = abstract pgf

-- | Forget the type annotations of an editing tree.
etree2tree :: ETree -> Tree
etree2tree t = case atom t of
  ACon f  -> Fun f (map etree2tree (children t))
  AMeta i -> Meta i

-- | Annotate an abstract tree with types from the dictionary.
-- NOTE(review): partial at the top level — a bare Meta root is not
-- handled, matching the original behavior.
tree2etree :: Dict -> Tree -> ETree
tree2etree dict t = case t of
  Fun f _ -> annot (look f) t
  where
    annot (tys,ty) tr = case tr of
      Fun f trs -> ETree (ACon f) ty [annt t tr | (t,tr) <- zip tys trs]
      Meta i    -> ETree (AMeta i) ty []
    annt ty tr = case tr of
      Fun _ _ -> tree2etree dict tr
      Meta _  -> annot ([],ty) tr
    look f = maybe undefined id $ M.lookup f (functs dict)

-- | Render the state as an indented tree; the focus is marked with @*@.
prState :: State -> String
prState s = unlines [replicate i ' ' ++ f | (i,f) <- pr [] (tree s)] where
  pr i t =
    (ind i, prAtom i (atom t)) : concat [pr (sub j i) c | (j,c) <- zip [0..] (children t)]
  prAtom i a = prFocus i ++ case a of
    ACon f  -> prCId f
    AMeta m -> "?" ++ show m
  prFocus i = if i == position s then "*" else ""
  ind i = 2 * length i
  sub j i = i ++ [j]

showPosition :: Position -> [Int]
showPosition = id

-- | All metavariables with their positions and types.
allMetas :: State -> [(Position,Type)]
allMetas s = [(reverse p, btype2type ty) | (p,ty) <- metas [] (tree s)] where
  metas p t =
    (if isMetaAtom (atom t) then [(p, typ t)] else []) ++
    concat [metas (i:p) u | (i,u) <- zip [0..] (children t)]

---- Trees and navigation

data ETree = ETree {
  atom     :: Atom,   -- constructor or metavariable at this node
  typ      :: BType,  -- category of this node
  children :: [ETree]
  }
 deriving Show

data Atom =
   ACon CId
 | AMeta Int
  deriving Show

btype2type :: BType -> Type
btype2type t = DTyp [] t []

-- | A tree consisting of a single metavariable.
uETree :: BType -> ETree
uETree ty = ETree (AMeta 0) ty []

data State = State {
  position :: Position,
  tree     :: ETree
  }
 deriving Show

type Position = [Int]

top :: Position
top = []

up :: Position -> Position
up p = case p of
  _:_ -> init p
  _   -> p

down :: Position -> Position
down = (++[0])

left :: Position -> Position
left p = case p of
  _:_ | last p > 0 -> init p ++ [last p - 1]
  _ -> top

right :: Position -> Position
right p = case p of
  _:_ -> init p ++ [last p + 1]
  _   -> top

etree2state :: ETree -> State
etree2state = State top

-- | Apply a transformation to the subtree at the focus position.
doInState :: (ETree -> ETree) -> State -> State
doInState f s = s{tree = change (position s) (tree s)} where
  change p t = case p of
    []   -> f t
    n:ns -> let (ts1, t0:ts2) = splitAt n (children t)
            in t{children = ts1 ++ [change ns t0] ++ ts2}

subtree :: Position -> ETree -> ETree
subtree p t = case p of
  []   -> t
  n:ns -> subtree ns (children t !! n)

focus :: State -> ETree
focus s = subtree (position s) (tree s)

focusBType :: State -> BType
focusBType s = typ (focus s)

navigate :: (Position -> Position) -> State -> State
navigate p s = s{position = p (position s)}

-- p is a fix-point aspect of state change
untilFix :: Eq a => (State -> a) -> (State -> Bool) -> (State -> State) -> State -> State
untilFix p b f s =
  if b s
  then s
  else let fs = f s
       in if p fs == p s then s else untilFix p b f fs

untilPosition :: (State -> Bool) -> (State -> State) -> State -> State
untilPosition = untilFix position

-- | Depth-first successor: first child if any, else the next sibling,
-- else the next sibling of the nearest ancestor that has one
-- (falling back to the root at the end of the tree).
goNext :: State -> State
goNext s = case focus s of
  st | not (null (children st)) -> navigate down s
  _ -> findSister s
  where
    findSister s = case s of
      s' | null (position s')   -> s'
      s' | hasYoungerSisters s' -> navigate right s'
      s' -> findSister (navigate up s')
    hasYoungerSisters s = case position s of
      p@(_:_) -> length (children (focus (navigate up s))) > last p + 1
      _ -> False

isMetaFocus :: State -> Bool
isMetaFocus s = isMetaAtom (atom (focus s))

isMetaAtom :: Atom -> Bool
isMetaAtom a = case a of
  AMeta _ -> True
  _       -> False

replaceInState :: ETree -> State -> State
replaceInState t = doInState (const t)


-------

type BType = CId ----dep types
type FType = ([BType],BType) ----dep types

data Dict = Dict {
  functs  :: M.Map CId FType,          -- function -> its editing type
  refines :: M.Map BType [(CId,FType)] -- category -> functions producing it
  }

-- | A one-level tree: the function applied to fresh metavariables.
mkRefinement :: Dict -> CId -> ETree
mkRefinement dict f = ETree (ACon f) val (map uETree args) where
  (args,val) = maybe undefined id $ M.lookup f (functs dict)
|
||||
|
||||
355
src/runtime/haskell/PGF/Expr.hs
Normal file
355
src/runtime/haskell/PGF/Expr.hs
Normal file
@@ -0,0 +1,355 @@
|
||||
module PGF.Expr(Tree, BindType(..), Expr(..), Literal(..), Patt(..), Equation(..),
                readExpr, showExpr, pExpr, pBinds, ppExpr, ppPatt,

                mkApp, unApp,
                mkStr, unStr,
                mkInt, unInt,
                mkDouble, unDouble,
                mkMeta, isMeta,

                normalForm,

                -- needed in the typechecker
                Value(..), Env, Funs, eval, apply,

                MetaId,

                -- helpers
                pMeta,pStr,pArg,pLit,freshName,ppMeta,ppLit,ppParens
               ) where

import PGF.CId
import PGF.Type

import Data.Char
import Data.Maybe
import Data.List as List
import Data.Map as Map hiding (showTree)
import Control.Monad
import qualified Text.PrettyPrint as PP
import qualified Text.ParserCombinators.ReadP as RP

data Literal =
   LStr String   -- ^ string constant
 | LInt Integer  -- ^ integer constant
 | LFlt Double   -- ^ floating point constant
  deriving (Eq,Ord,Show)

type MetaId = Int

data BindType =
   Explicit
 | Implicit
  deriving (Eq,Ord,Show)

-- | Tree is the abstract syntax representation of a given sentence
-- in some concrete syntax. Technically 'Tree' is a type synonym
-- of 'Expr'.
type Tree = Expr

-- | An expression in the abstract syntax of the grammar. It could be
-- both parameter of a dependent type or an abstract syntax tree for
-- some sentence.
data Expr =
   EAbs BindType CId Expr         -- ^ lambda abstraction
 | EApp Expr Expr                 -- ^ application
 | ELit Literal                   -- ^ literal
 | EMeta {-# UNPACK #-} !MetaId   -- ^ meta variable
 | EFun CId                       -- ^ function or data constructor
 | EVar {-# UNPACK #-} !Int       -- ^ variable with de Bruijn index
 | ETyped Expr Type               -- ^ local type signature
 | EImplArg Expr                  -- ^ implicit argument in expression
  deriving (Eq,Ord,Show)

-- | The pattern is used to define equations in the abstract syntax of the grammar.
data Patt =
   PApp CId [Patt]  -- ^ application. The identifier should be constructor i.e. defined with 'data'
 | PLit Literal     -- ^ literal
 | PVar CId         -- ^ variable
 | PWild            -- ^ wildcard
 | PImplArg Patt    -- ^ implicit argument in pattern
  deriving (Eq,Ord)

-- | The equation is used to define lambda function as a sequence
-- of equations with pattern matching. The list of 'Expr' represents
-- the patterns and the second 'Expr' is the function body for this
-- equation.
data Equation =
   Equ [Patt] Expr
  deriving (Eq,Ord)

-- | parses 'String' as an expression
readExpr :: String -> Maybe Expr
readExpr s =
  case [x | (x, rest) <- RP.readP_to_S pExpr s, all isSpace rest] of
    [x] -> Just x
    _   -> Nothing

-- | renders expression as 'String'. The list
-- of identifiers is the list of all free variables
-- in the expression in order reverse to the order
-- of binding.
showExpr :: [CId] -> Expr -> String
showExpr vars = PP.render . ppExpr 0 vars

instance Read Expr where
  readsPrec _ = RP.readP_to_S pExpr

-- | Constructs an expression by applying a function to a list of expressions
mkApp :: CId -> [Expr] -> Expr
mkApp f = foldl EApp (EFun f)

-- | Decomposes an expression into application of function
unApp :: Expr -> Maybe (CId,[Expr])
unApp = extract []
  where
    extract es (EFun f)     = Just (f,es)
    extract es (EApp e1 e2) = extract (e2:es) e1
    extract es _            = Nothing

-- | Constructs an expression from string literal
mkStr :: String -> Expr
mkStr s = ELit (LStr s)

-- | Decomposes an expression into string literal
unStr :: Expr -> Maybe String
unStr (ELit (LStr s)) = Just s
unStr _               = Nothing

-- | Constructs an expression from integer literal
mkInt :: Integer -> Expr
mkInt i = ELit (LInt i)

-- | Decomposes an expression into integer literal
unInt :: Expr -> Maybe Integer
unInt (ELit (LInt i)) = Just i
unInt _               = Nothing

-- | Constructs an expression from real number literal
mkDouble :: Double -> Expr
mkDouble f = ELit (LFlt f)

-- | Decomposes an expression into real number literal
unDouble :: Expr -> Maybe Double
unDouble (ELit (LFlt f)) = Just f
unDouble _               = Nothing

-- | Constructs an expression which is meta variable
mkMeta :: Expr
mkMeta = EMeta 0

-- | Checks whether an expression is a meta variable
isMeta :: Expr -> Bool
isMeta (EMeta _) = True
isMeta _         = False

-----------------------------------------------------
-- Parsing
-----------------------------------------------------

pExpr :: RP.ReadP Expr
pExpr = RP.skipSpaces >> (pAbs RP.<++ pTerm)
  where
    -- an application chain: a factor followed by arguments
    pTerm = do f <- pFactor
               RP.skipSpaces
               as <- RP.sepBy pArg RP.skipSpaces
               return (foldl EApp f as)

    -- a lambda abstraction: \x,y -> e
    pAbs = do xs <- RP.between (RP.char '\\') (RP.skipSpaces >> RP.string "->") pBinds
              e  <- pExpr
              return (foldr (\(b,x) e -> EAbs b x e) e xs)

pBinds :: RP.ReadP [(BindType,CId)]
pBinds = do xss <- RP.sepBy1 (RP.skipSpaces >> pBind) (RP.skipSpaces >> RP.char ',')
            return (concat xss)
  where
    pCIdOrWild = pCId `mplus` (RP.char '_' >> return wildCId)

    -- a single explicit binder, or a braced group of implicit ones
    pBind =
      do x <- pCIdOrWild
         return [(Explicit,x)]
      `mplus`
      RP.between (RP.char '{')
                 (RP.skipSpaces >> RP.char '}')
                 (RP.sepBy1 (RP.skipSpaces >> pCIdOrWild >>= \id -> return (Implicit,id)) (RP.skipSpaces >> RP.char ','))

pArg = fmap EImplArg (RP.between (RP.char '{') (RP.char '}') pExpr)
       RP.<++
       pFactor

pFactor =        fmap EFun  pCId
          RP.<++ fmap ELit  pLit
          RP.<++ fmap EMeta pMeta
          RP.<++ RP.between (RP.char '(') (RP.char ')') pExpr
          RP.<++ RP.between (RP.char '<') (RP.char '>') pTyped

pTyped = do RP.skipSpaces
            e <- pExpr
            RP.skipSpaces
            RP.char ':'
            RP.skipSpaces
            ty <- pType
            return (ETyped e ty)

pMeta = do RP.char '?'
           return 0

pLit :: RP.ReadP Literal
pLit = pNum RP.<++ liftM LStr pStr

pNum = do x <- RP.munch1 isDigit
          ((RP.char '.' >> RP.munch1 isDigit >>= \y -> return (LFlt (read (x++"."++y))))
           RP.<++
           (return (LInt (read x))))

pStr = RP.char '"' >> (RP.manyTill (pEsc RP.<++ RP.get) (RP.char '"'))
  where
    pEsc = RP.char '\\' >> RP.get


-----------------------------------------------------
-- Printing
-----------------------------------------------------

ppExpr :: Int -> [CId] -> Expr -> PP.Doc
ppExpr d scope (EAbs b x e) =
  -- collect the whole spine of nested abstractions into one "\..,.. ->"
  let (bs,xs,e1) = getVars [] [] (EAbs b x e)
  in ppParens (d > 1) (PP.char '\\' PP.<>
                       PP.hsep (PP.punctuate PP.comma (reverse (List.zipWith ppBind bs xs))) PP.<+>
                       PP.text "->" PP.<+>
                       ppExpr 1 (xs++scope) e1)
  where
    getVars bs xs (EAbs b x e) = getVars (b:bs) ((freshName x xs):xs) e
    getVars bs xs e            = (bs,xs,e)
ppExpr d scope (EApp e1 e2) = ppParens (d > 3) (ppExpr 3 scope e1 PP.<+> ppExpr 4 scope e2)
ppExpr d scope (ELit l)     = ppLit l
ppExpr d scope (EMeta n)    = ppMeta n
ppExpr d scope (EFun f)     = ppCId f
ppExpr d scope (EVar i)     = ppCId (scope !! i)
ppExpr d scope (ETyped e ty)= PP.char '<' PP.<> ppExpr 0 scope e PP.<+> PP.colon PP.<+> ppType 0 scope ty PP.<> PP.char '>'
ppExpr d scope (EImplArg e) = PP.braces (ppExpr 0 scope e)

-- | Pretty-print a pattern; also returns the scope extended with the
-- variables the pattern binds.
ppPatt :: Int -> [CId] -> Patt -> ([CId],PP.Doc)
ppPatt d scope (PApp f ps) =
  let (scope',ds) = mapAccumL (ppPatt 2) scope ps
  in (scope', ppParens (not (List.null ps) && d > 1) (ppCId f PP.<+> PP.hsep ds))
ppPatt d scope (PLit l)     = (scope, ppLit l)
ppPatt d scope (PVar f)     = (f:scope, ppCId f)
ppPatt d scope PWild        = (scope, PP.char '_')
ppPatt d scope (PImplArg p) =
  let (scope',doc) = ppPatt 0 scope p
  in (scope', PP.braces doc)

ppBind Explicit x = ppCId x
ppBind Implicit x = PP.braces (ppCId x)

ppLit (LStr s) = PP.text (show s)
ppLit (LInt n) = PP.integer n
ppLit (LFlt d) = PP.double d

ppMeta :: MetaId -> PP.Doc
ppMeta n
  | n == 0    = PP.char '?'
  | otherwise = PP.char '?' PP.<> PP.int n

ppParens True  = PP.parens
ppParens False = id

-- | Produce a variant of the identifier that does not clash with
-- any identifier in the given list (nor with the wildcard).
freshName :: CId -> [CId] -> CId
freshName x xs0 = loop 1 x
  where
    xs = wildCId : xs0

    loop i y
      | elem y xs = loop (i+1) (mkCId (show x ++ show i))
      | otherwise = y


-----------------------------------------------------
-- Computation
-----------------------------------------------------

-- | Compute an expression to normal form
normalForm :: Funs -> Int -> Env -> Expr -> Expr
normalForm funs k env e = value2expr k (eval funs env e)
  where
    value2expr i (VApp f vs)      = foldl EApp (EFun f) (List.map (value2expr i) vs)
    value2expr i (VGen j vs)      = foldl EApp (EVar (i-j-1)) (List.map (value2expr i) vs)
    value2expr i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr i) vs)
    value2expr i (VSusp j env vs k) = value2expr i (k (VGen j vs))
    value2expr i (VLit l)         = ELit l
    value2expr i (VClosure env (EAbs b x e)) =
      EAbs b x (value2expr (i+1) (eval funs ((VGen i []):env) e))
    value2expr i (VImplArg v)     = EImplArg (value2expr i v)

data Value
  = VApp CId [Value]
  | VLit Literal
  | VMeta {-# UNPACK #-} !MetaId Env [Value]
  | VSusp {-# UNPACK #-} !MetaId Env [Value] (Value -> Value)
  | VGen  {-# UNPACK #-} !Int [Value]
  | VClosure Env Expr
  | VImplArg Value

type Funs = Map.Map CId (Type,Int,[Equation]) -- type and def of a fun
type Env  = [Value]

eval :: Funs -> Env -> Expr -> Value
eval funs env (EVar i)     = env !! i
eval funs env (EFun f)     = case Map.lookup f funs of
                               Just (_,a,eqs) ->
                                 -- a nullary definition unfolds immediately
                                 if a == 0
                                 then case eqs of
                                        Equ [] e : _ -> eval funs [] e
                                        _            -> VApp f []
                                 else VApp f []
                               Nothing -> error ("unknown function "++showCId f)
eval funs env (EApp e1 e2) = apply funs env e1 [eval funs env e2]
eval funs env (EAbs b x e) = VClosure env (EAbs b x e)
eval funs env (EMeta i)    = VMeta i env []
eval funs env (ELit l)     = VLit l
eval funs env (ETyped e _) = eval funs env e
eval funs env (EImplArg e) = VImplArg (eval funs env e)

apply :: Funs -> Env -> Expr -> [Value] -> Value
apply funs env e          []     = eval funs env e
apply funs env (EVar i)   vs     = applyValue funs (env !! i) vs
apply funs env (EFun f)   vs     = case Map.lookup f funs of
                                     Just (_,a,eqs) ->
                                       if a <= length vs
                                       then let (as,vs') = splitAt a vs
                                            in match funs f eqs as vs'
                                       else VApp f vs
                                     Nothing -> error ("unknown function "++showCId f)
apply funs env (EApp e1 e2) vs   = apply funs env e1 (eval funs env e2 : vs)
apply funs env (EAbs _ x e) (v:vs) = apply funs (v:env) e vs
apply funs env (EMeta i)  vs     = VMeta i env vs
apply funs env (ELit l)   vs     = error "literal of function type"
apply funs env (ETyped e _) vs   = apply funs env e vs
apply funs env (EImplArg _) vs   = error "implicit argument in function position"

applyValue funs v                     []     = v
applyValue funs (VApp f vs0)          vs     = apply funs [] (EFun f) (vs0++vs)
applyValue funs (VLit _)              vs     = error "literal of function type"
applyValue funs (VMeta i env vs0)     vs     = VMeta i env (vs0++vs)
applyValue funs (VGen  i vs0)         vs     = VGen i (vs0++vs)
applyValue funs (VSusp i env vs0 k)   vs     = VSusp i env vs0 (\v -> applyValue funs (k v) vs)
applyValue funs (VClosure env (EAbs b x e)) (v:vs) = apply funs (v:env) e vs
applyValue funs (VImplArg _)          vs     = error "implicit argument in function position"

-----------------------------------------------------
-- Pattern matching
-----------------------------------------------------

match :: Funs -> CId -> [Equation] -> [Value] -> [Value] -> Value
match funs f eqs as0 vs0 =
  case eqs of
    []               -> VApp f (as0++vs0)
    (Equ ps res):eqs -> tryMatches eqs ps as0 res []
  where
    tryMatches eqs []     []     res env = apply funs env res vs0
    tryMatches eqs (p:ps) (a:as) res env = tryMatch p a env
      where
        tryMatch (PVar x     ) (v                ) env = tryMatches eqs ps as res (v:env)
        tryMatch (PWild      ) (_                ) env = tryMatches eqs ps as res env
        tryMatch (p          ) (VMeta i envi vs  ) env = VSusp i envi vs (\v -> tryMatch p v env)
        tryMatch (p          ) (VGen  i vs       ) env = VApp f (as0++vs0)
        tryMatch (p          ) (VSusp i envi vs k) env = VSusp i envi vs (\v -> tryMatch p (k v) env)
        tryMatch (PApp f1 ps1) (VApp f2 vs2      ) env
          | f1 == f2 = tryMatches eqs (ps1++ps) (vs2++as) res env
        tryMatch (PLit l1    ) (VLit l2          ) env
          | l1 == l2 = tryMatches eqs ps as res env
        tryMatch (PImplArg p ) (VImplArg v       ) env = tryMatch p v env
        tryMatch _             _                   env = match funs f eqs as0 vs0
|
||||
|
||||
28
src/runtime/haskell/PGF/Expr.hs-boot
Normal file
28
src/runtime/haskell/PGF/Expr.hs-boot
Normal file
@@ -0,0 +1,28 @@
|
||||
module PGF.Expr where
|
||||
|
||||
import PGF.CId
|
||||
import qualified Text.PrettyPrint as PP
|
||||
import qualified Text.ParserCombinators.ReadP as RP
|
||||
|
||||
data Expr
|
||||
|
||||
instance Eq Expr
|
||||
instance Ord Expr
|
||||
instance Show Expr
|
||||
|
||||
|
||||
data BindType = Explicit | Implicit
|
||||
|
||||
instance Eq BindType
|
||||
instance Ord BindType
|
||||
instance Show BindType
|
||||
|
||||
|
||||
pArg :: RP.ReadP Expr
|
||||
pBinds :: RP.ReadP [(BindType,CId)]
|
||||
|
||||
ppExpr :: Int -> [CId] -> Expr -> PP.Doc
|
||||
|
||||
freshName :: CId -> [CId] -> CId
|
||||
|
||||
ppParens :: Bool -> PP.Doc -> PP.Doc
|
||||
66
src/runtime/haskell/PGF/Generate.hs
Normal file
66
src/runtime/haskell/PGF/Generate.hs
Normal file
@@ -0,0 +1,66 @@
|
||||
module PGF.Generate where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.TypeCheck
|
||||
|
||||
import qualified Data.Map as M
|
||||
import System.Random
|
||||
|
||||
-- generate an infinite list of trees exhaustively
|
||||
generate :: PGF -> Type -> Maybe Int -> [Expr]
|
||||
generate pgf ty@(DTyp _ cat _) dp = filter (\e -> case checkExpr pgf e ty of
|
||||
Left _ -> False
|
||||
Right _ -> True )
|
||||
(concatMap (\i -> gener i cat) depths)
|
||||
where
|
||||
gener 0 c = [EFun f | (f, ([],_)) <- fns c]
|
||||
gener i c = [
|
||||
tr |
|
||||
(f, (cs,_)) <- fns c,
|
||||
let alts = map (gener (i-1)) cs,
|
||||
ts <- combinations alts,
|
||||
let tr = foldl EApp (EFun f) ts,
|
||||
depth tr >= i
|
||||
]
|
||||
fns c = [(f,catSkeleton ty) | (f,ty) <- functionsToCat pgf c]
|
||||
depths = maybe [0 ..] (\d -> [0..d]) dp
|
||||
|
||||
-- generate an infinite list of trees randomly
|
||||
genRandom :: StdGen -> PGF -> Type -> [Expr]
|
||||
genRandom gen pgf ty@(DTyp _ cat _) = filter (\e -> case checkExpr pgf e ty of
|
||||
Left _ -> False
|
||||
Right _ -> True )
|
||||
(genTrees (randomRs (0.0, 1.0 :: Double) gen) cat)
|
||||
where
|
||||
timeout = 47 -- give up
|
||||
|
||||
genTrees ds0 cat =
|
||||
let (ds,ds2) = splitAt (timeout+1) ds0 -- for time out, else ds
|
||||
(t,k) = genTree ds cat
|
||||
in (if k>timeout then id else (t:))
|
||||
(genTrees ds2 cat) -- else (drop k ds)
|
||||
|
||||
genTree rs = gett rs where
|
||||
gett ds cid | cid == cidString = (ELit (LStr "foo"), 1)
|
||||
gett ds cid | cid == cidInt = (ELit (LInt 12345), 1)
|
||||
gett ds cid | cid == cidFloat = (ELit (LFlt 12345), 1)
|
||||
gett [] _ = (ELit (LStr "TIMEOUT"), 1) ----
|
||||
gett ds cat = case fns cat of
|
||||
[] -> (EMeta 0,1)
|
||||
fs -> let
|
||||
d:ds2 = ds
|
||||
(f,args) = getf d fs
|
||||
(ts,k) = getts ds2 args
|
||||
in (foldl EApp (EFun f) ts, k+1)
|
||||
getf d fs = let lg = (length fs) in
|
||||
fs !! (floor (d * fromIntegral lg))
|
||||
getts ds cats = case cats of
|
||||
c:cs -> let
|
||||
(t, k) = gett ds c
|
||||
(ts,ks) = getts (drop k ds) cs
|
||||
in (t:ts, k + ks)
|
||||
_ -> ([],0)
|
||||
|
||||
fns cat = [(f,(fst (catSkeleton ty))) | (f,ty) <- functionsToCat pgf cat]
|
||||
166
src/runtime/haskell/PGF/Linearize.hs
Normal file
166
src/runtime/haskell/PGF/Linearize.hs
Normal file
@@ -0,0 +1,166 @@
|
||||
{-# LANGUAGE ParallelListComp #-}
|
||||
module PGF.Linearize
|
||||
(linearizes,realize,realizes,linTree, linTreeMark,linearizesMark) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.Tree
|
||||
|
||||
import Control.Monad
|
||||
import qualified Data.Map as Map
|
||||
import Data.List
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
-- linearization and computation of concrete PGF Terms
|
||||
|
||||
linearizes :: PGF -> CId -> Expr -> [String]
|
||||
linearizes pgf lang = realizes . linTree pgf lang
|
||||
|
||||
realize :: Term -> String
|
||||
realize = concat . take 1 . realizes
|
||||
|
||||
realizes :: Term -> [String]
|
||||
realizes = map (unwords . untokn) . realizest
|
||||
|
||||
realizest :: Term -> [[Tokn]]
|
||||
realizest trm = case trm of
|
||||
R ts -> realizest (ts !! 0)
|
||||
S ss -> map concat $ combinations $ map realizest ss
|
||||
K t -> [[t]]
|
||||
W s t -> [[KS (s ++ r)] | [KS r] <- realizest t]
|
||||
FV ts -> concatMap realizest ts
|
||||
TM s -> [[KS s]]
|
||||
_ -> [[KS $ "REALIZE_ERROR " ++ show trm]] ---- debug
|
||||
|
||||
untokn :: [Tokn] -> [String]
|
||||
untokn ts = case ts of
|
||||
KP d _ : [] -> d
|
||||
KP d vs : ws -> let ss@(s:_) = untokn ws in sel d vs s ++ ss
|
||||
KS s : ws -> s : untokn ws
|
||||
[] -> []
|
||||
where
|
||||
sel d vs w = case [v | Alt v cs <- vs, any (\c -> isPrefixOf c w) cs] of
|
||||
v:_ -> v
|
||||
_ -> d
|
||||
|
||||
-- Lifts all variants to the top level (except those in macros).
|
||||
liftVariants :: Term -> [Term]
|
||||
liftVariants = f
|
||||
where
|
||||
f (R ts) = liftM R $ mapM f ts
|
||||
f (P t1 t2) = liftM2 P (f t1) (f t2)
|
||||
f (S ts) = liftM S $ mapM f ts
|
||||
f (FV ts) = ts >>= f
|
||||
f (W s t) = liftM (W s) $ f t
|
||||
f t = return t
|
||||
|
||||
linTree :: PGF -> CId -> Expr -> Term
|
||||
linTree pgf lang e = lin (expr2tree e) Nothing
|
||||
where
|
||||
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||
|
||||
lin (Abs xs e ) mty = case lin e Nothing of
|
||||
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
|
||||
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
|
||||
lin (Fun fun es) mty = case Map.lookup fun (funs (abstract pgf)) of
|
||||
Just (DTyp hyps _ _,_,_) -> let argVariants = sequence [liftVariants (lin e (Just ty)) | e <- es | (_,_,ty) <- hyps]
|
||||
in variants [compute pgf lang args $ lookMap tm0 fun (lins cnc) | args <- argVariants]
|
||||
Nothing -> tm0
|
||||
lin (Lit (LStr s)) mty = R [kks (show s)] -- quoted
|
||||
lin (Lit (LInt i)) mty = R [kks (show i)]
|
||||
lin (Lit (LFlt d)) mty = R [kks (show d)]
|
||||
lin (Var x) mty = case mty of
|
||||
Just (DTyp _ cat _) -> compute pgf lang [K (KS (showCId x))] (lookMap tm0 cat (lindefs cnc))
|
||||
Nothing -> TM (showCId x)
|
||||
lin (Meta i) mty = case mty of
|
||||
Just (DTyp _ cat _) -> compute pgf lang [K (KS (show i))] (lookMap tm0 cat (lindefs cnc))
|
||||
Nothing -> TM (show i)
|
||||
|
||||
variants :: [Term] -> Term
|
||||
variants ts = case ts of
|
||||
[t] -> t
|
||||
_ -> FV ts
|
||||
|
||||
unvariants :: Term -> [Term]
|
||||
unvariants t = case t of
|
||||
FV ts -> ts
|
||||
_ -> [t]
|
||||
|
||||
compute :: PGF -> CId -> [Term] -> Term -> Term
|
||||
compute pgf lang args = comp where
|
||||
comp trm = case trm of
|
||||
P r p -> proj (comp r) (comp p)
|
||||
W s t -> W s (comp t)
|
||||
R ts -> R $ map comp ts
|
||||
V i -> idx args i -- already computed
|
||||
F c -> comp $ look c -- not computed (if contains argvar)
|
||||
FV ts -> FV $ map comp ts
|
||||
S ts -> S $ filter (/= S []) $ map comp ts
|
||||
_ -> trm
|
||||
|
||||
look = lookOper pgf lang
|
||||
|
||||
idx xs i = if i > length xs - 1
|
||||
then trace
|
||||
("too large " ++ show i ++ " for\n" ++ unlines (map show xs) ++ "\n") tm0
|
||||
else xs !! i
|
||||
|
||||
proj r p = case (r,p) of
|
||||
(_, FV ts) -> FV $ map (proj r) ts
|
||||
(FV ts, _ ) -> FV $ map (\t -> proj t p) ts
|
||||
(W s t, _) -> kks (s ++ getString (proj t p))
|
||||
_ -> comp $ getField r (getIndex p)
|
||||
|
||||
getString t = case t of
|
||||
K (KS s) -> s
|
||||
_ -> error ("ERROR in grammar compiler: string from "++ show t) "ERR"
|
||||
|
||||
getIndex t = case t of
|
||||
C i -> i
|
||||
TM _ -> 0 -- default value for parameter
|
||||
_ -> trace ("ERROR in grammar compiler: index from " ++ show t) 666
|
||||
|
||||
getField t i = case t of
|
||||
R rs -> idx rs i
|
||||
TM s -> TM s
|
||||
_ -> error ("ERROR in grammar compiler: field from " ++ show t) t
|
||||
|
||||
---------
|
||||
-- markup with tree positions
|
||||
|
||||
linearizesMark :: PGF -> CId -> Expr -> [String]
|
||||
linearizesMark pgf lang = realizes . linTreeMark pgf lang
|
||||
|
||||
linTreeMark :: PGF -> CId -> Expr -> Term
|
||||
linTreeMark pgf lang = lin [] . expr2tree
|
||||
where
|
||||
lin p (Abs xs e ) = case lin p e of
|
||||
R ts -> R $ ts ++ (Data.List.map (kks . showCId . snd) xs)
|
||||
TM s -> R $ (TM s) : (Data.List.map (kks . showCId . snd) xs)
|
||||
lin p (Fun fun es) =
|
||||
let argVariants =
|
||||
mapM (\ (i,e) -> liftVariants $ lin (sub p i) e) (zip [0..] es)
|
||||
in variants [mark (fun,p) $ compute pgf lang args $ look fun |
|
||||
args <- argVariants]
|
||||
lin p (Lit (LStr s)) = mark p $ R [kks (show s)] -- quoted
|
||||
lin p (Lit (LInt i)) = mark p $ R [kks (show i)]
|
||||
lin p (Lit (LFlt d)) = mark p $ R [kks (show d)]
|
||||
lin p (Var x) = mark p $ TM (showCId x)
|
||||
lin p (Meta i) = mark p $ TM (show i)
|
||||
|
||||
look = lookLin pgf lang
|
||||
|
||||
mark :: Show a => a -> Term -> Term
|
||||
mark p t = case t of
|
||||
R ts -> R $ map (mark p) ts
|
||||
FV ts -> R $ map (mark p) ts
|
||||
S ts -> S $ bracket p ts
|
||||
K s -> S $ bracket p [t]
|
||||
W s (R ts) -> R [mark p $ kks (s ++ u) | K (KS u) <- ts]
|
||||
_ -> t
|
||||
-- otherwise in normal form
|
||||
|
||||
bracket p ts = [kks ("("++show p)] ++ ts ++ [kks ")"]
|
||||
sub p i = p ++ [i]
|
||||
154
src/runtime/haskell/PGF/Macros.hs
Normal file
154
src/runtime/haskell/PGF/Macros.hs
Normal file
@@ -0,0 +1,154 @@
|
||||
module PGF.Macros where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import Control.Monad
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Array as Array
|
||||
import Data.Maybe
|
||||
import Data.List
|
||||
|
||||
-- operations for manipulating PGF grammars and objects
|
||||
|
||||
mapConcretes :: (Concr -> Concr) -> PGF -> PGF
|
||||
mapConcretes f pgf = pgf { concretes = Map.map f (concretes pgf) }
|
||||
|
||||
lookLin :: PGF -> CId -> CId -> Term
|
||||
lookLin pgf lang fun =
|
||||
lookMap tm0 fun $ lins $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookOper :: PGF -> CId -> CId -> Term
|
||||
lookOper pgf lang fun =
|
||||
lookMap tm0 fun $ opers $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookLincat :: PGF -> CId -> CId -> Term
|
||||
lookLincat pgf lang fun =
|
||||
lookMap tm0 fun $ lincats $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookParamLincat :: PGF -> CId -> CId -> Term
|
||||
lookParamLincat pgf lang fun =
|
||||
lookMap tm0 fun $ paramlincats $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookPrintName :: PGF -> CId -> CId -> Term
|
||||
lookPrintName pgf lang fun =
|
||||
lookMap tm0 fun $ printnames $ lookMap (error "no lang") lang $ concretes pgf
|
||||
|
||||
lookType :: PGF -> CId -> Type
|
||||
lookType pgf f =
|
||||
case lookMap (error $ "lookType " ++ show f) f (funs (abstract pgf)) of
|
||||
(ty,_,_) -> ty
|
||||
|
||||
lookDef :: PGF -> CId -> [Equation]
|
||||
lookDef pgf f =
|
||||
case lookMap (error $ "lookDef " ++ show f) f (funs (abstract pgf)) of
|
||||
(_,a,eqs) -> eqs
|
||||
|
||||
isData :: PGF -> CId -> Bool
|
||||
isData pgf f =
|
||||
case Map.lookup f (funs (abstract pgf)) of
|
||||
Just (_,_,[]) -> True -- the encoding of data constrs
|
||||
_ -> False
|
||||
|
||||
lookValCat :: PGF -> CId -> CId
|
||||
lookValCat pgf = valCat . lookType pgf
|
||||
|
||||
lookParser :: PGF -> CId -> Maybe ParserInfo
|
||||
lookParser pgf lang = Map.lookup lang (concretes pgf) >>= parser
|
||||
|
||||
lookStartCat :: PGF -> CId
|
||||
lookStartCat pgf = mkCId $ fromMaybe "S" $ msum $ Data.List.map (Map.lookup (mkCId "startcat"))
|
||||
[gflags pgf, aflags (abstract pgf)]
|
||||
|
||||
lookGlobalFlag :: PGF -> CId -> String
|
||||
lookGlobalFlag pgf f =
|
||||
lookMap "?" f (gflags pgf)
|
||||
|
||||
lookAbsFlag :: PGF -> CId -> String
|
||||
lookAbsFlag pgf f =
|
||||
lookMap "?" f (aflags (abstract pgf))
|
||||
|
||||
lookConcr :: PGF -> CId -> Concr
|
||||
lookConcr pgf cnc =
|
||||
lookMap (error $ "Missing concrete syntax: " ++ showCId cnc) cnc $ concretes pgf
|
||||
|
||||
lookConcrFlag :: PGF -> CId -> CId -> Maybe String
|
||||
lookConcrFlag pgf lang f = Map.lookup f $ cflags $ lookConcr pgf lang
|
||||
|
||||
functionsToCat :: PGF -> CId -> [(CId,Type)]
|
||||
functionsToCat pgf cat =
|
||||
[(f,ty) | f <- fs, Just (ty,_,_) <- [Map.lookup f $ funs $ abstract pgf]]
|
||||
where
|
||||
fs = lookMap [] cat $ catfuns $ abstract pgf
|
||||
|
||||
missingLins :: PGF -> CId -> [CId]
|
||||
missingLins pgf lang = [c | c <- fs, not (hasl c)] where
|
||||
fs = Map.keys $ funs $ abstract pgf
|
||||
hasl = hasLin pgf lang
|
||||
|
||||
hasLin :: PGF -> CId -> CId -> Bool
|
||||
hasLin pgf lang f = Map.member f $ lins $ lookConcr pgf lang
|
||||
|
||||
restrictPGF :: (CId -> Bool) -> PGF -> PGF
|
||||
restrictPGF cond pgf = pgf {
|
||||
abstract = abstr {
|
||||
funs = restrict $ funs $ abstr,
|
||||
cats = restrict $ cats $ abstr
|
||||
}
|
||||
} ---- restrict concrs also, might be needed
|
||||
where
|
||||
restrict = Map.filterWithKey (\c _ -> cond c)
|
||||
abstr = abstract pgf
|
||||
|
||||
depth :: Expr -> Int
|
||||
depth (EAbs _ _ t) = depth t
|
||||
depth (EApp e1 e2) = max (depth e1) (depth e2) + 1
|
||||
depth _ = 1
|
||||
|
||||
cftype :: [CId] -> CId -> Type
|
||||
cftype args val = DTyp [(Explicit,wildCId,cftype [] arg) | arg <- args] val []
|
||||
|
||||
typeOfHypo :: Hypo -> Type
|
||||
typeOfHypo (_,_,ty) = ty
|
||||
|
||||
catSkeleton :: Type -> ([CId],CId)
|
||||
catSkeleton ty = case ty of
|
||||
DTyp hyps val _ -> ([valCat (typeOfHypo h) | h <- hyps],val)
|
||||
|
||||
typeSkeleton :: Type -> ([(Int,CId)],CId)
|
||||
typeSkeleton ty = case ty of
|
||||
DTyp hyps val _ -> ([(contextLength ty, valCat ty) | h <- hyps, let ty = typeOfHypo h],val)
|
||||
|
||||
valCat :: Type -> CId
|
||||
valCat ty = case ty of
|
||||
DTyp _ val _ -> val
|
||||
|
||||
contextLength :: Type -> Int
|
||||
contextLength ty = case ty of
|
||||
DTyp hyps _ _ -> length hyps
|
||||
|
||||
term0 :: CId -> Term
|
||||
term0 = TM . showCId
|
||||
|
||||
tm0 :: Term
|
||||
tm0 = TM "?"
|
||||
|
||||
kks :: String -> Term
|
||||
kks = K . KS
|
||||
|
||||
-- lookup with default value
|
||||
lookMap :: (Show i, Ord i) => a -> i -> Map.Map i a -> a
|
||||
lookMap d c m = Map.findWithDefault d c m
|
||||
|
||||
--- from Operations
|
||||
combinations :: [[a]] -> [[a]]
|
||||
combinations t = case t of
|
||||
[] -> [[]]
|
||||
aa:uu -> [a:u | a <- aa, u <- combinations uu]
|
||||
|
||||
isLiteralCat :: CId -> Bool
|
||||
isLiteralCat = (`elem` [cidString, cidFloat, cidInt, cidVar])
|
||||
|
||||
cidString = mkCId "String"
|
||||
cidInt = mkCId "Int"
|
||||
cidFloat = mkCId "Float"
|
||||
cidVar = mkCId "__gfVar"
|
||||
26
src/runtime/haskell/PGF/Morphology.hs
Normal file
26
src/runtime/haskell/PGF/Morphology.hs
Normal file
@@ -0,0 +1,26 @@
|
||||
module PGF.Morphology(Lemma,Analysis,Morpho,
|
||||
buildMorpho,
|
||||
lookupMorpho,fullFormLexicon) where
|
||||
|
||||
import PGF.ShowLinearize (collectWords)
|
||||
import PGF.Data
|
||||
import PGF.CId
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import Data.List (intersperse)
|
||||
|
||||
-- these 4 definitions depend on the datastructure used
|
||||
|
||||
type Lemma = CId
|
||||
type Analysis = String
|
||||
|
||||
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
|
||||
|
||||
buildMorpho :: PGF -> Language -> Morpho
|
||||
buildMorpho pgf lang = Morpho (Map.fromListWith (++) (collectWords pgf lang))
|
||||
|
||||
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
|
||||
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
|
||||
|
||||
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
|
||||
fullFormLexicon (Morpho mo) = Map.toList mo
|
||||
119
src/runtime/haskell/PGF/PMCFG.hs
Normal file
119
src/runtime/haskell/PGF/PMCFG.hs
Normal file
@@ -0,0 +1,119 @@
|
||||
module PGF.PMCFG where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Expr
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.IntMap as IntMap
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Unboxed
|
||||
import Text.PrettyPrint
|
||||
|
||||
type FCat = Int
|
||||
type FIndex = Int
|
||||
type FPointPos = Int
|
||||
data FSymbol
|
||||
= FSymCat {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymLit {-# UNPACK #-} !Int {-# UNPACK #-} !FIndex
|
||||
| FSymKS [String]
|
||||
| FSymKP [String] [Alternative]
|
||||
deriving (Eq,Ord,Show)
|
||||
type Profile = [Int]
|
||||
data Production
|
||||
= FApply {-# UNPACK #-} !FunId [FCat]
|
||||
| FCoerce {-# UNPACK #-} !FCat
|
||||
| FConst Expr [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
data FFun = FFun CId [Profile] {-# UNPACK #-} !(UArray FIndex SeqId) deriving (Eq,Ord,Show)
|
||||
type FSeq = Array FPointPos FSymbol
|
||||
type FunId = Int
|
||||
type SeqId = Int
|
||||
|
||||
data Alternative =
|
||||
Alt [String] [String]
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
data ParserInfo
|
||||
= ParserInfo { functions :: Array FunId FFun
|
||||
, sequences :: Array SeqId FSeq
|
||||
, productions0:: IntMap.IntMap (Set.Set Production) -- this are the original productions as they are loaded from the PGF file
|
||||
, productions :: IntMap.IntMap (Set.Set Production) -- this are the productions after the filtering for useless productions
|
||||
, startCats :: Map.Map CId [FCat]
|
||||
, totalCats :: {-# UNPACK #-} !FCat
|
||||
}
|
||||
|
||||
|
||||
fcatString, fcatInt, fcatFloat, fcatVar :: Int
|
||||
fcatString = (-1)
|
||||
fcatInt = (-2)
|
||||
fcatFloat = (-3)
|
||||
fcatVar = (-4)
|
||||
|
||||
isLiteralFCat :: FCat -> Bool
|
||||
isLiteralFCat = (`elem` [fcatString, fcatInt, fcatFloat, fcatVar])
|
||||
|
||||
ppPMCFG :: ParserInfo -> Doc
|
||||
ppPMCFG pinfo =
|
||||
text "productions" $$
|
||||
nest 2 (vcat [ppProduction (fcat,prod) | (fcat,set) <- IntMap.toList (productions pinfo), prod <- Set.toList set]) $$
|
||||
text "functions" $$
|
||||
nest 2 (vcat (map ppFun (assocs (functions pinfo)))) $$
|
||||
text "sequences" $$
|
||||
nest 2 (vcat (map ppSeq (assocs (sequences pinfo)))) $$
|
||||
text "startcats" $$
|
||||
nest 2 (vcat (map ppStartCat (Map.toList (startCats pinfo))))
|
||||
|
||||
ppProduction (fcat,FApply funid args) =
|
||||
ppFCat fcat <+> text "->" <+> ppFunId funid <> brackets (hcat (punctuate comma (map ppFCat args)))
|
||||
ppProduction (fcat,FCoerce arg) =
|
||||
ppFCat fcat <+> text "->" <+> char '_' <> brackets (ppFCat arg)
|
||||
ppProduction (fcat,FConst _ ss) =
|
||||
ppFCat fcat <+> text "->" <+> ppStrs ss
|
||||
|
||||
ppFun (funid,FFun fun _ arr) =
|
||||
ppFunId funid <+> text ":=" <+> parens (hcat (punctuate comma (map ppSeqId (elems arr)))) <+> brackets (ppCId fun)
|
||||
|
||||
ppSeq (seqid,seq) =
|
||||
ppSeqId seqid <+> text ":=" <+> hsep (map ppSymbol (elems seq))
|
||||
|
||||
ppStartCat (id,fcats) =
|
||||
ppCId id <+> text ":=" <+> brackets (hcat (punctuate comma (map ppFCat fcats)))
|
||||
|
||||
ppSymbol (FSymCat d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymLit d r) = char '<' <> int d <> comma <> int r <> char '>'
|
||||
ppSymbol (FSymKS ts) = ppStrs ts
|
||||
ppSymbol (FSymKP ts alts) = text "pre" <+> braces (hsep (punctuate semi (ppStrs ts : map ppAlt alts)))
|
||||
|
||||
ppAlt (Alt ts ps) = ppStrs ts <+> char '/' <+> hsep (map (doubleQuotes . text) ps)
|
||||
|
||||
ppStrs ss = doubleQuotes (hsep (map text ss))
|
||||
|
||||
ppFCat fcat
|
||||
| fcat == fcatString = text "CString"
|
||||
| fcat == fcatInt = text "CInt"
|
||||
| fcat == fcatFloat = text "CFloat"
|
||||
| fcat == fcatVar = text "CVar"
|
||||
| otherwise = char 'C' <> int fcat
|
||||
|
||||
ppFunId funid = char 'F' <> int funid
|
||||
ppSeqId seqid = char 'S' <> int seqid
|
||||
|
||||
|
||||
filterProductions = closure
|
||||
where
|
||||
closure prods0
|
||||
| IntMap.size prods == IntMap.size prods0 = prods
|
||||
| otherwise = closure prods
|
||||
where
|
||||
prods = IntMap.mapMaybe (filterProdSet prods0) prods0
|
||||
|
||||
filterProdSet prods set0
|
||||
| Set.null set = Nothing
|
||||
| otherwise = Just set
|
||||
where
|
||||
set = Set.filter (filterRule prods) set0
|
||||
|
||||
filterRule prods (FApply funid args) = all (\fcat -> isLiteralFCat fcat || IntMap.member fcat prods) args
|
||||
filterRule prods (FCoerce fcat) = isLiteralFCat fcat || IntMap.member fcat prods
|
||||
filterRule prods _ = True
|
||||
112
src/runtime/haskell/PGF/Paraphrase.hs
Normal file
112
src/runtime/haskell/PGF/Paraphrase.hs
Normal file
@@ -0,0 +1,112 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Paraphrase
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- Generate parapharases with def definitions.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.Paraphrase (
|
||||
paraphrase,
|
||||
paraphraseN
|
||||
) where
|
||||
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import PGF.Macros (lookDef,isData)
|
||||
import PGF.CId
|
||||
|
||||
import Data.List (nub,sort,group)
|
||||
import qualified Data.Map as Map
|
||||
|
||||
import Debug.Trace ----
|
||||
|
||||
paraphrase :: PGF -> Expr -> [Expr]
|
||||
paraphrase pgf = nub . paraphraseN 2 pgf
|
||||
|
||||
paraphraseN :: Int -> PGF -> Expr -> [Expr]
|
||||
paraphraseN i pgf = map tree2expr . paraphraseN' i pgf . expr2tree
|
||||
|
||||
paraphraseN' :: Int -> PGF -> Tree -> [Tree]
|
||||
paraphraseN' 0 _ t = [t]
|
||||
paraphraseN' i pgf t =
|
||||
step i t ++ [Fun g ts' | Fun g ts <- step (i-1) t, ts' <- sequence (map par ts)]
|
||||
where
|
||||
par = paraphraseN' (i-1) pgf
|
||||
step 0 t = [t]
|
||||
step i t = let stept = step (i-1) t in stept ++ concat [def u | u <- stept]
|
||||
def = fromDef pgf
|
||||
|
||||
fromDef :: PGF -> Tree -> [Tree]
|
||||
fromDef pgf t@(Fun f ts) = defDown t ++ defUp t where
|
||||
defDown t = [subst g u | let equ = equsFrom f, (u,g) <- match equ ts, trequ "U" f equ]
|
||||
defUp t = [subst g u | equ <- equsTo f, (u,g) <- match [equ] ts, trequ "D" f equ]
|
||||
|
||||
equsFrom f = [(ps,d) | Just equs <- [lookup f equss], (Fun _ ps,d) <- equs]
|
||||
|
||||
equsTo f = [c | (_,equs) <- equss, c <- casesTo f equs]
|
||||
|
||||
casesTo f equs =
|
||||
[(ps,p) | (p,d@(Fun g ps)) <- equs, g==f,
|
||||
isClosed d || (length equs == 1 && isLinear d)]
|
||||
|
||||
equss = [(f,[(Fun f (map patt2tree ps), expr2tree d) | (Equ ps d) <- eqs]) |
|
||||
(f,(_,_,eqs)) <- Map.assocs (funs (abstract pgf)), not (null eqs)]
|
||||
|
||||
trequ s f e = True ----trace (s ++ ": " ++ show f ++ " " ++ show e) True
|
||||
|
||||
subst :: Subst -> Tree -> Tree
|
||||
subst g e = case e of
|
||||
Fun f ts -> Fun f (map substg ts)
|
||||
Var x -> maybe e id $ lookup x g
|
||||
_ -> e
|
||||
where
|
||||
substg = subst g
|
||||
|
||||
type Subst = [(CId,Tree)]
|
||||
|
||||
-- this applies to pattern, hence don't need to consider abstractions
|
||||
isClosed :: Tree -> Bool
|
||||
isClosed t = case t of
|
||||
Fun _ ts -> all isClosed ts
|
||||
Var _ -> False
|
||||
_ -> True
|
||||
|
||||
-- this applies to pattern, hence don't need to consider abstractions
|
||||
isLinear :: Tree -> Bool
|
||||
isLinear = nodup . vars where
|
||||
vars t = case t of
|
||||
Fun _ ts -> concatMap vars ts
|
||||
Var x -> [x]
|
||||
_ -> []
|
||||
nodup = all ((<2) . length) . group . sort
|
||||
|
||||
|
||||
match :: [([Tree],Tree)] -> [Tree] -> [(Tree, Subst)]
|
||||
match cases terms = case cases of
|
||||
[] -> []
|
||||
(patts,_):_ | length patts /= length terms -> []
|
||||
(patts,val):cc -> case mapM tryMatch (zip patts terms) of
|
||||
Just substs -> return (val, concat substs)
|
||||
_ -> match cc terms
|
||||
where
|
||||
tryMatch (p,t) = case (p, t) of
|
||||
(Var x, _) | notMeta t -> return [(x,t)]
|
||||
(Fun p pp, Fun f tt) | p == f && length pp == length tt -> do
|
||||
matches <- mapM tryMatch (zip pp tt)
|
||||
return (concat matches)
|
||||
_ -> if p==t then return [] else Nothing
|
||||
|
||||
notMeta e = case e of
|
||||
Meta _ -> False
|
||||
Fun f ts -> all notMeta ts
|
||||
_ -> True
|
||||
|
||||
-- | Converts a pattern to tree.
|
||||
patt2tree :: Patt -> Tree
|
||||
patt2tree (PApp f ps) = Fun f (map patt2tree ps)
|
||||
patt2tree (PLit l) = Lit l
|
||||
patt2tree (PVar x) = Var x
|
||||
patt2tree PWild = Meta 0
|
||||
205
src/runtime/haskell/PGF/Parsing/FCFG/Active.hs
Normal file
205
src/runtime/haskell/PGF/Parsing/FCFG/Active.hs
Normal file
@@ -0,0 +1,205 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- MCFG parsing, the active algorithm
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.Parsing.FCFG.Active (parse) where
|
||||
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Utilities
|
||||
import qualified GF.Data.MultiMap as MM
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import PGF.Parsing.FCFG.Utilities
|
||||
import PGF.BuildParser
|
||||
|
||||
import Control.Monad (guard)
|
||||
|
||||
import qualified Data.List as List
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Data.Array.IArray
|
||||
import Debug.Trace
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
type FToken = String
|
||||
|
||||
makeFinalEdge cat 0 0 = (cat, [EmptyRange])
|
||||
makeFinalEdge cat i j = (cat, [makeRange i j])
|
||||
|
||||
-- | the list of categories = possible starting categories
|
||||
parse :: String -> ParserInfo -> Type -> [FToken] -> [Expr]
|
||||
parse strategy pinfo (DTyp _ start _) toks = map (tree2expr) . nubsort $ filteredForests >>= forest2trees
|
||||
where
|
||||
inTokens = input toks
|
||||
starts = Map.findWithDefault [] start (startCats pinfo)
|
||||
schart = xchart2syntaxchart chart pinfo
|
||||
(i,j) = inputBounds inTokens
|
||||
finalEdges = [makeFinalEdge cat i j | cat <- starts]
|
||||
forests = chart2forests schart (const False) finalEdges
|
||||
filteredForests = forests >>= applyProfileToForest
|
||||
|
||||
pinfoex = buildParserInfo pinfo
|
||||
|
||||
chart = process strategy pinfo pinfoex inTokens axioms emptyXChart
|
||||
axioms | isBU strategy = literals pinfoex inTokens ++ initialBU pinfo pinfoex inTokens
|
||||
| isTD strategy = literals pinfoex inTokens ++ initialTD pinfo starts inTokens
|
||||
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
-- used in prediction
|
||||
emptyChildren :: FunId -> [FCat] -> SyntaxNode FunId RangeRec
|
||||
emptyChildren ruleid args = SNode ruleid (replicate (length args) [])
|
||||
|
||||
|
||||
process :: String -> ParserInfo -> ParserInfoEx -> Input FToken -> [Item] -> XChart FCat -> XChart FCat
|
||||
process strategy pinfo pinfoex toks [] chart = chart
|
||||
process strategy pinfo pinfoex toks (item:items) chart = process strategy pinfo pinfoex toks items $! univRule item chart
|
||||
where
|
||||
univRule item@(Active found rng lbl ppos node@(SNode ruleid recs) args cat) chart
|
||||
| inRange (bounds lin) ppos =
|
||||
case lin ! ppos of
|
||||
FSymCat d r -> let c = args !! d
|
||||
in case recs !! d of
|
||||
[] -> case insertXChart chart item c of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do item@(Final found' _ _ _) <- lookupXChartFinal chart c
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (Active found rng lbl (ppos+1) (SNode ruleid (updateNth (const found') d recs)) args cat)
|
||||
++
|
||||
do guard (isTD strategy)
|
||||
(ruleid,args) <- topdownRules pinfo c
|
||||
return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args c)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
found' -> let items = do rng <- concatRange rng (found' !! r)
|
||||
return (Active found rng lbl (ppos+1) node args cat)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
FSymKS [tok]
|
||||
-> let items = do t_rng <- inputToken toks ? tok
|
||||
rng' <- concatRange rng t_rng
|
||||
return (Active found rng' lbl (ppos+1) node args cat)
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
| otherwise =
|
||||
if inRange (bounds lins) (lbl+1)
|
||||
then univRule (Active (rng:found) EmptyRange (lbl+1) 0 node args cat) chart
|
||||
else univRule (Final (reverse (rng:found)) node args cat) chart
|
||||
where
|
||||
(FFun _ _ lins) = functions pinfo ! ruleid
|
||||
lin = sequences pinfo ! (lins ! lbl)
|
||||
univRule item@(Final found' node args cat) chart =
|
||||
case insertXChart chart item cat of
|
||||
Nothing -> chart
|
||||
Just chart -> let items = do (Active found rng l ppos node@(SNode ruleid _) args c) <- lookupXChartAct chart cat
|
||||
let FFun _ _ lins = functions pinfo ! ruleid
|
||||
FSymCat d r = (sequences pinfo ! (lins ! l)) ! ppos
|
||||
rng <- concatRange rng (found' !! r)
|
||||
return (Active found rng l (ppos+1) (updateChildren node d found') args c)
|
||||
++
|
||||
do guard (isBU strategy)
|
||||
(ruleid,args,c) <- leftcornerCats pinfoex ? cat
|
||||
let FFun _ _ lins = functions pinfo ! ruleid
|
||||
FSymCat d r = (sequences pinfo ! (lins ! 0)) ! 0
|
||||
return (Active [] (found' !! r) 0 1 (updateChildren (emptyChildren ruleid args) d found') args c)
|
||||
|
||||
updateChildren :: SyntaxNode FunId RangeRec -> Int -> RangeRec -> SyntaxNode FunId RangeRec
|
||||
updateChildren (SNode ruleid recs) i rec = SNode ruleid $! updateNth (const rec) i recs
|
||||
in process strategy pinfo pinfoex toks items chart
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * XChart
|
||||
|
||||
data Item
|
||||
= Active RangeRec
|
||||
Range
|
||||
{-# UNPACK #-} !FIndex
|
||||
{-# UNPACK #-} !FPointPos
|
||||
(SyntaxNode FunId RangeRec)
|
||||
[FCat]
|
||||
FCat
|
||||
| Final RangeRec (SyntaxNode FunId RangeRec) [FCat] FCat
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data XChart c = XChart !(MM.MultiMap c Item) !(MM.MultiMap c Item)
|
||||
|
||||
emptyXChart :: Ord c => XChart c
|
||||
emptyXChart = XChart MM.empty MM.empty
|
||||
|
||||
insertXChart (XChart actives finals) item@(Active _ _ _ _ _ _ _) c =
|
||||
case MM.insert' c item actives of
|
||||
Nothing -> Nothing
|
||||
Just actives -> Just (XChart actives finals)
|
||||
|
||||
insertXChart (XChart actives finals) item@(Final _ _ _ _) c =
|
||||
case MM.insert' c item finals of
|
||||
Nothing -> Nothing
|
||||
Just finals -> Just (XChart actives finals)
|
||||
|
||||
lookupXChartAct (XChart actives finals) c = actives MM.! c
|
||||
lookupXChartFinal (XChart actives finals) c = finals MM.! c
|
||||
|
||||
xchart2syntaxchart :: XChart FCat -> ParserInfo -> SyntaxChart (CId,[Profile]) (FCat,RangeRec)
|
||||
xchart2syntaxchart (XChart actives finals) pinfo =
|
||||
accumAssoc groupSyntaxNodes $
|
||||
[ case node of
|
||||
SNode ruleid rrecs -> let FFun fun prof _ = functions pinfo ! ruleid
|
||||
in ((cat,found), SNode (fun,prof) (zip rhs rrecs))
|
||||
SString s -> ((cat,found), SString s)
|
||||
SInt n -> ((cat,found), SInt n)
|
||||
SFloat f -> ((cat,found), SFloat f)
|
||||
| (Final found node rhs cat) <- MM.elems finals
|
||||
]
|
||||
|
||||
literals :: ParserInfoEx -> Input FToken -> [Item]
|
||||
literals pinfoex toks =
|
||||
[let (c,node) = lexer t in (Final [rng] node [] c) | (t,rngs) <- aAssocs (inputToken toks), rng <- rngs, not (t `elem` grammarToks pinfoex)]
|
||||
where
|
||||
lexer t =
|
||||
case reads t of
|
||||
[(n,"")] -> (fcatInt, SInt (n::Integer))
|
||||
_ -> case reads t of
|
||||
[(f,"")] -> (fcatFloat, SFloat (f::Double))
|
||||
_ -> (fcatString,SString t)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
|
||||
-- called with all starting categories
|
||||
initialTD :: ParserInfo -> [FCat] -> Input FToken -> [Item]
|
||||
initialTD pinfo starts toks =
|
||||
do cat <- starts
|
||||
(ruleid,args) <- topdownRules pinfo cat
|
||||
return (Active [] (Range 0 0) 0 0 (emptyChildren ruleid args) args cat)
|
||||
|
||||
topdownRules pinfo cat = f cat []
|
||||
where
|
||||
f cat rules = maybe rules (Set.fold g rules) (IntMap.lookup cat (productions pinfo))
|
||||
|
||||
g (FApply ruleid args) rules = (ruleid,args) : rules
|
||||
g (FCoerce cat) rules = f cat rules
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
-- | Bottom-up (Kilbury) initialisation: one scan item per occurrence of an
-- input token that is a left corner of some rule, plus one item per
-- epsilon rule.
initialBU :: ParserInfo -> ParserInfoEx -> Input FToken -> [Item]
initialBU pinfo pinfoex toks =
  do (tok,rngs) <- aAssocs (inputToken toks)
     (ruleid,args,cat) <- leftcornerTokens pinfoex ? tok
     rng <- rngs
     return (Active [] rng 0 1 (emptyChildren ruleid args) args cat)
  ++
  do (ruleid,args,cat) <- epsilonRules pinfoex
     -- NOTE(review): this lazy pattern binding is never forced, so it has
     -- no runtime effect; presumably it was meant to force the rule
     -- lookup -- confirm before removing.
     let FFun _ _ _ = functions pinfo ! ruleid
     return (Active [] EmptyRange 0 0 (emptyChildren ruleid args) args cat)
|
||||
371
src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs
Normal file
371
src/runtime/haskell/PGF/Parsing/FCFG/Incremental.hs
Normal file
@@ -0,0 +1,371 @@
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
module PGF.Parsing.FCFG.Incremental
|
||||
( ParseState
|
||||
, ErrorState
|
||||
, initState
|
||||
, nextState
|
||||
, getCompletions
|
||||
, recoveryStates
|
||||
, extractTrees
|
||||
, parse
|
||||
, parseWithRecovery
|
||||
) where
|
||||
|
||||
import Data.Array.IArray
|
||||
import Data.Array.Base (unsafeAt)
|
||||
import Data.List (isPrefixOf, foldl')
|
||||
import Data.Maybe (fromMaybe, maybe)
|
||||
import qualified Data.Map as Map
|
||||
import qualified GF.Data.TrieMap as TMap
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.Set as Set
|
||||
import Control.Monad
|
||||
|
||||
import GF.Data.SortedList
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Expr(Tree)
|
||||
import PGF.Macros
|
||||
import PGF.TypeCheck
|
||||
import Debug.Trace
|
||||
|
||||
-- | Parse a list of tokens in the given language and start type.
-- Returns the empty list as soon as a token cannot be accepted.
parse :: PGF -> Language -> Type -> [String] -> [Tree]
parse pgf lang typ = go (initState pgf lang typ)
  where
    go st []         = extractTrees st typ
    go st (tok:rest) =
      either (const []) (\st' -> go st' rest) (nextState st tok)
|
||||
|
||||
-- | Like 'parse' but recovers from unknown input: when a token cannot be
-- accepted, one of the open categories @open_typs@ is assumed and tokens
-- are skipped until one continues the parse.
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> [String] -> [Tree]
parseWithRecovery pgf lang typ open_typs toks = accept (initState pgf lang typ) toks
  where
    accept ps [] = extractTrees ps typ
    accept ps (t:ts) =
      case nextState ps t of
        Right ps -> accept ps ts
        Left  es -> skip (recoveryStates open_typs es) ts

    -- drop tokens until one is a key in the recovery map
    skip ps_map [] = extractTrees (fst ps_map) typ
    skip ps_map (t:ts) =
      case Map.lookup t (snd ps_map) of
        Just ps -> accept ps ts
        Nothing -> skip ps_map ts
|
||||
|
||||
-- | Creates an initial parsing state for a given language and
-- startup category.
initState :: PGF -> Language -> Type -> ParseState
initState pgf lang (DTyp _ start _) =
  let -- one initial active item per production of the start category and
      -- per linearization field of the applied function
      items = do
        cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
        (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
                                   [] cat (productions pinfo)
        let FFun fn _ lins = functions pinfo ! funid
        (lbl,seqid) <- assocs lins
        return (Active 0 0 funid seqid args (AK cat lbl))

      pinfo =
        case lookParser pgf lang of
          Just pinfo -> pinfo
          _          -> error ("Unknown language: " ++ showCId lang)

  in PState pgf
            pinfo
            (Chart emptyAC [] emptyPC (productions pinfo) (totalCats pinfo) 0)
            (TMap.singleton [] (Set.fromList items))

-- | From the current state and the next token
-- 'nextState' computes a new state, where the token
-- is consumed and the current position is shifted by one.
-- If the new token cannot be accepted then an error state
-- is returned.
nextState :: ParseState -> String -> Either ErrorState ParseState
nextState (PState pgf pinfo chart items) t =
  let (mb_agenda,map_items) = TMap.decompose items
      agenda = maybe [] Set.toList mb_agenda
      -- items already waiting exactly for token t
      acc    = fromMaybe TMap.empty (Map.lookup t map_items)
      (acc1,chart1) = process (Just t) add (sequences pinfo) (functions pinfo) agenda acc chart
      -- shift: archive the current active chart and advance the offset
      chart2 = chart1{ active =emptyAC
                     , actives=active chart1 : actives chart1
                     , passive=emptyPC
                     , offset =offset chart1+1
                     }
  in if TMap.null acc1
       then Left  (EState pgf pinfo chart2)
       else Right (PState pgf pinfo chart2 acc1)
  where
    -- record an item that scans token t; the remaining tokens of the
    -- multi-word unit become the trie key for later positions
    add (tok:toks) item acc
      | tok == t = TMap.insertWith Set.union toks (Set.singleton item) acc
    add _ item acc = acc
|
||||
|
||||
-- | If the next token is not known but only its prefix (possible empty prefix)
-- then the 'getCompletions' function can be used to calculate the possible
-- next words and the consequent states. This is used for word completions in
-- the GF interpreter.
getCompletions :: ParseState -> String -> Map.Map String ParseState
getCompletions (PState pgf pinfo chart items) w =
  let (mb_agenda,map_items) = TMap.decompose items
      agenda = maybe [] Set.toList mb_agenda
      -- keep only pending scans whose token extends the prefix w
      acc = Map.filterWithKey (\tok _ -> isPrefixOf w tok) map_items
      (acc',chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda acc chart
      -- shift: archive the active chart and advance the position
      chart2 = chart1{ active =emptyAC
                     , actives=active chart1 : actives chart1
                     , passive=emptyPC
                     , offset =offset chart1+1
                     }
  in fmap (PState pgf pinfo chart2) acc'
  where
    add (tok:toks) item acc
      | isPrefixOf w tok = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
    add _ item acc = acc

-- | Builds the recovery continuations after an error: a state that behaves
-- as if a constituent of one of the open types had been read, plus a map
-- from possible next tokens to their states.
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map.Map String ParseState)
recoveryStates open_types (EState pgf pinfo chart) =
  let open_fcats = concatMap type2fcats open_types
      -- advance the dot over an open-category symbol in archived items
      agenda = foldl (complete open_fcats) [] (actives chart)
      (acc,chart1) = process Nothing add (sequences pinfo) (functions pinfo) agenda Map.empty chart
      chart2 = chart1{ active =emptyAC
                     , actives=active chart1 : actives chart1
                     , passive=emptyPC
                     , offset =offset chart1+1
                     }
  in (PState pgf pinfo chart (TMap.singleton [] (Set.fromList agenda)), fmap (PState pgf pinfo chart2) acc)
  where
    type2fcats (DTyp _ cat _) = fromMaybe [] (Map.lookup cat (startCats pinfo))

    complete open_fcats items ac =
      foldl (Set.fold (\(Active j' ppos funid seqid args keyc) ->
                             (:) (Active j' (ppos+1) funid seqid args keyc)))
            items
            [set | fcat <- open_fcats, set <- lookupACByFCat fcat ac]

    add (tok:toks) item acc = Map.insertWith (TMap.unionWith Set.union) tok (TMap.singleton toks (Set.singleton item)) acc
|
||||
|
||||
-- | This function extracts the list of all completed parse trees
-- that spans the whole input consumed so far. The trees are also
-- limited by the category specified, which is usually
-- the same as the startup category.
extractTrees :: ParseState -> Type -> [Tree]
extractTrees (PState pgf pinfo chart items) ty@(DTyp _ start _) =
  nubsort [e1 | e <- exps, Right e1 <- [checkExpr pgf e ty]]
  where
    (mb_agenda,acc) = TMap.decompose items
    agenda = maybe [] Set.toList mb_agenda
    -- run the parser once more without a token so that every pending
    -- completion is performed before we read the passive chart
    (_,st) = process Nothing (\_ _ -> id) (sequences pinfo) (functions pinfo) agenda () chart

    exps = do
      cat <- fromMaybe [] (Map.lookup start (startCats pinfo))
      (funid,args) <- foldForest (\funid args -> (:) (funid,args)) (\_ _ args -> args)
                                 [] cat (productions pinfo)
      let FFun fn _ lins = functions pinfo ! funid
      lbl <- indices lins
      -- the constituent must span from position 0
      Just fid <- [lookupPC (PK cat lbl 0) (passive st)]
      (fvs,tree) <- go Set.empty 0 (0,fid)
      -- only closed terms (no free variables) are returned
      guard (Set.null fvs)
      return tree

    -- walk the shared forest; rec guards against cycles, the fst of the
    -- result collects free variables of the subtree
    go rec fcat' (d,fcat)
      | fcat < totalCats pinfo = return (Set.empty,EMeta (fcat'*10+d)) -- FIXME: here we assume that every rule has at most 10 arguments
      | Set.member fcat rec    = mzero
      | otherwise              = foldForest (\funid args trees ->
                                               do let FFun fn _ lins = functions pinfo ! funid
                                                  args <- mapM (go (Set.insert fcat rec) fcat) (zip [0..] args)
                                                  check_ho_fun fn args
                                               `mplus`
                                               trees)
                                            (\const _ trees ->
                                               return (freeVar const,const)
                                               `mplus`
                                               trees)
                                            [] fcat (forest st)

    -- _V marks a variable use; _B marks a binder: rebuild the lambdas
    check_ho_fun fun args
      | fun == _V = return (head args)
      | fun == _B = return (foldl1 Set.difference (map fst args), foldr (\x e -> EAbs Explicit (mkVar (snd x)) e) (snd (head args)) (tail args))
      | otherwise = return (Set.unions (map fst args),foldl (\e x -> EApp e (snd x)) (EFun fun) args)

    mkVar (EFun v)  = v
    mkVar (EMeta _) = wildCId

    freeVar (EFun v) = Set.singleton v
    freeVar _        = Set.empty

    _B = mkCId "_B"
    _V = mkCId "_V"
|
||||
|
||||
-- The worker of the incremental parser: exhausts the agenda of active
-- items.  @mbt@ is the current token (if any); @fn@ folds every possible
-- scan into the accumulator @acc@.  Returns the accumulator and the chart.
process mbt fn !seqs !funs []                                                 acc chart = (acc,chart)
process mbt fn !seqs !funs (item@(Active j ppos funid seqid args key0):items) acc chart
  | inRange (bounds lin) ppos =
      -- the dot is inside the sequence: dispatch on the next symbol
      case unsafeAt lin ppos of
        FSymCat d r -> let !fid = args !! d
                           key  = AK fid r

                           -- complete: the argument may already be passive
                           items2 = case lookupPC (mkPK key k) (passive chart) of
                                      Nothing -> items
                                      Just id -> (Active j (ppos+1) funid seqid (updateAt d id args) key0) : items
                           -- predict: new items for every production of fid
                           items3 = foldForest (\funid args items -> Active k 0 funid (rhs funid r) args key : items)
                                               (\_ _ items -> items)
                                               items2 fid (forest chart)
                       in case lookupAC key (active chart) of
                            Nothing -> process mbt fn seqs funs items3 acc chart{active=insertAC key (Set.singleton item) (active chart)}
                            Just set | Set.member item set -> process mbt fn seqs funs items  acc chart
                                     | otherwise           -> process mbt fn seqs funs items2 acc chart{active=insertAC key (Set.insert item set) (active chart)}
        FSymKS toks -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
                       in process mbt fn seqs funs items acc' chart
        FSymKP strs vars
                    -> -- scan: the default string plus every prefix-dependent alternative
                       let !acc' = foldl (\acc toks -> fn toks (Active j (ppos+1) funid seqid args key0) acc) acc
                                         (strs:[strs' | Alt strs' _ <- vars])
                       in process mbt fn seqs funs items acc' chart
        FSymLit d r -> let !fid = args !! d
                       in case [ts | FConst _ ts <- maybe [] Set.toList (IntMap.lookup fid (forest chart))] of
                            -- the literal was already recognized earlier
                            (toks:_) -> let !acc' = fn toks (Active j (ppos+1) funid seqid args key0) acc
                                        in process mbt fn seqs funs items acc' chart
                            -- otherwise try to match the current token as a literal
                            []       -> case litCatMatch fid mbt of
                                          Just (toks,lit) -> let fid' = nextId chart
                                                                 !acc' = fn toks (Active j (ppos+1) funid seqid (updateAt d fid' args) key0) acc
                                                             in process mbt fn seqs funs items acc' chart{forest=IntMap.insert fid' (Set.singleton (FConst lit toks)) (forest chart)
                                                                                                         ,nextId=nextId chart+1
                                                                                                         }
                                          Nothing         -> process mbt fn seqs funs items acc chart
  | otherwise =
      -- the dot is at the end: the item is complete, make it passive
      case lookupPC (mkPK key0 j) (passive chart) of
        Nothing -> let fid = nextId chart

                       -- resume the items (at start position j) that were
                       -- waiting for this constituent
                       items2 = case lookupAC key0 ((active chart:actives chart) !! (k-j)) of
                                  Nothing  -> items
                                  Just set -> Set.fold (\(Active j' ppos funid seqid args keyc) ->
                                                              let FSymCat d _ = unsafeAt (unsafeAt seqs seqid) ppos
                                                              in (:) (Active j' (ppos+1) funid seqid (updateAt d fid args) keyc)) items set
                   in process mbt fn seqs funs items2 acc chart{passive=insertPC (mkPK key0 j) fid (passive chart)
                                                               ,forest =IntMap.insert fid (Set.singleton (FApply funid args)) (forest chart)
                                                               ,nextId =nextId chart+1
                                                               }
        -- the constituent already exists: just extend its forest entry
        Just id -> let items2 = [Active k 0 funid (rhs funid r) args (AK id r) | r <- labelsAC id (active chart)] ++ items
                   in process mbt fn seqs funs items2 acc chart{forest = IntMap.insertWith Set.union id (Set.singleton (FApply funid args)) (forest chart)}
  where
    !lin = unsafeAt seqs seqid
    !k   = offset chart

    mkPK (AK fid lbl) j = PK fid lbl j

    -- the linearization sequence of field lbl of function funid
    rhs funid lbl = unsafeAt lins lbl
      where
        FFun _ _ lins = unsafeAt funs funid
|
||||
|
||||
|
||||
-- | Replace the element at position @nr@ by @x@; an out-of-range index
-- leaves the list unchanged.
updateAt :: Int -> a -> [a] -> [a]
updateAt nr x = zipWith pick [0..]
  where
    pick i old
      | i == nr   = x
      | otherwise = old
|
||||
|
||||
-- Try to match the current token against one of the built-in literal
-- categories; returns the consumed tokens and the literal expression.
litCatMatch fcat (Just t)
  | fcat == fcatString = Just ([t],ELit (LStr t))
  | fcat == fcatInt    = case reads t of {[(n,"")] -> Just ([t],ELit (LInt n));
                                          _        -> Nothing }
  | fcat == fcatFloat  = case reads t of {[(d,"")] -> Just ([t],ELit (LFlt d));
                                          _        -> Nothing }
  | fcat == fcatVar    = Just ([t],EFun (mkCId t))
litCatMatch _ _ = Nothing
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Active Chart
|
||||
----------------------------------------------------------------
|
||||
|
||||
-- | An active (dotted) item: start position, dot position inside the
-- linearization sequence, function, sequence id, argument categories,
-- and the key of the constituent being built.
data Active
  = Active {-# UNPACK #-} !Int
           {-# UNPACK #-} !FPointPos
           {-# UNPACK #-} !FunId
           {-# UNPACK #-} !SeqId
           [FCat]
           {-# UNPACK #-} !ActiveKey
  deriving (Eq,Show,Ord)

-- | Key of an active item: category plus field (label) index.
data ActiveKey
  = AK {-# UNPACK #-} !FCat
       {-# UNPACK #-} !FIndex
  deriving (Eq,Ord,Show)

-- | Active chart, indexed first by category and then by label.
type ActiveChart = IntMap.IntMap (IntMap.IntMap (Set.Set Active))

emptyAC :: ActiveChart
emptyAC = IntMap.empty

lookupAC :: ActiveKey -> ActiveChart -> Maybe (Set.Set Active)
lookupAC (AK fcat l) chart = IntMap.lookup fcat chart >>= IntMap.lookup l

-- | All item sets stored under a category, across all labels.
lookupACByFCat :: FCat -> ActiveChart -> [Set.Set Active]
lookupACByFCat fcat chart = maybe [] IntMap.elems (IntMap.lookup fcat chart)

-- | All labels under which items are stored for a category.
labelsAC :: FCat -> ActiveChart -> [FIndex]
labelsAC fcat chart = maybe [] IntMap.keys (IntMap.lookup fcat chart)

insertAC :: ActiveKey -> Set.Set Active -> ActiveChart -> ActiveChart
insertAC (AK fcat l) set chart = IntMap.insertWith IntMap.union fcat (IntMap.singleton l set) chart
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Passive Chart
|
||||
----------------------------------------------------------------
|
||||
|
||||
-- | Key of a passive item: category, label, and start position.
data PassiveKey
  = PK {-# UNPACK #-} !FCat
       {-# UNPACK #-} !FIndex
       {-# UNPACK #-} !Int
  deriving (Eq,Ord,Show)

-- | Passive chart: maps completed constituents to their chart category.
type PassiveChart = Map.Map PassiveKey FCat

emptyPC :: PassiveChart
emptyPC = Map.empty

lookupPC :: PassiveKey -> PassiveChart -> Maybe FCat
lookupPC = Map.lookup

insertPC :: PassiveKey -> FCat -> PassiveChart -> PassiveChart
insertPC = Map.insert
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Forest
|
||||
----------------------------------------------------------------
|
||||
|
||||
-- | Fold over the productions stored for a category in the forest,
-- following coercions transitively.  @f@ handles rule applications,
-- @g@ handles constants; @b@ is the start value.
foldForest :: (FunId -> [FCat] -> b -> b) -> (Expr -> [String] -> b -> b) -> b -> FCat -> IntMap.IntMap (Set.Set Production) -> b
foldForest f g b fcat forest =
  maybe b (Set.fold node b) (IntMap.lookup fcat forest)
  where
    node (FCoerce fcat')     acc = foldForest f g acc fcat' forest
    node (FApply funid args) acc = f funid args acc
    node (FConst const toks) acc = g const toks acc
|
||||
|
||||
|
||||
----------------------------------------------------------------
|
||||
-- Parse State
|
||||
----------------------------------------------------------------
|
||||
|
||||
-- | An abstract data type whose values represent
-- the current state in an incremental parser.
data ParseState = PState PGF ParserInfo Chart (TMap.TrieMap String (Set.Set Active))

-- The chart of the incremental parser.
data Chart
  = Chart
      { active  :: ActiveChart                        -- ^ active items at the current position
      , actives :: [ActiveChart]                      -- ^ archived active charts, newest first
      , passive :: PassiveChart                       -- ^ completed constituents
      , forest  :: IntMap.IntMap (Set.Set Production) -- ^ shared parse forest
      , nextId  :: {-# UNPACK #-} !FCat               -- ^ next fresh chart category
      , offset  :: {-# UNPACK #-} !Int                -- ^ current position in the input
      }
      deriving Show

----------------------------------------------------------------
-- Error State
----------------------------------------------------------------

-- | An abstract data type whose values represent
-- the state in an incremental parser after an error.
data ErrorState = EState PGF ParserInfo Chart
|
||||
188
src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs
Normal file
188
src/runtime/haskell/PGF/Parsing/FCFG/Utilities.hs
Normal file
@@ -0,0 +1,188 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/13 12:40:19 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.6 $
|
||||
--
|
||||
-- Basic type declarations and functions for grammar formalisms
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
module PGF.Parsing.FCFG.Utilities where
|
||||
|
||||
import Control.Monad
|
||||
import Data.Array
|
||||
import Data.List (groupBy)
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- ranges as single pairs
|
||||
|
||||
-- | A range record: one range per linearization field of a constituent.
type RangeRec = [Range]

-- | A span of input positions, or the empty range (matches anywhere).
data Range = Range {-# UNPACK #-} !Int {-# UNPACK #-} !Int
           | EmptyRange
  deriving (Eq, Ord, Show)

makeRange :: Int -> Int -> Range
makeRange = Range

-- | Concatenate two adjacent ranges; the result is empty when they do
-- not meet.  'EmptyRange' is the unit of concatenation.
concatRange :: Range -> Range -> [Range]
concatRange EmptyRange rng        = return rng
concatRange rng        EmptyRange = return rng
concatRange (Range i j) (Range j' k) = [Range i k | j==j']

-- | Left end of a non-empty range.  Raises an explicit error on
-- 'EmptyRange' instead of an anonymous pattern-match failure.
minRange :: Range -> Int
minRange (Range i _) = i
minRange EmptyRange  = error "minRange: EmptyRange"

-- | Right end of a non-empty range.  Raises an explicit error on
-- 'EmptyRange' instead of an anonymous pattern-match failure.
maxRange :: Range -> Int
maxRange (Range _ j) = j
maxRange EmptyRange  = error "maxRange: EmptyRange"
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * representations of input tokens

-- | Input tokens together with all ranges at which each token occurs.
data Input t = MkInput { inputBounds :: (Int, Int),      -- ^ (first, last) input position
                         inputToken  :: Assoc t [Range]  -- ^ token -> occurrence ranges
                       }

-- | Build the input structure from a token list; the token at position
-- @i@ spans the width-one range (i, i+1).
input :: Ord t => [t] -> Input t
input toks = MkInput inBounds inToken
  where
    inBounds = (0, length toks)
    inToken  = accumAssoc id [ (tok, makeRange i j) | (i,j,tok) <- zip3 [0..] [1..] toks ]

-- | Like 'input', but each position may carry several alternative tokens.
inputMany :: Ord t => [[t]] -> Input t
inputMany toks = MkInput inBounds inToken
  where
    inBounds = (0, length toks)
    inToken  = accumAssoc id [ (tok, makeRange i j) | (i,j,ts) <- zip3 [0..] [1..] toks, tok <- ts ]
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * representations of syntactical analyses
|
||||
|
||||
-- ** charts as finite maps over edges

-- | The values of the chart, a list of key-daughters pairs,
-- has unique keys. In essence, it is a map from 'n' to daughters.
-- The daughters should be a set (not necessarily sorted) of rhs's.
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]

-- | One analysis stored in a chart: a metavariable, a rule node with
-- daughter edges, or a literal leaf.
data SyntaxNode n e = SMeta
                    | SNode n [e]
                    | SString String
                    | SInt Integer
                    | SFloat Double
  deriving (Eq,Ord,Show)
||||
|
||||
-- | Group consecutive 'SNode's that share the same label, collecting
-- their daughter lists into a single node.  Other node kinds pass
-- through unchanged.
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
groupSyntaxNodes [] = []
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
  where
    (ess,xs') = collect xs

    -- take the prefix of nodes with the same label n0.
    -- (Renamed from `span`, which shadowed Prelude.span.)
    collect [] = ([],[])
    collect xs@(SNode n es:xs')
      | n0 == n   = let (ess,xs'') = collect xs' in (es:ess,xs'')
      | otherwise = ([],xs)
    collect xs = ([],xs)          -- was missing: crashed on a non-SNode head
groupSyntaxNodes (SMeta    :xs) = SMeta       : groupSyntaxNodes xs  -- was missing: crashed on SMeta
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
groupSyntaxNodes (SInt    n:xs) = (SInt n)    : groupSyntaxNodes xs
groupSyntaxNodes (SFloat  f:xs) = (SFloat f)  : groupSyntaxNodes xs
|
||||
|
||||
-- ** syntax forests
|
||||
|
||||
data SyntaxForest n = FMeta
                    | FNode n [[SyntaxForest n]]
                      -- ^ The outer list should be a set (not necessarily sorted)
                      -- of possible alternatives. Ie. the outer list
                      -- is a disjunctive node, and the inner lists
                      -- are (conjunctive) concatenative nodes
                    | FString String
                    | FInt Integer
                    | FFloat Double
  deriving (Eq, Ord, Show)

-- Mapping over node labels only; leaves are untouched.
instance Functor SyntaxForest where
  fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
  fmap _ (FString s)       = FString s
  fmap _ (FInt n)          = FInt n
  fmap _ (FFloat f)        = FFloat f
  fmap _ (FMeta)           = FMeta

-- | The label at the root of the forest, if it has one.
forestName :: SyntaxForest n -> Maybe n
forestName (FNode n _) = Just n
forestName _           = Nothing

-- | Unify a whole list of forests, starting from 'FMeta' (the unit).
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
unifyManyForests = foldM unifyForests FMeta

-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
-- and all children can be unified
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
unifyForests FMeta forest = return forest
unifyForests forest FMeta = return forest
unifyForests (FNode name1 children1) (FNode name2 children2)
  -- NOTE: when the guard fails, control falls through to the final
  -- catch-all clause, which reports a unification failure
  | name1 == name2 && not (null children) = return $ FNode name1 children
  where children = [ forests | forests1 <- children1, forests2 <- children2,
                               sameLength forests1 forests2,
                               forests <- zipWithM unifyForests forests1 forests2 ]
unifyForests (FString s1) (FString s2)
  | s1 == s2 = return $ FString s1
unifyForests (FInt n1) (FInt n2)
  | n1 == n2 = return $ FInt n1
unifyForests (FFloat f1) (FFloat f2)
  | f1 == f2 = return $ FFloat f1
-- NOTE(review): relies on the monad's 'fail' (MonadFail on modern GHC)
unifyForests _ _ = fail "forest unification failure"
|
||||
|
||||
|
||||
-- ** conversions between representations
|
||||
|
||||
chart2forests :: (Ord n, Ord e) =>
                 SyntaxChart n e    -- ^ The complete chart
              -> (e -> Bool)        -- ^ When is an edge 'FMeta'?
              -> [e]                -- ^ The starting edges
              -> [SyntaxForest n]   -- ^ The result has unique keys, ie. all 'n' are joined together.
                                    -- In essence, the result is a map from 'n' to forest daughters
chart2forests chart isMeta = concatMap (edge2forests [])
  where -- edges is the path of edges already visited (cycle guard)
        edge2forests edges edge
          | isMeta edge         = [FMeta]
          | edge `elem` edges   = []
          | otherwise           = map (item2forest (edge:edges)) $ chart ? edge
        item2forest edges (SMeta)               = FMeta
        item2forest edges (SNode name children) =
            FNode name $ children >>= mapM (edge2forests edges)
        item2forest edges (SString s)           = FString s
        item2forest edges (SInt n)              = FInt n
        item2forest edges (SFloat f)            = FFloat f

-- | Apply the argument profiles attached to each node, turning a forest
-- over (function, profiles) into forests over plain function names.
-- A wildcard function node is flattened into its children.
applyProfileToForest :: SyntaxForest (CId,[Profile]) -> [SyntaxForest CId]
applyProfileToForest (FNode (fun,profiles) children)
  | fun == wildCId = concat chForests
  | otherwise      = [ FNode fun chForests | not (null chForests) ]
  where chForests = concat [ mapM (unifyManyForests . map (forests !!)) profiles |
                             forests0 <- children,
                             forests <- mapM applyProfileToForest forests0 ]
applyProfileToForest (FString s) = [FString s]
applyProfileToForest (FInt n)    = [FInt n]
applyProfileToForest (FFloat f)  = [FFloat f]
applyProfileToForest (FMeta)     = [FMeta]
|
||||
|
||||
|
||||
-- | Expand a syntax forest into the list of all trees it represents.
forest2trees :: SyntaxForest CId -> [Tree]
forest2trees forest =
  case forest of
    FNode n fss -> [ Fun n ts | fs <- fss, ts <- mapM forest2trees fs ]
    FString s   -> [Lit (LStr s)]
    FInt n      -> [Lit (LInt n)]
    FFloat f    -> [Lit (LFlt f)]
    FMeta       -> [Meta 0]
|
||||
113
src/runtime/haskell/PGF/ShowLinearize.hs
Normal file
113
src/runtime/haskell/PGF/ShowLinearize.hs
Normal file
@@ -0,0 +1,113 @@
|
||||
module PGF.ShowLinearize (
|
||||
collectWords,
|
||||
tableLinearize,
|
||||
recordLinearize,
|
||||
termLinearize,
|
||||
tabularLinearize,
|
||||
allLinearize,
|
||||
markLinearize
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import PGF.Macros
|
||||
import PGF.Linearize
|
||||
|
||||
import GF.Data.Operations
|
||||
import Data.List
|
||||
import qualified Data.Map as Map
|
||||
|
||||
-- printing linearizations in different ways with source parameters
|
||||
|
||||
-- internal representation, only used internally in this module
data Record =
    RR [(String,Record)]    -- ^ record
  | RT [(String,Record)]    -- ^ table
  | RFV [Record]            -- ^ free variation
  | RS String               -- ^ string value
  | RCon String             -- ^ parameter constructor

-- Render a 'Record' in GF-source-like concrete syntax.
prRecord :: Record -> String
prRecord = prr where
  prr t = case t of
    RR fs -> concat $
      "{" :
      (intersperse ";" (map (\ (l,v) -> unwords [l,"=", prr v]) fs)) ++ ["}"]
    RT fs -> concat $
      "table {" :
      (intersperse ";" (map (\ (l,v) -> unwords [l,"=>",prr v]) fs)) ++ ["}"]
    RFV ts -> concat $
      "variants {" : (intersperse ";" (map prr ts)) ++ ["}"]
    RS s -> prQuotedString s
    RCon s -> s

-- uses the encoding of record types in PGF.paramlincat
mkRecord :: Term -> Term -> Record
mkRecord typ trm = case (typ,trm) of
  (_, FV ts)          -> RFV $ map (mkRecord typ) ts
  (R rs, R ts)        -> RR [(str lab, mkRecord ty t) | (P lab ty, t) <- zip rs ts]
  (S [FV ps,ty],R ts) -> RT [(str par, mkRecord ty t) | (par, t) <- zip ps ts]
  -- push a prefix token into all string alternatives
  (_,W s (R ts))      -> mkRecord typ (R [K (KS (s ++ u)) | K (KS u) <- ts])
  (FV ps, C i)        -> RCon $ str $ ps !! i
  (S [], _)           -> case realizes trm of
                           [s] -> RS s
                           ss  -> RFV $ map RS ss
  _                   -> RS $ show trm ---- printTree trm
  where
    str = realize
|
||||
-- | Show all branches of the linearization table, one line per variant,
-- without labels and params.  @unlex@ post-processes each branch.
allLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
allLinearize unlex pgf lang = concatMap (unlex . unlines . snd) . tabularLinearize pgf lang
||||
|
||||
-- show all branches, with labels and params
tableLinearize :: (String -> String) -> PGF -> CId -> Expr -> String
tableLinearize unlex pgf lang = unlines . map pr . tabularLinearize pgf lang where
  pr (p,vs) = p +++ ":" +++ unwords (intersperse "|" (map unlex vs))

-- create a table from labels+params to variants
tabularLinearize :: PGF -> CId -> Expr -> [(String,[String])]
tabularLinearize pgf lang = branches . recLinearize pgf lang where
  branches r = case r of
    RR fs  -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
    RT fs  -> [(lab +++ b,s) | (lab,t) <- fs, (b,s) <- branches t]
    RFV rs -> concatMap branches rs
    RS s   -> [([], [s])]
    RCon _ -> []   -- parameter constructors produce no string branches

-- show record in GF-source-like syntax
recordLinearize :: PGF -> CId -> Expr -> String
recordLinearize pgf lang = prRecord . recLinearize pgf lang

-- create a GF-like record, forming the basis of all functions above
recLinearize :: PGF -> CId -> Expr -> Record
recLinearize pgf lang tree = mkRecord typ $ linTree pgf lang tree where
  -- NOTE(review): only function applications are handled here; any other
  -- tree shape causes a pattern-match failure -- confirm against callers
  typ = case expr2tree tree of
    Fun f _ -> lookParamLincat pgf lang $ valCat $ lookType pgf f
||||
|
||||
-- | Show the raw PGF linearization term of a tree.
termLinearize :: PGF -> CId -> Expr -> String
termLinearize pgf lang tree = show (linTree pgf lang tree)

-- | Show bracketed markup with references to the tree structure;
-- only the first variant is used, the empty string if there is none.
markLinearize :: PGF -> CId -> Expr -> String
markLinearize pgf lang tree =
  case linearizesMark pgf lang tree of
    []      -> ""
    (s : _) -> s
|
||||
|
||||
|
||||
-- for Morphology: word, lemma, tags
collectWords :: PGF -> Language -> [(String, [(CId,String)])]
collectWords pgf lang =
  concatMap collOne
    -- only zero-argument functions (lexical entries) are considered here
    [(f,c,0) | (f,(DTyp [] c _,_,_)) <- Map.toList $ funs $ abstract pgf]
  where
    -- linearize f applied to i metavariables and flatten the record
    collOne (f,c,i) =
      fromRec f [showCId c] (recLinearize pgf lang (foldl EApp (EFun f) (replicate i (EMeta 888))))
    -- v accumulates the parameter path, innermost first
    fromRec f v r = case r of
      RR rs  -> concat [fromRec f v t | (_,t) <- rs]
      RT rs  -> concat [fromRec f (p:v) t | (p,t) <- rs]
      RFV rs -> concatMap (fromRec f v) rs
      RS s   -> [(s,[(f,unwords (reverse v))])]
      RCon c -> [] ---- inherent
||||
|
||||
71
src/runtime/haskell/PGF/Tree.hs
Normal file
71
src/runtime/haskell/PGF/Tree.hs
Normal file
@@ -0,0 +1,71 @@
|
||||
module PGF.Tree
|
||||
( Tree(..),
|
||||
tree2expr, expr2tree,
|
||||
prTree
|
||||
) where
|
||||
|
||||
import PGF.CId
|
||||
import PGF.Expr hiding (Tree)
|
||||
|
||||
import Data.Char
|
||||
import Data.List as List
|
||||
import Control.Monad
|
||||
import qualified Text.PrettyPrint as PP
|
||||
import qualified Text.ParserCombinators.ReadP as RP
|
||||
|
||||
-- | The tree is an evaluated expression in the abstract syntax
-- of the grammar. The type is especially restricted to not
-- allow unapplied lambda abstractions. The tree is used directly
-- from the linearizer and is produced directly from the parser.
data Tree =
   Abs [(BindType,CId)] Tree    -- ^ lambda abstraction. The list of variables is non-empty
 | Var CId                      -- ^ variable
 | Fun CId [Tree]               -- ^ function application
 | Lit Literal                  -- ^ literal
 | Meta {-# UNPACK #-} !MetaId  -- ^ meta variable
  deriving (Eq, Ord)

-----------------------------------------------------
-- Conversion Expr <-> Tree
-----------------------------------------------------

-- | Converts a tree to expression. The conversion
-- is always total, every tree is a valid expression.
tree2expr :: Tree -> Expr
tree2expr = tree2expr []
  where
    -- ys is the environment of bound variables, innermost first;
    -- named variables become de Bruijn indices
    tree2expr ys (Fun x ts) = foldl EApp (EFun x) (List.map (tree2expr ys) ts)
    tree2expr ys (Lit l)    = ELit l
    tree2expr ys (Meta n)   = EMeta n
    tree2expr ys (Abs xs t) = foldr (\(b,x) e -> EAbs b x e) (tree2expr (List.map snd (reverse xs)++ys) t) xs
    tree2expr ys (Var x)    = case List.lookup x (zip ys [0..]) of
                                Just i  -> EVar i
                                Nothing -> error "unknown variable"

-- | Converts an expression to tree. The conversion is only partial.
-- Variables and meta variables of function type and beta redexes are not allowed.
expr2tree :: Expr -> Tree
expr2tree e = abs [] [] e
  where
    -- collect consecutive lambdas into one Abs node
    abs ys xs (EAbs b x e) = abs ys ((b,x):xs) e
    abs ys xs (ETyped e _) = abs ys xs e
    abs ys xs e = case xs of
                    [] -> app ys [] e
                    xs -> Abs (reverse xs) (app (map snd xs++ys) [] e)

    -- walk down the application spine; as is the argument stack
    app xs as (EApp e1 e2) = app xs ((abs xs [] e2) : as) e1
    app xs as (ELit l)
      | List.null as = Lit l
      | otherwise    = error "literal of function type encountered"
    app xs as (EMeta n)
      | List.null as = Meta n
      | otherwise    = error "meta variables of function type are not allowed in trees"
    app xs as (EAbs _ x e) = error "beta redexes are not allowed in trees"
    app xs as (EVar i)     = Var (xs !! i)
    app xs as (EFun f)     = Fun f as
    app xs as (ETyped e _) = app xs as e

-- Render a tree by converting it to an expression first.
prTree :: Tree -> String
prTree = showExpr [] . tree2expr
|
||||
|
||||
103
src/runtime/haskell/PGF/Type.hs
Normal file
103
src/runtime/haskell/PGF/Type.hs
Normal file
@@ -0,0 +1,103 @@
|
||||
module PGF.Type ( Type(..), Hypo,
|
||||
readType, showType,
|
||||
mkType, mkHypo, mkDepHypo, mkImplHypo,
|
||||
pType, ppType, ppHypo ) where
|
||||
|
||||
import PGF.CId
|
||||
import {-# SOURCE #-} PGF.Expr
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import qualified Text.PrettyPrint as PP
|
||||
import qualified Text.ParserCombinators.ReadP as RP
|
||||
import Control.Monad
|
||||
|
||||
-- | To read a type from a 'String', use 'readType'.
data Type =
  DTyp [Hypo] CId [Expr]
  deriving (Eq,Ord,Show)

-- | 'Hypo' represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
type Hypo = (BindType,CId,Type)

-- | Reads a 'Type' from a 'String'.  Returns 'Nothing' unless the parse
-- is unambiguous and consumes the whole input (up to trailing spaces).
readType :: String -> Maybe Type
readType s = case [x | (x,cs) <- RP.readP_to_S pType s, all isSpace cs] of
               [x] -> Just x
               _   -> Nothing

-- | renders type as 'String'. The list
-- of identifiers is the list of all free variables
-- in the expression in order reverse to the order
-- of binding.
showType :: [CId] -> Type -> String
showType vars = PP.render . ppType 0 vars
|
||||
|
||||
-- | creates a type from list of hypothesises, category and
|
||||
-- list of arguments for the category. The operation
|
||||
-- @mkType [h_1,...,h_n] C [e_1,...,e_m]@ will create
|
||||
-- @h_1 -> ... -> h_n -> C e_1 ... e_m@
|
||||
mkType :: [Hypo] -> CId -> [Expr] -> Type
|
||||
mkType hyps cat args = DTyp hyps cat args
|
||||
|
||||
-- | creates hypothesis for non-dependent type i.e. A
|
||||
mkHypo :: Type -> Hypo
|
||||
mkHypo ty = (Explicit,wildCId,ty)
|
||||
|
||||
-- | creates hypothesis for dependent type i.e. (x : A)
|
||||
mkDepHypo :: CId -> Type -> Hypo
|
||||
mkDepHypo x ty = (Explicit,x,ty)
|
||||
|
||||
-- | creates hypothesis for dependent type with implicit argument i.e. ({x} : A)
|
||||
mkImplHypo :: CId -> Type -> Hypo
|
||||
mkImplHypo x ty = (Implicit,x,ty)
|
||||
|
||||
pType :: RP.ReadP Type
|
||||
pType = do
|
||||
RP.skipSpaces
|
||||
hyps <- RP.sepBy (pHypo >>= \h -> RP.skipSpaces >> RP.string "->" >> return h) RP.skipSpaces
|
||||
RP.skipSpaces
|
||||
(cat,args) <- pAtom
|
||||
return (DTyp (concat hyps) cat args)
|
||||
where
|
||||
pHypo =
|
||||
do (cat,args) <- pAtom
|
||||
return [(Explicit,wildCId,DTyp [] cat args)]
|
||||
RP.<++
|
||||
(RP.between (RP.char '(') (RP.char ')') $ do
|
||||
xs <- RP.option [(Explicit,wildCId)] $ do
|
||||
xs <- pBinds
|
||||
RP.skipSpaces
|
||||
RP.char ':'
|
||||
return xs
|
||||
ty <- pType
|
||||
return [(b,v,ty) | (b,v) <- xs])
|
||||
RP.<++
|
||||
(RP.between (RP.char '{') (RP.char '}') $ do
|
||||
vs <- RP.sepBy1 (RP.skipSpaces >> pCId) (RP.skipSpaces >> RP.char ',')
|
||||
RP.skipSpaces
|
||||
RP.char ':'
|
||||
ty <- pType
|
||||
return [(Implicit,v,ty) | v <- vs])
|
||||
|
||||
pAtom = do
|
||||
cat <- pCId
|
||||
RP.skipSpaces
|
||||
args <- RP.sepBy pArg RP.skipSpaces
|
||||
return (cat, args)
|
||||
|
||||
ppType :: Int -> [CId] -> Type -> PP.Doc
|
||||
ppType d scope (DTyp hyps cat args)
|
||||
| null hyps = ppRes scope cat args
|
||||
| otherwise = let (scope',hdocs) = mapAccumL ppHypo scope hyps
|
||||
in ppParens (d > 0) (foldr (\hdoc doc -> hdoc PP.<+> PP.text "->" PP.<+> doc) (ppRes scope' cat args) hdocs)
|
||||
where
|
||||
ppRes scope cat es = ppCId cat PP.<+> PP.hsep (map (ppExpr 4 scope) es)
|
||||
|
||||
ppHypo scope (Explicit,x,typ) = if x == wildCId
|
||||
then (scope,ppType 1 scope typ)
|
||||
else let y = freshName x scope
|
||||
in (y:scope,PP.parens (ppCId y PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
|
||||
ppHypo scope (Implicit,x,typ) = if x == wildCId
|
||||
then (scope,PP.parens (PP.braces (ppCId x) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
|
||||
else let y = freshName x scope
|
||||
in (y:scope,PP.parens (PP.braces (ppCId y) PP.<+> PP.char ':' PP.<+> ppType 0 scope typ))
|
||||
524
src/runtime/haskell/PGF/TypeCheck.hs
Normal file
524
src/runtime/haskell/PGF/TypeCheck.hs
Normal file
@@ -0,0 +1,524 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : PGF.TypeCheck
|
||||
-- Maintainer : Krasimir Angelov
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- Type checking in abstract syntax with dependent types.
|
||||
-- The type checker also performs renaming and checking for unknown
|
||||
-- functions. The variable references are replaced by de Bruijn indices.
|
||||
--
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.TypeCheck (checkType, checkExpr, inferExpr,
|
||||
|
||||
ppTcError, TcError(..)
|
||||
) where
|
||||
|
||||
import PGF.Data
|
||||
import PGF.Expr
|
||||
import PGF.Macros (typeOfHypo)
|
||||
import PGF.CId
|
||||
|
||||
import Data.Map as Map
|
||||
import Data.IntMap as IntMap
|
||||
import Data.Maybe as Maybe
|
||||
import Data.List as List
|
||||
import Control.Monad
|
||||
import Text.PrettyPrint
|
||||
|
||||
-----------------------------------------------------
|
||||
-- The Scope
|
||||
-----------------------------------------------------
|
||||
|
||||
data TType = TTyp Env Type
|
||||
newtype Scope = Scope [(CId,TType)]
|
||||
|
||||
emptyScope = Scope []
|
||||
|
||||
addScopedVar :: CId -> TType -> Scope -> Scope
|
||||
addScopedVar x tty (Scope gamma) = Scope ((x,tty):gamma)
|
||||
|
||||
-- | returns the type and the De Bruijn index of a local variable
|
||||
lookupVar :: CId -> Scope -> Maybe (Int,TType)
|
||||
lookupVar x (Scope gamma) = listToMaybe [(i,tty) | ((y,tty),i) <- zip gamma [0..], x == y]
|
||||
|
||||
-- | returns the type and the name of a local variable
|
||||
getVar :: Int -> Scope -> (CId,TType)
|
||||
getVar i (Scope gamma) = gamma !! i
|
||||
|
||||
scopeEnv :: Scope -> Env
|
||||
scopeEnv (Scope gamma) = let n = length gamma
|
||||
in [VGen (n-i-1) [] | i <- [0..n-1]]
|
||||
|
||||
scopeVars :: Scope -> [CId]
|
||||
scopeVars (Scope gamma) = List.map fst gamma
|
||||
|
||||
scopeSize :: Scope -> Int
|
||||
scopeSize (Scope gamma) = length gamma
|
||||
|
||||
-----------------------------------------------------
|
||||
-- The Monad
|
||||
-----------------------------------------------------
|
||||
|
||||
type MetaStore = IntMap MetaValue
|
||||
data MetaValue
|
||||
= MUnbound Scope [Expr -> TcM ()]
|
||||
| MBound Expr
|
||||
| MGuarded Expr [Expr -> TcM ()] {-# UNPACK #-} !Int -- the Int is the number of constraints that have to be solved
|
||||
-- to unlock this meta variable
|
||||
|
||||
newtype TcM a = TcM {unTcM :: Abstr -> MetaId -> MetaStore -> TcResult a}
|
||||
data TcResult a
|
||||
= Ok {-# UNPACK #-} !MetaId MetaStore a
|
||||
| Fail TcError
|
||||
|
||||
instance Monad TcM where
|
||||
return x = TcM (\abstr metaid ms -> Ok metaid ms x)
|
||||
f >>= g = TcM (\abstr metaid ms -> case unTcM f abstr metaid ms of
|
||||
Ok metaid ms x -> unTcM (g x) abstr metaid ms
|
||||
Fail e -> Fail e)
|
||||
|
||||
instance Functor TcM where
|
||||
fmap f x = TcM (\abstr metaid ms -> case unTcM x abstr metaid ms of
|
||||
Ok metaid ms x -> Ok metaid ms (f x)
|
||||
Fail e -> Fail e)
|
||||
|
||||
lookupCatHyps :: CId -> TcM [Hypo]
|
||||
lookupCatHyps cat = TcM (\abstr metaid ms -> case Map.lookup cat (cats abstr) of
|
||||
Just hyps -> Ok metaid ms hyps
|
||||
Nothing -> Fail (UnknownCat cat))
|
||||
|
||||
lookupFunType :: CId -> TcM TType
|
||||
lookupFunType fun = TcM (\abstr metaid ms -> case Map.lookup fun (funs abstr) of
|
||||
Just (ty,_,_) -> Ok metaid ms (TTyp [] ty)
|
||||
Nothing -> Fail (UnknownFun fun))
|
||||
|
||||
newMeta :: Scope -> TcM MetaId
|
||||
newMeta scope = TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MUnbound scope []) ms) metaid)
|
||||
|
||||
newGuardedMeta :: Scope -> Expr -> TcM MetaId
|
||||
newGuardedMeta scope e = getFuns >>= \funs -> TcM (\abstr metaid ms -> Ok (metaid+1) (IntMap.insert metaid (MGuarded e [] 0) ms) metaid)
|
||||
|
||||
getMeta :: MetaId -> TcM MetaValue
|
||||
getMeta i = TcM (\abstr metaid ms -> Ok metaid ms $! case IntMap.lookup i ms of
|
||||
Just mv -> mv)
|
||||
setMeta :: MetaId -> MetaValue -> TcM ()
|
||||
setMeta i mv = TcM (\abstr metaid ms -> Ok metaid (IntMap.insert i mv ms) ())
|
||||
|
||||
tcError :: TcError -> TcM a
|
||||
tcError e = TcM (\abstr metaid ms -> Fail e)
|
||||
|
||||
getFuns :: TcM Funs
|
||||
getFuns = TcM (\abstr metaid ms -> Ok metaid ms (funs abstr))
|
||||
|
||||
addConstraint :: MetaId -> MetaId -> Env -> [Value] -> (Value -> TcM ()) -> TcM ()
|
||||
addConstraint i j env vs c = do
|
||||
funs <- getFuns
|
||||
mv <- getMeta j
|
||||
case mv of
|
||||
MUnbound scope cs -> addRef >> setMeta j (MUnbound scope ((\e -> release >> c (apply funs env e vs)) : cs))
|
||||
MBound e -> c (apply funs env e vs)
|
||||
MGuarded e cs x | x == 0 -> c (apply funs env e vs)
|
||||
| otherwise -> addRef >> setMeta j (MGuarded e ((\e -> release >> c (apply funs env e vs)) : cs) x)
|
||||
where
|
||||
addRef = TcM (\abstr metaid ms -> case IntMap.lookup i ms of
|
||||
Just (MGuarded e cs x) -> Ok metaid (IntMap.insert i (MGuarded e cs (x+1)) ms) ())
|
||||
|
||||
release = TcM (\abstr metaid ms -> case IntMap.lookup i ms of
|
||||
Just (MGuarded e cs x) -> if x == 1
|
||||
then unTcM (sequence_ [c e | c <- cs]) abstr metaid (IntMap.insert i (MGuarded e [] 0) ms)
|
||||
else Ok metaid (IntMap.insert i (MGuarded e cs (x-1)) ms) ())
|
||||
|
||||
-----------------------------------------------------
|
||||
-- Type errors
|
||||
-----------------------------------------------------
|
||||
|
||||
-- | If an error occurs in the typechecking phase
|
||||
-- the type checker returns not a plain text error message
|
||||
-- but a 'TcError' structure which describes the error.
|
||||
data TcError
|
||||
= UnknownCat CId -- ^ Unknown category name was found.
|
||||
| UnknownFun CId -- ^ Unknown function name was found.
|
||||
| WrongCatArgs [CId] Type CId Int Int -- ^ A category was applied to wrong number of arguments.
|
||||
-- The first integer is the number of expected arguments and
|
||||
-- the second the number of given arguments.
|
||||
-- The @[CId]@ argument is the list of free variables
|
||||
-- in the type. It should be used for the 'showType' function.
|
||||
| TypeMismatch [CId] Expr Type Type -- ^ The expression is not of the expected type.
|
||||
-- The first type is the expected type, while
|
||||
-- the second is the inferred. The @[CId]@ argument is the list
|
||||
-- of free variables in both the expression and the type.
|
||||
-- It should be used for the 'showType' and 'showExpr' functions.
|
||||
| NotFunType [CId] Expr Type -- ^ Something that is not of function type was applied to an argument.
|
||||
| CannotInferType [CId] Expr -- ^ It is not possible to infer the type of an expression.
|
||||
| UnresolvedMetaVars [CId] Expr [MetaId] -- ^ Some metavariables have to be instantiated in order to complete the typechecking.
|
||||
| UnexpectedImplArg [CId] Expr -- ^ Implicit argument was passed where the type doesn't allow it
|
||||
|
||||
-- | Renders the type checking error to a document. See 'Text.PrettyPrint'.
|
||||
ppTcError :: TcError -> Doc
|
||||
ppTcError (UnknownCat cat) = text "Category" <+> ppCId cat <+> text "is not in scope"
|
||||
ppTcError (UnknownFun fun) = text "Function" <+> ppCId fun <+> text "is not in scope"
|
||||
ppTcError (WrongCatArgs xs ty cat m n) = text "Category" <+> ppCId cat <+> text "should have" <+> int m <+> text "argument(s), but has been given" <+> int n $$
|
||||
text "In the type:" <+> ppType 0 xs ty
|
||||
ppTcError (TypeMismatch xs e ty1 ty2) = text "Couldn't match expected type" <+> ppType 0 xs ty1 $$
|
||||
text " against inferred type" <+> ppType 0 xs ty2 $$
|
||||
text "In the expression:" <+> ppExpr 0 xs e
|
||||
ppTcError (NotFunType xs e ty) = text "A function type is expected for the expression" <+> ppExpr 0 xs e <+> text "instead of type" <+> ppType 0 xs ty
|
||||
ppTcError (CannotInferType xs e) = text "Cannot infer the type of expression" <+> ppExpr 0 xs e
|
||||
ppTcError (UnresolvedMetaVars xs e ms) = text "Meta variable(s)" <+> fsep (List.map ppMeta ms) <+> text "should be resolved" $$
|
||||
text "in the expression:" <+> ppExpr 0 xs e
|
||||
ppTcError (UnexpectedImplArg xs e) = braces (ppExpr 0 xs e) <+> text "is implicit argument but not implicit argument is expected here"
|
||||
|
||||
-----------------------------------------------------
|
||||
-- checkType
|
||||
-----------------------------------------------------
|
||||
|
||||
-- | Check whether a given type is consistent with the abstract
|
||||
-- syntax of the grammar.
|
||||
checkType :: PGF -> Type -> Either TcError Type
|
||||
checkType pgf ty =
|
||||
case unTcM (tcType emptyScope ty >>= refineType) (abstract pgf) 0 IntMap.empty of
|
||||
Ok _ ms ty -> Right ty
|
||||
Fail err -> Left err
|
||||
|
||||
tcType :: Scope -> Type -> TcM Type
|
||||
tcType scope ty@(DTyp hyps cat es) = do
|
||||
(scope,hyps) <- tcHypos scope hyps
|
||||
c_hyps <- lookupCatHyps cat
|
||||
let m = length es
|
||||
n = length [ty | (Explicit,x,ty) <- c_hyps]
|
||||
(delta,es) <- tcCatArgs scope es [] c_hyps ty n m
|
||||
return (DTyp hyps cat es)
|
||||
|
||||
tcHypos :: Scope -> [Hypo] -> TcM (Scope,[Hypo])
|
||||
tcHypos scope [] = return (scope,[])
|
||||
tcHypos scope (h:hs) = do
|
||||
(scope,h ) <- tcHypo scope h
|
||||
(scope,hs) <- tcHypos scope hs
|
||||
return (scope,h:hs)
|
||||
|
||||
tcHypo :: Scope -> Hypo -> TcM (Scope,Hypo)
|
||||
tcHypo scope (b,x,ty) = do
|
||||
ty <- tcType scope ty
|
||||
if x == wildCId
|
||||
then return (scope,(b,x,ty))
|
||||
else return (addScopedVar x (TTyp (scopeEnv scope) ty) scope,(b,x,ty))
|
||||
|
||||
tcCatArgs scope [] delta [] ty0 n m = return (delta,[])
|
||||
tcCatArgs scope (EImplArg e:es) delta ((Explicit,x,ty):hs) ty0 n m = tcError (UnexpectedImplArg (scopeVars scope) e)
|
||||
tcCatArgs scope (EImplArg e:es) delta ((Implicit,x,ty):hs) ty0 n m = do
|
||||
e <- tcExpr scope e (TTyp delta ty)
|
||||
funs <- getFuns
|
||||
(delta,es) <- if x == wildCId
|
||||
then tcCatArgs scope es delta hs ty0 n m
|
||||
else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m
|
||||
return (delta,EImplArg e:es)
|
||||
tcCatArgs scope es delta ((Implicit,x,ty):hs) ty0 n m = do
|
||||
i <- newMeta scope
|
||||
(delta,es) <- if x == wildCId
|
||||
then tcCatArgs scope es delta hs ty0 n m
|
||||
else tcCatArgs scope es (VMeta i (scopeEnv scope) [] : delta) hs ty0 n m
|
||||
return (delta,EImplArg (EMeta i) : es)
|
||||
tcCatArgs scope (e:es) delta ((Explicit,x,ty):hs) ty0 n m = do
|
||||
e <- tcExpr scope e (TTyp delta ty)
|
||||
funs <- getFuns
|
||||
(delta,es) <- if x == wildCId
|
||||
then tcCatArgs scope es delta hs ty0 n m
|
||||
else tcCatArgs scope es (eval funs (scopeEnv scope) e:delta) hs ty0 n m
|
||||
return (delta,e:es)
|
||||
tcCatArgs scope _ delta _ ty0@(DTyp _ cat _) n m = do
|
||||
tcError (WrongCatArgs (scopeVars scope) ty0 cat n m)
|
||||
|
||||
-----------------------------------------------------
|
||||
-- checkExpr
|
||||
-----------------------------------------------------
|
||||
|
||||
-- | Checks an expression against a specified type.
|
||||
checkExpr :: PGF -> Expr -> Type -> Either TcError Expr
|
||||
checkExpr pgf e ty =
|
||||
case unTcM (do e <- tcExpr emptyScope e (TTyp [] ty)
|
||||
e <- refineExpr e
|
||||
checkResolvedMetaStore emptyScope e
|
||||
return e) (abstract pgf) 0 IntMap.empty of
|
||||
Ok _ ms e -> Right e
|
||||
Fail err -> Left err
|
||||
|
||||
tcExpr :: Scope -> Expr -> TType -> TcM Expr
|
||||
tcExpr scope e0@(EAbs Implicit x e) tty =
|
||||
case tty of
|
||||
TTyp delta (DTyp ((Implicit,y,ty):hs) c es) -> do e <- if y == wildCId
|
||||
then tcExpr (addScopedVar x (TTyp delta ty) scope)
|
||||
e (TTyp delta (DTyp hs c es))
|
||||
else tcExpr (addScopedVar x (TTyp delta ty) scope)
|
||||
e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
|
||||
return (EAbs Implicit x e)
|
||||
_ -> do ty <- evalType (scopeSize scope) tty
|
||||
tcError (NotFunType (scopeVars scope) e0 ty)
|
||||
tcExpr scope e0 (TTyp delta (DTyp ((Implicit,y,ty):hs) c es)) = do
|
||||
e0 <- if y == wildCId
|
||||
then tcExpr (addScopedVar wildCId (TTyp delta ty) scope)
|
||||
e0 (TTyp delta (DTyp hs c es))
|
||||
else tcExpr (addScopedVar wildCId (TTyp delta ty) scope)
|
||||
e0 (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
|
||||
return (EAbs Implicit wildCId e0)
|
||||
tcExpr scope e0@(EAbs Explicit x e) tty =
|
||||
case tty of
|
||||
TTyp delta (DTyp ((Explicit,y,ty):hs) c es) -> do e <- if y == wildCId
|
||||
then tcExpr (addScopedVar x (TTyp delta ty) scope)
|
||||
e (TTyp delta (DTyp hs c es))
|
||||
else tcExpr (addScopedVar x (TTyp delta ty) scope)
|
||||
e (TTyp ((VGen (scopeSize scope) []):delta) (DTyp hs c es))
|
||||
return (EAbs Explicit x e)
|
||||
_ -> do ty <- evalType (scopeSize scope) tty
|
||||
tcError (NotFunType (scopeVars scope) e0 ty)
|
||||
tcExpr scope (EMeta _) tty = do
|
||||
i <- newMeta scope
|
||||
return (EMeta i)
|
||||
tcExpr scope e0 tty = do
|
||||
(e0,tty0) <- infExpr scope e0
|
||||
i <- newGuardedMeta scope e0
|
||||
eqType scope (scopeSize scope) i tty tty0
|
||||
return (EMeta i)
|
||||
|
||||
|
||||
-----------------------------------------------------
|
||||
-- inferExpr
|
||||
-----------------------------------------------------
|
||||
|
||||
-- | Tries to infer the type of a given expression. Note that
|
||||
-- even if the expression is type correct it is not always
|
||||
-- possible to infer its type in the GF type system.
|
||||
-- In this case the function returns the 'CannotInferType' error.
|
||||
inferExpr :: PGF -> Expr -> Either TcError (Expr,Type)
|
||||
inferExpr pgf e =
|
||||
case unTcM (do (e,tty) <- infExpr emptyScope e
|
||||
e <- refineExpr e
|
||||
checkResolvedMetaStore emptyScope e
|
||||
ty <- evalType 0 tty
|
||||
return (e,ty)) (abstract pgf) 1 IntMap.empty of
|
||||
Ok _ ms (e,ty) -> Right (e,ty)
|
||||
Fail err -> Left err
|
||||
|
||||
infExpr :: Scope -> Expr -> TcM (Expr,TType)
|
||||
infExpr scope e0@(EApp e1 e2) = do
|
||||
(e1,TTyp delta ty) <- infExpr scope e1
|
||||
(e0,delta,ty) <- tcArg scope e1 e2 delta ty
|
||||
return (e0,TTyp delta ty)
|
||||
infExpr scope e0@(EFun x) = do
|
||||
case lookupVar x scope of
|
||||
Just (i,tty) -> return (EVar i,tty)
|
||||
Nothing -> do tty <- lookupFunType x
|
||||
return (e0,tty)
|
||||
infExpr scope e0@(EVar i) = do
|
||||
return (e0,snd (getVar i scope))
|
||||
infExpr scope e0@(ELit l) = do
|
||||
let cat = case l of
|
||||
LStr _ -> mkCId "String"
|
||||
LInt _ -> mkCId "Int"
|
||||
LFlt _ -> mkCId "Float"
|
||||
return (e0,TTyp [] (DTyp [] cat []))
|
||||
infExpr scope (ETyped e ty) = do
|
||||
ty <- tcType scope ty
|
||||
e <- tcExpr scope e (TTyp (scopeEnv scope) ty)
|
||||
return (ETyped e ty,TTyp (scopeEnv scope) ty)
|
||||
infExpr scope (EImplArg e) = do
|
||||
(e,tty) <- infExpr scope e
|
||||
return (EImplArg e,tty)
|
||||
infExpr scope e = tcError (CannotInferType (scopeVars scope) e)
|
||||
|
||||
tcArg scope e1 e2 delta ty0@(DTyp [] c es) = do
|
||||
ty1 <- evalType (scopeSize scope) (TTyp delta ty0)
|
||||
tcError (NotFunType (scopeVars scope) e1 ty1)
|
||||
tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = tcError (UnexpectedImplArg (scopeVars scope) e2)
|
||||
tcArg scope e1 (EImplArg e2) delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do
|
||||
e2 <- tcExpr scope e2 (TTyp delta ty)
|
||||
funs <- getFuns
|
||||
if x == wildCId
|
||||
then return (EApp e1 (EImplArg e2), delta,DTyp hs c es)
|
||||
else return (EApp e1 (EImplArg e2),eval funs (scopeEnv scope) e2:delta,DTyp hs c es)
|
||||
tcArg scope e1 e2 delta ty0@(DTyp ((Explicit,x,ty):hs) c es) = do
|
||||
e2 <- tcExpr scope e2 (TTyp delta ty)
|
||||
funs <- getFuns
|
||||
if x == wildCId
|
||||
then return (EApp e1 e2, delta,DTyp hs c es)
|
||||
else return (EApp e1 e2,eval funs (scopeEnv scope) e2:delta,DTyp hs c es)
|
||||
tcArg scope e1 e2 delta ty0@(DTyp ((Implicit,x,ty):hs) c es) = do
|
||||
i <- newMeta scope
|
||||
if x == wildCId
|
||||
then tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 delta (DTyp hs c es)
|
||||
else tcArg scope (EApp e1 (EImplArg (EMeta i))) e2 (VMeta i (scopeEnv scope) [] : delta) (DTyp hs c es)
|
||||
|
||||
-----------------------------------------------------
|
||||
-- eqType
|
||||
-----------------------------------------------------
|
||||
|
||||
eqType :: Scope -> Int -> MetaId -> TType -> TType -> TcM ()
|
||||
eqType scope k i0 tty1@(TTyp delta1 ty1@(DTyp hyps1 cat1 es1)) tty2@(TTyp delta2 ty2@(DTyp hyps2 cat2 es2))
|
||||
| cat1 == cat2 = do (k,delta1,delta2) <- eqHyps k delta1 hyps1 delta2 hyps2
|
||||
sequence_ [eqExpr k delta1 e1 delta2 e2 | (e1,e2) <- zip es1 es2]
|
||||
| otherwise = raiseTypeMatchError
|
||||
where
|
||||
raiseTypeMatchError = do ty1 <- evalType k tty1
|
||||
ty2 <- evalType k tty2
|
||||
e <- refineExpr (EMeta i0)
|
||||
tcError (TypeMismatch (scopeVars scope) e ty1 ty2)
|
||||
|
||||
eqHyps :: Int -> Env -> [Hypo] -> Env -> [Hypo] -> TcM (Int,Env,Env)
|
||||
eqHyps k delta1 [] delta2 [] =
|
||||
return (k,delta1,delta2)
|
||||
eqHyps k delta1 ((_,x,ty1) : h1s) delta2 ((_,y,ty2) : h2s) = do
|
||||
eqType scope k i0 (TTyp delta1 ty1) (TTyp delta2 ty2)
|
||||
if x == wildCId && y == wildCId
|
||||
then eqHyps k delta1 h1s delta2 h2s
|
||||
else if x /= wildCId && y /= wildCId
|
||||
then eqHyps (k+1) ((VGen k []):delta1) h1s ((VGen k []):delta2) h2s
|
||||
else raiseTypeMatchError
|
||||
eqHyps k delta1 h1s delta2 h2s = raiseTypeMatchError
|
||||
|
||||
eqExpr :: Int -> Env -> Expr -> Env -> Expr -> TcM ()
|
||||
eqExpr k env1 e1 env2 e2 = do
|
||||
funs <- getFuns
|
||||
eqValue k (eval funs env1 e1) (eval funs env2 e2)
|
||||
|
||||
eqValue :: Int -> Value -> Value -> TcM ()
|
||||
eqValue k v1 v2 = do
|
||||
v1 <- deRef v1
|
||||
v2 <- deRef v2
|
||||
eqValue' k v1 v2
|
||||
|
||||
deRef v@(VMeta i env vs) = do
|
||||
mv <- getMeta i
|
||||
funs <- getFuns
|
||||
case mv of
|
||||
MBound e -> deRef (apply funs env e vs)
|
||||
MGuarded e _ x | x == 0 -> deRef (apply funs env e vs)
|
||||
| otherwise -> return v
|
||||
MUnbound _ _ -> return v
|
||||
deRef v = return v
|
||||
|
||||
eqValue' k (VSusp i env vs1 c) v2 = addConstraint i0 i env vs1 (\v1 -> eqValue k (c v1) v2)
|
||||
eqValue' k v1 (VSusp i env vs2 c) = addConstraint i0 i env vs2 (\v2 -> eqValue k v1 (c v2))
|
||||
eqValue' k (VMeta i env1 vs1) (VMeta j env2 vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2
|
||||
eqValue' k (VMeta i env1 vs1) v2 = do (MUnbound scopei cs) <- getMeta i
|
||||
e2 <- mkLam i scopei env1 vs1 v2
|
||||
setMeta i (MBound e2)
|
||||
sequence_ [c e2 | c <- cs]
|
||||
eqValue' k v1 (VMeta i env2 vs2) = do (MUnbound scopei cs) <- getMeta i
|
||||
e1 <- mkLam i scopei env2 vs2 v1
|
||||
setMeta i (MBound e1)
|
||||
sequence_ [c e1 | c <- cs]
|
||||
eqValue' k (VApp f1 vs1) (VApp f2 vs2) | f1 == f2 = zipWithM_ (eqValue k) vs1 vs2
|
||||
eqValue' k (VLit l1) (VLit l2 ) | l1 == l2 = return ()
|
||||
eqValue' k (VGen i vs1) (VGen j vs2) | i == j = zipWithM_ (eqValue k) vs1 vs2
|
||||
eqValue' k (VClosure env1 (EAbs _ x1 e1)) (VClosure env2 (EAbs _ x2 e2)) = let v = VGen k []
|
||||
in eqExpr (k+1) (v:env1) e1 (v:env2) e2
|
||||
eqValue' k v1 v2 = raiseTypeMatchError
|
||||
|
||||
mkLam i scope env vs0 v = do
|
||||
let k = scopeSize scope
|
||||
vs = reverse (take k env) ++ vs0
|
||||
xs = nub [i | VGen i [] <- vs]
|
||||
if length vs == length xs
|
||||
then return ()
|
||||
else raiseTypeMatchError
|
||||
v <- occurCheck i k xs v
|
||||
funs <- getFuns
|
||||
return (addLam vs0 (value2expr funs (length xs) v))
|
||||
where
|
||||
addLam [] e = e
|
||||
addLam (v:vs) e = EAbs Explicit var (addLam vs e)
|
||||
|
||||
var = mkCId "v"
|
||||
|
||||
occurCheck i0 k xs (VApp f vs) = do vs <- mapM (occurCheck i0 k xs) vs
|
||||
return (VApp f vs)
|
||||
occurCheck i0 k xs (VLit l) = return (VLit l)
|
||||
occurCheck i0 k xs (VMeta i env vs) = do if i == i0
|
||||
then raiseTypeMatchError
|
||||
else return ()
|
||||
mv <- getMeta i
|
||||
funs <- getFuns
|
||||
case mv of
|
||||
MBound e -> occurCheck i0 k xs (apply funs env e vs)
|
||||
MGuarded e _ _ -> occurCheck i0 k xs (apply funs env e vs)
|
||||
MUnbound scopei _ | scopeSize scopei > k -> raiseTypeMatchError
|
||||
| otherwise -> do vs <- mapM (occurCheck i0 k xs) vs
|
||||
return (VMeta i env vs)
|
||||
occurCheck i0 k xs (VSusp i env vs cnt) = do addConstraint i0 i env vs (\v -> occurCheck i0 k xs (cnt v) >> return ())
|
||||
return (VSusp i env vs cnt)
|
||||
occurCheck i0 k xs (VGen i vs) = case List.findIndex (==i) xs of
|
||||
Just i -> do vs <- mapM (occurCheck i0 k xs) vs
|
||||
return (VGen i vs)
|
||||
Nothing -> raiseTypeMatchError
|
||||
occurCheck i0 k xs (VClosure env e) = do env <- mapM (occurCheck i0 k xs) env
|
||||
return (VClosure env e)
|
||||
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- check for meta variables that still have to be resolved
|
||||
-----------------------------------------------------------
|
||||
|
||||
checkResolvedMetaStore :: Scope -> Expr -> TcM ()
|
||||
checkResolvedMetaStore scope e = TcM (\abstr metaid ms ->
|
||||
let xs = [i | (i,mv) <- IntMap.toList ms, not (isResolved mv)]
|
||||
in if List.null xs
|
||||
then Ok metaid ms ()
|
||||
else Fail (UnresolvedMetaVars (scopeVars scope) e xs))
|
||||
where
|
||||
isResolved (MUnbound _ []) = True
|
||||
isResolved (MGuarded _ _ _) = True
|
||||
isResolved (MBound _) = True
|
||||
isResolved _ = False
|
||||
|
||||
-----------------------------------------------------
|
||||
-- evalType
|
||||
-----------------------------------------------------
|
||||
|
||||
evalType :: Int -> TType -> TcM Type
|
||||
evalType k (TTyp delta ty) = do funs <- getFuns
|
||||
refineType (evalTy funs k delta ty)
|
||||
where
|
||||
evalTy sig k delta (DTyp hyps cat es) =
|
||||
let ((k1,delta1),hyps1) = mapAccumL (evalHypo sig) (k,delta) hyps
|
||||
in DTyp hyps1 cat (List.map (normalForm sig k1 delta1) es)
|
||||
|
||||
evalHypo sig (k,delta) (b,x,ty) =
|
||||
if x == wildCId
|
||||
then ((k, delta),(b,x,evalTy sig k delta ty))
|
||||
else ((k+1,(VGen k []):delta),(b,x,evalTy sig k delta ty))
|
||||
|
||||
|
||||
-----------------------------------------------------
|
||||
-- refinement
|
||||
-----------------------------------------------------
|
||||
|
||||
refineExpr :: Expr -> TcM Expr
|
||||
refineExpr e = TcM (\abstr metaid ms -> Ok metaid ms (refineExpr_ ms e))
|
||||
|
||||
refineExpr_ ms e = refine e
|
||||
where
|
||||
refine (EAbs b x e) = EAbs b x (refine e)
|
||||
refine (EApp e1 e2) = EApp (refine e1) (refine e2)
|
||||
refine (ELit l) = ELit l
|
||||
refine (EMeta i) = case IntMap.lookup i ms of
|
||||
Just (MBound e ) -> refine e
|
||||
Just (MGuarded e _ _) -> refine e
|
||||
_ -> EMeta i
|
||||
refine (EFun f) = EFun f
|
||||
refine (EVar i) = EVar i
|
||||
refine (ETyped e ty) = ETyped (refine e) (refineType_ ms ty)
|
||||
refine (EImplArg e) = EImplArg (refine e)
|
||||
|
||||
refineType :: Type -> TcM Type
|
||||
refineType ty = TcM (\abstr metaid ms -> Ok metaid ms (refineType_ ms ty))
|
||||
|
||||
refineType_ ms (DTyp hyps cat es) = DTyp [(b,x,refineType_ ms ty) | (b,x,ty) <- hyps] cat (List.map (refineExpr_ ms) es)
|
||||
|
||||
value2expr sig i (VApp f vs) = foldl EApp (EFun f) (List.map (value2expr sig i) vs)
|
||||
value2expr sig i (VGen j vs) = foldl EApp (EVar (i-j-1)) (List.map (value2expr sig i) vs)
|
||||
value2expr sig i (VMeta j env vs) = foldl EApp (EMeta j) (List.map (value2expr sig i) vs)
|
||||
value2expr sig i (VSusp j env vs k) = value2expr sig i (k (VGen j vs))
|
||||
value2expr sig i (VLit l) = ELit l
|
||||
value2expr sig i (VClosure env (EAbs b x e)) = EAbs b x (value2expr sig (i+1) (eval sig ((VGen i []):env) e))
|
||||
353
src/runtime/haskell/PGF/VisualizeTree.hs
Normal file
353
src/runtime/haskell/PGF/VisualizeTree.hs
Normal file
@@ -0,0 +1,353 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : VisualizeTree
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date:
|
||||
-- > CVS $Author:
|
||||
-- > CVS $Revision:
|
||||
--
|
||||
-- Print a graph of an abstract syntax tree in Graphviz DOT format
|
||||
-- Based on BB's VisualizeGrammar
|
||||
-- FIXME: change this to use GF.Visualization.Graphviz,
|
||||
-- instead of rolling its own.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module PGF.VisualizeTree ( graphvizAbstractTree
|
||||
, graphvizParseTree
|
||||
, graphvizDependencyTree
|
||||
, graphvizAlignment
|
||||
, tree2mk
|
||||
, getDepLabels
|
||||
, PosText(..), readPosText
|
||||
) where
|
||||
|
||||
import PGF.CId (CId,showCId,pCId,mkCId)
|
||||
import PGF.Data
|
||||
import PGF.Tree
|
||||
import PGF.Expr (showExpr)
|
||||
import PGF.Linearize
|
||||
import PGF.Macros (lookValCat)
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import Data.List (intersperse,nub,isPrefixOf,sort,sortBy)
|
||||
import Data.Char (isDigit)
|
||||
import qualified Text.ParserCombinators.ReadP as RP
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
graphvizAbstractTree :: PGF -> (Bool,Bool) -> Expr -> String
|
||||
graphvizAbstractTree pgf funscats = prGraph False . tree2graph pgf funscats . expr2tree
|
||||
|
||||
tree2graph :: PGF -> (Bool,Bool) -> Tree -> [String]
|
||||
tree2graph pgf (funs,cats) = prf [] where
|
||||
prf ps t = let (nod,lab) = prn ps t in
|
||||
(nod ++ " [label = " ++ lab ++ ", style = \"solid\", shape = \"plaintext\"] ;") :
|
||||
case t of
|
||||
Fun cid trees ->
|
||||
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
|
||||
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
|
||||
Abs xs (Fun cid trees) ->
|
||||
[ pra (j:ps) nod t | (j,t) <- zip [0..] trees] ++
|
||||
concat [prf (j:ps) t | (j,t) <- zip [0..] trees]
|
||||
_ -> []
|
||||
prn ps t = case t of
|
||||
Fun cid _ ->
|
||||
let
|
||||
fun = if funs then showCId cid else ""
|
||||
cat = if cats then prCat cid else ""
|
||||
colon = if funs && cats then " : " else ""
|
||||
lab = "\"" ++ fun ++ colon ++ cat ++ "\""
|
||||
in (show(show (ps :: [Int])),lab)
|
||||
Abs bs tree ->
|
||||
let fun = case tree of
|
||||
Fun cid _ -> Fun cid []
|
||||
_ -> tree
|
||||
in (show(show (ps :: [Int])),"\"" ++ esc (prTree (Abs bs fun)) ++ "\"")
|
||||
_ -> (show(show (ps :: [Int])),"\"" ++ esc (prTree t) ++ "\"")
|
||||
pra i nod t = nod ++ arr ++ fst (prn i t) ++ " [style = \"solid\"];"
|
||||
arr = " -- " -- if digr then " -> " else " -- "
|
||||
prCat = showCId . lookValCat pgf
|
||||
esc = concatMap (\c -> if c =='\\' then [c,c] else [c]) --- escape backslash in abstracts
|
||||
|
||||
prGraph digr ns = concat $ map (++"\n") $ [graph ++ "{\n"] ++ ns ++ ["}"] where
|
||||
graph = if digr then "digraph" else "graph"
|
||||
|
||||
|
||||
-- replace each non-atomic constructor with mkC, where C is the val cat
|
||||
tree2mk :: PGF -> Expr -> String
|
||||
tree2mk pgf = showExpr [] . tree2expr . t2m . expr2tree where
|
||||
t2m t = case t of
|
||||
Fun cid [] -> t
|
||||
Fun cid ts -> Fun (mk cid) (map t2m ts)
|
||||
_ -> t
|
||||
mk = mkCId . ("mk" ++) . showCId . lookValCat pgf
|
||||
|
||||
-- dependency trees from Linearize.linearizeMark

-- | Render the dependency structure of @exp@ either as graphviz source
-- (any @format@ other than "malt"/"malt_input") or as tab-separated
-- rows for MaltParser: "malt" gives full 10-column CoNLL-style rows,
-- "malt_input" only the first 6 columns.
--
-- @debug@ embeds extra diagnostics (positions, label provenance) in the
-- output; @mlab@ optionally maps functions to per-argument dependency
-- labels, where the label "head" marks the head argument.
-- NOTE(review): @ms@ is not used anywhere in this definition -- it looks
-- like a planned "string to match against" (see comment below); confirm
-- before removing.
graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Expr -> String
graphvizDependencyTree format debug mlab ms pgf lang exp = case format of
  "malt" -> unlines (lin2dep format)
  "malt_input" -> unlines (lin2dep format)
  _ -> prGraph True (lin2dep format)

 where

  -- Dispatch again on format; trace (presumably Debug.Trace) emits the
  -- debug string, which is empty unless debug is set (see ifd).
  lin2dep format = trace (ifd (show sortedNodes ++ show nodeWords)) $ case format of
    "malt" -> map (concat . intersperse "\t") wnodes
    "malt_input" -> map (concat . intersperse "\t" . take 6) wnodes
    _ -> prelude ++ nodes ++ links

  -- "if debug": keep a string only in debug mode, else empty.
  ifd s = if debug then s else []

  -- Position-annotated linearization of the expression.
  -- NOTE(review): head is partial -- assumes at least one linearization.
  pot = readPosText $ head $ linearizesMark pgf lang exp
      ---- use Just str if you have str to match against

  prelude = ["rankdir=LR ;", "node [shape = plaintext] ;"]

  -- One graphviz node per word chunk, labelled "<i>. <words>".
  nodes = map mkNode nodeWords
  mkNode (i,((_,p),ss)) =
    node p ++ " [label = \"" ++ show i ++ ". " ++ ifd (show p) ++ unwords ss ++ "\"] ;"
  -- Word 0 is an artificial ROOT; real words are numbered from 1 and
  -- keep their (function, tree position) annotation.
  nodeWords = (0,((mkCId "",[]),["ROOT"])) : zip [1..] [((f,p),w)|
                ((Just f,p),w) <- wlins pot]

  -- One edge per non-root word, from its dominating word, labelled with
  -- the dependency label of the argument position.
  links = map mkLink thelinks
  thelinks = [(word y, x, label tr y x) |
                (_,((f,x),_)) <- tail nodeWords,
                let y = dominant x]
  mkLink (x,y,l) = node x ++ " -> " ++ node y ++ " [label = \"" ++ l ++ "\"] ;"
  -- Double show yields a quoted graphviz identifier for a position.
  node = show . show

  -- Position of the dominating node of x: the nearest proper prefix of
  -- x whose head path diverges from x.
  dominant x = case x of
    [] -> x
    _ | not (x == hx) -> hx
    _ -> dominant (init x)
   where
    hx = headArg (init x) tr x

  -- Extend x0 with the head-argument index found at the end of path x
  -- in tree tr.
  headArg x0 tr x = case (tr,x) of
    (Fun f [],[_]) -> x0 ---- ??
    (Fun f ts,[_]) -> x0 ++ [getHead (length ts - 1) f]
    (Fun f ts,i:y) -> headArg x0 (ts !! i) y
    _ -> x0 ----

  -- Label for the edge y -> x: the first index where the two paths
  -- diverge, resolved against the function at their common prefix.
  label tr y x = case span (uncurry (==)) (zip y x) of
    (xys,(_,i):_) -> getLabel i (funAt tr (map fst xys))
    _ -> "" ----

  -- Function at a given position in the tree.
  funAt tr x = case (tr,x) of
    (Fun f _ ,[]) -> f
    (Fun f ts,i:y) -> funAt (ts !! i) y
    _ -> mkCId (prTree tr) ----

  -- Map a position to the position of an actual word: follow head
  -- arguments downward until a word position is reached.
  word x = if elem x sortedNodes then x else
    let x' = headArg x tr (x ++[0]) in
    if x' == x then [] else word x'

  tr = expr2tree exp
  -- Positions of all words, including the ROOT's empty position.
  sortedNodes = [p | (_,((_,p),_)) <- nodeWords]

  labels = maybe Map.empty id mlab
  -- Index of the head argument of f: the position of the label "head",
  -- or the caller-supplied default (the last argument) if f is unknown.
  getHead i f = case Map.lookup f labels of
    Just ls -> length $ takeWhile (/= "head") ls
    _ -> i
  -- Label of argument i of f, falling back to "f#i" when no entry fits.
  getLabel i f = case Map.lookup f labels of
    Just ls | length ls > i -> ifd (showCId f ++ "#" ++ show i ++ "=") ++ ls !! i
    _ -> showCId f ++ "#" ++ show i

  -- to generate CoNLL format for MaltParser
  -- Position -> word number.
  nodeMap :: Map.Map [Int] Int
  nodeMap = Map.fromList [(p,i) | (i,((_,p),_)) <- nodeWords]

  -- Dependent position -> (dominant position, label).
  arcMap :: Map.Map [Int] ([Int],String)
  arcMap = Map.fromList [(y,(x,l)) | (x,y,l) <- thelinks]

  -- Dominating node index and label for a position; root if absent.
  lookDomLab p = case Map.lookup p arcMap of
    Just (q,l) -> (maybe 0 id (Map.lookup q nodeMap), if null l then rootlabel else l)
    _ -> (0,rootlabel)

  -- One CoNLL-style row per word: id, form, lemma (the function), the
  -- value category twice as POS, morph, head index, label, two blanks.
  wnodes = [[show i, maltws ws, showCId fun, pos, pos, morph, show dom, lab, unspec, unspec] |
              (i, ((fun,p),ws)) <- tail nodeWords,
              let pos = showCId $ lookValCat pgf fun,
              let morph = unspec,
              let (dom,lab) = lookDomLab p
           ]
  maltws = concat . intersperse "+" . words . unwords -- no spaces in column 2
  unspec = "_"
  rootlabel = "ROOT"
|
||||
|
||||
-- | Map from abstract function names to their per-argument dependency
-- labels, as consumed by 'graphvizDependencyTree'.
type Labels = Map.Map CId [String]

-- | Parse label declarations, one per line: a function name followed
-- by one label per argument. Lines with no words are ignored.
getDepLabels :: [String] -> Labels
getDepLabels = Map.fromList . concatMap entry
  where
    entry line = case words line of
      fun:labs -> [(mkCId fun, labs)]
      []       -> []
|
||||
|
||||
|
||||
-- parse trees from Linearize.linearizeMark
---- nubrec and domins are quadratic, but could be (n log n)

-- | Render the parse tree of an expression in the given concrete
-- language as graphviz source for an undirected graph.
graphvizParseTree :: PGF -> CId -> Expr -> String
graphvizParseTree pgf lang = prGraph False . lin2tree pgf . linMark where
  -- NOTE(review): head is partial -- assumes at least one linearization.
  linMark = head . linearizesMark pgf lang
    ---- use Just str if you have str to match against

-- Turn one bracketed linearization string into graphviz statements:
-- one record node per tree depth, plus edges between a position and
-- its immediate subpositions. The trace echoes the raw input.
lin2tree pgf s = trace s $ prelude ++ nodes ++ links where

  prelude = ["rankdir=BU ;", "node [shape = record, color = white] ;"]

  -- Numbered rows of (position, category) cells, one row per depth,
  -- with the leaf words as the final row.
  nodeRecs = zip [0..]
    (nub (filter (not . null) (nlins [postext] ++ [leaves postext])))
  nlins pts =
    nubrec [] $ [(p,cat f) | T (Just f, p) _ <- pts] :
      concatMap nlins [ts | T _ ts <- pts]
  -- Leaves get fresh position suffixes from 9990 up, so that they do
  -- not clash with real tree positions.
  leaves pt = [(p++[j],s) | (j,(p,s)) <-
    zip [9990..] [(p,s) | ((_,p),ss) <- wlins pt, s <- ss]]

  -- nub each row and drop cells already seen in earlier rows.
  nubrec es rs = case rs of
    r:rr -> let r' = filter (not . flip elem es) (nub r)
            in r' : nubrec (r' ++ es) rr
    _ -> rs

  nodes = map mkStruct nodeRecs

  -- Graphviz record node with one tagged field per cell.
  mkStruct (i,cs) = struct i ++ "[label = \"" ++ fields cs ++ "\"] ;"
  cat = showCId . lookValCat pgf
  fields cs = concat (intersperse "|" [ mtag (showp p) ++ c | (p,c) <- cs])
  struct i = "struct" ++ show i

  -- An edge for every pair of cells where p is the parent position of q.
  links = map mkEdge domins
  domins = nub [((i,x),(j,y)) |
    (i,xs) <- nodeRecs, (j,ys) <- nodeRecs,
    x <- xs, y <- ys, dominates x y]
  dominates (p,x) (q,y) = not (null q) && p == init q
  mkEdge ((i,x),(j,y)) =
    struct i ++ ":n" ++ uncommas (showp (fst x)) ++ ":s -- " ++
    struct j ++ ":n" ++ uncommas (showp (fst y)) ++ ":n ;"

  postext = readPosText s
|
||||
|
||||
-- auxiliaries for graphviz syntax

-- | Name of the i'th record node: "struct0", "struct1", ...
struct :: Show i => i -> String
struct i = "struct" ++ show i

-- | Port identifier built from a linearization index and a position
-- string, e.g. @mark (2,"0,1") == "n2a0c1"@.
mark :: Show j => (j, String) -> String
mark (j, n) = concat ["n", show j, "a", uncommas n]

-- | Graphviz identifiers may not contain commas; turn each ',' into 'c'.
uncommas :: String -> String
uncommas = map dropComma
  where
    dropComma ',' = 'c'
    dropComma c   = c

-- | Wrap a string in angle brackets, making a record-field tag.
tag :: String -> String
tag s = "<" ++ s ++ ">"

-- | Show a value and strip the first and last character; for a list of
-- ints this removes the surrounding brackets: @showp [0,1] == "0,1"@.
showp :: Show a => a -> String
showp = init . tail . show

-- | Record-field tag for a position string: @mtag "0,1" == "<n0c1>"@.
mtag :: String -> String
mtag = tag . ('n':) . uncommas
|
||||
|
||||
-- word alignments from Linearize.linearizesMark
-- words are chunks like {[0,1,1,0] old}

-- | Render word alignments between the linearizations of an expression
-- in all concrete languages of the grammar: one graphviz record per
-- language, with edges joining equal tree positions in adjacent rows.
graphvizAlignment :: PGF -> Expr -> String
graphvizAlignment pgf = prGraph True . lin2graph . linsMark where
  -- The first linearization in each concrete language.
  linsMark t = [s | la <- cncnames pgf, s <- take 1 (linearizesMark pgf la t)]

-- Turn the per-language linearization strings into graphviz
-- statements. The trace echoes the raw input.
lin2graph :: [String] -> [String]
lin2graph ss = trace (show ss) $ prelude ++ nodes ++ links

 where

  prelude = ["rankdir=LR ;", "node [shape = record] ;"]

  -- For each linearization i: its word chunks j paired with their tree
  -- position (as a comma-free string) and space-escaped words.
  -- NOTE: the outer ws (list of chunks) is deliberately shadowed by
  -- the inner ws (words of one chunk) bound in the generator pattern.
  nlins :: [(Int,[((Int,String),String)])]
  nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] ws]) |
    (i,ws) <- zip [0..] (map (wlins . readPosText) ss)]

  unw = concat . intersperse "\\ " -- space escape in graphviz

  nodes = map mkStruct nlins

  -- Graphviz record node with one tagged field per word chunk.
  mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"

  fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])

  -- NOTE(review): init is partial -- assumes at least one linearization.
  links = nub $ concatMap mkEdge (init nlins)

  -- Edges from row i to row i+1 between chunks with equal positions.
  mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
    [edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]

  edge i v w =
    struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
|
||||
{-
|
||||
alignmentData :: PGF -> [Expr] -> Map.Map String (Map.Map String Double)
|
||||
alignmentData pgf = mkStat . concatMap (mkAlign . linsMark) where
|
||||
linsMark t =
|
||||
[s | la <- take 2 (cncnames pgf), s <- take 1 (linearizesMark pgf la t)]
|
||||
|
||||
mkStat :: [(String,String)] -> Map.Map String (Map.Map String Double)
|
||||
mkStat =
|
||||
|
||||
mkAlign :: [String] -> [(String,String)]
|
||||
mkAlign ss =
|
||||
|
||||
nlins :: [(Int,[((Int,String),String)])]
|
||||
nlins = [(i, [((j,showp p),unw ws) | (j,((_,p),ws)) <- zip [0..] vs]) |
|
||||
(i,vs) <- zip [0..] (map (wlins . readPosText) ss)]
|
||||
|
||||
nodes = map mkStruct nlins
|
||||
|
||||
mkStruct (i, ws) = struct i ++ "[label = \"" ++ fields ws ++ "\"] ;"
|
||||
|
||||
fields ws = concat (intersperse "|" [tag (mark m) ++ " " ++ w | (m,w) <- ws])
|
||||
|
||||
links = nub $ concatMap mkEdge (init nlins)
|
||||
|
||||
mkEdge (i,lin) = let lin' = snd (nlins !! (i+1)) in -- next lin in the list
|
||||
[edge i v w | (v@(_,p),_) <- lin, (w@(_,q),_) <- lin', p == q]
|
||||
|
||||
edge i v w =
|
||||
struct i ++ ":" ++ mark v ++ ":e -> " ++ struct (i+1) ++ ":" ++ mark w ++ ":w ;"
|
||||
-}
|
||||
|
||||
-- | Collect the word chunks of a 'PosText' together with the
-- (function, position) annotation of their closest enclosing node.
-- Empty word runs are dropped; a bare top-level word run gets the
-- dummy annotation (Nothing,[]).
wlins :: PosText -> [((Maybe CId,[Int]),[String])]
wlins (M ws)   = [((Nothing,[]),ws) | not (null ws)]
wlins (T p ts) = concatMap (collect p) ts
  where
    -- Descend through T nodes, remembering the nearest annotation.
    collect _ (T q us) = concatMap (collect q) us
    collect q (M ws)   = [(q,ws) | not (null ws)]
|
||||
|
||||
-- | A linearization annotated with tree positions, as parsed back from
-- the bracketed strings by 'readPosText': @T (fun,pos) ts@ is a
-- bracketed constituent (optional function plus its position in the
-- abstract tree), @M ws@ a run of plain words.
data PosText =
   T (Maybe CId,[Int]) [PosText]
 | M [String]
  deriving Show
|
||||
|
||||
-- | Parse a bracketed, position-annotated linearization string into a
-- 'PosText'.
-- NOTE(review): head is partial -- assumes the input admits at least
-- one (possibly partial) parse.
readPosText :: String -> PosText
readPosText = fst . head . (RP.readP_to_S pPosText) where
  -- A bracketed node "( pos subtexts )" or, failing that (<++ is the
  -- left-biased choice), a run of bracket-free words.
  pPosText = do
      RP.char '(' >> RP.skipSpaces
      p <- pPos
      RP.skipSpaces
      ts <- RP.many pPosText
      RP.char ')' >> RP.skipSpaces
      return (T p ts)
    RP.<++ do
      ws <- RP.sepBy1 (RP.munch1 (flip notElem "()")) (RP.char ' ')
      return (M ws)
  -- A position "(fun,[i1,i2,...])" or a bare "[i1,i2,...]"; the
  -- trailing ')' is consumed here when present.
  pPos = do
    fun <- (RP.char '(' >> pCId >>= \f -> RP.char ',' >> (return $ Just f))
             RP.<++ (return Nothing)
    RP.char '[' >> RP.skipSpaces
    is <- RP.sepBy (RP.munch1 isDigit) (RP.char ',')
    RP.char ']' >> RP.skipSpaces
    RP.char ')' RP.<++ return ' '
    return (fun,map read is)
|
||||
|
||||
|
||||
{-
|
||||
digraph{
|
||||
rankdir ="LR" ;
|
||||
node [shape = record] ;
|
||||
|
||||
struct1 [label = "<f0> this|<f1> very|<f2> intelligent|<f3> man"] ;
|
||||
struct2 [label = "<f0> cet|<f1> homme|<f2> tres|<f3> intelligent|<f4> ci"] ;
|
||||
|
||||
struct1:f0 -> struct2:f0 ;
|
||||
struct1:f1 -> struct2:f2 ;
|
||||
struct1:f2 -> struct2:f3 ;
|
||||
struct1:f3 -> struct2:f1 ;
|
||||
struct1:f0 -> struct2:f4 ;
|
||||
}
|
||||
-}
|
||||
|
||||
Reference in New Issue
Block a user