forked from GitHub/gf-core
Use Data.Text instead of String. Rename Abstr to Abstract, Concr to Concrete.
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
-- | Linearisation-only PGF format
|
||||
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009)
|
||||
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars"
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
-- | Linearisation-only grammar format.
|
||||
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
|
||||
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
|
||||
module LPGF where
|
||||
|
||||
import PGF (Language)
|
||||
@@ -9,9 +11,9 @@ import PGF.Expr (Expr)
|
||||
import PGF.Tree (Tree (..), expr2tree, prTree)
|
||||
|
||||
import Data.Binary (Binary, get, put, encodeFile, decodeFile)
|
||||
import Data.Char (toUpper)
|
||||
import Data.List (isPrefixOf)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Text.Printf (printf)
|
||||
|
||||
import Prelude hiding ((!!))
|
||||
@@ -20,18 +22,18 @@ import qualified Prelude
|
||||
-- | Linearisation-only PGF
|
||||
data LPGF = LPGF {
|
||||
absname :: CId,
|
||||
abstract :: Abstr,
|
||||
concretes :: Map.Map CId Concr
|
||||
abstract :: Abstract,
|
||||
concretes :: Map.Map CId Concrete
|
||||
} deriving (Show)
|
||||
|
||||
-- | Abstract syntax
|
||||
data Abstr = Abstr {
|
||||
-- | Abstract syntax (currently empty)
|
||||
data Abstract = Abstract {
|
||||
} deriving (Show)
|
||||
|
||||
-- | Concrete syntax
|
||||
data Concr = Concr {
|
||||
-- lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category
|
||||
lins :: Map.Map CId LinFun -- ^ assigning a linearization function to each function
|
||||
data Concrete = Concrete {
|
||||
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
|
||||
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
|
||||
} deriving (Show)
|
||||
|
||||
-- | Abstract function type
|
||||
@@ -49,15 +51,15 @@ data Concr = Concr {
|
||||
data LinFun =
|
||||
-- Additions
|
||||
LFError String -- ^ a runtime error, should probably not be supported at all
|
||||
| LFBind
|
||||
| LFSpace
|
||||
| LFCapit
|
||||
| LFAllCapit
|
||||
| LFPre [([String], LinFun)] LinFun
|
||||
| LFBind -- ^ join adjacent tokens
|
||||
| LFSpace -- ^ space between adjacent tokens
|
||||
| LFCapit -- ^ capitalise next character
|
||||
| LFAllCapit -- ^ capitalise next word
|
||||
| LFPre [([Text], LinFun)] LinFun
|
||||
|
||||
-- From original definition in paper
|
||||
| LFEmpty
|
||||
| LFToken String
|
||||
| LFToken Text
|
||||
| LFConcat LinFun LinFun
|
||||
| LFInt Int
|
||||
| LFTuple [LinFun]
|
||||
@@ -80,15 +82,15 @@ instance Binary LPGF where
|
||||
concretes = concs
|
||||
}
|
||||
|
||||
instance Binary Abstr where
|
||||
instance Binary Abstract where
|
||||
put abs = return ()
|
||||
get = return $ Abstr {}
|
||||
get = return $ Abstract {}
|
||||
|
||||
instance Binary Concr where
|
||||
instance Binary Concrete where
|
||||
put concr = put (lins concr)
|
||||
get = do
|
||||
ls <- get
|
||||
return $ Concr {
|
||||
return $ Concrete {
|
||||
lins = ls
|
||||
}
|
||||
|
||||
@@ -105,16 +107,24 @@ encodeFile = Data.Binary.encodeFile
|
||||
readLPGF :: FilePath -> IO LPGF
|
||||
readLPGF = Data.Binary.decodeFile
|
||||
|
||||
-- | Main linearize function
|
||||
-- | Main linearize function, to 'String'
|
||||
linearize :: LPGF -> Language -> Expr -> String
|
||||
linearize lpgf lang =
|
||||
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
|
||||
|
||||
-- | Main linearize function, to 'Data.Text.Text'
|
||||
linearizeText :: LPGF -> Language -> Expr -> Text
|
||||
linearizeText lpgf lang =
|
||||
case Map.lookup lang (concretes lpgf) of
|
||||
Just concr -> linearizeConcr concr
|
||||
Just concr -> linearizeConcreteText concr
|
||||
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
|
||||
|
||||
-- | Language-specific linearize function
|
||||
linearizeConcr :: Concr -> Expr -> String
|
||||
linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
|
||||
-- | Language-specific linearize function, to 'String'
|
||||
linearizeConcrete :: Concrete -> Expr -> String
|
||||
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
|
||||
|
||||
-- | Language-specific linearize function, to 'Data.Text.Text'
|
||||
linearizeConcreteText :: Concrete -> Expr -> Text
|
||||
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
|
||||
where
|
||||
lin :: Tree -> LinFun
|
||||
lin tree = case tree of
|
||||
@@ -127,7 +137,7 @@ linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
|
||||
-- | Evaluation context is a sequence of terms
|
||||
type Context = [LinFun]
|
||||
|
||||
-- | Operational semantics, Table 2
|
||||
-- | Operational semantics
|
||||
eval :: Context -> LinFun -> LinFun
|
||||
eval cxt t = case t of
|
||||
LFError err -> error err
|
||||
@@ -150,7 +160,7 @@ eval cxt t = case t of
|
||||
_ -> t
|
||||
|
||||
-- | Turn concrete syntax terms into an actual string
|
||||
lin2string :: LinFun -> String
|
||||
lin2string :: LinFun -> Text
|
||||
lin2string l = case l of
|
||||
LFEmpty -> ""
|
||||
LFBind -> "" -- when encountered at beginning/end
|
||||
@@ -160,15 +170,16 @@ lin2string l = case l of
|
||||
LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2
|
||||
where
|
||||
l2' = lin2string l2
|
||||
matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` l2') pfxs ]
|
||||
matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ]
|
||||
l1 = if null matches then df else head matches
|
||||
LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 ++ lin2string l2
|
||||
LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2
|
||||
LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2
|
||||
LFConcat LFCapit l2 -> let l = lin2string l2 in toUpper (head l) : tail l
|
||||
LFConcat LFAllCapit l2 -> let tks = words (lin2string l2) in unwords $ map toUpper (head tks) : tail tks
|
||||
LFConcat l1 l2 -> unwords $ filter (not.null) [lin2string l1, lin2string l2]
|
||||
x -> printf "[%s]" (show x)
|
||||
LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l
|
||||
LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks
|
||||
LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2]
|
||||
x -> T.pack $ printf "[%s]" (show x)
|
||||
|
||||
-- | List indexing with more verbose error messages
|
||||
(!!) :: (Show a) => [a] -> Int -> a
|
||||
(!!) xs i
|
||||
| i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs)
|
||||
|
||||
Reference in New Issue
Block a user