1
0
forked from GitHub/gf-core

Use Data.Text instead of String. Rename Abstr to Abstract, Concr to Concrete.

This commit is contained in:
John J. Camilleri
2021-02-16 16:04:40 +01:00
parent d394cacddf
commit 398b294734
4 changed files with 63 additions and 49 deletions

View File

@@ -1,6 +1,8 @@
-- | Linearisation-only PGF format
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009)
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars"
{-# LANGUAGE OverloadedStrings #-}
-- | Linearisation-only grammar format.
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
module LPGF where
import PGF (Language)
@@ -9,9 +11,9 @@ import PGF.Expr (Expr)
import PGF.Tree (Tree (..), expr2tree, prTree)
import Data.Binary (Binary, get, put, encodeFile, decodeFile)
import Data.Char (toUpper)
import Data.List (isPrefixOf)
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as T
import Text.Printf (printf)
import Prelude hiding ((!!))
@@ -20,18 +22,18 @@ import qualified Prelude
-- | Linearisation-only PGF
data LPGF = LPGF {
absname :: CId,
abstract :: Abstr,
concretes :: Map.Map CId Concr
abstract :: Abstract,
concretes :: Map.Map CId Concrete
} deriving (Show)
-- | Abstract syntax
data Abstr = Abstr {
-- | Abstract syntax (currently empty)
data Abstract = Abstract {
} deriving (Show)
-- | Concrete syntax
data Concr = Concr {
-- lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category
lins :: Map.Map CId LinFun -- ^ assigning a linearization function to each function
data Concrete = Concrete {
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show)
-- | Abstract function type
@@ -49,15 +51,15 @@ data Concr = Concr {
data LinFun =
-- Additions
LFError String -- ^ a runtime error, should probably not be supported at all
| LFBind
| LFSpace
| LFCapit
| LFAllCapit
| LFPre [([String], LinFun)] LinFun
| LFBind -- ^ join adjacent tokens
| LFSpace -- ^ space between adjacent tokens
| LFCapit -- ^ capitalise next character
| LFAllCapit -- ^ capitalise next word
| LFPre [([Text], LinFun)] LinFun
-- From original definition in paper
| LFEmpty
| LFToken String
| LFToken Text
| LFConcat LinFun LinFun
| LFInt Int
| LFTuple [LinFun]
@@ -80,15 +82,15 @@ instance Binary LPGF where
concretes = concs
}
instance Binary Abstr where
instance Binary Abstract where
put abs = return ()
get = return $ Abstr {}
get = return $ Abstract {}
instance Binary Concr where
instance Binary Concrete where
put concr = put (lins concr)
get = do
ls <- get
return $ Concr {
return $ Concrete {
lins = ls
}
@@ -105,16 +107,24 @@ encodeFile = Data.Binary.encodeFile
readLPGF :: FilePath -> IO LPGF
readLPGF = Data.Binary.decodeFile
-- | Main linearize function
-- | Main linearize function, to 'String'
linearize :: LPGF -> Language -> Expr -> String
linearize lpgf lang =
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
-- | Main linearize function, to 'Data.Text.Text'
linearizeText :: LPGF -> Language -> Expr -> Text
linearizeText lpgf lang =
case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcr concr
Just concr -> linearizeConcreteText concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function
linearizeConcr :: Concr -> Expr -> String
linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
-- | Language-specific linearize function, to 'String'
linearizeConcrete :: Concrete -> Expr -> String
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
-- | Language-specific linearize function, to 'Data.Text.Text'
linearizeConcreteText :: Concrete -> Expr -> Text
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
where
lin :: Tree -> LinFun
lin tree = case tree of
@@ -127,7 +137,7 @@ linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
-- | Evaluation context is a sequence of terms
type Context = [LinFun]
-- | Operational semantics, Table 2
-- | Operational semantics
eval :: Context -> LinFun -> LinFun
eval cxt t = case t of
LFError err -> error err
@@ -150,7 +160,7 @@ eval cxt t = case t of
_ -> t
-- | Turn concrete syntax terms into an actual string
lin2string :: LinFun -> String
lin2string :: LinFun -> Text
lin2string l = case l of
LFEmpty -> ""
LFBind -> "" -- when encountered at beginning/end
@@ -160,15 +170,16 @@ lin2string l = case l of
LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2
where
l2' = lin2string l2
matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` l2') pfxs ]
matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ]
l1 = if null matches then df else head matches
LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 ++ lin2string l2
LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2
LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2
LFConcat LFCapit l2 -> let l = lin2string l2 in toUpper (head l) : tail l
LFConcat LFAllCapit l2 -> let tks = words (lin2string l2) in unwords $ map toUpper (head tks) : tail tks
LFConcat l1 l2 -> unwords $ filter (not.null) [lin2string l1, lin2string l2]
x -> printf "[%s]" (show x)
LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l
LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks
LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2]
x -> T.pack $ printf "[%s]" (show x)
-- | List indexing with more verbose error messages
(!!) :: (Show a) => [a] -> Int -> a
(!!) xs i
| i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs)