1
0
forked from GitHub/gf-core

Use Data.Text instead of String. Rename Abstr to Abstract, Concr to Concrete.

This commit is contained in:
John J. Camilleri
2021-02-16 16:04:40 +01:00
parent d394cacddf
commit 398b294734
4 changed files with 63 additions and 49 deletions

View File

@@ -86,7 +86,8 @@ Library
-- For compatability with ghc < 8 -- For compatability with ghc < 8
-- We need transformers-compat >= 0.6.3, but that is only in newer snapshots where it is redundant. -- We need transformers-compat >= 0.6.3, but that is only in newer snapshots where it is redundant.
transformers-compat, transformers-compat,
ghc-prim ghc-prim,
text
hs-source-dirs: src/runtime/haskell hs-source-dirs: src/runtime/haskell
other-modules: other-modules:
@@ -536,6 +537,7 @@ test-suite lpgf
process, process,
random, random,
terminfo, terminfo,
text,
time, time,
transformers-compat, transformers-compat,
unix, unix,

View File

@@ -18,12 +18,13 @@ import Data.Either (lefts, rights)
import Data.List (elemIndex, find, groupBy, sortBy) import Data.List (elemIndex, find, groupBy, sortBy)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import qualified Data.Text as T
import Text.Printf (printf) import Text.Printf (printf)
mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF
mkCanon2lpgf opts gr am = do mkCanon2lpgf opts gr am = do
(an,abs) <- mkAbstr ab (an,abs) <- mkAbstract ab
cncs <- mapM mkConcr cncs cncs <- mapM mkConcrete cncs
let lpgf = LPGF { let lpgf = LPGF {
L.absname = an, L.absname = an,
L.abstract = abs, L.abstract = abs,
@@ -36,11 +37,11 @@ mkCanon2lpgf opts gr am = do
where where
canon@(C.Grammar ab cncs) = grammar2canonical opts am gr canon@(C.Grammar ab cncs) = grammar2canonical opts am gr
mkAbstr :: C.Abstract -> IOE (CId, L.Abstr) mkAbstract :: C.Abstract -> IOE (CId, L.Abstract)
mkAbstr (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstr {}) mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
mkConcr :: C.Concrete -> IOE (CId, L.Concr) mkConcrete :: C.Concrete -> IOE (CId, L.Concrete)
mkConcr (C.Concrete modId absModId flags params lincats lindefs) = do mkConcrete (C.Concrete modId absModId flags params lincats lindefs) = do
let let
paramMap = mkParamMap params paramMap = mkParamMap params
paramTuples = mkParamTuples params paramTuples = mkParamTuples params
@@ -61,9 +62,9 @@ mkCanon2lpgf opts gr am = do
return $ L.LFConcat v1' v2' return $ L.LFConcat v1' v2'
C.LiteralValue ll -> case ll of C.LiteralValue ll -> case ll of
C.FloatConstant f -> return $ L.LFToken (show f) C.FloatConstant f -> return $ L.LFToken $ T.pack $ show f
C.IntConstant i -> return $ L.LFToken (show i) C.IntConstant i -> return $ L.LFToken $ T.pack $ show i
C.StrConstant s -> return $ L.LFToken s C.StrConstant s -> return $ L.LFToken $ T.pack s
C.ErrorValue err -> return $ L.LFError err C.ErrorValue err -> return $ L.LFError err
@@ -132,7 +133,7 @@ mkCanon2lpgf opts gr am = do
C.PreValue pts df -> do C.PreValue pts df -> do
pts' <- forM pts $ \(pfxs, lv) -> do pts' <- forM pts $ \(pfxs, lv) -> do
lv' <- val2lin lv lv' <- val2lin lv
return (pfxs, lv') return (map T.pack pfxs, lv')
df' <- val2lin df df' <- val2lin df
return $ L.LFPre pts' df' return $ L.LFPre pts' df'
@@ -168,7 +169,7 @@ mkCanon2lpgf opts gr am = do
unless (null $ lefts es) (error $ unlines (lefts es)) unless (null $ lefts es) (error $ unlines (lefts es))
return (mdi2i modId, L.Concr { return (mdi2i modId, L.Concrete {
L.lins = lins L.lins = lins
}) })

View File

@@ -1,6 +1,8 @@
-- | Linearisation-only PGF format {-# LANGUAGE OverloadedStrings #-}
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009)
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars" -- | Linearisation-only grammar format.
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
module LPGF where module LPGF where
import PGF (Language) import PGF (Language)
@@ -9,9 +11,9 @@ import PGF.Expr (Expr)
import PGF.Tree (Tree (..), expr2tree, prTree) import PGF.Tree (Tree (..), expr2tree, prTree)
import Data.Binary (Binary, get, put, encodeFile, decodeFile) import Data.Binary (Binary, get, put, encodeFile, decodeFile)
import Data.Char (toUpper)
import Data.List (isPrefixOf)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as T
import Text.Printf (printf) import Text.Printf (printf)
import Prelude hiding ((!!)) import Prelude hiding ((!!))
@@ -20,18 +22,18 @@ import qualified Prelude
-- | Linearisation-only PGF -- | Linearisation-only PGF
data LPGF = LPGF { data LPGF = LPGF {
absname :: CId, absname :: CId,
abstract :: Abstr, abstract :: Abstract,
concretes :: Map.Map CId Concr concretes :: Map.Map CId Concrete
} deriving (Show) } deriving (Show)
-- | Abstract syntax -- | Abstract syntax (currently empty)
data Abstr = Abstr { data Abstract = Abstract {
} deriving (Show) } deriving (Show)
-- | Concrete syntax -- | Concrete syntax
data Concr = Concr { data Concrete = Concrete {
-- lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category -- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ assigning a linearization function to each function lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show) } deriving (Show)
-- | Abstract function type -- | Abstract function type
@@ -49,15 +51,15 @@ data Concr = Concr {
data LinFun = data LinFun =
-- Additions -- Additions
LFError String -- ^ a runtime error, should probably not be supported at all LFError String -- ^ a runtime error, should probably not be supported at all
| LFBind | LFBind -- ^ join adjacent tokens
| LFSpace | LFSpace -- ^ space between adjacent tokens
| LFCapit | LFCapit -- ^ capitalise next character
| LFAllCapit | LFAllCapit -- ^ capitalise next word
| LFPre [([String], LinFun)] LinFun | LFPre [([Text], LinFun)] LinFun
-- From original definition in paper -- From original definition in paper
| LFEmpty | LFEmpty
| LFToken String | LFToken Text
| LFConcat LinFun LinFun | LFConcat LinFun LinFun
| LFInt Int | LFInt Int
| LFTuple [LinFun] | LFTuple [LinFun]
@@ -80,15 +82,15 @@ instance Binary LPGF where
concretes = concs concretes = concs
} }
instance Binary Abstr where instance Binary Abstract where
put abs = return () put abs = return ()
get = return $ Abstr {} get = return $ Abstract {}
instance Binary Concr where instance Binary Concrete where
put concr = put (lins concr) put concr = put (lins concr)
get = do get = do
ls <- get ls <- get
return $ Concr { return $ Concrete {
lins = ls lins = ls
} }
@@ -105,16 +107,24 @@ encodeFile = Data.Binary.encodeFile
readLPGF :: FilePath -> IO LPGF readLPGF :: FilePath -> IO LPGF
readLPGF = Data.Binary.decodeFile readLPGF = Data.Binary.decodeFile
-- | Main linearize function -- | Main linearize function, to 'String'
linearize :: LPGF -> Language -> Expr -> String linearize :: LPGF -> Language -> Expr -> String
linearize lpgf lang = linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
-- | Main linearize function, to 'Data.Text.Text'
linearizeText :: LPGF -> Language -> Expr -> Text
linearizeText lpgf lang =
case Map.lookup lang (concretes lpgf) of case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcr concr Just concr -> linearizeConcreteText concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang) Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function -- | Language-specific linearize function, to 'String'
linearizeConcr :: Concr -> Expr -> String linearizeConcrete :: Concrete -> Expr -> String
linearizeConcr concr expr = lin2string $ lin (expr2tree expr) linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
-- | Language-specific linearize function, to 'Data.Text.Text'
linearizeConcreteText :: Concrete -> Expr -> Text
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
where where
lin :: Tree -> LinFun lin :: Tree -> LinFun
lin tree = case tree of lin tree = case tree of
@@ -127,7 +137,7 @@ linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
-- | Evaluation context is a sequence of terms -- | Evaluation context is a sequence of terms
type Context = [LinFun] type Context = [LinFun]
-- | Operational semantics, Table 2 -- | Operational semantics
eval :: Context -> LinFun -> LinFun eval :: Context -> LinFun -> LinFun
eval cxt t = case t of eval cxt t = case t of
LFError err -> error err LFError err -> error err
@@ -150,7 +160,7 @@ eval cxt t = case t of
_ -> t _ -> t
-- | Turn concrete syntax terms into an actual string -- | Turn concrete syntax terms into an actual string
lin2string :: LinFun -> String lin2string :: LinFun -> Text
lin2string l = case l of lin2string l = case l of
LFEmpty -> "" LFEmpty -> ""
LFBind -> "" -- when encountered at beginning/end LFBind -> "" -- when encountered at beginning/end
@@ -160,15 +170,16 @@ lin2string l = case l of
LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2 LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2
where where
l2' = lin2string l2 l2' = lin2string l2
matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` l2') pfxs ] matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ]
l1 = if null matches then df else head matches l1 = if null matches then df else head matches
LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 ++ lin2string l2 LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2
LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2 LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2
LFConcat LFCapit l2 -> let l = lin2string l2 in toUpper (head l) : tail l LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l
LFConcat LFAllCapit l2 -> let tks = words (lin2string l2) in unwords $ map toUpper (head tks) : tail tks LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks
LFConcat l1 l2 -> unwords $ filter (not.null) [lin2string l1, lin2string l2] LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2]
x -> printf "[%s]" (show x) x -> T.pack $ printf "[%s]" (show x)
-- | List indexing with more verbose error messages
(!!) :: (Show a) => [a] -> Int -> a (!!) :: (Show a) => [a] -> Int -> a
(!!) xs i (!!) xs i
| i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs) | i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs)

View File

@@ -53,7 +53,7 @@ doGrammar' gname cncs = do
Just tree = readExpr ast Just tree = readExpr ast
-- Do some linearization -- Do some linearization
langs = langs =
[ printf "%s: %s" (showLanguage lang) (linearizeConcr concr tree) [ printf "%s: %s" (showLanguage lang) (linearizeConcrete concr tree)
| (lang,concr) <- Map.toList (concretes lpgf) | (lang,concr) <- Map.toList (concretes lpgf)
] ]
mapM_ putStrLn langs mapM_ putStrLn langs