diff --git a/gf.cabal b/gf.cabal index 7fdbf392f..ee21264fc 100644 --- a/gf.cabal +++ b/gf.cabal @@ -86,7 +86,8 @@ Library -- For compatability with ghc < 8 -- We need transformers-compat >= 0.6.3, but that is only in newer snapshots where it is redundant. transformers-compat, - ghc-prim + ghc-prim, + text hs-source-dirs: src/runtime/haskell other-modules: @@ -536,6 +537,7 @@ test-suite lpgf process, random, terminfo, + text, time, transformers-compat, unix, diff --git a/src/compiler/GF/Compile/GrammarToLPGF.hs b/src/compiler/GF/Compile/GrammarToLPGF.hs index 9ae1545dd..ae03f0e2c 100644 --- a/src/compiler/GF/Compile/GrammarToLPGF.hs +++ b/src/compiler/GF/Compile/GrammarToLPGF.hs @@ -18,12 +18,13 @@ import Data.Either (lefts, rights) import Data.List (elemIndex, find, groupBy, sortBy) import qualified Data.Map as Map import Data.Maybe (catMaybes) +import qualified Data.Text as T import Text.Printf (printf) mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF mkCanon2lpgf opts gr am = do - (an,abs) <- mkAbstr ab - cncs <- mapM mkConcr cncs + (an,abs) <- mkAbstract ab + cncs <- mapM mkConcrete cncs let lpgf = LPGF { L.absname = an, L.abstract = abs, @@ -36,11 +37,11 @@ mkCanon2lpgf opts gr am = do where canon@(C.Grammar ab cncs) = grammar2canonical opts am gr - mkAbstr :: C.Abstract -> IOE (CId, L.Abstr) - mkAbstr (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstr {}) + mkAbstract :: C.Abstract -> IOE (CId, L.Abstract) + mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {}) - mkConcr :: C.Concrete -> IOE (CId, L.Concr) - mkConcr (C.Concrete modId absModId flags params lincats lindefs) = do + mkConcrete :: C.Concrete -> IOE (CId, L.Concrete) + mkConcrete (C.Concrete modId absModId flags params lincats lindefs) = do let paramMap = mkParamMap params paramTuples = mkParamTuples params @@ -61,9 +62,9 @@ mkCanon2lpgf opts gr am = do return $ L.LFConcat v1' v2' C.LiteralValue ll -> case ll of - C.FloatConstant f -> return $ L.LFToken (show f) - C.IntConstant i -> return $ L.LFToken (show i) - C.StrConstant s -> return $ L.LFToken s + C.FloatConstant f -> return $ L.LFToken $ T.pack $ show f + C.IntConstant i -> return $ L.LFToken $ T.pack $ show i + C.StrConstant s -> return $ L.LFToken $ T.pack s C.ErrorValue err -> return $ L.LFError err @@ -132,7 +133,7 @@ mkCanon2lpgf opts gr am = do C.PreValue pts df -> do pts' <- forM pts $ \(pfxs, lv) -> do lv' <- val2lin lv - return (pfxs, lv') + return (map T.pack pfxs, lv') df' <- val2lin df return $ L.LFPre pts' df' @@ -168,7 +169,7 @@ mkCanon2lpgf opts gr am = do unless (null $ lefts es) (error $ unlines (lefts es)) - return (mdi2i modId, L.Concr { + return (mdi2i modId, L.Concrete { L.lins = lins }) diff --git a/src/runtime/haskell/LPGF.hs b/src/runtime/haskell/LPGF.hs index 66b3adee2..85be9d481 100644 --- a/src/runtime/haskell/LPGF.hs +++ b/src/runtime/haskell/LPGF.hs @@ -1,6 +1,8 @@ --- | Linearisation-only PGF format --- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009) --- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars" +{-# LANGUAGE OverloadedStrings #-} + +-- | Linearisation-only grammar format. +-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009): +-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars". module LPGF where import PGF (Language) @@ -9,9 +11,9 @@ import PGF.Expr (Expr) import PGF.Tree (Tree (..), expr2tree, prTree) import Data.Binary (Binary, get, put, encodeFile, decodeFile) -import Data.Char (toUpper) -import Data.List (isPrefixOf) import qualified Data.Map as Map +import Data.Text (Text) +import qualified Data.Text as T import Text.Printf (printf) import Prelude hiding ((!!)) @@ -20,18 +22,18 @@ import qualified Prelude -- | Linearisation-only PGF data LPGF = LPGF { absname :: CId, - abstract :: Abstr, - concretes :: Map.Map CId Concr + abstract :: Abstract, + concretes :: Map.Map CId Concrete } deriving (Show) --- | Abstract syntax -data Abstr = Abstr { +-- | Abstract syntax (currently empty) +data Abstract = Abstract { } deriving (Show) -- | Concrete syntax -data Concr = Concr { - -- lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category - lins :: Map.Map CId LinFun -- ^ assigning a linearization function to each function +data Concrete = Concrete { + -- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category + lins :: Map.Map CId LinFun -- ^ a linearization function for each function } deriving (Show) -- | Abstract function type @@ -49,15 +51,15 @@ data Concr = Concr { data LinFun = -- Additions LFError String -- ^ a runtime error, should probably not be supported at all - | LFBind - | LFSpace - | LFCapit - | LFAllCapit - | LFPre [([String], LinFun)] LinFun + | LFBind -- ^ join adjacent tokens + | LFSpace -- ^ space between adjacent tokens + | LFCapit -- ^ capitalise next character + | LFAllCapit -- ^ capitalise next word + | LFPre [([Text], LinFun)] LinFun -- From original definition in paper | LFEmpty - | LFToken String + | LFToken Text | LFConcat LinFun LinFun | LFInt Int | LFTuple [LinFun] @@ -80,15 +82,15 @@ instance Binary LPGF where concretes = concs } -instance Binary Abstr where +instance Binary Abstract where put abs = return () - get = return $ Abstr {} + get = return $ Abstract {} -instance Binary Concr where +instance Binary Concrete where put concr = put (lins concr) get = do ls <- get - return $ Concr { + return $ Concrete { lins = ls } @@ -105,16 +107,24 @@ encodeFile = Data.Binary.encodeFile readLPGF :: FilePath -> IO LPGF readLPGF = Data.Binary.decodeFile --- | Main linearize function +-- | Main linearize function, to 'String' linearize :: LPGF -> Language -> Expr -> String -linearize lpgf lang = +linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr + +-- | Main linearize function, to 'Data.Text.Text' +linearizeText :: LPGF -> Language -> Expr -> Text +linearizeText lpgf lang = case Map.lookup lang (concretes lpgf) of - Just concr -> linearizeConcr concr + Just concr -> linearizeConcreteText concr Nothing -> error $ printf "Unknown language: %s" (showCId lang) --- | Language-specific linearize function -linearizeConcr :: Concr -> Expr -> String -linearizeConcr concr expr = lin2string $ lin (expr2tree expr) +-- | Language-specific linearize function, to 'String' +linearizeConcrete :: Concrete -> Expr -> String +linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr + +-- | Language-specific linearize function, to 'Data.Text.Text' +linearizeConcreteText :: Concrete -> Expr -> Text +linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr) where lin :: Tree -> LinFun lin tree = case tree of @@ -127,7 +137,7 @@ linearizeConcr concr expr = lin2string $ lin (expr2tree expr) -- | Evaluation context is a sequence of terms type Context = [LinFun] --- | Operational semantics, Table 2 +-- | Operational semantics eval :: Context -> LinFun -> LinFun eval cxt t = case t of LFError err -> error err @@ -150,7 +160,7 @@ eval cxt t = case t of _ -> t -- | Turn concrete syntax terms into an actual string -lin2string :: LinFun -> String +lin2string :: LinFun -> Text lin2string l = case l of LFEmpty -> "" LFBind -> "" -- when encountered at beginning/end @@ -160,15 +170,16 @@ lin2string l = case l of LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2 where l2' = lin2string l2 - matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` l2') pfxs ] + matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ] l1 = if null matches then df else head matches - LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 ++ lin2string l2 + LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2 LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2 - LFConcat LFCapit l2 -> let l = lin2string l2 in toUpper (head l) : tail l - LFConcat LFAllCapit l2 -> let tks = words (lin2string l2) in unwords $ map toUpper (head tks) : tail tks - LFConcat l1 l2 -> unwords $ filter (not.null) [lin2string l1, lin2string l2] - x -> printf "[%s]" (show x) + LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l + LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks + LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2] + x -> T.pack $ printf "[%s]" (show x) +-- | List indexing with more verbose error messages (!!) :: (Show a) => [a] -> Int -> a (!!) xs i | i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs) diff --git a/testsuite/lpgf/run.hs b/testsuite/lpgf/run.hs index b33daa211..8f602ccb6 100644 --- a/testsuite/lpgf/run.hs +++ b/testsuite/lpgf/run.hs @@ -53,7 +53,7 @@ doGrammar' gname cncs = do Just tree = readExpr ast -- Do some linearization langs = - [ printf "%s: %s" (showLanguage lang) (linearizeConcr concr tree) + [ printf "%s: %s" (showLanguage lang) (linearizeConcrete concr tree) | (lang,concr) <- Map.toList (concretes lpgf) ] mapM_ putStrLn langs