Use Data.Text instead of String. Rename Abstr to Abstract, Concr to Concrete.

2021-02-16 16:04:40 +01:00
parent d394cacddf
commit 398b294734
4 changed files with 63 additions and 49 deletions
--- a/gf.cabal
+++ b/gf.cabal
@@ -86,7 +86,8 @@ Library
                 -- For compatability with ghc < 8
                 -- We need transformers-compat >= 0.6.3, but that is only in newer snapshots where it is redundant.
                 transformers-compat,
-                 ghc-prim
+                 ghc-prim,
                 text
  hs-source-dirs: src/runtime/haskell
  other-modules:
@@ -536,6 +537,7 @@ test-suite lpgf
    process,
    random,
    terminfo,
    text,
    time,
    transformers-compat,
    unix,
--- a/src/compiler/GF/Compile/GrammarToLPGF.hs
+++ b/src/compiler/GF/Compile/GrammarToLPGF.hs
@@ -18,12 +18,13 @@ import Data.Either (lefts, rights)
 import Data.List (elemIndex, find, groupBy, sortBy)
 import qualified Data.Map as Map
 import Data.Maybe (catMaybes)
 import qualified Data.Text as T
 import Text.Printf (printf)
 mkCanon2lpgf :: Options -> SourceGrammar -> ModuleName -> IOE LPGF
 mkCanon2lpgf opts gr am = do
-  (an,abs) <- mkAbstr ab
+  (an,abs) <- mkAbstract ab
-  cncs     <- mapM mkConcr cncs
+  cncs     <- mapM mkConcrete cncs
  let lpgf = LPGF {
    L.absname = an,
    L.abstract = abs,
@@ -36,11 +37,11 @@ mkCanon2lpgf opts gr am = do
  where
    canon@(C.Grammar ab cncs) = grammar2canonical opts am gr
-    mkAbstr :: C.Abstract -> IOE (CId, L.Abstr)
+    mkAbstract :: C.Abstract -> IOE (CId, L.Abstract)
-    mkAbstr (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstr {})
+    mkAbstract (C.Abstract modId flags cats funs) = return (mdi2i modId, L.Abstract {})
-    mkConcr :: C.Concrete -> IOE (CId, L.Concr)
+    mkConcrete :: C.Concrete -> IOE (CId, L.Concrete)
-    mkConcr (C.Concrete modId absModId flags params lincats lindefs) = do
+    mkConcrete (C.Concrete modId absModId flags params lincats lindefs) = do
      let
        paramMap = mkParamMap params
        paramTuples = mkParamTuples params
@@ -61,9 +62,9 @@ mkCanon2lpgf opts gr am = do
                return $ L.LFConcat v1' v2'
              C.LiteralValue ll -> case ll of
-                C.FloatConstant f -> return $ L.LFToken (show f)
+                C.FloatConstant f -> return $ L.LFToken $ T.pack $ show f
-                C.IntConstant i -> return $ L.LFToken (show i)
+                C.IntConstant i -> return $ L.LFToken $ T.pack $ show i
-                C.StrConstant s -> return $ L.LFToken s
+                C.StrConstant s -> return $ L.LFToken $ T.pack s
              C.ErrorValue err -> return $ L.LFError err
@@ -132,7 +133,7 @@ mkCanon2lpgf opts gr am = do
              C.PreValue pts df -> do
                pts' <- forM pts $ \(pfxs, lv) -> do
                  lv' <- val2lin lv
-                  return (pfxs, lv')
+                  return (map T.pack pfxs, lv')
                df' <- val2lin df
                return $ L.LFPre pts' df'
@@ -168,7 +169,7 @@ mkCanon2lpgf opts gr am = do
      unless (null $ lefts es) (error $ unlines (lefts es))
-      return (mdi2i modId, L.Concr {
+      return (mdi2i modId, L.Concrete {
        L.lins = lins
      })
--- a/src/runtime/haskell/LPGF.hs
+++ b/src/runtime/haskell/LPGF.hs
@@ -1,6 +1,8 @@
-- | Linearisation-only PGF format
+{-# LANGUAGE OverloadedStrings #-}
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009)
+
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars"
+-- | Linearisation-only grammar format.
 -- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
 -- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
 module LPGF where
 import PGF (Language)
@@ -9,9 +11,9 @@ import PGF.Expr (Expr)
 import PGF.Tree (Tree (..), expr2tree, prTree)
 import Data.Binary (Binary, get, put, encodeFile, decodeFile)
 import Data.Char (toUpper)
 import Data.List (isPrefixOf)
 import qualified Data.Map as Map
 import Data.Text (Text)
 import qualified Data.Text as T
 import Text.Printf (printf)
 import Prelude hiding ((!!))
@@ -20,18 +22,18 @@ import qualified Prelude
 -- | Linearisation-only PGF
 data LPGF = LPGF {
  absname   :: CId,
-  abstract  :: Abstr,
+  abstract  :: Abstract,
-  concretes :: Map.Map CId Concr
+  concretes :: Map.Map CId Concrete
 } deriving (Show)
-- | Abstract syntax
+-- | Abstract syntax (currently empty)
-data Abstr = Abstr {
+data Abstract = Abstract {
 } deriving (Show)
 -- | Concrete syntax
-data Concr = Concr {
+data Concrete = Concrete {
-  -- lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category
+  -- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
-  lins    :: Map.Map CId LinFun  -- ^ assigning a linearization function to each function
+  lins    :: Map.Map CId LinFun  -- ^ a linearization function for each function
 } deriving (Show)
 -- | Abstract function type
@@ -49,15 +51,15 @@ data Concr = Concr {
 data LinFun =
  -- Additions
    LFError String -- ^ a runtime error, should probably not be supported at all
-  | LFBind
+  | LFBind -- ^ join adjacent tokens
-  | LFSpace
+  | LFSpace -- ^ space between adjacent tokens
-  | LFCapit
+  | LFCapit -- ^ capitalise next character
-  | LFAllCapit
+  | LFAllCapit -- ^ capitalise next word
-  | LFPre [([String], LinFun)] LinFun
+  | LFPre [([Text], LinFun)] LinFun
  -- From original definition in paper
  | LFEmpty
-  | LFToken String
+  | LFToken Text
  | LFConcat LinFun LinFun
  | LFInt Int
  | LFTuple [LinFun]
@@ -80,15 +82,15 @@ instance Binary LPGF where
      concretes = concs
    }
-instance Binary Abstr where
+instance Binary Abstract where
  put abs = return ()
-  get = return $ Abstr {}
+  get = return $ Abstract {}
-instance Binary Concr where
+instance Binary Concrete where
  put concr = put (lins concr)
  get = do
    ls <- get
-    return $ Concr {
+    return $ Concrete {
      lins = ls
    }
@@ -105,16 +107,24 @@ encodeFile = Data.Binary.encodeFile
 readLPGF :: FilePath -> IO LPGF
 readLPGF = Data.Binary.decodeFile
-- | Main linearize function
+-- | Main linearize function, to 'String'
 linearize :: LPGF -> Language -> Expr -> String
-linearize lpgf lang =
+linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
 -- | Main linearize function, to 'Data.Text.Text'
 linearizeText :: LPGF -> Language -> Expr -> Text
 linearizeText lpgf lang =
  case Map.lookup lang (concretes lpgf) of
-    Just concr -> linearizeConcr concr
+    Just concr -> linearizeConcreteText concr
    Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function
+-- | Language-specific linearize function, to 'String'
-linearizeConcr :: Concr -> Expr -> String
+linearizeConcrete :: Concrete -> Expr -> String
-linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
+linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
 -- | Language-specific linearize function, to 'Data.Text.Text'
 linearizeConcreteText :: Concrete -> Expr -> Text
 linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
  where
    lin :: Tree -> LinFun
    lin tree = case tree of
@@ -127,7 +137,7 @@ linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
 -- | Evaluation context is a sequence of terms
 type Context = [LinFun]
-- | Operational semantics, Table 2
+-- | Operational semantics
 eval :: Context -> LinFun -> LinFun
 eval cxt t = case t of
  LFError err -> error err
@@ -150,7 +160,7 @@ eval cxt t = case t of
  _ -> t
 -- | Turn concrete syntax terms into an actual string
-lin2string :: LinFun -> String
+lin2string :: LinFun -> Text
 lin2string l = case l of
  LFEmpty -> ""
  LFBind -> "" -- when encountered at beginning/end
@@ -160,15 +170,16 @@ lin2string l = case l of
  LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2
    where
      l2' = lin2string l2
-      matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` l2') pfxs ]
+      matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ]
      l1 = if null matches then df else head matches
-  LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 ++ lin2string l2
+  LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2
  LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2
-  LFConcat LFCapit l2 -> let l = lin2string l2 in toUpper (head l) : tail l
+  LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l
-  LFConcat LFAllCapit l2 -> let tks = words (lin2string l2) in unwords $ map toUpper (head tks) : tail tks
+  LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks
-  LFConcat l1 l2 -> unwords $ filter (not.null) [lin2string l1, lin2string l2]
+  LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2]
-  x -> printf "[%s]" (show x)
+  x -> T.pack $ printf "[%s]" (show x)
 -- | List indexing with more verbose error messages
 (!!) :: (Show a) => [a] -> Int -> a
 (!!) xs i
  | i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs)
--- a/testsuite/lpgf/run.hs
+++ b/testsuite/lpgf/run.hs
@@ -53,7 +53,7 @@ doGrammar' gname cncs = do
      Just tree = readExpr ast
      -- Do some linearization
      langs =
-        [ printf "%s: %s" (showLanguage lang) (linearizeConcr concr tree)
+        [ printf "%s: %s" (showLanguage lang) (linearizeConcrete concr tree)
        | (lang,concr) <- Map.toList (concretes lpgf)
        ]
    mapM_ putStrLn langs