Use Data.Text instead of String. Rename Abstr to Abstract, Concr to Concrete.

2021-02-16 16:04:40 +01:00
parent d394cacddf
commit 398b294734
4 changed files with 63 additions and 49 deletions
--- a/src/runtime/haskell/LPGF.hs
+++ b/src/runtime/haskell/LPGF.hs
@@ -1,6 +1,8 @@
-- | Linearisation-only PGF format
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009)
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars"
+{-# LANGUAGE OverloadedStrings #-}
+
+-- | Linearisation-only grammar format.
+-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
+-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
 module LPGF where

 import PGF (Language)
@@ -9,9 +11,9 @@ import PGF.Expr (Expr)
 import PGF.Tree (Tree (..), expr2tree, prTree)

 import Data.Binary (Binary, get, put, encodeFile, decodeFile)
-import Data.Char (toUpper)
-import Data.List (isPrefixOf)
 import qualified Data.Map as Map
+import Data.Text (Text)
+import qualified Data.Text as T
 import Text.Printf (printf)

 import Prelude hiding ((!!))
@@ -20,18 +22,18 @@ import qualified Prelude
 -- | Linearisation-only PGF
 data LPGF = LPGF {
  absname   :: CId,
-  abstract  :: Abstr,
-  concretes :: Map.Map CId Concr
+  abstract  :: Abstract,
+  concretes :: Map.Map CId Concrete
 } deriving (Show)

-- | Abstract syntax
-data Abstr = Abstr {
+-- | Abstract syntax (currently empty)
+data Abstract = Abstract {
 } deriving (Show)

 -- | Concrete syntax
-data Concr = Concr {
-  -- lincats :: Map.Map CId LinType, -- ^ assigning a linearization type to each category
-  lins    :: Map.Map CId LinFun  -- ^ assigning a linearization function to each function
+data Concrete = Concrete {
+  -- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
+  lins    :: Map.Map CId LinFun  -- ^ a linearization function for each function
 } deriving (Show)

 -- | Abstract function type
@@ -49,15 +51,15 @@ data Concr = Concr {
 data LinFun =
  -- Additions
    LFError String -- ^ a runtime error, should probably not be supported at all
-  | LFBind
-  | LFSpace
-  | LFCapit
-  | LFAllCapit
-  | LFPre [([String], LinFun)] LinFun
+  | LFBind -- ^ join adjacent tokens
+  | LFSpace -- ^ space between adjacent tokens
+  | LFCapit -- ^ capitalise next character
+  | LFAllCapit -- ^ capitalise next word
+  | LFPre [([Text], LinFun)] LinFun

  -- From original definition in paper
  | LFEmpty
-  | LFToken String
+  | LFToken Text
  | LFConcat LinFun LinFun
  | LFInt Int
  | LFTuple [LinFun]
@@ -80,15 +82,15 @@ instance Binary LPGF where
      concretes = concs
    }

-instance Binary Abstr where
+instance Binary Abstract where
  put abs = return ()
-  get = return $ Abstr {}
+  get = return $ Abstract {}

-instance Binary Concr where
+instance Binary Concrete where
  put concr = put (lins concr)
  get = do
    ls <- get
-    return $ Concr {
+    return $ Concrete {
      lins = ls
    }

@@ -105,16 +107,24 @@ encodeFile = Data.Binary.encodeFile
 readLPGF :: FilePath -> IO LPGF
 readLPGF = Data.Binary.decodeFile

-- | Main linearize function
+-- | Main linearize function, to 'String'
 linearize :: LPGF -> Language -> Expr -> String
-linearize lpgf lang =
+linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
+
+-- | Main linearize function, to 'Data.Text.Text'
+linearizeText :: LPGF -> Language -> Expr -> Text
+linearizeText lpgf lang =
  case Map.lookup lang (concretes lpgf) of
-    Just concr -> linearizeConcr concr
+    Just concr -> linearizeConcreteText concr
    Nothing -> error $ printf "Unknown language: %s" (showCId lang)

-- | Language-specific linearize function
-linearizeConcr :: Concr -> Expr -> String
-linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
+-- | Language-specific linearize function, to 'String'
+linearizeConcrete :: Concrete -> Expr -> String
+linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
+
+-- | Language-specific linearize function, to 'Data.Text.Text'
+linearizeConcreteText :: Concrete -> Expr -> Text
+linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
  where
    lin :: Tree -> LinFun
    lin tree = case tree of
@@ -127,7 +137,7 @@ linearizeConcr concr expr = lin2string $ lin (expr2tree expr)
 -- | Evaluation context is a sequence of terms
 type Context = [LinFun]

-- | Operational semantics, Table 2
+-- | Operational semantics
 eval :: Context -> LinFun -> LinFun
 eval cxt t = case t of
  LFError err -> error err
@@ -150,7 +160,7 @@ eval cxt t = case t of
  _ -> t

 -- | Turn concrete syntax terms into an actual string
-lin2string :: LinFun -> String
+lin2string :: LinFun -> Text
 lin2string l = case l of
  LFEmpty -> ""
  LFBind -> "" -- when encountered at beginning/end
@@ -160,15 +170,16 @@ lin2string l = case l of
  LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2
    where
      l2' = lin2string l2
-      matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` l2') pfxs ]
+      matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ]
      l1 = if null matches then df else head matches
-  LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 ++ lin2string l2
+  LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2
  LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2
-  LFConcat LFCapit l2 -> let l = lin2string l2 in toUpper (head l) : tail l
-  LFConcat LFAllCapit l2 -> let tks = words (lin2string l2) in unwords $ map toUpper (head tks) : tail tks
-  LFConcat l1 l2 -> unwords $ filter (not.null) [lin2string l1, lin2string l2]
-  x -> printf "[%s]" (show x)
+  LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l
+  LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks
+  LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2]
+  x -> T.pack $ printf "[%s]" (show x)

+-- | List indexing with more verbose error messages
 (!!) :: (Show a) => [a] -> Int -> a
 (!!) xs i
  | i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs)