1
0
forked from GitHub/gf-core

Move some definitions into LPGF.Internal, clean up public API.

This commit is contained in:
John J. Camilleri
2021-07-08 11:34:29 +02:00
parent 7b0637850c
commit d6416089d6
5 changed files with 298 additions and 240 deletions

View File

@@ -109,12 +109,13 @@ library
ghc-prof-options: -fprof-auto
exposed-modules:
LPGF
PGF
PGF.Internal
PGF.Haskell
LPGF
other-modules:
LPGF.Internal
PGF.Data
PGF.Macros
PGF.Binary
@@ -540,6 +541,7 @@ test-suite lpgf
GF.Text.Pretty
GF.Text.Transliterations
LPGF
LPGF.Internal
PGF
PGF.Binary
PGF.ByteCode
@@ -740,6 +742,7 @@ benchmark lpgf-bench
GF.Text.Pretty
GF.Text.Transliterations
LPGF
LPGF.Internal
PGF
PGF.Binary
PGF.ByteCode

View File

@@ -1,7 +1,7 @@
module GF.Compile.GrammarToLPGF (mkCanon2lpgf) where
import LPGF (LPGF (..))
import qualified LPGF as L
import LPGF.Internal (LPGF (..))
import qualified LPGF.Internal as L
import PGF.CId
import GF.Grammar.Grammar

View File

@@ -4,7 +4,7 @@ import PGF
import PGF.Internal(concretes,optimizePGF,unionPGF)
import PGF.Internal(putSplitAbs,encodeFile,runPut)
import LPGF(LPGF)
import qualified LPGF
import qualified LPGF.Internal as LPGF
import GF.Compile as S(batchCompile,link,linkl,srcAbsName)
import GF.CompileInParallel as P(parallelBatchCompile)
import GF.Compile.Export
@@ -193,7 +193,7 @@ writePGF opts pgf =
writeLPGF :: Options -> LPGF -> IOE FilePath
writeLPGF opts lpgf = do
let
grammarName = fromMaybe (showCId (LPGF.abstractName lpgf)) (flag optName opts)
grammarName = fromMaybe (showCId (LPGF.absname lpgf)) (flag optName opts)
outfile = outputPath opts (grammarName <.> "lpgf")
writing opts outfile $ liftIO $ LPGF.encodeFile outfile lpgf
return outfile

View File

@@ -2,193 +2,100 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Linearisation-only grammar format.
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
-- | Linearisation-only portable grammar format.
--
-- LPGF is an output format from the GF compiler, intended as a smaller and faster alternative to PGF.
-- This API allows LPGF files to be used in Haskell programs.
--
-- The implementation closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
-- http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.640.6330&rep=rep1&type=pdf
module LPGF (
-- ** Types
LPGF (..), Abstract (..), Concrete (..), LinFun (..),
-- * LPGF
LPGF,
showLPGF,
readLPGF,
-- ** Reading/writing
readLPGF, LPGF.encodeFile,
-- * Identifiers
CId,
mkCId,
showCId,
readCId,
-- * Abstract syntax
Abstract,
abstractName,
-- ** Categories
-- ** Functions
-- ** Expressions
Expr,
PGF.showExpr,
PGF.readExpr,
-- ** Types
-- ** Type checking
-- * Concrete syntax
Language,
PGF.showLanguage,
PGF.readLanguage,
languages,
Concrete,
LPGF.concretes,
-- ** Linearization
linearize, linearizeText, linearizeConcrete, linearizeConcreteText,
-- ** Other
abstractName,
PGF.showLanguage, PGF.readExpr,
-- ** DEBUG only, to be removed
render, pp
linearize,
linearizeText,
linearizeConcrete,
linearizeConcreteText
) where
import LPGF.Internal
import PGF (Language)
import PGF.CId
import PGF.Expr (Expr, Literal (..))
import PGF.Tree (Tree (..), expr2tree, prTree)
import qualified PGF
-- import qualified Control.Exception as EX
import Control.Monad (liftM, liftM2, forM_)
import qualified Control.Monad.Writer as CMW
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
import Data.Binary (decodeFile)
import Data.Either (isLeft)
import qualified Data.IntMap as IntMap
import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Numeric (showFFloat)
import Text.Printf (printf)
import Prelude hiding ((!!))
import qualified Prelude
-- | Linearisation-only PGF
data LPGF = LPGF {
absname :: CId,
abstract :: Abstract,
concretes :: Map.Map CId Concrete
} deriving (Show)
-- | Abstract syntax (currently empty)
data Abstract = Abstract {
} deriving (Show)
-- | Concrete syntax
data Concrete = Concrete {
toks :: IntMap.IntMap Text, -- ^ all strings are stored exactly once here
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show)
-- | Abstract function type
-- data Type = Type [CId] CId
-- deriving (Show)
-- -- | Linearisation type
-- data LinType =
-- StrType
-- | IxType Int
-- | ProductType [LinType]
-- deriving (Show)
-- | Linearisation function
data LinFun =
-- Additions
Error String -- ^ a runtime error, should probably not be supported at all
| Bind -- ^ join adjacent tokens
| Space -- ^ space between adjacent tokens
| Capit -- ^ capitalise next character
| AllCapit -- ^ capitalise next word
| Pre [([Text], LinFun)] LinFun
| Missing CId -- ^ missing definition (inserted at runtime)
-- From original definition in paper
| Empty
| Token Text
| Concat LinFun LinFun
| Ix Int
| Tuple [LinFun]
| Projection LinFun LinFun
| Argument Int
-- For reducing LPGF file when stored
| PreIx [(Int, LinFun)] LinFun -- ^ index into `toks` map (must apply read to convert to list)
| TokenIx Int -- ^ index into `toks` map
deriving (Show, Read)
instance Binary LPGF where
put lpgf = do
put (absname lpgf)
put (abstract lpgf)
put (concretes lpgf)
get = do
an <- get
abs <- get
concs <- get
return $ LPGF {
absname = an,
abstract = abs,
concretes = concs
}
instance Binary Abstract where
put abs = return ()
get = return $ Abstract {}
instance Binary Concrete where
put concr = do
put (toks concr)
put (lins concr)
get = do
ts <- get
ls <- get
return $ Concrete {
toks = ts,
lins = ls
}
instance Binary LinFun where
put = \case
Error e -> putWord8 0 >> put e
Bind -> putWord8 1
Space -> putWord8 2
Capit -> putWord8 3
AllCapit -> putWord8 4
Pre ps d -> putWord8 5 >> put (ps,d)
Missing f -> putWord8 13 >> put f
Empty -> putWord8 6
Token t -> putWord8 7 >> put t
Concat l1 l2 -> putWord8 8 >> put (l1,l2)
Ix i -> putWord8 9 >> put i
Tuple ls -> putWord8 10 >> put ls
Projection l1 l2 -> putWord8 11 >> put (l1,l2)
Argument i -> putWord8 12 >> put i
PreIx ps d -> putWord8 15 >> put (ps,d)
TokenIx i -> putWord8 14 >> put i
get = do
tag <- getWord8
case tag of
0 -> liftM Error get
1 -> return Bind
2 -> return Space
3 -> return Capit
4 -> return AllCapit
5 -> liftM2 Pre get get
13 -> liftM Missing get
6 -> return Empty
7 -> liftM Token get
8 -> liftM2 Concat get get
9 -> liftM Ix get
10 -> liftM Tuple get
11 -> liftM2 Projection get get
12 -> liftM Argument get
15 -> liftM2 PreIx get get
14 -> liftM TokenIx get
_ -> fail "Failed to decode LPGF binary format"
instance Binary Text where
put = put . TE.encodeUtf8
get = liftM TE.decodeUtf8 get
-- | The abstract language name is the name of the top-level abstract module.
abstractName :: LPGF -> CId
abstractName = absname
encodeFile :: FilePath -> LPGF -> IO ()
encodeFile = Data.Binary.encodeFile
-- | List of all languages available in the given grammar.
languages :: LPGF -> [Language]
languages = Map.keys . LPGF.Internal.concretes
-- | Map of all languages and their corresponding concrete sytaxes.
concretes :: LPGF -> Map.Map Language Concrete
concretes = LPGF.Internal.concretes
-- | Reads file in LPGF and produces 'LPGF' term.
-- The file is usually produced with:
--
-- > $ gf --make --output-format=lpgf <grammar file name>
readLPGF :: FilePath -> IO LPGF
readLPGF = Data.Binary.decodeFile
-- | Produce pretty-printed representation of an LPGF.
showLPGF :: LPGF -> String
showLPGF = render . pp
-- | Main linearize function, to 'String'
linearize :: LPGF -> Language -> Expr -> String
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
@@ -196,7 +103,7 @@ linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
-- | Main linearize function, to 'Data.Text.Text'
linearizeText :: LPGF -> Language -> Expr -> Text
linearizeText lpgf lang =
case Map.lookup lang (concretes lpgf) of
case Map.lookup lang (LPGF.Internal.concretes lpgf) of
Just concr -> linearizeConcreteText concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
@@ -223,13 +130,6 @@ linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
LFlt f -> showFFloat (Just 6) f ""
x -> error $ printf "Cannot lin: %s" (prTree x)
-- -- | Run a computation and catch any exception/errors.
-- -- Ideally this library should never throw exceptions, but we're still in development...
-- try :: a -> IO (Either String a)
-- try comp = do
-- let f = Right <$> EX.evaluate comp
-- EX.catch f (\(e :: EX.SomeException) -> return $ Left (show e))
-- | Evaluation context
data Context = Context {
cxArgs :: [LinFun], -- ^ is a sequence of terms
@@ -331,75 +231,3 @@ lin2string lf = T.unwords $ join $ flatten [lf]
isIx :: LinFun -> Bool
isIx (Ix _) = True
isIx _ = False
-- | Helper for building concat trees
mkConcat :: [LinFun] -> LinFun
mkConcat [] = Empty
mkConcat [x] = x
mkConcat xs = foldl1 Concat xs
-- | Helper for unfolding concat trees
unConcat :: LinFun -> [LinFun]
unConcat (Concat l1 l2) = concatMap unConcat [l1, l2]
unConcat lf = [lf]
------------------------------------------------------------------------------
-- Pretty-printing
type Doc = CMW.Writer [String] ()
render :: Doc -> String
render = unlines . CMW.execWriter
class PP a where
pp :: a -> Doc
instance PP LPGF where
pp (LPGF _ _ cncs) = mapM_ pp cncs
instance PP Concrete where
pp (Concrete toks lins) = do
forM_ (IntMap.toList toks) $ \(i,tok) ->
CMW.tell [show i ++ " " ++ T.unpack tok]
CMW.tell [""]
forM_ (Map.toList lins) $ \(cid,lin) -> do
CMW.tell ["# " ++ showCId cid]
pp lin
CMW.tell [""]
instance PP LinFun where
pp = pp' 0
where
pp' n = \case
Pre ps d -> do
p "Pre"
CMW.tell [ replicate (2*(n+1)) ' ' ++ show p | p <- ps ]
pp' (n+1) d
c@(Concat l1 l2) -> do
let ts = unConcat c
if any isDeep ts
then do
p "Concat"
mapM_ (pp' (n+1)) ts
else
p $ "Concat " ++ show ts
Tuple ls | any isDeep ls -> do
p "Tuple"
mapM_ (pp' (n+1)) ls
Projection l1 l2 | isDeep l1 || isDeep l2 -> do
p "Projection"
pp' (n+1) l1
pp' (n+1) l2
t -> p $ show t
where
p :: String -> Doc
p t = CMW.tell [ replicate (2*n) ' ' ++ t ]
isDeep = not . isTerm
isTerm = \case
Pre _ _ -> False
Concat _ _ -> False
Tuple _ -> False
Projection _ _ -> False
_ -> True

View File

@@ -0,0 +1,227 @@
{-# LANGUAGE LambdaCase #-}
module LPGF.Internal where
import PGF.CId
import PGF ()
import Control.Monad (liftM, liftM2, forM_)
import qualified Control.Monad.Writer as CMW
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile)
import qualified Data.IntMap as IntMap
import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
-- | Linearisation-only PGF
data LPGF = LPGF {
absname :: CId,
abstract :: Abstract,
concretes :: Map.Map CId Concrete
} deriving (Show)
-- | Abstract syntax (currently empty)
data Abstract = Abstract {
} deriving (Show)
-- | Concrete syntax
data Concrete = Concrete {
toks :: IntMap.IntMap Text, -- ^ all strings are stored exactly once here
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show)
-- | Abstract function type
-- data Type = Type [CId] CId
-- deriving (Show)
-- -- | Linearisation type
-- data LinType =
-- StrType
-- | IxType Int
-- | ProductType [LinType]
-- deriving (Show)
-- | Linearisation function
data LinFun =
-- Additions
Error String -- ^ a runtime error, should probably not be supported at all
| Bind -- ^ join adjacent tokens
| Space -- ^ space between adjacent tokens
| Capit -- ^ capitalise next character
| AllCapit -- ^ capitalise next word
| Pre [([Text], LinFun)] LinFun
| Missing CId -- ^ missing definition (inserted at runtime)
-- From original definition in paper
| Empty
| Token Text
| Concat LinFun LinFun
| Ix Int
| Tuple [LinFun]
| Projection LinFun LinFun
| Argument Int
-- For reducing LPGF file when stored
| PreIx [(Int, LinFun)] LinFun -- ^ index into `toks` map (must apply read to convert to list)
| TokenIx Int -- ^ index into `toks` map
deriving (Show, Read)
instance Binary LPGF where
put lpgf = do
put (absname lpgf)
put (abstract lpgf)
put (concretes lpgf)
get = do
an <- get
abs <- get
concs <- get
return $ LPGF {
absname = an,
abstract = abs,
concretes = concs
}
instance Binary Abstract where
put abs = return ()
get = return $ Abstract {}
instance Binary Concrete where
put concr = do
put (toks concr)
put (lins concr)
get = do
ts <- get
ls <- get
return $ Concrete {
toks = ts,
lins = ls
}
instance Binary LinFun where
put = \case
Error e -> putWord8 0 >> put e
Bind -> putWord8 1
Space -> putWord8 2
Capit -> putWord8 3
AllCapit -> putWord8 4
Pre ps d -> putWord8 5 >> put (ps,d)
Missing f -> putWord8 13 >> put f
Empty -> putWord8 6
Token t -> putWord8 7 >> put t
Concat l1 l2 -> putWord8 8 >> put (l1,l2)
Ix i -> putWord8 9 >> put i
Tuple ls -> putWord8 10 >> put ls
Projection l1 l2 -> putWord8 11 >> put (l1,l2)
Argument i -> putWord8 12 >> put i
PreIx ps d -> putWord8 15 >> put (ps,d)
TokenIx i -> putWord8 14 >> put i
get = do
tag <- getWord8
case tag of
0 -> liftM Error get
1 -> return Bind
2 -> return Space
3 -> return Capit
4 -> return AllCapit
5 -> liftM2 Pre get get
13 -> liftM Missing get
6 -> return Empty
7 -> liftM Token get
8 -> liftM2 Concat get get
9 -> liftM Ix get
10 -> liftM Tuple get
11 -> liftM2 Projection get get
12 -> liftM Argument get
15 -> liftM2 PreIx get get
14 -> liftM TokenIx get
_ -> fail "Failed to decode LPGF binary format"
instance Binary Text where
put = put . TE.encodeUtf8
get = liftM TE.decodeUtf8 get
encodeFile :: FilePath -> LPGF -> IO ()
encodeFile = Data.Binary.encodeFile
------------------------------------------------------------------------------
-- Utilities
-- | Helper for building concat trees
mkConcat :: [LinFun] -> LinFun
mkConcat [] = Empty
mkConcat [x] = x
mkConcat xs = foldl1 Concat xs
-- | Helper for unfolding concat trees
unConcat :: LinFun -> [LinFun]
unConcat (Concat l1 l2) = concatMap unConcat [l1, l2]
unConcat lf = [lf]
------------------------------------------------------------------------------
-- Pretty-printing
type Doc = CMW.Writer [String] ()
render :: Doc -> String
render = unlines . CMW.execWriter
class PP a where
pp :: a -> Doc
instance PP LPGF where
pp (LPGF _ _ cncs) = mapM_ pp cncs
instance PP Concrete where
pp (Concrete toks lins) = do
forM_ (IntMap.toList toks) $ \(i,tok) ->
CMW.tell [show i ++ " " ++ T.unpack tok]
CMW.tell [""]
forM_ (Map.toList lins) $ \(cid,lin) -> do
CMW.tell ["# " ++ showCId cid]
pp lin
CMW.tell [""]
instance PP LinFun where
pp = pp' 0
where
pp' n = \case
Pre ps d -> do
p "Pre"
CMW.tell [ replicate (2*(n+1)) ' ' ++ show p | p <- ps ]
pp' (n+1) d
c@(Concat l1 l2) -> do
let ts = unConcat c
if any isDeep ts
then do
p "Concat"
mapM_ (pp' (n+1)) ts
else
p $ "Concat " ++ show ts
Tuple ls | any isDeep ls -> do
p "Tuple"
mapM_ (pp' (n+1)) ls
Projection l1 l2 | isDeep l1 || isDeep l2 -> do
p "Projection"
pp' (n+1) l1
pp' (n+1) l2
t -> p $ show t
where
p :: String -> Doc
p t = CMW.tell [ replicate (2*n) ' ' ++ t ]
isDeep = not . isTerm
isTerm = \case
Pre _ _ -> False
Concat _ _ -> False
Tuple _ -> False
Projection _ _ -> False
_ -> True