1
0
forked from GitHub/gf-core
Files
gf-core/src/runtime/haskell/LPGF.hs
2021-02-28 00:34:46 +01:00

305 lines
9.2 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
-- | Linearisation-only grammar format.
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
module LPGF where
import PGF (Language)
import PGF.CId
import PGF.Expr (Expr)
import PGF.Tree (Tree (..), expr2tree, prTree)
import Control.Monad (liftM, liftM2, forM_)
import qualified Control.Monad.Writer as CMW
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Text.Printf (printf)
import Prelude hiding ((!!))
import qualified Prelude
-- | Linearisation-only PGF
data LPGF = LPGF {
absname :: CId,
abstract :: Abstract,
concretes :: Map.Map CId Concrete
} deriving (Show)
-- | Abstract syntax (currently empty)
data Abstract = Abstract {
} deriving (Show)
-- | Concrete syntax
newtype Concrete = Concrete {
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show)
-- | Abstract function type
-- data Type = Type [CId] CId
-- deriving (Show)
-- -- | Linearisation type
-- data LinType =
-- LTStr
-- | LTInt Int
-- | LTProduct [LinType]
-- deriving (Show)
-- | Linearisation function
data LinFun =
-- Additions
LFError String -- ^ a runtime error, should probably not be supported at all
| LFBind -- ^ join adjacent tokens
| LFSpace -- ^ space between adjacent tokens
| LFCapit -- ^ capitalise next character
| LFAllCapit -- ^ capitalise next word
| LFPre [([Text], LinFun)] LinFun
| LFMissing CId -- ^ missing definition (inserted at runtime)
-- From original definition in paper
| LFEmpty
| LFToken Text
| LFConcat LinFun LinFun
| LFInt Int
| LFTuple [LinFun]
| LFProjection LinFun LinFun
| LFArgument Int
deriving (Show, Read)
instance Binary LPGF where
put lpgf = do
put (absname lpgf)
put (abstract lpgf)
put (concretes lpgf)
get = do
an <- get
abs <- get
concs <- get
return $ LPGF {
absname = an,
abstract = abs,
concretes = concs
}
instance Binary Abstract where
put abs = return ()
get = return $ Abstract {}
instance Binary Concrete where
put concr = put (lins concr)
get = do
ls <- get
return $ Concrete {
lins = ls
}
instance Binary LinFun where
put = \case
LFError e -> putWord8 0 >> put e
LFBind -> putWord8 1
LFSpace -> putWord8 2
LFCapit -> putWord8 3
LFAllCapit -> putWord8 4
LFPre ps d -> putWord8 5 >> put ([(map TE.encodeUtf8 p,l) | (p,l) <- ps],d)
LFMissing f -> putWord8 13 >> put f
LFEmpty -> putWord8 6
LFToken t -> putWord8 7 >> put (TE.encodeUtf8 t)
LFConcat l1 l2 -> putWord8 8 >> put (l1,l2)
LFInt i -> putWord8 9 >> put i
LFTuple ls -> putWord8 10 >> put ls
LFProjection l1 l2 -> putWord8 11 >> put (l1,l2)
LFArgument i -> putWord8 12 >> put i
get = do
tag <- getWord8
case tag of
0 -> liftM LFError get
1 -> return LFBind
2 -> return LFSpace
3 -> return LFCapit
4 -> return LFAllCapit
5 -> liftM2 (\ps -> LFPre [(map TE.decodeUtf8 p,l) | (p,l) <- ps]) get get
13 -> liftM LFMissing get
6 -> return LFEmpty
7 -> liftM (LFToken . TE.decodeUtf8) get
8 -> liftM2 LFConcat get get
9 -> liftM LFInt get
10 -> liftM LFTuple get
11 -> liftM2 LFProjection get get
12 -> liftM LFArgument get
_ -> fail "Failed to decode LPGF binary format"
abstractName :: LPGF -> CId
abstractName = absname
encodeFile :: FilePath -> LPGF -> IO ()
encodeFile = Data.Binary.encodeFile
readLPGF :: FilePath -> IO LPGF
readLPGF = Data.Binary.decodeFile
-- | Main linearize function, to 'String'
linearize :: LPGF -> Language -> Expr -> String
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
-- | Main linearize function, to 'Data.Text.Text'
linearizeText :: LPGF -> Language -> Expr -> Text
linearizeText lpgf lang =
case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcreteText concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function, to 'String'
linearizeConcrete :: Concrete -> Expr -> String
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
-- | Language-specific linearize function, to 'Data.Text.Text'
linearizeConcreteText :: Concrete -> Expr -> Text
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
where
lin :: Tree -> LinFun
lin tree = case tree of
Fun f as ->
case Map.lookup f (lins concr) of
Just t -> eval (map lin as) t
-- _ -> error $ printf "Lookup failed for function: %s" (showCId f)
_ -> LFMissing f
x -> error $ printf "Cannot lin: %s" (prTree x)
-- | Evaluation context is a sequence of terms
type Context = [LinFun]
-- | Operational semantics
eval :: Context -> LinFun -> LinFun
eval cxt t = case t of
LFError err -> error err
LFPre pts df -> LFPre pts' df'
where
pts' = [ (strs, eval cxt t) | (strs,t) <- pts]
df' = eval cxt df
LFConcat s t -> LFConcat v w
where
v = eval cxt s
w = eval cxt t
LFTuple ts -> LFTuple vs
where vs = map (eval cxt) ts
LFProjection t u ->
case (eval cxt t, eval cxt u) of
(LFMissing f, _) -> LFMissing f
(_, LFMissing f) -> LFMissing f
(LFTuple vs, LFInt i) -> vs !! (i-1)
(tp@(LFTuple _), LFTuple is) | all isInt is -> foldl (\(LFTuple vs) (LFInt i) -> vs !! (i-1)) tp is
(t',u') -> error $ printf "Incompatible projection:\n- %s ~> %s\n- %s ~> %s" (show t) (show t') (show u) (show u')
LFArgument i -> cxt !! (i-1)
_ -> t
-- | Turn concrete syntax terms into an actual string
lin2string :: LinFun -> Text
lin2string l = case l of
LFEmpty -> ""
LFBind -> "" -- when encountered at beginning/end
LFSpace -> "" -- when encountered at beginning/end
LFToken tok -> tok
LFMissing cid -> T.pack $ printf "[%s]" (show cid)
LFTuple [l] -> lin2string l
LFTuple (l:_) -> lin2string l -- unselected table, just choose first option (see e.g. FoodsJpn)
LFPre pts df -> lin2string df -- when encountered at end
LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2
where
l2' = lin2string l2
matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ]
l1 = if null matches then df else head matches
LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2
LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2
LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l
LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks
LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2]
x -> T.pack $ printf "[%s]" (show x)
-- | List indexing with more verbose error messages
(!!) :: (Show a) => [a] -> Int -> a
(!!) xs i
| i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs)
| i > length xs - 1 = error $ printf "!!: index %d too large for list: %s" i (show xs)
| otherwise = xs Prelude.!! i
isInt :: LinFun -> Bool
isInt (LFInt _) = True
isInt _ = False
-- | Helper for building concat trees
mkConcat :: [LinFun] -> LinFun
mkConcat [] = LFEmpty
mkConcat [x] = x
mkConcat xs = foldl1 LFConcat xs
-- | Helper for unfolding concat trees
unConcat :: LinFun -> [LinFun]
unConcat (LFConcat l1 l2) = concatMap unConcat [l1, l2]
unConcat lf = [lf]
------------------------------------------------------------------------------
-- Pretty-printing
type Doc = CMW.Writer [Text] ()
render :: Doc -> Text
render = T.unlines . CMW.execWriter
class PP a where
pp :: a -> Doc
instance PP LPGF where
pp (LPGF _ _ cncs) = mapM_ pp cncs
instance PP Concrete where
pp (Concrete lins) =
forM_ (Map.toList lins) $ \(cid,lin) -> do
CMW.tell [T.pack ("# " ++ showCId cid)]
pp lin
CMW.tell [""]
instance PP LinFun where
pp = pp' 0
where
pp' n = \case
LFPre ps d -> do
p "LFPre"
CMW.tell [ T.replicate (n+1) " " `T.append` T.pack (show p) | p <- ps ]
pp' (n+1) d
c@(LFConcat l1 l2) -> do
let ts = unConcat c
if any isDeep ts
then do
p "LFConcat"
mapM_ (pp' (n+1)) ts
else
ps $ "LFConcat " ++ show ts
LFTuple ls | any isDeep ls -> do
p "LFTuple"
mapM_ (pp' (n+1)) ls
LFProjection l1 l2 | isDeep l1 || isDeep l2 -> do
p "LFProjection"
pp' (n+1) l1
pp' (n+1) l2
t -> ps $ show t
where
p :: Text -> Doc
p t = CMW.tell [ T.replicate n " " `T.append` t ]
ps :: String -> Doc
ps = p . T.pack
isDeep = not . isTerm
isTerm = \case
LFPre _ _ -> False
LFConcat _ _ -> False
LFTuple _ -> False
LFProjection _ _ -> False
_ -> True