forked from GitHub/gf-core
305 lines
9.2 KiB
Haskell
305 lines
9.2 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
|
|
-- | Linearisation-only grammar format.
|
|
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
|
|
-- "PGF: A Portable Run-Time Format for Type-Theoretical Grammars".
|
|
module LPGF where
|
|
|
|
import PGF (Language)
|
|
import PGF.CId
|
|
import PGF.Expr (Expr)
|
|
import PGF.Tree (Tree (..), expr2tree, prTree)
|
|
|
|
import Control.Monad (liftM, liftM2, forM_)
|
|
import qualified Control.Monad.Writer as CMW
|
|
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
|
|
import qualified Data.Map as Map
|
|
import Data.Text (Text)
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.Encoding as TE
|
|
import Text.Printf (printf)
|
|
|
|
import Prelude hiding ((!!))
|
|
import qualified Prelude
|
|
|
|
-- | Linearisation-only PGF
|
|
data LPGF = LPGF {
|
|
absname :: CId,
|
|
abstract :: Abstract,
|
|
concretes :: Map.Map CId Concrete
|
|
} deriving (Show)
|
|
|
|
-- | Abstract syntax (currently empty)
|
|
data Abstract = Abstract {
|
|
} deriving (Show)
|
|
|
|
-- | Concrete syntax
|
|
newtype Concrete = Concrete {
|
|
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
|
|
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
|
|
} deriving (Show)
|
|
|
|
-- | Abstract function type
|
|
-- data Type = Type [CId] CId
|
|
-- deriving (Show)
|
|
|
|
-- -- | Linearisation type
|
|
-- data LinType =
|
|
-- LTStr
|
|
-- | LTInt Int
|
|
-- | LTProduct [LinType]
|
|
-- deriving (Show)
|
|
|
|
-- | Linearisation function
|
|
data LinFun =
|
|
-- Additions
|
|
LFError String -- ^ a runtime error, should probably not be supported at all
|
|
| LFBind -- ^ join adjacent tokens
|
|
| LFSpace -- ^ space between adjacent tokens
|
|
| LFCapit -- ^ capitalise next character
|
|
| LFAllCapit -- ^ capitalise next word
|
|
| LFPre [([Text], LinFun)] LinFun
|
|
| LFMissing CId -- ^ missing definition (inserted at runtime)
|
|
|
|
-- From original definition in paper
|
|
| LFEmpty
|
|
| LFToken Text
|
|
| LFConcat LinFun LinFun
|
|
| LFInt Int
|
|
| LFTuple [LinFun]
|
|
| LFProjection LinFun LinFun
|
|
| LFArgument Int
|
|
deriving (Show, Read)
|
|
|
|
instance Binary LPGF where
|
|
put lpgf = do
|
|
put (absname lpgf)
|
|
put (abstract lpgf)
|
|
put (concretes lpgf)
|
|
get = do
|
|
an <- get
|
|
abs <- get
|
|
concs <- get
|
|
return $ LPGF {
|
|
absname = an,
|
|
abstract = abs,
|
|
concretes = concs
|
|
}
|
|
|
|
instance Binary Abstract where
|
|
put abs = return ()
|
|
get = return $ Abstract {}
|
|
|
|
instance Binary Concrete where
|
|
put concr = put (lins concr)
|
|
get = do
|
|
ls <- get
|
|
return $ Concrete {
|
|
lins = ls
|
|
}
|
|
|
|
instance Binary LinFun where
|
|
put = \case
|
|
LFError e -> putWord8 0 >> put e
|
|
LFBind -> putWord8 1
|
|
LFSpace -> putWord8 2
|
|
LFCapit -> putWord8 3
|
|
LFAllCapit -> putWord8 4
|
|
LFPre ps d -> putWord8 5 >> put ([(map TE.encodeUtf8 p,l) | (p,l) <- ps],d)
|
|
LFMissing f -> putWord8 13 >> put f
|
|
LFEmpty -> putWord8 6
|
|
LFToken t -> putWord8 7 >> put (TE.encodeUtf8 t)
|
|
LFConcat l1 l2 -> putWord8 8 >> put (l1,l2)
|
|
LFInt i -> putWord8 9 >> put i
|
|
LFTuple ls -> putWord8 10 >> put ls
|
|
LFProjection l1 l2 -> putWord8 11 >> put (l1,l2)
|
|
LFArgument i -> putWord8 12 >> put i
|
|
get = do
|
|
tag <- getWord8
|
|
case tag of
|
|
0 -> liftM LFError get
|
|
1 -> return LFBind
|
|
2 -> return LFSpace
|
|
3 -> return LFCapit
|
|
4 -> return LFAllCapit
|
|
5 -> liftM2 (\ps -> LFPre [(map TE.decodeUtf8 p,l) | (p,l) <- ps]) get get
|
|
13 -> liftM LFMissing get
|
|
6 -> return LFEmpty
|
|
7 -> liftM (LFToken . TE.decodeUtf8) get
|
|
8 -> liftM2 LFConcat get get
|
|
9 -> liftM LFInt get
|
|
10 -> liftM LFTuple get
|
|
11 -> liftM2 LFProjection get get
|
|
12 -> liftM LFArgument get
|
|
_ -> fail "Failed to decode LPGF binary format"
|
|
|
|
abstractName :: LPGF -> CId
|
|
abstractName = absname
|
|
|
|
encodeFile :: FilePath -> LPGF -> IO ()
|
|
encodeFile = Data.Binary.encodeFile
|
|
|
|
readLPGF :: FilePath -> IO LPGF
|
|
readLPGF = Data.Binary.decodeFile
|
|
|
|
-- | Main linearize function, to 'String'
|
|
linearize :: LPGF -> Language -> Expr -> String
|
|
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
|
|
|
|
-- | Main linearize function, to 'Data.Text.Text'
|
|
linearizeText :: LPGF -> Language -> Expr -> Text
|
|
linearizeText lpgf lang =
|
|
case Map.lookup lang (concretes lpgf) of
|
|
Just concr -> linearizeConcreteText concr
|
|
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
|
|
|
|
-- | Language-specific linearize function, to 'String'
|
|
linearizeConcrete :: Concrete -> Expr -> String
|
|
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
|
|
|
|
-- | Language-specific linearize function, to 'Data.Text.Text'
|
|
linearizeConcreteText :: Concrete -> Expr -> Text
|
|
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
|
|
where
|
|
lin :: Tree -> LinFun
|
|
lin tree = case tree of
|
|
Fun f as ->
|
|
case Map.lookup f (lins concr) of
|
|
Just t -> eval (map lin as) t
|
|
-- _ -> error $ printf "Lookup failed for function: %s" (showCId f)
|
|
_ -> LFMissing f
|
|
x -> error $ printf "Cannot lin: %s" (prTree x)
|
|
|
|
-- | Evaluation context is a sequence of terms
|
|
type Context = [LinFun]
|
|
|
|
-- | Operational semantics
|
|
eval :: Context -> LinFun -> LinFun
|
|
eval cxt t = case t of
|
|
LFError err -> error err
|
|
LFPre pts df -> LFPre pts' df'
|
|
where
|
|
pts' = [ (strs, eval cxt t) | (strs,t) <- pts]
|
|
df' = eval cxt df
|
|
LFConcat s t -> LFConcat v w
|
|
where
|
|
v = eval cxt s
|
|
w = eval cxt t
|
|
LFTuple ts -> LFTuple vs
|
|
where vs = map (eval cxt) ts
|
|
LFProjection t u ->
|
|
case (eval cxt t, eval cxt u) of
|
|
(LFMissing f, _) -> LFMissing f
|
|
(_, LFMissing f) -> LFMissing f
|
|
(LFTuple vs, LFInt i) -> vs !! (i-1)
|
|
(tp@(LFTuple _), LFTuple is) | all isInt is -> foldl (\(LFTuple vs) (LFInt i) -> vs !! (i-1)) tp is
|
|
(t',u') -> error $ printf "Incompatible projection:\n- %s ~> %s\n- %s ~> %s" (show t) (show t') (show u) (show u')
|
|
LFArgument i -> cxt !! (i-1)
|
|
_ -> t
|
|
|
|
-- | Turn concrete syntax terms into an actual string
|
|
lin2string :: LinFun -> Text
|
|
lin2string l = case l of
|
|
LFEmpty -> ""
|
|
LFBind -> "" -- when encountered at beginning/end
|
|
LFSpace -> "" -- when encountered at beginning/end
|
|
LFToken tok -> tok
|
|
LFMissing cid -> T.pack $ printf "[%s]" (show cid)
|
|
LFTuple [l] -> lin2string l
|
|
LFTuple (l:_) -> lin2string l -- unselected table, just choose first option (see e.g. FoodsJpn)
|
|
LFPre pts df -> lin2string df -- when encountered at end
|
|
LFConcat (LFPre pts df) l2 -> lin2string $ LFConcat l1 l2
|
|
where
|
|
l2' = lin2string l2
|
|
matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` l2') pfxs ]
|
|
l1 = if null matches then df else head matches
|
|
LFConcat l1 (LFConcat LFBind l2) -> lin2string l1 `T.append` lin2string l2
|
|
LFConcat l1 (LFConcat LFSpace l2) -> lin2string $ LFConcat l1 l2
|
|
LFConcat LFCapit l2 -> let l = lin2string l2 in T.toUpper (T.take 1 l) `T.append` T.drop 1 l
|
|
LFConcat LFAllCapit l2 -> let tks = T.words (lin2string l2) in T.unwords $ T.toUpper (head tks) : tail tks
|
|
LFConcat l1 l2 -> T.unwords $ filter (not.T.null) [lin2string l1, lin2string l2]
|
|
x -> T.pack $ printf "[%s]" (show x)
|
|
|
|
-- | List indexing with more verbose error messages
|
|
(!!) :: (Show a) => [a] -> Int -> a
|
|
(!!) xs i
|
|
| i < 0 = error $ printf "!!: index %d too small for list: %s" i (show xs)
|
|
| i > length xs - 1 = error $ printf "!!: index %d too large for list: %s" i (show xs)
|
|
| otherwise = xs Prelude.!! i
|
|
|
|
isInt :: LinFun -> Bool
|
|
isInt (LFInt _) = True
|
|
isInt _ = False
|
|
|
|
-- | Helper for building concat trees
|
|
mkConcat :: [LinFun] -> LinFun
|
|
mkConcat [] = LFEmpty
|
|
mkConcat [x] = x
|
|
mkConcat xs = foldl1 LFConcat xs
|
|
|
|
-- | Helper for unfolding concat trees
|
|
unConcat :: LinFun -> [LinFun]
|
|
unConcat (LFConcat l1 l2) = concatMap unConcat [l1, l2]
|
|
unConcat lf = [lf]
|
|
|
|
------------------------------------------------------------------------------
|
|
-- Pretty-printing
|
|
|
|
type Doc = CMW.Writer [Text] ()
|
|
|
|
render :: Doc -> Text
|
|
render = T.unlines . CMW.execWriter
|
|
|
|
class PP a where
|
|
pp :: a -> Doc
|
|
|
|
instance PP LPGF where
|
|
pp (LPGF _ _ cncs) = mapM_ pp cncs
|
|
|
|
instance PP Concrete where
|
|
pp (Concrete lins) =
|
|
forM_ (Map.toList lins) $ \(cid,lin) -> do
|
|
CMW.tell [T.pack ("# " ++ showCId cid)]
|
|
pp lin
|
|
CMW.tell [""]
|
|
|
|
instance PP LinFun where
|
|
pp = pp' 0
|
|
where
|
|
pp' n = \case
|
|
LFPre ps d -> do
|
|
p "LFPre"
|
|
CMW.tell [ T.replicate (n+1) " " `T.append` T.pack (show p) | p <- ps ]
|
|
pp' (n+1) d
|
|
|
|
c@(LFConcat l1 l2) -> do
|
|
let ts = unConcat c
|
|
if any isDeep ts
|
|
then do
|
|
p "LFConcat"
|
|
mapM_ (pp' (n+1)) ts
|
|
else
|
|
ps $ "LFConcat " ++ show ts
|
|
LFTuple ls | any isDeep ls -> do
|
|
p "LFTuple"
|
|
mapM_ (pp' (n+1)) ls
|
|
LFProjection l1 l2 | isDeep l1 || isDeep l2 -> do
|
|
p "LFProjection"
|
|
pp' (n+1) l1
|
|
pp' (n+1) l2
|
|
t -> ps $ show t
|
|
where
|
|
p :: Text -> Doc
|
|
p t = CMW.tell [ T.replicate n " " `T.append` t ]
|
|
ps :: String -> Doc
|
|
ps = p . T.pack
|
|
|
|
isDeep = not . isTerm
|
|
isTerm = \case
|
|
LFPre _ _ -> False
|
|
LFConcat _ _ -> False
|
|
LFTuple _ -> False
|
|
LFProjection _ _ -> False
|
|
_ -> True
|