Compare commits

...

1 Commits

Author SHA1 Message Date
John J. Camilleri
c058457337 Change Data.Text to String as a test, seemingly makes no difference. 2021-03-10 16:50:26 +01:00
3 changed files with 31 additions and 47 deletions

View File

@@ -23,8 +23,6 @@ import Data.List (elemIndex)
import qualified Data.List as L
import qualified Data.Map.Strict as Map
import Data.Maybe (fromJust, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import System.Environment (lookupEnv)
import System.FilePath ((</>), (<.>))
import Text.Printf (printf)
@@ -137,9 +135,9 @@ mkCanon2lpgf opts gr am = do
return (L.Concat v1' v2', t1 <|> t2) -- t1 else t2
C.LiteralValue ll -> case ll of
C.FloatConstant f -> return (L.Token $ T.pack $ show f, Just C.FloatType)
C.IntConstant i -> return (L.Token $ T.pack $ show i, Just C.IntType)
C.StrConstant s -> return (L.Token $ T.pack s, Just C.StrType)
C.FloatConstant f -> return (L.Token $ show f, Just C.FloatType)
C.IntConstant i -> return (L.Token $ show i, Just C.IntType)
C.StrConstant s -> return (L.Token s, Just C.StrType)
C.ErrorValue err -> return (L.Error err, Nothing)
@@ -251,7 +249,7 @@ mkCanon2lpgf opts gr am = do
C.PreValue pts df -> do
pts' <- forM pts $ \(pfxs, lv) -> do
(lv', _) <- val2lin lv
return (map T.pack pfxs, lv')
return (pfxs, lv')
(df', lt) <- val2lin df
return (L.Pre pts' df', lt)
@@ -345,12 +343,12 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
(lins',imb') = CMS.runState (go0 (L.lins concr)) imb
toks' = IntMapBuilder.toIntMap imb'
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB Text) (Map.Map CId L.LinFun)
go0 :: Map.Map CId L.LinFun -> CMS.State (IntMapBuilder.IMB String) (Map.Map CId L.LinFun)
go0 mp = do
xs <- mapM (\(cid,lin) -> go lin >>= \lin' -> return (cid,lin')) (Map.toList mp)
return $ Map.fromList xs
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB Text) L.LinFun
go :: L.LinFun -> CMS.State (IntMapBuilder.IMB String) L.LinFun
go lf = case lf of
L.Token str -> do
imb <- CMS.get
@@ -362,7 +360,7 @@ extractStrings concr = L.Concrete { L.toks = toks', L.lins = lins' }
-- pts' <- mapM (\(pfxs,lv) -> go lv >>= \lv' -> return (pfxs,lv')) pts
pts' <- forM pts $ \(pfxs,lv) -> do
imb <- CMS.get
let str = T.pack $ show pfxs
let str = show pfxs
let (ix,imb') = IntMapBuilder.insert' str imb
CMS.put imb'
lv' <- go lv

View File

@@ -16,13 +16,12 @@ import PGF.Tree (Tree (..), expr2tree, prTree)
import qualified Control.Exception as EX
import Control.Monad (liftM, liftM2, forM_)
import qualified Control.Monad.Writer as CMW
import Data.Char (toUpper)
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
import Data.Either (isLeft)
import qualified Data.IntMap as IntMap
import Data.List (isPrefixOf)
import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Text.Printf (printf)
import Prelude hiding ((!!))
@@ -41,7 +40,7 @@ data Abstract = Abstract {
-- | Concrete syntax
data Concrete = Concrete {
toks :: IntMap.IntMap Text, -- ^ all strings are stored exactly once here
toks :: IntMap.IntMap String, -- ^ all strings are stored exactly once here
-- lincats :: Map.Map CId LinType, -- ^ a linearization type for each category
lins :: Map.Map CId LinFun -- ^ a linearization function for each function
} deriving (Show)
@@ -65,12 +64,12 @@ data LinFun =
| Space -- ^ space between adjacent tokens
| Capit -- ^ capitalise next character
| AllCapit -- ^ capitalise next word
| Pre [([Text], LinFun)] LinFun
| Pre [([String], LinFun)] LinFun
| Missing CId -- ^ missing definition (inserted at runtime)
-- From original definition in paper
| Empty
| Token Text
| Token String
| Concat LinFun LinFun
| Ix Int
| Tuple [LinFun]
@@ -158,10 +157,6 @@ instance Binary LinFun where
14 -> liftM TokenIx get
_ -> fail "Failed to decode LPGF binary format"
instance Binary Text where
put = put . TE.encodeUtf8
get = liftM TE.decodeUtf8 get
abstractName :: LPGF -> CId
abstractName = absname
@@ -173,22 +168,14 @@ readLPGF = Data.Binary.decodeFile
-- | Main linearize function, to 'String'
linearize :: LPGF -> Language -> Expr -> String
linearize lpgf lang expr = T.unpack $ linearizeText lpgf lang expr
-- | Main linearize function, to 'Data.Text.Text'
linearizeText :: LPGF -> Language -> Expr -> Text
linearizeText lpgf lang =
linearize lpgf lang =
case Map.lookup lang (concretes lpgf) of
Just concr -> linearizeConcreteText concr
Just concr -> linearizeConcrete concr
Nothing -> error $ printf "Unknown language: %s" (showCId lang)
-- | Language-specific linearize function, to 'String'
linearizeConcrete :: Concrete -> Expr -> String
linearizeConcrete concr expr = T.unpack $ linearizeConcreteText concr expr
-- | Language-specific linearize function, to 'Data.Text.Text'
linearizeConcreteText :: Concrete -> Expr -> Text
linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
linearizeConcrete concr expr = lin2string $ lin (expr2tree expr)
where
lin :: Tree -> LinFun
lin tree = case tree of
@@ -209,7 +196,7 @@ try comp = do
-- | Evaluation context
data Context = Context {
cxArgs :: [LinFun], -- ^ is a sequence of terms
cxToks :: IntMap.IntMap Text -- ^ token map
cxToks :: IntMap.IntMap String -- ^ token map
}
-- | Operational semantics
@@ -238,7 +225,7 @@ eval cxt t = case t of
PreIx pts df -> Pre pts' df'
where
pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] (read . T.unpack) $ IntMap.lookup ix (cxToks cxt)]
pts' = [(pfxs, eval cxt t) | (ix, t) <- pts, let pfxs = maybe [] read $ IntMap.lookup ix (cxToks cxt)]
df' = eval cxt df
TokenIx i -> maybe Empty Token $ IntMap.lookup i (cxToks cxt)
@@ -252,32 +239,32 @@ flattenTuple = \case
-- | Turn concrete syntax terms into an actual string.
-- This is done in two passes, first to flatten concats & evaluate pre's, then to
-- apply BIND and other predefs.
lin2string :: LinFun -> Text
lin2string lf = T.unwords $ join $ flatten [lf]
lin2string :: LinFun -> String
lin2string lf = unwords $ join $ flatten [lf]
where
-- Process bind et al into final token list
join :: [Either LinFun Text] -> [Text]
join :: [Either LinFun String] -> [String]
join elt = case elt of
Right tok:Left Bind:ls ->
case join ls of
next:ls' -> tok `T.append` next : ls'
next:ls' -> tok : next : ls'
_ -> []
Right tok:ls -> tok : join ls
Left Space:ls -> join ls
Left Capit:ls ->
case join ls of
next:ls' -> T.toUpper (T.take 1 next) `T.append` T.drop 1 next : ls'
next:ls' -> (toUpper (head next) : tail next) : ls'
_ -> []
Left AllCapit:ls ->
case join ls of
next:ls' -> T.toUpper next : ls'
next:ls' -> map toUpper next : ls'
_ -> []
Left (Missing cid):ls -> join (Right (T.pack (printf "[%s]" (show cid))) : ls)
Left (Missing cid):ls -> join (Right (printf "[%s]" (show cid)) : ls)
[] -> []
x -> error $ printf "Unhandled term in lin2string: %s" (show x)
-- Process concats, tuples, pre into flat list
flatten :: [LinFun] -> [Either LinFun Text]
flatten :: [LinFun] -> [Either LinFun String]
flatten [] = []
flatten (l:ls) = case l of
Empty -> flatten ls
@@ -291,7 +278,7 @@ lin2string lf = T.unwords $ join $ flatten [lf]
f = flatten ls
ch = case dropWhile isLeft f of
Right next:_ ->
let matches = [ l | (pfxs, l) <- pts, any (`T.isPrefixOf` next) pfxs ]
let matches = [ l | (pfxs, l) <- pts, any (`isPrefixOf` next) pfxs ]
in if null matches then df else head matches
_ -> df
in flatten (ch:ls)
@@ -336,7 +323,7 @@ instance PP LPGF where
instance PP Concrete where
pp (Concrete toks lins) = do
forM_ (IntMap.toList toks) $ \(i,tok) ->
CMW.tell [show i ++ " " ++ T.unpack tok]
CMW.tell [show i ++ " " ++ tok]
CMW.tell [""]
forM_ (Map.toList lins) $ \(cid,lin) -> do
CMW.tell ["# " ++ showCId cid]

View File

@@ -14,7 +14,6 @@ import Data.Either (isLeft)
import qualified Data.List as L
import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Map as Map
import Data.Text (Text)
import Data.Time.Clock (getCurrentTime, diffUTCTime)
import System.Console.ANSI
import System.Directory (listDirectory, getFileSize)
@@ -155,13 +154,13 @@ linPGF2 :: PGF2.PGF -> [PGF2.Expr] -> [[String]]
linPGF2 pgf trees =
[ map (PGF2.linearize concr) trees | (_, concr) <- Map.toList (PGF2.languages pgf) ]
linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[Text]]
linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[String]]
linLPGF lpgf trees =
[ map (LPGF.linearizeConcreteText concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
[ map (LPGF.linearizeConcrete concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String Text]]
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String String]]
linLPGF' lpgf trees =
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcreteText concr) trees
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcrete concr) trees
-- | Produce human readable file size
-- Adapted from https://hackage.haskell.org/package/hrfsize