Add LPGF function for catching errors. Manual fixes to Phrasebook treebank.

This commit is contained in:
John J. Camilleri
2021-03-05 12:05:25 +01:00
parent 70581c2d8c
commit 575a746a3e
6 changed files with 78 additions and 57 deletions

View File

@@ -1,5 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-} {-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Linearisation-only grammar format. -- | Linearisation-only grammar format.
-- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009): -- Closely follows description in Section 2 of Angelov, Bringert, Ranta (2009):
@@ -12,6 +13,7 @@ import PGF.CId
import PGF.Expr (Expr) import PGF.Expr (Expr)
import PGF.Tree (Tree (..), expr2tree, prTree) import PGF.Tree (Tree (..), expr2tree, prTree)
import qualified Control.Exception as EX
import Control.Monad (liftM, liftM2, forM_) import Control.Monad (liftM, liftM2, forM_)
import qualified Control.Monad.Writer as CMW import qualified Control.Monad.Writer as CMW
import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile) import Data.Binary (Binary, put, get, putWord8, getWord8, encodeFile, decodeFile)
@@ -195,6 +197,13 @@ linearizeConcreteText concr expr = lin2string $ lin (expr2tree expr)
_ -> Missing f _ -> Missing f
x -> error $ printf "Cannot lin: %s" (prTree x) x -> error $ printf "Cannot lin: %s" (prTree x)
-- | Run a compatation and catch any exception/errors.
-- Ideally this library should never throw exceptions, but we're still in development...
try :: a -> IO (Either String a)
try comp = do
let f = Right <$> EX.evaluate comp
EX.catch f (\(e :: EX.SomeException) -> return $ Left (show e))
-- | Evaluation context -- | Evaluation context
data Context = Context { data Context = Context {
cxArgs :: [LinFun], -- ^ is a sequence of terms cxArgs :: [LinFun], -- ^ is a sequence of terms

View File

@@ -48,11 +48,11 @@ Run each command separately so that memory measurements are isolated.
The `+RTS -T -RTS` is so that GHC can report its own memory usage. The `+RTS -T -RTS` is so that GHC can report its own memory usage.
``` ```
stack build --test --bench --no-run-tests --no-run-benchmarks stack build --test --bench --no-run-tests --no-run-benchmarks &&
stack bench --benchmark-arguments "compile pgf testsuite/lpgf/foods/Foods*.gf +RTS -T -RTS" stack bench --benchmark-arguments "compile pgf testsuite/lpgf/foods/Foods*.gf +RTS -T -RTS" &&
stack bench --benchmark-arguments "compile lpgf testsuite/lpgf/foods/Foods*.gf +RTS -T -RTS" stack bench --benchmark-arguments "compile lpgf testsuite/lpgf/foods/Foods*.gf +RTS -T -RTS" &&
stack bench --benchmark-arguments "run pgf Foods.pgf testsuite/lpgf/foods/Foods-all.trees +RTS -T -RTS" stack bench --benchmark-arguments "run pgf Foods.pgf testsuite/lpgf/foods/Foods-all.trees +RTS -T -RTS" &&
stack bench --benchmark-arguments "run pgf2 Foods.pgf testsuite/lpgf/foods/Foods-all.trees +RTS -T -RTS" stack bench --benchmark-arguments "run pgf2 Foods.pgf testsuite/lpgf/foods/Foods-all.trees +RTS -T -RTS" &&
stack bench --benchmark-arguments "run lpgf Foods.lpgf testsuite/lpgf/foods/Foods-all.trees +RTS -T -RTS" stack bench --benchmark-arguments "run lpgf Foods.lpgf testsuite/lpgf/foods/Foods-all.trees +RTS -T -RTS"
``` ```

View File

@@ -10,6 +10,7 @@ import GF.Support (Options, Flags (..), Verbosity (..), noOptions, addOptions, m
import Control.DeepSeq (NFData, force) import Control.DeepSeq (NFData, force)
import Control.Exception (evaluate) import Control.Exception (evaluate)
import Control.Monad (when, forM) import Control.Monad (when, forM)
import Data.Either (isLeft)
import qualified Data.List as L import qualified Data.List as L
import Data.Maybe (fromJust, isJust, isNothing) import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Map as Map import qualified Data.Map as Map
@@ -105,8 +106,12 @@ main = do
when doLPGF $ do when doLPGF $ do
heading "LPGF" heading "LPGF"
lpgf <- LPGF.readLPGF (dropExtension (fromJust binaryFile) <.> "lpgf") lpgf <- LPGF.readLPGF (dropExtension (fromJust binaryFile) <.> "lpgf")
timePure "- linearise: " (linLPGF lpgf trees) -- timePure "- linearise: " (linLPGF lpgf trees)
return () ress <- time "- linearise: " (linLPGF' lpgf trees)
when (any (any isLeft) ress) $ do
setSGR [SetColor Foreground Dull Red]
putStrLn "Teminated with errors"
setSGR [Reset]
stats <- getRTSStats stats <- getRTSStats
printf "Max memory: %s\n" (convertSize (fromIntegral (max_mem_in_use_bytes stats))) printf "Max memory: %s\n" (convertSize (fromIntegral (max_mem_in_use_bytes stats)))
@@ -117,6 +122,7 @@ heading s = do
putStrLn s putStrLn s
setSGR [Reset] setSGR [Reset]
-- For accurate timing, IO action must for evaluation itself (e.g., write to file)
time :: String -> IO a -> IO a time :: String -> IO a -> IO a
time desc io = do time desc io = do
start <- getCurrentTime start <- getCurrentTime
@@ -125,6 +131,7 @@ time desc io = do
putStrLn $ desc ++ show (diffUTCTime end start) putStrLn $ desc ++ show (diffUTCTime end start)
return r return r
-- Performs deep evaluation
timePure :: (NFData a) => String -> a -> IO a timePure :: (NFData a) => String -> a -> IO a
timePure desc val = time desc (return $ force val) timePure desc val = time desc (return $ force val)
@@ -152,6 +159,10 @@ linLPGF :: LPGF.LPGF -> [PGF.Expr] -> [[Text]]
linLPGF lpgf trees = linLPGF lpgf trees =
[ map (LPGF.linearizeConcreteText concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ] [ map (LPGF.linearizeConcreteText concr) trees | (_,concr) <- Map.toList (LPGF.concretes lpgf) ]
linLPGF' :: LPGF.LPGF -> [PGF.Expr] -> IO [[Either String Text]]
linLPGF' lpgf trees =
forM (Map.toList (LPGF.concretes lpgf)) $ \(_,concr) -> mapM (LPGF.try . LPGF.linearizeConcreteText concr) trees
-- | Produce human readable file size -- | Produce human readable file size
-- Adapted from https://hackage.haskell.org/package/hrfsize -- Adapted from https://hackage.haskell.org/package/hrfsize
convertSize :: Integer -> String convertSize :: Integer -> String

View File

@@ -88,7 +88,7 @@ PhrasebookLav: neraksti
PhrasebookNor: skriv ikke PhrasebookNor: skriv ikke
PhrasebookPol: nie pisz PhrasebookPol: nie pisz
PhrasebookRon: nu scrie PhrasebookRon: nu scrie
PhrasebookSnd: PhrasebookSnd: ن [VWrite] [VWrite]
PhrasebookSpa: no escribas PhrasebookSpa: no escribas
PhrasebookSwe: skriv inte PhrasebookSwe: skriv inte
PhrasebookTha: อย่า เขียน PhrasebookTha: อย่า เขียน
@@ -832,7 +832,7 @@ PhrasebookLav: nelasi
PhrasebookNor: les ikke PhrasebookNor: les ikke
PhrasebookPol: nie czytaj PhrasebookPol: nie czytaj
PhrasebookRon: nu citi PhrasebookRon: nu citi
PhrasebookSnd: PhrasebookSnd: ن [VRead] [VRead]
PhrasebookSpa: no leas PhrasebookSpa: no leas
PhrasebookSwe: läs inte PhrasebookSwe: läs inte
PhrasebookTha: อย่า อ่าน PhrasebookTha: อย่า อ่าน
@@ -904,7 +904,7 @@ PhrasebookLav: lasiet
PhrasebookNor: les PhrasebookNor: les
PhrasebookPol: przeczytajcie PhrasebookPol: przeczytajcie
PhrasebookRon: citiţi PhrasebookRon: citiţi
PhrasebookSnd: PhrasebookSnd: [VRead] [VRead]
PhrasebookSpa: leed PhrasebookSpa: leed
PhrasebookSwe: läs PhrasebookSwe: läs
PhrasebookTha: อ่าน ซิ PhrasebookTha: อ่าน ซิ
@@ -1072,7 +1072,7 @@ PhrasebookLav: lasi
PhrasebookNor: les PhrasebookNor: les
PhrasebookPol: przeczytaj PhrasebookPol: przeczytaj
PhrasebookRon: citeşte PhrasebookRon: citeşte
PhrasebookSnd: PhrasebookSnd: [VRead] [VRead]
PhrasebookSpa: lee PhrasebookSpa: lee
PhrasebookSwe: läs PhrasebookSwe: läs
PhrasebookTha: อ่าน ซิ PhrasebookTha: อ่าน ซิ
@@ -1216,7 +1216,7 @@ PhrasebookLav: neraksti
PhrasebookNor: skriv ikke PhrasebookNor: skriv ikke
PhrasebookPol: nie pisz PhrasebookPol: nie pisz
PhrasebookRon: nu scrie PhrasebookRon: nu scrie
PhrasebookSnd: PhrasebookSnd: ن [VWrite] [VWrite]
PhrasebookSpa: no escribas PhrasebookSpa: no escribas
PhrasebookSwe: skriv inte PhrasebookSwe: skriv inte
PhrasebookTha: อย่า เขียน PhrasebookTha: อย่า เขียน
@@ -1360,7 +1360,7 @@ PhrasebookLav: raksti
PhrasebookNor: skriv PhrasebookNor: skriv
PhrasebookPol: napisz PhrasebookPol: napisz
PhrasebookRon: scrie PhrasebookRon: scrie
PhrasebookSnd: PhrasebookSnd: [VWrite] [VWrite]
PhrasebookSpa: escribe PhrasebookSpa: escribe
PhrasebookSwe: skriv PhrasebookSwe: skriv
PhrasebookTha: เขียน ซิ PhrasebookTha: เขียน ซิ
@@ -1480,7 +1480,7 @@ PhrasebookLav: lūdzu nerakstiet
PhrasebookNor: skriv ikke PhrasebookNor: skriv ikke
PhrasebookPol: nie pisz PhrasebookPol: nie pisz
PhrasebookRon: nu scrie PhrasebookRon: nu scrie
PhrasebookSnd: PhrasebookSnd: ن [VWrite] [VWrite]
PhrasebookSpa: no escriba PhrasebookSpa: no escriba
PhrasebookSwe: skriv inte PhrasebookSwe: skriv inte
PhrasebookTha: อย่า เขียน PhrasebookTha: อย่า เขียน
@@ -1744,7 +1744,7 @@ PhrasebookLav: rakstiet
PhrasebookNor: skriv PhrasebookNor: skriv
PhrasebookPol: napiszcie PhrasebookPol: napiszcie
PhrasebookRon: scrieţi PhrasebookRon: scrieţi
PhrasebookSnd: PhrasebookSnd: [VWrite] [VWrite]
PhrasebookSpa: escribid PhrasebookSpa: escribid
PhrasebookSwe: skriv PhrasebookSwe: skriv
PhrasebookTha: เขียน ซิ PhrasebookTha: เขียน ซิ
@@ -2344,7 +2344,7 @@ PhrasebookLav: lasi
PhrasebookNor: les PhrasebookNor: les
PhrasebookPol: przeczytaj PhrasebookPol: przeczytaj
PhrasebookRon: citeşte PhrasebookRon: citeşte
PhrasebookSnd: PhrasebookSnd: [VRead] [VRead]
PhrasebookSpa: lee PhrasebookSpa: lee
PhrasebookSwe: läs PhrasebookSwe: läs
PhrasebookTha: อ่าน ซิ PhrasebookTha: อ่าน ซิ

View File

@@ -12,9 +12,9 @@ FLAGS="+RTS -T -RTS"
stack build --test --bench --no-run-tests --no-run-benchmarks stack build --test --bench --no-run-tests --no-run-benchmarks
printf "\n-- COMPILE --\n\n" printf "\n-- COMPILE --\n\n"
DEBUG=1 stack bench --benchmark-arguments "compile pgf ${PREFIX}${1}.gf ${FLAGS}" stack bench --benchmark-arguments "compile pgf ${PREFIX}${1}.gf ${FLAGS}"
printf "\n" printf "\n"
DEBUG=1 stack bench --benchmark-arguments "compile lpgf ${PREFIX}${1}.gf ${FLAGS}" stack bench --benchmark-arguments "compile lpgf ${PREFIX}${1}.gf ${FLAGS}"
printf "\n-- RUN -- \n\n" printf "\n-- RUN -- \n\n"
stack bench --benchmark-arguments "run pgf Phrasebook.pgf ${TREES} ${FLAGS}" stack bench --benchmark-arguments "run pgf Phrasebook.pgf ${TREES} ${FLAGS}"

View File

@@ -68,13 +68,14 @@ doGrammar' path gname cncs = do
results <- forM gs $ \grp -> do results <- forM gs $ \grp -> do
let ast = drop 2 $ dropWhile (/=':') $ head grp let ast = drop 2 $ dropWhile (/=':') $ head grp
printf "- %s: %s\n" gname ast printf "- %s: %s\n" gname ast
let let Just tree = readExpr ast
Just tree = readExpr ast
-- Linearization into all languages -- Linearization into all languages
outs = outs <- forM (Map.toList (concretes lpgf)) $ \(lang,concr) -> do
[ printf "%s: %s" (showLanguage lang) (linearizeConcrete concr tree) e <- try $ linearizeConcrete concr tree
| (lang,concr) <- Map.toList (concretes lpgf) return $ case e of
] Right s -> printf "%s: %s" (showLanguage lang) s
Left e -> printf "%s: ERROR: %s" (showLanguage lang) e
-- filter out missing langs from treebank -- filter out missing langs from treebank
let golds = [ g | o <- outs, g <- tail grp, takeWhile (/=':') o == takeWhile (/=':') g ] let golds = [ g | o <- outs, g <- tail grp, takeWhile (/=':') o == takeWhile (/=':') g ]