Files
gf-core/src/server/PGFService.hs
2024-01-22 17:29:57 +01:00

465 lines
16 KiB
Haskell

{-# LANGUAGE CPP, ScopedTypeVariables #-}
module PGFService(pgfMain,
Caches,newPGFCache,readCachedPGF,readCachedNGF,
flushPGFCache,listPGFCache) where
import PGF2
import PGF2.Transactions
import Cache
import Data.Time.Format(formatTime)
import Data.Time.Format(defaultTimeLocale,rfc822DateFormat)
import Text.JSON
import Control.Concurrent
import Control.Exception
import Control.Monad
import Data.Char
import qualified Data.Map as Map
import Data.Maybe
import Data.Time (UTCTime)
import System.Process
import System.Exit
import System.IO
import System.IO.Error(isDoesNotExistError)
import System.FilePath(takeExtension)
import System.Mem(performGC)
import Network.HTTP
import Network.FastCGI hiding (Connection, writeHeaders)
import Numeric(showHex)
data Caches = Caches { qsem :: QSem,
pgfCache :: Cache PGF,
ngfCache :: Cache PGF,
labelsCache :: Cache Labels }
newPGFCache root jobs = do
let n = maybe 4 id jobs
qsem <- newQSem n
pgfCache <- newCache' root (const readPGF)
ngfCache <- newCache' root (maybe readNGF (const . checkoutPGF))
lblCache <- newCache' root (const (fmap getDepLabels . readFile))
return $ Caches qsem pgfCache ngfCache lblCache
readCachedPGF :: Caches -> FilePath -> IO PGF
readCachedPGF = readCache . pgfCache
readCachedNGF :: Caches -> FilePath -> IO PGF
readCachedNGF = readCache . ngfCache
flushPGFCache c = do flushCache (pgfCache c)
flushCache (ngfCache c)
flushCache (labelsCache c)
listPGFCache c = liftM2 (++) (listCache (pgfCache c)) (listCache (ngfCache c))
newCache' root rd = do
c <- newCache root rd
forkIO $ forever $ clean c
return c
where
clean c = do threadDelay 2000000000 -- 2000 seconds, i.e. ~33 minutes
expireCache (24*60*60) c -- 24 hours
pgfMain :: (String -> IO ()) -> Connection -> Caches -> Env -> Request -> IO ()
pgfMain logLn conn cache env rq =
case fromMaybe "grammar" (lookup "command" query) of
"download"
| ext == ".pgf" -> do tpgf <- getFile (readCache' (pgfCache cache)) path
pgfDownload conn query tpgf
| ext == ".ngf" -> do tpgf <- getFile (readCache' (ngfCache cache)) path
pgfDownload conn query tpgf
command
| ext == ".pgf" -> do tpgf <- getFile (readCache' (pgfCache cache)) path
handleErrors logLn (pgfCommand (qsem cache) command query tpgf) >>= respondHTTP conn
| ext == ".ngf" -> do tpgf <- getFile (readCache' (ngfCache cache)) path
handleErrors logLn (pgfCommand (qsem cache) command query tpgf) >>= respondHTTP conn
_ -> respondHTTP conn (Response 415 "Bad Request" [] "Extension must be .pgf or .ngf")
where
path = fromMaybe "" (lookup "PATH_TRANSLATED" env `mplus`
lookup "SCRIPT_FILENAME" env)
ext = takeExtension path
query = rqQuery rq
getFile get path =
(get path)
`catch`
(\e ->
if isDoesNotExistError e
then notFound path
else ioError e)
pgfCommand qsem command q (t,pgf) =
case command of
"parse" -> withQSem qsem $
out q t=<< join (parse # input % cat % start % limit % treeopts)
-- "parseToChart" -> withQSem qsem $
-- out q t=<< join (parseToChart # input % cat % limit)
"linearize" -> out q t=<< lin # tree % to
"bracketedLinearize"
-> out q t=<< bracketedLin # tree % to
"linearizeAll" -> out q t=<< linAll # tree % to
"translate" -> withQSem qsem $
out q t=<<join(trans # input % cat % to % start % limit%treeopts)
"lookupmorpho" -> out q t=<< morpho # from % textInput
"lookupcohorts" -> out q t=<< cohorts # from % filter % textInput
"flush" -> out q t=<< flush
"grammar" -> out q t grammar
"abstrtree" -> outputGraphviz=<< graphvizAbstractTree pgf graphvizDefaults # tree
"parsetree" -> outputGraphviz=<< (\cnc -> graphvizParseTree cnc graphvizDefaults) . snd # from % tree
"wordforword" -> out q t =<< wordforword # input % cat % to
_ -> badRequest "Unknown command" command
where
flush = do performGC
return $ showJSON ()
cat :: IO Type
cat =
case lookup "cat" q of
Nothing -> return (startCat pgf)
Just cat -> case readType cat of
Nothing -> badRequest "Bad category" cat
Just typ -> return typ
langs = languages pgf
grammar = showJSON $ makeObj
["name".=abstractName pgf,
"lastmodified".=show t,
"startcat".=showType [] (startCat pgf),
"languages".=languages]
where
languages = [makeObj ["name".= l] | (l,_)<-Map.toList langs]
parse input@((from,_),_) cat start mlimit (trie,json) =
do r <- parse' cat start mlimit input
return $ showJSON [makeObj ("from".=from:jsonParseResult json r)]
jsonParseResult json = either bad good
where
bad err = ["parseFailed".=err]
good trees = "trees".=map tp trees :[] -- :addTrie trie trees
tp (tree,prob) = makeObj ["tree".=tree
,"prob".=prob
]
-- Without caching parse results:
parse' cat start mlimit ((from,concr),input) =
case PGF2.parse concr cat (init input) of
ParseOk ts -> return (Right (maybe id take mlimit (drop start ts)))
ParseFailed _ tok -> return (Left tok)
ParseIncomplete -> return (Left "")
parseToChart ((from,concr),input) cat mlimit = undefined {-
do r <- case C.parseToChart concr cat input (-1) callbacks (fromMaybe 5 mlimit) of
ParseOk chart -> return (good chart)
ParseFailed _ tok -> return (bad tok)
ParseIncomplete -> return (bad "")
return $ showJSON [makeObj ("from".=from:r)]
where
callbacks = maybe [] cb $ lookup (C.abstractName pgf) C.literalCallbacks
cb fs = [(cat,f pgf (from,concr) input)|(cat,f)<-fs]
bad err = ["parseFailed".=err]
good (roots,chart) = ["roots".=showJSON roots,
"chart".=makeObj [show fid .= mkChartObj inf | (fid,inf)<-Map.toList chart]]
mkChartObj (brackets,prods,cat) =
makeObj ["brackets".=map mkChartBracket brackets
,"prods" .=map mkChartProd prods
,"cat" .=cat
]
mkChartBracket (s,e,ann) =
makeObj ["start".=s,"end".=e,"ann".=ann]
mkChartProd (expr,args,prob) =
makeObj ["tree".=expr,"args".=map mkChartPArg args,"prob".=prob]
mkChartPArg (PArg _ fid) = showJSON fid
-}
linAll tree to = showJSON (linAll' tree to)
linAll' tree tos =
[makeObj ["to".=to,
"texts".=linearizeAll c tree]|(to,c)<-tos]
lin tree to = showJSON (lin' tree to)
lin' tree tos =
[makeObj ["to".=to,"text".=linearize c tree]|(to,c)<-tos]
bracketedLin tree to = showJSON (bracketedLin' tree to)
bracketedLin' tree tos =
[makeObj ["to".=to,"brackets".=bracketedLinearize c tree]|(to,c)<-tos]
trans input@((from,_),_) cat to start mlimit (trie,jsontree) =
do parses <- parse' cat start mlimit input
return $
showJSON [ makeObj ["from".=from,
"translations".= jsonParses parses]]
where
jsonParses = either bad good
where
bad err = [makeObj ["error".=err]]
good parses = [makeObj ["tree".=tree
,"prob".=prob
,"linearizations".=lin' tree to]
| (tree,prob) <- parses]
morpho (from,concr) input =
showJSON [makeObj ["lemma".=l
,"analysis".=a
,"prob".=p]
| (l,a,p)<-lookupMorpho concr input]
cohorts (from,concr) filter input =
showJSON [makeObj ["start" .=s
,"word" .=w
,"morpho".=[makeObj ["lemma".=l
,"analysis".=a
,"prob".=p]
| (l,a,p)<-ms]
,"end" .=showJSON e
]
| (s,w,ms,e) <- (case filter of
Just "longest" -> filterLongest
Just "best" -> filterBest
_ -> id)
(lookupCohorts concr input)]
wordforword input@((from,_),_) cat = jsonWFW from . wordforword' input cat
where
jsonWFW from rs =
showJSON
[makeObj
["from".=from,
"translations".=[makeObj ["linearizations".=
[makeObj["to".=to,"text".=text]
| (to,text)<-rs]]]]]
wordforword' inp@((from,concr),input) cat tos =
[(to,unwords $ map (lin_word' c) pws)
|let pws=map parse_word' (words input),(to,c)<-tos]
where
lin_word' c = either id (lin1 c)
lin1 c = dropq . linearize c
where
dropq (q:' ':s) | q `elem` "+*" = s
dropq s = s
parse_word' w = if all (\c->isSpace c||isPunctuation c) w
then Left w
else parse_word w
parse_word w =
maybe (Left ("["++w++"]")) Right $
msum [parse1 w,parse1 ow,morph w,morph ow]
where
ow = case w of
c:cs | isLower c -> toUpper c : cs
| isUpper c -> toLower c : cs
s -> s
parse1 s = case PGF2.parse concr cat s of
ParseOk ((t,_):ts) -> Just t
_ -> Nothing
morph w = listToMaybe
[t | (f,a,p)<-lookupMorpho concr w,
t<-maybeToList (readExpr f)]
---
input = (,) # from % textInput
from = maybe (missing "from") return =<< getLang "from"
to = getLangs "to"
getLangs i = mapM readLang (maybe [] words (lookup i q))
getLang i =
case lookup i q of
Just lang -> fmap Just (readLang lang)
_ -> return Nothing
readLang :: String -> IO (String,Concr)
readLang lang =
case Map.lookup lang langs of
Nothing -> badRequest "Bad language" lang
Just c -> return (lang,c)
tree = do s <- maybe (missing "tree") return (lookup "tree" q)
maybe (badRequest "bad tree" s) return (readExpr s)
textInput = maybe (missing "input") return (lookup "input" q)
filter = return (lookup "filter" q)
start =
case lookup "start" q of
Just s -> case reads s of
[(val,"")] -> return val
_ -> badRequest "bad start" s
Nothing -> return 0
limit =
case lookup "limit" q of
Just s -> case reads s of
[(val,"")] -> return (Just val)
_ -> badRequest "bad limit" s
Nothing -> return Nothing
treeopts = (,) # getBool "trie" % getBool "jsontree"
getBool x = return (maybe False toBool (lookup x q))
where
toBool s = s `elem` ["","yes","true","True"]
outputGraphviz code
| fmt == "dot" = outputGraph code
| otherwise = outputGraph =<< pipeIt2graphviz fmt code
where
fmt = fromMaybe "png" (lookup "format" q)
outputGraph body =
return (Response
{ rspCode = 200
, rspReason = "OK"
, rspHeaders = [Header HdrContentType mimeType]
, rspBody = body
})
mimeType =
case fmt of
"png" -> "image/png"
"gif" -> "image/gif"
"svg" -> "image/svg+xml"
"dot" -> "text/x-graphviz; charset=UTF8"
_ -> "application/binary"
pgfDownload conn query (t,pgf) = do
let fmt = formatTime defaultTimeLocale rfc822DateFormat t
mb_langs = fmap words (lookup "lang" query)
writeHeaders conn (Response
{ rspCode = 200
, rspReason = "OK"
, rspHeaders = [ Header HdrContentType "application/pgf"
, Header HdrContentDisposition ("attachment; filename=\""++abstractName pgf++".pgf\"")
, Header HdrTransferEncoding "chunked"
, Header HdrDate fmt
]
, rspBody = ""
})
writePGF_ (writeChunk conn) pgf mb_langs
writeAscii conn "0\r\n\r\n"
where
writeChunk conn ptr len =
(do writeAscii conn (showHex len "\r\n")
writeBytes conn ptr len
writeAscii conn "\r\n"
return len)
`catch`
(\(e :: SomeException) -> return (-1))
out :: JSON a => Query -> UTCTime -> a -> IO Response
out q t r = do
let (ty,str) = case lookup "jsonp" q of
Nothing -> ("json",encode r)
Just c -> ("javascript",c ++ "(" ++ encode r ++ ")")
fmt = formatTime defaultTimeLocale rfc822DateFormat t
return (Response
{ rspCode = 200
, rspReason = "OK"
, rspHeaders = [Header HdrContentType ("application/"++ty++"; charset=utf-8")
,Header HdrLastModified fmt
]
, rspBody = str
})
{-
getInput1 x = nonEmpty # getInput x
nonEmpty (Just "") = Nothing
nonEmpty r = r
limit, depth :: CGI (Maybe Int)
limit = readInput "limit"
depth = readInput "depth"
-}
missing = badRequest "Missing parameter"
errorMissingId = badRequest "Missing identifier" ""
notFound = httpError 404 "Not found"
badRequest = httpError 400
pipeIt2graphviz :: String -> String -> IO String
pipeIt2graphviz fmt code = do
(Just inh, Just outh, _, pid) <-
createProcess (proc "dot" ["-T",fmt])
{ std_in = CreatePipe,
std_out = CreatePipe,
std_err = Inherit }
hSetBinaryMode outh True
hSetEncoding inh utf8
-- fork off a thread to start consuming the output
output <- hGetContents outh
outMVar <- newEmptyMVar
_ <- forkIO $ evaluate (length output) >> putMVar outMVar ()
-- now write and flush any input
hPutStr inh code
hFlush inh
hClose inh -- done with stdin
-- wait on the output
takeMVar outMVar
hClose outh
-- wait on the process
ex <- waitForProcess pid
case ex of
ExitSuccess -> return output
ExitFailure r -> fail ("pipeIt2graphviz: (exit " ++ show r ++ ")")
instance JSON Expr where
readJSON x = readJSON x >>= maybe (fail "Bad expression.") return . readExpr
showJSON = showJSON . showExpr []
instance JSON BracketedString where
readJSON x = return (Leaf "")
showJSON (Bracket cat fid index fun bs) =
makeObj ["cat".=cat, "fid".=fid, "index".=index, "fun".=fun, "children".=bs]
showJSON BIND = makeObj ["bind".=True]
showJSON (Leaf s) = makeObj ["token".=s]
-- * PGF utilities
{-
selectLanguage :: PGF -> Maybe (Accept Language) -> Concr
selectLanguage pgf macc = case acceptable of
[] -> case Map.elems (languages pgf) of
[] -> error "No concrete syntaxes in PGF grammar."
l:_ -> l
Language c:_ -> fromJust (langCodeLanguage pgf c)
where langCodes = mapMaybe languageCode (Map.elems (languages pgf))
acceptable = negotiate (map Language langCodes) macc
langCodeLanguage :: PGF -> String -> Maybe Concr
langCodeLanguage pgf code = listToMaybe [concr | concr <- Map.elems (languages pgf), languageCode concr == Just code]
-}
-- * General utilities
infixl 2 #,%
f .= v = (f,showJSON v)
f # x = fmap f x
f % x = ap f x
withQSem qsem = bracket_ (waitQSem qsem) (signalQSem qsem)