1
0
forked from GitHub/gf-core
Files
gf-core/src/server/PGFService.hs
2022-06-16 18:21:54 +02:00

487 lines
17 KiB
Haskell

{-# LANGUAGE CPP #-}
module PGFService(cgiMain,cgiMain',getPath,
logFile,stderrToFile,
Caches,pgfCache,newPGFCache,flushPGFCache,listPGFCache) where
import PGF2
import PGF2.Transactions
import GF.Text.Lexing
import Cache
import CGIUtils(outputJSONP,outputPlain,outputHTML,outputText,
outputBinary,outputBinary',
logError,handleCGIErrors,throwCGIError,stderrToFile)
import CGI(CGI,readInput,getInput,getVarWithDefault,
CGIResult,requestAcceptLanguage,handleErrors,setHeader,
Accept(..),Language(..),negotiate,liftIO)
import URLEncoding
import Data.Time.Clock(UTCTime)
import Data.Time.Format(formatTime)
#if MIN_VERSION_time(1,5,0)
import Data.Time.Format(defaultTimeLocale,rfc822DateFormat)
#else
import System.Locale(defaultTimeLocale,rfc822DateFormat)
#endif
import Text.JSON
import Text.PrettyPrint as PP(render, text, (<+>))
import qualified Codec.Binary.UTF8.String as UTF8 (decodeString)
import qualified Data.ByteString.Lazy as BS
import Control.Concurrent
import qualified Control.Exception as E
import Control.Monad
import Control.Monad.State(State,evalState,get,put)
import Control.Monad.Catch(bracket_)
import Data.Char
--import Data.Function (on)
import Data.List ({-sortBy,-}intersperse,mapAccumL,nub,isSuffixOf,nubBy,stripPrefix)
import qualified Data.Map as Map
import Data.Maybe
import System.Random
import System.Process
import System.Exit
import System.IO
import System.IO.Error(isDoesNotExistError)
import System.Directory(removeFile)
import System.FilePath(takeExtension,dropExtension,takeDirectory,(</>),(<.>))
import System.Mem(performGC)
catchIOE :: IO a -> (E.IOException -> IO a) -> IO a
catchIOE = E.catch
withQSem qsem = bracket_ (liftIO $ waitQSem qsem) (liftIO $ signalQSem qsem)
logFile :: FilePath
logFile = "pgf-error.log"
data Caches = Caches { qsem :: QSem,
pgfCache :: Cache PGF,
labelsCache :: Cache Labels }
newPGFCache jobs = do let n = maybe 4 id jobs
qsem <- newQSem n
pgfCache <- newCache' readGrammar
lblCache <- newCache' (const (fmap getDepLabels . readFile))
return $ Caches qsem pgfCache lblCache
flushPGFCache c = do flushCache (pgfCache c)
flushCache (labelsCache c)
listPGFCache c = listCache (pgfCache c)
readGrammar mb_pgf path =
case takeExtension path of
".pgf" -> readPGF path
".ngf" -> case mb_pgf of
Nothing -> readNGF path
Just gr -> checkoutPGF gr
_ -> error "Extension must be .pgf or .ngf"
newCache' rd = do c <- newCache rd
forkIO $ forever $ clean c
return c
where
clean c = do threadDelay 2000000000 -- 2000 seconds, i.e. ~33 minutes
expireCache (24*60*60) c -- 24 hours
getPath =
do path <- getVarWithDefault "PATH_TRANSLATED" "" -- apache mod_fastcgi
if null path
then getVarWithDefault "SCRIPT_FILENAME" "" -- lighttpd
else return path
cgiMain :: Caches -> CGI CGIResult
cgiMain cache = handleErrors . handleCGIErrors $
cgiMain' cache =<< getPath
cgiMain' :: Caches -> FilePath -> CGI CGIResult
cgiMain' cache path =
do command <- liftM (maybe "grammar" (urlDecodeUnicode . UTF8.decodeString))
(getInput "command")
case command of
"download" -> outputBinary =<< getFile BS.readFile path
_ -> do tpgf <- getFile (readCache' (pgfCache cache)) path
pgfMain (qsem cache) command tpgf
getFile get path =
either failed return =<< liftIO (E.try (get path))
where
failed e = if isDoesNotExistError e
then notFound path
else liftIO $ ioError e
pgfMain qsem command (t,pgf) =
case command of
"parse" -> withQSem qsem $
out t=<< join (parse # input % cat % start % limit % treeopts)
-- "parseToChart" -> withQSem qsem $
-- out t=<< join (parseToChart # input % cat % limit)
"linearize" -> out t=<< lin # tree % to
"bracketedLinearize"
-> out t=<< bracketedLin # tree % to
"linearizeAll" -> out t=<< linAll # tree % to
"translate" -> withQSem qsem $
out t=<<join(trans # input % cat % to % start % limit%treeopts)
"lookupmorpho" -> out t=<< morpho # from1 % textInput
"lookupcohorts" -> out t=<< cohorts # from1 % getInput "filter" % textInput
"flush" -> out t=<< flush
"grammar" -> out t grammar
"abstrtree" -> outputGraphviz=<< graphvizAbstractTree pgf graphvizDefaults # tree
"parsetree" -> outputGraphviz=<< (\cnc -> graphvizParseTree cnc graphvizDefaults) . snd # from1 %tree
"wordforword" -> out t =<< wordforword # input % cat % to
_ -> badRequest "Unknown command" command
where
flush = liftIO $ do --modifyMVar_ pc $ const $ return Map.empty
performGC
return $ showJSON ()
cat :: CGI Type
cat =
do mcat <- getInput1 "cat"
case mcat of
Nothing -> return (startCat pgf)
Just cat -> case readType cat of
Nothing -> badRequest "Bad category" cat
Just typ -> return typ
langs = languages pgf
grammar = showJSON $ makeObj
["name".=abstractName pgf,
"lastmodified".=show t,
"startcat".=showType [] (startCat pgf),
"languages".=languages]
where
languages = [makeObj ["name".= l] | (l,_)<-Map.toList langs]
parse input@((from,_),_) cat start mlimit (trie,json) =
do r <- parse' cat start mlimit input
return $ showJSON [makeObj ("from".=from:jsonParseResult json r)]
jsonParseResult json = either bad good
where
bad err = ["parseFailed".=err]
good trees = "trees".=map tp trees :[] -- :addTrie trie trees
tp (tree,prob) = makeObj ["tree".=tree
,"prob".=prob
]
-- Without caching parse results:
parse' cat start mlimit ((from,concr),input) =
case parseWithHeuristics concr cat input (-1) callbacks of
ParseOk ts -> return (Right (maybe id take mlimit (drop start ts)))
ParseFailed _ tok -> return (Left tok)
ParseIncomplete -> return (Left "")
where
callbacks = undefined
parseToChart ((from,concr),input) cat mlimit = undefined {-
do r <- case C.parseToChart concr cat input (-1) callbacks (fromMaybe 5 mlimit) of
ParseOk chart -> return (good chart)
ParseFailed _ tok -> return (bad tok)
ParseIncomplete -> return (bad "")
return $ showJSON [makeObj ("from".=from:r)]
where
callbacks = maybe [] cb $ lookup (C.abstractName pgf) C.literalCallbacks
cb fs = [(cat,f pgf (from,concr) input)|(cat,f)<-fs]
bad err = ["parseFailed".=err]
good (roots,chart) = ["roots".=showJSON roots,
"chart".=makeObj [show fid .= mkChartObj inf | (fid,inf)<-Map.toList chart]]
mkChartObj (brackets,prods,cat) =
makeObj ["brackets".=map mkChartBracket brackets
,"prods" .=map mkChartProd prods
,"cat" .=cat
]
mkChartBracket (s,e,ann) =
makeObj ["start".=s,"end".=e,"ann".=ann]
mkChartProd (expr,args,prob) =
makeObj ["tree".=expr,"args".=map mkChartPArg args,"prob".=prob]
mkChartPArg (PArg _ fid) = showJSON fid
-}
linAll tree to = showJSON (linAll' tree to)
linAll' tree (tos,unlex) =
[makeObj ["to".=to,
"texts".=map unlex (linearizeAll c tree)]|(to,c)<-tos]
lin tree to = showJSON (lin' tree to)
lin' tree (tos,unlex) =
[makeObj ["to".=to,"text".=unlex (linearize c tree)]|(to,c)<-tos]
bracketedLin tree to = showJSON (bracketedLin' tree to)
bracketedLin' tree (tos,unlex) =
[makeObj ["to".=to,"brackets".=showJSON (bracketedLinearize c tree)]|(to,c)<-tos]
trans input@((from,_),_) cat to start mlimit (trie,jsontree) =
do parses <- parse' cat start mlimit input
return $
showJSON [ makeObj ["from".=from,
"translations".= jsonParses parses]]
where
jsonParses = either bad good
where
bad err = [makeObj ["error".=err]]
good parses = [makeObj ["tree".=tree
,"prob".=prob
,"linearizations".=lin' tree to]
| (tree,prob) <- parses]
morpho (from,concr) input =
showJSON [makeObj ["lemma".=l
,"analysis".=a
,"prob".=p]
| (l,a,p)<-lookupMorpho concr input]
cohorts (from,concr) filter input =
showJSON [makeObj ["start" .=showJSON s
,"word" .=showJSON w
,"morpho".=showJSON [makeObj ["lemma".=l
,"analysis".=a
,"prob".=p]
| (l,a,p)<-ms]
,"end" .=showJSON e
]
| (s,w,ms,e) <- (case filter of
Just "longest" -> filterLongest
Just "best" -> filterBest
_ -> id)
(lookupCohorts concr input)]
wordforword input@((from,_),_) cat = jsonWFW from . wordforword' input cat
jsonWFW from rs =
showJSON
[makeObj
["from".=from,
"translations".=[makeObj ["linearizations".=
[makeObj["to".=to,"text".=text]
| (to,text)<-rs]]]]]
wordforword' inp@((from,concr),input) cat (tos,unlex) =
[(to,unlex . unwords $ map (lin_word' c) pws)
|let pws=map parse_word' (words input),(to,c)<-tos]
where
lin_word' c = either id (lin1 c)
lin1 c = dropq . linearize c
where
dropq (q:' ':s) | q `elem` "+*" = s
dropq s = s
parse_word' w = if all (\c->isSpace c||isPunctuation c) w
then Left w
else parse_word w
parse_word w =
maybe (Left ("["++w++"]")) Right $
msum [parse1 w,parse1 ow,morph w,morph ow]
where
ow = case w of
c:cs | isLower c -> toUpper c : cs
| isUpper c -> toLower c : cs
s -> s
parse1 s = case PGF2.parse concr cat s of
ParseOk ((t,_):ts) -> Just t
_ -> Nothing
morph w = listToMaybe
[t | (f,a,p)<-lookupMorpho concr w,
t<-maybeToList (readExpr f)]
---
input = lexit # from % textInput
where
lexit (from,lex) input = (from,lex input)
from = maybe (missing "from") getlexer =<< from'
where
getlexer f@(_,concr) = (,) f # c_lexer concr
from1 = maybe (missing "from") return =<< from'
from' = getLang "from"
to = (,) # getLangs "to" % unlexerC (const False)
getLangs = getLangs' readLang
getLang = getLang' readLang
readLang :: String -> CGI (String,Concr)
readLang lang =
case Map.lookup lang langs of
Nothing -> badRequest "Bad language" lang
Just c -> return (lang,c)
tree = do s <- maybe (missing "tree") return =<< getInput1 "tree"
maybe (badRequest "bad tree" s) return (readExpr s)
c_lexer concr = lexer (not . null . lookupMorpho concr)
--------------------------------------------------------------------------------
-- * Lexing
-- | Standard lexers
lexer good = maybe (return id) lexerfun =<< getInput "lexer"
where
lexerfun name =
case stringOp good ("lex"++name) of
Just fn -> return fn
Nothing -> badRequest "Unknown lexer" name
type Unlexer = String->String
-- | Unlexing for the C runtime system, &+ is already applied
unlexerC :: (String -> Bool) -> CGI Unlexer
unlexerC = unlexer' id
-- | Unlexing for the Haskell runtime system, the default is to just apply &+
unlexerH :: CGI Unlexer
unlexerH = unlexer' (unwords . bindTok . words) (const False)
unlexer' defaultUnlexer good =
maybe (return defaultUnlexer) unlexerfun =<< getInput "unlexer"
where
unlexerfun name =
case stringOp good ("unlex"++name) of
Just fn -> return (fn . cleanMarker)
Nothing -> badRequest "Unknown unlexer" name
cleanMarker ('+':cs) = cs
cleanMarker ('*':cs) = cs
cleanMarker cs = cs
out t r = do let fmt = formatTime defaultTimeLocale rfc822DateFormat t
setHeader "Last-Modified" fmt
outputJSONP r
getInput1 x = nonEmpty # getInput x
nonEmpty (Just "") = Nothing
nonEmpty r = r
textInput :: CGI String
textInput = liftM (maybe "" (urlDecodeUnicode . UTF8.decodeString)) $ getInput "input"
getLangs' readLang i = mapM readLang . maybe [] words =<< getInput i
getLang' readLang i =
do mlang <- getInput i
case mlang of
Just l@(_:_) -> Just # readLang l
_ -> return Nothing
limit, depth :: CGI (Maybe Int)
limit = readInput "limit"
depth = readInput "depth"
start :: CGI Int
start = maybe 0 id # readInput "start"
treeopts :: CGI TreeOpts
treeopts = (,) # getBool "trie" % getBool "jsontree"
getBool x = maybe False toBool # getInput x
toBool s = s `elem` ["","yes","true","True"]
missing = badRequest "Missing parameter"
errorMissingId = badRequest "Missing identifier" ""
notFound = throw 404 "Not found"
badRequest = throw 400
throw code msg extra =
throwCGIError code msg [msg ++(if null extra then "" else ": "++extra)]
format def = maybe def id # getInput "format"
type From = (Maybe Concr,String)
type To = ([Concr],Unlexer)
type TreeOpts = (Bool,Bool) -- (trie,jsontree)
outputGraphviz code =
do fmt <- format "png"
case fmt of
"gv" -> outputPlain code
_ -> outputFPS' fmt =<< liftIO (pipeIt2graphviz fmt code)
where
outputFPS' = outputBinary' . mimeType
mimeType fmt =
case fmt of
"png" -> "image/png"
"gif" -> "image/gif"
"svg" -> "image/svg+xml"
-- ...
_ -> "application/binary"
pipeIt2graphviz :: String -> String -> IO BS.ByteString
pipeIt2graphviz fmt code = do
(Just inh, Just outh, _, pid) <-
createProcess (proc "dot" ["-T",fmt])
{ std_in = CreatePipe,
std_out = CreatePipe,
std_err = Inherit }
hSetBinaryMode outh True
hSetEncoding inh utf8
-- fork off a thread to start consuming the output
output <- BS.hGetContents outh
outMVar <- newEmptyMVar
_ <- forkIO $ E.evaluate (BS.length output) >> putMVar outMVar ()
-- now write and flush any input
hPutStr inh code
hFlush inh
hClose inh -- done with stdin
-- wait on the output
takeMVar outMVar
hClose outh
-- wait on the process
ex <- waitForProcess pid
case ex of
ExitSuccess -> return output
ExitFailure r -> fail ("pipeIt2graphviz: (exit " ++ show r ++ ")")
instance JSON Expr where
readJSON x = readJSON x >>= maybe (fail "Bad expression.") return . readExpr
showJSON = showJSON . showExpr []
instance JSON BracketedString where
readJSON x = return (Leaf "")
showJSON (Bracket cat fid index fun bs) =
makeObj ["cat".=cat, "fid".=fid, "index".=index, "fun".=fun, "children".=bs]
showJSON BIND = makeObj ["bind".=True]
showJSON (Leaf s) = makeObj ["token".=s]
-- * PGF utilities
selectLanguage :: PGF -> Maybe (Accept Language) -> Concr
selectLanguage pgf macc = case acceptable of
[] -> case Map.elems (languages pgf) of
[] -> error "No concrete syntaxes in PGF grammar."
l:_ -> l
Language c:_ -> fromJust (langCodeLanguage pgf c)
where langCodes = mapMaybe languageCode (Map.elems (languages pgf))
acceptable = negotiate (map Language langCodes) macc
langCodeLanguage :: PGF -> String -> Maybe Concr
langCodeLanguage pgf code = listToMaybe [concr | concr <- Map.elems (languages pgf), languageCode concr == Just code]
-- * General utilities
infixl 2 #,%
f .= v = (f,showJSON v)
f # x = fmap f x
f % x = ap f x