diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index 89509b3e1..1caede3fa 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -22,7 +22,9 @@ module PGF2 (-- * PGF -- * Morphology MorphoAnalysis, lookupMorpho, fullFormLexicon, -- * Exceptions - PGFError(..) + PGFError(..), + -- * Grammar specific callbacks + LiteralCallback,literalCallbacks ) where import Prelude hiding (fromEnum) @@ -36,8 +38,10 @@ import Foreign.C import Data.Typeable import qualified Data.Map as Map import Data.IORef +import Data.Char(isUpper,isSpace) +import Data.List(isSuffixOf,maximumBy) +import Data.Function(on) - ----------------------------------------------------------------------- -- Functions that take a PGF. -- PGF has many Concrs. @@ -426,3 +430,70 @@ newtype PGFError = PGFError String deriving (Show, Typeable) instance Exception PGFError + +----------------------------------------------------------------------- + +type LiteralCallback = + PGF -> (ConcName,Concr) -> Int -> String -> Int -> Maybe (Expr,Float,Int) + +-- | Callbacks for the App grammar +literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])] +literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])] + +-- | Named entity recognition for the App grammar +-- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java) +nerc :: LiteralCallback +nerc pgf (lang,concr) lin_idx sentence offset = + case consume capitalized (drop offset sentence) of + (capwords@(_:_),rest) | + not ("Eng" `isSuffixOf` lang && name `elem` ["I","I'm"]) -> + if null ls + then pn + else case cat of + "PN" -> retLit (mkApp lemma []) + "WeekDay" -> retLit (mkApp "weekdayPN" [mkApp lemma []]) + "Month" -> retLit (mkApp "monthPN" [mkApp lemma []]) + "Language" -> Nothing + _ -> pn + where + retLit e = Just (expr,0,end_offset) + pn = retLit expr + expr = mkApp "SymbPN" [mkApp "MkSymb" [mkStr name]] + end_offset = length sentence-length rest + name = trimRight (concat capwords) + ls = [((l,getFunctionType pgf l),p)|(l,_,p)<-lookupMorpho concr name] + ((lemma,cat),_) = maximumBy (compare `on` snd) ls + _ -> Nothing + where + -- | Variant of unfoldr + consume munch xs = + case munch xs of + Nothing -> ([],xs) + Just (y,xs') -> (y:ys,xs'') + where (ys,xs'') = consume munch xs' + + getFunctionType :: PGF -> String -> Cat + getFunctionType = undefined + +-- | Callback to parse arbitrary words as chunks (from +-- ../java/org/grammaticalframework/pgf/UnknownLiteralCallback.java) +chunk :: LiteralCallback +chunk _ (_,concr) lin_idx sentence offset = + case uncapitalized (drop offset sentence) of + Just (word@(_:_),rest) | null (lookupMorpho concr word) -> + Just (expr,0,length sentence-length rest) + where + expr = mkApp "MkSymb" [mkStr (trimRight word)] + _ -> Nothing + +trimRight = reverse . dropWhile isSpace . reverse + +capitalized = capitalized' isUpper +uncapitalized = capitalized' (not.isUpper) + +capitalized' test s@(c:_) | test c = + case span (not.isSpace) s of + (name,rest1) -> + case span isSpace rest1 of + (space,rest2) -> Just (name++space,rest2) +capitalized' not s = Nothing diff --git a/src/server/PGFService.hs b/src/server/PGFService.hs index 2a73462ff..d610df45d 100644 --- a/src/server/PGFService.hs +++ b/src/server/PGFService.hs @@ -151,9 +151,14 @@ cpgfMain command (t,(pgf,pc)) = tp (tree,prob) = makeObj ["tree".=tree,"prob".=prob] -- Without caching parse results: - parse' start mlimit ((_,concr),input) = - return $ - maybe id take mlimit . drop start # C.parse concr cat input + parse' start mlimit ((from,concr),input) = + return $ + maybe id take mlimit . drop start # cparse + where + cparse = C.parse concr cat input + --cparse = C.parseWithHeuristics concr cat input (-1) callbacks + callbacks = maybe [] cb $ lookup (C.abstractName pgf) C.literalCallbacks + cb fs = [(cat,f pgf (from,concr))|(cat,f)<-fs] {- -- Caching parse results: parse' start mlimit ((from,concr),input) =