PGF2 & PGFService: work in progress on callbacks for nerc & chunks

This commit is contained in:
hallgren
2015-01-20 14:57:52 +00:00
parent 9207b45359
commit 7577de236f
2 changed files with 81 additions and 5 deletions

View File

@@ -22,7 +22,9 @@ module PGF2 (-- * PGF
-- * Morphology
MorphoAnalysis, lookupMorpho, fullFormLexicon,
-- * Exceptions
PGFError(..)
PGFError(..),
-- * Grammar specific callbacks
LiteralCallback,literalCallbacks
) where
import Prelude hiding (fromEnum)
@@ -36,8 +38,10 @@ import Foreign.C
import Data.Typeable
import qualified Data.Map as Map
import Data.IORef
import Data.Char(isUpper,isSpace)
import Data.List(isSuffixOf,maximumBy)
import Data.Function(on)
-----------------------------------------------------------------------
-- Functions that take a PGF.
-- PGF has many Concrs.
@@ -426,3 +430,70 @@ newtype PGFError = PGFError String
deriving (Show, Typeable)
instance Exception PGFError
-----------------------------------------------------------------------
type LiteralCallback =
PGF -> (ConcName,Concr) -> Int -> String -> Int -> Maybe (Expr,Float,Int)
-- | Callbacks for the App grammar
literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])]
literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])]
-- | Named entity recognition for the App grammar
-- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java)
nerc :: LiteralCallback
nerc pgf (lang,concr) lin_idx sentence offset =
case consume capitalized (drop offset sentence) of
(capwords@(_:_),rest) |
not ("Eng" `isSuffixOf` lang && name `elem` ["I","I'm"]) ->
if null ls
then pn
else case cat of
"PN" -> retLit (mkApp lemma [])
"WeekDay" -> retLit (mkApp "weekdayPN" [mkApp lemma []])
"Month" -> retLit (mkApp "monthPN" [mkApp lemma []])
"Language" -> Nothing
_ -> pn
where
retLit e = Just (expr,0,end_offset)
pn = retLit expr
expr = mkApp "SymbPN" [mkApp "MkSymb" [mkStr name]]
end_offset = length sentence-length rest
name = trimRight (concat capwords)
ls = [((l,getFunctionType pgf l),p)|(l,_,p)<-lookupMorpho concr name]
((lemma,cat),_) = maximumBy (compare `on` snd) ls
_ -> Nothing
where
-- | Variant of unfoldr
consume munch xs =
case munch xs of
Nothing -> ([],xs)
Just (y,xs') -> (y:ys,xs'')
where (ys,xs'') = consume munch xs'
getFunctionType :: PGF -> String -> Cat
getFunctionType = undefined
-- | Callback to parse arbitrary words as chunks (from
-- ../java/org/grammaticalframework/pgf/UnknownLiteralCallback.java)
chunk :: LiteralCallback
chunk _ (_,concr) lin_idx sentence offset =
case uncapitalized (drop offset sentence) of
Just (word@(_:_),rest) | null (lookupMorpho concr word) ->
Just (expr,0,length sentence-length rest)
where
expr = mkApp "MkSymb" [mkStr (trimRight word)]
_ -> Nothing
trimRight = reverse . dropWhile isSpace . reverse
capitalized = capitalized' isUpper
uncapitalized = capitalized' (not.isUpper)
capitalized' test s@(c:_) | test c =
case span (not.isSpace) s of
(name,rest1) ->
case span isSpace rest1 of
(space,rest2) -> Just (name++space,rest2)
capitalized' not s = Nothing

View File

@@ -151,9 +151,14 @@ cpgfMain command (t,(pgf,pc)) =
tp (tree,prob) = makeObj ["tree".=tree,"prob".=prob]
-- Without caching parse results:
parse' start mlimit ((_,concr),input) =
return $
maybe id take mlimit . drop start # C.parse concr cat input
parse' start mlimit ((from,concr),input) =
return $
maybe id take mlimit . drop start # cparse
where
cparse = C.parse concr cat input
--cparse = C.parseWithHeuristics concr cat input (-1) callbacks
callbacks = maybe [] cb $ lookup (C.abstractName pgf) C.literalCallbacks
cb fs = [(cat,f pgf (from,concr))|(cat,f)<-fs]
{-
-- Caching parse results:
parse' start mlimit ((from,concr),input) =