mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
Compare commits
7 Commits
lpgf-memo
...
pgf2-compl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
84fd431afd | ||
|
|
588cd6ddb1 | ||
|
|
437bd8e7f9 | ||
|
|
e56d1b2959 | ||
|
|
450368f9bb | ||
|
|
60bc752a6f | ||
|
|
91278e2b4b |
15
WebSetup.hs
15
WebSetup.hs
@@ -26,6 +26,14 @@ import Distribution.PackageDescription(PackageDescription(..))
|
||||
so users won't see this message unless they check the log.)
|
||||
-}
|
||||
|
||||
-- | Notice about contrib grammars
|
||||
noContribMsg :: IO ()
|
||||
noContribMsg = putStr $ unlines
|
||||
[ "Example grammars are no longer included in the main GF repository, but have moved to gf-contrib."
|
||||
, "If you want them to be built, clone the following repository in the same directory as gf-core:"
|
||||
, "https://github.com/GrammaticalFramework/gf-contrib.git"
|
||||
]
|
||||
|
||||
example_grammars :: [(String, String, [String])] -- [(pgf, subdir, source modules)]
|
||||
example_grammars =
|
||||
[("Letter.pgf","letter",letterSrc)
|
||||
@@ -50,11 +58,8 @@ buildWeb gf flags (pkg,lbi) = do
|
||||
contrib_exists <- doesDirectoryExist contrib_dir
|
||||
if contrib_exists
|
||||
then mapM_ build_pgf example_grammars
|
||||
else putStr $ unlines
|
||||
[ "Example grammars are no longer included in the main GF repository, but have moved to gf-contrib."
|
||||
, "If you want these example grammars to be built, clone this repository in the same top-level directory as GF:"
|
||||
, "https://github.com/GrammaticalFramework/gf-contrib.git"
|
||||
]
|
||||
-- else noContribMsg
|
||||
else return ()
|
||||
where
|
||||
gfo_dir = buildDir lbi </> "examples"
|
||||
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
## 1.3.0
|
||||
|
||||
- Add completion support.
|
||||
|
||||
## 1.2.1
|
||||
|
||||
- Remove deprecated pgf_print_expr_tuple
|
||||
- Added an API for cloning expressions/types/literals
|
||||
- Remove deprecated `pgf_print_expr_tuple`.
|
||||
- Added an API for cloning expressions/types/literals.
|
||||
|
||||
## 1.2.0
|
||||
|
||||
|
||||
@@ -43,30 +43,28 @@ module PGF2 (-- * PGF
|
||||
mkCId,
|
||||
exprHash, exprSize, exprFunctions, exprSubstitute,
|
||||
treeProbability,
|
||||
|
||||
-- ** Types
|
||||
Type, Hypo, BindType(..), startCat,
|
||||
readType, showType, showContext,
|
||||
mkType, unType,
|
||||
|
||||
-- ** Type checking
|
||||
-- | Dynamically-built expressions should always be type-checked before using in other functions,
|
||||
-- as the exceptions thrown by using invalid expressions may not catchable.
|
||||
checkExpr, inferExpr, checkType,
|
||||
|
||||
-- ** Computing
|
||||
compute,
|
||||
|
||||
-- * Concrete syntax
|
||||
ConcName,Concr,languages,concreteName,languageCode,
|
||||
|
||||
-- ** Linearization
|
||||
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
|
||||
FId, BracketedString(..), showBracketedString, flattenBracketedString,
|
||||
printName, categoryFields,
|
||||
|
||||
alignWords,
|
||||
-- ** Parsing
|
||||
ParseOutput(..), parse, parseWithHeuristics,
|
||||
parseToChart, PArg(..),
|
||||
complete,
|
||||
-- ** Sentence Lookup
|
||||
lookupSentence,
|
||||
-- ** Generation
|
||||
@@ -180,7 +178,7 @@ languageCode c = unsafePerformIO (peekUtf8CString =<< pgf_language_code (concr c
|
||||
|
||||
|
||||
-- | Generates an exhaustive possibly infinite list of
|
||||
-- all abstract syntax expressions of the given type.
|
||||
-- all abstract syntax expressions of the given type.
|
||||
-- The expressions are ordered by their probability.
|
||||
generateAll :: PGF -> Type -> [(Expr,Float)]
|
||||
generateAll p (Type ctype _) =
|
||||
@@ -469,21 +467,21 @@ newGraphvizOptions pool opts = do
|
||||
-- Functions using Concr
|
||||
-- Morpho analyses, parsing & linearization
|
||||
|
||||
-- | This triple is returned by all functions that deal with
|
||||
-- | This triple is returned by all functions that deal with
|
||||
-- the grammar's lexicon. Its first element is the name of an abstract
|
||||
-- lexical function which can produce a given word or
|
||||
-- lexical function which can produce a given word or
|
||||
-- a multiword expression (i.e. this is the lemma).
|
||||
-- After that follows a string which describes
|
||||
-- After that follows a string which describes
|
||||
-- the particular inflection form.
|
||||
--
|
||||
-- The last element is a logarithm from the
|
||||
-- the probability of the function. The probability is not
|
||||
-- the probability of the function. The probability is not
|
||||
-- conditionalized on the category of the function. This makes it
|
||||
-- possible to compare the likelihood of two functions even if they
|
||||
-- have different types.
|
||||
-- have different types.
|
||||
type MorphoAnalysis = (Fun,String,Float)
|
||||
|
||||
-- | 'lookupMorpho' takes a string which must be a single word or
|
||||
-- | 'lookupMorpho' takes a string which must be a single word or
|
||||
-- a multiword expression. It then computes the list of all possible
|
||||
-- morphological analyses.
|
||||
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
|
||||
@@ -541,12 +539,12 @@ lookupCohorts lang@(Concr concr master) sent =
|
||||
return ((start,tok,ans,end):cohs)
|
||||
|
||||
filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
|
||||
filterBest ans =
|
||||
filterBest ans =
|
||||
reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
|
||||
where
|
||||
iterate v0 [] [] res = res
|
||||
iterate v0 [] new res = iterate v0 new [] res
|
||||
iterate v0 ((_,v,conf, []):old) new res =
|
||||
iterate v0 ((_,v,conf, []):old) new res =
|
||||
case compare v0 v of
|
||||
LT -> res
|
||||
EQ -> iterate v0 old new (merge conf res)
|
||||
@@ -649,7 +647,7 @@ getAnalysis ref self c_lemma c_anal prob exn = do
|
||||
data ParseOutput a
|
||||
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
|
||||
-- The string is the token where the parser have failed.
|
||||
| ParseOk a -- ^ If the parsing and the type checking are successful
|
||||
| ParseOk a -- ^ If the parsing and the type checking are successful
|
||||
-- we get the abstract syntax trees as either a list or a chart.
|
||||
| ParseIncomplete -- ^ The sentence is not complete.
|
||||
|
||||
@@ -659,9 +657,9 @@ parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
|
||||
parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> Double -- ^ the heuristic factor.
|
||||
-- A negative value tells the parser
|
||||
-- to lookup up the default from
|
||||
-> Double -- ^ the heuristic factor.
|
||||
-- A negative value tells the parser
|
||||
-- to lookup up the default from
|
||||
-- the grammar flags
|
||||
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
|
||||
-- ^ a list of callbacks for literal categories.
|
||||
@@ -715,9 +713,9 @@ parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
|
||||
parseToChart :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> Double -- ^ the heuristic factor.
|
||||
-- A negative value tells the parser
|
||||
-- to lookup up the default from
|
||||
-> Double -- ^ the heuristic factor.
|
||||
-- A negative value tells the parser
|
||||
-- to lookup up the default from
|
||||
-- the grammar flags
|
||||
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
|
||||
-- ^ a list of callbacks for literal categories.
|
||||
@@ -886,7 +884,7 @@ lookupSentence lang (Type ctype _) sent =
|
||||
|
||||
-- | The oracle is a triple of functions.
|
||||
-- The first two take a category name and a linearization field name
|
||||
-- and they should return True/False when the corresponding
|
||||
-- and they should return True/False when the corresponding
|
||||
-- prediction or completion is appropriate. The third function
|
||||
-- is the oracle for literals.
|
||||
type Oracle = (Maybe (Cat -> String -> Int -> Bool)
|
||||
@@ -974,6 +972,67 @@ parseWithOracle lang cat sent (predict,complete,literal) =
|
||||
return ep
|
||||
Nothing -> do return nullPtr
|
||||
|
||||
-- | Returns possible completions of the current partial input.
|
||||
complete :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence (excluding token being completed)
|
||||
-> String -- ^ prefix (partial token being completed)
|
||||
-> ParseOutput [(String, CId, CId, Float)] -- ^ (token, category, function, probability)
|
||||
complete lang (Type ctype _) sent pfx =
|
||||
unsafePerformIO $ do
|
||||
parsePl <- gu_new_pool
|
||||
exn <- gu_new_exn parsePl
|
||||
sent <- newUtf8CString sent parsePl
|
||||
pfx <- newUtf8CString pfx parsePl
|
||||
enum <- pgf_complete (concr lang) ctype sent pfx exn parsePl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do
|
||||
is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
if is_parse_error
|
||||
then do
|
||||
c_err <- (#peek GuExn, data.data) exn
|
||||
c_offset <- (#peek PgfParseError, offset) c_err
|
||||
token_ptr <- (#peek PgfParseError, token_ptr) c_err
|
||||
token_len <- (#peek PgfParseError, token_len) c_err
|
||||
tok <- peekUtf8CStringLen token_ptr token_len
|
||||
gu_pool_free parsePl
|
||||
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
|
||||
else do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do
|
||||
c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
gu_pool_free parsePl
|
||||
throwIO (PGFError msg)
|
||||
else do
|
||||
gu_pool_free parsePl
|
||||
throwIO (PGFError "Parsing failed")
|
||||
else do
|
||||
fpl <- newForeignPtr gu_pool_finalizer parsePl
|
||||
ParseOk <$> fromCompletions enum fpl
|
||||
where
|
||||
fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, CId, CId, Float)]
|
||||
fromCompletions enum fpl =
|
||||
withGuPool $ \tmpPl -> do
|
||||
cmpEntry <- alloca $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
do gu_enum_next enum ptr pl
|
||||
peek ptr
|
||||
if cmpEntry == nullPtr
|
||||
then do
|
||||
finalizeForeignPtr fpl
|
||||
touchConcr lang
|
||||
return []
|
||||
else do
|
||||
tok <- peekUtf8CString =<< (#peek PgfTokenProb, tok) cmpEntry
|
||||
cat <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry
|
||||
fun <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry
|
||||
prob <- (#peek PgfTokenProb, prob) cmpEntry
|
||||
toks <- unsafeInterleaveIO (fromCompletions enum fpl)
|
||||
return ((tok, cat, fun, prob) : toks)
|
||||
|
||||
-- | Returns True if there is a linearization defined for that function in that language
|
||||
hasLinearization :: Concr -> Fun -> Bool
|
||||
hasLinearization lang id = unsafePerformIO $
|
||||
@@ -1047,7 +1106,7 @@ linearizeAll lang e = unsafePerformIO $
|
||||
|
||||
-- | Generates a table of linearizations for an expression
|
||||
tabularLinearize :: Concr -> Expr -> [(String, String)]
|
||||
tabularLinearize lang e =
|
||||
tabularLinearize lang e =
|
||||
case tabularLinearizeAll lang e of
|
||||
(lins:_) -> lins
|
||||
_ -> []
|
||||
@@ -1138,7 +1197,7 @@ data BracketedString
|
||||
-- the phrase. The 'FId' is an unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
-- i.e. without discontinuous constituents this identifier
|
||||
-- is also unique for every bracket. When there are discontinuous
|
||||
-- is also unique for every bracket. When there are discontinuous
|
||||
-- phrases then the identifiers are unique for every phrase but
|
||||
-- not for every bracket since the bracket represents a constituent.
|
||||
-- The different constituents could still be distinguished by using
|
||||
@@ -1148,7 +1207,7 @@ data BracketedString
|
||||
-- The second 'CId' is the name of the abstract function that generated
|
||||
-- this phrase.
|
||||
|
||||
-- | Renders the bracketed string as a string where
|
||||
-- | Renders the bracketed string as a string where
|
||||
-- the brackets are shown as @(S ...)@ where
|
||||
-- @S@ is the category.
|
||||
showBracketedString :: BracketedString -> String
|
||||
@@ -1166,7 +1225,7 @@ flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString
|
||||
|
||||
bracketedLinearize :: Concr -> Expr -> [BracketedString]
|
||||
bracketedLinearize lang e = unsafePerformIO $
|
||||
withGuPool $ \pl ->
|
||||
withGuPool $ \pl ->
|
||||
do exn <- gu_new_exn pl
|
||||
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
@@ -1192,7 +1251,7 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
|
||||
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
|
||||
bracketedLinearizeAll lang e = unsafePerformIO $
|
||||
withGuPool $ \pl ->
|
||||
withGuPool $ \pl ->
|
||||
do exn <- gu_new_exn pl
|
||||
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
@@ -1467,7 +1526,7 @@ type LiteralCallback =
|
||||
literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])]
|
||||
literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])]
|
||||
|
||||
-- | Named entity recognition for the App grammar
|
||||
-- | Named entity recognition for the App grammar
|
||||
-- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java)
|
||||
nerc :: LiteralCallback
|
||||
nerc pgf (lang,concr) sentence lin_idx offset =
|
||||
|
||||
@@ -103,7 +103,7 @@ foreign import ccall unsafe "gu/file.h gu_file_in"
|
||||
|
||||
foreign import ccall safe "gu/enum.h gu_enum_next"
|
||||
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
|
||||
|
||||
|
||||
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
|
||||
gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString
|
||||
|
||||
@@ -241,7 +241,7 @@ newSequence elem_size pokeElem values pool = do
|
||||
type FId = Int
|
||||
data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
|
||||
|
||||
peekFId :: Ptr a -> IO FId
|
||||
peekFId :: Ptr a -> IO FId
|
||||
peekFId c_ccat = do
|
||||
c_fid <- (#peek PgfCCat, fid) c_ccat
|
||||
return (fromIntegral (c_fid :: CInt))
|
||||
@@ -256,6 +256,7 @@ data PgfApplication
|
||||
data PgfConcr
|
||||
type PgfExpr = Ptr ()
|
||||
data PgfExprProb
|
||||
data PgfTokenProb
|
||||
data PgfExprParser
|
||||
data PgfFullFormEntry
|
||||
data PgfMorphoCallback
|
||||
@@ -422,6 +423,9 @@ foreign import ccall
|
||||
foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
|
||||
pgf_parse_with_oracle :: Ptr PgfConcr -> CString -> CString -> Ptr PgfOracleCallback -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_complete"
|
||||
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
|
||||
|
||||
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
|
||||
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
name: pgf2
|
||||
version: 1.2.1
|
||||
version: 1.3.0
|
||||
synopsis: Bindings to the C version of the PGF runtime
|
||||
description:
|
||||
GF, Grammatical Framework, is a programming language for multilingual grammar applications.
|
||||
|
||||
Reference in New Issue
Block a user