1
0
forked from GitHub/gf-core

Compare commits

...

7 Commits

Author SHA1 Message Date
John J. Camilleri
84fd431afd Manage to get completion working in PGF2 2021-05-03 22:28:48 +02:00
John J. Camilleri
588cd6ddb1 Improvement to test script, distinguishes when input ends with whitespace 2021-05-03 20:51:24 +02:00
John J. Camilleri
437bd8e7f9 Add proper error handling in complete 2021-05-03 20:36:31 +02:00
John J. Camilleri
e56d1b2959 Second attempt. Reading enum is closer to working but all strings are empty. 2021-05-03 14:25:35 +02:00
John J. Camilleri
450368f9bb First attempt at adding support for complete in PGF2 (gives segmentation faults) 2021-05-03 13:19:08 +02:00
John J. Camilleri
60bc752a6f Add note about type-checking dynamic expressions in PGF2 Haddock
Closes #72
2021-04-30 14:59:20 +02:00
John J. Camilleri
91278e2b4b Remove notice about example grammars not being included anymore from build scripts 2021-04-30 13:39:15 +02:00
5 changed files with 110 additions and 38 deletions

View File

@@ -26,6 +26,14 @@ import Distribution.PackageDescription(PackageDescription(..))
so users won't see this message unless they check the log.) so users won't see this message unless they check the log.)
-} -}
-- | Print a short notice telling the user that the example grammars now
-- live in the separate gf-contrib repository, and where to clone it from.
noContribMsg :: IO ()
noContribMsg = putStr (unlines noticeLines)
  where
    -- One entry per output line; 'unlines' appends the trailing newlines.
    noticeLines :: [String]
    noticeLines =
      [ "Example grammars are no longer included in the main GF repository, but have moved to gf-contrib."
      , "If you want them to be built, clone the following repository in the same directory as gf-core:"
      , "https://github.com/GrammaticalFramework/gf-contrib.git"
      ]
example_grammars :: [(String, String, [String])] -- [(pgf, subdir, source modules)] example_grammars :: [(String, String, [String])] -- [(pgf, subdir, source modules)]
example_grammars = example_grammars =
[("Letter.pgf","letter",letterSrc) [("Letter.pgf","letter",letterSrc)
@@ -50,11 +58,8 @@ buildWeb gf flags (pkg,lbi) = do
contrib_exists <- doesDirectoryExist contrib_dir contrib_exists <- doesDirectoryExist contrib_dir
if contrib_exists if contrib_exists
then mapM_ build_pgf example_grammars then mapM_ build_pgf example_grammars
else putStr $ unlines -- else noContribMsg
[ "Example grammars are no longer included in the main GF repository, but have moved to gf-contrib." else return ()
, "If you want these example grammars to be built, clone this repository in the same top-level directory as GF:"
, "https://github.com/GrammaticalFramework/gf-contrib.git"
]
where where
gfo_dir = buildDir lbi </> "examples" gfo_dir = buildDir lbi </> "examples"

View File

@@ -1,7 +1,11 @@
## 1.3.0
- Add completion support.
## 1.2.1 ## 1.2.1
- Remove deprecated pgf_print_expr_tuple - Remove deprecated `pgf_print_expr_tuple`.
- Added an API for cloning expressions/types/literals - Added an API for cloning expressions/types/literals.
## 1.2.0 ## 1.2.0

View File

@@ -43,30 +43,28 @@ module PGF2 (-- * PGF
mkCId, mkCId,
exprHash, exprSize, exprFunctions, exprSubstitute, exprHash, exprSize, exprFunctions, exprSubstitute,
treeProbability, treeProbability,
-- ** Types -- ** Types
Type, Hypo, BindType(..), startCat, Type, Hypo, BindType(..), startCat,
readType, showType, showContext, readType, showType, showContext,
mkType, unType, mkType, unType,
-- ** Type checking -- ** Type checking
-- | Dynamically-built expressions should always be type-checked before using in other functions,
-- as the exceptions thrown by using invalid expressions may not be catchable.
checkExpr, inferExpr, checkType, checkExpr, inferExpr, checkType,
-- ** Computing -- ** Computing
compute, compute,
-- * Concrete syntax -- * Concrete syntax
ConcName,Concr,languages,concreteName,languageCode, ConcName,Concr,languages,concreteName,languageCode,
-- ** Linearization -- ** Linearization
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll, linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
FId, BracketedString(..), showBracketedString, flattenBracketedString, FId, BracketedString(..), showBracketedString, flattenBracketedString,
printName, categoryFields, printName, categoryFields,
alignWords, alignWords,
-- ** Parsing -- ** Parsing
ParseOutput(..), parse, parseWithHeuristics, ParseOutput(..), parse, parseWithHeuristics,
parseToChart, PArg(..), parseToChart, PArg(..),
complete,
-- ** Sentence Lookup -- ** Sentence Lookup
lookupSentence, lookupSentence,
-- ** Generation -- ** Generation
@@ -180,7 +178,7 @@ languageCode c = unsafePerformIO (peekUtf8CString =<< pgf_language_code (concr c
-- | Generates an exhaustive possibly infinite list of -- | Generates an exhaustive possibly infinite list of
-- all abstract syntax expressions of the given type. -- all abstract syntax expressions of the given type.
-- The expressions are ordered by their probability. -- The expressions are ordered by their probability.
generateAll :: PGF -> Type -> [(Expr,Float)] generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p (Type ctype _) = generateAll p (Type ctype _) =
@@ -469,21 +467,21 @@ newGraphvizOptions pool opts = do
-- Functions using Concr -- Functions using Concr
-- Morpho analyses, parsing & linearization -- Morpho analyses, parsing & linearization
-- | This triple is returned by all functions that deal with -- | This triple is returned by all functions that deal with
-- the grammar's lexicon. Its first element is the name of an abstract -- the grammar's lexicon. Its first element is the name of an abstract
-- lexical function which can produce a given word or -- lexical function which can produce a given word or
-- a multiword expression (i.e. this is the lemma). -- a multiword expression (i.e. this is the lemma).
-- After that follows a string which describes -- After that follows a string which describes
-- the particular inflection form. -- the particular inflection form.
-- --
-- The last element is the logarithm of -- The last element is the logarithm of
-- the probability of the function. The probability is not -- the probability of the function. The probability is not
-- conditionalized on the category of the function. This makes it -- conditionalized on the category of the function. This makes it
-- possible to compare the likelihood of two functions even if they -- possible to compare the likelihood of two functions even if they
-- have different types. -- have different types.
type MorphoAnalysis = (Fun,String,Float) type MorphoAnalysis = (Fun,String,Float)
-- | 'lookupMorpho' takes a string which must be a single word or -- | 'lookupMorpho' takes a string which must be a single word or
-- a multiword expression. It then computes the list of all possible -- a multiword expression. It then computes the list of all possible
-- morphological analyses. -- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis] lookupMorpho :: Concr -> String -> [MorphoAnalysis]
@@ -541,12 +539,12 @@ lookupCohorts lang@(Concr concr master) sent =
return ((start,tok,ans,end):cohs) return ((start,tok,ans,end):cohs)
filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)] filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterBest ans = filterBest ans =
reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] []) reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
where where
iterate v0 [] [] res = res iterate v0 [] [] res = res
iterate v0 [] new res = iterate v0 new [] res iterate v0 [] new res = iterate v0 new [] res
iterate v0 ((_,v,conf, []):old) new res = iterate v0 ((_,v,conf, []):old) new res =
case compare v0 v of case compare v0 v of
LT -> res LT -> res
EQ -> iterate v0 old new (merge conf res) EQ -> iterate v0 old new (merge conf res)
@@ -649,7 +647,7 @@ getAnalysis ref self c_lemma c_anal prob exn = do
data ParseOutput a data ParseOutput a
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed. = ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
-- The string is the token where the parser has failed. -- The string is the token where the parser has failed.
| ParseOk a -- ^ If the parsing and the type checking are successful | ParseOk a -- ^ If the parsing and the type checking are successful
-- we get the abstract syntax trees as either a list or a chart. -- we get the abstract syntax trees as either a list or a chart.
| ParseIncomplete -- ^ The sentence is not complete. | ParseIncomplete -- ^ The sentence is not complete.
@@ -659,9 +657,9 @@ parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parseWithHeuristics :: Concr -- ^ the language with which we parse parseWithHeuristics :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category -> Type -- ^ the start category
-> String -- ^ the input sentence -> String -- ^ the input sentence
-> Double -- ^ the heuristic factor. -> Double -- ^ the heuristic factor.
-- A negative value tells the parser -- A negative value tells the parser
-- to look up the default from -- to look up the default from
-- the grammar flags -- the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))] -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories. -- ^ a list of callbacks for literal categories.
@@ -715,9 +713,9 @@ parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
parseToChart :: Concr -- ^ the language with which we parse parseToChart :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category -> Type -- ^ the start category
-> String -- ^ the input sentence -> String -- ^ the input sentence
-> Double -- ^ the heuristic factor. -> Double -- ^ the heuristic factor.
-- A negative value tells the parser -- A negative value tells the parser
-- to look up the default from -- to look up the default from
-- the grammar flags -- the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))] -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories. -- ^ a list of callbacks for literal categories.
@@ -886,7 +884,7 @@ lookupSentence lang (Type ctype _) sent =
-- | The oracle is a triple of functions. -- | The oracle is a triple of functions.
-- The first two take a category name and a linearization field name -- The first two take a category name and a linearization field name
-- and they should return True/False when the corresponding -- and they should return True/False when the corresponding
-- prediction or completion is appropriate. The third function -- prediction or completion is appropriate. The third function
-- is the oracle for literals. -- is the oracle for literals.
type Oracle = (Maybe (Cat -> String -> Int -> Bool) type Oracle = (Maybe (Cat -> String -> Int -> Bool)
@@ -974,6 +972,67 @@ parseWithOracle lang cat sent (predict,complete,literal) =
return ep return ep
Nothing -> do return nullPtr Nothing -> do return nullPtr
-- | Returns possible completions of the current partial input.
--
-- The already-typed input is split by the caller into the sentence so far
-- (without the token being completed) and the prefix of that token.
--
-- Results:
--
--   * 'ParseOk' with a list of @(token, category, function, probability)@
--     entries. The first entry is fetched eagerly; the rest of the list is
--     produced lazily from the C enumeration.
--
--   * 'ParseFailed' with the offset and the offending token when the C
--     runtime raises a @PgfParseError@.
--
-- Any other exception raised by the C runtime is rethrown as a 'PGFError'.
complete :: Concr      -- ^ the language with which we parse
         -> Type       -- ^ the start category
         -> String     -- ^ the input sentence (excluding token being completed)
         -> String     -- ^ prefix (partial token being completed)
         -> ParseOutput [(String, CId, CId, Float)]  -- ^ (token, category, function, probability)
complete lang (Type ctype _) sent pfx =
  -- NOTE(review): the second field of 'Type' is ignored here, whereas
  -- parseWithHeuristics binds it as @touchType@. Confirm whether it has to
  -- be invoked after the FFI call to keep @ctype@ alive.
  unsafePerformIO $ do
    parsePl <- gu_new_pool
    exn     <- gu_new_exn parsePl
    -- The C copies of both strings are allocated in parsePl.
    c_sent  <- newUtf8CString sent parsePl
    c_pfx   <- newUtf8CString pfx parsePl
    enum    <- pgf_complete (concr lang) ctype c_sent c_pfx exn parsePl
    failed  <- gu_exn_is_raised exn
    if failed
      then do
        is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
        if is_parse_error
          then do
            c_err     <- (#peek GuExn, data.data) exn
            c_offset  <- (#peek PgfParseError, offset) c_err
            token_ptr <- (#peek PgfParseError, token_ptr) c_err
            token_len <- (#peek PgfParseError, token_len) c_err
            -- Copy the token out before the pool is released.
            tok <- peekUtf8CStringLen token_ptr token_len
            gu_pool_free parsePl
            return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
          else do
            is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
            if is_exn
              then do
                c_msg <- (#peek GuExn, data.data) exn
                -- Copy the message out before the pool is released.
                msg <- peekUtf8CString c_msg
                gu_pool_free parsePl
                throwIO (PGFError msg)
              else do
                gu_pool_free parsePl
                throwIO (PGFError "Parsing failed")
      else do
        -- From here on the pool is owned by a ForeignPtr, so it is released
        -- either when the enumeration is exhausted or by the finalizer.
        fpl <- newForeignPtr gu_pool_finalizer parsePl
        ParseOk <$> fromCompletions enum fpl
  where
    -- Pull one entry from the enumeration and defer the rest of the list.
    -- No scratch pool is needed per step: every string is copied into a
    -- Haskell 'String' straight away.
    fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, CId, CId, Float)]
    fromCompletions enum fpl = do
      cmpEntry <- alloca $ \ptr ->
                    withForeignPtr fpl $ \pl -> do
                      gu_enum_next enum ptr pl
                      peek ptr
      if cmpEntry == nullPtr
        then do
          -- A null entry marks the end of the enumeration.
          finalizeForeignPtr fpl
          touchConcr lang
          return []
        else do
          tok  <- peekUtf8CString =<< (#peek PgfTokenProb, tok) cmpEntry
          cat  <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry
          fun  <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry
          prob <- (#peek PgfTokenProb, prob) cmpEntry
          toks <- unsafeInterleaveIO (fromCompletions enum fpl)
          return ((tok, cat, fun, prob) : toks)
-- | Returns True if there is a linearization defined for that function in that language -- | Returns True if there is a linearization defined for that function in that language
hasLinearization :: Concr -> Fun -> Bool hasLinearization :: Concr -> Fun -> Bool
hasLinearization lang id = unsafePerformIO $ hasLinearization lang id = unsafePerformIO $
@@ -1047,7 +1106,7 @@ linearizeAll lang e = unsafePerformIO $
-- | Generates a table of linearizations for an expression -- | Generates a table of linearizations for an expression
tabularLinearize :: Concr -> Expr -> [(String, String)] tabularLinearize :: Concr -> Expr -> [(String, String)]
tabularLinearize lang e = tabularLinearize lang e =
case tabularLinearizeAll lang e of case tabularLinearizeAll lang e of
(lins:_) -> lins (lins:_) -> lins
_ -> [] _ -> []
@@ -1138,7 +1197,7 @@ data BracketedString
-- the phrase. The 'FId' is an unique identifier for -- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars -- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier -- i.e. without discontinuous constituents this identifier
-- is also unique for every bracket. When there are discontinuous -- is also unique for every bracket. When there are discontinuous
-- phrases then the identifiers are unique for every phrase but -- phrases then the identifiers are unique for every phrase but
-- not for every bracket since the bracket represents a constituent. -- not for every bracket since the bracket represents a constituent.
-- The different constituents could still be distinguished by using -- The different constituents could still be distinguished by using
@@ -1148,7 +1207,7 @@ data BracketedString
-- The second 'CId' is the name of the abstract function that generated -- The second 'CId' is the name of the abstract function that generated
-- this phrase. -- this phrase.
-- | Renders the bracketed string as a string where -- | Renders the bracketed string as a string where
-- the brackets are shown as @(S ...)@ where -- the brackets are shown as @(S ...)@ where
-- @S@ is the category. -- @S@ is the category.
showBracketedString :: BracketedString -> String showBracketedString :: BracketedString -> String
@@ -1166,7 +1225,7 @@ flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString
bracketedLinearize :: Concr -> Expr -> [BracketedString] bracketedLinearize :: Concr -> Expr -> [BracketedString]
bracketedLinearize lang e = unsafePerformIO $ bracketedLinearize lang e = unsafePerformIO $
withGuPool $ \pl -> withGuPool $ \pl ->
do exn <- gu_new_exn pl do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn failed <- gu_exn_is_raised exn
@@ -1192,7 +1251,7 @@ bracketedLinearize lang e = unsafePerformIO $
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]] bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
bracketedLinearizeAll lang e = unsafePerformIO $ bracketedLinearizeAll lang e = unsafePerformIO $
withGuPool $ \pl -> withGuPool $ \pl ->
do exn <- gu_new_exn pl do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn failed <- gu_exn_is_raised exn
@@ -1467,7 +1526,7 @@ type LiteralCallback =
literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])] literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])]
literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])] literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])]
-- | Named entity recognition for the App grammar -- | Named entity recognition for the App grammar
-- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java) -- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java)
nerc :: LiteralCallback nerc :: LiteralCallback
nerc pgf (lang,concr) sentence lin_idx offset = nerc pgf (lang,concr) sentence lin_idx offset =

View File

@@ -103,7 +103,7 @@ foreign import ccall unsafe "gu/file.h gu_file_in"
foreign import ccall safe "gu/enum.h gu_enum_next" foreign import ccall safe "gu/enum.h gu_enum_next"
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO () gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze" foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString
@@ -241,7 +241,7 @@ newSequence elem_size pokeElem values pool = do
type FId = Int type FId = Int
data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show) data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
peekFId :: Ptr a -> IO FId peekFId :: Ptr a -> IO FId
peekFId c_ccat = do peekFId c_ccat = do
c_fid <- (#peek PgfCCat, fid) c_ccat c_fid <- (#peek PgfCCat, fid) c_ccat
return (fromIntegral (c_fid :: CInt)) return (fromIntegral (c_fid :: CInt))
@@ -256,6 +256,7 @@ data PgfApplication
data PgfConcr data PgfConcr
type PgfExpr = Ptr () type PgfExpr = Ptr ()
data PgfExprProb data PgfExprProb
-- | Constructor-less tag type for pointers to the C struct @PgfTokenProb@.
-- Its @tok@, @cat@, @fun@ and @prob@ fields are read with @#peek@ when
-- completions are enumerated.
data PgfTokenProb
data PgfExprParser data PgfExprParser
data PgfFullFormEntry data PgfFullFormEntry
data PgfMorphoCallback data PgfMorphoCallback
@@ -422,6 +423,9 @@ foreign import ccall
foreign import ccall "pgf/pgf.h pgf_parse_with_oracle" foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
pgf_parse_with_oracle :: Ptr PgfConcr -> CString -> CString -> Ptr PgfOracleCallback -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) pgf_parse_with_oracle :: Ptr PgfConcr -> CString -> CString -> Ptr PgfOracleCallback -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
-- | Binding to @pgf_complete@ from @pgf/pgf.h@.
-- Arguments, in order: the concrete syntax, the start type, the input
-- sentence and the prefix of the token being completed (both as C strings),
-- an exception object that the caller inspects afterwards, and a memory pool.
-- The returned enumeration is stepped with 'gu_enum_next'; each entry is read
-- as a @PgfTokenProb@ and a null entry ends the enumeration.
foreign import ccall "pgf/pgf.h pgf_complete"
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_lookup_morpho" foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO () pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()

View File

@@ -1,5 +1,5 @@
name: pgf2 name: pgf2
version: 1.2.1 version: 1.3.0
synopsis: Bindings to the C version of the PGF runtime synopsis: Bindings to the C version of the PGF runtime
description: description:
GF, Grammatical Framework, is a programming language for multilingual grammar applications. GF, Grammatical Framework, is a programming language for multilingual grammar applications.