Merge branch 'master' into lpgf

# Conflicts:
#	gf.cabal
#	src/compiler/GF/Compile/GrammarToCanonical.hs
#	src/compiler/GF/Grammar/Canonical.hs
#	src/compiler/GF/Infra/Option.hs
This commit is contained in:
John J. Camilleri
2021-07-07 08:36:09 +02:00
93 changed files with 2437 additions and 3275 deletions

View File

@@ -1,7 +1,11 @@
## 1.3.0
- Add completion support.
## 1.2.1
- Remove deprecated pgf_print_expr_tuple
- Added an API for cloning expressions/types/literals
- Remove deprecated `pgf_print_expr_tuple`.
- Added an API for cloning expressions/types/literals.
## 1.2.0

View File

@@ -43,30 +43,28 @@ module PGF2 (-- * PGF
mkCId,
exprHash, exprSize, exprFunctions, exprSubstitute,
treeProbability,
-- ** Types
Type, Hypo, BindType(..), startCat,
readType, showType, showContext,
mkType, unType,
-- ** Type checking
-- | Dynamically-built expressions should always be type-checked before using in other functions,
-- as the exceptions thrown by using invalid expressions may not catchable.
checkExpr, inferExpr, checkType,
-- ** Computing
compute,
-- * Concrete syntax
ConcName,Concr,languages,concreteName,languageCode,
-- ** Linearization
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
FId, BracketedString(..), showBracketedString, flattenBracketedString,
printName, categoryFields,
alignWords,
-- ** Parsing
ParseOutput(..), parse, parseWithHeuristics,
parseToChart, PArg(..),
complete,
-- ** Sentence Lookup
lookupSentence,
-- ** Generation
@@ -180,7 +178,7 @@ languageCode c = unsafePerformIO (peekUtf8CString =<< pgf_language_code (concr c
-- | Generates an exhaustive possibly infinite list of
-- all abstract syntax expressions of the given type.
-- all abstract syntax expressions of the given type.
-- The expressions are ordered by their probability.
generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p (Type ctype _) =
@@ -469,21 +467,21 @@ newGraphvizOptions pool opts = do
-- Functions using Concr
-- Morpho analyses, parsing & linearization
-- | This triple is returned by all functions that deal with
-- | This triple is returned by all functions that deal with
-- the grammar's lexicon. Its first element is the name of an abstract
-- lexical function which can produce a given word or
-- lexical function which can produce a given word or
-- a multiword expression (i.e. this is the lemma).
-- After that follows a string which describes
-- After that follows a string which describes
-- the particular inflection form.
--
-- The last element is a logarithm from the
-- the probability of the function. The probability is not
-- the probability of the function. The probability is not
-- conditionalized on the category of the function. This makes it
-- possible to compare the likelihood of two functions even if they
-- have different types.
-- have different types.
type MorphoAnalysis = (Fun,String,Float)
-- | 'lookupMorpho' takes a string which must be a single word or
-- | 'lookupMorpho' takes a string which must be a single word or
-- a multiword expression. It then computes the list of all possible
-- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
@@ -541,12 +539,12 @@ lookupCohorts lang@(Concr concr master) sent =
return ((start,tok,ans,end):cohs)
filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterBest ans =
filterBest ans =
reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
where
iterate v0 [] [] res = res
iterate v0 [] new res = iterate v0 new [] res
iterate v0 ((_,v,conf, []):old) new res =
iterate v0 ((_,v,conf, []):old) new res =
case compare v0 v of
LT -> res
EQ -> iterate v0 old new (merge conf res)
@@ -649,7 +647,7 @@ getAnalysis ref self c_lemma c_anal prob exn = do
data ParseOutput a
= ParseFailed Int String -- ^ The integer is the position in number of unicode characters where the parser failed.
-- The string is the token where the parser have failed.
| ParseOk a -- ^ If the parsing and the type checking are successful
| ParseOk a -- ^ If the parsing and the type checking are successful
-- we get the abstract syntax trees as either a list or a chart.
| ParseIncomplete -- ^ The sentence is not complete.
@@ -659,9 +657,9 @@ parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
parseWithHeuristics :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-- the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories.
@@ -715,9 +713,9 @@ parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
parseToChart :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-> Double -- ^ the heuristic factor.
-- A negative value tells the parser
-- to lookup up the default from
-- the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
-- ^ a list of callbacks for literal categories.
@@ -886,7 +884,7 @@ lookupSentence lang (Type ctype _) sent =
-- | The oracle is a triple of functions.
-- The first two take a category name and a linearization field name
-- and they should return True/False when the corresponding
-- and they should return True/False when the corresponding
-- prediction or completion is appropriate. The third function
-- is the oracle for literals.
type Oracle = (Maybe (Cat -> String -> Int -> Bool)
@@ -974,6 +972,67 @@ parseWithOracle lang cat sent (predict,complete,literal) =
return ep
Nothing -> do return nullPtr
-- | Returns possible completions of the current partial input.
complete :: Concr -- ^ the language with which we parse
-> Type -- ^ the start category
-> String -- ^ the input sentence (excluding token being completed)
-> String -- ^ prefix (partial token being completed)
-> ParseOutput [(String, CId, CId, Float)] -- ^ (token, category, function, probability)
complete lang (Type ctype _) sent pfx =
unsafePerformIO $ do
parsePl <- gu_new_pool
exn <- gu_new_exn parsePl
sent <- newUtf8CString sent parsePl
pfx <- newUtf8CString pfx parsePl
enum <- pgf_complete (concr lang) ctype sent pfx exn parsePl
failed <- gu_exn_is_raised exn
if failed
then do
is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
if is_parse_error
then do
c_err <- (#peek GuExn, data.data) exn
c_offset <- (#peek PgfParseError, offset) c_err
token_ptr <- (#peek PgfParseError, token_ptr) c_err
token_len <- (#peek PgfParseError, token_len) c_err
tok <- peekUtf8CStringLen token_ptr token_len
gu_pool_free parsePl
return (ParseFailed (fromIntegral (c_offset :: CInt)) tok)
else do
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
if is_exn
then do
c_msg <- (#peek GuExn, data.data) exn
msg <- peekUtf8CString c_msg
gu_pool_free parsePl
throwIO (PGFError msg)
else do
gu_pool_free parsePl
throwIO (PGFError "Parsing failed")
else do
fpl <- newForeignPtr gu_pool_finalizer parsePl
ParseOk <$> fromCompletions enum fpl
where
fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, CId, CId, Float)]
fromCompletions enum fpl =
withGuPool $ \tmpPl -> do
cmpEntry <- alloca $ \ptr ->
withForeignPtr fpl $ \pl ->
do gu_enum_next enum ptr pl
peek ptr
if cmpEntry == nullPtr
then do
finalizeForeignPtr fpl
touchConcr lang
return []
else do
tok <- peekUtf8CString =<< (#peek PgfTokenProb, tok) cmpEntry
cat <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry
fun <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry
prob <- (#peek PgfTokenProb, prob) cmpEntry
toks <- unsafeInterleaveIO (fromCompletions enum fpl)
return ((tok, cat, fun, prob) : toks)
-- | Returns True if there is a linearization defined for that function in that language
hasLinearization :: Concr -> Fun -> Bool
hasLinearization lang id = unsafePerformIO $
@@ -1047,7 +1106,7 @@ linearizeAll lang e = unsafePerformIO $
-- | Generates a table of linearizations for an expression
tabularLinearize :: Concr -> Expr -> [(String, String)]
tabularLinearize lang e =
tabularLinearize lang e =
case tabularLinearizeAll lang e of
(lins:_) -> lins
_ -> []
@@ -1138,7 +1197,7 @@ data BracketedString
-- the phrase. The 'FId' is an unique identifier for
-- every phrase in the sentence. For context-free grammars
-- i.e. without discontinuous constituents this identifier
-- is also unique for every bracket. When there are discontinuous
-- is also unique for every bracket. When there are discontinuous
-- phrases then the identifiers are unique for every phrase but
-- not for every bracket since the bracket represents a constituent.
-- The different constituents could still be distinguished by using
@@ -1148,7 +1207,7 @@ data BracketedString
-- The second 'CId' is the name of the abstract function that generated
-- this phrase.
-- | Renders the bracketed string as a string where
-- | Renders the bracketed string as a string where
-- the brackets are shown as @(S ...)@ where
-- @S@ is the category.
showBracketedString :: BracketedString -> String
@@ -1166,7 +1225,7 @@ flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString
bracketedLinearize :: Concr -> Expr -> [BracketedString]
bracketedLinearize lang e = unsafePerformIO $
withGuPool $ \pl ->
withGuPool $ \pl ->
do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
@@ -1192,7 +1251,7 @@ bracketedLinearize lang e = unsafePerformIO $
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
bracketedLinearizeAll lang e = unsafePerformIO $
withGuPool $ \pl ->
withGuPool $ \pl ->
do exn <- gu_new_exn pl
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
failed <- gu_exn_is_raised exn
@@ -1467,7 +1526,7 @@ type LiteralCallback =
literalCallbacks :: [(AbsName,[(Cat,LiteralCallback)])]
literalCallbacks = [("App",[("PN",nerc),("Symb",chunk)])]
-- | Named entity recognition for the App grammar
-- | Named entity recognition for the App grammar
-- (based on ../java/org/grammaticalframework/pgf/NercLiteralCallback.java)
nerc :: LiteralCallback
nerc pgf (lang,concr) sentence lin_idx offset =

View File

@@ -19,7 +19,7 @@ wildCId = "_" :: CId
type Cat = CId -- ^ Name of syntactic category
type Fun = CId -- ^ Name of function
data BindType =
data BindType =
Explicit
| Implicit
deriving Show
@@ -38,7 +38,7 @@ instance Show Expr where
show = showExpr []
instance Eq Expr where
(Expr e1 e1_touch) == (Expr e2 e2_touch) =
(Expr e1 e1_touch) == (Expr e2 e2_touch) =
unsafePerformIO $ do
res <- pgf_expr_eq e1 e2
e1_touch >> e2_touch
@@ -113,9 +113,9 @@ unApp (Expr expr touch) =
appl <- pgf_expr_unapply expr pl
if appl == nullPtr
then return Nothing
else do
else do
fun <- peekCString =<< (#peek PgfApplication, fun) appl
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
arity <- (#peek PgfApplication, n_args) appl :: IO CInt
c_args <- peekArray (fromIntegral arity) (appl `plusPtr` (#offset PgfApplication, args))
return $ Just (fun, [Expr c_arg touch | c_arg <- c_args])
@@ -140,7 +140,9 @@ unStr (Expr expr touch) =
touch
return (Just s)
-- | Constructs an expression from an integer literal
-- | Constructs an expression from an integer literal.
-- Note that the C runtime does not support long integers, and you may run into overflow issues with large values.
-- See [here](https://github.com/GrammaticalFramework/gf-core/issues/109) for more details.
mkInt :: Int -> Expr
mkInt val =
unsafePerformIO $ do
@@ -267,7 +269,7 @@ foreign import ccall "wrapper"
-- in the expression in order reverse to the order
-- of binding.
showExpr :: [CId] -> Expr -> String
showExpr scope e =
showExpr scope e =
unsafePerformIO $
withGuPool $ \tmpPl ->
do (sb,out) <- newOut tmpPl

View File

@@ -103,7 +103,7 @@ foreign import ccall unsafe "gu/file.h gu_file_in"
foreign import ccall safe "gu/enum.h gu_enum_next"
gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO ()
foreign import ccall unsafe "gu/string.h gu_string_buf_freeze"
gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString
@@ -241,7 +241,7 @@ newSequence elem_size pokeElem values pool = do
type FId = Int
data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show)
peekFId :: Ptr a -> IO FId
peekFId :: Ptr a -> IO FId
peekFId c_ccat = do
c_fid <- (#peek PgfCCat, fid) c_ccat
return (fromIntegral (c_fid :: CInt))
@@ -256,6 +256,7 @@ data PgfApplication
data PgfConcr
type PgfExpr = Ptr ()
data PgfExprProb
data PgfTokenProb
data PgfExprParser
data PgfFullFormEntry
data PgfMorphoCallback
@@ -422,6 +423,9 @@ foreign import ccall
foreign import ccall "pgf/pgf.h pgf_parse_with_oracle"
pgf_parse_with_oracle :: Ptr PgfConcr -> CString -> CString -> Ptr PgfOracleCallback -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_complete"
pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum)
foreign import ccall "pgf/pgf.h pgf_lookup_morpho"
pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO ()

View File

@@ -1,19 +1,21 @@
name: pgf2
version: 1.2.1
version: 1.3.0
cabal-version: 1.22
build-type: Simple
license: LGPL-3
license-file: LICENSE
category: Natural Language Processing
synopsis: Bindings to the C version of the PGF runtime
description:
GF, Grammatical Framework, is a programming language for multilingual grammar applications.
GF grammars are compiled into Portable Grammar Format (PGF) which can be used with the PGF runtime, written in C.
This package provides Haskell bindings to that runtime.
homepage: https://www.grammaticalframework.org
license: LGPL-3
license-file: LICENSE
homepage: https://www.grammaticalframework.org/
bug-reports: https://github.com/GrammaticalFramework/gf-core/issues
author: Krasimir Angelov
maintainer: kr.angelov@gmail.com
category: Language
build-type: Simple
extra-source-files: CHANGELOG.md, README.md
cabal-version: >=1.10
tested-with: GHC==7.10.3, GHC==8.0.2, GHC==8.10.4
library
exposed-modules:
@@ -24,9 +26,9 @@ library
PGF2.Expr,
PGF2.Type
build-depends:
base >=4.3 && <5,
containers,
pretty
base >= 4.9.1 && <4.15,
containers >= 0.5.7 && < 0.7,
pretty >= 1.1.3 && < 1.2
default-language: Haskell2010
build-tools: hsc2hs
extra-libraries: pgf gu

View File

@@ -0,0 +1,3 @@
resolver: lts-6.35 # ghc 7.10.3
allow-newer: true

View File

@@ -0,0 +1 @@
resolver: lts-9.21 # ghc 8.0.2

View File

@@ -0,0 +1 @@
resolver: lts-18.0 # ghc 8.10.4

View File

@@ -68,7 +68,7 @@ import qualified Data.ByteString.Lazy as L
import Data.ByteString.Base (inlinePerformIO)
import qualified Data.ByteString.Base as S
#else
import Data.ByteString.Internal (inlinePerformIO)
import Data.ByteString.Internal (accursedUnutterablePerformIO)
import qualified Data.ByteString.Internal as S
--import qualified Data.ByteString.Lazy.Internal as L
#endif
@@ -199,7 +199,7 @@ defaultSize = 32 * k - overhead
-- | Sequence an IO operation on the buffer
unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
unsafeLiftIO f = Builder $ \ k buf -> accursedUnutterablePerformIO $ do
buf' <- f buf
return (k buf')
{-# INLINE unsafeLiftIO #-}

View File

@@ -423,7 +423,7 @@ readN n f = fmap f $ getBytes n
getPtr :: Storable a => Int -> Get a
getPtr n = do
(fp,o,_) <- readN n B.toForeignPtr
return . B.inlinePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
return . B.accursedUnutterablePerformIO $ withForeignPtr fp $ \p -> peek (castPtr $ p `plusPtr` o)
{- INLINE getPtr -}
------------------------------------------------------------------------

View File

@@ -41,7 +41,7 @@ import Control.Applicative
import Control.Monad
--import Control.Monad.Identity
import Control.Monad.State
import Control.Monad.Error
import Control.Monad.Except
import Text.PrettyPrint
-----------------------------------------------------

View File

@@ -1,28 +1,30 @@
name: pgf
version: 3.10
version: 3.11.0-git
cabal-version: >= 1.20
cabal-version: 1.22
build-type: Simple
license: OtherLicense
category: Natural Language Processing
synopsis: Grammatical Framework
description: A library for interpreting the Portable Grammar Format (PGF)
homepage: http://www.grammaticalframework.org/
homepage: https://www.grammaticalframework.org/
bug-reports: https://github.com/GrammaticalFramework/gf-core/issues
maintainer: Thomas Hallgren
tested-with: GHC==7.6.3, GHC==7.8.3, GHC==7.10.3, GHC==8.0.2
tested-with: GHC==7.10.3, GHC==8.0.2, GHC==8.10.4
Library
default-language: Haskell2010
build-depends: base >= 4.6 && <5,
array,
containers,
bytestring,
utf8-string,
random,
pretty,
mtl,
exceptions
library
default-language: Haskell2010
build-depends:
base >= 4.9.1 && <4.15,
array >= 0.5.1 && < 0.6,
containers >= 0.5.7 && < 0.7,
bytestring >= 0.10.8 && < 0.11,
utf8-string >= 1.0.1.1 && < 1.1,
random >= 1.1 && < 1.3,
pretty >= 1.1.3 && < 1.2,
mtl >= 2.2.1 && < 2.3,
ghc-prim >= 0.5.0 && < 0.7,
-- For compatability with GHC < 8
fail >= 4.9.0 && < 4.10
other-modules:
-- not really part of GF but I have changed the original binary library
@@ -37,7 +39,6 @@ Library
--if impl(ghc>=7.8)
-- ghc-options: +RTS -A20M -RTS
ghc-prof-options: -fprof-auto
extensions:
exposed-modules:
PGF

View File

@@ -0,0 +1,3 @@
resolver: lts-6.35 # ghc 7.10.3
allow-newer: true

View File

@@ -0,0 +1 @@
resolver: lts-9.21 # ghc 8.0.2

View File

@@ -0,0 +1 @@
resolver: lts-18.0 # ghc 8.10.4