From 450368f9bbf2948365953ae35069b5039ba38a28 Mon Sep 17 00:00:00 2001 From: "John J. Camilleri" Date: Mon, 3 May 2021 13:19:08 +0200 Subject: [PATCH 1/5] First attempt at adding support for complete in PGF2 (gives segmentation faults) --- src/runtime/haskell-bind/CHANGELOG.md | 8 ++++-- src/runtime/haskell-bind/PGF2.hsc | 36 +++++++++++++++++++++++---- src/runtime/haskell-bind/PGF2/FFI.hsc | 7 ++++-- src/runtime/haskell-bind/pgf2.cabal | 2 +- src/runtime/haskell-bind/test.hs | 12 +++++++++ 5 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 src/runtime/haskell-bind/test.hs diff --git a/src/runtime/haskell-bind/CHANGELOG.md b/src/runtime/haskell-bind/CHANGELOG.md index aed2d9c4f..570c7fd73 100644 --- a/src/runtime/haskell-bind/CHANGELOG.md +++ b/src/runtime/haskell-bind/CHANGELOG.md @@ -1,7 +1,11 @@ +## 1.3.0 + +- Add completion support. + ## 1.2.1 -- Remove deprecated pgf_print_expr_tuple -- Added an API for cloning expressions/types/literals +- Remove deprecated `pgf_print_expr_tuple`. +- Added an API for cloning expressions/types/literals. ## 1.2.0 diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index 4204867f1..bd7cf2fe9 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -43,32 +43,28 @@ module PGF2 (-- * PGF mkCId, exprHash, exprSize, exprFunctions, exprSubstitute, treeProbability, - -- ** Types Type, Hypo, BindType(..), startCat, readType, showType, showContext, mkType, unType, - -- ** Type checking -- | Dynamically-built expressions should always be type-checked before using in other functions, -- as the exceptions thrown by using invalid expressions may not catchable. checkExpr, inferExpr, checkType, - -- ** Computing compute, -- * Concrete syntax ConcName,Concr,languages,concreteName,languageCode, - -- ** Linearization linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll, FId, BracketedString(..), showBracketedString, flattenBracketedString, printName, categoryFields, - alignWords, -- ** Parsing ParseOutput(..), parse, parseWithHeuristics, parseToChart, PArg(..), + complete, -- ** Sentence Lookup lookupSentence, -- ** Generation @@ -976,6 +972,36 @@ parseWithOracle lang cat sent (predict,complete,literal) = return ep Nothing -> do return nullPtr +complete :: Concr -- ^ the language with which we parse + -> Type -- ^ the start category + -> String -- ^ the input sentence + -> String -- ^ prefix (?) + -> Maybe Int -- ^ maximum number of results + -> ParseOutput [(Expr,Float)] +complete lang (Type ctype _) sent pfx mn = + unsafePerformIO $ do + parsePl <- gu_new_pool + exprPl <- gu_new_pool + exn <- gu_new_exn parsePl + + sent <- newUtf8CString sent parsePl + pfx <- newUtf8CString pfx parsePl + + enum <- pgf_complete (concr lang) ctype sent pfx exn parsePl + failed <- gu_exn_is_raised exn + if failed + then do + is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError + if is_parse_error + then return (ParseFailed 0 "") + else throwIO (PGFError "Some other error") + -- TODO cleanup!!! + else do + parseFPl <- newForeignPtr gu_pool_finalizer parsePl + exprFPl <- newForeignPtr gu_pool_finalizer exprPl + exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl) + return (ParseOk exprs) + -- | Returns True if there is a linearization defined for that function in that language hasLinearization :: Concr -> Fun -> Bool hasLinearization lang id = unsafePerformIO $ diff --git a/src/runtime/haskell-bind/PGF2/FFI.hsc b/src/runtime/haskell-bind/PGF2/FFI.hsc index c72c48e3b..04952f2d8 100644 --- a/src/runtime/haskell-bind/PGF2/FFI.hsc +++ b/src/runtime/haskell-bind/PGF2/FFI.hsc @@ -103,7 +103,7 @@ foreign import ccall unsafe "gu/file.h gu_file_in" foreign import ccall safe "gu/enum.h gu_enum_next" gu_enum_next :: Ptr a -> Ptr (Ptr b) -> Ptr GuPool -> IO () - + foreign import ccall unsafe "gu/string.h gu_string_buf_freeze" gu_string_buf_freeze :: Ptr GuStringBuf -> Ptr GuPool -> IO CString @@ -241,7 +241,7 @@ newSequence elem_size pokeElem values pool = do type FId = Int data PArg = PArg [FId] {-# UNPACK #-} !FId deriving (Eq,Ord,Show) -peekFId :: Ptr a -> IO FId +peekFId :: Ptr a -> IO FId peekFId c_ccat = do c_fid <- (#peek PgfCCat, fid) c_ccat return (fromIntegral (c_fid :: CInt)) @@ -422,6 +422,9 @@ foreign import ccall foreign import ccall "pgf/pgf.h pgf_parse_with_oracle" pgf_parse_with_oracle :: Ptr PgfConcr -> CString -> CString -> Ptr PgfOracleCallback -> Ptr GuExn -> Ptr GuPool -> Ptr GuPool -> IO (Ptr GuEnum) +foreign import ccall "pgf/pgf.h pgf_complete" + pgf_complete :: Ptr PgfConcr -> PgfType -> CString -> CString -> Ptr GuExn -> Ptr GuPool -> IO (Ptr GuEnum) + foreign import ccall "pgf/pgf.h pgf_lookup_morpho" pgf_lookup_morpho :: Ptr PgfConcr -> CString -> Ptr PgfMorphoCallback -> Ptr GuExn -> IO () diff --git a/src/runtime/haskell-bind/pgf2.cabal b/src/runtime/haskell-bind/pgf2.cabal index 4ef9ed4f0..91e77c77b 100644 --- a/src/runtime/haskell-bind/pgf2.cabal +++ b/src/runtime/haskell-bind/pgf2.cabal @@ -1,5 +1,5 @@ name: pgf2 -version: 1.2.1 +version: 1.3.0 synopsis: Bindings to the C version of the PGF runtime description: GF, Grammatical Framework, is a programming language for multilingual grammar applications. diff --git a/src/runtime/haskell-bind/test.hs b/src/runtime/haskell-bind/test.hs new file mode 100644 index 000000000..16e7ff7cb --- /dev/null +++ b/src/runtime/haskell-bind/test.hs @@ -0,0 +1,12 @@ +import PGF2 +import qualified Data.Map as M + +main :: IO () +main = do + pgf <- readPGF "/Users/john/repositories/GF/contrib/foods/Foods.pgf" + let Just concr = M.lookup "FoodsEng" (languages pgf) + let pr = complete concr (startCat pgf) "this" "wi" Nothing + case pr of + ParseOk x -> print (head x) + ParseFailed _ _ -> putStrLn "parse failed" + ParseIncomplete -> putStrLn "input incomplete" From e56d1b29593bef6ea3a70b50287df3e3438207d7 Mon Sep 17 00:00:00 2001 From: "John J. Camilleri" Date: Mon, 3 May 2021 14:25:35 +0200 Subject: [PATCH 2/5] Second attempt. Reading enum is closer to working but all strings are empty. --- src/runtime/haskell-bind/PGF2.hsc | 32 ++++++++++++++++++++++--------- src/runtime/haskell-bind/test.hs | 4 ++-- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index bd7cf2fe9..21e8693eb 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -977,30 +977,44 @@ complete :: Concr -- ^ the language with which we parse -> String -- ^ the input sentence -> String -- ^ prefix (?) -> Maybe Int -- ^ maximum number of results - -> ParseOutput [(Expr,Float)] + -> ParseOutput [String] complete lang (Type ctype _) sent pfx mn = unsafePerformIO $ do parsePl <- gu_new_pool - exprPl <- gu_new_pool exn <- gu_new_exn parsePl - sent <- newUtf8CString sent parsePl pfx <- newUtf8CString pfx parsePl - enum <- pgf_complete (concr lang) ctype sent pfx exn parsePl failed <- gu_exn_is_raised exn if failed then do + -- TODO better error handling, cleanup is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError if is_parse_error then return (ParseFailed 0 "") else throwIO (PGFError "Some other error") - -- TODO cleanup!!! else do - parseFPl <- newForeignPtr gu_pool_finalizer parsePl - exprFPl <- newForeignPtr gu_pool_finalizer exprPl - exprs <- fromPgfExprEnum enum parseFPl (touchConcr lang >> touchForeignPtr exprFPl) - return (ParseOk exprs) + fpl <- newForeignPtr gu_pool_finalizer parsePl + ParseOk <$> fromCompletions enum fpl + where + fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [String] + fromCompletions enum fpl = + withGuPool $ \tmpPl -> do + cmpEntry <- alloca $ \ptr -> + withForeignPtr fpl $ \pl -> + do gu_enum_next enum ptr pl + peek ptr + if cmpEntry == nullPtr + then do + finalizeForeignPtr fpl + touchConcr lang + return [] + else do + (sb,out) <- newOut tmpPl + cstr <- gu_string_buf_freeze sb tmpPl + tok <- peekUtf8CString cstr + toks <- unsafeInterleaveIO (fromCompletions enum fpl) + return (tok : toks) -- | Returns True if there is a linearization defined for that function in that language hasLinearization :: Concr -> Fun -> Bool diff --git a/src/runtime/haskell-bind/test.hs b/src/runtime/haskell-bind/test.hs index 16e7ff7cb..26836445f 100644 --- a/src/runtime/haskell-bind/test.hs +++ b/src/runtime/haskell-bind/test.hs @@ -5,8 +5,8 @@ main :: IO () main = do pgf <- readPGF "/Users/john/repositories/GF/contrib/foods/Foods.pgf" let Just concr = M.lookup "FoodsEng" (languages pgf) - let pr = complete concr (startCat pgf) "this" "wi" Nothing + let pr = complete concr (startCat pgf) "" "th" Nothing case pr of - ParseOk x -> print (head x) + ParseOk x -> print x ParseFailed _ _ -> putStrLn "parse failed" ParseIncomplete -> putStrLn "input incomplete" From 437bd8e7f956ec645aa5261cbb9085741c8398cd Mon Sep 17 00:00:00 2001 From: "John J. Camilleri" Date: Mon, 3 May 2021 20:36:31 +0200 Subject: [PATCH 3/5] Add proper error handling in complete --- src/runtime/haskell-bind/PGF2.hsc | 22 +++++++++++++++++++--- src/runtime/haskell-bind/test.hs | 18 ++++++++++++------ 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index 21e8693eb..b3b349ee1 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -988,11 +988,27 @@ complete lang (Type ctype _) sent pfx mn = failed <- gu_exn_is_raised exn if failed then do - -- TODO better error handling, cleanup is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError if is_parse_error - then return (ParseFailed 0 "") - else throwIO (PGFError "Some other error") + then do + c_err <- (#peek GuExn, data.data) exn + c_offset <- (#peek PgfParseError, offset) c_err + token_ptr <- (#peek PgfParseError, token_ptr) c_err + token_len <- (#peek PgfParseError, token_len) c_err + tok <- peekUtf8CStringLen token_ptr token_len + gu_pool_free parsePl + return (ParseFailed (fromIntegral (c_offset :: CInt)) tok) + else do + is_exn <- gu_exn_caught exn gu_exn_type_PgfExn + if is_exn + then do + c_msg <- (#peek GuExn, data.data) exn + msg <- peekUtf8CString c_msg + gu_pool_free parsePl + throwIO (PGFError msg) + else do + gu_pool_free parsePl + throwIO (PGFError "Parsing failed") else do fpl <- newForeignPtr gu_pool_finalizer parsePl ParseOk <$> fromCompletions enum fpl diff --git a/src/runtime/haskell-bind/test.hs b/src/runtime/haskell-bind/test.hs index 26836445f..4d345c90c 100644 --- a/src/runtime/haskell-bind/test.hs +++ b/src/runtime/haskell-bind/test.hs @@ -4,9 +4,15 @@ import qualified Data.Map as M main :: IO () main = do pgf <- readPGF "/Users/john/repositories/GF/contrib/foods/Foods.pgf" - let Just concr = M.lookup "FoodsEng" (languages pgf) - let pr = complete concr (startCat pgf) "" "th" Nothing - case pr of - ParseOk x -> print x - ParseFailed _ _ -> putStrLn "parse failed" - ParseIncomplete -> putStrLn "input incomplete" + let + Just concr = M.lookup "FoodsEng" (languages pgf) + loop = do + putStr "> " + tks <- words <$> getLine + let pr = complete concr (startCat pgf) (unwords (init tks)) (last tks) Nothing + case pr of + ParseOk x -> print x + ParseFailed x s -> putStrLn $ "parse failed at " ++ show x ++ " " ++ s + ParseIncomplete -> putStrLn "input incomplete" + loop + loop From 588cd6ddb16350ed947a975a28315806164fe651 Mon Sep 17 00:00:00 2001 From: "John J. Camilleri" Date: Mon, 3 May 2021 20:51:24 +0200 Subject: [PATCH 4/5] Improvement to test script, distinguishes when input ends with whitespace --- src/runtime/haskell-bind/test.hs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/runtime/haskell-bind/test.hs b/src/runtime/haskell-bind/test.hs index 4d345c90c..1c0c9fa87 100644 --- a/src/runtime/haskell-bind/test.hs +++ b/src/runtime/haskell-bind/test.hs @@ -1,4 +1,6 @@ import PGF2 +import qualified Data.Char as C +import qualified Data.List as L import qualified Data.Map as M main :: IO () @@ -8,8 +10,13 @@ main = do Just concr = M.lookup "FoodsEng" (languages pgf) loop = do putStr "> " - tks <- words <$> getLine - let pr = complete concr (startCat pgf) (unwords (init tks)) (last tks) Nothing + input <- getLine + let + (sent,pfx) = + if C.isSpace (last input) + then (input, "") + else let toks = words input in (unwords (init toks), last toks) + let pr = complete concr (startCat pgf) sent pfx Nothing case pr of ParseOk x -> print x ParseFailed x s -> putStrLn $ "parse failed at " ++ show x ++ " " ++ s From 84fd431afd54644bbe4bfc2c09c444f5a99e35cb Mon Sep 17 00:00:00 2001 From: "John J. Camilleri" Date: Mon, 3 May 2021 22:28:48 +0200 Subject: [PATCH 5/5] Manage to get completion working in PGF2 --- src/runtime/haskell-bind/PGF2.hsc | 23 ++++++++++++----------- src/runtime/haskell-bind/PGF2/FFI.hsc | 1 + src/runtime/haskell-bind/test.hs | 25 ------------------------- 3 files changed, 13 insertions(+), 36 deletions(-) delete mode 100644 src/runtime/haskell-bind/test.hs diff --git a/src/runtime/haskell-bind/PGF2.hsc b/src/runtime/haskell-bind/PGF2.hsc index b3b349ee1..38fae67ef 100644 --- a/src/runtime/haskell-bind/PGF2.hsc +++ b/src/runtime/haskell-bind/PGF2.hsc @@ -972,13 +972,13 @@ parseWithOracle lang cat sent (predict,complete,literal) = return ep Nothing -> do return nullPtr +-- | Returns possible completions of the current partial input. complete :: Concr -- ^ the language with which we parse - -> Type -- ^ the start category - -> String -- ^ the input sentence - -> String -- ^ prefix (?) - -> Maybe Int -- ^ maximum number of results - -> ParseOutput [String] -complete lang (Type ctype _) sent pfx mn = + -> Type -- ^ the start category + -> String -- ^ the input sentence (excluding token being completed) + -> String -- ^ prefix (partial token being completed) + -> ParseOutput [(String, CId, CId, Float)] -- ^ (token, category, function, probability) +complete lang (Type ctype _) sent pfx = unsafePerformIO $ do parsePl <- gu_new_pool exn <- gu_new_exn parsePl @@ -1013,7 +1013,7 @@ complete lang (Type ctype _) sent pfx mn = fpl <- newForeignPtr gu_pool_finalizer parsePl ParseOk <$> fromCompletions enum fpl where - fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [String] + fromCompletions :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, CId, CId, Float)] fromCompletions enum fpl = withGuPool $ \tmpPl -> do cmpEntry <- alloca $ \ptr -> @@ -1026,11 +1026,12 @@ complete lang (Type ctype _) sent pfx mn = touchConcr lang return [] else do - (sb,out) <- newOut tmpPl - cstr <- gu_string_buf_freeze sb tmpPl - tok <- peekUtf8CString cstr + tok <- peekUtf8CString =<< (#peek PgfTokenProb, tok) cmpEntry + cat <- peekUtf8CString =<< (#peek PgfTokenProb, cat) cmpEntry + fun <- peekUtf8CString =<< (#peek PgfTokenProb, fun) cmpEntry + prob <- (#peek PgfTokenProb, prob) cmpEntry toks <- unsafeInterleaveIO (fromCompletions enum fpl) - return (tok : toks) + return ((tok, cat, fun, prob) : toks) -- | Returns True if there is a linearization defined for that function in that language hasLinearization :: Concr -> Fun -> Bool diff --git a/src/runtime/haskell-bind/PGF2/FFI.hsc b/src/runtime/haskell-bind/PGF2/FFI.hsc index 04952f2d8..16f9ad46d 100644 --- a/src/runtime/haskell-bind/PGF2/FFI.hsc +++ b/src/runtime/haskell-bind/PGF2/FFI.hsc @@ -256,6 +256,7 @@ data PgfApplication data PgfConcr type PgfExpr = Ptr () data PgfExprProb +data PgfTokenProb data PgfExprParser data PgfFullFormEntry data PgfMorphoCallback diff --git a/src/runtime/haskell-bind/test.hs b/src/runtime/haskell-bind/test.hs deleted file mode 100644 index 1c0c9fa87..000000000 --- a/src/runtime/haskell-bind/test.hs +++ /dev/null @@ -1,25 +0,0 @@ -import PGF2 -import qualified Data.Char as C -import qualified Data.List as L -import qualified Data.Map as M - -main :: IO () -main = do - pgf <- readPGF "/Users/john/repositories/GF/contrib/foods/Foods.pgf" - let - Just concr = M.lookup "FoodsEng" (languages pgf) - loop = do - putStr "> " - input <- getLine - let - (sent,pfx) = - if C.isSpace (last input) - then (input, "") - else let toks = words input in (unwords (init toks), last toks) - let pr = complete concr (startCat pgf) sent pfx Nothing - case pr of - ParseOk x -> print x - ParseFailed x s -> putStrLn $ "parse failed at " ++ show x ++ " " ++ s - ParseIncomplete -> putStrLn "input incomplete" - loop - loop