mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
first rudimentary version of a parser
This commit is contained in:
@@ -1,3 +0,0 @@
|
||||
packages: src/runtime/haskell
|
||||
src/server
|
||||
src/compiler
|
||||
@@ -168,11 +168,9 @@ ppPmcfgRule id arg_cats res_cat (Production vars args res seqids) =
|
||||
(if null vars
|
||||
then empty
|
||||
else "∀{" <> hsep (punctuate ',' [ppLVar v <> '<' <> m | (v,m) <- vars]) <> '}' <+> '.') <+>
|
||||
(if null args
|
||||
then empty
|
||||
else hsep (intersperse (pp '*') (zipWith ppPArg arg_cats args)) <+> "->") <+>
|
||||
ppPmcfgCat res_cat res $$
|
||||
'=' <+> brackets (hcat (intersperse (pp ',') (map ppSeqId seqids))))
|
||||
ppPmcfgCat res_cat res <+> "->" <+>
|
||||
brackets (hcat (intersperse (pp ',') (zipWith ppPArg arg_cats args))) <+> '=' <+>
|
||||
brackets (hcat (intersperse (pp ',') (map ppSeqId seqids))))
|
||||
|
||||
ppPArg cat (PArg _ p) = ppPmcfgCat cat p
|
||||
|
||||
|
||||
@@ -1,211 +1,4 @@
|
||||
import Distribution.System(Platform(..),OS(..))
|
||||
import Distribution.Simple(defaultMainWithHooks,UserHooks(..),simpleUserHooks)
|
||||
import Distribution.Simple.LocalBuildInfo(LocalBuildInfo(..),absoluteInstallDirs,datadir,buildDir)
|
||||
import Distribution.Simple.Setup(BuildFlags(..),Flag(..),InstallFlags(..),CopyDest(..),CopyFlags(..),SDistFlags(..),copyDest)
|
||||
import Distribution.PackageDescription(PackageDescription(..),emptyHookedBuildInfo)
|
||||
import Distribution.Simple.BuildPaths(exeExtension)
|
||||
import System.FilePath((</>),(<.>),dropExtension)
|
||||
import System.Directory(createDirectoryIfMissing,copyFile,doesDirectoryExist,doesFileExist)
|
||||
import System.Process(rawSystem)
|
||||
import System.Exit(ExitCode(..))
|
||||
|
||||
-- | Print a one-line notice that the RGL is no longer built together with
-- GF, pointing at the separate gf-rgl repository.
noRGLmsg :: IO ()
noRGLmsg =
  putStrLn
    "Notice: the RGL is not built as part of GF anymore. See https://github.com/GrammaticalFramework/gf-rgl"
|
||||
import Distribution.Simple(defaultMain)
|
||||
|
||||
-- | Cabal entry point: the simple build extended with hooks that run before
-- and after building, installing and copying.
main :: IO ()
main = defaultMainWithHooks gfHooks
  where
    gfHooks = simpleUserHooks
      { preBuild  = \args -> noHookedInfo args . buildDistPref
      , postBuild = afterBuild
      , preInst   = \args -> noHookedInfo args . installDistPref
      , postInst  = afterInstall
      , postCopy  = afterCopy
      }

    -- Both pre-hooks ignore their inputs and contribute no extra build info.
    noHookedInfo _args _distFlag = return emptyHookedBuildInfo

    -- After building: compile the example grammars with the freshly built gf.
    afterBuild _args flags pkg lbi =
      -- noRGLmsg
      buildWeb (default_gf lbi) flags (pkg, lbi)

    -- After installing: record the data directory, then install the web files.
    afterInstall args flags pkg lbi = do
      -- noRGLmsg
      saveInstallPath args flags (pkg, lbi)
      installWeb (pkg, lbi)

    -- After copying: same as after installing, but for the copy destination.
    afterCopy args flags pkg lbi = do
      -- noRGLmsg
      saveCopyPath args flags (pkg, lbi)
      copyWeb flags (pkg, lbi)

    -- `cabal sdist` will not make a proper dist archive, for that see `make sdist`
    -- However this function should exit quietly to allow building gf in sandbox
    -- NOTE(review): this binding is not referenced by any hook above.
    gfSDist _pkg _lbi _hooks _flags = return ()
|
||||
|
||||
-- | Write the data directory of the regular installation ('NoCopyDest')
-- into 'dataDirFile'.  The argument list and the install flags are not
-- inspected.
saveInstallPath :: [String] -> InstallFlags -> (PackageDescription, LocalBuildInfo) -> IO ()
saveInstallPath _args _flags (pkg, lbi) =
  writeFile dataDirFile (datadir (absoluteInstallDirs pkg lbi NoCopyDest))
|
||||
|
||||
-- | Write the data directory of a @copy@ destination into 'dataDirFile'.
-- The destination comes from the copy flags; when the flag is not set,
-- 'NoCopyDest' is used.  The argument list is not inspected.
saveCopyPath :: [String] -> CopyFlags -> (PackageDescription, LocalBuildInfo) -> IO ()
saveCopyPath _args flags (pkg, lbi) =
  writeFile dataDirFile (datadir (absoluteInstallDirs pkg lbi target))
  where
    target = case copyDest flags of
      Flag d -> d
      NoFlag -> NoCopyDest
|
||||
|
||||
-- | Name of the file in which the installation's data directory is recorded.
-- This is a last-resort way for the separate RGL build script to determine
-- where to put the compiled RGL files.
dataDirFile :: String
dataDirFile = "DATA_DIR"
|
||||
|
||||
-- | Path of the locally built @gf@ executable inside the build directory:
-- @\<buildDir\>/gf/gf@, with the extension @exe@ added when the host
-- platform is Windows.
default_gf :: LocalBuildInfo -> FilePath
default_gf lbi = buildDir lbi </> gfName </> (gfName <.> hostExeSuffix)
  where
    gfName = "gf"

    -- Computed locally instead of using
    -- Distribution.Simple.BuildPaths.exeExtension, which changed type
    -- signature in Cabal 2.4.
    hostExeSuffix
      | Platform _ Windows <- hostPlatform lbi = "exe"
      | otherwise = ""
|
||||
|
||||
{-
|
||||
To test the GF web services, the minibar and the grammar editor, use
|
||||
"cabal install" (or "runhaskell Setup.hs install") to install gf as usual.
|
||||
Then start the server with the command "gf -server" and open
|
||||
http://localhost:41296/ in your web browser (Firefox, Safari, Opera or
|
||||
Chrome). The example grammars listed below will be available in the minibar.
|
||||
-}
|
||||
|
||||
{-
|
||||
Update 2018-07-04
|
||||
|
||||
The example grammars have now been removed from the GF repository.
|
||||
This script will look for them in ../gf-contrib and build them from there if possible.
|
||||
If not, the user will be given a message and nothing is built or copied.
|
||||
(Unfortunately cabal install seems to hide all messages from stdout,
|
||||
so users won't see this message unless they check the log.)
|
||||
-}
|
||||
|
||||
-- | Print a three-line notice explaining that the example grammars now live
-- in the gf-contrib repository and how to make them available.
noContribMsg :: IO ()
noContribMsg =
  mapM_
    putStrLn
    [ "Example grammars are no longer included in the main GF repository, but have moved to gf-contrib."
    , "If you want them to be built, clone the following repository in the same directory as gf-core:"
    , "https://github.com/GrammaticalFramework/gf-contrib.git"
    ]
|
||||
|
||||
-- | The example grammars: for each one the name of the PGF file, the
-- sub-directory holding its sources, and the source modules (one
-- @\<Name\>\<Lang\>.gf@ per language).
example_grammars :: [(String, String, [String])] -- [(pgf, subdir, source modules)]
example_grammars =
  [ grammar "Letter" "letter" "Eng Fin Fre Heb Rus Swe"
  , grammar "Foods" "foods" "Afr Amh Bul Cat Cze Dut Eng Epo Fin Fre Ger Gle Heb Hin Ice Ita Jpn Lav Mlt Mon Nep Pes Por Ron Spa Swe Tha Tsn Tur Urd"
  , grammar "Phrasebook" "phrasebook" "Bul Cat Chi Dan Dut Eng Lav Hin Nor Spa Swe Tha" -- only fastish languages
  ]
  where
    -- Build one table entry from the grammar name, its sub-directory and a
    -- space-separated list of language codes.
    grammar name subdir langs =
      (name ++ ".pgf", subdir, map (\lang -> name ++ lang ++ ".gf") (words langs))
|
||||
|
||||
-- | Relative location of the gf-contrib checkout: a sibling of the current
-- directory.
contrib_dir :: FilePath
contrib_dir = ".." </> "gf-contrib"
|
||||
|
||||
-- | Compile every entry of 'example_grammars' with the given @gf@ executable
-- when 'contrib_dir' exists; otherwise do nothing.  The 'Bool' results of
-- the individual compilations are discarded.
buildWeb :: String -> BuildFlags -> (PackageDescription, LocalBuildInfo) -> IO ()
buildWeb gf flags (pkg, lbi) = do
  haveContrib <- doesDirectoryExist contrib_dir
  if haveContrib
    then mapM_ compileGrammar example_grammars
    -- else noContribMsg
    else return ()
  where
    outDir = buildDir lbi </> "examples"

    -- Compile one grammar into 'outDir', keeping its .gfo files in a
    -- per-grammar sub-directory.
    compileGrammar :: (String, String, [String]) -> IO Bool
    compileGrammar (pgf, subdir, modules) = do
      createDirectoryIfMissing True gfoDir
      putStrLn ("Building " ++ pgf)
      execute gf gfArgs
      where
        gfoDir = outDir </> subdir
        srcDir = contrib_dir </> subdir
        -- Currently unused: only needed by the disabled --gf-lib-path option.
        dest = NoCopyDest
        gf_lib_path = datadir (absoluteInstallDirs pkg lbi dest) </> "lib"
        gfArgs =
          concat
            [ numJobs flags
            , ["-make", "-s"] -- ,"-optimize-pgf"
            , [ "--gfo-dir=" ++ gfoDir
              -- , "--gf-lib-path=" ++ gf_lib_path
              , "--name=" ++ dropExtension pgf
              , "--output-dir=" ++ outDir
              ]
            , map (srcDir </>) modules
            ]
|
||||
|
||||
-- | Set up the web files for a regular installation, i.e. 'setupWeb' with
-- 'NoCopyDest'.
installWeb :: (PackageDescription, LocalBuildInfo) -> IO ()
installWeb buildInfo = setupWeb NoCopyDest buildInfo
|
||||
|
||||
-- | Set up the web files for a @copy@: 'setupWeb' with the destination from
-- the copy flags, or 'NoCopyDest' when the flag is not set.
copyWeb :: CopyFlags -> (PackageDescription, LocalBuildInfo) -> IO ()
copyWeb flags =
  case copyDest flags of
    Flag d -> setupWeb d
    NoFlag -> setupWeb NoCopyDest
|
||||
|
||||
-- | Create the @www/grammars@ and @www/tmp@ directories under the data
-- directory of the given destination and, when 'contrib_dir' exists, copy
-- every built example PGF into @www/grammars@.  A PGF that was not built is
-- reported and skipped.
setupWeb :: CopyDest -> (PackageDescription, LocalBuildInfo) -> IO ()
setupWeb dest (pkg, lbi) = do
  mapM_ (createDirectoryIfMissing True) [grammarsDir, cloudDir]
  haveContrib <- doesDirectoryExist contrib_dir
  if haveContrib
    then mapM_ installPgf [pgf | (pgf, _, _) <- example_grammars]
    else return () -- message already displayed from buildWeb
  where
    wwwDir = datadir (absoluteInstallDirs pkg lbi dest) </> "www"
    grammarsDir = wwwDir </> "grammars"
    cloudDir = wwwDir </> "tmp" -- hmm
    builtDir = buildDir lbi </> "examples"

    -- Copy one PGF from the build directory if it exists there.
    installPgf :: String -> IO ()
    installPgf pgf = do
      let from = builtDir </> pgf
          to = grammarsDir </> pgf
      present <- doesFileExist from
      if present
        then putStrLn ("Installing " ++ to) >> copyFile from to
        else putStrLn ("Not installing " ++ to)
|
||||
|
||||
-- | Run a system command with the given arguments.  Returns 'True' when it
-- exits successfully; otherwise prints the command line and the exit code
-- and returns 'False'.
execute :: String -> [String] -> IO Bool
execute command args = do
  status <- rawSystem command args
  case status of
    ExitSuccess -> return True
    ExitFailure code -> do
      putStrLn ("Ran: " ++ command ++ " " ++ unwords (map quoted args))
      putStrLn (command ++ " exited with exit code: " ++ show code)
      return False
  where
    -- For display only: wrap arguments containing a space in single quotes.
    quoted arg
      | ' ' `elem` arg = "'" ++ arg ++ "'"
      | otherwise = arg
|
||||
|
||||
-- | Command-line options that enable parallel compilation of the RGL and
-- example grammars.  When the build flags carry a job count other than 1,
-- that count is passed to both @-j@ and the RTS @-N@ option; otherwise both
-- are given without a number.
numJobs :: BuildFlags -> [String]
numJobs flags =
  case jobCount of
    "" -> ["-j", "+RTS", "-A20M", "-N", "-RTS"]
    n  -> ["-j=" ++ n, "+RTS", "-A20M", "-N" ++ n, "-RTS"]
  where
    -- buildNumJobs is only available in Cabal>=1.20
    jobCount = case buildNumJobs flags of
      Flag (Just k) | k /= 1 -> show k
      _ -> ""
|
||||
main = defaultMain
|
||||
|
||||
@@ -2,7 +2,7 @@ name: gf
|
||||
version: 3.11.0-git
|
||||
|
||||
cabal-version: 1.22
|
||||
build-type: Custom
|
||||
build-type: Simple
|
||||
license: OtherLicense
|
||||
license-file: LICENSE
|
||||
category: Natural Language Processing, Compiler
|
||||
@@ -39,14 +39,6 @@ data-files:
|
||||
www/translator/*.css
|
||||
www/translator/*.js
|
||||
|
||||
custom-setup
|
||||
setup-depends:
|
||||
base >= 4.9.1,
|
||||
Cabal >= 1.22.0.0,
|
||||
directory >= 1.3.0 && < 1.4,
|
||||
filepath >= 1.4.1 && < 1.5,
|
||||
process >= 1.0.1.1 && < 1.7
|
||||
|
||||
source-repository head
|
||||
type: git
|
||||
location: https://github.com/GrammaticalFramework/gf-core.git
|
||||
@@ -192,7 +184,7 @@ executable gf
|
||||
GF.Text.Lexing
|
||||
GF.Text.Transliterations
|
||||
Paths_gf
|
||||
|
||||
|
||||
-- not really part of GF but I have changed the original binary library
|
||||
-- and we have to keep the copy for now.
|
||||
Data.Binary
|
||||
|
||||
@@ -26,6 +26,8 @@ libpgf_la_SOURCES = \
|
||||
pgf/typechecker.h \
|
||||
pgf/linearizer.cxx \
|
||||
pgf/linearizer.h \
|
||||
pgf/parser.cxx \
|
||||
pgf/parser.h \
|
||||
pgf/graphviz.cxx \
|
||||
pgf/graphviz.h \
|
||||
pgf/data.cxx \
|
||||
|
||||
@@ -47,9 +47,9 @@ void PgfConcr::release(ref<PgfConcr> concr)
|
||||
void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
|
||||
{
|
||||
for (size_t i = 0; i < lincat->fields->len; i++) {
|
||||
text_db_release(*vector_elem(lincat->fields, i));
|
||||
PgfLincatField::release(vector_elem(lincat->fields, i));
|
||||
}
|
||||
Vector<ref<PgfText>>::release(lincat->fields);
|
||||
Vector<PgfLincatField>::release(lincat->fields);
|
||||
|
||||
for (size_t i = 0; i < lincat->args->len; i++) {
|
||||
PgfLParam::release(vector_elem(lincat->args, i)->param);
|
||||
@@ -66,6 +66,13 @@ void PgfConcrLincat::release(ref<PgfConcrLincat> lincat)
|
||||
PgfDB::free(lincat, lincat->name.size+1);
|
||||
}
|
||||
|
||||
// Releases what a lincat field refers to: its name text and, unless the
// reference is zero, its vector of back-references.
void PgfLincatField::release(ref<PgfLincatField> field)
{
    text_db_release(field->name);

    if (field->backrefs == 0)
        return;

    Vector<PgfLincatBackref>::release(field->backrefs);
}
|
||||
|
||||
void PgfLParam::release(ref<PgfLParam> param)
|
||||
{
|
||||
PgfDB::free(param, param->n_terms*sizeof(param->terms[0]));
|
||||
|
||||
@@ -209,17 +209,25 @@ struct PGF_INTERNAL_DECL PgfSymbolALLCAPIT {
|
||||
static const uint8_t tag = 10;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLincatBackref;
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLincatField {
|
||||
ref<PgfText> name;
|
||||
ref<Vector<PgfLincatBackref>> backrefs;
|
||||
|
||||
static void release(ref<PgfLincatField> field);
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfConcrLincat {
|
||||
static const uint8_t tag = 0;
|
||||
|
||||
ref<PgfAbsCat> abscat;
|
||||
|
||||
ref<Vector<ref<PgfText>>> fields;
|
||||
|
||||
size_t n_lindefs;
|
||||
ref<Vector<PgfPArg>> args;
|
||||
ref<Vector<ref<PgfPResult>>> res;
|
||||
ref<Vector<ref<PgfSequence>>> seqs;
|
||||
ref<Vector<PgfLincatField>> fields;
|
||||
|
||||
PgfText name;
|
||||
|
||||
@@ -230,6 +238,7 @@ struct PGF_INTERNAL_DECL PgfConcrLin {
|
||||
static const uint8_t tag = 1;
|
||||
|
||||
ref<PgfAbsFun> absfun;
|
||||
ref<PgfConcrLincat> lincat;
|
||||
|
||||
ref<Vector<PgfPArg>> args;
|
||||
ref<Vector<ref<PgfPResult>>> res;
|
||||
@@ -240,6 +249,12 @@ struct PGF_INTERNAL_DECL PgfConcrLin {
|
||||
static void release(ref<PgfConcrLin> lin);
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLincatBackref {
|
||||
ref<PgfConcrLin> lin;
|
||||
size_t seq_index;
|
||||
size_t dot;
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfConcrPrintname {
|
||||
ref<PgfText> printname;
|
||||
PgfText name;
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
#ifndef HEAP_H
|
||||
#define HEAP_H
|
||||
|
||||
template <class A>
|
||||
class PGF_INTERNAL_DECL Heap {
|
||||
public:
|
||||
Heap() {
|
||||
len = 0;
|
||||
avail = 0;
|
||||
values = NULL;
|
||||
}
|
||||
|
||||
~Heap() { free(values); }
|
||||
|
||||
void push(A value) {
|
||||
if (len >= avail) {
|
||||
avail = get_next_padovan(len+1);
|
||||
A *new_values = (A *) realloc(values, sizeof(A)*avail);
|
||||
if (new_values == NULL)
|
||||
throw pgf_systemerror(errno);
|
||||
values = new_values;
|
||||
}
|
||||
siftdown(value, 0, len);
|
||||
len++;
|
||||
}
|
||||
|
||||
bool is_empty() { return (len == 0); }
|
||||
|
||||
A top() { return values[0]; }
|
||||
|
||||
A pop() {
|
||||
A top = values[0];
|
||||
siftup(&values[len-1],0);
|
||||
len--;
|
||||
return top;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t len;
|
||||
size_t avail;
|
||||
A *values;
|
||||
|
||||
void siftdown(A value, size_t startpos, size_t pos) {
|
||||
while (pos > startpos) {
|
||||
size_t parentpos = (pos - 1) >> 1;
|
||||
A parent = values[parentpos];
|
||||
|
||||
if (value >= parent)
|
||||
break;
|
||||
|
||||
values[pos] = parent;
|
||||
pos = parentpos;
|
||||
}
|
||||
|
||||
values[pos] = value;
|
||||
}
|
||||
|
||||
void siftup(A *pvalue, size_t pos) {
|
||||
size_t startpos = pos;
|
||||
size_t endpos = len;
|
||||
|
||||
size_t childpos = 2*pos + 1;
|
||||
while (childpos < endpos) {
|
||||
size_t rightpos = childpos + 1;
|
||||
if (rightpos < endpos &&
|
||||
values[childpos] >= values[rightpos]) {
|
||||
childpos = rightpos;
|
||||
}
|
||||
|
||||
values[pos] = values[childpos];
|
||||
pos = childpos;
|
||||
childpos = 2*pos + 1;
|
||||
}
|
||||
|
||||
siftdown(*pvalue, startpos, pos);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -287,11 +287,7 @@ void PgfLinearizer::TreeLinNode::check_category(PgfLinearizer *linearizer, PgfTe
|
||||
void PgfLinearizer::TreeLinNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
||||
{
|
||||
PgfText *cat = &lin->absfun->type->name;
|
||||
PgfText *field = NULL;
|
||||
ref<PgfConcrLincat> lincat = namespace_lookup(linearizer->concr->lincats, cat);
|
||||
if (lincat != 0) {
|
||||
field = &(**vector_elem(lincat->fields, lindex));
|
||||
}
|
||||
PgfText *field = &*(vector_elem(lin->lincat->fields, lindex)->name);
|
||||
|
||||
if (linearizer->pre_stack == NULL)
|
||||
out->begin_phrase(cat, fid, field, &lin->name);
|
||||
@@ -393,7 +389,7 @@ void PgfLinearizer::TreeLindefNode::linearize_arg(PgfLinearizationOutputIface *o
|
||||
void PgfLinearizer::TreeLindefNode::linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex)
|
||||
{
|
||||
if (lincat != 0) {
|
||||
PgfText *field = &(**vector_elem(lincat->fields, lindex));
|
||||
PgfText *field = &*(vector_elem(lincat->fields, lindex)->name);
|
||||
if (linearizer->pre_stack == NULL)
|
||||
out->begin_phrase(&lincat->name, fid, field, linearizer->wild);
|
||||
else {
|
||||
@@ -546,7 +542,7 @@ void PgfLinearizer::TreeLitNode::linearize(PgfLinearizationOutputIface *out, Pgf
|
||||
{
|
||||
PgfText *field = NULL;
|
||||
if (lincat != 0) {
|
||||
field = &(**vector_elem(lincat->fields, lindex));
|
||||
field = &*(vector_elem(lincat->fields, lindex)->name);
|
||||
}
|
||||
|
||||
linearizer->flush_pre_stack(out, literal);
|
||||
|
||||
@@ -98,6 +98,14 @@ class PGF_INTERNAL_DECL PgfLinearizer : public PgfUnmarshaller {
|
||||
~TreeLitNode() { free(literal); };
|
||||
};
|
||||
|
||||
struct TreeChunksNode : public TreeNode {
|
||||
TreeChunksNode(PgfLinearizer *linearizer);
|
||||
virtual bool resolve(PgfLinearizer *linearizer);
|
||||
virtual void check_category(PgfLinearizer *linearizer, PgfText *cat);
|
||||
virtual void linearize(PgfLinearizationOutputIface *out, PgfLinearizer *linearizer, size_t lindex);
|
||||
virtual ref<PgfConcrLincat> get_lincat(PgfLinearizer *linearizer);
|
||||
};
|
||||
|
||||
TreeNode *prev;
|
||||
TreeNode *next;
|
||||
TreeNode *args;
|
||||
|
||||
455
src/runtime/c/pgf/parser.cxx
Normal file
455
src/runtime/c/pgf/parser.cxx
Normal file
@@ -0,0 +1,455 @@
|
||||
#include "data.h"
#include "printer.h"
#include "parser.h"
#include <type_traits>
#include <map>
#include <new>
#include <vector>
#include <queue>
|
||||
|
||||
// #define PARSER_DEBUG
|
||||
|
||||
class PGF_INTERNAL_DECL PgfParser::CFGCat {
|
||||
public:
|
||||
ref<PgfLincatField> field;
|
||||
size_t value;
|
||||
|
||||
// copy assignment
|
||||
bool operator<(const CFGCat& other) const
|
||||
{
|
||||
if (field < other.field)
|
||||
return true;
|
||||
else if (field == other.field)
|
||||
return (value < other.value);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfParser::Choice {
|
||||
size_t id;
|
||||
std::vector<Production*> prods;
|
||||
|
||||
Choice(size_t id) {
|
||||
this->id = id;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class PGF_INTERNAL_DECL PgfParser::Production
|
||||
{
|
||||
public:
|
||||
static
|
||||
void predict(Choice *choice, ref<PgfConcrLin> lin, size_t seq_index)
|
||||
{
|
||||
size_t n_args = lin->absfun->type->hypos->len;
|
||||
|
||||
Production *prod = (Production*)
|
||||
malloc(sizeof(Production)+sizeof(Choice*)*n_args);
|
||||
prod->lin = lin;
|
||||
prod->seq_index = seq_index;
|
||||
memset(prod->args, 0, sizeof(Choice*)*n_args);
|
||||
|
||||
prod->log(choice);
|
||||
choice->prods.push_back(prod);
|
||||
}
|
||||
|
||||
void log(Choice *res) {
|
||||
#ifdef PARSER_DEBUG
|
||||
PgfPrinter printer(NULL,0,NULL);
|
||||
printer.nprintf(10, "?%ld = ", res->id);
|
||||
printer.puts(&lin->name);
|
||||
|
||||
auto hypos = lin->absfun->type->hypos;
|
||||
for (size_t i = 0; i < hypos->len; i++) {
|
||||
if (args[i] == NULL)
|
||||
printer.efun(&hypos->data[i].type->name);
|
||||
else
|
||||
printer.nprintf(10, " ?%ld", args[i]->id);
|
||||
}
|
||||
printer.puts("\n");
|
||||
printer.dump();
|
||||
#endif
|
||||
}
|
||||
|
||||
ref<PgfConcrLin> lin;
|
||||
size_t seq_index;
|
||||
Choice *args[];
|
||||
};
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfParser::ItemConts {
|
||||
State *state;
|
||||
std::vector<Item> items;
|
||||
};
|
||||
|
||||
class PGF_INTERNAL_DECL PgfParser::Item
|
||||
{
|
||||
public:
|
||||
static
|
||||
void combine(State *state, PgfLincatBackref *backref, Choice *choice)
|
||||
{
|
||||
ref<PgfSequence> seq =
|
||||
*vector_elem(backref->lin->seqs, backref->seq_index);
|
||||
|
||||
size_t index = backref->seq_index % backref->lin->lincat->fields->len;
|
||||
ref<PgfLincatField> field = vector_elem(backref->lin->lincat->fields, index);
|
||||
|
||||
// state->get_conts(field, 0);
|
||||
if (backref->dot+1 < seq->syms.len) {
|
||||
size_t n_args = backref->lin->absfun->type->hypos->len;
|
||||
|
||||
Item *item = (Item*)
|
||||
malloc(sizeof(Item)+sizeof(Choice*)*n_args);
|
||||
item->lin = backref->lin;
|
||||
item->seq_index = backref->seq_index;
|
||||
item->dot = backref->dot+1;
|
||||
|
||||
memset(item->args, 0, sizeof(Choice*)*n_args);
|
||||
ref<PgfSequence> seq =
|
||||
*vector_elem(item->lin->seqs, backref->seq_index);
|
||||
PgfSymbol sym = seq->syms.data[backref->dot];
|
||||
ref<PgfSymbolCat> symcat = ref<PgfSymbolCat>::untagged(sym);
|
||||
item->args[symcat->d] = choice;
|
||||
|
||||
item->log();
|
||||
} else {
|
||||
Production::predict(choice, backref->lin, backref->seq_index);
|
||||
}
|
||||
}
|
||||
|
||||
Production *complete()
|
||||
{
|
||||
size_t n_args = lin->absfun->type->hypos->len;
|
||||
|
||||
Production *prod = (Production*)
|
||||
malloc(sizeof(Production)+sizeof(Choice*)*n_args);
|
||||
prod->lin = lin;
|
||||
prod->seq_index = seq_index;
|
||||
memcpy(prod->args, args, sizeof(Choice*)*n_args);
|
||||
|
||||
return prod;
|
||||
}
|
||||
|
||||
void log() {
|
||||
#ifdef PARSER_DEBUG
|
||||
PgfPrinter printer(NULL,0,NULL);
|
||||
|
||||
size_t index = seq_index / lin->lincat->fields->len;
|
||||
ref<PgfPResult> res = *vector_elem(lin->res, index);
|
||||
ref<PgfDTyp> ty = lin->absfun->type;
|
||||
|
||||
if (res->vars != 0) {
|
||||
printer.lvar_ranges(res->vars);
|
||||
printer.puts(" . ");
|
||||
}
|
||||
|
||||
printer.efun(&ty->name);
|
||||
printer.puts("(");
|
||||
printer.lparam(ref<PgfLParam>::from_ptr(&res->param));
|
||||
printer.puts(") -> ");
|
||||
|
||||
printer.efun(&lin->name);
|
||||
printer.puts("[");
|
||||
size_t n_args = lin->args->len / lin->res->len;
|
||||
for (size_t i = 0; i < n_args; i++) {
|
||||
if (i > 0)
|
||||
printer.puts(",");
|
||||
|
||||
if (args[i] == NULL)
|
||||
printer.parg(vector_elem(ty->hypos, i)->type,
|
||||
vector_elem(lin->args, index*n_args + i));
|
||||
else
|
||||
printer.nprintf(10, "?%ld", args[i]->id);
|
||||
}
|
||||
|
||||
printer.nprintf(10, "]; %ld : ", seq_index % lin->lincat->fields->len);
|
||||
ref<PgfSequence> seq = *vector_elem(lin->seqs, seq_index);
|
||||
for (size_t i = 0; i < seq->syms.len; i++) {
|
||||
if (i > 0)
|
||||
printer.puts(" ");
|
||||
if (i == dot)
|
||||
printer.puts(". ");
|
||||
printer.symbol(*vector_elem(&seq->syms, i));
|
||||
}
|
||||
printer.puts("\n");
|
||||
|
||||
printer.dump();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
ItemConts *conts;
|
||||
ref<PgfConcrLin> lin;
|
||||
size_t seq_index;
|
||||
size_t dot;
|
||||
Choice *args[];
|
||||
};
|
||||
|
||||
class PGF_INTERNAL_DECL PgfParser::State
|
||||
{
|
||||
public:
|
||||
ItemConts *get_conts(ref<PgfLincatField> field, size_t value)
|
||||
{
|
||||
ItemConts *conts;
|
||||
CFGCat cfg_cat = {field, value};
|
||||
auto itr1 = contss.find(cfg_cat);
|
||||
if (itr1 == contss.end()) {
|
||||
conts = new ItemConts();
|
||||
conts->state = this;
|
||||
contss.insert(std::pair<CFGCat,ItemConts*>(cfg_cat, conts));
|
||||
} else {
|
||||
conts = itr1->second;
|
||||
}
|
||||
return conts;
|
||||
}
|
||||
|
||||
public:
|
||||
size_t start, end;
|
||||
State *prev, *next;
|
||||
|
||||
std::map<CFGCat,ItemConts*> contss;
|
||||
std::map<ItemConts*,Choice*> choices;
|
||||
std::priority_queue<PgfParser::Result*,std::vector<PgfParser::Result*>,PgfParser::ResultComparator> queue;
|
||||
};
|
||||
|
||||
|
||||
// A result backed by a single production.
class PgfParser::ResultExpr : public Result
{
public:
    // The inside probability is taken from the abstract function and the
    // outside probability from the abstract category of its lincat.
    ResultExpr(Production *prod)
      : inside_prob(prod->lin->absfun->prob),
        outside_prob(prod->lin->lincat->abscat->prob),
        prod(prod),
        arg_index(0)
    {
    }

    virtual prob_t prob()
    {
        return inside_prob+outside_prob;
    }

    // Builds a function expression named after the production's
    // linearization.
    virtual PgfExpr expr(PgfUnmarshaller *u)
    {
        return u->efun(&prod->lin->name);
    }

    // Nothing to expand in this version.
    virtual void proceed(PgfParser *parser, PgfUnmarshaller *u)
    {
    }

private:
    prob_t inside_prob;
    prob_t outside_prob;

    Production *prod;
    size_t arg_index;
};
|
||||
|
||||
// A partial result: one argument expression plus a link to the result it
// was derived from. Following `next` reaches the seed result, whose
// argument is 0.
class PgfParser::ResultMeta : public Result
{
public:
    // `prob` is added to the inside probability of `next`, if there is one.
    ResultMeta(State *state,
               PgfExpr arg, prob_t prob,
               ResultMeta *next)
      : inside_prob(prob + (next ? next->inside_prob : 0)),
        state(state),
        arg(arg),
        next(next)
    {
    }

    virtual prob_t prob()
    {
        return inside_prob;
    }

    // Applies a metavariable to the arguments along the chain, starting
    // with this result's own argument and stopping at the first link
    // whose argument is 0. Each intermediate application is released once
    // it has been wrapped in the next one.
    virtual PgfExpr expr(PgfUnmarshaller *u)
    {
        PgfExpr expr = u->emeta(0);
        for (ResultMeta *res = this; res->arg != 0; res = res->next) {
            PgfExpr applied = u->eapp(expr, res->arg);
            u->free_ref(expr);
            expr = applied;
        }
        return expr;
    }

    // Extends this result by one step.
    //
    // If the state has no choices, the text between the end of the nearest
    // earlier state that has choices (or the first state of the list) and
    // the start of this state becomes a string literal, queued on that
    // earlier state. Otherwise every production of every choice is queued
    // as a function expression on the state that owns its continuations.
    virtual void proceed(PgfParser *parser, PgfUnmarshaller *u)
    {
        if (state->choices.empty()) {
            State *prev = state;
            while (prev->prev != NULL && prev->choices.empty())
                prev = prev->prev;

            // copy the text into a temporary, zero-terminated PgfText
            size_t size = state->start-prev->end;
            PgfText *token = (PgfText *) alloca(sizeof(PgfText)+size+1);
            token->size = size;
            memcpy(token->text,parser->sentence->text+prev->end,size);
            token->text[size] = 0;

            PgfExpr expr = u->elit(u->lstr(token));
            prev->queue.push(new ResultMeta(prev, expr, 0, this));
            return;
        }

        for (auto &entry : state->choices) {
            ItemConts *conts = entry.first;
            Choice *choice = entry.second;

            for (Production *prod : choice->prods) {
                PgfExpr expr = u->efun(&prod->lin->name);
                prob_t prob = prod->lin->absfun->prob +
                              prod->lin->lincat->abscat->prob;
                conts->state->queue.push(new ResultMeta(conts->state,
                                                        expr, prob,
                                                        this));
            }
        }
    }

private:
    prob_t inside_prob;
    State *state;
    PgfExpr arg;
    ResultMeta *next;
};
|
||||
|
||||
// Creates a parser for `sentence` with the given start category. The
// parser keeps its own copy of the sentence (made with textdup); there are
// no states yet.
PgfParser::PgfParser(ref<PgfConcrLincat> start, PgfText *sentence)
  : start(start),
    sentence(textdup(sentence)),
    last_choice_id(0),
    before(NULL),
    after(NULL),
    fetch_state(NULL)
{
}
|
||||
|
||||
void PgfParser::space(size_t start, size_t end, PgfExn* err)
|
||||
{
|
||||
State *prev = NULL;
|
||||
State *next = before;
|
||||
while (next != NULL && next->start < start) {
|
||||
prev = next;
|
||||
next = next->next;
|
||||
}
|
||||
|
||||
if (next == NULL || next->start != start) {
|
||||
before = new State();
|
||||
before->start = start;
|
||||
before->end = end;
|
||||
before->prev = prev;
|
||||
before->next = next;
|
||||
|
||||
if (prev != NULL) prev->next = before;
|
||||
if (next != NULL) next->prev = before;
|
||||
} else {
|
||||
before = next;
|
||||
before->end = end;
|
||||
}
|
||||
|
||||
if (end == sentence->size) {
|
||||
fetch_state = after;
|
||||
fetch_state->queue.push(new ResultMeta(after,0,0,NULL));
|
||||
}
|
||||
}
|
||||
|
||||
void PgfParser::start_matches(size_t end, PgfExn* err)
|
||||
{
|
||||
State *prev = NULL;
|
||||
State *next = before;
|
||||
while (next != NULL && next->start < end) {
|
||||
prev = next;
|
||||
next = next->next;
|
||||
}
|
||||
|
||||
if (next == NULL || next->start != end) {
|
||||
after = new State();
|
||||
after->start = end;
|
||||
after->end = end;
|
||||
after->prev = prev;
|
||||
after->next = next;
|
||||
|
||||
if (prev != NULL) prev->next = after;
|
||||
if (next != NULL) next->prev = after;
|
||||
} else {
|
||||
after = next;
|
||||
}
|
||||
}
|
||||
|
||||
// Scanner callback for one matched sequence of a linearization.
//
// Looks up the continuations of the matched lincat field in `before`,
// finds or creates the Choice registered for them in `after`, and adds a
// production for (lin, seq_index) to that choice.
void PgfParser::match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
{
    size_t index = seq_index % lin->lincat->fields->len;
    ref<PgfLincatField> field = vector_elem(lin->lincat->fields, index);

    ItemConts *conts = before->get_conts(field, 0);

    Choice *choice;
    auto found = after->choices.find(conts);
    if (found != after->choices.end()) {
        choice = found->second;
    } else {
        choice = new Choice(++last_choice_id);
        after->choices.insert(std::pair<ItemConts*,Choice*>(conts, choice));
    }

    Production::predict(choice,lin,seq_index);

    // Disabled: combining a newly created choice with the field's
    // back-references.
    /*
    if (itr2 == after->choices.end()) {
        for (size_t i = 0; i < field->backrefs->len; i++) {
            PgfLincatBackref *backref = vector_elem(field->backrefs, i);
            Item::combine(before, backref, choice);
        }
    }*/
}
|
||||
|
||||
void PgfParser::end_matches(size_t end, PgfExn* err)
|
||||
{
|
||||
if (end == sentence->size) {
|
||||
fetch_state = after;
|
||||
fetch_state->queue.push(new ResultMeta(after,0,0,NULL));
|
||||
}
|
||||
}
|
||||
|
||||
// Produces the next result, or 0 when there is none. On success *prob
// receives the value of the result's prob().
PgfExpr PgfParser::fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob)
{
    DB_scope scope(db, READER_SCOPE);

    // move forward to the first state that has something queued
    for (; fetch_state != NULL; fetch_state = fetch_state->next) {
        if (!fetch_state->queue.empty())
            break;
    }

    if (fetch_state == NULL)
        return 0;

    // walk back to the first state of the list, expanding the top result
    // of every non-empty queue passed on the way
    for (; fetch_state->prev != NULL; fetch_state = fetch_state->prev) {
        if (fetch_state->queue.empty())
            continue;

        Result *res = fetch_state->queue.top();
        fetch_state->queue.pop();
        res->proceed(this,u);
    }

    if (fetch_state->queue.empty())
        return 0;

    Result *res = fetch_state->queue.top();
    fetch_state->queue.pop();
    *prob = res->prob();

    return res->expr(u);
}
|
||||
|
||||
// Releases the copy of the sentence and the list of parse states.
//
// NOTE(review): the ItemConts, Choice, Production and Result objects that
// the states point to are still not released here; doing so needs
// ownership rules that this version of the parser does not define yet.
PgfParser::~PgfParser()
{
    free(sentence);

    // Any of the three pointers can serve as an entry into the state list.
    // Only one list is freed, so no state can be deleted twice.
    State *state = (before != NULL) ? before
                 : (after  != NULL) ? after
                                    : fetch_state;
    if (state != NULL) {
        // rewind to the first state ...
        while (state->prev != NULL)
            state = state->prev;

        // ... and delete forward
        while (state != NULL) {
            State *next = state->next;
            delete state;
            state = next;
        }
    }

    before = after = fetch_state = NULL;
}
|
||||
51
src/runtime/c/pgf/parser.h
Normal file
51
src/runtime/c/pgf/parser.h
Normal file
@@ -0,0 +1,51 @@
|
||||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
// Parser driven by the phrase scanner callbacks (space, start_matches,
// match, end_matches); results are then retrieved one at a time with
// fetch().
class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum {
public:
    PgfParser(ref<PgfConcrLincat> start, PgfText *sentence);

    void space(size_t start, size_t end, PgfExn* err);
    void start_matches(size_t end, PgfExn* err);
    void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err);
    void end_matches(size_t end, PgfExn* err);

    PgfExpr fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob);

    virtual ~PgfParser();

private:
    class CFGCat;
    class State;
    class Item;
    class ItemConts;
    class Choice;
    class Production;

    // Interface of the entries kept in the per-state result queues.
    class Result {
    public:
        // Results are handled through Result*; the virtual destructor
        // makes deleting through that pointer well defined.
        virtual ~Result() {}

        virtual prob_t prob() = 0;
        virtual PgfExpr expr(PgfUnmarshaller *u) = 0;
        virtual void proceed(PgfParser *parser, PgfUnmarshaller *u) = 0;
    };

    class ResultExpr;
    class ResultMeta;

    // Ordering for std::priority_queue: with this comparison the result
    // with the smallest prob() is the one on top of the queue.
    class ResultComparator : std::less<Result*> {
    public:
        // Takes its arguments by const reference so that it can be called
        // with any pair of queue elements, including const ones.
        bool operator()(Result* const &lhs, Result* const &rhs) const
        {
            return lhs->prob() > rhs->prob();
        }
    };

    ref<PgfConcrLincat> start;
    PgfText *sentence;      // copy owned by the parser

    size_t last_choice_id;  // last id handed out to a Choice

    State *before, *after, *fetch_state;
};
|
||||
|
||||
#endif
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "printer.h"
|
||||
#include "typechecker.h"
|
||||
#include "linearizer.h"
|
||||
#include "parser.h"
|
||||
#include "graphviz.h"
|
||||
|
||||
static void
|
||||
@@ -815,6 +816,35 @@ pgf_is_case_sensitive(ref<PgfConcr> concr)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Phrase scanner used for morphological lookup: every match is forwarded
// to the user supplied PgfMorphoCallback, all other events are ignored.
class PGF_INTERNAL_DECL PgfMorphoScanner : public PgfPhraseScanner {
public:
    PgfMorphoScanner(PgfMorphoCallback* callback)
        : callback(callback)
    {}

    // Spaces and match boundaries carry no information for this scanner.
    virtual void space(size_t start, size_t end, PgfExn* err) {}

    virtual void start_matches(size_t end, PgfExn* err) {}

    // Reports the abstract function of `lin`, the name of the lincat field
    // that corresponds to `seq_index`, and the sum of the category and
    // function probabilities.
    virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
    {
        auto lincat = lin->lincat;

        // seq_index runs over all sequences of the lin; reduce it to
        // an index into the fields of the category.
        size_t field_index = seq_index % lincat->fields->len;
        ref<PgfLincatField> field = vector_elem(lincat->fields, field_index);

        callback->fn(callback,
                     &lin->absfun->name,
                     &(*field->name),
                     lincat->abscat->prob+lin->absfun->prob,
                     err);
    }

    virtual void end_matches(size_t end, PgfExn* err) {}

private:
    PgfMorphoCallback* callback;
};
|
||||
|
||||
PGF_API
|
||||
void pgf_lookup_morpho(PgfDB *db, PgfConcrRevision cnc_revision,
|
||||
PgfText *sentence,
|
||||
@@ -826,13 +856,45 @@ void pgf_lookup_morpho(PgfDB *db, PgfConcrRevision cnc_revision,
|
||||
|
||||
bool case_sensitive = pgf_is_case_sensitive(concr);
|
||||
|
||||
PgfMorphoScanner scanner(callback);
|
||||
phrasetable_lookup(concr->phrasetable,
|
||||
sentence, case_sensitive,
|
||||
concr->lincats,
|
||||
callback, err);
|
||||
&scanner, err);
|
||||
} PGF_API_END
|
||||
}
|
||||
|
||||
// Phrase scanner used for cohort lookup: individual matches go to the
// embedded morphology callback, and every completed group of matches is
// reported to the cohorts callback as a (start, end) range.
class PGF_INTERNAL_DECL PgfCohortsScanner : public PgfPhraseScanner {
public:
    PgfCohortsScanner(PgfCohortsCallback* callback) {
        // Start from a defined position; previously match_start was
        // indeterminate until the first space() event arrived.
        this->match_start = 0;
        this->callback = callback;
    }

    // The end of a space is where the next range of matches begins.
    virtual void space(size_t start, size_t end, PgfExn* err)
    {
        match_start = end;
    }

    virtual void start_matches(size_t match_end, PgfExn* err)
    {
    }

    // Reports the abstract function of `lin`, the name of the lincat field
    // that corresponds to `seq_index`, and the sum of the category and
    // function probabilities.
    virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)
    {
        ref<PgfLincatField> field =
            vector_elem(lin->lincat->fields, seq_index % lin->lincat->fields->len);
        callback->morpho.fn(&callback->morpho, &lin->absfun->name, &(*field->name), lin->lincat->abscat->prob+lin->absfun->prob, err);
    }

    // Closes the current group: the range runs from the position recorded
    // by the last space() to match_end.
    virtual void end_matches(size_t match_end, PgfExn* err)
    {
        callback->fn(callback, match_start, match_end, err);
    }

private:
    size_t match_start;
    PgfCohortsCallback* callback;
};
|
||||
|
||||
PGF_API
|
||||
void pgf_lookup_cohorts(PgfDB *db, PgfConcrRevision cnc_revision,
|
||||
PgfText *sentence,
|
||||
@@ -844,10 +906,10 @@ void pgf_lookup_cohorts(PgfDB *db, PgfConcrRevision cnc_revision,
|
||||
|
||||
bool case_sensitive = pgf_is_case_sensitive(concr);
|
||||
|
||||
PgfCohortsScanner scanner(callback);
|
||||
phrasetable_lookup_cohorts(concr->phrasetable,
|
||||
sentence, case_sensitive,
|
||||
concr->lincats,
|
||||
callback, err);
|
||||
&scanner, err);
|
||||
} PGF_API_END
|
||||
}
|
||||
|
||||
@@ -885,7 +947,7 @@ PGF_API
|
||||
PgfText *pgf_get_lincat_field_internal(object o, size_t i)
|
||||
{
|
||||
ref<PgfConcrLincat> lincat = o;
|
||||
return &(**vector_elem(lincat->fields, i));
|
||||
return &*(vector_elem(lincat->fields, i)->name);
|
||||
}
|
||||
|
||||
PGF_API
|
||||
@@ -903,22 +965,18 @@ PgfText *pgf_print_lindef_internal(PgfPhrasetableIds *seq_ids, object o, size_t
|
||||
PgfInternalMarshaller m;
|
||||
PgfPrinter printer(NULL,0,&m);
|
||||
|
||||
printer.efun(&lincat->name);
|
||||
printer.puts(" : ");
|
||||
|
||||
ref<PgfPResult> res = *vector_elem(lincat->res, i);
|
||||
|
||||
if (res->vars != 0) {
|
||||
printer.lvar_ranges(res->vars);
|
||||
printer.puts(" . ");
|
||||
}
|
||||
|
||||
printer.puts(" String(0) -> ");
|
||||
|
||||
printer.efun(&lincat->name);
|
||||
printer.puts("(");
|
||||
printer.lparam(ref<PgfLParam>::from_ptr(&res->param));
|
||||
printer.puts(") = [");
|
||||
printer.puts(") -> ");
|
||||
printer.efun(&lincat->name);
|
||||
printer.puts("[String(0)] = [");
|
||||
|
||||
size_t n_seqs = lincat->fields->len;
|
||||
for (size_t j = 0; j < n_seqs; j++) {
|
||||
@@ -942,20 +1000,19 @@ PgfText *pgf_print_linref_internal(PgfPhrasetableIds *seq_ids, object o, size_t
|
||||
PgfInternalMarshaller m;
|
||||
PgfPrinter printer(NULL,0,&m);
|
||||
|
||||
printer.efun(&lincat->name);
|
||||
printer.puts(" : ");
|
||||
|
||||
ref<PgfPResult> res = *vector_elem(lincat->res, lincat->n_lindefs+i);
|
||||
|
||||
if (res->vars != 0) {
|
||||
printer.lvar_ranges(res->vars);
|
||||
printer.puts(" . ");
|
||||
}
|
||||
|
||||
printer.puts("String(0) -> ");
|
||||
printer.efun(&lincat->name);
|
||||
printer.puts("[");
|
||||
printer.efun(&lincat->name);
|
||||
printer.puts("(");
|
||||
printer.lparam(vector_elem(lincat->args, lincat->n_lindefs+i)->param);
|
||||
printer.puts(") -> String(0) = [");
|
||||
printer.puts(")] = [");
|
||||
|
||||
size_t n_seqs = lincat->fields->len;
|
||||
ref<PgfSequence> seq = *vector_elem(lincat->seqs, lincat->n_lindefs*n_seqs+i);
|
||||
@@ -970,37 +1027,33 @@ PGF_API
|
||||
PgfText *pgf_print_lin_internal(PgfPhrasetableIds *seq_ids, object o, size_t i)
|
||||
{
|
||||
ref<PgfConcrLin> lin = o;
|
||||
ref<PgfDTyp> ty = lin->absfun->type;
|
||||
|
||||
PgfInternalMarshaller m;
|
||||
PgfPrinter printer(NULL,0,&m);
|
||||
|
||||
printer.efun(&lin->name);
|
||||
printer.puts(" : ");
|
||||
|
||||
ref<PgfPResult> res = *vector_elem(lin->res, i);
|
||||
ref<PgfDTyp> ty = lin->absfun->type;
|
||||
|
||||
if (res->vars != 0) {
|
||||
printer.lvar_ranges(res->vars);
|
||||
printer.puts(" . ");
|
||||
}
|
||||
|
||||
size_t n_args = lin->args->len / lin->res->len;
|
||||
for (size_t j = 0; j < n_args; j++) {
|
||||
if (j > 0)
|
||||
printer.puts(" * ");
|
||||
|
||||
printer.parg(vector_elem(ty->hypos, j)->type,
|
||||
vector_elem(lin->args, i*n_args + j));
|
||||
}
|
||||
|
||||
if (n_args > 0)
|
||||
printer.puts(" -> ");
|
||||
|
||||
printer.efun(&ty->name);
|
||||
printer.puts("(");
|
||||
printer.lparam(ref<PgfLParam>::from_ptr(&res->param));
|
||||
printer.puts(") = [");
|
||||
printer.puts(") -> ");
|
||||
|
||||
printer.efun(&lin->name);
|
||||
printer.puts("[");
|
||||
size_t n_args = lin->args->len / lin->res->len;
|
||||
for (size_t j = 0; j < n_args; j++) {
|
||||
if (j > 0)
|
||||
printer.puts(",");
|
||||
printer.parg(vector_elem(ty->hypos, j)->type,
|
||||
vector_elem(lin->args, i*n_args + j));
|
||||
}
|
||||
printer.puts("] = [");
|
||||
|
||||
size_t n_seqs = lin->seqs->len / lin->res->len;
|
||||
for (size_t j = 0; j < n_seqs; j++) {
|
||||
@@ -1439,10 +1492,11 @@ public:
|
||||
this->n_lindefs = n_lindefs;
|
||||
this->n_linrefs = n_linrefs;
|
||||
|
||||
ref<Vector<ref<PgfText>>> db_fields = vector_new<ref<PgfText>>(n_fields);
|
||||
ref<Vector<PgfLincatField>> db_fields = vector_new<PgfLincatField>(n_fields);
|
||||
for (size_t i = 0; i < n_fields; i++) {
|
||||
ref<PgfText> field = textdup_db(fields[i]);
|
||||
*vector_elem(db_fields, i) = field;
|
||||
ref<PgfText> name = textdup_db(fields[i]);
|
||||
vector_elem(db_fields, i)->name = name;
|
||||
vector_elem(db_fields, i)->backrefs = 0;
|
||||
}
|
||||
|
||||
ref<PgfConcrLincat> lincat = PgfDB::malloc<PgfConcrLincat>(abscat->name.size+1);
|
||||
@@ -2098,7 +2152,7 @@ PgfText **pgf_category_fields(PgfDB *db, PgfConcrRevision revision,
|
||||
if (fields == 0)
|
||||
throw pgf_systemerror(ENOMEM);
|
||||
for (size_t i = 0; i < n_fields; i++) {
|
||||
fields[i] = textdup(lincat->fields->data[i]);
|
||||
fields[i] = textdup(vector_elem(lincat->fields, i)->name);
|
||||
}
|
||||
*p_n_fields = n_fields;
|
||||
return fields;
|
||||
@@ -2188,7 +2242,7 @@ PgfText **pgf_tabular_linearize(PgfDB *db, PgfConcrRevision revision,
|
||||
|
||||
PgfText *text = out.get_text();
|
||||
if (text != NULL) {
|
||||
res[pos++] = textdup(&(*lincat->fields->data[i]));
|
||||
res[pos++] = textdup(&*(vector_elem(lincat->fields,i)->name));
|
||||
res[pos++] = text;
|
||||
}
|
||||
}
|
||||
@@ -2227,7 +2281,7 @@ PgfText **pgf_tabular_linearize_all(PgfDB *db, PgfConcrRevision revision,
|
||||
|
||||
PgfText *text = out.get_text();
|
||||
if (text != NULL) {
|
||||
res[pos++] = textdup(&(*lincat->fields->data[i]));
|
||||
res[pos++] = textdup(&*(vector_elem(lincat->fields, i)->name));
|
||||
res[pos++] = text;
|
||||
}
|
||||
}
|
||||
@@ -2240,7 +2294,7 @@ PgfText **pgf_tabular_linearize_all(PgfDB *db, PgfConcrRevision revision,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PGF_API_DECL
|
||||
PGF_API
|
||||
void pgf_bracketed_linearize(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfExpr expr, PgfPrintContext *ctxt,
|
||||
PgfMarshaller *m,
|
||||
@@ -2260,7 +2314,7 @@ void pgf_bracketed_linearize(PgfDB *db, PgfConcrRevision revision,
|
||||
} PGF_API_END
|
||||
}
|
||||
|
||||
PGF_API_DECL
|
||||
PGF_API
|
||||
void pgf_bracketed_linearize_all(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfExpr expr, PgfPrintContext *ctxt,
|
||||
PgfMarshaller *m,
|
||||
@@ -2281,6 +2335,70 @@ void pgf_bracketed_linearize_all(PgfDB *db, PgfConcrRevision revision,
|
||||
} PGF_API_END
|
||||
}
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfLincatUnmarshaller : PgfUnmarshaller {
|
||||
PgfLincatUnmarshaller(ref<PgfConcr> concr) {
|
||||
this->concr = concr;
|
||||
this->lincat = 0;
|
||||
}
|
||||
|
||||
virtual PgfExpr eabs(PgfBindType btype, PgfText *name, PgfExpr body) { return 0; }
|
||||
virtual PgfExpr eapp(PgfExpr fun, PgfExpr arg) { return 0; }
|
||||
virtual PgfExpr elit(PgfLiteral lit) { return 0; }
|
||||
virtual PgfExpr emeta(PgfMetaId meta) { return 0; }
|
||||
virtual PgfExpr efun(PgfText *name) { return 0; }
|
||||
virtual PgfExpr evar(int index) { return 0; }
|
||||
virtual PgfExpr etyped(PgfExpr expr, PgfType typ) { return 0; }
|
||||
virtual PgfExpr eimplarg(PgfExpr expr) { return 0; }
|
||||
virtual PgfLiteral lint(size_t size, uintmax_t *v) { return 0; }
|
||||
virtual PgfLiteral lflt(double v) { return 0; }
|
||||
virtual PgfLiteral lstr(PgfText *v) { return 0; }
|
||||
virtual PgfType dtyp(size_t n_hypos, PgfTypeHypo *hypos,
|
||||
PgfText *cat,
|
||||
size_t n_exprs, PgfExpr *exprs) {
|
||||
lincat =
|
||||
namespace_lookup(concr->lincats, cat);
|
||||
return 0;
|
||||
}
|
||||
virtual void free_ref(object x) {};
|
||||
|
||||
ref<PgfConcr> concr;
|
||||
ref<PgfConcrLincat> lincat;
|
||||
};
|
||||
|
||||
// Parses `sentence` in the given concrete syntax revision, using the
// category of the type `ty` as start category.
//
// Returns an enumeration of the results which the caller must release with
// pgf_free_expr_enum. Returns NULL when
//   - the category of `ty` has no lincat in the concrete syntax
//     (in that case `err` is left untouched by this function), or
//   - an error was raised; the error is then described in `err`.
PGF_API
PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
                       PgfType ty, PgfMarshaller *m,
                       PgfText *sentence,
                       PgfExn * err)
{
    PGF_API_BEGIN {
        DB_scope scope(db, READER_SCOPE);

        ref<PgfConcr> concr = db->revision2concr(revision);

        bool case_sensitive = pgf_is_case_sensitive(concr);

        // Traverse the type only to discover the lincat of its category.
        PgfLincatUnmarshaller u(concr);
        m->match_type(&u, ty);
        if (u.lincat == 0)
            return 0;

        PgfParser *parser = new PgfParser(u.lincat, sentence);

        // The parser is owned by this function until it is handed over to
        // the caller, so it has to be destroyed on every failure path.
        try {
            phrasetable_lookup_cohorts(concr->phrasetable,
                                       sentence, case_sensitive,
                                       parser, err);
        } catch (...) {
            delete parser;
            throw;
        }

        // Errors reported through `err` instead of an exception: callers
        // are not expected to use the result, so do not hand out an object
        // that nobody will free.
        if (err->type != PGF_EXN_NONE) {
            delete parser;
            return NULL;
        }

        return parser;
    } PGF_API_END

    return NULL;
}
|
||||
|
||||
// Destroys an expression enumeration, e.g. one returned by pgf_parse.
// The object is deleted through its PgfExprEnum base pointer, which relies
// on the virtual destructor declared for PgfExprEnum. Passing a null
// pointer is harmless since `delete` on a null pointer does nothing.
PGF_API
|
||||
void pgf_free_expr_enum(PgfExprEnum *en)
|
||||
{
|
||||
delete en;
|
||||
}
|
||||
|
||||
PGF_API
|
||||
PgfText *pgf_get_printname(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfText *fun, PgfExn* err)
|
||||
|
||||
@@ -724,6 +724,31 @@ void pgf_bracketed_linearize_all(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfLinearizationOutputIface *out,
|
||||
PgfExn* err);
|
||||
|
||||
/* PgfExprEnum is an enumeration of expressions. The same name is declared
 * twice: as an abstract class for C++ clients and as a plain struct with
 * an explicit table of function pointers for C clients (and for foreign
 * function interfaces that read the table directly).
 *
 * NOTE(review): the C view presumes that the C++ object starts with a
 * pointer to its virtual table and that `fetch` occupies the first slot of
 * that table. This holds only as long as `fetch` stays the first virtual
 * member declared below - confirm for every supported compiler/ABI.
 */
#ifdef __cplusplus
|
||||
struct PgfExprEnum {
|
||||
/* Returns the next expression and stores its probability in *prob. */
virtual PgfExpr fetch(PgfDB *db, PgfUnmarshaller *u, prob_t *prob)=0;
|
||||
/* Virtual, so that implementations can be deleted through this type. */
virtual ~PgfExprEnum() {};
|
||||
};
|
||||
#else
|
||||
typedef struct PgfExprEnum PgfExprEnum;
|
||||
typedef struct PgfExprEnumVtbl PgfExprEnumVtbl;
|
||||
/* C view of the virtual table: only `fetch` is exposed. */
struct PgfExprEnumVtbl {
|
||||
PgfExpr (*fetch)(PgfExprEnum *this, PgfDB *db, PgfUnmarshaller *u, prob_t *prob);
|
||||
};
|
||||
/* C view of the object: a single pointer to the table above. */
struct PgfExprEnum {
|
||||
PgfExprEnumVtbl *vtbl;
|
||||
};
|
||||
#endif
|
||||
|
||||
PGF_API_DECL
|
||||
PgfExprEnum *pgf_parse(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfType ty, PgfMarshaller *m,
|
||||
PgfText *sentence,
|
||||
PgfExn * err);
|
||||
|
||||
PGF_API_DECL
|
||||
void pgf_free_expr_enum(PgfExprEnum *en);
|
||||
|
||||
PGF_API_DECL
|
||||
PgfText *pgf_get_printname(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfText *fun, PgfExn* err);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include "data.h"
|
||||
#include "heap.h"
|
||||
#include <queue>
|
||||
|
||||
PgfPhrasetableIds::PgfPhrasetableIds()
|
||||
{
|
||||
@@ -231,10 +231,6 @@ int sequence_cmp(ref<PgfSequence> seq1, ref<PgfSequence> seq2)
|
||||
struct PGF_INTERNAL_DECL PgfTextSpot {
|
||||
size_t pos; // position in Unicode characters
|
||||
const uint8_t *ptr; // pointer into the spot location
|
||||
|
||||
bool operator >= (PgfTextSpot const &obj) {
|
||||
return pos >= obj.pos;
|
||||
}
|
||||
};
|
||||
|
||||
static
|
||||
@@ -479,8 +475,7 @@ PGF_INTERNAL
|
||||
void phrasetable_lookup(PgfPhrasetable table,
|
||||
PgfText *sentence,
|
||||
bool case_sensitive,
|
||||
Namespace<PgfConcrLincat> lincats,
|
||||
PgfMorphoCallback* callback, PgfExn* err)
|
||||
PgfPhraseScanner *scanner, PgfExn* err)
|
||||
{
|
||||
if (table == 0)
|
||||
return;
|
||||
@@ -491,9 +486,9 @@ void phrasetable_lookup(PgfPhrasetable table,
|
||||
const uint8_t *end = current.ptr+sentence->size;
|
||||
int cmp = text_sequence_cmp(¤t,end,table->value.seq,case_sensitive,true);
|
||||
if (cmp < 0) {
|
||||
phrasetable_lookup(table->left,sentence,case_sensitive,lincats,callback,err);
|
||||
phrasetable_lookup(table->left,sentence,case_sensitive,scanner,err);
|
||||
} else if (cmp > 0) {
|
||||
phrasetable_lookup(table->right,sentence,case_sensitive,lincats,callback,err);
|
||||
phrasetable_lookup(table->right,sentence,case_sensitive,scanner,err);
|
||||
} else {
|
||||
auto backrefs = table->value.backrefs;
|
||||
if (backrefs != 0) {
|
||||
@@ -502,13 +497,8 @@ void phrasetable_lookup(PgfPhrasetable table,
|
||||
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
||||
case PgfConcrLin::tag: {
|
||||
ref<PgfConcrLin> lin = ref<PgfConcrLin>::untagged(backref.container);
|
||||
ref<PgfConcrLincat> lincat =
|
||||
namespace_lookup(lincats, &lin->absfun->type->name);
|
||||
if (lin->absfun->type->hypos->len == 0 && lincat != 0) {
|
||||
ref<PgfText> field =
|
||||
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
||||
|
||||
callback->fn(callback, &lin->absfun->name, &(*field), lincat->abscat->prob+lin->absfun->prob, err);
|
||||
if (lin->absfun->type->hypos->len == 0) {
|
||||
scanner->match(lin, backref.seq_index, err);
|
||||
if (err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
}
|
||||
@@ -523,10 +513,10 @@ void phrasetable_lookup(PgfPhrasetable table,
|
||||
}
|
||||
|
||||
if (!case_sensitive) {
|
||||
phrasetable_lookup(table->left,sentence,false,lincats,callback,err);
|
||||
phrasetable_lookup(table->left,sentence,false,scanner,err);
|
||||
if (err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
phrasetable_lookup(table->right,sentence,false,lincats,callback,err);
|
||||
phrasetable_lookup(table->right,sentence,false,scanner,err);
|
||||
if (err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
}
|
||||
@@ -534,18 +524,66 @@ void phrasetable_lookup(PgfPhrasetable table,
|
||||
}
|
||||
|
||||
struct PGF_INTERNAL_DECL PgfCohortsState {
|
||||
class PgfTextSpotComparator : std::less<PgfTextSpot> {
|
||||
public:
|
||||
bool operator()(PgfTextSpot &lhs, PgfTextSpot &rhs) const
|
||||
{
|
||||
return lhs.pos > rhs.pos;
|
||||
}
|
||||
};
|
||||
|
||||
PgfTextSpot spot;
|
||||
Heap<PgfTextSpot> queue;
|
||||
std::priority_queue<PgfTextSpot, std::vector<PgfTextSpot>, PgfTextSpotComparator> queue;
|
||||
|
||||
size_t last_pos;
|
||||
size_t skip_pos;
|
||||
bool skipping;
|
||||
const uint8_t *end; // pointer into the end of the sentence
|
||||
|
||||
bool case_sensitive;
|
||||
Namespace<PgfConcrLincat> lincats;
|
||||
PgfCohortsCallback* callback;
|
||||
PgfPhraseScanner *scanner;
|
||||
PgfExn* err;
|
||||
};
|
||||
|
||||
static
|
||||
void finish_skipping(PgfCohortsState *state) {
|
||||
if (state->skipping) {
|
||||
while (!state->queue.empty()) {
|
||||
PgfTextSpot spot = state->queue.top();
|
||||
if (spot.pos >= state->spot.pos)
|
||||
break;
|
||||
|
||||
if (spot.pos != state->last_pos) {
|
||||
if (state->last_pos > 0) {
|
||||
state->scanner->space(spot.pos, spot.pos,
|
||||
state->err);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
}
|
||||
|
||||
state->scanner->start_matches(state->spot.pos,
|
||||
state->err);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
|
||||
state->scanner->end_matches(state->spot.pos,
|
||||
state->err);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
|
||||
state->last_pos = spot.pos;
|
||||
}
|
||||
|
||||
state->queue.pop();
|
||||
}
|
||||
|
||||
state->scanner->space(state->spot.pos, state->spot.pos,
|
||||
state->err);
|
||||
|
||||
state->last_pos = 0;
|
||||
state->skipping = false;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void phrasetable_lookup_prefixes(PgfCohortsState *state,
|
||||
PgfPhrasetable table,
|
||||
@@ -561,38 +599,38 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state,
|
||||
} else if (cmp > 0) {
|
||||
ptrdiff_t len = current.ptr - state->spot.ptr;
|
||||
|
||||
if (min <= len)
|
||||
phrasetable_lookup_prefixes(state,table->left,min,len);
|
||||
if (min <= len-1)
|
||||
phrasetable_lookup_prefixes(state,table->left,min,len-1);
|
||||
|
||||
if (len+1 <= max)
|
||||
phrasetable_lookup_prefixes(state,table->right,len+1,max);
|
||||
if (len <= max)
|
||||
phrasetable_lookup_prefixes(state,table->right,len,max);
|
||||
} else {
|
||||
ptrdiff_t len = current.ptr - state->spot.ptr;
|
||||
|
||||
finish_skipping(state);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
|
||||
if (min <= len)
|
||||
phrasetable_lookup_prefixes(state,table->left,min,len);
|
||||
|
||||
auto backrefs = table->value.backrefs;
|
||||
if (len > 0 && backrefs != 0) {
|
||||
if (state->skip_pos != (size_t) -1) {
|
||||
state->callback->fn(state->callback,
|
||||
state->skip_pos,
|
||||
state->spot.pos,
|
||||
state->err);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
state->skip_pos = (size_t) -1;
|
||||
}
|
||||
if (state->last_pos != current.pos) {
|
||||
if (state->last_pos > 0) {
|
||||
state->scanner->end_matches(state->last_pos,
|
||||
state->err);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
}
|
||||
|
||||
if (state->last_pos > 0 && state->last_pos != current.pos) {
|
||||
state->callback->fn(state->callback,
|
||||
state->spot.pos,
|
||||
state->last_pos,
|
||||
state->err);
|
||||
state->scanner->start_matches(current.pos,
|
||||
state->err);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
|
||||
state->last_pos = current.pos;
|
||||
}
|
||||
state->last_pos = current.pos;
|
||||
state->queue.push(current);
|
||||
|
||||
for (size_t i = 0; i < backrefs->len; i++) {
|
||||
@@ -600,17 +638,10 @@ void phrasetable_lookup_prefixes(PgfCohortsState *state,
|
||||
switch (ref<PgfConcrLin>::get_tag(backref.container)) {
|
||||
case PgfConcrLin::tag: {
|
||||
ref<PgfConcrLin> lin = ref<PgfConcrLin>::untagged(backref.container);
|
||||
ref<PgfConcrLincat> lincat =
|
||||
namespace_lookup(state->lincats, &lin->absfun->type->name);
|
||||
if (lin->absfun->type->hypos->len == 0 && lincat != 0) {
|
||||
ref<PgfText> field =
|
||||
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
||||
|
||||
state->callback->morpho.fn(&state->callback->morpho,
|
||||
&lin->absfun->name,
|
||||
&(*field),
|
||||
lincat->abscat->prob+lin->absfun->prob,
|
||||
state->err);
|
||||
if (lin->absfun->type->hypos->len == 0) {
|
||||
state->scanner->match(lin,
|
||||
backref.seq_index,
|
||||
state->err);
|
||||
if (state->err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
}
|
||||
@@ -633,8 +664,7 @@ PGF_INTERNAL
|
||||
void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
||||
PgfText *sentence,
|
||||
bool case_sensitive,
|
||||
Namespace<PgfConcrLincat> lincats,
|
||||
PgfCohortsCallback* callback, PgfExn* err)
|
||||
PgfPhraseScanner *scanner, PgfExn* err)
|
||||
{
|
||||
PgfTextSpot spot;
|
||||
spot.pos = 0;
|
||||
@@ -645,15 +675,16 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
||||
state.spot.ptr = NULL;
|
||||
state.queue.push(spot);
|
||||
state.last_pos = 0;
|
||||
state.skip_pos = (size_t) -1;
|
||||
state.skipping = false;
|
||||
state.end = (uint8_t *) &sentence->text[sentence->size];
|
||||
state.case_sensitive = case_sensitive;
|
||||
state.lincats = lincats;
|
||||
state.callback = callback;
|
||||
state.scanner = scanner;
|
||||
state.err = err;
|
||||
|
||||
while (!state.queue.is_empty()) {
|
||||
PgfTextSpot spot = state.queue.pop();
|
||||
while (!state.queue.empty()) {
|
||||
PgfTextSpot spot = state.queue.top();
|
||||
state.queue.pop();
|
||||
|
||||
if (spot.pos != state.spot.pos) {
|
||||
state.spot = spot;
|
||||
|
||||
@@ -667,36 +698,38 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
||||
state.spot.ptr = ptr;
|
||||
}
|
||||
|
||||
state.skip_pos = (size_t) -1;
|
||||
state.scanner->space(spot.pos,state.spot.pos,state.err);
|
||||
if (state.err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
|
||||
while (state.spot.ptr < state.end) {
|
||||
phrasetable_lookup_prefixes(&state, table, 1, sentence->size);
|
||||
if (state.err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
|
||||
if (state.last_pos > 0) {
|
||||
// We found at least one match.
|
||||
// The last range is yet to be reported.
|
||||
state.callback->fn(state.callback,
|
||||
state.spot.pos,
|
||||
state.last_pos,
|
||||
state.err);
|
||||
state.scanner->end_matches(state.last_pos,
|
||||
state.err);
|
||||
if (state.err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
state.last_pos = 0;
|
||||
break;
|
||||
} else {
|
||||
// We didn't find any matches at this position,
|
||||
// therefore we must skip one character and try again.
|
||||
if (state.skip_pos == (size_t) -1)
|
||||
state.skip_pos = state.spot.pos;
|
||||
// No matches were found, try the next position
|
||||
if (!state.skipping) {
|
||||
while (!state.queue.empty() &&
|
||||
state.queue.top().pos < state.spot.pos) {
|
||||
state.queue.pop();
|
||||
}
|
||||
state.queue.push(state.spot);
|
||||
state.skipping = true;
|
||||
}
|
||||
|
||||
const uint8_t *ptr = state.spot.ptr;
|
||||
uint32_t ucs = pgf_utf8_decode(&ptr);
|
||||
if (pgf_utf8_is_space(ucs)) {
|
||||
state.callback->fn(state.callback,
|
||||
state.skip_pos,
|
||||
state.spot.pos,
|
||||
state.err);
|
||||
if (state.err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
state.skip_pos = -1;
|
||||
state.queue.push(state.spot);
|
||||
break;
|
||||
}
|
||||
@@ -704,16 +737,10 @@ void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
||||
state.spot.ptr = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (state.skip_pos != (size_t) -1) {
|
||||
state.callback->fn(state.callback,
|
||||
state.skip_pos,
|
||||
state.spot.pos,
|
||||
state.err);
|
||||
if (state.err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
state.skip_pos = (size_t) -1;
|
||||
}
|
||||
|
||||
finish_skipping(&state);
|
||||
if (state.err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
|
||||
state.spot = spot;
|
||||
}
|
||||
@@ -748,10 +775,10 @@ void phrasetable_iter(PgfConcr *concr,
|
||||
ref<PgfConcrLincat> lincat =
|
||||
namespace_lookup(concr->lincats, &lin->absfun->type->name);
|
||||
if (lincat != 0) {
|
||||
ref<PgfText> field =
|
||||
*vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
||||
ref<PgfLincatField> field =
|
||||
vector_elem(lincat->fields, backref.seq_index % lincat->fields->len);
|
||||
|
||||
callback->fn(callback, &lin->absfun->name, &(*field), lincat->abscat->prob+lin->absfun->prob, err);
|
||||
callback->fn(callback, &lin->absfun->name, &(*field->name), lincat->abscat->prob+lin->absfun->prob, err);
|
||||
if (err->type != PGF_EXN_NONE)
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -68,19 +68,27 @@ PgfPhrasetable phrasetable_delete(PgfPhrasetable table,
|
||||
PGF_INTERNAL_DECL
|
||||
size_t phrasetable_size(PgfPhrasetable table);
|
||||
|
||||
class PgfConcrLin;
|
||||
|
||||
class PGF_INTERNAL_DECL PgfPhraseScanner {
|
||||
public:
|
||||
virtual void space(size_t start, size_t end, PgfExn* err)=0;
|
||||
virtual void start_matches(size_t pos, PgfExn* err)=0;
|
||||
virtual void match(ref<PgfConcrLin> lin, size_t seq_index, PgfExn* err)=0;
|
||||
virtual void end_matches(size_t pos, PgfExn* err)=0;
|
||||
};
|
||||
|
||||
PGF_INTERNAL_DECL
|
||||
void phrasetable_lookup(PgfPhrasetable table,
|
||||
PgfText *sentence,
|
||||
bool case_sensitive,
|
||||
Namespace<struct PgfConcrLincat> lincats,
|
||||
PgfMorphoCallback* callback, PgfExn* err);
|
||||
PgfPhraseScanner *scanner, PgfExn* err);
|
||||
|
||||
PGF_INTERNAL_DECL
|
||||
void phrasetable_lookup_cohorts(PgfPhrasetable table,
|
||||
PgfText *sentence,
|
||||
bool case_sensitive,
|
||||
Namespace<PgfConcrLincat> lincats,
|
||||
PgfCohortsCallback* callback, PgfExn* err);
|
||||
PgfPhraseScanner *scanner, PgfExn* err);
|
||||
|
||||
PGF_INTERNAL_DECL
|
||||
void phrasetable_iter(PgfConcr *concr,
|
||||
|
||||
@@ -33,7 +33,7 @@ public:
|
||||
PgfPrinter(PgfPrintContext *context, int priority,
|
||||
PgfMarshaller *marshaller);
|
||||
|
||||
PgfPrinter() { free(res); }
|
||||
~PgfPrinter() { free(res); }
|
||||
|
||||
// Push a new variable in the printing context. If the name
|
||||
// collides with an existing variable, the variable is renamed
|
||||
@@ -52,6 +52,12 @@ public:
|
||||
|
||||
PgfText *get_text();
|
||||
|
||||
// Debugging aid: writes the text collected by get_text() to stderr and
// then releases that text with free().
// NOTE(review): `text` is dereferenced without a check; this presumes that
// get_text() never returns NULL here - confirm.
void dump() {
|
||||
PgfText *text = get_text();
|
||||
// size is cast to int because the "%.*s" precision argument must be an int
fprintf(stderr, "%.*s", (int) text->size, text->text);
|
||||
free(text);
|
||||
};
|
||||
|
||||
void hypo(PgfTypeHypo *hypo, int prio);
|
||||
|
||||
void parg(ref<PgfDTyp> ty, ref<PgfPArg> parg);
|
||||
|
||||
@@ -667,7 +667,7 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
||||
{
|
||||
ref<PgfConcrLincat> lincat = read_name(&PgfConcrLincat::name);
|
||||
lincat->abscat = namespace_lookup(abstract->cats, &lincat->name);
|
||||
lincat->fields = read_vector(&PgfReader::read_text2);
|
||||
lincat->fields = read_vector(&PgfReader::read_lincat_field);
|
||||
lincat->n_lindefs = read_len();
|
||||
lincat->args = read_vector(&PgfReader::read_parg);
|
||||
lincat->res = read_vector(&PgfReader::read_presult2);
|
||||
@@ -675,6 +675,12 @@ ref<PgfConcrLincat> PgfReader::read_lincat()
|
||||
return lincat;
|
||||
}
|
||||
|
||||
// Reads one field of a linearization category. Only the field name is
// taken from the input; the list of back references starts out empty (0).
void PgfReader::read_lincat_field(ref<PgfLincatField> field)
|
||||
{
|
||||
field->name = read_text();
|
||||
field->backrefs = 0;
|
||||
}
|
||||
|
||||
ref<PgfConcrLin> PgfReader::read_lin()
|
||||
{
|
||||
ref<PgfConcrLin> lin = read_name(&PgfConcrLin::name);
|
||||
@@ -682,6 +688,76 @@ ref<PgfConcrLin> PgfReader::read_lin()
|
||||
lin->args = read_vector(&PgfReader::read_parg);
|
||||
lin->res = read_vector(&PgfReader::read_presult2);
|
||||
lin->seqs = read_seq_ids(lin.tagged());
|
||||
|
||||
lin->lincat =
|
||||
namespace_lookup(concrete->lincats, &lin->absfun->type->name);
|
||||
if (lin->lincat == 0)
|
||||
throw pgf_error("Found a lin which uses a category without a lincat");
|
||||
|
||||
ref<Vector<PgfHypo>> hypos = lin->absfun->type->hypos;
|
||||
ref<PgfConcrLincat> lincats[hypos->len];
|
||||
for (size_t d = 0; d < hypos->len; d++) {
|
||||
lincats[d] =
|
||||
namespace_lookup(concrete->lincats,
|
||||
&vector_elem(hypos,d)->type->name);
|
||||
if (lincats[d] == 0)
|
||||
throw pgf_error("Found a lin which uses a category without a lincat");
|
||||
}
|
||||
|
||||
size_t n_fields = lin->lincat->fields->len;
|
||||
for (size_t seq_index = 0; seq_index < lin->seqs->len; seq_index++) {
|
||||
ref<PgfSequence> seq = *vector_elem(lin->seqs,seq_index);
|
||||
ref<PgfPResult> result = *vector_elem(lin->res, seq_index / n_fields);
|
||||
|
||||
size_t dot = 0;
|
||||
if (dot < seq->syms.len) {
|
||||
PgfSymbol sym = *vector_elem(&seq->syms,dot);
|
||||
switch (ref<PgfSymbol>::get_tag(sym)) {
|
||||
case PgfSymbolCat::tag: {
|
||||
auto sym_cat = ref<PgfSymbolCat>::untagged(sym);
|
||||
ref<PgfConcrLincat> lincat = lincats[sym_cat->d];
|
||||
|
||||
size_t max_values = 1;
|
||||
size_t ranges[sym_cat->r.n_terms];
|
||||
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
|
||||
size_t range = 1;
|
||||
for (size_t j = 0; j < result->vars->len; j++) {
|
||||
auto var_range = vector_elem(result->vars, j);
|
||||
if (var_range->var == sym_cat->r.terms[i].var) {
|
||||
range = var_range->range;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ranges[i] = range;
|
||||
max_values *= range;
|
||||
}
|
||||
|
||||
for (size_t values = 0; values < max_values; values++) {
|
||||
size_t v = values;
|
||||
size_t index = sym_cat->r.i0;
|
||||
for (size_t i = 0; i < sym_cat->r.n_terms; i++) {
|
||||
index += sym_cat->r.terms[i].factor * (v % ranges[i]);
|
||||
v = v / ranges[i];
|
||||
}
|
||||
|
||||
ref<Vector<PgfLincatBackref>> backrefs =
|
||||
vector_elem(lincat->fields,index)->backrefs;
|
||||
backrefs =
|
||||
vector_resize(backrefs, backrefs->len+1,
|
||||
PgfDB::get_txn_id());
|
||||
vector_elem(lincat->fields,index)->backrefs = backrefs;
|
||||
ref<PgfLincatBackref> backref =
|
||||
vector_elem(backrefs,backrefs->len-1);
|
||||
backref->lin = lin;
|
||||
backref->seq_index = seq_index;
|
||||
backref->dot = dot;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return lin;
|
||||
}
|
||||
|
||||
|
||||
@@ -69,6 +69,7 @@ public:
|
||||
void merge_abstract(ref<PgfAbstr> abstract);
|
||||
|
||||
ref<PgfConcrLincat> read_lincat();
|
||||
void read_lincat_field(ref<PgfLincatField> field);
|
||||
ref<PgfLParam> read_lparam();
|
||||
void read_variable_range(ref<PgfVariableRange> var_info);
|
||||
void read_parg(ref<PgfPArg> parg);
|
||||
|
||||
@@ -383,13 +383,18 @@ void PgfWriter::write_phrasetable_helper(PgfPhrasetable table)
|
||||
void PgfWriter::write_lincat(ref<PgfConcrLincat> lincat)
|
||||
{
|
||||
write_name(&lincat->name);
|
||||
write_vector(lincat->fields, &PgfWriter::write_text);
|
||||
write_vector(lincat->fields, &PgfWriter::write_lincat_field);
|
||||
write_len(lincat->n_lindefs);
|
||||
write_vector(lincat->args, &PgfWriter::write_parg);
|
||||
write_vector(lincat->res, &PgfWriter::write_presult);
|
||||
write_vector(lincat->seqs, &PgfWriter::write_seq_id);
|
||||
}
|
||||
|
||||
// Writes one field of a linearization category. Only the name of the
// field is written; its back references are not part of the output.
void PgfWriter::write_lincat_field(ref<PgfLincatField> field)
|
||||
{
|
||||
write_text(field->name);
|
||||
}
|
||||
|
||||
void PgfWriter::write_lin(ref<PgfConcrLin> lin)
|
||||
{
|
||||
write_name(&lin->name);
|
||||
|
||||
@@ -39,6 +39,7 @@ public:
|
||||
void write_abstract(ref<PgfAbstr> abstract);
|
||||
|
||||
void write_lincat(ref<PgfConcrLincat> lincat);
|
||||
void write_lincat_field(ref<PgfLincatField> field);
|
||||
void write_variable_range(ref<PgfVariableRange> var);
|
||||
void write_lparam(ref<PgfLParam> lparam);
|
||||
void write_parg(ref<PgfPArg> linarg);
|
||||
|
||||
@@ -97,7 +97,7 @@ import Foreign
|
||||
import Foreign.C
|
||||
import Control.Monad(forM,forM_)
|
||||
import Control.Exception(bracket,mask_,throwIO)
|
||||
import System.IO.Unsafe(unsafePerformIO)
|
||||
import System.IO.Unsafe(unsafePerformIO, unsafeInterleaveIO)
|
||||
import System.Random
|
||||
import qualified Data.Map as Map
|
||||
import Data.IORef
|
||||
@@ -673,7 +673,30 @@ data ParseOutput a
|
||||
| ParseIncomplete -- ^ The sentence is not complete.
|
||||
|
||||
parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
|
||||
parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
|
||||
parse c ty sent =
|
||||
unsafePerformIO $
|
||||
withForeignPtr (c_revision c) $ \c_revision ->
|
||||
withForeignPtr marshaller $ \m ->
|
||||
bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
|
||||
withText sent $ \c_sent -> do
|
||||
c_enum <- withPgfExn "parse" (pgf_parse (c_db c) c_revision c_ty m c_sent)
|
||||
c_fetch <- (#peek PgfExprEnumVtbl, fetch) =<< (#peek PgfExprEnum, vtbl) c_enum
|
||||
exprs <- unsafeInterleaveIO (fetchLazy c_fetch c_enum)
|
||||
return (ParseOk exprs)
|
||||
where
|
||||
fetchLazy c_fetch c_enum =
|
||||
withForeignPtr (c_revision c) $ \c_revision ->
|
||||
withForeignPtr unmarshaller $ \u ->
|
||||
alloca $ \p_prob -> do
|
||||
c_expr <- callFetch c_fetch c_enum (c_db c) u p_prob
|
||||
if c_expr == castPtrToStablePtr nullPtr
|
||||
then do pgf_free_expr_enum c_enum
|
||||
return []
|
||||
else do expr <- deRefStablePtr c_expr
|
||||
freeStablePtr c_expr
|
||||
prob <- peek p_prob
|
||||
rest <- unsafeInterleaveIO (fetchLazy c_fetch c_enum)
|
||||
return ((expr,prob) : rest)
|
||||
|
||||
parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
|
||||
@@ -50,6 +50,7 @@ data PgfProbsCallback
|
||||
data PgfMorphoCallback
|
||||
data PgfCohortsCallback
|
||||
data PgfPhrasetableIds
|
||||
data PgfExprEnum
|
||||
|
||||
type Wrapper a = a -> IO (FunPtr a)
|
||||
type Dynamic a = FunPtr a -> a
|
||||
@@ -253,6 +254,12 @@ foreign import ccall pgf_bracketed_linearize :: Ptr PgfDB -> Ptr Concr -> Stable
|
||||
|
||||
foreign import ccall pgf_bracketed_linearize_all :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfLinearizationOutputIface -> Ptr PgfExn -> IO ()
|
||||
|
||||
foreign import ccall pgf_parse :: Ptr PgfDB -> Ptr Concr -> StablePtr Type -> Ptr PgfMarshaller -> Ptr PgfText -> Ptr PgfExn -> IO (Ptr PgfExprEnum)
|
||||
|
||||
foreign import ccall "dynamic" callFetch :: Dynamic (Ptr PgfExprEnum -> Ptr PgfDB -> Ptr PgfUnmarshaller -> Ptr (#type prob_t) -> IO (StablePtr Expr))
|
||||
|
||||
foreign import ccall pgf_free_expr_enum :: Ptr PgfExprEnum -> IO ()
|
||||
|
||||
foreign import ccall "wrapper" wrapSymbol0 :: Wrapper (Ptr PgfLinearizationOutputIface -> IO ())
|
||||
|
||||
foreign import ccall "wrapper" wrapSymbol1 :: Wrapper (Ptr PgfLinearizationOutputIface -> Ptr PgfText -> IO ())
|
||||
|
||||
@@ -18,42 +18,42 @@ concrete basic_cnc {
|
||||
lincat Float = [
|
||||
"s"
|
||||
]
|
||||
lindef Float : String(0) -> Float(0) = [S0]
|
||||
linref Float : Float(0) -> String(0) = [S0]
|
||||
lindef Float(0) -> Float[String(0)] = [S0]
|
||||
linref String(0) -> Float[Float(0)] = [S0]
|
||||
lincat Int = [
|
||||
"s"
|
||||
]
|
||||
lindef Int : String(0) -> Int(0) = [S0]
|
||||
linref Int : Int(0) -> String(0) = [S0]
|
||||
lindef Int(0) -> Int[String(0)] = [S0]
|
||||
linref String(0) -> Int[Int(0)] = [S0]
|
||||
lincat N = [
|
||||
"s"
|
||||
]
|
||||
lindef N : String(0) -> N(0) = [S0]
|
||||
linref N : ∀{i<2} . N(i) -> String(0) = [S0]
|
||||
lindef N(0) -> N[String(0)] = [S0]
|
||||
linref ∀{i<2} . String(0) -> N[N(i)] = [S0]
|
||||
lincat P = [
|
||||
"s"
|
||||
]
|
||||
lindef P : String(0) -> P(0) = [S0]
|
||||
linref P : P(0) -> String(0) = [S0]
|
||||
lindef P(0) -> P[String(0)] = [S0]
|
||||
linref String(0) -> P[P(0)] = [S0]
|
||||
lincat S = [
|
||||
""
|
||||
]
|
||||
lindef S : String(0) -> S(0) = [S0]
|
||||
linref S : S(0) -> String(0) = [S0]
|
||||
lindef S(0) -> S[String(0)] = [S0]
|
||||
linref String(0) -> S[S(0)] = [S0]
|
||||
lincat String = [
|
||||
"s"
|
||||
]
|
||||
lindef String : String(0) -> String(0) = [S0]
|
||||
linref String : String(0) -> String(0) = [S0]
|
||||
lin c : ∀{i<2} . N(i) -> S(0) = [S0]
|
||||
lin floatLit : Float(0) -> S(0) = [S0]
|
||||
lin ind : ∀{i<2} . P(0) * P(0) * N(i) -> P(0) = [S1]
|
||||
lin intLit : Int(0) -> S(0) = [S0]
|
||||
lin nat : ∀{i<2} . N(i) -> P(0) = [S5]
|
||||
lin s : N(0) -> N(0) = [S2]
|
||||
lin s : N(1) -> N(0) = [S4]
|
||||
lin stringLit : String(0) -> S(0) = [S0]
|
||||
lin z : N(1) = [S3]
|
||||
lindef String(0) -> String[String(0)] = [S0]
|
||||
linref String(0) -> String[String(0)] = [S0]
|
||||
lin ∀{i<2} . S(0) -> c[N(i)] = [S0]
|
||||
lin S(0) -> floatLit[Float(0)] = [S0]
|
||||
lin ∀{i<2} . P(0) -> ind[P(0),P(0),N(i)] = [S1]
|
||||
lin S(0) -> intLit[Int(0)] = [S0]
|
||||
lin ∀{i<2} . P(0) -> nat[N(i)] = [S5]
|
||||
lin N(0) -> s[N(0)] = [S2]
|
||||
lin N(0) -> s[N(1)] = [S4]
|
||||
lin S(0) -> stringLit[String(0)] = [S0]
|
||||
lin N(1) -> z[] = [S3]
|
||||
sequences {
|
||||
S0 = <0,0>
|
||||
S1 = <0,0> "&" "λ" SOFT_BIND <1,$0> SOFT_BIND "," SOFT_BIND <1,$1> "." <1,0>
|
||||
|
||||
Reference in New Issue
Block a user