forked from GitHub/gf-core
restored the word alignment API
This commit is contained in:
@@ -26,6 +26,8 @@ libpgf_la_SOURCES = \
|
||||
pgf/typechecker.h \
|
||||
pgf/linearizer.cxx \
|
||||
pgf/linearizer.h \
|
||||
pgf/aligner.cxx \
|
||||
pgf/aligner.h \
|
||||
pgf/parser.cxx \
|
||||
pgf/parser.h \
|
||||
pgf/graphviz.cxx \
|
||||
|
||||
153
src/runtime/c/pgf/aligner.cxx
Normal file
153
src/runtime/c/pgf/aligner.cxx
Normal file
@@ -0,0 +1,153 @@
|
||||
#include "data.h"
|
||||
#include "printer.h"
|
||||
#include "aligner.h"
|
||||
|
||||
// Creates an aligner with an empty phrase buffer.
//
// `bind` starts out true so that the very first token takes the
// "glue" path in symbol_token() and does not flush before any
// token has been collected.
PgfAlignerOutput::PgfAlignerOutput()
    : bind(true),
      nonexist(false),
      n_phrases(0),
      last_phrase(NULL),
      phrases(NULL),
      n_matches(0),
      printer(NULL, 0, NULL)
{
}
|
||||
|
||||
// Releases whatever phrases are still owned by this object, i.e. those
// that were not handed over to a caller through get_phrases().
PgfAlignerOutput::~PgfAlignerOutput()
{
    PgfAlignerOutput::free_phrases(this->phrases, this->n_phrases);
}
|
||||
|
||||
// Frees an array of phrases: for every entry the phrase text and the
// entry itself are released with free(), and finally the array.
// A NULL array is accepted and ignored.
void PgfAlignerOutput::free_phrases(PgfAlignmentPhrase **phrases, size_t n_phrases)
{
    if (phrases == NULL)
        return;

    PgfAlignmentPhrase **end = phrases + n_phrases;
    for (PgfAlignmentPhrase **it = phrases; it != end; ++it) {
        PgfAlignmentPhrase *entry = *it;
        free(entry->phrase);
        free(entry);
    }

    free(phrases);
}
|
||||
|
||||
// Hands the collected phrases over to the caller.
//
// On return *n_phrases holds the number of entries in the returned
// array. The aligner forgets the array, so it is no longer released by
// the destructor; free_phrases() can be used to release it.
//
// If symbol_ne() was seen, nothing is handed over: the result is NULL
// with *n_phrases == 0, and any phrases collected so far stay owned by
// this object.
PgfAlignmentPhrase **PgfAlignerOutput::get_phrases(size_t *n_phrases)
{
    PgfAlignmentPhrase **result = NULL;
    size_t count = 0;

    if (!nonexist) {
        count  = this->n_phrases;
        result = this->phrases;

        // detach the buffer from the aligner
        this->phrases     = NULL;
        this->last_phrase = NULL;
        this->n_phrases   = 0;
    }

    *n_phrases = count;
    return result;
}
|
||||
|
||||
void PgfAlignerOutput::push_parent(int fid)
|
||||
{
|
||||
parent_current.push_back(fid);
|
||||
|
||||
if (last_phrase != NULL) {
|
||||
for (size_t i = 0; i < last_phrase->n_fids; i++) {
|
||||
if (fid == last_phrase->fids[i]) {
|
||||
n_matches++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Receives one token of the linearization.
//
// The token is attributed to the innermost open phrase (the top of
// parent_stack). If a bind is pending, the token is glued to the word
// being built and its parent is added to that word's parent list unless
// already present. Otherwise the word built so far is flushed and a new
// one is started. Tokens are ignored once symbol_ne() has been seen.
//
// NOTE(review): parent_stack.back() assumes that begin_phrase() has been
// called at least once before the first token - confirm against the
// linearizer.
void PgfAlignerOutput::symbol_token(PgfText *tok)
{
    if (nonexist)
        return;

    int fid = parent_stack.back();

    if (bind) {
        // here we glue tokens

        bind = false;

        bool found = false;
        for (int current_fid : parent_current) {
            if (fid == current_fid) {
                found = true;
                break;
            }
        }

        // add the tree node id to the list of parents if it has not
        // been added already.
        if (!found) {
            push_parent(fid);
        }
    } else {
        // here we start a new (compound) word
        flush();
        parent_current.clear();
        push_parent(fid);
    }

    printer.puts(tok);
}
|
||||
|
||||
// A phrase for tree node `fid` is opened: remember the node id so that
// tokens emitted inside the phrase can be attributed to it.
// Only `fid` is used; cat, ann and fun are ignored.
void PgfAlignerOutput::begin_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun)
{
    this->parent_stack.push_back(fid);
}
|
||||
|
||||
// The innermost open phrase is closed: drop its node id from the stack.
// None of the arguments is inspected.
void PgfAlignerOutput::end_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun)
{
    this->parent_stack.pop_back();
}
|
||||
|
||||
void PgfAlignerOutput::symbol_ne()
|
||||
{
|
||||
nonexist = true;
|
||||
}
|
||||
|
||||
void PgfAlignerOutput::symbol_bind()
|
||||
{
|
||||
bind = true;
|
||||
}
|
||||
|
||||
void PgfAlignerOutput::flush()
|
||||
{
|
||||
size_t n_fids = parent_current.size();
|
||||
|
||||
if (n_matches == n_fids &&
|
||||
n_matches == last_phrase->n_fids) {
|
||||
// if the current compound word has the same parents
|
||||
// as the last one then we just combine them with a space
|
||||
|
||||
PgfText *phrase = printer.get_text();
|
||||
printer.puts(last_phrase->phrase); free(last_phrase->phrase);
|
||||
printer.puts(" ");
|
||||
printer.puts(phrase); free(phrase);
|
||||
|
||||
last_phrase->phrase = printer.get_text();
|
||||
} else {
|
||||
// push the current word to the buffer of words
|
||||
|
||||
PgfAlignmentPhrase* phrase = (PgfAlignmentPhrase*)
|
||||
malloc(sizeof(PgfAlignmentPhrase)+n_fids*sizeof(int));
|
||||
phrase->phrase = printer.get_text();
|
||||
phrase->n_fids = n_fids;
|
||||
for (size_t i = 0; i < n_fids; i++) {
|
||||
phrase->fids[i] = parent_current[i];
|
||||
}
|
||||
|
||||
phrases = (PgfAlignmentPhrase**)
|
||||
realloc(phrases, (n_phrases+1)*sizeof(PgfAlignmentPhrase*));
|
||||
phrases[n_phrases++] = phrase;
|
||||
|
||||
last_phrase = phrase;
|
||||
}
|
||||
|
||||
n_matches = 0;
|
||||
}
|
||||
36
src/runtime/c/pgf/aligner.h
Normal file
36
src/runtime/c/pgf/aligner.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef ALIGNER_H
|
||||
#define ALIGNER_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
// Linearization output that groups the emitted tokens into phrases and
// records, for every phrase, the ids of the tree nodes (the `fid`
// values passed to begin_phrase) that produced it.
class PGF_INTERNAL_DECL PgfAlignerOutput : public PgfLinearizationOutputIface {
public:
    PgfAlignerOutput();
    ~PgfAlignerOutput();

    // The object owns malloc'ed buffers that the destructor frees;
    // a copy would free them a second time, so copying is disabled.
    PgfAlignerOutput(const PgfAlignerOutput&) = delete;
    PgfAlignerOutput& operator=(const PgfAlignerOutput&) = delete;

    virtual void symbol_token(PgfText *tok);
    virtual void begin_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun);
    virtual void end_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun);
    virtual void symbol_ne();
    virtual void symbol_bind();
    virtual void flush();

    // Hands the collected phrases over to the caller and stores their
    // number in *n_phrases. The result is NULL with *n_phrases == 0
    // after symbol_ne().
    PgfAlignmentPhrase **get_phrases(size_t *n_phrases);

    // Frees an array obtained from get_phrases(); NULL is accepted.
    static void free_phrases(PgfAlignmentPhrase **phrases, size_t n_phrases);

private:
    bool bind;                        // glue the next token to the current word
    bool nonexist;                    // set by symbol_ne()
    std::vector<int> parent_current;  // node ids of the word being built
    std::vector<int> parent_stack;    // node ids of the currently open phrases
    size_t n_phrases;                 // number of entries in `phrases`
    PgfAlignmentPhrase *last_phrase;  // most recently stored phrase, or NULL
    PgfAlignmentPhrase **phrases;     // malloc'ed array of stored phrases
    size_t n_matches;                 // current parents also found in last_phrase
    PgfPrinter printer;               // accumulates the text of the current word

    void push_parent(int fid);
};
|
||||
|
||||
#endif
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "linearizer.h"
|
||||
#include "parser.h"
|
||||
#include "graphviz.h"
|
||||
#include "aligner.h"
|
||||
|
||||
static void
|
||||
pgf_exn_clear(PgfExn* err)
|
||||
@@ -2641,3 +2642,121 @@ pgf_graphviz_parse_tree(PgfDB *db, PgfConcrRevision revision,
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Renders the word alignment of `expr` across several concrete
// languages as a Graphviz digraph.
//
// Each revision whose linearization resolves contributes one record
// node "struct<i>" (i is the index in `revisions`) with one port
// "n<j>" per phrase. A phrase is connected to every phrase of the
// previously rendered revision with which it shares a tree node id.
//
// Returns the graph text, or NULL if the call was aborted between
// PGF_API_BEGIN and PGF_API_END.
PGF_API PgfText *
pgf_graphviz_word_alignment(PgfDB *db, PgfConcrRevision* revisions, size_t n_revisions,
                            PgfExpr expr, PgfPrintContext *ctxt,
                            PgfMarshaller *m,
                            PgfGraphvizOptions* opts,
                            PgfExn* err)
{
    PGF_API_BEGIN {
        DB_scope scope(db, READER_SCOPE);

        PgfPrinter printer(NULL, 0, NULL);

        printer.puts("digraph {\n");
        printer.puts("rankdir=LR ;\n");
        printer.puts("node [shape = record");
        if (opts->leafFont != NULL && *opts->leafFont)
            printer.nprintf(40, ", fontname = \"%s\"", opts->leafFont);
        if (opts->leafColor != NULL && *opts->leafColor)
            printer.nprintf(40, ", fontcolor = \"%s\"", opts->leafColor);
        printer.puts("] ;\n\n");
        if (opts->leafEdgeStyle != NULL && *opts->leafEdgeStyle)
            printer.nprintf(40, "edge [style = %s];\n", opts->leafEdgeStyle);
        printer.puts("\n");

        // Index of the revision that produced last_phrases. It is not
        // necessarily i-1: revisions that fail to resolve emit no node,
        // and edges must start at a node that actually exists.
        size_t last_index = 0;
        size_t last_n_phrases = 0;
        PgfAlignmentPhrase **last_phrases = NULL;
        for (size_t i = 0; i < n_revisions; i++) {
            ref<PgfConcr> concr = db->revision2concr(revisions[i]);

            PgfAlignerOutput out;
            PgfLinearizer linearizer(ctxt, concr, m);
            m->match_expr(&linearizer, expr);
            linearizer.reverse_and_label(true);
            if (linearizer.resolve()) {
                linearizer.linearize(&out, 0);
                out.flush();

                printer.nprintf(40, "  struct%zu[label=\"", i);

                size_t n_phrases;
                PgfAlignmentPhrase **phrases =
                    out.get_phrases(&n_phrases);

                for (size_t j = 0; j < n_phrases; j++) {
                    PgfAlignmentPhrase* phrase = phrases[j];
                    if (j > 0)
                        printer.puts(" | ");
                    printer.nprintf(16, "<n%zu> ", j);
                    printer.puts(phrase->phrase);
                }

                printer.puts("\"] ;\n");

                if (last_phrases != NULL) {
                    for (size_t j = 0; j < n_phrases; j++) {
                        PgfAlignmentPhrase* phrase = phrases[j];

                        for (size_t k = 0; k < phrase->n_fids; k++) {
                            int fid = phrase->fids[k];

                            for (size_t l = 0; l < last_n_phrases; l++) {
                                PgfAlignmentPhrase* last_phrase = last_phrases[l];

                                for (size_t r = 0; r < last_phrase->n_fids; r++) {
                                    int last_fid = last_phrase->fids[r];
                                    if (fid == last_fid) {
                                        printer.nprintf(50, "struct%zu:n%zu:e -> struct%zu:n%zu:w ;\n",last_index,l,i,j);
                                    }
                                }
                            }
                        }
                    }
                }

                PgfAlignerOutput::free_phrases(last_phrases, last_n_phrases);

                last_index = i;
                last_n_phrases = n_phrases;
                last_phrases = phrases;
            }
        }

        PgfAlignerOutput::free_phrases(last_phrases, last_n_phrases);

        printer.puts("}");

        return printer.get_text();
    } PGF_API_END

    return NULL;
}
|
||||
|
||||
// Linearizes `expr` in the given concrete language and returns the
// resulting phrases together with the tree node ids they come from.
//
// On success the result is a malloc'ed array with *n_phrases entries;
// the entries, their phrase texts and the array itself are malloc'ed
// (they are what PgfAlignerOutput::free_phrases releases with free()).
//
// If the linearization cannot be resolved, or the call is aborted
// between PGF_API_BEGIN and PGF_API_END, the result is NULL and
// *n_phrases is 0. The count is zeroed up front because callers read
// it without first checking the returned pointer.
PGF_API
PgfAlignmentPhrase **
pgf_align_words(PgfDB *db, PgfConcrRevision revision,
                PgfExpr expr, PgfPrintContext *ctxt,
                PgfMarshaller *m,
                size_t *n_phrases /* out */,
                PgfExn* err)
{
    *n_phrases = 0;

    PGF_API_BEGIN {
        DB_scope scope(db, READER_SCOPE);

        ref<PgfConcr> concr = db->revision2concr(revision);

        PgfAlignerOutput out;
        PgfLinearizer linearizer(ctxt, concr, m);
        m->match_expr(&linearizer, expr);
        linearizer.reverse_and_label(true);
        if (linearizer.resolve()) {
            linearizer.linearize(&out, 0);
            out.flush();
            return out.get_phrases(n_phrases);
        }
    } PGF_API_END

    return NULL;
}
|
||||
|
||||
@@ -825,4 +825,25 @@ pgf_graphviz_parse_tree(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfGraphvizOptions* opts,
|
||||
PgfExn *err);
|
||||
|
||||
PGF_API_DECL PgfText *
|
||||
pgf_graphviz_word_alignment(PgfDB *db, PgfConcrRevision* revisions, size_t n_revisions,
|
||||
PgfExpr expr, PgfPrintContext *ctxt,
|
||||
PgfMarshaller *m,
|
||||
PgfGraphvizOptions* opts,
|
||||
PgfExn* err);
|
||||
|
||||
/* One phrase of a word alignment, as returned by pgf_align_words().
 * The structure is allocated as a single malloc block whose size
 * depends on n_fids (fids is a flexible array member). */
typedef struct {
|
||||
/* The text of the phrase; a separately allocated PgfText. */
PgfText *phrase;
|
||||
/* Number of entries in fids. */
int n_fids;
|
||||
/* Ids of the tree nodes that the phrase was produced from. */
int fids[];
|
||||
} PgfAlignmentPhrase;
|
||||
|
||||
PGF_API_DECL
|
||||
PgfAlignmentPhrase **
|
||||
pgf_align_words(PgfDB *db, PgfConcrRevision revision,
|
||||
PgfExpr expr, PgfPrintContext *ctxt,
|
||||
PgfMarshaller *m,
|
||||
size_t *n_phrases /* out */,
|
||||
PgfExn* err);
|
||||
|
||||
#endif // PGF_H_
|
||||
|
||||
@@ -489,7 +489,26 @@ printName c fun =
|
||||
else return Nothing
|
||||
|
||||
alignWords :: Concr -> Expr -> [(String, [Int])]
|
||||
alignWords = error "TODO: alignWords"
|
||||
-- Linearizes the expression in the given concrete language and returns
-- each phrase together with the ids of the tree nodes it comes from.
-- The result is empty when the C function returns no phrases.
alignWords c e = unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  withForeignPtr marshaller $ \m ->
  alloca $ \p_n_phrases -> do
    c_phrases <- withPgfExn "alignWords" (pgf_align_words (c_db c) c_revision c_e nullPtr m p_n_phrases)
    -- A null result means that nothing was produced. The phrase count
    -- must not be trusted in that case, so don't read it at all.
    if c_phrases == nullPtr
      then return []
      else do
        n_phrases <- peek p_n_phrases
        arr <- peekArray (fromIntegral n_phrases) c_phrases
        free c_phrases
        mapM peekAlignmentPhrase arr
  where
    -- Copies one phrase into Haskell values and releases the C memory
    -- (both the phrase text and the structure itself).
    peekAlignmentPhrase :: Ptr PgfAlignmentPhrase -> IO (String, [Int])
    peekAlignmentPhrase ptr = do
      c_phrase <- (#peek PgfAlignmentPhrase, phrase) ptr
      phrase <- peekText c_phrase
      n_fids <- (#peek PgfAlignmentPhrase, n_fids) ptr
      (fids :: [CInt]) <- peekArray (fromIntegral (n_fids :: CInt)) (ptr `plusPtr` (#offset PgfAlignmentPhrase, fids))
      free c_phrase
      free ptr
      return (phrase, map fromIntegral fids)
|
||||
|
||||
gizaAlignment = error "TODO: gizaAlignment"
|
||||
|
||||
@@ -989,7 +1008,6 @@ bracketedLinearizeAll c e = unsafePerformIO $ do
|
||||
then writeIORef ref (False,[],[],all)
|
||||
else writeIORef ref (False,[],[],reverse bs:all)
|
||||
|
||||
|
||||
generateAll :: PGF -> Type -> [(Expr,Float)]
|
||||
generateAll p ty = error "TODO: generateAll"
|
||||
|
||||
@@ -1184,7 +1202,32 @@ graphvizParseTree c opts e =
|
||||
else peekText c_text
|
||||
|
||||
graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
|
||||
graphvizWordAlignment cs opts e = error "TODO: graphvizWordAlignment"
|
||||
-- Renders the word alignment of an expression across the given concrete
-- languages as Graphviz source. An empty list of languages gives an
-- empty string. All languages must belong to the same grammar,
-- otherwise a PGFError is thrown.
graphvizWordAlignment [] _ _ = ""
graphvizWordAlignment cs opts e =
  unsafePerformIO $
  withPgfConcrs cs $ \c_db c_revisions n_revisions ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  withForeignPtr marshaller $ \m ->
  withGraphvizOptions opts $ \c_opts ->
  bracket (render c_db c_revisions n_revisions c_e m c_opts) free $ \c_text ->
    if c_text /= nullPtr
      then peekText c_text
      else return ""
  where
    -- The FFI call, wrapped so that C-side errors become exceptions.
    render c_db c_revisions n_revisions c_e m c_opts =
      withPgfExn "graphvizWordAlignment" (pgf_graphviz_word_alignment c_db c_revisions n_revisions c_e nullPtr m c_opts)

    -- Stores the revisions of all languages in a temporary C array and
    -- runs the continuation with the common database, the array and
    -- its length. Every revision is kept alive while the continuation
    -- runs.
    withPgfConcrs concrs k =
      allocaArray total $ \base ->
        let fill _ db [] = k db base (fromIntegral total)
            fill slot db (x:xs)
              | db /= nullPtr && db /= c_db x =
                  throwIO (PGFError "graphvizWordAlignment" "The concrete languages must be from the same grammar")
              | otherwise =
                  withForeignPtr (c_revision x) $ \rev -> do
                    poke slot rev
                    fill (slot `plusPtr` (#size PgfConcrRevision)) (c_db x) xs
        in fill base nullPtr concrs
      where
        total = length concrs
|
||||
|
||||
|
||||
type Labels = Map.Map Fun [String]
|
||||
|
||||
@@ -51,6 +51,7 @@ data PgfMorphoCallback
|
||||
data PgfCohortsCallback
|
||||
data PgfPhrasetableIds
|
||||
data PgfExprEnum
|
||||
data PgfAlignmentPhrase
|
||||
|
||||
type Wrapper a = a -> IO (FunPtr a)
|
||||
type Dynamic a = FunPtr a -> a
|
||||
@@ -254,6 +255,8 @@ foreign import ccall pgf_bracketed_linearize :: Ptr PgfDB -> Ptr Concr -> Stable
|
||||
|
||||
foreign import ccall pgf_bracketed_linearize_all :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfLinearizationOutputIface -> Ptr PgfExn -> IO ()
|
||||
|
||||
foreign import ccall pgf_align_words :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr CSize -> Ptr PgfExn -> IO (Ptr (Ptr PgfAlignmentPhrase))
|
||||
|
||||
foreign import ccall pgf_parse :: Ptr PgfDB -> Ptr Concr -> StablePtr Type -> Ptr PgfMarshaller -> Ptr PgfText -> Ptr PgfExn -> IO (Ptr PgfExprEnum)
|
||||
|
||||
foreign import ccall "dynamic" callFetch :: Dynamic (Ptr PgfExprEnum -> Ptr PgfDB -> Ptr PgfUnmarshaller -> Ptr (#type prob_t) -> IO (StablePtr Expr))
|
||||
@@ -288,6 +291,8 @@ foreign import ccall pgf_graphviz_abstract_tree :: Ptr PgfDB -> Ptr PGF -> Stabl
|
||||
|
||||
foreign import ccall pgf_graphviz_parse_tree :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfGraphvizOptions -> Ptr PgfExn -> IO (Ptr PgfText)
|
||||
|
||||
foreign import ccall pgf_graphviz_word_alignment :: Ptr PgfDB -> Ptr (Ptr Concr) -> CSize -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfGraphvizOptions -> Ptr PgfExn -> IO (Ptr PgfText)
|
||||
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Texts
|
||||
|
||||
Reference in New Issue
Block a user