diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am
index 8cd82fb20..1912a89be 100644
--- a/src/runtime/c/Makefile.am
+++ b/src/runtime/c/Makefile.am
@@ -26,6 +26,8 @@ libpgf_la_SOURCES = \
 	pgf/typechecker.h \
 	pgf/linearizer.cxx \
 	pgf/linearizer.h \
+	pgf/aligner.cxx \
+	pgf/aligner.h \
 	pgf/parser.cxx \
 	pgf/parser.h \
 	pgf/graphviz.cxx \
diff --git a/src/runtime/c/pgf/aligner.cxx b/src/runtime/c/pgf/aligner.cxx
new file mode 100644
index 000000000..9ebd46d2f
--- /dev/null
+++ b/src/runtime/c/pgf/aligner.cxx
@@ -0,0 +1,171 @@
+#include "data.h"
+#include "printer.h"
+#include "aligner.h"
+
+// Collects the tokens produced by the linearizer into phrases, each
+// tagged with the ids of the tree nodes that produced it.
+PgfAlignerOutput::PgfAlignerOutput() : printer(NULL, 0, NULL)
+{
+    n_phrases = 0;
+    last_phrase = NULL;
+    phrases = NULL;
+    n_matches = 0;
+    bind = true;      // the first token takes the glue branch, so no flush precedes it
+    nonexist = false;
+}
+
+PgfAlignerOutput::~PgfAlignerOutput()
+{
+    free_phrases(phrases, n_phrases);
+}
+
+// Releases an array of phrases together with every phrase and every
+// phrase text in it. A NULL array is ignored.
+void PgfAlignerOutput::free_phrases(PgfAlignmentPhrase **phrases, size_t n_phrases)
+{
+    if (phrases) {
+        for (size_t i = 0; i < n_phrases; i++) {
+            free(phrases[i]->phrase);
+            free(phrases[i]);
+        }
+        free(phrases);
+    }
+}
+
+// Hands the collected phrases over to the caller and forgets them.
+// The result must be released with free_phrases(). If a nonexistent
+// form was encountered, NULL and a count of zero are returned.
+PgfAlignmentPhrase **PgfAlignerOutput::get_phrases(size_t *n_phrases)
+{
+    if (nonexist) {
+        *n_phrases = 0;
+        return NULL;
+    }
+
+    *n_phrases = this->n_phrases;
+    PgfAlignmentPhrase **res = phrases;
+    this->n_phrases = 0;
+    this->last_phrase = NULL;
+    this->phrases = NULL;
+    return res;
+}
+
+// Records fid as a parent of the current word and counts it as a
+// match when the previous phrase has the same parent.
+void PgfAlignerOutput::push_parent(int fid)
+{
+    parent_current.push_back(fid);
+
+    if (last_phrase != NULL) {
+        for (size_t i = 0; i < last_phrase->n_fids; i++) {
+            if (fid == last_phrase->fids[i]) {
+                n_matches++;
+                break;
+            }
+        }
+    }
+}
+
+// Appends a token to the current word. A token that follows a BIND
+// is glued to the current word; any other token flushes the word
+// collected so far and starts a new one.
+void PgfAlignerOutput::symbol_token(PgfText *tok)
+{
+    if (nonexist)
+        return;
+
+    // NOTE(review): assumes tokens are only emitted inside a phrase,
+    // i.e. that parent_stack is not empty here - confirm.
+    int fid = parent_stack.back();
+
+    if (bind) {
+        // here we glue tokens
+
+        bind = false;
+
+        bool found = false;
+        for (int current_fid : parent_current) {
+            if (fid == current_fid) {
+                found = true;
+                break;
+            }
+        }
+
+        // add the tree node id to the list of parents if it has not
+        // been added already.
+        if (!found) {
+            push_parent(fid);
+        }
+    } else {
+        // here we start a new (compound) word
+        flush();
+        parent_current.clear();
+        push_parent(fid);
+    }
+
+    printer.puts(tok);
+}
+
+void PgfAlignerOutput::begin_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun)
+{
+    parent_stack.push_back(fid);
+}
+
+void PgfAlignerOutput::end_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun)
+{
+    parent_stack.pop_back();
+}
+
+void PgfAlignerOutput::symbol_ne()
+{
+    nonexist = true;
+}
+
+void PgfAlignerOutput::symbol_bind()
+{
+    bind = true;
+}
+
+// Finishes the current word: it is merged into the previous phrase
+// when both have the same parents, otherwise it becomes a new phrase.
+void PgfAlignerOutput::flush()
+{
+    size_t n_fids = parent_current.size();
+
+    // No token has been emitted, so there is no word to store. This
+    // also keeps us from dereferencing a NULL last_phrase below.
+    if (n_fids == 0)
+        return;
+
+    if (last_phrase != NULL &&
+        n_matches == n_fids &&
+        n_matches == (size_t) last_phrase->n_fids) {
+        // if the current compound word has the same parents
+        // as the last one then we just combine them with a space
+
+        PgfText *phrase = printer.get_text();
+        printer.puts(last_phrase->phrase); free(last_phrase->phrase);
+        printer.puts(" ");
+        printer.puts(phrase); free(phrase);
+
+        last_phrase->phrase = printer.get_text();
+    } else {
+        // push the current word to the buffer of words
+
+        PgfAlignmentPhrase* phrase = (PgfAlignmentPhrase*)
+            malloc(sizeof(PgfAlignmentPhrase)+n_fids*sizeof(int));
+        phrase->phrase = printer.get_text();
+        phrase->n_fids = n_fids;
+        for (size_t i = 0; i < n_fids; i++) {
+            phrase->fids[i] = parent_current[i];
+        }
+
+        phrases = (PgfAlignmentPhrase**)
+            realloc(phrases, (n_phrases+1)*sizeof(PgfAlignmentPhrase*));
+        phrases[n_phrases++] = phrase;
+
+        last_phrase = phrase;
+    }
+
+    n_matches = 0;
+}
diff --git a/src/runtime/c/pgf/aligner.h b/src/runtime/c/pgf/aligner.h
new file mode 100644
index 000000000..5ed9ee559
--- /dev/null
+++ b/src/runtime/c/pgf/aligner.h
@@ -0,0 +1,40 @@
+#ifndef ALIGNER_H
+#define ALIGNER_H
+
+#include <vector>
+
+// Linearization output that groups tokens into phrases and records,
+// for each phrase, the ids of the tree nodes it was produced under.
+class PGF_INTERNAL_DECL PgfAlignerOutput : public PgfLinearizationOutputIface {
+public:
+    PgfAlignerOutput();
+    ~PgfAlignerOutput();
+
+    virtual void symbol_token(PgfText *tok);
+    virtual void begin_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun);
+    virtual void end_phrase(PgfText *cat, int fid, PgfText *ann, PgfText *fun);
+    virtual void symbol_ne();
+    virtual void symbol_bind();
+    virtual void flush();
+
+    // Transfers ownership of the collected phrases to the caller.
+    PgfAlignmentPhrase **get_phrases(size_t *n_phrases);
+
+    // Frees an array obtained from get_phrases().
+    static void free_phrases(PgfAlignmentPhrase **phrases, size_t n_phrases);
+
+private:
+    bool bind;        // the next token is glued to the current word
+    bool nonexist;    // a nonexistent form was seen
+    std::vector<int> parent_current;  // node ids of the current word
+    std::vector<int> parent_stack;    // node ids of the open phrases
+    size_t n_phrases;
+    PgfAlignmentPhrase *last_phrase;
+    PgfAlignmentPhrase **phrases;
+    size_t n_matches; // parents of the current word shared with last_phrase
+    PgfPrinter printer;
+
+    void push_parent(int fid);
+};
+
+#endif
diff --git a/src/runtime/c/pgf/pgf.cxx b/src/runtime/c/pgf/pgf.cxx
index 50c5e6f5e..9a74adf95 100644
--- a/src/runtime/c/pgf/pgf.cxx
+++ b/src/runtime/c/pgf/pgf.cxx
@@ -13,6 +13,7 @@
 #include "linearizer.h"
 #include "parser.h"
 #include "graphviz.h"
+#include "aligner.h"
 
 static void
 pgf_exn_clear(PgfExn* err)
@@ -2641,3 +2642,137 @@ pgf_graphviz_parse_tree(PgfDB *db, PgfConcrRevision revision,
 
     return NULL;
 }
+
+// Renders a Graphviz digraph that aligns the phrases of the
+// linearizations of expr in each of the given concrete syntaxes.
+// Revisions for which the linearization cannot be resolved are skipped.
+// The returned text must be released by the caller.
+PGF_API PgfText *
+pgf_graphviz_word_alignment(PgfDB *db, PgfConcrRevision* revisions, size_t n_revisions,
+                            PgfExpr expr, PgfPrintContext *ctxt,
+                            PgfMarshaller *m,
+                            PgfGraphvizOptions* opts,
+                            PgfExn* err)
+{
+    PGF_API_BEGIN {
+        DB_scope scope(db, READER_SCOPE);
+
+        PgfPrinter printer(NULL, 0, NULL);
+
+        printer.puts("digraph {\n");
+        printer.puts("rankdir=LR ;\n");
+        printer.puts("node [shape = record");
+        if (opts->leafFont != NULL && *opts->leafFont)
+            printer.nprintf(40, ", fontname = \"%s\"", opts->leafFont);
+        if (opts->leafColor != NULL && *opts->leafColor)
+            printer.nprintf(40, ", fontcolor = \"%s\"", opts->leafColor);
+        printer.puts("] ;\n\n");
+        if (opts->leafEdgeStyle != NULL && *opts->leafEdgeStyle)
+            printer.nprintf(40, "edge [style = %s];\n", opts->leafEdgeStyle);
+        printer.puts("\n");
+
+        size_t last_n_phrases = 0;
+        PgfAlignmentPhrase **last_phrases = NULL;
+        size_t last_i = 0;  // index of the revision that produced last_phrases
+        for (size_t i = 0; i < n_revisions; i++) {
+            ref<PgfConcr> concr = db->revision2concr(revisions[i]);
+
+            PgfAlignerOutput out;
+            PgfLinearizer linearizer(ctxt, concr, m);
+            m->match_expr(&linearizer, expr);
+            linearizer.reverse_and_label(true);
+            if (linearizer.resolve()) {
+                linearizer.linearize(&out, 0);
+                out.flush();
+
+                printer.nprintf(40, "  struct%zu[label=\"", i);
+
+                size_t n_phrases;
+                PgfAlignmentPhrase **phrases =
+                    out.get_phrases(&n_phrases);
+
+                for (size_t j = 0; j < n_phrases; j++) {
+                    PgfAlignmentPhrase* phrase = phrases[j];
+                    if (j > 0)
+                        printer.puts(" | ");
+                    printer.nprintf(16, "<n%zu> ", j);
+                    printer.puts(phrase->phrase);
+                }
+
+                printer.puts("\"] ;\n");
+
+                // Connect every phrase to the phrases of the previously
+                // rendered linearization that share a tree node with it.
+                if (last_phrases != NULL) {
+                    for (size_t j = 0; j < n_phrases; j++) {
+                        PgfAlignmentPhrase* phrase = phrases[j];
+
+                        for (size_t k = 0; k < phrase->n_fids; k++) {
+                            int fid = phrase->fids[k];
+
+                            for (size_t l = 0; l < last_n_phrases; l++) {
+                                PgfAlignmentPhrase* last_phrase = last_phrases[l];
+
+                                for (size_t r = 0; r < last_phrase->n_fids; r++) {
+                                    int last_fid = last_phrase->fids[r];
+                                    if (fid == last_fid) {
+                                        printer.nprintf(50, "struct%zu:n%zu:e -> struct%zu:n%zu:w ;\n",last_i,l,i,j);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+
+                PgfAlignerOutput::free_phrases(last_phrases, last_n_phrases);
+
+                last_n_phrases = n_phrases;
+                last_phrases = phrases;
+                last_i = i;
+            }
+        }
+
+        PgfAlignerOutput::free_phrases(last_phrases, last_n_phrases);
+
+        printer.puts("}");
+
+        return printer.get_text();
+    } PGF_API_END
+
+    return NULL;
+}
+
+// Linearizes expr and returns its phrases together with the ids of the
+// tree nodes that produced them. The number of phrases is stored in
+// *n_phrases; it is zero whenever NULL is returned. The caller owns the
+// array, every phrase in it and every phrase text.
+PGF_API
+PgfAlignmentPhrase **
+pgf_align_words(PgfDB *db, PgfConcrRevision revision,
+                PgfExpr expr, PgfPrintContext *ctxt,
+                PgfMarshaller *m,
+                size_t *n_phrases /* out */,
+                PgfExn* err)
+{
+    // Define the out-parameter on every path, including the ones
+    // that return NULL without raising an error.
+    *n_phrases = 0;
+
+    PGF_API_BEGIN {
+        DB_scope scope(db, READER_SCOPE);
+
+        ref<PgfConcr> concr = db->revision2concr(revision);
+
+        PgfAlignerOutput out;
+        PgfLinearizer linearizer(ctxt, concr, m);
+        m->match_expr(&linearizer, expr);
+        linearizer.reverse_and_label(true);
+        if (linearizer.resolve()) {
+            linearizer.linearize(&out, 0);
+            out.flush();
+            return out.get_phrases(n_phrases);
+        }
+    } PGF_API_END
+
+    return NULL;
+}
diff --git a/src/runtime/c/pgf/pgf.h b/src/runtime/c/pgf/pgf.h
index 8ddcf993a..fc82a2f16 100644
--- a/src/runtime/c/pgf/pgf.h
+++ b/src/runtime/c/pgf/pgf.h
@@ -825,4 +825,32 @@ pgf_graphviz_parse_tree(PgfDB *db, PgfConcrRevision revision,
                         PgfGraphvizOptions* opts,
                         PgfExn *err);
 
+/* Renders the word alignment between the linearizations of expr
+ * in the given concrete syntaxes as a Graphviz digraph. */
+PGF_API_DECL PgfText *
+pgf_graphviz_word_alignment(PgfDB *db, PgfConcrRevision* revisions, size_t n_revisions,
+                            PgfExpr expr, PgfPrintContext *ctxt,
+                            PgfMarshaller *m,
+                            PgfGraphvizOptions* opts,
+                            PgfExn* err);
+
+/* A phrase of a linearization together with the ids of the
+ * tree nodes that produced it. */
+typedef struct {
+    PgfText *phrase;
+    int n_fids;
+    int fids[];
+} PgfAlignmentPhrase;
+
+/* Returns the phrases of the linearization of expr. The number of
+ * phrases is stored in *n_phrases. The caller must free every
+ * phrase text, every phrase and finally the array itself. */
+PGF_API_DECL
+PgfAlignmentPhrase **
+pgf_align_words(PgfDB *db, PgfConcrRevision revision,
+                PgfExpr expr, PgfPrintContext *ctxt,
+                PgfMarshaller *m,
+                size_t *n_phrases /* out */,
+                PgfExn* err);
+
 #endif // PGF_H_
diff --git a/src/runtime/haskell/PGF2.hsc b/src/runtime/haskell/PGF2.hsc
index 3b59e85d2..fe0d1f181 100644
--- a/src/runtime/haskell/PGF2.hsc
+++ b/src/runtime/haskell/PGF2.hsc
@@ -489,7 +489,26 @@ printName c fun =
       else return Nothing
 
 alignWords :: Concr -> Expr -> [(String, [Int])]
-alignWords = error "TODO: alignWords"
+alignWords c e = unsafePerformIO $
+  withForeignPtr (c_revision c) $ \c_revision ->
+  bracket (newStablePtr e) freeStablePtr $ \c_e ->
+  withForeignPtr marshaller $ \m ->
+  alloca $ \p_n_phrases -> do
+    c_phrases <- withPgfExn "alignWords" (pgf_align_words (c_db c) c_revision c_e nullPtr m p_n_phrases)
+    n_phrases <- peek p_n_phrases
+    arr <- peekArray (fromIntegral n_phrases) c_phrases
+    free c_phrases
+    mapM peekAlignmentPhrase arr
+  where
+    peekAlignmentPhrase :: Ptr PgfAlignmentPhrase -> IO (String, [Int])
+    peekAlignmentPhrase ptr = do
+      c_phrase <- (#peek PgfAlignmentPhrase, phrase) ptr
+      phrase <- peekText c_phrase
+      n_fids <- (#peek PgfAlignmentPhrase, n_fids) ptr
+      (fids :: [CInt]) <- peekArray (fromIntegral (n_fids :: CInt)) (ptr `plusPtr` (#offset PgfAlignmentPhrase, fids))
+      free c_phrase
+      free ptr
+      return (phrase, map fromIntegral fids)
 
 gizaAlignment = error "TODO: gizaAlignment"
 
@@ -989,7 +1008,6 @@ bracketedLinearizeAll c e = unsafePerformIO $ do
         then writeIORef ref (False,[],[],all)
         else writeIORef ref (False,[],[],reverse bs:all)
 
-
 generateAll :: PGF -> Type -> [(Expr,Float)]
 generateAll p ty = error "TODO: generateAll"
 
@@ -1184,7 +1202,32 @@ graphvizParseTree c opts e =
       else peekText c_text
 
 graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
-graphvizWordAlignment cs opts e = error "TODO: graphvizWordAlignment"
+graphvizWordAlignment [] opts e = ""
+graphvizWordAlignment cs opts e =
+  unsafePerformIO $
+  withPgfConcrs cs $ \c_db c_revisions n_revisions ->
+  bracket (newStablePtr e) freeStablePtr $ \c_e ->
+  withForeignPtr marshaller $ \m ->
+  withGraphvizOptions opts $ \c_opts ->
+  bracket (withPgfExn "graphvizWordAlignment" (pgf_graphviz_word_alignment c_db c_revisions n_revisions c_e nullPtr m c_opts)) free $ \c_text ->
+    if c_text == nullPtr
+      then return ""
+      else peekText c_text
+  where
+    withPgfConcrs cs f =
+      allocaArray len $ \array ->
+        pokeAll array nullPtr array cs
+      where
+        len = length cs
+
+        pokeAll ptr c_db0 array [] = f c_db0 array (fromIntegral len)
+        pokeAll ptr c_db0 array (c:cs)
+          | c_db0 /= nullPtr && c_db0 /= c_db c =
+              throwIO (PGFError "graphvizWordAlignment" "The concrete languages must be from the same grammar")
+          | otherwise =
+              withForeignPtr (c_revision c) $ \c_revision -> do
+                poke ptr c_revision
+                pokeAll (ptr `plusPtr` (#size PgfConcrRevision)) (c_db c) array cs
 
 type Labels = Map.Map Fun [String]
 
diff --git a/src/runtime/haskell/PGF2/FFI.hsc b/src/runtime/haskell/PGF2/FFI.hsc
index 726df010a..d05f82708 100644
--- a/src/runtime/haskell/PGF2/FFI.hsc
+++ b/src/runtime/haskell/PGF2/FFI.hsc
@@ -51,6 +51,7 @@ data PgfMorphoCallback
 data PgfCohortsCallback
 data PgfPhrasetableIds
 data PgfExprEnum
+data PgfAlignmentPhrase
 
 type Wrapper a = a -> IO (FunPtr a)
 type Dynamic a = FunPtr a -> a
@@ -254,6 +255,8 @@ foreign import ccall pgf_bracketed_linearize :: Ptr PgfDB -> Ptr Concr -> Stable
 
 foreign import ccall pgf_bracketed_linearize_all :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfLinearizationOutputIface -> Ptr PgfExn -> IO ()
 
+foreign import ccall pgf_align_words :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr CSize -> Ptr PgfExn -> IO (Ptr (Ptr PgfAlignmentPhrase))
+
 foreign import ccall pgf_parse :: Ptr PgfDB -> Ptr Concr -> StablePtr Type -> Ptr PgfMarshaller -> Ptr PgfText -> Ptr PgfExn -> IO (Ptr PgfExprEnum)
 
 foreign import ccall "dynamic" callFetch :: Dynamic (Ptr PgfExprEnum -> Ptr PgfDB -> Ptr PgfUnmarshaller -> Ptr (#type prob_t) -> IO (StablePtr Expr))
@@ -288,6 +291,8 @@ foreign import ccall pgf_graphviz_abstract_tree :: Ptr PgfDB -> Ptr PGF -> Stabl
 
 foreign import ccall pgf_graphviz_parse_tree :: Ptr PgfDB -> Ptr Concr -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfGraphvizOptions -> Ptr PgfExn -> IO (Ptr PgfText)
 
+foreign import ccall pgf_graphviz_word_alignment :: Ptr PgfDB -> Ptr (Ptr Concr) -> CSize -> StablePtr Expr -> Ptr PgfPrintContext -> Ptr PgfMarshaller -> Ptr PgfGraphvizOptions -> Ptr PgfExn -> IO (Ptr PgfText)
+
 -----------------------------------------------------------------------
 -- Texts
 