gf-core/src/runtime/haskell/PGF2.hsc

{-# LANGUAGE ExistentialQuantification, DeriveDataTypeable, ScopedTypeVariables #-}
-------------------------------------------------
-- |
-- Module      : PGF2
-- Maintainer  : Krasimir Angelov
-- Stability   : stable
-- Portability : portable
--
-- This module is an Application Programming Interface to
-- load and interpret grammars compiled in the Portable Grammar Format (PGF).
-- The PGF format is produced as the final output from the GF compiler.
-- The API is meant to be used for embedding GF grammars in Haskell
-- programs
-------------------------------------------------

module PGF2 (-- * PGF
             PGF,readPGF,bootNGF,readNGF,newNGF,writePGF,showPGF,
             readPGFWithProbs, bootNGFWithProbs,

             -- * Abstract syntax
             AbsName,abstractName,globalFlag,abstractFlag,

             -- ** Categories
             Cat,categories,categoryContext,categoryProbability,

             -- ** Functions
             Fun, functions, functionsByPrefix, functionsByCat,
             functionType, functionIsConstructor, functionProbability,

             -- ** Expressions
             Expr(..), Literal(..), showExpr, readExpr,
             mkAbs,    unAbs, Var,
             mkApp,    unApp, unapply,
             mkVar,    unVar,
             mkStr,    unStr,
             mkInt,    unInt,
             mkDouble, unDouble,
             mkFloat,  unFloat,
             mkMeta,   unMeta,
             -- extra
             exprSize, exprFunctions, exprSubstitute, exprProbability,

             -- ** Types
             Type(..), Hypo, BindType(..), startCat,
             readType, showType, readContext, showContext,
             mkType, unType,
             mkHypo, mkDepHypo, mkImplHypo,

             -- ** Type checking
             -- | Dynamically-built expressions should always be type-checked before using in other functions,
             -- as the exceptions thrown by using invalid expressions may not catchable.
             checkExpr, inferExpr, checkType, checkContext,

             -- ** Computing
             compute,

             -- ** Generation
             generateAll, generateAllDepth,
             generateAllFrom, generateAllFromDepth,
             generateRandom, generateRandomDepth,
             generateRandomFrom, generateRandomFromDepth,

             -- ** Morphological Analysis
             MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
             filterBest, filterLongest,

             -- ** Visualizations
             GraphvizOptions(..), graphvizDefaults,
             graphvizAbstractTree, graphvizParseTree,
             Labels, getDepLabels,
             graphvizDependencyTree, conlls2latexDoc, getCncDepLabels,
             graphvizWordAlignment,

             -- * Concrete syntax
             ConcName,Concr,languages,concreteName,languageCode,concreteFlag,

             -- ** Linearization
             linearize, linearizeAll, tabularLinearize, tabularLinearizeAll,
             FId, BracketedString(..), showBracketedString, flattenBracketedString,
             bracketedLinearize, bracketedLinearizeAll,
             hasLinearization, categoryFields,
             printName, alignWords, gizaAlignment,

             -- ** Parsing
             ParseOutput(..), parse, parseWithHeuristics, complete,

             -- * Exceptions
             PGFError(..),

             -- * Auxiliaries
             readProbabilitiesFromFile
             ) where

import Prelude hiding ((<>))

import PGF2.Expr
import PGF2.FFI

import Foreign
import Foreign.C
import Control.Monad(forM,forM_)
import Control.Exception(bracket,mask_,throwIO)
import System.IO.Unsafe(unsafePerformIO, unsafeInterleaveIO)
import System.Random
import qualified Data.Map as Map
import Data.IORef
import Data.List(intersperse,groupBy)
import Data.Char(isUpper,isSpace,isPunctuation)
import Data.Maybe(maybe)
import Text.PrettyPrint

#include <pgf/pgf.h>

-- | Reads a PGF file and keeps it in memory.
readPGF :: FilePath -> IO PGF
readPGF fpath = readPGFWithProbs fpath Nothing

readPGFWithProbs :: FilePath -> Maybe (Map.Map String Double) -> IO PGF
readPGFWithProbs fpath mb_probs =
  withCString fpath $ \c_fpath ->
  alloca $ \p_revision ->
  withProbsCallback mb_probs $ \c_pcallback ->
  mask_ $ do
    c_db <- withPgfExn "readPGF" (pgf_read_pgf c_fpath p_revision c_pcallback)
    c_revision <- peek p_revision
    fptr <- newForeignPtrEnv pgf_free_revision c_db c_revision
    langs <- getConcretes c_db fptr
    return (PGF c_db fptr langs)

-- | Reads a PGF file and stores the unpacked data in an NGF file
-- ready to be shared with other process, or used for quick startup.
-- The NGF file is platform dependent and should not be copied
-- between machines.
bootNGF :: FilePath -> FilePath -> IO PGF
bootNGF pgf_path ngf_path = bootNGFWithProbs pgf_path Nothing ngf_path

bootNGFWithProbs :: FilePath -> Maybe (Map.Map String Double) -> FilePath -> IO PGF
bootNGFWithProbs pgf_path mb_probs ngf_path =
  withCString pgf_path $ \c_pgf_path ->
  withCString ngf_path $ \c_ngf_path ->
  alloca $ \p_revision ->
  withProbsCallback mb_probs $ \c_pcallback ->
  mask_ $ do
    c_db <- withPgfExn "bootNGF" (pgf_boot_ngf c_pgf_path c_ngf_path p_revision c_pcallback)
    c_revision <- peek p_revision
    fptr <- newForeignPtrEnv pgf_free_revision c_db c_revision
    langs <- getConcretes c_db fptr
    return (PGF c_db fptr langs)

withProbsCallback :: Maybe (Map.Map String Double) -> (Ptr PgfProbsCallback -> IO a) -> IO a
withProbsCallback Nothing      f = f nullPtr
withProbsCallback (Just probs) f =
  allocaBytes (#size PgfProbsCallback) $ \callback ->
  bracket (wrapProbsCallback getProb) freeHaskellFunPtr $ \fptr -> do
    (#poke PgfProbsCallback, fn) callback fptr
    f callback
  where
    getProb _ c_name = do
      name <- peekText c_name
      case Map.lookup name probs of
        Nothing -> return nan
        Just p  -> return p

    nan = log (-1)

-- | Reads the grammar from an already booted NGF file.
-- The function fails if the file does not exist.
readNGF :: FilePath -> IO PGF
readNGF fpath =
  withCString fpath $ \c_fpath ->
  alloca $ \p_revision ->
  mask_ $ do
    c_db <- withPgfExn "readNGF" (pgf_read_ngf c_fpath p_revision)
    c_revision <- peek p_revision
    fptr <- newForeignPtrEnv pgf_free_revision c_db c_revision
    langs <- getConcretes c_db fptr
    return (PGF c_db fptr langs)

-- | Creates a new NGF file with a grammar with the given abstract_name.
-- Aside from the name, the grammar is otherwise empty but can be later
-- populated with new functions and categories. If fpath is Nothing then
-- the file is not stored on the disk but only in memory.
newNGF :: AbsName -> Maybe FilePath -> Int -> IO PGF
newNGF abs_name mb_fpath init_size =
  withText abs_name $ \c_abs_name ->
  maybe (\f -> f nullPtr) withCString mb_fpath $ \c_fpath ->
  alloca $ \p_revision ->
  mask_ $ do
    c_db <- withPgfExn "newNGF" (pgf_new_ngf c_abs_name c_fpath (fromIntegral init_size) p_revision)
    c_revision <- peek p_revision
    fptr <- newForeignPtrEnv pgf_free_revision c_db c_revision
    return (PGF c_db fptr Map.empty)

writePGF :: FilePath -> PGF -> Maybe [ConcName] -> IO ()
writePGF fpath p mb_langs =
  withCString fpath $ \c_fpath ->
  withForeignPtr (a_revision p) $ \c_revision ->
  maybe (\f -> f nullPtr) (withLangs []) mb_langs $ \c_langs ->
    withPgfExn "writePGF" (pgf_write_pgf c_fpath (a_db p) c_revision c_langs)
  where
    withLangs clangs []           f = withArray0 nullPtr (reverse clangs) f
    withLangs clangs (lang:langs) f = withText lang $ \clang -> withLangs (clang:clangs) langs f

showPGF :: PGF -> String
showPGF p =
  render (text "abstract" <+> ppAbstractName p <+> char '{' $$
          nest 2 (ppStartCat p $$
                  ppAbsCats p $$
                  ppAbsFuns p) $$
          char '}' $$
          Map.foldrWithKey (\name concr doc -> ppConcr name concr $$ doc) empty (languages p))
  where
    ppStartCat p =
      unsafePerformIO $
      withForeignPtr (a_revision p) $ \c_revision -> do
        c_text <- withPgfExn "showPGF" (pgf_print_start_cat_internal (a_db p) c_revision)
        if c_text == nullPtr
          then return empty
          else do s <- peekText c_text
                  return (text "flags" <+> text s)

    ppAbstractName p =
      unsafePerformIO $
      withForeignPtr (a_revision p) $ \c_revision ->
      bracket (withPgfExn "showPGF" (pgf_abstract_name (a_db p) c_revision)) free $ \c_text ->
      bracket (pgf_print_ident c_text) free $ \c_text ->
        fmap text (peekText c_text)

    ppAbsCats p = unsafePerformIO $ do
      ref <- newIORef empty
      (allocaBytes (#size PgfItor) $ \itor ->
       bracket (wrapItorCallback (getCategories ref)) freeHaskellFunPtr $ \fptr ->
       withForeignPtr (a_revision p) $ \c_revision -> do
         (#poke PgfItor, fn) itor fptr
         withPgfExn "showPGF" (pgf_iter_categories (a_db p) c_revision itor))
      readIORef ref
      where
        getCategories :: IORef Doc -> ItorCallback
        getCategories ref itor key val exn = do
          def <- bracket (pgf_print_category_internal val) free peekText
          modifyIORef ref $ (\doc -> doc $$ text def)

    ppAbsFuns p = unsafePerformIO $ do
      ref <- newIORef empty
      (allocaBytes (#size PgfItor) $ \itor ->
       bracket (wrapItorCallback (getFunctions ref)) freeHaskellFunPtr $ \fptr ->
       withForeignPtr (a_revision p) $ \c_revision -> do
         (#poke PgfItor, fn) itor fptr
         withPgfExn "showPGF" (pgf_iter_functions (a_db p) c_revision itor))
      readIORef ref
      where
        getFunctions :: IORef Doc -> ItorCallback
        getFunctions ref itor key val exn = do
          def <- bracket (pgf_print_function_internal val) free peekText
          modifyIORef ref (\doc -> doc $$ text def)

    ppConcr name c = unsafePerformIO $ do
      (seq_ids,doc3) <- prepareSequences c -- run first to update all seq_id
      doc1 <- ppLincats seq_ids c
      doc2 <- ppLins seq_ids c
      pgf_release_phrasetable_ids seq_ids
      return (text "concrete" <+> text name <+> char '{' $$
              nest 2 (doc1 $$
                      doc2 $$
                      (text "sequences" <+> char '{' $$
                       nest 2 doc3 $$
                       char '}')) $$
              char '}')

    ppLincats seq_ids c = do
      ref <- newIORef empty
      (allocaBytes (#size PgfItor) $ \itor ->
       bracket (wrapItorCallback (getLincats ref)) freeHaskellFunPtr $ \fptr ->
       withForeignPtr (c_revision c) $ \c_revision -> do
         (#poke PgfItor, fn) itor fptr
         withPgfExn "showPGF" (pgf_iter_lincats (a_db p) c_revision itor))
      readIORef ref
      where
        getLincats :: IORef Doc -> ItorCallback
        getLincats ref itor key val exn = do
          name   <- bracket (pgf_print_ident key) free $ \c_text -> do
                      peekText c_text
          (n_fields,n_lindefs,n_linrefs) <-
                    allocaBytes (3*(#size size_t)) $ \pcounts -> do
                      pgf_get_lincat_counts_internal val pcounts
                      n_fields  <- peekElemOff pcounts 0
                      n_lindefs <- peekElemOff pcounts 1
                      n_linrefs <- peekElemOff pcounts 2
                      return (n_fields,n_lindefs,n_linrefs)
          fields <- forM (init [0..n_fields]) $ \i -> do
                      pgf_get_lincat_field_internal val i >>= peekText
          let def = text "lincat" <+> (text name <+> char '=' <+> char '[' $$
                                       nest 2 (vcat (map (text.show) fields)) $$
                                       char ']')
          modifyIORef ref $ (\doc -> doc $$ def)
          forM_ (init [0..n_lindefs]) $ \i -> do
            def <- bracket (pgf_print_lindef_internal seq_ids val i) free $ \c_text -> do
                     fmap text (peekText c_text)
            modifyIORef ref (\doc -> doc $$ text "lindef" <+> def)
          forM_ (init [0..n_linrefs]) $ \i -> do
            def <- bracket (pgf_print_linref_internal seq_ids val i) free $ \c_text -> do
                     fmap text (peekText c_text)
            modifyIORef ref $ (\doc -> doc $$ text "linref" <+> def)

    ppLins seq_ids c = do
      ref <- newIORef empty
      (allocaBytes (#size PgfItor) $ \itor ->
       bracket (wrapItorCallback (getLins ref)) freeHaskellFunPtr $ \fptr ->
       withForeignPtr (c_revision c) $ \c_revision -> do
         (#poke PgfItor, fn) itor fptr
         withPgfExn "showPGF" (pgf_iter_lins (a_db p) c_revision itor))
      readIORef ref
      where
        getLins :: IORef Doc -> ItorCallback
        getLins ref itor key val exn = do
          n_prods <- pgf_get_lin_get_prod_count val
          forM_ (init [0..n_prods]) $ \i -> do
            def <- bracket (pgf_print_lin_internal seq_ids val i) free $ \c_text -> do
                     fmap text (peekText c_text)
            modifyIORef ref (\doc -> doc $$ text "lin" <+> def)
            return ()

    prepareSequences c = do
      ref <- newIORef empty
      seq_ids <- (allocaBytes (#size PgfSequenceItor) $ \itor ->
                  bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr ->
                  withForeignPtr (c_revision c) $ \c_revision -> do
                    (#poke PgfSequenceItor, fn) itor fptr
                    withPgfExn "showPGF" (pgf_iter_sequences (a_db p) c_revision itor nullPtr))
      doc <- readIORef ref
      return (seq_ids, doc)
      where
        getSequences :: IORef Doc -> SequenceItorCallback
        getSequences ref itor seq_id val exn = do
          def <- bracket (pgf_print_sequence_internal seq_id val) free $ \c_text -> do
                     fmap text (peekText c_text)
          modifyIORef ref $ (\doc -> doc $$ def)
          return 0

-- | The abstract language name is the name of the top-level
-- abstract module
abstractName :: PGF -> AbsName
abstractName p =
  unsafePerformIO $
  withForeignPtr (a_revision p) $ \c_revision ->
  bracket (withPgfExn "abstractName" (pgf_abstract_name (a_db p) c_revision)) free $ \c_text -> do
    peekText c_text

-- | The start category is defined in the grammar with
-- the \'startcat\' flag. This is usually the sentence category
-- but it is not necessary. Despite that there is a start category
-- defined you can parse with any category. The start category
-- definition is just for convenience.
startCat :: PGF -> Type
startCat p =
  unsafePerformIO $
  withForeignPtr (a_revision p) $ \c_revision -> do
    c_typ <- withPgfExn "startCat" (pgf_start_cat (a_db p) c_revision unmarshaller)
    typ <- deRefStablePtr c_typ
    freeStablePtr c_typ
    return typ

-- | The type of a function
functionType :: PGF -> Fun -> Maybe Type
functionType p fn =
  unsafePerformIO $
  withForeignPtr (a_revision p) $ \c_revision ->
  withText fn $ \c_fn -> do
    c_typ <- withPgfExn "functionType" (pgf_function_type (a_db p) c_revision c_fn unmarshaller)
    if c_typ == castPtrToStablePtr nullPtr
      then return Nothing
      else do typ <- deRefStablePtr c_typ
              freeStablePtr c_typ
              return (Just typ)

functionIsConstructor :: PGF -> Fun -> Bool
functionIsConstructor p fun =
  unsafePerformIO $
  withText fun $ \c_fun ->
  withForeignPtr (a_revision p) $ \c_revision ->
      do res <- withPgfExn "functionIsConstructor" (pgf_function_is_constructor (a_db p) c_revision c_fun)
         return (res /= 0)

functionProbability :: PGF -> Fun -> Float
functionProbability p fun =
  unsafePerformIO $
  withText fun $ \c_fun ->
  withForeignPtr (a_revision p) $ \c_revision ->
      withPgfExn "functionProbability" (pgf_function_prob (a_db p) c_revision c_fun)

exprProbability :: PGF -> Expr -> Float
exprProbability p e =
  unsafePerformIO $
  withForeignPtr (a_revision p) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
    withPgfExn "exprProbability" (pgf_expr_prob (a_db p) c_revision c_e marshaller)

checkExpr :: PGF -> Expr -> Type -> Either String Expr
checkExpr = error "TODO: checkExpr"

-- | Tries to infer the type of an expression. Note that
-- even if the expression is type correct it is not always
-- possible to infer its type in the GF type system.
-- In this case the function returns an error.
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
inferExpr p e =
  unsafePerformIO $
  withForeignPtr (a_revision p) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  alloca $ \p_e ->
  allocaBytes (#size PgfExn) $ \c_exn -> do
    poke p_e c_e
    c_ty <- pgf_infer_expr (a_db p) c_revision p_e marshaller unmarshaller c_exn
    ex_type <- (#peek PgfExn, type) c_exn :: IO (#type PgfExnType)
    case ex_type of
      (#const PGF_EXN_NONE) -> do
         c_e <- peek p_e
         e  <- deRefStablePtr c_e
         freeStablePtr c_e
         ty <- deRefStablePtr c_ty
         freeStablePtr c_ty
         return (Right (e,ty))
      (#const PGF_EXN_SYSTEM_ERROR) -> do
         errno <- (#peek PgfExn, code) c_exn
         c_msg <- (#peek PgfExn, msg) c_exn
         mb_fpath <- if c_msg == nullPtr
                       then return Nothing
                       else fmap Just (peekCString c_msg)
         ioError (errnoToIOError "inferExpr" (Errno errno) Nothing mb_fpath)
      (#const PGF_EXN_PGF_ERROR) -> do
         c_msg <- (#peek PgfExn, msg) c_exn
         msg <- peekCString c_msg
         free c_msg
         return (Left msg)
      _ -> throwIO (PGFError "inferExpr" "An unidentified error occurred")

-- | Check whether a type is consistent with the abstract
-- syntax of the grammar.
checkType :: PGF -> Type -> Either String Type
checkType pgf ty = Right ty

-- | Check whether a context is consistent with the abstract
-- syntax of the grammar.
checkContext :: PGF -> [Hypo] -> Either String [Hypo]
checkContext pgf ctxt = Right ctxt

compute :: PGF -> Expr -> Expr
compute = error "TODO: compute"

concreteName :: Concr -> ConcName
concreteName c =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (withPgfExn "concreteName" (pgf_concrete_name (c_db c) c_revision)) free $ \c_text ->
    peekText c_text

languageCode :: Concr -> Maybe String
languageCode c =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (withPgfExn "languageCode" (pgf_concrete_language_code (c_db c) c_revision)) free $ \c_text ->
    if c_text == nullPtr
      then return Nothing
      else fmap Just (peekText c_text)

concreteFlag :: Concr -> String -> Maybe Literal
concreteFlag c name =
  unsafePerformIO $
  withText name $ \c_name ->
  withForeignPtr (c_revision c) $ \c_revision -> do
    c_lit <- withPgfExn "concreteFlag" (pgf_get_concrete_flag (c_db c) c_revision c_name unmarshaller)
    if c_lit == castPtrToStablePtr nullPtr
      then return Nothing
      else do lit <- deRefStablePtr c_lit
              freeStablePtr c_lit
              return (Just lit)

printName :: Concr -> Fun -> Maybe String
printName c fun =
  unsafePerformIO $
  withText fun $ \c_fun ->
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (withPgfExn "printName" (pgf_get_printname (c_db c) c_revision c_fun)) free $ \c_name -> do
    if c_name /= nullPtr
      then fmap Just $ peekText c_name
      else return Nothing

alignWords :: Concr -> Expr -> [(String, [Int])]
alignWords c e = unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  alloca $ \p_n_phrases -> do
    c_phrases <-  withPgfExn "alignWords" (pgf_align_words (c_db c) c_revision c_e nullPtr marshaller p_n_phrases)
    n_phrases <- peek p_n_phrases
    arr <- peekArray (fromIntegral n_phrases) c_phrases
    free c_phrases
    mapM peekAlignmentPhrase arr
  where
    peekAlignmentPhrase :: Ptr PgfAlignmentPhrase -> IO (String, [Int])
    peekAlignmentPhrase ptr = do
      c_phrase <- (#peek PgfAlignmentPhrase, phrase) ptr
      phrase <- peekText c_phrase
      n_fids <- (#peek PgfAlignmentPhrase, n_fids) ptr
      (fids :: [CInt]) <- peekArray (fromIntegral (n_fids :: CInt)) (ptr `plusPtr` (#offset PgfAlignmentPhrase, fids))
      free c_phrase
      free ptr
      return (phrase, map fromIntegral fids)

gizaAlignment = error "TODO: gizaAlignment"

-----------------------------------------------------------------------------
-- Functions using Concr
-- Morpho analyses, parsing & linearization

-- | This triple is returned by all functions that deal with
-- the grammar's lexicon. Its first element is the name of an abstract
-- lexical function which can produce a given word or
-- a multiword expression (i.e. this is the lemma).
-- After that follows a string which describes
-- the particular inflection form.
--
-- The last element is a logarithm from the
-- the probability of the function. The probability is not
-- conditionalized on the category of the function. This makes it
-- possible to compare the likelihood of two functions even if they
-- have different types.
type MorphoAnalysis = (Fun,String,Float)

-- | 'lookupMorpho' takes a string which must be a single word or
-- a multiword expression. It then computes the list of all possible
-- morphological analyses.
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
lookupMorpho c sent = unsafePerformIO $ do
  ref <- newIORef []
  (withText sent $ \c_sent ->
   allocaBytes (#size PgfMorphoCallback) $ \itor ->
   bracket (wrapMorphoCallback (getMorphology ref)) freeHaskellFunPtr $ \fptr ->
   withForeignPtr (c_revision c) $ \c_revision -> do
     (#poke PgfMorphoCallback, fn) itor fptr
     withPgfExn "lookupMorpho" (pgf_lookup_morpho (c_db c) c_revision c_sent itor))
  fmap reverse (readIORef ref)

-- | 'lookupCohorts' takes an arbitrary string an produces
-- a list of all places where lexical items from the grammar have been
-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
-- where @start-end@ identifies the span in the text and @ans@ is
-- the list of possible morphological analyses similar to 'lookupMorpho'.
--
-- The list is sorted first by the @start@ position and after than
-- by the @end@ position. This can be used for instance if you want to
-- filter only the longest matches.
lookupCohorts :: Concr -> String -> [(Int,String,[MorphoAnalysis],Int)]
lookupCohorts c sent = unsafePerformIO $ do
  morpho_ref  <- newIORef []
  cohorts_ref <- newIORef []
  (withText sent $ \c_sent ->
   allocaBytes (#size PgfCohortsCallback) $ \itor ->
   bracket (wrapMorphoCallback (getMorphology morpho_ref)) freeHaskellFunPtr $ \morpho_fptr ->
   bracket (wrapCohortsCallback (getCohorts morpho_ref cohorts_ref)) freeHaskellFunPtr $ \cohorts_fptr ->
   withForeignPtr (c_revision c) $ \c_revision -> do
     (#poke PgfCohortsCallback, morpho.fn) itor morpho_fptr
     (#poke PgfCohortsCallback, fn) itor cohorts_fptr
     withPgfExn "lookupCohorts" (pgf_lookup_cohorts (c_db c) c_revision c_sent itor))
  fmap reverse (readIORef cohorts_ref)
  where
    getCohorts morpho_ref cohorts_ref _ start' end' exn = do
      ans <- readIORef morpho_ref
      let start = fromIntegral start'
          end   = fromIntegral end'
          word  = take (end-start) (drop start sent)
      modifyIORef cohorts_ref ((:) (start, word, reverse ans, end))
      writeIORef morpho_ref []

getMorphology ref _ c_name c_field c_prob exn = do
  name  <- peekText c_name
  field <- peekText c_field
  let prob = realToFrac c_prob
      ann = (name,field,prob)
  modifyIORef ref ((:) ann)

filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterBest ans =
  reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
  where
    iterate v0 []                      []  res = res
    iterate v0 []                      new res = iterate v0 new []  res
    iterate v0 ((_,v,conf,    []):old) new res =
      case compare v0 v of
        LT                                    -> res
        EQ                                    -> iterate v0 old new (merge conf res)
        GT                                    -> iterate v  old new conf
    iterate v0 ((_,v,conf,an:ans):old) new res = iterate v0 old (insert (v+valueOf an) conf an ans [] new) res

    valueOf (_,_,[],_) = 2
    valueOf _          = 1

    insert v conf an@(start,_,_,end) ans l_new []                               =
      match start v conf ans ((end,v,comb conf an,filter end ans):l_new) []
    insert v conf an@(start,_,_,end) ans l_new (new@(end0,v0,conf0,ans0):r_new) =
      case compare end0 end of
        LT         -> insert v conf an ans (new:l_new) r_new
        EQ -> case compare v0 v of
                LT -> match start v conf ans ((end,v,                     conf0,ans0):    l_new) r_new
                EQ -> match start v conf ans ((end,v,merge (comb conf an) conf0,ans0):    l_new) r_new
                GT -> match start v conf ans ((end,v,comb conf an,              ans0):    l_new) r_new
        GT         -> match start v conf ans ((end,v,comb conf an,    filter end ans):new:l_new) r_new

    match start0 v conf (an@(start,_,_,end):ans) l_new r_new
      | start0 == start                 = insert v conf an ans l_new r_new
    match start0 v conf ans l_new r_new = revOn l_new r_new

    comb ((start0,w0,an0,end0):conf) (start,w,an,end)
      | end0 == start && (unk w0 an0 || unk w an) = (start0,w0++w,[],end):conf
    comb conf an = an:conf

    filter end                     [] = []
    filter end (next@(start,_,_,_):ans)
      | end <= start                  = next:ans
      | otherwise                     = filter end ans

    revOn []     ys = ys
    revOn (x:xs) ys = revOn xs (x:ys)

    merge []                           ans                          = ans
    merge ans                          []                           = ans
    merge (an1@(start1,_,_,end1):ans1) (an2@(start2,_,_,end2):ans2) =
      case compare (start1,end1) (start2,end2) of
        GT -> an1 : merge ans1 (an2:ans2)
        EQ -> an1 : merge ans1 ans2
        LT -> an2 : merge (an1:ans1) ans2

filterLongest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
filterLongest []       = []
filterLongest (an:ans) = longest an ans
  where
    longest prev                   [] = [prev]
    longest prev@(start0,_,_,end0) (next@(start,_,_,end):ans)
      | start0 == start               = longest next ans
      | otherwise                     = filter prev (next:ans)

    filter prev                      [] = [prev]
    filter prev@(start0,w0,an0,end0) (next@(start,w,an,end):ans)
      | end0 == start && (unk w0 an0 || unk w an)
                                        = filter (start0,w0++w,[],end) ans
      | end0 <=  start                  = prev : longest next ans
      | otherwise                       = filter prev ans

unk w [] | any (not . isPunctuation) w = True
unk _ _                                = False

fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
fullFormLexicon c = unsafePerformIO $ do
  ref <- newIORef []
  (allocaBytes (#size PgfSequenceItor) $ \itor1 ->
   bracket (wrapSequenceItorCallback (getSequences ref)) freeHaskellFunPtr $ \fptr1 ->
   allocaBytes (#size PgfMorphoCallback) $ \itor2 ->
   bracket (wrapMorphoCallback (getMorphology ref)) freeHaskellFunPtr $ \fptr2 ->
   withForeignPtr (c_revision c) $ \c_revision -> do
     (#poke PgfSequenceItor,   fn) itor1 fptr1
     (#poke PgfMorphoCallback, fn) itor2 fptr2
     seq_ids <- withPgfExn "fullFormLexicon" (pgf_iter_sequences (c_db c) c_revision itor1 itor2)
     pgf_release_phrasetable_ids seq_ids)
  fmap (reverse2 []) (readIORef ref)
  where
    getSequences ref _ seq_id val exn = do
      bracket (pgf_sequence_get_text_internal val) free $ \c_text ->
        if c_text == nullPtr
          then return 1
          else do form <- peekText c_text
                  case form of
                    [] -> return 1
                    _  -> do modifyIORef ref $ (\lexicon -> (form, []) : lexicon)
                             return 0

    getMorphology ref _ c_name c_field c_prob exn = do
      name  <- peekText c_name
      field <- peekText c_field
      let prob = realToFrac c_prob
          ann = (name,field,prob)
      modifyIORef ref (\((form,anns) : lexicon) -> (form,ann:anns) : lexicon)

    reverse2 ys []           = ys
    reverse2 ys ((x1,x2):xs) = reverse2 ((x1,reverse x2):ys) xs


-- | This data type encodes the different outcomes which you could get from the parser.
data ParseOutput a
  = ParseFailed Int String         -- ^ The integer is the position in number of unicode characters where the parser failed.
                                   -- The string is the token where the parser have failed.
  | ParseOk a                      -- ^ If the parsing and the type checking are successful
                                   -- we get the abstract syntax trees as either a list or a chart.
  | ParseIncomplete                -- ^ The sentence is not complete.

parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
parse c ty sent =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
  withText sent $ \c_sent -> do
    c_enum <- withPgfExn "parse" (pgf_parse (c_db c) c_revision c_ty marshaller c_sent)
    exprs <- enumerateExprs (c_db c) c_enum
    return (ParseOk exprs)

enumerateExprs c_db c_enum_ptr = do
  c_enum <- newForeignPtrEnv pgf_free_expr_enum unmarshaller c_enum_ptr
  c_fetch <- (#peek PgfExprEnumVtbl, fetch) =<< (#peek PgfExprEnum, vtbl) c_enum_ptr
  unsafeInterleaveIO (fetchLazy c_fetch c_enum)
  where
    fetchLazy c_fetch c_enum =
      withForeignPtr c_enum $ \c_enum_ptr ->
      alloca $ \p_prob -> do
        c_expr <- callFetch c_fetch c_enum_ptr c_db unmarshaller p_prob
        if c_expr == castPtrToStablePtr nullPtr
          then do return []
          else do expr <- deRefStablePtr c_expr
                  prob <- peek p_prob
                  rest <- unsafeInterleaveIO (fetchLazy c_fetch c_enum)
                  return ((expr,prob) : rest)

parseWithHeuristics :: Concr      -- ^ the language with which we parse
                    -> Type       -- ^ the start category
                    -> String     -- ^ the input sentence
                    -> Double     -- ^ the heuristic factor.
                                  -- A negative value tells the parser
                                  -- to lookup up the default from
                                  -- the grammar flags
                    -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
                                  -- ^ a list of callbacks for literal categories.
                                  -- The arguments of the callback are:
                                  -- the index of the constituent for the literal category;
                                  -- the input sentence; the current offset in the sentence.
                                  -- If a literal has been recognized then the output should
                                  -- be Just (expr,probability,end_offset)
                    -> ParseOutput [(Expr,Float)]
parseWithHeuristics = error "TODO: parseWithHeuristics"

-- | Returns possible completions of the current partial input.
complete :: Concr      -- ^ the language with which we parse
         -> Type       -- ^ the start category
         -> String     -- ^ the input sentence (excluding token being completed)
         -> String     -- ^ prefix (partial token being completed)
         -> ParseOutput [(String, Fun, Cat, Float)]  -- ^ (token, category, function, probability)
complete = error "TODO: complete"

-- | Returns True if there is a linearization defined for that function in that language
hasLinearization :: Concr -> Fun -> Bool
hasLinearization c name =
  unsafePerformIO $
  withText name $ \c_name ->
  withForeignPtr (c_revision c) $ \c_revision -> do
    c_res <- withPgfExn "hasLinearization" (pgf_has_linearization (c_db c) c_revision c_name)
    return (c_res /= 0)

categoryFields :: Concr -> Cat -> Maybe [String]
categoryFields c cat =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  withText cat $ \c_cat ->
  alloca $ \p_n_fields ->
  bracket (withPgfExn "categoryFields" (pgf_category_fields (c_db c) c_revision c_cat p_n_fields)) free $ \c_fields ->
    if c_fields == nullPtr
      then return Nothing
      else do n_fields <- peek p_n_fields
              fs <- peekFields n_fields c_fields
              return (Just fs)
  where
    peekFields n_fields c_fields
      | n_fields == 0 = return []
      | otherwise     = do c_text <- peek c_fields
                           f  <- peekText c_text
                           free c_text
                           fs <- peekFields (n_fields-1) (c_fields `plusPtr` (#size PgfText*))
                           return (f:fs)

-- | Linearizes an expression as a string in the language
linearize :: Concr -> Expr -> String
linearize c e =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  bracket (withPgfExn "linearize" (pgf_linearize (c_db c) c_revision c_e nullPtr marshaller)) free $ \c_text ->
    if c_text == nullPtr
      then return ""
      else peekText c_text

-- | Generates all possible linearizations of an expression
linearizeAll :: Concr -> Expr -> [String]
linearizeAll c e =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  alloca $ \p_n_fields ->
  bracket (withPgfExn "linearizeAll" (pgf_linearize_all (c_db c) c_revision c_e nullPtr marshaller p_n_fields)) free $ \c_texts -> do
    n_fields <- peek p_n_fields
    peekTexts n_fields c_texts
  where
    peekTexts 0 c_texts = return []
    peekTexts n c_texts = do
      c_text <- peek c_texts
      text <- peekText c_text
      free c_text
      texts <- peekTexts (n-1) (c_texts `plusPtr` (#size PgfText*))
      return (text:texts)

-- | Generates a table of linearizations for an expression
tabularLinearize :: Concr -> Expr -> [(String, String)]
tabularLinearize c e =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  bracket (withPgfExn "tabularLinearize" (pgf_tabular_linearize (c_db c) c_revision c_e nullPtr marshaller)) free $ \c_texts -> do
    if c_texts == nullPtr
      then return []
      else peekTable c_texts
  where
    peekTable c_texts = do
      c_field <- peekElemOff c_texts 0
      c_lin   <- peekElemOff c_texts 1
      if c_field == nullPtr && c_lin == nullPtr
        then return []
        else do field <- peekText c_field
                free c_field
                lin   <- peekText c_lin
                free c_lin
                table <- peekTable (c_texts `plusPtr` (2*(#size PgfText*)))
                return ((field,lin):table)

-- | Generates a table of linearizations for an expression
tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]]
tabularLinearizeAll c e =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  bracket (withPgfExn "tabularLinearizeAll" (pgf_tabular_linearize_all (c_db c) c_revision c_e nullPtr marshaller)) free peekTables
  where
    peekTables c_texts = do
      c_field <- peekElemOff c_texts 0
      c_lin   <- peekElemOff c_texts 1
      if c_field == nullPtr && c_lin == nullPtr
        then return [[]]
        else if c_field == nullPtr
               then do tables <- peekTables (c_texts `plusPtr` (#size PgfText*))
                       return ([]:tables)
               else do field <- peekText c_field
                       free c_field
                       lin   <- peekText c_lin
                       free c_lin
                       (table:tables) <- peekTables (c_texts `plusPtr` (2*(#size PgfText*)))
                       return (((field,lin):table):tables)

type FId = Int

-- | BracketedString represents a sentence that is linearized
-- as usual but we also want to retain the ''brackets'' that
-- mark the beginning and the end of each constituent.
data BracketedString
  = Leaf String                                                                -- ^ this is the leaf i.e. a single token
  | BIND                                                                       -- ^ the surrounding tokens must be bound together
  | Bracket Cat {-# UNPACK #-} !FId String Fun [BracketedString]
                                                                               -- ^ this is a bracket. The 'Cat' is the category of
                                                                               -- the phrase. The 'FId' is an unique identifier for
                                                                               -- every phrase in the sentence. For context-free grammars
                                                                               -- i.e. without discontinuous constituents this identifier
                                                                               -- is also unique for every bracket. When there are discontinuous
                                                                               -- phrases then the identifiers are unique for every phrase but
                                                                               -- not for every bracket since the bracket represents a constituent.
                                                                               -- The different constituents could still be distinguished by using
                                                                               -- the analysis string. If the grammar is reduplicating
                                                                               -- then the constituent indices will be the same for all brackets
                                                                               -- that represents the same constituent.
                                                                               -- The 'Fun' is the name of the abstract function that generated
                                                                               -- this phrase.

-- | Renders the bracketed string as a string where
-- the brackets are shown as @(S ...)@ where
-- @S@ is the category.
showBracketedString :: BracketedString -> String
showBracketedString = render . ppBracketedString

ppBracketedString (Leaf t) = text t
ppBracketedString BIND     = text "&+"
ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))

-- | Extracts the sequence of tokens from the bracketed string
flattenBracketedString :: BracketedString -> [String]
flattenBracketedString (Leaf w)              = [w]
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss

bracketedLinearize :: Concr -> Expr -> [BracketedString]
bracketedLinearize c e = unsafePerformIO $ do
  ref <- newIORef (False,[],[])
  (withForeignPtr (c_revision c) $ \c_revision ->
   bracket (newStablePtr e) freeStablePtr $ \c_e ->
   allocaBytes (#size PgfLinearizationOutputIface) $ \c_out ->
   allocaBytes (#size PgfLinearizationOutputIfaceVtbl) $ \vtbl ->
   bracket (wrapSymbol1 (symbol_token ref)) freeHaskellFunPtr $ \c_symbol_token ->
   bracket (wrapSymbol2 (begin_phrase ref)) freeHaskellFunPtr $ \c_begin_phrase ->
   bracket (wrapSymbol2 (end_phrase ref)) freeHaskellFunPtr $ \c_end_phrase ->
   bracket (wrapSymbol0 (symbol_bind ref)) freeHaskellFunPtr $ \c_symbol_bind ->
   bracket (wrapSymbol0 (symbol_ne ref)) freeHaskellFunPtr $ \c_symbol_ne -> do
   bracket (wrapSymbol0 (flush ref)) freeHaskellFunPtr $ \c_flush -> do
     (#poke PgfLinearizationOutputIfaceVtbl, symbol_token) vtbl c_symbol_token
     (#poke PgfLinearizationOutputIfaceVtbl, begin_phrase) vtbl c_begin_phrase
     (#poke PgfLinearizationOutputIfaceVtbl, end_phrase) vtbl c_end_phrase
     (#poke PgfLinearizationOutputIfaceVtbl, symbol_bind) vtbl c_symbol_bind
     (#poke PgfLinearizationOutputIfaceVtbl, symbol_ne) vtbl c_symbol_ne
     (#poke PgfLinearizationOutputIfaceVtbl, flush) vtbl c_flush
     (#poke PgfLinearizationOutputIface, vtbl) c_out vtbl
     withPgfExn "bracketedLinearize" (pgf_bracketed_linearize (c_db c) c_revision c_e nullPtr marshaller c_out))
  (ne,_,bs) <- readIORef ref
  (if ne
     then return []
     else return (reverse bs))
  where
    symbol_token ref _ c_text = do
      (ne,stack,bs) <- readIORef ref
      token <- peekText c_text
      writeIORef ref (ne,stack,Leaf token : bs)

    begin_phrase ref _ c_cat c_fid c_ann c_fun = do
      (ne,stack,bs) <- readIORef ref
      writeIORef ref (ne,bs:stack,[])

    end_phrase ref _ c_cat c_fid c_ann c_fun = do
      (ne,bs':stack,bs) <- readIORef ref
      if null bs
        then writeIORef ref (ne,stack, bs')
        else do cat <- peekText c_cat
                let fid = fromIntegral c_fid
                ann <- peekText c_ann
                fun <- peekText c_fun
                writeIORef ref (ne,stack,Bracket cat fid ann fun (reverse bs) : bs')

    symbol_bind ref _ = do
      (ne,stack,bs) <- readIORef ref
      writeIORef ref (ne,stack,BIND : bs)

    symbol_ne ref _ = do
      (ne,stack,bs) <- readIORef ref
      writeIORef ref (True,stack,bs)

    flush _ _ = return ()

bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
bracketedLinearizeAll c e = unsafePerformIO $ do
  ref <- newIORef (False,[],[],[])
  (withForeignPtr (c_revision c) $ \c_revision ->
   bracket (newStablePtr e) freeStablePtr $ \c_e ->
   allocaBytes (#size PgfLinearizationOutputIface) $ \c_out ->
   allocaBytes (#size PgfLinearizationOutputIfaceVtbl) $ \vtbl ->
   bracket (wrapSymbol1 (symbol_token ref)) freeHaskellFunPtr $ \c_symbol_token ->
   bracket (wrapSymbol2 (begin_phrase ref)) freeHaskellFunPtr $ \c_begin_phrase ->
   bracket (wrapSymbol2 (end_phrase ref)) freeHaskellFunPtr $ \c_end_phrase ->
   bracket (wrapSymbol0 (symbol_bind ref)) freeHaskellFunPtr $ \c_symbol_bind ->
   bracket (wrapSymbol0 (symbol_ne ref)) freeHaskellFunPtr $ \c_symbol_ne -> do
   bracket (wrapSymbol0 (flush ref)) freeHaskellFunPtr $ \c_flush -> do
     (#poke PgfLinearizationOutputIfaceVtbl, symbol_token) vtbl c_symbol_token
     (#poke PgfLinearizationOutputIfaceVtbl, begin_phrase) vtbl c_begin_phrase
     (#poke PgfLinearizationOutputIfaceVtbl, end_phrase) vtbl c_end_phrase
     (#poke PgfLinearizationOutputIfaceVtbl, symbol_bind) vtbl c_symbol_bind
     (#poke PgfLinearizationOutputIfaceVtbl, symbol_ne) vtbl c_symbol_ne
     (#poke PgfLinearizationOutputIfaceVtbl, flush) vtbl c_flush
     (#poke PgfLinearizationOutputIface, vtbl) c_out vtbl
     withPgfExn "bracketedLinearizeAll" (pgf_bracketed_linearize_all (c_db c) c_revision c_e nullPtr marshaller c_out))
  (_,_,_,all) <- readIORef ref
  return all
  where
    symbol_token ref _ c_text = do
      (ne,stack,bs,all) <- readIORef ref
      token <- peekText c_text
      writeIORef ref (ne,stack,Leaf token : bs,all)

    begin_phrase ref _ c_cat c_fid c_ann c_fun = do
      (ne,stack,bs,all) <- readIORef ref
      writeIORef ref (ne,bs:stack,[],all)

    end_phrase ref _ c_cat c_fid c_ann c_fun = do
      (ne,bs':stack,bs,all) <- readIORef ref
      if null bs
        then writeIORef ref (ne,stack,bs',all)
        else do cat <- peekText c_cat
                let fid = fromIntegral c_fid
                ann <- peekText c_ann
                fun <- peekText c_fun
                writeIORef ref (ne,stack,Bracket cat fid ann fun (reverse bs) : bs',all)

    symbol_bind ref _ = do
      (ne,stack,bs,all) <- readIORef ref
      writeIORef ref (ne,stack,BIND : bs,all)

    symbol_ne ref _ = do
      (ne,stack,bs,all) <- readIORef ref
      writeIORef ref (True,[],[],all)

    flush ref _ = do
      (ne,_,bs,all) <- readIORef ref
      if ne
        then writeIORef ref (False,[],[],all)
        else writeIORef ref (False,[],[],reverse bs:all)

generateAll :: PGF -> Type -> [(Expr,Float)]
generateAll p ty = generateAllDepth p ty maxBound

generateAllDepth :: PGF -> Type -> Int -> [(Expr,Float)]
generateAllDepth p ty dp =
  unsafePerformIO $
  bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
  withForeignPtr (a_revision p) $ \c_revision ->
  mask_ $ do
    c_enum <- withPgfExn "generateAllDepth" (pgf_generate_all (a_db p) c_revision c_ty (fromIntegral dp) marshaller unmarshaller)
    enumerateExprs (a_db p) c_enum

generateAllFrom :: PGF -> Expr -> [(Expr,Float)]
generateAllFrom p ty = generateAllFromDepth p ty maxBound

generateAllFromDepth :: PGF -> Expr -> Int -> [(Expr,Float)]
generateAllFromDepth p ty = error "TODO: generateFromDepth"

-- | Generates a potentially infinite list of random
-- abstract syntax expressions. This is usefull for tree bank generation
-- which after that can be used for grammar testing.
generateRandom :: RandomGen g => g -> PGF -> Type -> [(Expr,Float)]
generateRandom g pgf ty = generateRandomDepth g pgf ty maxBound

generateRandomDepth :: RandomGen g => g -> PGF -> Type -> Int -> [(Expr,Float)]
generateRandomDepth g p ty dp =
  let (seed,_) = random g
  in generate seed
  where
    generate seed =
      unsafePerformIO $
      bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
      withForeignPtr (a_revision p) $ \c_revision ->
      alloca $ \p_seed ->
      alloca $ \p_prob ->
      mask_ $ do
        poke p_seed seed
        c_expr <- withPgfExn "generateRandomDepth" (pgf_generate_random (a_db p) c_revision c_ty (fromIntegral dp) p_seed p_prob marshaller unmarshaller)
        if castStablePtrToPtr c_expr == nullPtr
          then return []
          else do expr <- deRefStablePtr c_expr
                  freeStablePtr c_expr
                  seed <- peek p_seed
                  prob <- peek p_prob
                  return ((expr,prob):generate seed)

generateRandomFrom :: RandomGen g => g -> PGF -> Expr -> [(Expr,Float)]
generateRandomFrom g p e = generateRandomFromDepth g p e maxBound

generateRandomFromDepth :: RandomGen g => g -> PGF -> Expr -> Int -> [(Expr,Float)]
generateRandomFromDepth g p e dp =
  let (seed,_) = random g
  in generate seed
  where
    generate seed =
      unsafePerformIO $
      bracket (newStablePtr e) freeStablePtr $ \c_e ->
      withForeignPtr (a_revision p) $ \c_revision ->
      alloca $ \p_seed ->
      alloca $ \p_prob ->
      mask_ $ do
        poke p_seed seed
        c_expr <- withPgfExn "generateRandomFromDepth" (pgf_generate_random_from (a_db p) c_revision c_e (fromIntegral dp) p_seed p_prob marshaller unmarshaller)
        if castStablePtrToPtr c_expr == nullPtr
          then return []
          else do expr <- deRefStablePtr c_expr
                  freeStablePtr c_expr
                  seed <- peek p_seed
                  prob <- peek p_prob
                  return ((expr,prob):generate seed)

-- | List of all functions defined in the abstract syntax
categories :: PGF -> [Cat]
categories p =
  unsafePerformIO $ do
    ref <- newIORef []
    (allocaBytes (#size PgfItor) $ \itor ->
     bracket (wrapItorCallback (getCategories ref)) freeHaskellFunPtr $ \fptr ->
     withForeignPtr (a_revision p) $ \c_revision -> do
      (#poke PgfItor, fn) itor fptr
      withPgfExn "categories" (pgf_iter_categories (a_db p) c_revision itor)
      cs <- readIORef ref
      return (reverse cs))
  where
    getCategories :: IORef [String] -> ItorCallback
    getCategories ref itor key _ exn = do
      names <- readIORef ref
      name  <- peekText key
      writeIORef ref $ (name : names)

categoryContext :: PGF -> Cat -> Maybe [Hypo]
categoryContext p cat =
  unsafePerformIO $
  withText cat $ \c_cat ->
  alloca $ \p_n_hypos ->
  withForeignPtr (a_revision p) $ \c_revision ->
  mask_ $ do
    c_hypos <- withPgfExn "categoryContext" (pgf_category_context (a_db p) c_revision c_cat p_n_hypos unmarshaller)
    if c_hypos == nullPtr
      then return Nothing
      else do n_hypos <- peek p_n_hypos
              hypos <- peekHypos c_hypos 0 n_hypos
              free c_hypos
              return (Just hypos)
  where
    peekHypos :: Ptr PgfTypeHypo -> CSize -> CSize -> IO [Hypo]
    peekHypos c_hypo i n
      | i < n     = do c_cat <- (#peek PgfTypeHypo, cid) c_hypo
                       cat <- peekText c_cat
                       free c_cat
                       c_ty <- (#peek PgfTypeHypo, type) c_hypo
                       ty <- deRefStablePtr c_ty
                       freeStablePtr c_ty
                       bt  <- fmap unmarshalBindType ((#peek PgfTypeHypo, bind_type) c_hypo)
                       hs <- peekHypos (plusPtr c_hypo (#size PgfTypeHypo)) (i+1) n
                       return ((bt,cat,ty) : hs)
      | otherwise = return []

categoryProbability :: PGF -> Cat -> Float
categoryProbability p cat =
  unsafePerformIO $
  withText cat $ \c_cat ->
  withForeignPtr (a_revision p) $ \c_revision ->
      withPgfExn "categoryProbability" (pgf_category_prob (a_db p) c_revision c_cat)

-- | List of all functions defined in the abstract syntax
functions :: PGF -> [Fun]
functions p =
  unsafePerformIO $ do
    ref <- newIORef []
    (allocaBytes (#size PgfItor) $ \itor ->
     bracket (wrapItorCallback (getFunctions ref)) freeHaskellFunPtr $ \fptr ->
     withForeignPtr (a_revision p) $ \c_revision -> do
      (#poke PgfItor, fn) itor fptr
      withPgfExn "functions" (pgf_iter_functions (a_db p) c_revision itor)
      fs <- readIORef ref
      return (reverse fs))
  where
    getFunctions :: IORef [String] -> ItorCallback
    getFunctions ref itor key _ exn = do
      names <- readIORef ref
      name  <- peekText key
      writeIORef ref $ (name : names)

-- | List of all functions whose names start with a given prefix
functionsByPrefix :: PGF -> String -> [Fun]
functionsByPrefix p prefix =
  unsafePerformIO $ do
    ref <- newIORef []
    (withText prefix $ \c_prefix ->
     allocaBytes (#size PgfItor) $ \itor ->
     bracket (wrapItorCallback (getFunctions ref)) freeHaskellFunPtr $ \fptr ->
     withForeignPtr (a_revision p) $ \c_revision -> do
      (#poke PgfItor, fn) itor fptr
      withPgfExn "functions" (pgf_iter_functions_by_prefix (a_db p) c_revision c_prefix itor)
      fs <- readIORef ref
      return (reverse fs))
  where
    getFunctions :: IORef [String] -> ItorCallback
    getFunctions ref itor key _ exn = do
      names <- readIORef ref
      name  <- peekText key
      writeIORef ref $ (name : names)

-- | List of all functions defined in the abstract syntax
functionsByCat :: PGF -> Cat -> [Fun]
functionsByCat p cat =
  unsafePerformIO $ do
    ref <- newIORef []
    (withText cat $ \c_cat ->
     allocaBytes (#size PgfItor) $ \itor ->
     bracket (wrapItorCallback (getFunctions ref)) freeHaskellFunPtr $ \fptr ->
     withForeignPtr (a_revision p) $ \c_revision -> do
      (#poke PgfItor, fn) itor fptr
      withPgfExn "functionsByCat" (pgf_iter_functions_by_cat (a_db p) c_revision c_cat itor)
      fs <- readIORef ref
      return (reverse fs))
  where
    getFunctions :: IORef [String] -> ItorCallback
    getFunctions ref itor key _ exn = do
      names <- readIORef ref
      name  <- peekText key
      writeIORef ref $ (name : names)

globalFlag :: PGF -> String -> Maybe Literal
globalFlag p name =
  unsafePerformIO $
  withText name $ \c_name ->
  withForeignPtr (a_revision p) $ \c_revision -> do
    c_lit <- withPgfExn "globalFlag" (pgf_get_global_flag (a_db p) c_revision c_name unmarshaller)
    if c_lit == castPtrToStablePtr nullPtr
      then return Nothing
      else do lit <- deRefStablePtr c_lit
              freeStablePtr c_lit
              return (Just lit)

abstractFlag :: PGF -> String -> Maybe Literal
abstractFlag p name =
  unsafePerformIO $
  withText name $ \c_name ->
  withForeignPtr (a_revision p) $ \c_revision -> do
    c_lit <- withPgfExn "abstractFlag" (pgf_get_abstract_flag (a_db p) c_revision c_name unmarshaller)
    if c_lit == castPtrToStablePtr nullPtr
      then return Nothing
      else do lit <- deRefStablePtr c_lit
              freeStablePtr c_lit
              return (Just lit)

-----------------------------------------------------------------------------
-- Graphviz

data GraphvizOptions = GraphvizOptions {noLeaves :: Bool,
                                        noFun :: Bool,
                                        noCat :: Bool,
                                        noDep :: Bool,
                                        nodeFont :: String,
                                        leafFont :: String,
                                        nodeColor :: String,
                                        leafColor :: String,
                                        nodeEdgeStyle :: String,
                                        leafEdgeStyle :: String
                                       }

graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" ""

withGraphvizOptions :: GraphvizOptions -> (Ptr PgfGraphvizOptions -> IO a) -> IO a
withGraphvizOptions opts f =
  allocaBytes (#size PgfGraphvizOptions) $ \c_opts ->
  withCString (nodeFont opts) $ \c_nodeFont ->
  withCString (leafFont opts) $ \c_leafFont ->
  withCString (nodeColor opts) $ \c_nodeColor ->
  withCString (leafColor opts) $ \c_leafColor ->
  withCString (nodeEdgeStyle opts) $ \c_nodeEdgeStyle ->
  withCString (leafEdgeStyle opts) $ \c_leafEdgeStyle -> do
    (#poke PgfGraphvizOptions, noLeaves) c_opts (if noLeaves opts then 1 else 0 :: CInt)
    (#poke PgfGraphvizOptions, noFun)    c_opts (if noFun    opts then 1 else 0 :: CInt)
    (#poke PgfGraphvizOptions, noCat)    c_opts (if noCat    opts then 1 else 0 :: CInt)
    (#poke PgfGraphvizOptions, noDep)    c_opts (if noDep    opts then 1 else 0 :: CInt)
    (#poke PgfGraphvizOptions, nodeFont) c_opts c_nodeFont
    (#poke PgfGraphvizOptions, leafFont) c_opts c_leafFont
    (#poke PgfGraphvizOptions, nodeColor) c_opts c_nodeColor
    (#poke PgfGraphvizOptions, leafColor) c_opts c_leafColor
    (#poke PgfGraphvizOptions, nodeEdgeStyle) c_opts c_nodeEdgeStyle
    (#poke PgfGraphvizOptions, leafEdgeStyle) c_opts c_leafEdgeStyle
    f c_opts

-- | Renders an abstract syntax tree in a Graphviz format.
graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String
graphvizAbstractTree p opts e =
  unsafePerformIO $
  withForeignPtr (a_revision p) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  withGraphvizOptions opts $ \c_opts ->
  bracket (withPgfExn "graphvizAbstractTree" (pgf_graphviz_abstract_tree (a_db p) c_revision c_e marshaller c_opts)) free $ \c_text ->
    peekText c_text

graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String
graphvizParseTree c opts e =
  unsafePerformIO $
  withForeignPtr (c_revision c) $ \c_revision ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  withGraphvizOptions opts $ \c_opts ->
  bracket (withPgfExn "graphvizParseTree" (pgf_graphviz_parse_tree  (c_db c) c_revision c_e nullPtr marshaller c_opts)) free $ \c_text ->
    if c_text == nullPtr
      then return ""
      else peekText c_text

graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
graphvizWordAlignment [] opts e = ""
graphvizWordAlignment cs opts e =
  unsafePerformIO $
  withPgfConcrs cs $ \c_db c_revisions n_revisions ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  withGraphvizOptions opts $ \c_opts ->
  bracket (withPgfExn "graphvizWordAlignment" (pgf_graphviz_word_alignment c_db c_revisions n_revisions c_e nullPtr marshaller c_opts)) free $ \c_text ->
    if c_text == nullPtr
      then return ""
      else peekText c_text
  where
    withPgfConcrs cs f =
      allocaArray len $ \array ->
        pokeAll array nullPtr array cs
      where
        len = length cs

        pokeAll ptr c_db0 array []     = f c_db0 array (fromIntegral len)
        pokeAll ptr c_db0 array (c:cs)
          | c_db0 /= nullPtr && c_db0 /= c_db c =
                throwIO (PGFError "graphvizWordAlignment" "The concrete languages must be from the same grammar")
          | otherwise =
              withForeignPtr (c_revision c) $ \c_revision -> do
                poke ptr c_revision
                pokeAll (ptr `plusPtr` (#size PgfConcrRevision)) (c_db c) array cs


type Labels = Map.Map Fun [String]

getDepLabels :: String -> Labels
getDepLabels s = Map.fromList [(f,ls) | f:ls <- map words (lines s)]

-- | Visualize word dependency tree.
graphvizDependencyTree
  :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
  -> Bool -- ^ Include extra information (debug)
  -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
  -> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
  -> Concr
  -> Expr
  -> String -- ^ Rendered output in the specified format
graphvizDependencyTree format debug mlab mclab concr t = error "TODO: graphvizDependencyTree"

---------------------- should be a separate module?

-- visualization with latex output. AR Nov 2015

conlls2latexDoc :: [String] -> String
conlls2latexDoc =
  render .
  latexDoc .
  vcat .
  intersperse (text "" $+$ app "vspace" (text "4mm")) .
  map conll2latex .
  filter (not . null)

conll2latex :: String -> Doc
conll2latex = ppLaTeX . conll2latex' . parseCoNLL

conll2latex' :: CoNLL -> [LaTeX]
conll2latex' = dep2latex . conll2dep'

data Dep = Dep {
    wordLength  :: Int -> Double        -- length of word at position int       -- was: fixed width, millimetres (>= 20.0)
  , tokens      :: [(String,String)]    -- word, pos (0..)
  , deps        :: [((Int,Int),String)] -- from, to, label
  , root        :: Int                  -- root word position
  }

-- some general measures
defaultWordLength = 20.0  -- the default fixed width word length, making word 100 units
defaultUnit       = 0.2   -- unit in latex pictures, 0.2 millimetres
spaceLength       = 10.0
charWidth = 1.8

wsize rwld  w  = 100 * rwld w + spaceLength                   -- word length, units
wpos rwld i    = sum [wsize rwld j | j <- [0..i-1]]           -- start position of the i'th word
wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]]    -- distance between words x and y
labelheight h  = h + arcbase + 3    -- label just above arc; 25 would put it just below
labelstart c   = c - 15.0           -- label starts 15u left of arc centre
arcbase        = 30.0               -- arcs start and end 40u above the bottom
arcfactor r    = r * 600            -- reduction of arc size from word distance
xyratio        = 3                  -- width/height ratio of arcs

putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
putArc frwld height x y label = [oval,arrowhead,labelling] where
  oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
  arrowhead = Put (endp,arcbase + 5) (ArrowDown 5)   -- downgoing arrow 5u above the arc base
  labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
  dxy  = wdist frwld x y             -- distance between words, >>= 20.0
  ndxy = 100 * rwld * fromIntegral height  -- distance that is indep of word length
  hdxy = dxy / 2                     -- half the distance
  wdth = dxy - (arcfactor rwld)/dxy  -- longer arcs are wider in proportion
  hght = ndxy / (xyratio * rwld)      -- arc height is independent of word length
  begp = min x y                     -- begin position of oval
  ctr  = wpos frwld begp + hdxy + (if x < y then 20 else  10)  -- LR arcs are farther right from center of oval
  endp = (if x < y then (+) else (-)) ctr (wdth/2)            -- the point of the arrow
  rwld = 0.5 ----

dep2latex :: Dep -> [LaTeX]
dep2latex d =
  [Comment (unwords (map fst (tokens d))),
   Picture defaultUnit (width,height) (
     [Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))]   -- words
  ++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))]   -- pos tags 15u above bottom
  ++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d]    -- arcs and labels
  ++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
  ++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
  )]
 where
   wld i  = wordLength d i  -- >= 20.0
   rwld i = (wld i) / defaultWordLength       -- >= 1.0
   aheight x y = depth (min x y) (max x y) + 1    ---- abs (x-y)
   arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
   depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
     [] -> 0
     uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
   width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
   height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))

type CoNLL = [[String]]
parseCoNLL :: String -> CoNLL
parseCoNLL = map words . lines

--conll2dep :: String -> Dep
--conll2dep = conll2dep' . parseCoNLL

conll2dep' :: CoNLL -> Dep
conll2dep' ls = Dep {
    wordLength = wld
  , tokens = toks
  , deps = dps
  , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
  }
 where
   wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
   toks = [(w,c) | _:w:_:c:_ <- ls]
   dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
   --maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]


-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)

-- We render both LaTeX and SVG from this intermediate representation of
-- LaTeX pictures.

data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
data DrawingCommand = Put Position Object
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length

type UnitLengthMM = Double
type Size = (Double,Double)
type Position = (Double,Double)
type Length = Double


-- * latex formatting
ppLaTeX = vcat . map ppLaTeX1
  where
    ppLaTeX1 el =
      case el of
        Comment s -> comment s
        Picture unit size cmds ->
          app "setlength{\\unitlength}" (text (show unit ++ "mm"))
          $$ hang (app "begin" (text "picture")<>text (show size)) 2
                  (vcat (map ppDrawingCommand cmds))
          $$ app "end" (text "picture")
          $$ text ""

    ppDrawingCommand (Put pos obj) = put pos (ppObject obj)

    ppObject obj =
      case obj of
        Text s -> text s
        TinyText s -> small (text s)
        OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
        ArrowDown len -> app "vector(0,-1)" (text (show len))

    put p@(_,_) = app ("put" ++ show p)
    small w = text "{\\tiny" <+> w <> text "}"
    comment s = text "%%" <+> text s -- line break show follow

app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"


latexDoc :: Doc -> Doc
latexDoc body =
  vcat [text "\\documentclass{article}",
        text "\\usepackage[utf8]{inputenc}",
        text "\\begin{document}",
        body,
        text "\\end{document}"]


----------------------------------
-- concrete syntax annotations (local) on top of conll
-- examples of annotations:
-- UseComp {"not"} PART neg head
-- UseComp {*} AUX cop head

type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
-- (fun, word -> (pos,label,target))
-- the pos can remain unchanged, as in the current notation in the article

fixCoNLL :: CncLabels -> CoNLL -> CoNLL
fixCoNLL labels conll = map fixc conll where
  fixc row = case row of
    (i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
    (i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
      Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
      Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
      _ -> row
    _ -> row

  look (fun,word) = case lookup fun labels of
    Just relabel -> case relabel word of
      Just row -> Just row
      _ -> case lookup "*" labels of
        Just starlabel -> starlabel word
        _ -> Nothing
    _ -> case lookup "*" labels of
        Just starlabel -> starlabel word
        _ -> Nothing

  getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]

getCncDepLabels :: String -> CncLabels
getCncDepLabels = map merge .  groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
  --- choose is for compatibility with the general notation
  choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules

  analyse line = case break (=='{') line of
    (beg,_:ws) -> case break (=='}') ws of
      (toks,_:target) -> case (words beg, words target) of
        (fun:_,[    label,j]) -> [(fun, (tok, (id,       label,j))) | tok <- getToks toks]
        (fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
        _ -> []
      _ -> []
    _ -> []
  merge rules@((fun,_):_) = (fun, \tok ->
    case lookup tok (map snd rules) of
      Just new -> return new
      _ -> lookup "*"  (map snd rules)
    )
  getToks = words . map (\c -> if elem c "\"," then ' ' else c)

printCoNLL :: CoNLL -> String
printCoNLL = unlines . map (concat . intersperse "\t")

-----------------------------------------------------------------------
-- Expressions & types

-- | renders an expression as a 'String'. The list
-- of identifiers is the list of all free variables
-- in the expression in order reverse to the order
-- of binding.
showExpr :: [Var] -> Expr -> String
showExpr scope e =
  unsafePerformIO $
  bracket (newPrintCtxt scope) freePrintCtxt $ \pctxt ->
  bracket (newStablePtr e) freeStablePtr $ \c_e ->
  bracket (pgf_print_expr c_e pctxt 1 marshaller) free $ \c_text ->
    peekText c_text

newPrintCtxt :: [Var] -> IO (Ptr PgfPrintContext)
newPrintCtxt []     = return nullPtr
newPrintCtxt (x:xs) = do
  pctxt <- newTextEx (#offset PgfPrintContext, name) x
  newPrintCtxt xs >>= (#poke PgfPrintContext, next) pctxt
  return pctxt

freePrintCtxt :: Ptr PgfPrintContext -> IO ()
freePrintCtxt pctxt
  | pctxt == nullPtr = return ()
  | otherwise        = do
      (#peek PgfPrintContext, next) pctxt >>= freePrintCtxt
      free pctxt

-- | parses a 'String' as an expression
readExpr :: String -> Maybe Expr
readExpr str =
  unsafePerformIO $
  withText str $ \c_str ->
  mask_ $ do
    c_expr <- pgf_read_expr c_str unmarshaller
    if c_expr == castPtrToStablePtr nullPtr
      then return Nothing
      else do expr <- deRefStablePtr c_expr
              freeStablePtr c_expr
              return (Just expr)

-- | renders a type as a 'String'. The list
-- of identifiers is the list of all free variables
-- in the type in order reverse to the order
-- of binding.
showType :: [Var] -> Type -> String
showType scope ty =
  unsafePerformIO $
  bracket (newPrintCtxt scope) freePrintCtxt $ \pctxt ->
  bracket (newStablePtr ty) freeStablePtr $ \c_ty ->
  bracket (pgf_print_type c_ty pctxt 0 marshaller) free $ \c_text ->
    peekText c_text

showContext :: [Var] -> [(BindType,Var,Type)] -> String
showContext scope hypos =
  unsafePerformIO $
  withHypos hypos $ \n_hypos c_hypos ->
  bracket (newPrintCtxt scope) freePrintCtxt $ \pctxt ->
  bracket (pgf_print_context n_hypos c_hypos pctxt 0 marshaller) free $ \c_text ->
    peekText c_text

-- | parses a 'String' as a type
readType :: String -> Maybe Type
readType str =
  unsafePerformIO $
  withText str $ \c_str -> do
    c_ty <- pgf_read_type c_str unmarshaller
    if c_ty == castPtrToStablePtr nullPtr
      then return Nothing
      else do ty <- deRefStablePtr c_ty
              freeStablePtr c_ty
              return (Just ty)

readContext :: String -> Maybe [Hypo]
readContext str =
  unsafePerformIO $
  withText str $ \c_str ->
  alloca $ \p_n_hypos -> do
    c_hypos <- pgf_read_context c_str unmarshaller p_n_hypos
    n_hypos <- peek p_n_hypos
    if c_hypos == nullPtr && n_hypos /= 0
      then return Nothing
      else do hypos <- peekHypos Nothing n_hypos c_hypos
              free c_hypos
              return (Just hypos)
  where
    peekHypos mb_last 0       p_hypo = do
      case mb_last of
        Just last -> freeStablePtr last
        _         -> return ()
      return []
    peekHypos mb_last n_hypos p_hypo = do
      bt  <- fmap unmarshalBindType ((#peek PgfTypeHypo, bind_type) p_hypo)
      c_cid <- (#peek PgfTypeHypo, cid) p_hypo
      cid <- peekText c_cid
      free c_cid
      c_ty <- (#peek PgfTypeHypo, type) p_hypo
      ty <- deRefStablePtr c_ty
      case mb_last of
        Just last | last /= c_ty -> freeStablePtr last
        _                        -> return ()
      hs  <- peekHypos (Just c_ty) (n_hypos-1) (p_hypo `plusPtr` (#size PgfTypeHypo))
      return ((bt,cid,ty):hs)

readProbabilitiesFromFile :: FilePath -> IO (Map.Map String Double)
readProbabilitiesFromFile fpath = do
  s <- readFile fpath
  return $ Map.fromList [(f,read p) | f:p:_ <- map words (lines s)]