the compiler now compiles with the new runtime

2026-05-04 08:42:50 -06:00 · 2021-09-13 18:32:57 +02:00
parent c5ce2fd4b7
commit cf7673525f
26 changed files with 765 additions and 105 deletions
--- a/src/runtime/haskell/PGF2.hsc
+++ b/src/runtime/haskell/PGF2.hsc
@@ -14,17 +14,20 @@
 -------------------------------------------------

 module PGF2 (-- * PGF
-             PGF,readPGF,bootNGF,readNGF,
+             PGF,readPGF,bootNGF,readNGF,writePGF,showPGF,

             -- * Abstract syntax
             AbsName,abstractName,globalFlag,abstractFlag,
+
             -- ** Categories
-             Cat,categories,categoryContext,categoryProb,
+             Cat,categories,categoryContext,categoryProbability,
+
             -- ** Functions
             Fun, functions, functionsByCat,
-             functionType, functionIsConstructor, functionProb,
+             functionType, functionIsConstructor, functionProbability,
+
             -- ** Expressions
-             Expr(..), Literal(..), showExpr, readExpr,
+             Expr(..), Literal(..), showExpr, readExpr, pExpr, pIdent,
             mkAbs,    unAbs,
             mkApp,    unApp, unapply,
             mkStr,    unStr,
@@ -33,20 +36,58 @@ module PGF2 (-- * PGF
             mkFloat,  unFloat,
             mkMeta,   unMeta,
             -- extra
-             exprSize, exprFunctions,
+             exprSize, exprFunctions, exprSubstitute, exprProbability,
+
             -- ** Types
             Type(..), Hypo, BindType(..), startCat,
-             readType, showType,
+             readType, showType, showContext,
             mkType, unType,
             mkHypo, mkDepHypo, mkImplHypo,

+             -- ** Type checking
+             -- | Dynamically-built expressions should always be type-checked before using in other functions,
+             -- as the exceptions thrown by using invalid expressions may not catchable.
+             checkExpr, inferExpr, checkType,
+
+             -- ** Computing
+             compute,
+
+             -- ** Generation
+             generateAll, generateAllFrom, generateRandom, generateRandomFrom,
+
+             -- ** Morphological Analysis
+             MorphoAnalysis, lookupMorpho, lookupCohorts, fullFormLexicon,
+             filterBest, filterLongest,
+
+             -- ** Visualizations
+             GraphvizOptions(..), graphvizDefaults,
+             graphvizAbstractTree, graphvizParseTree,
+             Labels, getDepLabels,
+             graphvizDependencyTree, conlls2latexDoc, getCncDepLabels,
+             graphvizWordAlignment,
+
             -- * Concrete syntax
-             ConcName,
+             ConcName,Concr,languages,concreteName,languageCode,
+
+             -- ** Linearization
+             linearize, linearizeAll, tabularLinearize, tabularLinearizeAll,
+             FId, BracketedString(..), showBracketedString, flattenBracketedString,
+             bracketedLinearize, bracketedLinearizeAll,
+             hasLinearization,
+             printName, alignWords, gizaAlignment,
+
+             -- ** Parsing
+             ParseOutput(..), parse, parseWithHeuristics, complete,

             -- * Exceptions
-             PGFError(..)
+             PGFError(..),
+
+             -- * Auxiliaries             
+             readProbabilitiesFromFile
             ) where

+import Prelude hiding ((<>))
+
 import PGF2.Expr
 import PGF2.FFI

@@ -54,15 +95,16 @@ import Foreign
 import Foreign.C
 import Control.Exception(mask_,bracket)
 import System.IO.Unsafe(unsafePerformIO)
+import System.Random
 import qualified Foreign.Concurrent as C
 import qualified Data.Map as Map
 import Data.IORef
+import Data.List(intersperse,groupBy)
+import Data.Char(isUpper,isSpace,isPunctuation)
+import Text.PrettyPrint

 #include <pgf/pgf.h>

-type AbsName  = String -- ^ Name of abstract syntax
-type ConcName = String -- ^ Name of concrete syntax
-
 -- | Reads a PGF file and keeps it in memory.
 readPGF :: FilePath -> IO PGF
 readPGF fpath =
@@ -106,6 +148,12 @@ readNGF fpath =
    fptr2 <- C.newForeignPtr c_revision (withForeignPtr fptr1 (\c_db -> pgf_free_revision c_db c_revision))
    return (PGF fptr1 fptr2 Map.empty)

+writePGF :: FilePath -> PGF -> IO ()
+writePGF = error "TODO: writePGF"
+
+showPGF :: PGF -> String
+showPGF = error "TODO: showPGF"
+
 -- | The abstract language name is the name of the top-level
 -- abstract module
 abstractName :: PGF -> AbsName
@@ -156,14 +204,273 @@ functionIsConstructor p fun =
      do res <- withPgfExn (pgf_function_is_constructor c_db c_revision c_fun)
         return (res /= 0)

-functionProb :: PGF -> Fun -> Float
-functionProb p fun =
+functionProbability :: PGF -> Fun -> Float
+functionProbability p fun =
  unsafePerformIO $
  withText fun $ \c_fun ->
  withForeignPtr (a_db p) $ \c_db ->
  withForeignPtr (revision p) $ \c_revision ->
      withPgfExn (pgf_function_prob c_db c_revision c_fun)

+exprProbability :: PGF -> Expr -> Float
+exprProbability = error "TODO: exprProbability"
+
+checkExpr :: PGF -> Expr -> Type -> Either String Expr
+checkExpr = error "TODO: checkExpr"
+
+-- | Tries to infer the type of an expression. Note that
+-- even if the expression is type correct it is not always
+-- possible to infer its type in the GF type system.
+-- In this case the function returns an error.
+inferExpr :: PGF -> Expr -> Either String (Expr, Type)
+inferExpr = error "TODO: inferExpr"
+
+-- | Check whether a type is consistent with the abstract
+-- syntax of the grammar.
+checkType :: PGF -> Type -> Either String Type
+checkType = error "TODO: checkType"
+  
+compute :: PGF -> Expr -> Expr
+compute = error "TODO: compute"
+
+concreteName :: Concr -> ConcName
+concreteName c = error "TODO: concreteName"
+
+languageCode :: Concr -> Maybe String
+languageCode c = error "TODO: languageCode"
+
+printName :: Concr -> Fun -> Maybe String
+printName lang fun = error "TODO: printName"
+
+alignWords :: Concr -> Expr -> [(String, [Int])]
+alignWords = error "TODO: alignWords"
+
+gizaAlignment = error "TODO: gizaAlignment"
+
+-----------------------------------------------------------------------------
+-- Functions using Concr
+-- Morpho analyses, parsing & linearization
+
+-- | This triple is returned by all functions that deal with
+-- the grammar's lexicon. Its first element is the name of an abstract
+-- lexical function which can produce a given word or
+-- a multiword expression (i.e. this is the lemma).
+-- After that follows a string which describes
+-- the particular inflection form.
+--
+-- The last element is a logarithm from the
+-- the probability of the function. The probability is not
+-- conditionalized on the category of the function. This makes it
+-- possible to compare the likelihood of two functions even if they
+-- have different types.
+type MorphoAnalysis = (Fun,String,Float)
+
+-- | 'lookupMorpho' takes a string which must be a single word or
+-- a multiword expression. It then computes the list of all possible
+-- morphological analyses.
+lookupMorpho :: Concr -> String -> [MorphoAnalysis]
+lookupMorpho = error "TODO: lookupMorpho"
+
+-- | 'lookupCohorts' takes an arbitrary string an produces
+-- a list of all places where lexical items from the grammar have been
+-- identified (i.e. cohorts). The list consists of triples of the format @(start,ans,end)@,
+-- where @start-end@ identifies the span in the text and @ans@ is
+-- the list of possible morphological analyses similar to 'lookupMorpho'.
+--
+-- The list is sorted first by the @start@ position and after than
+-- by the @end@ position. This can be used for instance if you want to
+-- filter only the longest matches.
+lookupCohorts :: Concr -> String -> [(Int,String,[MorphoAnalysis],Int)]
+lookupCohorts = error "TODO: lookupCohorts"
+
+filterBest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
+filterBest ans =
+  reverse (iterate (maxBound :: Int) [(0,0,[],ans)] [] [])
+  where
+    iterate v0 []                      []  res = res
+    iterate v0 []                      new res = iterate v0 new []  res
+    iterate v0 ((_,v,conf,    []):old) new res =
+      case compare v0 v of
+        LT                                    -> res
+        EQ                                    -> iterate v0 old new (merge conf res)
+        GT                                    -> iterate v  old new conf
+    iterate v0 ((_,v,conf,an:ans):old) new res = iterate v0 old (insert (v+valueOf an) conf an ans [] new) res
+
+    valueOf (_,_,[],_) = 2
+    valueOf _          = 1
+
+    insert v conf an@(start,_,_,end) ans l_new []                               =
+      match start v conf ans ((end,v,comb conf an,filter end ans):l_new) []
+    insert v conf an@(start,_,_,end) ans l_new (new@(end0,v0,conf0,ans0):r_new) =
+      case compare end0 end of
+        LT         -> insert v conf an ans (new:l_new) r_new
+        EQ -> case compare v0 v of
+                LT -> match start v conf ans ((end,v,                     conf0,ans0):    l_new) r_new
+                EQ -> match start v conf ans ((end,v,merge (comb conf an) conf0,ans0):    l_new) r_new
+                GT -> match start v conf ans ((end,v,comb conf an,              ans0):    l_new) r_new
+        GT         -> match start v conf ans ((end,v,comb conf an,    filter end ans):new:l_new) r_new
+
+    match start0 v conf (an@(start,_,_,end):ans) l_new r_new
+      | start0 == start                 = insert v conf an ans l_new r_new
+    match start0 v conf ans l_new r_new = revOn l_new r_new
+
+    comb ((start0,w0,an0,end0):conf) (start,w,an,end)
+      | end0 == start && (unk w0 an0 || unk w an) = (start0,w0++w,[],end):conf
+    comb conf an = an:conf
+
+    filter end                     [] = []
+    filter end (next@(start,_,_,_):ans)
+      | end <= start                  = next:ans
+      | otherwise                     = filter end ans
+
+    revOn []     ys = ys
+    revOn (x:xs) ys = revOn xs (x:ys)
+
+    merge []                           ans                          = ans
+    merge ans                          []                           = ans
+    merge (an1@(start1,_,_,end1):ans1) (an2@(start2,_,_,end2):ans2) =
+      case compare (start1,end1) (start2,end2) of
+        GT -> an1 : merge ans1 (an2:ans2)
+        EQ -> an1 : merge ans1 ans2
+        LT -> an2 : merge (an1:ans1) ans2
+
+filterLongest :: [(Int,String,[MorphoAnalysis],Int)] -> [(Int,String,[MorphoAnalysis],Int)]
+filterLongest []       = []
+filterLongest (an:ans) = longest an ans
+  where
+    longest prev                   [] = [prev]
+    longest prev@(start0,_,_,end0) (next@(start,_,_,end):ans)
+      | start0 == start               = longest next ans
+      | otherwise                     = filter prev (next:ans)
+
+    filter prev                      [] = [prev]
+    filter prev@(start0,w0,an0,end0) (next@(start,w,an,end):ans)
+      | end0 == start && (unk w0 an0 || unk w an)
+                                        = filter (start0,w0++w,[],end) ans
+      | end0 <=  start                  = prev : longest next ans
+      | otherwise                       = filter prev ans
+
+unk w [] | any (not . isPunctuation) w = True
+unk _ _                                = False
+
+fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
+fullFormLexicon lang = error "TODO: fullFormLexicon"
+
+-- | This data type encodes the different outcomes which you could get from the parser.
+data ParseOutput a
+  = ParseFailed Int String         -- ^ The integer is the position in number of unicode characters where the parser failed.
+                                   -- The string is the token where the parser have failed.
+  | ParseOk a                      -- ^ If the parsing and the type checking are successful
+                                   -- we get the abstract syntax trees as either a list or a chart.
+  | ParseIncomplete                -- ^ The sentence is not complete.
+
+parse :: Concr -> Type -> String -> ParseOutput [(Expr,Float)]
+parse lang ty sent = parseWithHeuristics lang ty sent (-1.0) []
+
+parseWithHeuristics :: Concr      -- ^ the language with which we parse
+                    -> Type       -- ^ the start category
+                    -> String     -- ^ the input sentence
+                    -> Double     -- ^ the heuristic factor.
+                                  -- A negative value tells the parser
+                                  -- to lookup up the default from
+                                  -- the grammar flags
+                    -> [(Cat, String -> Int -> Maybe (Expr,Float,Int))]
+                                  -- ^ a list of callbacks for literal categories.
+                                  -- The arguments of the callback are:
+                                  -- the index of the constituent for the literal category;
+                                  -- the input sentence; the current offset in the sentence.
+                                  -- If a literal has been recognized then the output should
+                                  -- be Just (expr,probability,end_offset)
+                    -> ParseOutput [(Expr,Float)]
+parseWithHeuristics = error "TODO: parseWithHeuristics"
+
+-- | Returns possible completions of the current partial input.
+complete :: Concr      -- ^ the language with which we parse
+         -> Type       -- ^ the start category
+         -> String     -- ^ the input sentence (excluding token being completed)
+         -> String     -- ^ prefix (partial token being completed)
+         -> ParseOutput [(String, Fun, Cat, Float)]  -- ^ (token, category, function, probability)
+complete = error "TODO: complete"
+
+-- | Returns True if there is a linearization defined for that function in that language
+hasLinearization :: Concr -> Fun -> Bool
+hasLinearization = error "TODO: linearize"
+
+-- | Linearizes an expression as a string in the language
+linearize :: Concr -> Expr -> String
+linearize lang e = error "TODO: linearize"
+
+-- | Generates all possible linearizations of an expression
+linearizeAll :: Concr -> Expr -> [String]
+linearizeAll lang e = error "TODO: linearizeAll"
+
+-- | Generates a table of linearizations for an expression
+tabularLinearize :: Concr -> Expr -> [(String, String)]
+tabularLinearize lang e =
+  case tabularLinearizeAll lang e of
+    (lins:_) -> lins
+    _        -> []
+
+-- | Generates a table of linearizations for an expression
+tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]]
+tabularLinearizeAll lang e = error "TODO: tabularLinearizeAll"
+
+type FId = Int
+
+-- | BracketedString represents a sentence that is linearized
+-- as usual but we also want to retain the ''brackets'' that
+-- mark the beginning and the end of each constituent.
+data BracketedString
+  = Leaf String                                                                -- ^ this is the leaf i.e. a single token
+  | BIND                                                                       -- ^ the surrounding tokens must be bound together
+  | Bracket Cat {-# UNPACK #-} !FId String Fun [BracketedString]
+                                                                               -- ^ this is a bracket. The 'Cat' is the category of
+                                                                               -- the phrase. The 'FId' is an unique identifier for
+                                                                               -- every phrase in the sentence. For context-free grammars
+                                                                               -- i.e. without discontinuous constituents this identifier
+                                                                               -- is also unique for every bracket. When there are discontinuous
+                                                                               -- phrases then the identifiers are unique for every phrase but
+                                                                               -- not for every bracket since the bracket represents a constituent.
+                                                                               -- The different constituents could still be distinguished by using
+                                                                               -- the analysis string. If the grammar is reduplicating
+                                                                               -- then the constituent indices will be the same for all brackets
+                                                                               -- that represents the same constituent.
+                                                                               -- The 'Fun' is the name of the abstract function that generated
+                                                                               -- this phrase.
+
+-- | Renders the bracketed string as a string where
+-- the brackets are shown as @(S ...)@ where
+-- @S@ is the category.
+showBracketedString :: BracketedString -> String
+showBracketedString = render . ppBracketedString
+
+ppBracketedString (Leaf t) = text t
+ppBracketedString BIND     = text "&+"
+ppBracketedString (Bracket cat fid _ _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
+
+-- | Extracts the sequence of tokens from the bracketed string
+flattenBracketedString :: BracketedString -> [String]
+flattenBracketedString (Leaf w)              = [w]
+flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
+
+bracketedLinearize :: Concr -> Expr -> [BracketedString]
+bracketedLinearize = error "TODO: bracketedLinearize"
+
+bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
+bracketedLinearizeAll = error "TODO: bracketedLinearizeAll"
+
+generateAll :: PGF -> Type -> [(Expr,Float)]
+generateAll p ty = error "TODO: generateAll"
+
+generateAllFrom :: PGF -> Expr -> [(Expr,Float)]
+generateAllFrom p ty = error "TODO: generateAllFrom"
+
+generateRandom :: StdGen -> PGF -> Type -> [a]
+generateRandom = error "TODO: generateRandom"
+
+generateRandomFrom :: StdGen -> PGF -> Expr -> [a]
+generateRandomFrom = error "TODO: generateRandomFrom"
+
 -- | List of all functions defined in the abstract syntax
 categories :: PGF -> [Cat]
 categories p =
@@ -184,7 +491,7 @@ categories p =
      name  <- peekText key
      writeIORef ref $ (name : names)

-categoryContext :: PGF -> Cat -> [Hypo]
+categoryContext :: PGF -> Cat -> Maybe [Hypo]
 categoryContext p cat =
  unsafePerformIO $
  withText cat $ \c_cat ->
@@ -195,11 +502,11 @@ categoryContext p cat =
  mask_ $ do
    c_hypos <- withPgfExn (pgf_category_context c_db c_revision c_cat p_n_hypos u)
    if c_hypos == nullPtr
-      then return []
+      then return Nothing
      else do n_hypos <- peek p_n_hypos
              hypos <- peekHypos c_hypos 0 n_hypos
              free c_hypos
-              return hypos
+              return (Just hypos)
  where
    peekHypos :: Ptr PgfTypeHypo -> CSize -> CSize -> IO [Hypo]
    peekHypos c_hypo i n
@@ -214,8 +521,8 @@ categoryContext p cat =
                       return ((bt,cat,ty) : hs)
      | otherwise = return []

-categoryProb :: PGF -> Cat -> Float
-categoryProb p cat =
+categoryProbability :: PGF -> Cat -> Float
+categoryProbability p cat =
  unsafePerformIO $
  withText cat $ \c_cat ->
  withForeignPtr (a_db p) $ \c_db ->
@@ -291,6 +598,256 @@ abstractFlag p name =
              freeStablePtr c_lit
              return (Just lit)

+-----------------------------------------------------------------------------
+-- Graphviz
+
+data GraphvizOptions = GraphvizOptions {noLeaves :: Bool,
+                                        noFun :: Bool,
+                                        noCat :: Bool,
+                                        noDep :: Bool,
+                                        nodeFont :: String,
+                                        leafFont :: String,
+                                        nodeColor :: String,
+                                        leafColor :: String,
+                                        nodeEdgeStyle :: String,
+                                        leafEdgeStyle :: String
+                                       }
+
+graphvizDefaults = GraphvizOptions False False False True "" "" "" "" "" ""
+
+-- | Renders an abstract syntax tree in a Graphviz format.
+graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String
+graphvizAbstractTree p opts e = error "TODO: graphvizAbstractTree"
+
+graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String
+graphvizParseTree c opts e = error "TODO: graphvizParseTree"
+
+graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
+graphvizWordAlignment cs opts e = error "TODO: graphvizWordAlignment"
+
+
+type Labels = Map.Map Fun [String]
+
+getDepLabels :: String -> Labels
+getDepLabels s = Map.fromList [(f,ls) | f:ls <- map words (lines s)]
+
+-- | Visualize word dependency tree.
+graphvizDependencyTree
+  :: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
+  -> Bool -- ^ Include extra information (debug)
+  -> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
+  -> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
+  -> Concr
+  -> Expr
+  -> String -- ^ Rendered output in the specified format
+graphvizDependencyTree format debug mlab mclab concr t = error "TODO: graphvizDependencyTree"
+
+---------------------- should be a separate module?
+
+-- visualization with latex output. AR Nov 2015
+
+conlls2latexDoc :: [String] -> String
+conlls2latexDoc =
+  render .
+  latexDoc .
+  vcat .
+  intersperse (text "" $+$ app "vspace" (text "4mm")) .
+  map conll2latex .
+  filter (not . null)
+
+conll2latex :: String -> Doc
+conll2latex = ppLaTeX . conll2latex' . parseCoNLL
+
+conll2latex' :: CoNLL -> [LaTeX]
+conll2latex' = dep2latex . conll2dep'
+
+data Dep = Dep {
+    wordLength  :: Int -> Double        -- length of word at position int       -- was: fixed width, millimetres (>= 20.0)
+  , tokens      :: [(String,String)]    -- word, pos (0..)
+  , deps        :: [((Int,Int),String)] -- from, to, label
+  , root        :: Int                  -- root word position
+  }
+
+-- some general measures
+defaultWordLength = 20.0  -- the default fixed width word length, making word 100 units
+defaultUnit       = 0.2   -- unit in latex pictures, 0.2 millimetres
+spaceLength       = 10.0
+charWidth = 1.8
+
+wsize rwld  w  = 100 * rwld w + spaceLength                   -- word length, units
+wpos rwld i    = sum [wsize rwld j | j <- [0..i-1]]           -- start position of the i'th word
+wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]]    -- distance between words x and y
+labelheight h  = h + arcbase + 3    -- label just above arc; 25 would put it just below
+labelstart c   = c - 15.0           -- label starts 15u left of arc centre
+arcbase        = 30.0               -- arcs start and end 40u above the bottom
+arcfactor r    = r * 600            -- reduction of arc size from word distance
+xyratio        = 3                  -- width/height ratio of arcs
+
+putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
+putArc frwld height x y label = [oval,arrowhead,labelling] where
+  oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
+  arrowhead = Put (endp,arcbase + 5) (ArrowDown 5)   -- downgoing arrow 5u above the arc base
+  labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
+  dxy  = wdist frwld x y             -- distance between words, >>= 20.0
+  ndxy = 100 * rwld * fromIntegral height  -- distance that is indep of word length
+  hdxy = dxy / 2                     -- half the distance
+  wdth = dxy - (arcfactor rwld)/dxy  -- longer arcs are wider in proportion
+  hght = ndxy / (xyratio * rwld)      -- arc height is independent of word length
+  begp = min x y                     -- begin position of oval
+  ctr  = wpos frwld begp + hdxy + (if x < y then 20 else  10)  -- LR arcs are farther right from center of oval
+  endp = (if x < y then (+) else (-)) ctr (wdth/2)            -- the point of the arrow
+  rwld = 0.5 ----
+
+dep2latex :: Dep -> [LaTeX]
+dep2latex d =
+  [Comment (unwords (map fst (tokens d))),
+   Picture defaultUnit (width,height) (
+     [Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))]   -- words
+  ++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))]   -- pos tags 15u above bottom
+  ++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d]    -- arcs and labels
+  ++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
+  ++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
+  )]
+ where
+   wld i  = wordLength d i  -- >= 20.0
+   rwld i = (wld i) / defaultWordLength       -- >= 1.0
+   aheight x y = depth (min x y) (max x y) + 1    ---- abs (x-y)
+   arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
+   depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
+     [] -> 0
+     uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
+   width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
+   height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))
+
+type CoNLL = [[String]]
+parseCoNLL :: String -> CoNLL
+parseCoNLL = map words . lines
+
+--conll2dep :: String -> Dep
+--conll2dep = conll2dep' . parseCoNLL
+
+conll2dep' :: CoNLL -> Dep
+conll2dep' ls = Dep {
+    wordLength = wld 
+  , tokens = toks
+  , deps = dps
+  , root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
+  }
+ where
+   wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
+   toks = [(w,c) | _:w:_:c:_ <- ls]
+   dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
+   --maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
+
+
+-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
+
+-- We render both LaTeX and SVG from this intermediate representation of
+-- LaTeX pictures.
+
+data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
+data DrawingCommand = Put Position Object
+data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
+
+type UnitLengthMM = Double
+type Size = (Double,Double)
+type Position = (Double,Double)
+type Length = Double
+
+
+-- * latex formatting
+ppLaTeX = vcat . map ppLaTeX1
+  where
+    ppLaTeX1 el =
+      case el of
+        Comment s -> comment s
+        Picture unit size cmds ->
+          app "setlength{\\unitlength}" (text (show unit ++ "mm"))
+          $$ hang (app "begin" (text "picture")<>text (show size)) 2
+                  (vcat (map ppDrawingCommand cmds))
+          $$ app "end" (text "picture")
+          $$ text ""
+
+    ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
+
+    ppObject obj =
+      case obj of
+        Text s -> text s
+        TinyText s -> small (text s)
+        OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
+        ArrowDown len -> app "vector(0,-1)" (text (show len))
+
+    put p@(_,_) = app ("put" ++ show p)
+    small w = text "{\\tiny" <+> w <> text "}"
+    comment s = text "%%" <+> text s -- line break show follow
+    
+app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
+
+
+latexDoc :: Doc -> Doc
+latexDoc body =
+  vcat [text "\\documentclass{article}",
+        text "\\usepackage[utf8]{inputenc}",
+        text "\\begin{document}",
+        body,
+        text "\\end{document}"]
+
+
+----------------------------------
+-- concrete syntax annotations (local) on top of conll
+-- examples of annotations:
+-- UseComp {"not"} PART neg head 
+-- UseComp {*} AUX cop head
+
+type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
+-- (fun, word -> (pos,label,target))
+-- the pos can remain unchanged, as in the current notation in the article
+
+fixCoNLL :: CncLabels -> CoNLL -> CoNLL
+fixCoNLL labels conll = map fixc conll where
+  fixc row = case row of
+    (i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root 
+    (i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
+      Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
+      Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
+      _ -> row
+    _ -> row
+    
+  look (fun,word) = case lookup fun labels of
+    Just relabel -> case relabel word of
+      Just row -> Just row
+      _ -> case lookup "*" labels of
+        Just starlabel -> starlabel word
+        _ -> Nothing
+    _ -> case lookup "*" labels of
+        Just starlabel -> starlabel word
+        _ -> Nothing
+  
+  getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
+
+getCncDepLabels :: String -> CncLabels
+getCncDepLabels = map merge .  groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
+  --- choose is for compatibility with the general notation
+  choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
+  
+  analyse line = case break (=='{') line of
+    (beg,_:ws) -> case break (=='}') ws of
+      (toks,_:target) -> case (words beg, words target) of
+        (fun:_,[    label,j]) -> [(fun, (tok, (id,       label,j))) | tok <- getToks toks]
+        (fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
+        _ -> []
+      _ -> []
+    _ -> []
+  merge rules@((fun,_):_) = (fun, \tok ->
+    case lookup tok (map snd rules) of
+      Just new -> return new
+      _ -> lookup "*"  (map snd rules)
+    )
+  getToks = words . map (\c -> if elem c "\"," then ' ' else c)
+
+printCoNLL :: CoNLL -> String
+printCoNLL = unlines . map (concat . intersperse "\t")
+
 -----------------------------------------------------------------------
 -- Expressions & types

@@ -335,6 +892,12 @@ readExpr str =
              freeStablePtr c_expr
              return (Just expr)

+pIdent :: ReadS String
+pIdent = error "TODO: pIdent"
+
+pExpr :: ReadS Expr
+pExpr = error "TODO: pExpr"
+
 -- | renders a type as a 'String'. The list
 -- of identifiers is the list of all free variables
 -- in the type in order reverse to the order
@@ -348,6 +911,9 @@ showType scope ty =
  bracket (pgf_print_type c_ty pctxt 0 m) free $ \c_text ->
    peekText c_text

+showContext :: [Var] -> [(BindType,Var,Type)] -> String
+showContext = error "TODO: showContext"
+
 -- | parses a 'String' as a type
 readType :: String -> Maybe Type
 readType str =
@@ -360,3 +926,8 @@ readType str =
      else do ty <- deRefStablePtr c_ty
              freeStablePtr c_ty
              return (Just ty)
+
+readProbabilitiesFromFile :: FilePath -> IO (Map.Map String Double)
+readProbabilitiesFromFile fpath = do
+  s <- readFile fpath
+  return $ Map.fromList [(f,read p) | f:p:_ <- map words (lines s)]