mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-30 14:52:51 -06:00
manually copy the "c-runtime" branch from the old repository.
This commit is contained in:
@@ -21,25 +21,21 @@
|
||||
module PGF2 (-- * PGF
|
||||
PGF,readPGF,showPGF,
|
||||
|
||||
-- * Identifiers
|
||||
CId,
|
||||
|
||||
-- * Abstract syntax
|
||||
AbsName,abstractName,
|
||||
-- ** Categories
|
||||
Cat,categories,categoryContext,
|
||||
Cat,categories,categoryContext,categoryProbability,
|
||||
-- ** Functions
|
||||
Fun, functions, functionsByCat,
|
||||
functionType, functionIsConstructor, hasLinearization,
|
||||
functionType, functionIsDataCon, hasLinearization,
|
||||
-- ** Expressions
|
||||
Expr,showExpr,readExpr,pExpr,
|
||||
Expr,showExpr,readExpr,pExpr,pIdent,
|
||||
mkAbs,unAbs,
|
||||
mkApp,unApp,
|
||||
mkApp,unApp,unapply,
|
||||
mkStr,unStr,
|
||||
mkInt,unInt,
|
||||
mkFloat,unFloat,
|
||||
mkMeta,unMeta,
|
||||
mkCId,
|
||||
exprHash, exprSize, exprFunctions, exprSubstitute,
|
||||
treeProbability,
|
||||
|
||||
@@ -58,13 +54,13 @@ module PGF2 (-- * PGF
|
||||
ConcName,Concr,languages,concreteName,languageCode,
|
||||
|
||||
-- ** Linearization
|
||||
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,bracketedLinearizeAll,
|
||||
linearize,linearizeAll,tabularLinearize,tabularLinearizeAll,bracketedLinearize,
|
||||
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
|
||||
printName,
|
||||
|
||||
alignWords,
|
||||
-- ** Parsing
|
||||
ParseOutput(..), parse, parseWithHeuristics,
|
||||
ParseOutput(..), parse, parseWithHeuristics, complete,
|
||||
-- ** Sentence Lookup
|
||||
lookupSentence,
|
||||
-- ** Generation
|
||||
@@ -73,7 +69,9 @@ module PGF2 (-- * PGF
|
||||
MorphoAnalysis, lookupMorpho, fullFormLexicon,
|
||||
-- ** Visualizations
|
||||
GraphvizOptions(..), graphvizDefaults,
|
||||
graphvizAbstractTree, graphvizParseTree, graphvizWordAlignment,
|
||||
graphvizAbstractTree, graphvizParseTree,
|
||||
graphvizDependencyTree, conlls2latexDoc, getCncDepLabels,
|
||||
graphvizWordAlignment,
|
||||
|
||||
-- * Exceptions
|
||||
PGFError(..),
|
||||
@@ -82,7 +80,7 @@ module PGF2 (-- * PGF
|
||||
LiteralCallback,literalCallbacks
|
||||
) where
|
||||
|
||||
import Prelude hiding (fromEnum,(<>)) -- GHC 8.4.1 clash with Text.PrettyPrint
|
||||
import Prelude hiding (fromEnum)
|
||||
import Control.Exception(Exception,throwIO)
|
||||
import Control.Monad(forM_)
|
||||
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
|
||||
@@ -97,7 +95,8 @@ import Data.Typeable
|
||||
import qualified Data.Map as Map
|
||||
import Data.IORef
|
||||
import Data.Char(isUpper,isSpace)
|
||||
import Data.List(isSuffixOf,maximumBy,nub)
|
||||
import Data.List(isSuffixOf,maximumBy,nub,mapAccumL,intersperse,groupBy,find)
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Data.Function(on)
|
||||
|
||||
|
||||
@@ -110,8 +109,8 @@ import Data.Function(on)
|
||||
-- to Concr but has lost its reference to PGF.
|
||||
|
||||
|
||||
type AbsName = CId -- ^ Name of abstract syntax
|
||||
type ConcName = CId -- ^ Name of concrete syntax
|
||||
type AbsName = String -- ^ Name of abstract syntax
|
||||
type ConcName = String -- ^ Name of concrete syntax
|
||||
|
||||
-- | Reads file in Portable Grammar Format and produces
|
||||
-- 'PGF' structure. The file is usually produced with:
|
||||
@@ -136,7 +135,22 @@ readPGF fpath =
|
||||
throwIO (PGFError "The grammar cannot be loaded")
|
||||
else return pgf
|
||||
pgfFPtr <- newForeignPtr gu_pool_finalizer pool
|
||||
return (PGF pgf (touchForeignPtr pgfFPtr))
|
||||
let touch = touchForeignPtr pgfFPtr
|
||||
ref <- newIORef Map.empty
|
||||
allocaBytes (#size GuMapItor) $ \itor ->
|
||||
do fptr <- wrapMapItorCallback (getLanguages ref touch)
|
||||
(#poke GuMapItor, fn) itor fptr
|
||||
pgf_iter_languages pgf itor nullPtr
|
||||
freeHaskellFunPtr fptr
|
||||
langs <- readIORef ref
|
||||
return (PGF pgf langs touch)
|
||||
where
|
||||
getLanguages :: IORef (Map.Map String Concr) -> Touch -> MapItorCallback
|
||||
getLanguages ref touch itor key value exn = do
|
||||
langs <- readIORef ref
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
concr <- fmap (\ptr -> Concr ptr touch) $ peek (castPtr value)
|
||||
writeIORef ref $! Map.insert name concr langs
|
||||
|
||||
showPGF :: PGF -> String
|
||||
showPGF p =
|
||||
@@ -144,29 +158,15 @@ showPGF p =
|
||||
withGuPool $ \tmpPl ->
|
||||
do (sb,out) <- newOut tmpPl
|
||||
exn <- gu_new_exn tmpPl
|
||||
pgf_print (pgf p) out exn
|
||||
withArrayLen ((map concr . Map.elems . languages) p) $ \n_concrs concrs ->
|
||||
pgf_print (pgf p) (fromIntegral n_concrs) concrs out exn
|
||||
touchPGF p
|
||||
s <- gu_string_buf_freeze sb tmpPl
|
||||
peekUtf8CString s
|
||||
|
||||
-- | List of all languages available in the grammar.
|
||||
languages :: PGF -> Map.Map ConcName Concr
|
||||
languages p =
|
||||
unsafePerformIO $
|
||||
do ref <- newIORef Map.empty
|
||||
allocaBytes (#size GuMapItor) $ \itor ->
|
||||
do fptr <- wrapMapItorCallback (getLanguages ref)
|
||||
(#poke GuMapItor, fn) itor fptr
|
||||
pgf_iter_languages (pgf p) itor nullPtr
|
||||
freeHaskellFunPtr fptr
|
||||
readIORef ref
|
||||
where
|
||||
getLanguages :: IORef (Map.Map String Concr) -> MapItorCallback
|
||||
getLanguages ref itor key value exn = do
|
||||
langs <- readIORef ref
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
concr <- fmap (\ptr -> Concr ptr (touchPGF p)) $ peek (castPtr value)
|
||||
writeIORef ref $! Map.insert name concr langs
|
||||
languages p = langs p
|
||||
|
||||
-- | The abstract language name is the name of the top-level
|
||||
-- abstract module
|
||||
@@ -242,8 +242,8 @@ functionType p fn =
|
||||
else Just (Type c_type (touchPGF p)))
|
||||
|
||||
-- | The type of a function
|
||||
functionIsConstructor :: PGF -> Fun -> Bool
|
||||
functionIsConstructor p fn =
|
||||
functionIsDataCon :: PGF -> Fun -> Bool
|
||||
functionIsDataCon p fn =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl -> do
|
||||
c_fn <- newUtf8CString fn tmpPl
|
||||
@@ -253,15 +253,15 @@ functionIsConstructor p fn =
|
||||
|
||||
-- | Checks an expression against a specified type.
|
||||
checkExpr :: PGF -> Expr -> Type -> Either String Expr
|
||||
checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) =
|
||||
checkExpr p (Expr c_expr touch1) (Type c_ty touch2) =
|
||||
unsafePerformIO $
|
||||
alloca $ \pexpr ->
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
exprPl <- gu_new_pool
|
||||
poke pexpr c_expr
|
||||
pgf_check_expr p pexpr c_ty exn exprPl
|
||||
touch1 >> touch2
|
||||
pgf_check_expr (pgf p) pexpr c_ty exn exprPl
|
||||
touchPGF p >> touch1 >> touch2
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
@@ -280,15 +280,15 @@ checkExpr (PGF p _) (Expr c_expr touch1) (Type c_ty touch2) =
|
||||
-- possible to infer its type in the GF type system.
|
||||
-- In this case the function returns an error.
|
||||
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
|
||||
inferExpr (PGF p _) (Expr c_expr touch1) =
|
||||
inferExpr p (Expr c_expr touch1) =
|
||||
unsafePerformIO $
|
||||
alloca $ \pexpr ->
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
exprPl <- gu_new_pool
|
||||
poke pexpr c_expr
|
||||
c_ty <- pgf_infer_expr p pexpr exn exprPl
|
||||
touch1
|
||||
c_ty <- pgf_infer_expr (pgf p) pexpr exn exprPl
|
||||
touchPGF p >> touch1
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
@@ -306,15 +306,15 @@ inferExpr (PGF p _) (Expr c_expr touch1) =
|
||||
-- | Check whether a type is consistent with the abstract
|
||||
-- syntax of the grammar.
|
||||
checkType :: PGF -> Type -> Either String Type
|
||||
checkType (PGF p _) (Type c_ty touch1) =
|
||||
checkType p (Type c_ty touch1) =
|
||||
unsafePerformIO $
|
||||
alloca $ \pty ->
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
typePl <- gu_new_pool
|
||||
poke pty c_ty
|
||||
pgf_check_type p pty exn typePl
|
||||
touch1
|
||||
pgf_check_type (pgf p) pty exn typePl
|
||||
touchPGF p >> touch1
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do typeFPl <- newForeignPtr gu_pool_finalizer typePl
|
||||
@@ -329,13 +329,13 @@ checkType (PGF p _) (Type c_ty touch1) =
|
||||
else throwIO (PGFError msg)
|
||||
|
||||
compute :: PGF -> Expr -> Expr
|
||||
compute (PGF p _) (Expr c_expr touch1) =
|
||||
compute p (Expr c_expr touch1) =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl -> do
|
||||
exn <- gu_new_exn tmpPl
|
||||
exprPl <- gu_new_pool
|
||||
c_expr <- pgf_compute p c_expr exn tmpPl exprPl
|
||||
touch1
|
||||
c_expr <- pgf_compute (pgf p) c_expr exn tmpPl exprPl
|
||||
touchPGF p >> touch1
|
||||
status <- gu_exn_is_raised exn
|
||||
if not status
|
||||
then do exprFPl <- newForeignPtr gu_pool_finalizer exprPl
|
||||
@@ -346,10 +346,10 @@ compute (PGF p _) (Expr c_expr touch1) =
|
||||
throwIO (PGFError msg)
|
||||
|
||||
treeProbability :: PGF -> Expr -> Float
|
||||
treeProbability (PGF p _) (Expr c_expr touch1) =
|
||||
treeProbability p (Expr c_expr touch1) =
|
||||
unsafePerformIO $ do
|
||||
res <- pgf_compute_tree_probability p c_expr
|
||||
touch1
|
||||
res <- pgf_compute_tree_probability (pgf p) c_expr
|
||||
touchPGF p >> touch1
|
||||
return (realToFrac res)
|
||||
|
||||
exprHash :: Int32 -> Expr -> Int32
|
||||
@@ -447,6 +447,433 @@ graphvizWordAlignment cs opts e =
|
||||
s <- gu_string_buf_freeze sb tmpPl
|
||||
peekUtf8CString s
|
||||
|
||||
|
||||
type Labels = Map.Map Fun [String]
|
||||
|
||||
-- | Visualize word dependency tree.
|
||||
graphvizDependencyTree
|
||||
:: String -- ^ Output format: @"latex"@, @"conll"@, @"malt_tab"@, @"malt_input"@ or @"dot"@
|
||||
-> Bool -- ^ Include extra information (debug)
|
||||
-> Maybe Labels -- ^ abstract label information obtained with 'getDepLabels'
|
||||
-> Maybe CncLabels -- ^ concrete label information obtained with ' ' (was: unused (was: @Maybe String@))
|
||||
-> Concr
|
||||
-> Expr
|
||||
-> String -- ^ Rendered output in the specified format
|
||||
graphvizDependencyTree format debug mlab mclab concr t =
|
||||
case format of
|
||||
"latex" -> render . ppLaTeX $ conll2latex' conll
|
||||
"svg" -> render . ppSVG . toSVG $ conll2latex' conll
|
||||
"conll" -> printCoNLL conll
|
||||
"malt_tab" -> render $ vcat (map (hcat . intersperse (char '\t') . (\ws -> [ws !! 0,ws !! 1,ws !! 3,ws !! 6,ws !! 7])) wnodes)
|
||||
"malt_input" -> render $ vcat (map (hcat . intersperse (char '\t') . take 6) wnodes)
|
||||
_ -> render $ text "digraph {" $$
|
||||
space $$
|
||||
nest 2 (text "rankdir=LR ;" $$
|
||||
text "node [shape = plaintext] ;" $$
|
||||
vcat nodes $$
|
||||
vcat links) $$
|
||||
text "}"
|
||||
where
|
||||
conll = maybe conll0 (\ls -> fixCoNLL ls conll0) mclab
|
||||
conll0 = (map.map) render wnodes
|
||||
nodes = map mkNode leaves
|
||||
links = map mkLink [(fid, fromMaybe (dep_lbl,nil) (lookup fid deps)) | ((cat,fid,fun),_,w) <- tail leaves]
|
||||
|
||||
-- CoNLL format: ID FORM LEMMA PLEMMA POS PPOS FEAT PFEAT HEAD PHEAD DEPREL PDEPREL
|
||||
-- P variants are automatically predicted rather than gold standard
|
||||
|
||||
wnodes = [[int i, maltws ws, text fun, text (posCat cat), text cat, unspec, int parent, text lab, unspec, unspec] |
|
||||
((cat,fid,fun),i,ws) <- tail leaves,
|
||||
let (lab,parent) = fromMaybe (dep_lbl,0)
|
||||
(do (lbl,fid) <- lookup fid deps
|
||||
(_,i,_) <- find (\((_,fid1,_),i,_) -> fid == fid1) leaves
|
||||
return (lbl,i))
|
||||
]
|
||||
maltws = text . concat . intersperse "+" . words -- no spaces in column 2
|
||||
|
||||
nil = -1
|
||||
|
||||
bss = bracketedLinearize concr t
|
||||
|
||||
root = ("_",nil,"_")
|
||||
|
||||
leaves = (root,0,root_lbl) : (groupAndIndexIt 1 . concatMap (getLeaves root)) bss
|
||||
deps = let (_,(h,deps)) = getDeps 0 [] t
|
||||
in (h,(dep_lbl,nil)):deps
|
||||
|
||||
groupAndIndexIt id [] = []
|
||||
groupAndIndexIt id ((p,w):pws) = (p,id,w) : groupAndIndexIt (id+1) pws
|
||||
--- groupAndIndexIt id ((p,w):pws) = let (ws,pws1) = collect pws
|
||||
--- in (p,id,unwords (w:ws)) : groupAndIndexIt (id+1) pws1
|
||||
where
|
||||
collect pws@((p1,w):pws1)
|
||||
| p == p1 = let (ws,pws2) = collect pws1
|
||||
in (w:ws,pws2)
|
||||
collect pws = ([],pws)
|
||||
|
||||
getLeaves parent bs =
|
||||
case bs of
|
||||
Leaf w -> [(parent,w)]
|
||||
Bracket cat fid _ fun bss -> concatMap (getLeaves (cat,fid,fun)) bss
|
||||
|
||||
mkNode ((_,p,_),i,w) =
|
||||
tag p <+> brackets (text "label = " <> doubleQuotes (int i <> char '.' <+> text w)) <+> semi
|
||||
|
||||
mkLink (x,(lbl,y)) = tag y <+> text "->" <+> tag x <+> text "[label = " <> doubleQuotes (text lbl) <> text "] ;"
|
||||
|
||||
labels = maybe Map.empty id mlab
|
||||
clabels = maybe [] id mclab
|
||||
|
||||
posCat cat = case Map.lookup cat labels of
|
||||
Just [p] -> p
|
||||
_ -> cat
|
||||
|
||||
getDeps n_fid xs e =
|
||||
case unAbs e of
|
||||
Just (_, x, e) -> getDeps n_fid (x:xs) e
|
||||
Nothing -> case unApp e of
|
||||
Just (f,es) -> let (n_fid_1,ds) = descend n_fid xs es
|
||||
(mb_h, deps) = selectHead f ds
|
||||
in case mb_h of
|
||||
Just (fid,deps0) -> (n_fid_1+1,(fid,deps0++
|
||||
[(n_fid_1,(dep_lbl,fid))]++
|
||||
concat [(m,(lbl,fid)):ds | (lbl,(m,ds)) <- deps]))
|
||||
Nothing -> (n_fid_1+1,(n_fid_1,concat [(m,(lbl,n_fid_1)):ds | (lbl,(m,ds)) <- deps]))
|
||||
Nothing -> (n_fid+1,(n_fid,[]))
|
||||
|
||||
descend n_fid xs es = mapAccumL (\n_fid e -> getDeps n_fid xs e) n_fid es
|
||||
|
||||
selectHead f ds =
|
||||
case Map.lookup f labels of
|
||||
Just lbls -> extractHead (zip lbls ds)
|
||||
Nothing -> extractLast ds
|
||||
where
|
||||
extractHead [] = (Nothing, [])
|
||||
extractHead (ld@(l,d):lds)
|
||||
| l == head_lbl = (Just d,lds)
|
||||
| otherwise = let (mb_h,deps) = extractHead lds
|
||||
in (mb_h,ld:deps)
|
||||
|
||||
extractLast [] = (Nothing, [])
|
||||
extractLast (d:ds)
|
||||
| null ds = (Just d,[])
|
||||
| otherwise = let (mb_h,deps) = extractLast ds
|
||||
in (mb_h,(dep_lbl,d):deps)
|
||||
|
||||
dep_lbl = "dep"
|
||||
head_lbl = "head"
|
||||
root_lbl = "ROOT"
|
||||
unspec = text "_"
|
||||
|
||||
|
||||
---------------------- should be a separate module?
|
||||
|
||||
-- visualization with latex output. AR Nov 2015
|
||||
|
||||
conlls2latexDoc :: [String] -> String
|
||||
conlls2latexDoc =
|
||||
render .
|
||||
latexDoc .
|
||||
vcat .
|
||||
intersperse (text "" $+$ app "vspace" (text "4mm")) .
|
||||
map conll2latex .
|
||||
filter (not . null)
|
||||
|
||||
conll2latex :: String -> Doc
|
||||
conll2latex = ppLaTeX . conll2latex' . parseCoNLL
|
||||
|
||||
conll2latex' :: CoNLL -> [LaTeX]
|
||||
conll2latex' = dep2latex . conll2dep'
|
||||
|
||||
data Dep = Dep {
|
||||
wordLength :: Int -> Double -- length of word at position int -- was: fixed width, millimetres (>= 20.0)
|
||||
, tokens :: [(String,String)] -- word, pos (0..)
|
||||
, deps :: [((Int,Int),String)] -- from, to, label
|
||||
, root :: Int -- root word position
|
||||
}
|
||||
|
||||
-- some general measures
|
||||
defaultWordLength = 20.0 -- the default fixed width word length, making word 100 units
|
||||
defaultUnit = 0.2 -- unit in latex pictures, 0.2 millimetres
|
||||
spaceLength = 10.0
|
||||
charWidth = 1.8
|
||||
|
||||
wsize rwld w = 100 * rwld w + spaceLength -- word length, units
|
||||
wpos rwld i = sum [wsize rwld j | j <- [0..i-1]] -- start position of the i'th word
|
||||
wdist rwld x y = sum [wsize rwld i | i <- [min x y .. max x y - 1]] -- distance between words x and y
|
||||
labelheight h = h + arcbase + 3 -- label just above arc; 25 would put it just below
|
||||
labelstart c = c - 15.0 -- label starts 15u left of arc centre
|
||||
arcbase = 30.0 -- arcs start and end 40u above the bottom
|
||||
arcfactor r = r * 600 -- reduction of arc size from word distance
|
||||
xyratio = 3 -- width/height ratio of arcs
|
||||
|
||||
putArc :: (Int -> Double) -> Int -> Int -> Int -> String -> [DrawingCommand]
|
||||
putArc frwld height x y label = [oval,arrowhead,labelling] where
|
||||
oval = Put (ctr,arcbase) (OvalTop (wdth,hght))
|
||||
arrowhead = Put (endp,arcbase + 5) (ArrowDown 5) -- downgoing arrow 5u above the arc base
|
||||
labelling = Put (labelstart ctr,labelheight (hght/2)) (TinyText label)
|
||||
dxy = wdist frwld x y -- distance between words, >>= 20.0
|
||||
ndxy = 100 * rwld * fromIntegral height -- distance that is indep of word length
|
||||
hdxy = dxy / 2 -- half the distance
|
||||
wdth = dxy - (arcfactor rwld)/dxy -- longer arcs are wider in proportion
|
||||
hght = ndxy / (xyratio * rwld) -- arc height is independent of word length
|
||||
begp = min x y -- begin position of oval
|
||||
ctr = wpos frwld begp + hdxy + (if x < y then 20 else 10) -- LR arcs are farther right from center of oval
|
||||
endp = (if x < y then (+) else (-)) ctr (wdth/2) -- the point of the arrow
|
||||
rwld = 0.5 ----
|
||||
|
||||
dep2latex :: Dep -> [LaTeX]
|
||||
dep2latex d =
|
||||
[Comment (unwords (map fst (tokens d))),
|
||||
Picture defaultUnit (width,height) (
|
||||
[Put (wpos rwld i,0) (Text w) | (i,w) <- zip [0..] (map fst (tokens d))] -- words
|
||||
++ [Put (wpos rwld i,15) (TinyText w) | (i,w) <- zip [0..] (map snd (tokens d))] -- pos tags 15u above bottom
|
||||
++ concat [putArc rwld (aheight x y) x y label | ((x,y),label) <- deps d] -- arcs and labels
|
||||
++ [Put (wpos rwld (root d) + 15,height) (ArrowDown (height-arcbase))]
|
||||
++ [Put (wpos rwld (root d) + 20,height - 10) (TinyText "ROOT")]
|
||||
)]
|
||||
where
|
||||
wld i = wordLength d i -- >= 20.0
|
||||
rwld i = (wld i) / defaultWordLength -- >= 1.0
|
||||
aheight x y = depth (min x y) (max x y) + 1 ---- abs (x-y)
|
||||
arcs = [(min u v, max u v) | ((u,v),_) <- deps d]
|
||||
depth x y = case [(u,v) | (u,v) <- arcs, (x < u && v <= y) || (x == u && v < y)] of ---- only projective arcs counted
|
||||
[] -> 0
|
||||
uvs -> 1 + maximum (0:[depth u v | (u,v) <- uvs])
|
||||
width = {-round-} (sum [wsize rwld w | (w,_) <- zip [0..] (tokens d)]) + {-round-} spaceLength * fromIntegral ((length (tokens d)) - 1)
|
||||
height = 50 + 20 * {-round-} (maximum (0:[aheight x y | ((x,y),_) <- deps d]))
|
||||
|
||||
type CoNLL = [[String]]
|
||||
parseCoNLL :: String -> CoNLL
|
||||
parseCoNLL = map words . lines
|
||||
|
||||
--conll2dep :: String -> Dep
|
||||
--conll2dep = conll2dep' . parseCoNLL
|
||||
|
||||
conll2dep' :: CoNLL -> Dep
|
||||
conll2dep' ls = Dep {
|
||||
wordLength = wld
|
||||
, tokens = toks
|
||||
, deps = dps
|
||||
, root = head $ [read x-1 | x:_:_:_:_:_:"0":_ <- ls] ++ [1]
|
||||
}
|
||||
where
|
||||
wld i = maximum (0:[charWidth * fromIntegral (length w) | w <- let (tok,pos) = toks !! i in [tok,pos]])
|
||||
toks = [(w,c) | _:w:_:c:_ <- ls]
|
||||
dps = [((read y-1, read x-1),lab) | x:_:_:_:_:_:y:lab:_ <- ls, y /="0"]
|
||||
--maxdist = maximum [abs (x-y) | ((x,y),_) <- dps]
|
||||
|
||||
|
||||
-- * LaTeX Pictures (see https://en.wikibooks.org/wiki/LaTeX/Picture)
|
||||
|
||||
-- We render both LaTeX and SVG from this intermediate representation of
|
||||
-- LaTeX pictures.
|
||||
|
||||
data LaTeX = Comment String | Picture UnitLengthMM Size [DrawingCommand]
|
||||
data DrawingCommand = Put Position Object
|
||||
data Object = Text String | TinyText String | OvalTop Size | ArrowDown Length
|
||||
|
||||
type UnitLengthMM = Double
|
||||
type Size = (Double,Double)
|
||||
type Position = (Double,Double)
|
||||
type Length = Double
|
||||
|
||||
|
||||
-- * latex formatting
|
||||
ppLaTeX = vcat . map ppLaTeX1
|
||||
where
|
||||
ppLaTeX1 el =
|
||||
case el of
|
||||
Comment s -> comment s
|
||||
Picture unit size cmds ->
|
||||
app "setlength{\\unitlength}" (text (show unit ++ "mm"))
|
||||
$$ hang (app "begin" (text "picture")<>text (show size)) 2
|
||||
(vcat (map ppDrawingCommand cmds))
|
||||
$$ app "end" (text "picture")
|
||||
$$ text ""
|
||||
|
||||
ppDrawingCommand (Put pos obj) = put pos (ppObject obj)
|
||||
|
||||
ppObject obj =
|
||||
case obj of
|
||||
Text s -> text s
|
||||
TinyText s -> small (text s)
|
||||
OvalTop size -> text "\\oval" <> text (show size) <> text "[t]"
|
||||
ArrowDown len -> app "vector(0,-1)" (text (show len))
|
||||
|
||||
put p@(_,_) = app ("put" ++ show p)
|
||||
small w = text "{\\tiny" <+> w <> text "}"
|
||||
comment s = text "%%" <+> text s -- line break show follow
|
||||
|
||||
app macro arg = text "\\" <> text macro <> text "{" <> arg <> text "}"
|
||||
|
||||
|
||||
latexDoc :: Doc -> Doc
|
||||
latexDoc body =
|
||||
vcat [text "\\documentclass{article}",
|
||||
text "\\usepackage[utf8]{inputenc}",
|
||||
text "\\begin{document}",
|
||||
body,
|
||||
text "\\end{document}"]
|
||||
|
||||
-- * SVG (see https://www.w3.org/Graphics/SVG/IG/resources/svgprimer.html)
|
||||
|
||||
-- | Render LaTeX pictures as SVG
|
||||
toSVG = concatMap toSVG1
|
||||
where
|
||||
toSVG1 el =
|
||||
case el of
|
||||
Comment s -> []
|
||||
Picture unit size@(w,h) cmds ->
|
||||
[Elem "svg" ["width".=x1,"height".=y0+5,
|
||||
("viewBox",unwords (map show [0,0,x1,y0+5])),
|
||||
("version","1.1"),
|
||||
("xmlns","http://www.w3.org/2000/svg")]
|
||||
(white_bg:concatMap draw cmds)]
|
||||
where
|
||||
white_bg =
|
||||
Elem "rect" ["x".=0,"y".=0,"width".=x1,"height".=y0+5,
|
||||
("fill","white")] []
|
||||
|
||||
draw (Put pos obj) = objectSVG pos obj
|
||||
|
||||
objectSVG pos obj =
|
||||
case obj of
|
||||
Text s -> [text 16 pos s]
|
||||
TinyText s -> [text 10 pos s]
|
||||
OvalTop size -> [ovalTop pos size]
|
||||
ArrowDown len -> arrowDown pos len
|
||||
|
||||
text h (x,y) s =
|
||||
Elem "text" ["x".=xc x,"y".=yc y-2,"font-size".=h]
|
||||
[CharData s]
|
||||
|
||||
ovalTop (x,y) (w,h) =
|
||||
Elem "path" [("d",path),("stroke","black"),("fill","none")] []
|
||||
where
|
||||
x1 = x-w/2
|
||||
x2 = min x (x1+r)
|
||||
x3 = max x (x4-r)
|
||||
x4 = x+w/2
|
||||
y1 = y
|
||||
y2 = y+r
|
||||
r = h/2
|
||||
sx = show . xc
|
||||
sy = show . yc
|
||||
path = unwords (["M",sx x1,sy y1,"Q",sx x1,sy y2,sx x2,sy y2,
|
||||
"L",sx x3,sy y2,"Q",sx x4,sy y2,sx x4,sy y1])
|
||||
|
||||
arrowDown (x,y) len =
|
||||
[Elem "line" ["x1".=xc x,"y1".=yc y,"x2".=xc x,"y2".=y2,
|
||||
("stroke","black")] [],
|
||||
Elem "path" [("d",unwords arrowhead)] []]
|
||||
where
|
||||
x2 = xc x
|
||||
y2 = yc (y-len)
|
||||
arrowhead = "M":map show [x2,y2,x2-3,y2-6,x2+3,y2-6]
|
||||
|
||||
xc x = num x+5
|
||||
yc y = y0-num y
|
||||
x1 = num w+10
|
||||
y0 = num h+20
|
||||
num x = round (scale*x)
|
||||
scale = unit*5
|
||||
|
||||
infix 0 .=
|
||||
n.=v = (n,show v)
|
||||
|
||||
-- * SVG is XML
|
||||
|
||||
data SVG = CharData String | Elem TagName Attrs [SVG]
|
||||
type TagName = String
|
||||
type Attrs = [(String,String)]
|
||||
|
||||
ppSVG svg =
|
||||
vcat [text "<?xml version=\"1.0\" standalone=\"no\"?>",
|
||||
text "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"",
|
||||
text "\"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">",
|
||||
text "",
|
||||
vcat (map ppSVG1 svg)] -- It should be a single <svg> element...
|
||||
where
|
||||
ppSVG1 svg1 =
|
||||
case svg1 of
|
||||
CharData s -> text (encode s)
|
||||
Elem tag attrs [] ->
|
||||
text "<"<>text tag<>cat (map attr attrs) <> text "/>"
|
||||
Elem tag attrs svg ->
|
||||
cat [text "<"<>text tag<>cat (map attr attrs) <> text ">",
|
||||
nest 2 (cat (map ppSVG1 svg)),
|
||||
text "</"<>text tag<>text ">"]
|
||||
|
||||
attr (n,v) = text " "<>text n<>text "=\""<>text (encode v)<>text "\""
|
||||
|
||||
encode s = foldr encodeEntity "" s
|
||||
|
||||
encodeEntity = encodeEntity' (const False)
|
||||
encodeEntity' esc c r =
|
||||
case c of
|
||||
'&' -> "&"++r
|
||||
'<' -> "<"++r
|
||||
'>' -> ">"++r
|
||||
_ -> c:r
|
||||
|
||||
|
||||
----------------------------------
|
||||
-- concrete syntax annotations (local) on top of conll
|
||||
-- examples of annotations:
|
||||
-- UseComp {"not"} PART neg head
|
||||
-- UseComp {*} AUX cop head
|
||||
|
||||
type CncLabels = [(String, String -> Maybe (String -> String,String,String))]
|
||||
-- (fun, word -> (pos,label,target))
|
||||
-- the pos can remain unchanged, as in the current notation in the article
|
||||
|
||||
fixCoNLL :: CncLabels -> CoNLL -> CoNLL
|
||||
fixCoNLL labels conll = map fixc conll where
|
||||
fixc row = case row of
|
||||
(i:word:fun:pos:cat:x_:"0":"dep":xs) -> (i:word:fun:pos:cat:x_:"0":"root":xs) --- change the root label from dep to root
|
||||
(i:word:fun:pos:cat:x_:j:label:xs) -> case look (fun,word) of
|
||||
Just (pos',label',"head") -> (i:word:fun:pos' pos:cat:x_:j :label':xs)
|
||||
Just (pos',label',target) -> (i:word:fun:pos' pos:cat:x_: getDep j target:label':xs)
|
||||
_ -> row
|
||||
_ -> row
|
||||
|
||||
look (fun,word) = case lookup fun labels of
|
||||
Just relabel -> case relabel word of
|
||||
Just row -> Just row
|
||||
_ -> case lookup "*" labels of
|
||||
Just starlabel -> starlabel word
|
||||
_ -> Nothing
|
||||
_ -> case lookup "*" labels of
|
||||
Just starlabel -> starlabel word
|
||||
_ -> Nothing
|
||||
|
||||
getDep j label = maybe j id $ lookup (label,j) [((label,j),i) | i:word:fun:pos:cat:x_:j:label:xs <- conll]
|
||||
|
||||
getCncDepLabels :: String -> CncLabels
|
||||
getCncDepLabels = map merge . groupBy (\ (x,_) (a,_) -> x == a) . concatMap analyse . filter choose . lines where
|
||||
--- choose is for compatibility with the general notation
|
||||
choose line = notElem '(' line && elem '{' line --- ignoring non-local (with "(") and abstract (without "{") rules
|
||||
|
||||
analyse line = case break (=='{') line of
|
||||
(beg,_:ws) -> case break (=='}') ws of
|
||||
(toks,_:target) -> case (words beg, words target) of
|
||||
(fun:_,[ label,j]) -> [(fun, (tok, (id, label,j))) | tok <- getToks toks]
|
||||
(fun:_,[pos,label,j]) -> [(fun, (tok, (const pos,label,j))) | tok <- getToks toks]
|
||||
_ -> []
|
||||
_ -> []
|
||||
_ -> []
|
||||
merge rules@((fun,_):_) = (fun, \tok ->
|
||||
case lookup tok (map snd rules) of
|
||||
Just new -> return new
|
||||
_ -> lookup "*" (map snd rules)
|
||||
)
|
||||
getToks = words . map (\c -> if elem c "\"," then ' ' else c)
|
||||
|
||||
printCoNLL :: CoNLL -> String
|
||||
printCoNLL = unlines . map (concat . intersperse "\t")
|
||||
|
||||
|
||||
newGraphvizOptions :: Ptr GuPool -> GraphvizOptions -> IO (Ptr PgfGraphvizOptions)
|
||||
newGraphvizOptions pool opts = do
|
||||
c_opts <- gu_malloc pool (#size PgfGraphvizOptions)
|
||||
@@ -542,7 +969,7 @@ parseWithHeuristics :: Concr -- ^ the language with which we parse
|
||||
-- If a literal has been recognized then the output should
|
||||
-- be Just (expr,probability,end_offset)
|
||||
-> ParseOutput
|
||||
parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
|
||||
parseWithHeuristics lang (Type ctype _) sent heuristic callbacks =
|
||||
unsafePerformIO $
|
||||
do exprPl <- gu_new_pool
|
||||
parsePl <- gu_new_pool
|
||||
@@ -550,7 +977,6 @@ parseWithHeuristics lang (Type ctype touchType) sent heuristic callbacks =
|
||||
sent <- newUtf8CString sent parsePl
|
||||
callbacks_map <- mkCallbacksMap (concr lang) callbacks parsePl
|
||||
enum <- pgf_parse_with_heuristics (concr lang) ctype sent heuristic callbacks_map exn parsePl exprPl
|
||||
touchType
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_parse_error <- gu_exn_caught exn gu_exn_type_PgfParseError
|
||||
@@ -618,6 +1044,26 @@ mkCallbacksMap concr callbacks pool = do
|
||||
|
||||
predict_callback _ _ _ = return nullPtr
|
||||
|
||||
complete :: Concr -- ^ the language with which we do word completion
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
-> String -- ^ prefix for the word to be completed
|
||||
-> [(String, Cat, Fun, Float)]
|
||||
complete lang (Type ctype _) sent prefix =
|
||||
unsafePerformIO $
|
||||
do pl <- gu_new_pool
|
||||
exn <- gu_new_exn pl
|
||||
sent <- newUtf8CString sent pl
|
||||
prefix <- newUtf8CString prefix pl
|
||||
enum <- pgf_complete (concr lang) ctype sent prefix exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do gu_pool_free pl
|
||||
return []
|
||||
else do fpl <- newForeignPtr gu_pool_finalizer pl
|
||||
tokens <- fromPgfTokenEnum enum fpl
|
||||
return tokens
|
||||
|
||||
lookupSentence :: Concr -- ^ the language with which we parse
|
||||
-> Type -- ^ the start category
|
||||
-> String -- ^ the input sentence
|
||||
@@ -862,9 +1308,8 @@ type LIndex = Int
|
||||
-- mark the beginning and the end of each constituent.
|
||||
data BracketedString
|
||||
= Leaf String -- ^ this is the leaf i.e. a single token
|
||||
| BIND -- ^ the surrounding tokens must be bound together
|
||||
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString]
|
||||
-- ^ this is a bracket. The 'CId' is the category of
|
||||
| Bracket Cat {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex Fun [BracketedString]
|
||||
-- ^ this is a bracket. The 'Cat' is the category of
|
||||
-- the phrase. The 'FId' is an unique identifier for
|
||||
-- every phrase in the sentence. For context-free grammars
|
||||
-- i.e. without discontinuous constituents this identifier
|
||||
@@ -875,7 +1320,7 @@ data BracketedString
|
||||
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
|
||||
-- then the constituent indices will be the same for all brackets
|
||||
-- that represents the same constituent.
|
||||
-- The second 'CId' is the name of the abstract function that generated
|
||||
-- The 'Fun' is the name of the abstract function that generated
|
||||
-- this phrase.
|
||||
|
||||
-- | Renders the bracketed string as a string where
|
||||
@@ -885,13 +1330,11 @@ showBracketedString :: BracketedString -> String
|
||||
showBracketedString = render . ppBracketedString
|
||||
|
||||
ppBracketedString (Leaf t) = text t
|
||||
ppBracketedString BIND = text "&+"
|
||||
ppBracketedString (Bracket cat fid index _ bss) = parens (text cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||
|
||||
-- | Extracts the sequence of tokens from the bracketed string
|
||||
flattenBracketedString :: BracketedString -> [String]
|
||||
flattenBracketedString (Leaf w) = [w]
|
||||
flattenBracketedString BIND = []
|
||||
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||
|
||||
bracketedLinearize :: Concr -> Expr -> [BracketedString]
|
||||
@@ -909,8 +1352,27 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
return []
|
||||
else do ctree <- pgf_lzr_wrap_linref ctree pl
|
||||
ref <- newIORef ([],[])
|
||||
withBracketLinFuncs ref exn $ \ppLinFuncs ->
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
|
||||
alloca $ \ppLinFuncs -> do
|
||||
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
|
||||
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
|
||||
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
|
||||
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
|
||||
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
|
||||
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
|
||||
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
|
||||
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
|
||||
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
|
||||
(#poke PgfLinFuncs, symbol_bind) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
|
||||
poke ppLinFuncs pLinFuncs
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
freeHaskellFunPtr fptr_symbol_token
|
||||
freeHaskellFunPtr fptr_begin_phrase
|
||||
freeHaskellFunPtr fptr_end_phrase
|
||||
freeHaskellFunPtr fptr_symbol_ne
|
||||
freeHaskellFunPtr fptr_symbol_meta
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
|
||||
@@ -919,65 +1381,6 @@ bracketedLinearize lang e = unsafePerformIO $
|
||||
else throwExn exn
|
||||
else do (_,bs) <- readIORef ref
|
||||
return (reverse bs)
|
||||
|
||||
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
|
||||
bracketedLinearizeAll lang e = unsafePerformIO $
|
||||
withGuPool $ \pl ->
|
||||
do exn <- gu_new_exn pl
|
||||
cts <- pgf_lzr_concretize (concr lang) (expr e) exn pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do touchExpr e
|
||||
throwExn exn
|
||||
else do ref <- newIORef ([],[])
|
||||
bss <- withBracketLinFuncs ref exn $ \ppLinFuncs ->
|
||||
collect ref cts ppLinFuncs exn pl
|
||||
touchExpr e
|
||||
return bss
|
||||
where
|
||||
collect ref cts ppLinFuncs exn pl = withGuPool $ \tmpPl -> do
|
||||
ctree <- alloca $ \ptr -> do gu_enum_next cts ptr tmpPl
|
||||
peek ptr
|
||||
if ctree == nullPtr
|
||||
then return []
|
||||
else do ctree <- pgf_lzr_wrap_linref ctree pl
|
||||
pgf_lzr_linearize (concr lang) ctree 0 ppLinFuncs pl
|
||||
failed <- gu_exn_is_raised exn
|
||||
if failed
|
||||
then do is_nonexist <- gu_exn_caught exn gu_exn_type_PgfLinNonExist
|
||||
if is_nonexist
|
||||
then collect ref cts ppLinFuncs exn pl
|
||||
else throwExn exn
|
||||
else do (_,bs) <- readIORef ref
|
||||
writeIORef ref ([],[])
|
||||
bss <- collect ref cts ppLinFuncs exn pl
|
||||
return (reverse bs : bss)
|
||||
|
||||
withBracketLinFuncs ref exn f =
|
||||
allocaBytes (#size PgfLinFuncs) $ \pLinFuncs ->
|
||||
alloca $ \ppLinFuncs -> do
|
||||
fptr_symbol_token <- wrapSymbolTokenCallback (symbol_token ref)
|
||||
fptr_begin_phrase <- wrapPhraseCallback (begin_phrase ref)
|
||||
fptr_end_phrase <- wrapPhraseCallback (end_phrase ref)
|
||||
fptr_symbol_ne <- wrapSymbolNonExistCallback (symbol_ne exn)
|
||||
fptr_symbol_bind <- wrapSymbolBindCallback (symbol_bind ref)
|
||||
fptr_symbol_meta <- wrapSymbolMetaCallback (symbol_meta ref)
|
||||
(#poke PgfLinFuncs, symbol_token) pLinFuncs fptr_symbol_token
|
||||
(#poke PgfLinFuncs, begin_phrase) pLinFuncs fptr_begin_phrase
|
||||
(#poke PgfLinFuncs, end_phrase) pLinFuncs fptr_end_phrase
|
||||
(#poke PgfLinFuncs, symbol_ne) pLinFuncs fptr_symbol_ne
|
||||
(#poke PgfLinFuncs, symbol_bind) pLinFuncs fptr_symbol_bind
|
||||
(#poke PgfLinFuncs, symbol_capit) pLinFuncs nullPtr
|
||||
(#poke PgfLinFuncs, symbol_meta) pLinFuncs fptr_symbol_meta
|
||||
poke ppLinFuncs pLinFuncs
|
||||
res <- f ppLinFuncs
|
||||
freeHaskellFunPtr fptr_symbol_token
|
||||
freeHaskellFunPtr fptr_begin_phrase
|
||||
freeHaskellFunPtr fptr_end_phrase
|
||||
freeHaskellFunPtr fptr_symbol_ne
|
||||
freeHaskellFunPtr fptr_symbol_bind
|
||||
freeHaskellFunPtr fptr_symbol_meta
|
||||
return res
|
||||
where
|
||||
symbol_token ref _ c_token = do
|
||||
(stack,bs) <- readIORef ref
|
||||
@@ -1000,22 +1403,17 @@ withBracketLinFuncs ref exn f =
|
||||
gu_exn_raise exn gu_exn_type_PgfLinNonExist
|
||||
return ()
|
||||
|
||||
symbol_bind ref _ = do
|
||||
(stack,bs) <- readIORef ref
|
||||
writeIORef ref (stack,BIND : bs)
|
||||
return ()
|
||||
|
||||
symbol_meta ref _ meta_id = do
|
||||
(stack,bs) <- readIORef ref
|
||||
writeIORef ref (stack,Leaf "?" : bs)
|
||||
|
||||
throwExn exn = do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else do throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
throwExn exn = do
|
||||
is_exn <- gu_exn_caught exn gu_exn_type_PgfExn
|
||||
if is_exn
|
||||
then do c_msg <- (#peek GuExn, data.data) exn
|
||||
msg <- peekUtf8CString c_msg
|
||||
throwIO (PGFError msg)
|
||||
else do throwIO (PGFError "The abstract tree cannot be linearized")
|
||||
|
||||
alignWords :: Concr -> Expr -> [(String, [Int])]
|
||||
alignWords lang e = unsafePerformIO $
|
||||
@@ -1128,16 +1526,17 @@ categories p =
|
||||
name <- peekUtf8CString (castPtr key)
|
||||
writeIORef ref $! (name : names)
|
||||
|
||||
categoryContext :: PGF -> Cat -> [Hypo]
|
||||
categoryContext :: PGF -> Cat -> Maybe [Hypo]
|
||||
categoryContext p cat =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
do c_cat <- newUtf8CString cat tmpPl
|
||||
c_hypos <- pgf_category_context (pgf p) c_cat
|
||||
if c_hypos == nullPtr
|
||||
then return []
|
||||
then return Nothing
|
||||
else do n_hypos <- (#peek GuSeq, len) c_hypos
|
||||
peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
|
||||
hypos <- peekHypos (c_hypos `plusPtr` (#offset GuSeq, data)) 0 n_hypos
|
||||
return (Just hypos)
|
||||
where
|
||||
peekHypos :: Ptr a -> Int -> Int -> IO [Hypo]
|
||||
peekHypos c_hypo i n
|
||||
@@ -1152,8 +1551,8 @@ categoryContext p cat =
|
||||
toBindType (#const PGF_BIND_TYPE_EXPLICIT) = Explicit
|
||||
toBindType (#const PGF_BIND_TYPE_IMPLICIT) = Implicit
|
||||
|
||||
categoryProb :: PGF -> Cat -> Float
|
||||
categoryProb p cat =
|
||||
categoryProbability :: PGF -> Cat -> Float
|
||||
categoryProbability p cat =
|
||||
unsafePerformIO $
|
||||
withGuPool $ \tmpPl ->
|
||||
do c_cat <- newUtf8CString cat tmpPl
|
||||
@@ -1164,7 +1563,7 @@ categoryProb p cat =
|
||||
-----------------------------------------------------------------------------
|
||||
-- Helper functions
|
||||
|
||||
fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO () -> IO [(Expr, Float)]
|
||||
fromPgfExprEnum :: Ptr GuEnum -> ForeignPtr GuPool -> Touch -> IO [(Expr, Float)]
|
||||
fromPgfExprEnum enum fpl touch =
|
||||
do pgfExprProb <- alloca $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
@@ -1178,6 +1577,22 @@ fromPgfExprEnum enum fpl touch =
|
||||
prob <- (#peek PgfExprProb, prob) pgfExprProb
|
||||
return ((Expr expr touch,prob) : ts)
|
||||
|
||||
fromPgfTokenEnum :: Ptr GuEnum -> ForeignPtr GuPool -> IO [(String, Cat, Fun, Float)]
|
||||
fromPgfTokenEnum enum fpl =
|
||||
do pgfTokenProb <- alloca $ \ptr ->
|
||||
withForeignPtr fpl $ \pl ->
|
||||
do gu_enum_next enum ptr pl
|
||||
peek ptr
|
||||
if pgfTokenProb == nullPtr
|
||||
then do finalizeForeignPtr fpl
|
||||
return []
|
||||
else do tok <- (#peek PgfTokenProb, tok) pgfTokenProb >>= peekUtf8CString
|
||||
cat <- (#peek PgfTokenProb, cat) pgfTokenProb >>= peekUtf8CString
|
||||
fun <- (#peek PgfTokenProb, fun) pgfTokenProb >>= peekUtf8CString
|
||||
prob <- (#peek PgfTokenProb, prob) pgfTokenProb
|
||||
ts <- unsafeInterleaveIO (fromPgfTokenEnum enum fpl)
|
||||
return ((tok,cat,fun,prob) : ts)
|
||||
|
||||
-----------------------------------------------------------------------
|
||||
-- Exceptions
|
||||
|
||||
@@ -1256,3 +1671,7 @@ capitalized' test s@(c:_) | test c =
|
||||
case span isSpace rest1 of
|
||||
(space,rest2) -> Just (name++space,rest2)
|
||||
capitalized' not s = Nothing
|
||||
|
||||
tag i
|
||||
| i < 0 = char 'r' <> int (negate i)
|
||||
| otherwise = char 'n' <> int i
|
||||
|
||||
Reference in New Issue
Block a user