forked from GitHub/gf-core
copy the types for BracketedString from the Haskell runtime to the Haskell bindings
This commit is contained in:
@@ -44,6 +44,8 @@ module PGF2 (-- * PGF
|
|||||||
ConcName,Concr,languages,
|
ConcName,Concr,languages,
|
||||||
-- ** Linearization
|
-- ** Linearization
|
||||||
linearize,linearizeAll,
|
linearize,linearizeAll,
|
||||||
|
FId, LIndex, BracketedString(..), showBracketedString, flattenBracketedString,
|
||||||
|
|
||||||
alignWords,
|
alignWords,
|
||||||
-- ** Parsing
|
-- ** Parsing
|
||||||
parse, parseWithHeuristics,
|
parse, parseWithHeuristics,
|
||||||
@@ -65,6 +67,7 @@ import Prelude hiding (fromEnum)
|
|||||||
import Control.Exception(Exception,throwIO)
|
import Control.Exception(Exception,throwIO)
|
||||||
import Control.Monad(forM_)
|
import Control.Monad(forM_)
|
||||||
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
|
import System.IO.Unsafe(unsafePerformIO,unsafeInterleaveIO)
|
||||||
|
import Text.PrettyPrint
|
||||||
import PGF2.Expr
|
import PGF2.Expr
|
||||||
import PGF2.FFI
|
import PGF2.FFI
|
||||||
|
|
||||||
@@ -541,6 +544,43 @@ linearizeAll lang e = unsafePerformIO $
|
|||||||
else do gu_pool_free pl
|
else do gu_pool_free pl
|
||||||
throwIO (PGFError "The abstract tree cannot be linearized")
|
throwIO (PGFError "The abstract tree cannot be linearized")
|
||||||
|
|
||||||
|
type FId = Int
|
||||||
|
type LIndex = Int
|
||||||
|
|
||||||
|
-- | BracketedString represents a sentence that is linearized
|
||||||
|
-- as usual but we also want to retain the ''brackets'' that
|
||||||
|
-- mark the beginning and the end of each constituent.
|
||||||
|
data BracketedString
|
||||||
|
= Leaf String -- ^ this is the leaf i.e. a single token
|
||||||
|
| Bracket CId {-# UNPACK #-} !FId {-# UNPACK #-} !LIndex CId [BracketedString]
|
||||||
|
-- ^ this is a bracket. The 'CId' is the category of
|
||||||
|
-- the phrase. The 'FId' is an unique identifier for
|
||||||
|
-- every phrase in the sentence. For context-free grammars
|
||||||
|
-- i.e. without discontinuous constituents this identifier
|
||||||
|
-- is also unique for every bracket. When there are discontinuous
|
||||||
|
-- phrases then the identifiers are unique for every phrase but
|
||||||
|
-- not for every bracket since the bracket represents a constituent.
|
||||||
|
-- The different constituents could still be distinguished by using
|
||||||
|
-- the constituent index i.e. 'LIndex'. If the grammar is reduplicating
|
||||||
|
-- then the constituent indices will be the same for all brackets
|
||||||
|
-- that represents the same constituent.
|
||||||
|
-- The second 'CId' is the name of the abstract function that generated
|
||||||
|
-- this phrase.
|
||||||
|
|
||||||
|
-- | Renders the bracketed string as a string where
|
||||||
|
-- the brackets are shown as @(S ...)@ where
|
||||||
|
-- @S@ is the category.
|
||||||
|
showBracketedString :: BracketedString -> String
|
||||||
|
showBracketedString = render . ppBracketedString
|
||||||
|
|
||||||
|
ppBracketedString (Leaf t) = text t
|
||||||
|
ppBracketedString (Bracket cat fid index _ bss) = parens (ppCId cat <> colon <> int fid <+> hsep (map ppBracketedString bss))
|
||||||
|
|
||||||
|
-- | Extracts the sequence of tokens from the bracketed string
|
||||||
|
flattenBracketedString :: BracketedString -> [String]
|
||||||
|
flattenBracketedString (Leaf w) = [w]
|
||||||
|
flattenBracketedString (Bracket _ _ _ _ bss) = concatMap flattenBracketedString bss
|
||||||
|
|
||||||
alignWords :: Concr -> Expr -> [(String, [Int])]
|
alignWords :: Concr -> Expr -> [(String, [Int])]
|
||||||
alignWords lang e = unsafePerformIO $
|
alignWords lang e = unsafePerformIO $
|
||||||
withGuPool $ \pl ->
|
withGuPool $ \pl ->
|
||||||
|
|||||||
Reference in New Issue
Block a user