mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-30 06:52:49 -06:00
"Committed_by_peb"
This commit is contained in:
50
src/GF/Formalism/CFG.hs
Normal file
50
src/GF/Formalism/CFG.hs
Normal file
@@ -0,0 +1,50 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:49 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- CFG formalism
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Formalism.CFG where
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.Print
|
||||
import GF.Data.Assoc (accumAssoc)
|
||||
import GF.Data.SortedList (groupPairs)
|
||||
import GF.Data.Utilities (mapSnd)
|
||||
|
||||
------------------------------------------------------------
|
||||
-- type definitions
|
||||
|
||||
type CFGrammar c n t = [CFRule c n t]
|
||||
data CFRule c n t = CFRule c [Symbol c t] n
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type CFChart c n t = CFGrammar (Edge c) n t
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- building syntax charts from grammars
|
||||
|
||||
grammar2chart :: (Ord n, Ord e) => CFGrammar e n t -> SyntaxChart n e
|
||||
grammar2chart cfchart = accumAssoc groupPairs $
|
||||
[ (lhs, (name, filterCats rhs)) |
|
||||
CFRule lhs rhs name <- cfchart ]
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- pretty-printing
|
||||
|
||||
instance (Print n, Print c, Print t) => Print (CFRule c n t) where
|
||||
prt (CFRule cat rhs name) = prt name ++ " : " ++ prt cat ++
|
||||
( if null rhs then ""
|
||||
else " --> " ++ prtSep " " rhs )
|
||||
prtList = prtSep "\n"
|
||||
|
||||
|
||||
45
src/GF/Formalism/GCFG.hs
Normal file
45
src/GF/Formalism/GCFG.hs
Normal file
@@ -0,0 +1,45 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:50 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- Basic GCFG formalism (derived from Pollard 1984)
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Formalism.GCFG
|
||||
( Grammar, Rule(..), Abstract(..), Concrete(..)
|
||||
) where
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
type Grammar c n l t = [Rule c n l t]
|
||||
data Rule c n l t = Rule (Abstract c n) (Concrete l t)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data Abstract cat name = Abs cat [cat] name
|
||||
deriving (Eq, Ord, Show)
|
||||
data Concrete lin term = Cnc lin [lin] term
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
instance (Print c, Print n, Print l, Print t) => Print (Rule n c l t) where
|
||||
prt (Rule abs cnc) = prt abs ++ " := " ++ prt cnc
|
||||
prtList = prtSep "\n"
|
||||
|
||||
instance (Print c, Print n) => Print (Abstract c n) where
|
||||
prt (Abs cat args name) = prt name ++ ". " ++ prt cat ++
|
||||
( if null args then ""
|
||||
else " -> " ++ prtSep " " args )
|
||||
|
||||
instance (Print l, Print t) => Print (Concrete l t) where
|
||||
prt (Cnc lcat args term) = prt term ++ " : " ++ prt lcat ++
|
||||
( if null args then ""
|
||||
else " / " ++ prtSep " " args)
|
||||
47
src/GF/Formalism/MCFG.hs
Normal file
47
src/GF/Formalism/MCFG.hs
Normal file
@@ -0,0 +1,47 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:50 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- Definitions of multiple context-free grammars
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Formalism.MCFG where
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
------------------------------------------------------------
|
||||
-- grammar types
|
||||
|
||||
-- | the lables in the linearization record should be in the same
|
||||
-- order as specified by the linearization type @[lbl]@
|
||||
type MCFGrammar cat name lbl tok = Grammar cat name [lbl] [Lin cat lbl tok]
|
||||
type MCFRule cat name lbl tok = Rule cat name [lbl] [Lin cat lbl tok]
|
||||
|
||||
-- | variants are encoded as several linearizations with the same label
|
||||
data Lin cat lbl tok = Lin lbl [Symbol (cat, lbl, Int) tok]
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
instantiateArgs :: [cat] -> Lin cat' lbl tok -> Lin cat lbl tok
|
||||
instantiateArgs args (Lin lbl lin) = Lin lbl (map instSym lin)
|
||||
where instSym = mapSymbol instCat id
|
||||
instCat (_, lbl, nr) = (args !! nr, lbl, nr)
|
||||
|
||||
------------------------------------------------------------
|
||||
-- pretty-printing
|
||||
|
||||
instance (Print c, Print l, Print t) => Print (Lin c l t) where
|
||||
prt (Lin lbl lin) = prt lbl ++ " = " ++ prtSep " " (map (symbol prArg (show.prt)) lin)
|
||||
where prArg (cat, lbl, nr) = prt cat ++ "@" ++ prt nr ++ prt lbl
|
||||
prtList = prtBefore "\n\t"
|
||||
|
||||
|
||||
|
||||
217
src/GF/Formalism/SimpleGFC.hs
Normal file
217
src/GF/Formalism/SimpleGFC.hs
Normal file
@@ -0,0 +1,217 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:50 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- Simplistic GFC format
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Formalism.SimpleGFC where
|
||||
|
||||
import Monad (liftM)
|
||||
import qualified AbsGFC
|
||||
import qualified Ident
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Infra.Print
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
-- * basic (leaf) types
|
||||
|
||||
type Name = Ident.Ident
|
||||
type Cat = Ident.Ident
|
||||
type Constr = AbsGFC.CIdent
|
||||
type Var = Ident.Ident
|
||||
type Token = String
|
||||
type Label = AbsGFC.Label
|
||||
|
||||
-- ** type coercions etc
|
||||
|
||||
constr2name :: Constr -> Name
|
||||
constr2name (AbsGFC.CIQ _ name) = name
|
||||
|
||||
anyVar :: Var
|
||||
anyVar = Ident.wildIdent
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
-- * simple GFC
|
||||
|
||||
type SimpleGrammar = Grammar Decl Name LinType (Maybe Term)
|
||||
type SimpleRule = Rule Decl Name LinType (Maybe Term)
|
||||
|
||||
-- ** dependent type declarations
|
||||
|
||||
data Decl = Var ::: Type
|
||||
deriving (Eq, Ord, Show)
|
||||
data Type = Cat :@ [Atom]
|
||||
deriving (Eq, Ord, Show)
|
||||
data Atom = ACon Constr
|
||||
| AVar Var
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
decl2cat :: Decl -> Cat
|
||||
decl2cat (_ ::: (cat :@ _)) = cat
|
||||
|
||||
-- ** linearization types and terms
|
||||
|
||||
data LinType = RecT [(Label, LinType)]
|
||||
| TblT LinType LinType
|
||||
| ConT Constr [Term]
|
||||
| StrT
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
isBaseType :: LinType -> Bool
|
||||
isBaseType (ConT _ _) = True
|
||||
isBaseType (StrT) = True
|
||||
isBaseType _ = False
|
||||
|
||||
data Term = Arg Int Cat Path -- ^ argument variable, the 'Path' is a path
|
||||
-- pointing into the term
|
||||
| Constr :^ [Term] -- ^ constructor
|
||||
| Rec [(Label, Term)] -- ^ record
|
||||
| Term :. Label -- ^ record projection
|
||||
| Tbl [(Term, Term)] -- ^ table of patterns\/terms
|
||||
| Term :! Term -- ^ table selection
|
||||
| Variants [Term] -- ^ variants
|
||||
| Term :++ Term -- ^ concatenation
|
||||
| Token Token -- ^ single token
|
||||
| Empty -- ^ empty string
|
||||
| Wildcard -- ^ wildcard pattern variable
|
||||
| Var Var -- ^ bound pattern variable
|
||||
|
||||
-- Res CIdent -- resource identifier
|
||||
-- Int Integer -- integer
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
-- ** calculations on terms
|
||||
|
||||
(+.) :: Term -> Label -> Term
|
||||
Variants terms +. lbl = variants $ map (+. lbl) terms
|
||||
Rec record +. lbl = maybe err id $ lookup lbl record
|
||||
where err = error $ "(+.), label not in record: " ++ show (Rec record) ++ " +. " ++ show lbl
|
||||
Arg arg cat path +. lbl = Arg arg cat (path ++. lbl)
|
||||
term +. lbl = term :. lbl
|
||||
|
||||
(+!) :: Term -> Term -> Term
|
||||
Variants terms +! pat = variants $ map (+! pat) terms
|
||||
term +! Variants pats = variants $ map (term +!) pats
|
||||
term +! arg@(Arg _ _ _) = term :! arg
|
||||
Tbl table +! pat = maybe err id $ lookup pat table
|
||||
where err = error $ "(+!), pattern not in table: " ++ show (Tbl table) ++ " +! " ++ show pat
|
||||
Arg arg cat path +! pat = Arg arg cat (path ++! pat)
|
||||
term +! pat = term :! pat
|
||||
|
||||
(?++) :: Term -> Term -> Term
|
||||
Variants terms ?++ term = variants $ map (?++ term) terms
|
||||
term ?++ Variants terms = variants $ map (term ?++) terms
|
||||
Empty ?++ term = term
|
||||
term ?++ Empty = term
|
||||
term1 ?++ term2 = term1 :++ term2
|
||||
|
||||
variants :: [Term] -> Term
|
||||
variants terms0 = case concatMap flatten terms0 of
|
||||
[term] -> term
|
||||
terms -> Variants terms
|
||||
where flatten (Variants ts) = ts
|
||||
flatten t = [t]
|
||||
|
||||
-- ** enumerations
|
||||
|
||||
enumerateTerms :: Maybe Term -> LinType -> [Term]
|
||||
enumerateTerms arg (StrT) = maybe err return arg
|
||||
where err = error "enumeratePatterns: parameter type should not be string"
|
||||
enumerateTerms arg (ConT _ terms) = terms
|
||||
enumerateTerms arg (RecT rtype)
|
||||
= liftM Rec $ mapM enumAssign rtype
|
||||
where enumAssign (lbl, ctype) = liftM ((,) lbl) $ enumerateTerms arg ctype
|
||||
enumerateTerms arg (TblT ptype ctype)
|
||||
= liftM Tbl $ mapM enumCase $ enumeratePatterns ptype
|
||||
where enumCase pat = liftM ((,) pat) $ enumerateTerms (fmap (+! pat) arg) ctype
|
||||
|
||||
enumeratePatterns :: LinType -> [Term]
|
||||
enumeratePatterns = enumerateTerms Nothing
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
-- * paths of record projections and table selections
|
||||
|
||||
newtype Path = Path [Either Label Term] deriving (Eq, Ord, Show)
|
||||
|
||||
emptyPath :: Path
|
||||
emptyPath = Path []
|
||||
|
||||
-- ** calculations on paths
|
||||
|
||||
(++.) :: Path -> Label -> Path
|
||||
Path path ++. lbl = Path (Left lbl : path)
|
||||
|
||||
(++!) :: Path -> Term -> Path
|
||||
Path path ++! sel = Path (Right sel : path)
|
||||
|
||||
lintypeFollowPath :: Path -> LinType -> LinType
|
||||
lintypeFollowPath (Path path) = follow path
|
||||
where follow [] ctype = ctype
|
||||
follow (Right pat : path) (TblT _ ctype) = follow path ctype
|
||||
follow (Left lbl : path) (RecT rec)
|
||||
= maybe err (follow path) $ lookup lbl rec
|
||||
where err = error $ "follow: " ++ prt rec ++ " . " ++ prt lbl
|
||||
|
||||
termFollowPath :: Path -> Term -> Term
|
||||
termFollowPath (Path path) = follow (reverse path)
|
||||
where follow [] term = term
|
||||
follow (Right pat : path) term = follow path (term +! pat)
|
||||
follow (Left lbl : path) term = follow path (term +. lbl)
|
||||
|
||||
lintype2paths :: Path -> LinType -> [Path]
|
||||
lintype2paths path (ConT _ _) = []
|
||||
lintype2paths path (StrT) = [ path ]
|
||||
lintype2paths path (RecT rec) = concat [ lintype2paths (path ++. lbl) ctype |
|
||||
(lbl, ctype) <- rec ]
|
||||
lintype2paths path (TblT pt vt) = concat [ lintype2paths (path ++! pat) vt |
|
||||
pat <- enumeratePatterns pt ]
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
instance Print Decl where
|
||||
prt (var ::: typ)
|
||||
| var == anyVar = prt typ
|
||||
| otherwise = prt var ++ ":" ++ prt typ
|
||||
|
||||
instance Print Type where
|
||||
prt (cat :@ ats) = prt cat ++ prtList ats
|
||||
|
||||
instance Print Atom where
|
||||
prt (ACon con) = prt con
|
||||
prt (AVar var) = "?" ++ prt var
|
||||
|
||||
instance Print LinType where
|
||||
prt (RecT rec) = "{" ++ concat [ prt l ++ ":" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}"
|
||||
prt (TblT t1 t2) = "(" ++ prt t1 ++ " => " ++ prt t2 ++ ")"
|
||||
prt (ConT t ts) = prt t ++ "[" ++ prtSep "|" ts ++ "]"
|
||||
prt (StrT) = "Str"
|
||||
|
||||
instance Print Term where
|
||||
prt (Arg n c p) = prt c ++ "@" ++ prt n ++ "(" ++ prt p ++ ")"
|
||||
prt (c :^ []) = prt c
|
||||
prt (c :^ ts) = prt c ++ prtList ts
|
||||
prt (Rec rec) = "{" ++ concat [ prt l ++ "=" ++ prt t ++ "; " | (l,t) <- rec ] ++ "}"
|
||||
prt (Tbl tbl) = "[" ++ concat [ prt p ++ "=>" ++ prt t ++ "; " | (p,t) <- tbl ] ++ "]"
|
||||
prt (Variants ts) = "{| " ++ prtSep " | " ts ++ " |}"
|
||||
prt (t1 :++ t2) = prt t1 ++ "++" ++ prt t2
|
||||
prt (Token t) = prt t
|
||||
prt (Empty) = "[]"
|
||||
prt (Wildcard) = "_"
|
||||
prt (term :. lbl) = prt term ++ "." ++ prt lbl
|
||||
prt (term :! sel) = prt term ++ "!" ++ prt sel
|
||||
prt (Var var) = "?" ++ prt var
|
||||
|
||||
instance Print Path where
|
||||
prt (Path path) = concatMap prtEither (reverse path)
|
||||
where prtEither (Left lbl) = "." ++ prt lbl
|
||||
prtEither (Right patt) = "!" ++ prt patt
|
||||
46
src/GF/Formalism/Symbol.hs
Normal file
46
src/GF/Formalism/Symbol.hs
Normal file
@@ -0,0 +1,46 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:50 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- Basic type declarations and functions to be used in grammar formalisms
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
module GF.Formalism.Symbol where
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
------------------------------------------------------------
|
||||
-- symbols
|
||||
|
||||
data Symbol c t = Cat c | Tok t
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
|
||||
symbol fc ft (Cat cat) = fc cat
|
||||
symbol fc ft (Tok tok) = ft tok
|
||||
|
||||
mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u
|
||||
mapSymbol fc ft = symbol (Cat . fc) (Tok . ft)
|
||||
|
||||
------------------------------------------------------------
|
||||
-- pretty-printing
|
||||
|
||||
instance (Print c, Print t) => Print (Symbol c t) where
|
||||
prt = symbol prt (simpleShow . prt)
|
||||
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
|
||||
mkEsc '\\' = "\\\\"
|
||||
mkEsc '\"' = "\\\""
|
||||
mkEsc '\n' = "\\n"
|
||||
mkEsc '\t' = "\\t"
|
||||
mkEsc chr = [chr]
|
||||
prtList = prtSep " "
|
||||
|
||||
|
||||
|
||||
271
src/GF/Formalism/Utilities.hs
Normal file
271
src/GF/Formalism/Utilities.hs
Normal file
@@ -0,0 +1,271 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Maintainer : PL
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:50 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- Basic type declarations and functions for grammar formalisms
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
module GF.Formalism.Utilities where
|
||||
|
||||
import Monad
|
||||
import Array
|
||||
import List (groupBy)
|
||||
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
|
||||
|
||||
import GF.Infra.Print
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * symbols
|
||||
|
||||
data Symbol c t = Cat c | Tok t
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
|
||||
symbol fc ft (Cat cat) = fc cat
|
||||
symbol fc ft (Tok tok) = ft tok
|
||||
|
||||
mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u
|
||||
mapSymbol fc ft = symbol (Cat . fc) (Tok . ft)
|
||||
|
||||
filterCats :: [Symbol c t] -> [c]
|
||||
filterCats syms = [ cat | Cat cat <- syms ]
|
||||
|
||||
filterToks :: [Symbol c t] -> [t]
|
||||
filterToks syms = [ tok | Tok tok <- syms ]
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * edges
|
||||
|
||||
data Edge s = Edge Int Int s
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
instance Functor Edge where
|
||||
fmap f (Edge i j s) = Edge i j (f s)
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * representaions of input tokens
|
||||
|
||||
data Input t = MkInput { inputEdges :: [Edge t],
|
||||
inputBounds :: (Int, Int),
|
||||
inputFrom :: Array Int (Assoc t [Int]),
|
||||
inputTo :: Array Int (Assoc t [Int]),
|
||||
inputToken :: Assoc t [(Int, Int)]
|
||||
}
|
||||
|
||||
makeInput :: Ord t => [Edge t] -> Input t
|
||||
input :: Ord t => [t] -> Input t
|
||||
inputMany :: Ord t => [[t]] -> Input t
|
||||
|
||||
instance Show t => Show (Input t) where
|
||||
show input = "makeInput " ++ show (inputEdges input)
|
||||
|
||||
----------
|
||||
|
||||
makeInput inEdges | null inEdges = input []
|
||||
| otherwise = MkInput inEdges inBounds inFrom inTo inToken
|
||||
where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ]
|
||||
where minmax (a, b) (a', b') = (min a a', max b b')
|
||||
inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $
|
||||
[ (i, [(tok, j)]) | Edge i j tok <- inEdges ]
|
||||
inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds
|
||||
[ (j, [(tok, i)]) | Edge i j tok <- inEdges ]
|
||||
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
|
||||
|
||||
input toks = MkInput inEdges inBounds inFrom inTo inToken
|
||||
where inEdges = zipWith3 Edge [0..] [1..] toks
|
||||
inBounds = (0, length toks)
|
||||
inFrom = listArray inBounds $
|
||||
[ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ]
|
||||
inTo = listArray inBounds $
|
||||
[ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ]
|
||||
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
|
||||
|
||||
inputMany toks = MkInput inEdges inBounds inFrom inTo inToken
|
||||
where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ]
|
||||
inBounds = (0, length toks)
|
||||
inFrom = listArray inBounds $
|
||||
[ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ]
|
||||
++ [ listAssoc [] ]
|
||||
inTo = listArray inBounds $
|
||||
[ listAssoc [] ] ++
|
||||
[ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ]
|
||||
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * charts, forests & trees
|
||||
|
||||
-- | The values of the chart, a list of key-daughters pairs,
|
||||
-- has unique keys. In essence, it is a map from 'n' to daughters.
|
||||
-- The daughters should be a set (not necessarily sorted) of rhs's.
|
||||
type SyntaxChart n e = Assoc e [(n, [[e]])]
|
||||
|
||||
-- better(?) representation of forests:
|
||||
-- data Forest n = F (SMap n (SList [Forest n])) Bool
|
||||
-- ==
|
||||
-- type Forest n = GeneralTrie n (SList [Forest n]) Bool
|
||||
-- (the Bool == isMeta)
|
||||
|
||||
data SyntaxForest n = FMeta
|
||||
| FNode n [[SyntaxForest n]]
|
||||
-- ^ The outer list should be a set (not necessarily sorted)
|
||||
-- of possible alternatives. Ie. the outer list
|
||||
-- is a disjunctive node, and the inner lists
|
||||
-- are (conjunctive) concatenative nodes
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data SyntaxTree n = TMeta | TNode n [SyntaxTree n]
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
forestName :: SyntaxForest n -> Maybe n
|
||||
forestName (FNode n _) = Just n
|
||||
forestName (FMeta) = Nothing
|
||||
|
||||
treeName :: SyntaxTree n -> Maybe n
|
||||
treeName (TNode n _) = Just n
|
||||
treeName (TMeta) = Nothing
|
||||
|
||||
instance Functor SyntaxTree where
|
||||
fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
|
||||
fmap f (TMeta) = TMeta
|
||||
|
||||
instance Functor SyntaxForest where
|
||||
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
|
||||
fmap f (FMeta) = FMeta
|
||||
|
||||
{- måste tänka mer på detta:
|
||||
compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n)
|
||||
compactForests = map joinForests . groupBy eqNames . sortForests
|
||||
where eqNames f g = forestName f == forestName g
|
||||
sortForests = foldMerge mergeForests [] . map return
|
||||
mergeForests [] gs = gs
|
||||
mergeForests fs [] = fs
|
||||
mergeForests fs@(f:fs') gs@(g:gs')
|
||||
= case forestName f `compare` forestName g of
|
||||
LT -> f : mergeForests fs' gs
|
||||
GT -> g : mergeForests fs gs'
|
||||
EQ -> f : g : mergeForests fs' gs'
|
||||
joinForests fs = case forestName (head fs) of
|
||||
Nothing -> FMeta
|
||||
Just name -> FNode name $
|
||||
compactDaughters $
|
||||
concat [ fss | FNode _ fss <- fs ]
|
||||
compactDaughters fss = case head fss of
|
||||
[] -> [[]]
|
||||
[_] -> map return $ compactForests $ concat fss
|
||||
_ -> nubsort fss
|
||||
-}
|
||||
|
||||
-- ** conversions between representations
|
||||
|
||||
forest2trees :: SyntaxForest n -> SList (SyntaxTree n)
|
||||
forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees
|
||||
forest2trees (FMeta) = [TMeta]
|
||||
|
||||
chart2forests :: (Ord n, Ord e) =>
|
||||
SyntaxChart n e -- ^ The complete chart
|
||||
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
|
||||
-> [e] -- ^ The starting edges
|
||||
-> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together.
|
||||
-- In essence, the result is a map from 'n' to forest daughters
|
||||
|
||||
-- simplest implementation
|
||||
chart2forests chart isMeta = concatMap edge2forests
|
||||
where edge2forests edge = if isMeta edge then [FMeta]
|
||||
else map item2forest $ chart ? edge
|
||||
item2forest (name, children) = FNode name $ children >>= mapM edge2forests
|
||||
|
||||
{-
|
||||
-- more intelligent(?) implementation,
|
||||
-- requiring that charts and forests are sorted maps and sorted sets
|
||||
chart2forests chart isMeta = es2fs
|
||||
where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e
|
||||
es2fs es = if null metas then fs else FMeta : fs
|
||||
where (metas, nonMetas) = splitBy isMeta es
|
||||
fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas
|
||||
i2f (name, children) = FNode name $
|
||||
case head children of
|
||||
[] -> [[]]
|
||||
[_] -> map return $ es2fs $ concat children
|
||||
_ -> children >>= mapM e2fs
|
||||
-}
|
||||
|
||||
|
||||
-- ** operations on forests
|
||||
|
||||
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
|
||||
unifyManyForests = foldM unifyForests FMeta
|
||||
|
||||
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
|
||||
-- and all children can be unified
|
||||
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
|
||||
unifyForests FMeta forest = return forest
|
||||
unifyForests forest FMeta = return forest
|
||||
unifyForests (FNode name1 children1) (FNode name2 children2)
|
||||
| name1 == name2 && not (null children) = return $ FNode name1 children
|
||||
| otherwise = fail "forest unification failure"
|
||||
where children = [ forests | forests1 <- children1, forests2 <- children2,
|
||||
sameLength forests1 forests2,
|
||||
forests <- zipWithM unifyForests forests1 forests2 ]
|
||||
|
||||
|
||||
-- ** operations on trees
|
||||
|
||||
unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n)
|
||||
unifyManyTrees = foldM unifyTrees TMeta
|
||||
|
||||
-- | two trees can be unified, if either is 'TMeta',
|
||||
-- or both have the same parent, and their children can be unified
|
||||
unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n)
|
||||
unifyTrees TMeta tree = return tree
|
||||
unifyTrees tree TMeta = return tree
|
||||
unifyTrees (TNode name1 children1) (TNode name2 children2)
|
||||
| name1 == name2 && sameLength children1 children2
|
||||
= liftM (TNode name1) $ zipWithM unifyTrees children1 children2
|
||||
| otherwise = fail "tree unification failure"
|
||||
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- pretty-printing
|
||||
|
||||
instance (Print c, Print t) => Print (Symbol c t) where
|
||||
prt = symbol prt (simpleShow . prt)
|
||||
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
|
||||
mkEsc '\\' = "\\\\"
|
||||
mkEsc '\"' = "\\\""
|
||||
mkEsc '\n' = "\\n"
|
||||
mkEsc '\t' = "\\t"
|
||||
mkEsc chr = [chr]
|
||||
prtList = prtSep " "
|
||||
|
||||
instance Print t => Print (Input t) where
|
||||
prt input = "input " ++ prt (inputEdges input)
|
||||
|
||||
instance (Print s) => Print (Edge s) where
|
||||
prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]"
|
||||
prtList = prtSep ""
|
||||
|
||||
instance (Print s) => Print (SyntaxTree s) where
|
||||
prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}"
|
||||
prt (TMeta) = "?"
|
||||
prtList = prtAfter "\n"
|
||||
|
||||
instance (Print s) => Print (SyntaxForest s) where
|
||||
prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}"
|
||||
prt (FMeta) = "?"
|
||||
prtList = prtAfter "\n"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user