forked from GitHub/gf-core
207 lines
6.5 KiB
Haskell
207 lines
6.5 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : ChartParser
|
|
-- Maintainer : Peter Ljunglöf
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/04/21 16:21:12 $
|
|
-- > CVS $Author: bringert $
|
|
-- > CVS $Revision: 1.10 $
|
|
--
|
|
-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5.
|
|
-- OBSOLETE -- should use new MCFG parsers instead
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.CF.ChartParser (chartParser) where
|
|
|
|
-- import Tracing
|
|
-- import PrintParser
|
|
-- import PrintSimplifiedTerm
|
|
|
|
import GF.Data.Operations
|
|
import GF.CF.CF
|
|
import GF.CF.CFIdent
|
|
import GF.CF.PPrCF (prCFItem)
|
|
|
|
import GF.Data.OrdSet
|
|
import GF.Data.OrdMap2
|
|
|
|
import Data.List (groupBy)
|
|
|
|
type Token = CFTok
|
|
type Name = CFFun
|
|
type Category = CFItem
|
|
type Grammar = ([Production], Terminal)
|
|
type Production = (Name, Category, [Category])
|
|
type Terminal = Token -> [(Category, Maybe Name)]
|
|
type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String)
|
|
data ParseTree = Node Name Category [ParseTree] | Leaf Token
|
|
|
|
maxTake :: Int
|
|
-- maxTake = 1000
|
|
maxTake = maxBound
|
|
|
|
--------------------------------------------------
|
|
-- converting between GF parsing and CFG parsing
|
|
|
|
buildParser :: GParser -> CF -> CFCat -> CFParser
|
|
buildParser gparser cf = parse
|
|
where
|
|
parse = \start input ->
|
|
let parse2 = parse' (CFNonterm start) input in
|
|
(take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2)
|
|
parse' = gparser (cf2grammar cf)
|
|
|
|
cf2grammar :: CF -> Grammar
|
|
cf2grammar cf = (productions, terminal)
|
|
where
|
|
productions = [ (name, CFNonterm cat, rhs) |
|
|
(name, (cat, rhs)) <- cfRules ]
|
|
terminal tok = [ (CFNonterm cat, Just name) |
|
|
(cat, name) <- cfPredef tok ]
|
|
++
|
|
[ (item, Nothing) |
|
|
item <- elems rhsItems,
|
|
matchCFTerm item tok ]
|
|
cfRules = rulesOfCF cf
|
|
cfPredef = predefOfCF cf
|
|
rhsItems :: Set Category
|
|
rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
|
|
|
|
parse2tree :: ParseTree -> CFTree
|
|
parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees'))
|
|
where
|
|
trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs
|
|
|
|
maybeNode :: Maybe Name -> Category -> Token -> ParseTree
|
|
maybeNode (Just name) cat tok = Node name cat [Leaf tok]
|
|
maybeNode Nothing _ tok = Leaf tok
|
|
|
|
|
|
--------------------------------------------------
|
|
-- chart parsing (bottom up kilbury-like)
|
|
|
|
type Chart = [CState]
|
|
type CState = Set Edge
|
|
type Edge = (Int, Category, [Category])
|
|
type Passive = (Int, Int, Category)
|
|
|
|
chartParser :: CF -> CFCat -> CFParser
|
|
chartParser = buildParser chartParser0
|
|
|
|
chartParser0 :: GParser
|
|
chartParser0 (productions, terminal) = cparse
|
|
where
|
|
emptyCats :: Set Category
|
|
emptyCats = empties emptySet
|
|
where
|
|
empties cats | cats==cats' = cats
|
|
| otherwise = empties cats'
|
|
where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
|
|
all (`elemSet` cats) rhs ]
|
|
|
|
grammarMap :: Map Category [(Name, [Category])]
|
|
grammarMap = makeMapWith (++)
|
|
[ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
|
|
|
|
leftCornerMap :: Map Category (Set (Category,[Category]))
|
|
leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) |
|
|
(_, b, abs) <- productions,
|
|
(a : bs) <- removeNullable abs ]
|
|
|
|
removeNullable :: [Category] -> [[Category]]
|
|
removeNullable [] = []
|
|
removeNullable cats@(cat:cats')
|
|
| cat `elemSet` emptyCats = cats : removeNullable cats'
|
|
| otherwise = [cats]
|
|
|
|
cparse :: Category -> [Token] -> ([ParseTree], String)
|
|
cparse start input = -- trace "ChartParser" $
|
|
case lookup (0, length input, start) $
|
|
-- tracePrt "#edgeTrees" (prt . map (length.snd)) $
|
|
edgeTrees of
|
|
Just trees -> -- tracePrt "#trees" (prt . length . fst) $
|
|
(trees, "Chart:" ++++ prChart passiveEdges)
|
|
Nothing -> ([], "Chart:" ++++ prChart passiveEdges)
|
|
where
|
|
finalChart :: Chart
|
|
finalChart = map buildState initialChart
|
|
|
|
finalChartMap :: [Map Category (Set Edge)]
|
|
finalChartMap = map stateMap finalChart
|
|
|
|
stateMap :: CState -> Map Category (Set Edge)
|
|
stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
|
|
(i, b, a:bs) <- elems state ]
|
|
|
|
initialChart :: Chart
|
|
initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $
|
|
emptySet : map initialState (zip [0..] input)
|
|
where initialState (j, sym) = makeSet [ (j, cat, []) |
|
|
(cat, _) <- terminal sym ]
|
|
|
|
buildState :: CState -> CState
|
|
buildState = limit more
|
|
where more (j, a, []) = ordSet [ (j, b, bs) |
|
|
(b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
|
|
<++>
|
|
lookupWith emptySet (finalChartMap !! j) a
|
|
more (j, b, a:bs) = ordSet [ (j, b, bs) |
|
|
a `elemSet` emptyCats ]
|
|
|
|
passiveEdges :: [Passive]
|
|
passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $
|
|
[ (i, j, cat) |
|
|
(j, state) <- zip [0..] $
|
|
-- tracePrt "#passiveChart"
|
|
-- (prt . map (length.filter (\(_,_,x)->null x).elems)) $
|
|
-- tracePrt "#activeChart" (prt . map (length.elems)) $
|
|
finalChart,
|
|
(i, cat, []) <- elems state ]
|
|
++
|
|
[ (i, i, cat) |
|
|
i <- [0 .. length input],
|
|
cat <- elems emptyCats ]
|
|
|
|
edgeTrees :: [ (Passive, [ParseTree]) ]
|
|
edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
|
|
|
|
edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
|
|
edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
|
|
((i,j,c), trees) <- edgeTrees ]
|
|
|
|
treesFor :: Passive -> [ParseTree]
|
|
treesFor (i, j, cat) = [ Node name cat trees |
|
|
(name, rhs) <- lookupWith [] grammarMap cat,
|
|
trees <- children rhs i j ]
|
|
++
|
|
[ maybeNode name cat tok |
|
|
i == j-1,
|
|
let tok = input !! i,
|
|
Just name <- [lookup cat (terminal tok)] ]
|
|
|
|
children :: [Category] -> Int -> Int -> [[ParseTree]]
|
|
children [] i k = [ [] | i == k ]
|
|
children (c:cs) i k = [ tree : rest |
|
|
i <= k,
|
|
(j, trees) <- lookupWith [] edgeTreesMap (i,c),
|
|
rest <- children cs j k,
|
|
tree <- trees ]
|
|
|
|
|
|
{-
|
|
instance Print ParseTree where
|
|
prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}"
|
|
prt (Leaf token) = prt token
|
|
-}
|
|
|
|
-- AR 10/12/2002
|
|
|
|
prChart :: [Passive] -> String
|
|
prChart = unlines . map (unwords . map prOne) . positions where
|
|
prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
|
|
positions = groupBy (\ (i,_,_) (j,_,_) -> i == j)
|
|
|
|
|