1
0
forked from GitHub/gf-core
Files
gf-core/src/GF/CF/ChartParser.hs

207 lines
6.5 KiB
Haskell

----------------------------------------------------------------------
-- |
-- Module : ChartParser
-- Maintainer : Peter Ljunglöf
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:12 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.10 $
--
-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5.
-- OBSOLETE -- should use new MCFG parsers instead
-----------------------------------------------------------------------------
module GF.CF.ChartParser (chartParser) where
-- import Tracing
-- import PrintParser
-- import PrintSimplifiedTerm
import GF.Data.Operations
import GF.CF.CF
import GF.CF.CFIdent
import GF.CF.PPrCF (prCFItem)
import GF.Data.OrdSet
import GF.Data.OrdMap2
import Data.List (groupBy)
type Token = CFTok
type Name = CFFun
type Category = CFItem
type Grammar = ([Production], Terminal)
type Production = (Name, Category, [Category])
type Terminal = Token -> [(Category, Maybe Name)]
type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String)
data ParseTree = Node Name Category [ParseTree] | Leaf Token
maxTake :: Int
-- maxTake = 1000
maxTake = maxBound
--------------------------------------------------
-- converting between GF parsing and CFG parsing
buildParser :: GParser -> CF -> CFCat -> CFParser
buildParser gparser cf = parse
where
parse = \start input ->
let parse2 = parse' (CFNonterm start) input in
(take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2)
parse' = gparser (cf2grammar cf)
cf2grammar :: CF -> Grammar
cf2grammar cf = (productions, terminal)
where
productions = [ (name, CFNonterm cat, rhs) |
(name, (cat, rhs)) <- cfRules ]
terminal tok = [ (CFNonterm cat, Just name) |
(cat, name) <- cfPredef tok ]
++
[ (item, Nothing) |
item <- elems rhsItems,
matchCFTerm item tok ]
cfRules = rulesOfCF cf
cfPredef = predefOfCF cf
rhsItems :: Set Category
rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
parse2tree :: ParseTree -> CFTree
parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees'))
where
trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs
maybeNode :: Maybe Name -> Category -> Token -> ParseTree
maybeNode (Just name) cat tok = Node name cat [Leaf tok]
maybeNode Nothing _ tok = Leaf tok
--------------------------------------------------
-- chart parsing (bottom up kilbury-like)
type Chart = [CState]
type CState = Set Edge
type Edge = (Int, Category, [Category])
type Passive = (Int, Int, Category)
chartParser :: CF -> CFCat -> CFParser
chartParser = buildParser chartParser0
chartParser0 :: GParser
chartParser0 (productions, terminal) = cparse
where
emptyCats :: Set Category
emptyCats = empties emptySet
where
empties cats | cats==cats' = cats
| otherwise = empties cats'
where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
all (`elemSet` cats) rhs ]
grammarMap :: Map Category [(Name, [Category])]
grammarMap = makeMapWith (++)
[ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
leftCornerMap :: Map Category (Set (Category,[Category]))
leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) |
(_, b, abs) <- productions,
(a : bs) <- removeNullable abs ]
removeNullable :: [Category] -> [[Category]]
removeNullable [] = []
removeNullable cats@(cat:cats')
| cat `elemSet` emptyCats = cats : removeNullable cats'
| otherwise = [cats]
cparse :: Category -> [Token] -> ([ParseTree], String)
cparse start input = -- trace "ChartParser" $
case lookup (0, length input, start) $
-- tracePrt "#edgeTrees" (prt . map (length.snd)) $
edgeTrees of
Just trees -> -- tracePrt "#trees" (prt . length . fst) $
(trees, "Chart:" ++++ prChart passiveEdges)
Nothing -> ([], "Chart:" ++++ prChart passiveEdges)
where
finalChart :: Chart
finalChart = map buildState initialChart
finalChartMap :: [Map Category (Set Edge)]
finalChartMap = map stateMap finalChart
stateMap :: CState -> Map Category (Set Edge)
stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
(i, b, a:bs) <- elems state ]
initialChart :: Chart
initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $
emptySet : map initialState (zip [0..] input)
where initialState (j, sym) = makeSet [ (j, cat, []) |
(cat, _) <- terminal sym ]
buildState :: CState -> CState
buildState = limit more
where more (j, a, []) = ordSet [ (j, b, bs) |
(b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
<++>
lookupWith emptySet (finalChartMap !! j) a
more (j, b, a:bs) = ordSet [ (j, b, bs) |
a `elemSet` emptyCats ]
passiveEdges :: [Passive]
passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $
[ (i, j, cat) |
(j, state) <- zip [0..] $
-- tracePrt "#passiveChart"
-- (prt . map (length.filter (\(_,_,x)->null x).elems)) $
-- tracePrt "#activeChart" (prt . map (length.elems)) $
finalChart,
(i, cat, []) <- elems state ]
++
[ (i, i, cat) |
i <- [0 .. length input],
cat <- elems emptyCats ]
edgeTrees :: [ (Passive, [ParseTree]) ]
edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
((i,j,c), trees) <- edgeTrees ]
treesFor :: Passive -> [ParseTree]
treesFor (i, j, cat) = [ Node name cat trees |
(name, rhs) <- lookupWith [] grammarMap cat,
trees <- children rhs i j ]
++
[ maybeNode name cat tok |
i == j-1,
let tok = input !! i,
Just name <- [lookup cat (terminal tok)] ]
children :: [Category] -> Int -> Int -> [[ParseTree]]
children [] i k = [ [] | i == k ]
children (c:cs) i k = [ tree : rest |
i <= k,
(j, trees) <- lookupWith [] edgeTreesMap (i,c),
rest <- children cs j k,
tree <- trees ]
{-
instance Print ParseTree where
prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}"
prt (Leaf token) = prt token
-}
-- AR 10/12/2002
prChart :: [Passive] -> String
prChart = unlines . map (unwords . map prOne) . positions where
prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
positions = groupBy (\ (i,_,_) (j,_,_) -> i == j)