mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-01 07:12:50 -06:00
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
206
src-3.0/GF/CF/ChartParser.hs
Normal file
206
src-3.0/GF/CF/ChartParser.hs
Normal file
@@ -0,0 +1,206 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : ChartParser
|
||||
-- Maintainer : Peter Ljunglöf
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:21:12 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.10 $
|
||||
--
|
||||
-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5.
|
||||
-- OBSOLETE -- should use new MCFG parsers instead
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.CF.ChartParser (chartParser) where
|
||||
|
||||
-- import Tracing
|
||||
-- import PrintParser
|
||||
-- import PrintSimplifiedTerm
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.CF.CF
|
||||
import GF.CF.CFIdent
|
||||
import GF.CF.PPrCF (prCFItem)
|
||||
|
||||
import GF.Data.OrdSet
|
||||
import GF.Data.OrdMap2
|
||||
|
||||
import Data.List (groupBy)
|
||||
|
||||
type Token = CFTok
|
||||
type Name = CFFun
|
||||
type Category = CFItem
|
||||
type Grammar = ([Production], Terminal)
|
||||
type Production = (Name, Category, [Category])
|
||||
type Terminal = Token -> [(Category, Maybe Name)]
|
||||
type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String)
|
||||
data ParseTree = Node Name Category [ParseTree] | Leaf Token
|
||||
|
||||
maxTake :: Int
|
||||
-- maxTake = 1000
|
||||
maxTake = maxBound
|
||||
|
||||
--------------------------------------------------
|
||||
-- converting between GF parsing and CFG parsing
|
||||
|
||||
buildParser :: GParser -> CF -> CFCat -> CFParser
|
||||
buildParser gparser cf = parse
|
||||
where
|
||||
parse = \start input ->
|
||||
let parse2 = parse' (CFNonterm start) input in
|
||||
(take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2)
|
||||
parse' = gparser (cf2grammar cf)
|
||||
|
||||
cf2grammar :: CF -> Grammar
|
||||
cf2grammar cf = (productions, terminal)
|
||||
where
|
||||
productions = [ (name, CFNonterm cat, rhs) |
|
||||
(name, (cat, rhs)) <- cfRules ]
|
||||
terminal tok = [ (CFNonterm cat, Just name) |
|
||||
(cat, name) <- cfPredef tok ]
|
||||
++
|
||||
[ (item, Nothing) |
|
||||
item <- elems rhsItems,
|
||||
matchCFTerm item tok ]
|
||||
cfRules = rulesOfCF cf
|
||||
cfPredef = predefOfCF cf
|
||||
rhsItems :: Set Category
|
||||
rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
|
||||
|
||||
parse2tree :: ParseTree -> CFTree
|
||||
parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees'))
|
||||
where
|
||||
trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs
|
||||
|
||||
maybeNode :: Maybe Name -> Category -> Token -> ParseTree
|
||||
maybeNode (Just name) cat tok = Node name cat [Leaf tok]
|
||||
maybeNode Nothing _ tok = Leaf tok
|
||||
|
||||
|
||||
--------------------------------------------------
|
||||
-- chart parsing (bottom up kilbury-like)
|
||||
|
||||
type Chart = [CState]
|
||||
type CState = Set Edge
|
||||
type Edge = (Int, Category, [Category])
|
||||
type Passive = (Int, Int, Category)
|
||||
|
||||
chartParser :: CF -> CFCat -> CFParser
|
||||
chartParser = buildParser chartParser0
|
||||
|
||||
chartParser0 :: GParser
|
||||
chartParser0 (productions, terminal) = cparse
|
||||
where
|
||||
emptyCats :: Set Category
|
||||
emptyCats = empties emptySet
|
||||
where
|
||||
empties cats | cats==cats' = cats
|
||||
| otherwise = empties cats'
|
||||
where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
|
||||
all (`elemSet` cats) rhs ]
|
||||
|
||||
grammarMap :: Map Category [(Name, [Category])]
|
||||
grammarMap = makeMapWith (++)
|
||||
[ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
|
||||
|
||||
leftCornerMap :: Map Category (Set (Category,[Category]))
|
||||
leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) |
|
||||
(_, b, abs) <- productions,
|
||||
(a : bs) <- removeNullable abs ]
|
||||
|
||||
removeNullable :: [Category] -> [[Category]]
|
||||
removeNullable [] = []
|
||||
removeNullable cats@(cat:cats')
|
||||
| cat `elemSet` emptyCats = cats : removeNullable cats'
|
||||
| otherwise = [cats]
|
||||
|
||||
cparse :: Category -> [Token] -> ([ParseTree], String)
|
||||
cparse start input = -- trace "ChartParser" $
|
||||
case lookup (0, length input, start) $
|
||||
-- tracePrt "#edgeTrees" (prt . map (length.snd)) $
|
||||
edgeTrees of
|
||||
Just trees -> -- tracePrt "#trees" (prt . length . fst) $
|
||||
(trees, "Chart:" ++++ prChart passiveEdges)
|
||||
Nothing -> ([], "Chart:" ++++ prChart passiveEdges)
|
||||
where
|
||||
finalChart :: Chart
|
||||
finalChart = map buildState initialChart
|
||||
|
||||
finalChartMap :: [Map Category (Set Edge)]
|
||||
finalChartMap = map stateMap finalChart
|
||||
|
||||
stateMap :: CState -> Map Category (Set Edge)
|
||||
stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
|
||||
(i, b, a:bs) <- elems state ]
|
||||
|
||||
initialChart :: Chart
|
||||
initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $
|
||||
emptySet : map initialState (zip [0..] input)
|
||||
where initialState (j, sym) = makeSet [ (j, cat, []) |
|
||||
(cat, _) <- terminal sym ]
|
||||
|
||||
buildState :: CState -> CState
|
||||
buildState = limit more
|
||||
where more (j, a, []) = ordSet [ (j, b, bs) |
|
||||
(b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
|
||||
<++>
|
||||
lookupWith emptySet (finalChartMap !! j) a
|
||||
more (j, b, a:bs) = ordSet [ (j, b, bs) |
|
||||
a `elemSet` emptyCats ]
|
||||
|
||||
passiveEdges :: [Passive]
|
||||
passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $
|
||||
[ (i, j, cat) |
|
||||
(j, state) <- zip [0..] $
|
||||
-- tracePrt "#passiveChart"
|
||||
-- (prt . map (length.filter (\(_,_,x)->null x).elems)) $
|
||||
-- tracePrt "#activeChart" (prt . map (length.elems)) $
|
||||
finalChart,
|
||||
(i, cat, []) <- elems state ]
|
||||
++
|
||||
[ (i, i, cat) |
|
||||
i <- [0 .. length input],
|
||||
cat <- elems emptyCats ]
|
||||
|
||||
edgeTrees :: [ (Passive, [ParseTree]) ]
|
||||
edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
|
||||
|
||||
edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
|
||||
edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
|
||||
((i,j,c), trees) <- edgeTrees ]
|
||||
|
||||
treesFor :: Passive -> [ParseTree]
|
||||
treesFor (i, j, cat) = [ Node name cat trees |
|
||||
(name, rhs) <- lookupWith [] grammarMap cat,
|
||||
trees <- children rhs i j ]
|
||||
++
|
||||
[ maybeNode name cat tok |
|
||||
i == j-1,
|
||||
let tok = input !! i,
|
||||
Just name <- [lookup cat (terminal tok)] ]
|
||||
|
||||
children :: [Category] -> Int -> Int -> [[ParseTree]]
|
||||
children [] i k = [ [] | i == k ]
|
||||
children (c:cs) i k = [ tree : rest |
|
||||
i <= k,
|
||||
(j, trees) <- lookupWith [] edgeTreesMap (i,c),
|
||||
rest <- children cs j k,
|
||||
tree <- trees ]
|
||||
|
||||
|
||||
{-
|
||||
instance Print ParseTree where
|
||||
prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}"
|
||||
prt (Leaf token) = prt token
|
||||
-}
|
||||
|
||||
-- AR 10/12/2002
|
||||
|
||||
prChart :: [Passive] -> String
|
||||
prChart = unlines . map (unwords . map prOne) . positions where
|
||||
prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
|
||||
positions = groupBy (\ (i,_,_) (j,_,_) -> i == j)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user