diff --git a/src/GF/CFGM/PrintCFGrammar.hs b/src/GF/CFGM/PrintCFGrammar.hs index 6178139e2..1d353be31 100644 --- a/src/GF/CFGM/PrintCFGrammar.hs +++ b/src/GF/CFGM/PrintCFGrammar.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 13:54:24 $ +-- > CVS $Date: 2005/03/21 14:17:44 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.9 $ +-- > CVS $Revision: 1.10 $ -- -- Handles printing a CFGrammar in CFGM format. ----------------------------------------------------------------------------- @@ -20,12 +20,11 @@ import Ident import GFC import Modules import qualified GF.Conversion.ConvertGrammar as Cnv -import qualified GF.Parsing.PrintParser as Prt +import qualified GF.Printing.PrintParser as Prt import qualified GF.Conversion.CFGrammar as CFGrammar import qualified GF.Conversion.GrammarTypes as GT import qualified AbsCFG import qualified GF.Parsing.Parser as Parser -import qualified GF.Parsing.PrintParser as PrintParser import ErrM import qualified Option diff --git a/src/GF/Data/Assoc.hs b/src/GF/Data/Assoc.hs new file mode 100644 index 000000000..261fdb980 --- /dev/null +++ b/src/GF/Data/Assoc.hs @@ -0,0 +1,131 @@ +---------------------------------------------------------------------- +-- | +-- Module : Assoc +-- Maintainer : Peter Ljunglöf +-- Stability : Stable +-- Portability : Haskell 98 +-- +-- > CVS $Date: 2005/03/21 14:17:39 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Association lists, or finite maps, +-- including sets as maps with result type @()@. +-- function names stolen from module @Array@. +-- /O(log n)/ key lookup +----------------------------------------------------------------------------- + +module GF.Data.Assoc ( Assoc, + Set, + listAssoc, + listSet, + accumAssoc, + aAssocs, + aElems, + assocMap, + lookupAssoc, + lookupWith, + (?), + (?=) + ) where + +import GF.Data.SortedList + +infixl 9 ?, ?= + +-- | a set is a finite map with empty values +type Set a = Assoc a () + +-- | creating a finite map from a sorted key-value list +listAssoc :: Ord a => SList (a, b) -> Assoc a b + +-- | creating a set from a sorted list +listSet :: Ord a => SList a -> Set a + +-- | building a finite map from a list of keys and 'b's, +-- and a function that combines a sorted list of 'b's into a value +accumAssoc :: (Ord a, Ord c) => (SList c -> b) -> [(a, c)] -> Assoc a b + +-- | all key-value pairs from an association list +aAssocs :: Ord a => Assoc a b -> SList (a, b) + +-- | all keys from an association list +aElems :: Ord a => Assoc a b -> SList a + +-- fmap :: Ord a => (b -> b') -> Assoc a b -> Assoc a b' + +-- | mapping values to other values. +-- the mapping function can take the key as information +assocMap :: Ord a => (a -> b -> b') -> Assoc a b -> Assoc a b' + +-- | monadic lookup function, +-- returning failure if the key does not exist +lookupAssoc :: (Ord a, Monad m) => Assoc a b -> a -> m b + +-- | if the key does not exist, +-- the first argument is returned +lookupWith :: Ord a => b -> Assoc a b -> a -> b + +-- | if the values are monadic, we can return the value type +(?) :: (Ord a, Monad m) => Assoc a (m b) -> a -> m b + +-- | checking wheter the map contains a given key +(?=) :: Ord a => Assoc a b -> a -> Bool + + +------------------------------------------------------------ + +data Assoc a b = ANil | ANode (Assoc a b) a b (Assoc a b) + deriving (Eq, Show) + +listAssoc as = assoc + where (assoc, []) = sl2bst (length as) as + sl2bst 0 xs = (ANil, xs) + sl2bst 1 (x:xs) = (ANode ANil (fst x) (snd x) ANil, xs) + sl2bst n xs = (ANode left (fst x) (snd x) right, zs) + where llen = (n-1) `div` 2 + rlen = n - 1 - llen + (left, x:ys) = sl2bst llen xs + (right, zs) = sl2bst rlen ys + +listSet as = listAssoc (zip as (repeat ())) + +accumAssoc join = listAssoc . map (mapSnd join) . groupPairs . nubsort + where mapSnd f (a, b) = (a, f b) + +aAssocs as = prs as [] + where prs ANil = id + prs (ANode left a b right) = prs left . ((a,b) :) . prs right + +aElems = map fst . aAssocs + + +instance Ord a => Functor (Assoc a) where + fmap f = assocMap (const f) + +assocMap f ANil = ANil +assocMap f (ANode left a b right) = ANode (assocMap f left) a (f a b) (assocMap f right) + + +lookupAssoc ANil _ = fail "key not found" +lookupAssoc (ANode left a b right) a' = case compare a a' of + GT -> lookupAssoc left a' + LT -> lookupAssoc right a' + EQ -> return b + +lookupWith z ANil _ = z +lookupWith z (ANode left a b right) a' = case compare a a' of + GT -> lookupWith z left a' + LT -> lookupWith z right a' + EQ -> b + +(?) = lookupWith (fail "key not found") + +(?=) = \assoc -> maybe False (const True) . lookupAssoc assoc + + + + + + + diff --git a/src/GF/Data/BacktrackM.hs b/src/GF/Data/BacktrackM.hs new file mode 100644 index 000000000..5abc9863d --- /dev/null +++ b/src/GF/Data/BacktrackM.hs @@ -0,0 +1,123 @@ +---------------------------------------------------------------------- +-- | +-- Module : BacktrackM +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:39 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Backtracking state monad, with r/o environment +----------------------------------------------------------------------------- + + +module GF.Data.BacktrackM ( -- * the backtracking state monad + BacktrackM, + -- * controlling the monad + failure, + (|||), + -- * handling the state & environment + readEnv, + readState, + writeState, + -- * monad specific utilities + member, + -- * running the monad + runBM, + solutions, + finalStates + ) where + +import Monad + +------------------------------------------------------------ +-- type declarations + +-- * controlling the monad + +failure :: BacktrackM e s a +(|||) :: BacktrackM e s a -> BacktrackM e s a -> BacktrackM e s a + +instance MonadPlus (BacktrackM e s) where + mzero = failure + mplus = (|||) + +-- * handling the state & environment + +readEnv :: BacktrackM e s e +readState :: BacktrackM e s s +writeState :: s -> BacktrackM e s () + +-- * monad specific utilities + +member :: [a] -> BacktrackM e s a +member = msum . map return + +-- * running the monad + +runBM :: BacktrackM e s a -> e -> s -> [(s, a)] + +solutions :: BacktrackM e s a -> e -> s -> [a] +solutions bm e s = map snd $ runBM bm e s + +finalStates :: BacktrackM e s () -> e -> s -> [s] +finalStates bm e s = map fst $ runBM bm e s + + +{- +---------------------------------------------------------------------- +-- implementation as lists of successes + +newtype BacktrackM e s a = BM (e -> s -> [(s, a)]) + +runBM (BM m) = m + +readEnv = BM (\e s -> [(s, e)]) +readState = BM (\e s -> [(s, s)]) +writeState s = BM (\e _ -> [(s, ())]) + +failure = BM (\e s -> []) +BM m ||| BM n = BM (\e s -> m e s ++ n e s) + +instance Monad (BacktrackM e s) where + return a = BM (\e s -> [(s, a)]) + BM m >>= k = BM (\e s -> concat [ n e s' | (s', a) <- m e s, let BM n = k a ]) + fail _ = failure +-} + +---------------------------------------------------------------------- +-- Combining endomorphisms and continuations +-- a la Ralf Hinze + +newtype Backtr a = B (forall b . (a -> b -> b) -> b -> b) + +instance Monad Backtr where + return a = B (\c f -> c a f) + B m >>= k = B (\c f -> m (\a -> unBacktr (k a) c) f) + where unBacktr (B m) = m + +failureB = B (\c f -> f) +B m |||| B n = B (\c f -> m c (n c f)) + +runB (B m) = m (:) [] + +-- BacktrackM = state monad transformer over the backtracking monad + +newtype BacktrackM e s a = BM (e -> s -> Backtr (s, a)) + +runBM (BM m) e s = runB (m e s) + +readEnv = BM (\e s -> return (s, e)) +readState = BM (\e s -> return (s, s)) +writeState s = BM (\e _ -> return (s, ())) + +failure = BM (\e s -> failureB) +BM m ||| BM n = BM (\e s -> m e s |||| n e s) + +instance Monad (BacktrackM e s) where + return a = BM (\e s -> return (s, a)) + BM m >>= k = BM (\e s -> do (s', a) <- m e s + unBM (k a) e s') + where unBM (BM m) = m diff --git a/src/GF/Data/RedBlackSet.hs b/src/GF/Data/RedBlackSet.hs new file mode 100644 index 000000000..8a1b8a743 --- /dev/null +++ b/src/GF/Data/RedBlackSet.hs @@ -0,0 +1,150 @@ +---------------------------------------------------------------------- +-- | +-- Module : RedBlackSet +-- Maintainer : Peter Ljunglöf +-- Stability : Stable +-- Portability : Haskell 98 +-- +-- > CVS $Date: 2005/03/21 14:17:39 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Modified version of Okasaki's red-black trees +-- incorporating sets and set-valued maps +---------------------------------------------------------------------- + +module GF.Data.RedBlackSet ( -- * Red-black sets + RedBlackSet, + rbEmpty, + rbList, + rbElem, + rbLookup, + rbInsert, + rbMap, + rbOrdMap, + -- * Red-black finite maps + RedBlackMap, + rbmEmpty, + rbmList, + rbmElem, + rbmLookup, + rbmInsert, + rbmOrdMap + ) where + +-------------------------------------------------------------------------------- +-- sets + +data Color = R | B deriving (Eq, Show) +data RedBlackSet a = E | T Color (RedBlackSet a) a (RedBlackSet a) + deriving (Eq, Show) + +rbBalance B (T R (T R a x b) y c) z d = T R (T B a x b) y (T B c z d) +rbBalance B (T R a x (T R b y c)) z d = T R (T B a x b) y (T B c z d) +rbBalance B a x (T R (T R b y c) z d) = T R (T B a x b) y (T B c z d) +rbBalance B a x (T R b y (T R c z d)) = T R (T B a x b) y (T B c z d) +rbBalance color a x b = T color a x b + +rbBlack (T _ a x b) = T B a x b + +-- | the empty set +rbEmpty :: RedBlackSet a +rbEmpty = E + +-- | the elements of a set as a sorted list +rbList :: RedBlackSet a -> [a] +rbList tree = rbl tree [] + where rbl E = id + rbl (T _ left a right) = rbl right . (a:) . rbl left + +-- | checking for containment +rbElem :: Ord a => a -> RedBlackSet a -> Bool +rbElem _ E = False +rbElem a (T _ left a' right) + = case compare a a' of + LT -> rbElem a left + GT -> rbElem a right + EQ -> True + +-- | looking up a key in a set of keys and values +rbLookup :: Ord k => k -> RedBlackSet (k, a) -> Maybe a +rbLookup _ E = Nothing +rbLookup a (T _ left (a',b) right) + = case compare a a' of + LT -> rbLookup a left + GT -> rbLookup a right + EQ -> Just b + +-- | inserting a new element. +-- returns 'Nothing' if the element is already contained +rbInsert :: Ord a => a -> RedBlackSet a -> Maybe (RedBlackSet a) +rbInsert value tree = fmap rbBlack (rbins tree) + where rbins E = Just (T R E value E) + rbins (T color left value' right) + = case compare value value' of + LT -> do left' <- rbins left + return (rbBalance color left' value' right) + GT -> do right' <- rbins right + return (rbBalance color left value' right') + EQ -> Nothing + +-- | mapping each value of a key-value set +rbMap :: (a -> b) -> RedBlackSet (k, a) -> RedBlackSet (k, b) +rbMap f E = E +rbMap f (T color left (key, value) right) + = T color (rbMap f left) (key, f value) (rbMap f right) + +-- | mapping each element to another type. +-- /observe/ that the mapping function needs to preserve +-- the order between objects +rbOrdMap :: (a -> b) -> RedBlackSet a -> RedBlackSet b +rbOrdMap f E = E +rbOrdMap f (T color left value right) + = T color (rbOrdMap f left) (f value) (rbOrdMap f right) + +---------------------------------------------------------------------- +-- finite maps + +type RedBlackMap k a = RedBlackSet (k, RedBlackSet a) + +-- | the empty map +rbmEmpty :: RedBlackMap k a +rbmEmpty = E + +-- | converting a map to a key-value list, sorted on the keys, +-- and for each key, a sorted list of values +rbmList :: RedBlackMap k a -> [(k, [a])] +rbmList tree = [ (k, rbList sub) | (k, sub) <- rbList tree ] + +-- | checking whether a key-value pair is contained in the map +rbmElem :: (Ord k, Ord a) => k -> a -> RedBlackMap k a -> Bool +rbmElem key value = maybe False (rbElem value) . rbLookup key + +-- | looking up a key, returning a (sorted) list of all matching values +rbmLookup :: Ord k => k -> RedBlackMap k a -> [a] +rbmLookup key = maybe [] rbList . rbLookup key + +-- | inserting a key-value pair. +-- returns 'Nothing' if the pair is already contained in the map +rbmInsert :: (Ord k, Ord a) => k -> a -> RedBlackMap k a -> Maybe (RedBlackMap k a) +rbmInsert key value tree = fmap rbBlack (rbins tree) + where rbins E = Just (T R E (key, T B E value E) E) + rbins (T color left item@(key', vtree) right) + = case compare key key' of + LT -> do left' <- rbins left + return (rbBalance color left' item right) + GT -> do right' <- rbins right + return (rbBalance color left item right') + EQ -> do vtree' <- rbInsert value vtree + return (T color left (key', vtree') right) + +-- | mapping each value to another type. +-- /observe/ that the mapping function needs to preserve +-- order between objects +rbmOrdMap :: (a -> b) -> RedBlackMap k a -> RedBlackMap k b +rbmOrdMap f E = E +rbmOrdMap f (T color left (key, tree) right) + = T color (rbmOrdMap f left) (key, rbOrdMap f tree) (rbmOrdMap f right) + + + diff --git a/src/GF/Data/SortedList.hs b/src/GF/Data/SortedList.hs new file mode 100644 index 000000000..0b340b533 --- /dev/null +++ b/src/GF/Data/SortedList.hs @@ -0,0 +1,108 @@ +---------------------------------------------------------------------- +-- | +-- Module : SortedList +-- Maintainer : Peter Ljunglöf +-- Stability : stable +-- Portability : portable +-- +-- > CVS $Date: 2005/03/21 14:17:39 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Sets as sorted lists +-- +-- * /O(n)/ union, difference and intersection +-- +-- * /O(n log n)/ creating a set from a list (=sorting) +-- +-- * /O(n^2)/ fixed point iteration +----------------------------------------------------------------------------- + +module GF.Data.SortedList ( SList, + nubsort, union, + (<++>), (<\\>), (<**>), + limit, + hasCommonElements, subset, + groupPairs, groupUnion + ) where + +import List (groupBy) + +-- | The list must be sorted and contain no duplicates. +type SList a = [a] + +-- | Group a set of key-value pairs into +-- a set of unique keys with sets of values +groupPairs :: Ord a => SList (a, b) -> SList (a, SList b) +groupPairs = map mapFst . groupBy eqFst + where mapFst as = (fst (head as), map snd as) + eqFst a b = fst a == fst b + +-- | Group a set of key-(sets-of-values) pairs into +-- a set of unique keys with sets of values +groupUnion :: (Ord a, Ord b) => SList (a, SList b) -> SList (a, SList b) +groupUnion = map unionSnd . groupPairs + where unionSnd (a, bs) = (a, union bs) + +-- | True is the two sets has common elements +hasCommonElements :: Ord a => SList a -> SList a -> Bool +hasCommonElements as bs = not (null (as <**> bs)) + +-- | True if the first argument is a subset of the second argument +subset :: Ord a => SList a -> SList a -> Bool +xs `subset` ys = null (xs <\\> ys) + +-- | Create a set from any list. +-- This function can also be used as an alternative to @nub@ in @List.hs@ +nubsort :: Ord a => [a] -> SList a +nubsort = union . map return + +-- | The union of a list of sets +union :: Ord a => [SList a] -> SList a +union [] = [] +union [as] = as +union abs = let (as, bs) = split abs in union as <++> union bs + where split (a:b:abs) = let (as, bs) = split abs in (a:as, b:bs) + split as = (as, []) + +-- | The union of two sets +(<++>) :: Ord a => SList a -> SList a -> SList a +[] <++> bs = bs +as <++> [] = as +as@(a:as') <++> bs@(b:bs') = case compare a b of + LT -> a : (as' <++> bs) + GT -> b : (as <++> bs') + EQ -> a : (as' <++> bs') + +-- | The difference of two sets +(<\\>) :: Ord a => SList a -> SList a -> SList a +[] <\\> bs = [] +as <\\> [] = as +as@(a:as') <\\> bs@(b:bs') = case compare a b of + LT -> a : (as' <\\> bs) + GT -> (as <\\> bs') + EQ -> (as' <\\> bs') + +-- | The intersection of two sets +(<**>) :: Ord a => SList a -> SList a -> SList a +[] <**> bs = [] +as <**> [] = [] +as@(a:as') <**> bs@(b:bs') = case compare a b of + LT -> (as' <**> bs) + GT -> (as <**> bs') + EQ -> a : (as' <**> bs') + +-- | A fixed point iteration +limit :: Ord a => (a -> SList a) -- ^ The iterator function + -> SList a -- ^ The initial set + -> SList a -- ^ The result of the iteration +limit more start = limit' start start + where limit' chart agenda | null new' = chart + | otherwise = limit' (chart <++> new') new' + where new = union (map more agenda) + new'= new <\\> chart + + + + + diff --git a/src/GF/Parsing/CFParserGeneral.hs b/src/GF/Parsing/CFParserGeneral.hs new file mode 100644 index 000000000..cc24820b7 --- /dev/null +++ b/src/GF/Parsing/CFParserGeneral.hs @@ -0,0 +1,85 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFParserGeneral +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:41 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Several implementations of CFG chart parsing +----------------------------------------------------------------------------- + +module GF.Parsing.CFParserGeneral (parse, + Strategy + ) where + +import Tracing + +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import GF.Parsing.GeneralChart +import GF.Data.Assoc + +parse :: (Ord n, Ord c, Ord t) => Strategy -> CFParser n c t +parse strategy grammar start = extract . process strategy grammar start + +type Strategy = (Bool, Bool) -- (isBottomup, isTopdown) + +extract :: [Item n (Symbol c t)] -> [Edge (Rule n c t)] +extract edges = + edges' + where edges' = [ Edge j k (Rule cat (reverse found) name) | + Edge j k (Cat cat, found, [], Just name) <- edges ] + +process :: (Ord n, Ord c, Ord t) => Strategy -> PInfo n c t -> + [c] -> Input t -> [Item n (Symbol c t)] +process (isBottomup, isTopdown) grammar start + = trace ("CFParserGeneral" ++ + (if isBottomup then " BU" else "") ++ + (if isTopdown then " TD" else "")) $ + buildChart keyof [predict, combine] . axioms + where axioms input = initial ++ scan input + + scan input = map (fmap mkEdge) (inputEdges input) + mkEdge tok = (Tok tok, [], [], Nothing) + + -- the combine rule + combine chart (Edge j k (next, _, [], _)) + = [ edge `forwardTo` k | edge <- chartLookup chart (Active next j) ] + combine chart edge@(Edge _ j (_, _, next:_, _)) + = [ edge `forwardTo` k | Edge _ k _ <- chartLookup chart (Passive next j) ] + + -- initial predictions + initial = [ loopingEdge 0 rule | cat <- start, rule <- tdRuleLookup ? cat ] + + -- predictions + predict chart (Edge j k (next, _, [], _)) | isBottomup + = [ loopingEdge j rule `forwardTo` k | rule <- bottomupRules grammar ? next ] + -- - - - - - - - - - ^^^^^^^^^^^^^ Kilbury prediction: move dot forward + predict chart (Edge _ k (_, _, Cat cat:_, _)) + = [ loopingEdge k rule | rule <- tdRuleLookup ? cat ] + predict _ _ = [] + + tdRuleLookup | isTopdown = topdownRules grammar + | isBottomup = emptyLeftcornerRules grammar + +-- internal representation of parse items + +type Item n s = Edge (s, [s], [s], Maybe n) +type IChart n s = Chart (Item n s) (IKey s) +data IKey s = Active s Int + | Passive s Int + deriving (Eq, Ord, Show) + +keyof (Edge _ j (_, _, next:_, _)) = Active next j +keyof (Edge j _ (cat, _, [], _)) = Passive cat j + +forwardTo (Edge i j (cat, found, next:tofind, name)) k = Edge i k (cat, next:found, tofind, name) + +loopingEdge k (Rule cat tofind name) = Edge k k (Cat cat, [], tofind, Just name) + + + diff --git a/src/GF/Parsing/CFParserIncremental.hs b/src/GF/Parsing/CFParserIncremental.hs new file mode 100644 index 000000000..3b9951721 --- /dev/null +++ b/src/GF/Parsing/CFParserIncremental.hs @@ -0,0 +1,143 @@ +---------------------------------------------------------------------- +-- | +-- Module : CFParserIncremental +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:41 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Incremental chart parsing for context-free grammars +----------------------------------------------------------------------------- + + + +module GF.Parsing.CFParserIncremental (parse, + Strategy) where + +import Tracing +import GF.Printing.PrintParser + +-- haskell modules: +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +import Operations +-- parser modules: +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import GF.Parsing.IncrementalChart + + +type Strategy = ((Bool, Bool), (Bool, Bool)) -- (predict:(BU, TD), filter:(BU, TD)) + +parse :: (Ord n, Ord c, Ord t, Show t) => + Strategy -> CFParser n c t +parse ((isPredictBU, isPredictTD), (isFilterBU, isFilterTD)) grammar start input = + trace2 "CFParserIncremental" + ((if isPredictBU then "BU-predict " else "") ++ + (if isPredictTD then "TD-predict " else "") ++ + (if isFilterBU then "BU-filter " else "") ++ + (if isFilterTD then "TD-filter " else "")) $ + trace2 "input" (show (inputTo input)) $ + finalEdges + where finalEdges = [ Edge j k (Rule cat (reverse found) name) | + (k, state) <- + tracePrt "#passiveChart" + (prt . map (length . (?Passive) . snd)) $ + tracePrt "#activeChart" + (prt . map (length . concatMap snd . aAssocs . snd)) $ + assocs finalChart, + Item j (Rule cat _Nil name) found <- state ? Passive ] + + finalChart = buildChart keyof rules axioms $ inputBounds input + + axioms 0 = --tracePrt ("axioms 0") (prtSep "\n") $ + union $ map (tdInfer 0) start + axioms k = --tracePrt ("axioms "++show k) (prtSep "\n") $ + union [ buInfer j k (Tok token) | + (token, js) <- aAssocs (inputTo input ! k), j <- js ] + + rules k (Item j (Rule cat [] _) _) + = buInfer j k (Cat cat) + rules k (Item j rule@(Rule _ (Cat next:_) _) found) + = tdInfer k next <++> + -- hack for empty rules: + [ Item j (forward rule) (Cat next:found) | + emptyCategories grammar ?= next ] + rules _ _ = [] + + buInfer j k next = --tracePrt ("buInfer "++show(j,k)++" "++prt next) (prtSep "\n") $ + buPredict j k next <++> buCombine j k next + tdInfer k next = tdPredict k next + + -- the combine rule + buCombine j k next + | j == k = [] -- hack for empty rules + | otherwise = [ Item i (forward rule) (next:found) | + Item i rule found <- (finalChart ! j) ? Active next ] + + -- kilbury bottom-up prediction + buPredict j k next + = [ Item j rule [next] | isPredictBU, + rule <- map forward $ --tracePrt ("buRules "++prt next) (prtSep "\n") $ + bottomupRules grammar ? next, + buFilter rule k, + tdFilter rule j k ] + + -- top-down prediction + tdPredict k cat + = [ Item k rule [] | isPredictTD || isFilterTD, + rule <- topdownRules grammar ? cat, + buFilter rule k ] <++> + -- hack for empty rules: + [ Item k rule [] | isPredictBU, + rule <- emptyLeftcornerRules grammar ? cat ] + + -- bottom up filtering: input symbol k can begin the given symbol list (first set) + -- leftcornerTokens DOESN'T WORK WITH EMPTY RULES!!! + buFilter (Rule _ (Cat cat:_) _) k | isFilterBU + = k < snd (inputBounds input) && + hasCommonElements (leftcornerTokens grammar ? cat) + (aElems (inputFrom input ! k)) + buFilter _ _ = True + + -- top down filtering: 'cat' is reachable by an active edge ending in node j < k + tdFilter (Rule cat _ _) j k | isFilterTD && j < k + = (tdFilters ! j) ?= cat + tdFilter _ _ _ = True + + tdFilters = listArray (inputBounds input) $ + map (listSet . limit leftCats . activeCats) [0..] + activeCats j = [ next | Active (Cat next) <- aElems (finalChart ! j) ] + leftCats cat = [ left | Rule _cat (Cat left:_) _ <- topdownRules grammar ? cat ] + + +-- type declarations, items & keys +data Item n c t = Item Int (Rule n c t) [Symbol c t] + deriving (Eq, Ord, Show) + +data IKey c t = Active (Symbol c t) | Passive + deriving (Eq, Ord, Show) + +keyof :: Item n c t -> IKey c t +keyof (Item _ (Rule _ (next:_) _) _) = Active next +keyof (Item _ (Rule _ [] _) _) = Passive + +forward :: Rule n c t -> Rule n c t +forward (Rule cat (_:rest) name) = Rule cat rest name + + +instance (Print n, Print c, Print t) => Print (Item n c t) where + prt (Item k (Rule cat rhs name) syms) + = "<" ++show k++ ": "++prt name++". "++ + prt cat++" -> "++prt rhs++" / "++prt syms++">" + +instance (Print c, Print t) => Print (IKey c t) where + prt (Active sym) = "?" ++ prt sym + prt (Passive) = "!" + + diff --git a/src/GF/Parsing/GeneralChart.hs b/src/GF/Parsing/GeneralChart.hs new file mode 100644 index 000000000..61f933932 --- /dev/null +++ b/src/GF/Parsing/GeneralChart.hs @@ -0,0 +1,85 @@ +---------------------------------------------------------------------- +-- | +-- Module : GeneralChart +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simple implementation of deductive chart parsing +----------------------------------------------------------------------------- + + +module GF.Parsing.GeneralChart (-- * Type definition + Chart, + -- * Main functions + chartLookup, + buildChart, + -- * Probably not needed + emptyChart, + chartMember, + chartInsert, + chartList, + addToChart + ) where + +-- import Trace + +import GF.Data.RedBlackSet + +-- main functions + +chartLookup :: (Ord item, Ord key) => Chart item key -> key -> [item] +buildChart :: (Ord item, Ord key) => (item -> key) -> + [Chart item key -> item -> [item]] -> [item] -> [item] + +buildChart keyof rules axioms = chartList (addItems axioms emptyChart) + where addItems [] = id + addItems (item:items) = addItems items . addItem item + + -- addItem item | trace ("+ "++show item++"\n") False = undefined + addItem item = addToChart item (keyof item) + (\chart -> foldr (consequence item) chart rules) + + consequence item rule chart = addItems (rule chart item) chart + +-- probably not needed + +emptyChart :: (Ord item, Ord key) => Chart item key +chartMember :: (Ord item, Ord key) => Chart item key -> item -> key -> Bool +chartInsert :: (Ord item, Ord key) => Chart item key -> item -> key -> Maybe (Chart item key) +chartList :: (Ord item, Ord key) => Chart item key -> [item] +addToChart :: (Ord item, Ord key) => item -> key -> (Chart item key -> Chart item key) -> Chart item key -> Chart item key + +addToChart item key after chart = maybe chart after (chartInsert chart item key) + + +-------------------------------------------------------------------------------- +-- key charts as red/black trees + +newtype Chart item key = KC (RedBlackMap key item) + deriving Show + +emptyChart = KC rbmEmpty +chartMember (KC tree) item key = rbmElem key item tree +chartInsert (KC tree) item key = fmap KC (rbmInsert key item tree) +chartLookup (KC tree) key = rbmLookup key tree +chartList (KC tree) = concatMap snd (rbmList tree) +--------------------------------------------------------------------------------} + + +{-------------------------------------------------------------------------------- +-- key charts as unsorted association lists -- OBSOLETE! + +newtype Chart item key = SC [(key, item)] + +emptyChart = SC [] +chartMember (SC chart) item key = (key,item) `elem` chart +chartInsert (SC chart) item key = if (key,item) `elem` chart then Nothing else Just (SC ((key,item):chart)) +chartLookup (SC chart) key = [ item | (key',item) <- chart, key == key' ] +chartList (SC chart) = map snd chart +--------------------------------------------------------------------------------} + diff --git a/src/GF/Parsing/IncrementalChart.hs b/src/GF/Parsing/IncrementalChart.hs new file mode 100644 index 000000000..a040ddd60 --- /dev/null +++ b/src/GF/Parsing/IncrementalChart.hs @@ -0,0 +1,49 @@ +---------------------------------------------------------------------- +-- | +-- Module : IncrementalChart +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Implementation of /incremental/ deductive parsing, +-- i.e. parsing one word at the time. +----------------------------------------------------------------------------- + + +module GF.Parsing.IncrementalChart (-- * Type definitions + IncrementalChart, + -- * Functions + buildChart, + chartList + ) where + +import Array +import GF.Data.SortedList +import GF.Data.Assoc + +buildChart :: (Ord item, Ord key) => (item -> key) -> + (Int -> item -> SList item) -> + (Int -> SList item) -> + (Int, Int) -> IncrementalChart item key + +chartList :: (Ord item, Ord key) => (Int -> item -> edge) -> IncrementalChart item key -> [edge] + +type IncrementalChart item key = Array Int (Assoc key (SList item)) + +---------- + +buildChart keyof rules axioms bounds = finalChartArray + where buildState k = limit (rules k) $ axioms k + finalChartList = map buildState [fst bounds .. snd bounds] + finalChartArray = listArray bounds $ map stateAssoc finalChartList + stateAssoc state = accumAssoc id [ (keyof item, item) | item <- state ] + +chartList combine chart = [ combine k item | + (k, state) <- assocs chart, + item <- concatMap snd $ aAssocs state ] + + diff --git a/src/GF/Parsing/MCFParserBasic.hs b/src/GF/Parsing/MCFParserBasic.hs new file mode 100644 index 000000000..03a1d8b9d --- /dev/null +++ b/src/GF/Parsing/MCFParserBasic.hs @@ -0,0 +1,156 @@ +---------------------------------------------------------------------- +-- | +-- Module : MCFParserBasic +-- Maintainer : Peter Ljunglöf +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Simplest possible implementation of MCFG chart parsing +----------------------------------------------------------------------------- + +module GF.Parsing.MCFParserBasic (parse + ) where + +import Tracing + +import Ix +import GF.Parsing.Parser +import GF.Conversion.MCFGrammar +import GF.Parsing.GeneralChart +import GF.Data.Assoc +import GF.Data.SortedList +import GF.Printing.PrintParser + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + MCFParser n c l t +parse grammar start = edges2chart . extract . process grammar + + +extract :: [Item n c l t] -> [(n, MEdge c l, [MEdge c l])] +extract items = tracePrt "#passives" (prt.length) $ + --trace2 "passives" (prtAfter "\n" [ i | i@(PItem _) <- items ]) $ + [ item | PItem item <- items ] + + +process :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + Grammar n c l t -> Input t -> [Item n c l t] +process grammar input = buildChart keyof rules axioms + where axioms = initial + rules = [combine, scan, predict] + + -- axioms + initial = traceItems "axiom" [] $ + [ nextLin name tofind (addNull cat) (map addNull args) | + Rule cat args tofind name <- grammar ] + + addNull a = (a, []) + + -- predict + predict chart i1@(Item name tofind rho (Lin lbl []) (cat, found0) children) + = traceItems "predict" [i1] + [ nextLin name tofind (cat, found) children | + let found = insertRow lbl rho found0 ] + predict _ _ = [] + + -- combine + combine chart active@(Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) + = do passive <- chartLookup chart (Passive cat) + combineItems active passive + combine chart passive@(PItem (_, (cat, _), _)) + = do active <- chartLookup chart (Active cat) + combineItems active passive + combine _ _ = [] + + combineItems i1@(Item name tofind rho0 (Lin lbl (Cat(_,lbl',nr):rest)) found children0) + i2@(PItem (_, found', _)) + = traceItems "combine" [i1,i2] + [ Item name tofind rho (Lin lbl rest) found children | + rho1 <- lookupLbl lbl' found', + let rho = concatRange rho0 rho1, + children <- updateChild nr children0 (snd found') ] + + -- scan + scan chart i1@(Item name tofind rho0 (Lin lbl (Tok tok:rest)) found children) + = traceItems "scan" [i1] + [ Item name tofind rho (Lin lbl rest) found children | + let rho = concatRange rho0 (rangeOfToken tok) ] + scan _ _ = [] + + -- utilities + rangeOfToken tok = makeRange $ inputToken input ? tok + + zeroRange = makeRange $ map (\i -> (i,i)) $ range $ inputBounds input + + nextLin name [] found children = PItem (name, found, children) + nextLin name (lin : tofind) found children + = Item name tofind zeroRange lin found children + +lookupLbl a = map snd . filter (\b -> a == fst b) . snd +updateChild nr children found = updateIndex nr children $ + \child -> if null (snd child) + then [ (fst child, found) ] + else [ child | snd child == found ] + +insertRow lbl rho [] = [(lbl, rho)] +insertRow lbl rho rows'@(row@(lbl', rho') : rows) + = case compare lbl lbl' of + LT -> row : insertRow lbl rho rows + GT -> (lbl, rho) : rows' + EQ -> (lbl, unionRange rho rho') : rows + + +-- internal representation of parse items + +data Item n c l t + = Item n [Lin c l t] -- tofind + Range (Lin c l t) -- current row + (MEdge c l) -- found rows + [MEdge c l] -- found children + | PItem (n, MEdge c l, [MEdge c l]) + deriving (Eq, Ord, Show) + +data IKey c = Passive c | Active c | AnyItem + deriving (Eq, Ord, Show) + +keyof (PItem (_, (cat, _), _)) = Passive cat +keyof (Item _ _ _ (Lin _ (Cat(cat,_,_):_)) _ _) = Active cat +keyof _ = AnyItem + + +-- tracing + +--type TraceItem = Item String String Char String +traceItems :: (Print n, Print l, Print c, Print t) => + String -> [Item n c l t] -> [Item n c l t] -> [Item n c l t] +traceItems rule trigs items + | null items || True = items + | otherwise = trace ("\n" ++ rule ++ ":" ++ + unlines [ "\t" ++ prt i | i <- trigs ] ++ "=>" ++ + unlines [ "\t" ++ prt i | i <- items ]) items + +-- pretty-printing + +instance (Print n, Print c, Print l, Print t) => Print (Item n c l t) where + prt (Item name tofind rho lin (cat, found) children) + = prt name ++ ". " ++ prt cat ++ prtRhs (map fst children) ++ + " { " ++ prt rho ++ prt lin ++ " ; " ++ + concat [ prt lbl ++ "=" ++ prt ln ++ " " | + Lin lbl ln <- tofind ] ++ "; " ++ + concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl, rho) <- found ] ++ "} " ++ + concat [ "[ " ++ concat [ prt lbl ++ "=" ++ prt rho ++ " " | + (lbl,rho) <- child ] ++ "] " | + child <- map snd children ] + prt (PItem (name, edge, edges)) + = prt name ++ ". " ++ prt edge ++ prtRhs edges + +prtRhs [] = "" +prtRhs rhs = " -> " ++ prtSep " " rhs + diff --git a/src/GF/Parsing/ParseCF.hs b/src/GF/Parsing/ParseCF.hs new file mode 100644 index 000000000..20f45e3f2 --- /dev/null +++ b/src/GF/Parsing/ParseCF.hs @@ -0,0 +1,82 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCF +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Chart parsing of grammars in CF format +----------------------------------------------------------------------------- + +module GF.Parsing.ParseCF (parse, alternatives) where + +import Tracing +import GF.Printing.PrintParser +import GF.Printing.PrintSimplifiedTerm + +import GF.Data.SortedList (nubsort) +import GF.Data.Assoc +import qualified CF +import qualified CFIdent as CFI +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import qualified GF.Parsing.ParseCFG as P + +type Token = CFI.CFTok +type Name = CFI.CFFun +type Category = CFI.CFCat + +alternatives :: [(String, [String])] +alternatives = [ ("gb", ["G","GB","_gen","_genBU"]), + ("gt", ["GT","_genTD"]), + ("ibn", ["","I","B","IB","IBN","_inc","BU","_incBU"]), + ("ibb", ["BB","IBB","BU_BUF","_incBU_BUF"]), + ("ibt", ["BT","IBT","BU_TDF","_incBU_TDF"]), + ("iba", ["BA","IBA","BU_BTF","BU_TBF","_incBU_BTF","_incBU_TBF"]), + ("itn", ["T","IT","ITN","TD","_incTD"]), + ("itb", ["TB","ITB","TD_BUF","_incTD_BUF"]) + ] + +parse :: String -> CF.CF -> Category -> CF.CFParser +parse = buildParser . P.parse + +buildParser :: CFParser Name Category Token -> CF.CF -> Category -> CF.CFParser +buildParser parser cf start tokens = trace "ParseCF" $ + (parseResults, parseInformation) + where parseInformation = prtSep "\n" trees + parseResults = {-take maxTake-} [ (tree2cfTree t, []) | t <- trees ] + theInput = input tokens + edges = tracePrt "#edges" (prt.length) $ + parser pInf [start] theInput + chart = tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + edges2chart theInput $ map (fmap addCategory) edges + forests = tracePrt "#forests" (prt.length) $ + chart2forests chart (const False) $ + uncurry Edge (inputBounds theInput) start + trees = tracePrt "#trees" (prt.length) $ + concatMap forest2trees forests + pInf = pInfo $ cf2grammar cf (nubsort tokens) + + +addCategory (Rule cat rhs name) = Rule cat rhs (name, cat) + +tree2cfTree (TNode (name, cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees)) + +cf2grammar :: CF.CF -> [Token] -> Grammar Name Category Token +cf2grammar cf tokens = [ Rule cat rhs name | + (name, (cat, rhs0)) <- cfRules, + rhs <- mapM item2symbol rhs0 ] + where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++ + CF.rulesOfCF cf + item2symbol (CF.CFNonterm cat) = [Cat cat] + item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens + +-- maxTake :: Int +-- maxTake = 500 +-- maxTake = maxBound + + diff --git a/src/GF/Parsing/ParseCFG.hs b/src/GF/Parsing/ParseCFG.hs new file mode 100644 index 000000000..1005d5656 --- /dev/null +++ b/src/GF/Parsing/ParseCFG.hs @@ -0,0 +1,43 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:42 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main parsing module for context-free grammars +----------------------------------------------------------------------------- + + +module GF.Parsing.ParseCFG (parse) where + +import Char (toLower) +import GF.Parsing.Parser +import GF.Conversion.CFGrammar +import qualified GF.Parsing.CFParserGeneral as PGen +import qualified GF.Parsing.CFParserIncremental as PInc + + +parse :: (Ord n, Ord c, Ord t, Show t) => + String -> CFParser n c t +parse = decodeParser . map toLower + +decodeParser ['g',s] = PGen.parse (decodeStrategy s) +decodeParser ['i',s,f] = PInc.parse (decodeStrategy s, decodeFilter f) +decodeParser _ = decodeParser "ibn" + +decodeStrategy 'b' = (True, False) +decodeStrategy 't' = (False, True) + +decodeFilter 'a' = (True, True) +decodeFilter 'b' = (True, False) +decodeFilter 't' = (False, True) +decodeFilter 'n' = (False, False) + + + + diff --git a/src/GF/Parsing/ParseGFC.hs b/src/GF/Parsing/ParseGFC.hs new file mode 100644 index 000000000..0d0d5c662 --- /dev/null +++ b/src/GF/Parsing/ParseGFC.hs @@ -0,0 +1,177 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseGFC +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- The main parsing module, parsing GFC grammars +-- by translating to simpler formats, such as PMCFG and CFG +---------------------------------------------------------------------- + +module GF.Parsing.ParseGFC (newParser) where + +import Tracing +import GF.Printing.PrintParser +import qualified PrGrammar + +-- Haskell modules +import Monad +-- import Ratio ((%)) +-- GF modules +import qualified Grammar as GF +import Values +import qualified Macros +import qualified Modules as Mods +import qualified AbsGFC +import qualified Ident +import qualified ShellState as SS +import Operations +import GF.Data.SortedList +-- Conversion and parser modules +import GF.Data.Assoc +import GF.Parsing.Parser +-- import ConvertGrammar +import GF.Conversion.GrammarTypes +import qualified GF.Conversion.MCFGrammar as M +import qualified GF.Conversion.CFGrammar as C +import qualified GF.Parsing.ParseMCFG as PM +import qualified GF.Parsing.ParseCFG as PC +--import MCFRange + +newParser :: String -> SS.StateGrammar -> GF.Cat -> String -> Err [GF.Term] + +-- parsing via MCFG +newParser (m:strategy) gr (_, startCat) inString + | m=='m' || m=='M' = trace2 "Parser" "MCFG" $ Ok terms + where terms = map (ptree2term abstract) trees + trees = --tracePrt "trees" (prtBefore "\n") $ + tracePrt "#trees" (prt . length) $ + concatMap forest2trees forests + forests = --tracePrt "forests" (prtBefore "\n") $ + tracePrt "#forests" (prt . length) $ + concatMap (chart2forests chart isMeta) finalEdges + isMeta = null . snd + finalEdges = tracePrt "finalEdges" (prtBefore "\n") $ + filter isFinalEdge $ aElems chart +-- nubsort [ (cat, [(lbl, E.makeRange [(i,j)])]) | +-- let (i, j) = inputBounds inTokens, +-- E.Rule cat _ [E.Lin lbl _] _ <- pInf, +-- isStartCat cat ] + isFinalEdge (cat, rows) + = isStartCat cat && + inputBounds inTokens `elem` concat [ rho | (_, M.Rng rho) <- rows ] + chart = --tracePrt "chart" (prtBefore "\n" . aAssocs) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + PM.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = -- tracePrt "avg rec" (\gr -> show (sum [ length rec | E.Rule _ _ rec _ <- gr ] % length gr)) $ + mcfPInfo $ SS.statePInfo gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ nubsort [ cat | M.Rule cat _ _ _ <- pInf ] + isStartCat (MCFCat cat _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + +-- parsing via CFG +newParser (c:strategy) gr (_, startCat) inString + | c=='c' || c=='C' = trace2 "Parser" "CFG" $ Ok terms + where terms = -- tracePrt "terms" (unlines . map PrGrammar.prt) $ + map (ptree2term abstract) trees + trees = tracePrt "#trees" (prt . length) $ + --tracePrt "trees" (prtSep "\n") $ + concatMap forest2trees forests + forests = tracePrt "$cfForests" (prt) $ -- . length) $ + tracePrt "forests" (unlines . map prt) $ + concatMap convertFromCFForest cfForests + cfForests= tracePrt "cfForests" (unlines . map prt) $ + concatMap (chart2forests chart (const False)) finalEdges + finalEdges = tracePrt "finalChartEdges" prt $ + map (uncurry Edge (inputBounds inTokens)) starters + chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $ + tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + C.edges2chart inTokens edges + edges = --tracePrt "finalEdges" + --(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $ + tracePrt "#edges" (prt . length) $ + PC.parse strategy pInf starters inTokens + inTokens = input $ map AbsGFC.KS $ words inString + pInf = cfPInfo $ SS.statePInfo gr + starters = tracePrt "startCats" prt $ + filter isStartCat $ map fst $ aAssocs $ C.topdownRules pInf + isStartCat (CFCat (MCFCat cat _) _) = cat == startCat + abstract = tracePrt "abstract module" PrGrammar.prt $ + SS.absId gr + --ifNull (Ident.identC "ABS") last $ + --[i | (i, Mods.ModMod m) <- Mods.modules (SS.grammar gr), Mods.isModAbs m] + +newParser "" gr start inString = newParser "c" gr start inString + +newParser opt gr (_,cat) _ = + Bad ("new-parser '" ++ opt ++ "' not defined yet") + +ptree2term :: Ident.Ident -> ParseTree Name -> GF.Term +ptree2term a (TNode f ts) = Macros.mkApp (Macros.qq (a,f)) (map (ptree2term a) ts) +ptree2term a (TMeta) = GF.Meta (GF.MetaSymb 0) + +---------------------------------------------------------------------- +-- conversion and unification of forests + +convertFromCFForest :: ParseForest CFName -> [ParseForest Name] +convertFromCFForest (FNode (CFName name profile) children) + | isCoercion name = concat chForests + | otherwise = [ FNode name chForests | not (null chForests) ] + where chForests = concat [ mapM (checkProfile forests) profile | + forests0 <- children, + forests <- mapM convertFromCFForest forests0 ] + checkProfile forests = unifyManyForests . map (forests !!) + -- foldM unifyForests FMeta . map (forests !!) + +isCoercion Ident.IW = True +isCoercion _ = False + +unifyManyForests :: Eq n => [ParseForest n] -> [ParseForest n] +unifyManyForests [] = [FMeta] +unifyManyForests [f] = [f] +unifyManyForests (f:g:fs) = do h <- unifyForests f g + unifyManyForests (h:fs) + +unifyForests :: Eq n => ParseForest n -> ParseForest n -> [ParseForest n] +unifyForests FMeta forest = [forest] +unifyForests forest FMeta = [forest] +unifyForests (FNode name1 children1) (FNode name2 children2) + = [ FNode name1 children | name1 == name2, not (null children) ] + where children = [ forests | forests1 <- children1, forests2 <- children2, + forests <- zipWithM unifyForests forests1 forests2 ] + + + +{- +---------------------------------------------------------------------- +-- conversion and unification for parse trees instead of forests + +convertFromCFTree :: ParseTree CFName -> [ParseTree Name] +convertFromCFTree (TNode (CFName name profile) children0) + = [ TNode name children | + children1 <- mapM convertFromCFTree children0, + children <- mapM (checkProfile children1) profile ] + where checkProfile trees = unifyManyTrees . map (trees !!) + +unifyManyTrees :: Eq n => [ParseTree n] -> [ParseTree n] +unifyManyTrees [] = [TMeta] +unifyManyTrees [f] = [f] +unifyManyTrees (f:g:fs) = do h <- unifyTrees f g + unifyManyTrees (h:fs) + +unifyTrees TMeta tree = [tree] +unifyTrees tree TMeta = [tree] +unifyTrees (TNode name1 children1) (TNode name2 children2) + = [ TNode name1 children | name1 == name2, + children <- zipWithM unifyTrees children1 children2 ] + +-} + diff --git a/src/GF/Parsing/ParseMCFG.hs b/src/GF/Parsing/ParseMCFG.hs new file mode 100644 index 000000000..4afc44bb7 --- /dev/null +++ b/src/GF/Parsing/ParseMCFG.hs @@ -0,0 +1,37 @@ +---------------------------------------------------------------------- +-- | +-- Module : ParseMCFG +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Main module for MCFG parsing +----------------------------------------------------------------------------- + + +module GF.Parsing.ParseMCFG (parse) where + +import Char (toLower) +import GF.Parsing.Parser +import GF.Conversion.MCFGrammar +import qualified GF.Parsing.MCFParserBasic as PBas +import GF.Printing.PrintParser +---- import qualified MCFParserBasic2 as PBas2 -- file not found AR + + +parse :: (Ord n, Ord c, Ord l, Ord t, + Print n, Print c, Print l, Print t) => + String -> MCFParser n c l t +parse str = decodeParser (map toLower str) + +decodeParser "b" = PBas.parse +---- decodeParser "c" = PBas2.parse +decodeParser _ = decodeParser "c" + + + + diff --git a/src/GF/Parsing/Parser.hs b/src/GF/Parsing/Parser.hs new file mode 100644 index 000000000..0c18514f9 --- /dev/null +++ b/src/GF/Parsing/Parser.hs @@ -0,0 +1,187 @@ +---------------------------------------------------------------------- +-- | +-- Module : Parser +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:43 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Basic type declarations and functions to be used when parsing +----------------------------------------------------------------------------- + + +module GF.Parsing.Parser ( -- * Symbols + Symbol(..), symbol, mapSymbol, + -- * Edges + Edge(..), + -- * Parser input + Input(..), makeInput, input, inputMany, + -- * charts, parse forests & trees + ParseChart, ParseForest(..), ParseTree(..), + chart2forests, forest2trees + ) where + +-- haskell modules: +import Monad +import Array +-- gf modules: +import GF.Data.SortedList +import GF.Data.Assoc +-- parsing modules: +import GF.Printing.PrintParser + +------------------------------------------------------------ +-- symbols + +data Symbol c t = Cat c | Tok t + deriving (Eq, Ord, Show) + +symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a +mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u + +---------- + +symbol fc ft (Cat cat) = fc cat +symbol fc ft (Tok tok) = ft tok + +mapSymbol fc ft = symbol (Cat . fc) (Tok . ft) + + +------------------------------------------------------------ +-- edges + +data Edge s = Edge Int Int s + deriving (Eq, Ord, Show) + +instance Functor Edge where + fmap f (Edge i j s) = Edge i j (f s) + + +------------------------------------------------------------ +-- parser input + +data Input t = MkInput { inputEdges :: [Edge t], + inputBounds :: (Int, Int), + inputFrom :: Array Int (Assoc t [Int]), + inputTo :: Array Int (Assoc t [Int]), + inputToken :: Assoc t [(Int, Int)] + } + +makeInput :: Ord t => [Edge t] -> Input t +input :: Ord t => [t] -> Input t +inputMany :: Ord t => [[t]] -> Input t + +---------- + +makeInput inEdges | null inEdges = input [] + | otherwise = MkInput inEdges inBounds inFrom inTo inToken + where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ] + where minmax (a, b) (a', b') = (min a a', max b b') + inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $ + [ (i, [(tok, j)]) | Edge i j tok <- inEdges ] + inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds + [ (j, [(tok, i)]) | Edge i j tok <- inEdges ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +input toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = zipWith3 Edge [0..] [1..] toks + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + +inputMany toks = MkInput inEdges inBounds inFrom inTo inToken + where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ] + inBounds = (0, length toks) + inFrom = listArray inBounds $ + [ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ] + ++ [ listAssoc [] ] + inTo = listArray inBounds $ + [ listAssoc [] ] ++ + [ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ] + inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ] + + +------------------------------------------------------------ +-- charts, parse forests & trees + +type ParseChart n e = Assoc e [(n, [[e]])] + +data ParseForest n = FNode n [[ParseForest n]] | FMeta + deriving (Eq, Ord, Show) + +data ParseTree n = TNode n [ParseTree n] | TMeta + deriving (Eq, Ord, Show) + +chart2forests :: Ord e => ParseChart n e -> (e -> Bool) -> e -> [ParseForest n] + +--filterCoercions :: (n -> Bool) -> ParseForest n -> [ParseForest n] + +forest2trees :: ParseForest n -> [ParseTree n] + +instance Functor ParseTree where + fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees + fmap f (TMeta) = TMeta + +instance Functor ParseForest where + fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests + fmap f (FMeta) = FMeta + +---------- + +chart2forests chart isMeta = edge2forests + where item2forest (name, children) = FNode name $ + do edges <- children + mapM edge2forests edges + edge2forests edge + | isMeta edge = [FMeta] + | otherwise = filter checkForest $ map item2forest $ chart ? edge + checkForest (FNode _ children) = not (null children) + +-- filterCoercions _ (FMeta) = [FMeta] +-- filterCoercions isCoercion (FNode s forests) +-- | isCoercion s = do [forest] <- forests ; filterCoercions isCoercion forest +-- | otherwise = FNode s $ do children <- forests ; mapM (filterCoercions isCoercion) + +forest2trees (FNode s forests) = map (TNode s) $ forests >>= mapM forest2trees +forest2trees (FMeta) = [TMeta] + + + +------------------------------------------------------------ +-- pretty-printing + +instance (Print c, Print t) => Print (Symbol c t) where + prt = symbol prt (simpleShow.prt) + prtList = prtSep " " + +simpleShow :: String -> String +simpleShow s = "\"" ++ concatMap mkEsc s ++ "\"" + where + mkEsc :: Char -> String + mkEsc c = case c of + _ | elem c "\\\"" -> '\\' : [c] + '\n' -> "\\n" + '\t' -> "\\t" + _ -> [c] + +instance (Print s) => Print (Edge s) where + prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]" + prtList = prtSep "" + +instance (Print s) => Print (ParseTree s) where + prt (TNode s trees) = prt s ++ "^{" ++ prtSep " " trees ++ "}" + prt (TMeta) = "?" + prtList = prtAfter "\n" + +instance (Print s) => Print (ParseForest s) where + prt (FNode s forests) = prt s ++ "^{" ++ prtSep " | " (map (prtSep " ") forests) ++ "}" + prt (FMeta) = "?" + prtList = prtAfter "\n" + + diff --git a/src/GF/Printing/PrintParser.hs b/src/GF/Printing/PrintParser.hs new file mode 100644 index 000000000..3971f0a40 --- /dev/null +++ b/src/GF/Printing/PrintParser.hs @@ -0,0 +1,79 @@ +---------------------------------------------------------------------- +-- | +-- Module : PrintParser +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:44 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Pretty-printing of parser objects +----------------------------------------------------------------------------- + +module GF.Printing.PrintParser (Print(..), + prtBefore, prtAfter, prtSep, + prtBeforeAfter, + prIO + ) where + +-- haskell modules: +import List (intersperse) +-- gf modules: +import Operations (Err(..)) +import Ident (Ident(..)) +import qualified PrintGFC as P + +------------------------------------------------------------ + +prtBefore :: Print a => String -> [a] -> String +prtBefore before = prtBeforeAfter before "" + +prtAfter :: Print a => String -> [a] -> String +prtAfter after = prtBeforeAfter "" after + +prtSep :: Print a => String -> [a] -> String +prtSep sep = concat . intersperse sep . map prt + +prtBeforeAfter :: Print a => String -> String -> [a] -> String +prtBeforeAfter before after as = concat [ before ++ prt a ++ after | a <- as ] + +prIO :: Print a => a -> IO () +prIO = putStr . prt + +class Print a where + prt :: a -> String + prtList :: [a] -> String + prtList as = "[" ++ prtSep "," as ++ "]" + +instance Print a => Print [a] where + prt = prtList + +instance (Print a, Print b) => Print (a, b) where + prt (a, b) = "(" ++ prt a ++ "," ++ prt b ++ ")" + +instance (Print a, Print b, Print c) => Print (a, b, c) where + prt (a, b, c) = "(" ++ prt a ++ "," ++ prt b ++ "," ++ prt c ++ ")" + +instance (Print a, Print b, Print c, Print d) => Print (a, b, c, d) where + prt (a, b, c, d) = "(" ++ prt a ++ "," ++ prt b ++ "," ++ prt c ++ "," ++ prt d ++ ")" + +instance Print Char where + prt = return + prtList = id + +instance Print Int where + prt = show + +instance Print Integer where + prt = show + +instance Print a => Print (Err a) where + prt (Ok a) = prt a + prt (Bad str) = str + +instance Print Ident where + prt ident = str + where str = P.printTree ident + diff --git a/src/GF/Printing/PrintSimplifiedTerm.hs b/src/GF/Printing/PrintSimplifiedTerm.hs new file mode 100644 index 000000000..9425f6f4d --- /dev/null +++ b/src/GF/Printing/PrintSimplifiedTerm.hs @@ -0,0 +1,122 @@ +---------------------------------------------------------------------- +-- | +-- Module : PrintSimplifiedTerm +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/03/21 14:17:44 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Instances for printing terms in a simplified format +----------------------------------------------------------------------------- + + +module GF.Printing.PrintSimplifiedTerm () where + +import AbsGFC +import CF +import CFIdent +import GF.Printing.PrintParser + +instance Print Term where + prt (Arg arg) = prt arg + prt (con `Con` []) = prt con + prt (con `Con` terms) = prt con ++ "(" ++ prtSep ", " terms ++ ")" + prt (LI ident) = prt ident + prt (R record) = "{" ++ prtSep ";" record ++ "}" + prt (term `P` lbl) = prt term ++ "." ++ prt lbl + prt (T _ table) = "table{" ++ prtSep ";" table ++ "}" + prt (term `S` sel) = prt term ++ "!" ++ prt sel + prt (FV terms) = "variants{" ++ prtSep "|" terms ++ "}" + prt (term `C` term') = prt term ++ " " ++ prt term' + prt (K tokn) = show (prt tokn) + prt (E) = show "" + +instance Print Patt where + prt (con `PC` []) = prt con + prt (con `PC` pats) = prt con ++ "(" ++ prtSep "," pats ++ ")" + prt (PV ident) = prt ident + prt (PW) = "_" + prt (PR record) = "{" ++ prtSep ";" record ++ "}" + +instance Print Label where + prt (L ident) = prt ident + prt (LV nr) = "$" ++ show nr + +instance Print Tokn where + prt (KS str) = str + prt tokn@(KP _ _) = show tokn + +instance Print ArgVar where + prt (A cat argNr) = prt cat ++ "#" ++ show argNr + +instance Print CIdent where + prt (CIQ _ ident) = prt ident + +instance Print Case where + prt (pats `Cas` term) = prtSep "|" pats ++ "=>" ++ prt term + +instance Print Assign where + prt (lbl `Ass` term) = prt lbl ++ "=" ++ prt term + +instance Print PattAssign where + prt (lbl `PAss` pat) = prt lbl ++ "=" ++ prt pat + +instance Print Atom where + prt (AC c) = prt c + prt (AD c) = "<" ++ prt c ++ ">" + prt (AV i) = "$" ++ prt i + prt (AM n) = "?" ++ show n + prt (AS s) = show s + prt (AI n) = show n + prt (AT s) = show s + +instance Print CType where + prt (RecType rtype) = "{" ++ prtSep ";" rtype ++ "}" + prt (Table ptype vtype) = "(" ++ prt ptype ++ "=>" ++ prt vtype ++ ")" + prt (Cn cn) = prt cn + prt (TStr) = "Str" + +instance Print Labelling where + prt (lbl `Lbg` ctype) = prt lbl ++ ":" ++ prt ctype + +instance Print CFItem where + prt (CFTerm regexp) = prt regexp + prt (CFNonterm cat) = prt cat + +instance Print RegExp where + prt (RegAlts words) = "("++prtSep "|" words ++ ")" + prt (RegSpec tok) = prt tok + +instance Print CFTok where + prt (TS str) = str + prt tok = show tok + +instance Print CFCat where + prt (CFCat (cid,lbl)) = prt cid ++ "-" ++ prt lbl + +instance Print CFFun where + prt (CFFun fun) = prt (fst fun) + +sizeCT :: CType -> Int +sizeCT (RecType rt) = 1 + sum [ sizeCT t | _ `Lbg` t <- rt ] +sizeCT (Table pt vt) = 1 + sizeCT pt + sizeCT vt +sizeCT (Cn cn) = 1 +sizeCT (TStr) = 1 + +sizeT :: Term -> Int +sizeT (_ `Con` ts) = 2 + sum (map sizeT ts) +sizeT (R rec) = 1 + sum [ sizeT t | _ `Ass` t <- rec ] +sizeT (t `P` _) = 1 + sizeT t +sizeT (T _ tbl) = 1 + sum [ sum (map sizeP ps) + sizeT t | ps `Cas` t <- tbl ] +sizeT (t `S` s) = 1 + sizeT t + sizeT s +sizeT (t `C` t') = 1 + sizeT t + sizeT t' +sizeT (FV ts) = 1 + sum (map sizeT ts) +sizeT _ = 1 + +sizeP :: Patt -> Int +sizeP (con `PC` pats) = 2 + sum (map sizeP pats) +sizeP (PR record) = 1 + sum [ sizeP p | _ `PAss` p <- record ] +sizeP _ = 1 diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs index f69bd0956..024fc9f31 100644 --- a/src/GF/Speech/PrGSL.hs +++ b/src/GF/Speech/PrGSL.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 13:54:44 $ +-- > CVS $Date: 2005/03/21 14:17:44 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.12 $ +-- > CVS $Revision: 1.13 $ -- -- This module prints a CFG as a Nuance GSL 2.0 grammar. -- @@ -22,7 +22,7 @@ import Ident import GF.Conversion.CFGrammar import GF.Parsing.Parser (Symbol(..)) import GF.Conversion.GrammarTypes -import GF.Parsing.PrintParser +import GF.Printing.PrintParser import Option import Data.Char (toUpper,toLower) diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs index b8f36fed1..ade23da91 100644 --- a/src/GF/Speech/PrJSGF.hs +++ b/src/GF/Speech/PrJSGF.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 13:54:44 $ +-- > CVS $Date: 2005/03/21 14:17:44 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.6 $ +-- > CVS $Revision: 1.7 $ -- -- This module prints a CFG as a JSGF grammar. -- @@ -24,7 +24,7 @@ import Ident import GF.Conversion.CFGrammar import GF.Parsing.Parser (Symbol(..)) import GF.Conversion.GrammarTypes -import GF.Parsing.PrintParser +import GF.Printing.PrintParser import Option jsgfPrinter :: Ident -- ^ Grammar name diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index ad2239202..c2f8fc33c 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 13:54:44 $ +-- > CVS $Date: 2005/03/21 14:17:44 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.8 $ +-- > CVS $Revision: 1.9 $ -- -- Representation of, conversion to, and utilities for -- printing of a general Speech Recognition Grammar. @@ -24,7 +24,7 @@ import Ident import GF.Conversion.CFGrammar import GF.Parsing.Parser (Symbol(..)) import GF.Conversion.GrammarTypes -import GF.Parsing.PrintParser +import GF.Printing.PrintParser import TransformCFG import Option diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index ff804da11..6c6f5091b 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 13:54:44 $ +-- > CVS $Date: 2005/03/21 14:17:45 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.6 $ +-- > CVS $Revision: 1.7 $ -- -- This module does some useful transformations on CFGs. -- @@ -20,7 +20,7 @@ import Ident import GF.Conversion.CFGrammar import GF.Parsing.Parser (Symbol(..)) import GF.Conversion.GrammarTypes -import GF.Parsing.PrintParser +import GF.Printing.PrintParser import Data.FiniteMap import Data.List diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index c9eac9c11..9c7c9e15e 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/03/21 13:54:44 $ +-- > CVS $Date: 2005/03/21 14:17:45 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.45 $ +-- > CVS $Revision: 1.46 $ -- -- A database for customizable GF shell commands. -- @@ -74,7 +74,7 @@ import qualified GF.Parsing.ParseCF as PCF -- grammar conversions -- peb 19/4-04 -- see also customGrammarPrinter import qualified GF.Conversion.ConvertGrammar as Cnv -import qualified GF.Parsing.PrintParser as Prt +import qualified GF.Printing.PrintParser as Prt import GFC import qualified MkGFC as MC