From 95c6e8a58f36331d56aae6dde021269b9ab5a358 Mon Sep 17 00:00:00 2001 From: peb Date: Thu, 14 Apr 2005 17:38:36 +0000 Subject: [PATCH] "Committed_by_peb" --- src/GF/CFGM/PrintCFGrammar.hs | 58 +++++++++++-------- src/GF/Conversion/GFC.hs | 24 +++++++- src/GF/Conversion/RemoveSingletons.hs | 82 +++++++++++++++++++++++++++ src/GF/Parsing/CF.hs | 65 +++++++++++++++++++++ src/GF/Parsing/CFG.hs | 6 +- src/GF/Speech/PrGSL.hs | 22 ++++--- src/GF/Speech/PrJSGF.hs | 20 ++++--- src/GF/Speech/SRG.hs | 28 +++++---- src/GF/Speech/TransformCFG.hs | 58 +++++++++++-------- src/GF/UseGrammar/Custom.hs | 30 ++++++---- src/GF/UseGrammar/Parsing.hs | 6 +- 11 files changed, 302 insertions(+), 97 deletions(-) create mode 100644 src/GF/Conversion/RemoveSingletons.hs create mode 100644 src/GF/Parsing/CF.hs diff --git a/src/GF/CFGM/PrintCFGrammar.hs b/src/GF/CFGM/PrintCFGrammar.hs index f4c01b39a..bb213e32b 100644 --- a/src/GF/CFGM/PrintCFGrammar.hs +++ b/src/GF/CFGM/PrintCFGrammar.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/11 13:53:38 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.13 $ +-- > CVS $Revision: 1.14 $ -- -- Handles printing a CFGrammar in CFGM format. ----------------------------------------------------------------------------- @@ -19,12 +19,20 @@ import qualified PrintCFG import Ident import GFC import Modules -import qualified GF.OldParsing.ConvertGrammar as Cnv -import qualified GF.Printing.PrintParser as Prt -import qualified GF.OldParsing.CFGrammar as CFGrammar -import qualified GF.OldParsing.GrammarTypes as GT + +-- import qualified GF.OldParsing.ConvertGrammar as Cnv +-- import qualified GF.Printing.PrintParser as Prt +-- import qualified GF.OldParsing.CFGrammar as CFGrammar +-- import qualified GF.OldParsing.GrammarTypes as GT +-- import qualified AbsCFG +-- import qualified GF.OldParsing.Utilities as Parser +import qualified GF.Conversion.GFC as Cnv +import GF.Infra.Print (prt) +import GF.Formalism.CFG (CFRule(..)) +import qualified GF.Conversion.Types as GT import qualified AbsCFG -import qualified GF.OldParsing.Utilities as Parser +import GF.Formalism.Utilities (Symbol(..)) + import ErrM import qualified Option @@ -48,8 +56,9 @@ getFlag fs x = listToMaybe [v | Flg (IC k) (IC v) <- fs, k == x] -- | OBS! Should use 'ShellState.statePInfo' or 'ShellState.pInfos' -- instead of 'Cnv.pInfo' (which recalculates the grammar every time) prLangAsCFGM :: CanonGrammar -> Ident -> Maybe String -> String -prLangAsCFGM gr i start = prCFGrammarAsCFGM (Cnv.cfg (Cnv.pInfo opts gr i)) i start - where opts = Option.Opts [Option.gfcConversion "nondet"] +prLangAsCFGM gr i start = prCFGrammarAsCFGM (Cnv.gfc2cfg (gr, i)) i start +-- prLangAsCFGM gr i start = prCFGrammarAsCFGM (Cnv.cfg (Cnv.pInfo opts gr i)) i start +-- where opts = Option.Opts [Option.gfcConversion "nondet"] {- prCFGrammarAsCFGM :: GT.CFGrammar -> Ident -> Maybe String -> String @@ -57,21 +66,21 @@ prCFGrammarAsCFGM gr i@(IC lang) start = (header . startcat . rules . footer) "" where header = showString "grammar " . showString lang . showString "\n" startcat = maybe id (\s -> showString "startcat " . showString (s++"{}.s") . showString ";\n") start - rules0 = map Prt.prt gr + rules0 = map prt gr rules = showString $ concat $ map (\l -> init l++";\n") rules0 footer = showString "end grammar\n" -} -prCFGrammarAsCFGM :: GT.CFGrammar -> Ident -> Maybe String -> String +prCFGrammarAsCFGM :: GT.CGrammar -> Ident -> Maybe String -> String prCFGrammarAsCFGM gr i start = PrintCFG.printTree $ cfGrammarToCFGM gr i start -cfGrammarToCFGM :: GT.CFGrammar -> Ident -> Maybe String -> AbsCFG.Grammar +cfGrammarToCFGM :: GT.CGrammar -> Ident -> Maybe String -> AbsCFG.Grammar cfGrammarToCFGM gr i start = AbsCFG.Grammar (identToCFGMIdent i) flags (map ruleToCFGMRule gr) where flags = maybe [] (\c -> [AbsCFG.StartCat $ strToCFGMCat (c++"{}.s")]) start -ruleToCFGMRule :: GT.CFRule -> AbsCFG.Rule +ruleToCFGMRule :: GT.CRule -> AbsCFG.Rule -- new version, without the MCFName constructor: -ruleToCFGMRule (CFGrammar.Rule c rhs (GT.CFName fun profile)) +ruleToCFGMRule (CFRule c rhs (GT.Name fun profile)) = AbsCFG.Rule fun' p' c' rhs' where fun' = identToFun fun @@ -84,17 +93,20 @@ ruleToCFGMRule (CFGrammar.Rule c rhs (GT.CFName (GT.MCFName fun cat args) lbl pr = AbsCFG.Rule fun' n' p' c' rhs' where fun' = identToCFGMIdent fun - n' = strToCFGMName (Prt.prt cat ++ concat [ "/" ++ Prt.prt arg | arg <- args ] ++ Prt.prt lbl) + n' = strToCFGMName (prt cat ++ concat [ "/" ++ prt arg | arg <- args ] ++ prt lbl) p' = profileToCFGMProfile profile c' = catToCFGMCat c rhs' = map symbolToGFCMSymbol rhs -} -profileToCFGMProfile :: GT.CFProfile -> AbsCFG.Profile -profileToCFGMProfile = AbsCFG.Profile . map (AbsCFG.Ints . map fromIntegral) +profileToCFGMProfile :: [GT.Profile a] -> AbsCFG.Profile +profileToCFGMProfile = AbsCFG.Profile . map cnvProfile + where cnvProfile (GT.Unify ns) = AbsCFG.Ints $ map fromIntegral ns + cnvProfile (GT.Constant a) = AbsCFG.Ints [] + -- this should be replaced with a new constructor in 'AbsCFG' identToCFGMIdent :: Ident -> AbsCFG.Ident -identToCFGMIdent = AbsCFG.Ident . Prt.prt +identToCFGMIdent = AbsCFG.Ident . prt identToFun :: Ident -> AbsCFG.Fun identToFun IW = AbsCFG.Coerce @@ -103,12 +115,12 @@ identToFun i = AbsCFG.Cons (identToCFGMIdent i) strToCFGMCat :: String -> AbsCFG.Category strToCFGMCat = AbsCFG.Category . AbsCFG.SingleQuoteString . quoteSingle -catToCFGMCat :: GT.CFCat -> AbsCFG.Category -catToCFGMCat = strToCFGMCat . Prt.prt +catToCFGMCat :: GT.CCat -> AbsCFG.Category +catToCFGMCat = strToCFGMCat . prt -symbolToGFCMSymbol :: Parser.Symbol GT.CFCat GT.Tokn -> AbsCFG.Symbol -symbolToGFCMSymbol (Parser.Cat c) = AbsCFG.CatS (catToCFGMCat c) -symbolToGFCMSymbol (Parser.Tok t) = AbsCFG.TermS (Prt.prt t) +symbolToGFCMSymbol :: Symbol GT.CCat GT.Token -> AbsCFG.Symbol +symbolToGFCMSymbol (Cat c) = AbsCFG.CatS (catToCFGMCat c) +symbolToGFCMSymbol (Tok t) = AbsCFG.TermS (prt t) quoteSingle :: String -> String quoteSingle s = "'" ++ escapeSingle s ++ "'" diff --git a/src/GF/Conversion/GFC.hs b/src/GF/Conversion/GFC.hs index 21b52d2b1..765fb10e0 100644 --- a/src/GF/Conversion/GFC.hs +++ b/src/GF/Conversion/GFC.hs @@ -4,9 +4,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/14 11:42:05 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.3 $ +-- > CVS $Revision: 1.4 $ -- -- All conversions from GFC ----------------------------------------------------------------------------- @@ -25,6 +25,9 @@ import qualified GF.Conversion.RemoveSingletons as RemSing import qualified GF.Conversion.SimpleToMCFG as S2M import qualified GF.Conversion.MCFGtoCFG as M2C +---------------------------------------------------------------------- +-- * single step conversions + gfc2simple :: (CanonGrammar, Ident) -> SGrammar gfc2simple = G2S.convertGrammar @@ -43,4 +46,21 @@ simple2mcfg_strict = S2M.convertGrammarStrict mcfg2cfg :: MGrammar -> CGrammar mcfg2cfg = M2C.convertGrammar +---------------------------------------------------------------------- +-- * GFC -> MCFG + +-- | default conversion: +-- +-- - instantiating finite dependencies ('removeSingletons . simple2finite') +-- - nondeterministic MCFG conversion ('simple2mcfg_nondet') +gfc2mcfg :: (CanonGrammar, Ident) -> MGrammar +gfc2mcfg = simple2mcfg_nondet . removeSingletons . simple2finite . gfc2simple + +---------------------------------------------------------------------- +-- * GFC -> CFG + +-- | default conversion = default mcfg conversion + trivial cfg conversion +gfc2cfg :: (CanonGrammar, Ident) -> CGrammar +gfc2cfg = mcfg2cfg . gfc2mcfg + diff --git a/src/GF/Conversion/RemoveSingletons.hs b/src/GF/Conversion/RemoveSingletons.hs new file mode 100644 index 000000000..9c5ff274e --- /dev/null +++ b/src/GF/Conversion/RemoveSingletons.hs @@ -0,0 +1,82 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/14 18:41:21 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Instantiating all types which only have one single element. +-- +-- Should be merged into 'GF.Conversion.FiniteToSimple' +----------------------------------------------------------------------------- + +module GF.Conversion.RemoveSingletons where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Formalism.Utilities +import GF.Formalism.GCFG +import GF.Formalism.SimpleGFC +import GF.Conversion.Types + +import GF.Data.SortedList +import GF.Data.Assoc + +import List (mapAccumL) + +convertGrammar :: SGrammar -> SGrammar +convertGrammar grammar = if singles == emptyAssoc then grammar + else tracePrt "#singleton-removed rules" (prt . length) $ + map (convertRule singles) grammar + where singles = calcSingletons grammar + +convertRule :: Assoc SCat (SyntaxForest Fun, Maybe STerm) -> SRule -> SRule +convertRule singles rule@(Rule (Abs _ decls _) _) + = if all (Nothing ==) singleArgs then rule + else instantiateSingles singleArgs rule + where singleArgs = map (lookupAssoc singles . decl2cat) decls + +instantiateSingles :: [Maybe (SyntaxForest Fun, Maybe STerm)] -> SRule -> SRule +instantiateSingles singleArgs (Rule (Abs decl decls (Name fun profile)) (Cnc lcat lcats lterm)) + = Rule (Abs decl decls' (Name fun profile')) (Cnc lcat lcats' lterm') + where (decls', lcats') = unzip [ (d, l) | (Nothing, d, l) <- zip3 singleArgs decls lcats ] + profile' = map (fmap fst) exProfile `composeProfiles` profile + newArgs = map (fmap snd) exProfile + lterm' = fmap (instantiateLin newArgs) lterm + exProfile = snd $ mapAccumL mkProfile 0 singleArgs + mkProfile nr (Just trm) = (nr, Constant trm) + mkProfile nr (Nothing) = (nr+1, Unify [nr]) + +instantiateLin :: [Profile (Maybe STerm)] -> STerm -> STerm +instantiateLin newArgs = inst + where inst (Arg nr cat path) + = case newArgs !! nr of + Unify [nr'] -> Arg nr' cat path + Constant (Just term) -> termFollowPath path term + Constant Nothing -> error "instantiateLin: argument has no linearization" + inst (cn :^ terms) = cn :^ map inst terms + inst (Rec rec) = Rec [ (lbl, inst term) | (lbl, term) <- rec ] + inst (term :. lbl) = inst term +. lbl + inst (Tbl tbl) = Tbl [ (pat, inst term) | (pat, term) <- tbl ] + inst (term :! sel) = inst term +! inst sel + inst (Variants ts) = variants (map inst ts) + inst (t1 :++ t2) = inst t1 ?++ inst t2 + inst term = term + +---------------------------------------------------------------------- + +calcSingletons :: SGrammar -> Assoc SCat (SyntaxForest Fun, Maybe STerm) +calcSingletons rules = listAssoc singleCats + where singleCats = tracePrt "singleton cats" (prtSep " ") $ + [ (cat, (constantNameToForest name, lin)) | + (cat, [([], name, lin)]) <- rulesByCat ] + rulesByCat = groupPairs $ nubsort + [ (decl2cat cat, (args, name, lin)) | + Rule (Abs cat args name) (Cnc _ _ lin) <- rules ] + + + diff --git a/src/GF/Parsing/CF.hs b/src/GF/Parsing/CF.hs new file mode 100644 index 000000000..3079a47ec --- /dev/null +++ b/src/GF/Parsing/CF.hs @@ -0,0 +1,65 @@ +---------------------------------------------------------------------- +-- | +-- Maintainer : PL +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: 2005/04/14 18:41:22 $ +-- > CVS $Author: peb $ +-- > CVS $Revision: 1.1 $ +-- +-- Chart parsing of grammars in CF format +----------------------------------------------------------------------------- + +module GF.NewParsing.CF (parse) where + +import GF.System.Tracing +import GF.Infra.Print + +import GF.Data.SortedList (nubsort) +import GF.Data.Assoc +import qualified CF +import qualified CFIdent as CFI +import GF.Formalism.Utilities +import GF.Formalism.CFG +import qualified GF.NewParsing.CFG as P + +type Token = CFI.CFTok +type Name = CFI.CFFun +type Category = CFI.CFCat + +parse :: String -> CF.CF -> Category -> CF.CFParser +parse = buildParser . P.parseCF + +buildParser :: P.CFParser Category Name Token -> CF.CF -> Category -> CF.CFParser +buildParser parser cf start tokens = trace "ParseCF" $ + (parseResults, parseInformation) + where parseInformation = prtSep "\n" trees + parseResults = [ (tree2cfTree t, []) | t <- trees ] + theInput = input tokens + edges = tracePrt "#edges" (prt.length) $ + parser pInf [start] theInput + chart = tracePrt "#chart" (prt . map (length.snd) . aAssocs) $ + grammar2chart $ map addCategory edges + forests = tracePrt "#forests" (prt.length) $ + chart2forests chart (const False) + [ uncurry Edge (inputBounds theInput) start ] + trees = tracePrt "#trees" (prt.length) $ + concatMap forest2trees forests + pInf = P.buildCFPInfo $ cf2grammar cf (nubsort tokens) + + +addCategory (CFRule cat rhs name) = CFRule cat rhs (name, cat) + +tree2cfTree (TNode (name, Edge _ _ cat) trees) = CF.CFTree (name, (cat, map tree2cfTree trees)) + +cf2grammar :: CF.CF -> [Token] -> CFGrammar Category Name Token +cf2grammar cf tokens = [ CFRule cat rhs name | + (name, (cat, rhs0)) <- cfRules, + rhs <- mapM item2symbol rhs0 ] + where cfRules = concatMap (CF.predefRules (CF.predefOfCF cf)) tokens ++ + CF.rulesOfCF cf + item2symbol (CF.CFNonterm cat) = [Cat cat] + item2symbol item = map Tok $ filter (CF.matchCFTerm item) tokens + + diff --git a/src/GF/Parsing/CFG.hs b/src/GF/Parsing/CFG.hs index 6af1de8ac..3133e8758 100644 --- a/src/GF/Parsing/CFG.hs +++ b/src/GF/Parsing/CFG.hs @@ -4,9 +4,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/11 13:52:51 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.1 $ +-- > CVS $Revision: 1.2 $ -- -- CFG parsing ----------------------------------------------------------------------------- @@ -24,7 +24,7 @@ import qualified GF.NewParsing.CFG.General as Gen ---------------------------------------------------------------------- -- parsing ---parseCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t +parseCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t parseCF "gb" = Gen.parse bottomup parseCF "gt" = Gen.parse topdown parseCF "ib" = Inc.parse (bottomup, noFilter) diff --git a/src/GF/Speech/PrGSL.hs b/src/GF/Speech/PrGSL.hs index d59412ebd..84e3f2a74 100644 --- a/src/GF/Speech/PrGSL.hs +++ b/src/GF/Speech/PrGSL.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/11 13:53:38 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.15 $ +-- > CVS $Revision: 1.16 $ -- -- This module prints a CFG as a Nuance GSL 2.0 grammar. -- @@ -19,16 +19,20 @@ module PrGSL (gslPrinter) where import SRG import Ident -import GF.OldParsing.CFGrammar -import GF.OldParsing.Utilities (Symbol(..)) -import GF.OldParsing.GrammarTypes -import GF.Printing.PrintParser +-- import GF.OldParsing.CFGrammar +-- import GF.OldParsing.Utilities (Symbol(..)) +-- import GF.OldParsing.GrammarTypes +-- import GF.Printing.PrintParser +import GF.Formalism.CFG +import GF.Formalism.Utilities (Symbol(..)) +import GF.Conversion.Types +import GF.Infra.Print import Option import Data.Char (toUpper,toLower) gslPrinter :: Ident -- ^ Grammar name - -> Options -> CFGrammar -> String + -> Options -> CGrammar -> String gslPrinter name opts cfg = prGSL srg "" where srg = makeSRG name opts cfg @@ -55,13 +59,13 @@ firstToUpper :: String -> String firstToUpper [] = [] firstToUpper (x:xs) = toUpper x : xs -rmPunct :: [Symbol String Tokn] -> [Symbol String Tokn] +rmPunct :: [Symbol String Token] -> [Symbol String Token] rmPunct [] = [] rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss rmPunct (s:ss) = s : rmPunct ss -- Nuance does not like upper case characters in tokens -showToken :: Tokn -> String +showToken :: Token -> String showToken t = map toLower (prt t) isPunct :: Char -> Bool diff --git a/src/GF/Speech/PrJSGF.hs b/src/GF/Speech/PrJSGF.hs index 9562ff5ac..975685d81 100644 --- a/src/GF/Speech/PrJSGF.hs +++ b/src/GF/Speech/PrJSGF.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/11 13:53:39 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.9 $ +-- > CVS $Revision: 1.10 $ -- -- This module prints a CFG as a JSGF grammar. -- @@ -21,14 +21,18 @@ module PrJSGF (jsgfPrinter) where import SRG import Ident -import GF.OldParsing.CFGrammar -import GF.OldParsing.Utilities (Symbol(..)) -import GF.OldParsing.GrammarTypes -import GF.Printing.PrintParser +-- import GF.OldParsing.CFGrammar +-- import GF.OldParsing.Utilities (Symbol(..)) +-- import GF.OldParsing.GrammarTypes +-- import GF.Printing.PrintParser +import GF.Formalism.CFG +import GF.Formalism.Utilities (Symbol(..)) +import GF.Conversion.Types +import GF.Infra.Print import Option jsgfPrinter :: Ident -- ^ Grammar name - -> Options -> CFGrammar -> String + -> Options -> CGrammar -> String jsgfPrinter name opts cfg = prJSGF srg "" where srg = makeSRG name opts cfg @@ -53,7 +57,7 @@ prJSGF (SRG{grammarName=name,startCat=start,origStartCat=origStart,rules=rs}) prSymbol (Tok t) = wrap "\"" (prtS t) "\"" prCat c = showChar '<' . showString c . showChar '>' -rmPunct :: [Symbol String Tokn] -> [Symbol String Tokn] +rmPunct :: [Symbol String Token] -> [Symbol String Token] rmPunct [] = [] rmPunct (Tok t:ss) | all isPunct (prt t) = rmPunct ss rmPunct (s:ss) = s : rmPunct ss diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index 9ec684295..e1ac0efc4 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/11 13:53:39 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.11 $ +-- > CVS $Revision: 1.12 $ -- -- Representation of, conversion to, and utilities for -- printing of a general Speech Recognition Grammar. @@ -21,10 +21,14 @@ module SRG where import Ident -import GF.OldParsing.CFGrammar -import GF.OldParsing.Utilities (Symbol(..)) -import GF.OldParsing.GrammarTypes -import GF.Printing.PrintParser +-- import GF.OldParsing.CFGrammar +-- import GF.OldParsing.Utilities (Symbol(..)) +-- import GF.OldParsing.GrammarTypes +-- import GF.Printing.PrintParser +import GF.Formalism.CFG +import GF.Formalism.Utilities (Symbol(..)) +import GF.Conversion.Types +import GF.Infra.Print import TransformCFG import Option @@ -40,7 +44,7 @@ data SRG = SRG { grammarName :: String -- ^ grammar name } data SRGRule = SRGRule String String [SRGAlt] -- ^ SRG category name, original category name -- and productions -type SRGAlt = [Symbol String Tokn] +type SRGAlt = [Symbol String Token] -- | SRG category name and original name type CatName = (String,String) @@ -49,7 +53,7 @@ type CatNames = FiniteMap String String makeSRG :: Ident -- ^ Grammar name -> Options -- ^ Grammar options - -> CFGrammar -- ^ A context-free grammar + -> CGrammar -- ^ A context-free grammar -> SRG makeSRG i opts gr = SRG { grammarName = name, startCat = start, @@ -71,11 +75,11 @@ cfgRulesToSRGRule names rs@(r:_) = SRGRule cat origCat rhs renameCat (Cat c) = Cat (lookupFM_ names c) renameCat t = t -ruleCat :: Rule n c t -> c -ruleCat (Rule c _ _) = c +ruleCat :: CFRule c n t -> c +ruleCat (CFRule c _ _) = c -ruleRhs :: Rule n c t -> [Symbol c t] -ruleRhs (Rule _ r _) = r +ruleRhs :: CFRule c n t -> [Symbol c t] +ruleRhs (CFRule _ r _) = r mkCatNames :: String -- ^ Category name prefix -> [String] -- ^ Original category names diff --git a/src/GF/Speech/TransformCFG.hs b/src/GF/Speech/TransformCFG.hs index 8dd81cb91..6a1b7c817 100644 --- a/src/GF/Speech/TransformCFG.hs +++ b/src/GF/Speech/TransformCFG.hs @@ -5,22 +5,28 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/11 13:53:39 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.9 $ +-- > CVS $Revision: 1.10 $ -- -- This module does some useful transformations on CFGs. -- -- FIXME: remove cycles +-- +-- peb thinks: most of this module should be moved to GF.Conversion... ----------------------------------------------------------------------------- module TransformCFG (makeNice, CFRule_) where import Ident -import GF.OldParsing.CFGrammar -import GF.OldParsing.Utilities (Symbol(..)) -import GF.OldParsing.GrammarTypes -import GF.Printing.PrintParser +-- import GF.OldParsing.CFGrammar +-- import GF.OldParsing.Utilities (Symbol(..)) +-- import GF.OldParsing.GrammarTypes +-- import GF.Printing.PrintParser +import GF.Formalism.CFG +import GF.Formalism.Utilities (Symbol(..), mapSymbol) +import GF.Conversion.Types +import GF.Infra.Print import Data.FiniteMap import Data.List @@ -30,63 +36,65 @@ import Debug.Trace -- | not very nice to get replace the structured CFCat type with a simple string -type CFRule_ = Rule CFName String Tokn +type CFRule_ = CFRule Cat_ Name Token +type Cat_ = String -type CFRules = FiniteMap String [CFRule_] +type CFRules = FiniteMap Cat_ [CFRule_] -makeNice :: CFGrammar -> [CFRule_] +makeNice :: CGrammar -> [CFRule_] makeNice = concat . eltsFM . makeNice' . groupProds . cfgToCFRules where makeNice' = removeLeftRecursion . removeEmptyCats -cfgToCFRules :: CFGrammar -> [CFRule_] -cfgToCFRules cfg = [Rule (catToString c) (map symb r) n | Rule c r n <- cfg] - where symb (Cat c) = Cat (catToString c) - symb (Tok t) = Tok t +cfgToCFRules :: CGrammar -> [CFRule_] +cfgToCFRules cfg = [CFRule (catToString c) (map symb r) n | CFRule c r n <- cfg] + where symb = mapSymbol catToString id + -- symb (Cat c) = Cat (catToString c) + -- symb (Tok t) = Tok t catToString = prt -- | Group productions by their lhs categories groupProds :: [CFRule_] -> CFRules groupProds = addListToFM_C (++) emptyFM . map (\rs -> (ruleCat rs,[rs])) - where ruleCat (Rule c _ _) = c + where ruleCat (CFRule c _ _) = c -- | Remove productions which use categories which have no productions removeEmptyCats :: CFRules -> CFRules removeEmptyCats rss = listToFM $ fix removeEmptyCats' $ fmToList rss where - removeEmptyCats' :: [(String,[CFRule_])] -> [(String,[CFRule_])] + removeEmptyCats' :: [(Cat_,[CFRule_])] -> [(Cat_,[CFRule_])] removeEmptyCats' rs = k' where keep = filter (not . null . snd) rs - allCats = nub [c | (_,r) <- rs, Rule _ rhs _ <- r, Cat c <- rhs] + allCats = nub [c | (_,r) <- rs, CFRule _ rhs _ <- r, Cat c <- rhs] emptyCats = filter (nothingOrNull . flip lookup rs) allCats k' = map (\ (c,xs) -> (c, filter (not . anyUsedBy emptyCats) xs)) keep -anyUsedBy :: [String] -> CFRule_ -> Bool -anyUsedBy ss (Rule _ r _) = or [c `elem` ss | Cat c <- r] +anyUsedBy :: [Cat_] -> CFRule_ -> Bool +anyUsedBy ss (CFRule _ r _) = or [c `elem` ss | Cat c <- r] removeLeftRecursion :: CFRules -> CFRules removeLeftRecursion rs = listToFM $ concatMap removeDirectLeftRecursion $ map handleProds $ fmToList rs where handleProds (c, r) = (c, concatMap handleProd r) - handleProd (Rule ai (Cat aj:alpha) n) | aj < ai = + handleProd (CFRule ai (Cat aj:alpha) n) | aj < ai = -- FIXME: this will give multiple rules with the same name - [Rule ai (beta ++ alpha) n | Rule _ beta _ <- fromJust (lookupFM rs aj)] + [CFRule ai (beta ++ alpha) n | CFRule _ beta _ <- fromJust (lookupFM rs aj)] handleProd r = [r] -removeDirectLeftRecursion :: (String,[CFRule_]) -- ^ All productions for a category - -> [(String,[CFRule_])] +removeDirectLeftRecursion :: (Cat_,[CFRule_]) -- ^ All productions for a category + -> [(Cat_,[CFRule_])] removeDirectLeftRecursion (a,rs) | null dr = [(a,rs)] | otherwise = [(a, as), (a', a's)] where a' = a ++ "'" -- FIXME: this might not be unique (dr,nr) = partition isDirectLeftRecursive rs as = maybeEndWithA' nr - is = [Rule a' (tail r) n | Rule _ r n <- dr] + is = [CFRule a' (tail r) n | CFRule _ r n <- dr] a's = maybeEndWithA' is - maybeEndWithA' xs = xs ++ [Rule c (r++[Cat a']) n | Rule c r n <- xs] + maybeEndWithA' xs = xs ++ [CFRule c (r++[Cat a']) n | CFRule c r n <- xs] isDirectLeftRecursive :: CFRule_ -> Bool -isDirectLeftRecursive (Rule c (Cat c':_) _) = c == c' +isDirectLeftRecursive (CFRule c (Cat c':_) _) = c == c' isDirectLeftRecursive _ = False diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 441d6bd14..1bd44851f 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/14 11:42:06 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.53 $ +-- > CVS $Revision: 1.54 $ -- -- A database for customizable GF shell commands. -- @@ -66,14 +66,15 @@ import GrammarToHaskell -- the cf parsing algorithms import ChartParser -- or some other CF Parser -import qualified GF.OldParsing.ParseCF as PCFOld +import qualified GF.NewParsing.CF as PCF +import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE --import qualified ParseGFCviaCFG as PGFC --import NewChartParser --import NewerChartParser -- grammar conversions -- peb 19/4-04 -- see also customGrammarPrinter -import qualified GF.OldParsing.ConvertGrammar as CnvOld +import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE import qualified GF.Printing.PrintParser as Prt --import qualified GF.Data.Assoc as Assoc --import qualified GF.OldParsing.ConvertFiniteGFC as Fin @@ -238,10 +239,10 @@ customGrammarPrinter = ,(strCI "srg", prSRG . stateCF) ,(strCI "gsl", \s -> let opts = stateOptions s name = cncId s - in gslPrinter name opts $ CnvOld.cfg $ statePInfoOld s) + in gslPrinter name opts $ stateCFG s) ,(strCI "jsgf", \s -> let opts = stateOptions s name = cncId s - in jsgfPrinter name opts $ CnvOld.cfg $ statePInfoOld s) + in jsgfPrinter name opts $ stateCFG s) ,(strCI "plbnf", prLBNF True) ,(strCI "lbnf", prLBNF False) ,(strCI "bnf", prBNF False) @@ -266,7 +267,6 @@ customGrammarPrinter = ,(strCI "finite", Prt2.prt . Cnv.simple2finite . Cnv.gfc2simple . stateGrammarLang) ,(strCI "single", Prt2.prt . Cnv.removeSingletons . Cnv.simple2finite . Cnv.gfc2simple . stateGrammarLang) ,(strCI "sg-sg", Prt2.prt . Cnv.removeSingletons . Cnv.removeSingletons . Cnv.simple2finite . Cnv.gfc2simple . stateGrammarLang) - ,(strCI "sg-sg-sg", Prt2.prt . Cnv.removeSingletons . Cnv.removeSingletons . Cnv.removeSingletons . Cnv.simple2finite . Cnv.gfc2simple . stateGrammarLang) ,(strCI "mcfg-old", Prt.prt . CnvOld.mcfg . statePInfoOld) ,(strCI "cfg-old", Prt.prt . CnvOld.cfg . statePInfoOld) ] @@ -354,14 +354,20 @@ customStringCommand = customParser = customData "Parsers, selected by option -parser=x" $ [ - (strCI "chart", PCFOld.parse "ibn" . stateCF) - ,(strCI "old", chartParser . stateCF) + (strCI "chart", PCFOld.parse "ibn" . stateCF) -- DEPRECATED + ,(strCI "general", PCF.parse "gb" . stateCF) + ,(strCI "general-bottomup", PCF.parse "gt" . stateCF) + ,(strCI "general-topdown", PCF.parse "gt" . stateCF) + ,(strCI "incremental", PCF.parse "ib" . stateCF) + ,(strCI "incremental-bottomup", PCF.parse "ib" . stateCF) + ,(strCI "incremental-topdown", PCF.parse "it" . stateCF) + ,(strCI "old", chartParser . stateCF) -- DEPRECATED ,(strCI "myparser", myParser) -- add your own parsers here ] - -- 31/5-04, peb: - ++ [ (strCI ("chart"++name), PCFOld.parse descr . stateCF) | - (descr, names) <- PCFOld.alternatives, name <- names ] + -- 31/5-04, peb: (DEPRECATED) + -- ++ [ (strCI ("chart"++name), PCFOld.parse descr . stateCF) | + -- (descr, names) <- PCFOld.alternatives, name <- names ] customTokenizer = customData "Tokenizers, selected by option -lexer=x" $ diff --git a/src/GF/UseGrammar/Parsing.hs b/src/GF/UseGrammar/Parsing.hs index a50de2db7..5dd7bef78 100644 --- a/src/GF/UseGrammar/Parsing.hs +++ b/src/GF/UseGrammar/Parsing.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/14 11:42:06 $ +-- > CVS $Date: 2005/04/14 18:38:36 $ -- > CVS $Author: peb $ --- > CVS $Revision: 1.17 $ +-- > CVS $Revision: 1.18 $ -- -- (Description of the module) ----------------------------------------------------------------------------- @@ -35,7 +35,7 @@ import Custom import ShellState import PPrCF (prCFTree) -import qualified GF.OldParsing.ParseGFC as NewOld +import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE import qualified GF.NewParsing.GFC as New import Operations