diff --git a/src/GF/Speech/PrSRGS.hs b/src/GF/Speech/PrSRGS.hs index d6221bbd5..627dc7364 100644 --- a/src/GF/Speech/PrSRGS.hs +++ b/src/GF/Speech/PrSRGS.hs @@ -11,7 +11,7 @@ -- categories in the grammar ----------------------------------------------------------------------------- -module GF.Speech.PrSRGS (srgsXmlPrinter) where +module GF.Speech.PrSRGS (srgsXmlPrinter, srgsXmlNonRecursivePrinter) where import GF.Data.Utilities import GF.Data.XML @@ -42,6 +42,10 @@ srgsXmlPrinter :: Maybe SISRFormat -> StateGrammar -> String srgsXmlPrinter sisr probs opts s = prSrgsXml sisr probs $ makeSimpleSRG opts s +srgsXmlNonRecursivePrinter :: Options -> StateGrammar -> String +srgsXmlNonRecursivePrinter opts s = prSrgsXml Nothing False $ makeNonRecursiveSRG opts s + + prSrgsXml :: Maybe SISRFormat -> Bool -> SRG -> String prSrgsXml sisr probs srg@(SRG{grammarName=name,startCat=start, origStartCat=origStart,grammarLanguage=l,rules=rs}) diff --git a/src/GF/Speech/SRG.hs b/src/GF/Speech/SRG.hs index 43969ab0d..c53991aa5 100644 --- a/src/GF/Speech/SRG.hs +++ b/src/GF/Speech/SRG.hs @@ -20,6 +20,7 @@ module GF.Speech.SRG (SRG(..), SRGRule(..), SRGAlt(..), SRGItem, SRGCat, SRGNT, CFTerm , makeSRG , makeSimpleSRG + , makeNonRecursiveSRG , lookupFM_, prtS , cfgCatToGFCat, srgTopCats ) where @@ -90,6 +91,16 @@ makeSimpleSRG opt s = makeSRG preprocess opt s . fix (topDownFilter origStart . bottomUpFilter) . removeCycles +makeNonRecursiveSRG :: Options + -> StateGrammar + -> SRG +makeNonRecursiveSRG opt s = removeRecursion $ makeSRG preprocess opt s + where + preprocess origStart = mergeIdentical + . makeRegular + . fix (topDownFilter origStart . bottomUpFilter) + . removeCycles + makeSRG :: (Cat_ -> CFRules -> CFRules) -> Options -- ^ Grammar options -> StateGrammar @@ -181,6 +192,67 @@ groupTokens (Cat c:ss) = Cat c : groupTokens ss ungroupTokens :: RE (Symbol SRGNT [Token]) -> RE (Symbol SRGNT Token) ungroupTokens = joinRE . mapRE (symbol (RESymbol . Cat) (REConcat . map (RESymbol . Tok))) +-- +-- * Full recursion removal +-- + +{- +S -> foo +S -> apa +S -> bar S +S -> baz S +=> +S -> (bar|baz)* (foo|apa) +-} + +-- | Removes recursion from a right-linear SRG by converting to EBNF. +-- FIXME: corrupts semantics and probabilities +removeRecursion :: SRG -> SRG +removeRecursion srg = srg' + where + srg' = srg { rules = [SRGRule lhs orig [SRGAlt Nothing dummyCFTerm (f lhs alts)] + | SRGRule lhs orig alts <- rules srg] } + dummyCFTerm = CFMeta "dummy" + getRHS cat = unionRE [ rhs | SRGRule lhs _ alts <- rules srg', lhs == cat, + SRGAlt _ _ rhs <- alts] + mutRec = srgMutRec srg + -- Replaces all cats in same mutually recursive set as LHS + -- (except the LHS category itself) with + -- their respective right-hand sides. + -- This makes all rules either non-recursive, or directly right-recursive. + -- NOTE: this fails (loops) if the input grammar is not right-linear. + -- Then replaces all direct right-recursion by Kleene stars. + f lhs alts = recToKleene $ mapRE' g $ unionRE [rhs | SRGAlt _ _ rhs <- alts] + where + g (Cat (c,_)) | isRelatedTo mutRec lhs c && c /= lhs = getRHS c + g t = RESymbol t + recToKleene rhs = concatRE [repeatRE (unionRE r), unionRE nr] + where (r,nr) = partition isRecursive (normalSplitRE rhs) + isRecursive re = lhs `elem` srgItemUses re + +-- | Converts any regexp which does not contain Kleene stars to a +-- disjunctive normal form. +{- +(a|b) (c|d) => [a c, a d, b c, b d] +(a|b) | (c d) => [a, b, c d] +(a b) | (c d) => [a b, c d] +-} +normalSplitRE :: SRGItem -> [SRGItem] +normalSplitRE (REUnion xs) = concatMap normalSplitRE xs +normalSplitRE (REConcat xs) = map concatRE $ sequence $ map normalSplitRE xs +normalSplitRE x = [x] + +srgMutRec :: SRG -> Rel SRGCat +srgMutRec = reflexiveSubrelation . symmetricSubrelation . transitiveClosure . srgUses + +srgUses :: SRG -> Rel SRGCat +srgUses srg = mkRel [(lhs,c) | SRGRule lhs _ alts <- rules srg, + SRGAlt _ _ rhs <- alts, + c <- srgItemUses rhs] + +srgItemUses :: SRGItem -> [SRGCat] +srgItemUses rhs = [c | Cat (c,_) <- symbolsRE rhs] + -- -- * Utilities for building and printing SRGs -- diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index eeb2b0ae2..243affe75 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -252,6 +252,7 @@ customGrammarPrinter = ,(strCI "jsgf", jsgfPrinter Nothing) ,(strCI "jsgf_sisr_old", jsgfPrinter (Just SISR.SISROld)) ,(strCI "srgs_xml", srgsXmlPrinter Nothing False) + ,(strCI "srgs_xml_non_rec", srgsXmlNonRecursivePrinter) ,(strCI "srgs_xml_prob", srgsXmlPrinter Nothing True) ,(strCI "srgs_xml_sisr_old", srgsXmlPrinter (Just SISR.SISROld) False) ,(strCI "srgs_abnf", srgsAbnfPrinter Nothing False)