From ce7072085947f4981c8d6d49b571e3cf5683fbb6 Mon Sep 17 00:00:00 2001 From: krasimir Date: Tue, 22 Mar 2016 10:28:15 +0000 Subject: [PATCH] CFGtoPGF is now extended to support context-free grammars with primitive parameters --- src/compiler/GF/Command/Importing.hs | 4 +- src/compiler/GF/Compile/CFGtoPGF.hs | 90 ++++++++------ src/compiler/GF/Compiler.hs | 4 +- src/compiler/GF/Grammar/BNFC.hs | 40 +++---- src/compiler/GF/Grammar/CFG.hs | 170 ++++++++++++++------------- src/compiler/GF/Grammar/EBNF.hs | 18 ++- src/compiler/GF/Speech/CFGToFA.hs | 20 ++-- src/compiler/GF/Speech/PGFToCFG.hs | 8 +- src/compiler/GF/Speech/SRG.hs | 4 +- 9 files changed, 192 insertions(+), 166 deletions(-) diff --git a/src/compiler/GF/Command/Importing.hs b/src/compiler/GF/Command/Importing.hs index f3d93d87b..f4e51e3e7 100644 --- a/src/compiler/GF/Command/Importing.hs +++ b/src/compiler/GF/Command/Importing.hs @@ -54,8 +54,8 @@ importCF opts files get convert = impCF impCF = do rules <- fmap (convert . concat) $ mapM (get opts) files startCat <- case rules of - (CFRule cat _ _ : _) -> return cat - _ -> fail "empty CFG" + (Rule cat _ _ : _) -> return cat + _ -> fail "empty CFG" let pgf = cf2pgf (last files) (uniqueFuns (mkCFG startCat Set.empty rules)) probs <- maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf return $ setProbabilities probs diff --git a/src/compiler/GF/Compile/CFGtoPGF.hs b/src/compiler/GF/Compile/CFGtoPGF.hs index 5bf1d1be1..f9ab8afcf 100644 --- a/src/compiler/GF/Compile/CFGtoPGF.hs +++ b/src/compiler/GF/Compile/CFGtoPGF.hs @@ -17,7 +17,7 @@ import Data.List -- the compiler ---------- -------------------------- -cf2pgf :: FilePath -> CFG -> PGF +cf2pgf :: FilePath -> ParamCFG -> PGF cf2pgf fpath cf = let pgf = PGF Map.empty aname (cf2abstr cf) (Map.singleton cname (cf2concr cf)) in updateProductionIndices pgf @@ -26,18 +26,21 @@ cf2pgf fpath cf = aname = mkCId (name ++ "Abs") cname = mkCId name -cf2abstr :: CFG -> Abstr +cf2abstr :: ParamCFG -> Abstr cf2abstr cfg = Abstr aflags afuns acats where - aflags = Map.singleton (mkCId "startcat") (LStr (cfgStartCat cfg)) - acats = Map.fromList [(mkCId cat, ([], [(0,mkRuleName rule) - | rule <- Set.toList rules], 0)) - | (cat,rules) <- Map.toList (cfgRules cfg)] - afuns = Map.fromList [(mkRuleName rule, (cftype [mkCId c | NonTerminal c <- ruleRhs rule] (mkCId cat), 0, Nothing, 0)) - | (cat,rules) <- Map.toList (cfgRules cfg) - , rule <- Set.toList rules] + aflags = Map.singleton (mkCId "startcat") (LStr (fst (cfgStartCat cfg))) -cf2concr :: CFG -> Concr + acats = Map.fromList [(cat, ([], [(0,mkRuleName rule) | rule <- rules], 0)) + | (cat,rules) <- (Map.toList . Map.fromListWith (++)) + [(cat2id cat, catRules cfg cat) | + cat <- allCats' cfg]] + afuns = Map.fromList [(mkRuleName rule, (cftype [cat2id c | NonTerminal c <- ruleRhs rule] (cat2id (ruleLhs rule)), 0, Nothing, 0)) + | rule <- allRules cfg] + + cat2id = mkCId . fst + +cf2concr :: ParamCFG -> Concr cf2concr cfg = Concr Map.empty Map.empty cncfuns lindefsrefs lindefsrefs sequences productions @@ -46,51 +49,64 @@ cf2concr cfg = Concr Map.empty Map.empty IntMap.empty totalCats where + cats = allCats' cfg + rules = allRules cfg + sequences0 = Set.fromList (listArray (0,0) [SymCat 0 0] : - [mkSequence rule | rules <- Map.elems (cfgRules cfg), rule <- Set.toList rules]) + map mkSequence rules) sequences = listArray (0,Set.size sequences0-1) (Set.toList sequences0) idFun = CncFun wildCId (listArray (0,0) [seqid]) where seq = listArray (0,0) [SymCat 0 0] seqid = binSearch seq sequences (bounds sequences) - ((fun_cnt,cncfuns0),productions0) = mapAccumL convertRules (1,[idFun]) (Map.toList (cfgRules cfg)) - productions = IntMap.fromList productions0 + ((fun_cnt,cncfuns0),productions0) = mapAccumL (convertRule cs) (1,[idFun]) rules + productions = foldl addProd IntMap.empty (concat (productions0++coercions)) cncfuns = listArray (0,fun_cnt-1) (reverse cncfuns0) lbls = listArray (0,0) ["s"] - (totalCats,cnccats0) = mapAccumL mkCncCat 0 (Map.toList (cfgRules cfg)) - cnccats = Map.fromList ((mkCId "Int", CncCat fidInt fidInt lbls) : - (mkCId "Float", CncCat fidFloat fidFloat lbls) : - (mkCId "String", CncCat fidString fidString lbls) : - cnccats0) + (fid,cnccats0) = (mapAccumL mkCncCat 0 . Map.toList . Map.fromListWith max) + [(c,p) | (c,ps) <- cats, p <- ps] + ((totalCats,cs), coercions) = mapAccumL mkCoercions (fid,Map.empty) cats + cnccats = Map.fromList cnccats0 - lindefsrefs = - IntMap.fromList (map mkLinDefRef (Map.keys (cfgRules cfg))) + lindefsrefs = + IntMap.fromList (map mkLinDefRef cats) - convertRules st (cat,rules) = - let (st',prods) = mapAccumL convertRule st (Set.toList rules) - in (st',(cat2fid cat,Set.fromList prods)) - - convertRule (funid,funs) rule = - let args = [PArg [] (cat2fid c) | NonTerminal c <- ruleRhs rule] + convertRule cs (funid,funs) rule = + let args = [PArg [] (cat2arg c) | NonTerminal c <- ruleRhs rule] prod = PApply funid args seqid = binSearch (mkSequence rule) sequences (bounds sequences) fun = CncFun (mkRuleName rule) (listArray (0,0) [seqid]) funid' = funid+1 - in funid' `seq` ((funid',fun:funs),prod) + in funid' `seq` ((funid',fun:funs),let (c,ps) = ruleLhs rule in [(cat2fid c p, prod) | p <- ps]) mkSequence rule = listArray (0,length syms-1) syms where syms = snd $ mapAccumL convertSymbol 0 (ruleRhs rule) - convertSymbol d (NonTerminal c) = (d+1,if c `elem` ["Int","Float","String"] then SymLit d 0 else SymCat d 0) - convertSymbol d (Terminal t) = (d, SymKS t) + convertSymbol d (NonTerminal (c,_)) = (d+1,if c `elem` ["Int","Float","String"] then SymLit d 0 else SymCat d 0) + convertSymbol d (Terminal t) = (d, SymKS t) - mkCncCat fid (cat,_) = (fid+1, (mkCId cat,CncCat fid fid lbls)) + mkCncCat fid (cat,n) + | cat == "Int" = (fid, (mkCId cat, CncCat fidInt fidInt lbls)) + | cat == "Float" = (fid, (mkCId cat, CncCat fidFloat fidFloat lbls)) + | cat == "String" = (fid, (mkCId cat, CncCat fidString fidString lbls)) + | otherwise = let fid' = fid+n+1 + in fid' `seq` (fid', (mkCId cat,CncCat fid (fid+n) lbls)) - mkLinDefRef cat = - (cat2fid cat,[0]) + mkCoercions (fid,cs) c@(cat,[p]) = ((fid,cs),[]) + mkCoercions (fid,cs) c@(cat,ps ) = + let fid' = fid+1 + in fid' `seq` ((fid', Map.insert c fid cs), [(fid,PCoerce (cat2fid cat p)) | p <- ps]) + + mkLinDefRef (cat,_) = + (cat2fid cat 0,[0]) + + addProd prods (fid,prod) = + case IntMap.lookup fid prods of + Just set -> IntMap.insert fid (Set.insert prod set) prods + Nothing -> IntMap.insert fid (Set.singleton prod) prods binSearch v arr (i,j) | i <= j = case compare v (arr ! k) of @@ -101,11 +117,17 @@ cf2concr cfg = Concr Map.empty Map.empty where k = (i+j) `div` 2 - cat2fid cat = + cat2fid cat p = case Map.lookup (mkCId cat) cnccats of - Just (CncCat fid _ _) -> fid + Just (CncCat fid _ _) -> fid+p _ -> error "cat2fid" + cat2arg c@(cat,[p]) = cat2fid cat p + cat2arg c@(cat,ps ) = + case Map.lookup c cs of + Just fid -> fid + Nothing -> error "cat2arg" + mkRuleName rule = case ruleName rule of CFObj n _ -> n diff --git a/src/compiler/GF/Compiler.hs b/src/compiler/GF/Compiler.hs index 66d88eb69..a057f074f 100644 --- a/src/compiler/GF/Compiler.hs +++ b/src/compiler/GF/Compiler.hs @@ -89,8 +89,8 @@ compileCFFiles opts fs = do bnfc_rules <- fmap concat $ mapM (getBNFCRules opts) fs let rules = bnfc2cf bnfc_rules startCat <- case rules of - (CFRule cat _ _ : _) -> return cat - _ -> fail "empty CFG" + (Rule cat _ _ : _) -> return cat + _ -> fail "empty CFG" let pgf = cf2pgf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules)) unless (flag optStopAfterPhase opts == Compile) $ do probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf) diff --git a/src/compiler/GF/Grammar/BNFC.hs b/src/compiler/GF/Grammar/BNFC.hs index 8438d392d..dbc3d8edf 100644 --- a/src/compiler/GF/Grammar/BNFC.hs +++ b/src/compiler/GF/Grammar/BNFC.hs @@ -16,7 +16,7 @@ module GF.Grammar.BNFC(BNFCRule(..), BNFCSymbol, Symbol(..), CFTerm(..), bnfc2cf import GF.Grammar.CFG import PGF (Token, mkCId) -import Data.List (lookup, partition) +import Data.List (partition) type IsList = Bool type BNFCSymbol = Symbol (Cat, IsList) Token @@ -42,7 +42,7 @@ type IsSeparator = Bool type SepTermSymb = String type SepMap = [(Cat, (IsNonempty, IsSeparator, SepTermSymb))] -bnfc2cf :: [BNFCRule] -> [CFRule] +bnfc2cf :: [BNFCRule] -> [ParamCFRule] bnfc2cf rules = concatMap (transformRules (map makeSepTerm rules1)) rules2 where (rules1,rules2) = partition isSepTerm rules makeSepTerm (BNFCTerminator ne c s) = (c, (ne, False, s)) @@ -53,46 +53,46 @@ isSepTerm (BNFCTerminator {}) = True isSepTerm (BNFCSeparator {}) = True isSepTerm _ = False -transformRules :: SepMap -> BNFCRule -> [CFRule] -transformRules sepMap (BNFCRule c smbs@(s:ss) r) = CFRule c cfSmbs r : rls +transformRules :: SepMap -> BNFCRule -> [ParamCFRule] +transformRules sepMap (BNFCRule c smbs@(s:ss) r) = Rule (c,[0]) cfSmbs r : rls where smbs' = map transformSymb smbs cfSmbs = [snd s | s <- smbs'] ids = filter (/= "") [fst s | s <- smbs'] rls = concatMap (createListRules sepMap) ids transformRules sepMap (BNFCCoercions c num) = rules ++ [lastRule] where rules = map (fRules c) [0..num-1] - lastRule = CFRule c' ss rn + lastRule = Rule (c',[0]) ss rn where c' = c ++ show num - ss = [Terminal "(", NonTerminal c, Terminal ")"] + ss = [Terminal "(", NonTerminal (c,[0]), Terminal ")"] rn = CFObj (mkCId $ "coercion_" ++ c) [] -fRules c n = CFRule c' ss rn +fRules c n = Rule (c',[0]) ss rn where c' = if n == 0 then c else c ++ show n - ss = [NonTerminal (c ++ show (n+1))] - rn = CFObj (mkCId $ "coercion_" ++ c')[] + ss = [NonTerminal (c ++ show (n+1),[0])] + rn = CFObj (mkCId $ "coercion_" ++ c') [] -transformSymb :: BNFCSymbol -> (String, CFSymbol) +transformSymb :: BNFCSymbol -> (String, ParamCFSymbol) transformSymb s = case s of - NonTerminal (c,False) -> ("", NonTerminal c) - NonTerminal (c,True ) -> (c , NonTerminal $ "List" ++ c) + NonTerminal (c,False) -> ("", NonTerminal (c,[0])) + NonTerminal (c,True ) -> (c , NonTerminal $ ("List" ++ c,[0])) Terminal t -> ("", Terminal t) -createListRules :: SepMap -> String -> [CFRule] +createListRules :: SepMap -> String -> [ParamCFRule] createListRules sepMap c = case lookup c sepMap of Just (ne, isSep, symb) -> createListRules' ne isSep symb c Nothing -> createListRules' False True "" c -createListRules':: IsNonempty -> IsSeparator -> SepTermSymb -> String -> [CFRule] +createListRules':: IsNonempty -> IsSeparator -> SepTermSymb -> String -> [ParamCFRule] createListRules' ne isSep symb c = ruleCons : [ruleBase] - where ruleBase = CFRule ("List" ++ c) smbs rn + where ruleBase = Rule ("List" ++ c,[0]) smbs rn where smbs = if isSep - then [NonTerminal c | ne] - else [NonTerminal c | ne] ++ + then [NonTerminal (c,[0]) | ne] + else [NonTerminal (c,[0]) | ne] ++ [Terminal symb | symb /= "" && ne] rn = CFObj (mkCId $ "Base" ++ c) [] - ruleCons = CFRule ("List" ++ c) smbs rn - where smbs = [NonTerminal c] ++ + ruleCons = Rule ("List" ++ c,[0]) smbs rn + where smbs = [NonTerminal (c,[0])] ++ [Terminal symb | symb /= ""] ++ - [NonTerminal ("List" ++ c)] + [NonTerminal ("List" ++ c,[0])] rn = CFObj (mkCId $ "Cons" ++ c) [] diff --git a/src/compiler/GF/Grammar/CFG.hs b/src/compiler/GF/Grammar/CFG.hs index 37d46e39b..0a8d48b4f 100644 --- a/src/compiler/GF/Grammar/CFG.hs +++ b/src/compiler/GF/Grammar/CFG.hs @@ -8,16 +8,11 @@ module GF.Grammar.CFG where import GF.Data.Utilities import PGF ---import GF.Infra.Option import GF.Data.Relation ---import Control.Monad ---import Control.Monad.State (State, get, put, evalState) import Data.Map (Map) import qualified Data.Map as Map import Data.List ---import Data.Maybe (fromMaybe) ---import Data.Monoid (mconcat) import Data.Set (Set) import qualified Data.Set as Set @@ -30,15 +25,19 @@ type Cat = String data Symbol c t = NonTerminal c | Terminal t deriving (Eq, Ord, Show) -type CFSymbol = Symbol Cat Token - -data CFRule = CFRule { - lhsCat :: Cat, - ruleRhs :: [CFSymbol], +data Rule c t = Rule { + ruleLhs :: c, + ruleRhs :: [Symbol c t], ruleName :: CFTerm } deriving (Eq, Ord, Show) +data Grammar c t = Grammar { + cfgStartCat :: c, + cfgExternalCats :: Set c, + cfgRules :: Map c (Set (Rule c t)) } + deriving (Eq, Ord, Show) + data CFTerm = CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments | CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id. @@ -48,11 +47,14 @@ data CFTerm | CFMeta CId -- ^ A metavariable deriving (Eq, Ord, Show) -data CFG = CFG { cfgStartCat :: Cat, - cfgExternalCats :: Set Cat, - cfgRules :: Map Cat (Set CFRule) } - deriving (Eq, Ord, Show) +type CFSymbol = Symbol Cat Token +type CFRule = Rule Cat Token +type CFG = Grammar Cat Token +type Param = Int +type ParamCFSymbol = Symbol (Cat,[Param]) Token +type ParamCFRule = Rule (Cat,[Param]) Token +type ParamCFG = Grammar (Cat,[Param]) Token -- -- * Grammar filtering @@ -64,25 +66,25 @@ data CFG = CFG { cfgStartCat :: Cat, -- one should we pick? -- FIXME: Does not (yet) remove productions which are cyclic -- because of empty productions. -removeCycles :: CFG -> CFG +removeCycles :: (Ord c,Ord t) => Grammar c t -> Grammar c t removeCycles = onRules f where f rs = filter (not . isCycle) rs - where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs] - isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c + where alias = transitiveClosure $ mkRel [(c,c') | Rule c [NonTerminal c'] _ <- rs] + isCycle (Rule c [NonTerminal c'] _) = isRelatedTo alias c' c isCycle _ = False -- | Better bottom-up filter that also removes categories which contain no finite -- strings. -bottomUpFilter :: CFG -> CFG +bottomUpFilter :: (Ord c,Ord t) => Grammar c t -> Grammar c t bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty }) where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr okSym g = symbol (`elem` allCats g) (const True) -- | Removes categories which are not reachable from any external category. -topDownFilter :: CFG -> CFG +topDownFilter :: (Ord c,Ord t) => Grammar c t -> Grammar c t topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg where - rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ] + rhsCats = [ (ruleLhs r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ] uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg @@ -95,12 +97,12 @@ mergeIdentical g = onRules (map subst) g m = Map.fromList [(y,concat (intersperse "+" xs)) | (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs] -- build data to compare for each category: a set of name,rhs pairs - rulesKey = Set.map (\ (CFRule _ r n) -> (n,r)) - subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n + rulesKey = Set.map (\ (Rule _ r n) -> (n,r)) + subst (Rule c r n) = Rule (substCat c) (map (mapSymbol substCat id) r) n substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m -- | Keeps only the start category as an external category. -purgeExternalCats :: CFG -> CFG +purgeExternalCats :: Grammar c t -> Grammar c t purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) } -- @@ -113,7 +115,7 @@ removeLeftRecursion :: CFG -> CFG removeLeftRecursion gr = gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] } where - scheme1 = [CFRule a [x,NonTerminal a_x] n' | + scheme1 = [Rule a [x,NonTerminal a_x] n' | a <- retainedLeftRecursive, x <- properLeftCornersOf a, not (isLeftRecursive x), @@ -123,27 +125,27 @@ removeLeftRecursion gr a_x `Set.member` newCats, let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0)) (\_ -> CFRes 0) x] - scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' | + scheme2 = [Rule a_x (beta++[NonTerminal a_b]) n' | a <- retainedLeftRecursive, b@(NonTerminal b') <- properLeftCornersOf a, isLeftRecursive b, - CFRule _ (x:beta) n <- catRules gr b', + Rule _ (x:beta) n <- catRules gr b', let a_x = mkCat (NonTerminal a) x, let a_b = mkCat (NonTerminal a) b, let i = length $ filterCats beta, let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n))) (\_ -> CFApp (CFRes i) n) x] - scheme3 = [CFRule a_x beta n' | + scheme3 = [Rule a_x beta n' | a <- retainedLeftRecursive, x <- properLeftCornersOf a, - CFRule _ (x':beta) n <- catRules gr a, + Rule _ (x':beta) n <- catRules gr a, x == x', let a_x = mkCat (NonTerminal a) x, let n' = symbol (\_ -> CFAbs 1 (shiftTerm n)) (\_ -> n) x] scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats - newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3)) + newCats = Set.fromList (map ruleLhs (scheme2 ++ scheme3)) shiftTerm :: CFTerm -> CFTerm shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts) @@ -155,7 +157,7 @@ removeLeftRecursion gr cats = allCats gr -- rules = allRules gr - directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr] + directLeftCorner = mkRel [(NonTerminal c,t) | Rule c (t:_) _ <- allRules gr] -- leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner properLeftCorner = transitiveClosure directLeftCorner properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal @@ -176,11 +178,12 @@ removeLeftRecursion gr where showSymbol = symbol id show -- | Get the sets of mutually recursive non-terminals for a grammar. -mutRecCats :: Bool -- ^ If true, all categories will be in some set. +mutRecCats :: Ord c + => Bool -- ^ If true, all categories will be in some set. -- If false, only recursive categories will be included. - -> CFG -> [Set Cat] + -> Grammar c t -> [Set c] mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r - where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss] + where r = mkRel [(c,c') | Rule c ss _ <- allRules g, NonTerminal c' <- ss] refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation -- @@ -199,107 +202,108 @@ makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g) where trSet cs | allXLinear cs rs = rs | otherwise = concatMap handleCat (Set.toList cs) where rs = catSetRules g cs - handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e + handleCat c = [Rule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e ++ concatMap (makeRightLinearRules c) (catRules g c) where c' = newCat c - makeRightLinearRules b' (CFRule c ss n) = + makeRightLinearRules b' (Rule c ss n) = case ys of [] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left (NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n - ++ makeRightLinearRules (newCat b) (CFRule c zs n) + ++ makeRightLinearRules (newCat b) (Rule c zs n) where (xs,ys) = break (`catElem` cs) ss -- don't add rules on the form A -> A newRule c rhs n | rhs == [NonTerminal c] = [] - | otherwise = [CFRule c rhs n] + | otherwise = [Rule c rhs n] newCat c = c ++ "$" -- -- * CFG Utilities -- -mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG -mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs } +mkCFG :: (Ord c,Ord t) => c -> Set c -> [Rule c t] -> Grammar c t +mkCFG start ext rs = Grammar { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs } -groupProds :: [CFRule] -> Map Cat (Set CFRule) -groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r)) +groupProds :: (Ord c,Ord t) => [Rule c t] -> Map c (Set (Rule c t)) +groupProds = Map.fromListWith Set.union . map (\r -> (ruleLhs r,Set.singleton r)) -uniqueFuns :: CFG -> CFG -uniqueFuns cfg = CFG {cfgStartCat = cfgStartCat cfg - ,cfgExternalCats = cfgExternalCats cfg - ,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg)))) - } +uniqueFuns :: (Ord c,Ord t) => Grammar c t -> Grammar c t +uniqueFuns cfg = Grammar {cfgStartCat = cfgStartCat cfg + ,cfgExternalCats = cfgExternalCats cfg + ,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg)))) + } where uniqueFunSet funs (cat,rules) = let (funs',rules') = mapAccumL uniqueFun funs (Set.toList rules) in (funs',(cat,Set.fromList rules')) - uniqueFun funs (CFRule cat items (CFObj fun args)) = (Set.insert fun' funs,CFRule cat items (CFObj fun' args)) + uniqueFun funs (Rule cat items (CFObj fun args)) = (Set.insert fun' funs,Rule cat items (CFObj fun' args)) where fun' = head [fun'|suffix<-"":map show ([2..]::[Int]), let fun'=mkCId (showCId fun++suffix), not (fun' `Set.member` funs)] -- | Gets all rules in a CFG. -allRules :: CFG -> [CFRule] -allRules = concat . map Set.toList . Map.elems . cfgRules +allRules :: Grammar c t -> [Rule c t] +allRules = concatMap Set.toList . Map.elems . cfgRules -- | Gets all rules in a CFG, grouped by their LHS categories. -allRulesGrouped :: CFG -> [(Cat,[CFRule])] +allRulesGrouped :: Grammar c t -> [(c,[Rule c t])] allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules -- | Gets all categories which have rules. -allCats :: CFG -> [Cat] +allCats :: Grammar c t -> [c] allCats = Map.keys . cfgRules -- | Gets all categories which have rules or occur in a RHS. -allCats' :: CFG -> [Cat] +allCats' :: (Ord c,Ord t) => Grammar c t -> [c] allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union` Set.fromList [c | rs <- Map.elems (cfgRules cfg), r <- Set.toList rs, NonTerminal c <- ruleRhs r]) -- | Gets all rules for the given category. -catRules :: CFG -> Cat -> [CFRule] +catRules :: Ord c => Grammar c t -> c -> [Rule c t] catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr) -- | Gets all rules for categories in the given set. catSetRules :: CFG -> Set Cat -> [CFRule] catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr -mapCFGCats :: (Cat -> Cat) -> CFG -> CFG -mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg)) - (Set.map f (cfgExternalCats cfg)) - [CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg] +mapCFGCats :: (Ord c,Ord c',Ord t) => (c -> c') -> Grammar c t -> Grammar c' t +mapCFGCats f cfg = Grammar (f (cfgStartCat cfg)) + (Set.map f (cfgExternalCats cfg)) + (groupProds [Rule (f lhs) (map (mapSymbol f id) rhs) t | Rule lhs rhs t <- allRules cfg]) -onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG -onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) } - -onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG +onRules :: (Ord c,Ord t) => ([Rule c t] -> [Rule c t]) -> Grammar c t -> Grammar c t onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg } -- | Clean up CFG after rules have been removed. -cleanCFG :: CFG -> CFG -cleanCFG = onCFG (Map.filter (not . Set.null)) +cleanCFG :: Ord c => Grammar c t -> Grammar c t +cleanCFG cfg = cfg{ cfgRules = Map.filter (not . Set.null) (cfgRules cfg) } -- | Combine two CFGs. -unionCFG :: CFG -> CFG -> CFG -unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x +unionCFG :: (Ord c,Ord t) => Grammar c t -> Grammar c t -> Grammar c t +unionCFG x y = x { cfgRules = Map.unionWith Set.union (cfgRules x) (cfgRules y) } -filterCFG :: (CFRule -> Bool) -> CFG -> CFG -filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p)) +filterCFG :: (Rule c t -> Bool) -> Grammar c t -> Grammar c t +filterCFG p cfg = cfg { cfgRules = Map.mapMaybe filterRules (cfgRules cfg) } + where + filterRules rules = + let rules' = Set.filter p rules + in if Set.null rules' then Nothing else Just rules' -filterCFGCats :: (Cat -> Bool) -> CFG -> CFG -filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c)) +filterCFGCats :: (c -> Bool) -> Grammar c t -> Grammar c t +filterCFGCats p cfg = cfg { cfgRules = Map.filterWithKey (\c _ -> p c) (cfgRules cfg) } -countCats :: CFG -> Int +countCats :: Ord c => Grammar c t -> Int countCats = Map.size . cfgRules . cleanCFG -countRules :: CFG -> Int +countRules :: Grammar c t -> Int countRules = length . allRules prCFG :: CFG -> String prCFG = prProductions . map prRule . allRules where - prRule r = (lhsCat r, unwords (map prSym (ruleRhs r))) + prRule r = (ruleLhs r, unwords (map prSym (ruleRhs r))) prSym = symbol id (\t -> "\""++ t ++"\"") prProductions :: [(Cat,String)] -> String @@ -325,8 +329,8 @@ prCFTerm = pr 0 -- * CFRule Utilities -- -ruleFun :: CFRule -> CId -ruleFun (CFRule _ _ t) = f t +ruleFun :: Rule c t -> CId +ruleFun (Rule _ _ t) = f t where f (CFObj n _) = n f (CFApp _ x) = f x f (CFAbs _ x) = f x @@ -334,29 +338,31 @@ ruleFun (CFRule _ _ t) = f t -- | Check if any of the categories used on the right-hand side -- are in the given list of categories. -anyUsedBy :: [Cat] -> CFRule -> Bool -anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss) +anyUsedBy :: Eq c => [c] -> Rule c t -> Bool +anyUsedBy cs (Rule _ ss _) = any (`elem` cs) (filterCats ss) mkCFTerm :: String -> CFTerm mkCFTerm n = CFObj (mkCId n) [] -ruleIsNonRecursive :: Set Cat -> CFRule -> Bool +ruleIsNonRecursive :: Ord c => Set c -> Rule c t -> Bool ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs -- | Check if all the rules are right-linear, or all the rules are -- left-linear, with respect to given categories. -allXLinear :: Set Cat -> [CFRule] -> Bool +allXLinear :: Ord c => Set c -> [Rule c t] -> Bool allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs -- | Checks if a context-free rule is right-linear. -isRightLinear :: Set Cat -- ^ The categories to consider - -> CFRule -- ^ The rule to check for right-linearity +isRightLinear :: Ord c + => Set c -- ^ The categories to consider + -> Rule c t -- ^ The rule to check for right-linearity -> Bool isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs -- | Checks if a context-free rule is left-linear. -isLeftLinear :: Set Cat -- ^ The categories to consider - -> CFRule -- ^ The rule to check for left-linearity +isLeftLinear :: Ord c + => Set c -- ^ The categories to consider + -> Rule c t -- ^ The rule to check for left-linearity -> Bool isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs diff --git a/src/compiler/GF/Grammar/EBNF.hs b/src/compiler/GF/Grammar/EBNF.hs index 8d0addfd7..9d617c26a 100644 --- a/src/compiler/GF/Grammar/EBNF.hs +++ b/src/compiler/GF/Grammar/EBNF.hs @@ -18,8 +18,6 @@ import GF.Data.Operations import GF.Grammar.CFG import PGF (mkCId) -import Data.List - type EBNF = [ERule] type ERule = (ECat, ERHS) type ECat = (String,[Int]) @@ -35,14 +33,14 @@ data ERHS = | EOpt ERHS | EEmpty -type CFRHS = [CFSymbol] -type CFJustRule = (Cat, CFRHS) +type CFRHS = [ParamCFSymbol] +type CFJustRule = ((Cat,[Param]), CFRHS) -ebnf2cf :: EBNF -> [CFRule] +ebnf2cf :: EBNF -> [ParamCFRule] ebnf2cf ebnf = - [CFRule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)] + [Rule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)] where - mkCFF i c = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) [] + mkCFF i (c,_) = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) [] normEBNF :: EBNF -> [CFJustRule] normEBNF erules = let @@ -101,7 +99,7 @@ substERules g (cat,itss) = (cat, map sub itss) where sub (EIPlus r : ii) = EIPlus (substERules g r) : ii sub (EIOpt r : ii) = EIOpt (substERules g r) : ii -} -eitem2cfitem :: EItem -> CFSymbol +eitem2cfitem :: EItem -> ParamCFSymbol eitem2cfitem it = case it of EITerm a -> Terminal a EINonTerm cat -> NonTerminal (mkCFCatE cat) @@ -143,8 +141,8 @@ mkECat ints = ("C", ints) prECat (c,[]) = c prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints) -mkCFCatE :: ECat -> Cat -mkCFCatE = prECat +mkCFCatE :: ECat -> (Cat,[Param]) +mkCFCatE c = (prECat c,[0]) {- updECat _ (c,[]) = (c,[]) updECat ii (c,_) = (c,ii) diff --git a/src/compiler/GF/Speech/CFGToFA.hs b/src/compiler/GF/Speech/CFGToFA.hs index 5319c0bbe..0a530e594 100644 --- a/src/compiler/GF/Speech/CFGToFA.hs +++ b/src/compiler/GF/Speech/CFGToFA.hs @@ -75,14 +75,14 @@ make_fa c@(g,ns) q0 alpha q1 fa = case mrRec n of -- the set Ni is right-recursive or cyclic RightR -> - let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs] - ++ [(getState c, xs, getState d) | CFRule c ss _ <- rs, + let new = [(getState c, xs, q1) | Rule c xs _ <- nrs] + ++ [(getState c, xs, getState d) | Rule c ss _ <- rs, let (xs,NonTerminal d) = (init ss,last ss)] in make_fas new $ newTransition q0 (getState a) Nothing fa' -- the set Ni is left-recursive LeftR -> - let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs] - ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs] + let new = [(q0, xs, getState c) | Rule c xs _ <- nrs] + ++ [(getState d, xs, getState c) | Rule c (NonTerminal d:xs) _ <- rs] in make_fas new $ newTransition (getState a) q1 Nothing fa' where (fa',stateMap) = addStatesForCats ni fa @@ -91,7 +91,7 @@ make_fa c@(g,ns) q0 alpha q1 fa = x stateMap -- a is not recursive Nothing -> let rs = catRules g a - in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs + in foldl' (\f (Rule _ b _) -> make_fa_ q0 b q1 f) fa rs (x:beta) -> let (fa',q) = newState () fa in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa' where @@ -190,15 +190,15 @@ make_fa1 mr q0 alpha q1 fa = case mrRec mr of NotR -> -- the set is a non-recursive (always singleton) set of categories -- so the set of category rules is the set of rules for the whole set - make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa + make_fas [(q0, b, q1) | Rule _ b _ <- mrNonRecRules mr] fa RightR -> -- the set is right-recursive or cyclic - let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr] - ++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr, + let new = [(getState c, xs, q1) | Rule c xs _ <- mrNonRecRules mr] + ++ [(getState c, xs, getState d) | Rule c ss _ <- mrRecRules mr, let (xs,NonTerminal d) = (init ss,last ss)] in make_fas new $ newTransition q0 (getState a) Nothing fa' LeftR -> -- the set is left-recursive - let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr] - ++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr] + let new = [(q0, xs, getState c) | Rule c xs _ <- mrNonRecRules mr] + ++ [(getState d, xs, getState c) | Rule c (NonTerminal d:xs) _ <- mrRecRules mr] in make_fas new $ newTransition (getState a) q1 Nothing fa' where (fa',stateMap) = addStatesForCats (mrCats mr) fa diff --git a/src/compiler/GF/Speech/PGFToCFG.hs b/src/compiler/GF/Speech/PGFToCFG.hs index 49c679aea..8cb01f3a9 100644 --- a/src/compiler/GF/Speech/PGFToCFG.hs +++ b/src/compiler/GF/Speech/PGFToCFG.hs @@ -64,17 +64,17 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co extCats :: Set Cat - extCats = Set.fromList $ map lhsCat startRules + extCats = Set.fromList $ map ruleLhs startRules startRules :: [CFRule] - startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) + startRules = [Rule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0) | (c,CncCat s e lbls) <- Map.toList (cnccats cnc), fc <- range (s,e), not (isPredefFId fc), r <- [0..catLinArity fc-1]] ruleToCFRule :: (FId,Production) -> [CFRule] ruleToCFRule (c,PApply funid args) = - [CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]]) + [Rule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]]) | (l,seqid) <- Array.assocs rhs , let row = sequences cnc ! seqid , not (containsLiterals row)] @@ -119,5 +119,5 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co profileToTerm t [] = CFMeta t profileToTerm _ xs = CFRes (last xs) -- FIXME: unify ruleToCFRule (c,PCoerce c') = - [CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0) + [Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0) | l <- [0..catLinArity c-1]] diff --git a/src/compiler/GF/Speech/SRG.hs b/src/compiler/GF/Speech/SRG.hs index a0a616561..9d51e52e9 100644 --- a/src/compiler/GF/Speech/SRG.hs +++ b/src/compiler/GF/Speech/SRG.hs @@ -129,9 +129,9 @@ renameCats prefix cfg = mapCFGCats renameCat cfg badCat c = error ("GF.Speech.SRG.renameCats: " ++ c ++ "\n" ++ prCFG cfg) cfRulesToSRGRule :: [CFRule] -> SRGRule -cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs +cfRulesToSRGRule rs@(r:_) = SRGRule (ruleLhs r) rhs where - alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs] + alts = [((n,Nothing),mkSRGSymbols 0 ss) | Rule c ss n <- rs] rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ] mkSRGSymbols _ [] = []