1
0
forked from GitHub/gf-core

CFGtoPGF is now extended to support context-free grammars with primitive parameters

This commit is contained in:
krasimir
2016-03-22 10:28:15 +00:00
parent fbdf21d862
commit ce70720859
9 changed files with 192 additions and 166 deletions

View File

@@ -54,8 +54,8 @@ importCF opts files get convert = impCF
impCF = do
rules <- fmap (convert . concat) $ mapM (get opts) files
startCat <- case rules of
(CFRule cat _ _ : _) -> return cat
_ -> fail "empty CFG"
(Rule cat _ _ : _) -> return cat
_ -> fail "empty CFG"
let pgf = cf2pgf (last files) (uniqueFuns (mkCFG startCat Set.empty rules))
probs <- maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf
return $ setProbabilities probs

View File

@@ -17,7 +17,7 @@ import Data.List
-- the compiler ----------
--------------------------
cf2pgf :: FilePath -> CFG -> PGF
cf2pgf :: FilePath -> ParamCFG -> PGF
cf2pgf fpath cf =
let pgf = PGF Map.empty aname (cf2abstr cf) (Map.singleton cname (cf2concr cf))
in updateProductionIndices pgf
@@ -26,18 +26,21 @@ cf2pgf fpath cf =
aname = mkCId (name ++ "Abs")
cname = mkCId name
cf2abstr :: CFG -> Abstr
cf2abstr :: ParamCFG -> Abstr
cf2abstr cfg = Abstr aflags afuns acats
where
aflags = Map.singleton (mkCId "startcat") (LStr (cfgStartCat cfg))
acats = Map.fromList [(mkCId cat, ([], [(0,mkRuleName rule)
| rule <- Set.toList rules], 0))
| (cat,rules) <- Map.toList (cfgRules cfg)]
afuns = Map.fromList [(mkRuleName rule, (cftype [mkCId c | NonTerminal c <- ruleRhs rule] (mkCId cat), 0, Nothing, 0))
| (cat,rules) <- Map.toList (cfgRules cfg)
, rule <- Set.toList rules]
aflags = Map.singleton (mkCId "startcat") (LStr (fst (cfgStartCat cfg)))
cf2concr :: CFG -> Concr
acats = Map.fromList [(cat, ([], [(0,mkRuleName rule) | rule <- rules], 0))
| (cat,rules) <- (Map.toList . Map.fromListWith (++))
[(cat2id cat, catRules cfg cat) |
cat <- allCats' cfg]]
afuns = Map.fromList [(mkRuleName rule, (cftype [cat2id c | NonTerminal c <- ruleRhs rule] (cat2id (ruleLhs rule)), 0, Nothing, 0))
| rule <- allRules cfg]
cat2id = mkCId . fst
cf2concr :: ParamCFG -> Concr
cf2concr cfg = Concr Map.empty Map.empty
cncfuns lindefsrefs lindefsrefs
sequences productions
@@ -46,51 +49,64 @@ cf2concr cfg = Concr Map.empty Map.empty
IntMap.empty
totalCats
where
cats = allCats' cfg
rules = allRules cfg
sequences0 = Set.fromList (listArray (0,0) [SymCat 0 0] :
[mkSequence rule | rules <- Map.elems (cfgRules cfg), rule <- Set.toList rules])
map mkSequence rules)
sequences = listArray (0,Set.size sequences0-1) (Set.toList sequences0)
idFun = CncFun wildCId (listArray (0,0) [seqid])
where
seq = listArray (0,0) [SymCat 0 0]
seqid = binSearch seq sequences (bounds sequences)
((fun_cnt,cncfuns0),productions0) = mapAccumL convertRules (1,[idFun]) (Map.toList (cfgRules cfg))
productions = IntMap.fromList productions0
((fun_cnt,cncfuns0),productions0) = mapAccumL (convertRule cs) (1,[idFun]) rules
productions = foldl addProd IntMap.empty (concat (productions0++coercions))
cncfuns = listArray (0,fun_cnt-1) (reverse cncfuns0)
lbls = listArray (0,0) ["s"]
(totalCats,cnccats0) = mapAccumL mkCncCat 0 (Map.toList (cfgRules cfg))
cnccats = Map.fromList ((mkCId "Int", CncCat fidInt fidInt lbls) :
(mkCId "Float", CncCat fidFloat fidFloat lbls) :
(mkCId "String", CncCat fidString fidString lbls) :
cnccats0)
(fid,cnccats0) = (mapAccumL mkCncCat 0 . Map.toList . Map.fromListWith max)
[(c,p) | (c,ps) <- cats, p <- ps]
((totalCats,cs), coercions) = mapAccumL mkCoercions (fid,Map.empty) cats
cnccats = Map.fromList cnccats0
lindefsrefs =
IntMap.fromList (map mkLinDefRef (Map.keys (cfgRules cfg)))
lindefsrefs =
IntMap.fromList (map mkLinDefRef cats)
convertRules st (cat,rules) =
let (st',prods) = mapAccumL convertRule st (Set.toList rules)
in (st',(cat2fid cat,Set.fromList prods))
convertRule (funid,funs) rule =
let args = [PArg [] (cat2fid c) | NonTerminal c <- ruleRhs rule]
convertRule cs (funid,funs) rule =
let args = [PArg [] (cat2arg c) | NonTerminal c <- ruleRhs rule]
prod = PApply funid args
seqid = binSearch (mkSequence rule) sequences (bounds sequences)
fun = CncFun (mkRuleName rule) (listArray (0,0) [seqid])
funid' = funid+1
in funid' `seq` ((funid',fun:funs),prod)
in funid' `seq` ((funid',fun:funs),let (c,ps) = ruleLhs rule in [(cat2fid c p, prod) | p <- ps])
mkSequence rule = listArray (0,length syms-1) syms
where
syms = snd $ mapAccumL convertSymbol 0 (ruleRhs rule)
convertSymbol d (NonTerminal c) = (d+1,if c `elem` ["Int","Float","String"] then SymLit d 0 else SymCat d 0)
convertSymbol d (Terminal t) = (d, SymKS t)
convertSymbol d (NonTerminal (c,_)) = (d+1,if c `elem` ["Int","Float","String"] then SymLit d 0 else SymCat d 0)
convertSymbol d (Terminal t) = (d, SymKS t)
mkCncCat fid (cat,_) = (fid+1, (mkCId cat,CncCat fid fid lbls))
mkCncCat fid (cat,n)
| cat == "Int" = (fid, (mkCId cat, CncCat fidInt fidInt lbls))
| cat == "Float" = (fid, (mkCId cat, CncCat fidFloat fidFloat lbls))
| cat == "String" = (fid, (mkCId cat, CncCat fidString fidString lbls))
| otherwise = let fid' = fid+n+1
in fid' `seq` (fid', (mkCId cat,CncCat fid (fid+n) lbls))
mkLinDefRef cat =
(cat2fid cat,[0])
mkCoercions (fid,cs) c@(cat,[p]) = ((fid,cs),[])
mkCoercions (fid,cs) c@(cat,ps ) =
let fid' = fid+1
in fid' `seq` ((fid', Map.insert c fid cs), [(fid,PCoerce (cat2fid cat p)) | p <- ps])
mkLinDefRef (cat,_) =
(cat2fid cat 0,[0])
addProd prods (fid,prod) =
case IntMap.lookup fid prods of
Just set -> IntMap.insert fid (Set.insert prod set) prods
Nothing -> IntMap.insert fid (Set.singleton prod) prods
binSearch v arr (i,j)
| i <= j = case compare v (arr ! k) of
@@ -101,11 +117,17 @@ cf2concr cfg = Concr Map.empty Map.empty
where
k = (i+j) `div` 2
cat2fid cat =
cat2fid cat p =
case Map.lookup (mkCId cat) cnccats of
Just (CncCat fid _ _) -> fid
Just (CncCat fid _ _) -> fid+p
_ -> error "cat2fid"
cat2arg c@(cat,[p]) = cat2fid cat p
cat2arg c@(cat,ps ) =
case Map.lookup c cs of
Just fid -> fid
Nothing -> error "cat2arg"
mkRuleName rule =
case ruleName rule of
CFObj n _ -> n

View File

@@ -89,8 +89,8 @@ compileCFFiles opts fs = do
bnfc_rules <- fmap concat $ mapM (getBNFCRules opts) fs
let rules = bnfc2cf bnfc_rules
startCat <- case rules of
(CFRule cat _ _ : _) -> return cat
_ -> fail "empty CFG"
(Rule cat _ _ : _) -> return cat
_ -> fail "empty CFG"
let pgf = cf2pgf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules))
unless (flag optStopAfterPhase opts == Compile) $
do probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf)

View File

@@ -16,7 +16,7 @@ module GF.Grammar.BNFC(BNFCRule(..), BNFCSymbol, Symbol(..), CFTerm(..), bnfc2cf
import GF.Grammar.CFG
import PGF (Token, mkCId)
import Data.List (lookup, partition)
import Data.List (partition)
type IsList = Bool
type BNFCSymbol = Symbol (Cat, IsList) Token
@@ -42,7 +42,7 @@ type IsSeparator = Bool
type SepTermSymb = String
type SepMap = [(Cat, (IsNonempty, IsSeparator, SepTermSymb))]
bnfc2cf :: [BNFCRule] -> [CFRule]
bnfc2cf :: [BNFCRule] -> [ParamCFRule]
bnfc2cf rules = concatMap (transformRules (map makeSepTerm rules1)) rules2
where (rules1,rules2) = partition isSepTerm rules
makeSepTerm (BNFCTerminator ne c s) = (c, (ne, False, s))
@@ -53,46 +53,46 @@ isSepTerm (BNFCTerminator {}) = True
isSepTerm (BNFCSeparator {}) = True
isSepTerm _ = False
transformRules :: SepMap -> BNFCRule -> [CFRule]
transformRules sepMap (BNFCRule c smbs@(s:ss) r) = CFRule c cfSmbs r : rls
transformRules :: SepMap -> BNFCRule -> [ParamCFRule]
transformRules sepMap (BNFCRule c smbs@(s:ss) r) = Rule (c,[0]) cfSmbs r : rls
where smbs' = map transformSymb smbs
cfSmbs = [snd s | s <- smbs']
ids = filter (/= "") [fst s | s <- smbs']
rls = concatMap (createListRules sepMap) ids
transformRules sepMap (BNFCCoercions c num) = rules ++ [lastRule]
where rules = map (fRules c) [0..num-1]
lastRule = CFRule c' ss rn
lastRule = Rule (c',[0]) ss rn
where c' = c ++ show num
ss = [Terminal "(", NonTerminal c, Terminal ")"]
ss = [Terminal "(", NonTerminal (c,[0]), Terminal ")"]
rn = CFObj (mkCId $ "coercion_" ++ c) []
fRules c n = CFRule c' ss rn
fRules c n = Rule (c',[0]) ss rn
where c' = if n == 0 then c else c ++ show n
ss = [NonTerminal (c ++ show (n+1))]
rn = CFObj (mkCId $ "coercion_" ++ c')[]
ss = [NonTerminal (c ++ show (n+1),[0])]
rn = CFObj (mkCId $ "coercion_" ++ c') []
transformSymb :: BNFCSymbol -> (String, CFSymbol)
transformSymb :: BNFCSymbol -> (String, ParamCFSymbol)
transformSymb s = case s of
NonTerminal (c,False) -> ("", NonTerminal c)
NonTerminal (c,True ) -> (c , NonTerminal $ "List" ++ c)
NonTerminal (c,False) -> ("", NonTerminal (c,[0]))
NonTerminal (c,True ) -> (c , NonTerminal $ ("List" ++ c,[0]))
Terminal t -> ("", Terminal t)
createListRules :: SepMap -> String -> [CFRule]
createListRules :: SepMap -> String -> [ParamCFRule]
createListRules sepMap c =
case lookup c sepMap of
Just (ne, isSep, symb) -> createListRules' ne isSep symb c
Nothing -> createListRules' False True "" c
createListRules':: IsNonempty -> IsSeparator -> SepTermSymb -> String -> [CFRule]
createListRules':: IsNonempty -> IsSeparator -> SepTermSymb -> String -> [ParamCFRule]
createListRules' ne isSep symb c = ruleCons : [ruleBase]
where ruleBase = CFRule ("List" ++ c) smbs rn
where ruleBase = Rule ("List" ++ c,[0]) smbs rn
where smbs = if isSep
then [NonTerminal c | ne]
else [NonTerminal c | ne] ++
then [NonTerminal (c,[0]) | ne]
else [NonTerminal (c,[0]) | ne] ++
[Terminal symb | symb /= "" && ne]
rn = CFObj (mkCId $ "Base" ++ c) []
ruleCons = CFRule ("List" ++ c) smbs rn
where smbs = [NonTerminal c] ++
ruleCons = Rule ("List" ++ c,[0]) smbs rn
where smbs = [NonTerminal (c,[0])] ++
[Terminal symb | symb /= ""] ++
[NonTerminal ("List" ++ c)]
[NonTerminal ("List" ++ c,[0])]
rn = CFObj (mkCId $ "Cons" ++ c) []

View File

@@ -8,16 +8,11 @@ module GF.Grammar.CFG where
import GF.Data.Utilities
import PGF
--import GF.Infra.Option
import GF.Data.Relation
--import Control.Monad
--import Control.Monad.State (State, get, put, evalState)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List
--import Data.Maybe (fromMaybe)
--import Data.Monoid (mconcat)
import Data.Set (Set)
import qualified Data.Set as Set
@@ -30,15 +25,19 @@ type Cat = String
data Symbol c t = NonTerminal c | Terminal t
deriving (Eq, Ord, Show)
type CFSymbol = Symbol Cat Token
data CFRule = CFRule {
lhsCat :: Cat,
ruleRhs :: [CFSymbol],
data Rule c t = Rule {
ruleLhs :: c,
ruleRhs :: [Symbol c t],
ruleName :: CFTerm
}
deriving (Eq, Ord, Show)
data Grammar c t = Grammar {
cfgStartCat :: c,
cfgExternalCats :: Set c,
cfgRules :: Map c (Set (Rule c t)) }
deriving (Eq, Ord, Show)
data CFTerm
= CFObj CId [CFTerm] -- ^ an abstract syntax function with arguments
| CFAbs Int CFTerm -- ^ A lambda abstraction. The Int is the variable id.
@@ -48,11 +47,14 @@ data CFTerm
| CFMeta CId -- ^ A metavariable
deriving (Eq, Ord, Show)
data CFG = CFG { cfgStartCat :: Cat,
cfgExternalCats :: Set Cat,
cfgRules :: Map Cat (Set CFRule) }
deriving (Eq, Ord, Show)
type CFSymbol = Symbol Cat Token
type CFRule = Rule Cat Token
type CFG = Grammar Cat Token
type Param = Int
type ParamCFSymbol = Symbol (Cat,[Param]) Token
type ParamCFRule = Rule (Cat,[Param]) Token
type ParamCFG = Grammar (Cat,[Param]) Token
--
-- * Grammar filtering
@@ -64,25 +66,25 @@ data CFG = CFG { cfgStartCat :: Cat,
-- one should we pick?
-- FIXME: Does not (yet) remove productions which are cyclic
-- because of empty productions.
removeCycles :: CFG -> CFG
removeCycles :: (Ord c,Ord t) => Grammar c t -> Grammar c t
removeCycles = onRules f
where f rs = filter (not . isCycle) rs
where alias = transitiveClosure $ mkRel [(c,c') | CFRule c [NonTerminal c'] _ <- rs]
isCycle (CFRule c [NonTerminal c'] _) = isRelatedTo alias c' c
where alias = transitiveClosure $ mkRel [(c,c') | Rule c [NonTerminal c'] _ <- rs]
isCycle (Rule c [NonTerminal c'] _) = isRelatedTo alias c' c
isCycle _ = False
-- | Better bottom-up filter that also removes categories which contain no finite
-- strings.
bottomUpFilter :: CFG -> CFG
bottomUpFilter :: (Ord c,Ord t) => Grammar c t -> Grammar c t
bottomUpFilter gr = fix grow (gr { cfgRules = Map.empty })
where grow g = g `unionCFG` filterCFG (all (okSym g) . ruleRhs) gr
okSym g = symbol (`elem` allCats g) (const True)
-- | Removes categories which are not reachable from any external category.
topDownFilter :: CFG -> CFG
topDownFilter :: (Ord c,Ord t) => Grammar c t -> Grammar c t
topDownFilter cfg = filterCFGCats (`Set.member` keep) cfg
where
rhsCats = [ (lhsCat r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
rhsCats = [ (ruleLhs r, c') | r <- allRules cfg, c' <- filterCats (ruleRhs r) ]
uses = reflexiveClosure_ (allCats cfg) $ transitiveClosure $ mkRel rhsCats
keep = Set.unions $ map (allRelated uses) $ Set.toList $ cfgExternalCats cfg
@@ -95,12 +97,12 @@ mergeIdentical g = onRules (map subst) g
m = Map.fromList [(y,concat (intersperse "+" xs))
| (_,xs) <- buildMultiMap [(rulesKey rs,c) | (c,rs) <- Map.toList (cfgRules g)], y <- xs]
-- build data to compare for each category: a set of name,rhs pairs
rulesKey = Set.map (\ (CFRule _ r n) -> (n,r))
subst (CFRule c r n) = CFRule (substCat c) (map (mapSymbol substCat id) r) n
rulesKey = Set.map (\ (Rule _ r n) -> (n,r))
subst (Rule c r n) = Rule (substCat c) (map (mapSymbol substCat id) r) n
substCat c = Map.findWithDefault (error $ "mergeIdentical: " ++ c) c m
-- | Keeps only the start category as an external category.
purgeExternalCats :: CFG -> CFG
purgeExternalCats :: Grammar c t -> Grammar c t
purgeExternalCats cfg = cfg { cfgExternalCats = Set.singleton (cfgStartCat cfg) }
--
@@ -113,7 +115,7 @@ removeLeftRecursion :: CFG -> CFG
removeLeftRecursion gr
= gr { cfgRules = groupProds $ concat [scheme1, scheme2, scheme3, scheme4] }
where
scheme1 = [CFRule a [x,NonTerminal a_x] n' |
scheme1 = [Rule a [x,NonTerminal a_x] n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
not (isLeftRecursive x),
@@ -123,27 +125,27 @@ removeLeftRecursion gr
a_x `Set.member` newCats,
let n' = symbol (\_ -> CFApp (CFRes 1) (CFRes 0))
(\_ -> CFRes 0) x]
scheme2 = [CFRule a_x (beta++[NonTerminal a_b]) n' |
scheme2 = [Rule a_x (beta++[NonTerminal a_b]) n' |
a <- retainedLeftRecursive,
b@(NonTerminal b') <- properLeftCornersOf a,
isLeftRecursive b,
CFRule _ (x:beta) n <- catRules gr b',
Rule _ (x:beta) n <- catRules gr b',
let a_x = mkCat (NonTerminal a) x,
let a_b = mkCat (NonTerminal a) b,
let i = length $ filterCats beta,
let n' = symbol (\_ -> CFAbs 1 (CFApp (CFRes i) (shiftTerm n)))
(\_ -> CFApp (CFRes i) n) x]
scheme3 = [CFRule a_x beta n' |
scheme3 = [Rule a_x beta n' |
a <- retainedLeftRecursive,
x <- properLeftCornersOf a,
CFRule _ (x':beta) n <- catRules gr a,
Rule _ (x':beta) n <- catRules gr a,
x == x',
let a_x = mkCat (NonTerminal a) x,
let n' = symbol (\_ -> CFAbs 1 (shiftTerm n))
(\_ -> n) x]
scheme4 = catSetRules gr $ Set.fromList $ filter (not . isLeftRecursive . NonTerminal) cats
newCats = Set.fromList (map lhsCat (scheme2 ++ scheme3))
newCats = Set.fromList (map ruleLhs (scheme2 ++ scheme3))
shiftTerm :: CFTerm -> CFTerm
shiftTerm (CFObj f ts) = CFObj f (map shiftTerm ts)
@@ -155,7 +157,7 @@ removeLeftRecursion gr
cats = allCats gr
-- rules = allRules gr
directLeftCorner = mkRel [(NonTerminal c,t) | CFRule c (t:_) _ <- allRules gr]
directLeftCorner = mkRel [(NonTerminal c,t) | Rule c (t:_) _ <- allRules gr]
-- leftCorner = reflexiveClosure_ (map NonTerminal cats) $ transitiveClosure directLeftCorner
properLeftCorner = transitiveClosure directLeftCorner
properLeftCornersOf = Set.toList . allRelated properLeftCorner . NonTerminal
@@ -176,11 +178,12 @@ removeLeftRecursion gr
where showSymbol = symbol id show
-- | Get the sets of mutually recursive non-terminals for a grammar.
mutRecCats :: Bool -- ^ If true, all categories will be in some set.
mutRecCats :: Ord c
=> Bool -- ^ If true, all categories will be in some set.
-- If false, only recursive categories will be included.
-> CFG -> [Set Cat]
-> Grammar c t -> [Set c]
mutRecCats incAll g = equivalenceClasses $ refl $ symmetricSubrelation $ transitiveClosure r
where r = mkRel [(c,c') | CFRule c ss _ <- allRules g, NonTerminal c' <- ss]
where r = mkRel [(c,c') | Rule c ss _ <- allRules g, NonTerminal c' <- ss]
refl = if incAll then reflexiveClosure_ (allCats g) else reflexiveSubrelation
--
@@ -199,107 +202,108 @@ makeRegular g = g { cfgRules = groupProds $ concatMap trSet (mutRecCats True g)
where trSet cs | allXLinear cs rs = rs
| otherwise = concatMap handleCat (Set.toList cs)
where rs = catSetRules g cs
handleCat c = [CFRule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
handleCat c = [Rule c' [] (mkCFTerm (c++"-empty"))] -- introduce A' -> e
++ concatMap (makeRightLinearRules c) (catRules g c)
where c' = newCat c
makeRightLinearRules b' (CFRule c ss n) =
makeRightLinearRules b' (Rule c ss n) =
case ys of
[] -> newRule b' (xs ++ [NonTerminal (newCat c)]) n -- no non-terminals left
(NonTerminal b:zs) -> newRule b' (xs ++ [NonTerminal b]) n
++ makeRightLinearRules (newCat b) (CFRule c zs n)
++ makeRightLinearRules (newCat b) (Rule c zs n)
where (xs,ys) = break (`catElem` cs) ss
-- don't add rules on the form A -> A
newRule c rhs n | rhs == [NonTerminal c] = []
| otherwise = [CFRule c rhs n]
| otherwise = [Rule c rhs n]
newCat c = c ++ "$"
--
-- * CFG Utilities
--
mkCFG :: Cat -> Set Cat -> [CFRule] -> CFG
mkCFG start ext rs = CFG { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
mkCFG :: (Ord c,Ord t) => c -> Set c -> [Rule c t] -> Grammar c t
mkCFG start ext rs = Grammar { cfgStartCat = start, cfgExternalCats = ext, cfgRules = groupProds rs }
groupProds :: [CFRule] -> Map Cat (Set CFRule)
groupProds = Map.fromListWith Set.union . map (\r -> (lhsCat r,Set.singleton r))
groupProds :: (Ord c,Ord t) => [Rule c t] -> Map c (Set (Rule c t))
groupProds = Map.fromListWith Set.union . map (\r -> (ruleLhs r,Set.singleton r))
uniqueFuns :: CFG -> CFG
uniqueFuns cfg = CFG {cfgStartCat = cfgStartCat cfg
,cfgExternalCats = cfgExternalCats cfg
,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg))))
}
uniqueFuns :: (Ord c,Ord t) => Grammar c t -> Grammar c t
uniqueFuns cfg = Grammar {cfgStartCat = cfgStartCat cfg
,cfgExternalCats = cfgExternalCats cfg
,cfgRules = Map.fromList (snd (mapAccumL uniqueFunSet Set.empty (Map.toList (cfgRules cfg))))
}
where
uniqueFunSet funs (cat,rules) =
let (funs',rules') = mapAccumL uniqueFun funs (Set.toList rules)
in (funs',(cat,Set.fromList rules'))
uniqueFun funs (CFRule cat items (CFObj fun args)) = (Set.insert fun' funs,CFRule cat items (CFObj fun' args))
uniqueFun funs (Rule cat items (CFObj fun args)) = (Set.insert fun' funs,Rule cat items (CFObj fun' args))
where
fun' = head [fun'|suffix<-"":map show ([2..]::[Int]),
let fun'=mkCId (showCId fun++suffix),
not (fun' `Set.member` funs)]
-- | Gets all rules in a CFG.
allRules :: CFG -> [CFRule]
allRules = concat . map Set.toList . Map.elems . cfgRules
allRules :: Grammar c t -> [Rule c t]
allRules = concatMap Set.toList . Map.elems . cfgRules
-- | Gets all rules in a CFG, grouped by their LHS categories.
allRulesGrouped :: CFG -> [(Cat,[CFRule])]
allRulesGrouped :: Grammar c t -> [(c,[Rule c t])]
allRulesGrouped = Map.toList . Map.map Set.toList . cfgRules
-- | Gets all categories which have rules.
allCats :: CFG -> [Cat]
allCats :: Grammar c t -> [c]
allCats = Map.keys . cfgRules
-- | Gets all categories which have rules or occur in a RHS.
allCats' :: CFG -> [Cat]
allCats' :: (Ord c,Ord t) => Grammar c t -> [c]
allCats' cfg = Set.toList (Map.keysSet (cfgRules cfg) `Set.union`
Set.fromList [c | rs <- Map.elems (cfgRules cfg),
r <- Set.toList rs,
NonTerminal c <- ruleRhs r])
-- | Gets all rules for the given category.
catRules :: CFG -> Cat -> [CFRule]
catRules :: Ord c => Grammar c t -> c -> [Rule c t]
catRules gr c = Set.toList $ Map.findWithDefault Set.empty c (cfgRules gr)
-- | Gets all rules for categories in the given set.
catSetRules :: CFG -> Set Cat -> [CFRule]
catSetRules gr cs = allRules $ filterCFGCats (`Set.member` cs) gr
mapCFGCats :: (Cat -> Cat) -> CFG -> CFG
mapCFGCats f cfg = mkCFG (f (cfgStartCat cfg))
(Set.map f (cfgExternalCats cfg))
[CFRule (f lhs) (map (mapSymbol f id) rhs) t | CFRule lhs rhs t <- allRules cfg]
mapCFGCats :: (Ord c,Ord c',Ord t) => (c -> c') -> Grammar c t -> Grammar c' t
mapCFGCats f cfg = Grammar (f (cfgStartCat cfg))
(Set.map f (cfgExternalCats cfg))
(groupProds [Rule (f lhs) (map (mapSymbol f id) rhs) t | Rule lhs rhs t <- allRules cfg])
onCFG :: (Map Cat (Set CFRule) -> Map Cat (Set CFRule)) -> CFG -> CFG
onCFG f cfg = cfg { cfgRules = f (cfgRules cfg) }
onRules :: ([CFRule] -> [CFRule]) -> CFG -> CFG
onRules :: (Ord c,Ord t) => ([Rule c t] -> [Rule c t]) -> Grammar c t -> Grammar c t
onRules f cfg = cfg { cfgRules = groupProds $ f $ allRules cfg }
-- | Clean up CFG after rules have been removed.
cleanCFG :: CFG -> CFG
cleanCFG = onCFG (Map.filter (not . Set.null))
cleanCFG :: Ord c => Grammar c t -> Grammar c t
cleanCFG cfg = cfg{ cfgRules = Map.filter (not . Set.null) (cfgRules cfg) }
-- | Combine two CFGs.
unionCFG :: CFG -> CFG -> CFG
unionCFG x y = onCFG (\rs -> Map.unionWith Set.union rs (cfgRules y)) x
unionCFG :: (Ord c,Ord t) => Grammar c t -> Grammar c t -> Grammar c t
unionCFG x y = x { cfgRules = Map.unionWith Set.union (cfgRules x) (cfgRules y) }
filterCFG :: (CFRule -> Bool) -> CFG -> CFG
filterCFG p = cleanCFG . onCFG (Map.map (Set.filter p))
filterCFG :: (Rule c t -> Bool) -> Grammar c t -> Grammar c t
filterCFG p cfg = cfg { cfgRules = Map.mapMaybe filterRules (cfgRules cfg) }
where
filterRules rules =
let rules' = Set.filter p rules
in if Set.null rules' then Nothing else Just rules'
filterCFGCats :: (Cat -> Bool) -> CFG -> CFG
filterCFGCats p = onCFG (Map.filterWithKey (\c _ -> p c))
filterCFGCats :: (c -> Bool) -> Grammar c t -> Grammar c t
filterCFGCats p cfg = cfg { cfgRules = Map.filterWithKey (\c _ -> p c) (cfgRules cfg) }
countCats :: CFG -> Int
countCats :: Ord c => Grammar c t -> Int
countCats = Map.size . cfgRules . cleanCFG
countRules :: CFG -> Int
countRules :: Grammar c t -> Int
countRules = length . allRules
prCFG :: CFG -> String
prCFG = prProductions . map prRule . allRules
where
prRule r = (lhsCat r, unwords (map prSym (ruleRhs r)))
prRule r = (ruleLhs r, unwords (map prSym (ruleRhs r)))
prSym = symbol id (\t -> "\""++ t ++"\"")
prProductions :: [(Cat,String)] -> String
@@ -325,8 +329,8 @@ prCFTerm = pr 0
-- * CFRule Utilities
--
ruleFun :: CFRule -> CId
ruleFun (CFRule _ _ t) = f t
ruleFun :: Rule c t -> CId
ruleFun (Rule _ _ t) = f t
where f (CFObj n _) = n
f (CFApp _ x) = f x
f (CFAbs _ x) = f x
@@ -334,29 +338,31 @@ ruleFun (CFRule _ _ t) = f t
-- | Check if any of the categories used on the right-hand side
-- are in the given list of categories.
anyUsedBy :: [Cat] -> CFRule -> Bool
anyUsedBy cs (CFRule _ ss _) = any (`elem` cs) (filterCats ss)
anyUsedBy :: Eq c => [c] -> Rule c t -> Bool
anyUsedBy cs (Rule _ ss _) = any (`elem` cs) (filterCats ss)
mkCFTerm :: String -> CFTerm
mkCFTerm n = CFObj (mkCId n) []
ruleIsNonRecursive :: Set Cat -> CFRule -> Bool
ruleIsNonRecursive :: Ord c => Set c -> Rule c t -> Bool
ruleIsNonRecursive cs = noCatsInSet cs . ruleRhs
-- | Check if all the rules are right-linear, or all the rules are
-- left-linear, with respect to given categories.
allXLinear :: Set Cat -> [CFRule] -> Bool
allXLinear :: Ord c => Set c -> [Rule c t] -> Bool
allXLinear cs rs = all (isRightLinear cs) rs || all (isLeftLinear cs) rs
-- | Checks if a context-free rule is right-linear.
isRightLinear :: Set Cat -- ^ The categories to consider
-> CFRule -- ^ The rule to check for right-linearity
isRightLinear :: Ord c
=> Set c -- ^ The categories to consider
-> Rule c t -- ^ The rule to check for right-linearity
-> Bool
isRightLinear cs = noCatsInSet cs . safeInit . ruleRhs
-- | Checks if a context-free rule is left-linear.
isLeftLinear :: Set Cat -- ^ The categories to consider
-> CFRule -- ^ The rule to check for left-linearity
isLeftLinear :: Ord c
=> Set c -- ^ The categories to consider
-> Rule c t -- ^ The rule to check for left-linearity
-> Bool
isLeftLinear cs = noCatsInSet cs . drop 1 . ruleRhs

View File

@@ -18,8 +18,6 @@ import GF.Data.Operations
import GF.Grammar.CFG
import PGF (mkCId)
import Data.List
type EBNF = [ERule]
type ERule = (ECat, ERHS)
type ECat = (String,[Int])
@@ -35,14 +33,14 @@ data ERHS =
| EOpt ERHS
| EEmpty
type CFRHS = [CFSymbol]
type CFJustRule = (Cat, CFRHS)
type CFRHS = [ParamCFSymbol]
type CFJustRule = ((Cat,[Param]), CFRHS)
ebnf2cf :: EBNF -> [CFRule]
ebnf2cf :: EBNF -> [ParamCFRule]
ebnf2cf ebnf =
[CFRule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)]
[Rule cat items (mkCFF i cat) | (i,(cat,items)) <- zip [0..] (normEBNF ebnf)]
where
mkCFF i c = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) []
mkCFF i (c,_) = CFObj (mkCId ("Mk" ++ c ++ "_" ++ show i)) []
normEBNF :: EBNF -> [CFJustRule]
normEBNF erules = let
@@ -101,7 +99,7 @@ substERules g (cat,itss) = (cat, map sub itss) where
sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
sub (EIOpt r : ii) = EIOpt (substERules g r) : ii
-}
eitem2cfitem :: EItem -> CFSymbol
eitem2cfitem :: EItem -> ParamCFSymbol
eitem2cfitem it = case it of
EITerm a -> Terminal a
EINonTerm cat -> NonTerminal (mkCFCatE cat)
@@ -143,8 +141,8 @@ mkECat ints = ("C", ints)
prECat (c,[]) = c
prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
mkCFCatE :: ECat -> Cat
mkCFCatE = prECat
mkCFCatE :: ECat -> (Cat,[Param])
mkCFCatE c = (prECat c,[0])
{-
updECat _ (c,[]) = (c,[])
updECat ii (c,_) = (c,ii)

View File

@@ -75,14 +75,14 @@ make_fa c@(g,ns) q0 alpha q1 fa =
case mrRec n of
-- the set Ni is right-recursive or cyclic
RightR ->
let new = [(getState c, xs, q1) | CFRule c xs _ <- nrs]
++ [(getState c, xs, getState d) | CFRule c ss _ <- rs,
let new = [(getState c, xs, q1) | Rule c xs _ <- nrs]
++ [(getState c, xs, getState d) | Rule c ss _ <- rs,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
-- the set Ni is left-recursive
LeftR ->
let new = [(q0, xs, getState c) | CFRule c xs _ <- nrs]
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- rs]
let new = [(q0, xs, getState c) | Rule c xs _ <- nrs]
++ [(getState d, xs, getState c) | Rule c (NonTerminal d:xs) _ <- rs]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats ni fa
@@ -91,7 +91,7 @@ make_fa c@(g,ns) q0 alpha q1 fa =
x stateMap
-- a is not recursive
Nothing -> let rs = catRules g a
in foldl' (\f (CFRule _ b _) -> make_fa_ q0 b q1 f) fa rs
in foldl' (\f (Rule _ b _) -> make_fa_ q0 b q1 f) fa rs
(x:beta) -> let (fa',q) = newState () fa
in make_fa_ q beta q1 $ make_fa_ q0 [x] q fa'
where
@@ -190,15 +190,15 @@ make_fa1 mr q0 alpha q1 fa =
case mrRec mr of
NotR -> -- the set is a non-recursive (always singleton) set of categories
-- so the set of category rules is the set of rules for the whole set
make_fas [(q0, b, q1) | CFRule _ b _ <- mrNonRecRules mr] fa
make_fas [(q0, b, q1) | Rule _ b _ <- mrNonRecRules mr] fa
RightR -> -- the set is right-recursive or cyclic
let new = [(getState c, xs, q1) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState c, xs, getState d) | CFRule c ss _ <- mrRecRules mr,
let new = [(getState c, xs, q1) | Rule c xs _ <- mrNonRecRules mr]
++ [(getState c, xs, getState d) | Rule c ss _ <- mrRecRules mr,
let (xs,NonTerminal d) = (init ss,last ss)]
in make_fas new $ newTransition q0 (getState a) Nothing fa'
LeftR -> -- the set is left-recursive
let new = [(q0, xs, getState c) | CFRule c xs _ <- mrNonRecRules mr]
++ [(getState d, xs, getState c) | CFRule c (NonTerminal d:xs) _ <- mrRecRules mr]
let new = [(q0, xs, getState c) | Rule c xs _ <- mrNonRecRules mr]
++ [(getState d, xs, getState c) | Rule c (NonTerminal d:xs) _ <- mrRecRules mr]
in make_fas new $ newTransition (getState a) q1 Nothing fa'
where
(fa',stateMap) = addStatesForCats (mrCats mr) fa

View File

@@ -64,17 +64,17 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
extCats :: Set Cat
extCats = Set.fromList $ map lhsCat startRules
extCats = Set.fromList $ map ruleLhs startRules
startRules :: [CFRule]
startRules = [CFRule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
startRules = [Rule (showCId c) [NonTerminal (fcatToCat fc r)] (CFRes 0)
| (c,CncCat s e lbls) <- Map.toList (cnccats cnc),
fc <- range (s,e), not (isPredefFId fc),
r <- [0..catLinArity fc-1]]
ruleToCFRule :: (FId,Production) -> [CFRule]
ruleToCFRule (c,PApply funid args) =
[CFRule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
[Rule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
| (l,seqid) <- Array.assocs rhs
, let row = sequences cnc ! seqid
, not (containsLiterals row)]
@@ -119,5 +119,5 @@ pgfToCFG pgf lang = mkCFG (showCId (lookStartCat pgf)) extCats (startRules ++ co
profileToTerm t [] = CFMeta t
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
ruleToCFRule (c,PCoerce c') =
[CFRule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
[Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
| l <- [0..catLinArity c-1]]

View File

@@ -129,9 +129,9 @@ renameCats prefix cfg = mapCFGCats renameCat cfg
badCat c = error ("GF.Speech.SRG.renameCats: " ++ c ++ "\n" ++ prCFG cfg)
cfRulesToSRGRule :: [CFRule] -> SRGRule
cfRulesToSRGRule rs@(r:_) = SRGRule (lhsCat r) rhs
cfRulesToSRGRule rs@(r:_) = SRGRule (ruleLhs r) rhs
where
alts = [((n,Nothing),mkSRGSymbols 0 ss) | CFRule c ss n <- rs]
alts = [((n,Nothing),mkSRGSymbols 0 ss) | Rule c ss n <- rs]
rhs = [SRGAlt p n (srgItem sss) | ((n,p),sss) <- buildMultiMap alts ]
mkSRGSymbols _ [] = []