1
0
forked from GitHub/gf-core
Files
gf-core/src/GF/CF/CF.hs

214 lines
5.9 KiB
Haskell

----------------------------------------------------------------------
-- |
-- Module : CF
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:07 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- context-free grammars. AR 15\/12\/1999 -- 30\/3\/2000 -- 2\/6\/2001 -- 3\/12\/2001
-----------------------------------------------------------------------------
module GF.CF.CF (-- * Types
CF(..), CFRule, CFRuleGroup,
CFItem(..), CFTree(..), CFPredef, CFParser,
RegExp(..), CFWord,
-- * Functions
cfParseResults,
-- ** to construct CF grammars
emptyCF, emptyCFPredef, rules2CF, groupCFRules,
-- ** to construct rules
atomCFRule, atomCFTerm, atomRegExp, altsCFTerm,
-- ** to construct trees
atomCFTree, buildCFTree,
-- ** to decide whether a token matches a terminal item
matchCFTerm, satRegExp,
-- ** to analyse a CF grammar
catsOfCF, rulesOfCF, ruleGroupsOfCF, rulesForCFCat,
valCatCF, valItemsCF, valFunCF,
startCat, predefOfCF, appCFPredef, valCFItem,
cfTokens, wordsOfRegExp, forCFItem,
isCircularCF, predefRules
) where
import GF.Data.Operations
import GF.Data.Str
import GF.Canon.AbsGFC
import GF.Canon.GFC
import GF.CF.CFIdent
import Data.List (nub,nubBy)
import Data.Char (isUpper, isLower, toUpper, toLower)
-- CF grammar data types
-- | abstract type CF.
-- Invariant: each category has all its rules grouped with it
-- also: the list is never empty (the category is just missing then)
newtype CF = CF ([CFRuleGroup], CFPredef)
type CFRule = (CFFun, (CFCat, [CFItem]))
type CFRuleGroup = (CFCat,[CFRule])
-- | CFPredef is a hack for variable symbols and literals; normally = @const []@
data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
-- | recognize literals, variables, etc
type CFPredef = CFTok -> [(CFCat, CFFun)]
-- | Wadler style + return information
type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
cfParseResults rs = [b | (b,[]) <- fst rs]
-- | terminals are regular expressions on words; to be completed to full regexp
data RegExp =
RegAlts [CFWord] -- ^ list of alternative words
| RegSpec CFTok -- ^ special token
deriving (Eq, Ord, Show)
type CFWord = String
-- the above types should be kept abstract, and the following functions used
-- to construct CF grammars
emptyCF :: CF
emptyCF = CF ([], emptyCFPredef)
emptyCFPredef :: CFPredef
emptyCFPredef = const []
rules2CF :: [CFRule] -> CF
rules2CF rs = CF (groupCFRules rs, emptyCFPredef)
groupCFRules :: [CFRule] -> [(CFCat,[CFRule])]
groupCFRules = foldr ins [] where
ins rule crs = case crs of
(c,r) : rs | compatCF c cat -> (c,rule:r) : rs
cr : rs -> cr : ins rule rs
_ -> [(cat,[rule])]
where
cat = valCatCF rule
-- to construct rules
-- | make a rule from a single token without constituents
atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
atomCFRule c f s = (f, (c, [atomCFTerm s]))
-- | usual terminal
atomCFTerm :: CFTok -> CFItem
atomCFTerm = CFTerm . atomRegExp
atomRegExp :: CFTok -> RegExp
atomRegExp t = case t of
TS s -> RegAlts [s]
_ -> RegSpec t
-- | terminal consisting of alternatives
altsCFTerm :: [String] -> CFItem
altsCFTerm = CFTerm . RegAlts
-- to construct trees
-- | make a tree without constituents
atomCFTree :: CFCat -> CFFun -> CFTree
atomCFTree c f = buildCFTree c f []
-- | make a tree with constituents.
buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
buildCFTree c f trees = CFTree (f,(c,trees))
{- ----
cfMeta0 :: CFTree
cfMeta0 = atomCFTree uCFCat metaCFFun
-- used in happy
litCFTree :: String -> CFTree --- Maybe CFTree
litCFTree s = maybe cfMeta0 id $ do
(c,f) <- getCFLiteral s
return $ buildCFTree c f []
-}
-- to decide whether a token matches a terminal item
matchCFTerm :: CFItem -> CFTok -> Bool
matchCFTerm (CFTerm t) s = satRegExp t s
matchCFTerm _ _ = False
satRegExp :: RegExp -> CFTok -> Bool
satRegExp r t = case (r,t) of
(RegAlts tt, TS s) -> elem s tt
(RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s]
(RegSpec x, _) -> t == x ---
_ -> False
where
caseUpperOrLower s = case s of
c:cs | isUpper c -> [s, toLower c : cs]
c:cs | isLower c -> [s, toUpper c : cs]
_ -> [s]
-- to analyse a CF grammar
catsOfCF :: CF -> [CFCat]
catsOfCF (CF (rr,_)) = map fst rr
rulesOfCF :: CF -> [CFRule]
rulesOfCF (CF (rr,_)) = concatMap snd rr
ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])]
ruleGroupsOfCF (CF (rr,_)) = rr
rulesForCFCat :: CF -> CFCat -> [CFRule]
rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr
valCatCF :: CFRule -> CFCat
valCatCF (_,(c,_)) = c
valItemsCF :: CFRule -> [CFItem]
valItemsCF (_,(_,i)) = i
valFunCF :: CFRule -> CFFun
valFunCF (f,(_,_)) = f
startCat :: CF -> CFCat
startCat (CF (rr,_)) = fst (head rr) --- hardly useful
predefOfCF :: CF -> CFPredef
predefOfCF (CF (_,f)) = f
appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)]
appCFPredef = ($) . predefOfCF
valCFItem :: CFItem -> Either RegExp CFCat
valCFItem (CFTerm r) = Left r
valCFItem (CFNonterm nt) = Right nt
cfTokens :: CF -> [CFWord]
cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf,
CFTerm i <- valItemsCF r]
wordsOfRegExp :: RegExp -> [CFWord]
wordsOfRegExp (RegAlts tt) = tt
wordsOfRegExp _ = []
forCFItem :: CFTok -> CFRule -> Bool
forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a
forCFItem _ _ = False
-- | we should make a test of circular chains, too
isCircularCF :: CFRule -> Bool
isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
isCircularCF _ = False
-- | coercion to the older predef cf type
predefRules :: CFPredef -> CFTok -> [CFRule]
predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]