forked from GitHub/gf-core
214 lines
5.9 KiB
Haskell
214 lines
5.9 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : CF
|
|
-- Maintainer : AR
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/04/21 16:21:07 $
|
|
-- > CVS $Author: bringert $
|
|
-- > CVS $Revision: 1.6 $
|
|
--
|
|
-- context-free grammars. AR 15\/12\/1999 -- 30\/3\/2000 -- 2\/6\/2001 -- 3\/12\/2001
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.CF.CF (-- * Types
|
|
CF(..), CFRule, CFRuleGroup,
|
|
CFItem(..), CFTree(..), CFPredef, CFParser,
|
|
RegExp(..), CFWord,
|
|
-- * Functions
|
|
cfParseResults,
|
|
-- ** to construct CF grammars
|
|
emptyCF, emptyCFPredef, rules2CF, groupCFRules,
|
|
-- ** to construct rules
|
|
atomCFRule, atomCFTerm, atomRegExp, altsCFTerm,
|
|
-- ** to construct trees
|
|
atomCFTree, buildCFTree,
|
|
-- ** to decide whether a token matches a terminal item
|
|
matchCFTerm, satRegExp,
|
|
-- ** to analyse a CF grammar
|
|
catsOfCF, rulesOfCF, ruleGroupsOfCF, rulesForCFCat,
|
|
valCatCF, valItemsCF, valFunCF,
|
|
startCat, predefOfCF, appCFPredef, valCFItem,
|
|
cfTokens, wordsOfRegExp, forCFItem,
|
|
isCircularCF, predefRules
|
|
) where
|
|
|
|
import GF.Data.Operations
|
|
import GF.Data.Str
|
|
import GF.Canon.AbsGFC
|
|
import GF.Canon.GFC
|
|
import GF.CF.CFIdent
|
|
import Data.List (nub,nubBy)
|
|
import Data.Char (isUpper, isLower, toUpper, toLower)
|
|
|
|
-- CF grammar data types
|
|
|
|
-- | abstract type CF.
|
|
-- Invariant: each category has all its rules grouped with it
|
|
-- also: the list is never empty (the category is just missing then)
|
|
newtype CF = CF ([CFRuleGroup], CFPredef)
|
|
type CFRule = (CFFun, (CFCat, [CFItem]))
|
|
type CFRuleGroup = (CFCat,[CFRule])
|
|
|
|
-- | CFPredef is a hack for variable symbols and literals; normally = @const []@
|
|
data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
|
|
|
|
newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
|
|
|
|
-- | recognize literals, variables, etc
|
|
type CFPredef = CFTok -> [(CFCat, CFFun)]
|
|
|
|
-- | Wadler style + return information
|
|
type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
|
|
|
|
cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
|
|
cfParseResults rs = [b | (b,[]) <- fst rs]
|
|
|
|
-- | terminals are regular expressions on words; to be completed to full regexp
|
|
data RegExp =
|
|
RegAlts [CFWord] -- ^ list of alternative words
|
|
| RegSpec CFTok -- ^ special token
|
|
deriving (Eq, Ord, Show)
|
|
|
|
type CFWord = String
|
|
|
|
-- the above types should be kept abstract, and the following functions used
|
|
|
|
-- to construct CF grammars
|
|
|
|
emptyCF :: CF
|
|
emptyCF = CF ([], emptyCFPredef)
|
|
|
|
emptyCFPredef :: CFPredef
|
|
emptyCFPredef = const []
|
|
|
|
rules2CF :: [CFRule] -> CF
|
|
rules2CF rs = CF (groupCFRules rs, emptyCFPredef)
|
|
|
|
groupCFRules :: [CFRule] -> [(CFCat,[CFRule])]
|
|
groupCFRules = foldr ins [] where
|
|
ins rule crs = case crs of
|
|
(c,r) : rs | compatCF c cat -> (c,rule:r) : rs
|
|
cr : rs -> cr : ins rule rs
|
|
_ -> [(cat,[rule])]
|
|
where
|
|
cat = valCatCF rule
|
|
|
|
-- to construct rules
|
|
|
|
-- | make a rule from a single token without constituents
|
|
atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
|
|
atomCFRule c f s = (f, (c, [atomCFTerm s]))
|
|
|
|
-- | usual terminal
|
|
atomCFTerm :: CFTok -> CFItem
|
|
atomCFTerm = CFTerm . atomRegExp
|
|
|
|
atomRegExp :: CFTok -> RegExp
|
|
atomRegExp t = case t of
|
|
TS s -> RegAlts [s]
|
|
_ -> RegSpec t
|
|
|
|
-- | terminal consisting of alternatives
|
|
altsCFTerm :: [String] -> CFItem
|
|
altsCFTerm = CFTerm . RegAlts
|
|
|
|
|
|
-- to construct trees
|
|
|
|
-- | make a tree without constituents
|
|
atomCFTree :: CFCat -> CFFun -> CFTree
|
|
atomCFTree c f = buildCFTree c f []
|
|
|
|
-- | make a tree with constituents.
|
|
buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
|
|
buildCFTree c f trees = CFTree (f,(c,trees))
|
|
|
|
{- ----
|
|
cfMeta0 :: CFTree
|
|
cfMeta0 = atomCFTree uCFCat metaCFFun
|
|
|
|
-- used in happy
|
|
litCFTree :: String -> CFTree --- Maybe CFTree
|
|
litCFTree s = maybe cfMeta0 id $ do
|
|
(c,f) <- getCFLiteral s
|
|
return $ buildCFTree c f []
|
|
-}
|
|
|
|
-- to decide whether a token matches a terminal item
|
|
|
|
matchCFTerm :: CFItem -> CFTok -> Bool
|
|
matchCFTerm (CFTerm t) s = satRegExp t s
|
|
matchCFTerm _ _ = False
|
|
|
|
satRegExp :: RegExp -> CFTok -> Bool
|
|
satRegExp r t = case (r,t) of
|
|
(RegAlts tt, TS s) -> elem s tt
|
|
(RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s]
|
|
(RegSpec x, _) -> t == x ---
|
|
_ -> False
|
|
where
|
|
caseUpperOrLower s = case s of
|
|
c:cs | isUpper c -> [s, toLower c : cs]
|
|
c:cs | isLower c -> [s, toUpper c : cs]
|
|
_ -> [s]
|
|
|
|
-- to analyse a CF grammar
|
|
|
|
catsOfCF :: CF -> [CFCat]
|
|
catsOfCF (CF (rr,_)) = map fst rr
|
|
|
|
rulesOfCF :: CF -> [CFRule]
|
|
rulesOfCF (CF (rr,_)) = concatMap snd rr
|
|
|
|
ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])]
|
|
ruleGroupsOfCF (CF (rr,_)) = rr
|
|
|
|
rulesForCFCat :: CF -> CFCat -> [CFRule]
|
|
rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr
|
|
|
|
valCatCF :: CFRule -> CFCat
|
|
valCatCF (_,(c,_)) = c
|
|
|
|
valItemsCF :: CFRule -> [CFItem]
|
|
valItemsCF (_,(_,i)) = i
|
|
|
|
valFunCF :: CFRule -> CFFun
|
|
valFunCF (f,(_,_)) = f
|
|
|
|
startCat :: CF -> CFCat
|
|
startCat (CF (rr,_)) = fst (head rr) --- hardly useful
|
|
|
|
predefOfCF :: CF -> CFPredef
|
|
predefOfCF (CF (_,f)) = f
|
|
|
|
appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)]
|
|
appCFPredef = ($) . predefOfCF
|
|
|
|
valCFItem :: CFItem -> Either RegExp CFCat
|
|
valCFItem (CFTerm r) = Left r
|
|
valCFItem (CFNonterm nt) = Right nt
|
|
|
|
cfTokens :: CF -> [CFWord]
|
|
cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf,
|
|
CFTerm i <- valItemsCF r]
|
|
|
|
wordsOfRegExp :: RegExp -> [CFWord]
|
|
wordsOfRegExp (RegAlts tt) = tt
|
|
wordsOfRegExp _ = []
|
|
|
|
forCFItem :: CFTok -> CFRule -> Bool
|
|
forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a
|
|
forCFItem _ _ = False
|
|
|
|
-- | we should make a test of circular chains, too
|
|
isCircularCF :: CFRule -> Bool
|
|
isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
|
|
isCircularCF _ = False
|
|
|
|
-- | coercion to the older predef cf type
|
|
predefRules :: CFPredef -> CFTok -> [CFRule]
|
|
predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]
|
|
|