GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

This commit is contained in:
aarne
2008-05-21 09:26:44 +00:00
parent 915a1de717
commit 055c0d0d5a
536 changed files with 0 additions and 0 deletions

213
src-3.0/GF/CF/CF.hs Normal file
View File

@@ -0,0 +1,213 @@
----------------------------------------------------------------------
-- |
-- Module : CF
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:07 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- context-free grammars. AR 15\/12\/1999 -- 30\/3\/2000 -- 2\/6\/2001 -- 3\/12\/2001
-----------------------------------------------------------------------------
module GF.CF.CF (-- * Types
CF(..), CFRule, CFRuleGroup,
CFItem(..), CFTree(..), CFPredef, CFParser,
RegExp(..), CFWord,
-- * Functions
cfParseResults,
-- ** to construct CF grammars
emptyCF, emptyCFPredef, rules2CF, groupCFRules,
-- ** to construct rules
atomCFRule, atomCFTerm, atomRegExp, altsCFTerm,
-- ** to construct trees
atomCFTree, buildCFTree,
-- ** to decide whether a token matches a terminal item
matchCFTerm, satRegExp,
-- ** to analyse a CF grammar
catsOfCF, rulesOfCF, ruleGroupsOfCF, rulesForCFCat,
valCatCF, valItemsCF, valFunCF,
startCat, predefOfCF, appCFPredef, valCFItem,
cfTokens, wordsOfRegExp, forCFItem,
isCircularCF, predefRules
) where
import GF.Data.Operations
import GF.Data.Str
import GF.Canon.AbsGFC
import GF.Canon.GFC
import GF.CF.CFIdent
import Data.List (nub,nubBy)
import Data.Char (isUpper, isLower, toUpper, toLower)
-- CF grammar data types
-- | abstract type CF.
-- Invariant: each category has all its rules grouped with it
-- also: the list is never empty (the category is just missing then)
newtype CF = CF ([CFRuleGroup], CFPredef)
type CFRule = (CFFun, (CFCat, [CFItem]))
type CFRuleGroup = (CFCat,[CFRule])
-- | CFPredef is a hack for variable symbols and literals; normally = @const []@
data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
-- | recognize literals, variables, etc
type CFPredef = CFTok -> [(CFCat, CFFun)]
-- | Wadler style + return information
type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
cfParseResults rs = [b | (b,[]) <- fst rs]
-- | terminals are regular expressions on words; to be completed to full regexp
data RegExp =
RegAlts [CFWord] -- ^ list of alternative words
| RegSpec CFTok -- ^ special token
deriving (Eq, Ord, Show)
type CFWord = String
-- the above types should be kept abstract, and the following functions used
-- to construct CF grammars
emptyCF :: CF
emptyCF = CF ([], emptyCFPredef)
emptyCFPredef :: CFPredef
emptyCFPredef = const []
rules2CF :: [CFRule] -> CF
rules2CF rs = CF (groupCFRules rs, emptyCFPredef)
groupCFRules :: [CFRule] -> [(CFCat,[CFRule])]
groupCFRules = foldr ins [] where
ins rule crs = case crs of
(c,r) : rs | compatCF c cat -> (c,rule:r) : rs
cr : rs -> cr : ins rule rs
_ -> [(cat,[rule])]
where
cat = valCatCF rule
-- to construct rules
-- | make a rule from a single token without constituents
atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
atomCFRule c f s = (f, (c, [atomCFTerm s]))
-- | usual terminal
atomCFTerm :: CFTok -> CFItem
atomCFTerm = CFTerm . atomRegExp
atomRegExp :: CFTok -> RegExp
atomRegExp t = case t of
TS s -> RegAlts [s]
_ -> RegSpec t
-- | terminal consisting of alternatives
altsCFTerm :: [String] -> CFItem
altsCFTerm = CFTerm . RegAlts
-- to construct trees
-- | make a tree without constituents
atomCFTree :: CFCat -> CFFun -> CFTree
atomCFTree c f = buildCFTree c f []
-- | make a tree with constituents.
buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
buildCFTree c f trees = CFTree (f,(c,trees))
{- ----
cfMeta0 :: CFTree
cfMeta0 = atomCFTree uCFCat metaCFFun
-- used in happy
litCFTree :: String -> CFTree --- Maybe CFTree
litCFTree s = maybe cfMeta0 id $ do
(c,f) <- getCFLiteral s
return $ buildCFTree c f []
-}
-- to decide whether a token matches a terminal item
matchCFTerm :: CFItem -> CFTok -> Bool
matchCFTerm (CFTerm t) s = satRegExp t s
matchCFTerm _ _ = False
satRegExp :: RegExp -> CFTok -> Bool
satRegExp r t = case (r,t) of
(RegAlts tt, TS s) -> elem s tt
(RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s]
(RegSpec x, _) -> t == x ---
_ -> False
where
caseUpperOrLower s = case s of
c:cs | isUpper c -> [s, toLower c : cs]
c:cs | isLower c -> [s, toUpper c : cs]
_ -> [s]
-- to analyse a CF grammar
catsOfCF :: CF -> [CFCat]
catsOfCF (CF (rr,_)) = map fst rr
rulesOfCF :: CF -> [CFRule]
rulesOfCF (CF (rr,_)) = concatMap snd rr
ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])]
ruleGroupsOfCF (CF (rr,_)) = rr
rulesForCFCat :: CF -> CFCat -> [CFRule]
rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr
valCatCF :: CFRule -> CFCat
valCatCF (_,(c,_)) = c
valItemsCF :: CFRule -> [CFItem]
valItemsCF (_,(_,i)) = i
valFunCF :: CFRule -> CFFun
valFunCF (f,(_,_)) = f
startCat :: CF -> CFCat
startCat (CF (rr,_)) = fst (head rr) --- hardly useful
predefOfCF :: CF -> CFPredef
predefOfCF (CF (_,f)) = f
appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)]
appCFPredef = ($) . predefOfCF
valCFItem :: CFItem -> Either RegExp CFCat
valCFItem (CFTerm r) = Left r
valCFItem (CFNonterm nt) = Right nt
cfTokens :: CF -> [CFWord]
cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf,
CFTerm i <- valItemsCF r]
wordsOfRegExp :: RegExp -> [CFWord]
wordsOfRegExp (RegAlts tt) = tt
wordsOfRegExp _ = []
forCFItem :: CFTok -> CFRule -> Bool
forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a
forCFItem _ _ = False
-- | we should make a test of circular chains, too
isCircularCF :: CFRule -> Bool
isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
isCircularCF _ = False
-- | coercion to the older predef cf type
predefRules :: CFPredef -> CFTok -> [CFRule]
predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]

253
src-3.0/GF/CF/CFIdent.hs Normal file
View File

@@ -0,0 +1,253 @@
----------------------------------------------------------------------
-- |
-- Module : CFIdent
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:40 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.13 $
--
-- symbols (categories, functions) for context-free grammars.
-----------------------------------------------------------------------------
module GF.CF.CFIdent (-- * Tokens and categories
CFTok(..), CFCat(..),
tS, tC, tL, tI, tF, tV, tM, tInt,
prCFTok,
-- * Function names and profiles
CFFun(..), Profile,
wordsCFTok,
-- * CF Functions
mkCFFun, varCFFun, consCFFun, string2CFFun, stringCFFun,
intCFFun, floatCFFun, dummyCFFun,
cfFun2String, cfFun2Ident, cfFun2Profile, metaCFFun,
-- * CF Categories
mkCIdent, ident2CFCat, labels2CFCat, string2CFCat,
catVarCF, cat2CFCat, cfCatString, cfCatInt,cfCatFloat,
moduleOfCFCat, cfCat2Cat, cfCat2Ident, lexCFCat,
-- * CF Tokens
string2CFTok, str2cftoks,
-- * Comparisons
compatToks, compatTok, compatCFFun, compatCF,
wordsLits
) where
import GF.Data.Operations
import GF.Canon.GFC
import GF.Infra.Ident
import GF.Grammar.Values (cPredefAbs)
import GF.Canon.AbsGFC
import GF.Grammar.Macros (ident2label)
import GF.Grammar.PrGrammar
import GF.Data.Str
import Data.Char (toLower, toUpper, isSpace)
import Data.List (intersperse)
-- | this type should be abstract
data CFTok =
TS String -- ^ normal strings
| TC String -- ^ strings that are ambiguous between upper or lower case
| TL String -- ^ string literals
| TI Integer -- ^ integer literals
| TF Double -- ^ float literals
| TV Ident -- ^ variables
| TM Int String -- ^ metavariables; the integer identifies it
deriving (Eq, Ord, Show)
-- | this type should be abstract
newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)
tS :: String -> CFTok
tC :: String -> CFTok
tL :: String -> CFTok
tI :: String -> CFTok
tF :: String -> CFTok
tV :: String -> CFTok
tM :: String -> CFTok
tS = TS
tC = TC
tL = TL
tI = TI . read
tF = TF . read
tV = TV . identC
tM = TM 0
tInt :: Integer -> CFTok
tInt = TI
prCFTok :: CFTok -> String
prCFTok t = case t of
TS s -> s
TC s -> s
TL s -> s
TI i -> show i
TF i -> show i
TV x -> prt x
TM i m -> m --- "?" --- m
-- | to build trees: the Atom contains a GF function, @Cn | Meta | Vr | Literal@
newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Ord,Show)
-- - - - - - - - - - - - - - - - - - - - - ^^^ added by peb, 21/5-04
type Profile = [([[Int]],[Int])]
wordsCFTok :: CFTok -> [String]
wordsCFTok t = case t of
TC (c:cs) -> [c':cs | c' <- [toUpper c, toLower c]]
_ -> [prCFTok t]
-- the following functions should be used instead of constructors
-- to construct CF functions
mkCFFun :: Atom -> CFFun
mkCFFun t = CFFun (t,[])
varCFFun :: Ident -> CFFun
varCFFun = mkCFFun . AV
consCFFun :: CIdent -> CFFun
consCFFun = mkCFFun . AC
-- | standard way of making cf fun
string2CFFun :: String -> String -> CFFun
string2CFFun m c = consCFFun $ mkCIdent m c
stringCFFun :: String -> CFFun
stringCFFun = mkCFFun . AS
intCFFun :: Integer -> CFFun
intCFFun = mkCFFun . AI
floatCFFun :: Double -> CFFun
floatCFFun = mkCFFun . AF
-- | used in lexer-by-need rules
dummyCFFun :: CFFun
dummyCFFun = varCFFun $ identC "_"
cfFun2String :: CFFun -> String
cfFun2String (CFFun (f,_)) = prt f
cfFun2Ident :: CFFun -> Ident
cfFun2Ident (CFFun (f,_)) = identC $ prt_ f ---
cfFun2Profile :: CFFun -> Profile
cfFun2Profile (CFFun (_,p)) = p
{- ----
strPro2cfFun :: String -> Profile -> CFFun
strPro2cfFun str p = (CFFun (AC (Ident str), p))
-}
metaCFFun :: CFFun
metaCFFun = mkCFFun $ AM 0
-- to construct CF categories
-- | belongs elsewhere
mkCIdent :: String -> String -> CIdent
mkCIdent m c = CIQ (identC m) (identC c)
ident2CFCat :: CIdent -> Ident -> CFCat
ident2CFCat mc d = CFCat (mc, L d)
labels2CFCat :: CIdent -> [Label] -> CFCat
labels2CFCat mc d = CFCat (mc, L (identC (concat (intersperse "." (map prt d))))) ----
-- | standard way of making cf cat: label s
string2CFCat :: String -> String -> CFCat
string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")
idents2CFCat :: Ident -> Ident -> CFCat
idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s")
catVarCF :: CFCat
catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----
cat2CFCat :: (Ident,Ident) -> CFCat
cat2CFCat = uncurry idents2CFCat
-- | literals
cfCatString :: CFCat
cfCatString = string2CFCat (prt cPredefAbs) "String"
cfCatInt, cfCatFloat :: CFCat
cfCatInt = string2CFCat (prt cPredefAbs) "Int"
cfCatFloat = string2CFCat (prt cPredefAbs) "Float"
{- ----
uCFCat :: CFCat
uCFCat = cat2CFCat uCat
-}
moduleOfCFCat :: CFCat -> Ident
moduleOfCFCat (CFCat (CIQ m _, _)) = m
-- | the opposite direction
cfCat2Cat :: CFCat -> (Ident,Ident)
cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)
cfCat2Ident :: CFCat -> Ident
cfCat2Ident = snd . cfCat2Cat
lexCFCat :: CFCat -> CFCat
lexCFCat cat = ident2CFCat (uncurry CIQ (cfCat2Cat cat)) (identC "*")
-- to construct CF tokens
string2CFTok :: String -> CFTok
string2CFTok = tS
str2cftoks :: Str -> [CFTok]
str2cftoks = map tS . wordsLits . sstr
-- decide if two token lists look the same (in parser postprocessing)
compatToks :: [CFTok] -> [CFTok] -> Bool
compatToks ts us = and [compatTok t u | (t,u) <- zip ts us]
compatTok :: CFTok -> CFTok -> Bool
compatTok (TM _ _) _ = True --- hack because metas are renamed
compatTok _ (TM _ _) = True
compatTok t u = any (`elem` (alts t)) (alts u) where
alts u = case u of
TC (c:s) -> [toLower c : s, toUpper c : s]
TL s -> [s, prQuotedString s]
_ -> [prCFTok u]
-- | decide if two CFFuns have the same function head (profiles may differ)
compatCFFun :: CFFun -> CFFun -> Bool
compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g
-- | decide whether two categories match
-- the modifiers can be from different modules, but on the same extension
-- path, so there is no clash, and they can be safely ignored ---
compatCF :: CFCat -> CFCat -> Bool
----compatCF = (==)
compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'
-- | Like 'words', but does not split on whitespace inside
-- double quotes.wordsLits :: String -> [String]
-- Also treats escaped quotes in quotes (AR 21/12/2005) by breaks
-- instead of break
wordsLits [] = []
wordsLits (c:cs) | isSpace c = wordsLits (dropWhile isSpace cs)
| isQuote c
= let (l,rs) = breaks (==c) cs
rs' = drop 1 rs
in ([c]++l++[c]):wordsLits rs'
| otherwise = let (w,rs) = break isSpaceQ cs
in (c:w):wordsLits rs
where
breaks c cs = case break c cs of
(l@(_:_),d:rs) | last l == '\\' ->
let (r,ts) = breaks c rs in (l++[d]++r, ts)
v -> v
isQuote c = elem c "\"'"
isSpaceQ c = isSpace c ---- || isQuote c

View File

@@ -0,0 +1,62 @@
----------------------------------------------------------------------
-- |
-- Module : CFtoGrammar
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:09 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.7 $
--
-- 26\/1\/2000 -- 18\/4 -- 24\/3\/2004
-----------------------------------------------------------------------------
module GF.CF.CFtoGrammar (cf2grammar) where
import GF.Infra.Ident
import GF.Grammar.Grammar
import qualified GF.Source.AbsGF as A
import qualified GF.Source.GrammarToSource as S
import GF.Grammar.Macros
import GF.CF.CF
import GF.CF.CFIdent
import GF.CF.PPrCF
import GF.Data.Operations
import Data.List (nub)
import Data.Char (isSpace)
cf2grammar :: CF -> [A.TopDef]
cf2grammar cf = concatMap S.trAnyDef (abs ++ conc) where
rules = rulesOfCF cf
abs = cats ++ funs
conc = lintypes ++ lins
cats = [(cat, AbsCat (yes []) (yes [])) |
cat <- nub (concat (map cf2cat rules))] ----notPredef cat
lintypes = [(cat, CncCat (yes defLinType) nope nope) | (cat,AbsCat _ _) <- cats]
(funs,lins) = unzip (map cf2rule rules)
cf2cat :: CFRule -> [Ident]
cf2cat (_,(cat, items)) = map cfCat2Ident $ cat : [c | CFNonterm c <- items]
cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
cf2rule (fun, (cat, items)) = (def,ldef) where
f = cfFun2Ident fun
def = (f, AbsFun (yes (mkProd (args', Cn (cfCat2Ident cat), []))) nope)
args0 = zip (map (mkIdent "x") [0..]) items
args = [(v, Cn (cfCat2Ident c)) | (v, CFNonterm c) <- args0]
args' = [(zIdent "_", Cn (cfCat2Ident c)) | (_, CFNonterm c) <- args0]
ldef = (f, CncFun
Nothing
(yes (mkAbs (map fst args)
(mkRecord (const theLinLabel) [foldconcat (map mkIt args0)])))
nope)
mkIt (v, CFNonterm _) = P (Vr v) theLinLabel
mkIt (_, CFTerm (RegAlts [a])) = K a
mkIt _ = K "" --- regexp not recognized in input CF ; use EBNF for this
foldconcat [] = K ""
foldconcat tt = foldr1 C tt

214
src-3.0/GF/CF/CanonToCF.hs Normal file
View File

@@ -0,0 +1,214 @@
----------------------------------------------------------------------
-- |
-- Module : CanonToCF
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:41 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.15 $
--
-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003
-----------------------------------------------------------------------------
module GF.CF.CanonToCF (canon2cf) where
import GF.System.Tracing -- peb 8/6-04
import GF.Data.Operations
import GF.Infra.Option
import GF.Infra.Ident
import GF.Canon.AbsGFC
import GF.Grammar.LookAbs (allBindCatsOf)
import GF.Canon.GFC
import GF.Grammar.Values (isPredefCat,cPredefAbs)
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import qualified GF.Infra.Modules as M
import GF.CF.CF
import GF.CF.CFIdent
import GF.UseGrammar.Morphology
import GF.Data.Trie2
import Data.List (nub,partition)
import Control.Monad
-- | The main function: for a given cnc module 'm', build the CF grammar with all the
-- rules coming from modules that 'm' extends. The categories are qualified by
-- the abstract module name 'a' that 'm' is of.
-- The ign argument tells what rules not to generate a parser for.
canon2cf :: Options -> (Ident -> Bool) -> CanonGrammar -> Ident -> Err CF
canon2cf opts ign gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04
let ms = M.allExtends gr c
a <- M.abstractOfConcrete gr c
let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
let mms = [(a, tree2list (M.jments m)) | m <- cncs]
cnc <- liftM M.jments $ M.lookupModMod gr c
rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts ign cnc)) mms
let bindcats = map snd $ allBindCatsOf gr
let rules = filter (not . isCircularCF) rules0 ---- temporarily here
let grules = groupCFRules rules
let predef = mkCFPredef opts bindcats grules
return $ CF predef
cnc2cfCond :: Options -> (Ident -> Bool) -> BinTree Ident Info ->
Ident -> [(Ident,Info)] -> Err [CFRule]
cnc2cfCond opts ign cnc m gr =
liftM concat $
mapM lin2cf [(m,fun,cat,args,lin) |
(fun, CncFun cat args lin _) <- gr, notign fun, is fun]
where
is f = isInBinTree f cnc
notign = not . ign
type IFun = Ident
type ICat = CIdent
-- | all CF rules corresponding to a linearization rule
lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
let rhss0 = allLinBranches lin -- :: [([Label], Term)]
rhss1 <- mapM (mkCFItems m) rhss0 -- :: [([Label], [[PreCFItem]])]
mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat
-- | making sequences of CF items from every branch in a linearization
mkCFItems :: Ident -> ([Label], Term) -> Err ([Label], [[PreCFItem]])
mkCFItems m (labs,t) = do
items <- term2CFItems m t
return (labs, items)
-- | making CF rules from sequences of CF items
mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> ([Label], [[PreCFItem]]) -> Err [CFRule]
mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
where
mkOneRule its = do
let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
profile = mkProfile nonterms
cfcat = labels2CFCat (redirectIdent m cat) lab
cffun = CFFun (AC (CIQ m fun), profile)
cfits = map precf2cf its
return (cffun,(cfcat,cfits))
mkProfile nonterms = map mkOne args
where
mkOne (A c i) = mkOne (AB c 0 i)
mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
where
mkB x = [k | (k,(j, [LV y], False)) <- nonterms, j == i, y == x]
-- | intermediate data structure of CFItems with information for profiles
data PreCFItem =
PTerm RegExp -- ^ like ordinary Terminal
| PNonterm CIdent Integer [Label] Bool -- ^ cat, position, part\/bind, whether arg
deriving Eq
precf2cf :: PreCFItem -> CFItem
precf2cf (PTerm r) = CFTerm r
precf2cf (PNonterm cm _ ls True) = CFNonterm (labels2CFCat cm ls)
precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF
-- | the main job in translating linearization rules into sequences of cf items
term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
term2CFItems m t = errIn "forming cf items" $ case t of
S c _ -> t2c c
T _ cc -> do
its <- mapM t2c [t | Cas _ t <- cc]
tryMkCFTerm (concat its)
V _ cc -> do
its <- mapM t2c [t | t <- cc]
tryMkCFTerm (concat its)
C t1 t2 -> do
its1 <- t2c t1
its2 <- t2c t2
return [x ++ y | x <- its1, y <- its2]
FV ts -> do
its <- mapM t2c ts
tryMkCFTerm (concat its)
P (S c _) _ -> t2c c --- w-around for bug in Compute? AR 31/1/2006
P arg s -> extrR arg s
K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]
E -> return [[]]
K (KP d vs) -> do
let its = [PTerm (RegAlts [s]) | s <- d]
let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
tryMkCFTerm (its : itss)
_ -> return [] ---- prtBad "no cf for" t ----
where
t2c = term2CFItems m
-- optimize the number of rules by a factorization
tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]
tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
case mapM mkOne (counterparts ii) of
Ok tt -> return [tt]
_ -> return ii
where
mkOne cfits = case mapM mkOneTerm cfits of
Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
_ -> mkOneNonTerm cfits
mkOneTerm (PTerm (RegAlts t)) = return t
mkOneTerm _ = Bad ""
mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) =
if all (== n) cc
then return n
else Bad ""
mkOneNonTerm _ = Bad ""
counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
tryMkCFTerm itss = return itss
extrR arg lab = case (arg0,labs) of
(Arg (A cat pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
(Arg (AB cat b pos), [(LV _)]) -> return [[PNonterm (cIQ cat) pos labs False]]
(Arg (A cat pos), _) -> return [[PNonterm (cIQ cat) pos labs True]]
(Arg (AB cat b pos), _) -> return [[PNonterm (cIQ cat) pos labs True]]
---- ??
_ -> prtBad "cannot extract record field from" arg
where
(arg0,labs) = headProj arg [lab]
headProj r ls = case r of
P r0 l0 -> headProj r0 (l0:ls)
S r0 _ -> headProj r0 ls
_ -> (r,ls)
cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c
mkCFPredef :: Options -> [Ident] -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef)
mkCFPredef opts binds rules = (ruls, \s -> preds0 s ++ look s) where
(ruls,preds) = if oElem lexerByNeed opts -- option -cflexer
then predefLexer rules
else (rules,emptyTrie)
preds0 s =
[(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++
[(cat, varCFFun x) | TV x <- [s], cat <- catVarCF : bindcats] ++
[(cfCatString, stringCFFun t) | TL t <- [s]] ++
[(cfCatInt, intCFFun t) | TI t <- [s]] ++
[(cfCatFloat, floatCFFun t) | TF t <- [s]]
cats = nub [c | (_,rs) <- rules, (_,(_,its)) <- rs, CFNonterm c <- its]
bindcats = [c | c <- cats, elem (cfCat2Ident c) binds]
look = concatMap snd . map (trieLookup preds) . wordsCFTok --- for TC tokens
--- TODO: integrate with morphology
--- predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)]))
predefLexer groups = (reverse ruls, tcompile preds) where
(ruls,preds) = foldr mkOne ([],[]) groups
mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where
(rule,pre) = case partition isLexical rules of
([],_) -> (group,[])
(ls,rest) -> ((cat,rest), concatMap mkLexRule ls)
isLexical (f,(c,its)) = case its of
[CFTerm (RegAlts ws)] -> True
_ -> False
mkLexRule r = case r of
(fun,(cat,[CFTerm (RegAlts ws)])) -> [(w, [(cat,fun)]) | w <- ws]
_ -> []

View File

@@ -0,0 +1,206 @@
----------------------------------------------------------------------
-- |
-- Module : ChartParser
-- Maintainer : Peter Ljunglöf
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:12 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.10 $
--
-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5.
-- OBSOLETE -- should use new MCFG parsers instead
-----------------------------------------------------------------------------
module GF.CF.ChartParser (chartParser) where
-- import Tracing
-- import PrintParser
-- import PrintSimplifiedTerm
import GF.Data.Operations
import GF.CF.CF
import GF.CF.CFIdent
import GF.CF.PPrCF (prCFItem)
import GF.Data.OrdSet
import GF.Data.OrdMap2
import Data.List (groupBy)
type Token = CFTok
type Name = CFFun
type Category = CFItem
type Grammar = ([Production], Terminal)
type Production = (Name, Category, [Category])
type Terminal = Token -> [(Category, Maybe Name)]
type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String)
data ParseTree = Node Name Category [ParseTree] | Leaf Token
maxTake :: Int
-- maxTake = 1000
maxTake = maxBound
--------------------------------------------------
-- converting between GF parsing and CFG parsing
buildParser :: GParser -> CF -> CFCat -> CFParser
buildParser gparser cf = parse
where
parse = \start input ->
let parse2 = parse' (CFNonterm start) input in
(take maxTake [(parse2tree t, []) | t <- fst parse2], snd parse2)
parse' = gparser (cf2grammar cf)
cf2grammar :: CF -> Grammar
cf2grammar cf = (productions, terminal)
where
productions = [ (name, CFNonterm cat, rhs) |
(name, (cat, rhs)) <- cfRules ]
terminal tok = [ (CFNonterm cat, Just name) |
(cat, name) <- cfPredef tok ]
++
[ (item, Nothing) |
item <- elems rhsItems,
matchCFTerm item tok ]
cfRules = rulesOfCF cf
cfPredef = predefOfCF cf
rhsItems :: Set Category
rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
parse2tree :: ParseTree -> CFTree
parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees'))
where
trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs
maybeNode :: Maybe Name -> Category -> Token -> ParseTree
maybeNode (Just name) cat tok = Node name cat [Leaf tok]
maybeNode Nothing _ tok = Leaf tok
--------------------------------------------------
-- chart parsing (bottom up kilbury-like)
type Chart = [CState]
type CState = Set Edge
type Edge = (Int, Category, [Category])
type Passive = (Int, Int, Category)
chartParser :: CF -> CFCat -> CFParser
chartParser = buildParser chartParser0
chartParser0 :: GParser
chartParser0 (productions, terminal) = cparse
where
emptyCats :: Set Category
emptyCats = empties emptySet
where
empties cats | cats==cats' = cats
| otherwise = empties cats'
where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
all (`elemSet` cats) rhs ]
grammarMap :: Map Category [(Name, [Category])]
grammarMap = makeMapWith (++)
[ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
leftCornerMap :: Map Category (Set (Category,[Category]))
leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) |
(_, b, abs) <- productions,
(a : bs) <- removeNullable abs ]
removeNullable :: [Category] -> [[Category]]
removeNullable [] = []
removeNullable cats@(cat:cats')
| cat `elemSet` emptyCats = cats : removeNullable cats'
| otherwise = [cats]
cparse :: Category -> [Token] -> ([ParseTree], String)
cparse start input = -- trace "ChartParser" $
case lookup (0, length input, start) $
-- tracePrt "#edgeTrees" (prt . map (length.snd)) $
edgeTrees of
Just trees -> -- tracePrt "#trees" (prt . length . fst) $
(trees, "Chart:" ++++ prChart passiveEdges)
Nothing -> ([], "Chart:" ++++ prChart passiveEdges)
where
finalChart :: Chart
finalChart = map buildState initialChart
finalChartMap :: [Map Category (Set Edge)]
finalChartMap = map stateMap finalChart
stateMap :: CState -> Map Category (Set Edge)
stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
(i, b, a:bs) <- elems state ]
initialChart :: Chart
initialChart = -- tracePrt "#initialChart" (prt . map (length.elems)) $
emptySet : map initialState (zip [0..] input)
where initialState (j, sym) = makeSet [ (j, cat, []) |
(cat, _) <- terminal sym ]
buildState :: CState -> CState
buildState = limit more
where more (j, a, []) = ordSet [ (j, b, bs) |
(b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
<++>
lookupWith emptySet (finalChartMap !! j) a
more (j, b, a:bs) = ordSet [ (j, b, bs) |
a `elemSet` emptyCats ]
passiveEdges :: [Passive]
passiveEdges = -- tracePrt "#passiveEdges" (prt . length) $
[ (i, j, cat) |
(j, state) <- zip [0..] $
-- tracePrt "#passiveChart"
-- (prt . map (length.filter (\(_,_,x)->null x).elems)) $
-- tracePrt "#activeChart" (prt . map (length.elems)) $
finalChart,
(i, cat, []) <- elems state ]
++
[ (i, i, cat) |
i <- [0 .. length input],
cat <- elems emptyCats ]
edgeTrees :: [ (Passive, [ParseTree]) ]
edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
((i,j,c), trees) <- edgeTrees ]
treesFor :: Passive -> [ParseTree]
treesFor (i, j, cat) = [ Node name cat trees |
(name, rhs) <- lookupWith [] grammarMap cat,
trees <- children rhs i j ]
++
[ maybeNode name cat tok |
i == j-1,
let tok = input !! i,
Just name <- [lookup cat (terminal tok)] ]
children :: [Category] -> Int -> Int -> [[ParseTree]]
children [] i k = [ [] | i == k ]
children (c:cs) i k = [ tree : rest |
i <= k,
(j, trees) <- lookupWith [] edgeTreesMap (i,c),
rest <- children cs j k,
tree <- trees ]
{-
instance Print ParseTree where
prt (Node name cat trees) = prt name++"."++prt cat++"^{"++prtSep "," trees++"}"
prt (Leaf token) = prt token
-}
-- AR 10/12/2002
prChart :: [Passive] -> String
prChart = unlines . map (unwords . map prOne) . positions where
prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
positions = groupBy (\ (i,_,_) (j,_,_) -> i == j)

191
src-3.0/GF/CF/EBNF.hs Normal file
View File

@@ -0,0 +1,191 @@
----------------------------------------------------------------------
-- |
-- Module : EBNF
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:13 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.5 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
module GF.CF.EBNF (pEBNFasGrammar) where
import GF.Data.Operations
import GF.Data.Parsers
import GF.Infra.Comments
import GF.CF.CF
import GF.CF.CFIdent
import GF.Grammar.Grammar
import GF.Grammar.PrGrammar
import GF.CF.CFtoGrammar
import qualified GF.Source.AbsGF as A
import Data.List (nub, partition)
-- AR 18/4/2000 - 31/3/2004
-- Extended BNF grammar with token type a
-- put a = String for simple applications
type EBNF = [ERule]
type ERule = (ECat, ERHS)
type ECat = (String,[Int])
type ETok = String
ebnfID = "EBNF" ---- make this parametric!
data ERHS =
ETerm ETok
| ENonTerm ECat
| ESeq ERHS ERHS
| EAlt ERHS ERHS
| EStar ERHS
| EPlus ERHS
| EOpt ERHS
| EEmpty
type CFRHS = [CFItem]
type CFJustRule = (CFCat, CFRHS)
ebnf2gf :: EBNF -> [A.TopDef]
ebnf2gf = cf2grammar . rules2CF . ebnf2cf
ebnf2cf :: EBNF -> [CFRule]
ebnf2cf ebnf = [(mkCFF i rule,rule) | (i,rule) <- zip [0..] (normEBNF ebnf)] where
mkCFF i (CFCat (_,c), _) = string2CFFun ebnfID ("Mk" ++ prt c ++ "_" ++ show i)
normEBNF :: EBNF -> [CFJustRule]
normEBNF erules = let
erules1 = [normERule ([i],r) | (i,r) <- zip [0..] erules]
erules2 = erules1 ---refreshECats erules1 --- this seems to be just bad !
erules3 = concat (map pickERules erules2)
erules4 = nubERules erules3
in [(mkCFCatE cat, map eitem2cfitem its) | (cat,itss) <- erules3, its <- itss]
refreshECats :: [NormERule] -> [NormERule]
refreshECats rules = [recas [i] rule | (i,rule) <- zip [0..] rules] where
recas ii (cat,its) = (updECat ii cat, [recss ii 0 s | s <- its])
recss ii n [] = []
recss ii n (s:ss) = recit (ii ++ [n]) s : recss ii (n+1) ss
recit ii it = case it of
EINonTerm cat -> EINonTerm (updECat ii cat)
EIStar (cat,t) -> EIStar (updECat ii cat, [recss ii 0 s | s <- t])
EIPlus (cat,t) -> EIPlus (updECat ii cat, [recss ii 0 s | s <- t])
EIOpt (cat,t) -> EIOpt (updECat ii cat, [recss ii 0 s | s <- t])
_ -> it
pickERules :: NormERule -> [NormERule]
pickERules rule@(cat,alts) = rule : concat (map pics (concat alts)) where
pics it = case it of
EIStar ru@(cat,t) -> mkEStarRules cat ++ pickERules ru
EIPlus ru@(cat,t) -> mkEPlusRules cat ++ pickERules ru
EIOpt ru@(cat,t) -> mkEOptRules cat ++ pickERules ru
_ -> []
mkEStarRules cat = [(cat', [[],[EINonTerm cat, EINonTerm cat']])]
where cat' = mkNewECat cat "Star"
mkEPlusRules cat = [(cat', [[EINonTerm cat],[EINonTerm cat, EINonTerm cat']])]
where cat' = mkNewECat cat "Plus"
mkEOptRules cat = [(cat', [[],[EINonTerm cat]])]
where cat' = mkNewECat cat "Opt"
nubERules :: [NormERule] -> [NormERule]
nubERules rules = nub optim where
optim = map (substERules (map mkSubst replaces)) irreducibles
(replaces,irreducibles) = partition reducible rules
reducible (cat,[items]) = isNewCat cat && all isOldIt items
reducible _ = False
isNewCat (_,ints) = ints == []
isOldIt (EITerm _) = True
isOldIt (EINonTerm cat) = not (isNewCat cat)
isOldIt _ = False
mkSubst (cat,its) = (cat, head its) -- def of reducible: its must be singleton
--- the optimization assumes each cat has at most one EBNF rule.
substERules :: [(ECat,[EItem])] -> NormERule -> NormERule
substERules g (cat,itss) = (cat, map sub itss) where
sub [] = []
sub (i@(EINonTerm cat') : ii) = case lookup cat g of
Just its -> its ++ sub ii
_ -> i : sub ii
sub (EIStar r : ii) = EIStar (substERules g r) : ii
sub (EIPlus r : ii) = EIPlus (substERules g r) : ii
sub (EIOpt r : ii) = EIOpt (substERules g r) : ii
eitem2cfitem :: EItem -> CFItem
eitem2cfitem it = case it of
EITerm a -> atomCFTerm $ tS a
EINonTerm cat -> CFNonterm (mkCFCatE cat)
EIStar (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Star"))
EIPlus (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Plus"))
EIOpt (cat,_) -> CFNonterm (mkCFCatE (mkNewECat cat "Opt"))
type NormERule = (ECat,[[EItem]]) -- disjunction of sequences of items
data EItem =
EITerm String
| EINonTerm ECat
| EIStar NormERule
| EIPlus NormERule
| EIOpt NormERule
deriving Eq
normERule :: ([Int],ERule) -> NormERule
normERule (ii,(cat,rhs)) =
(cat,[map (mkEItem (ii ++ [i])) r' | (i,r') <- zip [0..] (disjNorm rhs)]) where
disjNorm r = case r of
ESeq r1 r2 -> [x ++ y | x <- disjNorm r1, y <- disjNorm r2]
EAlt r1 r2 -> disjNorm r1 ++ disjNorm r2
EEmpty -> [[]]
_ -> [[r]]
mkEItem :: [Int] -> ERHS -> EItem
mkEItem ii rhs = case rhs of
ETerm a -> EITerm a
ENonTerm cat -> EINonTerm cat
EStar r -> EIStar (normERule (ii,(mkECat ii, r)))
EPlus r -> EIPlus (normERule (ii,(mkECat ii, r)))
EOpt r -> EIOpt (normERule (ii,(mkECat ii, r)))
_ -> EINonTerm ("?????",[])
-- _ -> error "should not happen in ebnf" ---
mkECat ints = ("C", ints)
prECat (c,[]) = c
prECat (c,ints) = c ++ "_" ++ prTList "_" (map show ints)
mkCFCatE :: ECat -> CFCat
mkCFCatE = string2CFCat ebnfID . prECat
updECat _ (c,[]) = (c,[])
updECat ii (c,_) = (c,ii)
mkNewECat (c,ii) str = (c ++ str,ii)
------ parser for EBNF grammars
pEBNFasGrammar :: String -> Err [A.TopDef]
pEBNFasGrammar = parseResultErr (pEBNF *** ebnf2gf) . remComments
pEBNF :: Parser Char EBNF
pEBNF = longestOfMany (pJ pERule)
pERule :: Parser Char ERule
pERule = pECat ... pJ (literals ":=" ||| literals "::=") +.. pERHS 0 ..+ jL ";"
pERHS :: Int -> Parser Char ERHS
pERHS 0 = pTList "|" (pERHS 1) *** foldr1 EAlt
pERHS 1 = longestOfMany (pJ (pERHS 2)) *** foldr ESeq EEmpty
pERHS 2 = pERHS 3 ... pJ pUnaryEOp *** (\ (a,f) -> f a)
pERHS 3 = pQuotedString *** ETerm
||| pECat *** ENonTerm ||| pParenth (pERHS 0)
pUnaryEOp :: Parser Char (ERHS -> ERHS)
pUnaryEOp =
lits "*" <<< EStar ||| lits "+" <<< EPlus ||| lits "?" <<< EOpt ||| succeed id
pECat = pIdent *** (\c -> (c,[]))

102
src-3.0/GF/CF/PPrCF.hs Normal file
View File

@@ -0,0 +1,102 @@
----------------------------------------------------------------------
-- |
-- Module : PPrCF
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/15 17:56:13 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.13 $
--
-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003
--
-- use the Print class instead!
-----------------------------------------------------------------------------
module GF.CF.PPrCF (prCF, prCFTree, prCFRule, prCFFun, prCFCat, prCFItem, prRegExp, pCF) where
import GF.Data.Operations
import GF.CF.CF
import GF.CF.CFIdent
import GF.Canon.AbsGFC
import GF.Grammar.PrGrammar
import Data.Char
import Data.List
prCF :: CF -> String
prCF = unlines . (map prCFRule) . rulesOfCF -- hiding the literal recogn function
prCFTree :: CFTree -> String
prCFTree (CFTree (fun, (_,trees))) = prCFFun fun ++ prs trees where
prs [] = ""
prs ts = " " ++ unwords (map ps ts)
ps t@(CFTree (_,(_,[]))) = prCFTree t
ps t = prParenth (prCFTree t)
{-# NOINLINE prCFTree #-}
-- Workaround ghc 6.8.2 bug
prCFRule :: CFRule -> String
prCFRule (fun,(cat,its)) =
prCFFun fun ++ "." +++ prCFCat cat +++ "::=" +++
unwords (map prCFItem its) +++ ";"
prCFFun :: CFFun -> String
prCFFun = prCFFun' True ---- False -- print profiles for debug
prCFFun' :: Bool -> CFFun -> String
prCFFun' profs (CFFun (t, p)) = prt_ t ++ pp p where
pp p = if (not profs || normal p) then "" else "_" ++ concat (map show p)
normal p = and [x==y && null b | ((b,x),y) <- zip p (map (:[]) [0..])]
prCFCat :: CFCat -> String
prCFCat (CFCat (c,l)) = prt_ c ++ case prt_ l of
"s" -> []
_ -> "-" ++ prt_ l ----
prCFItem :: CFItem -> String
prCFItem (CFNonterm c) = prCFCat c
prCFItem (CFTerm a) = prRegExp a
prRegExp :: RegExp -> String
prRegExp (RegAlts tt) = case tt of
[t] -> prQuotedString t
_ -> prParenth (prTList " | " (map prQuotedString tt))
-- rules have an amazingly easy parser, if we use the format
-- fun. C -> item1 item2 ... where unquoted items are treated as cats
-- Actually would be nice to add profiles to this.
getCFRule :: String -> String -> Err [CFRule]
getCFRule mo s = getcf (wrds s) where
getcf ws = case ws of
fun : cat : a : its | isArrow a ->
Ok [(string2CFFun mo (init fun),
(string2CFCat mo cat, map mkIt its))]
cat : a : its | isArrow a ->
Ok [(string2CFFun mo (mkFun cat it),
(string2CFCat mo cat, map mkIt it)) | it <- chunk its]
_ -> Bad (" invalid rule:" +++ s)
isArrow a = elem a ["->", "::="]
mkIt w = case w of
('"':w@(_:_)) -> atomCFTerm (string2CFTok (init w))
_ -> CFNonterm (string2CFCat mo w)
chunk its = case its of
[] -> [[]]
_ -> chunks "|" its
mkFun cat its = case its of
[] -> cat ++ "_"
_ -> concat $ intersperse "_" (cat : map clean its) -- CLE style
clean = filter isAlphaNum -- to form valid identifiers
wrds = takeWhile (/= ";") . words -- to permit semicolon in the end
pCF :: String -> String -> Err [CFRule]
pCF mo s = do
rules <- mapM (getCFRule mo) $ filter isRule $ lines s
return $ concat rules
where
isRule line = case dropWhile isSpace line of
'-':'-':_ -> False
_ -> not $ all isSpace line

150
src-3.0/GF/CF/PrLBNF.hs Normal file
View File

@@ -0,0 +1,150 @@
----------------------------------------------------------------------
-- |
-- Module : PrLBNF
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/17 14:15:16 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.11 $
--
-- Printing CF grammars generated from GF as LBNF grammar for BNFC.
-- AR 26/1/2000 -- 9/6/2003 (PPrCF) -- 8/11/2003 -- 27/9/2004.
-- With primitive error messaging, by rules and rule tails commented out
-----------------------------------------------------------------------------
module GF.CF.PrLBNF (prLBNF,prBNF) where
import GF.CF.CF
import GF.CF.CFIdent
import GF.Canon.AbsGFC
import GF.Infra.Ident
import GF.Grammar.PrGrammar
import GF.Compile.ShellState
import GF.Canon.GFC
import GF.Canon.Look
import GF.Data.Operations
import GF.Infra.Modules
import Data.Char
import Data.List (nub)
prLBNF :: Bool -> StateGrammar -> String
prLBNF new gr = unlines $ pragmas ++ (map (prCFRule cs) rules')
where
cs = map IC ["Int","String"] ++ [catIdPlus c | (_,(c,_)) <- rules]
cf = stateCF gr
(pragmas,rules) = if new -- tries to treat precedence levels
then mkLBNF (stateGrammarST gr) $ rulesOfCF cf
else ([],rulesOfCF cf) -- "normal" behaviour
rules' = concatMap expand rules
expand (f,(c,its)) = [(f,(c,it)) | it <- combinations (map expIt its)]
expIt i = case i of
CFTerm (RegAlts ss) -> [CFTerm (RegAlts [s]) | s <- ss]
_ -> [i]
-- | a hack to hide the LBNF details
prBNF :: Bool -> StateGrammar -> String
prBNF b = unlines . (map (unwords . unLBNF . drop 1 . words)) . lines . prLBNF b
where
unLBNF r = case r of
"---":ts -> ts
";":"---":ts -> ts
c:ts -> c : unLBNF ts
_ -> r
--- | awful low level code without abstraction over label names etc
mkLBNF :: CanonGrammar -> [CFRule] -> ([String],[CFRule])
mkLBNF gr rules = (coercions, nub $ concatMap mkRule rules) where
coercions = ["coercions" +++ prt_ c +++ show n +++ ";" |
(_,ModMod m) <- modules gr,
(c,CncCat (RecType ls) _ _) <- tree2list $ jments m,
Lbg (L (IC "p")) (TInts n) <- ls
]
precedences = [(f,(prec,assoc)) |
(_,ModMod m) <- modules gr,
(f,CncFun _ _ (R lin) _) <- tree2list $ jments m,
(Just prec, Just assoc) <- [(
lookup "p" [(lab,p) | Ass (L (IC lab)) (EInt p) <- lin],
lookup "a" [(lab,a) | Ass (L (IC lab)) (Par (CIQ _ (IC a)) []) <- lin]
)]
]
precfuns = map fst precedences
mkRule r@(fun@(CFFun (t, p)),(cat,its)) = case t of
AC (CIQ _ c) -> case lookup c precedences of
Just (prec,assoc) -> [(fun,(mkCat prec cat,mkIts cat prec assoc 0 its))]
_ -> return r
AD (CIQ _ c) -> case lookup c precedences of
Just (prec,assoc) -> [(fun,(mkCat prec cat,mkIts cat prec assoc 0 its))]
_ -> return r
_ -> return r
mkIts cat prec assoc i its = case its of
CFTerm (RegAlts ["("]):n@(CFNonterm k):CFTerm (RegAlts [")"]):rest | k==cat ->
mkIts cat prec assoc i $ n:rest -- remove variants with parentheses
CFNonterm k:rest | k==cat ->
CFNonterm (mkNonterm prec assoc i k) : mkIts cat prec assoc (i+1) rest
it:rest -> it:mkIts cat prec assoc i rest
[] -> []
mkCat prec (CFCat ((CIQ m (IC c)),l)) = CFCat ((CIQ m (IC (c ++ show prec ++ "+"))),l)
mkNonterm prec assoc i cat = mkCat prec' cat
where
prec' = case (assoc,i) of
("PL",0) -> prec
("PR",0) -> prec + 1
("PR",_) -> prec
_ -> prec + 1
catId ((CFCat ((CIQ _ c),l))) = c
catIdPlus ((CFCat ((CIQ _ c@(IC s)),l))) = case reverse s of
'+':cs -> IC $ reverse $ dropWhile isDigit cs
_ -> c
prCFRule :: [Ident] -> CFRule -> String
prCFRule cs (fun,(cat,its)) =
prCFFun cat fun ++ "." +++ prCFCat True cat +++ "::=" +++ --- err in cat -> in syntax
unwords (map (prCFItem cs) its) +++ ";"
prCFFun :: CFCat -> CFFun -> String
prCFFun (CFCat (_,l)) (CFFun (t, p)) = case t of
AC (CIQ _ x) -> let f = prId True x in (f ++ lab +++ f2 f +++ prP p)
AD (CIQ _ x) -> let f = prId True x in (f ++ lab +++ f2 f +++ prP p)
_ -> prErr True $ prt t
where
lab = prLab l
f2 f = if null lab then "" else f
prP = concatMap show
prId b i = case i of
IC "Int" -> "Integer"
IC "#Var" -> "Ident"
IC "Var" -> "Ident"
IC "id_" -> "_"
IC s@(c:_) | last s == '+' -> init s -- hack to save precedence information
IC s@(c:_) | isUpper c -> s ++ if isDigit (last s) then "_" else ""
_ -> prErr b $ prt i
prLab i = case i of
L (IC "s") -> "" ---
L (IC "_") -> "" ---
_ -> let x = prt i in "_" ++ x ++ if isDigit (last x) then "_" else ""
-- | just comment out the rest if you cannot interpret the function name in LBNF
-- two versions, depending on whether in the beginning of a rule or elsewhere;
-- in the latter case, error just terminates the rule
prErr :: Bool -> String -> String
prErr b s = (if b then "" else " ;") +++ "---" +++ s
prCFCat :: Bool -> CFCat -> String
prCFCat b (CFCat ((CIQ _ c),l)) = prId b c ++ prLab l ----
-- | if a category does not have a production of its own, we replace it by Ident
prCFItem cs (CFNonterm c) = if elem (catIdPlus c) cs then prCFCat False c else "Ident"
prCFItem _ (CFTerm a) = prRegExp a
prRegExp (RegAlts tt) = case tt of
[t] -> prQuotedString t
_ -> prErr False $ prParenth (prTList " | " (map prQuotedString tt))

106
src-3.0/GF/CF/Profile.hs Normal file
View File

@@ -0,0 +1,106 @@
----------------------------------------------------------------------
-- |
-- Module : Profile
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:21:14 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001
-- revised 8/4/2002 for the new profile structure
-----------------------------------------------------------------------------
module GF.CF.Profile (postParse) where
import GF.Canon.AbsGFC
import GF.Canon.GFC
import qualified GF.Infra.Ident as I
import GF.Canon.CMacros
---import MMacros
import GF.CF.CF
import GF.CF.CFIdent
import GF.CF.PPrCF -- for error msg
import GF.Grammar.PrGrammar
import GF.Data.Operations
import Control.Monad
import Data.List (nub)
-- | the job is done in two passes:
--
-- 1. tree2term: restore constituent order from Profile
--
-- 2. term2trm: restore Bindings from Binds
postParse :: CFTree -> Err Exp
postParse tree = do
iterm <- errIn ("postprocessing parse tree" +++ prCFTree tree) $ tree2term tree
return $ term2trm iterm
-- | an intermediate data structure
data ITerm = ITerm (Atom, BindVs) [ITerm] | IMeta deriving (Eq,Show)
type BindVs = [[I.Ident]]
-- | (1) restore constituent order from Profile
tree2term :: CFTree -> Err ITerm
-- tree2term (CFTree (f,(_,[t]))) | f == dummyCFFun = tree2term t -- not used
tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
AM _ -> return IMeta
_ -> do
args <- mapM mkArg pro
binds <- mapM mkBinds pro
return $ ITerm (fun, binds) args
where
mkArg (_,arg) = case arg of
[x] -> do -- one occurrence
trx <- trees !? x
tree2term trx
[] -> return IMeta -- suppression
_ -> do -- reduplication
trees' <- mapM (trees !?) arg
xs1 <- mapM tree2term trees'
xs2 <- checkArity xs1
unif xs2
checkArity xs = if length (nub [length xx | ITerm _ xx <- xs']) > 1
then Bad "arity error"
else return xs'
where xs' = [t | t@(ITerm _ _) <- xs]
unif xs = case [t | t@(ITerm _ _) <- xs] of
[] -> return $ IMeta
(ITerm fp@(f,_) xx : ts) -> do
let hs = [h | ITerm (h,_) _ <- ts, h /= f]
testErr (null hs) -- if fails, hs must be nonempty
("unification expects" +++ prt f +++ "but found" +++ prt (head hs))
xx' <- mapM unifArg [0 .. length xx - 1]
return $ ITerm fp xx'
where
unifArg i = unif [zz !! i | ITerm _ zz <- xs]
mkBinds (xss,_) = mapM mkBind xss
mkBind xs = do
ts <- mapM (trees !?) xs
let vs = [x | CFTree (CFFun (AV x,_),(_,[])) <- ts]
testErr (length ts == length vs) "non-variable in bound position"
case vs of
[x] -> return x
[] -> return $ I.identC "h_" ---- uBoundVar
y:ys -> do
testErr (all (==y) ys) ("fail to unify bindings of" +++ prt y)
return y
-- | (2) restore Bindings from Binds
term2trm :: ITerm -> Exp
term2trm IMeta = EAtom (AM 0) ---- mExp0
term2trm (ITerm (fun, binds) terms) =
let bterms = zip binds terms
in mkAppAtom fun [mkAbsR xs (term2trm t) | (xs,t) <- bterms]
--- these are deprecated
where
mkAbsR c e = foldr EAbs e c
mkAppAtom a = mkApp (EAtom a)
mkApp = foldl EApp