mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-01 15:22:50 -06:00
Founding the newly structured GF2.0 cvs archive.
This commit is contained in:
180
src/GF/CF/CF.hs
Normal file
180
src/GF/CF/CF.hs
Normal file
@@ -0,0 +1,180 @@
|
||||
module CF where
|
||||
|
||||
import Operations
|
||||
import Str
|
||||
import AbsGFC
|
||||
import GFC
|
||||
import CFIdent
|
||||
import List (nub,nubBy)
|
||||
import Char (isUpper, isLower, toUpper, toLower)
|
||||
|
||||
-- context-free grammars. AR 15/12/1999 -- 30/3/2000 -- 2/6/2001 -- 3/12/2001
|
||||
|
||||
-- CF grammar data types
|
||||
|
||||
-- abstract type CF.
|
||||
-- Invariant: each category has all its rules grouped with it
|
||||
-- also: the list is never empty (the category is just missing then)
|
||||
newtype CF = CF ([(CFCat,[CFRule])], CFPredef)
|
||||
type CFRule = (CFFun, (CFCat, [CFItem]))
|
||||
|
||||
-- CFPredef is a hack for variable symbols and literals; normally = const []
|
||||
data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
|
||||
|
||||
newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
|
||||
|
||||
type CFPredef = CFTok -> [(CFCat, CFFun)] -- recognize literals, variables, etc
|
||||
|
||||
-- Wadler style + return information
|
||||
type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
|
||||
|
||||
cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
|
||||
cfParseResults rs = [b | (b,[]) <- fst rs]
|
||||
|
||||
-- terminals are regular expressions on words; to be completed to full regexp
|
||||
data RegExp =
|
||||
RegAlts [CFWord] -- list of alternative words
|
||||
| RegSpec CFTok -- special token
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type CFWord = String
|
||||
|
||||
-- the above types should be kept abstract, and the following functions used
|
||||
|
||||
-- to construct CF grammars
|
||||
|
||||
emptyCF :: CF
|
||||
emptyCF = CF ([], emptyCFPredef)
|
||||
|
||||
emptyCFPredef :: CFPredef
|
||||
emptyCFPredef = const []
|
||||
|
||||
rules2CF :: [CFRule] -> CF
|
||||
rules2CF rs = CF (groupCFRules rs, emptyCFPredef)
|
||||
|
||||
groupCFRules :: [CFRule] -> [(CFCat,[CFRule])]
|
||||
groupCFRules = foldr ins [] where
|
||||
ins rule crs = case crs of
|
||||
(c,r) : rs | compatCF c cat -> (c,rule:r) : rs
|
||||
cr : rs -> cr : ins rule rs
|
||||
_ -> [(cat,[rule])]
|
||||
where
|
||||
cat = valCatCF rule
|
||||
|
||||
-- to construct rules
|
||||
|
||||
-- make a rule from a single token without constituents
|
||||
atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
|
||||
atomCFRule c f s = (f, (c, [atomCFTerm s]))
|
||||
|
||||
-- usual terminal
|
||||
atomCFTerm :: CFTok -> CFItem
|
||||
atomCFTerm = CFTerm . atomRegExp
|
||||
|
||||
atomRegExp :: CFTok -> RegExp
|
||||
atomRegExp t = case t of
|
||||
TS s -> RegAlts [s]
|
||||
_ -> RegSpec t
|
||||
|
||||
-- terminal consisting of alternatives
|
||||
altsCFTerm :: [String] -> CFItem
|
||||
altsCFTerm = CFTerm . RegAlts
|
||||
|
||||
|
||||
-- to construct trees
|
||||
|
||||
-- make a tree without constituents
|
||||
atomCFTree :: CFCat -> CFFun -> CFTree
|
||||
atomCFTree c f = buildCFTree c f []
|
||||
|
||||
-- make a tree with constituents.
|
||||
buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
|
||||
buildCFTree c f trees = CFTree (f,(c,trees))
|
||||
|
||||
{- ----
|
||||
cfMeta0 :: CFTree
|
||||
cfMeta0 = atomCFTree uCFCat metaCFFun
|
||||
|
||||
-- used in happy
|
||||
litCFTree :: String -> CFTree --- Maybe CFTree
|
||||
litCFTree s = maybe cfMeta0 id $ do
|
||||
(c,f) <- getCFLiteral s
|
||||
return $ buildCFTree c f []
|
||||
-}
|
||||
|
||||
-- to decide whether a token matches a terminal item
|
||||
|
||||
matchCFTerm :: CFItem -> CFTok -> Bool
|
||||
matchCFTerm (CFTerm t) s = satRegExp t s
|
||||
matchCFTerm _ _ = False
|
||||
|
||||
satRegExp :: RegExp -> CFTok -> Bool
|
||||
satRegExp r t = case (r,t) of
|
||||
(RegAlts tt, TS s) -> elem s tt
|
||||
(RegAlts tt, TC s) -> or [elem s' tt | s' <- caseUpperOrLower s]
|
||||
(RegSpec x, _) -> t == x ---
|
||||
_ -> False
|
||||
where
|
||||
caseUpperOrLower s = case s of
|
||||
c:cs | isUpper c -> [s, toLower c : cs]
|
||||
c:cs | isLower c -> [s, toUpper c : cs]
|
||||
_ -> [s]
|
||||
|
||||
-- to analyse a CF grammar
|
||||
|
||||
catsOfCF :: CF -> [CFCat]
|
||||
catsOfCF (CF (rr,_)) = map fst rr
|
||||
|
||||
rulesOfCF :: CF -> [CFRule]
|
||||
rulesOfCF (CF (rr,_)) = concatMap snd rr
|
||||
|
||||
ruleGroupsOfCF :: CF -> [(CFCat,[CFRule])]
|
||||
ruleGroupsOfCF (CF (rr,_)) = rr
|
||||
|
||||
rulesForCFCat :: CF -> CFCat -> [CFRule]
|
||||
rulesForCFCat (CF (rr,_)) cat = maybe [] id $ lookup cat rr
|
||||
|
||||
valCatCF :: CFRule -> CFCat
|
||||
valCatCF (_,(c,_)) = c
|
||||
|
||||
valItemsCF :: CFRule -> [CFItem]
|
||||
valItemsCF (_,(_,i)) = i
|
||||
|
||||
valFunCF :: CFRule -> CFFun
|
||||
valFunCF (f,(_,_)) = f
|
||||
|
||||
startCat :: CF -> CFCat
|
||||
startCat (CF (rr,_)) = fst (head rr) --- hardly useful
|
||||
|
||||
predefOfCF :: CF -> CFPredef
|
||||
predefOfCF (CF (_,f)) = f
|
||||
|
||||
appCFPredef :: CF -> CFTok -> [(CFCat, CFFun)]
|
||||
appCFPredef = ($) . predefOfCF
|
||||
|
||||
valCFItem :: CFItem -> Either RegExp CFCat
|
||||
valCFItem (CFTerm r) = Left r
|
||||
valCFItem (CFNonterm nt) = Right nt
|
||||
|
||||
cfTokens :: CF -> [CFWord]
|
||||
cfTokens cf = nub $ concat $ [ wordsOfRegExp i | r <- rulesOfCF cf,
|
||||
CFTerm i <- valItemsCF r]
|
||||
|
||||
wordsOfRegExp :: RegExp -> [CFWord]
|
||||
wordsOfRegExp (RegAlts tt) = tt
|
||||
wordsOfRegExp _ = []
|
||||
|
||||
forCFItem :: CFTok -> CFRule -> Bool
|
||||
forCFItem a (_,(_, CFTerm r : _)) = satRegExp r a
|
||||
forCFItem _ _ = False
|
||||
|
||||
isCircularCF :: CFRule -> Bool
|
||||
isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
|
||||
isCircularCF _ = False
|
||||
--- we should make a test of circular chains, too
|
||||
|
||||
-- coercion to the older predef cf type
|
||||
|
||||
predefRules :: CFPredef -> CFTok -> [CFRule]
|
||||
predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]
|
||||
|
||||
151
src/GF/CF/CFIdent.hs
Normal file
151
src/GF/CF/CFIdent.hs
Normal file
@@ -0,0 +1,151 @@
|
||||
module CFIdent where
|
||||
|
||||
import Operations
|
||||
import GFC
|
||||
import Ident
|
||||
import AbsGFC
|
||||
import PrGrammar
|
||||
import Str
|
||||
import Char (toLower, toUpper)
|
||||
|
||||
-- symbols (categories, functions) for context-free grammars.
|
||||
|
||||
-- these types should be abstract
|
||||
|
||||
data CFTok =
|
||||
TS String -- normal strings
|
||||
| TC String -- strings that are ambiguous between upper or lower case
|
||||
| TL String -- string literals
|
||||
| TI Int -- integer literals
|
||||
| TV Ident -- variables
|
||||
| TM Int String -- metavariables; the integer identifies it
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)
|
||||
|
||||
tS, tC, tL, tI, tV, tM :: String -> CFTok
|
||||
tS = TS
|
||||
tC = TC
|
||||
tL = TL
|
||||
tI = TI . read
|
||||
tV = TV . identC
|
||||
tM = TM 0
|
||||
|
||||
tInt :: Int -> CFTok
|
||||
tInt = TI
|
||||
|
||||
prCFTok :: CFTok -> String
|
||||
prCFTok t = case t of
|
||||
TS s -> s
|
||||
TC s -> s
|
||||
TL s -> s
|
||||
TI i -> show i
|
||||
TV x -> prt x
|
||||
TM i _ -> "?" ---
|
||||
|
||||
-- to build trees: the Atom contains a GF function, Cn | Meta | Vr | Literal
|
||||
newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Show)
|
||||
|
||||
type Profile = [([[Int]],[Int])]
|
||||
|
||||
|
||||
-- the following functions should be used instead of constructors
|
||||
|
||||
-- to construct CF functions
|
||||
|
||||
mkCFFun :: Atom -> CFFun
|
||||
mkCFFun t = CFFun (t,[])
|
||||
|
||||
{- ----
|
||||
getCFLiteral :: String -> Maybe (CFCat, CFFun)
|
||||
getCFLiteral s = case lookupLiteral' s of
|
||||
Ok (c, lit) -> Just (cat2CFCat c, mkCFFun lit)
|
||||
_ -> Nothing
|
||||
-}
|
||||
|
||||
varCFFun :: Ident -> CFFun
|
||||
varCFFun = mkCFFun . AV
|
||||
|
||||
consCFFun :: CIdent -> CFFun
|
||||
consCFFun = mkCFFun . AC
|
||||
|
||||
{- ----
|
||||
string2CFFun :: String -> CFFun
|
||||
string2CFFun = consCFFun . Ident
|
||||
-}
|
||||
|
||||
cfFun2String :: CFFun -> String
|
||||
cfFun2String (CFFun (f,_)) = prt f
|
||||
|
||||
cfFun2Profile :: CFFun -> Profile
|
||||
cfFun2Profile (CFFun (_,p)) = p
|
||||
|
||||
{- ----
|
||||
strPro2cfFun :: String -> Profile -> CFFun
|
||||
strPro2cfFun str p = (CFFun (AC (Ident str), p))
|
||||
-}
|
||||
|
||||
metaCFFun :: CFFun
|
||||
metaCFFun = mkCFFun $ AM 0
|
||||
|
||||
-- to construct CF categories
|
||||
|
||||
-- belongs elsewhere
|
||||
mkCIdent :: String -> String -> CIdent
|
||||
mkCIdent m c = CIQ (identC m) (identC c)
|
||||
|
||||
ident2CFCat :: CIdent -> Ident -> CFCat
|
||||
ident2CFCat mc d = CFCat (mc, L d)
|
||||
|
||||
-- standard way of making cf cat: label s
|
||||
string2CFCat :: String -> String -> CFCat
|
||||
string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")
|
||||
|
||||
idents2CFCat :: Ident -> Ident -> CFCat
|
||||
idents2CFCat m c = ident2CFCat (CIQ m c) (identC "s")
|
||||
|
||||
catVarCF :: CFCat
|
||||
catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----
|
||||
|
||||
{- ----
|
||||
uCFCat :: CFCat
|
||||
uCFCat = cat2CFCat uCat
|
||||
-}
|
||||
|
||||
moduleOfCFCat :: CFCat -> Ident
|
||||
moduleOfCFCat (CFCat (CIQ m _, _)) = m
|
||||
|
||||
-- the opposite direction
|
||||
cfCat2Cat :: CFCat -> CIdent
|
||||
cfCat2Cat (CFCat (s,_)) = s
|
||||
|
||||
|
||||
-- to construct CF tokens
|
||||
|
||||
string2CFTok :: String -> CFTok
|
||||
string2CFTok = tS
|
||||
|
||||
str2cftoks :: Str -> [CFTok]
|
||||
str2cftoks = map tS . words . sstr
|
||||
|
||||
-- decide if two token lists look the same (in parser postprocessing)
|
||||
|
||||
compatToks :: [CFTok] -> [CFTok] -> Bool
|
||||
compatToks ts us = and [compatTok t u | (t,u) <- zip ts us]
|
||||
|
||||
compatTok t u = any (`elem` (alts t)) (alts u) where
|
||||
alts u = case u of
|
||||
TC (c:s) -> [toLower c : s, toUpper c : s]
|
||||
_ -> [prCFTok u]
|
||||
|
||||
-- decide if two CFFuns have the same function head (profiles may differ)
|
||||
|
||||
compatCFFun :: CFFun -> CFFun -> Bool
|
||||
compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g
|
||||
|
||||
-- decide whether two categories match
|
||||
-- the modifiers can be from different modules, but on the same extension
|
||||
-- path, so there is no clash, and they can be safely ignored ---
|
||||
compatCF :: CFCat -> CFCat -> Bool
|
||||
----compatCF = (==)
|
||||
compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'
|
||||
157
src/GF/CF/CanonToCF.hs
Normal file
157
src/GF/CF/CanonToCF.hs
Normal file
@@ -0,0 +1,157 @@
|
||||
module CanonToCF where
|
||||
|
||||
import Operations
|
||||
import Option
|
||||
import Ident
|
||||
import AbsGFC
|
||||
import GFC
|
||||
import PrGrammar
|
||||
import CMacros
|
||||
import qualified Modules as M
|
||||
import CF
|
||||
import CFIdent
|
||||
import List (nub)
|
||||
import Monad
|
||||
|
||||
-- AR 27/1/2000 -- 3/12/2001 -- 8/6/2003
|
||||
|
||||
-- The main function: for a given cnc module m, build the CF grammar with all the
|
||||
-- rules coming from modules that m extends. The categories are qualified by
|
||||
-- the abstract module name a that m is of.
|
||||
|
||||
canon2cf :: Options -> CanonGrammar -> Ident -> Err CF
|
||||
canon2cf opts gr c = do
|
||||
let ms = M.allExtends gr c
|
||||
a <- M.abstractOfConcrete gr c
|
||||
let cncs = [m | (n, M.ModMod m) <- M.modules gr, elem n ms]
|
||||
let mms = [(a, tree2list (M.jments m)) | m <- cncs]
|
||||
rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts)) mms
|
||||
let rules = filter (not . isCircularCF) rules0 ---- temporarily here
|
||||
let predef = const [] ---- mkCFPredef cfcats
|
||||
return $ CF (groupCFRules rules, predef)
|
||||
|
||||
cnc2cfCond :: Options -> Ident -> [(Ident,Info)] -> Err [CFRule]
|
||||
cnc2cfCond opts m gr =
|
||||
liftM concat $
|
||||
mapM lin2cf [(m,fun,cat,args,lin) | (fun, CncFun cat args lin _) <- gr]
|
||||
|
||||
type IFun = Ident
|
||||
type ICat = CIdent
|
||||
|
||||
-- all CF rules corresponding to a linearization rule
|
||||
lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
|
||||
lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
|
||||
rhss0 <- allLinValues lin -- :: [(Label, [([Patt],Term)])]
|
||||
rhss1 <- mapM (mkCFItems m) (concat rhss0) -- :: [(Label, [[PreCFItem]])]
|
||||
mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat
|
||||
|
||||
-- making sequences of CF items from every branch in a linearization
|
||||
mkCFItems :: Ident -> (Label, [([Patt],Term)]) -> Err (Label, [[PreCFItem]])
|
||||
mkCFItems m (lab,pts) = do
|
||||
itemss <- mapM (term2CFItems m) (map snd pts)
|
||||
return (lab, concat itemss) ---- combinations? (test!)
|
||||
|
||||
-- making CF rules from sequences of CF items
|
||||
mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> (Label, [[PreCFItem]]) -> Err [CFRule]
|
||||
mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
|
||||
where
|
||||
mkOneRule its = do
|
||||
let nonterms = zip [0..] [(pos,d,v) | PNonterm _ pos d v <- its]
|
||||
profile = mkProfile nonterms
|
||||
cfcat = CFCat (redirectIdent m cat,lab)
|
||||
cffun = CFFun (AC (CIQ m fun), profile)
|
||||
cfits = map precf2cf its
|
||||
return (cffun,(cfcat,cfits))
|
||||
mkProfile nonterms = map mkOne args
|
||||
where
|
||||
mkOne (A c i) = mkOne (AB c 0 i)
|
||||
mkOne (AB _ b i) = (map mkB [0..b-1], [k | (k,(j,_,True)) <- nonterms, j==i])
|
||||
where
|
||||
mkB j = [p | (p,(k, LV l,False)) <- nonterms, k == i, l == j]
|
||||
|
||||
-- intermediate data structure of CFItems with information for profiles
|
||||
data PreCFItem =
|
||||
PTerm RegExp -- like ordinary Terminal
|
||||
| PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg
|
||||
deriving Eq
|
||||
|
||||
precf2cf :: PreCFItem -> CFItem
|
||||
precf2cf (PTerm r) = CFTerm r
|
||||
precf2cf (PNonterm cm _ (L c) True) = CFNonterm (ident2CFCat cm c)
|
||||
precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF
|
||||
|
||||
|
||||
-- the main job in translating linearization rules into sequences of cf items
|
||||
term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
|
||||
term2CFItems m t = errIn "forming cf items" $ case t of
|
||||
S c _ -> t2c c
|
||||
|
||||
T _ cc -> do
|
||||
its <- mapM t2c [t | Cas _ t <- cc]
|
||||
tryMkCFTerm (concat its)
|
||||
|
||||
C t1 t2 -> do
|
||||
its1 <- t2c t1
|
||||
its2 <- t2c t2
|
||||
return [x ++ y | x <- its1, y <- its2]
|
||||
|
||||
FV ts -> do
|
||||
its <- mapM t2c ts
|
||||
tryMkCFTerm (concat its)
|
||||
|
||||
P arg s -> extrR arg s
|
||||
|
||||
K (KS s) -> return [[PTerm (RegAlts [s]) | not (null s)]]
|
||||
|
||||
E -> return [[]]
|
||||
|
||||
K (KP d vs) -> do
|
||||
let its = [PTerm (RegAlts [s]) | s <- d]
|
||||
let itss = [[PTerm (RegAlts [s]) | s <- t] | Var t _ <- vs]
|
||||
tryMkCFTerm (its : itss)
|
||||
|
||||
_ -> prtBad "no cf for" t ----
|
||||
|
||||
where
|
||||
|
||||
t2c = term2CFItems m
|
||||
|
||||
-- optimize the number of rules by a factorization
|
||||
tryMkCFTerm :: [[PreCFItem]] -> Err [[PreCFItem]]
|
||||
tryMkCFTerm ii@(its:itss) | all (\x -> length x == length its) itss =
|
||||
case mapM mkOne (counterparts ii) of
|
||||
Ok tt -> return [tt]
|
||||
_ -> return ii
|
||||
where
|
||||
mkOne cfits = case mapM mkOneTerm cfits of
|
||||
Ok tt -> return $ PTerm (RegAlts (concat (nub tt)))
|
||||
_ -> mkOneNonTerm cfits
|
||||
mkOneTerm (PTerm (RegAlts t)) = return t
|
||||
mkOneTerm _ = Bad ""
|
||||
mkOneNonTerm (n@(PNonterm _ _ _ _) : cc) =
|
||||
if all (== n) cc
|
||||
then return n
|
||||
else Bad ""
|
||||
mkOneNonTerm _ = Bad ""
|
||||
counterparts ll = [map (!! i) ll | i <- [0..length (head ll) - 1]]
|
||||
tryMkCFTerm itss = return itss
|
||||
|
||||
extrR arg lab = case (arg,lab) of
|
||||
(Arg (A cat pos), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]]
|
||||
(Arg (A cat pos), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]]
|
||||
(Arg (AB cat pos b), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]]
|
||||
(Arg (AB cat pos b), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]]
|
||||
---- ??
|
||||
_ -> prtBad "cannot extract record field from" arg
|
||||
|
||||
{- Proof + 1 @ 4 catVarCF :: CFCat
|
||||
PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg
|
||||
|
||||
|
||||
mkCFPredef :: [CFCat] -> CFPredef
|
||||
mkCFPredef cats s =
|
||||
[(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++
|
||||
[(cat, varCFFun x) | TV x <- [s], cat <- cats] ++
|
||||
[(cat, lit) | TL t <- [s], Just (cat,lit) <- [getCFLiteral t]] ++
|
||||
[(cat, lit) | TI i <- [s], Just (cat,lit) <- [getCFLiteral (show i)]] ---
|
||||
-}
|
||||
166
src/GF/CF/ChartParser.hs
Normal file
166
src/GF/CF/ChartParser.hs
Normal file
@@ -0,0 +1,166 @@
|
||||
|
||||
module ChartParser (chartParser) where
|
||||
|
||||
import Operations
|
||||
import CF
|
||||
import CFIdent
|
||||
import PPrCF (prCFItem)
|
||||
|
||||
import OrdSet
|
||||
import OrdMap2
|
||||
|
||||
import List (groupBy)
|
||||
|
||||
type Token = CFTok
|
||||
type Name = CFFun
|
||||
type Category = CFItem
|
||||
type Grammar = ([Production], Terminal)
|
||||
type Production = (Name, Category, [Category])
|
||||
type Terminal = Token -> [(Category, Maybe Name)]
|
||||
type GParser = Grammar -> Category -> [Token] -> ([ParseTree],String)
|
||||
data ParseTree = Node Name Category [ParseTree] | Leaf Token
|
||||
|
||||
--------------------------------------------------
|
||||
-- converting between GF parsing and CFG parsing
|
||||
|
||||
buildParser :: GParser -> CF -> CFCat -> CFParser
|
||||
buildParser gparser cf = parse
|
||||
where
|
||||
parse = \start input ->
|
||||
let parse2 = parse' (CFNonterm start) input in
|
||||
([(parse2tree t, []) | t <- fst parse2], snd parse2)
|
||||
parse' = gparser (cf2grammar cf)
|
||||
|
||||
cf2grammar :: CF -> Grammar
|
||||
cf2grammar cf = (productions, terminal)
|
||||
where
|
||||
productions = [ (name, CFNonterm cat, rhs) |
|
||||
(name, (cat, rhs)) <- cfRules ]
|
||||
terminal tok = [ (CFNonterm cat, Just name) |
|
||||
(cat, name) <- cfPredef tok ]
|
||||
++
|
||||
[ (item, Nothing) |
|
||||
item <- elems rhsItems,
|
||||
matchCFTerm item tok ]
|
||||
cfRules = rulesOfCF cf
|
||||
cfPredef = predefOfCF cf
|
||||
rhsItems :: Set Category
|
||||
rhsItems = union [ makeSet rhs | (_, (_, rhs)) <- cfRules ]
|
||||
|
||||
parse2tree :: ParseTree -> CFTree
|
||||
parse2tree (Node name (CFNonterm cat) trees) = CFTree (name, (cat, trees'))
|
||||
where
|
||||
trees' = [ parse2tree t | t@(Node _ _ _) <- trees ] -- ignore leafs
|
||||
|
||||
maybeNode :: Maybe Name -> Category -> Token -> ParseTree
|
||||
maybeNode (Just name) cat tok = Node name cat [Leaf tok]
|
||||
maybeNode Nothing _ tok = Leaf tok
|
||||
|
||||
|
||||
--------------------------------------------------
|
||||
-- chart parsing (bottom up kilbury-like)
|
||||
|
||||
type Chart = [CState]
|
||||
type CState = Set Edge
|
||||
type Edge = (Int, Category, [Category])
|
||||
type Passive = (Int, Int, Category)
|
||||
|
||||
chartParser :: CF -> CFCat -> CFParser
|
||||
chartParser = buildParser chartParser0
|
||||
|
||||
chartParser0 :: GParser
|
||||
chartParser0 (productions, terminal) = cparse
|
||||
where
|
||||
emptyCats :: Set Category
|
||||
emptyCats = empties emptySet
|
||||
where
|
||||
empties cats | cats==cats' = cats
|
||||
| otherwise = empties cats'
|
||||
where cats' = makeSet [ cat | (_, cat, rhs) <- productions,
|
||||
all (`elemSet` cats) rhs ]
|
||||
|
||||
grammarMap :: Map Category [(Name, [Category])]
|
||||
grammarMap = makeMapWith (++)
|
||||
[ (cat, [(name,rhs)]) | (name, cat, rhs) <- productions ]
|
||||
|
||||
leftCornerMap :: Map Category (Set (Category,[Category]))
|
||||
leftCornerMap = makeMapWith (<++>) [ (a, unitSet (b, bs)) |
|
||||
(_, b, abs) <- productions,
|
||||
(a : bs) <- removeNullable abs ]
|
||||
|
||||
removeNullable :: [Category] -> [[Category]]
|
||||
removeNullable [] = []
|
||||
removeNullable cats@(cat:cats')
|
||||
| cat `elemSet` emptyCats = cats : removeNullable cats'
|
||||
| otherwise = [cats]
|
||||
|
||||
cparse :: Category -> [Token] -> ([ParseTree], String)
|
||||
cparse start input = case lookup (0, length input, start) edgeTrees of
|
||||
Just trees -> (trees, "Chart:" ++++ prChart passiveEdges)
|
||||
Nothing -> ([], "Chart:" ++++ prChart passiveEdges)
|
||||
where
|
||||
finalChart :: Chart
|
||||
finalChart = map buildState initialChart
|
||||
|
||||
finalChartMap :: [Map Category (Set Edge)]
|
||||
finalChartMap = map stateMap finalChart
|
||||
|
||||
stateMap :: CState -> Map Category (Set Edge)
|
||||
stateMap state = makeMapWith (<++>) [ (a, unitSet (i,b,bs)) |
|
||||
(i, b, a:bs) <- elems state ]
|
||||
|
||||
initialChart :: Chart
|
||||
initialChart = emptySet : map initialState (zip [0..] input)
|
||||
where initialState (j, sym) = makeSet [ (j, cat, []) |
|
||||
(cat, _) <- terminal sym ]
|
||||
|
||||
buildState :: CState -> CState
|
||||
buildState = limit more
|
||||
where more (j, a, []) = ordSet [ (j, b, bs) |
|
||||
(b, bs) <- elems (lookupWith emptySet leftCornerMap a) ]
|
||||
<++>
|
||||
lookupWith emptySet (finalChartMap !! j) a
|
||||
more (j, b, a:bs) = ordSet [ (j, b, bs) |
|
||||
a `elemSet` emptyCats ]
|
||||
|
||||
passiveEdges :: [Passive]
|
||||
passiveEdges = [ (i, j, cat) |
|
||||
(j, state) <- zip [0..] finalChart,
|
||||
(i, cat, []) <- elems state ]
|
||||
++
|
||||
[ (i, i, cat) |
|
||||
i <- [0 .. length input],
|
||||
cat <- elems emptyCats ]
|
||||
|
||||
edgeTrees :: [ (Passive, [ParseTree]) ]
|
||||
edgeTrees = [ (edge, treesFor edge) | edge <- passiveEdges ]
|
||||
|
||||
edgeTreesMap :: Map (Int, Category) [(Int, [ParseTree])]
|
||||
edgeTreesMap = makeMapWith (++) [ ((i,c), [(j,trees)]) |
|
||||
((i,j,c), trees) <- edgeTrees ]
|
||||
|
||||
treesFor :: Passive -> [ParseTree]
|
||||
treesFor (i, j, cat) = [ Node name cat trees |
|
||||
(name, rhs) <- lookupWith [] grammarMap cat,
|
||||
trees <- children rhs i j ]
|
||||
++
|
||||
[ maybeNode name cat tok |
|
||||
i == j-1,
|
||||
let tok = input !! i,
|
||||
Just name <- [lookup cat (terminal tok)] ]
|
||||
|
||||
children :: [Category] -> Int -> Int -> [[ParseTree]]
|
||||
children [] i k = [ [] | i == k ]
|
||||
children (c:cs) i k = [ tree : rest |
|
||||
i <= k,
|
||||
(j, trees) <- lookupWith [] edgeTreesMap (i,c),
|
||||
rest <- children cs j k,
|
||||
tree <- trees ]
|
||||
|
||||
|
||||
-- AR 10/12/2002
|
||||
|
||||
prChart :: [Passive] -> String
|
||||
prChart = unlines . map (unwords . map prOne) . positions where
|
||||
prOne (i,j,it) = show i ++ "-" ++ show j ++ "-" ++ prCFItem it
|
||||
positions = groupBy (\ (i,_,_) (j,_,_) -> i == j)
|
||||
59
src/GF/CF/PPrCF.hs
Normal file
59
src/GF/CF/PPrCF.hs
Normal file
@@ -0,0 +1,59 @@
|
||||
module PPrCF where
|
||||
|
||||
import Operations
|
||||
import CF
|
||||
import CFIdent
|
||||
import AbsGFC
|
||||
import PrGrammar
|
||||
|
||||
-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003
|
||||
---- use the Print class instead!
|
||||
|
||||
prCF :: CF -> String
|
||||
prCF = unlines . (map prCFRule) . rulesOfCF -- hiding the literal recogn function
|
||||
|
||||
prCFTree :: CFTree -> String
|
||||
prCFTree (CFTree (fun, (_,trees))) = prCFFun fun ++ prs trees where
|
||||
prs [] = ""
|
||||
prs ts = " " ++ unwords (map ps ts)
|
||||
ps t@(CFTree (_,(_,[]))) = prCFTree t
|
||||
ps t = prParenth (prCFTree t)
|
||||
|
||||
prCFRule :: CFRule -> String
|
||||
prCFRule (fun,(cat,its)) =
|
||||
prCFFun fun ++ "." +++ prCFCat cat +++ "::=" +++
|
||||
unwords (map prCFItem its) +++ ";"
|
||||
|
||||
prCFFun :: CFFun -> String
|
||||
prCFFun = prCFFun' True ---- False -- print profiles for debug
|
||||
|
||||
prCFFun' :: Bool -> CFFun -> String
|
||||
prCFFun' profs (CFFun (t, p)) = prt t ++ pp p where
|
||||
pp p = if (not profs || normal p) then "" else "_" ++ concat (map show p)
|
||||
normal p = and [x==y && null b | ((b,x),y) <- zip p (map (:[]) [0..])]
|
||||
|
||||
prCFCat :: CFCat -> String
|
||||
prCFCat (CFCat (c,l)) = prt c ++ "-" ++ prt l ----
|
||||
|
||||
prCFItem (CFNonterm c) = prCFCat c
|
||||
prCFItem (CFTerm a) = prRegExp a
|
||||
|
||||
prRegExp (RegAlts tt) = case tt of
|
||||
[t] -> prQuotedString t
|
||||
_ -> prParenth (prTList " | " (map prQuotedString tt))
|
||||
|
||||
{- ----
|
||||
-- rules have an amazingly easy parser, if we use the format
|
||||
-- fun. C -> item1 item2 ... where unquoted items are treated as cats
|
||||
-- Actually would be nice to add profiles to this.
|
||||
|
||||
getCFRule :: String -> Maybe CFRule
|
||||
getCFRule s = getcf (wrds s) where
|
||||
getcf ww | length ww > 2 && ww !! 2 `elem` ["->", "::="] =
|
||||
Just (string2CFFun (init fun), (string2CFCat cat, map mkIt its)) where
|
||||
fun : cat : _ : its = words s
|
||||
mkIt ('"':w@(_:_)) = atomCFTerm (string2CFTok (init w))
|
||||
mkIt w = CFNonterm (string2CFCat w)
|
||||
getcf _ = Nothing
|
||||
wrds = takeWhile (/= ";") . words -- to permit semicolon in the end
|
||||
-}
|
||||
95
src/GF/CF/Profile.hs
Normal file
95
src/GF/CF/Profile.hs
Normal file
@@ -0,0 +1,95 @@
|
||||
module Profile (postParse) where
|
||||
|
||||
import AbsGFC
|
||||
import GFC
|
||||
import qualified Ident as I
|
||||
import CMacros
|
||||
---import MMacros
|
||||
import CF
|
||||
import CFIdent
|
||||
import PPrCF -- for error msg
|
||||
import PrGrammar
|
||||
|
||||
import Operations
|
||||
|
||||
import Monad
|
||||
import List (nub)
|
||||
|
||||
|
||||
-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001
|
||||
-- revised 8/4/2002 for the new profile structure
|
||||
|
||||
postParse :: CFTree -> Err Exp
|
||||
postParse tree = do
|
||||
iterm <- errIn "postprocessing initial parse tree" $ tree2term tree
|
||||
return $ term2trm iterm
|
||||
|
||||
-- an intermediate data structure
|
||||
data ITerm = ITerm (Atom, BindVs) [ITerm] | IMeta deriving (Eq,Show)
|
||||
type BindVs = [[I.Ident]]
|
||||
|
||||
-- the job is done in two passes:
|
||||
-- (1) tree2term: restore constituent order from Profile
|
||||
-- (2) term2trm: restore Bindings from Binds
|
||||
|
||||
tree2term :: CFTree -> Err ITerm
|
||||
tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
|
||||
AM _ -> return IMeta
|
||||
_ -> do
|
||||
args <- mapM mkArg pro
|
||||
binds <- mapM mkBinds pro
|
||||
return $ ITerm (fun, binds) args
|
||||
where
|
||||
mkArg (_,arg) = case arg of
|
||||
[x] -> do -- one occurrence
|
||||
trx <- trees !? x
|
||||
tree2term trx
|
||||
[] -> return IMeta -- suppression
|
||||
_ -> do -- reduplication
|
||||
trees' <- mapM (trees !?) arg
|
||||
xs1 <- mapM tree2term trees'
|
||||
xs2 <- checkArity xs1
|
||||
unif xs2
|
||||
|
||||
checkArity xs = if length (nub [length xx | ITerm _ xx <- xs']) > 1
|
||||
then Bad "arity error"
|
||||
else return xs'
|
||||
where xs' = [t | t@(ITerm _ _) <- xs]
|
||||
unif [] = return $ IMeta
|
||||
unif xs@(ITerm fp@(f,_) xx : ts) = do
|
||||
let hs = [h | ITerm (h,_) _ <- ts]
|
||||
testErr (all (==f) hs) -- if fails, hs must be nonempty
|
||||
("unification expects" +++ prt f +++ "but found" +++ prt (head hs))
|
||||
xx' <- mapM unifArg [0 .. length xx - 1]
|
||||
return $ ITerm fp xx'
|
||||
where
|
||||
unifArg i = tryUnif [zz !! i | ITerm _ zz <- xs]
|
||||
tryUnif xx = case [t | t@(ITerm _ _) <- xx] of
|
||||
[] -> return IMeta
|
||||
x:xs -> if all (==x) xs
|
||||
then return x
|
||||
else Bad "failed to unify"
|
||||
|
||||
mkBinds (xss,_) = mapM mkBind xss
|
||||
mkBind xs = do
|
||||
ts <- mapM (trees !?) xs
|
||||
let vs = [x | CFTree (CFFun (AV x,_),(_,[])) <- ts]
|
||||
testErr (length ts == length vs) "non-variable in bound position"
|
||||
case vs of
|
||||
[x] -> return x
|
||||
[] -> return $ I.identC "h_" ---- uBoundVar
|
||||
y:ys -> do
|
||||
testErr (all (==y) ys) ("fail to unify bindings of" +++ prt y)
|
||||
return y
|
||||
|
||||
term2trm :: ITerm -> Exp
|
||||
term2trm IMeta = EAtom (AM 0) ---- mExp0
|
||||
term2trm (ITerm (fun, binds) terms) =
|
||||
let bterms = zip binds terms
|
||||
in mkAppAtom fun [mkAbsR xs (term2trm t) | (xs,t) <- bterms]
|
||||
|
||||
--- these are deprecated
|
||||
where
|
||||
mkAbsR c e = foldr EAbs e c
|
||||
mkAppAtom a = mkApp (EAtom a)
|
||||
mkApp = foldl EApp
|
||||
Reference in New Issue
Block a user