mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
now we compile context-free grammars directly to PGF without going via GF source code. This makes it quick and lightweight to compile big grammars such as the Berkley grammar
This commit is contained in:
@@ -2,11 +2,12 @@ module GF.Command.Importing (importGrammar, importSource) where
|
||||
|
||||
import PGF
|
||||
import PGF.Data
|
||||
import PGF.Optimize
|
||||
|
||||
import GF.Compile
|
||||
import GF.Compile.Multi (readMulti)
|
||||
import GF.Compile.GetGrammar (getCFRules, getEBNFRules)
|
||||
import GF.Grammar (identS, SourceGrammar) -- for cc command
|
||||
import GF.Grammar (SourceGrammar) -- for cc command
|
||||
import GF.Grammar.CFG
|
||||
import GF.Grammar.EBNF
|
||||
import GF.Compile.CFGtoPGF
|
||||
@@ -65,6 +66,7 @@ importCF opts files get convert = do
|
||||
startCat <- case rules of
|
||||
(CFRule cat _ _ : _) -> return cat
|
||||
_ -> fail "empty CFG"
|
||||
let gf = cf2gf (last files) (uniqueFuns (mkCFG startCat Set.empty rules))
|
||||
gr <- compileSourceGrammar opts gf
|
||||
link opts (identS (justModuleName (last files) ++ "Abs"), (), gr)
|
||||
let pgf = cf2pgf (last files) (uniqueFuns (mkCFG startCat Set.empty rules))
|
||||
probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf)
|
||||
return $ setProbabilities probs
|
||||
$ if flag optOptimizePGF opts then optimizePGF pgf else pgf
|
||||
|
||||
@@ -1,58 +1,111 @@
|
||||
module GF.Compile.CFGtoPGF (cf2gf) where
|
||||
module GF.Compile.CFGtoPGF (cf2pgf) where
|
||||
|
||||
import GF.Grammar.Grammar hiding (Cat)
|
||||
import GF.Grammar.Macros
|
||||
import GF.Grammar.CFG
|
||||
import GF.Infra.Ident(Ident,identS)
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.UseIO
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import PGF(showCId)
|
||||
import PGF
|
||||
import PGF.Data
|
||||
import PGF.Macros
|
||||
import PGF.Optimize
|
||||
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.Map as Map
|
||||
|
||||
import qualified Data.IntMap as IntMap
|
||||
import qualified Data.ByteString as BS
|
||||
import Data.Array.IArray
|
||||
import Data.List
|
||||
|
||||
--------------------------
|
||||
-- the compiler ----------
|
||||
--------------------------
|
||||
|
||||
cf2gf :: FilePath -> CFG -> SourceGrammar
|
||||
cf2gf fpath cf = mGrammar [
|
||||
(aname, ModInfo MTAbstract MSComplete (modifyFlags (\fs -> fs{optStartCat = Just cat})) [] Nothing [] [] fpath Nothing abs),
|
||||
(cname, ModInfo (MTConcrete aname) MSComplete noOptions [] Nothing [] [] fpath Nothing cnc)
|
||||
]
|
||||
cf2pgf :: FilePath -> CFG -> PGF
|
||||
cf2pgf fpath cf =
|
||||
let pgf = PGF Map.empty aname (cf2abstr cf) (Map.singleton cname (cf2concr cf))
|
||||
in updateProductionIndices pgf
|
||||
where
|
||||
name = justModuleName fpath
|
||||
(abs,cnc,cat) = cf2grammar cf
|
||||
aname = identS $ name ++ "Abs"
|
||||
cname = identS name
|
||||
aname = mkCId (name ++ "Abs")
|
||||
cname = mkCId name
|
||||
|
||||
cf2abstr :: CFG -> Abstr
|
||||
cf2abstr cfg = Abstr aflags afuns acats BS.empty
|
||||
where
|
||||
aflags = Map.singleton (mkCId "startcat") (LStr (cfgStartCat cfg))
|
||||
acats = Map.fromList [(mkCId cat, ([], [(0,mkRuleName rule)
|
||||
| rule <- Set.toList rules], 0, 0))
|
||||
| (cat,rules) <- Map.toList (cfgRules cfg)]
|
||||
afuns = Map.fromList [(mkRuleName rule, (cftype [mkCId c | NonTerminal c <- ruleRhs rule] (mkCId cat), 0, Nothing, 0, 0))
|
||||
| (cat,rules) <- Map.toList (cfgRules cfg)
|
||||
, rule <- Set.toList rules]
|
||||
|
||||
cf2grammar :: CFG -> (BinTree Ident Info, BinTree Ident Info, String)
|
||||
cf2grammar cfg = (buildTree abs, buildTree conc, cfgStartCat cfg) where
|
||||
abs = cats ++ funs
|
||||
conc = lincats ++ lins
|
||||
cats = [(identS cat, AbsCat (Just (L NoLoc []))) | cat <- Map.keys (cfgRules cfg)]
|
||||
lincats = [(cat, CncCat (Just (L loc defLinType)) Nothing Nothing Nothing Nothing) | (cat,AbsCat (Just (L loc _))) <- cats]
|
||||
(funs,lins) = unzip (map cf2rule (concatMap Set.toList (Map.elems (cfgRules cfg))))
|
||||
cf2concr :: CFG -> Concr
|
||||
cf2concr cfg = Concr Map.empty Map.empty
|
||||
cncfuns lindefsrefs lindefsrefs
|
||||
sequences productions
|
||||
IntMap.empty Map.empty
|
||||
cnccats
|
||||
IntMap.empty
|
||||
totalCats
|
||||
where
|
||||
sequences0 = Set.fromList (listArray (0,0) [SymCat 0 0] :
|
||||
[mkSequence rule | rules <- Map.elems (cfgRules cfg), rule <- Set.toList rules])
|
||||
sequences = listArray (0,Set.size sequences0-1) (Set.toList sequences0)
|
||||
|
||||
cf2rule :: CFRule -> ((Ident,Info),(Ident,Info))
|
||||
cf2rule (CFRule cat items (CFObj fun _)) = (def,ldef) where
|
||||
f = identS (showCId fun)
|
||||
def = (f, AbsFun (Just (L NoLoc (mkProd args' (Cn (identS cat)) []))) Nothing Nothing (Just True))
|
||||
args0 = zip (map (identS . ("x" ++) . show) [0..]) items
|
||||
args = [((Explicit,v), Cn (identS c)) | (v, NonTerminal c) <- args0]
|
||||
args' = [(Explicit,identS "_", Cn (identS c)) | (_, NonTerminal c) <- args0]
|
||||
ldef = (f, CncFun
|
||||
Nothing
|
||||
(Just (L NoLoc (mkAbs (map fst args)
|
||||
(mkRecord (const theLinLabel) [foldconcat (map mkIt args0)]))))
|
||||
Nothing
|
||||
Nothing)
|
||||
mkIt (v, NonTerminal _) = P (Vr v) theLinLabel
|
||||
mkIt (_, Terminal a) = K a
|
||||
foldconcat [] = K ""
|
||||
foldconcat tt = foldr1 C tt
|
||||
idFun = CncFun wildCId (listArray (0,0) [seqid])
|
||||
where
|
||||
seq = listArray (0,0) [SymCat 0 0]
|
||||
seqid = binSearch seq sequences (bounds sequences)
|
||||
((fun_cnt,cncfuns0),productions0) = mapAccumL convertRules (1,[idFun]) (Map.toList (cfgRules cfg))
|
||||
productions = IntMap.fromList productions0
|
||||
cncfuns = listArray (0,fun_cnt-1) (reverse cncfuns0)
|
||||
|
||||
lbls = listArray (0,0) ["s"]
|
||||
(totalCats,cnccats0) = mapAccumL mkCncCat 0 (Map.toList (cfgRules cfg))
|
||||
cnccats = Map.fromList cnccats0
|
||||
|
||||
lindefsrefs =
|
||||
IntMap.fromList (map mkLinDefRef (Map.keys (cfgRules cfg)))
|
||||
|
||||
convertRules st (cat,rules) =
|
||||
let (st',prods) = mapAccumL convertRule st (Set.toList rules)
|
||||
in (st',(cat2fid cat,Set.fromList prods))
|
||||
|
||||
convertRule (funid,funs) rule =
|
||||
let args = [PArg [] (cat2fid c) | NonTerminal c <- ruleRhs rule]
|
||||
prod = PApply funid args
|
||||
seqid = binSearch (mkSequence rule) sequences (bounds sequences)
|
||||
fun = CncFun (mkRuleName rule) (listArray (0,0) [seqid])
|
||||
funid' = funid+1
|
||||
in funid' `seq` ((funid',fun:funs),prod)
|
||||
|
||||
mkSequence rule = listArray (0,length syms-1) syms
|
||||
where
|
||||
syms = snd $ mapAccumL convertSymbol 0 (ruleRhs rule)
|
||||
|
||||
convertSymbol d (NonTerminal _) = (d+1,SymCat d 0)
|
||||
convertSymbol d (Terminal t) = (d, SymKS t)
|
||||
|
||||
mkCncCat fid (cat,_) = (fid+1, (mkCId cat,CncCat fid fid lbls))
|
||||
|
||||
mkLinDefRef cat =
|
||||
(cat2fid cat,[0])
|
||||
|
||||
binSearch v arr (i,j)
|
||||
| i <= j = case compare v (arr ! k) of
|
||||
LT -> binSearch v arr (i,k-1)
|
||||
EQ -> k
|
||||
GT -> binSearch v arr (k+1,j)
|
||||
| otherwise = error "binSearch"
|
||||
where
|
||||
k = (i+j) `div` 2
|
||||
|
||||
cat2fid cat =
|
||||
case Map.lookup (mkCId cat) cnccats of
|
||||
Just (CncCat fid _ _) -> fid
|
||||
_ -> error "cat2fid"
|
||||
|
||||
mkRuleName rule =
|
||||
case ruleName rule of
|
||||
CFObj n _ -> n
|
||||
_ -> wildCId
|
||||
|
||||
@@ -12,7 +12,7 @@ import GF.Compile.CFGtoPGF
|
||||
import GF.Compile.GetGrammar
|
||||
import GF.Grammar.CFG
|
||||
|
||||
import GF.Infra.Ident(identS,showIdent)
|
||||
import GF.Infra.Ident(showIdent)
|
||||
import GF.Infra.UseIO
|
||||
import GF.Infra.Option
|
||||
import GF.Data.ErrM
|
||||
@@ -68,13 +68,13 @@ compileCFFiles opts fs = do
|
||||
startCat <- case rules of
|
||||
(CFRule cat _ _ : _) -> return cat
|
||||
_ -> fail "empty CFG"
|
||||
let gf = cf2gf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules))
|
||||
gr <- compileSourceGrammar opts gf
|
||||
let pgf = cf2pgf (last fs) (uniqueFuns (mkCFG startCat Set.empty rules))
|
||||
let cnc = justModuleName (last fs)
|
||||
unless (flag optStopAfterPhase opts == Compile) $
|
||||
do pgf <- link opts (identS cnc, (), gr)
|
||||
writePGF opts pgf
|
||||
writeOutputs opts pgf
|
||||
do probs <- liftIO (maybe (return . defaultProbabilities) readProbabilitiesFromFile (flag optProbsFile opts) pgf)
|
||||
let pgf' = setProbabilities probs $ if flag optOptimizePGF opts then optimizePGF pgf else pgf
|
||||
writePGF opts pgf'
|
||||
writeOutputs opts pgf'
|
||||
|
||||
unionPGFFiles :: Options -> [FilePath] -> IOE ()
|
||||
unionPGFFiles opts fs =
|
||||
|
||||
Reference in New Issue
Block a user