forked from GitHub/gf-core
120 lines
4.5 KiB
Haskell
120 lines
4.5 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : GF.Speech.PGFToCFG
|
|
--
|
|
-- Approximates PGF grammars with context-free grammars.
|
|
----------------------------------------------------------------------
|
|
module GF.Speech.PGFToCFG (bnfPrinter, pgfToCFG) where
|
|
|
|
import PGF2
|
|
import PGF2.Internal
|
|
import GF.Grammar.CFG hiding (Symbol)
|
|
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.Set (Set)
|
|
import qualified Data.Set as Set
|
|
|
|
bnfPrinter :: PGF -> Concr -> String
|
|
bnfPrinter = toBNF id
|
|
|
|
toBNF :: (CFG -> CFG) -> PGF -> Concr -> String
|
|
toBNF f pgf cnc = prCFG $ f $ pgfToCFG pgf cnc
|
|
|
|
type Profile = [Int]
|
|
|
|
pgfToCFG :: PGF -> Concr -> CFG
|
|
pgfToCFG pgf cnc = error "TODO: pgfToCFG" {- mkCFG start_cat extCats (startRules ++ concatMap ruleToCFRule rules)
|
|
where
|
|
(_,start_cat,_) = unType (startCat pgf)
|
|
|
|
rules :: [(FId,Production)]
|
|
rules = [(fcat,prod) | fcat <- [0..concrTotalCats cnc],
|
|
prod <- concrProductions cnc fcat]
|
|
|
|
fcatCats :: Map FId Cat
|
|
fcatCats = Map.fromList [(fc, c ++ "_" ++ show i)
|
|
| (c,s,e,lbls) <- concrCategories cnc,
|
|
(fc,i) <- zip [s..e] [1..]]
|
|
|
|
fcatCat :: FId -> Cat
|
|
fcatCat c = Map.findWithDefault ("Unknown_" ++ show c) c fcatCats
|
|
|
|
fcatToCat :: FId -> Int -> Cat
|
|
fcatToCat c l = fcatCat c ++ row
|
|
where row = if catLinArity c == 1 then "" else "_" ++ show l
|
|
|
|
-- gets the number of fields in the lincat for the given category
|
|
catLinArity :: FId -> Int
|
|
catLinArity c = maximum (1:[length rhs | ((_,rhs), _) <- topdownRules c])
|
|
|
|
topdownRules cat = f cat []
|
|
where
|
|
f cat rules = foldr g rules (concrProductions cnc cat)
|
|
|
|
g (PApply funid args) rules = (concrFunction cnc funid,args) : rules
|
|
g (PCoerce cat) rules = f cat rules
|
|
|
|
|
|
extCats :: Set Cat
|
|
extCats = Set.fromList $ map ruleLhs startRules
|
|
|
|
startRules :: [CFRule]
|
|
startRules = [Rule c [NonTerminal (fcatToCat fc r)] (CFRes 0)
|
|
| (c,s,e,lbls) <- concrCategories cnc,
|
|
fc <- [s..e], not (isPredefFId fc),
|
|
r <- [0..catLinArity fc-1]]
|
|
|
|
ruleToCFRule :: (FId,Production) -> [CFRule]
|
|
ruleToCFRule (c,PApply funid args) =
|
|
[Rule (fcatToCat c l) (mkRhs row) (profilesToTerm [fixProfile row n | n <- [0..length args-1]])
|
|
| (l,seqid) <- zip [0..] rhs
|
|
, let row = concrSequence cnc seqid
|
|
, not (containsLiterals row)]
|
|
where
|
|
(f, rhs) = concrFunction cnc funid
|
|
|
|
mkRhs :: [Symbol] -> [CFSymbol]
|
|
mkRhs = concatMap symbolToCFSymbol
|
|
|
|
containsLiterals :: [Symbol] -> Bool
|
|
containsLiterals row = not (null ([n | SymLit n _ <- row] ++
|
|
[n | SymVar n _ <- row]))
|
|
|
|
symbolToCFSymbol :: Symbol -> [CFSymbol]
|
|
symbolToCFSymbol (SymCat n l) = [let PArg _ fid = args!!n in NonTerminal (fcatToCat fid l)]
|
|
symbolToCFSymbol (SymKS t) = [Terminal t]
|
|
symbolToCFSymbol (SymKP syms as) = concatMap symbolToCFSymbol syms
|
|
---- ++ [t | Alt ss _ <- as, t <- ss]
|
|
---- should be alternatives in [[CFSymbol]]
|
|
---- AR 3/6/2010
|
|
symbolToCFSymbol SymBIND = [Terminal "&+"]
|
|
symbolToCFSymbol SymSOFT_BIND = []
|
|
symbolToCFSymbol SymSOFT_SPACE = []
|
|
symbolToCFSymbol SymCAPIT = [Terminal "&|"]
|
|
symbolToCFSymbol SymALL_CAPIT = [Terminal "&|"]
|
|
symbolToCFSymbol SymNE = []
|
|
|
|
fixProfile :: [Symbol] -> Int -> Profile
|
|
fixProfile row i = [k | (k,j) <- nts, j == i]
|
|
where
|
|
nts = zip [0..] [j | nt <- row, j <- getPos nt]
|
|
|
|
getPos (SymCat j _) = [j]
|
|
getPos (SymLit j _) = [j]
|
|
getPos _ = []
|
|
|
|
profilesToTerm :: [Profile] -> CFTerm
|
|
profilesToTerm ps = CFObj f (zipWith profileToTerm argTypes ps)
|
|
where Just (hypos,_,_) = fmap unType (functionType pgf f)
|
|
argTypes = [cat | (_,_,ty) <- hypos, let (_,cat,_) = unType ty]
|
|
|
|
profileToTerm :: Fun -> Profile -> CFTerm
|
|
profileToTerm t [] = CFMeta t
|
|
profileToTerm _ xs = CFRes (last xs) -- FIXME: unify
|
|
ruleToCFRule (c,PCoerce c') =
|
|
[Rule (fcatToCat c l) [NonTerminal (fcatToCat c' l)] (CFRes 0)
|
|
| l <- [0..catLinArity c-1]]
|
|
-}
|