1
0
forked from GitHub/gf-core

"Committed_by_peb"

This commit is contained in:
peb
2005-02-09 11:46:54 +00:00
parent 56c80bf8d9
commit 71c316cfc5
55 changed files with 485 additions and 339 deletions

View File

@@ -9,7 +9,7 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- context-free grammars. AR 15/12/1999 -- 30/3/2000 -- 2/6/2001 -- 3/12/2001
-----------------------------------------------------------------------------
module CF where
@@ -22,34 +22,33 @@ import CFIdent
import List (nub,nubBy)
import Char (isUpper, isLower, toUpper, toLower)
-- context-free grammars. AR 15/12/1999 -- 30/3/2000 -- 2/6/2001 -- 3/12/2001
-- CF grammar data types
-- abstract type CF.
-- | abstract type CF.
-- Invariant: each category has all its rules grouped with it
-- also: the list is never empty (the category is just missing then)
newtype CF = CF ([CFRuleGroup], CFPredef)
type CFRule = (CFFun, (CFCat, [CFItem]))
type CFRuleGroup = (CFCat,[CFRule])
-- CFPredef is a hack for variable symbols and literals; normally = const []
-- | CFPredef is a hack for variable symbols and literals; normally = @const []@
data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
newtype CFTree = CFTree (CFFun,(CFCat, [CFTree])) deriving (Eq, Show)
type CFPredef = CFTok -> [(CFCat, CFFun)] -- recognize literals, variables, etc
-- | recognize literals, variables, etc
type CFPredef = CFTok -> [(CFCat, CFFun)]
-- Wadler style + return information
-- | Wadler style + return information
type CFParser = [CFTok] -> ([(CFTree,[CFTok])],String)
cfParseResults :: ([(CFTree,[CFTok])],String) -> [CFTree]
cfParseResults rs = [b | (b,[]) <- fst rs]
-- terminals are regular expressions on words; to be completed to full regexp
-- | terminals are regular expressions on words; to be completed to full regexp
data RegExp =
RegAlts [CFWord] -- list of alternative words
| RegSpec CFTok -- special token
RegAlts [CFWord] -- ^ list of alternative words
| RegSpec CFTok -- ^ special token
deriving (Eq, Ord, Show)
type CFWord = String
@@ -78,11 +77,11 @@ groupCFRules = foldr ins [] where
-- to construct rules
-- make a rule from a single token without constituents
-- | make a rule from a single token without constituents
atomCFRule :: CFCat -> CFFun -> CFTok -> CFRule
atomCFRule c f s = (f, (c, [atomCFTerm s]))
-- usual terminal
-- | usual terminal
atomCFTerm :: CFTok -> CFItem
atomCFTerm = CFTerm . atomRegExp
@@ -91,18 +90,18 @@ atomRegExp t = case t of
TS s -> RegAlts [s]
_ -> RegSpec t
-- terminal consisting of alternatives
-- | terminal consisting of alternatives
altsCFTerm :: [String] -> CFItem
altsCFTerm = CFTerm . RegAlts
-- to construct trees
-- make a tree without constituents
-- | make a tree without constituents
atomCFTree :: CFCat -> CFFun -> CFTree
atomCFTree c f = buildCFTree c f []
-- make a tree with constituents.
-- | make a tree with constituents.
buildCFTree :: CFCat -> CFFun -> [CFTree] -> CFTree
buildCFTree c f trees = CFTree (f,(c,trees))
@@ -188,8 +187,7 @@ isCircularCF (_,(c', CFNonterm c:[])) = compatCF c' c
isCircularCF _ = False
--- we should make a test of circular chains, too
-- coercion to the older predef cf type
-- | coercion to the older predef cf type
predefRules :: CFPredef -> CFTok -> [CFRule]
predefRules pre s = [atomCFRule c f s | (c,f) <- pre s]

View File

@@ -9,7 +9,7 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- symbols (categories, functions) for context-free grammars.
-----------------------------------------------------------------------------
module CFIdent where
@@ -24,19 +24,17 @@ import PrGrammar
import Str
import Char (toLower, toUpper)
-- symbols (categories, functions) for context-free grammars.
-- these types should be abstract
-- this type should be abstract
data CFTok =
TS String -- normal strings
| TC String -- strings that are ambiguous between upper or lower case
| TL String -- string literals
| TI Int -- integer literals
| TV Ident -- variables
| TM Int String -- metavariables; the integer identifies it
TS String -- ^ normal strings
| TC String -- ^ strings that are ambiguous between upper or lower case
| TL String -- ^ string literals
| TI Int -- ^ integer literals
| TV Ident -- ^ variables
| TM Int String -- ^ metavariables; the integer identifies it
deriving (Eq, Ord, Show)
-- | this type should be abstract
newtype CFCat = CFCat (CIdent,Label) deriving (Eq, Ord, Show)
tS, tC, tL, tI, tV, tM :: String -> CFTok
@@ -59,7 +57,7 @@ prCFTok t = case t of
TV x -> prt x
TM i m -> m --- "?" --- m
-- to build trees: the Atom contains a GF function, Cn | Meta | Vr | Literal
-- | to build trees: the Atom contains a GF function, @Cn | Meta | Vr | Literal@
newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Ord,Show)
-- - - - - - - - - - - - - - - - - - - - - ^^^ added by peb, 21/5-04
@@ -83,7 +81,7 @@ varCFFun = mkCFFun . AV
consCFFun :: CIdent -> CFFun
consCFFun = mkCFFun . AC
-- standard way of making cf fun
-- | standard way of making cf fun
string2CFFun :: String -> String -> CFFun
string2CFFun m c = consCFFun $ mkCIdent m c
@@ -115,14 +113,14 @@ metaCFFun = mkCFFun $ AM 0
-- to construct CF categories
-- belongs elsewhere
-- | belongs elsewhere
mkCIdent :: String -> String -> CIdent
mkCIdent m c = CIQ (identC m) (identC c)
ident2CFCat :: CIdent -> Ident -> CFCat
ident2CFCat mc d = CFCat (mc, L d)
-- standard way of making cf cat: label s
-- | standard way of making cf cat: label s
string2CFCat :: String -> String -> CFCat
string2CFCat m c = ident2CFCat (mkCIdent m c) (identC "s")
@@ -135,7 +133,7 @@ catVarCF = ident2CFCat (mkCIdent "_" "#Var") (identC "_") ----
cat2CFCat :: (Ident,Ident) -> CFCat
cat2CFCat = uncurry idents2CFCat
---- literals
-- | literals
cfCatString = string2CFCat (prt cPredefAbs) "String"
cfCatInt = string2CFCat (prt cPredefAbs) "Int"
@@ -149,7 +147,7 @@ uCFCat = cat2CFCat uCat
moduleOfCFCat :: CFCat -> Ident
moduleOfCFCat (CFCat (CIQ m _, _)) = m
-- the opposite direction
-- | the opposite direction
cfCat2Cat :: CFCat -> (Ident,Ident)
cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)
@@ -179,12 +177,11 @@ compatTok t u = any (`elem` (alts t)) (alts u) where
TC (c:s) -> [toLower c : s, toUpper c : s]
_ -> [prCFTok u]
-- decide if two CFFuns have the same function head (profiles may differ)
-- | decide if two CFFuns have the same function head (profiles may differ)
compatCFFun :: CFFun -> CFFun -> Bool
compatCFFun (CFFun (f,_)) (CFFun (g,_)) = f == g
-- decide whether two categories match
-- | decide whether two categories match
-- the modifiers can be from different modules, but on the same extension
-- path, so there is no clash, and they can be safely ignored ---
compatCF :: CFCat -> CFCat -> Bool

View File

@@ -9,10 +9,10 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- 26\/1\/2000 -- 18\/4 -- 24\/3\/2004
-----------------------------------------------------------------------------
module CFtoGrammar where
module CFtoGrammar (cf2grammar) where
import Ident
import Grammar
@@ -29,8 +29,6 @@ import Operations
import List (nub)
import Char (isSpace)
-- 26/1/2000 -- 18/4 -- 24/3/2004
cf2grammar :: CF -> [A.TopDef]
cf2grammar cf = concatMap S.trAnyDef (abs ++ conc) where
rules = rulesOfCF cf

View File

@@ -1,7 +1,7 @@
----------------------------------------------------------------------
-- |
-- Module : (Module)
-- Maintainer : (Maintainer)
-- Module : CFtoSRG
-- Maintainer : Markus Forsberg
-- Stability : (stable)
-- Portability : (portable)
--
@@ -9,27 +9,12 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- This module prints a CF as a SRG (Speech Recognition Grammar).
-- Created : 21 January, 2001.
-- Modified : 16 April, 2004 by Aarne Ranta for GF 2.
-----------------------------------------------------------------------------
{-
**************************************************************
GF Module
Description : This module prints a CF as a SRG (Speech
Recognition Grammar).
Author : Markus Forsberg (markus@cs.chalmers.se)
License : GPL (GNU General Public License)
Created : 21 January, 2001
Modified : 16 April, 2004 by Aarne Ranta for GF 2
**************************************************************
-}
module CFtoSRG where
module CFtoSRG (prSRG) where
import Operations
import CF

View File

@@ -9,10 +9,10 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- AR 27\/1\/2000 -- 3\/12\/2001 -- 8\/6\/2003
-----------------------------------------------------------------------------
module CanonToCF where
module CanonToCF (canon2cf) where
import Tracing -- peb 8/6-04
@@ -33,12 +33,9 @@ import Trie2
import List (nub,partition)
import Monad
-- AR 27/1/2000 -- 3/12/2001 -- 8/6/2003
-- The main function: for a given cnc module m, build the CF grammar with all the
-- rules coming from modules that m extends. The categories are qualified by
-- the abstract module name a that m is of.
-- | The main function: for a given cnc module 'm', build the CF grammar with all the
-- rules coming from modules that 'm' extends. The categories are qualified by
-- the abstract module name 'a' that 'm' is of.
canon2cf :: Options -> CanonGrammar -> Ident -> Err CF
canon2cf opts gr c = tracePrt "#size of CF" (err id (show.length.rulesOfCF)) $ do -- peb 8/6-04
let ms = M.allExtends gr c
@@ -60,20 +57,20 @@ cnc2cfCond opts m gr =
type IFun = Ident
type ICat = CIdent
-- all CF rules corresponding to a linearization rule
-- | all CF rules corresponding to a linearization rule
lin2cf :: (Ident, IFun, ICat, [ArgVar], Term) -> Err [CFRule]
lin2cf (m,fun,cat,args,lin) = errIn ("building CF rule for" +++ prt fun) $ do
rhss0 <- allLinValues lin -- :: [(Label, [([Patt],Term)])]
rhss1 <- mapM (mkCFItems m) (concat rhss0) -- :: [(Label, [[PreCFItem]])]
mapM (mkCfRules m fun cat args) rhss1 >>= return . nub . concat
-- making sequences of CF items from every branch in a linearization
-- | making sequences of CF items from every branch in a linearization
mkCFItems :: Ident -> (Label, [([Patt],Term)]) -> Err (Label, [[PreCFItem]])
mkCFItems m (lab,pts) = do
itemss <- mapM (term2CFItems m) (map snd pts)
return (lab, concat itemss) ---- combinations? (test!)
-- making CF rules from sequences of CF items
-- | making CF rules from sequences of CF items
mkCfRules :: Ident -> IFun -> ICat -> [ArgVar] -> (Label, [[PreCFItem]]) -> Err [CFRule]
mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
where
@@ -91,10 +88,10 @@ mkCfRules m fun cat args (lab, itss) = mapM mkOneRule itss
where
mkB x = [k | (k,(j, LV y,False)) <- nonterms, j == i, y == x]
-- intermediate data structure of CFItems with information for profiles
-- | intermediate data structure of CFItems with information for profiles
data PreCFItem =
PTerm RegExp -- like ordinary Terminal
| PNonterm CIdent Integer Label Bool -- cat, position, part/bind, whether arg
PTerm RegExp -- ^ like ordinary Terminal
| PNonterm CIdent Integer Label Bool -- ^ cat, position, part\/bind, whether arg
deriving Eq
precf2cf :: PreCFItem -> CFItem
@@ -103,7 +100,7 @@ precf2cf (PNonterm cm _ (L c) True) = CFNonterm (ident2CFCat cm c)
precf2cf (PNonterm _ _ _ False) = CFNonterm catVarCF
-- the main job in translating linearization rules into sequences of cf items
-- | the main job in translating linearization rules into sequences of cf items
term2CFItems :: Ident -> Term -> Err [[PreCFItem]]
term2CFItems m t = errIn "forming cf items" $ case t of
S c _ -> t2c c

View File

@@ -1,7 +1,7 @@
----------------------------------------------------------------------
-- |
-- Module : (Module)
-- Maintainer : (Maintainer)
-- Module : ChartParser
-- Maintainer : Peter Ljunglöf
-- Stability : (stable)
-- Portability : (portable)
--
@@ -9,22 +9,10 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- Bottom-up Kilbury chart parser from "Pure Functional Parsing", chapter 5.
-- OBSOLETE -- should use new MCFG parsers instead
-----------------------------------------------------------------------------
{- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Filename: ChartParser.hs
Author: Peter Ljunglöf
Time-stamp: <2004-05-25 02:20:01 peb>
Description: Bottom-up Kilbury chart parser from
"Pure Functional Parsing", chapter 5
DESIRED CHANGES: - The modules OrdSet and OrdMap2 are obsolete
and should be changed to newer versions
- Also, should use the CFG parsers in parsing/
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -}
module ChartParser (chartParser) where
import Tracing

View File

@@ -12,7 +12,7 @@
-- (Description of the module)
-----------------------------------------------------------------------------
module EBNF where
module EBNF (pEBNFasGrammar) where
import Operations
import Parsers

View File

@@ -9,10 +9,12 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003
--
-- use the Print class instead!
-----------------------------------------------------------------------------
module PPrCF where
module PPrCF (prCF, prCFTree, prCFRule, prCFFun, prCFCat, prCFItem, prRegExp, pCF) where
import Operations
import CF
@@ -22,9 +24,6 @@ import PrGrammar
import Char
-- printing and parsing CF grammars, rules, and trees AR 26/1/2000 -- 9/6/2003
---- use the Print class instead!
prCF :: CF -> String
prCF = unlines . (map prCFRule) . rulesOfCF -- hiding the literal recogn function

View File

@@ -9,7 +9,9 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- Printing CF grammars generated from GF as LBNF grammar for BNFC.
-- AR 26/1/2000 -- 9/6/2003 (PPrCF) -- 8/11/2003 -- 27/9/2004.
-- With primitive error messaging, by rules and rule tails commented out
-----------------------------------------------------------------------------
module PrLBNF (prLBNF,prBNF) where
@@ -29,10 +31,6 @@ import Modules
import Char
import List (nub)
-- Printing CF grammars generated from GF as LBNF grammar for BNFC.
-- AR 26/1/2000 -- 9/6/2003 (PPrCF) -- 8/11/2003 -- 27/9/2004
-- With primitive error messaging, by rules and rule tails commented out
prLBNF :: Bool -> StateGrammar -> String
prLBNF new gr = unlines $ pragmas ++ (map (prCFRule cs) rules)
where
@@ -42,7 +40,7 @@ prLBNF new gr = unlines $ pragmas ++ (map (prCFRule cs) rules)
then mkLBNF (stateGrammarST gr) $ rulesOfCF cf
else ([],rulesOfCF cf) -- "normal" behaviour
-- a hack to hide the LBNF details
-- | a hack to hide the LBNF details
prBNF :: Bool -> StateGrammar -> String
prBNF b = unlines . (map (unwords . unLBNF . drop 1 . words)) . lines . prLBNF b
where
@@ -52,7 +50,7 @@ prBNF b = unlines . (map (unwords . unLBNF . drop 1 . words)) . lines . prLBNF b
c:ts -> c : unLBNF ts
_ -> r
--- awful low level code without abstraction over label names etc
--- | awful low level code without abstraction over label names etc
mkLBNF :: CanonGrammar -> [CFRule] -> ([String],[CFRule])
mkLBNF gr rules = (coercions, nub $ concatMap mkRule rules) where
coercions = ["coercions" +++ prt_ c +++ show n +++ ";" |
@@ -129,7 +127,7 @@ prLab i = case i of
L (IC "_") -> "" ---
_ -> let x = prt i in "_" ++ x ++ if isDigit (last x) then "_" else ""
-- just comment out the rest if you cannot interpret the function name in LBNF
-- | just comment out the rest if you cannot interpret the function name in LBNF
-- two versions, depending on whether in the beginning of a rule or elsewhere;
-- in the latter case, error just terminates the rule
prErr :: Bool -> String -> String
@@ -138,7 +136,7 @@ prErr b s = (if b then "" else " ;") +++ "---" +++ s
prCFCat :: Bool -> CFCat -> String
prCFCat b (CFCat ((CIQ _ c),l)) = prId b c ++ prLab l ----
-- if a category does not have a production of its own, we replace it by Ident
-- | if a category does not have a production of its own, we replace it by Ident
prCFItem cs (CFNonterm c) = if elem (catIdPlus c) cs then prCFCat False c else "Ident"
prCFItem _ (CFTerm a) = prRegExp a

View File

@@ -9,7 +9,8 @@
-- > CVS $Author $
-- > CVS $Revision $
--
-- (Description of the module)
-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001
-- revised 8/4/2002 for the new profile structure
-----------------------------------------------------------------------------
module Profile (postParse) where
@@ -29,23 +30,21 @@ import Operations
import Monad
import List (nub)
-- restoring parse trees for discontinuous constituents, bindings, etc. AR 25/1/2001
-- revised 8/4/2002 for the new profile structure
-- | the job is done in two passes:
--
-- 1. tree2term: restore constituent order from Profile
--
-- 2. term2trm: restore Bindings from Binds
postParse :: CFTree -> Err Exp
postParse tree = do
iterm <- errIn ("postprocessing parse tree" +++ prCFTree tree) $ tree2term tree
return $ term2trm iterm
-- an intermediate data structure
-- | an intermediate data structure
data ITerm = ITerm (Atom, BindVs) [ITerm] | IMeta deriving (Eq,Show)
type BindVs = [[I.Ident]]
-- the job is done in two passes:
-- (1) tree2term: restore constituent order from Profile
-- (2) term2trm: restore Bindings from Binds
-- | (1) restore constituent order from Profile
tree2term :: CFTree -> Err ITerm
-- tree2term (CFTree (f,(_,[t]))) | f == dummyCFFun = tree2term t -- not used
tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
@@ -93,6 +92,7 @@ tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
testErr (all (==y) ys) ("fail to unify bindings of" +++ prt y)
return y
-- | (2) restore Bindings from Binds
term2trm :: ITerm -> Exp
term2trm IMeta = EAtom (AM 0) ---- mExp0
term2trm (ITerm (fun, binds) terms) =