mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-27 19:58:55 -06:00
Lexer by need.
This commit is contained in:
@@ -45,6 +45,7 @@ Highlights, preliminary version
|
|||||||
<li> New syntax alternatives for local definitions: <tt>let</tt> without
|
<li> New syntax alternatives for local definitions: <tt>let</tt> without
|
||||||
braces and <tt>where</tt>.
|
braces and <tt>where</tt>.
|
||||||
<li> Pattern variables can be used on lhs's of <tt>oper</tt> definitions.
|
<li> Pattern variables can be used on lhs's of <tt>oper</tt> definitions.
|
||||||
|
<li> New Unicode transliterations (by Harad Hammarström).
|
||||||
|
|
||||||
<h4>New parser (forthcoming)</h4>
|
<h4>New parser (forthcoming)</h4>
|
||||||
|
|
||||||
@@ -62,7 +63,8 @@ Highlights, preliminary version
|
|||||||
<li> Haskell source code organized into subdirectories.
|
<li> Haskell source code organized into subdirectories.
|
||||||
<li> BNF Converter used for defining the languages GF and GFC, which also
|
<li> BNF Converter used for defining the languages GF and GFC, which also
|
||||||
give reliable LaTeX documentation.
|
give reliable LaTeX documentation.
|
||||||
|
<li> Lexican rules sorted out by option <tt>-cflexer</tt> for efficient
|
||||||
|
parsing with large lexica.
|
||||||
|
|
||||||
|
|
||||||
<!-- NEW -->
|
<!-- NEW -->
|
||||||
|
|||||||
@@ -507,7 +507,7 @@ oper
|
|||||||
DitransVerb = TransVerb ** {s3 : Preposition} ;
|
DitransVerb = TransVerb ** {s3 : Preposition} ;
|
||||||
|
|
||||||
mkDitransVerb : Verb -> Preposition -> Preposition -> DitransVerb = \v,p1,p2 ->
|
mkDitransVerb : Verb -> Preposition -> Preposition -> DitransVerb = \v,p1,p2 ->
|
||||||
v ** {s3 = p1 ; s3 = p2} ;
|
v ** {s2 = p1 ; s3 = p2} ;
|
||||||
|
|
||||||
complDitransVerb :
|
complDitransVerb :
|
||||||
Bool -> DitransVerb -> NounPhrase -> NounPhrase -> VerbPhrase =
|
Bool -> DitransVerb -> NounPhrase -> NounPhrase -> VerbPhrase =
|
||||||
@@ -538,7 +538,7 @@ oper
|
|||||||
s3 = \\g,n => spelar.s3 ! g ! n ++ (if_then_else Str postp bra.s [])
|
s3 = \\g,n => spelar.s3 ! g ! n ++ (if_then_else Str postp bra.s [])
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
advAdjPhrase : Adverb -> AdjPhrase -> AdjPhrase = \mycket, dyr ->
|
advAdjPhrase : SS -> AdjPhrase -> AdjPhrase = \mycket, dyr ->
|
||||||
{s = \\a,c => mycket.s ++ dyr.s ! a ! c ;
|
{s = \\a,c => mycket.s ++ dyr.s ! a ! c ;
|
||||||
p = dyr.p
|
p = dyr.p
|
||||||
} ;
|
} ;
|
||||||
@@ -903,7 +903,7 @@ oper
|
|||||||
-- This class covers adverbials such as "annars", "därför", which are prefixed
|
-- This class covers adverbials such as "annars", "därför", which are prefixed
|
||||||
-- to a sentence to form a phrase.
|
-- to a sentence to form a phrase.
|
||||||
|
|
||||||
advSentence : Adverb -> Sentence -> Utterance = \annars,soverhan ->
|
advSentence : SS -> Sentence -> Utterance = \annars,soverhan ->
|
||||||
ss (annars.s ++ soverhan.s ! Inv ++ ".") ;
|
ss (annars.s ++ soverhan.s ! Inv ++ ".") ;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,9 @@ import Char (isUpper, isLower, toUpper, toLower)
|
|||||||
-- abstract type CF.
|
-- abstract type CF.
|
||||||
-- Invariant: each category has all its rules grouped with it
|
-- Invariant: each category has all its rules grouped with it
|
||||||
-- also: the list is never empty (the category is just missing then)
|
-- also: the list is never empty (the category is just missing then)
|
||||||
newtype CF = CF ([(CFCat,[CFRule])], CFPredef)
|
newtype CF = CF ([CFRuleGroup], CFPredef)
|
||||||
type CFRule = (CFFun, (CFCat, [CFItem]))
|
type CFRule = (CFFun, (CFCat, [CFItem]))
|
||||||
|
type CFRuleGroup = (CFCat,[CFRule])
|
||||||
|
|
||||||
-- CFPredef is a hack for variable symbols and literals; normally = const []
|
-- CFPredef is a hack for variable symbols and literals; normally = const []
|
||||||
data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
|
data CFItem = CFTerm RegExp | CFNonterm CFCat deriving (Eq, Ord,Show)
|
||||||
|
|||||||
@@ -3,7 +3,9 @@ module CFIdent where
|
|||||||
import Operations
|
import Operations
|
||||||
import GFC
|
import GFC
|
||||||
import Ident
|
import Ident
|
||||||
|
import Values (cPredefAbs)
|
||||||
import AbsGFC
|
import AbsGFC
|
||||||
|
import Macros (ident2label)
|
||||||
import PrGrammar
|
import PrGrammar
|
||||||
import Str
|
import Str
|
||||||
import Char (toLower, toUpper)
|
import Char (toLower, toUpper)
|
||||||
@@ -48,6 +50,10 @@ newtype CFFun = CFFun (Atom, Profile) deriving (Eq,Show)
|
|||||||
|
|
||||||
type Profile = [([[Int]],[Int])]
|
type Profile = [([[Int]],[Int])]
|
||||||
|
|
||||||
|
wordsCFTok :: CFTok -> [String]
|
||||||
|
wordsCFTok t = case t of
|
||||||
|
TC (c:cs) -> [c':cs | c' <- [toUpper c, toLower c]]
|
||||||
|
_ -> [prCFTok t]
|
||||||
|
|
||||||
-- the following functions should be used instead of constructors
|
-- the following functions should be used instead of constructors
|
||||||
|
|
||||||
@@ -68,6 +74,9 @@ stringCFFun = mkCFFun . AS
|
|||||||
intCFFun :: Int -> CFFun
|
intCFFun :: Int -> CFFun
|
||||||
intCFFun = mkCFFun . AI . toInteger
|
intCFFun = mkCFFun . AI . toInteger
|
||||||
|
|
||||||
|
dummyCFFun :: CFFun
|
||||||
|
dummyCFFun = varCFFun $ identC "_" --- used in lexer-by-need rules
|
||||||
|
|
||||||
cfFun2String :: CFFun -> String
|
cfFun2String :: CFFun -> String
|
||||||
cfFun2String (CFFun (f,_)) = prt f
|
cfFun2String (CFFun (f,_)) = prt f
|
||||||
|
|
||||||
@@ -105,8 +114,8 @@ cat2CFCat :: (Ident,Ident) -> CFCat
|
|||||||
cat2CFCat = uncurry idents2CFCat
|
cat2CFCat = uncurry idents2CFCat
|
||||||
|
|
||||||
---- literals
|
---- literals
|
||||||
cfCatString = string2CFCat "Predef" "String"
|
cfCatString = string2CFCat (prt cPredefAbs) "String"
|
||||||
cfCatInt = string2CFCat "Predef" "Int"
|
cfCatInt = string2CFCat (prt cPredefAbs) "Int"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -122,6 +131,9 @@ moduleOfCFCat (CFCat (CIQ m _, _)) = m
|
|||||||
cfCat2Cat :: CFCat -> (Ident,Ident)
|
cfCat2Cat :: CFCat -> (Ident,Ident)
|
||||||
cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)
|
cfCat2Cat (CFCat (CIQ m c,_)) = (m,c)
|
||||||
|
|
||||||
|
lexCFCat :: CFCat -> CFCat
|
||||||
|
lexCFCat cat = ident2CFCat (uncurry CIQ (cfCat2Cat cat)) (identC "*")
|
||||||
|
|
||||||
-- to construct CF tokens
|
-- to construct CF tokens
|
||||||
|
|
||||||
string2CFTok :: String -> CFTok
|
string2CFTok :: String -> CFTok
|
||||||
|
|||||||
@@ -5,12 +5,14 @@ import Option
|
|||||||
import Ident
|
import Ident
|
||||||
import AbsGFC
|
import AbsGFC
|
||||||
import GFC
|
import GFC
|
||||||
|
import Values (isPredefCat,cPredefAbs)
|
||||||
import PrGrammar
|
import PrGrammar
|
||||||
import CMacros
|
import CMacros
|
||||||
import qualified Modules as M
|
import qualified Modules as M
|
||||||
import CF
|
import CF
|
||||||
import CFIdent
|
import CFIdent
|
||||||
import List (nub)
|
import Morphology
|
||||||
|
import List (nub,partition)
|
||||||
import Monad
|
import Monad
|
||||||
|
|
||||||
-- AR 27/1/2000 -- 3/12/2001 -- 8/6/2003
|
-- AR 27/1/2000 -- 3/12/2001 -- 8/6/2003
|
||||||
@@ -28,8 +30,8 @@ canon2cf opts gr c = do
|
|||||||
rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts)) mms
|
rules0 <- liftM concat $ mapM (uncurry (cnc2cfCond opts)) mms
|
||||||
let rules = filter (not . isCircularCF) rules0 ---- temporarily here
|
let rules = filter (not . isCircularCF) rules0 ---- temporarily here
|
||||||
let grules = groupCFRules rules
|
let grules = groupCFRules rules
|
||||||
let predef = mkCFPredef $ map fst grules
|
let predef = mkCFPredef opts grules
|
||||||
return $ CF (grules, predef)
|
return $ CF predef
|
||||||
|
|
||||||
cnc2cfCond :: Options -> Ident -> [(Ident,Info)] -> Err [CFRule]
|
cnc2cfCond :: Options -> Ident -> [(Ident,Info)] -> Err [CFRule]
|
||||||
cnc2cfCond opts m gr =
|
cnc2cfCond opts m gr =
|
||||||
@@ -138,16 +140,40 @@ term2CFItems m t = errIn "forming cf items" $ case t of
|
|||||||
tryMkCFTerm itss = return itss
|
tryMkCFTerm itss = return itss
|
||||||
|
|
||||||
extrR arg lab = case (arg,lab) of
|
extrR arg lab = case (arg,lab) of
|
||||||
(Arg (A cat pos), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]]
|
(Arg (A cat pos), l@(L _)) -> return [[PNonterm (cIQ cat) pos l True]]
|
||||||
(Arg (A cat pos), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]]
|
(Arg (A cat pos), l@(LV _)) -> return [[PNonterm (cIQ cat) pos l False]]
|
||||||
(Arg (AB cat pos b), l@(L _)) -> return [[PNonterm (CIQ m cat) pos l True]]
|
(Arg (AB cat pos b), l@(L _)) -> return [[PNonterm (cIQ cat) pos l True]]
|
||||||
(Arg (AB cat pos b), l@(LV _)) -> return [[PNonterm (CIQ m cat) pos l False]]
|
(Arg (AB cat pos b), l@(LV _)) -> return [[PNonterm (cIQ cat) pos l False]]
|
||||||
---- ??
|
---- ??
|
||||||
_ -> prtBad "cannot extract record field from" arg
|
_ -> prtBad "cannot extract record field from" arg
|
||||||
|
cIQ c = if isPredefCat c then CIQ cPredefAbs c else CIQ m c
|
||||||
|
|
||||||
mkCFPredef :: [CFCat] -> CFPredef
|
mkCFPredef :: Options -> [CFRuleGroup] -> ([CFRuleGroup],CFPredef)
|
||||||
mkCFPredef cats s =
|
mkCFPredef opts rules = (ruls, \s -> preds0 s ++ look s) where
|
||||||
[(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++
|
(ruls,preds) = if oElem lexerByNeed opts -- option -cflexer
|
||||||
[(cat, varCFFun x) | TV x <- [s], cat <- cats] ++
|
then predefLexer rules
|
||||||
[(cfCatString, stringCFFun t) | TL t <- [s]] ++
|
else (rules,NT)
|
||||||
[(cfCatInt, intCFFun t) | TI t <- [s]]
|
preds0 s =
|
||||||
|
[(cat, metaCFFun) | TM _ _ <- [s], cat <- cats] ++
|
||||||
|
[(cat, varCFFun x) | TV x <- [s], cat <- cats] ++
|
||||||
|
[(cfCatString, stringCFFun t) | TL t <- [s]] ++
|
||||||
|
[(cfCatInt, intCFFun t) | TI t <- [s]]
|
||||||
|
cats = map fst rules
|
||||||
|
look s = errVal [] $ liftM concat $
|
||||||
|
mapM (flip justLookupTree preds . tS) $ wordsCFTok s --- for TC tokens
|
||||||
|
|
||||||
|
--- TODO: use trie instead of bintree; integrate with morphology
|
||||||
|
predefLexer :: [CFRuleGroup] -> ([CFRuleGroup],BinTree (CFTok,[(CFCat, CFFun)]))
|
||||||
|
predefLexer groups = (reverse ruls, sorted2tree $ sortAssocs preds) where
|
||||||
|
(ruls,preds) = foldr mkOne ([],[]) groups
|
||||||
|
mkOne group@(cat,rules) (rs,ps) = (rule:rs,pre ++ ps) where
|
||||||
|
(rule,pre) = case partition isLexical rules of
|
||||||
|
([],_) -> (group,[])
|
||||||
|
(ls,rest) -> ((cat,rest), concatMap mkLexRule ls) --- useLexRule cat : rest
|
||||||
|
isLexical (f,(c,its)) = case its of
|
||||||
|
[CFTerm (RegAlts ws)] -> True
|
||||||
|
_ -> False
|
||||||
|
-- useLexRule cat = (dummyCFFun,(cat,[CFNonterm (lexCFCat cat)])) -- not needed
|
||||||
|
mkLexRule r = case r of
|
||||||
|
(fun,(cat,[CFTerm (RegAlts ws)])) -> [(tS w, (cat,fun)) | w <- ws]
|
||||||
|
_ -> []
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ type BindVs = [[I.Ident]]
|
|||||||
-- (2) term2trm: restore Bindings from Binds
|
-- (2) term2trm: restore Bindings from Binds
|
||||||
|
|
||||||
tree2term :: CFTree -> Err ITerm
|
tree2term :: CFTree -> Err ITerm
|
||||||
|
-- tree2term (CFTree (f,(_,[t]))) | f == dummyCFFun = tree2term t -- not used
|
||||||
tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
|
tree2term (CFTree (cff@(CFFun (fun,pro)), (_,trees))) = case fun of
|
||||||
AM _ -> return IMeta
|
AM _ -> return IMeta
|
||||||
_ -> do
|
_ -> do
|
||||||
|
|||||||
@@ -191,4 +191,5 @@ evalPrintname gr c ppr lin =
|
|||||||
C x y -> C (oneBranch x) (oneBranch y)
|
C x y -> C (oneBranch x) (oneBranch y)
|
||||||
S x _ -> oneBranch x
|
S x _ -> oneBranch x
|
||||||
P x _ -> oneBranch x
|
P x _ -> oneBranch x
|
||||||
|
Alts (d,_) -> oneBranch d
|
||||||
_ -> t
|
_ -> t
|
||||||
|
|||||||
@@ -88,6 +88,7 @@ stateGrammarST = grammar
|
|||||||
stateCF = cf
|
stateCF = cf
|
||||||
stateMorpho = morpho
|
stateMorpho = morpho
|
||||||
stateOptions = loptions
|
stateOptions = loptions
|
||||||
|
stateGrammarWords = map fst . tree2list . stateMorpho
|
||||||
|
|
||||||
cncModuleIdST = stateGrammarST
|
cncModuleIdST = stateGrammarST
|
||||||
|
|
||||||
|
|||||||
@@ -32,12 +32,21 @@ type Ss = [String]
|
|||||||
|
|
||||||
matchPrefix :: Ss -> [(Ss,[String])] -> [String] -> Ss
|
matchPrefix :: Ss -> [(Ss,[String])] -> [String] -> Ss
|
||||||
matchPrefix s vs t =
|
matchPrefix s vs t =
|
||||||
head ([u | (u,as) <- vs, any (\c -> isPrefixOf c (concat t)) as] ++ [s])
|
head $ [u |
|
||||||
|
(u,as) <- vs,
|
||||||
|
any (\c -> isPrefixOf c (concat (unmarkup t))) as
|
||||||
|
] ++ [s]
|
||||||
|
|
||||||
matchSuffix :: String -> Ss -> [(Ss,[String])] -> Ss
|
matchSuffix :: String -> Ss -> [(Ss,[String])] -> Ss
|
||||||
matchSuffix t s vs =
|
matchSuffix t s vs =
|
||||||
head ([u | (u,as) <- vs, any (\c -> isSuffixOf c t) as] ++ [s])
|
head ([u | (u,as) <- vs, any (\c -> isSuffixOf c t) as] ++ [s])
|
||||||
|
|
||||||
|
unmarkup :: [String] -> [String]
|
||||||
|
unmarkup = filter (not . isXMLtag) where
|
||||||
|
isXMLtag s = case s of
|
||||||
|
'<':cs@(_:_) -> last cs == '>'
|
||||||
|
_ -> False
|
||||||
|
|
||||||
str2strings :: Str -> Ss
|
str2strings :: Str -> Ss
|
||||||
str2strings (Str st) = alls st where
|
str2strings (Str st) = alls st where
|
||||||
alls st = case st of
|
alls st = case st of
|
||||||
|
|||||||
@@ -44,6 +44,8 @@ cPredefAbs = identC "PredefAbs"
|
|||||||
cInt = identC "Int"
|
cInt = identC "Int"
|
||||||
cString = identC "String"
|
cString = identC "String"
|
||||||
|
|
||||||
|
isPredefCat c = elem c [cInt,cString]
|
||||||
|
|
||||||
eType :: Exp
|
eType :: Exp
|
||||||
eType = Sort "Type"
|
eType = Sort "Type"
|
||||||
|
|
||||||
|
|||||||
@@ -125,6 +125,7 @@ newParser = iOpt "new"
|
|||||||
noCF = iOpt "nocf"
|
noCF = iOpt "nocf"
|
||||||
checkCirc = iOpt "nocirc"
|
checkCirc = iOpt "nocirc"
|
||||||
noCheckCirc = iOpt "nocheckcirc"
|
noCheckCirc = iOpt "nocheckcirc"
|
||||||
|
lexerByNeed = iOpt "cflexer"
|
||||||
|
|
||||||
-- linearization
|
-- linearization
|
||||||
allLin = iOpt "all"
|
allLin = iOpt "all"
|
||||||
|
|||||||
@@ -148,6 +148,7 @@ customGrammarPrinter =
|
|||||||
,(strCI "lbnf", prLBNF . stateCF)
|
,(strCI "lbnf", prLBNF . stateCF)
|
||||||
,(strCI "morpho", prMorpho . stateMorpho)
|
,(strCI "morpho", prMorpho . stateMorpho)
|
||||||
,(strCI "opts", prOpts . stateOptions)
|
,(strCI "opts", prOpts . stateOptions)
|
||||||
|
,(strCI "words", unwords . stateGrammarWords)
|
||||||
{- ----
|
{- ----
|
||||||
(strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT
|
(strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT
|
||||||
,(strCI "canon", showCanon "Lang" . stateGrammarST)
|
,(strCI "canon", showCanon "Lang" . stateGrammarST)
|
||||||
|
|||||||
@@ -92,10 +92,10 @@ tokens2trms opts sg cn parser as = do
|
|||||||
verb = oElem beVerbose opts
|
verb = oElem beVerbose opts
|
||||||
forgive = oElem forgiveParse opts
|
forgive = oElem forgiveParse opts
|
||||||
|
|
||||||
unknown ts = case filter noMatch ts of
|
unknown ts = case filter noMatch [t | t@(TS _) <- ts] of
|
||||||
[] -> "where all words are known"
|
[] -> "where all words are known"
|
||||||
us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
|
us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
|
||||||
terminals = map TS $ cfTokens $ stateCF sg
|
terminals = map TS $ stateGrammarWords sg
|
||||||
noMatch t = all (not . compatTok t) terminals
|
noMatch t = all (not . compatTok t) terminals
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
module Today where today = "Fri Nov 14 14:45:24 CET 2003"
|
module Today where today = "Mon Nov 17 17:04:28 CET 2003"
|
||||||
|
|||||||
Reference in New Issue
Block a user