1
0
forked from GitHub/gf-core

lexer=ignore

This commit is contained in:
aarne
2005-09-29 12:20:08 +00:00
parent 60b6ce0f07
commit 74e7f84e0f
3 changed files with 16 additions and 6 deletions

View File

@@ -4,7 +4,7 @@
concrete PredicEng of Predic = CategoriesEng ** concrete PredicEng of Predic = CategoriesEng **
open Prelude, SyntaxEng, DeptypEng in { open Prelude, SyntaxEng, DeptypEng in {
flags optimize=all ; flags optimize=all_subs ;
lincat lincat
VType, CType = SS ; VType, CType = SS ;

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/09/18 22:55:46 $ -- > CVS $Date: 2005/09/29 13:20:08 $
-- > CVS $Author: aarne $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.74 $ -- > CVS $Revision: 1.75 $
-- --
-- A database for customizable GF shell commands. -- A database for customizable GF shell commands.
-- --
@@ -410,6 +410,7 @@ customTokenizer =
,(strCI "codelit", lexHaskellLiteral . stateIsWord) ,(strCI "codelit", lexHaskellLiteral . stateIsWord)
,(strCI "textlit", lexTextLiteral . stateIsWord) ,(strCI "textlit", lexTextLiteral . stateIsWord)
,(strCI "codeC", const $ lexC2M) ,(strCI "codeC", const $ lexC2M)
,(strCI "ignore", \gr -> lexIgnore (stateIsWord gr) . tokLits)
,(strCI "codeCHigh", const $ lexC2M' True) ,(strCI "codeCHigh", const $ lexC2M' True)
-- add your own tokenizers here -- add your own tokenizers here
] ]

View File

@@ -5,9 +5,9 @@
-- Stability : (stable) -- Stability : (stable)
-- Portability : (portable) -- Portability : (portable)
-- --
-- > CVS $Date: 2005/04/21 16:23:52 $ -- > CVS $Date: 2005/09/29 13:20:08 $
-- > CVS $Author: bringert $ -- > CVS $Author: aarne $
-- > CVS $Revision: 1.13 $ -- > CVS $Revision: 1.14 $
-- --
-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002. -- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
-- an entry for each is included in 'Custom.customTokenizer' -- an entry for each is included in 'Custom.customTokenizer'
@@ -22,6 +22,7 @@ module GF.UseGrammar.Tokenize ( tokWords,
lexText, lexText,
lexC2M, lexC2M', lexC2M, lexC2M',
lexTextLiteral, lexTextLiteral,
lexIgnore
) where ) where
import GF.Data.Operations import GF.Data.Operations
@@ -184,3 +185,11 @@ lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs) eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs)
eitherUpper isKnown w = isKnown w eitherUpper isKnown w = isKnown w
-- | Drop string tokens ('TS') that the grammar does not recognize,
-- keeping everything else untouched — useful for keyword spotting,
-- where unknown surrounding words should simply be skipped.
lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok]
lexIgnore isKnown = filter keep where
  keep (TS s) = isKnown s  -- string token survives only if the grammar knows it
  keep _      = True       -- non-string tokens are always passed through