forked from GitHub/gf-core
lexer=ignore
This commit is contained in:
@@ -4,7 +4,7 @@
|
|||||||
concrete PredicEng of Predic = CategoriesEng **
|
concrete PredicEng of Predic = CategoriesEng **
|
||||||
open Prelude, SyntaxEng, DeptypEng in {
|
open Prelude, SyntaxEng, DeptypEng in {
|
||||||
|
|
||||||
flags optimize=all ;
|
flags optimize=all_subs ;
|
||||||
|
|
||||||
lincat
|
lincat
|
||||||
VType, CType = SS ;
|
VType, CType = SS ;
|
||||||
|
|||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/09/18 22:55:46 $
|
-- > CVS $Date: 2005/09/29 13:20:08 $
|
||||||
-- > CVS $Author: aarne $
|
-- > CVS $Author: aarne $
|
||||||
-- > CVS $Revision: 1.74 $
|
-- > CVS $Revision: 1.75 $
|
||||||
--
|
--
|
||||||
-- A database for customizable GF shell commands.
|
-- A database for customizable GF shell commands.
|
||||||
--
|
--
|
||||||
@@ -410,6 +410,7 @@ customTokenizer =
|
|||||||
,(strCI "codelit", lexHaskellLiteral . stateIsWord)
|
,(strCI "codelit", lexHaskellLiteral . stateIsWord)
|
||||||
,(strCI "textlit", lexTextLiteral . stateIsWord)
|
,(strCI "textlit", lexTextLiteral . stateIsWord)
|
||||||
,(strCI "codeC", const $ lexC2M)
|
,(strCI "codeC", const $ lexC2M)
|
||||||
|
,(strCI "ignore", \gr -> lexIgnore (stateIsWord gr) . tokLits)
|
||||||
,(strCI "codeCHigh", const $ lexC2M' True)
|
,(strCI "codeCHigh", const $ lexC2M' True)
|
||||||
-- add your own tokenizers here
|
-- add your own tokenizers here
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -5,9 +5,9 @@
|
|||||||
-- Stability : (stable)
|
-- Stability : (stable)
|
||||||
-- Portability : (portable)
|
-- Portability : (portable)
|
||||||
--
|
--
|
||||||
-- > CVS $Date: 2005/04/21 16:23:52 $
|
-- > CVS $Date: 2005/09/29 13:20:08 $
|
||||||
-- > CVS $Author: bringert $
|
-- > CVS $Author: aarne $
|
||||||
-- > CVS $Revision: 1.13 $
|
-- > CVS $Revision: 1.14 $
|
||||||
--
|
--
|
||||||
-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
|
-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
|
||||||
-- an entry for each is included in 'Custom.customTokenizer'
|
-- an entry for each is included in 'Custom.customTokenizer'
|
||||||
@@ -22,6 +22,7 @@ module GF.UseGrammar.Tokenize ( tokWords,
|
|||||||
lexText,
|
lexText,
|
||||||
lexC2M, lexC2M',
|
lexC2M, lexC2M',
|
||||||
lexTextLiteral,
|
lexTextLiteral,
|
||||||
|
lexIgnore
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import GF.Data.Operations
|
import GF.Data.Operations
|
||||||
@@ -184,3 +185,11 @@ lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
|
|||||||
eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs)
|
eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs)
|
||||||
eitherUpper isKnown w = isKnown w
|
eitherUpper isKnown w = isKnown w
|
||||||
|
|
||||||
|
-- | Ignore unknown tokens (e.g. for keyword spotting): a 'TS' token is kept
-- only when @isKnown@ accepts its string; every other kind of token passes
-- through unchanged.
lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok]
lexIgnore isKnown = filter keep
  where
    -- Only 'TS' tokens are tested against the predicate.
    keep (TS str) = isKnown str
    keep _        = True
|
||||||
|
|||||||
Reference in New Issue
Block a user