From 74e7f84e0f9c241c807dc7e57f0d4f5cff8041e6 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 29 Sep 2005 12:20:08 +0000 Subject: [PATCH] lexer=ignore --- lib/resource/english/PredicEng.gf | 2 +- src/GF/UseGrammar/Custom.hs | 5 +++-- src/GF/UseGrammar/Tokenize.hs | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/resource/english/PredicEng.gf b/lib/resource/english/PredicEng.gf index d90440c04..062d441d2 100644 --- a/lib/resource/english/PredicEng.gf +++ b/lib/resource/english/PredicEng.gf @@ -4,7 +4,7 @@ concrete PredicEng of Predic = CategoriesEng ** open Prelude, SyntaxEng, DeptypEng in { - flags optimize=all ; + flags optimize=all_subs ; lincat VType, CType = SS ; diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index c7c68362b..15e909004 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/09/18 22:55:46 $ +-- > CVS $Date: 2005/09/29 13:20:08 $ -- > CVS $Author: aarne $ --- > CVS $Revision: 1.74 $ +-- > CVS $Revision: 1.75 $ -- -- A database for customizable GF shell commands. -- @@ -410,6 +410,7 @@ customTokenizer = ,(strCI "codelit", lexHaskellLiteral . stateIsWord) ,(strCI "textlit", lexTextLiteral . stateIsWord) ,(strCI "codeC", const $ lexC2M) + ,(strCI "ignore", \gr -> lexIgnore (stateIsWord gr) . tokLits) ,(strCI "codeCHigh", const $ lexC2M' True) -- add your own tokenizers here ] diff --git a/src/GF/UseGrammar/Tokenize.hs b/src/GF/UseGrammar/Tokenize.hs index 6a8119ac0..bfc0e53bb 100644 --- a/src/GF/UseGrammar/Tokenize.hs +++ b/src/GF/UseGrammar/Tokenize.hs @@ -5,9 +5,9 @@ -- Stability : (stable) -- Portability : (portable) -- --- > CVS $Date: 2005/04/21 16:23:52 $ --- > CVS $Author: bringert $ --- > CVS $Revision: 1.13 $ +-- > CVS $Date: 2005/09/29 13:20:08 $ +-- > CVS $Author: aarne $ +-- > CVS $Revision: 1.14 $ -- -- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002. -- an entry for each is included in 'Custom.customTokenizer' @@ -22,6 +22,7 @@ module GF.UseGrammar.Tokenize ( tokWords, lexText, lexC2M, lexC2M', lexTextLiteral, + lexIgnore ) where import GF.Data.Operations @@ -184,3 +185,11 @@ lexHaskellVar isKnown = unknown2var isKnown . lexHaskell eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs) eitherUpper isKnown w = isKnown w +-- ignore unknown tokens (e.g. keyword spotting) + +lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok] +lexIgnore isKnown = concatMap mkOne where + mkOne t@(TS s) + | isKnown s = [t] + | otherwise = [] + mkOne t = [t]