From 74e7f84e0f9c241c807dc7e57f0d4f5cff8041e6 Mon Sep 17 00:00:00 2001
From: aarne <unknown>
Date: Thu, 29 Sep 2005 12:20:08 +0000
Subject: [PATCH] lexer=ignore

---
 lib/resource/english/PredicEng.gf |  2 +-
 src/GF/UseGrammar/Custom.hs       |  5 +++--
 src/GF/UseGrammar/Tokenize.hs     | 15 ++++++++++++---
 3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/lib/resource/english/PredicEng.gf b/lib/resource/english/PredicEng.gf
index d90440c04..062d441d2 100644
--- a/lib/resource/english/PredicEng.gf
+++ b/lib/resource/english/PredicEng.gf
@@ -4,7 +4,7 @@
 concrete PredicEng of Predic = CategoriesEng ** 
   open Prelude, SyntaxEng, DeptypEng in {
 
-  flags optimize=all ;
+  flags optimize=all_subs ;
 
   lincat
     VType, CType = SS ;
diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs
index c7c68362b..15e909004 100644
--- a/src/GF/UseGrammar/Custom.hs
+++ b/src/GF/UseGrammar/Custom.hs
@@ -5,9 +5,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/09/18 22:55:46 $ 
+-- > CVS $Date: 2005/09/29 13:20:08 $ 
 -- > CVS $Author: aarne $
--- > CVS $Revision: 1.74 $
+-- > CVS $Revision: 1.75 $
 --
 -- A database for customizable GF shell commands. 
 --
@@ -410,6 +410,7 @@ customTokenizer =
   ,(strCI "codelit",   lexHaskellLiteral . stateIsWord)
   ,(strCI "textlit",   lexTextLiteral . stateIsWord)
   ,(strCI "codeC",     const $ lexC2M)
+  ,(strCI "ignore",    \gr -> lexIgnore (stateIsWord gr) . tokLits)
   ,(strCI "codeCHigh", const $ lexC2M' True)
 -- add your own tokenizers here
   ]
diff --git a/src/GF/UseGrammar/Tokenize.hs b/src/GF/UseGrammar/Tokenize.hs
index 6a8119ac0..bfc0e53bb 100644
--- a/src/GF/UseGrammar/Tokenize.hs
+++ b/src/GF/UseGrammar/Tokenize.hs
@@ -5,9 +5,9 @@
 -- Stability   : (stable)
 -- Portability : (portable)
 --
--- > CVS $Date: 2005/04/21 16:23:52 $ 
--- > CVS $Author: bringert $
--- > CVS $Revision: 1.13 $
+-- > CVS $Date: 2005/09/29 13:20:08 $ 
+-- > CVS $Author: aarne $
+-- > CVS $Revision: 1.14 $
 --
 -- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
 -- an entry for each is included in 'Custom.customTokenizer'
@@ -22,6 +22,7 @@ module GF.UseGrammar.Tokenize ( tokWords,
 		  lexText,
 		  lexC2M, lexC2M',
 		  lexTextLiteral,
+                  lexIgnore
 		) where
 
 import GF.Data.Operations
@@ -184,3 +185,11 @@ lexHaskellVar     isKnown = unknown2var isKnown . lexHaskell
 eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs)
 eitherUpper isKnown w = isKnown w
 
+-- drop string tokens the predicate rejects (e.g. keyword spotting)
+
+lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok]
+lexIgnore isKnown = filter keep where
+  -- a TS token survives only when its string is known
+  keep (TS s) = isKnown s
+  -- any other kind of token is always kept
+  keep _      = True