experiments with unlexer

2004-08-15 21:02:10 +00:00
parent f65d08638b
commit c96162ba8b
4 changed files with 36 additions and 3 deletions
--- a/src/GF/Text/Text.hs
+++ b/src/GF/Text/Text.hs
@@ -6,7 +6,25 @@ import Char
 -- elementary text postprocessing. AR 21/11/2001
 -- This is very primitive indeed. The functions should work on
 -- token lists and not on strings. AR 5/12/2002
+-- XML hack 14/8/2004; not in use yet

+-- does not apply the untokenizer inside XML tags --- heuristic: '<' directly followed by a letter opens a tag
+-- this function is applied from the top level and scans the whole string...
+untokWithXML :: (String -> String) -> String -> String -- first argument: untokenizer applied to the text between tags
+untokWithXML unt s = case s of -- dispatch on how the remaining input starts
+  '<':cs@(c:_) | isAlpha c -> '<':beg ++ ">" ++ unto (drop 1 rest) where -- '<' + letter: tag copied verbatim, unt not applied to it
+                  (beg,rest) = span (/='>') cs -- beg: tag text up to the first '>'; note a '>' is emitted even if cs contains none
+  '<':cs -> '<':unto cs --- any other '<' (e.g. the one in "</"): emitted as is, scanning resumes right after it
+  [] -> [] -- end of input
+  _ -> unt beg ++ unto rest where -- plain text: untokenize the chunk before the next '<', then continue
+               (beg,rest) = span (/='<') s -- beg: everything before the next '<' (all of s if there is none)
+ where
+   unto = untokWithXML unt -- recursion on the remainder with the same untokenizer
+
+-- ... whereas this one is called from a branch that has already consumed the '<' (it re-emits it)
+exceptXML :: (String -> String) -> String -> String -- re-emits a leading '<', so s is presumably the text just after one (confirm at call sites)
+exceptXML unt s = '<':beg ++ ">" ++ unt (drop 1 rest) where -- tag text copied verbatim; unt is applied only to what follows the '>'
+  (beg,rest) = span (/='>') s -- beg: up to the first '>'; note a '>' is emitted even if s contains none

 formatAsTextLit :: String -> String
 formatAsTextLit = formatAsText . unwords . map unStringLit . words 
@@ -62,3 +80,13 @@ unStringLit s = case s of
  _ -> s
 where
   strlim = (=='\'')
+
+concatRemSpace :: String -> String -- removes every whitespace character from the string
+concatRemSpace = concat . words -- split on whitespace, then join the pieces with nothing in between
+{- disabled alternative: same whitespace removal, but hands the text after a '<' to exceptXML
+concatRemSpace s = case s of
+  '<':cs -> exceptXML concatRemSpace cs
+  c : cs | isSpace c -> concatRemSpace cs
+  c :cs -> c : concatRemSpace cs
+  _ -> s
+-}