From face83bb180b10f50ea3fca5a2374b383011b3c7 Mon Sep 17 00:00:00 2001 From: aarne Date: Sun, 15 Aug 2004 21:02:10 +0000 Subject: [PATCH] experiments with unlexer --- src/GF/API.hs | 6 +++++- src/GF/Compile/ShellState.hs | 3 ++- src/GF/Text/Text.hs | 28 ++++++++++++++++++++++++++++ src/GF/UseGrammar/Custom.hs | 2 +- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/GF/API.hs b/src/GF/API.hs index 2d23da0f6..5a55f5b1f 100644 --- a/src/GF/API.hs +++ b/src/GF/API.hs @@ -26,6 +26,7 @@ import CMacros import Transfer import qualified Generate as Gen +import Text (untokWithXML) import Option import Custom import ShellState @@ -208,7 +209,10 @@ optLinearizeTree opts0 gr t = case getOptVal opts transferFun of | otherwise = return . unlines . map untok . optIntOrOne . linTree2strings mk g c g = grammar gr c = cncId gr - untok = customOrDefault opts useUntokenizer customUntokenizer gr + untok = if False ---- oElem (markLin markOptXML) opts + then untokWithXML unt + else unt + unt = customOrDefault opts useUntokenizer customUntokenizer gr optIntOrOne = take $ optIntOrN opts flagNumber 1 {- ---- diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs index e1e64e85c..4b1e5a8f3 100644 --- a/src/GF/Compile/ShellState.hs +++ b/src/GF/Compile/ShellState.hs @@ -122,7 +122,8 @@ updateShellState opts sh ((_,sgr,gr),rts) = do a' = ifNull Nothing (return . head) $ allAbstracts cgr0 abstr0 <- case abstract sh of Just a -> do - -- test that abstract is compatible + -- test that abstract is compatible --- unsafe exception for old? + --- if True oElem showOld opts then return () else testErr (maybe True (a==) a') ("expected abstract" +++ P.prt a) return $ Just a _ -> return a' diff --git a/src/GF/Text/Text.hs b/src/GF/Text/Text.hs index 2fbf97fd3..de29e9026 100644 --- a/src/GF/Text/Text.hs +++ b/src/GF/Text/Text.hs @@ -6,7 +6,25 @@ import Char -- elementary text postprocessing. AR 21/11/2001 -- This is very primitive indeed. The functions should work on -- token lists and not on strings. AR 5/12/2002 +-- XML hack 14/8/2004; not in use yet +-- does not apply untokenizer within XML tags --- heuristic "< " +-- this function is applied from top level... +untokWithXML :: (String -> String) -> String -> String +untokWithXML unt s = case s of + '<':cs@(c:_) | isAlpha c -> '<':beg ++ ">" ++ unto (drop 1 rest) where + (beg,rest) = span (/='>') cs + '<':cs -> '<':unto cs --- + [] -> [] + _ -> unt beg ++ unto rest where + (beg,rest) = span (/='<') s + where + unto = untokWithXML unt + +-- ... whereas this one is embedded on a branch +exceptXML :: (String -> String) -> String -> String +exceptXML unt s = '<':beg ++ ">" ++ unt (drop 1 rest) where + (beg,rest) = span (/='>') s formatAsTextLit :: String -> String formatAsTextLit = formatAsText . unwords . map unStringLit . words @@ -62,3 +80,13 @@ unStringLit s = case s of _ -> s where strlim = (=='\'') + +concatRemSpace :: String -> String +concatRemSpace = concat . words +{- +concatRemSpace s = case s of + '<':cs -> exceptXML concatRemSpace cs + c : cs | isSpace c -> concatRemSpace cs + c :cs -> c : concatRemSpace cs + _ -> s +-} \ No newline at end of file diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 3fb386c79..60c906fa0 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -301,7 +301,7 @@ customUntokenizer = ,(strCI "code", const $ formatAsCode) ,(strCI "textlit", const $ formatAsTextLit) ,(strCI "codelit", const $ formatAsCodeLit) - ,(strCI "concat", const $ concat . words) + ,(strCI "concat", const $ concatRemSpace) ,(strCI "glue", const $ performBinds) ,(strCI "reverse", const $ reverse) ,(strCI "bind", const $ performBinds) -- backward compat