experiments with unlexer

This commit is contained in:
aarne
2004-08-15 21:02:10 +00:00
parent a454e7f6c4
commit face83bb18
4 changed files with 36 additions and 3 deletions

View File

@@ -26,6 +26,7 @@ import CMacros
import Transfer import Transfer
import qualified Generate as Gen import qualified Generate as Gen
import Text (untokWithXML)
import Option import Option
import Custom import Custom
import ShellState import ShellState
@@ -208,7 +209,10 @@ optLinearizeTree opts0 gr t = case getOptVal opts transferFun of
| otherwise = return . unlines . map untok . optIntOrOne . linTree2strings mk g c | otherwise = return . unlines . map untok . optIntOrOne . linTree2strings mk g c
g = grammar gr g = grammar gr
c = cncId gr c = cncId gr
untok = customOrDefault opts useUntokenizer customUntokenizer gr untok = if False ---- oElem (markLin markOptXML) opts
then untokWithXML unt
else unt
unt = customOrDefault opts useUntokenizer customUntokenizer gr
optIntOrOne = take $ optIntOrN opts flagNumber 1 optIntOrOne = take $ optIntOrN opts flagNumber 1
{- ---- {- ----

View File

@@ -122,7 +122,8 @@ updateShellState opts sh ((_,sgr,gr),rts) = do
a' = ifNull Nothing (return . head) $ allAbstracts cgr0 a' = ifNull Nothing (return . head) $ allAbstracts cgr0
abstr0 <- case abstract sh of abstr0 <- case abstract sh of
Just a -> do Just a -> do
-- test that abstract is compatible -- test that abstract is compatible --- unsafe exception for old?
--- if True oElem showOld opts then return () else
testErr (maybe True (a==) a') ("expected abstract" +++ P.prt a) testErr (maybe True (a==) a') ("expected abstract" +++ P.prt a)
return $ Just a return $ Just a
_ -> return a' _ -> return a'

View File

@@ -6,7 +6,25 @@ import Char
-- elementary text postprocessing. AR 21/11/2001 -- elementary text postprocessing. AR 21/11/2001
-- This is very primitive indeed. The functions should work on -- This is very primitive indeed. The functions should work on
-- token lists and not on strings. AR 5/12/2002 -- token lists and not on strings. AR 5/12/2002
-- XML hack 14/8/2004; not in use yet
-- does not apply untokenizer within XML tags --- heuristic "< "
-- this function is applied from top level...
untokWithXML :: (String -> String) -> String -> String
untokWithXML unt s = case s of
'<':cs@(c:_) | isAlpha c -> '<':beg ++ ">" ++ unto (drop 1 rest) where
(beg,rest) = span (/='>') cs
'<':cs -> '<':unto cs ---
[] -> []
_ -> unt beg ++ unto rest where
(beg,rest) = span (/='<') s
where
unto = untokWithXML unt
-- ... whereas this one is embedded on a branch
exceptXML :: (String -> String) -> String -> String
exceptXML unt s = '<':beg ++ ">" ++ unt (drop 1 rest) where
(beg,rest) = span (/='>') s
formatAsTextLit :: String -> String formatAsTextLit :: String -> String
formatAsTextLit = formatAsText . unwords . map unStringLit . words formatAsTextLit = formatAsText . unwords . map unStringLit . words
@@ -62,3 +80,13 @@ unStringLit s = case s of
_ -> s _ -> s
where where
strlim = (=='\'') strlim = (=='\'')
concatRemSpace :: String -> String
concatRemSpace = concat . words
{-
concatRemSpace s = case s of
'<':cs -> exceptXML concatRemSpace cs
c : cs | isSpace c -> concatRemSpace cs
c :cs -> c : concatRemSpace cs
_ -> s
-}

View File

@@ -301,7 +301,7 @@ customUntokenizer =
,(strCI "code", const $ formatAsCode) ,(strCI "code", const $ formatAsCode)
,(strCI "textlit", const $ formatAsTextLit) ,(strCI "textlit", const $ formatAsTextLit)
,(strCI "codelit", const $ formatAsCodeLit) ,(strCI "codelit", const $ formatAsCodeLit)
,(strCI "concat", const $ concat . words) ,(strCI "concat", const $ concatRemSpace)
,(strCI "glue", const $ performBinds) ,(strCI "glue", const $ performBinds)
,(strCI "reverse", const $ reverse) ,(strCI "reverse", const $ reverse)
,(strCI "bind", const $ performBinds) -- backward compat ,(strCI "bind", const $ performBinds) -- backward compat