1
0
forked from GitHub/gf-core
Files
gf-core/src/GF/Text/Text.hs
2004-08-15 21:02:10 +00:00

92 lines
3.1 KiB
Haskell

module Text where
import Operations
import Char
-- elementary text postprocessing. AR 21/11/2001
-- This is very primitive indeed. The functions should work on
-- token lists and not on strings. AR 5/12/2002
-- XML hack 14/8/2004; not in use yet
-- does not apply untokenizer within XML tags --- heuristic "< "
-- this function is applied from top level...
untokWithXML :: (String -> String) -> String -> String
untokWithXML unt s = case s of
'<':cs@(c:_) | isAlpha c -> '<':beg ++ ">" ++ unto (drop 1 rest) where
(beg,rest) = span (/='>') cs
'<':cs -> '<':unto cs ---
[] -> []
_ -> unt beg ++ unto rest where
(beg,rest) = span (/='<') s
where
unto = untokWithXML unt
-- ... whereas this one is embedded on a branch
exceptXML :: (String -> String) -> String -> String
exceptXML unt s = '<':beg ++ ">" ++ unt (drop 1 rest) where
(beg,rest) = span (/='>') s
formatAsTextLit :: String -> String
formatAsTextLit = formatAsText . unwords . map unStringLit . words
--- hope that there will be deforestation...
formatAsCodeLit :: String -> String
formatAsCodeLit = formatAsCode . unwords . map unStringLit . words
formatAsText :: String -> String
formatAsText = unwords . format . cap . words where
format ws = case ws of
w : c : ww | major c -> (w ++ c) : format (cap ww)
w : c : ww | minor c -> (w ++ c) : format ww
c : ww | para c -> "\n\n" : format ww
w : ww -> w : format ww
[] -> []
cap (p:(c:cs):ww) | para p = p : (toUpper c : cs) : ww
cap ((c:cs):ww) = (toUpper c : cs) : ww
cap [] = []
major = flip elem (map singleton ".!?")
minor = flip elem (map singleton ",:;")
para = (=="<p>")
formatAsCode :: String -> String
formatAsCode = rend 0 . words where
-- render from BNF Converter
rend i ss = case ss of
"[" :ts -> cons "[" $ rend i ts
"(" :ts -> cons "(" $ rend i ts
"{" :ts -> cons "{" $ new (i+1) $ rend (i+1) ts
"}" : ";":ts -> new (i-1) $ space "}" $ cons ";" $ new (i-1) $ rend (i-1) ts
"}" :ts -> new (i-1) $ cons "}" $ new (i-1) $ rend (i-1) ts
";" :ts -> cons ";" $ new i $ rend i ts
t : "," :ts -> cons t $ space "," $ rend i ts
t : ")" :ts -> cons t $ cons ")" $ rend i ts
t : "]" :ts -> cons t $ cons "]" $ rend i ts
t :ts -> space t $ rend i ts
_ -> ""
cons s t = s ++ t
new i s = '\n' : replicate (2*i) ' ' ++ dropWhile isSpace s
space t s = if null s then t else t ++ " " ++ s
performBinds :: String -> String
performBinds = unwords . format . words where
format ws = case ws of
w : "&+" : u : ws -> format ((w ++ u) : ws)
w : ws -> w : format ws
[] -> []
unStringLit :: String -> String
unStringLit s = case s of
c : cs | strlim c && strlim (last cs) -> init cs
_ -> s
where
strlim = (=='\'')
concatRemSpace :: String -> String
concatRemSpace = concat . words
{-
concatRemSpace s = case s of
'<':cs -> exceptXML concatRemSpace cs
c : cs | isSpace c -> concatRemSpace cs
c :cs -> c : concatRemSpace cs
_ -> s
-}