diff --git a/gf.cabal b/gf.cabal
index 3e823f26e..625865b1b 100644
--- a/gf.cabal
+++ b/gf.cabal
@@ -99,6 +99,7 @@ Library
PGF.Optimize
PGF.Printer
PGF.Utilities
+ PGF.Lexing
other-modules:
PGF.CId
PGF.Expr
diff --git a/src/compiler/GF/Text/Lexing.hs b/src/compiler/GF/Text/Lexing.hs
index 87d6ba4f7..29647a786 100644
--- a/src/compiler/GF/Text/Lexing.hs
+++ b/src/compiler/GF/Text/Lexing.hs
@@ -1,12 +1,12 @@
+-- | Lexers and unlexers - they work on space-separated word strings
module GF.Text.Lexing (stringOp,opInEnv) where
import GF.Text.Transliterations
+import PGF.Lexing
-import Data.Char
+import Data.Char (isSpace)
import Data.List (intersperse)
--- lexers and unlexers - they work on space-separated word strings
-
stringOp :: String -> Maybe (String -> String)
stringOp name = case name of
"chars" -> Just $ appLexer (filter (not . all isSpace) . map return)
@@ -51,84 +51,3 @@ appUnlexer f = f . words
wrapHTML :: String -> String
wrapHTML = unlines . tag . intersperse "
" . lines where
tag ss = "":"
":"":"":"" : ss ++ ["",""]
-
-lexText :: String -> [String]
-lexText = uncap . lext where
- lext s = case s of
- c:cs | isMajorPunct c -> [c] : uncap (lext cs)
- c:cs | isMinorPunct c -> [c] : lext cs
- c:cs | isSpace c -> lext cs
- _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
- _ -> [s]
- uncap s = case s of
- (c:cs):ws -> (toLower c : cs):ws
- _ -> s
-
--- | Haskell lexer, usable for much code
-lexCode :: String -> [String]
-lexCode ss = case lex ss of
- [(w@(_:_),ws)] -> w : lexCode ws
- _ -> []
-
--- | LaTeX style lexer, with "math" environment using Code between $...$
-lexMixed :: String -> [String]
-lexMixed = concat . alternate False where
- alternate env s = case s of
- _:_ -> case break (=='$') s of
- (t,[]) -> lex env t : []
- (t,c:m) -> lex env t : [[c]] : alternate (not env) m
- _ -> []
- lex env = if env then lexCode else lexText
-
-bindTok :: [String] -> String
-bindTok ws = case ws of
- w:"&+":ws2 -> w ++ bindTok ws2
- w:[] -> w
- w:ws2 -> w ++ " " ++ bindTok ws2
- [] -> ""
-
-unlexText :: [String] -> String
-unlexText = unlext where
- unlext s = case s of
- w:[] -> w
- w:[c]:[] | isPunct c -> w ++ [c]
- w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ capitInit (unlext cs)
- w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs
- w:ws -> w ++ " " ++ unlext ws
- _ -> []
-
--- capitalize first letter
-capitInit s = case s of
- c:cs -> toUpper c : cs
- _ -> s
-
--- unquote each string of form "foo"
-unquote = map unq where
- unq s = case s of
- '"':cs@(_:_) | last cs == '"' -> init cs
- _ -> s
-
-unlexCode :: [String] -> String
-unlexCode s = case s of
- w:[] -> w
- [c]:cs | isParen c -> [c] ++ unlexCode cs
- w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs
- w:ws -> w ++ " " ++ unlexCode ws
- _ -> []
-
-
-unlexMixed :: [String] -> String
-unlexMixed = concat . alternate False where
- alternate env s = case s of
- _:_ -> case break (=="$") s of
- (t,[]) -> unlex env t : []
- (t,c:m) -> unlex env t : sep env c : alternate (not env) m
- _ -> []
- unlex env = if env then unlexCode else unlexText
- sep env c = if env then c ++ " " else " " ++ c
-
-isPunct = flip elem ".?!,:;"
-isMajorPunct = flip elem ".?!"
-isMinorPunct = flip elem ",:;"
-isParen = flip elem "()[]{}"
-isClosing = flip elem ")]}"
diff --git a/src/runtime/haskell/PGF/Lexing.hs b/src/runtime/haskell/PGF/Lexing.hs
new file mode 100644
index 000000000..808a2af6f
--- /dev/null
+++ b/src/runtime/haskell/PGF/Lexing.hs
@@ -0,0 +1,91 @@
+module PGF.Lexing where
+import Data.Char(isSpace,toLower,toUpper)
+
+-- * Text lexing
+lexText :: String -> [String]
+lexText = uncap . lext where
+ lext s = case s of
+ c:cs | isMajorPunct c -> [c] : uncap (lext cs)
+ c:cs | isMinorPunct c -> [c] : lext cs
+ c:cs | isSpace c -> lext cs
+ _:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
+ _ -> [s]
+ uncap s = case s of
+ (c:cs):ws -> (toLower c : cs):ws
+ _ -> s
+
+unlexText :: [String] -> String
+unlexText = unlext where
+ unlext s = case s of
+ w:[] -> w
+ w:[c]:[] | isPunct c -> w ++ [c]
+ w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ capitInit (unlext cs)
+ w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs
+ w:ws -> w ++ " " ++ unlext ws
+ _ -> []
+
+-- | Bind tokens separated by Prelude.BIND, i.e. &+
+bindTok :: [String] -> String
+bindTok ws = case ws of
+ w:"&+":ws2 -> w ++ bindTok ws2
+ w:[] -> w
+ w:ws2 -> w ++ " " ++ bindTok ws2
+ [] -> ""
+
+-- * Code lexing
+
+-- | Haskell lexer, usable for much code
+lexCode :: String -> [String]
+lexCode ss = case lex ss of
+ [(w@(_:_),ws)] -> w : lexCode ws
+ _ -> []
+
+unlexCode :: [String] -> String
+unlexCode s = case s of
+ w:[] -> w
+ [c]:cs | isParen c -> [c] ++ unlexCode cs
+ w:cs@([c]:_) | isClosing c -> w ++ unlexCode cs
+ w:ws -> w ++ " " ++ unlexCode ws
+ _ -> []
+
+
+-- * Mixed lexing
+
+-- | LaTeX style lexer, with "math" environment using Code between $...$
+lexMixed :: String -> [String]
+lexMixed = concat . alternate False where
+ alternate env s = case s of
+ _:_ -> case break (=='$') s of
+ (t,[]) -> lex env t : []
+ (t,c:m) -> lex env t : [[c]] : alternate (not env) m
+ _ -> []
+ lex env = if env then lexCode else lexText
+
+unlexMixed :: [String] -> String
+unlexMixed = concat . alternate False where
+ alternate env s = case s of
+ _:_ -> case break (=="$") s of
+ (t,[]) -> unlex env t : []
+ (t,c:m) -> unlex env t : sep env c : alternate (not env) m
+ _ -> []
+ unlex env = if env then unlexCode else unlexText
+ sep env c = if env then c ++ " " else " " ++ c
+
+-- * Additional lexing uitilties
+
+-- | Capitalize first letter
+capitInit s = case s of
+ c:cs -> toUpper c : cs
+ _ -> s
+
+-- | Unquote each string wrapped in double quotes
+unquote = map unq where
+ unq s = case s of
+ '"':cs@(_:_) | last cs == '"' -> init cs
+ _ -> s
+
+isPunct = flip elem ".?!,:;"
+isMajorPunct = flip elem ".?!"
+isMinorPunct = flip elem ",:;"
+isParen = flip elem "()[]{}"
+isClosing = flip elem ")]}"