mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-11 12:12:51 -06:00
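Handle capitalization in lextext and unlextext: lextext now lowercases the first letter of each sentence, and unlextext capitalizes it. Note that, as a consequence, a sentence starting with a proper name gets lexed with a lowercase initial letter if lextext is used.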
This commit is contained in:
@@ -17,7 +17,7 @@ stringOp name = case name of
|
|||||||
"lexmixed" -> Just $ appLexer lexMixed
|
"lexmixed" -> Just $ appLexer lexMixed
|
||||||
"words" -> Just $ appLexer words
|
"words" -> Just $ appLexer words
|
||||||
"bind" -> Just $ appUnlexer bindTok
|
"bind" -> Just $ appUnlexer bindTok
|
||||||
"uncars" -> Just $ appUnlexer concat
|
"unchars" -> Just $ appUnlexer concat
|
||||||
"unlextext" -> Just $ appUnlexer unlexText
|
"unlextext" -> Just $ appUnlexer unlexText
|
||||||
"unlexcode" -> Just $ appUnlexer unlexCode
|
"unlexcode" -> Just $ appUnlexer unlexCode
|
||||||
"unlexmixed" -> Just $ appUnlexer unlexMixed
|
"unlexmixed" -> Just $ appUnlexer unlexMixed
|
||||||
@@ -40,11 +40,16 @@ wrapHTML = unlines . tag . intersperse "<br>" . lines where
|
|||||||
tag ss = "<html>":"<head>":"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />":"</head>":"<body>" : ss ++ ["</body>","</html>"]
|
tag ss = "<html>":"<head>":"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />":"</head>":"<body>" : ss ++ ["</body>","</html>"]
|
||||||
|
|
||||||
lexText :: String -> [String]
|
lexText :: String -> [String]
|
||||||
lexText s = case s of
|
lexText = uncap . lext where
|
||||||
c:cs | isPunct c -> [c] : lexText cs
|
lext s = case s of
|
||||||
c:cs | isSpace c -> lexText cs
|
c:cs | isMajorPunct c -> [c] : uncap (lext cs)
|
||||||
_:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lexText cs
|
c:cs | isMinorPunct c -> [c] : lext cs
|
||||||
|
c:cs | isSpace c -> lext cs
|
||||||
|
_:_ -> let (w,cs) = break (\x -> isSpace x || isPunct x) s in w : lext cs
|
||||||
_ -> [s]
|
_ -> [s]
|
||||||
|
uncap s = case s of
|
||||||
|
(c:cs):ws -> (toLower c : cs):ws
|
||||||
|
_ -> s
|
||||||
|
|
||||||
-- | Haskell lexer, usable for much code
|
-- | Haskell lexer, usable for much code
|
||||||
lexCode :: String -> [String]
|
lexCode :: String -> [String]
|
||||||
@@ -70,12 +75,17 @@ bindTok ws = case ws of
|
|||||||
[] -> ""
|
[] -> ""
|
||||||
|
|
||||||
unlexText :: [String] -> String
|
unlexText :: [String] -> String
|
||||||
unlexText s = case s of
|
unlexText = cap . unlext where
|
||||||
|
unlext s = case s of
|
||||||
w:[] -> w
|
w:[] -> w
|
||||||
w:[c]:[] | isPunct c -> w ++ [c]
|
w:[c]:[] | isPunct c -> w ++ [c]
|
||||||
w:[c]:cs | isPunct c -> w ++ [c] ++ " " ++ unlexText cs
|
w:[c]:cs | isMajorPunct c -> w ++ [c] ++ " " ++ cap (unlext cs)
|
||||||
w:ws -> w ++ " " ++ unlexText ws
|
w:[c]:cs | isMinorPunct c -> w ++ [c] ++ " " ++ unlext cs
|
||||||
|
w:ws -> w ++ " " ++ unlext ws
|
||||||
_ -> []
|
_ -> []
|
||||||
|
cap s = case s of
|
||||||
|
c:cs -> toUpper c : cs
|
||||||
|
_ -> s
|
||||||
|
|
||||||
unlexCode :: [String] -> String
|
unlexCode :: [String] -> String
|
||||||
unlexCode s = case s of
|
unlexCode s = case s of
|
||||||
@@ -97,5 +107,7 @@ unlexMixed = concat . alternate False where
|
|||||||
sep env c = if env then c ++ " " else " " ++ c
|
sep env c = if env then c ++ " " else " " ++ c
|
||||||
|
|
||||||
isPunct = flip elem ".?!,:;"
|
isPunct = flip elem ".?!,:;"
|
||||||
|
isMajorPunct = flip elem ".?!"
|
||||||
|
isMinorPunct = flip elem ",:;"
|
||||||
isParen = flip elem "()[]{}"
|
isParen = flip elem "()[]{}"
|
||||||
isClosing = flip elem ")]}"
|
isClosing = flip elem ")]}"
|
||||||
|
|||||||
Reference in New Issue
Block a user