forked from GitHub/gf-core
made -fcfg default parser; added lexer textvars
This commit is contained in:
@@ -450,6 +450,7 @@ customTokenizer =
|
||||
,(strCI "chars", const $ sg . map (tS . singleton))
|
||||
,(strCI "code", const $ sg . lexHaskell)
|
||||
,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr))
|
||||
,(strCI "textvars", \gr -> sg . (lexTextVar $ stateIsWord gr))
|
||||
,(strCI "text", const $ sg . lexText)
|
||||
,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr))
|
||||
,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
|
||||
|
||||
@@ -54,34 +54,9 @@ parseStringMsg os sg cat s = do
|
||||
return (ts, unlines $ reverse ss)
|
||||
|
||||
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
|
||||
|
||||
---- to test peb's new parser 6/10/2003
|
||||
---- (obsoleted by "newer" below)
|
||||
-- parseStringC opts0 sg cat s
|
||||
-- | oElem newParser opts0 = do
|
||||
-- let pm = maybe "" id $ getOptVal opts0 useParser -- -parser=pm
|
||||
-- ct = cfCat2Cat cat
|
||||
-- ts <- checkErr $ NewOld.newParser pm sg ct s
|
||||
-- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts
|
||||
|
||||
-- to use peb's newer parser 7/4-05
|
||||
parseStringC opts0 sg cat s
|
||||
| oElem newCParser opts0 || oElem newMParser opts0 || oElem newFParser opts0 || oElem newParser opts0 || oElem newerParser opts0 = do
|
||||
let opts = unionOptions opts0 $ stateOptions sg
|
||||
algorithm | oElem newCParser opts0 = "c"
|
||||
| oElem newMParser opts0 = "m"
|
||||
| oElem newFParser opts0 = "f"
|
||||
| otherwise = "c" -- default algorithm
|
||||
strategy = maybe "bottomup" id $ getOptVal opts useParser -- -parser=bottomup/topdown
|
||||
tokenizer = customOrDefault opts useTokenizer customTokenizer sg
|
||||
toks = case tokenizer s of
|
||||
t:_ -> t
|
||||
_ -> [] ---- no support for undet. tok.
|
||||
ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
|
||||
ts' <- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts
|
||||
return $ optIntOrAll opts flagNumber ts'
|
||||
|
||||
parseStringC opts0 sg cat s = do
|
||||
parseStringC opts0 sg cat s
|
||||
| oElem (iOpt "old") opts0 ||
|
||||
(not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do
|
||||
let opts = unionOptions opts0 $ stateOptions sg
|
||||
cf = stateCF sg
|
||||
gr = stateGrammarST sg
|
||||
@@ -92,6 +67,26 @@ parseStringC opts0 sg cat s = do
|
||||
then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
|
||||
else mapM (tokens2trms opts sg cn parser) toks >>= return . concat
|
||||
|
||||
---- | or [oElem p opts0 |
|
||||
---- p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do
|
||||
|
||||
| otherwise = do
|
||||
let opts = unionOptions opts0 $ stateOptions sg
|
||||
algorithm | oElem newCParser opts0 = "c"
|
||||
| oElem newMParser opts0 = "m"
|
||||
| oElem newFParser opts0 = "f"
|
||||
| otherwise = "f" -- default algorithm: FCFG
|
||||
strategy = maybe "bottomup" id $ getOptVal opts useParser
|
||||
-- -parser=bottomup/topdown
|
||||
tokenizer = customOrDefault opts useTokenizer customTokenizer sg
|
||||
toks = case tokenizer s of
|
||||
t:_ -> t
|
||||
_ -> [] ---- no support for undet. tok.
|
||||
ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
|
||||
ts' <- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts
|
||||
return $ optIntOrAll opts flagNumber ts'
|
||||
|
||||
|
||||
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
|
||||
tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
|
||||
where result = parser toks
|
||||
|
||||
@@ -20,6 +20,7 @@ module GF.UseGrammar.Tokenize ( tokWords,
|
||||
lexHaskellLiteral,
|
||||
lexHaskellVar,
|
||||
lexText,
|
||||
lexTextVar,
|
||||
lexC2M, lexC2M',
|
||||
lexTextLiteral,
|
||||
lexIgnore,
|
||||
@@ -58,6 +59,10 @@ isFloat s = case s of
|
||||
'.':cs@(_:_) -> all isDigit cs
|
||||
_ -> False
|
||||
|
||||
isString s = case s of
|
||||
c:cs@(_:_) -> (c == '\'' && d == '\'') || (c == '"' && d == '"') where d = last cs
|
||||
_ -> False
|
||||
|
||||
|
||||
mkCFTok :: String -> CFTok
|
||||
mkCFTok s = case s of
|
||||
@@ -86,6 +91,7 @@ mkLit s
|
||||
| all isDigit s = tI s
|
||||
| otherwise = tL s
|
||||
|
||||
-- obsolete
|
||||
mkTL :: String -> CFTok
|
||||
mkTL s
|
||||
| isFloat s = tF s
|
||||
@@ -104,6 +110,7 @@ lexText :: String -> [CFTok]
|
||||
lexText = uncap . lx where
|
||||
|
||||
lx s = case s of
|
||||
'?':'?':cs -> tS "??" : lx cs
|
||||
p : cs | isMPunct p -> tS [p] : uncap (lx cs)
|
||||
p : cs | isPunct p -> tS [p] : lx cs
|
||||
s : cs | isSpace s -> lx cs
|
||||
@@ -177,7 +184,7 @@ unknown2string isKnown = map mkOne where
|
||||
| isFloat s = tF s
|
||||
| all isDigit s = tI s
|
||||
| otherwise = tL s
|
||||
mkOne t@(TC s) = if isKnown s then t else mkTL s
|
||||
mkOne t@(TC s) = if isKnown s then t else mkLit s
|
||||
mkOne t = t
|
||||
|
||||
unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok]
|
||||
@@ -186,6 +193,7 @@ unknown2var isKnown = map mkOne where
|
||||
mkOne t@(TS s)
|
||||
| isKnown s = t
|
||||
| isFloat s = tF s
|
||||
| isString s = tL (init (tail s))
|
||||
| all isDigit s = tI s
|
||||
| otherwise = tV s
|
||||
mkOne t@(TC s) = if isKnown s then t else tV s
|
||||
@@ -197,6 +205,8 @@ lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText
|
||||
lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell
|
||||
|
||||
lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
|
||||
lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText
|
||||
|
||||
|
||||
eitherUpper isKnown w@(c:cs) = isKnown (toLower c : cs) || isKnown (toUpper c : cs)
|
||||
eitherUpper isKnown w = isKnown w
|
||||
|
||||
Reference in New Issue
Block a user