forked from GitHub/gf-core
Founding the newly structured GF2.0 cvs archive.
This commit is contained in:
98
src/GF/UseGrammar/Parsing.hs
Normal file
98
src/GF/UseGrammar/Parsing.hs
Normal file
@@ -0,0 +1,98 @@
|
||||
module Parsing where
|
||||
|
||||
import CheckM
|
||||
import qualified AbsGFC as C
|
||||
import GFC
|
||||
import MkGFC (trExp) ----
|
||||
import CMacros
|
||||
import Linear
|
||||
import Str
|
||||
import CF
|
||||
import CFIdent
|
||||
import Ident
|
||||
import TypeCheck
|
||||
import Values
|
||||
--import CFMethod
|
||||
import Tokenize
|
||||
import Profile
|
||||
import Option
|
||||
import Custom
|
||||
import ShellState
|
||||
|
||||
import Operations
|
||||
|
||||
import List (nub)
|
||||
import Monad (liftM)
|
||||
|
||||
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
|
||||
|
||||
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
|
||||
parseString os sg cat = liftM fst . parseStringMsg os sg cat
|
||||
|
||||
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
|
||||
parseStringMsg os sg cat s = do
|
||||
(ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
|
||||
return (ts,unlines ss)
|
||||
|
||||
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
|
||||
parseStringC opts0 sg cat s = do
|
||||
let opts = unionOptions opts0 $ stateOptions sg
|
||||
cf = stateCF sg
|
||||
gr = stateGrammarST sg
|
||||
cn = cncId sg
|
||||
tok = customOrDefault opts useTokenizer customTokenizer sg
|
||||
parser = customOrDefault opts useParser customParser sg cat
|
||||
tokens2trms opts sg cn parser (tok s)
|
||||
|
||||
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
|
||||
tokens2trms opts sg cn parser as = do
|
||||
let res@(trees,info) = parser as
|
||||
ts0 <- return $ nub (cfParseResults res)
|
||||
ts <- case () of
|
||||
_ | null ts0 -> checkWarn "No success in cf parsing" >> return []
|
||||
_ | raw -> do
|
||||
ts1 <- return (map cf2trm0 ts0) ----- should not need annot
|
||||
mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated
|
||||
_ -> do
|
||||
(ts1,_) <- checkErr $ mapErr postParse ts0
|
||||
ts2 <- mapM (checkErr . (annotate gr) . trExp) ts1 ----
|
||||
if forgive then return ts2 else do
|
||||
let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
|
||||
ps = [t | (t,ss) <- tsss,
|
||||
any (compatToks as) (map str2cftoks ss)]
|
||||
if null ps
|
||||
then raise $ "Failure in morphology." ++
|
||||
if verb
|
||||
then "\nPossible corrections: " +++++
|
||||
unlines (nub (map sstr (concatMap snd tsss)))
|
||||
else ""
|
||||
else return ps
|
||||
|
||||
if verb
|
||||
then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info
|
||||
else return ()
|
||||
|
||||
return $ optIntOrAll opts flagNumber $ nub ts
|
||||
where
|
||||
gr = stateGrammarST sg
|
||||
|
||||
raw = oElem rawParse opts
|
||||
verb = oElem beVerbose opts
|
||||
forgive = oElem forgiveParse opts
|
||||
|
||||
unknown ts = case filter noMatch ts of
|
||||
[] -> "where all words are known"
|
||||
us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
|
||||
terminals = map TS $ cfTokens $ stateCF sg
|
||||
noMatch t = all (not . compatTok t) terminals
|
||||
|
||||
|
||||
--- too much type checking in building term info? return FullTerm to save work?
|
||||
|
||||
-- raw parsing: so simple it is for a context-free CF grammar
|
||||
cf2trm0 :: CFTree -> C.Exp
|
||||
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
|
||||
where
|
||||
cffun2trm (CFFun (fun,_)) = fun
|
||||
mkApp = foldl C.EApp
|
||||
mkAppAtom a = mkApp (C.EAtom a)
|
||||
Reference in New Issue
Block a user