forked from GitHub/gf-core
138 lines
4.6 KiB
Haskell
138 lines
4.6 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Module : EmbedParsing
|
|
-- Maintainer : AR
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date:
|
|
-- > CVS $Author:
|
|
-- > CVS $Revision:
|
|
--
|
|
-- just one parse method, for use in embedded GF systems
|
|
-----------------------------------------------------------------------------
|
|
|
|
module GF.Embed.EmbedParsing where
|
|
|
|
import GF.Infra.CheckM
|
|
import qualified GF.Canon.AbsGFC as C
|
|
import GF.Canon.GFC
|
|
import GF.Canon.MkGFC (trExp) ----
|
|
import GF.Canon.CMacros
|
|
import GF.Grammar.MMacros (refreshMetas)
|
|
import GF.UseGrammar.Linear
|
|
import GF.Data.Str
|
|
import GF.CF.CF
|
|
import GF.CF.CFIdent
|
|
import GF.Infra.Ident
|
|
import GF.Grammar.TypeCheck
|
|
import GF.Grammar.Values
|
|
import GF.UseGrammar.Tokenize
|
|
import GF.CF.Profile
|
|
import GF.Infra.Option
|
|
import GF.Compile.ShellState
|
|
import GF.Embed.EmbedCustom
|
|
import GF.CF.PPrCF (prCFTree)
|
|
|
|
import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
|
|
|
|
|
|
-- import qualified GF.Parsing.GFC as New
|
|
|
|
import GF.Data.Operations
|
|
|
|
import Data.List (nub)
|
|
import Control.Monad (liftM)
|
|
|
|
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
|
|
|
|
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
|
|
parseString os sg cat = liftM fst . parseStringMsg os sg cat
|
|
|
|
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
|
|
parseStringMsg os sg cat s = do
|
|
(ts,(_,ss)) <- checkStart $ parseStringC os sg cat s
|
|
return (ts,unlines ss)
|
|
|
|
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
|
|
parseStringC opts0 sg cat s
|
|
|
|
| otherwise = do
|
|
let opts = unionOptions opts0 $ stateOptions sg
|
|
cf = stateCF sg
|
|
gr = stateGrammarST sg
|
|
cn = cncId sg
|
|
tok = customOrDefault opts useTokenizer customTokenizer sg
|
|
parser = PCFOld.parse "ibn" (stateCF sg) cat -- customOrDefault opts useParser customParser sg cat
|
|
tokens2trms opts sg cn parser (tok s)
|
|
|
|
tokens2trms :: Options ->StateGrammar ->Ident -> CFParser -> [CFTok] -> Check [Tree]
|
|
tokens2trms opts sg cn parser toks = trees2trms opts sg cn toks trees info
|
|
where result = parser toks
|
|
info = snd result
|
|
trees = {- nub $ -} cfParseResults result -- peb 25/5-04: removed nub (O(n^2))
|
|
|
|
trees2trms :: Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
|
|
trees2trms opts sg cn as ts0 info = do
|
|
ts <- case () of
|
|
_ | null ts0 -> checkWarn "No success in cf parsing" >> return []
|
|
_ | raw -> do
|
|
ts1 <- return (map cf2trm0 ts0) ----- should not need annot
|
|
checks [
|
|
mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
|
|
,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
|
|
]
|
|
_ -> do
|
|
let num = optIntOrN opts flagRawtrees 999999
|
|
let (ts01,rest) = splitAt num ts0
|
|
if null rest then return ()
|
|
else checkWarn ("Warning: only" +++ show num +++ "raw parses out of" +++
|
|
show (length ts0) +++
|
|
"considered; use -rawtrees=<Int> to see more"
|
|
)
|
|
(ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
|
|
if null ts1 then raise ss else return ()
|
|
ts2 <- mapM (checkErr . annotate gr . refreshMetas [] . trExp) ts1 ----
|
|
if forgive then return ts2 else do
|
|
let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
|
|
ps = [t | (t,ss) <- tsss,
|
|
any (compatToks as) (map str2cftoks ss)]
|
|
if null ps
|
|
then raise $ "Failure in morphology." ++
|
|
if verb
|
|
then "\nPossible corrections: " +++++
|
|
unlines (nub (map sstr (concatMap snd tsss)))
|
|
else ""
|
|
else return ps
|
|
|
|
if verb
|
|
then checkWarn $ " the token list" +++ show as ++++ unknown as +++++ info
|
|
else return ()
|
|
|
|
return $ optIntOrAll opts flagNumber $ nub ts
|
|
where
|
|
gr = stateGrammarST sg
|
|
|
|
raw = oElem rawParse opts
|
|
verb = oElem beVerbose opts
|
|
forgive = oElem forgiveParse opts
|
|
|
|
unknown ts = case filter noMatch [t | t@(TS _) <- ts] of
|
|
[] -> "where all words are known"
|
|
us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
|
|
terminals = map TS $ stateGrammarWords sg
|
|
noMatch t = all (not . compatTok t) terminals
|
|
|
|
|
|
--- too much type checking in building term info? return FullTerm to save work?
|
|
|
|
-- | raw parsing: so simple it is for a context-free CF grammar
|
|
cf2trm0 :: CFTree -> C.Exp
|
|
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
|
|
where
|
|
cffun2trm (CFFun (fun,_)) = fun
|
|
mkApp = foldl C.EApp
|
|
mkAppAtom a = mkApp (C.EAtom a)
|
|
|
|
|