mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
treebanks in shell state: i -treebank and lt
This commit is contained in:
@@ -238,14 +238,16 @@ compatCF (CFCat (CIQ _ c, l)) (CFCat (CIQ _ c', l')) = c==c' && l==l'
|
||||
-- instead of break
|
||||
wordsLits [] = []
|
||||
wordsLits (c:cs) | isSpace c = wordsLits (dropWhile isSpace cs)
|
||||
| c == '\'' || c == '"'
|
||||
| isQuote c
|
||||
= let (l,rs) = breaks (==c) cs
|
||||
rs' = drop 1 rs
|
||||
in ([c]++l++[c]):wordsLits rs'
|
||||
| otherwise = let (w,rs) = break isSpace cs
|
||||
| otherwise = let (w,rs) = break isSpaceQ cs
|
||||
in (c:w):wordsLits rs
|
||||
where
|
||||
breaks c cs = case break c cs of
|
||||
(l@(_:_),d:rs) | last l == '\\' ->
|
||||
let (r,ts) = breaks c rs in (l++[d]++r, ts)
|
||||
v -> v
|
||||
isQuote c = elem c "\"'"
|
||||
isSpaceQ c = isSpace c ---- || isQuote c
|
||||
|
||||
@@ -44,6 +44,7 @@ import qualified GF.Conversion.GFC as Cnv
|
||||
import qualified GF.Parsing.GFC as Prs
|
||||
|
||||
import Data.List (nub,nubBy)
|
||||
import qualified Data.Map as Map
|
||||
|
||||
-- AR 11/11/2001 -- 17/6/2003 (for modules) ---- unfinished
|
||||
|
||||
@@ -61,6 +62,7 @@ data ShellState = ShSt {
|
||||
-- (large, with parameters, no-so overgenerating)
|
||||
pInfos :: [(Ident, Prs.PInfo)], -- ^ parsing information (compiled mcfg&cfg grammars)
|
||||
morphos :: [(Ident,Morpho)], -- ^ morphologies
|
||||
treebanks :: [(Ident,Treebank)], -- ^ treebanks
|
||||
probss :: [(Ident,Probs)], -- ^ probability distributions
|
||||
gloptions :: Options, -- ^ global options
|
||||
readFiles :: [(FilePath,ModTime)],-- ^ files read
|
||||
@@ -73,6 +75,8 @@ data ShellState = ShSt {
|
||||
transfers :: [(Ident,T.Env)] -- ^ transfer modules
|
||||
}
|
||||
|
||||
type Treebank = Map.Map String [(String,String)] -- lang, tree
|
||||
|
||||
actualConcretes :: ShellState -> [((Ident,Ident),Bool)]
|
||||
actualConcretes sh = nub [((c,c),b) |
|
||||
Just a <- [abstract sh],
|
||||
@@ -102,6 +106,7 @@ emptyShellState = ShSt {
|
||||
cfgs = [],
|
||||
pInfos = [],
|
||||
morphos = [],
|
||||
treebanks = [],
|
||||
probss = [],
|
||||
gloptions = noOptions,
|
||||
readFiles = [],
|
||||
@@ -249,6 +254,7 @@ updateShellState opts ign mcnc sh ((_,sgr,gr),rts) = do
|
||||
cfgs = zip concrs cfgs,
|
||||
pInfos = zip concrs pInfos,
|
||||
morphos = zip concrs morphos,
|
||||
treebanks = treebanks sh,
|
||||
probss = zip concrs probss,
|
||||
gloptions = gloptions sh, --- opts, -- this would be command-line options
|
||||
readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts,
|
||||
@@ -298,6 +304,7 @@ purgeShellState sh = ShSt {
|
||||
cfgs = cfgs sh,
|
||||
pInfos = pInfos sh,
|
||||
morphos = morphos sh,
|
||||
treebanks = treebanks sh,
|
||||
probss = probss sh,
|
||||
gloptions = gloptions sh,
|
||||
readFiles = [],
|
||||
@@ -314,17 +321,17 @@ purgeShellState sh = ShSt {
|
||||
acncs = maybe [] singleton abstr ++ map (snd . fst) (actualConcretes sh)
|
||||
|
||||
changeMain :: Maybe Ident -> ShellState -> Err ShellState
|
||||
changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s trs) =
|
||||
return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s trs)
|
||||
changeMain Nothing (ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos tbs pbs os rs acs s trs) =
|
||||
return (ShSt Nothing Nothing [] ms ss cfs old_pis mcfgs cfgs pinfos mos tbs pbs os rs acs s trs)
|
||||
changeMain
|
||||
(Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos pbs os rs acs s trs) =
|
||||
(Just c) st@(ShSt _ _ cs ms ss cfs old_pis mcfgs cfgs pinfos mos tbs pbs os rs acs s trs) =
|
||||
case lookup c (M.modules ms) of
|
||||
Just _ -> do
|
||||
a <- M.abstractOfConcrete ms c
|
||||
let cas = M.allConcretes ms a
|
||||
let cs' = [((c,c),True) | c <- cas]
|
||||
return (ShSt (Just a) (Just c) cs' ms ss cfs old_pis mcfgs cfgs
|
||||
pinfos mos pbs os rs acs s trs)
|
||||
pinfos mos tbs pbs os rs acs s trs)
|
||||
_ -> P.prtBad "The state has no concrete syntax named" c
|
||||
|
||||
-- | form just one state grammar, if unique, from a canonical grammar
|
||||
@@ -472,6 +479,13 @@ addTransfer :: (Ident,T.Env) -> ShellState -> ShellState
|
||||
addTransfer it@(i,_) sh =
|
||||
sh {transfers = it : filter ((/= i) . fst) (transfers sh)}
|
||||
|
||||
addTreebank :: (Ident,Treebank) -> ShellState -> ShellState
|
||||
addTreebank it@(i,_) sh =
|
||||
sh {treebanks = it : filter ((/= i) . fst) (treebanks sh)}
|
||||
|
||||
findTreebank :: ShellState -> Ident -> Err Treebank
|
||||
findTreebank sh i = maybeErr "no treebank found" $ lookup i $ treebanks sh
|
||||
|
||||
-- modify state
|
||||
|
||||
type ShellStateOper = ShellState -> ShellState
|
||||
@@ -496,8 +510,8 @@ changeOptions f sh = sh {gloptions = f (gloptions sh)}
|
||||
changeModTimes :: [(FilePath,ModTime)] -> ShellStateOper
|
||||
--- __________ this is OBSOLETE
|
||||
changeModTimes mfs
|
||||
(ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff ts ss trs) =
|
||||
ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms pbs os ff' ts ss trs
|
||||
(ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms tbs pbs os ff ts ss trs) =
|
||||
ShSt a c cs can src cfs old_pinfos mcfgs cfgs pinfos ms tbs pbs os ff' ts ss trs
|
||||
where
|
||||
ff' = mfs ++ [mf | mf@(f,_) <- ff, notElem f (map fst mfs)]
|
||||
|
||||
|
||||
@@ -181,6 +181,10 @@ execLine put (c@(co, os), arg, cs) (outps,st) = do
|
||||
execC :: CommandOpt -> ShellIO
|
||||
execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case comm of
|
||||
|
||||
CImport file | oElem (iOpt "treebank") opts -> do
|
||||
ss <- readFileIf file >>= return . lines
|
||||
let tb = pre2treebank $ getTreebank ss
|
||||
changeState (addTreebank (I.identC (takeWhile (/='.') file), tb)) sa
|
||||
CImport file | oElem fromExamples opts -> do
|
||||
es <- liftM nub $ getGFEFiles opts file
|
||||
system $ "gf -examples" +++ unlines es
|
||||
@@ -303,6 +307,15 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
|
||||
comm = "command" ----
|
||||
returnArg (AString $ unlines $ mkTreebank opts st comm ts) sa
|
||||
|
||||
CLookupTreebank -> do
|
||||
case treebanks st of
|
||||
[] -> returnArg (AError "no treebank") sa
|
||||
(_,tb):_ -> do
|
||||
let s = prCommandArg a
|
||||
let tes = map (string2treeErr gro . snd) $ lookupTreebank tb s
|
||||
terms = [t | Ok t <- tes]
|
||||
returnArg (ATrms terms) sa
|
||||
|
||||
CShowTreeGraph | oElem emitCode opts -> do -- -o
|
||||
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa
|
||||
CShowTreeGraph -> do
|
||||
|
||||
@@ -66,6 +66,7 @@ txtHelpFile =
|
||||
"\n -o do emit code (default with new grammar format)" ++
|
||||
"\n -ex preprocess .gfe files if needed" ++
|
||||
"\n -prob read probabilities from top grammar file (format --# prob Fun Double)" ++
|
||||
"\n -treebank read a treebank file to memory (xml format)" ++
|
||||
"\n flags:" ++
|
||||
"\n -abs set the name used for abstract syntax (with -old option)" ++
|
||||
"\n -cnc set the name used for concrete syntax (with -old option)" ++
|
||||
@@ -238,11 +239,18 @@ txtHelpFile =
|
||||
"\n -c compare to existing xml-formatted treebank" ++
|
||||
"\n -trees return the trees of the treebank" ++
|
||||
"\n -xml wrap the treebank (or comparison results) with XML tags" ++
|
||||
"\n -mem write the treebank in memory instead of a file TODO" ++
|
||||
"\n examples:" ++
|
||||
"\n gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file" ++
|
||||
"\n rf tb.xml | tb -c -- compare-test treebank from file" ++
|
||||
"\n rf old.xml | tb -trees | tb -xml -- create new treebank from old" ++
|
||||
"\n" ++
|
||||
"\nlt, lookup_treebank: lt String" ++
|
||||
"\n Lookup a string in a treebank and return the resulting trees." ++
|
||||
"\n Use 'tb' to create a treebank and 'i -treebank' to read it in memory." ++
|
||||
"\n flag:" ++
|
||||
"\n -treebank use this treebank (instead of the latest introduced one) TODO" ++
|
||||
"\n" ++
|
||||
"\ntt, test_tokenizer: tt String" ++
|
||||
"\n Show the token list sent to the parser when String is parsed." ++
|
||||
"\n HINT: can be useful when debugging the parser." ++
|
||||
|
||||
@@ -114,6 +114,7 @@ pCommand ws = case ws of
|
||||
"cc" : s -> aUnit $ CComputeConcrete $ unwords s
|
||||
"so" : s -> aUnit $ CShowOpers $ unwords s
|
||||
"tb" : [] -> aUnit CTreeBank
|
||||
"lt" : s -> aString CLookupTreebank s
|
||||
|
||||
"tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o))
|
||||
"tl":i:o:[] -> aUnit (CTranslationList (language i) (language o))
|
||||
|
||||
@@ -57,6 +57,8 @@ data Command =
|
||||
| CComputeConcrete String
|
||||
| CShowOpers String
|
||||
|
||||
| CLookupTreebank
|
||||
|
||||
| CTranslationQuiz Language Language
|
||||
| CTranslationList Language Language
|
||||
| CMorphoQuiz
|
||||
@@ -169,8 +171,9 @@ optionsOfCommand co = case co of
|
||||
CSetFlag -> both "utf8 table struct record all multi"
|
||||
"cat lang lexer parser number depth rawtrees unlexer optimize path conversion printer"
|
||||
|
||||
CImport _ -> both "old v s src gfc retain nocf nocheckcirc cflexer noemit o make ex prob"
|
||||
"abs cnc res path optimize conversion cat preproc probs noparse"
|
||||
CImport _ ->
|
||||
both "old v s src gfc retain nocf nocheckcirc cflexer noemit o make ex prob treebank"
|
||||
"abs cnc res path optimize conversion cat preproc probs noparse"
|
||||
CRemoveLanguage _ -> none
|
||||
CEmptyState -> none
|
||||
CStripState -> none
|
||||
|
||||
@@ -12,7 +12,14 @@
|
||||
-- Purpose: to generate treebanks.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Treebank (mkTreebank,testTreebank,treesTreebank) where
|
||||
module GF.UseGrammar.Treebank (
|
||||
mkTreebank,
|
||||
testTreebank,
|
||||
treesTreebank,
|
||||
getTreebank,
|
||||
lookupTreebank,
|
||||
pre2treebank
|
||||
) where
|
||||
|
||||
import GF.Compile.ShellState
|
||||
import GF.UseGrammar.Linear (linTree2string)
|
||||
@@ -28,6 +35,7 @@ import GF.Grammar.Values (tree2exp)
|
||||
import GF.Data.Operations
|
||||
import GF.Infra.Option
|
||||
import qualified GF.Grammar.Abstract as A
|
||||
import qualified Data.Map as M
|
||||
|
||||
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
|
||||
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||
@@ -74,11 +82,13 @@ puts = return -- putStrLn
|
||||
ret = [] -- return ()
|
||||
--
|
||||
|
||||
type PreTreebank = [(String,[(String,String)])]
|
||||
|
||||
getTreebanks :: [String] -> [(String,String,String)]
|
||||
getTreebanks = concatMap grps . getTreebank where
|
||||
grps (t,lls) = [(t,x,y) | (x,y) <- lls]
|
||||
|
||||
getTreebank :: [String] -> [(String,[(String,String)])]
|
||||
getTreebank :: [String] -> PreTreebank
|
||||
getTreebank ll = case ll of
|
||||
l:ls@(_:_:_) ->
|
||||
let (l1,l2) = getItem ls
|
||||
@@ -97,6 +107,12 @@ getTreebank ll = case ll of
|
||||
|
||||
getLang = takeWhile (/='"') . tail . dropWhile (/='"')
|
||||
|
||||
lookupTreebank :: Treebank -> String -> [(String,String)]
|
||||
lookupTreebank tb s = maybe [] id $ M.lookup s tb
|
||||
|
||||
pre2treebank :: PreTreebank -> Treebank
|
||||
pre2treebank tb = M.fromListWith (++) [(s,[(l,t)]) | (t,ls) <- tb, (l,s) <- ls]
|
||||
|
||||
annot :: StateGrammar -> String -> A.Tree
|
||||
annot gr s = errVal (error "illegal tree") $ do
|
||||
let t = tree2exp $ string2tree gr s
|
||||
|
||||
@@ -37,6 +37,7 @@ i, import: i File
|
||||
-o do emit code (default with new grammar format)
|
||||
-ex preprocess .gfe files if needed
|
||||
-prob read probabilities from top grammar file (format --# prob Fun Double)
|
||||
-treebank read a treebank file to memory (xml format)
|
||||
flags:
|
||||
-abs set the name used for abstract syntax (with -old option)
|
||||
-cnc set the name used for concrete syntax (with -old option)
|
||||
@@ -209,11 +210,18 @@ tb, tree_bank: tb
|
||||
-c compare to existing xml-formatted treebank
|
||||
-trees return the trees of the treebank
|
||||
-xml wrap the treebank (or comparison results) with XML tags
|
||||
-mem write the treebank in memory instead of a file TODO
|
||||
examples:
|
||||
gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
|
||||
rf tb.xml | tb -c -- compare-test treebank from file
|
||||
rf old.xml | tb -trees | tb -xml -- create new treebank from old
|
||||
|
||||
lt, lookup_treebank: lt String
|
||||
Lookup a string in a treebank and return the resulting trees.
|
||||
Use 'tb' to create a treebank and 'i -treebank' to read it in memory.
|
||||
flag:
|
||||
-treebank use this treebank (instead of the latest introduced one) TODO
|
||||
|
||||
tt, test_tokenizer: tt String
|
||||
Show the token list sent to the parser when String is parsed.
|
||||
HINT: can be useful when debugging the parser.
|
||||
|
||||
Reference in New Issue
Block a user