forked from GitHub/gf-core
command tb for creating treebanks
This commit is contained in:
@@ -405,7 +405,7 @@ allActiveGrammars :: ShellState -> [StateGrammar]
|
||||
|
||||
globalOptions = gloptions
|
||||
--allLanguages = map (fst . fst) . concretes
|
||||
allLanguages = M.allConcreteModules . canModules
|
||||
allLanguages = map (snd . fst) . actualConcretes
|
||||
allTransfers = map fst . transfers
|
||||
allCategories = map fst . allCatsOf . canModules
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ import qualified GF.Canon.CMacros as CMacros
|
||||
import qualified GF.Compile.GrammarToCanon as GrammarToCanon
|
||||
import GF.Grammar.Values
|
||||
import GF.UseGrammar.GetTree
|
||||
import GF.UseGrammar.Treebank
|
||||
|
||||
import GF.Shell.ShellCommands
|
||||
|
||||
@@ -289,6 +290,11 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
|
||||
_ -> Nothing
|
||||
returnArg (ATrms $ generateTrees opts gro mt) sa
|
||||
|
||||
CTreeBank -> do
|
||||
let ts = strees $ s2t $ snd sa
|
||||
comm = "command" ----
|
||||
justOutput opts (mkTreebank opts st comm ts) sa
|
||||
|
||||
CShowTreeGraph | oElem emitCode opts -> do -- -o
|
||||
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa
|
||||
CShowTreeGraph -> do
|
||||
|
||||
@@ -229,6 +229,13 @@ txtHelpFile =
|
||||
"\n examples:" ++
|
||||
"\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++
|
||||
"\n" ++
|
||||
"\ntb, tree_bank: tb" ++
|
||||
"\n Generate a multilingual treebank from a list of trees." ++
|
||||
"\n flags:" ++
|
||||
"\n -xml wrap the treebank with XML tags" ++
|
||||
"\n examples:" ++
|
||||
"\n gr -cat=S -number=100 | tb" ++
|
||||
"\n" ++
|
||||
"\ntt, test_tokenizer: tt String" ++
|
||||
"\n Show the token list sent to the parser when String is parsed." ++
|
||||
"\n HINT: can be useful when debugging the parser." ++
|
||||
|
||||
@@ -113,6 +113,7 @@ pCommand ws = case ws of
|
||||
"tt" : s -> aString CTestTokenizer s
|
||||
"cc" : s -> aUnit $ CComputeConcrete $ unwords s
|
||||
"so" : s -> aUnit $ CShowOpers $ unwords s
|
||||
"tb" : [] -> aUnit CTreeBank
|
||||
|
||||
"tq" : i:o:[] -> aUnit (CTranslationQuiz (language i) (language o))
|
||||
"tl":i:o:[] -> aUnit (CTranslationList (language i) (language o))
|
||||
|
||||
@@ -48,6 +48,7 @@ data Command =
|
||||
| CTranslate Language Language
|
||||
| CGenerateRandom
|
||||
| CGenerateTrees
|
||||
| CTreeBank
|
||||
| CPutTerm
|
||||
| CWrapTerm I.Ident
|
||||
| CApplyTransfer (Maybe I.Ident, I.Ident)
|
||||
@@ -182,6 +183,7 @@ optionsOfCommand co = case co of
|
||||
CGenerateRandom -> both "cf prob" "cat lang number depth"
|
||||
CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
|
||||
CPutTerm -> flags "transform number"
|
||||
CTreeBank -> opts "xml"
|
||||
CWrapTerm _ -> opts "c"
|
||||
CApplyTransfer _ -> flags "lang transfer"
|
||||
CMorphoAnalyse -> both "short" "lang"
|
||||
|
||||
69
src/GF/UseGrammar/Treebank.hs
Normal file
69
src/GF/UseGrammar/Treebank.hs
Normal file
@@ -0,0 +1,69 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Treebank
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- Generate multilingual treebanks. AR 8\/2\/2006
|
||||
--
|
||||
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||
--
|
||||
-- Purpose: to generate treebanks.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Treebank (mkTreebank) where
|
||||
|
||||
|
||||
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories)
|
||||
import GF.UseGrammar.Linear (linTree2string)
|
||||
import GF.UseGrammar.Custom
|
||||
import GF.Canon.CMacros (noMark)
|
||||
import GF.Grammar.Grammar (Trm)
|
||||
import GF.Grammar.MMacros (exp2tree)
|
||||
import GF.Grammar.Macros (zIdent)
|
||||
import GF.Grammar.PrGrammar (prt_)
|
||||
import GF.Grammar.Values (tree2exp)
|
||||
import GF.Data.Operations
|
||||
import GF.Infra.Option
|
||||
import qualified GF.Grammar.Abstract as A
|
||||
|
||||
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
|
||||
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||
|
||||
-- | the main function
|
||||
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> IO ()
|
||||
mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem trees)
|
||||
where
|
||||
mkItem t = putInXML opts "item" cat (mkTree t >>mapM_ (mkLin t) langs)
|
||||
mkTree t = putInXML opts "tree" [] (putStrLn $ showTree t)
|
||||
mkLin t lg = putInXML opts "lin" (lang lg) (putStrLn $ linearize sh lg t)
|
||||
|
||||
langs = [prt_ l | l <- allLanguages sh]
|
||||
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
|
||||
abstr = "" --- "Abs" ----
|
||||
cat = "" --- " cat=" ++ show "S" ----
|
||||
lang lg = " lang=" ++ show (prt_ (zIdent lg))
|
||||
|
||||
|
||||
putInXML :: Options -> String -> String -> IO () -> IO ()
|
||||
putInXML opts tag attrs io = do
|
||||
ifXML $ putStrLn $ tagXML $ tag ++ attrs
|
||||
io
|
||||
ifXML $ putStrLn $ tagXML $ '/':tag
|
||||
where
|
||||
ifXML c = if oElem showXML opts then c else return ()
|
||||
|
||||
tagXML :: String -> String
|
||||
tagXML s = "<" ++ s ++ ">"
|
||||
|
||||
--- these handy functions are borrowed from EmbedAPI
|
||||
|
||||
linearize mgr lang =
|
||||
untok .
|
||||
linTree2string noMark (canModules mgr) (zIdent lang)
|
||||
where
|
||||
sgr = stateGrammarOfLang mgr (zIdent lang)
|
||||
untok = customOrDefault noOptions useUntokenizer customUntokenizer sgr
|
||||
|
||||
showTree t = prt_ $ tree2exp t
|
||||
@@ -200,6 +200,13 @@ at, apply_transfer: at (Module.Fun | Fun)
|
||||
examples:
|
||||
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
|
||||
|
||||
tb, tree_bank: tb
|
||||
Generate a multilingual treebank from a list of trees.
|
||||
flags:
|
||||
-xml wrap the treebank with XML tags
|
||||
examples:
|
||||
gr -cat=S -number=100 | tb
|
||||
|
||||
tt, test_tokenizer: tt String
|
||||
Show the token list sent to the parser when String is parsed.
|
||||
HINT: can be useful when debugging the parser.
|
||||
|
||||
Reference in New Issue
Block a user