1
0
forked from GitHub/gf-core

testing treebanks

This commit is contained in:
aarne
2006-02-08 21:58:36 +00:00
parent c9ae662c24
commit f916352116
6 changed files with 88 additions and 26 deletions

View File

@@ -14,6 +14,15 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
<p> <p>
8/2 (AR) The command <tt>tb = tree_bank</tt> for creating and testing against
multilingual treebanks. Example uses:
<pre>
gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
rf tb.txt | tb -c -- read comparison treebank from file
</pre>
<p>
10/1 (AR) Forbade variable binding inside negation and Kleene star 10/1 (AR) Forbade variable binding inside negation and Kleene star
patterns. patterns.

View File

@@ -290,10 +290,13 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
_ -> Nothing _ -> Nothing
returnArg (ATrms $ generateTrees opts gro mt) sa returnArg (ATrms $ generateTrees opts gro mt) sa
CTreeBank | oElem doCompute opts -> do -- -c
let bank = prCommandArg a
returnArg (AString $ unlines $ testTreebank opts st bank) sa
CTreeBank -> do CTreeBank -> do
let ts = strees $ s2t $ snd sa let ts = strees $ s2t $ snd sa
comm = "command" ---- comm = "command" ----
justOutput opts (mkTreebank opts st comm ts) sa returnArg (AString $ unlines $ mkTreebank opts st comm ts) sa
CShowTreeGraph | oElem emitCode opts -> do -- -o CShowTreeGraph | oElem emitCode opts -> do -- -o
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa

View File

@@ -230,11 +230,14 @@ txtHelpFile =
"\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++ "\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++
"\n" ++ "\n" ++
"\ntb, tree_bank: tb" ++ "\ntb, tree_bank: tb" ++
"\n Generate a multilingual treebank from a list of trees." ++ "\n Generate a multilingual treebank from a list of trees (default) or compare" ++
"\n flags:" ++ "\n to an existing treebank." ++
"\n -xml wrap the treebank with XML tags" ++ "\n options:" ++
"\n -c compare to existing xml-formatted treebank" ++
"\n -xml wrap the treebank (or comparison results) with XML tags" ++
"\n examples:" ++ "\n examples:" ++
"\n gr -cat=S -number=100 | tb" ++ "\n gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file" ++
"\n rf tb.txt | tb -c -- read comparison treebank from file" ++
"\n" ++ "\n" ++
"\ntt, test_tokenizer: tt String" ++ "\ntt, test_tokenizer: tt String" ++
"\n Show the token list sent to the parser when String is parsed." ++ "\n Show the token list sent to the parser when String is parsed." ++

View File

@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
CGenerateRandom -> both "cf prob" "cat lang number depth" CGenerateRandom -> both "cf prob" "cat lang number depth"
CGenerateTrees -> both "metas" "atoms depth alts cat lang number" CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
CPutTerm -> flags "transform number" CPutTerm -> flags "transform number"
CTreeBank -> opts "xml" CTreeBank -> opts "c xml"
CWrapTerm _ -> opts "c" CWrapTerm _ -> opts "c"
CApplyTransfer _ -> flags "lang transfer" CApplyTransfer _ -> flags "lang transfer"
CMorphoAnalyse -> both "short" "lang" CMorphoAnalyse -> both "short" "lang"

View File

@@ -12,12 +12,13 @@
-- Purpose: to generate treebanks. -- Purpose: to generate treebanks.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.UseGrammar.Treebank (mkTreebank) where module GF.UseGrammar.Treebank (mkTreebank,testTreebank) where
import GF.Compile.ShellState
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories)
import GF.UseGrammar.Linear (linTree2string) import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.Custom import GF.UseGrammar.Custom
import GF.UseGrammar.GetTree (string2tree)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark) import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm) import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree) import GF.Grammar.MMacros (exp2tree)
@@ -31,13 +32,14 @@ import qualified GF.Grammar.Abstract as A
-- Generate a treebank with a multilingual grammar. AR 8/2/2006 -- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL -- (c) Aarne Ranta 2006 under GNU GPL
-- | the main function -- | the main functions
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> IO () mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris) mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
where where
mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs) mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
mkTree t = putInXML opts "tree" [] (putStrLn $ showTree t) -- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
mkLin t lg = putInXML opts "lin" (lang lg) (putStrLn $ linearize sh lg t) mkTree t = putInXML opts "tree" [] (puts $ showTree t)
mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t)
langs = [prt_ l | l <- allLanguages sh] langs = [prt_ l | l <- allLanguages sh]
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
@@ -46,14 +48,56 @@ mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris)
lang lg = " lang=" ++ show (prt_ (zIdent lg)) lang lg = " lang=" ++ show (prt_ (zIdent lg))
tris = zip trees [1..] tris = zip trees [1..]
testTreebank :: Options -> ShellState -> String -> Res
putInXML :: Options -> String -> String -> IO () -> IO () testTreebank opts sh = putInXML opts "diff" [] . concatMap testOne . getTreebank . lines
putInXML opts tag attrs io = do
ifXML $ putStrLn $ tagXML $ tag ++ attrs
io
ifXML $ putStrLn $ tagXML $ '/':tag
where where
ifXML c = if oElem showXML opts then c else return () testOne (e,lang,str) = do
let tr = annot gr e
let str0 = linearize sh lang tr
if str == str0 then ret else putInXML opts "diff" [] $ do
putInXML opts "tree" [] (puts $ showTree tr)
putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0
putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
gr = firstStateGrammar sh
-- string vs. IO
type Res = [String] -- IO ()
puts :: String -> Res
puts = return -- putStrLn
ret = [] -- return ()
--
getTreebank :: [String] -> [(String,String,String)]
getTreebank ll = case ll of
[] -> []
l:ls ->
let (l1,l2) = getItem ls
(tr,lins) = getTree l1
lglins = getLins lins
in [(tr,lang,str) | (lang,str) <- lglins] ++ getTreebank l2
where
getItem = span ((/="</item") . take 6)
getTree (_:ss) = let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)
getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
getLins _ = []
getLang = takeWhile (/='"') . tail . dropWhile (/='"')
annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do
let t = tree2exp $ string2tree gr s
annotate (grammar gr) t
putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs io =
(ifXML $ puts $ tagXML $ tag ++ attrs) ++
io ++
(ifXML $ puts $ tagXML $ '/':tag)
where
ifXML c = if oElem showXML opts then c else []
tagXML :: String -> String tagXML :: String -> String
tagXML s = "<" ++ s ++ ">" tagXML s = "<" ++ s ++ ">"

View File

@@ -201,11 +201,14 @@ at, apply_transfer: at (Module.Fun | Fun)
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
tb, tree_bank: tb tb, tree_bank: tb
Generate a multilingual treebank from a list of trees. Generate a multilingual treebank from a list of trees (default) or compare
flags: to an existing treebank.
-xml wrap the treebank with XML tags options:
-c compare to existing xml-formatted treebank
-xml wrap the treebank (or comparison results) with XML tags
examples: examples:
gr -cat=S -number=100 | tb gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
rf tb.txt | tb -c -- read comparison treebank from file
tt, test_tokenizer: tt String tt, test_tokenizer: tt String
Show the token list sent to the parser when String is parsed. Show the token list sent to the parser when String is parsed.