1
0
forked from GitHub/gf-core

testing treebanks

This commit is contained in:
aarne
2006-02-08 21:58:36 +00:00
parent a1e79f3634
commit 676058581d
9 changed files with 116 additions and 28 deletions

View File

@@ -14,6 +14,15 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
<p> <p>
8/2 (AR) The command <tt>tb = tree_bank</tt> for creating and testing against
multilingual treebanks. Example uses:
<pre>
gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
rf tb.txt | tb -c -- read comparison treebank from file
</pre>
<p>
10/1 (AR) Forbade variable binding inside negation and Kleene star 10/1 (AR) Forbade variable binding inside negation and Kleene star
patterns. patterns.

View File

@@ -1,4 +1,4 @@
all: gslt:
txt2tags gslt-sem-2006.txt txt2tags gslt-sem-2006.txt
htmls gslt-sem-2006.html htmls gslt-sem-2006.html

View File

@@ -7,7 +7,7 @@
<P ALIGN="center"><CENTER><H1>Grammars as Software Libraries</H1> <P ALIGN="center"><CENTER><H1>Grammars as Software Libraries</H1>
<FONT SIZE="4"> <FONT SIZE="4">
<I>Author: Aarne Ranta &lt;aarne (at) cs.chalmers.se&gt;</I><BR> <I>Author: Aarne Ranta &lt;aarne (at) cs.chalmers.se&gt;</I><BR>
Last update: Mon Jan 30 10:53:18 2006 Last update: Wed Feb 8 19:06:27 2006
</FONT></CENTER> </FONT></CENTER>
<P> <P>
@@ -64,6 +64,7 @@ Student projects on grammar libraries:
<UL> <UL>
<LI>Inger Andersson &amp; Therese Söderberg: Spanish morphology <LI>Inger Andersson &amp; Therese Söderberg: Spanish morphology
<LI>Ludmilla Bogavac: Russian morphology <LI>Ludmilla Bogavac: Russian morphology
<LI>Karin Cavallin: comparison with Svenska Akademiens grammatik
<LI>Ali El Dada: Arabic morphology and syntax <LI>Ali El Dada: Arabic morphology and syntax
<LI>Muhammad Humayoun: Urdu morphology <LI>Muhammad Humayoun: Urdu morphology
<LI>Michael Pellauer: Estonian morphology <LI>Michael Pellauer: Estonian morphology
@@ -79,6 +80,21 @@ Technology, also:
<LI>Peter Ljunglöf <LI>Peter Ljunglöf
</UL> </UL>
<P>
Various grammar library contributions from the multilingual Chalmers community:
</P>
<UL>
<LI>Koen Claessen, Carlos Gonzalía, Patrik Jansson, Wojciech Mostowski, Karol Ostrovsky,
David Wahlstedt
</UL>
<P>
Resource library patches and suggestions from the WebALT staff:
</P>
<UL>
<LI>Lauri Carlson, Glòria Casanellas, Anni Laine, Wanjiku Ng'ang'a, Jordi Saludes
</UL>
<P> <P>
<!-- NEW --> <!-- NEW -->
</P> </P>

View File

@@ -47,6 +47,7 @@ Staff contributions to grammar libraries:
Student projects on grammar libraries: Student projects on grammar libraries:
- Inger Andersson & Therese Söderberg: Spanish morphology - Inger Andersson & Therese Söderberg: Spanish morphology
- Ludmilla Bogavac: Russian morphology - Ludmilla Bogavac: Russian morphology
- Karin Cavallin: comparison with Svenska Akademiens grammatik
- Ali El Dada: Arabic morphology and syntax - Ali El Dada: Arabic morphology and syntax
- Muhammad Humayoun: Urdu morphology - Muhammad Humayoun: Urdu morphology
- Michael Pellauer: Estonian morphology - Michael Pellauer: Estonian morphology
@@ -59,6 +60,15 @@ Technology, also:
- Peter Ljunglöf - Peter Ljunglöf
Various grammar library contributions from the multilingual Chalmers community:
- Koen Claessen, Carlos Gonzalía, Patrik Jansson, Wojciech Mostowski, Karol Ostrovsky,
David Wahlstedt
Resource library patches and suggestions from the WebALT staff:
- Lauri Carlson, Glòria Casanellas, Anni Laine, Wanjiku Ng'ang'a, Jordi Saludes
#NEW #NEW
==Software Libraries== ==Software Libraries==

View File

@@ -290,10 +290,13 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
_ -> Nothing _ -> Nothing
returnArg (ATrms $ generateTrees opts gro mt) sa returnArg (ATrms $ generateTrees opts gro mt) sa
CTreeBank | oElem doCompute opts -> do -- -c
let bank = prCommandArg a
returnArg (AString $ unlines $ testTreebank opts st bank) sa
CTreeBank -> do CTreeBank -> do
let ts = strees $ s2t $ snd sa let ts = strees $ s2t $ snd sa
comm = "command" ---- comm = "command" ----
justOutput opts (mkTreebank opts st comm ts) sa returnArg (AString $ unlines $ mkTreebank opts st comm ts) sa
CShowTreeGraph | oElem emitCode opts -> do -- -o CShowTreeGraph | oElem emitCode opts -> do -- -o
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa

View File

@@ -230,11 +230,14 @@ txtHelpFile =
"\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++ "\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++
"\n" ++ "\n" ++
"\ntb, tree_bank: tb" ++ "\ntb, tree_bank: tb" ++
"\n Generate a multilingual treebank from a list of trees." ++ "\n Generate a multilingual treebank from a list of trees (default) or compare" ++
"\n flags:" ++ "\n to an existing treebank." ++
"\n -xml wrap the treebank with XML tags" ++ "\n options:" ++
"\n -c compare to existing xml-formatted treebank" ++
"\n -xml wrap the treebank (or comparison results) with XML tags" ++
"\n examples:" ++ "\n examples:" ++
"\n gr -cat=S -number=100 | tb" ++ "\n gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file" ++
"\n rf tb.txt | tb -c -- read comparison treebank from file" ++
"\n" ++ "\n" ++
"\ntt, test_tokenizer: tt String" ++ "\ntt, test_tokenizer: tt String" ++
"\n Show the token list sent to the parser when String is parsed." ++ "\n Show the token list sent to the parser when String is parsed." ++

View File

@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
CGenerateRandom -> both "cf prob" "cat lang number depth" CGenerateRandom -> both "cf prob" "cat lang number depth"
CGenerateTrees -> both "metas" "atoms depth alts cat lang number" CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
CPutTerm -> flags "transform number" CPutTerm -> flags "transform number"
CTreeBank -> opts "xml" CTreeBank -> opts "c xml"
CWrapTerm _ -> opts "c" CWrapTerm _ -> opts "c"
CApplyTransfer _ -> flags "lang transfer" CApplyTransfer _ -> flags "lang transfer"
CMorphoAnalyse -> both "short" "lang" CMorphoAnalyse -> both "short" "lang"

View File

@@ -12,12 +12,13 @@
-- Purpose: to generate treebanks. -- Purpose: to generate treebanks.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
module GF.UseGrammar.Treebank (mkTreebank) where module GF.UseGrammar.Treebank (mkTreebank,testTreebank) where
import GF.Compile.ShellState
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories)
import GF.UseGrammar.Linear (linTree2string) import GF.UseGrammar.Linear (linTree2string)
import GF.UseGrammar.Custom import GF.UseGrammar.Custom
import GF.UseGrammar.GetTree (string2tree)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark) import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm) import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree) import GF.Grammar.MMacros (exp2tree)
@@ -31,13 +32,14 @@ import qualified GF.Grammar.Abstract as A
-- Generate a treebank with a multilingual grammar. AR 8/2/2006 -- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL -- (c) Aarne Ranta 2006 under GNU GPL
-- | the main function -- | the main functions
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> IO () mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris) mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
where where
mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs) mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
mkTree t = putInXML opts "tree" [] (putStrLn $ showTree t) -- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
mkLin t lg = putInXML opts "lin" (lang lg) (putStrLn $ linearize sh lg t) mkTree t = putInXML opts "tree" [] (puts $ showTree t)
mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t)
langs = [prt_ l | l <- allLanguages sh] langs = [prt_ l | l <- allLanguages sh]
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
@@ -46,14 +48,56 @@ mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris)
lang lg = " lang=" ++ show (prt_ (zIdent lg)) lang lg = " lang=" ++ show (prt_ (zIdent lg))
tris = zip trees [1..] tris = zip trees [1..]
-- | Compare a treebank against the current grammar.
--
-- The third argument is the text of an XML-formatted treebank. It is split
-- into lines and parsed by 'getTreebank' into (tree, language, string)
-- triples. Each tree is re-annotated with the first grammar of the shell
-- state and linearized again in the given language. Triples whose stored
-- string equals the fresh linearization produce no output; every other
-- triple produces one "diff" element containing the tree, the string stored
-- in the treebank ("old") and the fresh linearization ("new").
-- The whole result is wrapped in an outer "diff" element by 'putInXML'.
testTreebank :: Options -> ShellState -> String -> Res
testTreebank opts sh = putInXML opts "diff" [] . concatMap testOne . getTreebank . lines
  where
    -- str0 is the string recorded in the treebank, str the one computed now.
    testOne (e,lang,str0) =
      let tr       = annot gr e
          str      = linearize sh lang tr
          langAttr = " lang=" ++ show (prt_ (zIdent lang))
      in if str == str0
           then ret
           -- Res is a list of lines, so the parts must be concatenated;
           -- sequencing them with do-notation would run in the list monad
           -- and keep only (repeated copies of) the last part.
           else putInXML opts "diff" [] $ concat
                  [ putInXML opts "tree" [] (puts $ showTree tr)
                  , putInXML opts "old" langAttr (puts str0)
                  , putInXML opts "new" langAttr (puts str)
                  ]
    gr = firstStateGrammar sh
-- string vs. IO: results are collected as a list of output lines; the
-- trailing comments show the IO counterpart of each definition.

-- | Output of the treebank functions, one element per line.
type Res = [String] -- IO ()

-- | Output consisting of exactly one line.
puts :: String -> Res
puts line = [line] -- putStrLn

-- | No output at all.
ret = [] -- return ()
--
-- | Extract (tree, language, string) triples from the lines of a treebank.
--
-- The input is consumed chunk by chunk. In each chunk the first line is
-- dropped, and the following lines up to (not including) the next line
-- starting with @</item@ form one item. Inside an item the first line is
-- dropped as well; the last line before the line starting with @</tree@ is
-- taken as the tree, and the lines after it are read three at a time as
-- (opening line, string, closing line). The language is the text between the
-- first pair of double quotes on the opening line.
--
-- The function is total: chunks without a tree line yield no triples, an
-- opening line without quotes yields an empty language name, and a trailing
-- incomplete group of fewer than three lines is ignored.
getTreebank :: [String] -> [(String,String,String)]
getTreebank [] = []
getTreebank (_ : rest) =
  [ (tree, lang, str)
  | Just (tree, lins) <- [splitTree item]
  , (lang, str) <- langLins lins
  ] ++ getTreebank remainder
  where
    (item, remainder) = break (startsWith "</item") rest

    startsWith tag = (== tag) . take (length tag)

    -- Skip the item's first line, then separate the tree from the
    -- linearizations. Nothing if there is no line before the tree end.
    splitTree ls = case break (startsWith "</tree") (drop 1 ls) of
      (body@(_ : _), after) -> Just (last body, drop 1 after)
      _                     -> Nothing

    langLins (open : str : _close : more) = (langOf open, str) : langLins more
    langLins _                            = []

    -- drop 1 instead of tail: a line without any quote gives "".
    langOf = takeWhile (/= '"') . drop 1 . dropWhile (/= '"')
-- | Turn the string form of a tree into an annotated tree.
--
-- The string is read with 'string2tree', converted with 'tree2exp' and then
-- passed to 'annotate' together with the grammar of the given state grammar.
-- @error "illegal tree"@ is handed to 'errVal' as the alternative value --
-- presumably it is what results when annotation fails (confirm against
-- 'errVal').
annot :: StateGrammar -> String -> A.Tree
annot gr =
  errVal (error "illegal tree") . annotate (grammar gr) . tree2exp . string2tree gr
-- | Surround some output with an opening and a closing tag.
--
-- The opening tag is built from the tag name followed directly by the
-- attribute string (which therefore has to bring its own leading space);
-- the closing tag from the tag name alone. Both tag lines are produced only
-- when the 'showXML' option is set; otherwise the output is returned as is.
putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs io = tagLine (tag ++ attrs) ++ io ++ tagLine ('/' : tag)
  where
    tagLine s
      | oElem showXML opts = puts (tagXML s)
      | otherwise          = []
tagXML :: String -> String tagXML :: String -> String
tagXML s = "<" ++ s ++ ">" tagXML s = "<" ++ s ++ ">"

View File

@@ -201,11 +201,14 @@ at, apply_transfer: at (Module.Fun | Fun)
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
tb, tree_bank: tb tb, tree_bank: tb
Generate a multilingual treebank from a list of trees. Generate a multilingual treebank from a list of trees (default) or compare
flags: to an existing treebank.
-xml wrap the treebank with XML tags options:
-c compare to existing xml-formatted treebank
-xml wrap the treebank (or comparison results) with XML tags
examples: examples:
gr -cat=S -number=100 | tb gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
rf tb.txt | tb -c -- read comparison treebank from file
tt, test_tokenizer: tt String tt, test_tokenizer: tt String
Show the token list sent to the parser when String is parsed. Show the token list sent to the parser when String is parsed.