forked from GitHub/gf-core
testing treebanks
This commit is contained in:
@@ -14,6 +14,15 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
|
|||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
|
||||||
|
8/2 (AR) The command <tt>tb = tree_bank</tt> for creating and testing against
|
||||||
|
multilingual treebanks. Example uses:
|
||||||
|
<pre>
|
||||||
|
gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
|
||||||
|
rf tb.txt | tb -c -- read comparison treebank from file
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
10/1 (AR) Forbade variable binding inside negation and Kleene star
|
10/1 (AR) Forbade variable binding inside negation and Kleene star
|
||||||
patterns.
|
patterns.
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
all:
|
gslt:
|
||||||
txt2tags gslt-sem-2006.txt
|
txt2tags gslt-sem-2006.txt
|
||||||
htmls gslt-sem-2006.html
|
htmls gslt-sem-2006.html
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
<P ALIGN="center"><CENTER><H1>Grammars as Software Libraries</H1>
|
<P ALIGN="center"><CENTER><H1>Grammars as Software Libraries</H1>
|
||||||
<FONT SIZE="4">
|
<FONT SIZE="4">
|
||||||
<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR>
|
<I>Author: Aarne Ranta <aarne (at) cs.chalmers.se></I><BR>
|
||||||
Last update: Mon Jan 30 10:53:18 2006
|
Last update: Wed Feb 8 19:06:27 2006
|
||||||
</FONT></CENTER>
|
</FONT></CENTER>
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
@@ -64,6 +64,7 @@ Student projects on grammar libraries:
|
|||||||
<UL>
|
<UL>
|
||||||
<LI>Inger Andersson & Therese Söderberg: Spanish morphology
|
<LI>Inger Andersson & Therese Söderberg: Spanish morphology
|
||||||
<LI>Ludmilla Bogavac: Russian morphology
|
<LI>Ludmilla Bogavac: Russian morphology
|
||||||
|
<LI>Karin Cavallin: comparison with Svenska Akademins Grammatik
|
||||||
<LI>Ali El Dada: Arabic morphology and syntax
|
<LI>Ali El Dada: Arabic morphology and syntax
|
||||||
<LI>Muhammad Humayoun: Urdu morphology
|
<LI>Muhammad Humayoun: Urdu morphology
|
||||||
<LI>Michael Pellauer: Estonian morphology
|
<LI>Michael Pellauer: Estonian morphology
|
||||||
@@ -79,6 +80,21 @@ Technology, also:
|
|||||||
<LI>Peter Ljunglöf
|
<LI>Peter Ljunglöf
|
||||||
</UL>
|
</UL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
Various grammar library contributions from the multilingual Chalmers comminity:
|
||||||
|
</P>
|
||||||
|
<UL>
|
||||||
|
<LI>Koen Claessen, Carlos Gonzalía, Patrik Jansson, Wojciech Mostowski, Karol Ostrovsky,
|
||||||
|
David Wahlstedt
|
||||||
|
</UL>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
Resource library patches and suggestions from the WebALT staff:
|
||||||
|
</P>
|
||||||
|
<UL>
|
||||||
|
<LI>Lauri Carlson, Glòria Casanellas, Anni Laine, Wanjiku Ng'ang'a, Jordi Saludes
|
||||||
|
</UL>
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
<!-- NEW -->
|
<!-- NEW -->
|
||||||
</P>
|
</P>
|
||||||
|
|||||||
@@ -47,6 +47,7 @@ Staff contributions to grammar libraries:
|
|||||||
Student projects on grammar libraries:
|
Student projects on grammar libraries:
|
||||||
- Inger Andersson & Therese Söderberg: Spanish morphology
|
- Inger Andersson & Therese Söderberg: Spanish morphology
|
||||||
- Ludmilla Bogavac: Russian morphology
|
- Ludmilla Bogavac: Russian morphology
|
||||||
|
- Karin Cavallin: comparison with Svenska Akademins Grammatik
|
||||||
- Ali El Dada: Arabic morphology and syntax
|
- Ali El Dada: Arabic morphology and syntax
|
||||||
- Muhammad Humayoun: Urdu morphology
|
- Muhammad Humayoun: Urdu morphology
|
||||||
- Michael Pellauer: Estonian morphology
|
- Michael Pellauer: Estonian morphology
|
||||||
@@ -59,6 +60,15 @@ Technology, also:
|
|||||||
- Peter Ljunglöf
|
- Peter Ljunglöf
|
||||||
|
|
||||||
|
|
||||||
|
Various grammar library contributions from the multilingual Chalmers comminity:
|
||||||
|
- Koen Claessen, Carlos Gonzalía, Patrik Jansson, Wojciech Mostowski, Karol Ostrovsky,
|
||||||
|
David Wahlstedt
|
||||||
|
|
||||||
|
|
||||||
|
Resource library patches and suggestions from the WebALT staff:
|
||||||
|
- Lauri Carlson, Glòria Casanellas, Anni Laine, Wanjiku Ng'ang'a, Jordi Saludes
|
||||||
|
|
||||||
|
|
||||||
#NEW
|
#NEW
|
||||||
|
|
||||||
==Software Libraries==
|
==Software Libraries==
|
||||||
|
|||||||
@@ -290,10 +290,13 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
|
|||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
returnArg (ATrms $ generateTrees opts gro mt) sa
|
returnArg (ATrms $ generateTrees opts gro mt) sa
|
||||||
|
|
||||||
|
CTreeBank | oElem doCompute opts -> do -- -c
|
||||||
|
let bank = prCommandArg a
|
||||||
|
returnArg (AString $ unlines $ testTreebank opts st bank) sa
|
||||||
CTreeBank -> do
|
CTreeBank -> do
|
||||||
let ts = strees $ s2t $ snd sa
|
let ts = strees $ s2t $ snd sa
|
||||||
comm = "command" ----
|
comm = "command" ----
|
||||||
justOutput opts (mkTreebank opts st comm ts) sa
|
returnArg (AString $ unlines $ mkTreebank opts st comm ts) sa
|
||||||
|
|
||||||
CShowTreeGraph | oElem emitCode opts -> do -- -o
|
CShowTreeGraph | oElem emitCode opts -> do -- -o
|
||||||
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa
|
returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa
|
||||||
|
|||||||
@@ -230,11 +230,14 @@ txtHelpFile =
|
|||||||
"\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++
|
"\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++
|
||||||
"\n" ++
|
"\n" ++
|
||||||
"\ntb, tree_bank: tb" ++
|
"\ntb, tree_bank: tb" ++
|
||||||
"\n Generate a multilingual treebank from a list of trees." ++
|
"\n Generate a multilingual treebank from a list of trees (default) or compare" ++
|
||||||
"\n flags:" ++
|
"\n to an existing treebank." ++
|
||||||
"\n -xml wrap the treebank with XML tags" ++
|
"\n options:" ++
|
||||||
|
"\n -c compare to existing xml-formatted treebank" ++
|
||||||
|
"\n -xml wrap the treebank (or comparison results) with XML tags" ++
|
||||||
"\n examples:" ++
|
"\n examples:" ++
|
||||||
"\n gr -cat=S -number=100 | tb" ++
|
"\n gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file" ++
|
||||||
|
"\n rf tb.txt | tb -c -- read comparison treebank from file" ++
|
||||||
"\n" ++
|
"\n" ++
|
||||||
"\ntt, test_tokenizer: tt String" ++
|
"\ntt, test_tokenizer: tt String" ++
|
||||||
"\n Show the token list sent to the parser when String is parsed." ++
|
"\n Show the token list sent to the parser when String is parsed." ++
|
||||||
|
|||||||
@@ -183,7 +183,7 @@ optionsOfCommand co = case co of
|
|||||||
CGenerateRandom -> both "cf prob" "cat lang number depth"
|
CGenerateRandom -> both "cf prob" "cat lang number depth"
|
||||||
CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
|
CGenerateTrees -> both "metas" "atoms depth alts cat lang number"
|
||||||
CPutTerm -> flags "transform number"
|
CPutTerm -> flags "transform number"
|
||||||
CTreeBank -> opts "xml"
|
CTreeBank -> opts "c xml"
|
||||||
CWrapTerm _ -> opts "c"
|
CWrapTerm _ -> opts "c"
|
||||||
CApplyTransfer _ -> flags "lang transfer"
|
CApplyTransfer _ -> flags "lang transfer"
|
||||||
CMorphoAnalyse -> both "short" "lang"
|
CMorphoAnalyse -> both "short" "lang"
|
||||||
|
|||||||
@@ -12,12 +12,13 @@
|
|||||||
-- Purpose: to generate treebanks.
|
-- Purpose: to generate treebanks.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
module GF.UseGrammar.Treebank (mkTreebank) where
|
module GF.UseGrammar.Treebank (mkTreebank,testTreebank) where
|
||||||
|
|
||||||
|
import GF.Compile.ShellState
|
||||||
import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories)
|
|
||||||
import GF.UseGrammar.Linear (linTree2string)
|
import GF.UseGrammar.Linear (linTree2string)
|
||||||
import GF.UseGrammar.Custom
|
import GF.UseGrammar.Custom
|
||||||
|
import GF.UseGrammar.GetTree (string2tree)
|
||||||
|
import GF.Grammar.TypeCheck (annotate)
|
||||||
import GF.Canon.CMacros (noMark)
|
import GF.Canon.CMacros (noMark)
|
||||||
import GF.Grammar.Grammar (Trm)
|
import GF.Grammar.Grammar (Trm)
|
||||||
import GF.Grammar.MMacros (exp2tree)
|
import GF.Grammar.MMacros (exp2tree)
|
||||||
@@ -31,13 +32,14 @@ import qualified GF.Grammar.Abstract as A
|
|||||||
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
|
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
|
||||||
-- (c) Aarne Ranta 2006 under GNU GPL
|
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||||
|
|
||||||
-- | the main function
|
-- | the main functions
|
||||||
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> IO ()
|
mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
|
||||||
mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris)
|
mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris)
|
||||||
where
|
where
|
||||||
mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
|
mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
|
||||||
mkTree t = putInXML opts "tree" [] (putStrLn $ showTree t)
|
-- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
|
||||||
mkLin t lg = putInXML opts "lin" (lang lg) (putStrLn $ linearize sh lg t)
|
mkTree t = putInXML opts "tree" [] (puts $ showTree t)
|
||||||
|
mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t)
|
||||||
|
|
||||||
langs = [prt_ l | l <- allLanguages sh]
|
langs = [prt_ l | l <- allLanguages sh]
|
||||||
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
|
comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
|
||||||
@@ -46,14 +48,56 @@ mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris)
|
|||||||
lang lg = " lang=" ++ show (prt_ (zIdent lg))
|
lang lg = " lang=" ++ show (prt_ (zIdent lg))
|
||||||
tris = zip trees [1..]
|
tris = zip trees [1..]
|
||||||
|
|
||||||
|
testTreebank :: Options -> ShellState -> String -> Res
|
||||||
putInXML :: Options -> String -> String -> IO () -> IO ()
|
testTreebank opts sh = putInXML opts "diff" [] . concatMap testOne . getTreebank . lines
|
||||||
putInXML opts tag attrs io = do
|
|
||||||
ifXML $ putStrLn $ tagXML $ tag ++ attrs
|
|
||||||
io
|
|
||||||
ifXML $ putStrLn $ tagXML $ '/':tag
|
|
||||||
where
|
where
|
||||||
ifXML c = if oElem showXML opts then c else return ()
|
testOne (e,lang,str) = do
|
||||||
|
let tr = annot gr e
|
||||||
|
let str0 = linearize sh lang tr
|
||||||
|
if str == str0 then ret else putInXML opts "diff" [] $ do
|
||||||
|
putInXML opts "tree" [] (puts $ showTree tr)
|
||||||
|
putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0
|
||||||
|
putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
|
||||||
|
gr = firstStateGrammar sh
|
||||||
|
|
||||||
|
-- string vs. IO
|
||||||
|
type Res = [String] -- IO ()
|
||||||
|
puts :: String -> Res
|
||||||
|
puts = return -- putStrLn
|
||||||
|
ret = [] -- return ()
|
||||||
|
--
|
||||||
|
|
||||||
|
getTreebank :: [String] -> [(String,String,String)]
|
||||||
|
getTreebank ll = case ll of
|
||||||
|
[] -> []
|
||||||
|
l:ls ->
|
||||||
|
let (l1,l2) = getItem ls
|
||||||
|
(tr,lins) = getTree l1
|
||||||
|
lglins = getLins lins
|
||||||
|
in [(tr,lang,str) | (lang,str) <- lglins] ++ getTreebank l2
|
||||||
|
where
|
||||||
|
getItem = span ((/="</item") . take 6)
|
||||||
|
|
||||||
|
getTree (_:ss) = let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)
|
||||||
|
|
||||||
|
getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
|
||||||
|
getLins _ = []
|
||||||
|
|
||||||
|
getLang = takeWhile (/='"') . tail . dropWhile (/='"')
|
||||||
|
|
||||||
|
annot :: StateGrammar -> String -> A.Tree
|
||||||
|
annot gr s = errVal (error "illegal tree") $ do
|
||||||
|
let t = tree2exp $ string2tree gr s
|
||||||
|
annotate (grammar gr) t
|
||||||
|
|
||||||
|
putInXML :: Options -> String -> String -> Res -> Res
|
||||||
|
putInXML opts tag attrs io =
|
||||||
|
(ifXML $ puts $ tagXML $ tag ++ attrs) ++
|
||||||
|
io ++
|
||||||
|
(ifXML $ puts $ tagXML $ '/':tag)
|
||||||
|
where
|
||||||
|
ifXML c = if oElem showXML opts then c else []
|
||||||
|
|
||||||
|
|
||||||
tagXML :: String -> String
|
tagXML :: String -> String
|
||||||
tagXML s = "<" ++ s ++ ">"
|
tagXML s = "<" ++ s ++ ">"
|
||||||
|
|||||||
11
src/HelpFile
11
src/HelpFile
@@ -201,11 +201,14 @@ at, apply_transfer: at (Module.Fun | Fun)
|
|||||||
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
|
p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin
|
||||||
|
|
||||||
tb, tree_bank: tb
|
tb, tree_bank: tb
|
||||||
Generate a multilingual treebank from a list of trees.
|
Generate a multilingual treebank from a list of trees (default) or compare
|
||||||
flags:
|
to an existing treebank.
|
||||||
-xml wrap the treebank with XML tags
|
options:
|
||||||
|
-c compare to existing xml-formatted treebank
|
||||||
|
-xml wrap the treebank (or comparison results) with XML tags
|
||||||
examples:
|
examples:
|
||||||
gr -cat=S -number=100 | tb
|
gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
|
||||||
|
rf tb.txt | tb -c -- read comparison treebank from file
|
||||||
|
|
||||||
tt, test_tokenizer: tt String
|
tt, test_tokenizer: tt String
|
||||||
Show the token list sent to the parser when String is parsed.
|
Show the token list sent to the parser when String is parsed.
|
||||||
|
|||||||
Reference in New Issue
Block a user