diff --git a/doc/gf-history.html b/doc/gf-history.html index b7b83918c..620688f22 100644 --- a/doc/gf-history.html +++ b/doc/gf-history.html @@ -14,6 +14,15 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2

+8/2 (AR) The command tb = tree_bank for creating and testing against +multilingual treebanks. Example uses: +

+  gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file
+  rf tb.txt | tb -c                           -- read comparison treebank from file
+
+ +

+ 10/1 (AR) Forbade variable binding inside negation and Kleene star patterns. diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs index 150b756c1..1c7f4527e 100644 --- a/src/GF/Shell.hs +++ b/src/GF/Shell.hs @@ -290,10 +290,13 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com _ -> Nothing returnArg (ATrms $ generateTrees opts gro mt) sa + CTreeBank | oElem doCompute opts -> do -- -c + let bank = prCommandArg a + returnArg (AString $ unlines $ testTreebank opts st bank) sa CTreeBank -> do let ts = strees $ s2t $ snd sa comm = "command" ---- - justOutput opts (mkTreebank opts st comm ts) sa + returnArg (AString $ unlines $ mkTreebank opts st comm ts) sa CShowTreeGraph | oElem emitCode opts -> do -- -o returnArg (AString $ visualizeTrees opts $ strees $ s2t a) sa diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs index 14c87e621..5764e9f14 100644 --- a/src/GF/Shell/HelpFile.hs +++ b/src/GF/Shell/HelpFile.hs @@ -230,11 +230,14 @@ txtHelpFile = "\n p -lang=Cncdecimal \"123\" | at num2bin | l -- convert dec to bin" ++ "\n" ++ "\ntb, tree_bank: tb" ++ - "\n Generate a multilingual treebank from a list of trees." ++ - "\n flags:" ++ - "\n -xml wrap the treebank with XML tags" ++ + "\n Generate a multilingual treebank from a list of trees (default) or compare" ++ + "\n to an existing treebank." ++ + "\n options:" ++ + "\n -c compare to existing xml-formatted treebank" ++ + "\n -xml wrap the treebank (or comparison results) with XML tags" ++ "\n examples:" ++ - "\n gr -cat=S -number=100 | tb" ++ + "\n gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file" ++ + "\n rf tb.txt | tb -c -- read comparison treebank from file" ++ "\n" ++ "\ntt, test_tokenizer: tt String" ++ "\n Show the token list sent to the parser when String is parsed." ++ diff --git a/src/GF/Shell/ShellCommands.hs b/src/GF/Shell/ShellCommands.hs index 935b0be09..7d10ef882 100644 --- a/src/GF/Shell/ShellCommands.hs +++ b/src/GF/Shell/ShellCommands.hs @@ -183,7 +183,7 @@ optionsOfCommand co = case co of CGenerateRandom -> both "cf prob" "cat lang number depth" CGenerateTrees -> both "metas" "atoms depth alts cat lang number" CPutTerm -> flags "transform number" - CTreeBank -> opts "xml" + CTreeBank -> opts "c xml" CWrapTerm _ -> opts "c" CApplyTransfer _ -> flags "lang transfer" CMorphoAnalyse -> both "short" "lang" diff --git a/src/GF/UseGrammar/Treebank.hs b/src/GF/UseGrammar/Treebank.hs index 838e23597..99f0da281 100644 --- a/src/GF/UseGrammar/Treebank.hs +++ b/src/GF/UseGrammar/Treebank.hs @@ -12,12 +12,13 @@ -- Purpose: to generate treebanks. ----------------------------------------------------------------------------- -module GF.UseGrammar.Treebank (mkTreebank) where +module GF.UseGrammar.Treebank (mkTreebank,testTreebank) where - -import GF.Compile.ShellState (ShellState,grammar2shellState,canModules,stateGrammarOfLang,abstract,grammar,firstStateGrammar,allLanguages,allCategories) +import GF.Compile.ShellState import GF.UseGrammar.Linear (linTree2string) import GF.UseGrammar.Custom +import GF.UseGrammar.GetTree (string2tree) +import GF.Grammar.TypeCheck (annotate) import GF.Canon.CMacros (noMark) import GF.Grammar.Grammar (Trm) import GF.Grammar.MMacros (exp2tree) @@ -31,13 +32,14 @@ import qualified GF.Grammar.Abstract as A -- Generate a treebank with a multilingual grammar. AR 8/2/2006 -- (c) Aarne Ranta 2006 under GNU GPL --- | the main function -mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> IO () -mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris) +-- | the main functions +mkTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res +mkTreebank opts sh com trees = putInXML opts "treebank" comm (concatMap mkItem tris) where - mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs) - mkTree t = putInXML opts "tree" [] (putStrLn $ showTree t) - mkLin t lg = putInXML opts "lin" (lang lg) (putStrLn $ linearize sh lg t) + mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs) +-- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs) + mkTree t = putInXML opts "tree" [] (puts $ showTree t) + mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize sh lg t) langs = [prt_ l | l <- allLanguages sh] comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr @@ -46,14 +48,56 @@ mkTreebank opts sh com trees = putInXML opts "treebank" comm(mapM_ mkItem tris) lang lg = " lang=" ++ show (prt_ (zIdent lg)) tris = zip trees [1..] - -putInXML :: Options -> String -> String -> IO () -> IO () -putInXML opts tag attrs io = do - ifXML $ putStrLn $ tagXML $ tag ++ attrs - io - ifXML $ putStrLn $ tagXML $ '/':tag +testTreebank :: Options -> ShellState -> String -> Res +testTreebank opts sh = putInXML opts "diff" [] . concatMap testOne . getTreebank . lines where - ifXML c = if oElem showXML opts then c else return () + testOne (e,lang,str) = do + let tr = annot gr e + let str0 = linearize sh lang tr + if str == str0 then ret else putInXML opts "diff" [] $ do + putInXML opts "tree" [] (puts $ showTree tr) + putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0 + putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str + gr = firstStateGrammar sh + +-- string vs. IO +type Res = [String] -- IO () +puts :: String -> Res +puts = return -- putStrLn +ret = [] -- return () +-- + +getTreebank :: [String] -> [(String,String,String)] +getTreebank ll = case ll of + [] -> [] + l:ls -> + let (l1,l2) = getItem ls + (tr,lins) = getTree l1 + lglins = getLins lins + in [(tr,lang,str) | (lang,str) <- lglins] ++ getTreebank l2 + where + getItem = span ((/=" String -> A.Tree +annot gr s = errVal (error "illegal tree") $ do + let t = tree2exp $ string2tree gr s + annotate (grammar gr) t + +putInXML :: Options -> String -> String -> Res -> Res +putInXML opts tag attrs io = + (ifXML $ puts $ tagXML $ tag ++ attrs) ++ + io ++ + (ifXML $ puts $ tagXML $ '/':tag) + where + ifXML c = if oElem showXML opts then c else [] + tagXML :: String -> String tagXML s = "<" ++ s ++ ">" diff --git a/src/HelpFile b/src/HelpFile index 8b54e5585..0f6a12d8b 100644 --- a/src/HelpFile +++ b/src/HelpFile @@ -201,11 +201,14 @@ at, apply_transfer: at (Module.Fun | Fun) p -lang=Cncdecimal "123" | at num2bin | l -- convert dec to bin tb, tree_bank: tb - Generate a multilingual treebank from a list of trees. - flags: - -xml wrap the treebank with XML tags + Generate a multilingual treebank from a list of trees (default) or compare + to an existing treebank. + options: + -c compare to existing xml-formatted treebank + -xml wrap the treebank (or comparison results) with XML tags examples: - gr -cat=S -number=100 | tb + gr -cat=S -number=100 | tb -xml | wf tb.xml -- random treebank into file + rf tb.txt | tb -c -- read comparison treebank from file tt, test_tokenizer: tt String Show the token list sent to the parser when String is parsed.