From a0849d8a5a54ef7321c291c0a6e55065f8cf0d5e Mon Sep 17 00:00:00 2001 From: Inari Listenmaa Date: Thu, 24 May 2018 22:36:23 +0200 Subject: [PATCH] (gftest) Add --show-context + combine -o with -f,-c,-b --- src/tools/gftest/Grammar.hs | 15 +++- src/tools/gftest/Main.hs | 137 ++++++++++++++++++----------- src/tools/gftest/README.md | 171 ++++++++++++++++++++++++++++++------ 3 files changed, 242 insertions(+), 81 deletions(-) diff --git a/src/tools/gftest/Grammar.hs b/src/tools/gftest/Grammar.hs index 4e6d0c6e9..0724987b2 100644 --- a/src/tools/gftest/Grammar.hs +++ b/src/tools/gftest/Grammar.hs @@ -9,7 +9,7 @@ module Grammar -- Categories, coercions , ccats, ccatOf, arity , coerces, uncoerce - , uncoerceAbsCat + , uncoerceAbsCat, mkCC -- Testing and comparison , testTree, testFun @@ -17,7 +17,7 @@ module Grammar , treesUsingFun -- Contexts - , contextsFor + , contextsFor, dummyHole -- FEAT , featIth, featCard @@ -327,6 +327,13 @@ toGrammar pgf langName = cseq2Either (I.SymCat x y) = Right (x,y) cseq2Either x = Left (show x) + +mkCC gr fid = CC ccat fid + where ccat = case [ cat | (cat,bg,end,_) <- concrCats gr + , fid `elem` [bg..end] ] of + [] -> Nothing -- means it's coercion + xs -> Just $ the xs + -- parsing and reading trees mkTree :: Grammar -> PGF2.Expr -> Tree mkTree gr = disambTree . 
ambTree @@ -983,9 +990,9 @@ testFun debug gr trans startcat funname = , let testcases_ctxs = catMaybes [ M.lookup cat cat_testcase_ctxs | cat <- cats ] , not $ null testcases_ctxs - , let fstLen = \(a,_) (b,_) -> length (flatten a) `compare` length (flatten b) + , let fstLen (a,_) (b,_) = length (flatten a) `compare` length (flatten b) , let (App tp subtrees,_) = -- pick smallest test case to be the representative - head $ sortBy fstLen testcases_ctxs + minimumBy fstLen testcases_ctxs , let newTop = -- debug: put coerced contexts under a separate test case if debug then tp { ctyp = (fst $ ctyp tp, coe)} else tp ] diff --git a/src/tools/gftest/Main.hs b/src/tools/gftest/Main.hs index afcf17830..d68d78457 100644 --- a/src/tools/gftest/Main.hs +++ b/src/tools/gftest/Main.hs @@ -32,6 +32,7 @@ data GfTest , show_funs :: Bool , funs_of_arity :: Maybe Int , show_coercions:: Bool + , show_contexts :: Maybe Int , concr_string :: String -- Information about fields @@ -69,6 +70,7 @@ gftest = GfTest , show_funs = def &= help "Show all available functions" , funs_of_arity = def &= A.typ "2" &= help "Show all functions of arity 2" , show_coercions= def &= help "Show coercions in the grammar" + , show_contexts = def &= A.typ "8410" &= help "Show contexts for a given concrete type (given as FId)" , debug = def &= help "Show debug output" , equal_fields = def &= A.name "q" &= help "Show fields whose strings are always identical" , empty_fields = def &= A.name "e" &= help "Show fields whose strings are always empty" @@ -103,7 +105,7 @@ main = do gr <- readGrammar langName grName grTrans <- sequence [ readGrammar lt grName | lt <- langTrans ] - -- in case the language given by the user was not valid, use some language that *is* in the grammar + -- if language given by the user was not valid, use default language from Grammar let langName = concrLang gr let startcat = startCat gr `fromMaybe` start_cat args @@ -143,39 +145,66 @@ main = do , xs@(_:_) <- [ S.toList vs ] ] 
----------------------------------------------------------------------------- -- Testing functions - + -- Test a tree - case tree args of - [] -> return () - t -> output $ testTree' (readTree gr t) 1 + let trees = case tree args of + [] -> [] + ts -> lines ts + output $ + unlines [ testTree' (readTree gr tree) 1 | tree <- trees ] -- Test a function - case category args of - [] -> return () - cat -> output $ unlines - [ testTree' t n - | (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]] + let substrs xs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') xs + let cats = case category args of + [] -> [] + cs -> if '*' `elem` cs + then let subs = substrs cs + in nub [ cat | (cat,_,_,_) <- concrCats gr + , all (`isInfixOf` cat) subs ] + else words cs + output $ + unlines [ testTree' t n + | cat <- cats + , (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]] -- Test all functions in a category - case function args of - [] -> return () - fs -> let funs = if '*' `elem` fs - then let subs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') fs - in nub [ f | s <- symbols gr, let f = show s - , all (`isInfixOf` f) subs - , arity s >= 1 ] - else words fs - in output $ unlines - [ testFun (debug args) gr grTrans startcat f - | f <- funs ] + let funs = case function args of + [] -> [] + fs -> if '*' `elem` fs + then let subs = substrs fs + in nub [ f | s <- symbols gr, let f = show s + , all (`isInfixOf` f) subs + , arity s >= 1 ] + else words fs + output $ + unlines [ testFun (debug args) gr grTrans startcat f + | f <- funs ] ----------------------------------------------------------------------------- -- Information about the grammar + -- Show contexts for a particular concrete category + case show_contexts args of + Nothing -> return () + Just fid -> mapM_ print + [ ctx dummyHole + | start <- ccats gr startcat + , ctx <- contextsFor gr start (mkCC gr fid) ] + -- Show available categories when (show_cats args) $ do putStrLn "* Categories in the 
grammar:" - putStrLn $ unlines [ cat | (cat,_,_,_) <- concrCats gr ] + let concrcats = sortBy (\(_,a,_,_) (_,b,_,_) -> a `compare` b) (concrCats gr) + sequence_ [ do putStrLn cat + when (debug args) $ + putStrLn $ unwords $ + [ " Compiles to concrete" ] ++ + [ "categories " ++ show bg++"—"++show end + | bg/=end ] ++ + [ "category " ++ show bg + | bg==end ] + | (cat,bg,end,_) <- concrcats + , end >= 0] -- Show available functions when (show_funs args) $ do @@ -279,6 +308,19 @@ main = do putStrLn $ "* " ++ show (featIth gr start n 0) putStrLn $ "* " ++ show (featIth gr start n (i-1)) + +------------------------------------------------------------------------------- +-- Read trees from treebank. + + treebank' <- + case treebank args of + Nothing -> return [] + Just fp -> do + tb <- readFile fp + return [ readTree gr s + | s <- lines tb ] + mapM_ print treebank' + ------------------------------------------------------------------------------- -- Comparison with old grammar @@ -308,36 +350,44 @@ main = do [ appendFile ccatChangeFile $ unlines $ ("* All concrete cats in the "++age++" grammar:"): - [ show cats | cats <- concrCats g ] + [ show cts | cts <- concrCats g ] | (g,age) <- [(ogr,"old"),(gr,"new")] ] putStrLn $ "Created file " ++ ccatChangeFile -------------------------------------------------------------------------- - -- print out tests for all functions in the changed cats + -- Print out tests for all functions in the changed cats. + -- If -f, -c or --treebank specified, use them. 
+ + let f cat = (cat, treesUsingFun gr $ functionsByCat gr cat) + + byCat = [ f cat | cat <- cats ] -- from command line arg -c + changed = [ f cat | (cat,_,_,_) <- difcats + , only_changed_cats args ] + byFun = [ (cat, treesUsingFun gr fs) + | funName <- funs -- comes from command line arg -f + , let fs@(s:_) = lookupSymbol gr funName + , let cat = snd $ Grammar.typ s ] + fromTb = [ (cat,[tree]) | tree <- treebank' + , let (CC (Just cat) _) = ccatOf tree ] + + treesToTest = + case concat [byFun, byCat, changed, fromTb] of + [] -> [ f cat -- nothing else specified -> test all functions + | (cat,_,_,_) <- concrCats gr ] + xs -> S.toList $ S.fromList xs - let changedFuns = - if only_changed_cats args - then [ (cat,functionsByCat gr cat) | (cat,_,_,_) <- difcats ] - else - case category args of - [] -> case function args of - [] -> [ (cat,functionsByCat gr cat) - | (cat,_,_,_) <- concrCats gr ] - fn -> [ (snd $ Grammar.typ f, [f]) - | f <- lookupSymbol gr fn ] - ct -> [ (ct,functionsByCat gr ct) ] writeLinFile file grammar otherGrammar = do writeFile file "" putStrLn "Testing functions in… " diff <- concat `fmap` sequence [ do let cs = [ compareTree grammar otherGrammar grTrans t - | t <- treesUsingFun grammar funs ] + | t <- trees ] putStr $ cat ++ " \r" -- prevent lazy evaluation; make printout accurate appendFile ("/tmp/"++file) (unwords $ map show cs) return cs - | (cat,funs) <- changedFuns ] + | (cat,trees) <- treesToTest ] let relevantDiff = go [] [] diff where go res seen [] = res go res seen (Comparison f ls:cs) = @@ -379,19 +429,6 @@ main = do putStrLn $ "Created files " ++ langName ++ "-(old|new)-funs.org" -------------------------------------------------------------------------------- --- Read trees from treebank. No fancier functionality yet. 
- - case treebank args of - Nothing -> return () - Just fp -> do - tb <- readFile fp - sequence_ [ do let tree = readTree gr str - ccat = ccatOf tree - putStrLn $ unlines [ "", showTree tree ++ " : " ++ show ccat] - putStrLn $ linearize gr tree - | str <- lines tb ] - where diff --git a/src/tools/gftest/README.md b/src/tools/gftest/README.md index 5134300fd..beecaf191 100644 --- a/src/tools/gftest/README.md +++ b/src/tools/gftest/README.md @@ -27,9 +27,14 @@ document, as well as the full list of options to give to `gftest`. - [Empty or always identical fields: `-e`, `-q`](#empty-or-always-identical-fields--e--q) - [Unused fields: `-u`](#unused-fields--u) - [Erased trees: `-r`](#erased-trees--r) + - [Debug information: `-d`](#debug-information--d) +- [Detailed information about the grammar](#detailed-information-about-the-grammar) + - [--show-cats](#--show-cats) + - [--show-funs](#--show-funs) - [--show-coercions](#--show-coercions) + - [--show-contexts](#--show-contexts) - [--count-trees](#--count-trees) - + - [--funs-of-arity](#--funs-of-arity) ## Installation @@ -71,6 +76,7 @@ Common flags: --show-funs Show all available functions --funs-of-arity=2 Show all functions of arity 2 --show-coercions Show coercions in the grammar + --show-contexts=8410 Show contexts for a given concrete type (given as FId) --concr-string=the Show all functions that include given string -q --equal-fields Show fields whose strings are always identical -e --empty-fields Show fields whose strings are always empty @@ -188,8 +194,9 @@ then you can call the following: Give a grammar, a concrete syntax, and an old version of the same grammar as a separate PGF file. The program generates test sentences -for all functions, linearises with both grammars, and outputs those -that differ between the versions. It writes the differences into files. +for all functions (if no other arguments), linearises with both +grammars, and outputs those that differ between the versions. 
It +writes the differences into files. Example: @@ -206,20 +213,20 @@ Created files TestLangEng-(old|new)-funs.org changed. Shows e.g. if you added or removed a parameter or a field. -* TestLangEng-lin-diff.org: All trees that have different -linearisations in the following format. **This is usually the most -relevant file.** +* **TestLangEng-lin-diff.org** (usually the most relevant file): All +trees that have different linearisations in the following format. + ``` -* send_V3 + * send_V3 -** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron)))) -TestLangDut> we sturen onszelf ernaar -TestLangDut-OLD> we sturen zichzelf ernaar + ** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron)))) + TestLangDut> we sturen onszelf ernaar + TestLangDut-OLD> we sturen zichzelf ernaar -** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron)))) -TestLangDut> we stuurden onszelf ernaar -TestLangDut-OLD> we stuurden zichzelf ernaar + ** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron)))) + TestLangDut> we stuurden onszelf ernaar + TestLangDut-OLD> we stuurden zichzelf ernaar ``` * TestLangEng-old-funs.org and TestLangEng-new-funs.org: groups the @@ -227,24 +234,43 @@ TestLangDut-OLD> we stuurden zichzelf ernaar e.g. added or removed parameters, and that has created new versions of some functions: say you didn't have gender in nouns, but now you have, then all functions taking nouns have suddenly a gendered - version. **This is kind of hard to read, don't worry too much if the - output doesn't make any sense.** + version. (This is kind of hard to read, don't worry too much if the + output doesn't make any sense.) 
-You can give an additional parameter, `--only-changed-cats`, if you -only want to test functions in those categories that you have changed, -like this: `gftest -g TestLang -l Eng -o TestLangOld ---only-changed-cats`. This makes it run faster. +#### Additional arguments to `-o` + +The default mode is to test all functions, but you can also give any +combination of `-s`, `-f`, `-c`, `--treebank`/`-b` and `--only-changed-cats`. + +With `-s`, you can change the start category in which contexts are +generated. + +With `-f` and `-c`, it tests only the specified functions and +categories. +With `-b FILEPATH` (`-b`=`--treebank`), it tests only the trees in the file. + +With `--only-changed-cats`, it only tests functions in those categories +that have changed between the two versions. + +Examples: + +* `gftest -g TestLang -l Eng -o TestLangOld` tests all functions +* `gftest -g TestLang -l Eng -o TestLangOld -s S` tests all functions in start category S +* `gftest -g TestLang -l Eng -o TestLangOld --only-changed-cats` tests only changed categories. If no categories have changed (and no other arguments specified), tests everything. +* `gftest -g TestLang -l Eng -o TestLangOld -f "AdjCN AdvCN" -c Adv -b trees.txt` tests functions `AdjCN` and `AdvCN`; same for all functions that produce an `Adv`, and all trees in trees.txt. ### Information about a particular string: `--concr-string` -Show all functions where the given concrete string appears as syncategorematic string (i.e. not from the arguments). +Show all functions that introduce the string given as an argument. Example: -* `gftest -l Eng --concr-string it` +* `gftest -g Lang -l Eng --concr-string it` which gives the answer `==> CleftAdv, CleftNP, DefArt, ImpersCl, it_Pron` +(Note that you have the same feature in GF shell, command `morpho_analyse`/`ma`.) + ### Write into a file: `-w` @@ -353,10 +379,11 @@ Show trees that are erased in some function, i.e. 
a function `F : A -> B -> C` h Example: -`gftest -g Lang -l "Dut Eng" -r` -output: + ``` +> gftest -g Lang -l "Dut Eng" -r + * Erased trees: ** RelCl (ExistNP something_NP) : RCl @@ -372,15 +399,56 @@ output: In the first result, an argument of type `RCl` is missing in the tree constructed by `RelNP`, and in the second result, the argument `write_V2` is missing in the tree constructed by `PPartNP`. In both cases, the English linearisation contains all the arguments, but in the Dutch one they are missing. (This bug is already fixed, just showing it here to demonstrate the feature.) +## Detailed information about the grammar + +### Debug information: `-d` + +When combined with `-f`, `-c` or `-t`, two things happen: + +1) The trees are linearised using `tabularLinearize`, which shows the +inflection table of all forms. +2) You can see traces of pruning that happens in testing functions: +contexts that are common to several concrete categories are put under +a separate test case. + +When combined with `--show-cats`, also the concrete categories are +shown. + +### --show-cats + +Shows the categories in the grammar. With `--debug`/`-d`, shows also +concrete categories. + +Example: + +``` +> gftest -g Foods -l Spa --show-cats -d + +* Categories in the grammar: +Comment + Compiles to concrete category 0 +Item + Compiles to concrete categories 1—4 +Kind + Compiles to concrete categories 5—6 +Quality + Compiles to concrete categories 7—8 +Question + Compiles to concrete category 9 +``` + +### --show-funs + +Shows the functions in the grammar. (Nothing fancy happens with other flags.) + ### --show-coercions First I'll explain what *coercions* are, then why it may be interesting to show them. Let's take a Spanish Foods grammar, and -consider the category `Quality`—those `Good Pizza` and `Vegan Pizza` -that you saw in the previous section. 
`Good` -"bueno/buena/buenos/buenas" goes before the noun it modifies, whereas -`Vegan` "vegano/vegana/…" goes after, so these will become different +consider the category `Quality`, e.g. `Good` and `Vegan`. +`Good` "bueno/buena/buenos/buenas" goes before the noun it modifies, +whereas `Vegan` "vegano/vegana/…" goes after, so these will become different *concrete categories* in the PGF: `Quality_before` and `Quality_after`. (In reality, they are something like `Quality_7` and `Quality_8` though.) @@ -406,6 +474,55 @@ Quality_8--->_11 (Just mentally replace 7 with `before`, 8 with `after` and 11 with `whatever`.) +### --show-contexts + +Show contexts for a given concrete category, given as an FId +(i.e. Int). The concrete category may be a coercion or a normal +category. By combining with [`-s`](#start-category-for-context--s), +you can change the start category of the context. + +(You can get a list of all concrete categories by pairing `--show-cats` +with `--debug`: see [`--show-cats`](#--show-cats).) + +Examples: + +* First, find out some concrete categories: + +``` + > gftest -g Foods -l Spa --show-cats -d + … + Quality + Compiles to concrete categories 7—8 + … +``` + +* Then, list the contexts for some of them, say `Quality_7`: + +``` + > gftest -g Foods -l Spa --show-contexts 7 + + Pred (That (Mod ∅ Wine)) Vegan + Pred (That Wine) ∅ + Pred (These (Mod ∅ Wine)) Vegan + Pred (These Wine) ∅ + Pred (That (Mod ∅ Pizza)) Vegan + Pred (That Pizza) ∅ + Pred (These (Mod ∅ Pizza)) Vegan + Pred (These Pizza) ∅ +``` + +* Check out from [`--show-coercions`](#--show-coercions) how to find +coercions, and you can try `--show-contexts` with them: + +``` + > gftest -g Foods -l Spa --show-contexts 11 + + Pred (That Wine) ∅ + Pred (These Wine) ∅ + Pred (That Pizza) ∅ + Pred (These Pizza) ∅ +``` + ### --count-trees Number of trees up to given size. Gives a number how many trees, and a