1
0
forked from GitHub/gf-core

(gftest) Add --show-context + combine -o with -f,-c,-b

This commit is contained in:
Inari Listenmaa
2018-05-24 22:36:23 +02:00
parent 19c6090e85
commit a0849d8a5a
3 changed files with 242 additions and 81 deletions

View File

@@ -9,7 +9,7 @@ module Grammar
-- Categories, coercions
, ccats, ccatOf, arity
, coerces, uncoerce
, uncoerceAbsCat
, uncoerceAbsCat, mkCC
-- Testing and comparison
, testTree, testFun
@@ -17,7 +17,7 @@ module Grammar
, treesUsingFun
-- Contexts
, contextsFor
, contextsFor, dummyHole
-- FEAT
, featIth, featCard
@@ -327,6 +327,13 @@ toGrammar pgf langName =
cseq2Either (I.SymCat x y) = Right (x,y)
cseq2Either x = Left (show x)
-- | Wrap a bare concrete-category id in a 'CC', pairing it with the name of
-- the category whose id range (as listed by 'concrCats') contains it.
mkCC gr fid = CC owner fid
  where
    -- Names of every category whose [bg..end] range includes the given id.
    covering = [ name | (name,lo,hi,_) <- concrCats gr
                      , fid `elem` [lo..hi] ]
    owner
      | null covering = Nothing -- no category covers the id: means it's coercion
      | otherwise     = Just $ the covering
-- parsing and reading trees
mkTree :: Grammar -> PGF2.Expr -> Tree
mkTree gr = disambTree . ambTree
@@ -983,9 +990,9 @@ testFun debug gr trans startcat funname =
, let testcases_ctxs = catMaybes [ M.lookup cat cat_testcase_ctxs
| cat <- cats ]
, not $ null testcases_ctxs
, let fstLen = \(a,_) (b,_) -> length (flatten a) `compare` length (flatten b)
, let fstLen (a,_) (b,_) = length (flatten a) `compare` length (flatten b)
, let (App tp subtrees,_) = -- pick smallest test case to be the representative
head $ sortBy fstLen testcases_ctxs
minimumBy fstLen testcases_ctxs
, let newTop = -- debug: put coerced contexts under a separate test case
if debug then tp { ctyp = (fst $ ctyp tp, coe)} else tp
]

View File

@@ -32,6 +32,7 @@ data GfTest
, show_funs :: Bool
, funs_of_arity :: Maybe Int
, show_coercions:: Bool
, show_contexts :: Maybe Int
, concr_string :: String
-- Information about fields
@@ -69,6 +70,7 @@ gftest = GfTest
, show_funs = def &= help "Show all available functions"
, funs_of_arity = def &= A.typ "2" &= help "Show all functions of arity 2"
, show_coercions= def &= help "Show coercions in the grammar"
, show_contexts = def &= A.typ "8410" &= help "Show contexts for a given concrete type (given as FId)"
, debug = def &= help "Show debug output"
, equal_fields = def &= A.name "q" &= help "Show fields whose strings are always identical"
, empty_fields = def &= A.name "e" &= help "Show fields whose strings are always empty"
@@ -103,7 +105,7 @@ main = do
gr <- readGrammar langName grName
grTrans <- sequence [ readGrammar lt grName | lt <- langTrans ]
-- in case the language given by the user was not valid, use some language that *is* in the grammar
-- if language given by the user was not valid, use default language from Grammar
let langName = concrLang gr
let startcat = startCat gr `fromMaybe` start_cat args
@@ -143,39 +145,66 @@ main = do
, xs@(_:_) <- [ S.toList vs ] ]
-----------------------------------------------------------------------------
-- Testing functions
-- Test a tree
case tree args of
[] -> return ()
t -> output $ testTree' (readTree gr t) 1
let trees = case tree args of
[] -> []
ts -> lines ts
output $
unlines [ testTree' (readTree gr tree) 1 | tree <- trees ]
-- Test a function
case category args of
[] -> return ()
cat -> output $ unlines
[ testTree' t n
| (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]]
let substrs xs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') xs
let cats = case category args of
[] -> []
cs -> if '*' `elem` cs
then let subs = substrs cs
in nub [ cat | (cat,_,_,_) <- concrCats gr
, all (`isInfixOf` cat) subs ]
else words cs
output $
unlines [ testTree' t n
| cat <- cats
, (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]]
-- Test all functions in a category
case function args of
[] -> return ()
fs -> let funs = if '*' `elem` fs
then let subs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') fs
in nub [ f | s <- symbols gr, let f = show s
, all (`isInfixOf` f) subs
, arity s >= 1 ]
else words fs
in output $ unlines
[ testFun (debug args) gr grTrans startcat f
| f <- funs ]
let funs = case function args of
[] -> []
fs -> if '*' `elem` fs
then let subs = substrs fs
in nub [ f | s <- symbols gr, let f = show s
, all (`isInfixOf` f) subs
, arity s >= 1 ]
else words fs
output $
unlines [ testFun (debug args) gr grTrans startcat f
| f <- funs ]
-----------------------------------------------------------------------------
-- Information about the grammar
-- Show contexts for a particular concrete category
case show_contexts args of
Nothing -> return ()
Just fid -> mapM_ print
[ ctx dummyHole
| start <- ccats gr startcat
, ctx <- contextsFor gr start (mkCC gr fid) ]
-- Show available categories
when (show_cats args) $ do
putStrLn "* Categories in the grammar:"
putStrLn $ unlines [ cat | (cat,_,_,_) <- concrCats gr ]
let concrcats = sortBy (\(_,a,_,_) (_,b,_,_) -> a `compare` b) (concrCats gr)
sequence_ [ do putStrLn cat
when (debug args) $
putStrLn $ unwords $
[ " Compiles to concrete" ] ++
[ "categories " ++ show bg++""++show end
| bg/=end ] ++
[ "category " ++ show bg
| bg==end ]
| (cat,bg,end,_) <- concrcats
, end >= 0]
-- Show available functions
when (show_funs args) $ do
@@ -279,6 +308,19 @@ main = do
putStrLn $ "* " ++ show (featIth gr start n 0)
putStrLn $ "* " ++ show (featIth gr start n (i-1))
-------------------------------------------------------------------------------
-- Read trees from treebank.
treebank' <-
case treebank args of
Nothing -> return []
Just fp -> do
tb <- readFile fp
return [ readTree gr s
| s <- lines tb ]
mapM_ print treebank'
-------------------------------------------------------------------------------
-- Comparison with old grammar
@@ -308,36 +350,44 @@ main = do
[ appendFile ccatChangeFile $
unlines $
("* All concrete cats in the "++age++" grammar:"):
[ show cats | cats <- concrCats g ]
[ show cts | cts <- concrCats g ]
| (g,age) <- [(ogr,"old"),(gr,"new")] ]
putStrLn $ "Created file " ++ ccatChangeFile
--------------------------------------------------------------------------
-- print out tests for all functions in the changed cats
-- Print out tests for all functions in the changed cats.
-- If -f, -c or --treebank specified, use them.
let f cat = (cat, treesUsingFun gr $ functionsByCat gr cat)
byCat = [ f cat | cat <- cats ] -- from command line arg -c
changed = [ f cat | (cat,_,_,_) <- difcats
, only_changed_cats args ]
byFun = [ (cat, treesUsingFun gr fs)
| funName <- funs -- comes from command line arg -f
, let fs@(s:_) = lookupSymbol gr funName
, let cat = snd $ Grammar.typ s ]
fromTb = [ (cat,[tree]) | tree <- treebank'
, let (CC (Just cat) _) = ccatOf tree ]
treesToTest =
case concat [byFun, byCat, changed, fromTb] of
[] -> [ f cat -- nothing else specified -> test all functions
| (cat,_,_,_) <- concrCats gr ]
xs -> S.toList $ S.fromList xs
let changedFuns =
if only_changed_cats args
then [ (cat,functionsByCat gr cat) | (cat,_,_,_) <- difcats ]
else
case category args of
[] -> case function args of
[] -> [ (cat,functionsByCat gr cat)
| (cat,_,_,_) <- concrCats gr ]
fn -> [ (snd $ Grammar.typ f, [f])
| f <- lookupSymbol gr fn ]
ct -> [ (ct,functionsByCat gr ct) ]
writeLinFile file grammar otherGrammar = do
writeFile file ""
putStrLn "Testing functions in… "
diff <- concat `fmap`
sequence [ do let cs = [ compareTree grammar otherGrammar grTrans t
| t <- treesUsingFun grammar funs ]
| t <- trees ]
putStr $ cat ++ " \r"
-- prevent lazy evaluation; make printout accurate
appendFile ("/tmp/"++file) (unwords $ map show cs)
return cs
| (cat,funs) <- changedFuns ]
| (cat,trees) <- treesToTest ]
let relevantDiff = go [] [] diff where
go res seen [] = res
go res seen (Comparison f ls:cs) =
@@ -379,19 +429,6 @@ main = do
putStrLn $ "Created files " ++ langName ++ "-(old|new)-funs.org"
-------------------------------------------------------------------------------
-- Read trees from treebank. No fancier functionality yet.
case treebank args of
Nothing -> return ()
Just fp -> do
tb <- readFile fp
sequence_ [ do let tree = readTree gr str
ccat = ccatOf tree
putStrLn $ unlines [ "", showTree tree ++ " : " ++ show ccat]
putStrLn $ linearize gr tree
| str <- lines tb ]
where

View File

@@ -27,9 +27,14 @@ document, as well as the full list of options to give to `gftest`.
- [Empty or always identical fields: `-e`, `-q`](#empty-or-always-identical-fields--e--q)
- [Unused fields: `-u`](#unused-fields--u)
- [Erased trees: `-r`](#erased-trees--r)
- [Debug information: `-d`](#debug-information--d)
- [Detailed information about the grammar](#detailed-information-about-the-grammar)
- [--show-cats](#--show-cats)
- [--show-funs](#--show-funs)
- [--show-coercions](#--show-coercions)
- [--show-contexts](#--show-contexts)
- [--count-trees](#--count-trees)
- [--funs-of-arity](#--funs-of-arity)
## Installation
@@ -71,6 +76,7 @@ Common flags:
--show-funs Show all available functions
--funs-of-arity=2 Show all functions of arity 2
--show-coercions Show coercions in the grammar
--show-contexts=8410 Show contexts for a given concrete type (given as FId)
--concr-string=the Show all functions that include given string
-q --equal-fields Show fields whose strings are always identical
-e --empty-fields Show fields whose strings are always empty
@@ -188,8 +194,9 @@ then you can call the following:
Give a grammar, a concrete syntax, and an old version of the same
grammar as a separate PGF file. The program generates test sentences
for all functions, linearises with both grammars, and outputs those
that differ between the versions. It writes the differences into files.
for all functions (if no other arguments), linearises with both
grammars, and outputs those that differ between the versions. It
writes the differences into files.
Example:
@@ -206,20 +213,20 @@ Created files TestLangEng-(old|new)-funs.org
changed. Shows e.g. if you added or removed a parameter or a
field.
* TestLangEng-lin-diff.org: All trees that have different
linearisations in the following format. **This is usually the most
relevant file.**
* **TestLangEng-lin-diff.org** (usually the most relevant file): All
trees that have different linearisations in the following format.
```
* send_V3
* send_V3
** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we sturen onszelf ernaar
TestLangDut-OLD> we sturen zichzelf ernaar
** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we sturen onszelf ernaar
TestLangDut-OLD> we sturen zichzelf ernaar
** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we stuurden onszelf ernaar
TestLangDut-OLD> we stuurden zichzelf ernaar
** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we stuurden onszelf ernaar
TestLangDut-OLD> we stuurden zichzelf ernaar
```
* TestLangEng-old-funs.org and TestLangEng-new-funs.org: groups the
@@ -227,24 +234,43 @@ TestLangDut-OLD> we stuurden zichzelf ernaar
e.g. added or removed parameters, and that has created new versions of
some functions: say you didn't have gender in nouns, but now you
do; then all functions taking nouns suddenly have a gendered
version. **This is kind of hard to read, don't worry too much if the
output doesn't make any sense.**
version. (This is kind of hard to read, don't worry too much if the
output doesn't make any sense.)
You can give an additional parameter, `--only-changed-cats`, if you
only want to test functions in those categories that you have changed,
like this: `gftest -g TestLang -l Eng -o TestLangOld
--only-changed-cats`. This makes it run faster.
#### Additional arguments to `-o`
The default mode is to test all functions, but you can also give any
combination of `-s`, `-f`, `-c`, `--treebank`/`-b` and `--only-changed-cats`.
With `-s`, you can change the start category in which contexts are
generated.
With `-f` and `-c`, it tests only the specified functions and
categories.
With `-b FILEPATH` (`-b`=`--treebank`), it tests only the trees in the file.
With `--only-changed-cats`, it only tests functions in those categories
that have changed between the two versions.
Examples:
* `gftest -g TestLang -l Eng -o TestLangOld` tests all functions
* `gftest -g TestLang -l Eng -o TestLangOld -s S` tests all functions in start category S
* `gftest -g TestLang -l Eng -o TestLangOld --only-changed-cats` tests only changed categories. If no categories have changed (and no other arguments are specified), it tests everything.
* `gftest -g TestLang -l Eng -o TestLangOld -f "AdjCN AdvCN" -c Adv -b trees.txt` tests the functions `AdjCN` and `AdvCN`, all functions that produce an `Adv`, and all trees in trees.txt.
### Information about a particular string: `--concr-string`
Show all functions where the given concrete string appears as syncategorematic string (i.e. not from the arguments).
Show all functions that introduce the string given as an argument.
Example:
* `gftest -l Eng --concr-string it`
* `gftest -g Lang -l Eng --concr-string it`
which gives the answer `==> CleftAdv, CleftNP, DefArt, ImpersCl, it_Pron`
(Note that you have the same feature in GF shell, command `morpho_analyse`/`ma`.)
### Write into a file: `-w`
@@ -353,10 +379,11 @@ Show trees that are erased in some function, i.e. a function `F : A -> B -> C` h
Example:
`gftest -g Lang -l "Dut Eng" -r`
output:
```
> gftest -g Lang -l "Dut Eng" -r
* Erased trees:
** RelCl (ExistNP something_NP) : RCl
@@ -372,15 +399,56 @@ output:
In the first result, an argument of type `RCl` is missing in the tree constructed by `RelNP`, and in the second result, the argument `write_V2` is missing in the tree constructed by `PPartNP`. In both cases, the English linearisation contains all the arguments, but in the Dutch one they are missing. (This bug is already fixed, just showing it here to demonstrate the feature.)
## Detailed information about the grammar
### Debug information: `-d`
When combined with `-f`, `-c` or `-t`, two things happen:
1) The trees are linearised using `tabularLinearize`, which shows the
inflection table of all forms.
2) You can see traces of pruning that happens in testing functions:
contexts that are common to several concrete categories are put under
a separate test case.
When combined with `--show-cats`, the concrete categories are shown
as well.
### --show-cats
Shows the categories in the grammar. With `--debug`/`-d`, it also shows
the concrete categories.
Example:
```
> gftest -g Foods -l Spa --show-cats -d
* Categories in the grammar:
Comment
Compiles to concrete category 0
Item
Compiles to concrete categories 1—4
Kind
Compiles to concrete categories 5—6
Quality
Compiles to concrete categories 7—8
Question
Compiles to concrete category 9
```
### --show-funs
Shows the functions in the grammar. (Nothing fancy happens with other flags.)
### --show-coercions
First I'll explain what *coercions* are, then why it may be
interesting to show them. Let's take a Spanish Foods grammar, and
consider the category `Quality`—those `Good Pizza` and `Vegan Pizza`
that you saw in the previous section. `Good`
"bueno/buena/buenos/buenas" goes before the noun it modifies, whereas
`Vegan` "vegano/vegana/…" goes after, so these will become different
consider the category `Quality`, e.g. `Good` and `Vegan`.
`Good` "bueno/buena/buenos/buenas" goes before the noun it modifies,
whereas `Vegan` "vegano/vegana/…" goes after, so these will become different
*concrete categories* in the PGF: `Quality_before` and
`Quality_after`. (In reality, they are something like `Quality_7` and
`Quality_8` though.)
@@ -406,6 +474,55 @@ Quality_8--->_11
(Just mentally replace 7 with `before`, 8 with `after` and 11 with `whatever`.)
### --show-contexts
Show contexts for a given concrete category, given as an FId
(i.e. Int). The concrete category may be a coercion or a normal
category. By combining with [`-s`](#start-category-for-context--s),
you can change the start category of the context.
(You can get a list of all concrete categories by pairing `--show-cats`
with `--debug`: see [`--show-cats`](#--show-cats).)
Examples:
* First, find out some concrete categories:
```
> gftest -g Foods -l Spa --show-cats -d
Quality
Compiles to concrete categories 7—8
```
* Then, list the contexts for some of them, say `Quality_7`:
```
> gftest -g Foods -l Spa --show-contexts 7
Pred (That (Mod ∅ Wine)) Vegan
Pred (That Wine) ∅
Pred (These (Mod ∅ Wine)) Vegan
Pred (These Wine) ∅
Pred (That (Mod ∅ Pizza)) Vegan
Pred (That Pizza) ∅
Pred (These (Mod ∅ Pizza)) Vegan
Pred (These Pizza) ∅
```
* Check out from [`--show-coercions`](#--show-coercions) how to find
coercions, and you can try `--show-contexts` with them:
```
> gftest -g Foods -l Spa --show-contexts 11
Pred (That Wine) ∅
Pred (These Wine) ∅
Pred (That Pizza) ∅
Pred (These Pizza) ∅
```
### --count-trees
Number of trees up to given size. Gives a number how many trees, and a