1
0
forked from GitHub/gf-core

Merge remote-tracking branch 'upstream/master' into por

This commit is contained in:
odanoburu
2018-05-28 16:22:54 -03:00
9 changed files with 272 additions and 88 deletions

View File

@@ -1834,7 +1834,7 @@ Some expressions are moreover left- or right-associative.
</TR>
<TR>
<TD>5</TD>
<TD><CODE>pre {"a" ; "an"/vowel}</CODE></TD>
<TD><CODE>pre {vowel => "an" ; _ => "a"}</CODE></TD>
<TD>prefix-dependent choice</TD>
</TR>
<TR>
@@ -2264,7 +2264,7 @@ Expressions of type <CODE>Str</CODE> have the following canonical forms:
<LI><B>the empty token list</B>, <CODE>[]</CODE>
<LI><B>concatenation</B>, <I>s</I> <CODE>++</CODE> <I>t</I>, where <I>s,t</I> : <CODE>Str</CODE>
<LI><B>prefix-dependent choice</B>,
<CODE>pre {</CODE> <I>s</I> ; <i>s</i><sub>1</sub> <CODE>/</CODE> <i>p</i><sub>1</sub> ; ... ; <i>s</i><sub>n</sub> <CODE>/</CODE> <i>p</i><sub>n</sub>}, where
<CODE>pre {p<sub>1</sub> => s<sub>1</sub> ; ... ; p<sub>n</sub> => s<sub>n</sub> ; _ => s }</CODE>, where
<UL>
<LI><I>s</I>, <i>s</i><sub>1</sub>,...,<i>s</i><sub>n</sub>, <i>p</i><sub>1</sub>,...,<i>p</i><sub>n</sub> : <CODE>Str</CODE>
</UL>
@@ -2344,13 +2344,16 @@ A prime example of prefix-dependent choice operation is the following
approximative expression for the English indefinite article:
</P>
<PRE>
pre {"a" ; "an" / variants {"a" ; "e" ; "i" ; "o"}}
pre {
("a" | "e" | "i" | "o") => "an" ;
_ => "a"
} ;
</PRE>
<P>
This expression can be computed in the context of a subsequent token:
</P>
<UL>
<LI><CODE>pre {</CODE> <I>s</I> ; <i>s</i><sub>1</sub> <CODE>/</CODE> <i>p</i><sub>1</sub> ; ... ; <i>s</i><sub>n</sub> <CODE>/</CODE> <i>p</i><sub>n</sub><CODE>} ++</CODE> <I>t</I>
<LI><CODE>pre {p<sub>1</sub> => s<sub>1</sub> ; ... ; p<sub>n</sub> => s<sub>n</sub> ; _ => s } ++ t</CODE>
==>
<UL>
<LI><i>s</i><sub>i</sub> for the first <I>i</I> such that the prefix <i>p</i><sub>i</sub>
@@ -2374,6 +2377,11 @@ subsequent token depends on a run-time variable.
The prefix-dependent choice expression itself may not depend on run-time
variables.
</P>
<P>
<I>There is an older syntax for prefix-dependent choice,
namely: <code>pre { s ; s1 / p1 ; ... ; sn / pn}</code>. This syntax
will not accept strings as patterns.</I>
</P>
<P>
<I>In GF prior to 3.0, a specific type</I> <CODE>Strs</CODE>
<I>is used for defining prefixes,</I>

View File

@@ -58,7 +58,7 @@ module PGF(
-- * Operations
-- ** Linearization
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, bracketedLinearizeAll, tabularLinearizes,
groupResults, -- lins of trees by language, removing duplicates
showPrintName,

View File

@@ -4,6 +4,7 @@ module PGF.Linearize
, linearizeAll
, linearizeAllLang
, bracketedLinearize
, bracketedLinearizeAll
, tabularLinearizes
) where
@@ -47,6 +48,12 @@ bracketedLinearize pgf lang = head . map (snd . untokn Nothing . firstLin cnc) .
head [] = []
head (bs:bss) = bs
-- | Linearizes the given expression as bracketed strings in the language,
-- returning all alternatives rather than only the first one
bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]]
bracketedLinearizeAll pgf lang = map (snd . untokn Nothing . firstLin cnc) . linTree pgf cnc
where
cnc = lookMap (error "no lang") lang (concretes pgf)
firstLin cnc arg@(ct@(cat,n_fid),fid,fun,es,(xs,lin)) =
case IntMap.lookup fid (linrefs cnc) of
Just (funid:_) -> snd (mkLinTable cnc (const True) [] funid [arg]) ! 0

View File

@@ -10,3 +10,6 @@ Once this is done type:
$ make
$ make install
For Windows you might have to uncomment the lines around
WINDOWS_FLAGS in the Makefile.

View File

@@ -7,6 +7,11 @@ JNI_INCLUDES = $(if $(wildcard /usr/lib/jvm/default-java/include/.*), -I/usr/lib
$(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
$(error No JNI headers found))))
# For Windows replace the previous line with something like this:
#
# JNI_INCLUDES = -I "C:/Program Files/Java/jdk1.8.0_171/include" -I "C:/Program Files/Java/jdk1.8.0_171/include/win32" -I "C:/MinGW/msys/1.0/local/include"
# WINDOWS_FLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
INSTALL_PATH = /usr/local/lib
LIBTOOL = glibtool --tag=CC
@@ -15,7 +20,7 @@ LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool --tag=CC, libt
all: libjpgf.la jpgf.jar
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
$(LIBTOOL) --mode=link gcc $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH) -lgu -lpgf -lsg
$(LIBTOOL) --mode=link gcc $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH) -lgu -lpgf -lsg $(WINDOWS_FLAGS)
%.lo : %.c
$(LIBTOOL) --mode=compile gcc $(CFLAGS) -g -O -c $(JNI_INCLUDES) -std=c99 -shared $< -o $@

View File

@@ -9,7 +9,7 @@ TRANSLATION PIPELINE
The module translation_pipeline.py is a Python replica of the
translation pipeline used in Wide-coverage Translation demo.
The pipeline allows for
1. simulataneous batch translation from one language into multiple languages
1. simultaneous batch translation from one language into multiple languages
2. K-best translations
3. translate both text files and sgm files.

View File

@@ -9,7 +9,7 @@ module Grammar
-- Categories, coercions
, ccats, ccatOf, arity
, coerces, uncoerce
, uncoerceAbsCat
, uncoerceAbsCat, mkCC
-- Testing and comparison
, testTree, testFun
@@ -17,7 +17,7 @@ module Grammar
, treesUsingFun
-- Contexts
, contextsFor
, contextsFor, dummyHole
-- FEAT
, featIth, featCard
@@ -327,6 +327,13 @@ toGrammar pgf langName =
cseq2Either (I.SymCat x y) = Right (x,y)
cseq2Either x = Left (show x)
-- | Build a 'CC' for the concrete category id @fid@, pairing it with the
-- name of the category in @concrCats gr@ whose FId range contains it.
-- If no category range covers @fid@, the name is 'Nothing', which marks
-- the id as a coercion.
mkCC gr fid = CC owner fid
  where
    -- Names of every category whose [lo..hi] range includes the given FId.
    matching = [ name | (name,lo,hi,_) <- concrCats gr
                      , fid `elem` [lo..hi] ]
    owner
      | null matching = Nothing -- means it's coercion
      -- NOTE(review): presumably `the` is GHC.Exts.the, i.e. all matches
      -- are expected to name the same category -- confirm.
      | otherwise     = Just $ the matching
-- parsing and reading trees
mkTree :: Grammar -> PGF2.Expr -> Tree
mkTree gr = disambTree . ambTree
@@ -983,9 +990,9 @@ testFun debug gr trans startcat funname =
, let testcases_ctxs = catMaybes [ M.lookup cat cat_testcase_ctxs
| cat <- cats ]
, not $ null testcases_ctxs
, let fstLen = \(a,_) (b,_) -> length (flatten a) `compare` length (flatten b)
, let fstLen (a,_) (b,_) = length (flatten a) `compare` length (flatten b)
, let (App tp subtrees,_) = -- pick smallest test case to be the representative
head $ sortBy fstLen testcases_ctxs
minimumBy fstLen testcases_ctxs
, let newTop = -- debug: put coerced contexts under a separate test case
if debug then tp { ctyp = (fst $ ctyp tp, coe)} else tp
]

View File

@@ -32,6 +32,7 @@ data GfTest
, show_funs :: Bool
, funs_of_arity :: Maybe Int
, show_coercions:: Bool
, show_contexts :: Maybe Int
, concr_string :: String
-- Information about fields
@@ -69,6 +70,7 @@ gftest = GfTest
, show_funs = def &= help "Show all available functions"
, funs_of_arity = def &= A.typ "2" &= help "Show all functions of arity 2"
, show_coercions= def &= help "Show coercions in the grammar"
, show_contexts = def &= A.typ "8410" &= help "Show contexts for a given concrete type (given as FId)"
, debug = def &= help "Show debug output"
, equal_fields = def &= A.name "q" &= help "Show fields whose strings are always identical"
, empty_fields = def &= A.name "e" &= help "Show fields whose strings are always empty"
@@ -103,7 +105,7 @@ main = do
gr <- readGrammar langName grName
grTrans <- sequence [ readGrammar lt grName | lt <- langTrans ]
-- in case the language given by the user was not valid, use some language that *is* in the grammar
-- if language given by the user was not valid, use default language from Grammar
let langName = concrLang gr
let startcat = startCat gr `fromMaybe` start_cat args
@@ -143,39 +145,66 @@ main = do
, xs@(_:_) <- [ S.toList vs ] ]
-----------------------------------------------------------------------------
-- Testing functions
-- Test a tree
case tree args of
[] -> return ()
t -> output $ testTree' (readTree gr t) 1
let trees = case tree args of
[] -> []
ts -> lines ts
output $
unlines [ testTree' (readTree gr tree) 1 | tree <- trees ]
-- Test all functions in a category
case category args of
[] -> return ()
cat -> output $ unlines
[ testTree' t n
| (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]]
let substrs xs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') xs
let cats = case category args of
[] -> []
cs -> if '*' `elem` cs
then let subs = substrs cs
in nub [ cat | (cat,_,_,_) <- concrCats gr
, all (`isInfixOf` cat) subs ]
else words cs
output $
unlines [ testTree' t n
| cat <- cats
, (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]]
-- Test a function
case function args of
[] -> return ()
fs -> let funs = if '*' `elem` fs
then let subs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') fs
in nub [ f | s <- symbols gr, let f = show s
, all (`isInfixOf` f) subs
, arity s >= 1 ]
else words fs
in output $ unlines
[ testFun (debug args) gr grTrans startcat f
| f <- funs ]
let funs = case function args of
[] -> []
fs -> if '*' `elem` fs
then let subs = substrs fs
in nub [ f | s <- symbols gr, let f = show s
, all (`isInfixOf` f) subs
, arity s >= 1 ]
else words fs
output $
unlines [ testFun (debug args) gr grTrans startcat f
| f <- funs ]
-----------------------------------------------------------------------------
-- Information about the grammar
-- Show contexts for a particular concrete category
case show_contexts args of
Nothing -> return ()
Just fid -> mapM_ print
[ ctx dummyHole
| start <- ccats gr startcat
, ctx <- contextsFor gr start (mkCC gr fid) ]
-- Show available categories
when (show_cats args) $ do
putStrLn "* Categories in the grammar:"
putStrLn $ unlines [ cat | (cat,_,_,_) <- concrCats gr ]
let concrcats = sortBy (\(_,a,_,_) (_,b,_,_) -> a `compare` b) (concrCats gr)
sequence_ [ do putStrLn cat
when (debug args) $
putStrLn $ unwords $
[ " Compiles to concrete" ] ++
[ "categories " ++ show bg++""++show end
| bg/=end ] ++
[ "category " ++ show bg
| bg==end ]
| (cat,bg,end,_) <- concrcats
, end >= 0]
-- Show available functions
when (show_funs args) $ do
@@ -279,6 +308,19 @@ main = do
putStrLn $ "* " ++ show (featIth gr start n 0)
putStrLn $ "* " ++ show (featIth gr start n (i-1))
-------------------------------------------------------------------------------
-- Read trees from treebank.
treebank' <-
case treebank args of
Nothing -> return []
Just fp -> do
tb <- readFile fp
return [ readTree gr s
| s <- lines tb ]
mapM_ print treebank'
-------------------------------------------------------------------------------
-- Comparison with old grammar
@@ -308,36 +350,44 @@ main = do
[ appendFile ccatChangeFile $
unlines $
("* All concrete cats in the "++age++" grammar:"):
[ show cats | cats <- concrCats g ]
[ show cts | cts <- concrCats g ]
| (g,age) <- [(ogr,"old"),(gr,"new")] ]
putStrLn $ "Created file " ++ ccatChangeFile
--------------------------------------------------------------------------
-- print out tests for all functions in the changed cats
-- Print out tests for all functions in the changed cats.
-- If -f, -c or --treebank specified, use them.
let f cat = (cat, treesUsingFun gr $ functionsByCat gr cat)
byCat = [ f cat | cat <- cats ] -- from command line arg -c
changed = [ f cat | (cat,_,_,_) <- difcats
, only_changed_cats args ]
byFun = [ (cat, treesUsingFun gr fs)
| funName <- funs -- comes from command line arg -f
, let fs@(s:_) = lookupSymbol gr funName
, let cat = snd $ Grammar.typ s ]
fromTb = [ (cat,[tree]) | tree <- treebank'
, let (CC (Just cat) _) = ccatOf tree ]
treesToTest =
case concat [byFun, byCat, changed, fromTb] of
[] -> [ f cat -- nothing else specified -> test all functions
| (cat,_,_,_) <- concrCats gr ]
xs -> S.toList $ S.fromList xs
let changedFuns =
if only_changed_cats args
then [ (cat,functionsByCat gr cat) | (cat,_,_,_) <- difcats ]
else
case category args of
[] -> case function args of
[] -> [ (cat,functionsByCat gr cat)
| (cat,_,_,_) <- concrCats gr ]
fn -> [ (snd $ Grammar.typ f, [f])
| f <- lookupSymbol gr fn ]
ct -> [ (ct,functionsByCat gr ct) ]
writeLinFile file grammar otherGrammar = do
writeFile file ""
putStrLn "Testing functions in… "
diff <- concat `fmap`
sequence [ do let cs = [ compareTree grammar otherGrammar grTrans t
| t <- treesUsingFun grammar funs ]
| t <- trees ]
putStr $ cat ++ " \r"
-- prevent lazy evaluation; make printout accurate
appendFile ("/tmp/"++file) (unwords $ map show cs)
return cs
| (cat,funs) <- changedFuns ]
| (cat,trees) <- treesToTest ]
let relevantDiff = go [] [] diff where
go res seen [] = res
go res seen (Comparison f ls:cs) =
@@ -379,19 +429,6 @@ main = do
putStrLn $ "Created files " ++ langName ++ "-(old|new)-funs.org"
-------------------------------------------------------------------------------
-- Read trees from treebank. No fancier functionality yet.
case treebank args of
Nothing -> return ()
Just fp -> do
tb <- readFile fp
sequence_ [ do let tree = readTree gr str
ccat = ccatOf tree
putStrLn $ unlines [ "", showTree tree ++ " : " ++ show ccat]
putStrLn $ linearize gr tree
| str <- lines tb ]
where

View File

@@ -27,9 +27,14 @@ document, as well as the full list of options to give to `gftest`.
- [Empty or always identical fields: `-e`, `-q`](#empty-or-always-identical-fields--e--q)
- [Unused fields: `-u`](#unused-fields--u)
- [Erased trees: `-r`](#erased-trees--r)
- [Debug information: `-d`](#debug-information--d)
- [Detailed information about the grammar](#detailed-information-about-the-grammar)
- [--show-cats](#--show-cats)
- [--show-funs](#--show-funs)
- [--show-coercions](#--show-coercions)
- [--show-contexts](#--show-contexts)
- [--count-trees](#--count-trees)
- [--funs-of-arity](#--funs-of-arity)
## Installation
@@ -71,6 +76,7 @@ Common flags:
--show-funs Show all available functions
--funs-of-arity=2 Show all functions of arity 2
--show-coercions Show coercions in the grammar
--show-contexts=8410 Show contexts for a given concrete type (given as FId)
--concr-string=the Show all functions that include given string
-q --equal-fields Show fields whose strings are always identical
-e --empty-fields Show fields whose strings are always empty
@@ -188,8 +194,9 @@ then you can call the following:
Give a grammar, a concrete syntax, and an old version of the same
grammar as a separate PGF file. The program generates test sentences
for all functions, linearises with both grammars, and outputs those
that differ between the versions. It writes the differences into files.
for all functions (if no other arguments), linearises with both
grammars, and outputs those that differ between the versions. It
writes the differences into files.
Example:
@@ -206,20 +213,20 @@ Created files TestLangEng-(old|new)-funs.org
changed. Shows e.g. if you added or removed a parameter or a
field.
* TestLangEng-lin-diff.org: All trees that have different
linearisations in the following format. **This is usually the most
relevant file.**
* **TestLangEng-lin-diff.org** (usually the most relevant file): All
trees that have different linearisations in the following format.
```
* send_V3
* send_V3
** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we sturen onszelf ernaar
TestLangDut-OLD> we sturen zichzelf ernaar
** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we sturen onszelf ernaar
TestLangDut-OLD> we sturen zichzelf ernaar
** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we stuurden onszelf ernaar
TestLangDut-OLD> we stuurden zichzelf ernaar
** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
TestLangDut> we stuurden onszelf ernaar
TestLangDut-OLD> we stuurden zichzelf ernaar
```
* TestLangEng-old-funs.org and TestLangEng-new-funs.org: groups the
@@ -227,24 +234,43 @@ TestLangDut-OLD> we stuurden zichzelf ernaar
e.g. added or removed parameters, and that has created new versions of
some functions: say you didn't have gender in nouns, but now you
have, then all functions taking nouns have suddenly a gendered
version. **This is kind of hard to read, don't worry too much if the
output doesn't make any sense.**
version. (This is kind of hard to read, don't worry too much if the
output doesn't make any sense.)
You can give an additional parameter, `--only-changed-cats`, if you
only want to test functions in those categories that you have changed,
like this: `gftest -g TestLang -l Eng -o TestLangOld
--only-changed-cats`. This makes it run faster.
#### Additional arguments to `-o`
The default mode is to test all functions, but you can also give any
combination of `-s`, `-f`, `-c`, `--treebank`/`-b` and `--only-changed-cats`.
With `-s`, you can change the start category in which contexts are
generated.
With `-f` and `-c`, it tests only the specified functions and
categories.
With `-b FILEPATH` (`-b`=`--treebank`), it tests only the trees in the file.
With `--only-changed-cats`, it only tests functions in those categories
that have changed between the two versions.
Examples:
* `gftest -g TestLang -l Eng -o TestLangOld` tests all functions
* `gftest -g TestLang -l Eng -o TestLangOld -s S` tests all functions in start category S
* `gftest -g TestLang -l Eng -o TestLangOld --only-changed-cats` tests only changed categories. If no categories have changed (and no other arguments specified), tests everything.
* `gftest -g TestLang -l Eng -o TestLangOld -f "AdjCN AdvCN" -c Adv -b trees.txt` tests the functions `AdjCN` and `AdvCN`, all functions that produce an `Adv`, and all trees in trees.txt.
### Information about a particular string: `--concr-string`
Show all functions where the given concrete string appears as syncategorematic string (i.e. not from the arguments).
Show all functions that introduce the string given as an argument.
Example:
* `gftest -l Eng --concr-string it`
* `gftest -g Lang -l Eng --concr-string it`
which gives the answer `==> CleftAdv, CleftNP, DefArt, ImpersCl, it_Pron`
(Note that you have the same feature in GF shell, command `morpho_analyse`/`ma`.)
### Write into a file: `-w`
@@ -353,10 +379,11 @@ Show trees that are erased in some function, i.e. a function `F : A -> B -> C` h
Example:
`gftest -g Lang -l "Dut Eng" -r`
output:
```
> gftest -g Lang -l "Dut Eng" -r
* Erased trees:
** RelCl (ExistNP something_NP) : RCl
@@ -372,15 +399,56 @@ output:
In the first result, an argument of type `RCl` is missing in the tree constructed by `RelNP`, and in the second result, the argument `write_V2` is missing in the tree constructed by `PPartNP`. In both cases, the English linearisation contains all the arguments, but in the Dutch one they are missing. (This bug is already fixed, just showing it here to demonstrate the feature.)
## Detailed information about the grammar
### Debug information: `-d`
When combined with `-f`, `-c` or `-t`, two things happen:
1) The trees are linearised using `tabularLinearize`, which shows the
inflection table of all forms.
2) You can see traces of pruning that happens in testing functions:
contexts that are common to several concrete categories are put under
a separate test case.
When combined with `--show-cats`, also the concrete categories are
shown.
### --show-cats
Shows the categories in the grammar. With `--debug`/`-d`, shows also
concrete categories.
Example:
```
> gftest -g Foods -l Spa --show-cats -d
* Categories in the grammar:
Comment
Compiles to concrete category 0
Item
Compiles to concrete categories 1—4
Kind
Compiles to concrete categories 5—6
Quality
Compiles to concrete categories 7—8
Question
Compiles to concrete category 9
```
### --show-funs
Shows the functions in the grammar. (Nothing fancy happens with other flags.)
### --show-coercions
First I'll explain what *coercions* are, then why it may be
interesting to show them. Let's take a Spanish Foods grammar, and
consider the category `Quality`—those `Good Pizza` and `Vegan Pizza`
that you saw in the previous section. `Good`
"bueno/buena/buenos/buenas" goes before the noun it modifies, whereas
`Vegan` "vegano/vegana/…" goes after, so these will become different
consider the category `Quality`, e.g. `Good` and `Vegan`.
`Good` "bueno/buena/buenos/buenas" goes before the noun it modifies,
whereas `Vegan` "vegano/vegana/…" goes after, so these will become different
*concrete categories* in the PGF: `Quality_before` and
`Quality_after`. (In reality, they are something like `Quality_7` and
`Quality_8` though.)
@@ -406,6 +474,55 @@ Quality_8--->_11
(Just mentally replace 7 with `before`, 8 with `after` and 11 with `whatever`.)
### --show-contexts
Show contexts for a given concrete category, given as an FId
(i.e. Int). The concrete category may be a coercion or a normal
category. By combining with [`-s`](#start-category-for-context--s),
you can change the start category of the context.
(You can get a list of all concrete categories by pairing `--show-cats`
with `--debug`: see [`--show-cats`](#--show-cats).)
Examples:
* First, find out some concrete categories:
```
> gftest -g Foods -l Spa --show-cats -d
Quality
Compiles to concrete categories 7—8
```
* Then, list the contexts for some of them, say `Quality_7`:
```
> gftest -g Foods -l Spa --show-contexts 7
Pred (That (Mod ∅ Wine)) Vegan
Pred (That Wine) ∅
Pred (These (Mod ∅ Wine)) Vegan
Pred (These Wine) ∅
Pred (That (Mod ∅ Pizza)) Vegan
Pred (That Pizza) ∅
Pred (These (Mod ∅ Pizza)) Vegan
Pred (These Pizza) ∅
```
* Check out from [`--show-coercions`](#--show-coercions) how to find
coercions, and you can try `--show-contexts` with them:
```
> gftest -g Foods -l Spa --show-contexts 11
Pred (That Wine) ∅
Pred (These Wine) ∅
Pred (That Pizza) ∅
Pred (These Pizza) ∅
```
### --count-trees
Number of trees up to given size. Gives a number how many trees, and a