forked from GitHub/gf-core
Merge remote-tracking branch 'upstream/master' into por
This commit is contained in:
@@ -1834,7 +1834,7 @@ Some expressions are moreover left- or right-associative.
|
|||||||
</TR>
|
</TR>
|
||||||
<TR>
|
<TR>
|
||||||
<TD>5</TD>
|
<TD>5</TD>
|
||||||
<TD><CODE>pre {"a" ; "an"/vowel}</CODE></TD>
|
<TD><CODE>pre {vowel => "an" ; _ => "a"}</CODE></TD>
|
||||||
<TD>prefix-dependent choice</TD>
|
<TD>prefix-dependent choice</TD>
|
||||||
</TR>
|
</TR>
|
||||||
<TR>
|
<TR>
|
||||||
@@ -2264,7 +2264,7 @@ Expressions of type <CODE>Str</CODE> have the following canonical forms:
|
|||||||
<LI><B>the empty token list</B>, <CODE>[]</CODE>
|
<LI><B>the empty token list</B>, <CODE>[]</CODE>
|
||||||
<LI><B>concatenation</B>, <I>s</I> <CODE>++</CODE> <I>t</I>, where <I>s,t</I> : <CODE>Str</CODE>
|
<LI><B>concatenation</B>, <I>s</I> <CODE>++</CODE> <I>t</I>, where <I>s,t</I> : <CODE>Str</CODE>
|
||||||
<LI><B>prefix-dependent choice</B>,
|
<LI><B>prefix-dependent choice</B>,
|
||||||
<CODE>pre {</CODE> <I>s</I> ; <i>s</i><sub>1</sub> <CODE>/</CODE> <i>p</i><sub>1</sub> ; ... ; <i>s</i><sub>n</sub> <CODE>/</CODE> <i>p</i><sub>n</sub>}, where
|
<CODE>pre {p<sub>1</sub> => s<sub>1</sub> ; ... ; p<sub>n</sub> => s<sub>n</sub> ; _ => s }</CODE>, where
|
||||||
<UL>
|
<UL>
|
||||||
<LI><I>s</I>, <i>s</i><sub>1</sub>,...,<i>s</i><sub>n</sub>, <i>p</i><sub>1</sub>,...,<i>p</i><sub>n</sub> : <CODE>Str</CODE>
|
<LI><I>s</I>, <i>s</i><sub>1</sub>,...,<i>s</i><sub>n</sub>, <i>p</i><sub>1</sub>,...,<i>p</i><sub>n</sub> : <CODE>Str</CODE>
|
||||||
</UL>
|
</UL>
|
||||||
@@ -2344,13 +2344,16 @@ A prime example of prefix-dependent choice operation is the following
|
|||||||
approximative expression for the English indefinite article:
|
approximative expression for the English indefinite article:
|
||||||
</P>
|
</P>
|
||||||
<PRE>
|
<PRE>
|
||||||
pre {"a" ; "an" / variants {"a" ; "e" ; "i" ; "o"}}
|
pre {
|
||||||
|
("a" | "e" | "i" | "o") => "an" ;
|
||||||
|
_ => "a"
|
||||||
|
} ;
|
||||||
</PRE>
|
</PRE>
|
||||||
<P>
|
<P>
|
||||||
This expression can be computed in the context of a subsequent token:
|
This expression can be computed in the context of a subsequent token:
|
||||||
</P>
|
</P>
|
||||||
<UL>
|
<UL>
|
||||||
<LI><CODE>pre {</CODE> <I>s</I> ; <i>s</i><sub>1</sub> <CODE>/</CODE> <i>p</i><sub>1</sub> ; ... ; <i>s</i><sub>n</sub> <CODE>/</CODE> <i>p</i><sub>n</sub><CODE>} ++</CODE> <I>t</I>
|
<LI><CODE>pre {p<sub>1</sub> => s<sub>1</sub> ; ... ; p<sub>n</sub> => s<sub>n</sub> ; _ => s } ++ t</CODE>
|
||||||
==>
|
==>
|
||||||
<UL>
|
<UL>
|
||||||
<LI><i>s</i><sub>i</sub> for the first <I>i</I> such that the prefix <i>p</i><sub>i</sub>
|
<LI><i>s</i><sub>i</sub> for the first <I>i</I> such that the prefix <i>p</i><sub>i</sub>
|
||||||
@@ -2374,6 +2377,11 @@ subsequent token depends on a run-time variable.
|
|||||||
The prefix-dependent choice expression itself may not depend on run-time
|
The prefix-dependent choice expression itself may not depend on run-time
|
||||||
variables.
|
variables.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
<I>There is an older syntax for prefix-dependent choice,
|
||||||
|
namely: <code>pre { s ; s1 / p1 ; ... ; sn / pn}</code>. This syntax
|
||||||
|
will not accept strings as patterns.</I>
|
||||||
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<I>In GF prior to 3.0, a specific type</I> <CODE>Strs</CODE>
|
<I>In GF prior to 3.0, a specific type</I> <CODE>Strs</CODE>
|
||||||
<I>is used for defining prefixes,</I>
|
<I>is used for defining prefixes,</I>
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ module PGF(
|
|||||||
|
|
||||||
-- * Operations
|
-- * Operations
|
||||||
-- ** Linearization
|
-- ** Linearization
|
||||||
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, tabularLinearizes,
|
linearize, linearizeAllLang, linearizeAll, bracketedLinearize, bracketedLinearizeAll, tabularLinearizes,
|
||||||
groupResults, -- lins of trees by language, removing duplicates
|
groupResults, -- lins of trees by language, removing duplicates
|
||||||
showPrintName,
|
showPrintName,
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ module PGF.Linearize
|
|||||||
, linearizeAll
|
, linearizeAll
|
||||||
, linearizeAllLang
|
, linearizeAllLang
|
||||||
, bracketedLinearize
|
, bracketedLinearize
|
||||||
|
, bracketedLinearizeAll
|
||||||
, tabularLinearizes
|
, tabularLinearizes
|
||||||
) where
|
) where
|
||||||
|
|
||||||
@@ -47,6 +48,12 @@ bracketedLinearize pgf lang = head . map (snd . untokn Nothing . firstLin cnc) .
|
|||||||
head [] = []
|
head [] = []
|
||||||
head (bs:bss) = bs
|
head (bs:bss) = bs
|
||||||
|
|
||||||
|
-- | Linearizes given expression as bracketed strings in the language, returning one result for every linearization of the tree instead of only the first
|
||||||
|
bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]]
|
||||||
|
bracketedLinearizeAll pgf lang = map (snd . untokn Nothing . firstLin cnc) . linTree pgf cnc
|
||||||
|
where
|
||||||
|
cnc = lookMap (error "no lang") lang (concretes pgf)
|
||||||
|
|
||||||
firstLin cnc arg@(ct@(cat,n_fid),fid,fun,es,(xs,lin)) =
|
firstLin cnc arg@(ct@(cat,n_fid),fid,fun,es,(xs,lin)) =
|
||||||
case IntMap.lookup fid (linrefs cnc) of
|
case IntMap.lookup fid (linrefs cnc) of
|
||||||
Just (funid:_) -> snd (mkLinTable cnc (const True) [] funid [arg]) ! 0
|
Just (funid:_) -> snd (mkLinTable cnc (const True) [] funid [arg]) ! 0
|
||||||
|
|||||||
@@ -10,3 +10,6 @@ Once this is done type:
|
|||||||
|
|
||||||
$ make
|
$ make
|
||||||
$ make install
|
$ make install
|
||||||
|
|
||||||
|
For Windows you might have to uncomment the lines around
|
||||||
|
WINDOWS_FLAGS in the Makefile.
|
||||||
|
|||||||
@@ -7,6 +7,11 @@ JNI_INCLUDES = $(if $(wildcard /usr/lib/jvm/default-java/include/.*), -I/usr/lib
|
|||||||
$(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
|
$(if $(wildcard /Library/Java/Home/include/.*), -I/Library/Java/Home/include/ -I/Library/Java/Home/include/darwin, \
|
||||||
$(error No JNI headers found))))
|
$(error No JNI headers found))))
|
||||||
|
|
||||||
|
# For Windows replace the previous line with something like this:
|
||||||
|
#
|
||||||
|
# JNI_INCLUDES = -I "C:/Program Files/Java/jdk1.8.0_171/include" -I "C:/Program Files/Java/jdk1.8.0_171/include/win32" -I "C:/MinGW/msys/1.0/local/include"
|
||||||
|
# WINDOWS_FLAGS = -L"C:/MinGW/msys/1.0/local/lib" -no-undefined
|
||||||
|
|
||||||
INSTALL_PATH = /usr/local/lib
|
INSTALL_PATH = /usr/local/lib
|
||||||
LIBTOOL = glibtool --tag=CC
|
LIBTOOL = glibtool --tag=CC
|
||||||
|
|
||||||
@@ -15,7 +20,7 @@ LIBTOOL = $(if $(shell command -v glibtool 2>/dev/null), glibtool --tag=CC, libt
|
|||||||
all: libjpgf.la jpgf.jar
|
all: libjpgf.la jpgf.jar
|
||||||
|
|
||||||
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
|
libjpgf.la: $(patsubst %.c, %.lo, $(C_SOURCES))
|
||||||
$(LIBTOOL) --mode=link gcc $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH) -lgu -lpgf -lsg
|
$(LIBTOOL) --mode=link gcc $(CFLAGS) -g -O -o libjpgf.la -shared $^ -rpath $(INSTALL_PATH) -lgu -lpgf -lsg $(WINDOWS_FLAGS)
|
||||||
|
|
||||||
%.lo : %.c
|
%.lo : %.c
|
||||||
$(LIBTOOL) --mode=compile gcc $(CFLAGS) -g -O -c $(JNI_INCLUDES) -std=c99 -shared $< -o $@
|
$(LIBTOOL) --mode=compile gcc $(CFLAGS) -g -O -c $(JNI_INCLUDES) -std=c99 -shared $< -o $@
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ TRANSLATION PIPELINE
|
|||||||
The module translation_pipeline.py is a Python replica of the
|
The module translation_pipeline.py is a Python replica of the
|
||||||
translation pipeline used in Wide-coverage Translation demo.
|
translation pipeline used in Wide-coverage Translation demo.
|
||||||
The pipeline allows for
|
The pipeline allows for
|
||||||
1. simulataneous batch translation from one language into multiple languages
|
1. simultaneous batch translation from one language into multiple languages
|
||||||
2. K-best translations
|
2. K-best translations
|
||||||
3. translate both text files and sgm files.
|
3. translate both text files and sgm files.
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ module Grammar
|
|||||||
-- Categories, coercions
|
-- Categories, coercions
|
||||||
, ccats, ccatOf, arity
|
, ccats, ccatOf, arity
|
||||||
, coerces, uncoerce
|
, coerces, uncoerce
|
||||||
, uncoerceAbsCat
|
, uncoerceAbsCat, mkCC
|
||||||
|
|
||||||
-- Testing and comparison
|
-- Testing and comparison
|
||||||
, testTree, testFun
|
, testTree, testFun
|
||||||
@@ -17,7 +17,7 @@ module Grammar
|
|||||||
, treesUsingFun
|
, treesUsingFun
|
||||||
|
|
||||||
-- Contexts
|
-- Contexts
|
||||||
, contextsFor
|
, contextsFor, dummyHole
|
||||||
|
|
||||||
-- FEAT
|
-- FEAT
|
||||||
, featIth, featCard
|
, featIth, featCard
|
||||||
@@ -327,6 +327,13 @@ toGrammar pgf langName =
|
|||||||
cseq2Either (I.SymCat x y) = Right (x,y)
|
cseq2Either (I.SymCat x y) = Right (x,y)
|
||||||
cseq2Either x = Left (show x)
|
cseq2Either x = Left (show x)
|
||||||
|
|
||||||
|
|
||||||
|
-- | Build the concrete category value ('CC') for the given FId.
-- The category name is looked up among the @(cat,bg,end,_)@ entries of
-- 'concrCats': every entry whose FId range @[bg..end]@ contains @fid@ is
-- a candidate. When no range contains @fid@ the name is 'Nothing', which
-- is how a coercion is represented here.
-- NOTE(review): 'the' is defined elsewhere; presumably it returns the single
-- common element of the candidate list -- confirm its behaviour when several
-- different categories match.
mkCC gr fid = CC ccat fid
|
||||||
|
where ccat = case [ cat | (cat,bg,end,_) <- concrCats gr
|
||||||
|
, fid `elem` [bg..end] ] of
|
||||||
|
[] -> Nothing -- means it's coercion
|
||||||
|
xs -> Just $ the xs
|
||||||
|
|
||||||
-- parsing and reading trees
|
-- parsing and reading trees
|
||||||
mkTree :: Grammar -> PGF2.Expr -> Tree
|
mkTree :: Grammar -> PGF2.Expr -> Tree
|
||||||
mkTree gr = disambTree . ambTree
|
mkTree gr = disambTree . ambTree
|
||||||
@@ -983,9 +990,9 @@ testFun debug gr trans startcat funname =
|
|||||||
, let testcases_ctxs = catMaybes [ M.lookup cat cat_testcase_ctxs
|
, let testcases_ctxs = catMaybes [ M.lookup cat cat_testcase_ctxs
|
||||||
| cat <- cats ]
|
| cat <- cats ]
|
||||||
, not $ null testcases_ctxs
|
, not $ null testcases_ctxs
|
||||||
, let fstLen = \(a,_) (b,_) -> length (flatten a) `compare` length (flatten b)
|
, let fstLen (a,_) (b,_) = length (flatten a) `compare` length (flatten b)
|
||||||
, let (App tp subtrees,_) = -- pick smallest test case to be the representative
|
, let (App tp subtrees,_) = -- pick smallest test case to be the representative
|
||||||
head $ sortBy fstLen testcases_ctxs
|
minimumBy fstLen testcases_ctxs
|
||||||
, let newTop = -- debug: put coerced contexts under a separate test case
|
, let newTop = -- debug: put coerced contexts under a separate test case
|
||||||
if debug then tp { ctyp = (fst $ ctyp tp, coe)} else tp
|
if debug then tp { ctyp = (fst $ ctyp tp, coe)} else tp
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ data GfTest
|
|||||||
, show_funs :: Bool
|
, show_funs :: Bool
|
||||||
, funs_of_arity :: Maybe Int
|
, funs_of_arity :: Maybe Int
|
||||||
, show_coercions:: Bool
|
, show_coercions:: Bool
|
||||||
|
, show_contexts :: Maybe Int
|
||||||
, concr_string :: String
|
, concr_string :: String
|
||||||
|
|
||||||
-- Information about fields
|
-- Information about fields
|
||||||
@@ -69,6 +70,7 @@ gftest = GfTest
|
|||||||
, show_funs = def &= help "Show all available functions"
|
, show_funs = def &= help "Show all available functions"
|
||||||
, funs_of_arity = def &= A.typ "2" &= help "Show all functions of arity 2"
|
, funs_of_arity = def &= A.typ "2" &= help "Show all functions of arity 2"
|
||||||
, show_coercions= def &= help "Show coercions in the grammar"
|
, show_coercions= def &= help "Show coercions in the grammar"
|
||||||
|
, show_contexts = def &= A.typ "8410" &= help "Show contexts for a given concrete type (given as FId)"
|
||||||
, debug = def &= help "Show debug output"
|
, debug = def &= help "Show debug output"
|
||||||
, equal_fields = def &= A.name "q" &= help "Show fields whose strings are always identical"
|
, equal_fields = def &= A.name "q" &= help "Show fields whose strings are always identical"
|
||||||
, empty_fields = def &= A.name "e" &= help "Show fields whose strings are always empty"
|
, empty_fields = def &= A.name "e" &= help "Show fields whose strings are always empty"
|
||||||
@@ -103,7 +105,7 @@ main = do
|
|||||||
gr <- readGrammar langName grName
|
gr <- readGrammar langName grName
|
||||||
grTrans <- sequence [ readGrammar lt grName | lt <- langTrans ]
|
grTrans <- sequence [ readGrammar lt grName | lt <- langTrans ]
|
||||||
|
|
||||||
-- in case the language given by the user was not valid, use some language that *is* in the grammar
|
-- if language given by the user was not valid, use default language from Grammar
|
||||||
let langName = concrLang gr
|
let langName = concrLang gr
|
||||||
|
|
||||||
let startcat = startCat gr `fromMaybe` start_cat args
|
let startcat = startCat gr `fromMaybe` start_cat args
|
||||||
@@ -143,39 +145,66 @@ main = do
|
|||||||
, xs@(_:_) <- [ S.toList vs ] ]
|
, xs@(_:_) <- [ S.toList vs ] ]
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
-- Testing functions
|
-- Testing functions
|
||||||
|
|
||||||
-- Test a tree
|
-- Test a tree
|
||||||
case tree args of
|
let trees = case tree args of
|
||||||
[] -> return ()
|
[] -> []
|
||||||
t -> output $ testTree' (readTree gr t) 1
|
ts -> lines ts
|
||||||
|
output $
|
||||||
|
unlines [ testTree' (readTree gr tree) 1 | tree <- trees ]
|
||||||
|
|
||||||
-- Test a function
|
-- Test a function
|
||||||
case category args of
|
let substrs xs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') xs
|
||||||
[] -> return ()
|
let cats = case category args of
|
||||||
cat -> output $ unlines
|
[] -> []
|
||||||
[ testTree' t n
|
cs -> if '*' `elem` cs
|
||||||
| (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]]
|
then let subs = substrs cs
|
||||||
|
in nub [ cat | (cat,_,_,_) <- concrCats gr
|
||||||
|
, all (`isInfixOf` cat) subs ]
|
||||||
|
else words cs
|
||||||
|
output $
|
||||||
|
unlines [ testTree' t n
|
||||||
|
| cat <- cats
|
||||||
|
, (t,n) <- treesUsingFun gr (functionsByCat gr cat) `zip` [1..]]
|
||||||
|
|
||||||
-- Test all functions in a category
|
-- Test all functions in a category
|
||||||
case function args of
|
let funs = case function args of
|
||||||
[] -> return ()
|
[] -> []
|
||||||
fs -> let funs = if '*' `elem` fs
|
fs -> if '*' `elem` fs
|
||||||
then let subs = filter (/="*") $ groupBy (\a b -> a/='*' && b/='*') fs
|
then let subs = substrs fs
|
||||||
in nub [ f | s <- symbols gr, let f = show s
|
in nub [ f | s <- symbols gr, let f = show s
|
||||||
, all (`isInfixOf` f) subs
|
, all (`isInfixOf` f) subs
|
||||||
, arity s >= 1 ]
|
, arity s >= 1 ]
|
||||||
else words fs
|
else words fs
|
||||||
in output $ unlines
|
output $
|
||||||
[ testFun (debug args) gr grTrans startcat f
|
unlines [ testFun (debug args) gr grTrans startcat f
|
||||||
| f <- funs ]
|
| f <- funs ]
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
-- Information about the grammar
|
-- Information about the grammar
|
||||||
|
|
||||||
|
-- Show contexts for a particular concrete category
|
||||||
|
case show_contexts args of
|
||||||
|
Nothing -> return ()
|
||||||
|
Just fid -> mapM_ print
|
||||||
|
[ ctx dummyHole
|
||||||
|
| start <- ccats gr startcat
|
||||||
|
, ctx <- contextsFor gr start (mkCC gr fid) ]
|
||||||
|
|
||||||
-- Show available categories
|
-- Show available categories
|
||||||
when (show_cats args) $ do
|
when (show_cats args) $ do
|
||||||
putStrLn "* Categories in the grammar:"
|
putStrLn "* Categories in the grammar:"
|
||||||
putStrLn $ unlines [ cat | (cat,_,_,_) <- concrCats gr ]
|
let concrcats = sortBy (\(_,a,_,_) (_,b,_,_) -> a `compare` b) (concrCats gr)
|
||||||
|
sequence_ [ do putStrLn cat
|
||||||
|
when (debug args) $
|
||||||
|
putStrLn $ unwords $
|
||||||
|
[ " Compiles to concrete" ] ++
|
||||||
|
[ "categories " ++ show bg++"—"++show end
|
||||||
|
| bg/=end ] ++
|
||||||
|
[ "category " ++ show bg
|
||||||
|
| bg==end ]
|
||||||
|
| (cat,bg,end,_) <- concrcats
|
||||||
|
, end >= 0]
|
||||||
|
|
||||||
-- Show available functions
|
-- Show available functions
|
||||||
when (show_funs args) $ do
|
when (show_funs args) $ do
|
||||||
@@ -279,6 +308,19 @@ main = do
|
|||||||
putStrLn $ "* " ++ show (featIth gr start n 0)
|
putStrLn $ "* " ++ show (featIth gr start n 0)
|
||||||
putStrLn $ "* " ++ show (featIth gr start n (i-1))
|
putStrLn $ "* " ++ show (featIth gr start n (i-1))
|
||||||
|
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- Read trees from treebank.
|
||||||
|
|
||||||
|
treebank' <-
|
||||||
|
case treebank args of
|
||||||
|
Nothing -> return []
|
||||||
|
Just fp -> do
|
||||||
|
tb <- readFile fp
|
||||||
|
return [ readTree gr s
|
||||||
|
| s <- lines tb ]
|
||||||
|
mapM_ print treebank'
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
-- Comparison with old grammar
|
-- Comparison with old grammar
|
||||||
|
|
||||||
@@ -308,36 +350,44 @@ main = do
|
|||||||
[ appendFile ccatChangeFile $
|
[ appendFile ccatChangeFile $
|
||||||
unlines $
|
unlines $
|
||||||
("* All concrete cats in the "++age++" grammar:"):
|
("* All concrete cats in the "++age++" grammar:"):
|
||||||
[ show cats | cats <- concrCats g ]
|
[ show cts | cts <- concrCats g ]
|
||||||
| (g,age) <- [(ogr,"old"),(gr,"new")] ]
|
| (g,age) <- [(ogr,"old"),(gr,"new")] ]
|
||||||
|
|
||||||
putStrLn $ "Created file " ++ ccatChangeFile
|
putStrLn $ "Created file " ++ ccatChangeFile
|
||||||
|
|
||||||
--------------------------------------------------------------------------
|
--------------------------------------------------------------------------
|
||||||
-- print out tests for all functions in the changed cats
|
-- Print out tests for all functions in the changed cats.
|
||||||
|
-- If -f, -c or --treebank specified, use them.
|
||||||
|
|
||||||
|
let f cat = (cat, treesUsingFun gr $ functionsByCat gr cat)
|
||||||
|
|
||||||
|
byCat = [ f cat | cat <- cats ] -- from command line arg -c
|
||||||
|
changed = [ f cat | (cat,_,_,_) <- difcats
|
||||||
|
, only_changed_cats args ]
|
||||||
|
byFun = [ (cat, treesUsingFun gr fs)
|
||||||
|
| funName <- funs -- comes from command line arg -f
|
||||||
|
, let fs@(s:_) = lookupSymbol gr funName
|
||||||
|
, let cat = snd $ Grammar.typ s ]
|
||||||
|
fromTb = [ (cat,[tree]) | tree <- treebank'
|
||||||
|
, let (CC (Just cat) _) = ccatOf tree ]
|
||||||
|
|
||||||
|
treesToTest =
|
||||||
|
case concat [byFun, byCat, changed, fromTb] of
|
||||||
|
[] -> [ f cat -- nothing else specified -> test all functions
|
||||||
|
| (cat,_,_,_) <- concrCats gr ]
|
||||||
|
xs -> S.toList $ S.fromList xs
|
||||||
|
|
||||||
let changedFuns =
|
|
||||||
if only_changed_cats args
|
|
||||||
then [ (cat,functionsByCat gr cat) | (cat,_,_,_) <- difcats ]
|
|
||||||
else
|
|
||||||
case category args of
|
|
||||||
[] -> case function args of
|
|
||||||
[] -> [ (cat,functionsByCat gr cat)
|
|
||||||
| (cat,_,_,_) <- concrCats gr ]
|
|
||||||
fn -> [ (snd $ Grammar.typ f, [f])
|
|
||||||
| f <- lookupSymbol gr fn ]
|
|
||||||
ct -> [ (ct,functionsByCat gr ct) ]
|
|
||||||
writeLinFile file grammar otherGrammar = do
|
writeLinFile file grammar otherGrammar = do
|
||||||
writeFile file ""
|
writeFile file ""
|
||||||
putStrLn "Testing functions in… "
|
putStrLn "Testing functions in… "
|
||||||
diff <- concat `fmap`
|
diff <- concat `fmap`
|
||||||
sequence [ do let cs = [ compareTree grammar otherGrammar grTrans t
|
sequence [ do let cs = [ compareTree grammar otherGrammar grTrans t
|
||||||
| t <- treesUsingFun grammar funs ]
|
| t <- trees ]
|
||||||
putStr $ cat ++ " \r"
|
putStr $ cat ++ " \r"
|
||||||
-- prevent lazy evaluation; make printout accurate
|
-- prevent lazy evaluation; make printout accurate
|
||||||
appendFile ("/tmp/"++file) (unwords $ map show cs)
|
appendFile ("/tmp/"++file) (unwords $ map show cs)
|
||||||
return cs
|
return cs
|
||||||
| (cat,funs) <- changedFuns ]
|
| (cat,trees) <- treesToTest ]
|
||||||
let relevantDiff = go [] [] diff where
|
let relevantDiff = go [] [] diff where
|
||||||
go res seen [] = res
|
go res seen [] = res
|
||||||
go res seen (Comparison f ls:cs) =
|
go res seen (Comparison f ls:cs) =
|
||||||
@@ -379,19 +429,6 @@ main = do
|
|||||||
|
|
||||||
putStrLn $ "Created files " ++ langName ++ "-(old|new)-funs.org"
|
putStrLn $ "Created files " ++ langName ++ "-(old|new)-funs.org"
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
-- Read trees from treebank. No fancier functionality yet.
|
|
||||||
|
|
||||||
case treebank args of
|
|
||||||
Nothing -> return ()
|
|
||||||
Just fp -> do
|
|
||||||
tb <- readFile fp
|
|
||||||
sequence_ [ do let tree = readTree gr str
|
|
||||||
ccat = ccatOf tree
|
|
||||||
putStrLn $ unlines [ "", showTree tree ++ " : " ++ show ccat]
|
|
||||||
putStrLn $ linearize gr tree
|
|
||||||
| str <- lines tb ]
|
|
||||||
|
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
|
|||||||
@@ -27,9 +27,14 @@ document, as well as the full list of options to give to `gftest`.
|
|||||||
- [Empty or always identical fields: `-e`, `-q`](#empty-or-always-identical-fields--e--q)
|
- [Empty or always identical fields: `-e`, `-q`](#empty-or-always-identical-fields--e--q)
|
||||||
- [Unused fields: `-u`](#unused-fields--u)
|
- [Unused fields: `-u`](#unused-fields--u)
|
||||||
- [Erased trees: `-r`](#erased-trees--r)
|
- [Erased trees: `-r`](#erased-trees--r)
|
||||||
|
- [Debug information: `-d`](#debug-information--d)
|
||||||
|
- [Detailed information about the grammar](#detailed-information-about-the-grammar)
|
||||||
|
- [--show-cats](#--show-cats)
|
||||||
|
- [--show-funs](#--show-funs)
|
||||||
- [--show-coercions](#--show-coercions)
|
- [--show-coercions](#--show-coercions)
|
||||||
|
- [--show-contexts](#--show-contexts)
|
||||||
- [--count-trees](#--count-trees)
|
- [--count-trees](#--count-trees)
|
||||||
|
- [--funs-of-arity](#--funs-of-arity)
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@@ -71,6 +76,7 @@ Common flags:
|
|||||||
--show-funs Show all available functions
|
--show-funs Show all available functions
|
||||||
--funs-of-arity=2 Show all functions of arity 2
|
--funs-of-arity=2 Show all functions of arity 2
|
||||||
--show-coercions Show coercions in the grammar
|
--show-coercions Show coercions in the grammar
|
||||||
|
--show-contexts=8410 Show contexts for a given concrete type (given as FId)
|
||||||
--concr-string=the Show all functions that include given string
|
--concr-string=the Show all functions that include given string
|
||||||
-q --equal-fields Show fields whose strings are always identical
|
-q --equal-fields Show fields whose strings are always identical
|
||||||
-e --empty-fields Show fields whose strings are always empty
|
-e --empty-fields Show fields whose strings are always empty
|
||||||
@@ -188,8 +194,9 @@ then you can call the following:
|
|||||||
|
|
||||||
Give a grammar, a concrete syntax, and an old version of the same
|
Give a grammar, a concrete syntax, and an old version of the same
|
||||||
grammar as a separate PGF file. The program generates test sentences
|
grammar as a separate PGF file. The program generates test sentences
|
||||||
for all functions, linearises with both grammars, and outputs those
|
for all functions (if no other arguments), linearises with both
|
||||||
that differ between the versions. It writes the differences into files.
|
grammars, and outputs those that differ between the versions. It
|
||||||
|
writes the differences into files.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
@@ -206,20 +213,20 @@ Created files TestLangEng-(old|new)-funs.org
|
|||||||
changed. Shows e.g. if you added or removed a parameter or a
|
changed. Shows e.g. if you added or removed a parameter or a
|
||||||
field.
|
field.
|
||||||
|
|
||||||
* TestLangEng-lin-diff.org: All trees that have different
|
* **TestLangEng-lin-diff.org** (usually the most relevant file): All
|
||||||
linearisations in the following format. **This is usually the most
|
trees that have different linearisations in the following format.
|
||||||
relevant file.**
|
|
||||||
```
|
```
|
||||||
* send_V3
|
* send_V3
|
||||||
|
|
||||||
** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
|
** UseCl (TTAnt TPres ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
|
||||||
TestLangDut> we sturen onszelf ernaar
|
TestLangDut> we sturen onszelf ernaar
|
||||||
TestLangDut-OLD> we sturen zichzelf ernaar
|
TestLangDut-OLD> we sturen zichzelf ernaar
|
||||||
|
|
||||||
|
|
||||||
** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
|
** UseCl (TTAnt TPast ASimul) PPos (PredVP (UsePron we_Pron) (ReflVP (Slash3V3 ∅ (UsePron it_Pron))))
|
||||||
TestLangDut> we stuurden onszelf ernaar
|
TestLangDut> we stuurden onszelf ernaar
|
||||||
TestLangDut-OLD> we stuurden zichzelf ernaar
|
TestLangDut-OLD> we stuurden zichzelf ernaar
|
||||||
```
|
```
|
||||||
|
|
||||||
* TestLangEng-old-funs.org and TestLangEng-new-funs.org: groups the
|
* TestLangEng-old-funs.org and TestLangEng-new-funs.org: groups the
|
||||||
@@ -227,24 +234,43 @@ TestLangDut-OLD> we stuurden zichzelf ernaar
|
|||||||
e.g. added or removed parameters, and that has created new versions of
|
e.g. added or removed parameters, and that has created new versions of
|
||||||
some functions: say you didn't have gender in nouns, but now you
|
some functions: say you didn't have gender in nouns, but now you
|
||||||
have, then all functions taking nouns have suddenly a gendered
|
have, then all functions taking nouns have suddenly a gendered
|
||||||
version. **This is kind of hard to read, don't worry too much if the
|
version. (This is kind of hard to read, don't worry too much if the
|
||||||
output doesn't make any sense.**
|
output doesn't make any sense.)
|
||||||
|
|
||||||
You can give an additional parameter, `--only-changed-cats`, if you
|
#### Additional arguments to `-o`
|
||||||
only want to test functions in those categories that you have changed,
|
|
||||||
like this: `gftest -g TestLang -l Eng -o TestLangOld
|
The default mode is to test all functions, but you can also give any
|
||||||
--only-changed-cats`. This makes it run faster.
|
combination of `-s`, `-f`, `-c`, `--treebank`/`-b` and `--only-changed-cats`.
|
||||||
|
|
||||||
|
With `-s`, you can change the start category in which contexts are
|
||||||
|
generated.
|
||||||
|
|
||||||
|
With `-f` and `-c`, it tests only the specified functions and
|
||||||
|
categories.
|
||||||
|
With `-b FILEPATH` (`-b`=`--treebank`), it tests only the trees in the file.
|
||||||
|
|
||||||
|
With `--only-changed-cats`, it only tests functions in those categories
|
||||||
|
that have changed between the two versions.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
* `gftest -g TestLang -l Eng -o TestLangOld` tests all functions
|
||||||
|
* `gftest -g TestLang -l Eng -o TestLangOld -s S` tests all functions in start category S
|
||||||
|
* `gftest -g TestLang -l Eng -o TestLangOld --only-changed-cats` tests only changed categories. If no categories have changed (and no other arguments specified), tests everything.
|
||||||
|
* `gftest -g TestLang -l Eng -o TestLangOld -f "AdjCN AdvCN" -c Adv -b trees.txt` tests the functions `AdjCN` and `AdvCN`, all functions that produce an `Adv`, and all trees in trees.txt.
|
||||||
|
|
||||||
### Information about a particular string: `--concr-string`
|
### Information about a particular string: `--concr-string`
|
||||||
|
|
||||||
Show all functions where the given concrete string appears as syncategorematic string (i.e. not from the arguments).
|
Show all functions that introduce the string given as an argument.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
* `gftest -l Eng --concr-string it`
|
* `gftest -g Lang -l Eng --concr-string it`
|
||||||
|
|
||||||
which gives the answer `==> CleftAdv, CleftNP, DefArt, ImpersCl, it_Pron`
|
which gives the answer `==> CleftAdv, CleftNP, DefArt, ImpersCl, it_Pron`
|
||||||
|
|
||||||
|
(Note that you have the same feature in GF shell, command `morpho_analyse`/`ma`.)
|
||||||
|
|
||||||
|
|
||||||
### Write into a file: `-w`
|
### Write into a file: `-w`
|
||||||
|
|
||||||
@@ -353,10 +379,11 @@ Show trees that are erased in some function, i.e. a function `F : A -> B -> C` h
|
|||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
`gftest -g Lang -l "Dut Eng" -r`
|
|
||||||
|
|
||||||
output:
|
|
||||||
```
|
```
|
||||||
|
> gftest -g Lang -l "Dut Eng" -r
|
||||||
|
|
||||||
* Erased trees:
|
* Erased trees:
|
||||||
|
|
||||||
** RelCl (ExistNP something_NP) : RCl
|
** RelCl (ExistNP something_NP) : RCl
|
||||||
@@ -372,15 +399,56 @@ output:
|
|||||||
|
|
||||||
In the first result, an argument of type `RCl` is missing in the tree constructed by `RelNP`, and in the second result, the argument `write_V2` is missing in the tree constructed by `PPartNP`. In both cases, the English linearisation contains all the arguments, but in the Dutch one they are missing. (This bug is already fixed, just showing it here to demonstrate the feature.)
|
In the first result, an argument of type `RCl` is missing in the tree constructed by `RelNP`, and in the second result, the argument `write_V2` is missing in the tree constructed by `PPartNP`. In both cases, the English linearisation contains all the arguments, but in the Dutch one they are missing. (This bug is already fixed, just showing it here to demonstrate the feature.)
|
||||||
|
|
||||||
|
## Detailed information about the grammar
|
||||||
|
|
||||||
|
### Debug information: `-d`
|
||||||
|
|
||||||
|
When combined with `-f`, `-c` or `-t`, two things happen:
|
||||||
|
|
||||||
|
1) The trees are linearised using `tabularLinearize`, which shows the
|
||||||
|
inflection table of all forms.
|
||||||
|
2) You can see traces of pruning that happens in testing functions:
|
||||||
|
contexts that are common to several concrete categories are put under
|
||||||
|
a separate test case.
|
||||||
|
|
||||||
|
When combined with `--show-cats`, also the concrete categories are
|
||||||
|
shown.
|
||||||
|
|
||||||
|
### --show-cats
|
||||||
|
|
||||||
|
Shows the categories in the grammar. With `--debug`/`-d`, shows also
|
||||||
|
concrete categories.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
> gftest -g Foods -l Spa --show-cats -d
|
||||||
|
|
||||||
|
* Categories in the grammar:
|
||||||
|
Comment
|
||||||
|
Compiles to concrete category 0
|
||||||
|
Item
|
||||||
|
Compiles to concrete categories 1—4
|
||||||
|
Kind
|
||||||
|
Compiles to concrete categories 5—6
|
||||||
|
Quality
|
||||||
|
Compiles to concrete categories 7—8
|
||||||
|
Question
|
||||||
|
Compiles to concrete category 9
|
||||||
|
```
|
||||||
|
|
||||||
|
### --show-funs
|
||||||
|
|
||||||
|
Shows the functions in the grammar. (Nothing fancy happens with other flags.)
|
||||||
|
|
||||||
|
|
||||||
### --show-coercions
|
### --show-coercions
|
||||||
|
|
||||||
First I'll explain what *coercions* are, then why it may be
|
First I'll explain what *coercions* are, then why it may be
|
||||||
interesting to show them. Let's take a Spanish Foods grammar, and
|
interesting to show them. Let's take a Spanish Foods grammar, and
|
||||||
consider the category `Quality`—those `Good Pizza` and `Vegan Pizza`
|
consider the category `Quality`, e.g. `Good` and `Vegan`.
|
||||||
that you saw in the previous section. `Good`
|
`Good` "bueno/buena/buenos/buenas" goes before the noun it modifies,
|
||||||
"bueno/buena/buenos/buenas" goes before the noun it modifies, whereas
|
whereas `Vegan` "vegano/vegana/…" goes after, so these will become different
|
||||||
`Vegan` "vegano/vegana/…" goes after, so these will become different
|
|
||||||
*concrete categories* in the PGF: `Quality_before` and
|
*concrete categories* in the PGF: `Quality_before` and
|
||||||
`Quality_after`. (In reality, they are something like `Quality_7` and
|
`Quality_after`. (In reality, they are something like `Quality_7` and
|
||||||
`Quality_8` though.)
|
`Quality_8` though.)
|
||||||
@@ -406,6 +474,55 @@ Quality_8--->_11
|
|||||||
|
|
||||||
(Just mentally replace 7 with `before`, 8 with `after` and 11 with `whatever`.)
|
(Just mentally replace 7 with `before`, 8 with `after` and 11 with `whatever`.)
|
||||||
|
|
||||||
|
### --show-contexts
|
||||||
|
|
||||||
|
Show contexts for a given concrete category, given as an FId
|
||||||
|
(i.e. Int). The concrete category may be a coercion or a normal
|
||||||
|
category. By combining with [`-s`](#start-category-for-context--s),
|
||||||
|
you can change the start category of the context.
|
||||||
|
|
||||||
|
(You can get a list of all concrete categories by pairing `--show-cats`
|
||||||
|
with `--debug`: see [`--show-cats`](#--show-cats).)
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
* First, find out some concrete categories:
|
||||||
|
|
||||||
|
```
|
||||||
|
> gftest -g Foods -l Spa --show-cats -d
|
||||||
|
…
|
||||||
|
Quality
|
||||||
|
Compiles to concrete categories 7—8
|
||||||
|
…
|
||||||
|
```
|
||||||
|
|
||||||
|
* Then, list the contexts for some of them, say `Quality_7`:
|
||||||
|
|
||||||
|
```
|
||||||
|
> gftest -g Foods -l Spa --show-contexts 7
|
||||||
|
|
||||||
|
Pred (That (Mod ∅ Wine)) Vegan
|
||||||
|
Pred (That Wine) ∅
|
||||||
|
Pred (These (Mod ∅ Wine)) Vegan
|
||||||
|
Pred (These Wine) ∅
|
||||||
|
Pred (That (Mod ∅ Pizza)) Vegan
|
||||||
|
Pred (That Pizza) ∅
|
||||||
|
Pred (These (Mod ∅ Pizza)) Vegan
|
||||||
|
Pred (These Pizza) ∅
|
||||||
|
```
|
||||||
|
|
||||||
|
* Check out from [`--show-coercions`](#--show-coercions) how to find
|
||||||
|
coercions, and you can try `--show-contexts` with them:
|
||||||
|
|
||||||
|
```
|
||||||
|
> gftest -g Foods -l Spa --show-contexts 11
|
||||||
|
|
||||||
|
Pred (That Wine) ∅
|
||||||
|
Pred (These Wine) ∅
|
||||||
|
Pred (That Pizza) ∅
|
||||||
|
Pred (These Pizza) ∅
|
||||||
|
```
|
||||||
|
|
||||||
### --count-trees
|
### --count-trees
|
||||||
|
|
||||||
Number of trees up to given size. Gives a number how many trees, and a
|
Number of trees up to given size. Gives a number how many trees, and a
|
||||||
|
|||||||
Reference in New Issue
Block a user