example substitutions

This commit is contained in:
aarne
2005-06-03 20:51:58 +00:00
parent 1c88337022
commit b067b3b5e0
9 changed files with 127 additions and 31 deletions

View File

@@ -18,7 +18,7 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
<b>grammar writing by examples</b>. Files of this format are first
converted to <tt>.gf</tt> files by the command
<pre>
gf -makeconcrete File.gfe
gf -examples File.gfe
</pre>
See <a href="../lib/resource/doc/examples/QuestionsI.gfe">
<tt>../lib/resource/doc/examples/QuestionsI.gfe</tt></a>

View File

@@ -1,6 +1,6 @@
--# -resource=../../english/LangEng.gf
-- to compile: gf -makeconcrete QuestionsI.gfe
-- to compile: gf -examples QuestionsI.gfe
incomplete concrete QuestionsI of Questions = open Resource in {
lincat

View File

@@ -1,6 +1,6 @@
--# -resource=../../english/LangEng.gf
-- to compile: gf -makeconcrete QuestionsI.gfe
-- to compile: gf -examples QuestionsI.gfe
incomplete concrete QuestionsI of Questions = open Resource in {
lincat

View File

@@ -710,7 +710,7 @@ generates
<a href="example/QuestionsI.gf">QuestionsI.gf</a>,
when you execute the command
<pre>
gf -makeconcrete QuestionsI.gfe
gf -examples QuestionsI.gfe
</pre>
Of course, the grammar of any language can be created by
parsing any language, as long as they have a common resource API.
@@ -718,6 +718,74 @@ The use of English resource is generally recommended, because it
is smaller and faster to parse than the other languages.
<!-- NEW -->
<h2>Constants and variables in examples</h2>
The file <a href="example/QuestionsI.gfe">QuestionsI.gfe</a> uses
as resource <tt>LangEng</tt>, which contains all resource syntax and
a lexicon of ca. 300 words. A linearization rule, such as
<pre>
lin Who love_V2 man_N = in Phr "who loves men ?" ;
</pre>
uses, as its argument variables, constants for words that can be found in
the lexicon. It is due to this that the example can be parsed.
When the resulting rule,
<pre>
lin Who love_V2 man_N =
QuestPhrase (UseQCl (PosTP TPresent ASimul)
(QPredV2 who8one_IP love_V2 (IndefNumNP NoNum (UseN man_N)))) ;
</pre>
is read by the GF compiler, the identifiers <tt>love_V2</tt> and
<tt>man_N</tt> are not treated as constants, but, following
the normal binding rules of functional languages, as bound variables.
This is what gives the example method the generality that is needed.
<p>
To write linearization rules by examples one thus has to know at
least one abstract syntax constant for each category for which
one needs a variable.
<!-- NEW -->
<h2>Extending the lexicon on the fly</h2>
The greatest limitation of the example method is that the lexicon
may lack many of the words that are needed in examples. If parsing
fails because of this, the compiler gives a list of unknown words
in its error message. An obvious solution is,
of course, to extend the resource lexicon and try again.
A more light-weight solution is to add a <b>substitution</b> to
the example. For instance, if you want the example "the pope"
but the lexicon does not have the word "pope", you can write
<pre>
lin Pope = in NP "the man" {man_N = regN "pope"} ;
</pre>
The resulting linearization rule is initially
<pre>
lin Pope = DefOneNP (UseN man_N) ;
</pre>
but the substitution changes this to
<pre>
lin Pope = DefOneNP (UseN (regN "pope")) ;
</pre>
In this way, you do not have to extend the resource lexicon, but you
need to open the Paradigms module to compile the resulting term.
<p>
Of course, the substituted expressions may come from another language
than the main language of the example:
<pre>
lin Pope = in NP "the man" {man_N = regN "pape" masculine} ;
</pre>
If many substitutions are needed, semicolons are used as separators:
<pre>
{man_N = regN "pope" ; walk_V = regV "pray"} ;
</pre>
<!-- NEW -->
<h2>Implementation details: the structure of low-level files</h2>

View File

@@ -5,9 +5,9 @@
-- Stability : (stability)
-- Portability : (portability)
--
-- > CVS $Date: 2005/06/02 17:31:56 $
-- > CVS $Date: 2005/06/03 21:51:58 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.26 $
-- > CVS $Revision: 1.27 $
--
-- The Main module of GF program.
-----------------------------------------------------------------------------
@@ -60,7 +60,7 @@ main = do
_ -> putStrLnFlush "expecting exactly one gf file to compile"
_ | opt makeConcrete -> do
mapM_ mkConcrete fs
mkConcretes fs
_ | opt doBatch -> do
if opt beSilent then return () else putStrLnFlush "<gfbatch>"
@@ -85,7 +85,7 @@ helpMsg = unlines [
" -noemit do not emit code when compiling",
" -v be verbose when compiling",
" -batch structure session by XML tags (use > to send into a file)",
" -makeconcrete batch-compile .gfe file to concrete syntax using parser",
" -examples batch-compile .gfe file by parsing examples",
" -help show this message",
"To use the GUI: jgf <option>* <file>*"
]

View File

@@ -12,11 +12,14 @@
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
-----------------------------------------------------------------------------
module GF.Compile.MkConcrete (mkConcrete) where
module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where
import GF.Grammar.Values (Tree,tree2exp)
import GF.Grammar.PrGrammar (prt_)
import GF.Compile.ShellState (absId,firstStateGrammar)
import GF.Grammar.Grammar (Term(Q,QC)) ---
import GF.Grammar.Macros (composSafeOp, record2subst)
import GF.Compile.ShellState (firstStateGrammar)
import GF.Compile.PGrammar (pTerm)
import GF.API
import qualified GF.Embed.EmbedAPI as EA
@@ -35,22 +38,32 @@ import Control.Monad
-- Format of resource path (on first line):
-- --# -resource=PATH
-- Other lines are copied verbatim.
-- Assumes: resource has been built with
-- The resource has to be built with
-- i -src -optimize=share SOURCE
-- because mcfg parsing is used.
-- A sequence of files can be processed with the same resource without
-- rebuilding the grammar and parser.
mkConcrete :: FilePath -> IO ()
mkConcrete file = do
mkConcretes :: [FilePath] -> IO ()
mkConcretes [] = putStrLn "no files to process"
mkConcretes files@(file:_) = do
cont <- liftM lines $ readFileIf file
let res = getResPath cont
egr <- appIOE $
optFile2grammar (options [useOptimizer "share",fromSource,beSilent,notEmitCode]) res --- for -mcfg
optFile2grammar (options
[useOptimizer "share",fromSource,beSilent,notEmitCode]) res --- for -mcfg
gr <- err (\s -> putStrLn s >> error "resource file rejected") return egr
let abs = prt_ $ absId gr
let parser cat = errVal ([],"No parse") .
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr
let morpho = isKnownWord gr
mapM_ (mkConcrete parser morpho) files
-- | A parsing function as used in this module: it is applied to a category
-- name and then to the example string, and returns the trees found together
-- with a message string.
type Parser = String -> String -> ([Tree],String)
-- | A predicate on a word; in this module it is instantiated with
-- @isKnownWord@ applied to the resource grammar.
type Morpho = String -> Bool
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
mkConcrete parser morpho file = do
cont <- liftM lines $ readFileIf file
let out = suffixFile "gf" $ justModuleName file
writeFile out ""
mapM_ (mkCnc out parser morpho) cont
@@ -60,8 +73,7 @@ getResPath s = case head (dropWhile (all isSpace) s) of
'-':'-':'#':path -> reverse (takeWhile (not . (=='=')) (reverse path))
_ -> error "first line must be --# -resource=<PATH>"
mkCnc :: FilePath -> (String -> String -> ([Tree],String)) -> (String -> Bool) ->
String -> IO ()
mkCnc :: FilePath -> Parser -> Morpho -> String -> IO ()
mkCnc out parser morpho line = do
let (res,msg) = mkCncLine parser morpho line
appendFile out res
@@ -77,15 +89,15 @@ mkCncLine parser morpho line = case words line of
where
mkLinRule key s =
let
(pre,str) = span (/= "in") s
(pre,str) = span (/= "in") s
([cat],rest) = splitAt 1 $ tail str
lin = init (tail (unwords (init rest))) -- unquote
(lin,subst) = span (/= '"') $ tail $ unwords rest
def
| last pre /= "=" = line -- ordinary lin rule
| otherwise = case parser cat lin of
([t],_) -> ind ++ key +++ unwords pre +++ prt_ (tree2exp t) +++ ";"
(t:_,_) -> ind ++ key +++ unwords pre +++ prt_ (tree2exp t) +++ ";"
+++ "-- AMBIGUOUS"
(t:ts,_) -> ind ++ key +++ unwords pre +++
doSubst (init (tail subst)) (tree2exp t) +++ ";" ++
if null ts then [] else " -- AMBIGUOUS"
([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}"
in
(def,def)
@@ -93,3 +105,13 @@ mkCncLine parser morpho line = case words line of
[] -> ""
ws -> "unknown words: " ++ unwords ws
ind = takeWhile isSpace line
-- | Apply an optional substitution, given as source text, to a term and
-- print the resulting term with @prt_@.
--
-- If @subst0@ consists only of whitespace, the substitution is empty and the
-- term is printed unchanged. Otherwise @subst0@ is parsed with @pTerm@ and
-- converted with @record2subst@; a failure in either step is handed to
-- 'error'.
doSubst :: String -> Term -> String
doSubst subst0 trm = prt_ $ subt subst trm where
-- the substitution as a list of (identifier, replacement term) pairs
subst
| all isSpace subst0 = []
| otherwise = err error id $ pTerm subst0 >>= record2subst
-- replace each Q or QC node whose identifier has an entry in g, keeping the
-- node when there is none; all other terms are traversed with composSafeOp
subt g t = case t of
Q _ c -> maybe t id $ lookup c g
QC _ c -> maybe t id $ lookup c g
_ -> composSafeOp (subt g) t

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/05/09 15:45:00 $
-- > CVS $Date: 2005/06/03 21:51:58 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.21 $
-- > CVS $Revision: 1.22 $
--
-- Macros for constructing and analysing source code terms.
--
@@ -280,6 +280,11 @@ mkRecTypeN int lab typs = RecType [ (lab i, t) | (i,t) <- zip [int..] typs]
mkRecType :: (Int -> Label) -> [Type] -> Type
mkRecType = mkRecTypeN 0
-- | Turn a record term into a substitution.
--
-- For a record @R fs@, every field whose label matches @LIdent x@ contributes
-- the pair of @zIdent x@ and the term stored in that field; fields whose
-- label does not match the pattern are left out of the result. Any other
-- term is reported through @prtBad@ with the message "record expected, found".
record2subst :: Term -> Err Substitution
record2subst t = case t of
-- note: the t bound inside the comprehension is the field's term and
-- shadows the function argument t
R fs -> return [(zIdent x, t) | (LIdent x,(_,t)) <- fs]
_ -> prtBad "record expected, found" t
typeType, typePType, typeStr, typeTok, typeStrs :: Term
typeType = srt "Type"

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/02 10:23:52 $
-- > CVS $Date: 2005/06/03 21:51:59 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.28 $
-- > CVS $Revision: 1.29 $
--
-- Options and flags used in GF shell commands and files.
--
@@ -244,7 +244,7 @@ nostripQualif = iOpt "nostrip"
showAll = iOpt "all"
showMulti = iOpt "multi"
fromSource = iOpt "src"
makeConcrete = iOpt "makeconcrete"
makeConcrete = iOpt "examples"
-- ** mainly for stand-alone

View File

@@ -5,9 +5,9 @@
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:47 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.7 $
-- > CVS $Date: 2005/06/03 21:51:59 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.8 $
--
-- how to form linearizable trees from strings and from terms of different levels
--
@@ -39,6 +39,7 @@ string2tree :: StateGrammar -> String -> Tree
string2tree gr = errVal uTree . string2treeErr gr
string2treeErr :: StateGrammar -> String -> Err Tree
string2treeErr _ "" = Bad "empty string"
string2treeErr gr s = do
t <- pTerm s
let t1 = refreshMetas [] t