mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
example substitutions
This commit is contained in:
@@ -18,7 +18,7 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
|
||||
<b>grammar writing by examples</b>. Files of this format are first
|
||||
converted to <tt>.gf</tt> files by the command
|
||||
<pre>
|
||||
gf -makeconcrete File.gfe
|
||||
gf -examples File.gfe
|
||||
</pre>
|
||||
See <a href="../lib/resource/doc/examples/QuestionsI.gfe">
|
||||
<tt>../lib/resource/doc/examples/QuestionsI.gfe</tt></a>
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
--# -resource=../../english/LangEng.gf
|
||||
|
||||
-- to compile: gf -makeconcrete QuestionsI.gfe
|
||||
-- to compile: gf -examples QuestionsI.gfe
|
||||
|
||||
incomplete concrete QuestionsI of Questions = open Resource in {
|
||||
lincat
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
--# -resource=../../english/LangEng.gf
|
||||
|
||||
-- to compile: gf -makeconcrete QuestionsI.gfe
|
||||
-- to compile: gf -examples QuestionsI.gfe
|
||||
|
||||
incomplete concrete QuestionsI of Questions = open Resource in {
|
||||
lincat
|
||||
|
||||
@@ -710,7 +710,7 @@ generates
|
||||
<a href="example/QuestionsI.gf">QuestionsI.gf</a>,
|
||||
when you execute the command
|
||||
<pre>
|
||||
gf -makeconcrete QuestionsI.gfe
|
||||
gf -examples QuestionsI.gfe
|
||||
</pre>
|
||||
Of course, the grammar of any language can be created by
|
||||
parsing any language, as long as they have a common resource API.
|
||||
@@ -718,6 +718,74 @@ The use of English resource is generally recommended, because it
|
||||
is smaller and faster to parse than the other languages.
|
||||
|
||||
|
||||
<!-- NEW -->
|
||||
<h2>Constants and variables in examples</h2>
|
||||
|
||||
The file <a href="example/QuestionsI.gfe">QuestionsI.gfe</a> uses
|
||||
as resource <tt>LangEng</tt>, which contains all resource syntax and
|
||||
a lexicon of ca. 300 words. A linearization rule, such as
|
||||
<pre>
|
||||
lin Who love_V2 man_N = in Phr "who loves men ?" ;
|
||||
</pre>
|
||||
uses, as its argument variables, constants for words that can be found in
|
||||
the lexicon. It is due to this that the example can be parsed.
|
||||
When the resulting rule,
|
||||
<pre>
|
||||
lin Who love_V2 man_N =
|
||||
QuestPhrase (UseQCl (PosTP TPresent ASimul)
|
||||
(QPredV2 who8one_IP love_V2 (IndefNumNP NoNum (UseN man_N)))) ;
|
||||
</pre>
|
||||
is read by the GF compiler, the identifiers <tt>love_V2</tt> and
|
||||
<tt>man_N</tt> are not treated as constants, but, following
|
||||
the normal binding rules of functional languages, as bound variables.
|
||||
This is what gives the example method the generality that is needed.
|
||||
|
||||
<p>
|
||||
|
||||
To write linearization rules by examples, one thus has to know at
|
||||
least one abstract syntax constant for each category for which
|
||||
one needs a variable.
|
||||
|
||||
|
||||
|
||||
<!-- NEW -->
|
||||
<h2>Extending the lexicon on the fly</h2>
|
||||
|
||||
The greatest limitation of the example method is that the lexicon
|
||||
may lack many of the words that are needed in examples. If parsing
|
||||
fails because of this, the compiler gives a list of unknown words
|
||||
in its error message. An obvious solution is,
|
||||
of course, to extend the resource lexicon and try again.
|
||||
A more light-weight solution is to add a <b>substitution</b> to
|
||||
the example. For instance, if you want the example "the pope"
|
||||
but the lexicon does not have the word "pope", you can write
|
||||
<pre>
|
||||
lin Pope = in NP "the man" {man_N = regN "pope"} ;
|
||||
</pre>
|
||||
The resulting linearization rule is initially
|
||||
<pre>
|
||||
lin Pope = DefOneNP (UseN man_N) ;
|
||||
</pre>
|
||||
but the substitution changes this to
|
||||
<pre>
|
||||
lin Pope = DefOneNP (UseN (regN "pope")) ;
|
||||
</pre>
|
||||
In this way, you do not have to extend the resource lexicon, but you
|
||||
need to open the Paradigms module to compile the resulting term.
|
||||
|
||||
<p>
|
||||
|
||||
Of course, the substituted expressions may come from another language
|
||||
than the main language of the example:
|
||||
<pre>
|
||||
lin Pope = in NP "the man" {man_N = regN "pape" masculine} ;
|
||||
</pre>
|
||||
If many substitutions are needed, semicolons are used as separators:
|
||||
<pre>
|
||||
{man_N = regN "pope" ; walk_V = regV "pray"} ;
|
||||
</pre>
|
||||
|
||||
|
||||
<!-- NEW -->
|
||||
<h2>Implementation details: the structure of low-level files</h2>
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stability)
|
||||
-- Portability : (portability)
|
||||
--
|
||||
-- > CVS $Date: 2005/06/02 17:31:56 $
|
||||
-- > CVS $Date: 2005/06/03 21:51:58 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.26 $
|
||||
-- > CVS $Revision: 1.27 $
|
||||
--
|
||||
-- The Main module of GF program.
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -60,7 +60,7 @@ main = do
|
||||
_ -> putStrLnFlush "expecting exactly one gf file to compile"
|
||||
|
||||
_ | opt makeConcrete -> do
|
||||
mapM_ mkConcrete fs
|
||||
mkConcretes fs
|
||||
|
||||
_ | opt doBatch -> do
|
||||
if opt beSilent then return () else putStrLnFlush "<gfbatch>"
|
||||
@@ -85,7 +85,7 @@ helpMsg = unlines [
|
||||
" -noemit do not emit code when compiling",
|
||||
" -v be verbose when compiling",
|
||||
" -batch structure session by XML tags (use > to send into a file)",
|
||||
" -makeconcrete batch-compile .gfe file to concrete syntax using parser",
|
||||
" -examples batch-compile .gfe file by parsing examples",
|
||||
" -help show this message",
|
||||
"To use the GUI: jgf <option>* <file>*"
|
||||
]
|
||||
|
||||
@@ -12,11 +12,14 @@
|
||||
-- Compile a gfe file into a concrete syntax by using the parser on a resource grammar.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Compile.MkConcrete (mkConcrete) where
|
||||
module GF.Compile.MkConcrete (mkConcretes,mkCncLine) where
|
||||
|
||||
import GF.Grammar.Values (Tree,tree2exp)
|
||||
import GF.Grammar.PrGrammar (prt_)
|
||||
import GF.Compile.ShellState (absId,firstStateGrammar)
|
||||
import GF.Grammar.Grammar (Term(Q,QC)) ---
|
||||
import GF.Grammar.Macros (composSafeOp, record2subst)
|
||||
import GF.Compile.ShellState (firstStateGrammar)
|
||||
import GF.Compile.PGrammar (pTerm)
|
||||
import GF.API
|
||||
import qualified GF.Embed.EmbedAPI as EA
|
||||
|
||||
@@ -35,22 +38,32 @@ import Control.Monad
|
||||
-- Format of resource path (on first line):
|
||||
-- --# -resource=PATH
|
||||
-- Other lines are copied verbatim.
|
||||
-- Assumes: resource has been built with
|
||||
-- The resource has to be built with
|
||||
-- i -src -optimize=share SOURCE
|
||||
-- because mcfg parsing is used.
|
||||
-- A sequence of files can be processed with the same resource without
|
||||
-- rebuilding the grammar and parser.
|
||||
|
||||
|
||||
mkConcrete :: FilePath -> IO ()
|
||||
mkConcrete file = do
|
||||
mkConcretes :: [FilePath] -> IO ()
|
||||
mkConcretes [] = putStrLn "no files to process"
|
||||
mkConcretes files@(file:_) = do
|
||||
cont <- liftM lines $ readFileIf file
|
||||
let res = getResPath cont
|
||||
egr <- appIOE $
|
||||
optFile2grammar (options [useOptimizer "share",fromSource,beSilent,notEmitCode]) res --- for -mcfg
|
||||
optFile2grammar (options
|
||||
[useOptimizer "share",fromSource,beSilent,notEmitCode]) res --- for -mcfg
|
||||
gr <- err (\s -> putStrLn s >> error "resource file rejected") return egr
|
||||
let abs = prt_ $ absId gr
|
||||
let parser cat = errVal ([],"No parse") .
|
||||
optParseArgErrMsg (options [newMParser, firstCat cat, beVerbose]) gr
|
||||
let morpho = isKnownWord gr
|
||||
mapM_ (mkConcrete parser morpho) files
|
||||
|
||||
type Parser = String -> String -> ([Tree],String)
|
||||
type Morpho = String -> Bool
|
||||
|
||||
mkConcrete :: Parser -> Morpho -> FilePath -> IO ()
|
||||
mkConcrete parser morpho file = do
|
||||
cont <- liftM lines $ readFileIf file
|
||||
let out = suffixFile "gf" $ justModuleName file
|
||||
writeFile out ""
|
||||
mapM_ (mkCnc out parser morpho) cont
|
||||
@@ -60,8 +73,7 @@ getResPath s = case head (dropWhile (all isSpace) s) of
|
||||
'-':'-':'#':path -> reverse (takeWhile (not . (=='=')) (reverse path))
|
||||
_ -> error "first line must be --# -resource=<PATH>"
|
||||
|
||||
mkCnc :: FilePath -> (String -> String -> ([Tree],String)) -> (String -> Bool) ->
|
||||
String -> IO ()
|
||||
mkCnc :: FilePath -> Parser -> Morpho -> String -> IO ()
|
||||
mkCnc out parser morpho line = do
|
||||
let (res,msg) = mkCncLine parser morpho line
|
||||
appendFile out res
|
||||
@@ -77,15 +89,15 @@ mkCncLine parser morpho line = case words line of
|
||||
where
|
||||
mkLinRule key s =
|
||||
let
|
||||
(pre,str) = span (/= "in") s
|
||||
(pre,str) = span (/= "in") s
|
||||
([cat],rest) = splitAt 1 $ tail str
|
||||
lin = init (tail (unwords (init rest))) -- unquote
|
||||
(lin,subst) = span (/= '"') $ tail $ unwords rest
|
||||
def
|
||||
| last pre /= "=" = line -- ordinary lin rule
|
||||
| otherwise = case parser cat lin of
|
||||
([t],_) -> ind ++ key +++ unwords pre +++ prt_ (tree2exp t) +++ ";"
|
||||
(t:_,_) -> ind ++ key +++ unwords pre +++ prt_ (tree2exp t) +++ ";"
|
||||
+++ "-- AMBIGUOUS"
|
||||
(t:ts,_) -> ind ++ key +++ unwords pre +++
|
||||
doSubst (init (tail subst)) (tree2exp t) +++ ";" ++
|
||||
if null ts then [] else " -- AMBIGUOUS"
|
||||
([],msg) -> "{-" ++ line ++++ morph lin ++++ "-}"
|
||||
in
|
||||
(def,def)
|
||||
@@ -93,3 +105,13 @@ mkCncLine parser morpho line = case words line of
|
||||
[] -> ""
|
||||
ws -> "unknown words: " ++ unwords ws
|
||||
ind = takeWhile isSpace line
|
||||
|
||||
doSubst :: String -> Term -> String
|
||||
doSubst subst0 trm = prt_ $ subt subst trm where
|
||||
subst
|
||||
| all isSpace subst0 = []
|
||||
| otherwise = err error id $ pTerm subst0 >>= record2subst
|
||||
subt g t = case t of
|
||||
Q _ c -> maybe t id $ lookup c g
|
||||
QC _ c -> maybe t id $ lookup c g
|
||||
_ -> composSafeOp (subt g) t
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/05/09 15:45:00 $
|
||||
-- > CVS $Date: 2005/06/03 21:51:58 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.21 $
|
||||
-- > CVS $Revision: 1.22 $
|
||||
--
|
||||
-- Macros for constructing and analysing source code terms.
|
||||
--
|
||||
@@ -280,6 +280,11 @@ mkRecTypeN int lab typs = RecType [ (lab i, t) | (i,t) <- zip [int..] typs]
|
||||
mkRecType :: (Int -> Label) -> [Type] -> Type
|
||||
mkRecType = mkRecTypeN 0
|
||||
|
||||
record2subst :: Term -> Err Substitution
|
||||
record2subst t = case t of
|
||||
R fs -> return [(zIdent x, t) | (LIdent x,(_,t)) <- fs]
|
||||
_ -> prtBad "record expected, found" t
|
||||
|
||||
typeType, typePType, typeStr, typeTok, typeStrs :: Term
|
||||
|
||||
typeType = srt "Type"
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/06/02 10:23:52 $
|
||||
-- > CVS $Date: 2005/06/03 21:51:59 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.28 $
|
||||
-- > CVS $Revision: 1.29 $
|
||||
--
|
||||
-- Options and flags used in GF shell commands and files.
|
||||
--
|
||||
@@ -244,7 +244,7 @@ nostripQualif = iOpt "nostrip"
|
||||
showAll = iOpt "all"
|
||||
showMulti = iOpt "multi"
|
||||
fromSource = iOpt "src"
|
||||
makeConcrete = iOpt "makeconcrete"
|
||||
makeConcrete = iOpt "examples"
|
||||
|
||||
-- ** mainly for stand-alone
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:47 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.7 $
|
||||
-- > CVS $Date: 2005/06/03 21:51:59 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.8 $
|
||||
--
|
||||
-- how to form linearizable trees from strings and from terms of different levels
|
||||
--
|
||||
@@ -39,6 +39,7 @@ string2tree :: StateGrammar -> String -> Tree
|
||||
string2tree gr = errVal uTree . string2treeErr gr
|
||||
|
||||
string2treeErr :: StateGrammar -> String -> Err Tree
|
||||
string2treeErr _ "" = Bad "empty string"
|
||||
string2treeErr gr s = do
|
||||
t <- pTerm s
|
||||
let t1 = refreshMetas [] t
|
||||
|
||||
Reference in New Issue
Block a user