diff --git a/doc/gf-history.html b/doc/gf-history.html index 5830578e5..2215f5dae 100644 --- a/doc/gf-history.html +++ b/doc/gf-history.html @@ -14,6 +14,19 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
+18/5 (AR) Introduced a wordlist format gfwl for +quick creation of language exercises and (in future) multilingual lexica. +The format is now very simple: +
+ # Svenska - Franska - Finska + berg - montagne - vuori + klättra - grimper / escalader - kiivetä / kiipeillä ++but can be extended to cover paradigm functions in addition to just +words. + +
+ 3/4 (AR) The predefined abstract syntax type Int now has two inherent parameters indicating its last digit and its size. The (hard-coded) linearization type is diff --git a/lib/resource-1.0/Makefile b/lib/resource-1.0/Makefile index 9fb83211c..33e33764b 100644 --- a/lib/resource-1.0/Makefile +++ b/lib/resource-1.0/Makefile @@ -19,7 +19,7 @@ langs: present: chmod u+x mkPresent - $(GF) -make -src -preproc=./mkPresent */Lang??*.gf + $(GF) -make -src -preproc=./mkPresent */Lang??*.gf +RTS -M800M -K100M mv */*.gfc */*.gfr ../present mathematical: diff --git a/src/GF.hs b/src/GF.hs index 478a40d3f..8cbc45f68 100644 --- a/src/GF.hs +++ b/src/GF.hs @@ -22,6 +22,7 @@ import GF.API.IOGrammar import GF.Compile.ShellState import GF.Compile.Compile import GF.Compile.MkConcrete +import GF.Compile.Wordlist import GF.Shell import GF.Shell.SubShell import GF.Shell.ShellCommands @@ -91,6 +92,11 @@ main = do es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs mkConcretes os es doGF (removeOption fromExamples os) fs + -- preprocessing gfwl + else if (length fs == 1 && fileSuffix (head fs) == "gfwl") + then do + fs' <- mkWordlist (head fs) + doGF os fs' else doGF os fs helpMsg = unlines [ diff --git a/src/GF/Compile/Wordlist.hs b/src/GF/Compile/Wordlist.hs new file mode 100644 index 000000000..d581ed683 --- /dev/null +++ b/src/GF/Compile/Wordlist.hs @@ -0,0 +1,107 @@ +---------------------------------------------------------------------- +-- | +-- Module : Wordlist +-- Maintainer : AR +-- Stability : (stable) +-- Portability : (portable) +-- +-- > CVS $Date: +-- > CVS $Author: +-- > CVS $Revision: +-- +-- Compile a gfwl file (multilingual word list) to an abstract + concretes +----------------------------------------------------------------------------- + +module GF.Compile.Wordlist (mkWordlist) where + +import GF.Data.Operations +import GF.Infra.UseIO +import Data.List +import Data.Char + +-- read File.gfwl, write File.gf (abstract) and a set of concretes +-- return the names of 
the concretes
+
+mkWordlist :: FilePath -> IO [FilePath]
+mkWordlist file = do
+  s <- readFileIf file
+  -- named absName/absFile so that Prelude's abs is not shadowed
+  let absName = fileBody file
+      absFile = absName ++ ".gf"
+      (cnchs,wlist) = pWordlist absName $ filter notComment $ lines s
+      (gr,grs) = mkGrammars absName cnchs wlist
+      cncfs = [cnc ++ ".gf" | (cnc,_) <- cnchs]
+  mapM_ (uncurry writeFile) $ (absFile,gr) : zip cncfs grs
+  putStrLn $ "wrote " ++ unwords (absFile : cncfs)
+  return cncfs
+
+{-
+-- syntax of files, e.g.
+
+  # Svenska - Franska - Finska  -- names of concretes
+
+  berg - montagne - vuori       -- word entry
+
+-- this creates (cat and fun in the abstract, one lin in each concrete):
+
+  cat S ;
+  fun berg_S : S ;
+  lin berg_S = {s = ["berg"]} ;
+  lin berg_S = {s = ["montagne"]} ;
+  lin berg_S = {s = ["vuori"]} ;
+
+-- support for different categories to be elaborated. The syntax is
+
+  Verb . klättra - grimper / escalader - kiivetä / kiipeillä
+
+-- notice that a word can have several alternatives (separator /)
+-- and that an alternative can consist of several words
+-}
+
+type CncHeader = (String,String) -- module name, module header
+
+type Wordlist = [(String, [[String]])] -- cat, variants for each cnc
+
+-- parse the non-comment lines of a gfwl file: the leading lines starting
+-- with '#' name the concretes, every following line is a word entry
+-- NOTE(review): relies on chunks (GF.Data.Operations) splitting a token
+-- list at the given separator token - confirm there
+
+pWordlist :: String -> [String] -> ([CncHeader],Wordlist)
+pWordlist absName ls = (headers,rules) where
+  (hs,rs) = span ((=="#") . take 1) ls
+  -- each header line is tokenized on its own (minus its leading '#'), so
+  -- that several header lines, or a name glued to the '#', still parse
+  headers = map mkHeader $ chunks "-" $ filter (/="#") $ concatMap (words . drop 1) hs
+  rules   = map (mkRule . words) rs
+
+  -- first word = module name; further words follow "=" in the header
+  mkHeader ws = case ws of
+    w:ws2 -> (w, unwords ("concrete":w:"of":absName:"=":ws2))
+    []    -> error "gfwl: missing concrete syntax name in the # header"
+  -- "Cat . words" gives an entry of category Cat; the default category is S
+  mkRule ws = case ws of
+    cat:".":vs -> (cat, mkWords vs)
+    _ -> ("S", mkWords ws)
+  -- "-" separates the languages, "/" the alternatives within one language
+  mkWords = map (map unwords . chunks "/") . chunks "-"
+
+-- build the text of the abstract module and of one concrete module per
+-- header (in header order) from the abstract name, the headers and the entries
+
+mkGrammars :: String -> [CncHeader] -> Wordlist -> (String,[String])
+mkGrammars ab hs wl = (absGr,cncs) where
+  absGr = unlines $ map unwords $
+    ["abstract",ab,"=","{"]:
+    cats ++
+    funs ++
+    [["}"]]
+
+  cncs = [unlines $ (h ++ " {") : map lin rs ++ ["}"] | ((_,h),rs) <- zip hs rss]
+
+  cats = [["cat",c,";"] | c <- nub $ map fst wl]
+  funs = [["fun", f , ":", c,";"] | (f,c,_) <- wlf]
+
+  -- the function name comes from the first alternative of the first
+  -- language; entries that lack one are skipped rather than crashing on head
+  wlf = [(ident w c, c, wss) | (c,wss@((w:_):_)) <- wl]
+
+  -- one list of (function, alternatives) per concrete
+  rss = [[(f, column i wss) | (f,_,wss) <- wlf] | i <- [0..length hs - 1]]
+
+  -- total replacement for (!!): an entry with fewer columns than there are
+  -- concretes gets no alternatives, which val prints as "variants {}"
+  column i wss = case drop i wss of
+    ws:_ -> ws
+    []   -> []
+
+  lin (f,ss) = unwords ["lin", f, "=", "{s", "=", val ss, "}", ";"]
+
+  val ss = case ss of
+    [w] -> quote w
+    _ -> "variants {" ++ unwords (intersperse ";" (map quote ss)) ++ "}"
+
+  quote w = "[" ++ prQuotedString w ++ "]"
+
+  ident w c = concat $ intersperse "_" $ words w ++ [c]
+
+-- a line is kept unless it is blank or starts with "--"
+
+notComment :: String -> Bool
+notComment s = not (all isSpace s) && take 2 s /= "--"
+
diff --git a/src/GF/Shell.hs b/src/GF/Shell.hs index bdbf6d62c..4b9660ced 100644 --- a/src/GF/Shell.hs +++ b/src/GF/Shell.hs @@ -50,6 +50,7 @@ import GF.Infra.Option import GF.UseGrammar.Information import GF.Shell.HelpFile import GF.Compile.PrOld +import GF.Compile.Wordlist import GF.Grammar.PrGrammar import Control.Monad (foldM,liftM) @@ -189,6 +190,10 @@ execLine put (c@(co, os), arg, cs) (outps,st) = do execC :: CommandOpt -> ShellIO execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case comm of + CImport file | fileSuffix file == "gfwl" -> do + fs <- mkWordlist file + foldM (\x y -> execC (CImport y, opts) x) sa fs + CImport file | oElem fromExamples opts -> do es <- liftM nub $ getGFEFiles opts file system $ "gf -examples" +++ unlines es diff --git a/src/GF/Shell/HelpFile.hs b/src/GF/Shell/HelpFile.hs index 8d7b72c08..39706e9d8 100644 --- a/src/GF/Shell/HelpFile.hs +++ b/src/GF/Shell/HelpFile.hs @@ -49,6 +49,7 @@ txtHelpFile = "\n .gfr precompiled GF resource " ++ "\n .gfcm multilingual canonical GF" ++ "\n .gfe example-based grammar files (only with the -ex option)" ++ +
"\n .gfwl multilingual word list (preprocessed to abs + cncs)" ++ "\n .ebnf Extended BNF format" ++ "\n .cf Context-free (BNF) format" ++ "\n .trc TransferCore format" ++ diff --git a/src/HelpFile b/src/HelpFile index 634b0ee45..3b3a443ff 100644 --- a/src/HelpFile +++ b/src/HelpFile @@ -20,6 +20,7 @@ i, import: i File .gfr precompiled GF resource .gfcm multilingual canonical GF .gfe example-based grammar files (only with the -ex option) + .gfwl multilingual word list (preprocessed to abs + cncs) .ebnf Extended BNF format .cf Context-free (BNF) format .trc TransferCore format