mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 11:42:49 -06:00
Wordlist format
This commit is contained in:
@@ -14,6 +14,19 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
|
|||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
|
||||||
|
18/5 (AR) Introduced a wordlist format <tt>gfwl</tt> for
|
||||||
|
quick creation of language exercises and (in future) multilingual lexica.
|
||||||
|
The format is now very simple:
|
||||||
|
<pre>
|
||||||
|
# Svenska - Franska - Finska
|
||||||
|
berg - montagne - vuori
|
||||||
|
klättra - grimper / escalader - kiivetä / kiipeillä
|
||||||
|
</pre>
|
||||||
|
but can be extended to cover paradigm functions in addition to just
|
||||||
|
words.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
3/4 (AR) The predefined abstract syntax type <tt>Int</tt> now has two
|
3/4 (AR) The predefined abstract syntax type <tt>Int</tt> now has two
|
||||||
inherent parameters indicating its last digit and its size. The (hard-coded)
|
inherent parameters indicating its last digit and its size. The (hard-coded)
|
||||||
linearization type is
|
linearization type is
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ import GF.API.IOGrammar
|
|||||||
import GF.Compile.ShellState
|
import GF.Compile.ShellState
|
||||||
import GF.Compile.Compile
|
import GF.Compile.Compile
|
||||||
import GF.Compile.MkConcrete
|
import GF.Compile.MkConcrete
|
||||||
|
import GF.Compile.Wordlist
|
||||||
import GF.Shell
|
import GF.Shell
|
||||||
import GF.Shell.SubShell
|
import GF.Shell.SubShell
|
||||||
import GF.Shell.ShellCommands
|
import GF.Shell.ShellCommands
|
||||||
@@ -91,6 +92,11 @@ main = do
|
|||||||
es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs
|
es <- liftM (nub . concat) $ mapM (getGFEFiles os) fs
|
||||||
mkConcretes os es
|
mkConcretes os es
|
||||||
doGF (removeOption fromExamples os) fs
|
doGF (removeOption fromExamples os) fs
|
||||||
|
-- preprocessing gfwl
|
||||||
|
else if (length fs == 1 && fileSuffix (head fs) == "gfwl")
|
||||||
|
then do
|
||||||
|
fs' <- mkWordlist (head fs)
|
||||||
|
doGF os fs'
|
||||||
else doGF os fs
|
else doGF os fs
|
||||||
|
|
||||||
helpMsg = unlines [
|
helpMsg = unlines [
|
||||||
|
|||||||
107
src/GF/Compile/Wordlist.hs
Normal file
107
src/GF/Compile/Wordlist.hs
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : Wordlist
|
||||||
|
-- Maintainer : AR
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
-- > CVS $Date:
|
||||||
|
-- > CVS $Author:
|
||||||
|
-- > CVS $Revision:
|
||||||
|
--
|
||||||
|
-- Compile a gfwl file (multilingual word list) to an abstract + concretes
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Compile.Wordlist (mkWordlist) where
|
||||||
|
|
||||||
|
import GF.Data.Operations
|
||||||
|
import GF.Infra.UseIO
|
||||||
|
import Data.List
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
|
-- read File.gfwl, write File.gf (abstract) and a set of concretes
|
||||||
|
-- return the names of the concretes
|
||||||
|
|
||||||
|
-- | Preprocess a @.gfwl@ word list: parse it, render one abstract and
-- several concrete GF modules, write each to its own @.gf@ file, and
-- report what was written on stdout.
--
-- The result lists only the concrete file names, not the abstract one.
-- NOTE(review): presumably 'fileBody' strips the suffix from the path and
-- 'readFileIf' tolerates a missing file -- confirm in GF.Infra.UseIO.
mkWordlist :: FilePath -> IO [FilePath]
mkWordlist file = do
  contents <- readFileIf file
  let absName           = fileBody file
      entryLines        = filter notComment (lines contents)
      (headers, rules)  = pWordlist absName entryLines
      (absSrc, cncSrcs) = mkGrammars absName headers rules
      absFile           = absName ++ ".gf"
      cncFiles          = map ((++ ".gf") . fst) headers
      outputs           = (absFile, absSrc) : zip cncFiles cncSrcs
  -- the abstract module is written first, then the concretes in header order
  mapM_ (\(path, src) -> writeFile path src) outputs
  putStrLn ("wrote " ++ unwords (absFile : cncFiles))
  return cncFiles
|
||||||
|
|
||||||
|
{-
|
||||||
|
-- syntax of files, e.g.
|
||||||
|
|
||||||
|
# Svenska - Franska - Finska -- names of concretes
|
||||||
|
|
||||||
|
berg - montagne - vuori -- word entry
|
||||||
|
|
||||||
|
-- this creates:
|
||||||
|
|
||||||
|
cat S ;
|
||||||
|
fun berg_S : S ;
|
||||||
|
lin berg_S = {s = ["berg"]} ;
|
||||||
|
lin berg_S = {s = ["montagne"]} ;
|
||||||
|
lin berg_S = {s = ["vuori"]} ;
|
||||||
|
|
||||||
|
-- support for different categories to be elaborated. The syntax is
|
||||||
|
|
||||||
|
Verb . klättra - grimper / escalader - kiivetä / kiipeillä
|
||||||
|
|
||||||
|
-- notice that a word can have several alternatives (separated by /)
|
||||||
|
-- and that an alternative can consist of several words
|
||||||
|
-}
|
||||||
|
|
||||||
|
-- A concrete module: its name paired with the "concrete X of Abs = ..." header text.
type CncHeader = (String,String) -- module name, module header
|
||||||
|
|
||||||
|
-- One entry per word: its category, then for each concrete (in header order) its alternative forms.
type Wordlist = [(String, [[String]])] -- cat, variants for each cnc
|
||||||
|
|
||||||
|
|
||||||
|
-- | Parse the (already comment-filtered) lines of a @.gfwl@ file.
--
-- The leading run of lines starting with @#@ forms the header: its tokens
-- are split at @-@ into one chunk per concrete module, the first word of a
-- chunk being the module name and any further words being appended after
-- the @=@ of the generated module header. Every remaining line is a word
-- entry, optionally prefixed by @Cat .@; entries without a prefix get the
-- category @S@.
--
-- The first argument is the name of the abstract module.
pWordlist :: String -> [String] -> ([CncHeader],Wordlist)
pWordlist abs ls = (headers,rules) where
  (hs,rs) = span ((=="#") . take 1) ls
  -- Tokenise each header line separately: plain 'concat' would fuse the
  -- last word of one line with the "#" that starts the next.
  headers = map mkHeader $ chunks "-" $ filter (/="#") $ concatMap words hs
  rules   = map (mkRule . words) rs

  mkHeader ws = case ws of
    w:ws2 -> (w, unwords ("concrete":w:"of":abs:"=":ws2))
    -- NOTE(review): reached when 'chunks' yields an empty chunk, e.g. for a
    -- leading or doubled "-"; fail with a readable message instead of an
    -- anonymous pattern-match failure.
    []    -> error "gfwl: empty language name in '#' header line"
  mkRule ws = case ws of
    cat:".":vs -> (cat, mkWords vs)
    _          -> ("S", mkWords ws)
  -- columns are separated by "-", alternatives within a column by "/"
  mkWords = map (map unwords . chunks "/") . chunks "-"
|
||||||
|
|
||||||
|
|
||||||
|
-- | Render the GF source of the abstract module and of one concrete module
-- per header, in header order.
--
-- Arguments: the abstract module name, the concrete headers, and the
-- parsed word entries. Each kept entry becomes a @fun@ named after the
-- first alternative of its first column plus its category (words joined
-- by @_@), and a @lin@ in every concrete.
--
-- Entries whose first column has no alternative are skipped, because no
-- function name can be derived from them. An entry with fewer columns than
-- there are headers is linearized as @variants {}@ in the missing
-- languages instead of aborting with an index error.
mkGrammars :: String -> [CncHeader] -> Wordlist -> (String,[String])
mkGrammars ab hs wl = (absModule,cncs) where
  absModule = unlines $ map unwords $
    ["abstract",ab,"=","{"] :
    cats ++
    funs ++
    [["}"]]

  cncs = [unlines $ (h ++ " {") : map lin rs ++ ["}"] | ((_,h),rs) <- zip hs rss]

  cats = [["cat",c,";"] | c <- nub $ map fst wl]
  funs = [["fun", f , ":", c,";"] | (f,c,_) <- wlf]

  -- the nested pattern drops entries without a usable first alternative
  wlf = [(ident w c, c, ws) | (c,ws@((w:_):_)) <- wl]

  rss = [[(f, column i wss) | (f,_,wss) <- wlf] | i <- [0..length hs - 1]]

  -- total replacement for (!!): a missing column yields no alternatives
  column i = concat . take 1 . drop i

  lin (f,ss) = unwords ["lin", f, "=", "{s", "=", val ss, "}", ";"]

  -- a single alternative is emitted bare, anything else as a variants list
  val ss = case ss of
    [w] -> quote w
    _   -> "variants {" ++ unwords (intersperse ";" (map quote ss)) ++ "}"

  quote w = "[" ++ prQuotedString w ++ "]"

  ident w c = concat $ intersperse "_" $ words w ++ [c]
|
||||||
|
|
||||||
|
|
||||||
|
-- | True for lines that carry content: neither blank (whitespace only)
-- nor starting, in the very first column, with the comment marker @--@.
notComment :: String -> Bool
notComment line
  | all isSpace line = False
  | otherwise        = not ("--" `isPrefixOf` line)
|
||||||
|
|
||||||
@@ -50,6 +50,7 @@ import GF.Infra.Option
|
|||||||
import GF.UseGrammar.Information
|
import GF.UseGrammar.Information
|
||||||
import GF.Shell.HelpFile
|
import GF.Shell.HelpFile
|
||||||
import GF.Compile.PrOld
|
import GF.Compile.PrOld
|
||||||
|
import GF.Compile.Wordlist
|
||||||
import GF.Grammar.PrGrammar
|
import GF.Grammar.PrGrammar
|
||||||
|
|
||||||
import Control.Monad (foldM,liftM)
|
import Control.Monad (foldM,liftM)
|
||||||
@@ -189,6 +190,10 @@ execLine put (c@(co, os), arg, cs) (outps,st) = do
|
|||||||
execC :: CommandOpt -> ShellIO
|
execC :: CommandOpt -> ShellIO
|
||||||
execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case comm of
|
execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case comm of
|
||||||
|
|
||||||
|
CImport file | fileSuffix file == "gfwl" -> do
|
||||||
|
fs <- mkWordlist file
|
||||||
|
foldM (\x y -> execC (CImport y, opts) x) sa fs
|
||||||
|
|
||||||
CImport file | oElem fromExamples opts -> do
|
CImport file | oElem fromExamples opts -> do
|
||||||
es <- liftM nub $ getGFEFiles opts file
|
es <- liftM nub $ getGFEFiles opts file
|
||||||
system $ "gf -examples" +++ unlines es
|
system $ "gf -examples" +++ unlines es
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ txtHelpFile =
|
|||||||
"\n .gfr precompiled GF resource " ++
|
"\n .gfr precompiled GF resource " ++
|
||||||
"\n .gfcm multilingual canonical GF" ++
|
"\n .gfcm multilingual canonical GF" ++
|
||||||
"\n .gfe example-based grammar files (only with the -ex option)" ++
|
"\n .gfe example-based grammar files (only with the -ex option)" ++
|
||||||
|
"\n .gfwl multilingual word list (preprocessed to abs + cncs)" ++
|
||||||
"\n .ebnf Extended BNF format" ++
|
"\n .ebnf Extended BNF format" ++
|
||||||
"\n .cf Context-free (BNF) format" ++
|
"\n .cf Context-free (BNF) format" ++
|
||||||
"\n .trc TransferCore format" ++
|
"\n .trc TransferCore format" ++
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ i, import: i File
|
|||||||
.gfr precompiled GF resource
|
.gfr precompiled GF resource
|
||||||
.gfcm multilingual canonical GF
|
.gfcm multilingual canonical GF
|
||||||
.gfe example-based grammar files (only with the -ex option)
|
.gfe example-based grammar files (only with the -ex option)
|
||||||
|
.gfwl multilingual word list (preprocessed to abs + cncs)
|
||||||
.ebnf Extended BNF format
|
.ebnf Extended BNF format
|
||||||
.cf Context-free (BNF) format
|
.cf Context-free (BNF) format
|
||||||
.trc TransferCore format
|
.trc TransferCore format
|
||||||
|
|||||||
Reference in New Issue
Block a user