1
0
forked from GitHub/gf-core

GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

This commit is contained in:
aarne
2008-05-21 09:26:44 +00:00
parent 915a1de717
commit 055c0d0d5a
536 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,494 @@
----------------------------------------------------------------------
-- |
-- Module : Custom
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/16 10:21:21 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.85 $
--
-- A database for customizable GF shell commands.
--
-- databases for customizable commands. AR 21\/11\/2001.
-- for: grammar parsers, grammar printers, term commands, string commands.
-- idea: items added here are usable throughout GF; nothing else need be edited.
-- they are often usable through the API: hence API cannot be imported here!
--
-- Major redesign 3\/4\/2002: the first entry in each database is DEFAULT.
-- If no other value is given, the default is selected.
-- Because of this, two invariants have to be preserved:
--
-- - no databases may be empty
--
-- - additions are made to the end of the database
-----------------------------------------------------------------------------
module GF.UseGrammar.Custom where
import GF.Data.Operations
import GF.Text.Text
import GF.UseGrammar.Tokenize
import GF.Grammar.Values
import qualified GF.Grammar.Grammar as G
import qualified GF.Canon.AbsGFC as A
import qualified GF.Canon.GFC as C
import qualified GF.Devel.GFCCtoJS as JS
import GF.Canon.CanonToGFCC
import qualified GF.Devel.GFCCtoHaskell as CCH
import qualified GF.Source.AbsGF as GF
import qualified GF.Grammar.MMacros as MM
import GF.Grammar.AbsCompute
import GF.Grammar.TypeCheck
import GF.UseGrammar.Generate
import GF.UseGrammar.MatchTerm
import GF.UseGrammar.Linear (unoptimizeCanon)
------import Compile
import GF.Compile.ShellState
import GF.UseGrammar.Editing
import GF.UseGrammar.Paraphrases
import GF.Infra.Option
import GF.CF.CF
import GF.CF.CFIdent
import GF.Canon.CanonToGrammar
import GF.CF.PPrCF
import GF.CF.PrLBNF
import GF.Grammar.PrGrammar
import GF.Compile.PrOld
import GF.Canon.MkGFC
import GF.Speech.PrGSL (gslPrinter)
import GF.Speech.PrJSGF (jsgfPrinter)
import GF.Speech.PrSRGS
import GF.Speech.PrSRGS_ABNF
import qualified GF.Speech.SISR as SISR
import GF.Speech.PrSLF
import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter)
import GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter)
import GF.Speech.GrammarToVoiceXML (grammar2vxml)
import GF.Data.Zipper
import GF.UseGrammar.Statistics
import GF.UseGrammar.Morphology
import GF.UseGrammar.Information
import GF.API.GrammarToHaskell
import GF.API.GrammarToTransfer
-----import GrammarToCanon (showCanon, showCanonOpt)
-----import qualified GrammarToGFC as GFC
import GF.Probabilistic.Probabilistic (prProbs)
-- the cf parsing algorithms
import GF.CF.ChartParser -- OBSOLETE
import qualified GF.Parsing.CF as PCF
import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
-- grammar conversions -- peb 19/4-04
-- see also customGrammarPrinter
import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE
import qualified GF.Printing.PrintParser as PrtOld -- OBSOLETE
import qualified GF.Infra.Print as Prt
import qualified GF.Conversion.GFC as Cnv
import qualified GF.Conversion.Types as CnvTypes
import qualified GF.Conversion.Haskell as CnvHaskell
import qualified GF.Conversion.Prolog as CnvProlog
import qualified GF.Conversion.TypeGraph as CnvTypeGraph
import GF.Canon.Unparametrize
import GF.Canon.Subexpressions
import GF.Canon.AbsToBNF
import GF.Canon.GFC
import qualified GF.Canon.MkGFC as MC
import GF.CFGM.PrintCFGrammar (prCanonAsCFGM)
import GF.Visualization.VisualizeGrammar (visualizeCanonGrammar, visualizeSourceGrammar)
import GF.API.MyParser
import qualified GF.Infra.Modules as M
import GF.Infra.UseIO
import Control.Monad
import Data.Char
import Data.Maybe (fromMaybe)
-- character codings
import GF.Text.Unicode
import GF.Text.UTF8 (decodeUTF8)
import GF.Text.Greek (mkGreek)
import GF.Text.Arabic (mkArabic)
import GF.Text.Hebrew (mkHebrew)
import GF.Text.Russian (mkRussian, mkRusKOI8)
import GF.Text.Ethiopic (mkEthiopic)
import GF.Text.Tamil (mkTamil)
import GF.Text.OCSCyrillic (mkOCSCyrillic)
import GF.Text.LatinASupplement (mkLatinASupplement)
import GF.Text.Devanagari (mkDevanagari)
import GF.Text.Hiragana (mkJapanese)
import GF.Text.ExtendedArabic (mkArabic0600)
import GF.Text.ExtendedArabic (mkExtendedArabic)
import GF.Text.ExtraDiacritics (mkExtraDiacritics)
-- minimal version also used in Hugs. AR 2/12/2002.
-- databases for customizable commands. AR 21/11/2001
-- for: grammar parsers, grammar printers, term commands, string commands
-- idea: items added here are usable throughout GF; nothing else need be edited
-- they are often usable through the API: hence API cannot be imported here!
-- Major redesign 3/4/2002: the first entry in each database is DEFAULT.
-- If no other value is given, the default is selected.
-- Because of this, two invariants have to be preserved:
-- - no databases may be empty
-- - additions are made to the end of the database
-- * these are the databases; the comment gives the name of the flag
-- The databases are declared here and populated further down in this file;
-- the comment on each gives the shell flag that selects an entry.

-- | grammarFormat, \"-format=x\" or file suffix
customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar)

-- | grammarPrinter, \"-printer=x\"
customGrammarPrinter :: CustomData (Options -> StateGrammar -> String)

-- | multiGrammarPrinter, \"-printer=x\"
customMultiGrammarPrinter :: CustomData (Options -> CanonGrammar -> String)

-- | syntaxPrinter, \"-printer=x\"
customSyntaxPrinter :: CustomData (GF.Grammar -> String)

-- | termPrinter, \"-printer=x\"
customTermPrinter :: CustomData (StateGrammar -> Tree -> String)

-- | termCommand, \"-transform=x\"
customTermCommand :: CustomData (StateGrammar -> Tree -> [Tree])

-- | editCommand, \"-edit=x\"
customEditCommand :: CustomData (StateGrammar -> Action)

-- | filterString, \"-filter=x\"
customStringCommand :: CustomData (StateGrammar -> String -> String)

-- | useParser, \"-parser=x\"
customParser :: CustomData (StateGrammar -> CFCat -> CFParser)

-- | useTokenizer, \"-lexer=x\"
customTokenizer :: CustomData (StateGrammar -> String -> [[CFTok]])

-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
customUntokenizer :: CustomData (StateGrammar -> String -> String)

-- | uniCoding, \"-coding=x\"
--
-- contains conversions from different codings to the internal
-- unicode coding
customUniCoding :: CustomData (String -> String)
-- | this is the way of selecting an item: take the value chosen by the
-- option, or fall back to the database's default (its first entry) when
-- the option is absent or names no registered command.
customOrDefault :: Options -> OptFun -> CustomData a -> a
customOrDefault opts optfun db =
  fromMaybe (defaultCustomVal db) (customAsOptVal opts optfun db)

-- | to produce menus of custom operations: the database title together
-- with the names of its registered commands.
customInfo :: CustomData a -> (String, [String])
customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c))
-------------------------------
-- * types and stuff
-- | Identifier under which a command is registered in a database.
type CommandId = String

-- | Make a command identifier from a string.
strCI :: String -> CommandId
strCI s = s

-- | View a command identifier as a string.
ciStr :: CommandId -> String
ciStr s = s

-- | View a command identifier as a shell option.
ciOpt :: CommandId -> Option
ciOpt = iOpt

-- | A titled association list of commands. Per the module header, a
-- populated database is never empty and its first entry is the default.
newtype CustomData a = CustomData (String, [(CommandId,a)])

-- | Build a database from a title and its entries.
customData :: String -> [(CommandId, a)] -> CustomData a
customData title entries = CustomData (title, entries)

-- | The entries of a database.
dbCustomData :: CustomData a -> [(CommandId, a)]
dbCustomData (CustomData (_,entries)) = entries

-- | The title of a database.
titleCustomData :: CustomData a -> String
titleCustomData (CustomData (title,_)) = title

-- | Look up a command by its identifier (first match wins).
lookupCustom :: CustomData a -> CommandId -> Maybe a
lookupCustom db ci = lookup ci (dbCustomData db)

-- | Read the relevant option value and look it up in the database.
customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
customAsOptVal opts optfun db =
  getOptVal opts optfun >>= lookupCustom db . strCI

-- | take the first entry from the database
defaultCustomVal :: CustomData a -> a
defaultCustomVal (CustomData (title,entries)) =
  ifNull (error ("empty database:" +++ title)) (snd . head) entries
-------------------------------------------------------------------------
-- * and here's the customizable part:
-- grammar parsers: the ID is also used as file name suffix
-- | Grammar parser database, keyed by file name suffix.
-- NOTE(review): this database is currently EMPTY (its default entry is
-- commented out together with the Compile import), which violates the
-- module-header invariant "no databases may be empty": asking for its
-- default would hit the 'error' in 'defaultCustomVal'. Confirm whether
-- the "gf" entry should be restored.
customGrammarParser =
  customData "Grammar parsers, selected by file name suffix" $
  [
   ------ (strCI "gf", compileModule noOptions) -- DEFAULT
   -- add your own grammar parsers here
  ]
-- | Grammar printer database (flag @-printer=x@): each entry renders one
-- 'StateGrammar' to a 'String', given the current 'Options' (most entries
-- ignore the options). The first entry ("gfc") is the default.
customGrammarPrinter =
  customData "Grammar printers, selected by option -printer=x" $
  [
    (strCI "gfc", \_ -> prCanon . stateGrammarST) -- DEFAULT
   ,(strCI "gf", \_ -> err id prGrammar . canon2sourceGrammar . stateGrammarST)
   ,(strCI "cf", \_ -> prCF . stateCF)
   ,(strCI "old", \_ -> printGrammarOld . stateGrammarST)
   -- speech-grammar formats:
   ,(strCI "gsl", gslPrinter)
   ,(strCI "jsgf", jsgfPrinter Nothing)
   ,(strCI "jsgf_sisr_old", jsgfPrinter (Just SISR.SISROld))
   ,(strCI "srgs_xml", srgsXmlPrinter Nothing False)
   ,(strCI "srgs_xml_non_rec", srgsXmlNonRecursivePrinter)
   ,(strCI "srgs_xml_prob", srgsXmlPrinter Nothing True)
   ,(strCI "srgs_xml_sisr_old", srgsXmlPrinter (Just SISR.SISROld) False)
   ,(strCI "srgs_abnf", srgsAbnfPrinter Nothing False)
   ,(strCI "srgs_abnf_non_rec", srgsAbnfNonRecursivePrinter)
   ,(strCI "srgs_abnf_sisr_old", srgsAbnfPrinter (Just SISR.SISROld) False)
   ,(strCI "vxml", grammar2vxml)
   ,(strCI "slf", slfPrinter)
   ,(strCI "slf_graphviz", slfGraphvizPrinter)
   ,(strCI "slf_sub", slfSubPrinter)
   ,(strCI "slf_sub_graphviz", slfSubGraphvizPrinter)
   ,(strCI "fa_graphviz", faGraphvizPrinter)
   ,(strCI "fa_c", faCPrinter)
   ,(strCI "regexp", regexpPrinter)
   ,(strCI "regexps", multiRegexpPrinter)
   ,(strCI "regular", regularPrinter)
   -- BNF variants:
   ,(strCI "plbnf", \_ -> prLBNF True)
   ,(strCI "lbnf", \_ -> prLBNF False)
   ,(strCI "bnf", \_ -> prBNF False)
   ,(strCI "absbnf", \_ -> abstract2bnf . stateGrammarST)
   -- code generation:
   ,(strCI "haskell", \_ -> grammar2haskell . stateGrammarST)
   ,(strCI "gfcc_haskell", \opts -> CCH.grammar2haskell .
                                    canon2gfcc opts . stateGrammarST)
   ,(strCI "haskell_gadt", \_ -> grammar2haskellGADT . stateGrammarST)
   ,(strCI "transfer", \_ -> grammar2transfer . stateGrammarST)
   -- inspection of the shell state:
   ,(strCI "morpho", \_ -> prMorpho . stateMorpho)
   ,(strCI "fullform",\_ -> prFullForm . stateMorpho)
   ,(strCI "opts", \_ -> prOpts . stateOptions)
   ,(strCI "words", \_ -> unwords . stateGrammarWords)
   ,(strCI "printnames", \_ -> C.prPrintnamesGrammar . stateGrammarST)
   ,(strCI "stat", \_ -> prStatistics . stateGrammarST)
   ,(strCI "probs", \_ -> prProbs . stateProbs)
   ,(strCI "unpar", \_ -> prCanon . unparametrizeCanon . stateGrammarST)
   ,(strCI "subs", \_ -> prSubtermStat . stateGrammarST)
   {- ----
   (strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT
   ,(strCI "canon", showCanon "Lang" . stateGrammarST)
   ,(strCI "gfc", GFC.showGFC . stateGrammarST)
   ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST)
   -}
   -- add your own grammar printers here
   -- grammar conversions:
   ,(strCI "mcfg", \_ -> Prt.prt . stateMCFG)
   ,(strCI "fcfg", \_ -> Prt.prt . fst . stateFCFG)
   ,(strCI "cfg", \_ -> Prt.prt . stateCFG)
   ,(strCI "pinfo", \_ -> Prt.prt . statePInfo)
   ,(strCI "abstract", \_ -> Prt.prtAfter "\n" . Cnv.gfc2abstract . stateGrammarLang)
   ,(strCI "functiongraph",\_ -> CnvTypeGraph.prtFunctionGraph . Cnv.gfc2simple noOptions . stateGrammarLang)
   ,(strCI "typegraph", \_ -> CnvTypeGraph.prtTypeGraph . Cnv.gfc2simple noOptions . stateGrammarLang)
   ,(strCI "gfc-haskell", \_ -> CnvHaskell.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
   ,(strCI "mcfg-haskell", \_ -> CnvHaskell.prtMGrammar . stateMCFG)
   ,(strCI "cfg-haskell", \_ -> CnvHaskell.prtCGrammar . stateCFG)
   ,(strCI "gfc-prolog", \_ -> CnvProlog.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
   ,(strCI "mcfg-prolog", \_ -> CnvProlog.prtMGrammar . stateMCFG)
   ,(strCI "cfg-prolog", \_ -> CnvProlog.prtCGrammar . stateCFG)
   -- obsolete, or only for testing:
   ,(strCI "abs-skvatt", \_ -> Cnv.abstract2skvatt . Cnv.gfc2abstract . stateGrammarLang)
   ,(strCI "cfg-skvatt", \_ -> Cnv.cfg2skvatt . stateCFG)
   ,(strCI "simple", \_ -> Prt.prt . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
   ,(strCI "mcfg-erasing", \_ -> Prt.prt . fst . snd . uncurry Cnv.convertGFC . stateGrammarLangOpts)
   -- ,(strCI "mcfg-old", PrtOld.prt . CnvOld.mcfg . statePInfoOld)
   -- ,(strCI "cfg-old", PrtOld.prt . CnvOld.cfg . statePInfoOld)
  ]
  -- pair the options with the language grammar, as several converters need both
  where stateGrammarLangOpts s = (stateOptions s, stateGrammarLang s)
-- | Printers for whole multilingual grammars (flag @-printer=x@);
-- the first entry ("gfcm") is the default.
customMultiGrammarPrinter =
  customData "Printers for multiple grammars, selected by option -printer=x" $
  [
    (strCI "gfcm", const MC.prCanon)
   ,(strCI "gfcc", canon2gfccPr)
   ,(strCI "js", \opts -> JS.gfcc2js . canon2gfcc opts)
   ,(strCI "header", const (MC.prCanonMGr . unoptimizeCanon))
   ,(strCI "cfgm", prCanonAsCFGM)
   ,(strCI "graph", visualizeCanonGrammar)
   ,(strCI "missing", const missingLinCanonGrammar)
   -- to prolog format:
   ,(strCI "gfc-prolog", CnvProlog.prtSMulti)
   ,(strCI "mcfg-prolog", CnvProlog.prtMMulti)
   ,(strCI "cfg-prolog", CnvProlog.prtCMulti)
  ]

-- | Source-syntax printer database (flag @-printer=x@).
-- NOTE(review): currently EMPTY, violating the "no databases may be empty"
-- invariant; taking its default would 'error' in 'defaultCustomVal'.
customSyntaxPrinter =
  customData "Syntax printers, selected by option -printer=x" $
  [
   -- add your own grammar printers here
  ]

-- | Term printer database (flag @-printer=x@); default is "gf".
customTermPrinter =
  customData "Term printers, selected by option -printer=x" $
  [
    (strCI "gf", const prt) -- DEFAULT
   -- add your own term printers here
  ]
-- | Term transformer database (flag @-transform=x@): each entry maps a
-- grammar and a tree to a list of result trees (empty = rejected,
-- singleton = transformed). The first entry ("identity") is the default.
customTermCommand =
  customData "Term transformers, selected by option -transform=x" $
  [
    (strCI "identity", \_ t -> [t]) -- DEFAULT
   ,(strCI "compute", \g t -> let gr = grammar g in
                              err (const [t]) return
                                (exp2termCommand gr (computeAbsTerm gr) t))
   -- reject trees with duplicated identifiers/atoms:
   ,(strCI "nodup", \_ t -> if (hasDupIdent $ tree2exp t) then [] else [t])
   ,(strCI "nodupatom", \_ t -> if (hasDupAtom $ tree2exp t) then [] else [t])
   ,(strCI "paraphrase", \g t -> let gr = grammar g in
                                 exp2termlistCommand gr (mkParaphrases gr) t)
   -- note: in "generate", the t in (Just t) is the outer tree argument;
   -- the comprehension's generator binding t shadows it afterwards.
   ,(strCI "generate", \g t -> let gr = grammar g
                                   cat = actCat $ tree2loc t --- not needed
                               in
                               [tr | t <- generateTrees noOptions gr cat 2 Nothing (Just t),
                                     Ok tr <- [annotate gr $ MM.qualifTerm (absId g) t]])
   ,(strCI "typecheck", \g t -> err (const []) (return . loc2tree)
                                  (reCheckStateReject (grammar g) (tree2loc t)))
   ,(strCI "solve", \g t -> err (const []) (return . loc2tree)
                              (solveAll (grammar g) (tree2loc t)
                               >>= rejectUnsolvable))
   ,(strCI "context", \g t -> err (const [t]) (return . loc2tree)
                                (contextRefinements (grammar g) (tree2loc t)))
   ,(strCI "reindex", \g t -> let gr = grammar g in
                              err (const [t]) return
                                (exp2termCommand gr (return . MM.reindexTerm) t))
   --- ,(strCI "delete", \g t -> [MM.mExp0])
   -- add your own term commands here
  ]
-- | Editor-state transformer database (flag @-edit=x@); the first entry
-- ("identity") is the default. The entries marked "done ad hoc on top
-- level" are identity here; per the original comments the real work is
-- apparently performed at the top level of the shell.
customEditCommand =
  customData "Editor state transformers, selected by option -edit=x" $
  [
    (strCI "identity", const return) -- DEFAULT
   ,(strCI "typecheck", \g -> reCheckState (grammar g))
   ,(strCI "solve", \g -> solveAll (grammar g))
   ,(strCI "context", \g -> contextRefinements (grammar g))
   ,(strCI "compute", \g -> computeSubTree (grammar g))
   ,(strCI "paraphrase", const return) --- done ad hoc on top level
   ,(strCI "generate", const return) --- done ad hoc on top level
   ,(strCI "transfer", const return) --- done ad hoc on top level
   -- add your own edit commands here
  ]
-- | String filter database (flag @-filter=x@): each entry maps a name to
-- a string transformer; the grammar argument is ignored by all of them.
-- The first entry ("identity") is the default.
customStringCommand =
  customData "String filters, selected by option -filter=x" $
  [ (strCI "identity", \_ -> id) -- DEFAULT
  , (strCI "erase",    \_ -> const "")
  , (strCI "take100",  \_ -> take 100)
  , (strCI "text",     \_ -> formatAsText)
  , (strCI "code",     \_ -> formatAsCode)
  ---- ,(strCI "latexfile", const $ mkLatexFile)
  , (strCI "length",   \_ -> show . length)
  -- add your own string commands here
  ]
-- | Parser database (flag @-parser=x@); the first entry ("chart",
-- deprecated) is the default.
customParser =
  customData "Parsers, selected by option -parser=x" $
  [
    (strCI "chart", PCFOld.parse "ibn" . stateCF) -- DEPRECATED
   ,(strCI "bottomup", PCF.parse "gb" . stateCF)
   ,(strCI "topdown", PCF.parse "gt" . stateCF)
   -- commented for now, since there's a bug in the incremental algorithm:
   -- ,(strCI "incremental", PCF.parse "ib" . stateCF)
   -- ,(strCI "incremental-bottomup", PCF.parse "ib" . stateCF)
   -- ,(strCI "incremental-topdown", PCF.parse "it" . stateCF)
   ,(strCI "old", chartParser . stateCF) -- DEPRECATED
   ,(strCI "myparser", myParser)
   -- add your own parsers here
  ]

-- | Tokenizer database (flag @-lexer=x@): per the type at the top of the
-- file, each tokenizer maps a string to a list of token-list alternatives;
-- sg wraps one tokenization as the single alternative. Default is "words".
customTokenizer =
  let sg = singleton in
  customData "Tokenizers, selected by option -lexer=x" $
  [
    (strCI "words", const $ sg . tokWords)
   ,(strCI "literals", const $ sg . tokLits)
   ,(strCI "vars", const $ sg . tokVars)
   ,(strCI "chars", const $ sg . map (tS . singleton))
   ,(strCI "code", const $ sg . lexHaskell)
   ,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr))
   ,(strCI "textvars", \gr -> sg . (lexTextVar $ stateIsWord gr))
   ,(strCI "text", const $ sg . lexText)
   ,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr))
   ,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
   ,(strCI "textlit", \gr -> sg . (lexTextLiteral $ stateIsWord gr))
   ,(strCI "codeC", const $ sg . lexC2M)
   ,(strCI "ignore", \gr -> sg . lexIgnore (stateIsWord gr) . tokLits)
   ,(strCI "subseqs", \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits)
   ,(strCI "codeCHigh", const $ sg . lexC2M' True)
   -- add your own tokenizers here
  ]
-- | Untokenizer database (flag @-unlexer=x@): post-processing from the
-- linearized token string to the output string; the grammar argument is
-- ignored by all entries. The first entry ("unwords") is the default.
customUntokenizer =
  customData "Untokenizers, selected by option -unlexer=x" $
  [
    (strCI "unwords", const $ id) -- DEFAULT
   ,(strCI "text", const $ formatAsText)
   ,(strCI "html", const $ formatAsHTML)
   ,(strCI "latex", const $ formatAsLatex)
   ,(strCI "code", const $ formatAsCode)
   ,(strCI "concat", const $ filter (not . isSpace))
   ,(strCI "textlit", const $ formatAsTextLit)
   ,(strCI "codelit", const $ formatAsCodeLit)
   -- BUG FIX: a duplicate entry (strCI "concat", const $ concatRemSpace)
   -- used to follow here. 'lookupCustom' uses 'lookup', which always finds
   -- the first "concat" above, so the duplicate was unreachable and only
   -- made "concat" appear twice in 'customInfo' menus. Removed:
   -- ,(strCI "concat", const $ concatRemSpace)
   ,(strCI "glue", const $ performBinds)
   ,(strCI "finnish", const $ performBindsFinnish)
   ,(strCI "reverse", const $ reverse)
   ,(strCI "bind", const $ performBinds) -- backward compat
   -- add your own untokenizers here
  ]
-- | Character coding database (flag @-coding=x@): conversions from input
-- encodings to the internal unicode coding. The default ("latin1") is the
-- identity, i.e. input is passed through unchanged.
customUniCoding =
  customData "Alphabet codings, selected by option -coding=x" $
  [
    (strCI "latin1", id) -- DEFAULT
   ,(strCI "utf8", decodeUTF8)
   ,(strCI "greek", treat [] mkGreek)
   ,(strCI "hebrew", mkHebrew)
   ,(strCI "arabic", mkArabic)
   ,(strCI "russian", treat [] mkRussian)
   ,(strCI "russianKOI8", mkRusKOI8)
   ,(strCI "ethiopic", mkEthiopic)
   ,(strCI "tamil", mkTamil)
   ,(strCI "OCScyrillic", mkOCSCyrillic)
   ,(strCI "devanagari", mkDevanagari)
   ,(strCI "latinasupplement", mkLatinASupplement)
   ,(strCI "japanese", mkJapanese)
   ,(strCI "arabic0600", mkArabic0600)
   ,(strCI "extendedarabic", mkExtendedArabic)
   ,(strCI "extradiacritics", mkExtraDiacritics)
  ]

View File

@@ -0,0 +1,435 @@
----------------------------------------------------------------------
-- |
-- Module : Editing
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:45 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.14 $
--
-- generic tree editing, with some grammar notions assumed. AR 18\/8\/2001.
-- 19\/6\/2003 for GFC
-----------------------------------------------------------------------------
module GF.UseGrammar.Editing where
import GF.Grammar.Abstract
import qualified GF.Canon.GFC as GFC
import GF.Grammar.TypeCheck
import GF.Grammar.LookAbs
import GF.Grammar.AbsCompute
import GF.Grammar.Macros (errorCat)
import GF.Data.Operations
import GF.Data.Zipper
-- generic tree editing, with some grammar notions assumed. AR 18/8/2001
-- 19/6/2003 for GFC
-- | Canonical grammar used by the editor.
type CGrammar = GFC.CanonGrammar

-- | An editing state: a zipper location focused somewhere in the tree.
type State = Loc TrNode

-- | the "empty" state
initState :: State
initState = tree2loc uTree

-- | True when the focus is at the root of the tree.
isRootState :: State -> Bool
isRootState st =
  case actPath st of
    Top -> True
    _   -> False

-- | The subtree currently in focus.
actTree :: State -> Tree
actTree (Loc (tr,_)) = tr

-- | The path from the root down to the focus.
actPath :: State -> Path TrNode
actPath (Loc (_,path)) = path

-- | The value stored at the focused node.
actVal :: State -> Val
actVal st = valNode (nodeTree (actTree st))

-- | Category of the focus; falls back to 'errorCat' if the value has none.
actCat :: State -> Cat
actCat st = errVal errorCat (val2cat (actVal st)) ---- undef

-- | Atom (head symbol) of the focused node.
actAtom :: State -> Atom
actAtom st = atomTree (actTree st)

-- | Head function of the focus, failing when the atom is not a function.
actFun :: State -> Err Fun
actFun st =
  case actAtom st of
    AtC f -> return f
    other -> prtBad "active atom: expected function, found" other

-- | The focused subtree as an expression.
actExp :: State -> Exp
actExp st = tree2exp (actTree st)

-- | current local bindings
actBinds :: State -> Binds
actBinds st = bindsNode (nodeTree (actTree st))

-- | constraints in current subtree
actConstrs :: State -> Constraints
actConstrs st = allConstrsTree (actTree st)

-- | constraints in the whole tree
allConstrs :: State -> Constraints
allConstrs st = allConstrsTree (loc2tree st)

-- | metas in current subtree
actMetas :: State -> [Meta]
actMetas st = metasTree (actTree st)

-- | metas in the whole tree
allMetas :: State -> [Meta]
allMetas st = metasTree (loc2tree st)

-- | Body of the focused subtree.
actTreeBody :: State -> Tree
actTreeBody st = bodyTree (actTree st)

-- | Bindings introduced on the path above the focus.
allPrevBinds :: State -> Binds
allPrevBinds st = concatMap bindsNode (traverseCollect (actPath st))

-- | Local bindings followed by all bindings above the focus.
allBinds :: State -> Binds
allBinds st = actBinds st ++ allPrevBinds st

-- | symbol generator for VGen
actGen :: State -> Int
actGen st = length (allBinds st)

-- | Variables bound above the focus.
allPrevVars :: State -> [Var]
allPrevVars st = map fst (allPrevBinds st)

-- | All variables in scope at the focus.
allVars :: State -> [Var]
allVars st = map fst (allBinds st)

-- | Index for the next generated variable (same count as 'actGen').
vGenIndex :: State -> Int
vGenIndex st = length (allBinds st)

-- | Is the focused atom a metavariable?
actIsMeta :: State -> Bool
actIsMeta st = atomIsMeta (actAtom st)

-- | The focused metavariable, failing when the focus is not a meta.
actMeta :: State -> Err Meta
actMeta st = getMetaAtom (actAtom st)

-- | meta substs are not only on the actual path...
entireMetaSubst :: State -> MetaSubst
entireMetaSubst st = concatMap metaSubstsNode (scanTree (loc2tree st))
-- | A tree is complete when no node of it carries a metavariable atom.
-- (Was: null . filter atomIsMeta . map atomNode . scanTree.)
isCompleteTree :: Tree -> Bool
isCompleteTree = not . any (atomIsMeta . atomNode) . scanTree

-- | A state is complete when its whole tree is.
isCompleteState :: State -> Bool
isCompleteState = isCompleteTree . loc2tree
-- | Build an initial state: a single node holding a fresh metavariable
-- atom with the value of the given category in the given context.
initStateCat :: Context -> Cat -> Err State
initStateCat cont cat = do
  return $ tree2loc (Tr (mkNode [] mAtom (cat2val cont cat) ([],[]), []))

-- | this function only concerns the body of an expression...
annotateInState :: CGrammar -> Exp -> State -> Err Tree
annotateInState gr exp state = do
  let binds = allBinds state
      val = actVal state
  annotateIn gr binds exp (Just val)

-- | ...whereas this one works with lambda abstractions
annotateExpInState :: CGrammar -> Exp -> State -> Err Tree
annotateExpInState gr exp state = do
  let cont = allPrevBinds state -- NOTE(review): cont is bound but unused here
      binds = actBinds state
      val = actVal state
  typ <- mkProdVal binds val
  annotateIn gr binds exp (Just typ)

-- | Transform an expression and re-annotate the result in the given state.
treeByExp :: (Exp -> Err Exp) -> CGrammar -> Exp -> State -> Err Tree
treeByExp trans gr exp0 state = do
  exp <- trans exp0
  annotateExpInState gr exp state
-- * actions
-- | An editor action: a partial (failable) transformation of the state.
type Action = State -> Err State

-- | Start editing a fresh tree of the given category.
newCat :: CGrammar -> Cat -> Action
newCat gr cat@(m,c) _ = do
  cont <- lookupCatContext gr m c
  testErr (null cont) "start cat must have null context" -- for easier meta refresh
  initStateCat cont cat

-- | Start editing a fresh tree whose root is the given function; the
-- start category is the value category of the function's type.
newFun :: CGrammar -> Fun -> Action
newFun gr fun@(m,c) _ = do
  typ <- lookupFunType gr m c
  cat <- valCat typ
  st1 <- newCat gr cat initState
  refineWithAtom True gr (qq fun) st1

-- | Replace the whole state with the given tree, focused at its root.
newTree :: Tree -> Action
newTree t _ = return $ tree2loc t

-- | Type check an expression (with refreshed metas) and make it the state.
newExpTC :: CGrammar -> Exp -> Action
newExpTC gr t s = annotate gr (refreshMetas [] t) >>= flip newTree s

-- Focus-moving actions over metavariables.
goNextMeta, goPrevMeta, goNextNewMeta, goPrevNewMeta, goNextMetaIfCan :: Action
goNextMeta = repeatUntilErr actIsMeta goAhead -- can be the location itself
goPrevMeta = repeatUntilErr actIsMeta goBack
goNextNewMeta s = goAhead s >>= goNextMeta -- always goes away from location
goPrevNewMeta s = goBack s >>= goPrevMeta
goNextMetaIfCan = actionIfPossible goNextMeta

-- | Run an action, keeping the original state when the action fails.
actionIfPossible :: Action -> Action
actionIfPossible a s = return $ errVal s (a s)

goFirstMeta, goLastMeta :: Action
goFirstMeta s = goNextMeta $ goRoot s
goLastMeta s = goLast s >>= goPrevMeta

-- | True when 'goNextMeta' fails, i.e. no meta is reachable from here on.
noMoreMetas :: State -> Bool
noMoreMetas = err (const True) (const False) . goNextMeta
-- | Overwrite the focused subtree with the given tree.
replaceSubTree :: Tree -> Action
replaceSubTree tree state = changeLoc state tree

-- | If the focus is a metavariable, refine it with the tree; otherwise
-- replace the focused subtree outright and re-typecheck the whole state.
refineOrReplaceWithTree :: Bool -> CGrammar -> Tree -> Action
refineOrReplaceWithTree der gr tree state = case actMeta state of
  Ok m -> refineWithTreeReal der gr tree m state
  _ -> do
    let tree1 = addBinds (actBinds state) $ tree
    state' <- replaceSubTree tree1 state
    reCheckState gr state'

-- | Refine the focused metavariable with a tree; fails when the focus
-- is not a metavariable.
refineWithTree :: Bool -> CGrammar -> Tree -> Action
refineWithTree der gr tree state = do
  m <- errIn "move pointer to meta" $ actMeta state
  refineWithTreeReal der gr tree m state

-- | Worker for refinement: substitute the tree for the meta, split the
-- resulting constraints into a meta substitution, and propagate that
-- substitution (with constraint reduction) over the whole tree.
refineWithTreeReal :: Bool -> CGrammar -> Tree -> Meta -> Action
refineWithTreeReal der gr tree m state = do
  state' <- replaceSubTree tree state
  let cs0 = allConstrs state'
      (cs,ms) = splitConstraints gr cs0 -- NOTE(review): cs is unused here
      v = vClos $ tree2exp (bodyTree tree)
      msubst = (m,v) : ms
  metaSubstRefinements gr msubst $
    mapLoc (reduceConstraintsNode gr . performMetaSubstNode msubst) state'
-- without dep. types, no constraints, no grammar needed - simply: do
--   testErr (actIsMeta state) "move pointer to meta"
--   replaceSubTree tree state
-- | Apply a refinement action at every metavariable reachable in the tree,
-- counting successes; if no refinement succeeded, return the original
-- state, otherwise move the focus back to the first remaining meta.
refineAllNodes :: Action -> Action
refineAllNodes act state = do
  let estate0 = goFirstMeta state
  case estate0 of
    Bad _ -> return state
    Ok state0 -> do
      (state',n) <- tryRefine 0 state0
      if n==0
        then return state
        else actionIfPossible goFirstMeta state'
  where
    tryRefine n state = err (const $ return (state,n)) return $ do
      state' <- goNextMeta state
      -- NOTE(review): meta is bound but unused; it appears to act only as
      -- a sanity guard (fails when the focus is not a meta).
      meta <- actMeta state'
      case act state' of
        Ok state2 -> tryRefine (n+1) state2
        _ -> err (const $ return (state',n)) return $ do
          state2 <- goNextNewMeta state'
          tryRefine n state2

-- | Refine every meta that has a unique refinement.
uniqueRefinements :: CGrammar -> Action
uniqueRefinements = refineAllNodes . uniqueRefine

-- | Refine every meta determined by the given meta substitution.
metaSubstRefinements :: CGrammar -> MetaSubst -> Action
metaSubstRefinements gr = refineAllNodes . metaSubstRefine gr

-- | Refine every meta that has a unique variable refinement in context.
contextRefinements :: CGrammar -> Action
contextRefinements gr = refineAllNodes contextRefine where
  contextRefine state = case varRefinementsState state of
    [(e,_)] -> refineWithAtom False gr e state
    _ -> Bad "no unique refinement in context"
  varRefinementsState state =
    [r | r@(e,_) <- refinementsState gr state, isVariable e]
-- | Refine the focused meta when the grammar offers exactly one
-- refinement. A single candidate whose flag (the Bool in the refinement
-- info) marks it as circular is rejected rather than applied.
-- (Fixed: the first alternative bound an unused variable e.)
uniqueRefine :: CGrammar -> Action
uniqueRefine gr state = case refinementsState gr state of
  [(_,(_,True))] -> Bad "only circular refinement"
  [(e,_)] -> refineWithAtom False gr e state
  _ -> Bad "no unique refinement"
-- | Refine the focused metavariable using a meta substitution: look up
-- the meta's value and refine with the corresponding expression.
metaSubstRefine :: CGrammar -> MetaSubst -> Action
metaSubstRefine gr msubst state = do
  m <- errIn "move pointer to meta" $ actMeta state
  case lookup m msubst of
    Just v -> do
      e <- val2expSafe v
      refineWithExpTC False gr e state
    _ -> Bad "no metavariable substitution available"

-- | Refine the focus with a type-checked expression; the expression's
-- metas are refreshed so they cannot clash with metas already in the tree.
refineWithExpTC :: Bool -> CGrammar -> Exp -> Action
refineWithExpTC der gr exp0 state = do
  let oldmetas = allMetas state
      exp = refreshMetas oldmetas exp0
  tree0 <- annotateInState gr exp state
  let tree = addBinds (actBinds state) $ tree0
  refineWithTree der gr tree state

-- | Refine the focus with an atom: look up its type, build the
-- corresponding expression (with fresh variables), then type check it.
refineWithAtom :: Bool -> CGrammar -> Ref -> Action -- function or variable
refineWithAtom der gr at state = do
  val <- lookupRef gr (allBinds state) at
  typ <- val2exp val
  let oldvars = allVars state
  exp <- ref2exp oldvars typ at
  refineWithExpTC der gr exp state
-- | in this command, we know that the result is well-typed, since computation
-- rules have been type checked and the result is equal
computeSubTree :: CGrammar -> Action
computeSubTree gr state = do
  let exp = tree2exp (actTree state)
  tree <- treeByExp (compute gr) gr exp state
  replaceSubTree tree state

-- | but here we don't, since the transfer flag isn't type checked,
-- and computing the transfer function is not checked to preserve equality;
-- hence the whole state is re-typechecked afterwards. With no transfer
-- function the state is returned unchanged.
transferSubTree :: Maybe Fun -> CGrammar -> Action
transferSubTree Nothing _ s = return s
transferSubTree (Just fun) gr state = do
  let exp = mkApp (qq fun) [tree2exp $ actTree state]
  tree <- treeByExp (compute gr) gr exp state
  state' <- replaceSubTree tree state
  reCheckState gr state'

-- | Delete the focused subtree: at the root this restarts editing in the
-- current category, elsewhere the focus is replaced by a fresh meta.
deleteSubTree :: CGrammar -> Action
deleteSubTree gr state =
  if isRootState state
    then do
      let cat = actCat state
      newCat gr cat state
    else do
      let metas = allMetas state
          binds = actBinds state
          exp = refreshMetas metas mExp0
      tree <- annotateInState gr exp state
      state' <- replaceSubTree (addBinds binds tree) state
      reCheckState gr state' --- must be unfortunately done. 20/11/2001
-- | Wrap the focused tree body as the i:th argument of the given function,
-- with fresh metas for the remaining arguments; re-typecheck afterwards.
wrapWithFun :: CGrammar -> (Fun,Int) -> Action
wrapWithFun gr (f@(m,c),i) state = do
  typ <- lookupFunType gr m c
  let olds = allPrevVars state
      oldmetas = allMetas state
  exp0 <- fun2wrap olds ((f,i),typ) (tree2exp (actTreeBody state))
  let exp = refreshMetas oldmetas exp0
  tree0 <- annotateInState gr exp state
  let tree = addBinds (actBinds state) $ tree0
  state' <- replaceSubTree tree state
  reCheckState gr state' --- must be unfortunately done. 20/11/2001

-- | Rename a bound variable (x to x') in the focused subtree, checking
-- that the new name clashes neither with bindings above the focus nor
-- with the other local bindings.
alphaConvert :: CGrammar -> (Var,Var) -> Action
alphaConvert gr (x,x') state = do
  let oldvars = allPrevVars state
  testErr (notElem x' oldvars) ("clash with previous bindings" +++ show x')
  let binds0 = actBinds state
      vars0 = map fst binds0
  testErr (notElem x' vars0) ("clash with other bindings" +++ show x')
  let binds = [(if z==x then x' else z, t) | (z,t) <- binds0]
      vars = map fst binds
  exp' <- alphaConv (vars ++ oldvars) (x,x') (tree2exp (actTreeBody state))
  let exp = mkAbs vars exp'
  tree <- annotateExpInState gr exp state
  replaceSubTree tree state

-- | Change the head atom of the focus to the given function.
changeFunHead :: CGrammar -> Fun -> Action
changeFunHead gr f state = do
  let state' = changeNode (changeAtom (const (atomC f))) state
  reCheckState gr state' --- must be done because of constraints elsewhere

-- | Replace the focus by its i:th subtree (peeling off the head).
-- NOTE(review): f, m and c are bound but unused; only i is used.
peelFunHead :: CGrammar -> (Fun,Int) -> Action
peelFunHead gr (f@(m,c),i) state = do
  tree0 <- nthSubtree i $ actTree state
  let tree = addBinds (actBinds state) $ tree0
  state' <- replaceSubTree tree state
  reCheckState gr state' --- must be unfortunately done. 20/11/2001
-- | Re-annotate (re-typecheck) the whole tree; an expensive operation.
reCheckState :: CGrammar -> State -> Err State
reCheckState gr st = do
  checked <- annotate gr (tree2exp (loc2tree st))
  return (tree2loc checked)

-- | Like 'reCheckState', but returns Bad instead of a tree that still
-- carries unsolvable constraints.
reCheckStateReject :: CGrammar -> State -> Err State
reCheckStateReject gr st = reCheckState gr st >>= rejectUnsolvable

-- | Fail when the focused node still carries constraints.
rejectUnsolvable :: State -> Err State
rejectUnsolvable st
  | null cs = return st
  | otherwise = Bad $ "Unsolvable constraints:" +++ prConstraints cs
  where
    cs = constrsNode (nodeTree (actTree st))
-- | extract metasubstitutions from constraints and solve them
-- (re-typecheck, split the constraints into a meta substitution, then
-- propagate it over the tree; the whole pass is run twice)
solveAll :: CGrammar -> State -> Err State
solveAll gr st = solve st >>= solve where
  solve st0 = do ---- why need twice?
    st <- reCheckState gr st0
    let cs0 = allConstrs st
        (cs,ms) = splitConstraints gr cs0 -- NOTE(review): cs is unused here
    metaSubstRefinements gr ms $
      mapLoc (reduceConstraintsNode gr . performMetaSubstNode ms) st

-- * active refinements

-- | Possible refinements of the focused metavariable, each with its value
-- and a circularity flag; empty when the focus is not a meta.
refinementsState :: CGrammar -> State -> [(Term,(Val,Bool))]
refinementsState gr state =
  let filt = possibleRefVal gr state in
  if actIsMeta state
    then refsForType filt gr (allBinds state) (actVal state)
    else []
-- | Functions (with argument positions) that could wrap the focused tree:
-- all functions on the active value, filtered by the possibility
-- heuristic except at the root; empty when the focus is a metavariable.
wrappingsState :: CGrammar -> State -> [((Fun,Int),Type)]
wrappingsState gr state
  | actIsMeta state = []
  | isRootState state = funs
  | otherwise = [rule | rule@(_,typ) <- funs, possibleRefVal gr state aval typ]
  where
    funs = funsOnType (possibleRefVal gr state) gr aval
    aval = actVal state

-- | (function, argument index) pairs by which the focused tree could be
-- peeled; empty when the focus is a metavariable. At the root every
-- argument position of the head function qualifies; elsewhere only those
-- that pass the possibility heuristic.
peelingsState :: CGrammar -> State -> [(Fun,Int)]
peelingsState gr state
  | actIsMeta state = []
  | isRootState state =
      err (const []) (\f -> [(f,i) | i <- [0 .. arityTree tree - 1]]) $ actFun state
  | otherwise =
      err (const [])
          (\f -> [fi | (fi@(g,_),typ) <- funs,
                       possibleRefVal gr state aval typ,g==f]) $ actFun state
  where
    funs = funsOnType (possibleRefVal gr state) gr aval
    aval = actVal state
    tree = actTree state

-- | Alternative head functions with exactly the same type as the current
-- head; empty when the head is not a function.
headChangesState :: CGrammar -> State -> [Fun]
headChangesState gr state = errVal [] $ do
  f@(m,c) <- funAtom (actAtom state)
  typ0 <- lookupFunType gr m c
  return [fun | (fun,typ) <- funRulesOf gr, fun /= f, typ == typ0]
--- alpha-conv !
-- | Heuristic test whether a refinement of the given type could fill a
-- hole expecting the given value: build the corresponding constraint and
-- ask whether it is possibly solvable. On failure answer True. --- was False
-- (Fixed: "cs <- return [...]" replaced by a let; the unused binding
-- "gen = actGen state" was removed -- the commented-out precise check
-- would have been: eqVal (actGen state) val (vClos vtyp).)
possibleRefVal :: CGrammar -> State -> Val -> Type -> Bool
possibleRefVal gr state val typ = errVal True $ do
  vtyp <- valType typ
  let cs = [(val, vClos vtyp)] --- only poss cs
  return $ possibleConstraints gr cs --- a simple heuristic
-- | Heuristic test whether the given tree could fit the focused hole:
-- pair the active value with the tree's value as a constraint and ask
-- whether it is possibly solvable. On failure answer True. --- was False
-- (Fixed: "cs <- return [...]" replaced by a let; the unused binding
-- "gen = actGen state" was removed -- the commented-out precise check
-- would have been: eqVal gen aval gval.)
possibleTreeVal :: CGrammar -> State -> Tree -> Bool
possibleTreeVal gr state tree = errVal True $ do
  let aval = actVal state
      gval = valTree tree
      cs = [(aval, gval)] --- only poss cs
  return $ possibleConstraints gr cs --- a simple heuristic

View File

@@ -0,0 +1,116 @@
----------------------------------------------------------------------
-- |
-- Module : Generate
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/12 12:38:30 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.16 $
--
-- Generate all trees of given category and depth. AR 30\/4\/2004
--
-- (c) Aarne Ranta 2004 under GNU GPL
--
-- Purpose: to generate corpora. We use simple types and don't
-- guarantee the correctness of bindings\/dependences.
-----------------------------------------------------------------------------
module GF.UseGrammar.Generate (generateTrees,generateAll) where
import GF.Canon.GFC
import GF.Grammar.LookAbs
import GF.Grammar.PrGrammar
import GF.Grammar.Macros
import GF.Grammar.Values
import GF.Grammar.Grammar (Cat)
import GF.Grammar.SGrammar
import GF.Data.Operations
import GF.Data.Zipper
import GF.Infra.Option
import Data.List
-- Generate all trees of given category and depth. AR 30/4/2004
-- (c) Aarne Ranta 2004 under GNU GPL
--
-- Purpose: to generate corpora. We use simple types and don't
-- guarantee the correctness of bindings/dependences.
-- | The main entry point for corpus generation: generate trees of the
-- given category up to depth @n@.
--
--   * @mn@ optionally bounds the number of rules tried per branch
--   * @mt@ optionally gives a partial tree whose metavariables are filled
generateTrees ::
  Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp]
generateTrees opts gr cat n mn mt = map str2tr $ generate gr' opts cat' n mn mt'
  where
    gr'  = gr2sgr opts emptyProbs gr
    cat' = prt $ snd cat
    mt'  = fmap tr2str mt
    -- NB: an unused binding (ifm = oElem showOld opts, earlier
    -- oElem withMetas opts) has been removed; the algorithm choice is
    -- made inside 'generate' directly from opts.
-- | Generate trees of the given category to depth 10 and feed each one to
-- the supplied action; the count is bounded by the -number flag.
generateAll :: Options -> (Exp -> IO ()) -> GFCGrammar -> Cat -> IO ()
generateAll opts io gr cat = mapM_ (io . str2tr) (bound (gen catName))
  where
    bound   = optIntOrAll opts flagNumber
    sgr     = gr2sgr opts emptyProbs gr
    catName = prt (snd cat)
    gen c   = generate sgr opts c 10 Nothing Nothing
------------------------------------------
-- do the main thing with a simpler data structure
-- the first Int gives tree depth, the second constrains subtrees
-- chosen for each branch. A small number, such as 2, is a good choice
-- if the depth is large (more than 3)
-- If a tree is given as argument, generation concerns its metavariables.
generate :: SGrammar -> Options -> SCat -> Int -> Maybe Int -> Maybe STree -> [STree]
generate gr opts cat i mn mt = case mt of
  Nothing -> gen opts cat
  Just t -> genM t   -- fill in the metavariables of a partial tree
  where
    --- now use ifm to choose between two algorithms
    -- "mem": memoizing bottom-up table; "nonub": depth iteration keeping
    -- duplicates; default: depth iteration with nub (quadratic but small).
    gen opts cat
      | oElem (iOpt "mem") opts = concat $ errVal [] $ lookupTree id cat $ allTrees -- -old
      | oElem (iOpt "nonub") opts = concatMap (\i -> gener i cat) [0..i-1] -- some duplicates
      | otherwise = nub $ concatMap (\i -> gener i cat) [0..i-1] -- new
    -- depth 0: only nullary rules of the category
    gener 0 c = [SApp (f, []) | (f,([],_)) <- funs c]
    -- depth i: every rule applied to all combinations of depth-(i-1) subtrees
    gener i c = [
      tr |
        (f,(cs,_)) <- funs c,
        let alts = map (gener (i-1)) cs,
        ts <- combinations alts,
        let tr = SApp (f, ts)
        -- depth tr >= i -- NO!
      ]
    allTrees = genAll i
    -- dynamic generation: iterate a one-step extension of a table mapping
    -- each category to its trees grouped by depth
    genAll :: Int -> BinTree SCat [[STree]]
    genAll i = iter i genNext (mapTree (\ (c,_) -> (c,[[]])) gr)
    iter 0 f tr = tr
    iter n f tr = iter (n-1) f (f tr)
    genNext tr = mapTree (genNew tr) tr
    -- prepend the group of trees of the next depth for one category
    genNew tr (cat,ts) = let size = length ts in
      (cat, [SApp (f, xs) |
              (f,(cs,_)) <- funs cat,
              xs <- combinations (map look cs),
              let fxs = SApp (f, xs),
              depth fxs == size]   -- keep only trees of exactly this depth
            : ts)
      where
        look c = concat $ errVal [] $ lookupTree id c tr
    -- rules on a category, optionally truncated by mn
    funs cat = maybe id take mn $ errVal [] $ lookupTree id cat gr
    -- metavariable filling: generate at each SMeta, combine elsewhere
    genM t = case t of
      SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)]
      SMeta k -> gen opts k
      _ -> [t]

View File

@@ -0,0 +1,74 @@
----------------------------------------------------------------------
-- |
-- Module : GetTree
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/15 16:22:02 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.9 $
--
-- how to form linearizable trees from strings and from terms of different levels
--
-- 'String' --> raw 'Term' --> annot, qualif 'Term' --> 'Tree'
-----------------------------------------------------------------------------
module GF.UseGrammar.GetTree where
import GF.Canon.GFC
import GF.Grammar.Values
import qualified GF.Grammar.Grammar as G
import GF.Infra.Ident
import GF.Grammar.MMacros
import GF.Grammar.Macros
import GF.Compile.Rename
import GF.Grammar.TypeCheck
import GF.Grammar.AbsCompute (beta)
import GF.Compile.PGrammar
import GF.Compile.ShellState
import GF.Data.Operations
import Data.Char
-- how to form linearizable trees from strings and from terms of different levels
--
-- String --> raw Term --> annot, qualif Term --> Tree
-- | Parse a string into a tree; an undefined tree on failure.
string2tree :: StateGrammar -> String -> Tree
string2tree gr s = errVal uTree (string2treeErr gr s)

-- | Parse a string into a tree, reporting failure in 'Err'.  Pipeline:
-- parse a raw term, beta-reduce, refresh metavariables, qualify constants
-- with the abstract module, then type-annotate.
string2treeErr :: StateGrammar -> String -> Err Tree
string2treeErr _ "" = Bad "empty string"
string2treeErr gr s = do
  raw <- pTerm s
  let reduced   = beta [] raw
      refreshed = refreshMetas [] reduced
      qualified = qualifTerm (absId gr) refreshed
  annotate (grammar gr) qualified
-- | Qualify a bare category (or function) name with the state's abstract
-- module.
string2Cat, string2Fun :: StateGrammar -> String -> (Ident,Ident)
string2Cat gr name = (absId gr, identC name)
string2Fun = string2Cat

-- | Split a qualified name \"Module.name\" at the first dot into its two
-- identifiers.
strings2Cat, strings2Fun :: String -> (Ident,Ident)
strings2Cat s = (identC modName, identC (drop 1 rest))
  where
    (modName, rest) = break (== '.') s
strings2Fun = strings2Cat
-- | Interpret a user-supplied string as a reference term: generated
-- variables (x_...), string literals, integer literals, qualified
-- constants (containing a dot), or plain variables — in that order.
string2ref :: StateGrammar -> String -> Err G.Term
string2ref gr s = case s of
  'x':'_':ds -> return $ freshAsTerm ds --- hack for generated vars
  -- string literal: strip the surrounding quotes
  -- NOTE(review): assumes a matching closing quote is present — confirm
  '"':_:_ -> return $ G.K $ init $ tail s
  -- integer literal; 'read' is safe here since all chars are digits
  _:_ | all isDigit s -> return $ G.EInt $ read s
  _ | elem '.' s -> return $ uncurry G.Q $ strings2Fun s
  _ -> return $ G.Vr $ identC s
-- | Read a category name: already qualified if it contains a dot,
-- otherwise qualified with the state's abstract module.
string2cat :: StateGrammar -> String -> Err G.Cat
string2cat gr s
  | '.' `elem` s = return (strings2Fun s)
  | otherwise    = return (absId gr, identC s)

View File

@@ -0,0 +1,162 @@
----------------------------------------------------------------------
-- |
-- Module : Information
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/05 20:02:20 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.7 $
--
-- information on module, category, function, operation, parameter,...
-- AR 16\/9\/2003.
-- uses source grammar
-----------------------------------------------------------------------------
module GF.UseGrammar.Information (
showInformation,
missingLinCanonGrammar
) where
import GF.Grammar.Grammar
import GF.Infra.Ident
import GF.Infra.Modules
import GF.Infra.Option
import GF.CF.CF
import GF.CF.PPrCF
import GF.Compile.ShellState
import GF.Grammar.PrGrammar
import GF.Grammar.Lookup
import GF.Grammar.Macros (zIdent)
import qualified GF.Canon.GFC as GFC
import qualified GF.Canon.AbsGFC as AbsGFC
import GF.Data.Operations
import GF.Infra.UseIO
-- information on module, category, function, operation, parameter,... AR 16/9/2003
-- uses source grammar
-- | Top-level entry: print everything known about an identifier, or a
-- notice when it is not in scope.
showInformation :: Options -> ShellState -> Ident -> IOE ()
showInformation opts st c = do
  infos <- ioeErr (getInformation opts st c)
  case infos of
    [] -> putStrLnE "Identifier not in scope"
    _  -> mapM_ (putStrLnE . render) infos
  where
    render (i,m) = prInformation opts c i ++ "file:" +++ m ++ "\n"
-- | the data type of different kinds of information
data Information =
   IModAbs SourceAbs            -- ^ abstract source module
 | IModRes SourceRes            -- ^ resource source module
 | IModCnc SourceCnc            -- ^ concrete source module
 | IModule SourceAbs -- ^ to be deprecated
 | ICatAbs Ident Context [Ident]     -- ^ abstract category with its context
 | ICatCnc Ident Type [CFRule] Term  -- ^ concrete category: lin type etc.
 | IFunAbs Ident Type (Maybe Term)   -- ^ abstract function, optional def
 | IFunCnc Ident Type [CFRule] Term  -- ^ concrete function: linearization
 | IOper Ident Type Term             -- ^ resource operation
 | IParam Ident [Param] [Term]       -- ^ parameter type: constructors, values
 | IValue Ident Type                 -- ^ parameter constructor

-- aliases for canonical qualified identifiers
type CatId = AbsGFC.CIdent
type FunId = AbsGFC.CIdent
-- | Render one piece of information as lines of text, headed by the
-- identifier itself.
-- NOTE(review): the case is not exhaustive — IModAbs, IModRes and IModCnc
-- have no branch and would crash here if 'getInformation' ever produced
-- them; confirm they are indeed never built.
prInformation :: Options -> Ident -> Information -> String
prInformation opts c i = unlines $ prt c : case i of
  IModule m -> [
    "module of type" +++ show (mtype m),
    "extends" +++ show (extends m),
    "opens" +++ show (opens m),
    "defines" +++ unwords (map prt (ownConstants (jments m)))
    ]
  ICatAbs m co _ -> [
    "category in abstract module" +++ prt m,
    if null co then "not a dependent type"
    else "dependent type with context" +++ prContext co
    ]
  ICatCnc m ty cfs tr -> [
    "category in concrete module" +++ prt m,
    "linearization type" +++ prt ty
    ]
  IFunAbs m ty _ -> [
    "function in abstract module" +++ prt m,
    "type" +++ prt ty
    ]
  IFunCnc m ty cfs tr -> [
    "function in concrete module" +++ prt m,
    "linearization" +++ prt tr
    --- "linearization type" +++ prt ty
    ]
  IOper m ty tr -> [
    "operation in resource module" +++ prt m,
    "type" +++ prt ty,
    "definition" +++ prt tr
    ]
  IParam m ty ts -> [
    "parameter type in resource module" +++ prt m,
    "constructors" +++ unwords (map prParam ty),
    "values" +++ unwords (map prt ts)
    ]
  IValue m ty -> [
    "parameter constructor in resource module" +++ prt m,
    "type" +++ show ty
    ]
-- | also finds out if an identifier is defined in many places:
-- looks up the identifier as a module name, then in every source module,
-- then in every canonical module (the latter is not yet implemented).
getInformation :: Options -> ShellState -> Ident -> Err [(Information,FilePath)]
getInformation opts st c = allChecks $ [
  do
    m <- lookupModule src c
    case m of
      ModMod mo -> returnm c $ IModule mo
      _ -> prtBad "not a source module" c
  ] ++ map lookInSrc ss ++ map lookInCan cs
  where
    -- look up the identifier as a judgement in one source module
    lookInSrc (i,m) = do
      j <- lookupInfo m c
      case j of
        AbsCat (Yes co) _ -> returnm i $ ICatAbs i co [] ---
        AbsFun (Yes ty) _ -> returnm i $ IFunAbs i ty Nothing ---
        CncCat (Yes ty) _ _ -> do
          ---- let cat = ident2CFCat i c
          ---- rs <- concat [rs | (c,rs) <- cf, ]
          returnm i $ ICatCnc i ty [] ty ---
        CncFun _ (Yes tr) _ -> do
          rs <- return []   -- CF rules not collected yet
          returnm i $ IFunCnc i tr rs tr ---
        ResOper (Yes ty) (Yes tr) -> returnm i $ IOper i ty tr
        ResParam (Yes (ps,_)) -> do
          ts <- allParamValues src (QC i c)
          returnm i $ IParam i ps ts
        ResValue (Yes (ty,_)) -> returnm i $ IValue i ty ---
        _ -> prtBad "nothing available for" i
    -- canonical modules: always fails for now
    lookInCan (i,m) = do
      Bad "nothing available yet in canonical"
    -- pair an information item with the file its module came from
    returnm m i = return (i, pathOfModule st m)
    src = srcModules st
    can = canModules st
    ss = [(i,m) | (i,ModMod m) <- modules src]
    cs = [(i,m) | (i,ModMod m) <- modules can]
    -- NOTE(review): cf is only referenced from commented-out code above
    cf = concatMap ruleGroupsOfCF $ map snd $ cfs st
-- | Constants defined in the module itself, i.e. judgements that are not
-- mere indirections ('AnyInd') to another module.
ownConstants :: BinTree Ident Info -> [Ident]
ownConstants js = [c | (c, info) <- tree2list js, definedHere info]
  where
    definedHere (AnyInd _ _) = False
    definedHere _            = True
-- | Coverage report: for each concrete module of the greatest abstract,
-- list the abstract constants that lack a judgement in that concrete.
missingLinCanonGrammar :: GFC.CanonGrammar -> String
missingLinCanonGrammar cgr =
  unlines $ concat [prt_ c : missing js | (c,js) <- concretes] where
    missing js = map ((" " ++) . prt_) $ filter (not . flip isInBinTree js) abstract
    abstract = err (const []) (map fst . tree2list . jments) $ lookupModMod cgr absId
    absId = maybe (zIdent "") id $ greatestAbstract cgr
    concretes = [(cnc,jments mo) |
                  cnc <- allConcretes cgr absId, Ok mo <- [lookupModMod cgr cnc]]

View File

@@ -0,0 +1,292 @@
----------------------------------------------------------------------
-- |
-- Module : Linear
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:41 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.19 $
--
-- Linearization for canonical GF. AR 7\/6\/2003
-----------------------------------------------------------------------------
module GF.UseGrammar.Linear where
import GF.Canon.GFC
import GF.Canon.AbsGFC
import qualified GF.Grammar.Abstract as A
import GF.Canon.MkGFC (rtQIdent) ----
import GF.Infra.Ident
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import GF.Canon.Look
import GF.Grammar.LookAbs
import GF.Grammar.MMacros
import GF.Grammar.TypeCheck (annotate) ----
import GF.Data.Str
import GF.Text.Text
----import TypeCheck -- to annotate
import GF.Data.Operations
import GF.Data.Zipper
import qualified GF.Infra.Modules as M
import Control.Monad
import Data.List (intersperse)
-- Linearization for canonical GF. AR 7/6/2003
-- | The worker function: linearize a Tree, return
-- a record. Possibly mark subtrees.
--
-- NB. Constants in trees are annotated by the name of the abstract module.
-- A concrete module name must be given to find (and choose) linearization rules.
--
-- - If no marking is wanted, 'noMark' :: 'Marker'.
--
-- - For xml marking, use 'markXML' :: 'Marker'
linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term
linearizeToRecord gr mk m = lin [] where
  -- ts is the path of argument positions from the root, used for marking
  lin ts t@(Tr (n,xs)) = errIn ("linearization of" +++ prt t) $ do
    let binds = A.bindsNode n
        at = A.atomNode n
        fmk = markSubtree mk n ts (A.isFocusNode n)
    c <- A.val2cat $ A.valNode n
    -- linearize the subtrees first, extending the path
    xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs
    -- then dispatch on the atom at the node
    r <- case at of
      A.AtC f -> lookf c t f >>= comp xs'
      A.AtI i -> return $ recInt i
      A.AtL s -> return $ recS $ tK $ prt at
      A.AtF i -> return $ recS $ tK $ prt at
      A.AtV x -> lookCat c >>= comp [tK (prt_ at)]
      A.AtM m -> lookCat c >>= comp [tK (prt_ at)]
    r' <- case r of -- to see stg in case the result is variants {}
      FV [] -> lookCat c >>= comp [tK (prt_ t)]
      _ -> return r
    return $ fmk $ mkBinds binds r'
  -- find a linearization rule, redirecting to the chosen concrete module
  look = lookupLin gr . redirectIdent m . rtQIdent
  comp = ccompute gr
  -- prepend the bound variables as extra record fields
  mkBinds bs bdy = case bdy of
    R fs -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs
    FV rs -> FV $ map (mkBinds bs) rs
  -- wrap a token into a one-field record {s = ...}
  recS t = R [Ass (L (identC "s")) t] ----
  -- the record form of an integer literal: last digit, string, size class
  recInt i = R [
    Ass (L (identC "last")) (EInt (rem i 10)),
    Ass (L (identC "s")) (tK $ show i),
    Ass (L (identC "size")) (EInt (if i > 9 then 1 else 0))
    ]
  lookCat = return . errVal defLindef . look
  ---- should always be given in the module
  -- to show missing linearization as term
  lookf c t f = case look f of
    Ok h -> return h
    _ -> lookCat c >>= comp [tK (prt_ t)]
-- | The special case of linearization without any subtree marking.
linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term
linearizeNoMark gr cnc tree = linearizeToRecord gr noMark cnc tree
-- | expand tables in linearized term to full, normal-order tables
--
-- NB expand from inside-out so that values are not looked up in copies of branches
expandLinTables :: CanonGrammar -> Term -> Err Term
expandLinTables gr t = case t of
  R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
  -- pattern-matching table: rebuild with one branch per parameter value
  T ty rs -> do
    rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
    let t' = T ty $ map (uncurry Cas) rs'
    vs <- alls ty
    ps <- mapM term2patt vs
    ts' <- mapM (comp . S t') $ vs   -- select each value, then compute
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
  -- value-vector table: pair each value with its pattern
  V ty ts0 -> do
    ts <- mapM exp ts0 -- expand from inside-out
    vs <- alls ty
    ps <- mapM term2patt vs
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
  FV ts -> liftM FV $ mapM exp ts
  _ -> composOp exp t
  where
    alls = allParamValues gr
    exp = expandLinTables gr
    comp = ccompute gr []
-- | Undo table optimizations for every module of a grammar.
unoptimizeCanon :: CanonGrammar -> CanonGrammar
unoptimizeCanon g@(M.MGrammar ms) = M.MGrammar [unoptimizeCanonMod g m | m <- ms]
-- | Undo table optimizations in one module: the linearization and
-- printname term of every concrete judgement is rewritten to expanded
-- table form; other modules pass through unchanged.
unoptimizeCanonMod :: CanonGrammar -> CanonModule -> CanonModule
unoptimizeCanonMod g = convMod where
  convMod (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os defs)) =
    (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os (mapTree convDef defs)))
  convMod mm = mm
  convDef (c,CncCat ty df pr) = (c,CncCat ty (convT df) (convT pr))
  convDef (f,CncFun c xs li pr) = (f,CncFun c xs (convT li) (convT pr))
  convDef cd = cd
  -- NOTE(review): expansion failure aborts with 'error' here — confirm
  -- that this is intended to be fatal
  convT = err error id . exp
  -- a version of expandLinTables that does not destroy share optimization
  -- (only single-branch tables are expanded; indirections I are computed)
  exp t = case t of
    R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
    T ty rs@[Cas [_] _] -> do
      rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
      let t' = T ty $ map (uncurry Cas) rs'
      vs <- alls ty
      ps <- mapM term2patt vs
      ts' <- mapM (comp . S t') $ vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
    V ty ts0 -> do
      ts <- mapM exp ts0 -- expand from inside-out
      vs <- alls ty
      ps <- mapM term2patt vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
    FV ts -> liftM FV $ mapM exp ts
    I _ -> comp t
    _ -> composOp exp t
    where
      alls = allParamValues g
      comp = ccompute g []
-- | From a linearization record, extract all tables as rows of
-- pattern\/token-list pairs.
rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]]
rec2strTables r = allLinValues r >>= mapM (mapPairsM (mapPairsM strsFromTerm))
-- | Keep only the tables stored under the distinguished \"s\" label.
strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]]
strTables2sTables ts = [tab | rec <- ts, (lab, tab) <- rec, lab == linLab0]

-- | The distinguished linearization label \"s\".
linLab0 :: Label
linLab0 = L (identC "s")

-- | Forget the patterns, keeping just the token lists.
sTables2strs :: [[([Patt],[Str])]] -> [[Str]]
sTables2strs tabs = [strs | tab <- tabs, (_, strs) <- tab]

-- | Render each token list as one string.
strs2strings :: [[Str]] -> [String]
strs2strings ss = map unlex ss

-- | Crude unlexing: glue the first token only (---- known simplification);
-- use an unlexer from Text to postprocess.
unlex :: [Str] -> String
unlex = concatMap sstr . take 1 ----
-- | Top-level: linearize a tree to one string.  'head' is safe because
-- 'linTree2strings' never returns an empty list (it falls back to an
-- error message).
linTree2string :: Marker -> CanonGrammar -> Ident -> A.Tree -> String
linTree2string mk gr m e = head (linTree2strings mk gr m e) -- never empty

-- | All string linearizations of a tree; a one-element error message list
-- on failure, so the result is never empty.
linTree2strings :: Marker -> CanonGrammar -> Ident -> A.Tree -> [String]
linTree2strings mk gr m e = err return id $ do
  record   <- linearizeToRecord gr mk m e
  expanded <- expandLinTables gr record
  tables   <- rec2strTables expanded
  let strings = strs2strings (sTables2strs (strTables2sTables tables))
  ifNull (prtBad "empty linearization of" e) return strings -- thus never empty

-- | Argument is a Tree, value is a list of strs; needed in Parsing.
allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str]
allLinsOfTree gr a e = err (singleton . str) id $ do
  let e' = e ---- annotateExp gr e (disabled)
  record   <- linearizeNoMark gr a e'
  expanded <- expandLinTables gr record
  tables   <- rec2strTables expanded
  return (concat (sTables2strs (strTables2sTables tables)))

-- | All linearizations as records of tables of terms.
allLinsAsRec :: CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],Term)])]]
allLinsAsRec gr c t = do
  lin      <- linearizeNoMark gr c t
  expanded <- expandLinTables gr lin
  allLinValues expanded
-- | the value is a list of structures arranged as records of tables of strings
-- only taking into account string fields
-- True: sep. by /, False: sep by \n
allLinTables ::
  Bool -> CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],[String])])]]
allLinTables slash gr c t = do
  r' <- allLinsAsRec gr c t
  mapM (mapM getS) r'
  where
    getS (lab,pss) = liftM (curry id lab) $ mapM gets pss
    gets (ps,t) = liftM (curry id ps . cc . map str2strings) $ strsFromTerm t
    -- join token lists with the chosen separator
    cc = concat . intersperse [if slash then "/" else "\n"]

-- | the value is a list of strings gathered from all fields
allLinBranchFields :: CanonGrammar -> Ident -> A.Tree -> Err [String]
allLinBranchFields gr c trm = do
  r <- linearizeNoMark gr c trm >>= expandLinTables gr
  return [s | (_,t) <- allLinBranches r, s <- gets t]
  where
    -- only terms that successfully yield strs contribute
    gets t = concat [cc (map str2strings s) | Ok s <- [strsFromTerm t]]
    cc = concat . intersperse ["/"]

-- | Print a linearization table; with @pars@, labels and patterns are shown.
prLinTable :: Bool -> [[(Label,[([Patt],[String])])]] -> [String]
prLinTable pars = concatMap prOne . concat where
  prOne (lab,pss) = (if pars then ((prt lab) :) else id) (map pr pss) ----
  pr (ps,ss) = (if pars then ((unwords (map prt_ ps) +++ ":") +++)
                else id) (unwords ss)
{-
-- the value is a list of strs
allLinStrings :: CanonGrammar -> Tree -> [Str]
allLinStrings gr ft = case allLinsAsStrs gr ft of
Ok ts -> map snd $ concat $ map snd $ concat ts
Bad s -> [str s]
-- the value is a list of strs, not forgetting their arguments
allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]]
allLinsAsStrs gr ft = do
lpts <- allLinearizations gr ft
return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts
-- to a list of strings
linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String]
linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk
-- to a list of token lists
linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]]
linearizeToStrss gr mk e = do
R rs <- linearizeToRecord gr mk e ----
t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
return $ map strsFromTerm $ allInTable t
-}
-- | All linearization tables of a function, over every field
-- (not only \"s\"; extended 11\/12\/2005).
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
allLinsOfFun gr f = lookupLin gr f >>= allAllLinValues
-- | returns printname if one exists; otherwise linearizes with metas
-- NOTE(review): the printname lookup uses CIQ c d (concrete module) while
-- the fallback fq uses CIQ m d (abstract module) — confirm this asymmetry
-- is intended.
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
printOrLinearize gr c f@(m, d) = errVal (prt fq) $
  case lookupPrintname gr (CIQ c d) of
    Ok t -> do
      ss <- strsFromTerm t
      let s = strs2strings [ss]
      return $ ifNull (prt fq) head s   -- fall back to the qualified name
    _ -> do
      -- no printname: build a reference expression with metavariables
      -- for the arguments, annotate it, and linearize it
      ty <- lookupFunType gr m d
      f' <- ref2exp [] ty (A.QC m d)
      tr <- annotate gr f'
      return $ linTree2string noMark gr c tr
  where
    fq = CIQ m d

View File

@@ -0,0 +1,50 @@
----------------------------------------------------------------------
-- |
-- Module : MatchTerm
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
--
-- functions for matching with terms. AR 16/3/2006
-----------------------------------------------------------------------------
module GF.UseGrammar.MatchTerm where
import GF.Data.Operations
import GF.Data.Zipper
import GF.Grammar.Grammar
import GF.Grammar.PrGrammar
import GF.Infra.Ident
import GF.Grammar.Values
import GF.Grammar.Macros
import GF.Grammar.MMacros
import Control.Monad
import Data.List
-- | Test if a term has duplicated idents, either any constant
-- ('hasDupIdent') or just atoms ('hasDupAtom').
-- Fixed: the previous version applied 'maximum' to the list of group
-- lengths, which crashes on expressions containing no constants at all;
-- 'any' is total (False on empty) and equivalent on non-empty input.
hasDupIdent, hasDupAtom :: Exp -> Bool
hasDupIdent = any ((> 1) . length) . group . sort . allConstants True
hasDupAtom  = any ((> 1) . length) . group . sort . allConstants False
-- | Test if a certain ident occurs anywhere in the term.
grepIdent :: Ident -> Exp -> Bool
grepIdent c t = c `elem` allConstants True t
-- | Form the list of all constant identifiers in a term.  With @True@,
-- every constant is collected; with @False@, heads of applications are
-- skipped so that only "atoms" remain.  Implemented as a state-threading
-- traversal ('appSTM'); a failed traversal yields [].
allConstants :: Bool -> Exp -> [Ident]
allConstants alsoApp = err (const []) snd . flip appSTM [] . collect where
  collect e = case e of
    Q _ c -> add c e
    QC _ c -> add c e
    Cn c -> add c e
    App f a | not alsoApp -> case f of
      -- NOTE(review): for a nested application only the inner argument b
      -- and a are traversed, skipping the head g — presumably deliberate
      -- for "atoms only", but confirm
      App g b -> collect b >> collect a
      _ -> collect a
    _ -> composOp collect e
  add c e = updateSTM (c:) >> return e

View File

@@ -0,0 +1,140 @@
----------------------------------------------------------------------
-- |
-- Module : Morphology
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:49 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- Morphological analyser constructed from a GF grammar.
--
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie
-- for decompositions and also want to use it in the parser
-----------------------------------------------------------------------------
module GF.UseGrammar.Morphology where
import GF.Canon.AbsGFC
import GF.Canon.GFC
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import GF.Canon.Look
import GF.Grammar.LookAbs
import GF.Infra.Ident
import qualified GF.Grammar.Macros as M
import GF.UseGrammar.Linear
import GF.Data.Operations
import GF.Data.Glue
import Data.Char
import Data.List (sortBy, intersperse)
import Control.Monad (liftM)
import GF.Data.Trie2
-- construct a morphological analyser from a GF grammar. AR 11/4/2001
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie
-- for decompositions and also want to use it in the parser
-- | A morphology is a trie keyed by the characters of a word form,
-- holding the analysis strings of each form.
type Morpho = Trie Char String

emptyMorpho :: Morpho
emptyMorpho = emptyTrie

-- | Analyse one word: the matched string and the analyses found.
appMorpho :: Morpho -> String -> (String,[String])
appMorpho = appMorphoOnly
  ---- add lookup for literals
-- | Word lookup without any handling of literals.
appMorphoOnly :: Morpho -> String -> (String,[String])
appMorphoOnly = trieLookup

-- | A word is known if its lookup yields at least one analysis
-- (literals excluded).
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo w = not (null (snd (appMorphoOnly mo w)))
-- | Build a morphological analyser for abstract module @a@: compile every
-- word form of every lexical rule, plus the syncategorematic words of the
-- remaining rules, into a trie.
mkMorpho :: CanonGrammar -> Ident -> Morpho
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where
  comp = ccompute gr [] -- to undo 'values' optimization
  mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
  mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
  -- gather forms of lexical items
  allLins fun@(m,f) = errVal [] $ do
    ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues
    ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts
    return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
  -- analysis line: function name, category tag, parameter values
  prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])
  -- gather syncategorematic words
  allSyns fun@(m,f) = errVal [] $ do
    tss <- allLinsOfFun gr (CIQ a f)
    let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
    return $ concat $ map wordsInTerm ss
  prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])
  -- all words, Left from lexical rules and Right syncategorematic
  allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
    allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
    -- a rule with no arguments is lexical
    lexRole t = case M.typeForm t of
      Ok ([],_,_) -> Left
      _ -> Right
-- printing full-form lexicon and results

-- | Render the full-form lexicon, one analysis block per word.
prMorpho :: Morpho -> String
prMorpho mo = unlines (map prMorphoAnalysis (collapse mo))

-- | One analysis block: the word followed by its analyses on separate
-- lines, or the word marked with \"*\" when nothing non-empty was found.
prMorphoAnalysis :: (String,[String]) -> String
prMorphoAnalysis (w, fs0)
  | null fs   = w ++++ "*"
  | otherwise = unlines (w : fs)
  where
    fs = filter (not . null) fs0

-- | One-line variant, analyses separated by \"/\"; unknowns show the word
-- with a \"*\".
prMorphoAnalysisShort :: (String,[String]) -> String
prMorphoAnalysisShort (w, fs) = prBracket (marker ++ prTList "/" fs)
  where
    marker = if null fs then w +++ "*" else ""

-- | Tag an analysis with its category (the module half is ignored).
tagPrt :: Print a => (a,a) -> String
tagPrt (_, c) = "+" ++ prt c --- module name

-- | All word forms recognized by the morphology.
allMorphoWords :: Morpho -> [String]
allMorphoWords mo = [w | (w, _) <- collapse mo]
-- analyse running text and show results either in short form or on separate lines

-- | Analyse running text and show just the words, prefixing unknown ones
-- with \"*\".
morphoTextStatus :: Morpho -> String -> String
morphoTextStatus mo txt = unlines [mark (appMorpho mo w) | w <- words txt]
  where
    mark (w, analyses)
      | null analyses = "*" +++ w
      | otherwise     = w

-- | Analyse running text, one short-form analysis per line.
morphoTextShort :: Morpho -> String -> String
morphoTextShort mo txt =
  unlines [prMorphoAnalysisShort (appMorpho mo w) | w <- words txt]

-- | Analyse running text, full analyses separated by blank lines.
morphoText :: Morpho -> String -> String
morphoText mo txt =
  unlines [('\n' :) (prMorphoAnalysis (appMorpho mo w)) | w <- words txt]

-- | Full-form lexicon in the format used by the Italian Verb Engine:
-- @form : analysis \/ analysis ...@.
prFullForm :: Morpho -> String
prFullForm mo =
  unlines [s ++ " : " ++ unwords (intersperse "/" ps) | (s, ps) <- collapse mo]
-- | Split compound words using Huet's unglueing method; on failure, fall
-- back to plain whitespace splitting.
---- a trie would serve morphological analysis better; this is experimental,
---- and the cases where decomposition applies should be specified — not all words
decomposeWords :: Morpho -> String -> [String]
decomposeWords mo s = errVal (words s) (decomposeSimple mo s)

View File

@@ -0,0 +1,70 @@
----------------------------------------------------------------------
-- |
-- Module : Paraphrases
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:49 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- paraphrases of GF terms. AR 6\/10\/1998 -- 24\/9\/1999 -- 5\/7\/2000 -- 5\/6\/2002
--
-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL)
--
-- thus inherited from the old GF. Incomplete and inefficient...
-----------------------------------------------------------------------------
module GF.UseGrammar.Paraphrases (mkParaphrases) where
import GF.Grammar.Abstract
import GF.Grammar.PrGrammar
import GF.Grammar.LookAbs
import GF.Grammar.AbsCompute
import GF.Data.Operations
import Data.List (nub)
-- paraphrases of GF terms. AR 6/10/1998 -- 24/9/1999 -- 5/7/2000 -- 5/6/2002
-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL)
-- thus inherited from the old GF. Incomplete and inefficient...
-- | All paraphrases of a term, beta-normalized and de-duplicated.
-- (NB: 'nub' is quadratic, acceptable for the small lists produced here.)
mkParaphrases :: GFCGrammar -> Term -> [Term]
mkParaphrases st t = nub (map (beta []) (paraphrases (allDefs st) t))
-- | A definitional equation: a defined function and its definiens.
type Definition = (Fun,Term)

-- | All paraphrases of a term: immediate rewrites by definitions, plus
-- recursive paraphrases of the subterms; the term itself is always
-- included (the trailing @++ [t]@ closes the whole expression).
paraphrases :: [Definition] -> Term -> [Term]
paraphrases th t =
  paraImmed th t ++
  --- paraMatch th t ++
  case t of
    App c a -> [App d b | d <- paraphrases th c, b <- paraphrases th a]
    Abs x b -> [Abs x d | d <- paraphrases th b]
    c -> []
  ++ [t]

-- | Immediate paraphrases: any defined function whose definiens equals
-- the term rewrites to (a reference to) that function.
paraImmed :: [Definition] -> Term -> [Term]
paraImmed defs t =
  [Q m f | ((m,f), u) <- defs, t == u] ++ --- eqTerm
  case t of
    ---- Cn c -> [u | (f, u) <- defs, eqStrIdent f c]
    _ -> []
{- ---
paraMatch :: [Definition] -> Trm -> [Trm]
paraMatch th@defs t =
[mkApp (Cn f) xx | (PC f zz, u) <- defs,
let (fs,sn) = fullApp u, fs == h, length sn == length zz] ++
case findAMatch defs t of
Ok (g,b) -> [substTerm [] g b]
_ -> []
where
(h,xx) = fullApp t
fullApp c = case c of
App f a -> (f', a' ++ [a]) where (f',a') = fullApp f
c -> (c,[])
-}

View File

@@ -0,0 +1,177 @@
----------------------------------------------------------------------
-- |
-- Module : Parsing
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/02 10:23:52 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.25 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
module GF.UseGrammar.Parsing where
import GF.Infra.CheckM
import qualified GF.Canon.AbsGFC as C
import GF.Canon.GFC
import GF.Canon.MkGFC (trExp) ----
import GF.Canon.CMacros
import GF.Grammar.MMacros (refreshMetas)
import GF.UseGrammar.Linear
import GF.Data.Str
import GF.CF.CF
import GF.CF.CFIdent
import GF.Infra.Ident
import GF.Grammar.TypeCheck
import GF.Grammar.Values
--import CFMethod
import GF.UseGrammar.Tokenize
import GF.UseGrammar.Morphology (isKnownWord)
import GF.CF.Profile
import GF.Infra.Option
import GF.UseGrammar.Custom
import GF.Compile.ShellState
import GF.CF.PPrCF (prCFTree)
-- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE
import qualified GF.Parsing.GFC as New
import GF.Data.Operations
import Data.List (nub,sortBy)
import Data.Char (toLower)
import Control.Monad (liftM)
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
-- | Parse a string into trees, discarding the diagnostic message.
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
parseString os sg cat s = do
  (ts, _msg) <- parseStringMsg os sg cat s
  return ts

-- | Parse a string into trees, also returning the accumulated warnings.
-- A parse failure is not an 'Err' failure: it comes back as @([], msg)@.
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
parseStringMsg os sg cat s =
  case checkStart (parseStringC os sg cat s) of
    Ok (ts, (_, msgs)) -> return (ts, unlines (reverse msgs))
    Bad msg            -> return ([], msg)
-- | Core parser dispatch: the old CF parser is used when -old is given,
-- or when the grammar has higher-order abstract syntax and -fcfg is not
-- forced; otherwise the new chart parsers (c\/m\/f algorithms) are used.
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
parseStringC opts0 sg cat s
  | oElem (iOpt "old") opts0 ||
    (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do
      let opts = unionOptions opts0 $ stateOptions sg
          cf = stateCF sg   -- NOTE(review): bound but unused in this branch
          gr = stateGrammarST sg
          cn = cncId sg
          toks = customOrDefault opts useTokenizer customTokenizer sg s
          parser = customOrDefault opts useParser customParser sg cat
      -- with -cut, stop at the first tokenization that yields trees
      if oElem (iOpt "cut") opts
        then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
        else mapM (tokens2trms opts sg cn parser) toks >>= return . concat
---- | or [oElem p opts0 |
----       p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do
  | otherwise = do
      let opts = unionOptions opts0 $ stateOptions sg
          algorithm | oElem newCParser opts0 = "c"
                    | oElem newMParser opts0 = "m"
                    | oElem newFParser opts0 = "f"
                    | otherwise = "f" -- default algorithm: FCFG
          strategy = maybe "bottomup" id $ getOptVal opts useParser
          -- -parser=bottomup/topdown
          tokenizer = customOrDefault opts useTokenizer customTokenizer sg
          toks = case tokenizer s of
            t:_ -> t
            _ -> [] ---- no support for undet. tok.
          -- words unknown to the morphology; capitalized words are also
          -- retried in lower case before being reported
          unknowns =
            [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w]
            where
              unk w = not $ isKnownWord (morpho sg) w
              uncap (c:cs) = toLower c : cs
              uncap s = s
      case unknowns of
        _:_ | oElem (iOpt "trynextlang") opts -> return []
        _:_ -> fail $ "Unknown words:" +++ unwords unknowns
        _ -> do
          ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
          -- annotate results, then cut to the requested number
          ts' <- checkErr $
                   allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
          return $ optIntOrAll opts flagNumber ts'
-- | Run a CF parser on one token list and convert its raw trees to terms.
tokens2trms :: Options -> StateGrammar -> Ident -> CFParser -> [CFTok] -> Check [Tree]
tokens2trms opts sg cn parser toks =
  trees2trms opts sg cn toks (cfParseResults parseResult) (snd parseResult)
  where
    parseResult = parser toks
    -- peb 25/5-04: nub on the result trees was removed (O(n^2))
-- | Convert raw context-free parse trees into annotated abstract trees.
--
-- Three modes, selected by options:
--  * no parses: emit a warning and return [];
--  * @rawParse@: show/annotate the raw CF trees without post-parsing;
--  * otherwise: post-parse, annotate, and (unless @forgiveParse@) keep
--    only trees whose linearizations are compatible with the input tokens.
-- The number of raw trees considered is capped by @-rawtrees@, and the
-- number of results returned by @-number@.
trees2trms ::
  Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
trees2trms opts sg cn as ts0 info = do
  let s = unwords $ map prCFTok as
  ts <- case () of
    _ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return []
    _ | raw -> do
      ts1 <- return (map cf2trm0 ts0) ----- should not need annot
      -- first alternative annotates; the fallback just prints the raw trees
      checks [
        mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
       ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
       ]
    _ -> do
      -- cap the number of raw parses that are post-processed
      let num = optIntOrN opts flagRawtrees 999999
      let (ts01,rest) = splitAt num ts0
      if null rest then return ()
        else raise ("Warning: only" +++ show num +++ "raw parses out of" +++
               show (length ts0) +++
               "considered; use -rawtrees=<Int> to see more"
              )
      (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
      if null ts1 then raise ss else return ()
      ts2 <- checkErr $
        allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ----
      -- unless -forgive: keep only trees that linearize back to tokens
      -- compatible with the input (morphology check)
      if forgive then return ts2 else do
        let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
            ps = [t | (t,ss) <- tsss,
                      any (compatToks as) (map str2cftoks ss)]
        if null ps
          then raise $ "Failure in morphology." ++
                 if verb
                   then "\nPossible corrections: " +++++
                        unlines (nub (map sstr (concatMap snd tsss)))
                   else ""
          else return ps
  if verb
    then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info
    else return ()
  return $ optIntOrAll opts flagNumber $ nub ts
  where
    gr      = stateGrammarST sg
    raw     = oElem rawParse opts
    verb    = oElem beVerbose opts
    forgive = oElem forgiveParse opts
---- Operations.allChecks :: ErrorMonad m => [m a] -> m [a]
-- | Report which plain string tokens ('TS') of the input are not words
-- of the grammar; literal tokens are not checked.
unknownWords sg ts
  | null unk  = "where all words are known"
  | otherwise = "with the unknown tokens" +++ show unk --- needs to be fixed for literals
  where
    unk        = [t | t@(TS _) <- ts, not (isKnown t)]
    isKnown t  = any (compatTok t) knownToks
    knownToks  = map TS (stateGrammarWords sg)
--- too much type checking in building term info? return FullTerm to save work?
-- | raw parsing: so simple it is for a context-free CF grammar
-- | Raw conversion of a context-free parse tree into a canonical
-- expression: the node's function becomes an atom applied to the
-- converted subtrees. No type checking or annotation is done here.
cf2trm0 :: CFTree -> C.Exp
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
  where
    cffun2trm (CFFun (fun,_)) = fun
    mkApp       = foldl C.EApp
    mkAppAtom a = mkApp (C.EAtom a)

View File

@@ -0,0 +1,66 @@
----------------------------------------------------------------------
-- |
-- Module : Randomized
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:51 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- random generation and refinement. AR 22\/8\/2001.
-- implemented as sequence of refinement menu selections, encoded as integers
-----------------------------------------------------------------------------
module GF.UseGrammar.Randomized where
import GF.Grammar.Abstract
import GF.UseGrammar.Editing
import GF.Data.Operations
import GF.Data.Zipper
--- import Arch (myStdGen) --- circular for hbc
import System.Random --- (mkStdGen, StdGen, randoms) --- bad import for hbc
-- random generation and refinement. AR 22/8/2001
-- implemented as sequence of refinement menu selections, encoded as integers
-- | Seed-based standard generator; a thin alias for 'mkStdGen'
-- (kept as a separate name for historical portability reasons).
myStdGen :: Int -> StdGen
myStdGen = mkStdGen ---

-- | build one random tree; use mx to prevent infinite search
mkRandomTree :: StdGen -> Int -> CGrammar -> Either Cat Fun -> Err Tree
mkRandomTree gen mx gr cat = mkTreeFromInts (take mx (randoms gen)) gr cat

-- | Random refinement action: at most @mx@ random (non-negative)
-- menu selections are applied to the current editing state.
refineRandom :: StdGen -> Int -> CGrammar -> Action
refineRandom gen mx = mkStateFromInts $ take mx $ map abs (randoms gen)

-- | build a tree from a list of integers
-- (each integer selects one refinement, modulo the menu length)
mkTreeFromInts :: [Int] -> CGrammar -> Either Cat Fun -> Err Tree
mkTreeFromInts ints gr catfun = do
  -- start a fresh editing state from either a category or a function
  st0 <- either (\cat -> newCat gr cat initState)
                (\fun -> newFun gr fun initState)
                catfun
  state <- mkStateFromInts ints gr st0
  return $ loc2tree state
-- | Drive the refinement editor with a list of integers: each integer
-- selects (modulo the menu length) one refinement from the current
-- refinement menu. Fails if the integers run out before the state is
-- complete, or if no refinements are available.
mkStateFromInts :: [Int] -> CGrammar -> Action
mkStateFromInts ints gr z = mkRandomState ints z >>= reCheckState gr where
  mkRandomState [] state = do
    testErr (isCompleteState state) "not completed"
    return state
  mkRandomState (n:ns) state = do
    let refs = refinementsState gr state
        -- NOTE(review): refs0 is a list of Bools obtained with 'map',
        -- so 'null refs0' is equivalent to 'null refs'; the intention
        -- was presumably 'filter (not . snd . snd) refs' (nonrecursive
        -- refinements only), and the selection below still indexes the
        -- full 'refs' — TODO confirm before changing behavior.
        refs0 = map (not . snd . snd) refs
    testErr (not (null refs0)) $ "no nonrecursive refinements available for" +++
      prt (actVal state)
    (ref,_) <- (refs !? (n `mod` (length refs)))
    state1 <- refineWithAtom False gr ref state
    if isCompleteState state1
      then return state1
      else do
        -- move the focus to the next metavariable and continue
        state2 <- goNextMeta state1
        mkRandomState ns state2

View File

@@ -0,0 +1,181 @@
----------------------------------------------------------------------
-- |
-- Module : Session
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/17 15:13:55 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.12 $
--
-- Editing sessions: the state, history and commands of the interactive syntax editor.
-----------------------------------------------------------------------------
module GF.UseGrammar.Session where
import GF.Grammar.Abstract
import GF.Infra.Option
import GF.UseGrammar.Custom
import GF.UseGrammar.Editing
import GF.Compile.ShellState ---- grammar
import GF.Data.Operations
import GF.Data.Zipper (keepPosition) ---
-- First version 8/2001. Adapted to GFC with modules 19/6/2003.
-- Nothing had to be changed, which is a sign of good modularity.
-- keep these abstract
-- | 'Exp'-list: candidate refinements,clipboard
-- A session state is the full undo history: the head is the current
-- snapshot (editing state, (candidates, clipboard), info).
type SState = [(State,([Exp],[Clip]),SInfo)]
-- | 'String' is message, 'Int' is the view
type SInfo = ([String],(Int,Options))

-- | The one-element initial history; by convention an 'SState' is
-- never empty (the accessors below rely on this).
initSState :: SState
initSState = [(initState, ([],[]), (["Select 'New' category to start"],(0,noOptions)))]
-- instead of empty
type Clip = Tree ---- (Exp,Type)

-- | (peb): Something wrong with this definition??
-- Shouldn't the result type be 'SInfo'?
--
-- > okInfo :: Int -> SInfo == ([String], (Int, Options))
-- NOTE(review): the 'Bool' in the result makes this incompatible with
-- 'SInfo' ('Options' expected) — looks like dead or stale code; confirm
-- against callers before changing.
okInfo :: n -> ([s], (n, Bool))
okInfo n = ([],(n,True))

-- Accessors into the head (current) snapshot of the history.
-- All of them are partial: they assume a non-empty 'SState', which
-- 'initSState' and the commands below maintain.
stateSState :: SState -> State
candsSState :: SState -> [Exp]
clipSState  :: SState -> [Clip]
infoSState  :: SState -> SInfo
msgSState   :: SState -> [String]
viewSState  :: SState -> Int
optsSState  :: SState -> Options
stateSState ((s,_,_):_) = s
candsSState ((_,(ts,_),_):_)= ts
clipSState ((_,(_,ts),_):_)= ts
infoSState ((_,_,i):_) = i
msgSState ((_,_,(m,_)):_) = m
viewSState ((_,_,(_,(v,_))):_) = v
optsSState ((_,_,(_,(_,o))):_) = o
-- | The tree currently being edited (from the head of the history).
treeSState :: SState -> Tree
treeSState session = actTree (stateSState session)
-- | from state to state
type ECommand = SState -> SState

-- * elementary commands
-- ** change state, drop cands, drop message, preserve options
-- Each command pushes a new snapshot on the history (for undo).
-- NOTE: most of these pattern match on a non-empty history only;
-- they rely on the 'initSState' invariant.

-- | Push a new editing state; candidates are cleared, clipboard kept.
changeState :: State -> ECommand
changeState s ss = changeMsg [] $ (s,([],clipSState ss),infoSState ss) : ss

-- | Replace the candidate-refinement list, with a message describing it.
changeCands :: [Exp] -> ECommand
changeCands ts ss@((s,(_,cb),(_,b)):_) = (s,(ts,cb),(candInfo ts,b)) : ss

-- | Push a clip on top of the clipboard.
addtoClip :: Clip -> ECommand
addtoClip t ss@((s,(ts,cb),(i,b)):_) = (s,(ts,t:cb),(i,b)) : ss

-- | NOTE(review): despite the name, this drops the first @n@ clips,
-- not the @n@-th one — confirm against callers.
removeClip :: Int -> ECommand
removeClip n ss@((s,(ts,cb),(i,b)):_) = (s,(ts, drop n cb),(i,b)) : ss

changeMsg :: [String] -> ECommand
changeMsg m ((s,ts,(_,b)):ss) = (s,ts,(m,b)) : ss -- just change message
-- empty history: rebuild from the initial state with the given message
changeMsg m _ = (s,ts,(m,b)) : [] where [(s,ts,(_,b))] = initSState

changeView :: ECommand
changeView ((s,ts,(m,(v,b))):ss) = (s,ts,(m,(v+1,b))) : ss -- toggle view

-- | Run a command and then set the message.
withMsg :: [String] -> ECommand -> ECommand
withMsg m c = changeMsg m . c

-- | Update the per-session options in place (no new history entry).
changeStOptions :: (Options -> Options) -> ECommand
changeStOptions f ((s,ts,(m,(v,o))):ss) = (s,ts,(m,(v, f o))) : ss

noNeedForMsg :: ECommand
noNeedForMsg = changeMsg [] -- everything's all right: no message
-- | Human-readable message describing how many candidate refinements
-- are available for selection.
candInfo :: [Exp] -> [String]
candInfo ts = case ts of
  []  -> ["no acceptable alternative"]
  [_] -> ["just one acceptable alternative"]
  _   -> [show (length ts) +++ "alternatives to select"]
-- * keep SState abstract from this on
-- ** editing commands

-- | Lift an editing 'Action' to a session command: on success the new
-- state is pushed; on failure the error becomes the session message.
action2command :: Action -> ECommand
action2command act state = case act (stateSState state) of
  Ok s  -> changeState s state
  Bad m -> changeMsg [m] state

action2commandNext :: Action -> ECommand -- move to next meta after execution
action2commandNext act = action2command (\s -> act s >>= goNextMetaIfCan)

action2commandKeep :: Action -> ECommand -- keep old position after execution
action2commandKeep act = action2command (\s -> keepPosition act s)

-- | Pop @n@ snapshots from the history; if there are not enough,
-- fall back to the oldest snapshot with an explanatory message.
-- NOTE(review): 'last ss' is partial on an empty history — relies on
-- the non-empty 'SState' invariant.
undoCommand :: Int -> ECommand
undoCommand n ss =
  let k = length ss in
  if k < n
    then changeMsg ["cannot go all the way back"] [last ss]
    else changeMsg ["successful undo"] (drop n ss)

-- | Pick candidate number @i@, annotate it in the current state, and
-- replace the focused subtree with it.
selectCand :: CGrammar -> Int -> ECommand
selectCand gr i state = err (\m -> changeMsg [m] state) id $ do
  exp <- candsSState state !? i
  let s = stateSState state
  tree <- annotateInState gr exp s
  return $ case replaceSubTree tree s of
    Ok st' -> changeState st' state
    Bad s  -> changeMsg [s] state

-- | Refine with the unique expression, or offer the list as candidates.
refineByExps :: Bool -> CGrammar -> [Exp] -> ECommand
refineByExps der gr trees = case trees of
  [t] -> action2commandNext (refineWithExpTC der gr t)
  _   -> changeCands trees

refineByTrees :: Bool -> CGrammar -> [Tree] -> ECommand
refineByTrees der gr trees = case trees of
  [t] -> action2commandNext (refineOrReplaceWithTree der gr t)
  _   -> changeCands $ map tree2exp trees

replaceByTrees :: CGrammar -> [Exp] -> ECommand
replaceByTrees gr trees = case trees of
  [t] -> action2commandNext (\s ->
           annotateExpInState gr t s >>= flip replaceSubTree s)
  _   -> changeCands trees

-- | Look up a custom edit command by (case-insensitive) name; if it is
-- not found, the identity action is used.
replaceByEditCommand :: StateGrammar -> String -> ECommand
replaceByEditCommand gr co =
  action2commandKeep $
  maybe return ($ gr) $
  lookupCustom customEditCommand (strCI co)

-- | Apply a custom term command to the focused tree; if the command is
-- unknown, the tree itself is the only candidate.
replaceByTermCommand :: Bool -> StateGrammar -> String -> Tree -> ECommand ----
replaceByTermCommand der gr co exp =
  let g = grammar gr in
  refineByTrees der g $ maybe [exp] (\f -> f gr exp) $
  lookupCustom customTermCommand (strCI co)
-- | The numbered clipboard entries whose values fit (are type-correct
-- refinements for) the current focus of the editing state.
possClipsSState :: StateGrammar -> SState -> [(Int,Clip)]
possClipsSState gr s = filter poss $ zip [0..] (clipSState s)
  where
    poss = possibleTreeVal cgr st . snd
    st   = stateSState s
    cgr  = grammar gr
-- | Fetch clip number @i@ (0-based) from the session clipboard.
--
-- Returns @Bad "not enough clips"@ when the index is out of range.
-- Fix: the previous version tested @length cs > i@, which a negative
-- @i@ always passes, so @cs !! i@ then raised an exception instead of
-- returning 'Bad'; a negative index is now rejected cleanly. Using
-- 'drop' also avoids traversing the list twice (length + (!!)).
getNumberedClip :: Int -> SState -> Err Clip
getNumberedClip i s = case drop i (clipSState s) of
  c:_ | i >= 0 -> return c
  _            -> Bad "not enough clips"

View File

@@ -0,0 +1,44 @@
----------------------------------------------------------------------
-- |
-- Module : Statistics
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/04 11:45:38 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.1 $
--
-- statistics on canonical grammar: amounts of generated code
-- AR 4\/9\/2005.
-- uses canonical grammar
-----------------------------------------------------------------------------
module GF.UseGrammar.Statistics (prStatistics) where
import GF.Infra.Modules
import GF.Infra.Option
import GF.Grammar.PrGrammar
import GF.Canon.GFC
import GF.Canon.MkGFC
import GF.Data.Operations
import Data.List (sortBy)
-- | the top level function
-- | the top level function
-- Prints: number of modules, total printed size of the grammar, and the
-- 40 largest judgements (by length of their printed definition),
-- largest first.
prStatistics :: CanonGrammar -> String
prStatistics can = unlines $ [
  show (length mods) ++ "\t\t modules",
  show chars ++ "\t\t gfc size",
  "",
  "Top 40 definitions"
  ] ++
  [show d ++ "\t\t " ++ f | (d,f) <- tops]
  where
    -- sort ascending by size, then reverse: largest definitions first
    tops  = take 40 $ reverse $ sortBy (\ (i,_) (j,_) -> compare i j) defs
    defs  = [(length (prt (info2def j)), name m j) | (m,j) <- infos]
    infos = [(m,j) | (m,ModMod mo) <- mods, j <- tree2list (jments mo)]
    name m (f,_) = prt m ++ "." ++ prt f
    mods  = modules can
    -- size measured as the length of the whole printed canonical grammar
    chars = length $ prCanon can

View File

@@ -0,0 +1,222 @@
----------------------------------------------------------------------
-- |
-- Module : Tokenize
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/29 13:20:08 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.14 $
--
-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
-- an entry for each is included in 'Custom.customTokenizer'
-----------------------------------------------------------------------------
module GF.UseGrammar.Tokenize ( tokWords,
tokLits,
tokVars,
lexHaskell,
lexHaskellLiteral,
lexHaskellVar,
lexText,
lexTextVar,
lexC2M, lexC2M',
lexTextLiteral,
lexIgnore,
wordsLits
) where
import GF.Data.Operations
---- import UseGrammar (isLiteral,identC)
import GF.CF.CFIdent
import Data.Char
-- lexers = tokenizers, to prepare input for GF grammars. AR 4/1/2002
-- an entry for each is included in Custom.customTokenizer
-- | just words
-- | just words
tokWords :: String -> [CFTok]
tokWords = map tS . words

-- | Tokenize recognizing literals; quoted chunks spanning several
-- words ("like this") are merged back into a single token before
-- classification by 'mkCFTok'.
tokLits :: String -> [CFTok]
tokLits = map mkCFTok . mergeStr . wordsLits where
  -- a word that opens a quote but does not close it starts a merge
  mergeStr ss = case ss of
    w@(c:cs):rest | elem c "\'\"" && c /= last w -> getStr [w] rest
    w :rest -> w : mergeStr rest
    [] -> []
  -- accumulate words (reversed) until one ends with a quote character
  getStr v ss = case ss of
    w@(_:_):rest | elem (last w) "\'\"" -> (unwords (reverse (w:v))) : mergeStr rest
    w :rest -> getStr (w:v) rest
    [] -> reverse v

-- | Tokenize recognizing metavariables and bound variables ('mkCFTokVar').
tokVars :: String -> [CFTok]
tokVars = map mkCFTokVar . wordsLits
-- | Recognize simple decimal literals: optional leading digits, then a
-- dot followed by at least one digit ("3.14", ".5"; not "12." or "42").
isFloat :: String -> Bool
isFloat s =
  case span isDigit s of
    (_, '.':frac@(_:_)) -> all isDigit frac
    _                   -> False
-- | Recognize quoted strings: at least two characters, with the first
-- and last both a single quote or both a double quote.
isString :: String -> Bool
isString (open:rest@(_:_)) =
  let close = last rest
  in  (open == '\'' && close == '\'') || (open == '"' && close == '"')
isString _ = False
-- | Classify a word: quoted -> string literal, numeric -> float/int
-- literal, anything else -> plain string token.
mkCFTok :: String -> CFTok
mkCFTok s = case s of
  '"' :cs@(_:_) | last cs == '"'  -> tL $ init cs
  '\'':cs@(_:_) | last cs == '\'' -> tL $ init cs --- 's Gravenhage
  _:_ | isFloat s     -> tF s
  _:_ | all isDigit s -> tI s
  _ -> tS s

-- | Classify a word as a metavariable (@?x@), a bound variable
-- (@x_...@, bare @x@, or @$x$@), or a plain token.
mkCFTokVar :: String -> CFTok
mkCFTokVar s = case s of
  '?':_:_     -> tM s --- "?" --- compat with prCF
  'x':'_':_   -> tV s
  'x':[]      -> tV s
  '$':xs@(_:_) -> if last s == '$' then tV (init xs) else tS s
  _ -> tS s

-- | Post-process a tokenizer so that its plain tokens are re-classified
-- as variables/metavariables by 'mkCFTokVar'.
mkTokVars :: (String -> [CFTok]) -> String -> [CFTok]
mkTokVars tok = map tv . tok where
  tv (TS s) = mkCFTokVar s
  tv t = t

-- | Classify a word as a numeric or string literal.
mkLit :: String -> CFTok
mkLit s
  | isFloat s     = tF s
  | all isDigit s = tI s
  | otherwise     = tL s

-- obsolete
-- (kept for compatibility: like 'mkLit' but single-quotes the string)
mkTL :: String -> CFTok
mkTL s
  | isFloat s     = tF s
  | all isDigit s = tI s
  | otherwise     = tL ("'" ++ s ++ "'")
-- | Haskell lexer, usable for much code
-- Repeatedly applies the Prelude 'lex'; stops at end of input or at
-- anything 'lex' cannot handle.
lexHaskell :: String -> [CFTok]
lexHaskell ss = case lex ss of
  [(w@(_:_),ws)] -> tS w : lexHaskell ws
  _ -> []

-- | somewhat shaky text lexer
-- Splits off punctuation as separate tokens; after a major punctuation
-- mark (.!?) the next word is marked as capitalized-position ('tC').
lexText :: String -> [CFTok]
lexText = uncap . lx where
  lx s = case s of
    '?':'?':cs -> tS "??" : lx cs
    p : cs | isMPunct p -> tS [p] : uncap (lx cs)
    p : cs | isPunct p -> tS [p] : lx cs
    -- note: this 's' shadows the argument; the '_:_' branch below
    -- refers to the full input string again
    s : cs | isSpace s -> lx cs
    _ : _ -> getWord s
    _ -> []
  getWord s = tS w : lx ws where (w,ws) = span isNotSpec s
  isMPunct c = elem c ".!?"
  isPunct c = elem c ",:;()\""
  isNotSpec c = not (isMPunct c || isPunct c || isSpace c)
  uncap (TS (c:cs) : ws) = tC (c:cs) : ws
  uncap s = s

-- | lexer for C--, a mini variant of C
lexC2M :: String -> [CFTok]
lexC2M = lexC2M' False

-- | C-- lexer; the flag controls whether unknown identifiers become
-- variables ('tV', higher-order mode) or quoted literals ('tL').
lexC2M' :: Bool -> String -> [CFTok]
lexC2M' isHigherOrder s = case s of
  '#':cs -> lexC $ dropWhile (/='\n') cs       -- line comment
  '/':'*':cs -> lexC $ dropComment cs          -- block comment
  c:cs | isSpace c -> lexC cs
  c:cs | isAlpha c -> getId s
  c:cs | isDigit c -> getLit s
  -- two-character symbols are tried before single characters
  c:d:cs | isSymb [c,d] -> tS [c,d] : lexC cs
  c:cs | isSymb [c] -> tS [c] : lexC cs
  _ -> [] --- covers end of file and unknown characters
  where
    lexC = lexC2M' isHigherOrder
    getId s = mkT i : lexC cs where (i,cs) = span isIdChar s
    getLit s = tI i : lexC cs where (i,cs) = span isDigit s ---- Float!
    isIdChar c = isAlpha c || isDigit c || elem c "'_"
    isSymb = reservedAnsiCSymbol
    dropComment s = case s of
      '*':'/':cs -> cs
      _:cs -> dropComment cs
      _ -> []
    -- reserved words stay as plain tokens; other identifiers become
    -- variables or quoted literals depending on the mode
    mkT i = if (isRes i) then (tS i) else
            if isHigherOrder then (tV i) else (tL ("'" ++ i ++ "'"))
    isRes = reservedAnsiC

-- | Is the string a reserved ANSI C symbol (stored with value True)?
reservedAnsiCSymbol s = case lookupTree show s ansiCtree of
  Ok True -> True
  _ -> False

-- | Is the string a reserved ANSI C word (stored with value False)?
reservedAnsiC s = case lookupTree show s ansiCtree of
  Ok False -> True
  _ -> False

-- | for an efficient lexer: precompile this!
-- Symbols map to True, reserved words to False.
ansiCtree = buildTree $ [(s,True) | s <- reservedAnsiCSymbols] ++
                        [(s,False) | s <- reservedAnsiCWords]
-- | The operator/punctuation symbols of the C-- lexer.
-- Multi-character symbols come first so that two-character matches are
-- preferred over their single-character prefixes.
reservedAnsiCSymbols :: [String]
reservedAnsiCSymbols = concatMap words
  [ "<<= >>= << >> ++ -- == <= >= *= += -= %= /= &= ^= |= "
  , "^ { } = , ; + * - ( ) < > & % ! ~"
  ]
-- | The reserved words of the C-- lexer: the ANSI C keywords plus a few
-- extra identifiers used in teaching material.
reservedAnsiCWords :: [String]
reservedAnsiCWords = concatMap words
  [ "auto break case char const continue default "
  , "do double else enum extern float for goto if int "
  , "long register return short signed sizeof static struct switch typedef "
  , "union unsigned void volatile while "
  , "main printin putchar" --- these are not ansi-C
  ]
-- | turn unknown tokens into string literals; not recursively for literals 123, 'foo'
unknown2string :: (String -> Bool) -> [CFTok] -> [CFTok]
unknown2string isKnown = map mkOne where
  mkOne t@(TS s)
    | isKnown s     = t
    | isFloat s     = tF s
    | all isDigit s = tI s
    | otherwise     = tL s
  -- capitalized-position tokens keep their classification if known
  mkOne t@(TC s) = if isKnown s then t else mkLit s
  mkOne t = t

-- | Turn unknown tokens into variables (or numeric/string literals when
-- they look like ones); @??@ becomes a metavariable token.
unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok]
unknown2var isKnown = map mkOne where
  mkOne t@(TS "??") = if isKnown "??" then t else tM "??"
  mkOne t@(TS s)
    | isKnown s     = t
    | isFloat s     = tF s
    | isString s    = tL (init (tail s))  -- strip the surrounding quotes
    | all isDigit s = tI s
    | otherwise     = tV s
  mkOne t@(TC s) = if isKnown s then t else tV s
  mkOne t = t

-- Composed lexers: tokenize, then reclassify the unknown tokens.
lexTextLiteral, lexHaskellLiteral, lexHaskellVar :: (String -> Bool) -> String -> [CFTok]
lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText
lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell
lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText
-- | Case-insensitive first letter: the word is known if either its
-- lower-cased or its upper-cased variant is known.
eitherUpper :: (String -> Bool) -> String -> Bool
eitherUpper isKnown word = case word of
  c:rest -> isKnown (toLower c : rest) || isKnown (toUpper c : rest)
  []     -> isKnown []
-- | Ignore unknown plain tokens (e.g. for keyword spotting): a 'TS'
-- token survives only if it is known; all other token kinds are kept.
lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok]
lexIgnore isKnown = filter keep
  where
    keep (TS s) = isKnown s
    keep _      = True

View File

@@ -0,0 +1,79 @@
----------------------------------------------------------------------
-- |
-- Module : Transfer
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:53 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.5 $
--
-- linearize, parse, etc, by transfer. AR 9\/10\/2003
-----------------------------------------------------------------------------
module GF.UseGrammar.Transfer where
import GF.Grammar.Grammar
import GF.Grammar.Values
import GF.Grammar.AbsCompute
import qualified GF.Canon.GFC as GFC
import GF.Grammar.LookAbs
import GF.Grammar.MMacros
import GF.Grammar.Macros
import GF.Grammar.PrGrammar
import GF.Grammar.TypeCheck
import GF.Infra.Ident
import GF.Data.Operations
import qualified Transfer.Core.Abs as T
import Control.Monad
-- transfer is done in T.Exp - we only need these conversions.
-- | Embed a GF expression into the transfer core language, wrapped in
-- an application of the transfer function @f@.
exp2core :: Ident -> Exp -> T.Exp
exp2core f = T.EApp (T.EVar (var f)) . exp2c where
  exp2c e = case e of
    App f a -> T.EApp (exp2c f) (exp2c a)
    Abs x b -> T.EAbs (T.PVVar (var x)) (exp2c b) ---- should be syntactic abstr
    Q _ c   -> T.EVar (var c)
    QC _ c  -> T.EVar (var c)
    K s     -> T.EStr s
    EInt i  -> T.EInteger $ toInteger i
    Meta m  -> T.EMeta (T.TMeta (prt m)) ---- which meta symbol?
    Vr x    -> T.EVar (var x) ---- should be syntactic var
  var x = T.CIdent $ prt x

-- | Back-translation from the transfer core language to a GF
-- expression; variables are left bare (GF annotates them to Q or QC).
core2exp :: T.Exp -> Exp
core2exp e = case e of
  T.EApp f a -> App (core2exp f) (core2exp a)
  T.EAbs (T.PVVar x) b -> Abs (var x) (core2exp b) ---- only from syntactic abstr
  T.EVar c -> Vr (var c) -- GF annotates to Q or QC
  T.EStr s -> K s
  T.EInteger i -> EInt $ fromInteger i
  T.EMeta _ -> uExp -- meta symbol 0, refreshed by GF
  where
    var :: T.CIdent -> Ident
    var (T.CIdent x) = zIdent x

-- The following are now obsolete (30/11/2005)
-- linearize, parse, etc, by transfer. AR 9/10/2003

-- | Apply the transfer function @tra@ (looked up by the value category
-- of the tree) to a tree, computing and re-annotating the result.
doTransfer :: GFC.CanonGrammar -> Ident -> Tree -> Err Tree
doTransfer gr tra t = do
  cat <- liftM snd $ val2cat $ valTree t
  f <- lookupTransfer gr tra cat
  e <- compute gr $ App f $ tree2exp t
  annotate gr e

-- | Linearize (or otherwise use) a tree after transferring it.
useByTransfer :: (Tree -> Err a) -> GFC.CanonGrammar -> Ident -> (Tree -> Err a)
useByTransfer lin gr tra t = doTransfer gr tra t >>= lin

-- | Parse (or otherwise produce trees) and transfer each result.
mkByTransfer :: (a -> Err [Tree]) -> GFC.CanonGrammar -> Ident -> (a -> Err [Tree])
mkByTransfer parse gr tra s = parse s >>= mapM (doTransfer gr tra)

View File

@@ -0,0 +1,77 @@
----------------------------------------------------------------------
-- |
-- Module : TreeSelections
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- choose shallowest trees, and remove an overload resolution prefix
-----------------------------------------------------------------------------
module GF.UseGrammar.TreeSelections (
getOverloadResults, smallestTrs, sizeTr, depthTr
) where
import GF.Grammar.Abstract
import GF.Grammar.Macros
import GF.Data.Operations
import GF.Data.Zipper
import Data.List
-- AR 2/7/2007
-- The top-level function takes a set of trees (typically parses)
-- and returns the list of those trees that have the minimum size.
-- In addition, the overload prefix "ovrld123_", is removed
-- from each constructor in which it appears. This is used for
-- showing the library API constructors in a parsable grammar.
-- TODO: access the generic functions smallestTrs, sizeTr, depthTr from shell
-- | Keep only the minimum-size trees among a set of parses, after
-- stripping the overload-resolution prefix ("ovrld...123_") from
-- constructor names.
getOverloadResults :: [Tree] -> [Tree]
getOverloadResults = smallestTrs sizeTr . map (mkOverload "ovrld")
-- NB: this does not always give the desired result, since
-- some genuine alternatives may be deeper: now we will exclude the
-- latter of
--
-- mkCl this_NP love_V2 (mkNP that_NP here_Adv)
-- mkCl this_NP (mkVP (mkVP love_V2 that_NP) here_Adv)
--
-- A perfect method would know the definitional equivalences of constructors.
--
-- Notice also that size is a better measure than depth, because:
-- 1. Global depth does not exclude the latter of
--
-- mkCl (mkNP he_Pron) love_V2 that_NP
-- mkCl (mkNP he_Pron) (mkVP love_V2 that_NP)
--
-- 2. Length is needed to exclude the latter of
--
-- mkS (mkCl (mkNP he_Pron) love_V2 that_NP)
-- mkS presentTense (mkCl (mkNP he_Pron) love_V2 that_NP)
--
-- | Keep exactly the trees with the minimal measure under @size@,
-- preserving their original order.
smallestTrs :: (Tr a -> Int) -> [Tr a] -> [Tr a]
smallestTrs size ts = [t | (t, d) <- measured, d == smallest]
  where
    measured = [(t, size t) | t <- ts]
    smallest = minimum (map snd measured)
-- | Depth of a tree: 1 for a leaf, otherwise 1 + the deepest subtree.
depthTr :: Tr a -> Int
depthTr (Tr (_, subtrees))
  | null subtrees = 1
  | otherwise     = 1 + maximum (map depthTr subtrees)

-- | Number of nodes in a tree.
sizeTr :: Tr a -> Int
sizeTr (Tr (_, subtrees)) = foldr (\t acc -> sizeTr t + acc) 1 subtrees
-- remove from each constant a prefix starting with "pref", up to first "_"
-- example format: ovrld123_mkNP
mkOverload :: String -> Tree -> Tree
mkOverload pref = mapTr (changeAtom overAtom) where
overAtom a = case a of
AtC (m, IC f) | isPrefixOf pref f ->
AtC (m, IC (tail (dropWhile (/='_') f)))
_ -> a

View File

@@ -0,0 +1,251 @@
----------------------------------------------------------------------
-- |
-- Module : Treebank
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- Generate multilingual treebanks. AR 8\/2\/2006
--
-- (c) Aarne Ranta 2006 under GNU GPL
--
-- Purpose: to generate treebanks.
-----------------------------------------------------------------------------
module GF.UseGrammar.Treebank (
mkMultiTreebank,
mkUniTreebank,
multi2uniTreebank,
uni2multiTreebank,
testMultiTreebank,
treesTreebank,
getTreebank,
getUniTreebank,
readUniTreebanks,
readMultiTreebank,
lookupTreebank,
assocsTreebank,
isWordInTreebank,
printAssoc,
mkCompactTreebank
) where
import GF.Compile.ShellState
import GF.UseGrammar.Linear -- (linTree2string)
import GF.UseGrammar.Custom
import GF.UseGrammar.GetTree (string2tree)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
import GF.Grammar.Macros (zIdent)
import GF.Grammar.PrGrammar (prt_,prt)
import GF.Grammar.Values (tree2exp)
import GF.Data.Operations
import GF.Infra.Option
import GF.Infra.Ident (Ident)
import GF.Infra.UseIO
import qualified GF.Grammar.Abstract as A
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.List as L
import Control.Monad (liftM)
import System.FilePath
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL
-- keys are trees; format: XML file
-- keys are trees; format: XML file
type MultiTreebank = [(String,[(String,String)])] -- tree,lang,lin
-- keys are strings; format: string TAB tree TAB ... TAB tree
type UniTreebank = Treebank -- M.Map String [String] -- string,tree

-- both formats can be read from both kinds of files

-- | Read a file as unilingual treebanks: a multilingual XML file yields
-- one treebank per language; a plain file yields a single treebank
-- named after the file (extension dropped).
readUniTreebanks :: FilePath -> IO [(Ident,UniTreebank)]
readUniTreebanks file = do
  s <- readFileIf file
  return $ if isMultiTreebank s
    then multi2uniTreebank $ getTreebank $ lines s
    else
      let tb = getUniTreebank $ lines s
      in [(zIdent (dropExtension file),tb)]

-- | Read a file as a multilingual treebank; a plain unilingual file is
-- converted, taking the language name from the file name.
readMultiTreebank :: FilePath -> IO MultiTreebank
readMultiTreebank file = do
  s <- readFileIf file
  return $ if isMultiTreebank s
    then getTreebank $ lines s
    else uni2multiTreebank (zIdent (dropExtension file)) $ getUniTreebank $ lines s
-- | A multilingual treebank file starts with the XML tag @<treebank>@.
isMultiTreebank :: String -> Bool
isMultiTreebank contents = "<treebank>" `L.isPrefixOf` contents
-- | Split a multilingual treebank into one unilingual treebank per
-- language. The language list is taken from the first item; entries
-- map each linearization string to its trees.
multi2uniTreebank :: MultiTreebank -> [(Ident,UniTreebank)]
multi2uniTreebank mt@((_,lls):_) = [(zIdent la, mkTb la) | (la,_) <- lls] where
  -- note: the inner 'lls' deliberately shadows the outer one
  mkTb la = M.fromListWith (++) [(s,[t]) | (t,lls) <- mt, (l,s) <- lls, l==la]
multi2uniTreebank [] = []

-- | Inverse direction: each (string, trees) entry of a unilingual
-- treebank becomes one item per tree, tagged with the language.
uni2multiTreebank :: Ident -> UniTreebank -> MultiTreebank
uni2multiTreebank la tb =
  [(t,[(prt_ la, s)]) | (s,ts) <- assocsTreebank tb, t <- ts]

-- | the main functions
-- builds a treebank where trees are the keys, and writes a file (opt. XML)
-- With the -compact option, the compact integer format is used instead.
mkMultiTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkMultiTreebank opts sh com trees
  | oElem (iOpt "compact") opts = mkCompactTreebank opts sh trees
mkMultiTreebank opts sh com trees =
  putInXML opts "treebank" comm (concatMap mkItem tris) where
    mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
    -- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
    mkTree t = putInXML opts "tree" [] (puts $ showTree t)
    mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize opts sh lg t)
    langs = [prt_ l | l <- allLanguages sh]
    comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
    abstr = "" --- "Abs" ----
    cat i = " number=" ++ show (show i) --- " cat=" ++ show "S" ----
    lang lg = " lang=" ++ show (prt_ (zIdent lg))
    -- items are numbered from 1
    tris = zip trees [1..]

-- | Build a unilingual treebank: linearizations are the keys, printed
-- trees the values.
mkUniTreebank :: Options -> ShellState -> Language -> [A.Tree] -> Treebank
mkUniTreebank opts sh lg trees = M.fromListWith (++) [(lin t, [prt_ t]) | t <- trees]
  where
    lang = prt_ lg
    lin t = linearize opts sh lang t

-- reads a treebank and linearizes its trees again, printing all differences
testMultiTreebank :: Options -> ShellState -> String -> Res
testMultiTreebank opts sh = putInXML opts "testtreebank" [] .
  concatMap testOne .
  getTreebanks . lines
  where
    -- re-linearize the stored tree and diff against the stored string
    testOne (e,lang,str0) = do
      let tr = annot gr e
      let str = linearize opts sh lang tr
      if str == str0 then ret else putInXML opts "diff" [] $ concat [
        putInXML opts "tree" [] (puts $ showTree tr),
        putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0,
        putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
        ]
    gr = firstStateGrammar sh
-- | Extract just the trees (the keys) of a multilingual treebank file.
treesTreebank :: Options -> String -> [String]
treesTreebank _ contents = [tree | (tree, _) <- getTreebank (lines contents)]
-- string vs. IO
-- 'Res' abstracts over producing lines vs. printing them directly.
type Res = [String] -- IO ()
puts :: String -> Res
puts = return -- putStrLn
ret = [] -- return ()
--
-- here strings are keys

assocsTreebank :: UniTreebank -> [(String,[String])]
assocsTreebank = M.assocs

-- | Is the word among the words of any key string of the treebank?
-- Note: a fresh word set is built on every call.
isWordInTreebank :: UniTreebank -> String -> Bool
isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb)))

-- | One line per entry: the key string, then tab-separated trees.
printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts]

-- | Flatten a treebank file into (tree, language, linearization) triples.
getTreebanks :: [String] -> [(String,String,String)]
getTreebanks = concatMap grps . getTreebank where
  grps (t,lls) = [(t,x,y) | (x,y) <- lls]

-- | Parse the XML-ish multilingual treebank format line by line.
-- The parsing is by line prefixes ("</item", "</tree") rather than a
-- real XML parser; malformed input may make the partial helpers fail.
getTreebank :: [String] -> MultiTreebank
getTreebank ll = case ll of
  l:ls@(_:_:_) ->
    let (l1,l2) = getItem ls
        (tr,lins) = getTree l1
        lglins = getLins lins
    in (tr,lglins) : getTreebank l2
  _ -> []
  where
    getItem = span ((/="</item") . take 6)
    -- the tree is the last line before "</tree"
    getTree (_:ss) =
      let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)
    -- each lin is three lines: <lin lang="...">, the string, </lin>
    getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
    getLins _ = []
    getLang = takeWhile (/='"') . tail . dropWhile (/='"')

-- | Parse the plain unilingual format: string TAB tree TAB ... TAB tree.
getUniTreebank :: [String] -> UniTreebank
getUniTreebank ls = M.fromListWith (++) [(s, ts) | s:ts <- map chop ls] where
  chop = chunks '\t'
-- | Look up a key string in a unilingual treebank; the empty list when
-- the key is absent. (Idiom: 'M.findWithDefault' instead of
-- @maybe [] id . M.lookup@.)
lookupTreebank :: Treebank -> String -> [String]
lookupTreebank tb s = M.findWithDefault [] s tb
-- | Parse and annotate a tree from its string form.
-- NOTE(review): on annotation failure this returns
-- @error "illegal tree"@, which crashes only when the result is forced.
annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do
  let t = tree2exp $ string2tree gr s
  annotate (grammar gr) t

-- | Wrap a result in an XML open/close tag pair; the tags are emitted
-- only when the -xml option is set, otherwise the body passes through.
putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs io =
  (ifXML $ puts $ tagXML $ tag ++ attrs) ++
  io ++
  (ifXML $ puts $ tagXML $ '/':tag)
  where
    ifXML c = if oElem showXML opts then c else []
-- | Wrap a tag body in angle brackets: @tagXML "tree" == "<tree>"@.
tagXML :: String -> String
tagXML body = concat ["<", body, ">"]
-- print the treebank in a compact format:
-- first a sorted list of all words, referrable by index
-- then the linearization of each tree, as sequences of word indices
-- this format is usable in embedded translation systems.

-- | Build a multilingual treebank and print it in the compact format.
mkCompactTreebank :: Options -> ShellState -> [A.Tree] -> [String]
mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh
-- | Render a multilingual treebank in the compact integer format:
-- a statistics line (words, languages, items, max sentence length),
-- the language names, the sorted word list, a separator, then one line
-- of 1-based word indices per linearization, padded with "0" to the
-- maximal sentence length.
--
-- Fixes: 'maybe undefined id' replaced by an informative error
-- (unreachable when every linearized word occurs in 'ws', as it does by
-- construction); 'smax' made total so an empty treebank no longer
-- crashes in 'maximum'; 'concat . map' tightened to 'concatMap'.
printCompactTreebank :: (MultiTreebank,[String]) -> [String]
printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where
  -- all distinct words of all linearizations, sorted
  ws = L.sort $ L.nub $ concatMap (concatMap (words . snd) . snd) tb
  linss = map (unwords . pad) linss0
  linss0 = map (map (show . encode) . words) allExs
  -- all linearizations, grouped language by language
  allExs = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]]
  -- 1-based index of a word in the sorted word list
  encode w = maybe (error ("printCompactTreebank: unknown word " ++ w)) id $
             M.lookup w wmap
  wmap = M.fromAscList $ zip ws [1..]
  stat = unwords $ map show [length ws, length lgs, length tb, smax]
  langs = unwords lgs
  -- foldr max 0 instead of maximum: total on an empty treebank
  smax = foldr max 0 (map length linss0)
  pad ks = ks ++ replicate (smax - length ks) "0"
-- [(String,[(String,String)])] -- tree,lang,lin
-- | Linearize each tree in every language of the shell state; also
-- returns the list of language names.
mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String])
mkJustMultiTreebank opts sh ts =
  ([(prt_ t, [(la, lin la t) | la <- langs]) | t <- ts],langs) where
    langs = map prt_ $ allLanguages sh
    lin = linearize opts sh

--- these handy functions are borrowed from EmbedAPI
-- Linearizer for one language, selected by option:
-- -record, -table, -all, or plain untokenized linearization.
linearize opts mgr lang = lin where
  sgr = stateGrammarOfLangOpt False mgr zlang
  cgr = canModules mgr
  zlang = zIdent lang
  -- the untokenizer is customizable per grammar via options
  untok = customOrDefault (addOptions opts (stateOptions sgr)) useUntokenizer customUntokenizer sgr
  lin
    | oElem showRecord opts = err id id . liftM prt . linearizeNoMark cgr zlang
    | oElem tableLin opts =
        err id id . liftM (unlines . map untok . prLinTable True) . allLinTables True cgr zlang
    | oElem showAll opts =
        err id id . liftM (unlines . map untok . prLinTable False) . allLinTables False cgr zlang
    | otherwise = untok . linTree2string noMark cgr zlang

-- | Print a tree as its expression form.
showTree t = prt_ $ tree2exp t