1
0
forked from GitHub/gf-core

GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3

This commit is contained in:
aarne
2008-05-21 09:26:44 +00:00
parent 915a1de717
commit 055c0d0d5a
536 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,494 @@
----------------------------------------------------------------------
-- |
-- Module : Custom
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/16 10:21:21 $
-- > CVS $Author: peb $
-- > CVS $Revision: 1.85 $
--
-- A database for customizable GF shell commands.
--
-- databases for customizable commands. AR 21\/11\/2001.
-- for: grammar parsers, grammar printers, term commands, string commands.
-- idea: items added here are usable throughout GF; nothing else need be edited.
-- they are often usable through the API: hence API cannot be imported here!
--
-- Major redesign 3\/4\/2002: the first entry in each database is DEFAULT.
-- If no other value is given, the default is selected.
-- Because of this, two invariants have to be preserved:
--
-- - no databases may be empty
--
-- - additions are made to the end of the database
-----------------------------------------------------------------------------
module GF.UseGrammar.Custom where
import GF.Data.Operations
import GF.Text.Text
import GF.UseGrammar.Tokenize
import GF.Grammar.Values
import qualified GF.Grammar.Grammar as G
import qualified GF.Canon.AbsGFC as A
import qualified GF.Canon.GFC as C
import qualified GF.Devel.GFCCtoJS as JS
import GF.Canon.CanonToGFCC
import qualified GF.Devel.GFCCtoHaskell as CCH
import qualified GF.Source.AbsGF as GF
import qualified GF.Grammar.MMacros as MM
import GF.Grammar.AbsCompute
import GF.Grammar.TypeCheck
import GF.UseGrammar.Generate
import GF.UseGrammar.MatchTerm
import GF.UseGrammar.Linear (unoptimizeCanon)
------import Compile
import GF.Compile.ShellState
import GF.UseGrammar.Editing
import GF.UseGrammar.Paraphrases
import GF.Infra.Option
import GF.CF.CF
import GF.CF.CFIdent
import GF.Canon.CanonToGrammar
import GF.CF.PPrCF
import GF.CF.PrLBNF
import GF.Grammar.PrGrammar
import GF.Compile.PrOld
import GF.Canon.MkGFC
import GF.Speech.PrGSL (gslPrinter)
import GF.Speech.PrJSGF (jsgfPrinter)
import GF.Speech.PrSRGS
import GF.Speech.PrSRGS_ABNF
import qualified GF.Speech.SISR as SISR
import GF.Speech.PrSLF
import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter)
import GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter)
import GF.Speech.GrammarToVoiceXML (grammar2vxml)
import GF.Data.Zipper
import GF.UseGrammar.Statistics
import GF.UseGrammar.Morphology
import GF.UseGrammar.Information
import GF.API.GrammarToHaskell
import GF.API.GrammarToTransfer
-----import GrammarToCanon (showCanon, showCanonOpt)
-----import qualified GrammarToGFC as GFC
import GF.Probabilistic.Probabilistic (prProbs)
-- the cf parsing algorithms
import GF.CF.ChartParser -- OBSOLETE
import qualified GF.Parsing.CF as PCF
import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
-- grammar conversions -- peb 19/4-04
-- see also customGrammarPrinter
import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE
import qualified GF.Printing.PrintParser as PrtOld -- OBSOLETE
import qualified GF.Infra.Print as Prt
import qualified GF.Conversion.GFC as Cnv
import qualified GF.Conversion.Types as CnvTypes
import qualified GF.Conversion.Haskell as CnvHaskell
import qualified GF.Conversion.Prolog as CnvProlog
import qualified GF.Conversion.TypeGraph as CnvTypeGraph
import GF.Canon.Unparametrize
import GF.Canon.Subexpressions
import GF.Canon.AbsToBNF
import GF.Canon.GFC
import qualified GF.Canon.MkGFC as MC
import GF.CFGM.PrintCFGrammar (prCanonAsCFGM)
import GF.Visualization.VisualizeGrammar (visualizeCanonGrammar, visualizeSourceGrammar)
import GF.API.MyParser
import qualified GF.Infra.Modules as M
import GF.Infra.UseIO
import Control.Monad
import Data.Char
import Data.Maybe (fromMaybe)
-- character codings
import GF.Text.Unicode
import GF.Text.UTF8 (decodeUTF8)
import GF.Text.Greek (mkGreek)
import GF.Text.Arabic (mkArabic)
import GF.Text.Hebrew (mkHebrew)
import GF.Text.Russian (mkRussian, mkRusKOI8)
import GF.Text.Ethiopic (mkEthiopic)
import GF.Text.Tamil (mkTamil)
import GF.Text.OCSCyrillic (mkOCSCyrillic)
import GF.Text.LatinASupplement (mkLatinASupplement)
import GF.Text.Devanagari (mkDevanagari)
import GF.Text.Hiragana (mkJapanese)
import GF.Text.ExtendedArabic (mkArabic0600)
import GF.Text.ExtendedArabic (mkExtendedArabic)
import GF.Text.ExtraDiacritics (mkExtraDiacritics)
-- minimal version also used in Hugs. AR 2/12/2002.
-- databases for customizable commands. AR 21/11/2001
-- for: grammar parsers, grammar printers, term commands, string commands
-- idea: items added here are usable throughout GF; nothing else need be edited
-- they are often usable through the API: hence API cannot be imported here!
-- Major redesign 3/4/2002: the first entry in each database is DEFAULT.
-- If no other value is given, the default is selected.
-- Because of this, two invariants have to be preserved:
-- - no databases may be empty
-- - additions are made to the end of the database
-- * these are the databases; the comment gives the name of the flag
-- The databases are declared here and populated further down in this file;
-- the comment on each gives the shell flag that selects an entry.

-- | grammarFormat, \"-format=x\" or file suffix
customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar)

-- | grammarPrinter, \"-printer=x\"
customGrammarPrinter :: CustomData (Options -> StateGrammar -> String)

-- | multiGrammarPrinter, \"-printer=x\"
customMultiGrammarPrinter :: CustomData (Options -> CanonGrammar -> String)

-- | syntaxPrinter, \"-printer=x\"
customSyntaxPrinter :: CustomData (GF.Grammar -> String)

-- | termPrinter, \"-printer=x\"
customTermPrinter :: CustomData (StateGrammar -> Tree -> String)

-- | termCommand, \"-transform=x\"
customTermCommand :: CustomData (StateGrammar -> Tree -> [Tree])

-- | editCommand, \"-edit=x\"
customEditCommand :: CustomData (StateGrammar -> Action)

-- | filterString, \"-filter=x\"
customStringCommand :: CustomData (StateGrammar -> String -> String)

-- | useParser, \"-parser=x\"
customParser :: CustomData (StateGrammar -> CFCat -> CFParser)

-- | useTokenizer, \"-lexer=x\"
customTokenizer :: CustomData (StateGrammar -> String -> [[CFTok]])

-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
customUntokenizer :: CustomData (StateGrammar -> String -> String)

-- | uniCoding, \"-coding=x\"
--
-- contains conversions from different codings to the internal
-- unicode coding
customUniCoding :: CustomData (String -> String)
-- | this is the way of selecting an item: take the value chosen by the
-- option, or fall back to the database's default (its first entry) when
-- the option is absent or names no registered command.
customOrDefault :: Options -> OptFun -> CustomData a -> a
customOrDefault opts optfun db =
  fromMaybe (defaultCustomVal db) (customAsOptVal opts optfun db)

-- | to produce menus of custom operations: the database title together
-- with the names of its registered commands.
customInfo :: CustomData a -> (String, [String])
customInfo c = (titleCustomData c, map (ciStr . fst) (dbCustomData c))
-------------------------------
-- * types and stuff
-- | Identifier under which a command is registered in a database.
type CommandId = String

-- | Make a command identifier from a string.
strCI :: String -> CommandId
strCI s = s

-- | View a command identifier as a string.
ciStr :: CommandId -> String
ciStr s = s

-- | View a command identifier as a shell option.
ciOpt :: CommandId -> Option
ciOpt = iOpt

-- | A titled association list of commands. Per the module header, a
-- populated database is never empty and its first entry is the default.
newtype CustomData a = CustomData (String, [(CommandId,a)])

-- | Build a database from a title and its entries.
customData :: String -> [(CommandId, a)] -> CustomData a
customData title entries = CustomData (title, entries)

-- | The entries of a database.
dbCustomData :: CustomData a -> [(CommandId, a)]
dbCustomData (CustomData (_,entries)) = entries

-- | The title of a database.
titleCustomData :: CustomData a -> String
titleCustomData (CustomData (title,_)) = title

-- | Look up a command by its identifier (first match wins).
lookupCustom :: CustomData a -> CommandId -> Maybe a
lookupCustom db ci = lookup ci (dbCustomData db)

-- | Read the relevant option value and look it up in the database.
customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
customAsOptVal opts optfun db =
  getOptVal opts optfun >>= lookupCustom db . strCI

-- | take the first entry from the database
defaultCustomVal :: CustomData a -> a
defaultCustomVal (CustomData (title,entries)) =
  ifNull (error ("empty database:" +++ title)) (snd . head) entries
-------------------------------------------------------------------------
-- * and here's the customizable part:
-- grammar parsers: the ID is also used as file name suffix
-- | Grammar parser database, keyed by file name suffix.
-- NOTE(review): this database is currently EMPTY (its default entry is
-- commented out together with the Compile import), which violates the
-- module-header invariant "no databases may be empty": asking for its
-- default would hit the 'error' in 'defaultCustomVal'. Confirm whether
-- the "gf" entry should be restored.
customGrammarParser =
  customData "Grammar parsers, selected by file name suffix" $
  [
   ------ (strCI "gf", compileModule noOptions) -- DEFAULT
   -- add your own grammar parsers here
  ]
-- | Grammar printer database (flag @-printer=x@): each entry renders one
-- 'StateGrammar' to a 'String', given the current 'Options' (most entries
-- ignore the options). The first entry ("gfc") is the default.
customGrammarPrinter =
  customData "Grammar printers, selected by option -printer=x" $
  [
    (strCI "gfc", \_ -> prCanon . stateGrammarST) -- DEFAULT
   ,(strCI "gf", \_ -> err id prGrammar . canon2sourceGrammar . stateGrammarST)
   ,(strCI "cf", \_ -> prCF . stateCF)
   ,(strCI "old", \_ -> printGrammarOld . stateGrammarST)
   -- speech-grammar formats:
   ,(strCI "gsl", gslPrinter)
   ,(strCI "jsgf", jsgfPrinter Nothing)
   ,(strCI "jsgf_sisr_old", jsgfPrinter (Just SISR.SISROld))
   ,(strCI "srgs_xml", srgsXmlPrinter Nothing False)
   ,(strCI "srgs_xml_non_rec", srgsXmlNonRecursivePrinter)
   ,(strCI "srgs_xml_prob", srgsXmlPrinter Nothing True)
   ,(strCI "srgs_xml_sisr_old", srgsXmlPrinter (Just SISR.SISROld) False)
   ,(strCI "srgs_abnf", srgsAbnfPrinter Nothing False)
   ,(strCI "srgs_abnf_non_rec", srgsAbnfNonRecursivePrinter)
   ,(strCI "srgs_abnf_sisr_old", srgsAbnfPrinter (Just SISR.SISROld) False)
   ,(strCI "vxml", grammar2vxml)
   ,(strCI "slf", slfPrinter)
   ,(strCI "slf_graphviz", slfGraphvizPrinter)
   ,(strCI "slf_sub", slfSubPrinter)
   ,(strCI "slf_sub_graphviz", slfSubGraphvizPrinter)
   ,(strCI "fa_graphviz", faGraphvizPrinter)
   ,(strCI "fa_c", faCPrinter)
   ,(strCI "regexp", regexpPrinter)
   ,(strCI "regexps", multiRegexpPrinter)
   ,(strCI "regular", regularPrinter)
   -- BNF variants:
   ,(strCI "plbnf", \_ -> prLBNF True)
   ,(strCI "lbnf", \_ -> prLBNF False)
   ,(strCI "bnf", \_ -> prBNF False)
   ,(strCI "absbnf", \_ -> abstract2bnf . stateGrammarST)
   -- code generation:
   ,(strCI "haskell", \_ -> grammar2haskell . stateGrammarST)
   ,(strCI "gfcc_haskell", \opts -> CCH.grammar2haskell .
                                    canon2gfcc opts . stateGrammarST)
   ,(strCI "haskell_gadt", \_ -> grammar2haskellGADT . stateGrammarST)
   ,(strCI "transfer", \_ -> grammar2transfer . stateGrammarST)
   -- inspection of the shell state:
   ,(strCI "morpho", \_ -> prMorpho . stateMorpho)
   ,(strCI "fullform",\_ -> prFullForm . stateMorpho)
   ,(strCI "opts", \_ -> prOpts . stateOptions)
   ,(strCI "words", \_ -> unwords . stateGrammarWords)
   ,(strCI "printnames", \_ -> C.prPrintnamesGrammar . stateGrammarST)
   ,(strCI "stat", \_ -> prStatistics . stateGrammarST)
   ,(strCI "probs", \_ -> prProbs . stateProbs)
   ,(strCI "unpar", \_ -> prCanon . unparametrizeCanon . stateGrammarST)
   ,(strCI "subs", \_ -> prSubtermStat . stateGrammarST)
   {- ----
   (strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT
   ,(strCI "canon", showCanon "Lang" . stateGrammarST)
   ,(strCI "gfc", GFC.showGFC . stateGrammarST)
   ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST)
   -}
   -- add your own grammar printers here
   -- grammar conversions:
   ,(strCI "mcfg", \_ -> Prt.prt . stateMCFG)
   ,(strCI "fcfg", \_ -> Prt.prt . fst . stateFCFG)
   ,(strCI "cfg", \_ -> Prt.prt . stateCFG)
   ,(strCI "pinfo", \_ -> Prt.prt . statePInfo)
   ,(strCI "abstract", \_ -> Prt.prtAfter "\n" . Cnv.gfc2abstract . stateGrammarLang)
   ,(strCI "functiongraph",\_ -> CnvTypeGraph.prtFunctionGraph . Cnv.gfc2simple noOptions . stateGrammarLang)
   ,(strCI "typegraph", \_ -> CnvTypeGraph.prtTypeGraph . Cnv.gfc2simple noOptions . stateGrammarLang)
   ,(strCI "gfc-haskell", \_ -> CnvHaskell.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
   ,(strCI "mcfg-haskell", \_ -> CnvHaskell.prtMGrammar . stateMCFG)
   ,(strCI "cfg-haskell", \_ -> CnvHaskell.prtCGrammar . stateCFG)
   ,(strCI "gfc-prolog", \_ -> CnvProlog.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
   ,(strCI "mcfg-prolog", \_ -> CnvProlog.prtMGrammar . stateMCFG)
   ,(strCI "cfg-prolog", \_ -> CnvProlog.prtCGrammar . stateCFG)
   -- obsolete, or only for testing:
   ,(strCI "abs-skvatt", \_ -> Cnv.abstract2skvatt . Cnv.gfc2abstract . stateGrammarLang)
   ,(strCI "cfg-skvatt", \_ -> Cnv.cfg2skvatt . stateCFG)
   ,(strCI "simple", \_ -> Prt.prt . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
   ,(strCI "mcfg-erasing", \_ -> Prt.prt . fst . snd . uncurry Cnv.convertGFC . stateGrammarLangOpts)
   -- ,(strCI "mcfg-old", PrtOld.prt . CnvOld.mcfg . statePInfoOld)
   -- ,(strCI "cfg-old", PrtOld.prt . CnvOld.cfg . statePInfoOld)
  ]
  -- pair the options with the language grammar, as several converters need both
  where stateGrammarLangOpts s = (stateOptions s, stateGrammarLang s)
-- | Printers for whole multilingual grammars (flag @-printer=x@);
-- the first entry ("gfcm") is the default.
customMultiGrammarPrinter =
  customData "Printers for multiple grammars, selected by option -printer=x" $
  [
    (strCI "gfcm", const MC.prCanon)
   ,(strCI "gfcc", canon2gfccPr)
   ,(strCI "js", \opts -> JS.gfcc2js . canon2gfcc opts)
   ,(strCI "header", const (MC.prCanonMGr . unoptimizeCanon))
   ,(strCI "cfgm", prCanonAsCFGM)
   ,(strCI "graph", visualizeCanonGrammar)
   ,(strCI "missing", const missingLinCanonGrammar)
   -- to prolog format:
   ,(strCI "gfc-prolog", CnvProlog.prtSMulti)
   ,(strCI "mcfg-prolog", CnvProlog.prtMMulti)
   ,(strCI "cfg-prolog", CnvProlog.prtCMulti)
  ]

-- | Source-syntax printer database (flag @-printer=x@).
-- NOTE(review): currently EMPTY, violating the "no databases may be empty"
-- invariant; taking its default would 'error' in 'defaultCustomVal'.
customSyntaxPrinter =
  customData "Syntax printers, selected by option -printer=x" $
  [
   -- add your own grammar printers here
  ]

-- | Term printer database (flag @-printer=x@); default is "gf".
customTermPrinter =
  customData "Term printers, selected by option -printer=x" $
  [
    (strCI "gf", const prt) -- DEFAULT
   -- add your own term printers here
  ]
-- | Term transformer database (flag @-transform=x@): each entry maps a
-- grammar and a tree to a list of result trees (empty = rejected,
-- singleton = transformed). The first entry ("identity") is the default.
customTermCommand =
  customData "Term transformers, selected by option -transform=x" $
  [
    (strCI "identity", \_ t -> [t]) -- DEFAULT
   ,(strCI "compute", \g t -> let gr = grammar g in
                              err (const [t]) return
                                (exp2termCommand gr (computeAbsTerm gr) t))
   -- reject trees with duplicated identifiers/atoms:
   ,(strCI "nodup", \_ t -> if (hasDupIdent $ tree2exp t) then [] else [t])
   ,(strCI "nodupatom", \_ t -> if (hasDupAtom $ tree2exp t) then [] else [t])
   ,(strCI "paraphrase", \g t -> let gr = grammar g in
                                 exp2termlistCommand gr (mkParaphrases gr) t)
   -- note: in "generate", the t in (Just t) is the outer tree argument;
   -- the comprehension's generator binding t shadows it afterwards.
   ,(strCI "generate", \g t -> let gr = grammar g
                                   cat = actCat $ tree2loc t --- not needed
                               in
                               [tr | t <- generateTrees noOptions gr cat 2 Nothing (Just t),
                                     Ok tr <- [annotate gr $ MM.qualifTerm (absId g) t]])
   ,(strCI "typecheck", \g t -> err (const []) (return . loc2tree)
                                  (reCheckStateReject (grammar g) (tree2loc t)))
   ,(strCI "solve", \g t -> err (const []) (return . loc2tree)
                              (solveAll (grammar g) (tree2loc t)
                               >>= rejectUnsolvable))
   ,(strCI "context", \g t -> err (const [t]) (return . loc2tree)
                                (contextRefinements (grammar g) (tree2loc t)))
   ,(strCI "reindex", \g t -> let gr = grammar g in
                              err (const [t]) return
                                (exp2termCommand gr (return . MM.reindexTerm) t))
   --- ,(strCI "delete", \g t -> [MM.mExp0])
   -- add your own term commands here
  ]
-- | Editor-state transformer database (flag @-edit=x@); the first entry
-- ("identity") is the default. The entries marked "done ad hoc on top
-- level" are identity here; per the original comments the real work is
-- apparently performed at the top level of the shell.
customEditCommand =
  customData "Editor state transformers, selected by option -edit=x" $
  [
    (strCI "identity", const return) -- DEFAULT
   ,(strCI "typecheck", \g -> reCheckState (grammar g))
   ,(strCI "solve", \g -> solveAll (grammar g))
   ,(strCI "context", \g -> contextRefinements (grammar g))
   ,(strCI "compute", \g -> computeSubTree (grammar g))
   ,(strCI "paraphrase", const return) --- done ad hoc on top level
   ,(strCI "generate", const return) --- done ad hoc on top level
   ,(strCI "transfer", const return) --- done ad hoc on top level
   -- add your own edit commands here
  ]
-- | String filter database (flag @-filter=x@): each entry maps a name to
-- a string transformer; the grammar argument is ignored by all of them.
-- The first entry ("identity") is the default.
customStringCommand =
  customData "String filters, selected by option -filter=x" $
  [ (strCI "identity", \_ -> id) -- DEFAULT
  , (strCI "erase",    \_ -> const "")
  , (strCI "take100",  \_ -> take 100)
  , (strCI "text",     \_ -> formatAsText)
  , (strCI "code",     \_ -> formatAsCode)
  ---- ,(strCI "latexfile", const $ mkLatexFile)
  , (strCI "length",   \_ -> show . length)
  -- add your own string commands here
  ]
-- | Parser database (flag @-parser=x@); the first entry ("chart",
-- deprecated) is the default.
customParser =
  customData "Parsers, selected by option -parser=x" $
  [
    (strCI "chart", PCFOld.parse "ibn" . stateCF) -- DEPRECATED
   ,(strCI "bottomup", PCF.parse "gb" . stateCF)
   ,(strCI "topdown", PCF.parse "gt" . stateCF)
   -- commented for now, since there's a bug in the incremental algorithm:
   -- ,(strCI "incremental", PCF.parse "ib" . stateCF)
   -- ,(strCI "incremental-bottomup", PCF.parse "ib" . stateCF)
   -- ,(strCI "incremental-topdown", PCF.parse "it" . stateCF)
   ,(strCI "old", chartParser . stateCF) -- DEPRECATED
   ,(strCI "myparser", myParser)
   -- add your own parsers here
  ]

-- | Tokenizer database (flag @-lexer=x@): per the type at the top of the
-- file, each tokenizer maps a string to a list of token-list alternatives;
-- sg wraps one tokenization as the single alternative. Default is "words".
customTokenizer =
  let sg = singleton in
  customData "Tokenizers, selected by option -lexer=x" $
  [
    (strCI "words", const $ sg . tokWords)
   ,(strCI "literals", const $ sg . tokLits)
   ,(strCI "vars", const $ sg . tokVars)
   ,(strCI "chars", const $ sg . map (tS . singleton))
   ,(strCI "code", const $ sg . lexHaskell)
   ,(strCI "codevars", \gr -> sg . (lexHaskellVar $ stateIsWord gr))
   ,(strCI "textvars", \gr -> sg . (lexTextVar $ stateIsWord gr))
   ,(strCI "text", const $ sg . lexText)
   ,(strCI "unglue", \gr -> sg . map tS . decomposeWords (stateMorpho gr))
   ,(strCI "codelit", \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
   ,(strCI "textlit", \gr -> sg . (lexTextLiteral $ stateIsWord gr))
   ,(strCI "codeC", const $ sg . lexC2M)
   ,(strCI "ignore", \gr -> sg . lexIgnore (stateIsWord gr) . tokLits)
   ,(strCI "subseqs", \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits)
   ,(strCI "codeCHigh", const $ sg . lexC2M' True)
   -- add your own tokenizers here
  ]
-- | Untokenizer database (flag @-unlexer=x@): post-processing from the
-- linearized token string to the output string; the grammar argument is
-- ignored by all entries. The first entry ("unwords") is the default.
customUntokenizer =
  customData "Untokenizers, selected by option -unlexer=x" $
  [
    (strCI "unwords", const $ id) -- DEFAULT
   ,(strCI "text", const $ formatAsText)
   ,(strCI "html", const $ formatAsHTML)
   ,(strCI "latex", const $ formatAsLatex)
   ,(strCI "code", const $ formatAsCode)
   ,(strCI "concat", const $ filter (not . isSpace))
   ,(strCI "textlit", const $ formatAsTextLit)
   ,(strCI "codelit", const $ formatAsCodeLit)
   -- BUG FIX: a duplicate entry (strCI "concat", const $ concatRemSpace)
   -- used to follow here. 'lookupCustom' uses 'lookup', which always finds
   -- the first "concat" above, so the duplicate was unreachable and only
   -- made "concat" appear twice in 'customInfo' menus. Removed:
   -- ,(strCI "concat", const $ concatRemSpace)
   ,(strCI "glue", const $ performBinds)
   ,(strCI "finnish", const $ performBindsFinnish)
   ,(strCI "reverse", const $ reverse)
   ,(strCI "bind", const $ performBinds) -- backward compat
   -- add your own untokenizers here
  ]
-- | Character coding database (flag @-coding=x@): conversions from input
-- encodings to the internal unicode coding. The default ("latin1") is the
-- identity, i.e. input is passed through unchanged.
customUniCoding =
  customData "Alphabet codings, selected by option -coding=x" $
  [
    (strCI "latin1", id) -- DEFAULT
   ,(strCI "utf8", decodeUTF8)
   ,(strCI "greek", treat [] mkGreek)
   ,(strCI "hebrew", mkHebrew)
   ,(strCI "arabic", mkArabic)
   ,(strCI "russian", treat [] mkRussian)
   ,(strCI "russianKOI8", mkRusKOI8)
   ,(strCI "ethiopic", mkEthiopic)
   ,(strCI "tamil", mkTamil)
   ,(strCI "OCScyrillic", mkOCSCyrillic)
   ,(strCI "devanagari", mkDevanagari)
   ,(strCI "latinasupplement", mkLatinASupplement)
   ,(strCI "japanese", mkJapanese)
   ,(strCI "arabic0600", mkArabic0600)
   ,(strCI "extendedarabic", mkExtendedArabic)
   ,(strCI "extradiacritics", mkExtraDiacritics)
  ]

View File

@@ -0,0 +1,435 @@
----------------------------------------------------------------------
-- |
-- Module : Editing
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:45 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.14 $
--
-- generic tree editing, with some grammar notions assumed. AR 18\/8\/2001.
-- 19\/6\/2003 for GFC
-----------------------------------------------------------------------------
module GF.UseGrammar.Editing where
import GF.Grammar.Abstract
import qualified GF.Canon.GFC as GFC
import GF.Grammar.TypeCheck
import GF.Grammar.LookAbs
import GF.Grammar.AbsCompute
import GF.Grammar.Macros (errorCat)
import GF.Data.Operations
import GF.Data.Zipper
-- generic tree editing, with some grammar notions assumed. AR 18/8/2001
-- 19/6/2003 for GFC
-- | Canonical grammar used by the editor.
type CGrammar = GFC.CanonGrammar

-- | An editing state: a zipper location focused somewhere in the tree.
type State = Loc TrNode

-- | the "empty" state
initState :: State
initState = tree2loc uTree

-- | True when the focus is at the root of the tree.
isRootState :: State -> Bool
isRootState st =
  case actPath st of
    Top -> True
    _   -> False

-- | The subtree currently in focus.
actTree :: State -> Tree
actTree (Loc (tr,_)) = tr

-- | The path from the root down to the focus.
actPath :: State -> Path TrNode
actPath (Loc (_,path)) = path

-- | The value stored at the focused node.
actVal :: State -> Val
actVal st = valNode (nodeTree (actTree st))

-- | Category of the focus; falls back to 'errorCat' if the value has none.
actCat :: State -> Cat
actCat st = errVal errorCat (val2cat (actVal st)) ---- undef

-- | Atom (head symbol) of the focused node.
actAtom :: State -> Atom
actAtom st = atomTree (actTree st)

-- | Head function of the focus, failing when the atom is not a function.
actFun :: State -> Err Fun
actFun st =
  case actAtom st of
    AtC f -> return f
    other -> prtBad "active atom: expected function, found" other

-- | The focused subtree as an expression.
actExp :: State -> Exp
actExp st = tree2exp (actTree st)

-- | current local bindings
actBinds :: State -> Binds
actBinds st = bindsNode (nodeTree (actTree st))

-- | constraints in current subtree
actConstrs :: State -> Constraints
actConstrs st = allConstrsTree (actTree st)

-- | constraints in the whole tree
allConstrs :: State -> Constraints
allConstrs st = allConstrsTree (loc2tree st)

-- | metas in current subtree
actMetas :: State -> [Meta]
actMetas st = metasTree (actTree st)

-- | metas in the whole tree
allMetas :: State -> [Meta]
allMetas st = metasTree (loc2tree st)

-- | Body of the focused subtree.
actTreeBody :: State -> Tree
actTreeBody st = bodyTree (actTree st)

-- | Bindings introduced on the path above the focus.
allPrevBinds :: State -> Binds
allPrevBinds st = concatMap bindsNode (traverseCollect (actPath st))

-- | Local bindings followed by all bindings above the focus.
allBinds :: State -> Binds
allBinds st = actBinds st ++ allPrevBinds st

-- | symbol generator for VGen
actGen :: State -> Int
actGen st = length (allBinds st)

-- | Variables bound above the focus.
allPrevVars :: State -> [Var]
allPrevVars st = map fst (allPrevBinds st)

-- | All variables in scope at the focus.
allVars :: State -> [Var]
allVars st = map fst (allBinds st)

-- | Index for the next generated variable (same count as 'actGen').
vGenIndex :: State -> Int
vGenIndex st = length (allBinds st)

-- | Is the focused atom a metavariable?
actIsMeta :: State -> Bool
actIsMeta st = atomIsMeta (actAtom st)

-- | The focused metavariable, failing when the focus is not a meta.
actMeta :: State -> Err Meta
actMeta st = getMetaAtom (actAtom st)

-- | meta substs are not only on the actual path...
entireMetaSubst :: State -> MetaSubst
entireMetaSubst st = concatMap metaSubstsNode (scanTree (loc2tree st))
-- | A tree is complete when no node of it carries a metavariable atom.
-- (Was: null . filter atomIsMeta . map atomNode . scanTree.)
isCompleteTree :: Tree -> Bool
isCompleteTree = not . any (atomIsMeta . atomNode) . scanTree

-- | A state is complete when its whole tree is.
isCompleteState :: State -> Bool
isCompleteState = isCompleteTree . loc2tree
-- | Build an initial state: a single node holding a fresh metavariable
-- atom with the value of the given category in the given context.
initStateCat :: Context -> Cat -> Err State
initStateCat cont cat = do
  return $ tree2loc (Tr (mkNode [] mAtom (cat2val cont cat) ([],[]), []))

-- | this function only concerns the body of an expression...
annotateInState :: CGrammar -> Exp -> State -> Err Tree
annotateInState gr exp state = do
  let binds = allBinds state
      val = actVal state
  annotateIn gr binds exp (Just val)

-- | ...whereas this one works with lambda abstractions
annotateExpInState :: CGrammar -> Exp -> State -> Err Tree
annotateExpInState gr exp state = do
  let cont = allPrevBinds state -- NOTE(review): cont is bound but unused here
      binds = actBinds state
      val = actVal state
  typ <- mkProdVal binds val
  annotateIn gr binds exp (Just typ)

-- | Transform an expression and re-annotate the result in the given state.
treeByExp :: (Exp -> Err Exp) -> CGrammar -> Exp -> State -> Err Tree
treeByExp trans gr exp0 state = do
  exp <- trans exp0
  annotateExpInState gr exp state
-- * actions
-- | An editor action: a partial (failable) transformation of the state.
type Action = State -> Err State

-- | Start editing a fresh tree of the given category.
newCat :: CGrammar -> Cat -> Action
newCat gr cat@(m,c) _ = do
  cont <- lookupCatContext gr m c
  testErr (null cont) "start cat must have null context" -- for easier meta refresh
  initStateCat cont cat

-- | Start editing a fresh tree whose root is the given function; the
-- start category is the value category of the function's type.
newFun :: CGrammar -> Fun -> Action
newFun gr fun@(m,c) _ = do
  typ <- lookupFunType gr m c
  cat <- valCat typ
  st1 <- newCat gr cat initState
  refineWithAtom True gr (qq fun) st1

-- | Replace the whole state with the given tree, focused at its root.
newTree :: Tree -> Action
newTree t _ = return $ tree2loc t

-- | Type check an expression (with refreshed metas) and make it the state.
newExpTC :: CGrammar -> Exp -> Action
newExpTC gr t s = annotate gr (refreshMetas [] t) >>= flip newTree s

-- Focus-moving actions over metavariables.
goNextMeta, goPrevMeta, goNextNewMeta, goPrevNewMeta, goNextMetaIfCan :: Action
goNextMeta = repeatUntilErr actIsMeta goAhead -- can be the location itself
goPrevMeta = repeatUntilErr actIsMeta goBack
goNextNewMeta s = goAhead s >>= goNextMeta -- always goes away from location
goPrevNewMeta s = goBack s >>= goPrevMeta
goNextMetaIfCan = actionIfPossible goNextMeta

-- | Run an action, keeping the original state when the action fails.
actionIfPossible :: Action -> Action
actionIfPossible a s = return $ errVal s (a s)

goFirstMeta, goLastMeta :: Action
goFirstMeta s = goNextMeta $ goRoot s
goLastMeta s = goLast s >>= goPrevMeta

-- | True when 'goNextMeta' fails, i.e. no meta is reachable from here on.
noMoreMetas :: State -> Bool
noMoreMetas = err (const True) (const False) . goNextMeta
-- | Overwrite the focused subtree with the given tree.
replaceSubTree :: Tree -> Action
replaceSubTree tree state = changeLoc state tree

-- | If the focus is a metavariable, refine it with the tree; otherwise
-- replace the focused subtree outright and re-typecheck the whole state.
refineOrReplaceWithTree :: Bool -> CGrammar -> Tree -> Action
refineOrReplaceWithTree der gr tree state = case actMeta state of
  Ok m -> refineWithTreeReal der gr tree m state
  _ -> do
    let tree1 = addBinds (actBinds state) $ tree
    state' <- replaceSubTree tree1 state
    reCheckState gr state'

-- | Refine the focused metavariable with a tree; fails when the focus
-- is not a metavariable.
refineWithTree :: Bool -> CGrammar -> Tree -> Action
refineWithTree der gr tree state = do
  m <- errIn "move pointer to meta" $ actMeta state
  refineWithTreeReal der gr tree m state

-- | Worker for refinement: substitute the tree for the meta, split the
-- resulting constraints into a meta substitution, and propagate that
-- substitution (with constraint reduction) over the whole tree.
refineWithTreeReal :: Bool -> CGrammar -> Tree -> Meta -> Action
refineWithTreeReal der gr tree m state = do
  state' <- replaceSubTree tree state
  let cs0 = allConstrs state'
      (cs,ms) = splitConstraints gr cs0 -- NOTE(review): cs is unused here
      v = vClos $ tree2exp (bodyTree tree)
      msubst = (m,v) : ms
  metaSubstRefinements gr msubst $
    mapLoc (reduceConstraintsNode gr . performMetaSubstNode msubst) state'
-- without dep. types, no constraints, no grammar needed - simply: do
--   testErr (actIsMeta state) "move pointer to meta"
--   replaceSubTree tree state
-- | Apply a refinement action at every metavariable reachable in the tree,
-- counting successes; if no refinement succeeded, return the original
-- state, otherwise move the focus back to the first remaining meta.
refineAllNodes :: Action -> Action
refineAllNodes act state = do
  let estate0 = goFirstMeta state
  case estate0 of
    Bad _ -> return state
    Ok state0 -> do
      (state',n) <- tryRefine 0 state0
      if n==0
        then return state
        else actionIfPossible goFirstMeta state'
  where
    tryRefine n state = err (const $ return (state,n)) return $ do
      state' <- goNextMeta state
      -- NOTE(review): meta is bound but unused; it appears to act only as
      -- a sanity guard (fails when the focus is not a meta).
      meta <- actMeta state'
      case act state' of
        Ok state2 -> tryRefine (n+1) state2
        _ -> err (const $ return (state',n)) return $ do
          state2 <- goNextNewMeta state'
          tryRefine n state2

-- | Refine every meta that has a unique refinement.
uniqueRefinements :: CGrammar -> Action
uniqueRefinements = refineAllNodes . uniqueRefine

-- | Refine every meta determined by the given meta substitution.
metaSubstRefinements :: CGrammar -> MetaSubst -> Action
metaSubstRefinements gr = refineAllNodes . metaSubstRefine gr

-- | Refine every meta that has a unique variable refinement in context.
contextRefinements :: CGrammar -> Action
contextRefinements gr = refineAllNodes contextRefine where
  contextRefine state = case varRefinementsState state of
    [(e,_)] -> refineWithAtom False gr e state
    _ -> Bad "no unique refinement in context"
  varRefinementsState state =
    [r | r@(e,_) <- refinementsState gr state, isVariable e]
-- | Refine the focused meta when the grammar offers exactly one
-- refinement. A single candidate whose flag (the Bool in the refinement
-- info) marks it as circular is rejected rather than applied.
-- (Fixed: the first alternative bound an unused variable e.)
uniqueRefine :: CGrammar -> Action
uniqueRefine gr state = case refinementsState gr state of
  [(_,(_,True))] -> Bad "only circular refinement"
  [(e,_)] -> refineWithAtom False gr e state
  _ -> Bad "no unique refinement"
-- | Refine the focused metavariable using a meta substitution: look up
-- the meta's value and refine with the corresponding expression.
metaSubstRefine :: CGrammar -> MetaSubst -> Action
metaSubstRefine gr msubst state = do
  m <- errIn "move pointer to meta" $ actMeta state
  case lookup m msubst of
    Just v -> do
      e <- val2expSafe v
      refineWithExpTC False gr e state
    _ -> Bad "no metavariable substitution available"

-- | Refine the focus with a type-checked expression; the expression's
-- metas are refreshed so they cannot clash with metas already in the tree.
refineWithExpTC :: Bool -> CGrammar -> Exp -> Action
refineWithExpTC der gr exp0 state = do
  let oldmetas = allMetas state
      exp = refreshMetas oldmetas exp0
  tree0 <- annotateInState gr exp state
  let tree = addBinds (actBinds state) $ tree0
  refineWithTree der gr tree state

-- | Refine the focus with an atom: look up its type, build the
-- corresponding expression (with fresh variables), then type check it.
refineWithAtom :: Bool -> CGrammar -> Ref -> Action -- function or variable
refineWithAtom der gr at state = do
  val <- lookupRef gr (allBinds state) at
  typ <- val2exp val
  let oldvars = allVars state
  exp <- ref2exp oldvars typ at
  refineWithExpTC der gr exp state
-- | in this command, we know that the result is well-typed, since computation
-- rules have been type checked and the result is equal
computeSubTree :: CGrammar -> Action
computeSubTree gr state = do
  let exp = tree2exp (actTree state)
  tree <- treeByExp (compute gr) gr exp state
  replaceSubTree tree state

-- | but here we don't, since the transfer flag isn't type checked,
-- and computing the transfer function is not checked to preserve equality;
-- hence the whole state is re-typechecked afterwards. With no transfer
-- function the state is returned unchanged.
transferSubTree :: Maybe Fun -> CGrammar -> Action
transferSubTree Nothing _ s = return s
transferSubTree (Just fun) gr state = do
  let exp = mkApp (qq fun) [tree2exp $ actTree state]
  tree <- treeByExp (compute gr) gr exp state
  state' <- replaceSubTree tree state
  reCheckState gr state'

-- | Delete the focused subtree: at the root this restarts editing in the
-- current category, elsewhere the focus is replaced by a fresh meta.
deleteSubTree :: CGrammar -> Action
deleteSubTree gr state =
  if isRootState state
    then do
      let cat = actCat state
      newCat gr cat state
    else do
      let metas = allMetas state
          binds = actBinds state
          exp = refreshMetas metas mExp0
      tree <- annotateInState gr exp state
      state' <- replaceSubTree (addBinds binds tree) state
      reCheckState gr state' --- must be unfortunately done. 20/11/2001
-- | Wrap the focused tree body as the i:th argument of the given function,
-- with fresh metas for the remaining arguments; re-typecheck afterwards.
wrapWithFun :: CGrammar -> (Fun,Int) -> Action
wrapWithFun gr (f@(m,c),i) state = do
  typ <- lookupFunType gr m c
  let olds = allPrevVars state
      oldmetas = allMetas state
  exp0 <- fun2wrap olds ((f,i),typ) (tree2exp (actTreeBody state))
  let exp = refreshMetas oldmetas exp0
  tree0 <- annotateInState gr exp state
  let tree = addBinds (actBinds state) $ tree0
  state' <- replaceSubTree tree state
  reCheckState gr state' --- must be unfortunately done. 20/11/2001

-- | Rename a bound variable (x to x') in the focused subtree, checking
-- that the new name clashes neither with bindings above the focus nor
-- with the other local bindings.
alphaConvert :: CGrammar -> (Var,Var) -> Action
alphaConvert gr (x,x') state = do
  let oldvars = allPrevVars state
  testErr (notElem x' oldvars) ("clash with previous bindings" +++ show x')
  let binds0 = actBinds state
      vars0 = map fst binds0
  testErr (notElem x' vars0) ("clash with other bindings" +++ show x')
  let binds = [(if z==x then x' else z, t) | (z,t) <- binds0]
      vars = map fst binds
  exp' <- alphaConv (vars ++ oldvars) (x,x') (tree2exp (actTreeBody state))
  let exp = mkAbs vars exp'
  tree <- annotateExpInState gr exp state
  replaceSubTree tree state

-- | Change the head atom of the focus to the given function.
changeFunHead :: CGrammar -> Fun -> Action
changeFunHead gr f state = do
  let state' = changeNode (changeAtom (const (atomC f))) state
  reCheckState gr state' --- must be done because of constraints elsewhere

-- | Replace the focus by its i:th subtree (peeling off the head).
-- NOTE(review): f, m and c are bound but unused; only i is used.
peelFunHead :: CGrammar -> (Fun,Int) -> Action
peelFunHead gr (f@(m,c),i) state = do
  tree0 <- nthSubtree i $ actTree state
  let tree = addBinds (actBinds state) $ tree0
  state' <- replaceSubTree tree state
  reCheckState gr state' --- must be unfortunately done. 20/11/2001
-- | Re-annotate (re-typecheck) the whole tree; an expensive operation.
reCheckState :: CGrammar -> State -> Err State
reCheckState gr st = do
  checked <- annotate gr (tree2exp (loc2tree st))
  return (tree2loc checked)

-- | Like 'reCheckState', but returns Bad instead of a tree that still
-- carries unsolvable constraints.
reCheckStateReject :: CGrammar -> State -> Err State
reCheckStateReject gr st = reCheckState gr st >>= rejectUnsolvable

-- | Fail when the focused node still carries constraints.
rejectUnsolvable :: State -> Err State
rejectUnsolvable st
  | null cs = return st
  | otherwise = Bad $ "Unsolvable constraints:" +++ prConstraints cs
  where
    cs = constrsNode (nodeTree (actTree st))
-- | extract metasubstitutions from constraints and solve them
-- (re-typecheck, split the constraints into a meta substitution, then
-- propagate it over the tree; the whole pass is run twice)
solveAll :: CGrammar -> State -> Err State
solveAll gr st = solve st >>= solve where
  solve st0 = do ---- why need twice?
    st <- reCheckState gr st0
    let cs0 = allConstrs st
        (cs,ms) = splitConstraints gr cs0 -- NOTE(review): cs is unused here
    metaSubstRefinements gr ms $
      mapLoc (reduceConstraintsNode gr . performMetaSubstNode ms) st

-- * active refinements

-- | Possible refinements of the focused metavariable, each with its value
-- and a circularity flag; empty when the focus is not a meta.
refinementsState :: CGrammar -> State -> [(Term,(Val,Bool))]
refinementsState gr state =
  let filt = possibleRefVal gr state in
  if actIsMeta state
    then refsForType filt gr (allBinds state) (actVal state)
    else []
-- | Functions (with argument positions) that could wrap the focused tree:
-- all functions on the active value, filtered by the possibility
-- heuristic except at the root; empty when the focus is a metavariable.
wrappingsState :: CGrammar -> State -> [((Fun,Int),Type)]
wrappingsState gr state
  | actIsMeta state = []
  | isRootState state = funs
  | otherwise = [rule | rule@(_,typ) <- funs, possibleRefVal gr state aval typ]
  where
    funs = funsOnType (possibleRefVal gr state) gr aval
    aval = actVal state

-- | (function, argument index) pairs by which the focused tree could be
-- peeled; empty when the focus is a metavariable. At the root every
-- argument position of the head function qualifies; elsewhere only those
-- that pass the possibility heuristic.
peelingsState :: CGrammar -> State -> [(Fun,Int)]
peelingsState gr state
  | actIsMeta state = []
  | isRootState state =
      err (const []) (\f -> [(f,i) | i <- [0 .. arityTree tree - 1]]) $ actFun state
  | otherwise =
      err (const [])
          (\f -> [fi | (fi@(g,_),typ) <- funs,
                       possibleRefVal gr state aval typ,g==f]) $ actFun state
  where
    funs = funsOnType (possibleRefVal gr state) gr aval
    aval = actVal state
    tree = actTree state

-- | Alternative head functions with exactly the same type as the current
-- head; empty when the head is not a function.
headChangesState :: CGrammar -> State -> [Fun]
headChangesState gr state = errVal [] $ do
  f@(m,c) <- funAtom (actAtom state)
  typ0 <- lookupFunType gr m c
  return [fun | (fun,typ) <- funRulesOf gr, fun /= f, typ == typ0]
--- alpha-conv !
-- | Heuristic test whether a refinement of the given type could fill a
-- hole expecting the given value: build the corresponding constraint and
-- ask whether it is possibly solvable. On failure answer True. --- was False
-- (Fixed: "cs <- return [...]" replaced by a let; the unused binding
-- "gen = actGen state" was removed -- the commented-out precise check
-- would have been: eqVal (actGen state) val (vClos vtyp).)
possibleRefVal :: CGrammar -> State -> Val -> Type -> Bool
possibleRefVal gr state val typ = errVal True $ do
  vtyp <- valType typ
  let cs = [(val, vClos vtyp)] --- only poss cs
  return $ possibleConstraints gr cs --- a simple heuristic
-- | Heuristic test whether the given tree could fit the focused hole:
-- pair the active value with the tree's value as a constraint and ask
-- whether it is possibly solvable. On failure answer True. --- was False
-- (Fixed: "cs <- return [...]" replaced by a let; the unused binding
-- "gen = actGen state" was removed -- the commented-out precise check
-- would have been: eqVal gen aval gval.)
possibleTreeVal :: CGrammar -> State -> Tree -> Bool
possibleTreeVal gr state tree = errVal True $ do
  let aval = actVal state
      gval = valTree tree
      cs = [(aval, gval)] --- only poss cs
  return $ possibleConstraints gr cs --- a simple heuristic

View File

@@ -0,0 +1,116 @@
----------------------------------------------------------------------
-- |
-- Module : Generate
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/12 12:38:30 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.16 $
--
-- Generate all trees of given category and depth. AR 30\/4\/2004
--
-- (c) Aarne Ranta 2004 under GNU GPL
--
-- Purpose: to generate corpora. We use simple types and don't
-- guarantee the correctness of bindings\/dependences.
-----------------------------------------------------------------------------
module GF.UseGrammar.Generate (generateTrees,generateAll) where
import GF.Canon.GFC
import GF.Grammar.LookAbs
import GF.Grammar.PrGrammar
import GF.Grammar.Macros
import GF.Grammar.Values
import GF.Grammar.Grammar (Cat)
import GF.Grammar.SGrammar
import GF.Data.Operations
import GF.Data.Zipper
import GF.Infra.Option
import Data.List
-- Generate all trees of given category and depth. AR 30/4/2004
-- (c) Aarne Ranta 2004 under GNU GPL
--
-- Purpose: to generate corpora. We use simple types and don't
-- guarantee the correctness of bindings/dependences.
-- | The main entry point for corpus generation: generate trees of the
-- given category up to depth @n@.
--
--   * @mn@ optionally bounds the number of rules tried per branch
--   * @mt@ optionally gives a partial tree whose metavariables are filled
generateTrees ::
  Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp]
generateTrees opts gr cat n mn mt = map str2tr $ generate gr' opts cat' n mn mt'
  where
    gr'  = gr2sgr opts emptyProbs gr
    cat' = prt $ snd cat
    mt'  = fmap tr2str mt
    -- NB: an unused binding (ifm = oElem showOld opts, earlier
    -- oElem withMetas opts) has been removed; the algorithm choice is
    -- made inside 'generate' directly from opts.
-- | Generate trees of the given category to depth 10 and feed each one to
-- the supplied action; the count is bounded by the -number flag.
generateAll :: Options -> (Exp -> IO ()) -> GFCGrammar -> Cat -> IO ()
generateAll opts io gr cat = mapM_ (io . str2tr) (bound (gen catName))
  where
    bound   = optIntOrAll opts flagNumber
    sgr     = gr2sgr opts emptyProbs gr
    catName = prt (snd cat)
    gen c   = generate sgr opts c 10 Nothing Nothing
------------------------------------------
-- do the main thing with a simpler data structure
-- the first Int gives tree depth, the second constrains subtrees
-- chosen for each branch. A small number, such as 2, is a good choice
-- if the depth is large (more than 3)
-- If a tree is given as argument, generation concerns its metavariables.
generate :: SGrammar -> Options -> SCat -> Int -> Maybe Int -> Maybe STree -> [STree]
generate gr opts cat i mn mt = case mt of
  Nothing -> gen opts cat
  Just t -> genM t   -- fill in the metavariables of a partial tree
  where
    --- now use ifm to choose between two algorithms
    -- "mem": memoizing bottom-up table; "nonub": depth iteration keeping
    -- duplicates; default: depth iteration with nub (quadratic but small).
    gen opts cat
      | oElem (iOpt "mem") opts = concat $ errVal [] $ lookupTree id cat $ allTrees -- -old
      | oElem (iOpt "nonub") opts = concatMap (\i -> gener i cat) [0..i-1] -- some duplicates
      | otherwise = nub $ concatMap (\i -> gener i cat) [0..i-1] -- new
    -- depth 0: only nullary rules of the category
    gener 0 c = [SApp (f, []) | (f,([],_)) <- funs c]
    -- depth i: every rule applied to all combinations of depth-(i-1) subtrees
    gener i c = [
      tr |
        (f,(cs,_)) <- funs c,
        let alts = map (gener (i-1)) cs,
        ts <- combinations alts,
        let tr = SApp (f, ts)
        -- depth tr >= i -- NO!
      ]
    allTrees = genAll i
    -- dynamic generation: iterate a one-step extension of a table mapping
    -- each category to its trees grouped by depth
    genAll :: Int -> BinTree SCat [[STree]]
    genAll i = iter i genNext (mapTree (\ (c,_) -> (c,[[]])) gr)
    iter 0 f tr = tr
    iter n f tr = iter (n-1) f (f tr)
    genNext tr = mapTree (genNew tr) tr
    -- prepend the group of trees of the next depth for one category
    genNew tr (cat,ts) = let size = length ts in
      (cat, [SApp (f, xs) |
              (f,(cs,_)) <- funs cat,
              xs <- combinations (map look cs),
              let fxs = SApp (f, xs),
              depth fxs == size]   -- keep only trees of exactly this depth
            : ts)
      where
        look c = concat $ errVal [] $ lookupTree id c tr
    -- rules on a category, optionally truncated by mn
    funs cat = maybe id take mn $ errVal [] $ lookupTree id cat gr
    -- metavariable filling: generate at each SMeta, combine elsewhere
    genM t = case t of
      SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)]
      SMeta k -> gen opts k
      _ -> [t]

View File

@@ -0,0 +1,74 @@
----------------------------------------------------------------------
-- |
-- Module : GetTree
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/15 16:22:02 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.9 $
--
-- how to form linearizable trees from strings and from terms of different levels
--
-- 'String' --> raw 'Term' --> annot, qualif 'Term' --> 'Tree'
-----------------------------------------------------------------------------
module GF.UseGrammar.GetTree where
import GF.Canon.GFC
import GF.Grammar.Values
import qualified GF.Grammar.Grammar as G
import GF.Infra.Ident
import GF.Grammar.MMacros
import GF.Grammar.Macros
import GF.Compile.Rename
import GF.Grammar.TypeCheck
import GF.Grammar.AbsCompute (beta)
import GF.Compile.PGrammar
import GF.Compile.ShellState
import GF.Data.Operations
import Data.Char
-- how to form linearizable trees from strings and from terms of different levels
--
-- String --> raw Term --> annot, qualif Term --> Tree
-- | Parse a string into a tree; an undefined tree on failure.
string2tree :: StateGrammar -> String -> Tree
string2tree gr s = errVal uTree (string2treeErr gr s)

-- | Parse a string into a tree, reporting failure in 'Err'.  Pipeline:
-- parse a raw term, beta-reduce, refresh metavariables, qualify constants
-- with the abstract module, then type-annotate.
string2treeErr :: StateGrammar -> String -> Err Tree
string2treeErr _ "" = Bad "empty string"
string2treeErr gr s = do
  raw <- pTerm s
  let reduced   = beta [] raw
      refreshed = refreshMetas [] reduced
      qualified = qualifTerm (absId gr) refreshed
  annotate (grammar gr) qualified
-- | Qualify a bare category (or function) name with the state's abstract
-- module.
string2Cat, string2Fun :: StateGrammar -> String -> (Ident,Ident)
string2Cat gr name = (absId gr, identC name)
string2Fun = string2Cat

-- | Split a qualified name \"Module.name\" at the first dot into its two
-- identifiers.
strings2Cat, strings2Fun :: String -> (Ident,Ident)
strings2Cat s = (identC modName, identC (drop 1 rest))
  where
    (modName, rest) = break (== '.') s
strings2Fun = strings2Cat
-- | Interpret a user-supplied string as a reference term: generated
-- variables (x_...), string literals, integer literals, qualified
-- constants (containing a dot), or plain variables — in that order.
string2ref :: StateGrammar -> String -> Err G.Term
string2ref gr s = case s of
  'x':'_':ds -> return $ freshAsTerm ds --- hack for generated vars
  -- string literal: strip the surrounding quotes
  -- NOTE(review): assumes a matching closing quote is present — confirm
  '"':_:_ -> return $ G.K $ init $ tail s
  -- integer literal; 'read' is safe here since all chars are digits
  _:_ | all isDigit s -> return $ G.EInt $ read s
  _ | elem '.' s -> return $ uncurry G.Q $ strings2Fun s
  _ -> return $ G.Vr $ identC s
-- | Read a category name: already qualified if it contains a dot,
-- otherwise qualified with the state's abstract module.
string2cat :: StateGrammar -> String -> Err G.Cat
string2cat gr s
  | '.' `elem` s = return (strings2Fun s)
  | otherwise    = return (absId gr, identC s)

View File

@@ -0,0 +1,162 @@
----------------------------------------------------------------------
-- |
-- Module : Information
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/10/05 20:02:20 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.7 $
--
-- information on module, category, function, operation, parameter,...
-- AR 16\/9\/2003.
-- uses source grammar
-----------------------------------------------------------------------------
module GF.UseGrammar.Information (
showInformation,
missingLinCanonGrammar
) where
import GF.Grammar.Grammar
import GF.Infra.Ident
import GF.Infra.Modules
import GF.Infra.Option
import GF.CF.CF
import GF.CF.PPrCF
import GF.Compile.ShellState
import GF.Grammar.PrGrammar
import GF.Grammar.Lookup
import GF.Grammar.Macros (zIdent)
import qualified GF.Canon.GFC as GFC
import qualified GF.Canon.AbsGFC as AbsGFC
import GF.Data.Operations
import GF.Infra.UseIO
-- information on module, category, function, operation, parameter,... AR 16/9/2003
-- uses source grammar
-- | Top-level entry: print everything known about an identifier, or a
-- notice when it is not in scope.
showInformation :: Options -> ShellState -> Ident -> IOE ()
showInformation opts st c = do
  infos <- ioeErr (getInformation opts st c)
  case infos of
    [] -> putStrLnE "Identifier not in scope"
    _  -> mapM_ (putStrLnE . render) infos
  where
    render (i,m) = prInformation opts c i ++ "file:" +++ m ++ "\n"
-- | the data type of different kinds of information
data Information =
   IModAbs SourceAbs            -- ^ abstract source module
 | IModRes SourceRes            -- ^ resource source module
 | IModCnc SourceCnc            -- ^ concrete source module
 | IModule SourceAbs -- ^ to be deprecated
 | ICatAbs Ident Context [Ident]     -- ^ abstract category with its context
 | ICatCnc Ident Type [CFRule] Term  -- ^ concrete category: lin type etc.
 | IFunAbs Ident Type (Maybe Term)   -- ^ abstract function, optional def
 | IFunCnc Ident Type [CFRule] Term  -- ^ concrete function: linearization
 | IOper Ident Type Term             -- ^ resource operation
 | IParam Ident [Param] [Term]       -- ^ parameter type: constructors, values
 | IValue Ident Type                 -- ^ parameter constructor

-- aliases for canonical qualified identifiers
type CatId = AbsGFC.CIdent
type FunId = AbsGFC.CIdent
-- | Render one piece of information as lines of text, headed by the
-- identifier itself.
-- NOTE(review): the case is not exhaustive — IModAbs, IModRes and IModCnc
-- have no branch and would crash here if 'getInformation' ever produced
-- them; confirm they are indeed never built.
prInformation :: Options -> Ident -> Information -> String
prInformation opts c i = unlines $ prt c : case i of
  IModule m -> [
    "module of type" +++ show (mtype m),
    "extends" +++ show (extends m),
    "opens" +++ show (opens m),
    "defines" +++ unwords (map prt (ownConstants (jments m)))
    ]
  ICatAbs m co _ -> [
    "category in abstract module" +++ prt m,
    if null co then "not a dependent type"
    else "dependent type with context" +++ prContext co
    ]
  ICatCnc m ty cfs tr -> [
    "category in concrete module" +++ prt m,
    "linearization type" +++ prt ty
    ]
  IFunAbs m ty _ -> [
    "function in abstract module" +++ prt m,
    "type" +++ prt ty
    ]
  IFunCnc m ty cfs tr -> [
    "function in concrete module" +++ prt m,
    "linearization" +++ prt tr
    --- "linearization type" +++ prt ty
    ]
  IOper m ty tr -> [
    "operation in resource module" +++ prt m,
    "type" +++ prt ty,
    "definition" +++ prt tr
    ]
  IParam m ty ts -> [
    "parameter type in resource module" +++ prt m,
    "constructors" +++ unwords (map prParam ty),
    "values" +++ unwords (map prt ts)
    ]
  IValue m ty -> [
    "parameter constructor in resource module" +++ prt m,
    "type" +++ show ty
    ]
-- | also finds out if an identifier is defined in many places:
-- looks up the identifier as a module name, then in every source module,
-- then in every canonical module (the latter is not yet implemented).
getInformation :: Options -> ShellState -> Ident -> Err [(Information,FilePath)]
getInformation opts st c = allChecks $ [
  do
    m <- lookupModule src c
    case m of
      ModMod mo -> returnm c $ IModule mo
      _ -> prtBad "not a source module" c
  ] ++ map lookInSrc ss ++ map lookInCan cs
  where
    -- look up the identifier as a judgement in one source module
    lookInSrc (i,m) = do
      j <- lookupInfo m c
      case j of
        AbsCat (Yes co) _ -> returnm i $ ICatAbs i co [] ---
        AbsFun (Yes ty) _ -> returnm i $ IFunAbs i ty Nothing ---
        CncCat (Yes ty) _ _ -> do
          ---- let cat = ident2CFCat i c
          ---- rs <- concat [rs | (c,rs) <- cf, ]
          returnm i $ ICatCnc i ty [] ty ---
        CncFun _ (Yes tr) _ -> do
          rs <- return []   -- CF rules not collected yet
          returnm i $ IFunCnc i tr rs tr ---
        ResOper (Yes ty) (Yes tr) -> returnm i $ IOper i ty tr
        ResParam (Yes (ps,_)) -> do
          ts <- allParamValues src (QC i c)
          returnm i $ IParam i ps ts
        ResValue (Yes (ty,_)) -> returnm i $ IValue i ty ---
        _ -> prtBad "nothing available for" i
    -- canonical modules: always fails for now
    lookInCan (i,m) = do
      Bad "nothing available yet in canonical"
    -- pair an information item with the file its module came from
    returnm m i = return (i, pathOfModule st m)
    src = srcModules st
    can = canModules st
    ss = [(i,m) | (i,ModMod m) <- modules src]
    cs = [(i,m) | (i,ModMod m) <- modules can]
    -- NOTE(review): cf is only referenced from commented-out code above
    cf = concatMap ruleGroupsOfCF $ map snd $ cfs st
-- | Constants defined in the module itself, i.e. judgements that are not
-- mere indirections ('AnyInd') to another module.
ownConstants :: BinTree Ident Info -> [Ident]
ownConstants js = [c | (c, info) <- tree2list js, definedHere info]
  where
    definedHere (AnyInd _ _) = False
    definedHere _            = True
-- | Coverage report: for each concrete module of the greatest abstract,
-- list the abstract constants that lack a judgement in that concrete.
missingLinCanonGrammar :: GFC.CanonGrammar -> String
missingLinCanonGrammar cgr =
  unlines $ concat [prt_ c : missing js | (c,js) <- concretes] where
    missing js = map ((" " ++) . prt_) $ filter (not . flip isInBinTree js) abstract
    abstract = err (const []) (map fst . tree2list . jments) $ lookupModMod cgr absId
    absId = maybe (zIdent "") id $ greatestAbstract cgr
    concretes = [(cnc,jments mo) |
                  cnc <- allConcretes cgr absId, Ok mo <- [lookupModMod cgr cnc]]

View File

@@ -0,0 +1,292 @@
----------------------------------------------------------------------
-- |
-- Module : Linear
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/11/14 16:03:41 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.19 $
--
-- Linearization for canonical GF. AR 7\/6\/2003
-----------------------------------------------------------------------------
module GF.UseGrammar.Linear where
import GF.Canon.GFC
import GF.Canon.AbsGFC
import qualified GF.Grammar.Abstract as A
import GF.Canon.MkGFC (rtQIdent) ----
import GF.Infra.Ident
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import GF.Canon.Look
import GF.Grammar.LookAbs
import GF.Grammar.MMacros
import GF.Grammar.TypeCheck (annotate) ----
import GF.Data.Str
import GF.Text.Text
----import TypeCheck -- to annotate
import GF.Data.Operations
import GF.Data.Zipper
import qualified GF.Infra.Modules as M
import Control.Monad
import Data.List (intersperse)
-- Linearization for canonical GF. AR 7/6/2003
-- | The worker function: linearize a Tree, return
-- a record. Possibly mark subtrees.
--
-- NB. Constants in trees are annotated by the name of the abstract module.
-- A concrete module name must be given to find (and choose) linearization rules.
--
-- - If no marking is wanted, 'noMark' :: 'Marker'.
--
-- - For xml marking, use 'markXML' :: 'Marker'
linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term
linearizeToRecord gr mk m = lin [] where
  -- ts is the path of argument positions from the root, used for marking
  lin ts t@(Tr (n,xs)) = errIn ("linearization of" +++ prt t) $ do
    let binds = A.bindsNode n
        at = A.atomNode n
        fmk = markSubtree mk n ts (A.isFocusNode n)
    c <- A.val2cat $ A.valNode n
    -- linearize the subtrees first, extending the path
    xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs
    -- then dispatch on the atom at the node
    r <- case at of
      A.AtC f -> lookf c t f >>= comp xs'
      A.AtI i -> return $ recInt i
      A.AtL s -> return $ recS $ tK $ prt at
      A.AtF i -> return $ recS $ tK $ prt at
      A.AtV x -> lookCat c >>= comp [tK (prt_ at)]
      A.AtM m -> lookCat c >>= comp [tK (prt_ at)]
    r' <- case r of -- to see stg in case the result is variants {}
      FV [] -> lookCat c >>= comp [tK (prt_ t)]
      _ -> return r
    return $ fmk $ mkBinds binds r'
  -- find a linearization rule, redirecting to the chosen concrete module
  look = lookupLin gr . redirectIdent m . rtQIdent
  comp = ccompute gr
  -- prepend the bound variables as extra record fields
  mkBinds bs bdy = case bdy of
    R fs -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs
    FV rs -> FV $ map (mkBinds bs) rs
  -- wrap a token into a one-field record {s = ...}
  recS t = R [Ass (L (identC "s")) t] ----
  -- the record form of an integer literal: last digit, string, size class
  recInt i = R [
    Ass (L (identC "last")) (EInt (rem i 10)),
    Ass (L (identC "s")) (tK $ show i),
    Ass (L (identC "size")) (EInt (if i > 9 then 1 else 0))
    ]
  lookCat = return . errVal defLindef . look
  ---- should always be given in the module
  -- to show missing linearization as term
  lookf c t f = case look f of
    Ok h -> return h
    _ -> lookCat c >>= comp [tK (prt_ t)]
-- | The special case of linearization without any subtree marking.
linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term
linearizeNoMark gr cnc tree = linearizeToRecord gr noMark cnc tree
-- | expand tables in linearized term to full, normal-order tables
--
-- NB expand from inside-out so that values are not looked up in copies of branches
expandLinTables :: CanonGrammar -> Term -> Err Term
expandLinTables gr t = case t of
  R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
  -- pattern-matching table: rebuild with one branch per parameter value
  T ty rs -> do
    rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
    let t' = T ty $ map (uncurry Cas) rs'
    vs <- alls ty
    ps <- mapM term2patt vs
    ts' <- mapM (comp . S t') $ vs   -- select each value, then compute
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
  -- value-vector table: pair each value with its pattern
  V ty ts0 -> do
    ts <- mapM exp ts0 -- expand from inside-out
    vs <- alls ty
    ps <- mapM term2patt vs
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
  FV ts -> liftM FV $ mapM exp ts
  _ -> composOp exp t
  where
    alls = allParamValues gr
    exp = expandLinTables gr
    comp = ccompute gr []
-- | Undo table optimizations for every module of a grammar.
unoptimizeCanon :: CanonGrammar -> CanonGrammar
unoptimizeCanon g@(M.MGrammar ms) = M.MGrammar [unoptimizeCanonMod g m | m <- ms]
-- | Undo table optimizations in one module: the linearization and
-- printname term of every concrete judgement is rewritten to expanded
-- table form; other modules pass through unchanged.
unoptimizeCanonMod :: CanonGrammar -> CanonModule -> CanonModule
unoptimizeCanonMod g = convMod where
  convMod (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os defs)) =
    (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os (mapTree convDef defs)))
  convMod mm = mm
  convDef (c,CncCat ty df pr) = (c,CncCat ty (convT df) (convT pr))
  convDef (f,CncFun c xs li pr) = (f,CncFun c xs (convT li) (convT pr))
  convDef cd = cd
  -- NOTE(review): expansion failure aborts with 'error' here — confirm
  -- that this is intended to be fatal
  convT = err error id . exp
  -- a version of expandLinTables that does not destroy share optimization
  -- (only single-branch tables are expanded; indirections I are computed)
  exp t = case t of
    R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
    T ty rs@[Cas [_] _] -> do
      rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
      let t' = T ty $ map (uncurry Cas) rs'
      vs <- alls ty
      ps <- mapM term2patt vs
      ts' <- mapM (comp . S t') $ vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
    V ty ts0 -> do
      ts <- mapM exp ts0 -- expand from inside-out
      vs <- alls ty
      ps <- mapM term2patt vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
    FV ts -> liftM FV $ mapM exp ts
    I _ -> comp t
    _ -> composOp exp t
    where
      alls = allParamValues g
      comp = ccompute g []
-- | From a linearization record, extract all tables as rows of
-- pattern\/token-list pairs.
rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]]
rec2strTables r = allLinValues r >>= mapM (mapPairsM (mapPairsM strsFromTerm))
-- | Keep only the tables stored under the distinguished \"s\" label.
strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]]
strTables2sTables ts = [tab | rec <- ts, (lab, tab) <- rec, lab == linLab0]

-- | The distinguished linearization label \"s\".
linLab0 :: Label
linLab0 = L (identC "s")

-- | Forget the patterns, keeping just the token lists.
sTables2strs :: [[([Patt],[Str])]] -> [[Str]]
sTables2strs tabs = [strs | tab <- tabs, (_, strs) <- tab]

-- | Render each token list as one string.
strs2strings :: [[Str]] -> [String]
strs2strings ss = map unlex ss

-- | Crude unlexing: glue the first token only (---- known simplification);
-- use an unlexer from Text to postprocess.
unlex :: [Str] -> String
unlex = concatMap sstr . take 1 ----
-- | Top-level: linearize a tree to one string.  'head' is safe because
-- 'linTree2strings' never returns an empty list (it falls back to an
-- error message).
linTree2string :: Marker -> CanonGrammar -> Ident -> A.Tree -> String
linTree2string mk gr m e = head (linTree2strings mk gr m e) -- never empty

-- | All string linearizations of a tree; a one-element error message list
-- on failure, so the result is never empty.
linTree2strings :: Marker -> CanonGrammar -> Ident -> A.Tree -> [String]
linTree2strings mk gr m e = err return id $ do
  record   <- linearizeToRecord gr mk m e
  expanded <- expandLinTables gr record
  tables   <- rec2strTables expanded
  let strings = strs2strings (sTables2strs (strTables2sTables tables))
  ifNull (prtBad "empty linearization of" e) return strings -- thus never empty

-- | Argument is a Tree, value is a list of strs; needed in Parsing.
allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str]
allLinsOfTree gr a e = err (singleton . str) id $ do
  let e' = e ---- annotateExp gr e (disabled)
  record   <- linearizeNoMark gr a e'
  expanded <- expandLinTables gr record
  tables   <- rec2strTables expanded
  return (concat (sTables2strs (strTables2sTables tables)))

-- | All linearizations as records of tables of terms.
allLinsAsRec :: CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],Term)])]]
allLinsAsRec gr c t = do
  lin      <- linearizeNoMark gr c t
  expanded <- expandLinTables gr lin
  allLinValues expanded
-- | the value is a list of structures arranged as records of tables of strings
-- only taking into account string fields
-- True: sep. by /, False: sep by \n
allLinTables ::
  Bool -> CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],[String])])]]
allLinTables slash gr c t = do
  r' <- allLinsAsRec gr c t
  mapM (mapM getS) r'
  where
    getS (lab,pss) = liftM (curry id lab) $ mapM gets pss
    gets (ps,t) = liftM (curry id ps . cc . map str2strings) $ strsFromTerm t
    -- join token lists with the chosen separator
    cc = concat . intersperse [if slash then "/" else "\n"]

-- | the value is a list of strings gathered from all fields
allLinBranchFields :: CanonGrammar -> Ident -> A.Tree -> Err [String]
allLinBranchFields gr c trm = do
  r <- linearizeNoMark gr c trm >>= expandLinTables gr
  return [s | (_,t) <- allLinBranches r, s <- gets t]
  where
    -- only terms that successfully yield strs contribute
    gets t = concat [cc (map str2strings s) | Ok s <- [strsFromTerm t]]
    cc = concat . intersperse ["/"]

-- | Print a linearization table; with @pars@, labels and patterns are shown.
prLinTable :: Bool -> [[(Label,[([Patt],[String])])]] -> [String]
prLinTable pars = concatMap prOne . concat where
  prOne (lab,pss) = (if pars then ((prt lab) :) else id) (map pr pss) ----
  pr (ps,ss) = (if pars then ((unwords (map prt_ ps) +++ ":") +++)
                else id) (unwords ss)
{-
-- the value is a list of strs
allLinStrings :: CanonGrammar -> Tree -> [Str]
allLinStrings gr ft = case allLinsAsStrs gr ft of
Ok ts -> map snd $ concat $ map snd $ concat ts
Bad s -> [str s]
-- the value is a list of strs, not forgetting their arguments
allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]]
allLinsAsStrs gr ft = do
lpts <- allLinearizations gr ft
return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts
-- to a list of strings
linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String]
linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk
-- to a list of token lists
linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]]
linearizeToStrss gr mk e = do
R rs <- linearizeToRecord gr mk e ----
t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
return $ map strsFromTerm $ allInTable t
-}
-- | All linearization tables of a function, over every field
-- (not only \"s\"; extended 11\/12\/2005).
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
allLinsOfFun gr f = lookupLin gr f >>= allAllLinValues
-- | returns printname if one exists; otherwise linearizes with metas
-- NOTE(review): the printname lookup uses CIQ c d (concrete module) while
-- the fallback fq uses CIQ m d (abstract module) — confirm this asymmetry
-- is intended.
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
printOrLinearize gr c f@(m, d) = errVal (prt fq) $
  case lookupPrintname gr (CIQ c d) of
    Ok t -> do
      ss <- strsFromTerm t
      let s = strs2strings [ss]
      return $ ifNull (prt fq) head s   -- fall back to the qualified name
    _ -> do
      -- no printname: build a reference expression with metavariables
      -- for the arguments, annotate it, and linearize it
      ty <- lookupFunType gr m d
      f' <- ref2exp [] ty (A.QC m d)
      tr <- annotate gr f'
      return $ linTree2string noMark gr c tr
  where
    fq = CIQ m d

View File

@@ -0,0 +1,50 @@
----------------------------------------------------------------------
-- |
-- Module : MatchTerm
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
--
-- functions for matching with terms. AR 16/3/2006
-----------------------------------------------------------------------------
module GF.UseGrammar.MatchTerm where
import GF.Data.Operations
import GF.Data.Zipper
import GF.Grammar.Grammar
import GF.Grammar.PrGrammar
import GF.Infra.Ident
import GF.Grammar.Values
import GF.Grammar.Macros
import GF.Grammar.MMacros
import Control.Monad
import Data.List
-- | Test if a term has duplicated idents, either any constant
-- ('hasDupIdent') or just atoms ('hasDupAtom').
-- Fixed: the previous version applied 'maximum' to the list of group
-- lengths, which crashes on expressions containing no constants at all;
-- 'any' is total (False on empty) and equivalent on non-empty input.
hasDupIdent, hasDupAtom :: Exp -> Bool
hasDupIdent = any ((> 1) . length) . group . sort . allConstants True
hasDupAtom  = any ((> 1) . length) . group . sort . allConstants False
-- | Test if a certain ident occurs anywhere in the term.
grepIdent :: Ident -> Exp -> Bool
grepIdent c t = c `elem` allConstants True t
-- | Form the list of all constant identifiers in a term.  With @True@,
-- every constant is collected; with @False@, heads of applications are
-- skipped so that only "atoms" remain.  Implemented as a state-threading
-- traversal ('appSTM'); a failed traversal yields [].
allConstants :: Bool -> Exp -> [Ident]
allConstants alsoApp = err (const []) snd . flip appSTM [] . collect where
  collect e = case e of
    Q _ c -> add c e
    QC _ c -> add c e
    Cn c -> add c e
    App f a | not alsoApp -> case f of
      -- NOTE(review): for a nested application only the inner argument b
      -- and a are traversed, skipping the head g — presumably deliberate
      -- for "atoms only", but confirm
      App g b -> collect b >> collect a
      _ -> collect a
    _ -> composOp collect e
  add c e = updateSTM (c:) >> return e

View File

@@ -0,0 +1,140 @@
----------------------------------------------------------------------
-- |
-- Module : Morphology
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:49 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- Morphological analyser constructed from a GF grammar.
--
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie
-- for decompositions and also want to use it in the parser
-----------------------------------------------------------------------------
module GF.UseGrammar.Morphology where
import GF.Canon.AbsGFC
import GF.Canon.GFC
import GF.Grammar.PrGrammar
import GF.Canon.CMacros
import GF.Canon.Look
import GF.Grammar.LookAbs
import GF.Infra.Ident
import qualified GF.Grammar.Macros as M
import GF.UseGrammar.Linear
import GF.Data.Operations
import GF.Data.Glue
import Data.Char
import Data.List (sortBy, intersperse)
import Control.Monad (liftM)
import GF.Data.Trie2
-- construct a morphological analyser from a GF grammar. AR 11/4/2001
-- we first found the binary search tree sorted by word forms more efficient
-- than a trie, at least for grammars with 7000 word forms
-- (18\/11\/2003) but this may change since we have to use a trie
-- for decompositions and also want to use it in the parser
-- | A morphology is a trie keyed by the characters of a word form,
-- holding the analysis strings of each form.
type Morpho = Trie Char String

emptyMorpho :: Morpho
emptyMorpho = emptyTrie

-- | Analyse one word: the matched string and the analyses found.
appMorpho :: Morpho -> String -> (String,[String])
appMorpho = appMorphoOnly
  ---- add lookup for literals
-- | Word lookup without any handling of literals.
appMorphoOnly :: Morpho -> String -> (String,[String])
appMorphoOnly = trieLookup

-- | A word is known if its lookup yields at least one analysis
-- (literals excluded).
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo w = not (null (snd (appMorphoOnly mo w)))
-- | Build a morphological analyser for abstract module @a@: compile every
-- word form of every lexical rule, plus the syncategorematic words of the
-- remaining rules, into a trie.
mkMorpho :: CanonGrammar -> Ident -> Morpho
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where
  comp = ccompute gr [] -- to undo 'values' optimization
  mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
  mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
  -- gather forms of lexical items
  allLins fun@(m,f) = errVal [] $ do
    ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues
    ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts
    return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
  -- analysis line: function name, category tag, parameter values
  prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])
  -- gather syncategorematic words
  allSyns fun@(m,f) = errVal [] $ do
    tss <- allLinsOfFun gr (CIQ a f)
    let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
    return $ concat $ map wordsInTerm ss
  prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])
  -- all words, Left from lexical rules and Right syncategorematic
  allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
    allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
    -- a rule with no arguments is lexical
    lexRole t = case M.typeForm t of
      Ok ([],_,_) -> Left
      _ -> Right
-- printing full-form lexicon and results

-- | Render the full-form lexicon, one analysis block per word.
prMorpho :: Morpho -> String
prMorpho mo = unlines (map prMorphoAnalysis (collapse mo))

-- | One analysis block: the word followed by its analyses on separate
-- lines, or the word marked with \"*\" when nothing non-empty was found.
prMorphoAnalysis :: (String,[String]) -> String
prMorphoAnalysis (w, fs0)
  | null fs   = w ++++ "*"
  | otherwise = unlines (w : fs)
  where
    fs = filter (not . null) fs0

-- | One-line variant, analyses separated by \"/\"; unknowns show the word
-- with a \"*\".
prMorphoAnalysisShort :: (String,[String]) -> String
prMorphoAnalysisShort (w, fs) = prBracket (marker ++ prTList "/" fs)
  where
    marker = if null fs then w +++ "*" else ""

-- | Tag an analysis with its category (the module half is ignored).
tagPrt :: Print a => (a,a) -> String
tagPrt (_, c) = "+" ++ prt c --- module name

-- | All word forms recognized by the morphology.
allMorphoWords :: Morpho -> [String]
allMorphoWords mo = [w | (w, _) <- collapse mo]
-- analyse running text and show results either in short form or on separate lines

-- | Analyse running text and show just the words, prefixing unknown ones
-- with \"*\".
morphoTextStatus :: Morpho -> String -> String
morphoTextStatus mo txt = unlines [mark (appMorpho mo w) | w <- words txt]
  where
    mark (w, analyses)
      | null analyses = "*" +++ w
      | otherwise     = w

-- | Analyse running text, one short-form analysis per line.
morphoTextShort :: Morpho -> String -> String
morphoTextShort mo txt =
  unlines [prMorphoAnalysisShort (appMorpho mo w) | w <- words txt]

-- | Analyse running text, full analyses separated by blank lines.
morphoText :: Morpho -> String -> String
morphoText mo txt =
  unlines [('\n' :) (prMorphoAnalysis (appMorpho mo w)) | w <- words txt]

-- | Full-form lexicon in the format used by the Italian Verb Engine:
-- @form : analysis \/ analysis ...@.
prFullForm :: Morpho -> String
prFullForm mo =
  unlines [s ++ " : " ++ unwords (intersperse "/" ps) | (s, ps) <- collapse mo]
-- | Split compound words using Huet's unglueing method; on failure, fall
-- back to plain whitespace splitting.
---- a trie would serve morphological analysis better; this is experimental,
---- and the cases where decomposition applies should be specified — not all words
decomposeWords :: Morpho -> String -> [String]
decomposeWords mo s = errVal (words s) (decomposeSimple mo s)

View File

@@ -0,0 +1,70 @@
----------------------------------------------------------------------
-- |
-- Module : Paraphrases
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:49 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.6 $
--
-- paraphrases of GF terms. AR 6\/10\/1998 -- 24\/9\/1999 -- 5\/7\/2000 -- 5\/6\/2002
--
-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL)
--
-- thus inherited from the old GF. Incomplete and inefficient...
-----------------------------------------------------------------------------
module GF.UseGrammar.Paraphrases (mkParaphrases) where
import GF.Grammar.Abstract
import GF.Grammar.PrGrammar
import GF.Grammar.LookAbs
import GF.Grammar.AbsCompute
import GF.Data.Operations
import Data.List (nub)
-- paraphrases of GF terms. AR 6/10/1998 -- 24/9/1999 -- 5/7/2000 -- 5/6/2002
-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL)
-- thus inherited from the old GF. Incomplete and inefficient...
-- | All paraphrases of a term, beta-normalized and de-duplicated.
-- (NB: 'nub' is quadratic, acceptable for the small lists produced here.)
mkParaphrases :: GFCGrammar -> Term -> [Term]
mkParaphrases st t = nub (map (beta []) (paraphrases (allDefs st) t))
-- | A definitional equation: a defined function and its definiens.
type Definition = (Fun,Term)

-- | All paraphrases of a term: immediate rewrites by definitions, plus
-- recursive paraphrases of the subterms; the term itself is always
-- included (the trailing @++ [t]@ closes the whole expression).
paraphrases :: [Definition] -> Term -> [Term]
paraphrases th t =
  paraImmed th t ++
  --- paraMatch th t ++
  case t of
    App c a -> [App d b | d <- paraphrases th c, b <- paraphrases th a]
    Abs x b -> [Abs x d | d <- paraphrases th b]
    c -> []
  ++ [t]

-- | Immediate paraphrases: any defined function whose definiens equals
-- the term rewrites to (a reference to) that function.
paraImmed :: [Definition] -> Term -> [Term]
paraImmed defs t =
  [Q m f | ((m,f), u) <- defs, t == u] ++ --- eqTerm
  case t of
    ---- Cn c -> [u | (f, u) <- defs, eqStrIdent f c]
    _ -> []
{- ---
paraMatch :: [Definition] -> Trm -> [Trm]
paraMatch th@defs t =
[mkApp (Cn f) xx | (PC f zz, u) <- defs,
let (fs,sn) = fullApp u, fs == h, length sn == length zz] ++
case findAMatch defs t of
Ok (g,b) -> [substTerm [] g b]
_ -> []
where
(h,xx) = fullApp t
fullApp c = case c of
App f a -> (f', a' ++ [a]) where (f',a') = fullApp f
c -> (c,[])
-}

View File

@@ -0,0 +1,177 @@
----------------------------------------------------------------------
-- |
-- Module : Parsing
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/06/02 10:23:52 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.25 $
--
-- (Description of the module)
-----------------------------------------------------------------------------
module GF.UseGrammar.Parsing where
import GF.Infra.CheckM
import qualified GF.Canon.AbsGFC as C
import GF.Canon.GFC
import GF.Canon.MkGFC (trExp) ----
import GF.Canon.CMacros
import GF.Grammar.MMacros (refreshMetas)
import GF.UseGrammar.Linear
import GF.Data.Str
import GF.CF.CF
import GF.CF.CFIdent
import GF.Infra.Ident
import GF.Grammar.TypeCheck
import GF.Grammar.Values
--import CFMethod
import GF.UseGrammar.Tokenize
import GF.UseGrammar.Morphology (isKnownWord)
import GF.CF.Profile
import GF.Infra.Option
import GF.UseGrammar.Custom
import GF.Compile.ShellState
import GF.CF.PPrCF (prCFTree)
-- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE
import qualified GF.Parsing.GFC as New
import GF.Data.Operations
import Data.List (nub,sortBy)
import Data.Char (toLower)
import Control.Monad (liftM)
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002
-- | Parse a string into trees, discarding the diagnostic message.
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
parseString os sg cat s = do
  (ts, _msg) <- parseStringMsg os sg cat s
  return ts

-- | Parse a string into trees, also returning the accumulated warnings.
-- A parse failure is not an 'Err' failure: it comes back as @([], msg)@.
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
parseStringMsg os sg cat s =
  case checkStart (parseStringC os sg cat s) of
    Ok (ts, (_, msgs)) -> return (ts, unlines (reverse msgs))
    Bad msg            -> return ([], msg)
-- | Core parser dispatch: the old CF parser is used when -old is given,
-- or when the grammar has higher-order abstract syntax and -fcfg is not
-- forced; otherwise the new chart parsers (c\/m\/f algorithms) are used.
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
parseStringC opts0 sg cat s
  | oElem (iOpt "old") opts0 ||
    (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do
      let opts = unionOptions opts0 $ stateOptions sg
          cf = stateCF sg   -- NOTE(review): bound but unused in this branch
          gr = stateGrammarST sg
          cn = cncId sg
          toks = customOrDefault opts useTokenizer customTokenizer sg s
          parser = customOrDefault opts useParser customParser sg cat
      -- with -cut, stop at the first tokenization that yields trees
      if oElem (iOpt "cut") opts
        then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
        else mapM (tokens2trms opts sg cn parser) toks >>= return . concat
---- | or [oElem p opts0 |
----       p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do
  | otherwise = do
      let opts = unionOptions opts0 $ stateOptions sg
          algorithm | oElem newCParser opts0 = "c"
                    | oElem newMParser opts0 = "m"
                    | oElem newFParser opts0 = "f"
                    | otherwise = "f" -- default algorithm: FCFG
          strategy = maybe "bottomup" id $ getOptVal opts useParser
          -- -parser=bottomup/topdown
          tokenizer = customOrDefault opts useTokenizer customTokenizer sg
          toks = case tokenizer s of
            t:_ -> t
            _ -> [] ---- no support for undet. tok.
          -- words unknown to the morphology; capitalized words are also
          -- retried in lower case before being reported
          unknowns =
            [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w]
            where
              unk w = not $ isKnownWord (morpho sg) w
              uncap (c:cs) = toLower c : cs
              uncap s = s
      case unknowns of
        _:_ | oElem (iOpt "trynextlang") opts -> return []
        _:_ -> fail $ "Unknown words:" +++ unwords unknowns
        _ -> do
          ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
          -- annotate results, then cut to the requested number
          ts' <- checkErr $
                   allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
          return $ optIntOrAll opts flagNumber ts'
-- | Run a CF parser on one token list and convert its raw trees to terms.
tokens2trms :: Options -> StateGrammar -> Ident -> CFParser -> [CFTok] -> Check [Tree]
tokens2trms opts sg cn parser toks =
  trees2trms opts sg cn toks (cfParseResults parseResult) (snd parseResult)
  where
    parseResult = parser toks
    -- peb 25/5-04: nub on the result trees was removed (O(n^2))
-- | Convert raw context-free parse trees into annotated abstract trees.
--
-- Three modes, selected by options:
--  * no parses: emit a warning and return [];
--  * @rawParse@: show/annotate the raw CF trees without post-parsing;
--  * otherwise: post-parse, annotate, and (unless @forgiveParse@) keep
--    only trees whose linearizations are compatible with the input tokens.
-- The number of raw trees considered is capped by @-rawtrees@, and the
-- number of results returned by @-number@.
trees2trms ::
  Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
trees2trms opts sg cn as ts0 info = do
  let s = unwords $ map prCFTok as
  ts <- case () of
    _ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return []
    _ | raw -> do
      ts1 <- return (map cf2trm0 ts0) ----- should not need annot
      -- first alternative annotates; the fallback just prints the raw trees
      checks [
        mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
       ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
       ]
    _ -> do
      -- cap the number of raw parses that are post-processed
      let num = optIntOrN opts flagRawtrees 999999
      let (ts01,rest) = splitAt num ts0
      if null rest then return ()
        else raise ("Warning: only" +++ show num +++ "raw parses out of" +++
               show (length ts0) +++
               "considered; use -rawtrees=<Int> to see more"
              )
      (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
      if null ts1 then raise ss else return ()
      ts2 <- checkErr $
        allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ----
      -- unless -forgive: keep only trees that linearize back to tokens
      -- compatible with the input (morphology check)
      if forgive then return ts2 else do
        let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
            ps = [t | (t,ss) <- tsss,
                      any (compatToks as) (map str2cftoks ss)]
        if null ps
          then raise $ "Failure in morphology." ++
                 if verb
                   then "\nPossible corrections: " +++++
                        unlines (nub (map sstr (concatMap snd tsss)))
                   else ""
          else return ps
  if verb
    then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info
    else return ()
  return $ optIntOrAll opts flagNumber $ nub ts
  where
    gr      = stateGrammarST sg
    raw     = oElem rawParse opts
    verb    = oElem beVerbose opts
    forgive = oElem forgiveParse opts
---- Operations.allChecks :: ErrorMonad m => [m a] -> m [a]
-- | Report which plain string tokens ('TS') of the input are not words
-- of the grammar; literal tokens are not checked.
unknownWords sg ts
  | null unk  = "where all words are known"
  | otherwise = "with the unknown tokens" +++ show unk --- needs to be fixed for literals
  where
    unk        = [t | t@(TS _) <- ts, not (isKnown t)]
    isKnown t  = any (compatTok t) knownToks
    knownToks  = map TS (stateGrammarWords sg)
--- too much type checking in building term info? return FullTerm to save work?
-- | raw parsing: so simple it is for a context-free CF grammar
-- | Raw conversion of a context-free parse tree into a canonical
-- expression: the node's function becomes an atom applied to the
-- converted subtrees. No type checking or annotation is done here.
cf2trm0 :: CFTree -> C.Exp
cf2trm0 (CFTree (fun, (_, trees))) = mkAppAtom (cffun2trm fun) (map cf2trm0 trees)
  where
    cffun2trm (CFFun (fun,_)) = fun
    mkApp       = foldl C.EApp
    mkAppAtom a = mkApp (C.EAtom a)

View File

@@ -0,0 +1,66 @@
----------------------------------------------------------------------
-- |
-- Module : Randomized
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:51 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- random generation and refinement. AR 22\/8\/2001.
-- implemented as sequence of refinement menu selections, encoded as integers
-----------------------------------------------------------------------------
module GF.UseGrammar.Randomized where
import GF.Grammar.Abstract
import GF.UseGrammar.Editing
import GF.Data.Operations
import GF.Data.Zipper
--- import Arch (myStdGen) --- circular for hbc
import System.Random --- (mkStdGen, StdGen, randoms) --- bad import for hbc
-- random generation and refinement. AR 22/8/2001
-- implemented as sequence of refinement menu selections, encoded as integers
-- | Seed-based standard generator; a thin alias for 'mkStdGen'
-- (kept as a separate name for historical portability reasons).
myStdGen :: Int -> StdGen
myStdGen = mkStdGen ---

-- | build one random tree; use mx to prevent infinite search
mkRandomTree :: StdGen -> Int -> CGrammar -> Either Cat Fun -> Err Tree
mkRandomTree gen mx gr cat = mkTreeFromInts (take mx (randoms gen)) gr cat

-- | Random refinement action: at most @mx@ random (non-negative)
-- menu selections are applied to the current editing state.
refineRandom :: StdGen -> Int -> CGrammar -> Action
refineRandom gen mx = mkStateFromInts $ take mx $ map abs (randoms gen)

-- | build a tree from a list of integers
-- (each integer selects one refinement, modulo the menu length)
mkTreeFromInts :: [Int] -> CGrammar -> Either Cat Fun -> Err Tree
mkTreeFromInts ints gr catfun = do
  -- start a fresh editing state from either a category or a function
  st0 <- either (\cat -> newCat gr cat initState)
                (\fun -> newFun gr fun initState)
                catfun
  state <- mkStateFromInts ints gr st0
  return $ loc2tree state
-- | Drive the refinement editor with a list of integers: each integer
-- selects (modulo the menu length) one refinement from the current
-- refinement menu. Fails if the integers run out before the state is
-- complete, or if no refinements are available.
mkStateFromInts :: [Int] -> CGrammar -> Action
mkStateFromInts ints gr z = mkRandomState ints z >>= reCheckState gr where
  mkRandomState [] state = do
    testErr (isCompleteState state) "not completed"
    return state
  mkRandomState (n:ns) state = do
    let refs = refinementsState gr state
        -- NOTE(review): refs0 is a list of Bools obtained with 'map',
        -- so 'null refs0' is equivalent to 'null refs'; the intention
        -- was presumably 'filter (not . snd . snd) refs' (nonrecursive
        -- refinements only), and the selection below still indexes the
        -- full 'refs' — TODO confirm before changing behavior.
        refs0 = map (not . snd . snd) refs
    testErr (not (null refs0)) $ "no nonrecursive refinements available for" +++
      prt (actVal state)
    (ref,_) <- (refs !? (n `mod` (length refs)))
    state1 <- refineWithAtom False gr ref state
    if isCompleteState state1
      then return state1
      else do
        -- move the focus to the next metavariable and continue
        state2 <- goNextMeta state1
        mkRandomState ns state2

View File

@@ -0,0 +1,181 @@
----------------------------------------------------------------------
-- |
-- Module : Session
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/08/17 15:13:55 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.12 $
--
-- Editing sessions: the state, history and commands of the interactive syntax editor.
-----------------------------------------------------------------------------
module GF.UseGrammar.Session where
import GF.Grammar.Abstract
import GF.Infra.Option
import GF.UseGrammar.Custom
import GF.UseGrammar.Editing
import GF.Compile.ShellState ---- grammar
import GF.Data.Operations
import GF.Data.Zipper (keepPosition) ---
-- First version 8/2001. Adapted to GFC with modules 19/6/2003.
-- Nothing had to be changed, which is a sign of good modularity.
-- keep these abstract
-- | 'Exp'-list: candidate refinements,clipboard
-- A session state is the full undo history: the head is the current
-- snapshot (editing state, (candidates, clipboard), info).
type SState = [(State,([Exp],[Clip]),SInfo)]
-- | 'String' is message, 'Int' is the view
type SInfo = ([String],(Int,Options))

-- | The one-element initial history; by convention an 'SState' is
-- never empty (the accessors below rely on this).
initSState :: SState
initSState = [(initState, ([],[]), (["Select 'New' category to start"],(0,noOptions)))]
-- instead of empty
type Clip = Tree ---- (Exp,Type)

-- | (peb): Something wrong with this definition??
-- Shouldn't the result type be 'SInfo'?
--
-- > okInfo :: Int -> SInfo == ([String], (Int, Options))
-- NOTE(review): the 'Bool' in the result makes this incompatible with
-- 'SInfo' ('Options' expected) — looks like dead or stale code; confirm
-- against callers before changing.
okInfo :: n -> ([s], (n, Bool))
okInfo n = ([],(n,True))

-- Accessors into the head (current) snapshot of the history.
-- All of them are partial: they assume a non-empty 'SState', which
-- 'initSState' and the commands below maintain.
stateSState :: SState -> State
candsSState :: SState -> [Exp]
clipSState  :: SState -> [Clip]
infoSState  :: SState -> SInfo
msgSState   :: SState -> [String]
viewSState  :: SState -> Int
optsSState  :: SState -> Options
stateSState ((s,_,_):_) = s
candsSState ((_,(ts,_),_):_)= ts
clipSState ((_,(_,ts),_):_)= ts
infoSState ((_,_,i):_) = i
msgSState ((_,_,(m,_)):_) = m
viewSState ((_,_,(_,(v,_))):_) = v
optsSState ((_,_,(_,(_,o))):_) = o
-- | The tree currently being edited (from the head of the history).
treeSState :: SState -> Tree
treeSState session = actTree (stateSState session)
-- | from state to state
type ECommand = SState -> SState

-- * elementary commands
-- ** change state, drop cands, drop message, preserve options
-- Each command pushes a new snapshot on the history (for undo).
-- NOTE: most of these pattern match on a non-empty history only;
-- they rely on the 'initSState' invariant.

-- | Push a new editing state; candidates are cleared, clipboard kept.
changeState :: State -> ECommand
changeState s ss = changeMsg [] $ (s,([],clipSState ss),infoSState ss) : ss

-- | Replace the candidate-refinement list, with a message describing it.
changeCands :: [Exp] -> ECommand
changeCands ts ss@((s,(_,cb),(_,b)):_) = (s,(ts,cb),(candInfo ts,b)) : ss

-- | Push a clip on top of the clipboard.
addtoClip :: Clip -> ECommand
addtoClip t ss@((s,(ts,cb),(i,b)):_) = (s,(ts,t:cb),(i,b)) : ss

-- | NOTE(review): despite the name, this drops the first @n@ clips,
-- not the @n@-th one — confirm against callers.
removeClip :: Int -> ECommand
removeClip n ss@((s,(ts,cb),(i,b)):_) = (s,(ts, drop n cb),(i,b)) : ss

changeMsg :: [String] -> ECommand
changeMsg m ((s,ts,(_,b)):ss) = (s,ts,(m,b)) : ss -- just change message
-- empty history: rebuild from the initial state with the given message
changeMsg m _ = (s,ts,(m,b)) : [] where [(s,ts,(_,b))] = initSState

changeView :: ECommand
changeView ((s,ts,(m,(v,b))):ss) = (s,ts,(m,(v+1,b))) : ss -- toggle view

-- | Run a command and then set the message.
withMsg :: [String] -> ECommand -> ECommand
withMsg m c = changeMsg m . c

-- | Update the per-session options in place (no new history entry).
changeStOptions :: (Options -> Options) -> ECommand
changeStOptions f ((s,ts,(m,(v,o))):ss) = (s,ts,(m,(v, f o))) : ss

noNeedForMsg :: ECommand
noNeedForMsg = changeMsg [] -- everything's all right: no message
-- | Human-readable message describing how many candidate refinements
-- are available for selection.
candInfo :: [Exp] -> [String]
candInfo ts = case ts of
  []  -> ["no acceptable alternative"]
  [_] -> ["just one acceptable alternative"]
  _   -> [show (length ts) +++ "alternatives to select"]
-- * keep SState abstract from this on
-- ** editing commands

-- | Lift an editing 'Action' to a session command: on success the new
-- state is pushed; on failure the error becomes the session message.
action2command :: Action -> ECommand
action2command act state = case act (stateSState state) of
  Ok s  -> changeState s state
  Bad m -> changeMsg [m] state

action2commandNext :: Action -> ECommand -- move to next meta after execution
action2commandNext act = action2command (\s -> act s >>= goNextMetaIfCan)

action2commandKeep :: Action -> ECommand -- keep old position after execution
action2commandKeep act = action2command (\s -> keepPosition act s)

-- | Pop @n@ snapshots from the history; if there are not enough,
-- fall back to the oldest snapshot with an explanatory message.
-- NOTE(review): 'last ss' is partial on an empty history — relies on
-- the non-empty 'SState' invariant.
undoCommand :: Int -> ECommand
undoCommand n ss =
  let k = length ss in
  if k < n
    then changeMsg ["cannot go all the way back"] [last ss]
    else changeMsg ["successful undo"] (drop n ss)

-- | Pick candidate number @i@, annotate it in the current state, and
-- replace the focused subtree with it.
selectCand :: CGrammar -> Int -> ECommand
selectCand gr i state = err (\m -> changeMsg [m] state) id $ do
  exp <- candsSState state !? i
  let s = stateSState state
  tree <- annotateInState gr exp s
  return $ case replaceSubTree tree s of
    Ok st' -> changeState st' state
    Bad s  -> changeMsg [s] state

-- | Refine with the unique expression, or offer the list as candidates.
refineByExps :: Bool -> CGrammar -> [Exp] -> ECommand
refineByExps der gr trees = case trees of
  [t] -> action2commandNext (refineWithExpTC der gr t)
  _   -> changeCands trees

refineByTrees :: Bool -> CGrammar -> [Tree] -> ECommand
refineByTrees der gr trees = case trees of
  [t] -> action2commandNext (refineOrReplaceWithTree der gr t)
  _   -> changeCands $ map tree2exp trees

replaceByTrees :: CGrammar -> [Exp] -> ECommand
replaceByTrees gr trees = case trees of
  [t] -> action2commandNext (\s ->
           annotateExpInState gr t s >>= flip replaceSubTree s)
  _   -> changeCands trees

-- | Look up a custom edit command by (case-insensitive) name; if it is
-- not found, the identity action is used.
replaceByEditCommand :: StateGrammar -> String -> ECommand
replaceByEditCommand gr co =
  action2commandKeep $
  maybe return ($ gr) $
  lookupCustom customEditCommand (strCI co)

-- | Apply a custom term command to the focused tree; if the command is
-- unknown, the tree itself is the only candidate.
replaceByTermCommand :: Bool -> StateGrammar -> String -> Tree -> ECommand ----
replaceByTermCommand der gr co exp =
  let g = grammar gr in
  refineByTrees der g $ maybe [exp] (\f -> f gr exp) $
  lookupCustom customTermCommand (strCI co)
-- | The numbered clipboard entries whose values fit (are type-correct
-- refinements for) the current focus of the editing state.
possClipsSState :: StateGrammar -> SState -> [(Int,Clip)]
possClipsSState gr s = filter poss $ zip [0..] (clipSState s)
  where
    poss = possibleTreeVal cgr st . snd
    st   = stateSState s
    cgr  = grammar gr
-- | Fetch clip number @i@ (0-based) from the session clipboard.
--
-- Returns @Bad "not enough clips"@ when the index is out of range.
-- Fix: the previous version tested @length cs > i@, which a negative
-- @i@ always passes, so @cs !! i@ then raised an exception instead of
-- returning 'Bad'; a negative index is now rejected cleanly. Using
-- 'drop' also avoids traversing the list twice (length + (!!)).
getNumberedClip :: Int -> SState -> Err Clip
getNumberedClip i s = case drop i (clipSState s) of
  c:_ | i >= 0 -> return c
  _            -> Bad "not enough clips"

View File

@@ -0,0 +1,44 @@
----------------------------------------------------------------------
-- |
-- Module : Statistics
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/04 11:45:38 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.1 $
--
-- statistics on canonical grammar: amounts of generated code
-- AR 4\/9\/2005.
-- uses canonical grammar
-----------------------------------------------------------------------------
module GF.UseGrammar.Statistics (prStatistics) where
import GF.Infra.Modules
import GF.Infra.Option
import GF.Grammar.PrGrammar
import GF.Canon.GFC
import GF.Canon.MkGFC
import GF.Data.Operations
import Data.List (sortBy)
-- | the top level function
-- | the top level function
-- Prints: number of modules, total printed size of the grammar, and the
-- 40 largest judgements (by length of their printed definition),
-- largest first.
prStatistics :: CanonGrammar -> String
prStatistics can = unlines $ [
  show (length mods) ++ "\t\t modules",
  show chars ++ "\t\t gfc size",
  "",
  "Top 40 definitions"
  ] ++
  [show d ++ "\t\t " ++ f | (d,f) <- tops]
  where
    -- sort ascending by size, then reverse: largest definitions first
    tops  = take 40 $ reverse $ sortBy (\ (i,_) (j,_) -> compare i j) defs
    defs  = [(length (prt (info2def j)), name m j) | (m,j) <- infos]
    infos = [(m,j) | (m,ModMod mo) <- mods, j <- tree2list (jments mo)]
    name m (f,_) = prt m ++ "." ++ prt f
    mods  = modules can
    -- size measured as the length of the whole printed canonical grammar
    chars = length $ prCanon can

View File

@@ -0,0 +1,222 @@
----------------------------------------------------------------------
-- |
-- Module : Tokenize
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/09/29 13:20:08 $
-- > CVS $Author: aarne $
-- > CVS $Revision: 1.14 $
--
-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
-- an entry for each is included in 'Custom.customTokenizer'
-----------------------------------------------------------------------------
module GF.UseGrammar.Tokenize ( tokWords,
tokLits,
tokVars,
lexHaskell,
lexHaskellLiteral,
lexHaskellVar,
lexText,
lexTextVar,
lexC2M, lexC2M',
lexTextLiteral,
lexIgnore,
wordsLits
) where
import GF.Data.Operations
---- import UseGrammar (isLiteral,identC)
import GF.CF.CFIdent
import Data.Char
-- lexers = tokenizers, to prepare input for GF grammars. AR 4/1/2002
-- an entry for each is included in Custom.customTokenizer
-- | just words
-- | just words
tokWords :: String -> [CFTok]
tokWords = map tS . words

-- | Tokenize recognizing literals; quoted chunks spanning several
-- words ("like this") are merged back into a single token before
-- classification by 'mkCFTok'.
tokLits :: String -> [CFTok]
tokLits = map mkCFTok . mergeStr . wordsLits where
  -- a word that opens a quote but does not close it starts a merge
  mergeStr ss = case ss of
    w@(c:cs):rest | elem c "\'\"" && c /= last w -> getStr [w] rest
    w :rest -> w : mergeStr rest
    [] -> []
  -- accumulate words (reversed) until one ends with a quote character
  getStr v ss = case ss of
    w@(_:_):rest | elem (last w) "\'\"" -> (unwords (reverse (w:v))) : mergeStr rest
    w :rest -> getStr (w:v) rest
    [] -> reverse v

-- | Tokenize recognizing metavariables and bound variables ('mkCFTokVar').
tokVars :: String -> [CFTok]
tokVars = map mkCFTokVar . wordsLits
-- | Recognize simple decimal literals: optional leading digits, then a
-- dot followed by at least one digit ("3.14", ".5"; not "12." or "42").
isFloat :: String -> Bool
isFloat s =
  case span isDigit s of
    (_, '.':frac@(_:_)) -> all isDigit frac
    _                   -> False
-- | Recognize quoted strings: at least two characters, with the first
-- and last both a single quote or both a double quote.
isString :: String -> Bool
isString (open:rest@(_:_)) =
  let close = last rest
  in  (open == '\'' && close == '\'') || (open == '"' && close == '"')
isString _ = False
-- | Classify a word: quoted -> string literal, numeric -> float/int
-- literal, anything else -> plain string token.
mkCFTok :: String -> CFTok
mkCFTok s = case s of
  '"' :cs@(_:_) | last cs == '"'  -> tL $ init cs
  '\'':cs@(_:_) | last cs == '\'' -> tL $ init cs --- 's Gravenhage
  _:_ | isFloat s     -> tF s
  _:_ | all isDigit s -> tI s
  _ -> tS s

-- | Classify a word as a metavariable (@?x@), a bound variable
-- (@x_...@, bare @x@, or @$x$@), or a plain token.
mkCFTokVar :: String -> CFTok
mkCFTokVar s = case s of
  '?':_:_     -> tM s --- "?" --- compat with prCF
  'x':'_':_   -> tV s
  'x':[]      -> tV s
  '$':xs@(_:_) -> if last s == '$' then tV (init xs) else tS s
  _ -> tS s

-- | Post-process a tokenizer so that its plain tokens are re-classified
-- as variables/metavariables by 'mkCFTokVar'.
mkTokVars :: (String -> [CFTok]) -> String -> [CFTok]
mkTokVars tok = map tv . tok where
  tv (TS s) = mkCFTokVar s
  tv t = t

-- | Classify a word as a numeric or string literal.
mkLit :: String -> CFTok
mkLit s
  | isFloat s     = tF s
  | all isDigit s = tI s
  | otherwise     = tL s

-- obsolete
-- (kept for compatibility: like 'mkLit' but single-quotes the string)
mkTL :: String -> CFTok
mkTL s
  | isFloat s     = tF s
  | all isDigit s = tI s
  | otherwise     = tL ("'" ++ s ++ "'")
-- | Haskell lexer, usable for much code
-- Repeatedly applies the Prelude 'lex'; stops at end of input or at
-- anything 'lex' cannot handle.
lexHaskell :: String -> [CFTok]
lexHaskell ss = case lex ss of
  [(w@(_:_),ws)] -> tS w : lexHaskell ws
  _ -> []

-- | somewhat shaky text lexer
-- Splits off punctuation as separate tokens; after a major punctuation
-- mark (.!?) the next word is marked as capitalized-position ('tC').
lexText :: String -> [CFTok]
lexText = uncap . lx where
  lx s = case s of
    '?':'?':cs -> tS "??" : lx cs
    p : cs | isMPunct p -> tS [p] : uncap (lx cs)
    p : cs | isPunct p -> tS [p] : lx cs
    -- note: this 's' shadows the argument; the '_:_' branch below
    -- refers to the full input string again
    s : cs | isSpace s -> lx cs
    _ : _ -> getWord s
    _ -> []
  getWord s = tS w : lx ws where (w,ws) = span isNotSpec s
  isMPunct c = elem c ".!?"
  isPunct c = elem c ",:;()\""
  isNotSpec c = not (isMPunct c || isPunct c || isSpace c)
  uncap (TS (c:cs) : ws) = tC (c:cs) : ws
  uncap s = s

-- | lexer for C--, a mini variant of C
lexC2M :: String -> [CFTok]
lexC2M = lexC2M' False

-- | C-- lexer; the flag controls whether unknown identifiers become
-- variables ('tV', higher-order mode) or quoted literals ('tL').
lexC2M' :: Bool -> String -> [CFTok]
lexC2M' isHigherOrder s = case s of
  '#':cs -> lexC $ dropWhile (/='\n') cs       -- line comment
  '/':'*':cs -> lexC $ dropComment cs          -- block comment
  c:cs | isSpace c -> lexC cs
  c:cs | isAlpha c -> getId s
  c:cs | isDigit c -> getLit s
  -- two-character symbols are tried before single characters
  c:d:cs | isSymb [c,d] -> tS [c,d] : lexC cs
  c:cs | isSymb [c] -> tS [c] : lexC cs
  _ -> [] --- covers end of file and unknown characters
  where
    lexC = lexC2M' isHigherOrder
    getId s = mkT i : lexC cs where (i,cs) = span isIdChar s
    getLit s = tI i : lexC cs where (i,cs) = span isDigit s ---- Float!
    isIdChar c = isAlpha c || isDigit c || elem c "'_"
    isSymb = reservedAnsiCSymbol
    dropComment s = case s of
      '*':'/':cs -> cs
      _:cs -> dropComment cs
      _ -> []
    -- reserved words stay as plain tokens; other identifiers become
    -- variables or quoted literals depending on the mode
    mkT i = if (isRes i) then (tS i) else
            if isHigherOrder then (tV i) else (tL ("'" ++ i ++ "'"))
    isRes = reservedAnsiC

-- | Is the string a reserved ANSI C symbol (stored with value True)?
reservedAnsiCSymbol s = case lookupTree show s ansiCtree of
  Ok True -> True
  _ -> False

-- | Is the string a reserved ANSI C word (stored with value False)?
reservedAnsiC s = case lookupTree show s ansiCtree of
  Ok False -> True
  _ -> False

-- | for an efficient lexer: precompile this!
-- Symbols map to True, reserved words to False.
ansiCtree = buildTree $ [(s,True) | s <- reservedAnsiCSymbols] ++
                        [(s,False) | s <- reservedAnsiCWords]
-- | The operator/punctuation symbols of the C-- lexer.
-- Multi-character symbols come first so that two-character matches are
-- preferred over their single-character prefixes.
reservedAnsiCSymbols :: [String]
reservedAnsiCSymbols = concatMap words
  [ "<<= >>= << >> ++ -- == <= >= *= += -= %= /= &= ^= |= "
  , "^ { } = , ; + * - ( ) < > & % ! ~"
  ]
-- | The reserved words of the C-- lexer: the ANSI C keywords plus a few
-- extra identifiers used in teaching material.
reservedAnsiCWords :: [String]
reservedAnsiCWords = concatMap words
  [ "auto break case char const continue default "
  , "do double else enum extern float for goto if int "
  , "long register return short signed sizeof static struct switch typedef "
  , "union unsigned void volatile while "
  , "main printin putchar" --- these are not ansi-C
  ]
-- | turn unknown tokens into string literals; not recursively for literals 123, 'foo'
unknown2string :: (String -> Bool) -> [CFTok] -> [CFTok]
unknown2string isKnown = map mkOne where
  mkOne t@(TS s)
    | isKnown s     = t
    | isFloat s     = tF s
    | all isDigit s = tI s
    | otherwise     = tL s
  -- capitalized-position tokens keep their classification if known
  mkOne t@(TC s) = if isKnown s then t else mkLit s
  mkOne t = t

-- | Turn unknown tokens into variables (or numeric/string literals when
-- they look like ones); @??@ becomes a metavariable token.
unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok]
unknown2var isKnown = map mkOne where
  mkOne t@(TS "??") = if isKnown "??" then t else tM "??"
  mkOne t@(TS s)
    | isKnown s     = t
    | isFloat s     = tF s
    | isString s    = tL (init (tail s))  -- strip the surrounding quotes
    | all isDigit s = tI s
    | otherwise     = tV s
  mkOne t@(TC s) = if isKnown s then t else tV s
  mkOne t = t

-- Composed lexers: tokenize, then reclassify the unknown tokens.
lexTextLiteral, lexHaskellLiteral, lexHaskellVar :: (String -> Bool) -> String -> [CFTok]
lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText
lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell
lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText
-- | Case-insensitive first letter: the word is known if either its
-- lower-cased or its upper-cased variant is known.
eitherUpper :: (String -> Bool) -> String -> Bool
eitherUpper isKnown word = case word of
  c:rest -> isKnown (toLower c : rest) || isKnown (toUpper c : rest)
  []     -> isKnown []
-- | Ignore unknown plain tokens (e.g. for keyword spotting): a 'TS'
-- token survives only if it is known; all other token kinds are kept.
lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok]
lexIgnore isKnown = filter keep
  where
    keep (TS s) = isKnown s
    keep _      = True

View File

@@ -0,0 +1,79 @@
----------------------------------------------------------------------
-- |
-- Module : Transfer
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:23:53 $
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.5 $
--
-- linearize, parse, etc, by transfer. AR 9\/10\/2003
-----------------------------------------------------------------------------
module GF.UseGrammar.Transfer where
import GF.Grammar.Grammar
import GF.Grammar.Values
import GF.Grammar.AbsCompute
import qualified GF.Canon.GFC as GFC
import GF.Grammar.LookAbs
import GF.Grammar.MMacros
import GF.Grammar.Macros
import GF.Grammar.PrGrammar
import GF.Grammar.TypeCheck
import GF.Infra.Ident
import GF.Data.Operations
import qualified Transfer.Core.Abs as T
import Control.Monad
-- transfer is done in T.Exp - we only need these conversions.
-- | Embed a GF expression into the transfer core language, wrapped in
-- an application of the transfer function @f@.
exp2core :: Ident -> Exp -> T.Exp
exp2core f = T.EApp (T.EVar (var f)) . exp2c where
  exp2c e = case e of
    App f a -> T.EApp (exp2c f) (exp2c a)
    Abs x b -> T.EAbs (T.PVVar (var x)) (exp2c b) ---- should be syntactic abstr
    Q _ c   -> T.EVar (var c)
    QC _ c  -> T.EVar (var c)
    K s     -> T.EStr s
    EInt i  -> T.EInteger $ toInteger i
    Meta m  -> T.EMeta (T.TMeta (prt m)) ---- which meta symbol?
    Vr x    -> T.EVar (var x) ---- should be syntactic var
  var x = T.CIdent $ prt x

-- | Back-translation from the transfer core language to a GF
-- expression; variables are left bare (GF annotates them to Q or QC).
core2exp :: T.Exp -> Exp
core2exp e = case e of
  T.EApp f a -> App (core2exp f) (core2exp a)
  T.EAbs (T.PVVar x) b -> Abs (var x) (core2exp b) ---- only from syntactic abstr
  T.EVar c -> Vr (var c) -- GF annotates to Q or QC
  T.EStr s -> K s
  T.EInteger i -> EInt $ fromInteger i
  T.EMeta _ -> uExp -- meta symbol 0, refreshed by GF
  where
    var :: T.CIdent -> Ident
    var (T.CIdent x) = zIdent x

-- The following are now obsolete (30/11/2005)
-- linearize, parse, etc, by transfer. AR 9/10/2003

-- | Apply the transfer function @tra@ (looked up by the value category
-- of the tree) to a tree, computing and re-annotating the result.
doTransfer :: GFC.CanonGrammar -> Ident -> Tree -> Err Tree
doTransfer gr tra t = do
  cat <- liftM snd $ val2cat $ valTree t
  f <- lookupTransfer gr tra cat
  e <- compute gr $ App f $ tree2exp t
  annotate gr e

-- | Linearize (or otherwise use) a tree after transferring it.
useByTransfer :: (Tree -> Err a) -> GFC.CanonGrammar -> Ident -> (Tree -> Err a)
useByTransfer lin gr tra t = doTransfer gr tra t >>= lin

-- | Parse (or otherwise produce trees) and transfer each result.
mkByTransfer :: (a -> Err [Tree]) -> GFC.CanonGrammar -> Ident -> (a -> Err [Tree])
mkByTransfer parse gr tra s = parse s >>= mapM (doTransfer gr tra)

View File

@@ -0,0 +1,77 @@
----------------------------------------------------------------------
-- |
-- Module : TreeSelections
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- choose shallowest trees, and remove an overload resolution prefix
-----------------------------------------------------------------------------
module GF.UseGrammar.TreeSelections (
getOverloadResults, smallestTrs, sizeTr, depthTr
) where
import GF.Grammar.Abstract
import GF.Grammar.Macros
import GF.Data.Operations
import GF.Data.Zipper
import Data.List
-- AR 2/7/2007
-- The top-level function takes a set of trees (typically parses)
-- and returns the list of those trees that have the minimum size.
-- In addition, the overload prefix "ovrld123_", is removed
-- from each constructor in which it appears. This is used for
-- showing the library API constructors in a parsable grammar.
-- TODO: access the generic functions smallestTrs, sizeTr, depthTr from shell
-- | Keep only the minimum-size trees among a set of parses, after
-- stripping the overload-resolution prefix ("ovrld...123_") from
-- constructor names.
getOverloadResults :: [Tree] -> [Tree]
getOverloadResults = smallestTrs sizeTr . map (mkOverload "ovrld")
-- NB: this does not always give the desired result, since
-- some genuine alternatives may be deeper: now we will exclude the
-- latter of
--
-- mkCl this_NP love_V2 (mkNP that_NP here_Adv)
-- mkCl this_NP (mkVP (mkVP love_V2 that_NP) here_Adv)
--
-- A perfect method would know the definitional equivalences of constructors.
--
-- Notice also that size is a better measure than depth, because:
-- 1. Global depth does not exclude the latter of
--
-- mkCl (mkNP he_Pron) love_V2 that_NP
-- mkCl (mkNP he_Pron) (mkVP love_V2 that_NP)
--
-- 2. Length is needed to exclude the latter of
--
-- mkS (mkCl (mkNP he_Pron) love_V2 that_NP)
-- mkS presentTense (mkCl (mkNP he_Pron) love_V2 that_NP)
--
-- | Keep exactly the trees with the minimal measure under @size@,
-- preserving their original order.
smallestTrs :: (Tr a -> Int) -> [Tr a] -> [Tr a]
smallestTrs size ts = [t | (t, d) <- measured, d == smallest]
  where
    measured = [(t, size t) | t <- ts]
    smallest = minimum (map snd measured)
-- | Depth of a tree: 1 for a leaf, otherwise 1 + the deepest subtree.
depthTr :: Tr a -> Int
depthTr (Tr (_, subtrees))
  | null subtrees = 1
  | otherwise     = 1 + maximum (map depthTr subtrees)

-- | Number of nodes in a tree.
sizeTr :: Tr a -> Int
sizeTr (Tr (_, subtrees)) = foldr (\t acc -> sizeTr t + acc) 1 subtrees
-- remove from each constant a prefix starting with "pref", up to first "_"
-- example format: ovrld123_mkNP
mkOverload :: String -> Tree -> Tree
mkOverload pref = mapTr (changeAtom overAtom) where
overAtom a = case a of
AtC (m, IC f) | isPrefixOf pref f ->
AtC (m, IC (tail (dropWhile (/='_') f)))
_ -> a

View File

@@ -0,0 +1,251 @@
----------------------------------------------------------------------
-- |
-- Module : Treebank
-- Maintainer : AR
-- Stability : (stable)
-- Portability : (portable)
--
-- Generate multilingual treebanks. AR 8\/2\/2006
--
-- (c) Aarne Ranta 2006 under GNU GPL
--
-- Purpose: to generate treebanks.
-----------------------------------------------------------------------------
module GF.UseGrammar.Treebank (
mkMultiTreebank,
mkUniTreebank,
multi2uniTreebank,
uni2multiTreebank,
testMultiTreebank,
treesTreebank,
getTreebank,
getUniTreebank,
readUniTreebanks,
readMultiTreebank,
lookupTreebank,
assocsTreebank,
isWordInTreebank,
printAssoc,
mkCompactTreebank
) where
import GF.Compile.ShellState
import GF.UseGrammar.Linear -- (linTree2string)
import GF.UseGrammar.Custom
import GF.UseGrammar.GetTree (string2tree)
import GF.Grammar.TypeCheck (annotate)
import GF.Canon.CMacros (noMark)
import GF.Grammar.Grammar (Trm)
import GF.Grammar.MMacros (exp2tree)
import GF.Grammar.Macros (zIdent)
import GF.Grammar.PrGrammar (prt_,prt)
import GF.Grammar.Values (tree2exp)
import GF.Data.Operations
import GF.Infra.Option
import GF.Infra.Ident (Ident)
import GF.Infra.UseIO
import qualified GF.Grammar.Abstract as A
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.List as L
import Control.Monad (liftM)
import System.FilePath
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
-- (c) Aarne Ranta 2006 under GNU GPL
-- keys are trees; format: XML file
-- keys are trees; format: XML file
type MultiTreebank = [(String,[(String,String)])] -- tree,lang,lin
-- keys are strings; format: string TAB tree TAB ... TAB tree
type UniTreebank = Treebank -- M.Map String [String] -- string,tree

-- both formats can be read from both kinds of files

-- | Read a file as unilingual treebanks: a multilingual XML file yields
-- one treebank per language; a plain file yields a single treebank
-- named after the file (extension dropped).
readUniTreebanks :: FilePath -> IO [(Ident,UniTreebank)]
readUniTreebanks file = do
  s <- readFileIf file
  return $ if isMultiTreebank s
    then multi2uniTreebank $ getTreebank $ lines s
    else
      let tb = getUniTreebank $ lines s
      in [(zIdent (dropExtension file),tb)]

-- | Read a file as a multilingual treebank; a plain unilingual file is
-- converted, taking the language name from the file name.
readMultiTreebank :: FilePath -> IO MultiTreebank
readMultiTreebank file = do
  s <- readFileIf file
  return $ if isMultiTreebank s
    then getTreebank $ lines s
    else uni2multiTreebank (zIdent (dropExtension file)) $ getUniTreebank $ lines s
-- | A multilingual treebank file starts with the XML tag @<treebank>@.
isMultiTreebank :: String -> Bool
isMultiTreebank contents = "<treebank>" `L.isPrefixOf` contents
-- | Split a multilingual treebank into one unilingual treebank per
-- language. The language list is taken from the first item; entries
-- map each linearization string to its trees.
multi2uniTreebank :: MultiTreebank -> [(Ident,UniTreebank)]
multi2uniTreebank mt@((_,lls):_) = [(zIdent la, mkTb la) | (la,_) <- lls] where
  -- note: the inner 'lls' deliberately shadows the outer one
  mkTb la = M.fromListWith (++) [(s,[t]) | (t,lls) <- mt, (l,s) <- lls, l==la]
multi2uniTreebank [] = []

-- | Inverse direction: each (string, trees) entry of a unilingual
-- treebank becomes one item per tree, tagged with the language.
uni2multiTreebank :: Ident -> UniTreebank -> MultiTreebank
uni2multiTreebank la tb =
  [(t,[(prt_ la, s)]) | (s,ts) <- assocsTreebank tb, t <- ts]

-- | the main functions
-- builds a treebank where trees are the keys, and writes a file (opt. XML)
-- With the -compact option, the compact integer format is used instead.
mkMultiTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkMultiTreebank opts sh com trees
  | oElem (iOpt "compact") opts = mkCompactTreebank opts sh trees
mkMultiTreebank opts sh com trees =
  putInXML opts "treebank" comm (concatMap mkItem tris) where
    mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
    -- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
    mkTree t = putInXML opts "tree" [] (puts $ showTree t)
    mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize opts sh lg t)
    langs = [prt_ l | l <- allLanguages sh]
    comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
    abstr = "" --- "Abs" ----
    cat i = " number=" ++ show (show i) --- " cat=" ++ show "S" ----
    lang lg = " lang=" ++ show (prt_ (zIdent lg))
    -- items are numbered from 1
    tris = zip trees [1..]

-- | Build a unilingual treebank: linearizations are the keys, printed
-- trees the values.
mkUniTreebank :: Options -> ShellState -> Language -> [A.Tree] -> Treebank
mkUniTreebank opts sh lg trees = M.fromListWith (++) [(lin t, [prt_ t]) | t <- trees]
  where
    lang = prt_ lg
    lin t = linearize opts sh lang t

-- reads a treebank and linearizes its trees again, printing all differences
testMultiTreebank :: Options -> ShellState -> String -> Res
testMultiTreebank opts sh = putInXML opts "testtreebank" [] .
  concatMap testOne .
  getTreebanks . lines
  where
    -- re-linearize the stored tree and diff against the stored string
    testOne (e,lang,str0) = do
      let tr = annot gr e
      let str = linearize opts sh lang tr
      if str == str0 then ret else putInXML opts "diff" [] $ concat [
        putInXML opts "tree" [] (puts $ showTree tr),
        putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0,
        putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
        ]
    gr = firstStateGrammar sh
-- | Extract just the trees (the keys) of a multilingual treebank file.
treesTreebank :: Options -> String -> [String]
treesTreebank _ contents = [tree | (tree, _) <- getTreebank (lines contents)]
-- string vs. IO
-- 'Res' abstracts over producing lines vs. printing them directly.
type Res = [String] -- IO ()
puts :: String -> Res
puts = return -- putStrLn
ret = [] -- return ()
--
-- here strings are keys

assocsTreebank :: UniTreebank -> [(String,[String])]
assocsTreebank = M.assocs

-- | Is the word among the words of any key string of the treebank?
-- Note: a fresh word set is built on every call.
isWordInTreebank :: UniTreebank -> String -> Bool
isWordInTreebank tb w = S.member w (S.fromList (concatMap words (M.keys tb)))

-- | One line per entry: the key string, then tab-separated trees.
printAssoc (s, ts) = s ++ concat ["\t" ++ t | t <- ts]

-- | Flatten a treebank file into (tree, language, linearization) triples.
getTreebanks :: [String] -> [(String,String,String)]
getTreebanks = concatMap grps . getTreebank where
  grps (t,lls) = [(t,x,y) | (x,y) <- lls]

-- | Parse the XML-ish multilingual treebank format line by line.
-- The parsing is by line prefixes ("</item", "</tree") rather than a
-- real XML parser; malformed input may make the partial helpers fail.
getTreebank :: [String] -> MultiTreebank
getTreebank ll = case ll of
  l:ls@(_:_:_) ->
    let (l1,l2) = getItem ls
        (tr,lins) = getTree l1
        lglins = getLins lins
    in (tr,lglins) : getTreebank l2
  _ -> []
  where
    getItem = span ((/="</item") . take 6)
    -- the tree is the last line before "</tree"
    getTree (_:ss) =
      let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)
    -- each lin is three lines: <lin lang="...">, the string, </lin>
    getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
    getLins _ = []
    getLang = takeWhile (/='"') . tail . dropWhile (/='"')

-- | Parse the plain unilingual format: string TAB tree TAB ... TAB tree.
getUniTreebank :: [String] -> UniTreebank
getUniTreebank ls = M.fromListWith (++) [(s, ts) | s:ts <- map chop ls] where
  chop = chunks '\t'
-- | Look up a key string in a unilingual treebank; the empty list when
-- the key is absent. (Idiom: 'M.findWithDefault' instead of
-- @maybe [] id . M.lookup@.)
lookupTreebank :: Treebank -> String -> [String]
lookupTreebank tb s = M.findWithDefault [] s tb
-- | Parse and annotate a tree from its string form.
-- NOTE(review): on annotation failure this returns
-- @error "illegal tree"@, which crashes only when the result is forced.
annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do
  let t = tree2exp $ string2tree gr s
  annotate (grammar gr) t

-- | Wrap a result in an XML open/close tag pair; the tags are emitted
-- only when the -xml option is set, otherwise the body passes through.
putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs io =
  (ifXML $ puts $ tagXML $ tag ++ attrs) ++
  io ++
  (ifXML $ puts $ tagXML $ '/':tag)
  where
    ifXML c = if oElem showXML opts then c else []
-- | Wrap a tag body in angle brackets: @tagXML "tree" == "<tree>"@.
tagXML :: String -> String
tagXML body = concat ["<", body, ">"]
-- print the treebank in a compact format:
-- first a sorted list of all words, referrable by index
-- then the linearization of each tree, as sequences of word indices
-- this format is usable in embedded translation systems.

-- | Build a multilingual treebank and print it in the compact format.
mkCompactTreebank :: Options -> ShellState -> [A.Tree] -> [String]
mkCompactTreebank opts sh = printCompactTreebank . mkJustMultiTreebank opts sh
-- | Render a multilingual treebank in the compact integer format:
-- a statistics line (words, languages, items, max sentence length),
-- the language names, the sorted word list, a separator, then one line
-- of 1-based word indices per linearization, padded with "0" to the
-- maximal sentence length.
--
-- Fixes: 'maybe undefined id' replaced by an informative error
-- (unreachable when every linearized word occurs in 'ws', as it does by
-- construction); 'smax' made total so an empty treebank no longer
-- crashes in 'maximum'; 'concat . map' tightened to 'concatMap'.
printCompactTreebank :: (MultiTreebank,[String]) -> [String]
printCompactTreebank (tb,lgs) = (stat:langs:unwords ws : "\n" : linss) where
  -- all distinct words of all linearizations, sorted
  ws = L.sort $ L.nub $ concatMap (concatMap (words . snd) . snd) tb
  linss = map (unwords . pad) linss0
  linss0 = map (map (show . encode) . words) allExs
  -- all linearizations, grouped language by language
  allExs = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]]
  -- 1-based index of a word in the sorted word list
  encode w = maybe (error ("printCompactTreebank: unknown word " ++ w)) id $
             M.lookup w wmap
  wmap = M.fromAscList $ zip ws [1..]
  stat = unwords $ map show [length ws, length lgs, length tb, smax]
  langs = unwords lgs
  -- foldr max 0 instead of maximum: total on an empty treebank
  smax = foldr max 0 (map length linss0)
  pad ks = ks ++ replicate (smax - length ks) "0"
-- [(String,[(String,String)])] -- tree,lang,lin
-- | Linearize each tree in every language of the shell state; also
-- returns the list of language names.
mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String])
mkJustMultiTreebank opts sh ts =
  ([(prt_ t, [(la, lin la t) | la <- langs]) | t <- ts],langs) where
    langs = map prt_ $ allLanguages sh
    lin = linearize opts sh

--- these handy functions are borrowed from EmbedAPI
-- Linearizer for one language, selected by option:
-- -record, -table, -all, or plain untokenized linearization.
linearize opts mgr lang = lin where
  sgr = stateGrammarOfLangOpt False mgr zlang
  cgr = canModules mgr
  zlang = zIdent lang
  -- the untokenizer is customizable per grammar via options
  untok = customOrDefault (addOptions opts (stateOptions sgr)) useUntokenizer customUntokenizer sgr
  lin
    | oElem showRecord opts = err id id . liftM prt . linearizeNoMark cgr zlang
    | oElem tableLin opts =
        err id id . liftM (unlines . map untok . prLinTable True) . allLinTables True cgr zlang
    | oElem showAll opts =
        err id id . liftM (unlines . map untok . prLinTable False) . allLinTables False cgr zlang
    | otherwise = untok . linTree2string noMark cgr zlang

-- | Print a tree as its expression form.
showTree t = prt_ $ tree2exp t