forked from GitHub/gf-core
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
494
src-3.0/GF/UseGrammar/Custom.hs
Normal file
494
src-3.0/GF/UseGrammar/Custom.hs
Normal file
@@ -0,0 +1,494 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Custom
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/16 10:21:21 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.85 $
|
||||
--
|
||||
-- A database for customizable GF shell commands.
|
||||
--
|
||||
-- databases for customizable commands. AR 21\/11\/2001.
|
||||
-- for: grammar parsers, grammar printers, term commands, string commands.
|
||||
-- idea: items added here are usable throughout GF; nothing else need be edited.
|
||||
-- they are often usable through the API: hence API cannot be imported here!
|
||||
--
|
||||
-- Major redesign 3\/4\/2002: the first entry in each database is DEFAULT.
|
||||
-- If no other value is given, the default is selected.
|
||||
-- Because of this, two invariants have to be preserved:
|
||||
--
|
||||
-- - no databases may be empty
|
||||
--
|
||||
-- - additions are made to the end of the database
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Custom where
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Text.Text
|
||||
import GF.UseGrammar.Tokenize
|
||||
import GF.Grammar.Values
|
||||
import qualified GF.Grammar.Grammar as G
|
||||
import qualified GF.Canon.AbsGFC as A
|
||||
import qualified GF.Canon.GFC as C
|
||||
|
||||
import qualified GF.Devel.GFCCtoJS as JS
|
||||
import GF.Canon.CanonToGFCC
|
||||
import qualified GF.Devel.GFCCtoHaskell as CCH
|
||||
|
||||
import qualified GF.Source.AbsGF as GF
|
||||
import qualified GF.Grammar.MMacros as MM
|
||||
import GF.Grammar.AbsCompute
|
||||
import GF.Grammar.TypeCheck
|
||||
import GF.UseGrammar.Generate
|
||||
import GF.UseGrammar.MatchTerm
|
||||
import GF.UseGrammar.Linear (unoptimizeCanon)
|
||||
------import Compile
|
||||
import GF.Compile.ShellState
|
||||
import GF.UseGrammar.Editing
|
||||
import GF.UseGrammar.Paraphrases
|
||||
import GF.Infra.Option
|
||||
import GF.CF.CF
|
||||
import GF.CF.CFIdent
|
||||
|
||||
import GF.Canon.CanonToGrammar
|
||||
import GF.CF.PPrCF
|
||||
import GF.CF.PrLBNF
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Compile.PrOld
|
||||
import GF.Canon.MkGFC
|
||||
import GF.Speech.PrGSL (gslPrinter)
|
||||
import GF.Speech.PrJSGF (jsgfPrinter)
|
||||
import GF.Speech.PrSRGS
|
||||
import GF.Speech.PrSRGS_ABNF
|
||||
import qualified GF.Speech.SISR as SISR
|
||||
import GF.Speech.PrSLF
|
||||
import GF.Speech.PrFA (faGraphvizPrinter,regularPrinter,faCPrinter)
|
||||
import GF.Speech.PrRegExp (regexpPrinter,multiRegexpPrinter)
|
||||
import GF.Speech.GrammarToVoiceXML (grammar2vxml)
|
||||
|
||||
import GF.Data.Zipper
|
||||
|
||||
import GF.UseGrammar.Statistics
|
||||
import GF.UseGrammar.Morphology
|
||||
import GF.UseGrammar.Information
|
||||
import GF.API.GrammarToHaskell
|
||||
import GF.API.GrammarToTransfer
|
||||
-----import GrammarToCanon (showCanon, showCanonOpt)
|
||||
-----import qualified GrammarToGFC as GFC
|
||||
import GF.Probabilistic.Probabilistic (prProbs)
|
||||
|
||||
-- the cf parsing algorithms
|
||||
import GF.CF.ChartParser -- OBSOLETE
|
||||
import qualified GF.Parsing.CF as PCF
|
||||
import qualified GF.OldParsing.ParseCF as PCFOld -- OBSOLETE
|
||||
|
||||
-- grammar conversions -- peb 19/4-04
|
||||
-- see also customGrammarPrinter
|
||||
import qualified GF.OldParsing.ConvertGrammar as CnvOld -- OBSOLETE
|
||||
import qualified GF.Printing.PrintParser as PrtOld -- OBSOLETE
|
||||
import qualified GF.Infra.Print as Prt
|
||||
import qualified GF.Conversion.GFC as Cnv
|
||||
import qualified GF.Conversion.Types as CnvTypes
|
||||
import qualified GF.Conversion.Haskell as CnvHaskell
|
||||
import qualified GF.Conversion.Prolog as CnvProlog
|
||||
import qualified GF.Conversion.TypeGraph as CnvTypeGraph
|
||||
import GF.Canon.Unparametrize
|
||||
import GF.Canon.Subexpressions
|
||||
import GF.Canon.AbsToBNF
|
||||
|
||||
import GF.Canon.GFC
|
||||
import qualified GF.Canon.MkGFC as MC
|
||||
import GF.CFGM.PrintCFGrammar (prCanonAsCFGM)
|
||||
import GF.Visualization.VisualizeGrammar (visualizeCanonGrammar, visualizeSourceGrammar)
|
||||
|
||||
import GF.API.MyParser
|
||||
|
||||
import qualified GF.Infra.Modules as M
|
||||
import GF.Infra.UseIO
|
||||
|
||||
import Control.Monad
|
||||
import Data.Char
|
||||
import Data.Maybe (fromMaybe)
|
||||
|
||||
-- character codings
|
||||
import GF.Text.Unicode
|
||||
import GF.Text.UTF8 (decodeUTF8)
|
||||
import GF.Text.Greek (mkGreek)
|
||||
import GF.Text.Arabic (mkArabic)
|
||||
import GF.Text.Hebrew (mkHebrew)
|
||||
import GF.Text.Russian (mkRussian, mkRusKOI8)
|
||||
import GF.Text.Ethiopic (mkEthiopic)
|
||||
import GF.Text.Tamil (mkTamil)
|
||||
import GF.Text.OCSCyrillic (mkOCSCyrillic)
|
||||
import GF.Text.LatinASupplement (mkLatinASupplement)
|
||||
import GF.Text.Devanagari (mkDevanagari)
|
||||
import GF.Text.Hiragana (mkJapanese)
|
||||
import GF.Text.ExtendedArabic (mkArabic0600)
|
||||
import GF.Text.ExtendedArabic (mkExtendedArabic)
|
||||
import GF.Text.ExtraDiacritics (mkExtraDiacritics)
|
||||
|
||||
-- minimal version also used in Hugs. AR 2/12/2002.
|
||||
|
||||
-- databases for customizable commands. AR 21/11/2001
|
||||
-- for: grammar parsers, grammar printers, term commands, string commands
|
||||
-- idea: items added here are usable throughout GF; nothing else need be edited
|
||||
-- they are often usable through the API: hence API cannot be imported here!
|
||||
|
||||
-- Major redesign 3/4/2002: the first entry in each database is DEFAULT.
|
||||
-- If no other value is given, the default is selected.
|
||||
-- Because of this, two invariants have to be preserved:
|
||||
-- - no databases may be empty
|
||||
-- - additions are made to the end of the database
|
||||
|
||||
-- * these are the databases; the comment gives the name of the flag

-- | grammarFormat, \"-format=x\" or file suffix
customGrammarParser :: CustomData (FilePath -> IOE C.CanonGrammar)

-- | grammarPrinter, \"-printer=x\"
customGrammarPrinter :: CustomData (Options -> StateGrammar -> String)

-- | multiGrammarPrinter, \"-printer=x\"
customMultiGrammarPrinter :: CustomData (Options -> CanonGrammar -> String)

-- | syntaxPrinter, \"-printer=x\"
customSyntaxPrinter :: CustomData (GF.Grammar -> String)

-- | termPrinter, \"-printer=x\"
customTermPrinter :: CustomData (StateGrammar -> Tree -> String)

-- | termCommand, \"-transform=x\"
customTermCommand :: CustomData (StateGrammar -> Tree -> [Tree])

-- | editCommand, \"-edit=x\"
customEditCommand :: CustomData (StateGrammar -> Action)

-- | filterString, \"-filter=x\"
customStringCommand :: CustomData (StateGrammar -> String -> String)

-- | useParser, \"-parser=x\"
customParser :: CustomData (StateGrammar -> CFCat -> CFParser)

-- | useTokenizer, \"-lexer=x\"
customTokenizer :: CustomData (StateGrammar -> String -> [[CFTok]])

-- | useUntokenizer, \"-unlexer=x\" --- should be from token list to string
customUntokenizer :: CustomData (StateGrammar -> String -> String)

-- | uniCoding, \"-coding=x\"
--
-- contains conversions from different codings to the internal
-- unicode coding
customUniCoding :: CustomData (String -> String)
|
||||
|
||||
-- | Select the item requested by the given option; fall back to the
-- database default when the option is absent or names an unknown item.
customOrDefault :: Options -> OptFun -> CustomData a -> a
customOrDefault opts optfun db =
  -- idiom: 'fromMaybe d' instead of 'maybe d id'
  fromMaybe (defaultCustomVal db) (customAsOptVal opts optfun db)
|
||||
|
||||
-- | Produce a menu view of a custom database: its title together with the
-- names of all registered operations, in registration order.
customInfo :: CustomData a -> (String, [String])
customInfo db = (titleCustomData db, [ciStr ci | (ci, _) <- dbCustomData db])
|
||||
|
||||
-------------------------------
-- * types and stuff

-- | Identifier used as the lookup key of a custom command.
type CommandId = String

-- | Build a 'CommandId' from a raw string.
strCI :: String -> CommandId
strCI s = s

-- | Extract the raw string of a 'CommandId'.
ciStr :: CommandId -> String
ciStr ci = ci

-- | View a 'CommandId' as a command-line option.
ciOpt :: CommandId -> Option
ciOpt ci = iOpt ci
|
||||
|
||||
-- | A titled association list of custom operations, keyed by 'CommandId'.
-- Invariants (see the module header): never empty, and new entries are
-- appended so the first entry stays the default.
newtype CustomData a = CustomData (String, [(CommandId,a)])
|
||||
|
||||
-- | Construct a custom database from a title and its entries.
customData :: String -> [(CommandId, a)] -> CustomData a
customData title entries = CustomData (title, entries)
|
||||
|
||||
-- | The entries of a custom database.
dbCustomData :: CustomData a -> [(CommandId, a)]
dbCustomData (CustomData (_, entries)) = entries

-- | The title of a custom database.
titleCustomData :: CustomData a -> String
titleCustomData (CustomData (title, _)) = title
|
||||
|
||||
-- | Look up a command by its identifier; 'Nothing' when it is not registered.
-- 'lookup' returns the first match, so the default entry wins on duplicates.
lookupCustom :: CustomData a -> CommandId -> Maybe a
lookupCustom db ci = lookup ci (dbCustomData db)
|
||||
|
||||
-- | Select the item named by the given option, if the option is present
-- and names a registered command.
customAsOptVal :: Options -> OptFun -> CustomData a -> Maybe a
customAsOptVal opts optfun db =
  getOptVal opts optfun >>= lookupCustom db . strCI
|
||||
|
||||
-- | The default item of a database: its first entry.  Raises an error on an
-- empty database, which the invariants of this module rule out.
defaultCustomVal :: CustomData a -> a
defaultCustomVal (CustomData (title, entries)) =
  ifNull (error ("empty database:" +++ title)) (snd . head) entries
|
||||
|
||||
-------------------------------------------------------------------------
|
||||
-- * and here's the customizable part:
|
||||
|
||||
-- | Grammar parsers; the identifier is also used as the file-name suffix.
-- Currently empty: compilation happens elsewhere.
customGrammarParser =
  customData "Grammar parsers, selected by file name suffix" $
    [
    ------ (strCI "gf", compileModule noOptions) -- DEFAULT
    -- add your own grammar parsers here
    ]
|
||||
|
||||
|
||||
-- | Printers for a single grammar, selected by option @-printer=x@.
-- The first entry is the default, per the module invariants.
customGrammarPrinter =
  customData "Grammar printers, selected by option -printer=x" $
    [ (strCI "gfc",           \_ -> prCanon . stateGrammarST) -- DEFAULT
    , (strCI "gf",            \_ -> err id prGrammar . canon2sourceGrammar . stateGrammarST)
    , (strCI "cf",            \_ -> prCF . stateCF)
    , (strCI "old",           \_ -> printGrammarOld . stateGrammarST)
    -- speech-grammar formats:
    , (strCI "gsl",                 gslPrinter)
    , (strCI "jsgf",                jsgfPrinter Nothing)
    , (strCI "jsgf_sisr_old",       jsgfPrinter (Just SISR.SISROld))
    , (strCI "srgs_xml",            srgsXmlPrinter Nothing False)
    , (strCI "srgs_xml_non_rec",    srgsXmlNonRecursivePrinter)
    , (strCI "srgs_xml_prob",       srgsXmlPrinter Nothing True)
    , (strCI "srgs_xml_sisr_old",   srgsXmlPrinter (Just SISR.SISROld) False)
    , (strCI "srgs_abnf",           srgsAbnfPrinter Nothing False)
    , (strCI "srgs_abnf_non_rec",   srgsAbnfNonRecursivePrinter)
    , (strCI "srgs_abnf_sisr_old",  srgsAbnfPrinter (Just SISR.SISROld) False)
    , (strCI "vxml",                grammar2vxml)
    , (strCI "slf",                 slfPrinter)
    , (strCI "slf_graphviz",        slfGraphvizPrinter)
    , (strCI "slf_sub",             slfSubPrinter)
    , (strCI "slf_sub_graphviz",    slfSubGraphvizPrinter)
    , (strCI "fa_graphviz",         faGraphvizPrinter)
    , (strCI "fa_c",                faCPrinter)
    , (strCI "regexp",              regexpPrinter)
    , (strCI "regexps",             multiRegexpPrinter)
    , (strCI "regular",             regularPrinter)
    -- BNF variants:
    , (strCI "plbnf",         \_ -> prLBNF True)
    , (strCI "lbnf",          \_ -> prLBNF False)
    , (strCI "bnf",           \_ -> prBNF False)
    , (strCI "absbnf",        \_ -> abstract2bnf . stateGrammarST)
    -- code generation:
    , (strCI "haskell",       \_ -> grammar2haskell . stateGrammarST)
    , (strCI "gfcc_haskell",  \opts -> CCH.grammar2haskell .
                                       canon2gfcc opts . stateGrammarST)
    , (strCI "haskell_gadt",  \_ -> grammar2haskellGADT . stateGrammarST)
    , (strCI "transfer",      \_ -> grammar2transfer . stateGrammarST)
    -- state information:
    , (strCI "morpho",        \_ -> prMorpho . stateMorpho)
    , (strCI "fullform",      \_ -> prFullForm . stateMorpho)
    , (strCI "opts",          \_ -> prOpts . stateOptions)
    , (strCI "words",         \_ -> unwords . stateGrammarWords)
    , (strCI "printnames",    \_ -> C.prPrintnamesGrammar . stateGrammarST)
    , (strCI "stat",          \_ -> prStatistics . stateGrammarST)
    , (strCI "probs",         \_ -> prProbs . stateProbs)
    , (strCI "unpar",         \_ -> prCanon . unparametrizeCanon . stateGrammarST)
    , (strCI "subs",          \_ -> prSubtermStat . stateGrammarST)

    {- ----
     (strCI "gf",       prt . st2grammar . stateGrammarST) -- DEFAULT
    ,(strCI "canon",    showCanon "Lang" . stateGrammarST)
    ,(strCI "gfc",      GFC.showGFC . stateGrammarST)
    ,(strCI "canonOpt", showCanonOpt "Lang" . stateGrammarST)
    -}

    -- add your own grammar printers here

    -- grammar conversions:
    , (strCI "mcfg",          \_ -> Prt.prt . stateMCFG)
    , (strCI "fcfg",          \_ -> Prt.prt . fst . stateFCFG)
    , (strCI "cfg",           \_ -> Prt.prt . stateCFG)
    , (strCI "pinfo",         \_ -> Prt.prt . statePInfo)
    , (strCI "abstract",      \_ -> Prt.prtAfter "\n" . Cnv.gfc2abstract . stateGrammarLang)

    , (strCI "functiongraph", \_ -> CnvTypeGraph.prtFunctionGraph . Cnv.gfc2simple noOptions . stateGrammarLang)
    , (strCI "typegraph",     \_ -> CnvTypeGraph.prtTypeGraph . Cnv.gfc2simple noOptions . stateGrammarLang)

    , (strCI "gfc-haskell",   \_ -> CnvHaskell.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
    , (strCI "mcfg-haskell",  \_ -> CnvHaskell.prtMGrammar . stateMCFG)
    , (strCI "cfg-haskell",   \_ -> CnvHaskell.prtCGrammar . stateCFG)
    , (strCI "gfc-prolog",    \_ -> CnvProlog.prtSGrammar . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
    , (strCI "mcfg-prolog",   \_ -> CnvProlog.prtMGrammar . stateMCFG)
    , (strCI "cfg-prolog",    \_ -> CnvProlog.prtCGrammar . stateCFG)

    -- obsolete, or only for testing:
    , (strCI "abs-skvatt",    \_ -> Cnv.abstract2skvatt . Cnv.gfc2abstract . stateGrammarLang)
    , (strCI "cfg-skvatt",    \_ -> Cnv.cfg2skvatt . stateCFG)
    , (strCI "simple",        \_ -> Prt.prt . uncurry Cnv.gfc2simple . stateGrammarLangOpts)
    , (strCI "mcfg-erasing",  \_ -> Prt.prt . fst . snd . uncurry Cnv.convertGFC . stateGrammarLangOpts)
    -- ,(strCI "mcfg-old", PrtOld.prt . CnvOld.mcfg . statePInfoOld)
    -- ,(strCI "cfg-old",  PrtOld.prt . CnvOld.cfg . statePInfoOld)
    ]
  where
    -- pair a state's options with its language grammar
    stateGrammarLangOpts s = (stateOptions s, stateGrammarLang s)
|
||||
|
||||
-- | Printers for multiple grammars at once, selected by option @-printer=x@.
customMultiGrammarPrinter =
  customData "Printers for multiple grammars, selected by option -printer=x" $
    [ (strCI "gfcm",    const MC.prCanon)
    , (strCI "gfcc",    canon2gfccPr)
    , (strCI "js",      \opts -> JS.gfcc2js . canon2gfcc opts)
    , (strCI "header",  const (MC.prCanonMGr . unoptimizeCanon))
    , (strCI "cfgm",    prCanonAsCFGM)
    , (strCI "graph",   visualizeCanonGrammar)
    , (strCI "missing", const missingLinCanonGrammar)

    -- to prolog format:
    , (strCI "gfc-prolog",  CnvProlog.prtSMulti)
    , (strCI "mcfg-prolog", CnvProlog.prtMMulti)
    , (strCI "cfg-prolog",  CnvProlog.prtCMulti)
    ]
|
||||
|
||||
|
||||
-- | Printers for source-syntax grammars, selected by option @-printer=x@.
-- Currently empty.
customSyntaxPrinter =
  customData "Syntax printers, selected by option -printer=x" $
    [
    -- add your own grammar printers here
    ]
|
||||
|
||||
|
||||
-- | Term printers, selected by option @-printer=x@.
customTermPrinter =
  customData "Term printers, selected by option -printer=x" $
    [ (strCI "gf", const prt) -- DEFAULT
    -- add your own term printers here
    ]
|
||||
|
||||
-- | Term transformers, selected by option @-transform=x@.
-- Each command maps a tree to a (possibly empty) list of result trees.
customTermCommand =
  customData "Term transformers, selected by option -transform=x" $
    [ (strCI "identity", \_ t -> [t]) -- DEFAULT
    , (strCI "compute", \g t ->
         let gr = grammar g
         in err (const [t]) return
                (exp2termCommand gr (computeAbsTerm gr) t))
    , (strCI "nodup",     \_ t -> if (hasDupIdent $ tree2exp t) then [] else [t])
    , (strCI "nodupatom", \_ t -> if (hasDupAtom $ tree2exp t) then [] else [t])
    , (strCI "paraphrase", \g t ->
         let gr = grammar g
         in exp2termlistCommand gr (mkParaphrases gr) t)

    , (strCI "generate", \g t ->
         let gr  = grammar g
             cat = actCat $ tree2loc t --- not needed
         in [tr | t <- generateTrees noOptions gr cat 2 Nothing (Just t),
                  Ok tr <- [annotate gr $ MM.qualifTerm (absId g) t]])
    , (strCI "typecheck", \g t ->
         err (const []) (return . loc2tree)
             (reCheckStateReject (grammar g) (tree2loc t)))
    , (strCI "solve", \g t ->
         err (const []) (return . loc2tree)
             (solveAll (grammar g) (tree2loc t) >>= rejectUnsolvable))
    , (strCI "context", \g t ->
         err (const [t]) (return . loc2tree)
             (contextRefinements (grammar g) (tree2loc t)))
    , (strCI "reindex", \g t ->
         let gr = grammar g
         in err (const [t]) return
                (exp2termCommand gr (return . MM.reindexTerm) t))
    --- ,(strCI "delete", \g t -> [MM.mExp0])
    -- add your own term commands here
    ]
|
||||
|
||||
-- | Editor state transformers, selected by option @-edit=x@.
customEditCommand =
  customData "Editor state transformers, selected by option -edit=x" $
    [ (strCI "identity",   const return) -- DEFAULT
    , (strCI "typecheck",  \g -> reCheckState (grammar g))
    , (strCI "solve",      \g -> solveAll (grammar g))
    , (strCI "context",    \g -> contextRefinements (grammar g))
    , (strCI "compute",    \g -> computeSubTree (grammar g))
    , (strCI "paraphrase", const return) --- done ad hoc on top level
    , (strCI "generate",   const return) --- done ad hoc on top level
    , (strCI "transfer",   const return) --- done ad hoc on top level
    -- add your own edit commands here
    ]
|
||||
|
||||
-- | String filters, selected by option @-filter=x@.
customStringCommand =
  customData "String filters, selected by option -filter=x" $
    [ (strCI "identity", const $ id) -- DEFAULT
    , (strCI "erase",    const $ const "")
    , (strCI "take100",  const $ take 100)
    , (strCI "text",     const $ formatAsText)
    , (strCI "code",     const $ formatAsCode)
    ---- ,(strCI "latexfile", const $ mkLatexFile)
    , (strCI "length",   const $ show . length)
    -- add your own string commands here
    ]
|
||||
|
||||
-- | Parsers, selected by option @-parser=x@.
customParser =
  customData "Parsers, selected by option -parser=x" $
    [ (strCI "chart",    PCFOld.parse "ibn" . stateCF) -- DEPRECATED
    , (strCI "bottomup", PCF.parse "gb" . stateCF)
    , (strCI "topdown",  PCF.parse "gt" . stateCF)
    -- commented for now, since there's a bug in the incremental algorithm:
    -- ,(strCI "incremental",          PCF.parse "ib" . stateCF)
    -- ,(strCI "incremental-bottomup", PCF.parse "ib" . stateCF)
    -- ,(strCI "incremental-topdown",  PCF.parse "it" . stateCF)
    , (strCI "old",      chartParser . stateCF) -- DEPRECATED
    , (strCI "myparser", myParser)
    -- add your own parsers here
    ]
|
||||
|
||||
-- | Tokenizers, selected by option @-lexer=x@.
-- Each tokenizer yields alternative token sequences for the input string.
customTokenizer =
  customData "Tokenizers, selected by option -lexer=x" $
    [ (strCI "words",     const $ sg . tokWords)
    , (strCI "literals",  const $ sg . tokLits)
    , (strCI "vars",      const $ sg . tokVars)
    , (strCI "chars",     const $ sg . map (tS . singleton))
    , (strCI "code",      const $ sg . lexHaskell)
    , (strCI "codevars",  \gr -> sg . (lexHaskellVar $ stateIsWord gr))
    , (strCI "textvars",  \gr -> sg . (lexTextVar $ stateIsWord gr))
    , (strCI "text",      const $ sg . lexText)
    , (strCI "unglue",    \gr -> sg . map tS . decomposeWords (stateMorpho gr))
    , (strCI "codelit",   \gr -> sg . (lexHaskellLiteral $ stateIsWord gr))
    , (strCI "textlit",   \gr -> sg . (lexTextLiteral $ stateIsWord gr))
    , (strCI "codeC",     const $ sg . lexC2M)
    , (strCI "ignore",    \gr -> sg . lexIgnore (stateIsWord gr) . tokLits)
    , (strCI "subseqs",   \gr -> subSequences . lexIgnore (stateIsWord gr) . tokLits)
    , (strCI "codeCHigh", const $ sg . lexC2M' True)
    -- add your own tokenizers here
    ]
  where
    -- wrap a single token sequence as the only alternative
    sg = singleton
|
||||
|
||||
-- | Untokenizers, selected by option @-unlexer=x@.
customUntokenizer =
  customData "Untokenizers, selected by option -unlexer=x" $
    [ (strCI "unwords",  const $ id) -- DEFAULT
    , (strCI "text",     const $ formatAsText)
    , (strCI "html",     const $ formatAsHTML)
    , (strCI "latex",    const $ formatAsLatex)
    , (strCI "code",     const $ formatAsCode)
    , (strCI "concat",   const $ filter (not . isSpace))
    , (strCI "textlit",  const $ formatAsTextLit)
    , (strCI "codelit",  const $ formatAsCodeLit)
    -- BUG FIX: a second "concat" entry, (const concatRemSpace), used to be
    -- registered here; 'lookup' always returns the first match, so it was
    -- dead code and only produced a duplicate line in the menu (customInfo).
    , (strCI "glue",     const $ performBinds)
    , (strCI "finnish",  const $ performBindsFinnish)
    , (strCI "reverse",  const $ reverse)
    , (strCI "bind",     const $ performBinds) -- backward compat
    -- add your own untokenizers here
    ]
|
||||
|
||||
-- | Alphabet codings, selected by option @-coding=x@.
-- Each entry converts from an external character coding to the internal
-- unicode representation.
customUniCoding =
  customData "Alphabet codings, selected by option -coding=x" $
    [ (strCI "latin1",           id) -- DEFAULT
    , (strCI "utf8",             decodeUTF8)
    , (strCI "greek",            treat [] mkGreek)
    , (strCI "hebrew",           mkHebrew)
    , (strCI "arabic",           mkArabic)
    , (strCI "russian",          treat [] mkRussian)
    , (strCI "russianKOI8",      mkRusKOI8)
    , (strCI "ethiopic",         mkEthiopic)
    , (strCI "tamil",            mkTamil)
    , (strCI "OCScyrillic",      mkOCSCyrillic)
    , (strCI "devanagari",       mkDevanagari)
    , (strCI "latinasupplement", mkLatinASupplement)
    , (strCI "japanese",         mkJapanese)
    , (strCI "arabic0600",       mkArabic0600)
    , (strCI "extendedarabic",   mkExtendedArabic)
    , (strCI "extradiacritics",  mkExtraDiacritics)
    ]
|
||||
435
src-3.0/GF/UseGrammar/Editing.hs
Normal file
435
src-3.0/GF/UseGrammar/Editing.hs
Normal file
@@ -0,0 +1,435 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Editing
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:45 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.14 $
|
||||
--
|
||||
-- generic tree editing, with some grammar notions assumed. AR 18\/8\/2001.
|
||||
-- 19\/6\/2003 for GFC
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Editing where
|
||||
|
||||
import GF.Grammar.Abstract
|
||||
import qualified GF.Canon.GFC as GFC
|
||||
import GF.Grammar.TypeCheck
|
||||
import GF.Grammar.LookAbs
|
||||
import GF.Grammar.AbsCompute
|
||||
import GF.Grammar.Macros (errorCat)
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Zipper
|
||||
|
||||
-- generic tree editing, with some grammar notions assumed. AR 18/8/2001
|
||||
-- 19/6/2003 for GFC
|
||||
|
||||
-- | The compiled (canonical) grammar the editor works against.
type CGrammar = GFC.CanonGrammar

-- | An editing state: a zipper location in a tree of 'TrNode's.
type State = Loc TrNode

-- | the "empty" state
initState :: State
initState = tree2loc uTree

-- | Is the focus at the root of the tree?
isRootState :: State -> Bool
isRootState st = case actPath st of
  Top -> True
  _   -> False

-- | The subtree currently in focus.
actTree :: State -> Tree
actTree (Loc (tree, _)) = tree

-- | The path from the root down to the focus.
actPath :: State -> Path TrNode
actPath (Loc (_, path)) = path
|
||||
|
||||
-- | The value stored at the focused node.
actVal :: State -> Val
actVal = valNode . nodeTree . actTree

-- | The category of the focused node, falling back to 'errorCat'
-- when the value does not denote a category.
actCat :: State -> Cat
actCat = errVal errorCat . val2cat . actVal ---- undef

-- | The atom (head) of the focused subtree.
actAtom :: State -> Atom
actAtom = atomTree . actTree

-- | The function at the focus; fails when the focused atom is not a function.
actFun :: State -> Err Fun
actFun st = case actAtom st of
  AtC fun -> return fun
  other   -> prtBad "active atom: expected function, found" other

-- | The focused subtree viewed as an expression.
actExp :: State -> Exp
actExp = tree2exp . actTree
|
||||
|
||||
-- | current local bindings
actBinds :: State -> Binds
actBinds = bindsNode . nodeTree . actTree

-- | constraints in current subtree
actConstrs :: State -> Constraints
actConstrs = allConstrsTree . actTree

-- | constraints in the whole tree
allConstrs :: State -> Constraints
allConstrs = allConstrsTree . loc2tree

-- | metas in current subtree
actMetas :: State -> [Meta]
actMetas = metasTree . actTree

-- | metas in the whole tree
allMetas :: State -> [Meta]
allMetas = metasTree . loc2tree

-- | the body of the focused subtree
actTreeBody :: State -> Tree
actTreeBody = bodyTree . actTree
|
||||
|
||||
-- | Bindings made on the path above the focus.
allPrevBinds :: State -> Binds
allPrevBinds = concatMap bindsNode . traverseCollect . actPath

-- | All bindings visible at the focus: local ones first, then outer ones.
allBinds :: State -> Binds
allBinds st = actBinds st ++ allPrevBinds st

-- | symbol generator for VGen
actGen :: State -> Int
actGen = length . allBinds

-- | Variables bound above the focus.
allPrevVars :: State -> [Var]
allPrevVars = map fst . allPrevBinds

-- | All variables visible at the focus.
allVars :: State -> [Var]
allVars = map fst . allBinds

-- | Next fresh index for generated variables.
vGenIndex :: State -> Int
vGenIndex = length . allBinds

-- | Is the focused atom a metavariable?
actIsMeta :: State -> Bool
actIsMeta = atomIsMeta . actAtom

-- | The focused metavariable; fails when the focus is not a meta.
actMeta :: State -> Err Meta
actMeta = getMetaAtom . actAtom
|
||||
|
||||
-- | meta substs are not only on the actual path...
entireMetaSubst :: State -> MetaSubst
entireMetaSubst = concatMap metaSubstsNode . scanTree . loc2tree

-- | A tree is complete when it contains no metavariables.
isCompleteTree :: Tree -> Bool
isCompleteTree = not . any atomIsMeta . map atomNode . scanTree

-- | A state is complete when its whole tree is.
isCompleteState :: State -> Bool
isCompleteState = isCompleteTree . loc2tree
|
||||
|
||||
-- | Initial state for editing a term of the given category in a context:
-- a single metavariable node of that category.
initStateCat :: Context -> Cat -> Err State
initStateCat cont cat =
  return $ tree2loc (Tr (mkNode [] mAtom (cat2val cont cat) ([],[]), []))
|
||||
|
||||
-- | this function only concerns the body of an expression...
annotateInState :: CGrammar -> Exp -> State -> Err Tree
annotateInState gr exp state =
  annotateIn gr (allBinds state) exp (Just (actVal state))
|
||||
|
||||
-- | ...whereas this one works with lambda abstractions: the expected type
-- is the product of the local bindings over the focused value.
annotateExpInState :: CGrammar -> Exp -> State -> Err Tree
annotateExpInState gr exp state = do
  -- FIX: removed the unused local binding 'cont = allPrevBinds state',
  -- which was never referenced in the body.
  let binds = actBinds state
      val   = actVal state
  typ <- mkProdVal binds val
  annotateIn gr binds exp (Just typ)
|
||||
|
||||
-- | Transform an expression, then annotate the result in the given state.
treeByExp :: (Exp -> Err Exp) -> CGrammar -> Exp -> State -> Err Tree
treeByExp trans gr exp0 state =
  trans exp0 >>= \e -> annotateExpInState gr e state
|
||||
|
||||
-- * actions

-- | An editor action transforms a state, possibly failing.
type Action = State -> Err State
|
||||
|
||||
-- | Start editing a fresh tree of the given category.
newCat :: CGrammar -> Cat -> Action
newCat gr cat@(m,c) _ = do
  cont <- lookupCatContext gr m c
  -- for easier meta refresh
  testErr (null cont) "start cat must have null context"
  initStateCat cont cat
|
||||
|
||||
-- | Start editing from a function: create a state for its value category
-- and refine the root with the function itself.
newFun :: CGrammar -> Fun -> Action
newFun gr fun@(m,c) _ = do
  typ <- lookupFunType gr m c
  cat <- valCat typ
  st  <- newCat gr cat initState
  refineWithAtom True gr (qq fun) st
|
||||
|
||||
-- | Replace the whole state by the given tree.
newTree :: Tree -> Action
newTree t _ = return (tree2loc t)

-- | Replace the whole state by a type-checked expression
-- (metas refreshed first).
newExpTC :: CGrammar -> Exp -> Action
newExpTC gr t s = do
  tree <- annotate gr (refreshMetas [] t)
  newTree tree s
|
||||
|
||||
goNextMeta, goPrevMeta, goNextNewMeta, goPrevNewMeta, goNextMetaIfCan :: Action

-- can be the location itself
goNextMeta = repeatUntilErr actIsMeta goAhead
goPrevMeta = repeatUntilErr actIsMeta goBack

-- always goes away from location
goNextNewMeta s = goAhead s >>= goNextMeta
goPrevNewMeta s = goBack s >>= goPrevMeta

goNextMetaIfCan = actionIfPossible goNextMeta

-- | Run an action, keeping the original state when it fails.
actionIfPossible :: Action -> Action
actionIfPossible act st = return (errVal st (act st))

goFirstMeta, goLastMeta :: Action
goFirstMeta = goNextMeta . goRoot
goLastMeta st = goLast st >>= goPrevMeta

-- | True when no metavariable is reachable from the focus.
noMoreMetas :: State -> Bool
noMoreMetas = err (const True) (const False) . goNextMeta
|
||||
|
||||
-- | Replace the focused subtree by the given tree.
replaceSubTree :: Tree -> Action
replaceSubTree = flip changeLoc
|
||||
|
||||
-- | At a metavariable, refine with the tree; elsewhere, replace the focused
-- subtree (re-attaching the local bindings) and re-typecheck the state.
refineOrReplaceWithTree :: Bool -> CGrammar -> Tree -> Action
refineOrReplaceWithTree der gr tree state = case actMeta state of
  Ok m -> refineWithTreeReal der gr tree m state
  _    -> do
    state' <- replaceSubTree (addBinds (actBinds state) tree) state
    reCheckState gr state'
|
||||
|
||||
-- | Refine the focused metavariable with a tree; fails when the focus
-- is not a metavariable.
refineWithTree :: Bool -> CGrammar -> Tree -> Action
refineWithTree der gr tree state = do
  meta <- errIn "move pointer to meta" (actMeta state)
  refineWithTreeReal der gr tree meta state
|
||||
|
||||
-- | The real work of refinement: substitute the tree for the meta, split
-- the resulting constraints into remaining constraints and meta
-- substitutions, and propagate the substitutions over the whole state.
refineWithTreeReal :: Bool -> CGrammar -> Tree -> Meta -> Action
refineWithTreeReal der gr tree m state = do
  state' <- replaceSubTree tree state
  let (cs, ms) = splitConstraints gr (allConstrs state')
      v        = vClos $ tree2exp (bodyTree tree)
      msubst   = (m, v) : ms
  metaSubstRefinements gr msubst $
    mapLoc (reduceConstraintsNode gr . performMetaSubstNode msubst) state'

-- without dep. types, no constraints, no grammar needed - simply: do
--   testErr (actIsMeta state) "move pointer to meta"
--   replaceSubTree tree state
|
||||
|
||||
-- | Try an action at every metavariable of the tree, then return to the
-- first remaining meta.  If nothing at all was refined, the state is
-- returned unchanged.
refineAllNodes :: Action -> Action
refineAllNodes act state =
  case goFirstMeta state of
    Bad _ -> return state
    Ok state0 -> do
      (state', n) <- tryRefine 0 state0
      if n == 0
        then return state
        else actionIfPossible goFirstMeta state'
  where
    tryRefine n st = err (const $ return (st, n)) return $ do
      st' <- goNextMeta st
      _   <- actMeta st'   -- guard: the focus must really be a meta
      case act st' of
        Ok st2 -> tryRefine (n + 1) st2
        _ -> err (const $ return (st', n)) return $ do
          st2 <- goNextNewMeta st'
          tryRefine n st2
|
||||
|
||||
-- | Refine every metavariable that has a unique refinement.
uniqueRefinements :: CGrammar -> Action
uniqueRefinements = refineAllNodes . uniqueRefine

-- | Apply a metavariable substitution at every metavariable node.
metaSubstRefinements :: CGrammar -> MetaSubst -> Action
metaSubstRefinements gr = refineAllNodes . metaSubstRefine gr
|
||||
|
||||
-- | Refine every meta that has exactly one variable refinement
-- in its context.
contextRefinements :: CGrammar -> Action
contextRefinements gr = refineAllNodes contextRefine
  where
    contextRefine state = case varRefinementsState state of
      [(e, _)] -> refineWithAtom False gr e state
      _        -> Bad "no unique refinement in context"
    varRefinementsState state =
      [r | r@(e, _) <- refinementsState gr state, isVariable e]
|
||||
|
||||
-- | Refine the focused meta when exactly one refinement exists and it is
-- not circular.
uniqueRefine :: CGrammar -> Action
uniqueRefine gr state = case refinementsState gr state of
  [(_, (_, True))] -> Bad "only circular refinement"
  [(e, _)]         -> refineWithAtom False gr e state
  _                -> Bad "no unique refinement"
|
||||
|
||||
-- | Refine the focused metavariable using a given meta substitution.
metaSubstRefine :: CGrammar -> MetaSubst -> Action
metaSubstRefine gr msubst state = do
  m <- errIn "move pointer to meta" (actMeta state)
  case lookup m msubst of
    Just v -> val2expSafe v >>= \e -> refineWithExpTC False gr e state
    _      -> Bad "no metavariable substitution available"
|
||||
|
||||
-- | Type-check an expression in the current state (with fresh metas)
-- and refine the focus with it.
refineWithExpTC :: Bool -> CGrammar -> Exp -> Action
refineWithExpTC der gr exp0 state = do
  let exp = refreshMetas (allMetas state) exp0
  tree0 <- annotateInState gr exp state
  refineWithTree der gr (addBinds (actBinds state) tree0) state
|
||||
|
||||
-- | Refine with an atom (function or variable): look up its type and build
-- an application with fresh metas for the arguments.
refineWithAtom :: Bool -> CGrammar -> Ref -> Action
refineWithAtom der gr at state = do
  val <- lookupRef gr (allBinds state) at
  typ <- val2exp val
  exp <- ref2exp (allVars state) typ at
  refineWithExpTC der gr exp state
|
||||
|
||||
-- | in this command, we know that the result is well-typed, since computation
-- rules have been type checked and the result is equal
computeSubTree :: CGrammar -> Action
computeSubTree gr state = do
  tree <- treeByExp (compute gr) gr (tree2exp (actTree state)) state
  replaceSubTree tree state
|
||||
|
||||
-- | but here we don't, since the transfer flag isn't type checked,
|
||||
-- and computing the transfer function is not checked to preserve equality
|
||||
transferSubTree :: Maybe Fun -> CGrammar -> Action
|
||||
transferSubTree Nothing _ s = return s
|
||||
transferSubTree (Just fun) gr state = do
|
||||
let exp = mkApp (qq fun) [tree2exp $ actTree state]
|
||||
tree <- treeByExp (compute gr) gr exp state
|
||||
state' <- replaceSubTree tree state
|
||||
reCheckState gr state'
|
||||
|
||||
-- | Delete the active subtree. At the root this restarts from the active
-- category; elsewhere the subtree is replaced by a fresh metavariable
-- (current bindings preserved) and the whole state is re-checked.
deleteSubTree :: CGrammar -> Action
deleteSubTree gr state =
  if isRootState state
    then do
      let cat = actCat state
      newCat gr cat state
    else do
      let metas = allMetas state
          binds = actBinds state
          exp = refreshMetas metas mExp0  -- a fresh metavariable expression
      tree <- annotateInState gr exp state
      state' <- replaceSubTree (addBinds binds tree) state
      reCheckState gr state' --- must be unfortunately done. 20/11/2001

-- | Wrap the active subtree as the i'th argument of the function f;
-- the other arguments become fresh metavariables (via 'fun2wrap'),
-- and the state is re-checked afterwards.
wrapWithFun :: CGrammar -> (Fun,Int) -> Action
wrapWithFun gr (f@(m,c),i) state = do
  typ <- lookupFunType gr m c
  let olds = allPrevVars state
      oldmetas = allMetas state
  exp0 <- fun2wrap olds ((f,i),typ) (tree2exp (actTreeBody state))
  let exp = refreshMetas oldmetas exp0
  tree0 <- annotateInState gr exp state
  let tree = addBinds (actBinds state) $ tree0
  state' <- replaceSubTree tree state
  reCheckState gr state' --- must be unfortunately done. 20/11/2001
|
||||
|
||||
-- | Rename the bound variable x to x' in the active subtree, rejecting a
-- new name that clashes with previously bound variables or with the other
-- variables of the same binding group.
alphaConvert :: CGrammar -> (Var,Var) -> Action
alphaConvert gr (x,x') state = do
  let oldvars = allPrevVars state
  testErr (notElem x' oldvars) ("clash with previous bindings" +++ show x')
  let binds0 = actBinds state
      vars0 = map fst binds0
  testErr (notElem x' vars0) ("clash with other bindings" +++ show x')
  -- rename x in the binding list, then in the body
  let binds = [(if z==x then x' else z, t) | (z,t) <- binds0]
      vars = map fst binds
  exp' <- alphaConv (vars ++ oldvars) (x,x') (tree2exp (actTreeBody state))
  let exp = mkAbs vars exp'
  tree <- annotateExpInState gr exp state
  replaceSubTree tree state

-- | Replace the function at the active node by f, keeping the argument
-- trees; re-checking is needed because of constraints elsewhere.
changeFunHead :: CGrammar -> Fun -> Action
changeFunHead gr f state = do
  let state' = changeNode (changeAtom (const (atomC f))) state
  reCheckState gr state' --- must be done because of constraints elsewhere

-- | Replace the active subtree by its i'th immediate subtree, i.e. peel
-- off the application of f, then re-check the state.
peelFunHead :: CGrammar -> (Fun,Int) -> Action
peelFunHead gr (f@(m,c),i) state = do
  tree0 <- nthSubtree i $ actTree state
  let tree = addBinds (actBinds state) $ tree0
  state' <- replaceSubTree tree state
  reCheckState gr state' --- must be unfortunately done. 20/11/2001
|
||||
|
||||
-- | an expensive operation: re-annotate (type check) the whole tree,
-- converting to and from the focused representation around the check.
reCheckState :: CGrammar -> State -> Err State
reCheckState gr st = annotate gr (tree2exp (loc2tree st)) >>= return . tree2loc

-- | a variant that returns Bad instead of a tree with unsolvable constraints
reCheckStateReject :: CGrammar -> State -> Err State
reCheckStateReject gr st = do
  st' <- reCheckState gr st
  rejectUnsolvable st'

-- | Fail if the active node carries residual constraints.
rejectUnsolvable :: State -> Err State
rejectUnsolvable st = case (constrsNode $ nodeTree $ actTree st) of
  [] -> return st
  cs -> Bad $ "Unsolvable constraints:" +++ prConstraints cs

-- | extract metasubstitutions from constraints and solve them
solveAll :: CGrammar -> State -> Err State
solveAll gr st = solve st >>= solve where
  solve st0 = do ---- why need twice?
    st <- reCheckState gr st0
    let cs0 = allConstrs st
        (cs,ms) = splitConstraints gr cs0
    -- apply the extracted substitution everywhere, then refine with it
    metaSubstRefinements gr ms $
      mapLoc (reduceConstraintsNode gr . performMetaSubstNode ms) st
|
||||
|
||||
-- * active refinements
|
||||
|
||||
-- | All refinements of the active node: when the focus is a metavariable,
-- the terms that can fill it at its type (filtered by the compatibility
-- heuristic 'possibleRefVal'); otherwise none.
refinementsState :: CGrammar -> State -> [(Term,(Val,Bool))]
refinementsState gr state =
  let filt = possibleRefVal gr state in
  if actIsMeta state
    then refsForType filt gr (allBinds state) (actVal state)
    else []

-- | Functions that can wrap the active (non-meta) subtree as one of their
-- arguments, paired with their types. At the root every function on the
-- active value qualifies; elsewhere a compatibility check is applied.
wrappingsState :: CGrammar -> State -> [((Fun,Int),Type)]
wrappingsState gr state
  | actIsMeta state = []
  | isRootState state = funs
  | otherwise = [rule | rule@(_,typ) <- funs, possibleRefVal gr state aval typ]
 where
  funs = funsOnType (possibleRefVal gr state) gr aval
  aval = actVal state

-- | The (function, argument position) pairs by which the active subtree can
-- be peeled, i.e. replaced by one of its immediate subtrees.
peelingsState :: CGrammar -> State -> [(Fun,Int)]
peelingsState gr state
  | actIsMeta state = []
  | isRootState state =
      -- at the root: every argument position of the head function
      err (const []) (\f -> [(f,i) | i <- [0 .. arityTree tree - 1]]) $ actFun state
  | otherwise =
      -- elsewhere: positions of the head function that pass the heuristic
      err (const [])
          (\f -> [fi | (fi@(g,_),typ) <- funs,
                       possibleRefVal gr state aval typ, g==f]) $ actFun state
 where
  funs = funsOnType (possibleRefVal gr state) gr aval
  aval = actVal state
  tree = actTree state
|
||||
|
||||
-- | Functions that could replace the head of the active node: those whose
-- type is syntactically equal to the current head's type.
headChangesState :: CGrammar -> State -> [Fun]
headChangesState gr state = errVal [] $ do
  f@(m,c) <- funAtom (actAtom state)
  typ0 <- lookupFunType gr m c
  return [fun | (fun,typ) <- funRulesOf gr, fun /= f, typ == typ0]
  --- alpha-conv !  (syntactic equality misses alpha-equivalent types)

-- | A cheap compatibility test between a value and a candidate type:
-- form a single constraint and ask whether it is possibly solvable.
-- Errors default to True, so nothing is filtered out by mistake.
possibleRefVal :: CGrammar -> State -> Val -> Type -> Bool
possibleRefVal gr state val typ = errVal True $ do --- was False
  vtyp <- valType typ
  let gen = actGen state
  cs <- return [(val, vClos vtyp)] --- eqVal gen val (vClos vtyp) --- only poss cs
  return $ possibleConstraints gr cs --- a simple heuristic

-- | The same heuristic test between the active value and a candidate tree.
possibleTreeVal :: CGrammar -> State -> Tree -> Bool
possibleTreeVal gr state tree = errVal True $ do --- was False
  let aval = actVal state
  let gval = valTree tree
  let gen = actGen state
  cs <- return [(aval, gval)] --- eqVal gen val (vClos vtyp) --- only poss cs
  return $ possibleConstraints gr cs --- a simple heuristic
|
||||
|
||||
116
src-3.0/GF/UseGrammar/Generate.hs
Normal file
116
src-3.0/GF/UseGrammar/Generate.hs
Normal file
@@ -0,0 +1,116 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Generate
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/12 12:38:30 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.16 $
|
||||
--
|
||||
-- Generate all trees of given category and depth. AR 30\/4\/2004
|
||||
--
|
||||
-- (c) Aarne Ranta 2004 under GNU GPL
|
||||
--
|
||||
-- Purpose: to generate corpora. We use simple types and don't
|
||||
-- guarantee the correctness of bindings\/dependences.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Generate (generateTrees,generateAll) where
|
||||
|
||||
import GF.Canon.GFC
|
||||
import GF.Grammar.LookAbs
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Grammar.Macros
|
||||
import GF.Grammar.Values
|
||||
import GF.Grammar.Grammar (Cat)
|
||||
import GF.Grammar.SGrammar
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Zipper
|
||||
import GF.Infra.Option
|
||||
import Data.List
|
||||
|
||||
-- Generate all trees of given category and depth. AR 30/4/2004
|
||||
-- (c) Aarne Ranta 2004 under GNU GPL
|
||||
--
|
||||
-- Purpose: to generate corpora. We use simple types and don't
|
||||
-- guarantee the correctness of bindings/dependences.
|
||||
|
||||
|
||||
-- | the main function takes an abstract syntax and returns a list of trees
|
||||
-- | Generate trees of a given category: convert the GFC grammar to the
-- simple grammar format, run 'generate', and map the results back to 'Exp'.
--
-- Arguments: options, grammar, start category, tree depth, an optional
-- bound on subtrees chosen per branch, and an optional seed tree whose
-- metavariables are to be instantiated.
generateTrees ::
  Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp]
generateTrees opts gr cat n mn mt = map str2tr $ generate gr' opts cat' n mn mt'
  where
    gr'  = gr2sgr opts emptyProbs gr  -- simple-grammar view of the grammar
    cat' = prt $ snd cat              -- category name as a string
    mt'  = fmap tr2str mt             -- optional seed tree, converted

-- | Generate trees and feed each one to the given IO action (e.g. a
-- printer). The number of trees is bounded by the flagNumber option (or
-- unbounded); the generation depth is fixed at 10 here.
generateAll :: Options -> (Exp -> IO ()) -> GFCGrammar -> Cat -> IO ()
generateAll opts io gr cat = mapM_ (io . str2tr) $ num $ gen cat'
  where
    num   = optIntOrAll opts flagNumber
    gr'   = gr2sgr opts emptyProbs gr
    cat'  = prt $ snd cat
    gen c = generate gr' opts c 10 Nothing Nothing
|
||||
|
||||
|
||||
|
||||
------------------------------------------
|
||||
-- do the main thing with a simpler data structure
|
||||
-- the first Int gives tree depth, the second constrains subtrees
|
||||
-- chosen for each branch. A small number, such as 2, is a good choice
|
||||
-- if the depth is large (more than 3)
|
||||
-- If a tree is given as argument, generation concerns its metavariables.
|
||||
|
||||
-- | Generate all trees of a category up to depth i. If a seed tree is
-- given, generation targets its metavariables; otherwise the whole
-- category. mn, when present, bounds the functions tried per category.
generate :: SGrammar -> Options -> SCat -> Int -> Maybe Int -> Maybe STree -> [STree]
generate gr opts cat i mn mt = case mt of
  Nothing -> gen opts cat
  Just t -> genM t
 where
  --- now use ifm to choose between two algorithms
  -- algorithm choice by option: memoized table ("mem"), no deduplication
  -- ("nonub"), or the default with nub.  NB: the lambda's i shadows the
  -- outer depth i, which is still the one used in [0..i-1].
  gen opts cat
    | oElem (iOpt "mem") opts = concat $ errVal [] $ lookupTree id cat $ allTrees -- -old
    | oElem (iOpt "nonub") opts = concatMap (\i -> gener i cat) [0..i-1] -- some duplicates
    | otherwise = nub $ concatMap (\i -> gener i cat) [0..i-1] -- new

  -- depth 0: nullary functions only
  gener 0 c = [SApp (f, []) | (f,([],_)) <- funs c]
  -- depth i: all combinations of argument trees of depth i-1
  gener i c = [
    tr |
      (f,(cs,_)) <- funs c,
      let alts = map (gener (i-1)) cs,
      ts <- combinations alts,
      let tr = SApp (f, ts)
      -- depth tr >= i -- NO!
    ]

  allTrees = genAll i

  -- dynamic generation: iterate the table-growing step i times
  genAll :: Int -> BinTree SCat [[STree]]
  genAll i = iter i genNext (mapTree (\ (c,_) -> (c,[[]])) gr)

  iter 0 f tr = tr
  iter n f tr = iter (n-1) f (f tr)

  genNext tr = mapTree (genNew tr) tr

  -- prepend one new layer of trees for a category; size is the number of
  -- layers built so far, used to keep only trees of exactly that depth
  genNew tr (cat,ts) = let size = length ts in
    (cat, [SApp (f, xs) |
            (f,(cs,_)) <- funs cat,
            xs <- combinations (map look cs),
            let fxs = SApp (f, xs),
            depth fxs == size]
          : ts)
    where
      look c = concat $ errVal [] $ lookupTree id c tr

  -- the functions producing a category, optionally truncated to mn
  funs cat = maybe id take mn $ errVal [] $ lookupTree id cat gr

  -- fill the metavariables of a seed tree, combining all alternatives
  genM t = case t of
    SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)]
    SMeta k -> gen opts k
    _ -> [t]
|
||||
74
src-3.0/GF/UseGrammar/GetTree.hs
Normal file
74
src-3.0/GF/UseGrammar/GetTree.hs
Normal file
@@ -0,0 +1,74 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : GetTree
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/09/15 16:22:02 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.9 $
|
||||
--
|
||||
-- how to form linearizable trees from strings and from terms of different levels
|
||||
--
|
||||
-- 'String' --> raw 'Term' --> annot, qualif 'Term' --> 'Tree'
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.GetTree where
|
||||
|
||||
import GF.Canon.GFC
|
||||
import GF.Grammar.Values
|
||||
import qualified GF.Grammar.Grammar as G
|
||||
import GF.Infra.Ident
|
||||
import GF.Grammar.MMacros
|
||||
import GF.Grammar.Macros
|
||||
import GF.Compile.Rename
|
||||
import GF.Grammar.TypeCheck
|
||||
import GF.Grammar.AbsCompute (beta)
|
||||
import GF.Compile.PGrammar
|
||||
import GF.Compile.ShellState
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import Data.Char
|
||||
|
||||
-- how to form linearizable trees from strings and from terms of different levels
|
||||
--
|
||||
-- String --> raw Term --> annot, qualif Term --> Tree
|
||||
|
||||
-- | Parse a string into a tree; on failure return the undefined tree 'uTree'.
string2tree :: StateGrammar -> String -> Tree
string2tree gr = errVal uTree . string2treeErr gr

-- | Parse a string into a tree, reporting failures: parse the term,
-- beta-reduce, refresh metavariables, qualify constants by the abstract
-- module, and finally type-annotate against the grammar.
string2treeErr :: StateGrammar -> String -> Err Tree
string2treeErr _ "" = Bad "empty string"
string2treeErr gr s = do
  t <- pTerm s
  let t0 = beta [] t
  let t1 = refreshMetas [] t0
  let t2 = qualifTerm abstr t1
  annotate grc t2
 where
  abstr = absId gr
  grc = grammar gr
|
||||
|
||||
-- | Qualify a bare category/function name by the grammar's abstract module.
string2Cat, string2Fun :: StateGrammar -> String -> (Ident,Ident)
string2Cat gr c = (absId gr,identC c)
string2Fun = string2Cat

-- | Split a qualified name \"Module.name\" at the first dot.
-- NOTE(review): with no dot present the second component is empty —
-- confirm that callers only pass dotted names here.
strings2Cat, strings2Fun :: String -> (Ident,Ident)
strings2Cat s = (identC m, identC (drop 1 c)) where (m,c) = span (/= '.') s
strings2Fun = strings2Cat

-- | Interpret a string as a reference term: generated variable, string
-- literal, integer literal, qualified constant, or plain variable.
string2ref :: StateGrammar -> String -> Err G.Term
string2ref gr s = case s of
  'x':'_':ds -> return $ freshAsTerm ds --- hack for generated vars
  '"':_:_ -> return $ G.K $ init $ tail s  -- strip the surrounding quotes
  _:_ | all isDigit s -> return $ G.EInt $ read s
  _ | elem '.' s -> return $ uncurry G.Q $ strings2Fun s
  _ -> return $ G.Vr $ identC s

-- | Interpret a string as a category: qualified if it contains a dot,
-- otherwise defaulting to the grammar's abstract module.
string2cat :: StateGrammar -> String -> Err G.Cat
string2cat gr s =
  if elem '.' s
    then return $ strings2Fun s
    else return $ curry id (absId gr) (identC s)
|
||||
162
src-3.0/GF/UseGrammar/Information.hs
Normal file
162
src-3.0/GF/UseGrammar/Information.hs
Normal file
@@ -0,0 +1,162 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Information
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/10/05 20:02:20 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.7 $
|
||||
--
|
||||
-- information on module, category, function, operation, parameter,...
|
||||
-- AR 16\/9\/2003.
|
||||
-- uses source grammar
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Information (
|
||||
showInformation,
|
||||
missingLinCanonGrammar
|
||||
) where
|
||||
|
||||
import GF.Grammar.Grammar
|
||||
import GF.Infra.Ident
|
||||
import GF.Infra.Modules
|
||||
import GF.Infra.Option
|
||||
import GF.CF.CF
|
||||
import GF.CF.PPrCF
|
||||
import GF.Compile.ShellState
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Grammar.Lookup
|
||||
import GF.Grammar.Macros (zIdent)
|
||||
import qualified GF.Canon.GFC as GFC
|
||||
import qualified GF.Canon.AbsGFC as AbsGFC
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Infra.UseIO
|
||||
|
||||
-- information on module, category, function, operation, parameter,... AR 16/9/2003
|
||||
-- uses source grammar
|
||||
|
||||
-- | the top level function
|
||||
-- | the top level function: look up all information on an identifier and
-- print each piece together with the file it comes from.
showInformation :: Options -> ShellState -> Ident -> IOE ()
showInformation opts st c = do
  is <- ioeErr $ getInformation opts st c
  if null is
    then putStrLnE "Identifier not in scope"
    else mapM_ (putStrLnE . prInformationM c) is
 where
  -- render one piece of information followed by its source file
  prInformationM c (i,m) = prInformation opts c i ++ "file:" +++ m ++ "\n"
|
||||
|
||||
-- | the data type of different kinds of information
|
||||
-- | the data type of different kinds of information
data Information =
    IModAbs SourceAbs
  | IModRes SourceRes
  | IModCnc SourceCnc
  | IModule SourceAbs -- ^ to be deprecated
  | ICatAbs Ident Context [Ident]        -- ^ category in abstract module
  | ICatCnc Ident Type [CFRule] Term     -- ^ category in concrete module
  | IFunAbs Ident Type (Maybe Term)      -- ^ function in abstract module
  | IFunCnc Ident Type [CFRule] Term     -- ^ function in concrete module
  | IOper Ident Type Term                -- ^ operation in resource module
  | IParam Ident [Param] [Term]          -- ^ parameter type in resource module
  | IValue Ident Type                    -- ^ parameter constructor

type CatId = AbsGFC.CIdent
type FunId = AbsGFC.CIdent
|
||||
|
||||
-- | Render one piece of information as text, headed by the identifier.
-- NOTE(review): the case is not exhaustive (IModAbs, IModRes, IModCnc have
-- no alternative) — confirm those constructors never reach this function.
prInformation :: Options -> Ident -> Information -> String
prInformation opts c i = unlines $ prt c : case i of
  IModule m -> [
    "module of type" +++ show (mtype m),
    "extends" +++ show (extends m),
    "opens" +++ show (opens m),
    "defines" +++ unwords (map prt (ownConstants (jments m)))
    ]
  ICatAbs m co _ -> [
    "category in abstract module" +++ prt m,
    if null co then "not a dependent type"
    else "dependent type with context" +++ prContext co
    ]
  ICatCnc m ty cfs tr -> [
    "category in concrete module" +++ prt m,
    "linearization type" +++ prt ty
    ]
  IFunAbs m ty _ -> [
    "function in abstract module" +++ prt m,
    "type" +++ prt ty
    ]
  IFunCnc m ty cfs tr -> [
    "function in concrete module" +++ prt m,
    "linearization" +++ prt tr
    --- "linearization type" +++ prt ty
    ]
  IOper m ty tr -> [
    "operation in resource module" +++ prt m,
    "type" +++ prt ty,
    "definition" +++ prt tr
    ]
  IParam m ty ts -> [
    "parameter type in resource module" +++ prt m,
    "constructors" +++ unwords (map prParam ty),
    "values" +++ unwords (map prt ts)
    ]
  IValue m ty -> [
    "parameter constructor in resource module" +++ prt m,
    "type" +++ show ty
    ]
|
||||
|
||||
-- | also finds out if an identifier is defined in many places
|
||||
-- | also finds out if an identifier is defined in many places: the
-- identifier is looked up as a module name and then in every source and
-- canonical module, collecting all hits with their file paths.
getInformation :: Options -> ShellState -> Ident -> Err [(Information,FilePath)]
getInformation opts st c = allChecks $ [
  do
    m <- lookupModule src c
    case m of
      ModMod mo -> returnm c $ IModule mo
      _ -> prtBad "not a source module" c
  ] ++ map lookInSrc ss ++ map lookInCan cs
 where
  -- look up the identifier as a judgement in one source module
  lookInSrc (i,m) = do
    j <- lookupInfo m c
    case j of
      AbsCat (Yes co) _ -> returnm i $ ICatAbs i co [] ---
      AbsFun (Yes ty) _ -> returnm i $ IFunAbs i ty Nothing ---
      CncCat (Yes ty) _ _ -> do
        ---- let cat = ident2CFCat i c
        ---- rs <- concat [rs | (c,rs) <- cf, ]
        returnm i $ ICatCnc i ty [] ty ---
      CncFun _ (Yes tr) _ -> do
        rs <- return []  -- CF rules not collected yet
        returnm i $ IFunCnc i tr rs tr ---
      ResOper (Yes ty) (Yes tr) -> returnm i $ IOper i ty tr
      ResParam (Yes (ps,_)) -> do
        ts <- allParamValues src (QC i c)
        returnm i $ IParam i ps ts
      ResValue (Yes (ty,_)) -> returnm i $ IValue i ty ---
      _ -> prtBad "nothing available for" i
  -- canonical modules are not yet inspected
  lookInCan (i,m) = do
    Bad "nothing available yet in canonical"
  -- pair a piece of information with the path of the module defining it
  returnm m i = return (i, pathOfModule st m)
  src = srcModules st
  can = canModules st
  ss = [(i,m) | (i,ModMod m) <- modules src]
  cs = [(i,m) | (i,ModMod m) <- modules can]
  cf = concatMap ruleGroupsOfCF $ map snd $ cfs st
|
||||
|
||||
-- | Constants defined in the module itself, excluding indirections
-- ('AnyInd') inherited from other modules.
ownConstants :: BinTree Ident Info -> [Ident]
ownConstants = map fst . filter isOwn . tree2list where
  isOwn (c,i) = case i of
    AnyInd _ _ -> False
    _ -> True

-- | List, per concrete module of the greatest abstract, the abstract
-- constants missing a linearization judgement.
missingLinCanonGrammar :: GFC.CanonGrammar -> String
missingLinCanonGrammar cgr =
  unlines $ concat [prt_ c : missing js | (c,js) <- concretes] where
    missing js = map ((" " ++) . prt_) $ filter (not . flip isInBinTree js) abstract
    abstract = err (const []) (map fst . tree2list . jments) $ lookupModMod cgr absId
    absId = maybe (zIdent "") id $ greatestAbstract cgr
    concretes = [(cnc,jments mo) |
                 cnc <- allConcretes cgr absId, Ok mo <- [lookupModMod cgr cnc]]
|
||||
292
src-3.0/GF/UseGrammar/Linear.hs
Normal file
292
src-3.0/GF/UseGrammar/Linear.hs
Normal file
@@ -0,0 +1,292 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Linear
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/11/14 16:03:41 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.19 $
|
||||
--
|
||||
-- Linearization for canonical GF. AR 7\/6\/2003
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Linear where
|
||||
|
||||
import GF.Canon.GFC
|
||||
import GF.Canon.AbsGFC
|
||||
import qualified GF.Grammar.Abstract as A
|
||||
import GF.Canon.MkGFC (rtQIdent) ----
|
||||
import GF.Infra.Ident
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Canon.CMacros
|
||||
import GF.Canon.Look
|
||||
import GF.Grammar.LookAbs
|
||||
import GF.Grammar.MMacros
|
||||
import GF.Grammar.TypeCheck (annotate) ----
|
||||
import GF.Data.Str
|
||||
import GF.Text.Text
|
||||
----import TypeCheck -- to annotate
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Zipper
|
||||
import qualified GF.Infra.Modules as M
|
||||
|
||||
import Control.Monad
|
||||
import Data.List (intersperse)
|
||||
|
||||
-- Linearization for canonical GF. AR 7/6/2003
|
||||
|
||||
-- | The worker function: linearize a Tree, return
|
||||
-- a record. Possibly mark subtrees.
|
||||
--
|
||||
-- NB. Constants in trees are annotated by the name of the abstract module.
|
||||
-- A concrete module name must be given to find (and choose) linearization rules.
|
||||
--
|
||||
-- - If no marking is wanted, 'noMark' :: 'Marker'.
|
||||
--
|
||||
-- - For xml marking, use 'markXML' :: 'Marker'
|
||||
-- | Linearize a tree to a record term, threading the subtree position list
-- ts so that focused subtrees can be marked by the 'Marker'.
linearizeToRecord :: CanonGrammar -> Marker -> Ident -> A.Tree -> Err Term
linearizeToRecord gr mk m = lin [] where

  lin ts t@(Tr (n,xs)) = errIn ("linearization of" +++ prt t) $ do

    let binds = A.bindsNode n
        at = A.atomNode n
        fmk = markSubtree mk n ts (A.isFocusNode n)
    c <- A.val2cat $ A.valNode n
    -- linearize the subtrees, extending the position path
    xs' <- mapM (\ (i,x) -> lin (i:ts) x) $ zip [0..] xs

    -- dispatch on the kind of atom at the node
    r <- case at of
      A.AtC f -> lookf c t f >>= comp xs'
      A.AtI i -> return $ recInt i
      A.AtL s -> return $ recS $ tK $ prt at
      A.AtF i -> return $ recS $ tK $ prt at
      A.AtV x -> lookCat c >>= comp [tK (prt_ at)]
      A.AtM m -> lookCat c >>= comp [tK (prt_ at)]

    r' <- case r of -- to see stg in case the result is variants {}
      FV [] -> lookCat c >>= comp [tK (prt_ t)]
      _ -> return r

    return $ fmk $ mkBinds binds r'

  look = lookupLin gr . redirectIdent m . rtQIdent
  comp = ccompute gr
  -- prepend the bound variables as extra record fields
  -- NOTE(review): only R and FV are handled here — confirm other term
  -- forms cannot occur as a linearization result
  mkBinds bs bdy = case bdy of
    R fs -> R $ [Ass (LV i) (tK (prt t)) | (i,(t,_)) <- zip [0..] bs] ++ fs
    FV rs -> FV $ map (mkBinds bs) rs

  -- wrap a token in a one-field record {s = ...}
  recS t = R [Ass (L (identC "s")) t] ----

  -- default record for an integer literal
  recInt i = R [
    Ass (L (identC "last")) (EInt (rem i 10)),
    Ass (L (identC "s")) (tK $ show i),
    Ass (L (identC "size")) (EInt (if i > 9 then 1 else 0))
    ]

  lookCat = return . errVal defLindef . look
  ---- should always be given in the module

  -- to show missing linearization as term
  lookf c t f = case look f of
    Ok h -> return h
    _ -> lookCat c >>= comp [tK (prt_ t)]
|
||||
|
||||
|
||||
-- | thus the special case:
|
||||
-- | thus the special case: linearization with no marking
linearizeNoMark :: CanonGrammar -> Ident -> A.Tree -> Err Term
linearizeNoMark gr = linearizeToRecord gr noMark

-- | expand tables in linearized term to full, normal-order tables
--
-- NB expand from inside-out so that values are not looked up in copies of branches
expandLinTables :: CanonGrammar -> Term -> Err Term
expandLinTables gr t = case t of
  R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
  T ty rs -> do
    rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
    let t' = T ty $ map (uncurry Cas) rs'
    -- select every parameter value of the table type and rebuild branches
    vs <- alls ty
    ps <- mapM term2patt vs
    ts' <- mapM (comp . S t') $ vs
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
  V ty ts0 -> do
    ts <- mapM exp ts0 -- expand from inside-out
    vs <- alls ty
    ps <- mapM term2patt vs
    return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
  FV ts -> liftM FV $ mapM exp ts
  _ -> composOp exp t
 where
  alls = allParamValues gr
  exp = expandLinTables gr
  comp = ccompute gr []
|
||||
|
||||
-- Do this for an entire grammar:
|
||||
|
||||
-- | Apply 'unoptimizeCanonMod' to every module of the grammar.
unoptimizeCanon :: CanonGrammar -> CanonGrammar
unoptimizeCanon g@(M.MGrammar ms) = M.MGrammar $ map (unoptimizeCanonMod g) ms

-- | Expand the linearization tables of a concrete module; other module
-- types are returned unchanged.
unoptimizeCanonMod :: CanonGrammar -> CanonModule -> CanonModule
unoptimizeCanonMod g = convMod where
  convMod (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os defs)) =
    (m, M.ModMod (M.Module (M.MTConcrete a) x flags me os (mapTree convDef defs)))
  convMod mm = mm
  convDef (c,CncCat ty df pr) = (c,CncCat ty (convT df) (convT pr))
  convDef (f,CncFun c xs li pr) = (f,CncFun c xs (convT li) (convT pr))
  convDef cd = cd
  -- NOTE(review): 'err error id' calls 'error' on failure — expansion
  -- errors crash rather than being reported; confirm this is intended
  convT = err error id . exp
  -- a version of expandLinTables that does not destroy share optimization
  exp t = case t of
    R rs -> liftM (R . map (uncurry Ass)) $ mapPairsM exp [(l,r) | Ass l r <- rs]
    T ty rs@[Cas [_] _] -> do
      rs' <- mapPairsM exp [(l,r) | Cas l r <- rs] -- expand from inside-out
      let t' = T ty $ map (uncurry Cas) rs'
      vs <- alls ty
      ps <- mapM term2patt vs
      ts' <- mapM (comp . S t') $ vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts']
    V ty ts0 -> do
      ts <- mapM exp ts0 -- expand from inside-out
      vs <- alls ty
      ps <- mapM term2patt vs
      return $ T ty [Cas [p] t | (p,t) <- zip ps ts]
    FV ts -> liftM FV $ mapM exp ts
    I _ -> comp t
    _ -> composOp exp t
    where
      alls = allParamValues g
      comp = ccompute g []
|
||||
|
||||
|
||||
-- | from records, one can get to records of tables of strings
|
||||
-- | from records, one can get to records of tables of strings
rec2strTables :: Term -> Err [[(Label,[([Patt],[Str])])]]
rec2strTables r = do
  vs <- allLinValues r
  mapM (mapPairsM (mapPairsM strsFromTerm)) vs

-- | from these tables, one may want to extract the ones for the "s" label
strTables2sTables :: [[(Label,[([Patt],[Str])])]] -> [[([Patt],[Str])]]
strTables2sTables ts = [t | r <- ts, (l,t) <- r, l == linLab0]

-- | the label of the main string field, @s@
linLab0 :: Label
linLab0 = L (identC "s")

-- | to get lists of token lists is easy
sTables2strs :: [[([Patt],[Str])]] -> [[Str]]
sTables2strs = map snd . concat

-- | from this, to get a list of strings
strs2strings :: [[Str]] -> [String]
strs2strings = map unlex

-- | this is just unwords; use an unlexer from Text to postprocess
-- (only the first token sequence of each list is taken)
unlex :: [Str] -> String
unlex = concat . map sstr . take 1 ----
|
||||
|
||||
-- | finally, a top-level function to get a string from an expression
|
||||
-- | finally, a top-level function to get a string from an expression;
-- head is safe here because 'linTree2strings' never returns an empty list
linTree2string :: Marker -> CanonGrammar -> Ident -> A.Tree -> String
linTree2string mk gr m e = head $ linTree2strings mk gr m e -- never empty

-- | you can also get many strings; failures and empty linearizations are
-- reported as an error string, so the result is never the empty list
linTree2strings :: Marker -> CanonGrammar -> Ident -> A.Tree -> [String]
linTree2strings mk gr m e = err return id $ do
  t <- linearizeToRecord gr mk m e
  r <- expandLinTables gr t
  ts <- rec2strTables r
  let ss = strs2strings $ sTables2strs $ strTables2sTables ts
  ifNull (prtBad "empty linearization of" e) return ss -- thus never empty

-- | argument is a Tree, value is a list of strs; needed in Parsing
allLinsOfTree :: CanonGrammar -> Ident -> A.Tree -> [Str]
allLinsOfTree gr a e = err (singleton . str) id $ do
  e' <- return e ---- annotateExp gr e
  r <- linearizeNoMark gr a e'
  r' <- expandLinTables gr r
  ts <- rec2strTables r'
  return $ concat $ sTables2strs $ strTables2sTables ts
|
||||
|
||||
-- | the value is a list of structures arranged as records of tables of terms
|
||||
-- | the value is a list of structures arranged as records of tables of terms
allLinsAsRec :: CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],Term)])]]
allLinsAsRec gr c t = linearizeNoMark gr c t >>= expandLinTables gr >>= allLinValues

-- | the value is a list of structures arranged as records of tables of strings
-- only taking into account string fields
-- True: sep. by /, False: sep by \n
allLinTables ::
  Bool -> CanonGrammar -> Ident -> A.Tree -> Err [[(Label,[([Patt],[String])])]]
allLinTables slash gr c t = do
  r' <- allLinsAsRec gr c t
  mapM (mapM getS) r'
 where
  getS (lab,pss) = liftM (curry id lab) $ mapM gets pss
  gets (ps,t) = liftM (curry id ps . cc . map str2strings) $ strsFromTerm t
  cc = concat . intersperse [if slash then "/" else "\n"]

-- | the value is a list of strings gathered from all fields
allLinBranchFields :: CanonGrammar -> Ident -> A.Tree -> Err [String]
allLinBranchFields gr c trm = do
  r <- linearizeNoMark gr c trm >>= expandLinTables gr
  return [s | (_,t) <- allLinBranches r, s <- gets t]
 where
  gets t = concat [cc (map str2strings s) | Ok s <- [strsFromTerm t]]
  cc = concat . intersperse ["/"]

-- | pretty-print a linearization table; when pars is True, labels and
-- patterns are included in the output
prLinTable :: Bool -> [[(Label,[([Patt],[String])])]] -> [String]
prLinTable pars = concatMap prOne . concat where
  prOne (lab,pss) = (if pars then ((prt lab) :) else id) (map pr pss) ----
  pr (ps,ss) = (if pars then ((unwords (map prt_ ps) +++ ":") +++)
                else id) (unwords ss)
|
||||
|
||||
{-
|
||||
-- the value is a list of strs
|
||||
allLinStrings :: CanonGrammar -> Tree -> [Str]
|
||||
allLinStrings gr ft = case allLinsAsStrs gr ft of
|
||||
Ok ts -> map snd $ concat $ map snd $ concat ts
|
||||
Bad s -> [str s]
|
||||
|
||||
-- the value is a list of strs, not forgetting their arguments
|
||||
allLinsAsStrs :: CanonGrammar -> Tree -> Err [[(Label,[([Patt],Str)])]]
|
||||
allLinsAsStrs gr ft = do
|
||||
lpts <- allLinearizations gr ft
|
||||
return $ concat $ mapM (mapPairsM (mapPairsM strsFromTerm)) lpts
|
||||
|
||||
|
||||
-- to a list of strings
|
||||
linearizeToStrings :: CanonGrammar -> ([Int] ->Term -> Term) -> Tree -> Err [String]
|
||||
linearizeToStrings gr mk = liftM (map unlex) . linearizeToStrss gr mk
|
||||
|
||||
-- to a list of token lists
|
||||
linearizeToStrss :: CanonGrammar -> ([Int] -> Term -> Term) -> Tree -> Err [[Str]]
|
||||
linearizeToStrss gr mk e = do
|
||||
R rs <- linearizeToRecord gr mk e ----
|
||||
t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
|
||||
return $ map strsFromTerm $ allInTable t
|
||||
-}
|
||||
|
||||
-- | the value is a list of strings, not forgetting their arguments
|
||||
-- | the value is a list of strings, not forgetting their arguments
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
allLinsOfFun gr f = do
  t <- lookupLin gr f
  allAllLinValues t --- all fields, not only s. 11/12/2005

-- | returns printname if one exists; otherwise linearizes with metas;
-- any failure falls back to the qualified name itself
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
printOrLinearize gr c f@(m, d) = errVal (prt fq) $
  case lookupPrintname gr (CIQ c d) of
    Ok t -> do
      ss <- strsFromTerm t
      let s = strs2strings [ss]
      return $ ifNull (prt fq) head s
    _ -> do
      ty <- lookupFunType gr m d
      f' <- ref2exp [] ty (A.QC m d)
      tr <- annotate gr f'
      return $ linTree2string noMark gr c tr
 where
  fq = CIQ m d
|
||||
50
src-3.0/GF/UseGrammar/MatchTerm.hs
Normal file
50
src-3.0/GF/UseGrammar/MatchTerm.hs
Normal file
@@ -0,0 +1,50 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : MatchTerm
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
--
|
||||
-- functions for matching with terms. AR 16/3/2006
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.MatchTerm where
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Zipper
|
||||
|
||||
import GF.Grammar.Grammar
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Infra.Ident
|
||||
import GF.Grammar.Values
|
||||
import GF.Grammar.Macros
|
||||
import GF.Grammar.MMacros
|
||||
|
||||
import Control.Monad
|
||||
import Data.List
|
||||
|
||||
-- | Test whether a term contains the same identifier more than once:
-- 'hasDupIdent' counts every constant occurrence, 'hasDupAtom' only
-- atoms (constants outside application-head position).
--
-- Fixed: the previous version took @maximum@ of the group sizes, which
-- raises an empty-list error for a term containing no constants at
-- all; 'any' simply returns False in that case.
hasDupIdent, hasDupAtom :: Exp -> Bool
hasDupIdent = any ((> 1) . length) . group . sort . allConstants True
hasDupAtom  = any ((> 1) . length) . group . sort . allConstants False
|
||||
|
||||
-- | Test whether a given identifier occurs anywhere in a term
-- (in any constant position, including application heads).
grepIdent :: Ident -> Exp -> Bool
grepIdent ident term = ident `elem` allConstants True term
|
||||
|
||||
-- | Form the list of all constants occurring in a term, with
-- duplicates kept (callers group/sort the result).  With the flag
-- 'True' every constant is collected; with 'False' constants in the
-- head position of an application are skipped, leaving only "atoms".
--
-- Implemented with a state monad carrying the accumulator list; the
-- 'err (const []) snd' at the front turns any failure into [].
allConstants :: Bool -> Exp -> [Ident]
allConstants alsoApp = err (const []) snd . flip appSTM [] . collect where
  collect e = case e of
    Q _ c -> add c e
    QC _ c -> add c e
    Cn c -> add c e
    -- atoms-only mode: skip the head, traverse the argument side(s)
    App f a | not alsoApp -> case f of
      App g b -> collect b >> collect a
      _ -> collect a
    -- NOTE(review): for @App (App g b) a@ this visits b and a but not
    -- g itself -- presumably g is always the (skipped) head constant;
    -- confirm against the callers.
    _ -> composOp collect e
  add c e = updateSTM (c:) >> return e
|
||||
140
src-3.0/GF/UseGrammar/Morphology.hs
Normal file
140
src-3.0/GF/UseGrammar/Morphology.hs
Normal file
@@ -0,0 +1,140 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Morphology
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:49 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.8 $
|
||||
--
|
||||
-- Morphological analyser constructed from a GF grammar.
|
||||
--
|
||||
-- we first found the binary search tree sorted by word forms more efficient
|
||||
-- than a trie, at least for grammars with 7000 word forms
|
||||
-- (18\/11\/2003) but this may change since we have to use a trie
|
||||
-- for decompositions and also want to use it in the parser
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Morphology where
|
||||
|
||||
import GF.Canon.AbsGFC
|
||||
import GF.Canon.GFC
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Canon.CMacros
|
||||
import GF.Canon.Look
|
||||
import GF.Grammar.LookAbs
|
||||
import GF.Infra.Ident
|
||||
import qualified GF.Grammar.Macros as M
|
||||
import GF.UseGrammar.Linear
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Glue
|
||||
|
||||
import Data.Char
|
||||
import Data.List (sortBy, intersperse)
|
||||
import Control.Monad (liftM)
|
||||
import GF.Data.Trie2
|
||||
|
||||
-- construct a morphological analyser from a GF grammar. AR 11/4/2001
|
||||
|
||||
-- we first found the binary search tree sorted by word forms more efficient
|
||||
-- than a trie, at least for grammars with 7000 word forms
|
||||
-- (18\/11\/2003) but this may change since we have to use a trie
|
||||
-- for decompositions and also want to use it in the parser
|
||||
|
||||
-- | A morphological analyser: a trie over the characters of word
-- forms, holding one descriptive 'String' per analysis.
type Morpho = Trie Char String

-- | The analyser that knows no words.
emptyMorpho :: Morpho
emptyMorpho = emptyTrie

-- | Analyse one word form.  The result comes straight from
-- 'trieLookup'; presumably the 'String' echoes the word and the list
-- holds its analyses -- confirm in GF.Data.Trie2.
appMorpho :: Morpho -> String -> (String,[String])
appMorpho = appMorphoOnly
---- add lookup for literals

-- | Like 'appMorpho' but explicitly without literal handling.
appMorphoOnly :: Morpho -> String -> (String,[String])
appMorphoOnly m s = trieLookup m s
|
||||
|
||||
-- | Recognize a word form, excluding literals: known iff the analyser
-- returns at least one analysis for it.
isKnownWord :: Morpho -> String -> Bool
isKnownWord mo word = not (null analyses)
  where (_, analyses) = appMorphoOnly mo word
|
||||
|
||||
-- | Construct a morphological analyser from a GF grammar for the
-- concrete language @a@: collect all word forms produced by all
-- functions into a trie, each tagged with its function, value
-- category and inflection parameters.
mkMorpho :: CanonGrammar -> Ident -> Morpho
mkMorpho gr a = tcompile $ concatMap mkOne $ allItems where

  -- to undo 'values' optimization
  comp = ccompute gr []

  -- lexical rules yield tagged inflection forms; syncategorematic
  -- rules yield the words occurring in their linearization rules
  mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
  mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun

  -- gather forms of lexical items
  allLins fun@(m,f) = errVal [] $ do
    ts <- lookupLin gr (CIQ a f) >>= comp >>= allAllLinValues
    ss <- mapM (mapPairsM (mapPairsM (liftM wordsInTerm . comp))) ts
    return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
  prOne (_,f) c (ps,s) = (s, [prt f +++ tagPrt c +++ unwords (map prt_ ps)])

  -- gather syncategorematic words
  allSyns fun@(m,f) = errVal [] $ do
    tss <- allLinsOfFun gr (CIQ a f)
    let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
    return $ concat $ map wordsInTerm ss
  prSyn f s = (s, ["+<syncategorematic>" ++ tagPrt f])

  -- all words, Left from lexical rules and Right syncategorematic;
  -- a function is lexical iff its type has no arguments
  allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
    allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
    lexRole t = case M.typeForm t of
      Ok ([],_,_) -> Left
      _ -> Right
|
||||
|
||||
-- printing full-form lexicon and results

-- | Render the whole analyser as a full-form lexicon, one entry
-- block per word form.
prMorpho :: Morpho -> String
prMorpho morpho = unlines [prMorphoAnalysis entry | entry <- collapse morpho]
|
||||
|
||||
-- | Show one analysis entry: the word followed by its (non-empty)
-- analyses on separate lines, or the word marked with @*@ when none
-- remain.
prMorphoAnalysis :: (String,[String]) -> String
prMorphoAnalysis (w, rawAnalyses)
  | null analyses = w ++++ "*"
  | otherwise     = unlines (w : analyses)
  where analyses = filter (not . null) rawAnalyses
|
||||
|
||||
-- | One-line variant: a bracketed @word/analysis1/analysis2@ list;
-- an unknown word is shown with a @*@ marker instead.
prMorphoAnalysisShort :: (String,[String]) -> String
prMorphoAnalysisShort (w, fs) = prBracket (marker ++ prTList "/" fs)
  where marker = if null fs then w +++ "*" else ""
|
||||
|
||||
-- | Render a qualified name as a tag; only the second component is
-- shown (the module name is dropped).
tagPrt :: Print a => (a,a) -> String
tagPrt (_moduleName, c) = "+" ++ prt c
|
||||
|
||||
-- | All word forms the analyser recognizes.
allMorphoWords :: Morpho -> [String]
allMorphoWords morpho = [w | (w, _) <- collapse morpho]
|
||||
|
||||
-- analyse running text and show results either in short form or on separate lines

-- | Analyse running text and show just the word, prefixed with @*@
-- when no analysis is found.
morphoTextStatus :: Morpho -> String -> String
morphoTextStatus mo text = unlines (map (mark . appMorpho mo) (words text))
  where
    mark (w, analyses)
      | null analyses = "*" +++ w
      | otherwise     = w
|
||||
|
||||
-- | Analyse running text, showing one short-form result per word.
morphoTextShort :: Morpho -> String -> String
morphoTextShort mo text =
  unlines [prMorphoAnalysisShort (appMorpho mo w) | w <- words text]
|
||||
|
||||
-- | Analyse running text, showing the full results, each preceded by
-- a blank line.
morphoText :: Morpho -> String -> String
morphoText mo text =
  unlines [ '\n' : prMorphoAnalysis (appMorpho mo w) | w <- words text ]
|
||||
|
||||
-- | Full-form lexicon in the format used by the Italian Verb Engine:
-- @form : analysis1 / analysis2 / ...@.
prFullForm :: Morpho -> String
prFullForm morpho = unlines (map entry (collapse morpho))
  where entry (s, ps) = s ++ " : " ++ unwords (intersperse "/" ps)
|
||||
|
||||
-- using Huet's unglueing method to find word boundaries
---- it would be much better to use a trie also for morphological analysis,
---- so this is for the sake of experiment
---- Moreover, we should specify the cases in which this happens - not all words

-- | Split a string into known words; falls back to plain 'words'
-- when the decomposition fails.
decomposeWords :: Morpho -> String -> [String]
decomposeWords mo s = errVal (words s) $ decomposeSimple mo s
|
||||
70
src-3.0/GF/UseGrammar/Paraphrases.hs
Normal file
70
src-3.0/GF/UseGrammar/Paraphrases.hs
Normal file
@@ -0,0 +1,70 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Paraphrases
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:49 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.6 $
|
||||
--
|
||||
-- paraphrases of GF terms. AR 6\/10\/1998 -- 24\/9\/1999 -- 5\/7\/2000 -- 5\/6\/2002
|
||||
--
|
||||
-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL)
|
||||
--
|
||||
-- thus inherited from the old GF. Incomplete and inefficient...
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Paraphrases (mkParaphrases) where
|
||||
|
||||
import GF.Grammar.Abstract
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Grammar.LookAbs
|
||||
import GF.Grammar.AbsCompute
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import Data.List (nub)
|
||||
|
||||
-- paraphrases of GF terms. AR 6/10/1998 -- 24/9/1999 -- 5/7/2000 -- 5/6/2002
-- Copyright (c) Aarne Ranta 1998--99, under GNU General Public License (see GPL)
-- thus inherited from the old GF. Incomplete and inefficient...

-- | All paraphrases of a term with respect to the grammar's
-- definitions, beta-normalized and with duplicates removed.
mkParaphrases :: GFCGrammar -> Term -> [Term]
mkParaphrases grammar term =
  nub (map (beta []) (paraphrases (allDefs grammar) term))
|
||||
|
||||
-- | A function together with its defining term.
type Definition = (Fun,Term)

-- | All paraphrases of a term: immediate rewrites by the definitions,
-- combinations of paraphrases of the immediate subterms, and the term
-- itself (appended at the end).
paraphrases :: [Definition] -> Term -> [Term]
paraphrases th t =
  paraImmed th t ++
  --- paraMatch th t ++
  case t of
    App c a -> [App d b | d <- paraphrases th c, b <- paraphrases th a]
    Abs x b -> [Abs x d | d <- paraphrases th b]
    c -> []
  -- the unchanged term always counts as a (trivial) paraphrase
   ++ [t]
|
||||
|
||||
-- | Immediate paraphrases: every function whose defining term is
-- syntactically equal to the given term rewrites it to that function.
-- The trailing case is currently inert (its only live branch yields
-- []); the commented alternative is kept for reference.
paraImmed :: [Definition] -> Term -> [Term]
paraImmed defs t =
  [Q m f | ((m,f), u) <- defs, t == u] ++ --- eqTerm
  case t of
    ---- Cn c -> [u | (f, u) <- defs, eqStrIdent f c]
    _ -> []
|
||||
|
||||
{- ---
|
||||
paraMatch :: [Definition] -> Trm -> [Trm]
|
||||
paraMatch th@defs t =
|
||||
[mkApp (Cn f) xx | (PC f zz, u) <- defs,
|
||||
let (fs,sn) = fullApp u, fs == h, length sn == length zz] ++
|
||||
case findAMatch defs t of
|
||||
Ok (g,b) -> [substTerm [] g b]
|
||||
_ -> []
|
||||
where
|
||||
(h,xx) = fullApp t
|
||||
fullApp c = case c of
|
||||
App f a -> (f', a' ++ [a]) where (f',a') = fullApp f
|
||||
c -> (c,[])
|
||||
|
||||
-}
|
||||
177
src-3.0/GF/UseGrammar/Parsing.hs
Normal file
177
src-3.0/GF/UseGrammar/Parsing.hs
Normal file
@@ -0,0 +1,177 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Parsing
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/06/02 10:23:52 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.25 $
|
||||
--
|
||||
-- (Description of the module)
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Parsing where
|
||||
|
||||
import GF.Infra.CheckM
|
||||
import qualified GF.Canon.AbsGFC as C
|
||||
import GF.Canon.GFC
|
||||
import GF.Canon.MkGFC (trExp) ----
|
||||
import GF.Canon.CMacros
|
||||
import GF.Grammar.MMacros (refreshMetas)
|
||||
import GF.UseGrammar.Linear
|
||||
import GF.Data.Str
|
||||
import GF.CF.CF
|
||||
import GF.CF.CFIdent
|
||||
import GF.Infra.Ident
|
||||
import GF.Grammar.TypeCheck
|
||||
import GF.Grammar.Values
|
||||
--import CFMethod
|
||||
import GF.UseGrammar.Tokenize
|
||||
import GF.UseGrammar.Morphology (isKnownWord)
|
||||
import GF.CF.Profile
|
||||
import GF.Infra.Option
|
||||
import GF.UseGrammar.Custom
|
||||
import GF.Compile.ShellState
|
||||
|
||||
import GF.CF.PPrCF (prCFTree)
|
||||
-- import qualified GF.OldParsing.ParseGFC as NewOld -- OBSOLETE
|
||||
import qualified GF.Parsing.GFC as New
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import Data.List (nub,sortBy)
|
||||
import Data.Char (toLower)
|
||||
import Control.Monad (liftM)
|
||||
|
||||
-- AR 26/1/2000 -- 8/4 -- 28/1/2001 -- 9/12/2002

-- | Parse a string to trees, discarding the diagnostic message
-- produced by 'parseStringMsg'.
parseString :: Options -> StateGrammar -> CFCat -> String -> Err [Tree]
parseString os sg cat str = liftM fst (parseStringMsg os sg cat str)
|
||||
|
||||
-- | Parse a string to trees together with the collected diagnostic
-- messages (warnings are returned rather than printed); a hard
-- failure yields no trees and the error text as the message.
parseStringMsg :: Options -> StateGrammar -> CFCat -> String -> Err ([Tree],String)
parseStringMsg os sg cat s =
  case checkStart (parseStringC os sg cat s) of
    Ok (ts, (_, msgs)) -> return (ts, unlines (reverse msgs))
    Bad msg            -> return ([], msg)
|
||||
|
||||
-- | Parse a string in the 'Check' monad, choosing between the old
-- chart parser and the new parser implementations.
--
-- The old parser is used when the option @-old@ is given, or when the
-- grammar uses higher-order abstract syntax and @-fcfg@ is not forced.
parseStringC :: Options -> StateGrammar -> CFCat -> String -> Check [Tree]
parseStringC opts0 sg cat s
  | oElem (iOpt "old") opts0 ||
    (not (oElem (iOpt "fcfg") opts0) && stateHasHOAS sg) = do
      let opts = unionOptions opts0 $ stateOptions sg
          cf = stateCF sg
          gr = stateGrammarST sg
          cn = cncId sg
          -- the tokenizer may return several alternative token lists
          toks = customOrDefault opts useTokenizer customTokenizer sg s
          parser = customOrDefault opts useParser customParser sg cat
      if oElem (iOpt "cut") opts
        -- -cut: stop at the first token list that parses
        then doUntil (not . null) $ map (tokens2trms opts sg cn parser) toks
        else mapM (tokens2trms opts sg cn parser) toks >>= return . concat

---- | or [oElem p opts0 |
----       p <- [newCParser,newMParser,newFParser,newParser,newerParser] = do

  | otherwise = do
      let opts = unionOptions opts0 $ stateOptions sg
          -- which of the new parsing algorithms to use
          algorithm | oElem newCParser opts0 = "c"
                    | oElem newMParser opts0 = "m"
                    | oElem newFParser opts0 = "f"
                    | otherwise = "f" -- default algorithm: FCFG
          strategy = maybe "bottomup" id $ getOptVal opts useParser
                     -- -parser=bottomup/topdown
          tokenizer = customOrDefault opts useTokenizer customTokenizer sg
          toks = case tokenizer s of
            t:_ -> t
            _ -> [] ---- no support for undet. tok.
          -- words the morphology does not recognize; capitalized
          -- tokens are also tried lower-cased
          unknowns =
            [w | TC w <- toks, unk w && unk (uncap w)] ++ [w | TS w <- toks, unk w]
            where
              unk w = not $ isKnownWord (morpho sg) w
              uncap (c:cs) = toLower c : cs
              uncap s = s

      case unknowns of
        -- with -trynextlang, silently give up so another language can try
        _:_ | oElem (iOpt "trynextlang") opts -> return []
        _:_ -> fail $ "Unknown words:" +++ unwords unknowns
        _ -> do
          ts <- checkErr $ New.parse algorithm strategy (pInfo sg) (absId sg) cat toks
          -- type-check/annotate the parse results, refreshing metas
          ts' <- checkErr $
            allChecks $ map (annotate (stateGrammarST sg) . refreshMetas []) ts
          return $ optIntOrAll opts flagNumber ts'
|
||||
|
||||
|
||||
-- | Run a CF parser on one token list and convert the resulting
-- parse forest into abstract-syntax trees.
tokens2trms :: Options -> StateGrammar -> Ident -> CFParser -> [CFTok] -> Check [Tree]
tokens2trms opts sg cn parser toks =
  let result    = parser toks
      chartInfo = snd result
      -- peb 25/5-04: removed nub (O(n^2))
      forest    = {- nub $ -} cfParseResults result
  in trees2trms opts sg cn toks forest chartInfo
|
||||
|
||||
-- | Convert raw context-free parse trees into type-checked, annotated
-- abstract trees, with postprocessing, an optional morphology
-- compatibility filter, and diagnostics controlled by the options.
trees2trms ::
  Options -> StateGrammar -> Ident -> [CFTok] -> [CFTree] -> String -> Check [Tree]
trees2trms opts sg cn as ts0 info = do
  let s = unwords $ map prCFTok as
  ts <- case () of
    -- no CF parse at all: warn and return nothing
    _ | null ts0 -> checkWarn ("No success in cf parsing" +++ s) >> return []
    -- -raw: convert the raw CF trees directly, or at least show them
    _ | raw -> do
      ts1 <- return (map cf2trm0 ts0) ----- should not need annot
      checks [
        mapM (checkErr . (annotate gr) . trExp) ts1 ---- complicated, often fails
       ,checkWarn (unlines ("Raw CF trees:":(map prCFTree ts0))) >> return []
        ]
    -- the normal pipeline
    _ -> do
      -- cap the number of raw trees considered (-rawtrees=<Int>)
      let num = optIntOrN opts flagRawtrees 999999
      let (ts01,rest) = splitAt num ts0
      if null rest then return ()
        else raise ("Warning: only" +++ show num +++ "raw parses out of" +++
                    show (length ts0) +++
                    "considered; use -rawtrees=<Int> to see more"
                   )
      -- profile-based postparsing; fail if nothing survives
      (ts1,ss) <- checkErr $ mapErrN 1 postParse ts01
      if null ts1 then raise ss else return ()
      -- annotate (type-check) the surviving trees
      ts2 <- checkErr $
        allChecks $ map (annotate gr . refreshMetas [] . trExp) ts1 ----
      if forgive then return ts2 else do
        -- keep only trees whose linearizations are compatible with
        -- the input tokens (morphology check)
        let tsss = [(t, allLinsOfTree gr cn t) | t <- ts2]
            ps = [t | (t,ss) <- tsss,
                      any (compatToks as) (map str2cftoks ss)]
        if null ps
          then raise $ "Failure in morphology." ++
                 if verb
                   then "\nPossible corrections: " +++++
                        unlines (nub (map sstr (concatMap snd tsss)))
                   else ""
          else return ps
  -- verbose diagnostics about the token list and the chart
  if verb
    then checkWarn $ " the token list" +++ show as ++++ unknownWords sg as +++++ info
    else return ()

  return $ optIntOrAll opts flagNumber $ nub ts
 where
   gr = stateGrammarST sg

   raw = oElem rawParse opts
   verb = oElem beVerbose opts
   forgive = oElem forgiveParse opts

   ---- Operatins.allChecks :: ErrorMonad m => [m a] -> m [a]

   -- which plain input tokens match no terminal of the grammar
   unknownWords sg ts = case filter noMatch [t | t@(TS _) <- ts] of
     [] -> "where all words are known"
     us -> "with the unknown tokens" +++ show us --- needs to be fixed for literals
    where
      terminals = map TS $ stateGrammarWords sg
      noMatch t = all (not . compatTok t) terminals
||||
|
||||
|
||||
--- too much type checking in building term info? return FullTerm to save work?

-- | Raw conversion of a CF parse tree to a canonical expression: the
-- tree's function constant applied to the converted subtrees.
cf2trm0 :: CFTree -> C.Exp
cf2trm0 (CFTree (fun, (_, subtrees))) =
  foldl C.EApp (C.EAtom (headOf fun)) (map cf2trm0 subtrees)
  where
    headOf (CFFun (f, _)) = f
|
||||
66
src-3.0/GF/UseGrammar/Randomized.hs
Normal file
66
src-3.0/GF/UseGrammar/Randomized.hs
Normal file
@@ -0,0 +1,66 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Randomized
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:51 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.8 $
|
||||
--
|
||||
-- random generation and refinement. AR 22\/8\/2001.
|
||||
-- implemented as sequence of refinement menu selecsions, encoded as integers
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Randomized where
|
||||
|
||||
import GF.Grammar.Abstract
|
||||
import GF.UseGrammar.Editing
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Zipper
|
||||
|
||||
--- import Arch (myStdGen) --- circular for hbc
|
||||
import System.Random --- (mkStdGen, StdGen, randoms) --- bad import for hbc
|
||||
|
||||
-- random generation and refinement. AR 22/8/2001
-- implemented as a sequence of refinement menu selections, encoded as integers

-- | Make a deterministic random generator from a seed.
-- (Defined here rather than in Arch to avoid a circular import for hbc.)
myStdGen :: Int -> StdGen
myStdGen = mkStdGen ---
|
||||
|
||||
-- | Build one random tree; @mx@ bounds the number of random choices
-- consumed, preventing infinite search.
mkRandomTree :: StdGen -> Int -> CGrammar -> Either Cat Fun -> Err Tree
mkRandomTree gen mx gr catfun = mkTreeFromInts choices gr catfun
  where choices = take mx (randoms gen)
|
||||
|
||||
-- | A refinement action driven by at most @mx@ random non-negative
-- menu selections.
refineRandom :: StdGen -> Int -> CGrammar -> Action
refineRandom gen mx = mkStateFromInts (take mx (map abs (randoms gen)))
|
||||
|
||||
-- | Build a tree from a list of integers interpreted as successive
-- refinement-menu selections, starting from a fresh category or
-- function.
mkTreeFromInts :: [Int] -> CGrammar -> Either Cat Fun -> Err Tree
mkTreeFromInts ints gr catfun = do
  start <- case catfun of
    Left cat  -> newCat gr cat initState
    Right fun -> newFun gr fun initState
  final <- mkStateFromInts ints gr start
  return (loc2tree final)
|
||||
|
||||
-- | Drive the editor from a starting state by a list of integers,
-- each selecting one entry of the current refinement menu (modulo its
-- length); the final state must be complete.
mkStateFromInts :: [Int] -> CGrammar -> Action
mkStateFromInts ints gr z = mkRandomState ints z >>= reCheckState gr where
  mkRandomState [] state = do
    testErr (isCompleteState state) "not completed"
    return state
  mkRandomState (n:ns) state = do
    let refs = refinementsState gr state
        -- the nonrecursive refinements.  Fixed: this was previously
        -- @map (not . snd . snd) refs@, a list of Bools whose
        -- emptiness is equivalent to @null refs@, so the guard below
        -- never actually checked for nonrecursive refinements.
        refs0 = filter (not . snd . snd) refs
    testErr (not (null refs0)) $ "no nonrecursive refinements available for" +++
                                 prt (actVal state)
    (ref,_) <- (refs !? (n `mod` (length refs)))
    state1 <- refineWithAtom False gr ref state
    if isCompleteState state1
      then return state1
      else do
        state2 <- goNextMeta state1
        mkRandomState ns state2
|
||||
|
||||
181
src-3.0/GF/UseGrammar/Session.hs
Normal file
181
src-3.0/GF/UseGrammar/Session.hs
Normal file
@@ -0,0 +1,181 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Session
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/08/17 15:13:55 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.12 $
|
||||
--
|
||||
-- (Description of the module)
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Session where
|
||||
|
||||
import GF.Grammar.Abstract
|
||||
import GF.Infra.Option
|
||||
import GF.UseGrammar.Custom
|
||||
import GF.UseGrammar.Editing
|
||||
import GF.Compile.ShellState ---- grammar
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Zipper (keepPosition) ---
|
||||
|
||||
-- First version 8/2001. Adapted to GFC with modules 19/6/2003.
|
||||
-- Nothing had to be changed, which is a sign of good modularity.
|
||||
|
||||
-- keep these abstract

-- | The session state: a history (most recent entry first) of editor
-- states, each with its candidate refinements and clipboard
-- ('Exp'-list: candidate refinements, clipboard) and session info.
type SState = [(State,([Exp],[Clip]),SInfo)]

-- | 'String' is message, 'Int' is the view
type SInfo = ([String],(Int,Options))

-- | The initial, never-empty session history; used instead of the
-- empty list so the head-taking accessors below stay safe.
initSState :: SState
initSState = [(initState, ([],[]), (["Select 'New' category to start"],(0,noOptions)))]
-- instead of empty

type Clip = Tree ---- (Exp,Type)
|
||||
|
||||
-- | (peb): Something wrong with this definition??
-- Shouldn't the result type be 'SInfo'?
--
-- > okInfo :: Int -> SInfo == ([String], (Int, Options))
--
-- NOTE(review): the 'Bool' here indeed does not match the 'Options'
-- component of 'SInfo'; confirm whether any caller expects 'SInfo'.
okInfo :: n -> ([s], (n, Bool))
okInfo n = ([],(n,True))
|
||||
|
||||
-- Accessors for the most recent entry of the session history.  They
-- all pattern match on a non-empty list; 'SState' is kept non-empty
-- by construction (see 'initSState'), so the missing [] equations are
-- unreachable under that invariant.
stateSState :: SState -> State
candsSState :: SState -> [Exp]
clipSState :: SState -> [Clip]
infoSState :: SState -> SInfo
msgSState :: SState -> [String]
viewSState :: SState -> Int
optsSState :: SState -> Options

stateSState ((s,_,_):_) = s
candsSState ((_,(ts,_),_):_)= ts
clipSState ((_,(_,ts),_):_)= ts
infoSState ((_,_,i):_) = i
msgSState ((_,_,(m,_)):_) = m
viewSState ((_,_,(_,(v,_))):_) = v
optsSState ((_,_,(_,(_,o))):_) = o

-- | The tree currently being edited.
treeSState :: SState -> Tree
treeSState = actTree . stateSState
|
||||
|
||||
|
||||
-- | from state to state
type ECommand = SState -> SState

-- * elementary commands

-- ** change state, drop cands, drop message, preserve options

-- | Push a new editor state onto the history: clears the candidate
-- list and the message, keeps the clipboard and the rest of the info.
changeState :: State -> ECommand
changeState s ss = changeMsg [] $ (s,([],clipSState ss),infoSState ss) : ss
|
||||
|
||||
-- | Push a new candidate list, with a message ('candInfo') reporting
-- how many alternatives there are.
changeCands :: [Exp] -> ECommand
changeCands ts ss@((s,(_,cb),(_,b)):_) = (s,(ts,cb),(candInfo ts,b)) : ss

-- | Push a state with a tree added to the front of the clipboard.
addtoClip :: Clip -> ECommand
addtoClip t ss@((s,(ts,cb),(i,b)):_) = (s,(ts,t:cb),(i,b)) : ss

-- | NOTE(review): despite the name, this drops the first @n@ clips,
-- not the clip at index @n@ -- confirm against the callers whether
-- @take n cb ++ drop (n+1) cb@ was intended.
removeClip :: Int -> ECommand
removeClip n ss@((s,(ts,cb),(i,b)):_) = (s,(ts, drop n cb),(i,b)) : ss

-- | Replace the message of the newest entry; an (invariant-breaking)
-- empty history is rebuilt from 'initSState'.
changeMsg :: [String] -> ECommand
changeMsg m ((s,ts,(_,b)):ss) = (s,ts,(m,b)) : ss -- just change message
changeMsg m _ = (s,ts,(m,b)) : [] where [(s,ts,(_,b))] = initSState
|
||||
|
||||
-- | Bump the view counter of the newest entry.
-- NOTE(review): unlike 'changeMsg' there is no equation for the empty
-- history; safe only under the non-empty 'SState' invariant.
changeView :: ECommand
changeView ((s,ts,(m,(v,b))):ss) = (s,ts,(m,(v+1,b))) : ss -- toggle view

-- | Run a command and then set the message.
withMsg :: [String] -> ECommand -> ECommand
withMsg m c = changeMsg m . c

-- | Transform the options of the newest entry in place.
changeStOptions :: (Options -> Options) -> ECommand
changeStOptions f ((s,ts,(m,(v,o))):ss) = (s,ts,(m,(v, f o))) : ss

-- | Clear the message.
noNeedForMsg :: ECommand
noNeedForMsg = changeMsg [] -- everything's all right: no message

-- | Message describing how many candidate alternatives there are.
candInfo :: [Exp] -> [String]
candInfo ts = case length ts of
  0 -> ["no acceptable alternative"]
  1 -> ["just one acceptable alternative"]
  n -> [show n +++ "alternatives to select"]
|
||||
|
||||
-- * keep SState abstract from this on

-- ** editing commands

-- | Lift an editing 'Action' to a session command: on success push
-- the new editor state, on failure report the error as a message.
action2command :: Action -> ECommand
action2command act sstate =
  case act (stateSState sstate) of
    Ok newState -> changeState newState sstate
    Bad msg     -> changeMsg [msg] sstate
|
||||
|
||||
-- | Like 'action2command', but move to the next metavariable after a
-- successful execution.
action2commandNext :: Action -> ECommand -- move to next meta after execution
action2commandNext act = action2command (\s -> act s >>= goNextMetaIfCan)

-- | Like 'action2command', but restore the old focus position after
-- execution.
action2commandKeep :: Action -> ECommand -- keep old position after execution
action2commandKeep act = action2command (\s -> keepPosition act s)
|
||||
|
||||
-- | Undo @n@ steps by dropping history entries; if the history is too
-- short, fall back to its oldest entry with an error message.
undoCommand :: Int -> ECommand
undoCommand n ss
  | length ss < n = changeMsg ["cannot go all the way back"] [last ss]
  | otherwise     = changeMsg ["successful undo"] (drop n ss)
|
||||
|
||||
-- | Select candidate number @i@ from the current candidate list,
-- annotate it and splice it into the tree at the focus; any failure
-- becomes a message instead.
selectCand :: CGrammar -> Int -> ECommand
selectCand gr i state = err (\m -> changeMsg [m] state) id $ do
  exp <- candsSState state !? i
  let s = stateSState state
  tree <- annotateInState gr exp s
  return $ case replaceSubTree tree s of
    Ok st' -> changeState st' state
    Bad s -> changeMsg [s] state
|
||||
|
||||
-- | Refine with a list of expressions: a unique expression is applied
-- directly, several become the new candidate list.
refineByExps :: Bool -> CGrammar -> [Exp] -> ECommand
refineByExps der gr trees = case trees of
  [t] -> action2commandNext (refineWithExpTC der gr t)
  _ -> changeCands trees

-- | Like 'refineByExps' but for already-annotated trees.
refineByTrees :: Bool -> CGrammar -> [Tree] -> ECommand
refineByTrees der gr trees = case trees of
  [t] -> action2commandNext (refineOrReplaceWithTree der gr t)
  _ -> changeCands $ map tree2exp trees

-- | Replace the subtree in focus: a unique expression is applied
-- directly, several become the new candidate list.
replaceByTrees :: CGrammar -> [Exp] -> ECommand
replaceByTrees gr trees = case trees of
  [t] -> action2commandNext (\s ->
           annotateExpInState gr t s >>= flip replaceSubTree s)
  _ -> changeCands trees
|
||||
|
||||
-- | Apply a named custom edit command (looked up in
-- 'customEditCommand'); an unknown name is a no-op ('return').
replaceByEditCommand :: StateGrammar -> String -> ECommand
replaceByEditCommand gr co =
  action2commandKeep $
    maybe return ($ gr) $
      lookupCustom customEditCommand (strCI co)

-- | Apply a named custom term command to the given tree and refine
-- with the results; an unknown name refines with the tree unchanged.
replaceByTermCommand :: Bool -> StateGrammar -> String -> Tree -> ECommand ----
replaceByTermCommand der gr co exp =
  let g = grammar gr in
  refineByTrees der g $ maybe [exp] (\f -> f gr exp) $
    lookupCustom customTermCommand (strCI co)
|
||||
|
||||
-- | The numbered clipboard entries whose values fit the current focus.
possClipsSState :: StateGrammar -> SState -> [(Int,Clip)]
possClipsSState gr s =
  [ numbered
  | numbered <- zip [0..] (clipSState s)
  , possibleTreeVal cgr st (snd numbered)
  ]
  where
    st = stateSState s
    cgr = grammar gr
|
||||
|
||||
-- | Fetch clipboard entry number @i@ (0-based).
--
-- Fixed: the previous guard @length cs > i@ let a negative index
-- through to @cs !! i@, which crashes; out-of-range indices now fail
-- cleanly in both directions.
getNumberedClip :: Int -> SState -> Err Clip
getNumberedClip i s
  | i >= 0 && i < length cs = return (cs !! i)
  | otherwise = Bad "not enough clips"
  where
    cs = clipSState s
|
||||
44
src-3.0/GF/UseGrammar/Statistics.hs
Normal file
44
src-3.0/GF/UseGrammar/Statistics.hs
Normal file
@@ -0,0 +1,44 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Statistics
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/09/04 11:45:38 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
--
|
||||
-- statistics on canonical grammar: amounts of generated code
|
||||
-- AR 4\/9\/2005.
|
||||
-- uses canonical grammar
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Statistics (prStatistics) where
|
||||
|
||||
import GF.Infra.Modules
|
||||
import GF.Infra.Option
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Canon.GFC
|
||||
import GF.Canon.MkGFC
|
||||
|
||||
import GF.Data.Operations
|
||||
|
||||
import Data.List (sortBy)
|
||||
|
||||
-- | the top level function: report the module count, the total
-- printed gfc size, and the 40 largest definitions (by printed size).
prStatistics :: CanonGrammar -> String
prStatistics can = unlines $ [
  show (length mods) ++ "\t\t modules",
  show chars ++ "\t\t gfc size",
  "",
  "Top 40 definitions"
  ] ++
  [show d ++ "\t\t " ++ f | (d,f) <- tops]
 where
   -- sort ascending by size, then reverse: largest first
   tops = take 40 $ reverse $ sortBy (\ (i,_) (j,_) -> compare i j) defs
   -- (printed length of the definition, qualified name)
   defs = [(length (prt (info2def j)), name m j) | (m,j) <- infos]
   infos = [(m,j) | (m,ModMod mo) <- mods, j <- tree2list (jments mo)]
   name m (f,_) = prt m ++ "." ++ prt f
   mods = modules can
   chars = length $ prCanon can
|
||||
222
src-3.0/GF/UseGrammar/Tokenize.hs
Normal file
222
src-3.0/GF/UseGrammar/Tokenize.hs
Normal file
@@ -0,0 +1,222 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Tokenize
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/09/29 13:20:08 $
|
||||
-- > CVS $Author: aarne $
|
||||
-- > CVS $Revision: 1.14 $
|
||||
--
|
||||
-- lexers = tokenizers, to prepare input for GF grammars. AR 4\/1\/2002.
|
||||
-- an entry for each is included in 'Custom.customTokenizer'
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Tokenize ( tokWords,
|
||||
tokLits,
|
||||
tokVars,
|
||||
lexHaskell,
|
||||
lexHaskellLiteral,
|
||||
lexHaskellVar,
|
||||
lexText,
|
||||
lexTextVar,
|
||||
lexC2M, lexC2M',
|
||||
lexTextLiteral,
|
||||
lexIgnore,
|
||||
wordsLits
|
||||
) where
|
||||
|
||||
import GF.Data.Operations
|
||||
---- import UseGrammar (isLiteral,identC)
|
||||
import GF.CF.CFIdent
|
||||
|
||||
import Data.Char
|
||||
|
||||
-- lexers = tokenizers, to prepare input for GF grammars. AR 4/1/2002
-- an entry for each is included in Custom.customTokenizer

-- | The simplest tokenizer: whitespace-separated words, no literals.
tokWords :: String -> [CFTok]
tokWords input = [tS w | w <- words input]
|
||||
|
||||
-- | Tokenize words, recognizing literals; quoted strings may span
-- several words and are merged back into one token before
-- classification by 'mkCFTok'.
tokLits :: String -> [CFTok]
tokLits = map mkCFTok . mergeStr . wordsLits where
  -- re-join a multi-word quoted string into a single "word"
  mergeStr ss = case ss of
    -- a word opening a quote without closing it starts a string
    w@(c:cs):rest | elem c "\'\"" && c /= last w -> getStr [w] rest
    w :rest -> w : mergeStr rest
    [] -> []
  -- accumulate words (in reverse) until one ends with a quote
  getStr v ss = case ss of
    w@(_:_):rest | elem (last w) "\'\"" -> (unwords (reverse (w:v))) : mergeStr rest
    w :rest -> getStr (w:v) rest
    -- NOTE(review): an unterminated string reaches end of input here
    -- and the accumulated words are returned un-joined -- confirm
    -- whether this is intended
    [] -> reverse v
|
||||
|
||||
-- | Tokenize words, recognizing metavariables and bound variables
-- (see 'mkCFTokVar').
tokVars :: String -> [CFTok]
tokVars input = map mkCFTokVar (wordsLits input)
|
||||
|
||||
-- Recognize a simple decimal literal: optional leading digits, a dot,
-- and at least one digit after the dot.
isFloat s = case span isDigit s of
  (_, '.' : frac@(_:_)) -> all isDigit frac
  _                     -> False
|
||||
|
||||
-- Recognize a quoted string: at least two characters with matching
-- single or double quotes at both ends.
isString (q : rest@(_:_)) = q == last rest && (q == '\'' || q == '"')
isString _ = False
|
||||
|
||||
|
||||
-- | Classify one (already merged) word: quoted strings become string
-- literals, decimal numbers float literals, digit sequences integer
-- literals; everything else is an ordinary token.
mkCFTok :: String -> CFTok
mkCFTok s = case s of
  '"' :cs@(_:_) | last cs == '"' -> tL $ init cs
  '\'':cs@(_:_) | last cs == '\'' -> tL $ init cs --- 's Gravenhage
  _:_ | isFloat s -> tF s
  _:_ | all isDigit s -> tI s
  _ -> tS s
|
||||
|
||||
-- | Classify one word in variable mode: @?...@ is a metavariable;
-- @x@, @x_...@ and @$...$@ are bound variables; the rest are
-- ordinary tokens.
mkCFTokVar :: String -> CFTok
mkCFTokVar s = case s of
  '?':_:_ -> tM s --- "?" --- compat with prCF
  'x':'_':_ -> tV s
  'x':[] -> tV s
  '$':xs@(_:_) -> if last s == '$' then tV (init xs) else tS s
  _ -> tS s
|
||||
|
||||
-- | Lift a tokenizer to variable mode: reclassify each plain token
-- with 'mkCFTokVar', leaving all other tokens untouched.
mkTokVars :: (String -> [CFTok]) -> String -> [CFTok]
mkTokVars tokenize input = map reclassify (tokenize input)
  where
    reclassify (TS s) = mkCFTokVar s
    reclassify other  = other
||||
|
||||
-- | Classify an unknown word as a literal: float, integer, or string.
mkLit :: String -> CFTok
mkLit s =
  if isFloat s then tF s
  else if all isDigit s then tI s
  else tL s
|
||||
|
||||
-- obsolete
-- | Like 'mkLit', but wraps non-numeric words in single quotes.
mkTL :: String -> CFTok
mkTL s
  | isFloat s = tF s
  | all isDigit s = tI s
  | otherwise = tL ("'" ++ s ++ "'")
|
||||
|
||||
|
||||
-- | Haskell lexer, usable for much code
--
-- Uses the Prelude's 'lex' to split the input; lexing stops silently
-- at the first lexical error or at end of input.
lexHaskell :: String -> [CFTok]
lexHaskell ss = case lex ss of
  [(w@(_:_),ws)] -> tS w : lexHaskell ws
  _ -> []
|
||||
|
||||
-- | somewhat shaky text lexer
--
-- Splits on punctuation and whitespace; the first word and each word
-- after a sentence-ending mark is tagged with 'tC' (capitalized in
-- context) so that sentence-initial capitals can be undone later.
lexText :: String -> [CFTok]
lexText = uncap . lx where

  lx s = case s of
    '?':'?':cs -> tS "??" : lx cs                  -- metavariable marker
    p : cs | isMPunct p -> tS [p] : uncap (lx cs)  -- sentence punctuation
    p : cs | isPunct p -> tS [p] : lx cs
    s : cs | isSpace s -> lx cs
    _ : _ -> getWord s
    _ -> []

  getWord s = tS w : lx ws where (w,ws) = span isNotSpec s
  isMPunct c = elem c ".!?"
  isPunct c = elem c ",:;()\""
  isNotSpec c = not (isMPunct c || isPunct c || isSpace c)
  -- mark the next plain token as sentence-initial
  uncap (TS (c:cs) : ws) = tC (c:cs) : ws
  uncap s = s
|
||||
|
||||
-- | lexer for C--, a mini variant of C (first-order mode).
lexC2M :: String -> [CFTok]
lexC2M input = lexC2M' False input
|
||||
|
||||
-- | The C-- lexer proper.  In higher-order mode unknown identifiers
-- become variables; in first-order mode they become quoted literals.
lexC2M' :: Bool -> String -> [CFTok]
lexC2M' isHigherOrder s = case s of
  '#':cs -> lexC $ dropWhile (/='\n') cs        -- preprocessor line / comment
  '/':'*':cs -> lexC $ dropComment cs           -- block comment
  c:cs | isSpace c -> lexC cs
  c:cs | isAlpha c -> getId s
  c:cs | isDigit c -> getLit s
  c:d:cs | isSymb [c,d] -> tS [c,d] : lexC cs   -- two-character operator first
  c:cs | isSymb [c] -> tS [c] : lexC cs
  _ -> [] --- covers end of file and unknown characters
 where
   lexC = lexC2M' isHigherOrder
   getId s = mkT i : lexC cs where (i,cs) = span isIdChar s
   getLit s = tI i : lexC cs where (i,cs) = span isDigit s ---- Float!
   isIdChar c = isAlpha c || isDigit c || elem c "'_"
   isSymb = reservedAnsiCSymbol
   dropComment s = case s of
     '*':'/':cs -> cs
     _:cs -> dropComment cs
     _ -> []
   -- reserved words stay plain tokens; other identifiers become
   -- variables (higher-order) or quoted literals (first-order)
   mkT i = if (isRes i) then (tS i) else
           if isHigherOrder then (tV i) else (tL ("'" ++ i ++ "'"))
   isRes = reservedAnsiC
|
||||
|
||||
|
||||
-- | Is the string a reserved ANSI C operator symbol?
-- (symbols are stored with value True in 'ansiCtree')
reservedAnsiCSymbol s = case lookupTree show s ansiCtree of
  Ok True -> True
  _ -> False

-- | Is the string a reserved ANSI C keyword?
-- (keywords are stored with value False in 'ansiCtree')
reservedAnsiC s = case lookupTree show s ansiCtree of
  Ok False -> True
  _ -> False

-- | for an efficient lexer: precompile this!
-- Symbols map to True, keywords to False.
ansiCtree = buildTree $ [(s,True) | s <- reservedAnsiCSymbols] ++
                        [(s,False) | s <- reservedAnsiCWords]
|
||||
|
||||
-- Two-character symbols are listed (and matched by the lexer) before
-- the one-character ones.
reservedAnsiCSymbols = words $
  "<<= >>= << >> ++ -- == <= >= *= += -= %= /= &= ^= |= " ++
  "^ { } = , ; + * - ( ) < > & % ! ~"

reservedAnsiCWords = words $
  "auto break case char const continue default " ++
  "do double else enum extern float for goto if int " ++
  "long register return short signed sizeof static struct switch typedef " ++
  "union unsigned void volatile while " ++
  "main printin putchar" --- these are not ansi-C
|
||||
|
||||
-- | turn unknown tokens into string literals; not recursively for literals 123, 'foo'
unknown2string :: (String -> Bool) -> [CFTok] -> [CFTok]
unknown2string isKnown = map mkOne where
  mkOne t@(TS s)
    | isKnown s = t
    | isFloat s = tF s
    | all isDigit s = tI s
    | otherwise = tL s
  -- unknown sentence-capitalized tokens become literals too
  mkOne t@(TC s) = if isKnown s then t else mkLit s
  mkOne t = t
|
||||
|
||||
-- | Turn unknown tokens into variables (numbers and quoted strings
-- still become literals, and @??@ becomes a metavariable); used by
-- the variable-mode lexers below.
unknown2var :: (String -> Bool) -> [CFTok] -> [CFTok]
unknown2var isKnown = map mkOne where
  mkOne t@(TS "??") = if isKnown "??" then t else tM "??"
  mkOne t@(TS s)
    | isKnown s = t
    | isFloat s = tF s
    | isString s = tL (init (tail s))
    | all isDigit s = tI s
    | otherwise = tV s
  mkOne t@(TC s) = if isKnown s then t else tV s
  mkOne t = t
|
||||
|
||||
lexTextLiteral, lexHaskellLiteral, lexHaskellVar :: (String -> Bool) -> String -> [CFTok]

-- Literal mode: unknown words become literals.  The text variants
-- are forgiving about sentence-initial capitalization ('eitherUpper').
lexTextLiteral isKnown = unknown2string (eitherUpper isKnown) . lexText
lexHaskellLiteral isKnown = unknown2string isKnown . lexHaskell

-- Variable mode: unknown words become variables.
lexHaskellVar isKnown = unknown2var isKnown . lexHaskell
lexTextVar isKnown = unknown2var (eitherUpper isKnown) . lexText
|
||||
|
||||
|
||||
-- A word counts as known if it is known with its first letter either
-- lower-cased or upper-cased.
eitherUpper isKnown word = case word of
  c : rest -> isKnown (toLower c : rest) || isKnown (toUpper c : rest)
  []       -> isKnown []
|
||||
|
||||
-- ignore unknown tokens (e.g. keyword spotting)

-- | Keep only known plain tokens; non-plain tokens pass through.
lexIgnore :: (String -> Bool) -> [CFTok] -> [CFTok]
lexIgnore isKnown = filter keep where
  keep (TS s) = isKnown s
  keep _      = True
|
||||
|
||||
79
src-3.0/GF/UseGrammar/Transfer.hs
Normal file
79
src-3.0/GF/UseGrammar/Transfer.hs
Normal file
@@ -0,0 +1,79 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Transfer
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:53 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- linearize, parse, etc, by transfer. AR 9\/10\/2003
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Transfer where
|
||||
|
||||
import GF.Grammar.Grammar
|
||||
import GF.Grammar.Values
|
||||
import GF.Grammar.AbsCompute
|
||||
import qualified GF.Canon.GFC as GFC
|
||||
import GF.Grammar.LookAbs
|
||||
import GF.Grammar.MMacros
|
||||
import GF.Grammar.Macros
|
||||
import GF.Grammar.PrGrammar
|
||||
import GF.Grammar.TypeCheck
|
||||
|
||||
import GF.Infra.Ident
|
||||
import GF.Data.Operations
|
||||
|
||||
import qualified Transfer.Core.Abs as T
|
||||
|
||||
import Control.Monad
|
||||
|
||||
|
||||
-- transfer is done in T.Exp - we only need these conversions.
|
||||
|
||||
-- | Embed a GF expression into the transfer core language, wrapping it
-- in an application of the transfer function @f@.
-- NOTE(review): the case analysis is not exhaustive over 'Exp';
-- any constructor other than those below causes a pattern-match
-- failure -- TODO confirm callers only pass these forms.
exp2core :: Ident -> Exp -> T.Exp
exp2core f = T.EApp (T.EVar (var f)) . exp2c where
  exp2c e = case e of
    App f a -> T.EApp (exp2c f) (exp2c a)
    Abs x b -> T.EAbs (T.PVVar (var x)) (exp2c b) ---- should be syntactic abstr
    Q _ c -> T.EVar (var c)
    QC _ c -> T.EVar (var c)
    K s -> T.EStr s
    EInt i -> T.EInteger $ toInteger i
    Meta m -> T.EMeta (T.TMeta (prt m)) ---- which meta symbol?
    Vr x -> T.EVar (var x) ---- should be syntactic var

  -- render any identifier in the core identifier type
  var x = T.CIdent $ prt x
|
||||
|
||||
-- | Translate a transfer-core expression back into a GF expression.
-- Variables come back as 'Vr'; GF later annotates them to Q or QC.
-- NOTE(review): not exhaustive over 'T.Exp' -- other constructors
-- (if any exist) will crash; TODO confirm against Transfer.Core.Abs.
core2exp :: T.Exp -> Exp
core2exp e = case e of
  T.EApp f a -> App (core2exp f) (core2exp a)
  T.EAbs (T.PVVar x) b -> Abs (var x) (core2exp b) ---- only from syntactic abstr
  T.EVar c -> Vr (var c) -- GF annotates to Q or QC
  T.EStr s -> K s
  T.EInteger i -> EInt $ fromInteger i
  T.EMeta _ -> uExp -- meta symbol 0, refreshed by GF
 where
   var :: T.CIdent -> Ident
   var (T.CIdent x) = zIdent x
|
||||
|
||||
|
||||
|
||||
-- The following are now obsolete (30/11/2005)
|
||||
-- linearize, parse, etc, by transfer. AR 9/10/2003
|
||||
|
||||
-- | Apply the transfer function @tra@ of grammar @gr@ to a tree:
-- determine the tree's category, look up the transfer function on
-- that category, compute the application, and re-annotate the result.
doTransfer :: GFC.CanonGrammar -> Ident -> Tree -> Err Tree
doTransfer grammar transferId tree = do
  (_, category) <- val2cat (valTree tree)
  transferFun <- lookupTransfer grammar transferId category
  reduced <- compute grammar (App transferFun (tree2exp tree))
  annotate grammar reduced
|
||||
|
||||
-- | Use a tree (e.g. linearize it) after first transferring it.
useByTransfer :: (Tree -> Err a) -> GFC.CanonGrammar -> Ident -> (Tree -> Err a)
useByTransfer lin gr tra = \t -> lin =<< doTransfer gr tra t
|
||||
|
||||
-- | Produce trees (e.g. by parsing) and transfer each of them.
mkByTransfer :: (a -> Err [Tree]) -> GFC.CanonGrammar -> Ident -> (a -> Err [Tree])
mkByTransfer parse gr tra = \s -> do
  ts <- parse s
  mapM (doTransfer gr tra) ts
|
||||
77
src-3.0/GF/UseGrammar/TreeSelections.hs
Normal file
77
src-3.0/GF/UseGrammar/TreeSelections.hs
Normal file
@@ -0,0 +1,77 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : TreeSelections
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- choose shallowest trees, and remove an overload resolution prefix
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.TreeSelections (
|
||||
|
||||
getOverloadResults, smallestTrs, sizeTr, depthTr
|
||||
|
||||
) where
|
||||
|
||||
import GF.Grammar.Abstract
|
||||
import GF.Grammar.Macros
|
||||
|
||||
import GF.Data.Operations
|
||||
import GF.Data.Zipper
|
||||
import Data.List
|
||||
|
||||
-- AR 2/7/2007
|
||||
-- The top-level function takes a set of trees (typically parses)
|
||||
-- and returns the list of those trees that have the minimum size.
|
||||
-- In addition, the overload prefix "ovrld123_", is removed
|
||||
-- from each constructor in which it appears. This is used for
|
||||
-- showing the library API constructors in a parsable grammar.
|
||||
-- TODO: access the generic functions smallestTrs, sizeTr, depthTr from shell
|
||||
|
||||
-- | From a set of parses, keep only the size-minimal trees, after
-- stripping the overload-resolution prefix ("ovrld...") from every
-- constructor in which it appears.
getOverloadResults :: [Tree] -> [Tree]
getOverloadResults ts = smallestTrs sizeTr [mkOverload "ovrld" t | t <- ts]
|
||||
|
||||
-- NB: this does not always give the desired result, since
|
||||
-- some genuine alternatives may be deeper: now we will exclude the
|
||||
-- latter of
|
||||
--
|
||||
-- mkCl this_NP love_V2 (mkNP that_NP here_Adv)
|
||||
-- mkCl this_NP (mkVP (mkVP love_V2 that_NP) here_Adv)
|
||||
--
|
||||
-- A perfect method would know the definitional equivalences of constructors.
|
||||
--
|
||||
-- Notice also that size is a better measure than depth, because:
|
||||
-- 1. Global depth does not exclude the latter of
|
||||
--
|
||||
-- mkCl (mkNP he_Pron) love_V2 that_NP
|
||||
-- mkCl (mkNP he_Pron) (mkVP love_V2 that_NP)
|
||||
--
|
||||
-- 2. Length is needed to exclude the latter of
|
||||
--
|
||||
-- mkS (mkCl (mkNP he_Pron) love_V2 that_NP)
|
||||
-- mkS presentTense (mkCl (mkNP he_Pron) love_V2 that_NP)
|
||||
--
|
||||
|
||||
-- | Keep the trees of minimal size under the given measure.
-- Returns [] for empty input instead of crashing in 'minimum'
-- (previously a runtime error); the measure is computed once per tree.
smallestTrs :: (Tr a -> Int) -> [Tr a] -> [Tr a]
smallestTrs _ [] = []
smallestTrs size ts = map fst (filter ((== smallest) . snd) sized) where
  sized = [(t, size t) | t <- ts]
  -- renamed from the misleading 'mx': this is the minimum, not maximum
  smallest = minimum (map snd sized)
|
||||
|
||||
-- | Depth of a tree: 1 for a leaf, otherwise one more than the
-- deepest subtree.
depthTr :: Tr a -> Int
depthTr (Tr (_, subtrees))
  | null subtrees = 1
  | otherwise = 1 + maximum (map depthTr subtrees)
|
||||
|
||||
-- | Total number of nodes in a tree.
sizeTr :: Tr a -> Int
sizeTr (Tr (_, subtrees)) = 1 + foldr ((+) . sizeTr) 0 subtrees
|
||||
|
||||
-- remove from each constant a prefix starting with "pref", up to first "_"
|
||||
-- example format: ovrld123_mkNP
|
||||
|
||||
-- | Remove from each constant a prefix starting with @pref@, up to and
-- including the first underscore (example format: ovrld123_mkNP).
mkOverload :: String -> Tree -> Tree
mkOverload pref = mapTr (changeAtom overAtom) where
  overAtom a = case a of
    AtC (m, IC f) | isPrefixOf pref f ->
      -- 'drop 1' instead of 'tail': a constant that matches the prefix
      -- but contains no underscore now maps to the empty name instead
      -- of crashing on 'tail []'
      AtC (m, IC (drop 1 (dropWhile (/='_') f)))
    _ -> a
|
||||
251
src-3.0/GF/UseGrammar/Treebank.hs
Normal file
251
src-3.0/GF/UseGrammar/Treebank.hs
Normal file
@@ -0,0 +1,251 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Treebank
|
||||
-- Maintainer : AR
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- Generate multilingual treebanks. AR 8\/2\/2006
|
||||
--
|
||||
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||
--
|
||||
-- Purpose: to generate treebanks.
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.UseGrammar.Treebank (
|
||||
mkMultiTreebank,
|
||||
mkUniTreebank,
|
||||
multi2uniTreebank,
|
||||
uni2multiTreebank,
|
||||
testMultiTreebank,
|
||||
treesTreebank,
|
||||
getTreebank,
|
||||
getUniTreebank,
|
||||
readUniTreebanks,
|
||||
readMultiTreebank,
|
||||
lookupTreebank,
|
||||
assocsTreebank,
|
||||
isWordInTreebank,
|
||||
printAssoc,
|
||||
mkCompactTreebank
|
||||
) where
|
||||
|
||||
import GF.Compile.ShellState
|
||||
import GF.UseGrammar.Linear -- (linTree2string)
|
||||
import GF.UseGrammar.Custom
|
||||
import GF.UseGrammar.GetTree (string2tree)
|
||||
import GF.Grammar.TypeCheck (annotate)
|
||||
import GF.Canon.CMacros (noMark)
|
||||
import GF.Grammar.Grammar (Trm)
|
||||
import GF.Grammar.MMacros (exp2tree)
|
||||
import GF.Grammar.Macros (zIdent)
|
||||
import GF.Grammar.PrGrammar (prt_,prt)
|
||||
import GF.Grammar.Values (tree2exp)
|
||||
import GF.Data.Operations
|
||||
import GF.Infra.Option
|
||||
import GF.Infra.Ident (Ident)
|
||||
import GF.Infra.UseIO
|
||||
import qualified GF.Grammar.Abstract as A
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.List as L
|
||||
import Control.Monad (liftM)
|
||||
import System.FilePath
|
||||
|
||||
-- Generate a treebank with a multilingual grammar. AR 8/2/2006
|
||||
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||
|
||||
-- keys are trees; format: XML file
|
||||
-- each entry pairs a printed tree with its (language, linearization)s
type MultiTreebank = [(String,[(String,String)])] -- tree,lang,lin
|
||||
|
||||
-- keys are strings; format: string TAB tree TAB ... TAB tree
|
||||
-- keyed by linearized strings; values are lists of printed trees
type UniTreebank = Treebank -- M.Map String [String] -- string,tree
|
||||
|
||||
-- both formats can be read from both kinds of files
|
||||
-- | Read a treebank file in either format, returning per-language
-- unilingual treebanks.  A multilingual (XML) file is split by
-- language; a unilingual file is tagged with its own extensionless
-- file name as the language identifier.
readUniTreebanks :: FilePath -> IO [(Ident,UniTreebank)]
readUniTreebanks file = do
  contents <- readFileIf file
  let ls = lines contents
  if isMultiTreebank contents
    then return (multi2uniTreebank (getTreebank ls))
    else return [(zIdent (dropExtension file), getUniTreebank ls)]
|
||||
|
||||
-- | Read a treebank file in either format as a multilingual treebank.
-- A unilingual file is lifted to a one-language treebank named after
-- the extensionless file.
readMultiTreebank :: FilePath -> IO MultiTreebank
readMultiTreebank file = do
  contents <- readFileIf file
  let ls = lines contents
  return $
    if isMultiTreebank contents
      then getTreebank ls
      else uni2multiTreebank (zIdent (dropExtension file)) (getUniTreebank ls)
|
||||
|
||||
-- | A multilingual treebank file is recognized by its XML root tag.
-- (Replaces the magic-number test @take 10 s == "<treebank>"@ with an
-- explicit prefix check; same result for all inputs.)
isMultiTreebank :: String -> Bool
isMultiTreebank = L.isPrefixOf "<treebank>"
|
||||
|
||||
-- | Split a multilingual treebank into one unilingual treebank per
-- language.  The language list is taken from the first entry; every
-- entry is assumed to cover the same languages -- TODO confirm.
multi2uniTreebank :: MultiTreebank -> [(Ident,UniTreebank)]
multi2uniTreebank [] = []
multi2uniTreebank mt@((_,langLins):_) =
  [(zIdent la, treebankFor la) | (la,_) <- langLins]
 where
  -- fixed: the inner comprehension used to shadow the outer 'lls',
  -- which made the code easy to misread (behavior unchanged)
  treebankFor la =
    M.fromListWith (++) [(s,[t]) | (t,lins) <- mt, (l,s) <- lins, l==la]
|
||||
|
||||
-- | Lift a unilingual treebank to the multilingual format under the
-- given language identifier.
uni2multiTreebank :: Ident -> UniTreebank -> MultiTreebank
uni2multiTreebank la tb = concatMap entry (assocsTreebank tb) where
  entry (s, ts) = [(t, [(prt_ la, s)]) | t <- ts]
|
||||
|
||||
-- | the main functions
|
||||
|
||||
-- builds a treebank where trees are the keys, and writes a file (opt. XML)
|
||||
-- | Build a treebank keyed by trees and render it (optionally in XML).
-- With the "compact" option the compact numeric format is produced;
-- otherwise each tree is emitted together with its linearization in
-- every language of the shell state.
mkMultiTreebank :: Options -> ShellState -> String -> [A.Tree] -> Res
mkMultiTreebank opts sh com trees
  | oElem (iOpt "compact") opts = mkCompactTreebank opts sh trees
-- second equation: reached when the single guard above fails
mkMultiTreebank opts sh com trees =
  putInXML opts "treebank" comm (concatMap mkItem tris) where
    -- one <item> per numbered tree: the tree plus one <lin> per language
    mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t ++ concatMap (mkLin t) langs)
    -- mkItem(t,i)= putInXML opts "item" (cat i) (mkTree t >>mapM_ (mkLin t) langs)
    mkTree t = putInXML opts "tree" [] (puts $ showTree t)
    mkLin t lg = putInXML opts "lin" (lang lg) (puts $ linearize opts sh lg t)

    langs = [prt_ l | l <- allLanguages sh]
    -- the command/abstract attributes are currently disabled
    comm = "" --- " command=" ++ show com +++ "abstract=" ++ show abstr
    abstr = "" --- "Abs" ----
    cat i = " number=" ++ show (show i) --- " cat=" ++ show "S" ----
    lang lg = " lang=" ++ show (prt_ (zIdent lg))
    tris = zip trees [1..]
|
||||
|
||||
-- builds a unilingual treebank where strings are the keys into an internal treebank
|
||||
|
||||
-- | Build a unilingual treebank: linearized strings become the keys,
-- printed trees the values.
mkUniTreebank :: Options -> ShellState -> Language -> [A.Tree] -> Treebank
mkUniTreebank opts sh lg trees = M.fromListWith (++) (map entry trees)
 where
  langName = prt_ lg
  entry t = (linearize opts sh langName t, [prt_ t])
|
||||
|
||||
-- reads a treebank and linearizes its trees again, printing all differences
|
||||
-- | Re-linearize every tree of a treebank and report, as XML "diff"
-- elements, those trees whose new linearization differs from the one
-- stored in the treebank.
testMultiTreebank :: Options -> ShellState -> String -> Res
testMultiTreebank opts sh = putInXML opts "testtreebank" [] .
                            concatMap testOne .
                            getTreebanks . lines
 where
   -- input triple: (printed tree, language, stored linearization)
   testOne (e,lang,str0) = do
     let tr = annot gr e
     let str = linearize opts sh lang tr
     -- matching linearizations produce no output; differing ones a diff
     if str == str0 then ret else putInXML opts "diff" [] $ concat [
       putInXML opts "tree" [] (puts $ showTree tr),
       putInXML opts "old" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str0,
       putInXML opts "new" (" lang=" ++ show (prt_ (zIdent lang))) $ puts str
       ]
   -- trees are re-annotated against the first grammar of the state
   gr = firstStateGrammar sh
|
||||
|
||||
-- writes all the trees of the treebank
|
||||
-- | Extract all printed trees from a multilingual treebank text.
-- The options argument is currently ignored.
-- (The local helper re-implemented 'map fst'.)
treesTreebank :: Options -> String -> [String]
treesTreebank _ = map fst . getTreebank . lines
|
||||
|
||||
-- string vs. IO
|
||||
-- Res abstracts "produce output": currently output is collected as a
-- list of strings; an IO-based variant is sketched in the comments.
type Res = [String] -- IO ()

-- emit one string of output
puts :: String -> Res
puts = return -- putStrLn

-- emit nothing
ret = [] -- return ()
|
||||
--
|
||||
|
||||
-- here strings are keys
|
||||
-- | All (string, trees) pairs of a unilingual treebank, in key order.
assocsTreebank :: UniTreebank -> [(String,[String])]
assocsTreebank tb = M.toList tb
|
||||
|
||||
-- | Does the word occur in any key string of the treebank?
-- The original built a fresh Set of every word on each call; a plain
-- short-circuiting search gives the same answer without the
-- O(n log n) construction.
isWordInTreebank :: UniTreebank -> String -> Bool
isWordInTreebank tb w = any (elem w . words) (M.keys tb)
|
||||
|
||||
-- | Render one unilingual-treebank entry: the key string followed by
-- its trees, tab-separated (idiomatic 'intercalate'; adds the missing
-- type signature).
printAssoc :: (String, [String]) -> String
printAssoc (s, ts) = L.intercalate "\t" (s : ts)
|
||||
|
||||
-- | Flatten a multilingual treebank text into
-- (tree, language, linearization) triples.
getTreebanks :: [String] -> [(String,String,String)]
getTreebanks ls = [ (t, lg, lin) | (t, lls) <- getTreebank ls, (lg, lin) <- lls ]
|
||||
|
||||
-- | Parse the lines of a multilingual treebank XML file into
-- (tree, [(language, linearization)]) entries.
-- NOTE(review): this parser is line-oriented and partial: it assumes
-- each <item> contains one <tree> block followed by <lin> blocks of
-- exactly three lines each (open tag, content, close tag); malformed
-- input can crash in 'last' or in the incomplete patterns below.
getTreebank :: [String] -> MultiTreebank
getTreebank ll = case ll of
  -- at least the opening line plus three more lines must remain
  l:ls@(_:_:_) ->
    let (l1,l2) = getItem ls
        (tr,lins) = getTree l1
        lglins = getLins lins
    in (tr,lglins) : getTreebank l2
  _ -> []
 where
   -- split off one item: the lines up to the closing </item> tag
   getItem = span ((/="</item") . take 6)

   -- skip the <tree> tag; the last line before </tree> is the printed
   -- tree, and the remainder of the item holds the lin blocks
   getTree (_:ss) =
     let (t1,t2) = span ((/="</tree") . take 6) ss in (last t1, drop 1 t2)

   -- each linearization spans three lines: <lin lang="..">, content, </lin>
   getLins (beg:str:end:ss) = (getLang beg, str):getLins ss
   getLins _ = []

   -- value of the first quoted attribute on the line (the language)
   getLang = takeWhile (/='"') . tail . dropWhile (/='"')
|
||||
|
||||
-- | Parse a unilingual treebank: each line holds a string followed by
-- its trees, tab-separated; lines without any field are skipped by the
-- pattern in the comprehension.
getUniTreebank :: [String] -> UniTreebank
getUniTreebank ls = M.fromListWith (++) entries where
  entries = [ (key, trees) | key:trees <- map (chunks '\t') ls ]
|
||||
|
||||
-- | All trees stored under a string key; [] if the key is absent.
-- ('M.findWithDefault' replaces the hand-rolled @maybe [] id . lookup@.)
lookupTreebank :: Treebank -> String -> [String]
lookupTreebank tb s = M.findWithDefault [] s tb
|
||||
|
||||
-- | Parse a string into a tree of the grammar and type-annotate it.
-- NOTE(review): a failed annotation becomes a runtime error
-- ("illegal tree") via errVal rather than being reported to callers.
annot :: StateGrammar -> String -> A.Tree
annot gr s = errVal (error "illegal tree") $ do
  let t = tree2exp $ string2tree gr s
  annotate (grammar gr) t
|
||||
|
||||
-- | Wrap output in an XML element (tag plus raw attribute string)
-- when the XML option is set; otherwise pass the output through
-- without tags.
putInXML :: Options -> String -> String -> Res -> Res
putInXML opts tag attrs body = openTag ++ body ++ closeTag where
  openTag = whenXML (puts (tagXML (tag ++ attrs)))
  closeTag = whenXML (puts (tagXML ('/':tag)))
  whenXML out = if oElem showXML opts then out else []
|
||||
|
||||
|
||||
-- | Surround a string with XML angle brackets.
tagXML :: String -> String
tagXML s = concat ["<", s, ">"]
|
||||
|
||||
-- print the treebank in a compact format:
|
||||
-- first a sorted list of all words, referrable by index
|
||||
-- then the linearization of each tree, as sequences of word indices
|
||||
-- this format is usable in embedded translation systems.
|
||||
|
||||
-- | Print the treebank in the compact numeric format (word table plus
-- index sequences), usable in embedded translation systems.
mkCompactTreebank :: Options -> ShellState -> [A.Tree] -> [String]
mkCompactTreebank opts sh trees =
  printCompactTreebank (mkJustMultiTreebank opts sh trees)
|
||||
|
||||
-- | Render the compact treebank format:
-- line 1: counts (words, languages, trees, max sentence length);
-- line 2: the language names; line 3: the sorted word table;
-- then, after a blank line, one line of 1-based word indices per
-- linearization (grouped language by language), each padded with "0"
-- to the maximum sentence length.
printCompactTreebank :: (MultiTreebank,[String]) -> [String]
printCompactTreebank (tb,lgs) = stat : langLine : unwords ws : "\n" : linLines where
  -- sorted duplicate-free word table; Set replaces the quadratic
  -- L.sort . L.nub with an O(n log n) equivalent
  ws = S.toAscList $ S.fromList $ concatMap (concatMap (words . snd) . snd) tb

  linLines = map (unwords . pad) encoded
  encoded = map (map (show . encode) . words) allLins
  -- all linearizations, grouped language by language
  allLins = concat [[snd (ls !! i) | (_,ls) <- tb] | i <- [0..length lgs - 1]]
  -- every word comes from ws by construction; fail loudly (instead of
  -- the former bare 'undefined') if that invariant is ever broken
  encode w = M.findWithDefault (error ("printCompactTreebank: unknown word " ++ w)) w wmap
  wmap = M.fromAscList $ zip ws [1..]

  stat = unwords $ map show [length ws, length lgs, length tb, smax]
  langLine = unwords lgs
  -- the 0 guards 'maximum' against an empty treebank
  smax = maximum (0 : map length encoded)
  -- renamed parameter: the original 'pad ws' shadowed the word table
  pad line = line ++ replicate (smax - length line) "0"
|
||||
|
||||
-- [(String,[(String,String)])] -- tree,lang,lin
|
||||
-- | Linearize every tree in every language of the shell state,
-- returning the multilingual treebank together with the language list.
mkJustMultiTreebank :: Options -> ShellState -> [A.Tree] -> (MultiTreebank,[String])
mkJustMultiTreebank opts sh ts = (entries, langs) where
  entries = [ (prt_ t, [ (la, lin la t) | la <- langs ]) | t <- ts ]
  langs = map prt_ (allLanguages sh)
  lin = linearize opts sh
|
||||
|
||||
|
||||
--- these handy functions are borrowed from EmbedAPI
|
||||
|
||||
-- | Linearize a tree in the given (string-named) language, honouring
-- the display options: record view, table view, all-forms view, or
-- (by default) a plain string run through the custom untokenizer.
-- Errors from the underlying linearizers are rendered as strings.
linearize opts mgr lang = lin where
  sgr = stateGrammarOfLangOpt False mgr zlang
  cgr = canModules mgr
  zlang = zIdent lang
  -- untokenizer chosen from the grammar's options combined with the
  -- call options (falling back to the default untokenizer)
  untok = customOrDefault (addOptions opts (stateOptions sgr)) useUntokenizer customUntokenizer sgr
  lin
    | oElem showRecord opts = err id id . liftM prt . linearizeNoMark cgr zlang
    | oElem tableLin opts =
        err id id . liftM (unlines . map untok . prLinTable True) . allLinTables True cgr zlang
    | oElem showAll opts =
        err id id . liftM (unlines . map untok . prLinTable False) . allLinTables False cgr zlang

    | otherwise = untok . linTree2string noMark cgr zlang
|
||||
|
||||
-- | Print a tree as an expression.
showTree = prt_ . tree2exp
|
||||
Reference in New Issue
Block a user