mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-09 04:59:31 -06:00
"Committed_by_peb"
This commit is contained in:
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/02/18 19:21:07 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.10 $
|
||||
-- > CVS $Revision: 1.11 $
|
||||
--
|
||||
-- symbols (categories, functions) for context-free grammars.
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -41,7 +41,7 @@ import PrGrammar
|
||||
import Str
|
||||
import Char (toLower, toUpper)
|
||||
|
||||
-- this type should be abstract
|
||||
-- | this type should be abstract
|
||||
data CFTok =
|
||||
TS String -- ^ normal strings
|
||||
| TC String -- ^ strings that are ambiguous between upper or lower case
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/18 14:55:32 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
-- > CVS $Revision: 1.6 $
|
||||
--
|
||||
-- All possible instantiations of different grammar formats used in conversion from GFC
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -44,6 +44,7 @@ data Name = Name Fun [Profile (SyntaxForest Fun)]
|
||||
name2fun :: Name -> Fun
|
||||
name2fun (Name fun _) = fun
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * profiles
|
||||
|
||||
-- | A profile is a simple representation of a function on a number of arguments.
|
||||
@@ -155,7 +156,10 @@ data MCat = MCat ECat [ELabel] deriving (Eq, Ord, Show)
|
||||
type MLabel = ELabel
|
||||
|
||||
mcat2ecat :: MCat -> ECat
|
||||
mcat2ecat (MCat mcat _) = mcat
|
||||
mcat2ecat (MCat cat _) = cat
|
||||
|
||||
mcat2scat :: MCat -> SCat
|
||||
mcat2scat = ecat2scat . mcat2ecat
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * CFG
|
||||
@@ -164,6 +168,12 @@ type CGrammar = CFGrammar CCat Name Token
|
||||
type CRule = CFRule CCat Name Token
|
||||
data CCat = CCat ECat ELabel deriving (Eq, Ord, Show)
|
||||
|
||||
ccat2ecat :: CCat -> ECat
|
||||
ccat2ecat (CCat cat _) = cat
|
||||
|
||||
ccat2scat :: CCat -> SCat
|
||||
ccat2scat = ecat2scat . ccat2ecat
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * pretty-printing
|
||||
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:51 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- Simple implementation of deductive chart parsing
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -21,7 +21,7 @@ module GF.NewParsing.GeneralChart
|
||||
emptyChart,
|
||||
chartMember,
|
||||
chartInsert, chartInsertM,
|
||||
chartList,
|
||||
chartList, chartKeys,
|
||||
addToChart, addToChartM
|
||||
) where
|
||||
|
||||
@@ -35,6 +35,7 @@ import Monad (foldM)
|
||||
|
||||
chartLookup :: (Ord item, Ord key) => ParseChart item key -> key -> [item]
|
||||
chartList :: (Ord item, Ord key) => ParseChart item key -> [item]
|
||||
chartKeys :: (Ord item, Ord key) => ParseChart item key -> [key]
|
||||
buildChart :: (Ord item, Ord key) =>
|
||||
(item -> key) -- ^ key lookup function
|
||||
-> [ParseChart item key -> item -> [item]] -- ^ list of inference rules as functions
|
||||
@@ -95,6 +96,7 @@ emptyChart = KC rbmEmpty
|
||||
chartMember (KC tree) item key = rbmElem key item tree
|
||||
chartLookup (KC tree) key = rbmLookup key tree
|
||||
chartList (KC tree) = concatMap snd (rbmList tree)
|
||||
chartKeys (KC tree) = map fst (rbmList tree)
|
||||
chartInsert (KC tree) item key = fmap KC (rbmInsert key item tree)
|
||||
|
||||
chartInsertM (KC tree) item keys = fmap KC (foldM insertItem tree keys)
|
||||
|
||||
@@ -4,17 +4,18 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:52:50 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- Basic GCFG formalism (derived from Pollard 1984)
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Formalism.GCFG
|
||||
( Grammar, Rule(..), Abstract(..), Concrete(..)
|
||||
) where
|
||||
module GF.Formalism.GCFG where
|
||||
|
||||
import GF.Formalism.Utilities (SyntaxChart)
|
||||
import GF.Data.Assoc (assocMap, accumAssoc)
|
||||
import GF.Data.SortedList (nubsort, groupPairs)
|
||||
import GF.Infra.Print
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -28,6 +29,10 @@ data Abstract cat name = Abs cat [cat] name
|
||||
data Concrete lin term = Cnc lin [lin] term
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
abstract2chart :: (Ord n, Ord e) => [Abstract e n] -> SyntaxChart n e
|
||||
abstract2chart rules = accumAssoc groupPairs $
|
||||
[ (e, (n, es)) | Abs e es n <- rules ]
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
instance (Print c, Print n, Print l, Print t) => Print (Rule n c l t) where
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/16 05:40:49 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.3 $
|
||||
-- > CVS $Revision: 1.4 $
|
||||
--
|
||||
-- Basic type declarations and functions for grammar formalisms
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -105,7 +105,9 @@ inputMany toks = MkInput inEdges inBounds inFrom inTo inToken
|
||||
|
||||
|
||||
------------------------------------------------------------
|
||||
-- * charts, forests & trees
|
||||
-- * representations of syntactical analyses
|
||||
|
||||
-- ** charts as finite maps over edges
|
||||
|
||||
-- | The values of the chart, a list of key-daughters pairs,
|
||||
-- has unique keys. In essence, it is a map from 'n' to daughters.
|
||||
@@ -118,6 +120,8 @@ type SyntaxChart n e = Assoc e [(n, [[e]])]
|
||||
-- type Forest n = GeneralTrie n (SList [Forest n]) Bool
|
||||
-- (the Bool == isMeta)
|
||||
|
||||
-- ** syntax forests
|
||||
|
||||
data SyntaxForest n = FMeta
|
||||
| FNode n [[SyntaxForest n]]
|
||||
-- ^ The outer list should be a set (not necessarily sorted)
|
||||
@@ -126,24 +130,28 @@ data SyntaxForest n = FMeta
|
||||
-- are (conjunctive) concatenative nodes
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data SyntaxTree n = TMeta | TNode n [SyntaxTree n]
|
||||
deriving (Eq, Ord, Show)
|
||||
instance Functor SyntaxForest where
|
||||
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
|
||||
fmap f (FMeta) = FMeta
|
||||
|
||||
forestName :: SyntaxForest n -> Maybe n
|
||||
forestName (FNode n _) = Just n
|
||||
forestName (FMeta) = Nothing
|
||||
|
||||
treeName :: SyntaxTree n -> Maybe n
|
||||
treeName (TNode n _) = Just n
|
||||
treeName (TMeta) = Nothing
|
||||
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
|
||||
unifyManyForests = foldM unifyForests FMeta
|
||||
|
||||
instance Functor SyntaxTree where
|
||||
fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
|
||||
fmap f (TMeta) = TMeta
|
||||
|
||||
instance Functor SyntaxForest where
|
||||
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
|
||||
fmap f (FMeta) = FMeta
|
||||
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
|
||||
-- and all children can be unified
|
||||
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
|
||||
unifyForests FMeta forest = return forest
|
||||
unifyForests forest FMeta = return forest
|
||||
unifyForests (FNode name1 children1) (FNode name2 children2)
|
||||
| name1 == name2 && not (null children) = return $ FNode name1 children
|
||||
| otherwise = fail "forest unification failure"
|
||||
where children = [ forests | forests1 <- children1, forests2 <- children2,
|
||||
sameLength forests1 forests2,
|
||||
forests <- zipWithM unifyForests forests1 forests2 ]
|
||||
|
||||
{- måste tänka mer på detta:
|
||||
compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n)
|
||||
@@ -168,11 +176,33 @@ compactForests = map joinForests . groupBy eqNames . sortForests
|
||||
_ -> nubsort fss
|
||||
-}
|
||||
|
||||
-- ** conversions between representations
|
||||
-- ** syntax trees
|
||||
|
||||
forest2trees :: SyntaxForest n -> SList (SyntaxTree n)
|
||||
forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees
|
||||
forest2trees (FMeta) = [TMeta]
|
||||
data SyntaxTree n = TMeta | TNode n [SyntaxTree n]
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
instance Functor SyntaxTree where
|
||||
fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
|
||||
fmap f (TMeta) = TMeta
|
||||
|
||||
treeName :: SyntaxTree n -> Maybe n
|
||||
treeName (TNode n _) = Just n
|
||||
treeName (TMeta) = Nothing
|
||||
|
||||
unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n)
|
||||
unifyManyTrees = foldM unifyTrees TMeta
|
||||
|
||||
-- | two trees can be unified, if either is 'TMeta',
|
||||
-- or both have the same parent, and their children can be unified
|
||||
unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n)
|
||||
unifyTrees TMeta tree = return tree
|
||||
unifyTrees tree TMeta = return tree
|
||||
unifyTrees (TNode name1 children1) (TNode name2 children2)
|
||||
| name1 == name2 && sameLength children1 children2
|
||||
= liftM (TNode name1) $ zipWithM unifyTrees children1 children2
|
||||
| otherwise = fail "tree unification failure"
|
||||
|
||||
-- ** conversions between representations
|
||||
|
||||
chart2forests :: (Ord n, Ord e) =>
|
||||
SyntaxChart n e -- ^ The complete chart
|
||||
@@ -203,38 +233,9 @@ chart2forests chart isMeta = es2fs
|
||||
-}
|
||||
|
||||
|
||||
-- ** operations on forests
|
||||
|
||||
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
|
||||
unifyManyForests = foldM unifyForests FMeta
|
||||
|
||||
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
|
||||
-- and all children can be unified
|
||||
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
|
||||
unifyForests FMeta forest = return forest
|
||||
unifyForests forest FMeta = return forest
|
||||
unifyForests (FNode name1 children1) (FNode name2 children2)
|
||||
| name1 == name2 && not (null children) = return $ FNode name1 children
|
||||
| otherwise = fail "forest unification failure"
|
||||
where children = [ forests | forests1 <- children1, forests2 <- children2,
|
||||
sameLength forests1 forests2,
|
||||
forests <- zipWithM unifyForests forests1 forests2 ]
|
||||
|
||||
|
||||
-- ** operations on trees
|
||||
|
||||
unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n)
|
||||
unifyManyTrees = foldM unifyTrees TMeta
|
||||
|
||||
-- | two trees can be unified, if either is 'TMeta',
|
||||
-- or both have the same parent, and their children can be unified
|
||||
unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n)
|
||||
unifyTrees TMeta tree = return tree
|
||||
unifyTrees tree TMeta = return tree
|
||||
unifyTrees (TNode name1 children1) (TNode name2 children2)
|
||||
| name1 == name2 && sameLength children1 children2
|
||||
= liftM (TNode name1) $ zipWithM unifyTrees children1 children2
|
||||
| otherwise = fail "tree unification failure"
|
||||
forest2trees :: SyntaxForest n -> SList (SyntaxTree n)
|
||||
forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees
|
||||
forest2trees (FMeta) = [TMeta]
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/11 13:53:38 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.24 $
|
||||
-- > CVS $Revision: 1.25 $
|
||||
--
|
||||
-- Options and flags used in GF shell commands and files.
|
||||
--
|
||||
@@ -146,12 +146,25 @@ rawParse = iOpt "raw"
|
||||
firstParse = iOpt "1"
|
||||
dontParse = iOpt "read"
|
||||
|
||||
newParser, newerParser :: Option
|
||||
newParser = iOpt "new"
|
||||
newerParser = iOpt "newer"
|
||||
|
||||
{-
|
||||
useParserMCFG, useParserMCFGviaCFG, useParserCFG, useParserCF :: Option
|
||||
|
||||
useParserMCFG = iOpt "mcfg"
|
||||
useParserMCFGviaCFG = iOpt "mcfg-via-cfg"
|
||||
useParserCFG = iOpt "cfg"
|
||||
useParserCF = iOpt "cf"
|
||||
-}
|
||||
|
||||
-- ** grammar formats
|
||||
|
||||
showAbstr, showXML, showOld, showLatex, showFullForm,
|
||||
showEBNF, showCF, showWords, showOpts,
|
||||
isCompiled, isHaskell, noCompOpers, retainOpers,
|
||||
newParser, newerParser, noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option
|
||||
noCF, checkCirc, noCheckCirc, lexerByNeed, useUTF8id :: Option
|
||||
defaultGrOpts :: [Option]
|
||||
|
||||
showAbstr = iOpt "abs"
|
||||
@@ -169,8 +182,6 @@ isHaskell = iOpt "gfhs"
|
||||
noCompOpers = iOpt "nocomp"
|
||||
retainOpers = iOpt "retain"
|
||||
defaultGrOpts = []
|
||||
newParser = iOpt "new"
|
||||
newerParser = iOpt "newer"
|
||||
noCF = iOpt "nocf"
|
||||
checkCirc = iOpt "nocirc"
|
||||
noCheckCirc = iOpt "nocheckcirc"
|
||||
@@ -264,6 +275,7 @@ gStartCat :: String -> Option
|
||||
useTokenizer = aOpt "lexer"
|
||||
useUntokenizer = aOpt "unlexer"
|
||||
useParser = aOpt "parser"
|
||||
-- useStrategy = aOpt "strategy" -- parsing strategy
|
||||
withFun = aOpt "fun"
|
||||
firstCat = aOpt "cat"
|
||||
gStartCat = aOpt "startcat"
|
||||
|
||||
@@ -4,15 +4,17 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/18 14:55:33 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
-- > CVS $Revision: 1.3 $
|
||||
--
|
||||
-- Chart parsing of grammars in CF format
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.NewParsing.CF (parse) where
|
||||
|
||||
import Operations (errVal)
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
|
||||
@@ -29,7 +31,7 @@ type Name = CFI.CFFun
|
||||
type Category = CFI.CFCat
|
||||
|
||||
parse :: String -> CF.CF -> Category -> CF.CFParser
|
||||
parse = buildParser . P.parseCF
|
||||
parse = buildParser . errVal (errVal undefined (P.parseCF "")) . P.parseCF
|
||||
|
||||
buildParser :: P.CFParser Category Name Token -> CF.CF -> Category -> CF.CFParser
|
||||
buildParser parser cf start tokens = (parseResults, parseInformation)
|
||||
@@ -38,7 +40,7 @@ buildParser parser cf start tokens = (parseResults, parseInformation)
|
||||
theInput = input tokens
|
||||
edges = tracePrt "Parsing.CF - nr. edges" (prt.length) $
|
||||
parser pInf [start] theInput
|
||||
chart = tracePrt "Parsing.CF - size of chart" (prt . map (length.snd) . aAssocs) $
|
||||
chart = tracePrt "Parsing.CF - sz. chart" (prt . map (length.snd) . aAssocs) $
|
||||
grammar2chart $ map addCategory edges
|
||||
forests = tracePrt "Parsing.CF - nr. forests" (prt.length) $
|
||||
chart2forests chart (const False)
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/14 18:38:36 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
-- > CVS $Revision: 1.3 $
|
||||
--
|
||||
-- CFG parsing
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -14,6 +14,8 @@
|
||||
module GF.NewParsing.CFG
|
||||
(parseCF, module GF.NewParsing.CFG.PInfo) where
|
||||
|
||||
import Operations (Err(..))
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.CFG
|
||||
import GF.NewParsing.CFG.PInfo
|
||||
@@ -24,17 +26,19 @@ import qualified GF.NewParsing.CFG.General as Gen
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
|
||||
parseCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t
|
||||
parseCF "gb" = Gen.parse bottomup
|
||||
parseCF "gt" = Gen.parse topdown
|
||||
parseCF "ib" = Inc.parse (bottomup, noFilter)
|
||||
parseCF "it" = Inc.parse (topdown, noFilter)
|
||||
parseCF "ibFT" = Inc.parse (bottomup, topdown)
|
||||
parseCF "ibFB" = Inc.parse (bottomup, bottomup)
|
||||
parseCF "ibFTB" = Inc.parse (bottomup, bothFilters)
|
||||
parseCF "itF" = Inc.parse (topdown, bottomup)
|
||||
parseCF :: (Ord n, Ord c, Ord t) => String -> Err (CFParser c n t)
|
||||
parseCF "gb" = Ok $ Gen.parse bottomup
|
||||
parseCF "gt" = Ok $ Gen.parse topdown
|
||||
parseCF "ib" = Ok $ Inc.parse (bottomup, noFilter)
|
||||
parseCF "it" = Ok $ Inc.parse (topdown, noFilter)
|
||||
parseCF "ibFT" = Ok $ Inc.parse (bottomup, topdown)
|
||||
parseCF "ibFB" = Ok $ Inc.parse (bottomup, bottomup)
|
||||
parseCF "ibFTB" = Ok $ Inc.parse (bottomup, bothFilters)
|
||||
parseCF "itF" = Ok $ Inc.parse (topdown, bottomup)
|
||||
-- default parser:
|
||||
parseCF _ = parseCF "gb"
|
||||
parseCF "" = parseCF "gb"
|
||||
-- error parser:
|
||||
parseCF prs = Bad $ "Parser not defined: " ++ prs
|
||||
|
||||
bottomup = (True, False)
|
||||
topdown = (False, True)
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/19 10:46:07 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:44 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.4 $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- The main parsing module, parsing GFC grammars
|
||||
-- by translating to simpler formats, such as PMCFG and CFG
|
||||
@@ -19,28 +19,25 @@ import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
import qualified PrGrammar
|
||||
|
||||
import Monad
|
||||
import Operations (Err(..))
|
||||
|
||||
import qualified Grammar
|
||||
-- import Values
|
||||
import qualified Macros
|
||||
-- import qualified Modules
|
||||
import qualified AbsGFC
|
||||
import qualified Ident
|
||||
import Operations
|
||||
import CFIdent (CFCat, cfCat2Ident, CFTok, prCFTok)
|
||||
import CFIdent (CFCat, cfCat2Ident, CFTok, wordsCFTok)
|
||||
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Conversion.Types
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.SimpleGFC
|
||||
|
||||
import qualified GF.Formalism.GCFG as G
|
||||
import qualified GF.Formalism.SimpleGFC as S
|
||||
import qualified GF.Formalism.MCFG as M
|
||||
import qualified GF.Formalism.CFG as C
|
||||
import qualified GF.NewParsing.MCFG as PM
|
||||
import qualified GF.NewParsing.CFG as PC
|
||||
--import qualified GF.Conversion.FromGFC as From
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing information
|
||||
@@ -64,82 +61,60 @@ parse :: String -- ^ parsing strategy
|
||||
-> Ident.Ident -- ^ abstract module name
|
||||
-> CFCat -- ^ starting category
|
||||
-> [CFTok] -- ^ input tokens
|
||||
-> [Grammar.Term] -- ^ resulting GF terms
|
||||
-> Err [Grammar.Term] -- ^ resulting GF terms
|
||||
|
||||
parse (prs:strategy) pinfo abs startCat inString =
|
||||
do let inTokens = tracePrt "Parsing.GFC - input tokens" prt $
|
||||
inputMany (map wordsCFTok inString)
|
||||
forests <- selectParser prs strategy pinfo startCat inTokens
|
||||
traceM "Parsing.GFC - nr. forests" (prt (length forests))
|
||||
let filteredForests = tracePrt "Parsing.GFC - nr. filtered forests" (prt . length) $
|
||||
forests >>= applyProfileToForest
|
||||
-- compactFs = tracePrt "#compactForests" (prt . length) $
|
||||
-- tracePrt "compactForests" (prtBefore "\n") $
|
||||
-- compactForests forests
|
||||
trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||
nubsort $ filteredForests >>= forest2trees
|
||||
-- compactFs >>= forest2trees
|
||||
return $ map (tree2term abs) trees
|
||||
|
||||
-- default parser = CFG (for now)
|
||||
parse "" pinfo abs startCat inString = parse "c" pinfo abs startCat inString
|
||||
|
||||
|
||||
-- parsing via CFG
|
||||
parse (c:strategy) pinfo abs startCat
|
||||
| c=='c' || c=='C' = map (tree2term abs) .
|
||||
parseCFG strategy cfpi startCats .
|
||||
map prCFTok
|
||||
where startCats = tracePrt "Parsing.GFC - starting categories" prt $
|
||||
filter isStartCat $ map fst $ aAssocs $ PC.topdownRules cfpi
|
||||
isStartCat (CCat (ECat cat _) _) = cat == cfCat2Ident startCat
|
||||
cfpi = cfPInfo pinfo
|
||||
selectParser prs strategy pinfo startCat inTokens | prs=='c'
|
||||
= do let startCats = tracePrt "Parsing.GFC - starting CF categories" prt $
|
||||
filter isStart $ map fst $ aAssocs $ PC.topdownRules cfpi
|
||||
isStart cat = ccat2scat cat == cfCat2Ident startCat
|
||||
cfpi = cfPInfo pinfo
|
||||
cfParser <- PC.parseCF strategy
|
||||
let cfChart = tracePrt "Parsing.GFC - sz. CF chart" (prt . length) $
|
||||
cfParser cfpi startCats inTokens
|
||||
chart = tracePrt "Parsing.GFC - sz. chart" (prt . map (length.snd) . aAssocs) $
|
||||
C.grammar2chart cfChart
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
map (uncurry Edge (inputBounds inTokens)) startCats
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- parsing via MCFG
|
||||
parse (c:strategy) pinfo abs startCat
|
||||
| c=='m' || c=='M' = map (tree2term abs) .
|
||||
parseMCFG strategy mcfpi startCats .
|
||||
map prCFTok
|
||||
where startCats = tracePrt "Parsing.GFC - starting categories" prt $
|
||||
filter isStartCat $ nubsort [ c | Rule (Abs c _ _) _ <- mcfpi ]
|
||||
isStartCat (MCat (ECat cat _) _) = cat == cfCat2Ident startCat
|
||||
mcfpi = mcfPInfo pinfo
|
||||
selectParser prs strategy pinfo startCat inTokens | prs=='m'
|
||||
= do let startCats = tracePrt "Parsing.GFC - starting MCF categories" prt $
|
||||
filter isStart $ nubsort [ c | G.Rule (G.Abs c _ _) _ <- mcfpi ]
|
||||
isStart cat = mcat2scat cat == cfCat2Ident startCat
|
||||
mcfpi = mcfPInfo pinfo
|
||||
mcfParser <- PM.parseMCF strategy
|
||||
let mcfChart = tracePrt "Parsing.GFC - sz. MCF chart" (prt . length) $
|
||||
mcfParser mcfpi startCats inTokens
|
||||
chart = tracePrt "Parsing.GFC - sz. chart" (prt . map (length.snd) . aAssocs) $
|
||||
G.abstract2chart mcfChart
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
[ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
|
||||
cat@(MCat _ [lbl]) <- startCats ]
|
||||
return $ chart2forests chart (const False) finalEdges
|
||||
|
||||
-- default parser
|
||||
parse strategy pinfo abs start = parse ('c':strategy) pinfo abs start
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
parseCFG :: String -> CFPInfo -> [CCat] -> [Token] -> [SyntaxTree Fun]
|
||||
parseCFG strategy pinfo startCats inString = trace2 "Parsing.GFC - selected algorithm" "CFG" $
|
||||
trees
|
||||
where trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||
nubsort $ forests >>= forest2trees
|
||||
-- compactFs >>= forest2trees
|
||||
|
||||
-- compactFs = tracePrt "#compactForests" (prt . length) $
|
||||
-- tracePrt "compactForests" (prtBefore "\n") $
|
||||
-- compactForests forests
|
||||
|
||||
forests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
|
||||
cfForests >>= convertFromCFForest
|
||||
cfForests= tracePrt "Parsing.GFC - nr. context-free forests" (prt . length) $
|
||||
chart2forests chart (const False) finalEdges
|
||||
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
map (uncurry Edge (inputBounds inTokens)) startCats
|
||||
chart = --tracePrt "finalChartEdges" (prt . (? finalEdge)) $
|
||||
tracePrt "Parsing.GFC - size of chart" (prt . map (length.snd) . aAssocs) $
|
||||
C.grammar2chart cfChart
|
||||
cfChart = --tracePrt "finalEdges"
|
||||
--(prt . filter (\(Edge i j _) -> (i,j)==inputBounds inTokens)) $
|
||||
tracePrt "Parsing.GFC - size of context-free chart" (prt . length) $
|
||||
PC.parseCF strategy pinfo startCats inTokens
|
||||
|
||||
inTokens = input inString
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
||||
parseMCFG :: String -> MCFPInfo -> [MCat] -> [Token] -> [SyntaxTree Fun]
|
||||
parseMCFG strategy pinfo startCats inString = trace2 "Parsing.GFC - selected algorithm" "MCFG" $
|
||||
trees
|
||||
where trees = tracePrt "Parsing.GFC - nr. trees" (prt . length) $
|
||||
forests >>= forest2trees
|
||||
|
||||
forests = tracePrt "Parsing.GFC - nr. forests" (prt . length) $
|
||||
cfForests >>= convertFromCFForest
|
||||
cfForests= tracePrt "Parsing.GFC - nr. context-free forests" (prt . length) $
|
||||
chart2forests chart (const False) finalEdges
|
||||
|
||||
chart = tracePrt "Parsing.GFC - size of chart" (prt . map (length.snd) . aAssocs) $
|
||||
PM.parseMCF strategy pinfo inString -- inTokens
|
||||
|
||||
finalEdges = tracePrt "Parsing.GFC - final chart edges" prt $
|
||||
[ PM.makeFinalEdge cat lbl (inputBounds inTokens) |
|
||||
cat@(MCat _ [lbl]) <- startCats ]
|
||||
|
||||
inTokens = input inString
|
||||
-- error parser:
|
||||
selectParser prs strategy _ _ _ = Bad $ "Parser not defined: " ++ (prs:strategy)
|
||||
|
||||
|
||||
----------------------------------------------------------------------
|
||||
@@ -153,36 +128,23 @@ tree2term abs (TMeta) = Macros.mkMeta 0
|
||||
----------------------------------------------------------------------
|
||||
-- conversion and unification of forests
|
||||
|
||||
convertFromCFForest :: SyntaxForest Name -> [SyntaxForest Fun]
|
||||
|
||||
-- simplest implementation
|
||||
convertFromCFForest (FNode name@(Name fun profile) children)
|
||||
applyProfileToForest :: SyntaxForest Name -> [SyntaxForest Fun]
|
||||
applyProfileToForest (FNode name@(Name fun profile) children)
|
||||
| isCoercion name = concat chForests
|
||||
| otherwise = [ FNode fun chForests | not (null chForests) ]
|
||||
where chForests = concat [ applyProfileM unifyManyForests profile forests |
|
||||
forests0 <- children,
|
||||
forests <- mapM convertFromCFForest forests0 ]
|
||||
forests <- mapM applyProfileToForest forests0 ]
|
||||
|
||||
{-
|
||||
-- more intelligent(?) implementation
|
||||
convertFromCFForest (FNode (Name name profile) children)
|
||||
applyProfileToForest (FNode (Name name profile) children)
|
||||
| isCoercion name = concat chForests
|
||||
| otherwise = [ FNode name chForests | not (null chForests) ]
|
||||
where chForests = concat [ mapM (checkProfile forests) profile |
|
||||
forests0 <- children,
|
||||
forests <- mapM convertFromCFForest forests0 ]
|
||||
forests <- mapM applyProfileToForest forests0 ]
|
||||
-}
|
||||
|
||||
{-
|
||||
----------------------------------------------------------------------
|
||||
-- conversion and unification for parse trees instead of forests
|
||||
-- OBSOLETE!
|
||||
|
||||
convertFromCFTree :: SyntaxTree Name -> [SyntaxTree Fun]
|
||||
convertFromCFTree (TNode name@(Name fun profile) children0)
|
||||
| isCoercion name = concat chTrees
|
||||
| otherwise = map (TNode fun) chTrees
|
||||
where chTrees = [ children |
|
||||
children1 <- mapM convertFromCFTree children0,
|
||||
children <- applyProfileM unifyManyTrees profile children1 ]
|
||||
-}
|
||||
|
||||
@@ -4,32 +4,39 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/19 10:46:07 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- MCFG parsing
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.NewParsing.MCFG where
|
||||
module GF.NewParsing.MCFG
|
||||
(parseMCF, module GF.NewParsing.MCFG.PInfo) where
|
||||
|
||||
import Operations (Err(..))
|
||||
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.NewParsing.MCFG.PInfo
|
||||
|
||||
import qualified GF.NewParsing.MCFG.Naive as Naive
|
||||
import qualified GF.NewParsing.MCFG.Active as Active
|
||||
import qualified GF.NewParsing.MCFG.Range as Range (makeRange)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- parsing
|
||||
|
||||
--parseMCF :: (Ord n, Ord c, Ord t) => String -> CFParser c n t
|
||||
parseMCF "n" = Naive.parse
|
||||
-- default parser:
|
||||
parseMCF _ = parseMCF "n"
|
||||
|
||||
|
||||
makeFinalEdge cat lbl bnds = (cat, [(lbl, Range.makeRange bnds)])
|
||||
parseMCF :: (Ord c, Ord n, Ord l, Ord t) => String -> Err (MCFParser c n l t)
|
||||
parseMCF "n" = Ok $ Naive.parse
|
||||
parseMCF "an" = Ok $ Active.parse "n"
|
||||
parseMCF "ab" = Ok $ Active.parse "b"
|
||||
parseMCF "at" = Ok $ Active.parse "t"
|
||||
-- default parsers:
|
||||
parseMCF "a" = parseMCF "an"
|
||||
-- error parser:
|
||||
parseMCF prs = Bad $ "Parser not defined: " ++ prs
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,174 +1,186 @@
|
||||
{-- Module --------------------------------------------------------------------
|
||||
Filename: ActiveParse.hs
|
||||
Author: Håkan Burden
|
||||
Time-stamp: <2005-04-18, 14:25>
|
||||
|
||||
Description: An agenda-driven implementation of algorithm 4.6, Active parsing
|
||||
of PMCFG, as described in Ljunglöf (2004)
|
||||
------------------------------------------------------------------------------}
|
||||
module GF.NewParsing.MCFG.Active (parse) where
|
||||
|
||||
module ActiveParse where
|
||||
import GF.NewParsing.GeneralChart
|
||||
import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.NewParsing.MCFG.Range
|
||||
import GF.NewParsing.MCFG.PInfo
|
||||
import GF.System.Tracing
|
||||
import Monad (guard)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
-- GF modules
|
||||
import Examples
|
||||
import GeneralChart
|
||||
import MCFGrammar
|
||||
import MCFParser
|
||||
import Nondet
|
||||
import Parser
|
||||
import Range
|
||||
parse :: (Ord n, Ord c, Ord l, Ord t) => String -> MCFParser c n l t
|
||||
parse strategy mcfg starts toks
|
||||
= [ Abs (cat, found) (zip rhs rrecs) fun |
|
||||
Final (Abs cat rhs fun) found rrecs <- chartLookup chart Fin ]
|
||||
where chart = process strategy mcfg starts toks
|
||||
|
||||
process :: (Ord n, Ord c, Ord l, Ord t) =>
|
||||
String -> MCFGrammar c n l t -> [c] -> Input t -> AChart c n l
|
||||
process strategy mcfg starts toks
|
||||
= trace2 "MCFG.Active - strategy" (if isBU strategy then "BU"
|
||||
else if isTD strategy then "TD" else "None") $
|
||||
tracePrt "MCFG.Active - chart size" prtSizes $
|
||||
buildChart keyof (complete : combine : convert : rules) axioms
|
||||
where rules | isNil strategy = [scan]
|
||||
| isBU strategy = [predictKilbury mcfg toks]
|
||||
| isTD strategy = [predictEarley mcfg toks]
|
||||
axioms | isNil strategy = predict mcfg toks
|
||||
| isBU strategy = terminal mcfg toks
|
||||
| isTD strategy = initial mcfg starts toks
|
||||
|
||||
{-- Datatypes -----------------------------------------------------------------
|
||||
AChart: A RedBlackMap with Items and Keys
|
||||
Item :
|
||||
AKey :
|
||||
------------------------------------------------------------------------------}
|
||||
data Item n c l = Active (AbstractRule n c)
|
||||
(RangeRec l)
|
||||
Range
|
||||
(Lin c l Range)
|
||||
(LinRec c l Range)
|
||||
[RangeRec l]
|
||||
| Passive (AbstractRule n c) (RangeRec l) [RangeRec l]
|
||||
isNil s = s=="n"
|
||||
isBU s = s=="b"
|
||||
isTD s = s=="t"
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * type definitions
|
||||
|
||||
type AChart c n l = ParseChart (Item c n l) (AKey c)
|
||||
|
||||
data Item c n l = Active (Abstract c n)
|
||||
(RangeRec l)
|
||||
Range
|
||||
(Lin c l Range)
|
||||
(LinRec c l Range)
|
||||
[RangeRec l]
|
||||
| Final (Abstract c n) (RangeRec l) [RangeRec l]
|
||||
| Passive c (RangeRec l)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type AChart n c l = ParseChart (Item n c l) (AKey c)
|
||||
|
||||
data AKey c = Act c
|
||||
| Pass c
|
||||
| Useless
|
||||
| Fin
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
||||
keyof :: Item n c l -> AKey c
|
||||
keyof :: Item c n l -> AKey c
|
||||
keyof (Active _ _ _ (Lin _ (Cat (next, _, _):_)) _ _) = Act next
|
||||
keyof (Passive (_, cat, _) _ _) = Pass cat
|
||||
keyof _ = Useless
|
||||
keyof (Final _ _ _) = Fin
|
||||
keyof (Passive cat _) = Pass cat
|
||||
keyof _ = Useless
|
||||
|
||||
-- to be used in prediction
|
||||
emptyChildren :: Abstract c n -> [RangeRec l]
|
||||
emptyChildren (Abs _ rhs _) = replicate (length rhs) []
|
||||
|
||||
-- for tracing purposes
|
||||
prtSizes chart = "final=" ++ show (length (chartLookup chart Fin)) ++
|
||||
", passive=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Pass _) <- chartKeys chart ]) ++
|
||||
", active=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Act _) <- chartKeys chart ]) ++
|
||||
", useless=" ++ show (length (chartLookup chart Useless))
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||
the inference rules
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
----------------------------------------------------------------------
|
||||
-- * inference rules
|
||||
|
||||
recognize strategy mcfg toks = chartMember
|
||||
(parse strategy mcfg toks) item (keyof item)
|
||||
where n = length toks
|
||||
n2 = n `div` 2
|
||||
item = (Passive ("f", S, [A])
|
||||
[("s",Range (0,n))]
|
||||
[[("p",Range (0,n2)),("q",Range (n2,n))]])
|
||||
|
||||
|
||||
parse :: (Ord n, Ord c, Ord l, Eq t) => Strategy -> Grammar n c l t -> [t]
|
||||
-> ParseChart (Item n c l) (AKey c)
|
||||
parse (False,False) mcfg toks = buildChart keyof
|
||||
[complete, scan, combine, convert]
|
||||
(predict mcfg toks)
|
||||
parse (True, False) mcfg toks = buildChart keyof
|
||||
[predictKilbury mcfg toks, complete, combine, convert]
|
||||
(terminal mcfg toks)
|
||||
parse (False, True) mcfg toks = buildChart keyof
|
||||
[predictEarley mcfg toks, complete, scan, combine, convert]
|
||||
(initial (take 1 mcfg) toks)
|
||||
|
||||
predictKilbury mcfg toks _ (Passive (_, cat, _) found _) =
|
||||
[ Active (f, a, rhs) [] rng lin' lins' daughters |
|
||||
Rule a rhs ((Lin l ((Cat (cat', r, i)):syms)):lins) f <- mcfg,
|
||||
cat == cat',
|
||||
lin' : lins' <- solutions $ rangeRestRec toks (Lin l syms : lins),
|
||||
-- lins' <- solutions $ rangeRestRec toks lins,
|
||||
rng <- solutions $ projection r found,
|
||||
let daughters = (replaceRec (replicate (length rhs) []) i found) ]
|
||||
predictKilbury _ _ _ _ = []
|
||||
|
||||
predictEarley mcfg toks _ item@(Active _ _ _ (Lin _ ((Cat (cat, _, _)):_)) _ _) =
|
||||
concat [ predEar toks item rule |
|
||||
rule@(Rule cat' _ _ _) <- mcfg, cat == cat' ]
|
||||
predictEarley _ _ _ _ = []
|
||||
|
||||
predEar toks _ (Rule cat [] lins f) =
|
||||
[ Passive (f, cat, []) (makeRangeRec lins') [] |
|
||||
lins' <- solutions $ rangeRestRec toks lins ]
|
||||
predEar toks (Active _ _ (Range (_,j)) _ _ _) (Rule cat rhs lins f) =
|
||||
[ Active (f, cat, rhs) [] (Range (j, j)) lin' lins' (replicate (length rhs) []) |
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
predEar toks (Active _ _ EmptyRange _ _ _) (Rule cat rhs lins f) =
|
||||
[ Active (f, cat, rhs) [] EmptyRange lin' lins' (replicate (length rhs) []) |
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
|
||||
|
||||
{--Inference rules ------------------------------------------------------------
|
||||
predict : Creates an Active Item of every Rule in the Grammar to give the
|
||||
initial Agenda
|
||||
complete:
|
||||
scan :
|
||||
combine : Creates an Active Item every time it is possible to combine
|
||||
an Active Item from the agenda with a Passive Item from the Chart
|
||||
convert : Active Items with nothing to find are converted to Passive Items
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
predict :: Eq t => Grammar n c l t -> [t] -> [Item n c l]
|
||||
predict grammar toks = [ Active (f, cat, rhs) [] EmptyRange lin' lins'
|
||||
(replicate (length rhs) []) |
|
||||
Rule cat rhs lins f <- grammar,
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
|
||||
|
||||
complete :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
complete _ (Active rule found (Range (i, j)) (Lin l []) (lin:lins) recs) =
|
||||
[ Active rule (found ++ [(l, Range (i,j))]) EmptyRange lin lins recs ]
|
||||
-- completion
|
||||
complete :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
complete _ (Active rule found rng (Lin l []) (lin:lins) recs) =
|
||||
return $ Active rule (found ++ [(l, rng)]) EmptyRange lin lins recs
|
||||
complete _ _ = []
|
||||
|
||||
|
||||
scan :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
scan _ (Active rule found rng (Lin l ((Tok rng'):syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins recs |
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
-- scanning
|
||||
scan :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) =
|
||||
do rng'' <- concatRange rng rng'
|
||||
return $ Active rule found rng'' (Lin l syms) lins recs
|
||||
scan _ _ = []
|
||||
|
||||
|
||||
combine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
combine chart (Active rule found rng (Lin l ((Cat (c, r, d)):syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found') |
|
||||
Passive _ found' _ <- chartLookup chart (Pass c),
|
||||
rng' <- solutions $ projection r found',
|
||||
rng'' <- solutions $ concRanges rng rng',
|
||||
subsumes (recs !! d) found' ]
|
||||
combine chart (Passive (_, c, _) found _) =
|
||||
[ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found) |
|
||||
Active rule found' rng' (Lin l ((Cat (c, r, d)):syms)) lins recs'
|
||||
<- chartLookup chart (Act c),
|
||||
rng'' <- solutions $ projection r found,
|
||||
rng <- solutions $ concRanges rng' rng'',
|
||||
subsumes (recs' !! d) found ]
|
||||
-- | Creates an Active Item every time it is possible to combine
|
||||
-- an Active Item from the agenda with a Passive Item from the Chart
|
||||
combine :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
combine chart (Active rule found rng (Lin l (Cat (c, r, d):syms)) lins recs) =
|
||||
do Passive _c found' <- chartLookup chart (Pass c)
|
||||
rng' <- projection r found'
|
||||
rng'' <- concatRange rng rng'
|
||||
guard $ subsumes (recs !! d) found'
|
||||
return $ Active rule found rng'' (Lin l syms) lins (replaceRec recs d found')
|
||||
combine chart (Passive c found) =
|
||||
do Active rule found' rng' (Lin l ((Cat (_c, r, d)):syms)) lins recs'
|
||||
<- chartLookup chart (Act c)
|
||||
rng'' <- projection r found
|
||||
rng <- concatRange rng' rng''
|
||||
guard $ subsumes (recs' !! d) found
|
||||
return $ Active rule found' rng (Lin l syms) lins (replaceRec recs' d found)
|
||||
combine _ _ = []
|
||||
|
||||
convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey c) -> Item n c l
|
||||
-> [Item n c l]
|
||||
convert _ (Active rule found rng (Lin l []) [] recs) =
|
||||
[ Passive rule (found ++ [(l, rng)]) recs ]
|
||||
-- | Active Items with nothing to find are converted to Final items,
|
||||
-- which in turn are converted to Passive Items
|
||||
convert :: (Ord c, Ord n, Ord l) => AChart c n l -> Item c n l -> [Item c n l]
|
||||
convert _ (Active rule found rng (Lin lbl []) [] recs) =
|
||||
return $ Final rule (found ++ [(lbl,rng)]) recs
|
||||
convert _ (Final (Abs cat _ _) found _) =
|
||||
return $ Passive cat found
|
||||
convert _ _ = []
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Naive --
|
||||
|
||||
-- | Creates an Active Item of every Rule in the Grammar to give the initial Agenda
|
||||
predict :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l]
|
||||
predict grammar toks =
|
||||
do Rule abs (Cnc _ _ lins) <- grammar
|
||||
(lin':lins') <- rangeRestRec toks lins
|
||||
return $ Active abs [] EmptyRange lin' lins' (emptyChildren abs)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Earley --
|
||||
-- anropas med alla startregler
|
||||
initial :: Eq t => [Rule n c l t] -> [t] -> [Item n c l]
|
||||
initial starts toks =
|
||||
[ Active (f, s, rhs) [] (Range (0, 0)) lin' lins' (replicate (length rhs) []) |
|
||||
Rule s rhs lins f <- starts,
|
||||
(lin':lins') <- solutions $ rangeRestRec toks lins ]
|
||||
|
||||
-- anropas med alla startkategorier
|
||||
initial :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> [c] -> Input t -> [Item c n l]
|
||||
initial mcfg starts toks =
|
||||
do Rule abs@(Abs cat _ _) (Cnc _ _ lins) <- mcfg
|
||||
guard $ cat `elem` starts
|
||||
lin' : lins' <- rangeRestRec toks lins
|
||||
return $ Active abs [] (Range (0, 0)) lin' lins' (emptyChildren abs)
|
||||
|
||||
-- earley prediction
|
||||
predictEarley :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t
|
||||
-> AChart c n l -> Item c n l -> [Item c n l]
|
||||
predictEarley mcfg toks _ (Active _ _ rng (Lin _ (Cat (cat,_,_):_)) _ _) =
|
||||
do rule@(Rule (Abs cat' _ _) _) <- mcfg
|
||||
guard $ cat == cat'
|
||||
predEar toks rng rule
|
||||
predictEarley _ _ _ _ = []
|
||||
|
||||
predEar :: (Ord c, Ord n, Ord l, Ord t) =>
|
||||
Input t -> Range -> MCFRule c n l t -> [Item c n l]
|
||||
predEar toks _ (Rule abs@(Abs _ [] _) (Cnc _ _ lins)) =
|
||||
do lins' <- rangeRestRec toks lins
|
||||
return $ Final abs (makeRangeRec lins') []
|
||||
predEar toks rng (Rule abs (Cnc _ _ lins)) =
|
||||
do lin' : lins' <- rangeRestRec toks lins
|
||||
return $ Active abs [] (makeMaxRange rng) lin' lins' (emptyChildren abs)
|
||||
|
||||
makeMaxRange (Range (_, j)) = Range (j, j)
|
||||
makeMaxRange EmptyRange = EmptyRange
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- Kilbury --
|
||||
|
||||
terminal :: (Ord c, Ord n, Ord l, Ord t) => MCFGrammar c n l t -> Input t -> [Item c n l]
|
||||
terminal mcfg toks =
|
||||
[ Passive (f, cat, []) (makeRangeRec lins') [] |
|
||||
Rule cat [] lins f <- mcfg,
|
||||
lins' <- solutions $ rangeRestRec toks lins ]
|
||||
do Rule abs@(Abs _ [] _) (Cnc _ _ lins) <- mcfg
|
||||
lins' <- rangeRestRec toks lins
|
||||
return $ Final abs (makeRangeRec lins') []
|
||||
|
||||
-- kilbury prediction
|
||||
predictKilbury :: (Ord c, Ord n, Ord l, Ord t) =>
|
||||
MCFGrammar c n l t -> Input t
|
||||
-> AChart c n l -> Item c n l -> [Item c n l]
|
||||
predictKilbury mcfg toks _ (Passive cat found) =
|
||||
do Rule abs@(Abs _ rhs _) (Cnc _ _ (Lin l (Cat (cat', r, i):syms) : lins)) <- mcfg
|
||||
guard $ cat == cat'
|
||||
lin' : lins' <- rangeRestRec toks (Lin l syms : lins)
|
||||
rng <- projection r found
|
||||
let children = replaceRec (emptyChildren abs) i found
|
||||
return $ Active abs [] rng lin' lins' children
|
||||
predictKilbury _ _ _ _ = []
|
||||
|
||||
123
src/GF/Parsing/MCFG/Incremental.hs
Normal file
123
src/GF/Parsing/MCFG/Incremental.hs
Normal file
@@ -0,0 +1,123 @@
|
||||
{-- Module --------------------------------------------------------------------
|
||||
Filename: IncrementalParse.hs
|
||||
Author: Håkan Burden
|
||||
Time-stamp: <2005-04-18, 15:07>
|
||||
|
||||
Description: An agenda-driven implementation of the incremental algorithm 4.6
|
||||
that handles erasing and suppressing MCFG.
|
||||
As described in Ljunglöf (2004)
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
module IncrementalParse where
|
||||
|
||||
|
||||
-- Haskell
|
||||
import List
|
||||
|
||||
-- GF modules
|
||||
import Examples
|
||||
import GeneralChart
|
||||
import MCFGrammar
|
||||
import MCFParser
|
||||
import Parser
|
||||
import Range
|
||||
import Nondet
|
||||
|
||||
|
||||
{-- Datatypes -----------------------------------------------------------------
|
||||
IChart: A RedBlackMap with Items and Keys
|
||||
Item : One kind of Item since the Passive Items not necessarily need to be
|
||||
saturated iow, they can still have rows to recognize.
|
||||
IKey :
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
type IChart n c l = ParseChart (Item n c l) (IKey c l)
|
||||
|
||||
data Item n c l = Active (AbstractRule n c)
|
||||
(RangeRec l)
|
||||
Range
|
||||
(Lin c l Range)
|
||||
(LinRec c l Range)
|
||||
[RangeRec l]
|
||||
-- | Passive (AbstractRule n c)
|
||||
-- (RangeRec l)
|
||||
-- [RangeRec l]
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
data IKey c l = Act c l Int
|
||||
-- | ActE l
|
||||
| Pass c l Int
|
||||
-- | Pred l
|
||||
| Useless
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
keyof :: Item n c l -> IKey c l
|
||||
keyof (Active _ _ (Range (_,j)) (Lin _ ((Cat (next,lbl,_)):_)) _ _)
|
||||
= Act next lbl j
|
||||
keyof (Active (_, cat, _) found (Range (i,_)) (Lin lbl []) _ _)
|
||||
= Pass cat lbl i
|
||||
keyof _
|
||||
= Useless
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||
the inference rules
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
recognize mcfg toks = chartMember (parse mcfg toks) item (keyof item)
|
||||
where n = length toks
|
||||
n2 = n `div` 2
|
||||
item = Active ("f",S,[A])
|
||||
[] (Range (0, n)) (Lin "s" []) []
|
||||
[[("p", Range (0, n2)), ("q", Range (n2, n))]]
|
||||
|
||||
|
||||
parse :: (Ord n, Ord c, Ord l, Eq t) => Grammar n c l t -> [t] -> IChart n c l
|
||||
parse mcfg toks = buildChart keyof [complete ntoks, scan, combine] (predict mcfg toks ntoks)
|
||||
where ntoks = length toks
|
||||
|
||||
complete :: (Ord n, Ord c, Ord l) => Int -> IChart n c l
|
||||
-> Item n c l -> [Item n c l]
|
||||
complete ntoks _ (Active rule found rng@(Range (_,j)) (Lin l []) lins recs) =
|
||||
[ Active rule (found ++ [(l, rng)]) (Range (k,k)) lin lins' recs |
|
||||
(lin, lins') <- select lins,
|
||||
k <- [j .. ntoks] ]
|
||||
complete _ _ _ = []
|
||||
|
||||
|
||||
predict :: (Eq n, Eq c, Eq l, Eq t) => Grammar n c l t -> [t] -> Int -> [Item n c l]
|
||||
predict mcfg toks n = [ Active (f, c, rhs) [] (Range (k,k)) lin' lins'' daughters |
|
||||
Rule c rhs lins f <- mcfg,
|
||||
let daughters = replicate (length rhs) [],
|
||||
lins' <- solutions $ rangeRestRec toks lins,
|
||||
(lin', lins'') <- select lins',
|
||||
k <- [0..n] ]
|
||||
|
||||
|
||||
scan :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l]
|
||||
scan _ (Active rule found rng (Lin l (Tok rng':syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins recs |
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
scan _ _ = []
|
||||
|
||||
|
||||
combine :: (Ord n, Ord c, Ord l) => IChart n c l -> Item n c l -> [Item n c l]
|
||||
combine chart (Active rule found rng@(Range (_,j)) (Lin l ((Cat (c,r,d)):syms)) lins recs) =
|
||||
[ Active rule found rng'' (Lin l syms) lins (replaceRec recs d (found' ++ [(l',rng')])) |
|
||||
Active _ found' rng' (Lin l' []) _ _ <- chartLookup chart (Pass c r j),
|
||||
subsumes (recs !! d) (found' ++ [(l',rng')]),
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
combine chart (Active (_,c,_) found rng'@(Range (i,_)) (Lin l []) _ _) =
|
||||
[ Active rule found' rng'' (Lin l' syms) lins (replaceRec recs d (found ++ [(l,rng')])) |
|
||||
Active rule found' rng (Lin l' ((Cat (c,r,d)):syms)) lins recs
|
||||
<- chartLookup chart (Act c l i),
|
||||
subsumes (recs !! d) (found ++ [(l,rng')]),
|
||||
rng'' <- solutions $ concRanges rng rng' ]
|
||||
combine _ _ = []
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
module GF.NewParsing.MCFG.Naive where
|
||||
module GF.NewParsing.MCFG.Naive (parse) where
|
||||
|
||||
|
||||
-- GF modules
|
||||
@@ -8,21 +8,34 @@ import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.NewParsing.MCFG.Range
|
||||
import GF.NewParsing.MCFG.PInfo
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.System.Tracing
|
||||
|
||||
{-- Datatypes and types -------------------------------------------------------
|
||||
NChart : A RedBlackMap with Items and Keys
|
||||
Item : The parse Items are either Active or Passive
|
||||
NKey : One for Active Items, one for Passive and one for Active Items
|
||||
to convert to Passive
|
||||
DottedRule: (function-name, LHS, [Found in RHS], [To find in RHS])
|
||||
------------------------------------------------------------------------------}
|
||||
----------------------------------------------------------------------
|
||||
-- * parsing
|
||||
|
||||
-- | Builds a chart from the initial agenda, given by prediction, and
|
||||
-- the inference rules
|
||||
parse :: (Ord t, Ord n, Ord c, Ord l) => MCFParser c n l t
|
||||
parse mcfg starts toks
|
||||
= [ Abs (cat, makeRangeRec lins) (zip rhs rrecs) fun |
|
||||
Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart Final ]
|
||||
where chart = process mcfg toks
|
||||
|
||||
process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> Input t -> NChart c n l
|
||||
process mcfg toks
|
||||
= tracePrt "MCFG.Naive - chart size" prtSizes $
|
||||
buildChart keyof [convert, combine] (predict toks mcfg)
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- * type definitions
|
||||
|
||||
type NChart c n l = ParseChart (Item c n l) (NKey c)
|
||||
|
||||
data Item c n l = Active (DottedRule c n) (LinRec c l Range) [RangeRec l]
|
||||
| Passive (Abstract c n) (RangeRec l)
|
||||
| Passive c (RangeRec l)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type DottedRule c n = (Abstract c n, [c])
|
||||
@@ -32,63 +45,43 @@ data NKey c = Act c
|
||||
| Final
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : Builds a chart from the initial agenda, given by prediction, and
|
||||
the inference rules
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
|
||||
parse :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t]
|
||||
-> SyntaxChart n (c, RangeRec l)
|
||||
parse mcfg toks = chart3
|
||||
where chart3 = assocMap (const groupPairs) chart2
|
||||
chart2 = accumAssoc id $ nubsort chart1
|
||||
chart1 = [ ((cat, rrec), (fun, zip rhs rrecs)) |
|
||||
Active (Abs cat _Nil fun, rhs) lins rrecs <- chartLookup chart0 Final,
|
||||
let rrec = makeRangeRec lins ]
|
||||
chart0 = process mcfg toks
|
||||
|
||||
process :: (Ord t, Ord n, Ord c, Ord l) => MCFGrammar c n l t -> [t] -> NChart c n l
|
||||
process mcfg toks = buildChart keyof [convert, combine] (predict toks mcfg)
|
||||
|
||||
|
||||
keyof :: Item c n l -> NKey c
|
||||
keyof (Active (Abs _ (next:_) _, _) _ _) = Act next
|
||||
keyof (Passive (Abs cat _ _) _) = Pass cat
|
||||
keyof (Passive cat _) = Pass cat
|
||||
keyof _ = Final
|
||||
|
||||
-- for tracing purposes
|
||||
prtSizes chart = "final=" ++ show (length (chartLookup chart Final)) ++
|
||||
", passive=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Pass _) <- chartKeys chart ]) ++
|
||||
", active=" ++ show (sum [length (chartLookup chart k) |
|
||||
k@(Act _) <- chartKeys chart ])
|
||||
|
||||
{--Inference rules ------------------------------------------------------------
|
||||
predict: Creates an Active Item of every Rule in the Grammar to give the
|
||||
initial Agenda
|
||||
combine: Creates an Active Item every time it is possible to combine
|
||||
an Active Item from the agenda with a Passive Item from the Chart
|
||||
convert: Active Items with nothing to find are converted to Passive Items
|
||||
------------------------------------------------------------------------------}
|
||||
----------------------------------------------------------------------
|
||||
-- * inference rules
|
||||
|
||||
predict :: (Eq t, Eq c) => [t] -> MCFGrammar c n l t -> [Item c n l]
|
||||
-- Creates an Active Item of every Rule in the Grammar to give the initial Agenda
|
||||
predict :: Ord t => Input t -> MCFGrammar c n l t -> [Item c n l]
|
||||
predict toks mcfg = [ Active (abs, []) lins' [] |
|
||||
Rule abs (Cnc _ _ lins) <- mcfg,
|
||||
lins' <- rangeRestRec toks lins ]
|
||||
|
||||
|
||||
-- | Creates an Active Item every time it is possible to combine
|
||||
-- an Active Item from the agenda with a Passive Item from the Chart
|
||||
combine :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
|
||||
combine chart (Active (Abs nt (c:find) f, found) lins rrecs) =
|
||||
do Passive _ rrec <- chartLookup chart (Pass c)
|
||||
lins' <- concLinRec $ substArgRec (length found) rrec lins
|
||||
return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
|
||||
combine chart (Passive (Abs c _ _) rrec) =
|
||||
combine chart (Passive c rrec) =
|
||||
do Active (Abs nt (c:find) f, found) lins rrecs <- chartLookup chart (Act c)
|
||||
lins' <- concLinRec $ substArgRec (length found) rrec lins
|
||||
return $ Active (Abs nt find f, found ++ [c]) lins' (rrecs ++ [rrec])
|
||||
combine _ _ = []
|
||||
|
||||
|
||||
-- | Active Items with nothing to find are converted to Passive Items
|
||||
convert :: (Ord n, Ord c, Ord l) => NChart c n l -> Item c n l -> [Item c n l]
|
||||
convert _ (Active (Abs nt [] f, rhs) lins _) = [Passive (Abs nt rhs f) rrec]
|
||||
convert _ (Active (Abs cat [] _, _) lins _) = [Passive cat rrec]
|
||||
where rrec = makeRangeRec lins
|
||||
convert _ _ = []
|
||||
|
||||
|
||||
@@ -4,15 +4,14 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/19 10:46:08 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.1 $
|
||||
-- > CVS $Revision: 1.2 $
|
||||
--
|
||||
-- MCFG parsing, parser information
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.NewParsing.MCFG.PInfo
|
||||
(MCFParser, MCFPInfo(..), buildMCFPInfo) where
|
||||
module GF.NewParsing.MCFG.PInfo where
|
||||
|
||||
import GF.System.Tracing
|
||||
import GF.Infra.Print
|
||||
@@ -22,6 +21,7 @@ import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Data.SortedList
|
||||
import GF.Data.Assoc
|
||||
import GF.NewParsing.MCFG.Range
|
||||
|
||||
----------------------------------------------------------------------
|
||||
-- type declarations
|
||||
@@ -32,10 +32,13 @@ type MCFParser c n l t = MCFPInfo c n l t
|
||||
-> Input t
|
||||
-> MCFChart c n l
|
||||
|
||||
type MCFChart c n l = [(n, (c, RangeRec l), [(c, RangeRec l)])]
|
||||
type MCFChart c n l = [Abstract (c, RangeRec l) n]
|
||||
|
||||
type MCFPInfo c n l t = MCFGrammar c n l t
|
||||
|
||||
buildCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
|
||||
buildCFPInfo = id
|
||||
buildMCFPInfo :: (Ord n, Ord c, Ord l, Ord t) => MCFGrammar c n l t -> MCFPInfo c n l t
|
||||
buildMCFPInfo = id
|
||||
|
||||
makeFinalEdge :: c -> l -> (Int, Int) -> (c, RangeRec l)
|
||||
makeFinalEdge cat lbl bnds = (cat, [(lbl, makeRange bnds)])
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import GF.Formalism.GCFG
|
||||
import GF.Formalism.MCFG
|
||||
import GF.Formalism.Utilities
|
||||
import GF.Infra.Print
|
||||
|
||||
import GF.Data.Assoc ((?))
|
||||
|
||||
------------------------------------------------------------
|
||||
-- ranges as single pairs
|
||||
@@ -95,29 +95,29 @@ makeRangeRec lins = map convLin lins
|
||||
|
||||
--- Record projection --------------------------------------------------------
|
||||
|
||||
projection :: Eq l => l -> RangeRec l -> [Range]
|
||||
projection :: Ord l => l -> RangeRec l -> [Range]
|
||||
projection l rec = maybe (fail "projection") return $ lookup l rec
|
||||
|
||||
|
||||
--- Range restriction --------------------------------------------------------
|
||||
|
||||
rangeRestTok :: Eq t => [t] -> t -> [Range]
|
||||
rangeRestTok toks tok = do i <- elemIndices tok toks
|
||||
return (makeRange (i, i+1))
|
||||
rangeRestTok :: Ord t => Input t -> t -> [Range]
|
||||
rangeRestTok toks tok = do rng <- inputToken toks ? tok
|
||||
return (makeRange rng)
|
||||
|
||||
|
||||
rangeRestSym :: Eq t => [t] -> Symbol a t -> [Symbol a Range]
|
||||
rangeRestSym :: Ord t => Input t -> Symbol a t -> [Symbol a Range]
|
||||
rangeRestSym toks (Tok tok) = do rng <- rangeRestTok toks tok
|
||||
return (Tok rng)
|
||||
rangeRestSym _ (Cat c) = return (Cat c)
|
||||
|
||||
|
||||
rangeRestLin :: Eq t => [t] -> Lin c l t -> [Lin c l Range]
|
||||
rangeRestLin :: Ord t => Input t -> Lin c l t -> [Lin c l Range]
|
||||
rangeRestLin toks (Lin lbl syms) = do syms' <- mapM (rangeRestSym toks) syms
|
||||
return (Lin lbl syms')
|
||||
|
||||
|
||||
rangeRestRec :: Eq t => [t] -> LinRec c l t -> [LinRec c l Range]
|
||||
rangeRestRec :: Ord t => Input t -> LinRec c l t -> [LinRec c l Range]
|
||||
rangeRestRec toks = mapM (rangeRestLin toks)
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ replaceRec recs i rec = (fst tup) ++ [rec] ++ (tail $ snd tup)
|
||||
|
||||
--- Argument substitution ----------------------------------------------------
|
||||
|
||||
substArgSymbol :: Eq l => Int -> RangeRec l -> Symbol (c, l, Int) Range
|
||||
substArgSymbol :: Ord l => Int -> RangeRec l -> Symbol (c, l, Int) Range
|
||||
-> Symbol (c, l, Int) Range
|
||||
substArgSymbol i rec (Tok rng) = (Tok rng)
|
||||
substArgSymbol i rec (Cat (c, l, j))
|
||||
@@ -139,13 +139,13 @@ substArgSymbol i rec (Cat (c, l, j))
|
||||
| otherwise = (Cat (c, l, j))
|
||||
|
||||
|
||||
substArgLin :: Eq l => Int -> RangeRec l -> Lin c l Range
|
||||
substArgLin :: Ord l => Int -> RangeRec l -> Lin c l Range
|
||||
-> Lin c l Range
|
||||
substArgLin i rec (Lin lbl syms) =
|
||||
(Lin lbl (map (substArgSymbol i rec) syms))
|
||||
|
||||
|
||||
substArgRec :: Eq l => Int -> RangeRec l -> LinRec c l Range
|
||||
substArgRec :: Ord l => Int -> RangeRec l -> LinRec c l Range
|
||||
-> LinRec c l Range
|
||||
substArgRec i rec lins = map (substArgLin i rec) lins
|
||||
|
||||
@@ -153,7 +153,7 @@ substArgRec i rec lins = map (substArgLin i rec) lins
|
||||
--- Subsumation -------------------------------------------------------------
|
||||
|
||||
-- "rec' subsumes rec?"
|
||||
subsumes :: Eq l => RangeRec l -> RangeRec l -> Bool
|
||||
subsumes :: Ord l => RangeRec l -> RangeRec l -> Bool
|
||||
subsumes rec rec' = and [elem r rec' | r <- rec]
|
||||
|
||||
|
||||
|
||||
183
src/GF/Parsing/MCFG/ViaCFG.hs
Normal file
183
src/GF/Parsing/MCFG/ViaCFG.hs
Normal file
@@ -0,0 +1,183 @@
|
||||
{-- Module --------------------------------------------------------------------
|
||||
Filename: ApproxParse.hs
|
||||
Author: Håkan Burden
|
||||
Time-stamp: <2005-04-18, 14:56>
|
||||
|
||||
Description: An agenda-driven implementation of the active algorithm 4.3.4,
|
||||
parsing through context-free approximation as described in
|
||||
Ljunglöf (2004)
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
module ApproxParse where
|
||||
|
||||
|
||||
-- Haskell modules
|
||||
import List
|
||||
import Monad
|
||||
|
||||
-- GF modules
|
||||
import ConvertMCFGtoDecoratedCFG
|
||||
import qualified DecoratedCFParser as CFP
|
||||
import qualified DecoratedGrammar as CFG
|
||||
import Examples
|
||||
import GeneralChart
|
||||
import qualified MCFGrammar as MCFG
|
||||
import MCFParser
|
||||
import Nondet
|
||||
import Parser
|
||||
import Range
|
||||
|
||||
|
||||
{-- Datatypes -----------------------------------------------------------------
|
||||
Chart
|
||||
Item
|
||||
Key
|
||||
|
||||
|
||||
Item : Four different Items are used. PreMCFG for MCFG Pre Items, Pre are
|
||||
the Items returned by the pre-Functions and Mark are the
|
||||
corresponding Items for the mark-Functions. For convenience correctly
|
||||
marked Mark Items are converted to Passive Items.
|
||||
I use dottedrule for convenience to keep track of wich daughter's RangeRec to look for.
|
||||
AChart: A RedBlackMap with Items and Keys
|
||||
AKey :
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
--Ev ta bort några typer av Item och bara nyckla på det som är unikt för den typen...
|
||||
data Item n c l = PreMCFG (n, c) (RangeRec l) [RangeRec l]
|
||||
| Pre (n, c) (RangeRec l) [l] [RangeRec l]
|
||||
| Mark (n, c) (RangeRec l) (RangeRec l) [RangeRec l]
|
||||
| Passive (n, c) (RangeRec l) (RangeRec l)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
type AChart n c l = ParseChart (Item n c l) (AKey n c l)
|
||||
|
||||
data AKey n c l = Pr (n, c) l
|
||||
| Pm (n, c) l
|
||||
| Mk (RangeRec l)
|
||||
| Ps (RangeRec l)
|
||||
| Useless
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
|
||||
{-- Parsing -------------------------------------------------------------------
|
||||
recognize:
|
||||
parse : The Agenda consists of the Passive Items from context-free
|
||||
approximation (as PreMCFG Items) and the Pre Items inferred by
|
||||
pre-prediction.
|
||||
keyof : Given an Item returns an appropriate Key for the Chart
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
recognize strategy mcfg toks = chartMember (parse strategy mcfg toks)
|
||||
(Passive ("f", S)
|
||||
[("s" , MCFG.Range (0, n))]
|
||||
[("p" , MCFG.Range (0, n2)), ("q", MCFG.Range (n2, n))])
|
||||
(Ps [("s" , MCFG.Range (0, n))])
|
||||
where n = length toks
|
||||
n2 = n `div` 2
|
||||
|
||||
|
||||
--parse :: (Ord n, Ord NT, Ord String, Eq t) => CFP.Strategy -> MCFG.Grammar n NT String t -> [t]
|
||||
-- -> AChart n NT String
|
||||
parse strategy mcfg toks
|
||||
= buildChart keyof
|
||||
[preCombine, markPredict, markCombine, convert]
|
||||
(makePreItems (CFP.parse strategy (CFG.pInfo (convertGrammar mcfg)) [(S, "s")] toks) ++
|
||||
(prePredict mcfg))
|
||||
|
||||
|
||||
keyof :: Item n c l -> AKey n c l
|
||||
keyof (PreMCFG head [(lbl, rng)] _) = Pm head lbl
|
||||
keyof (Pre head _ (lbl:lbls) _) = Pr head lbl
|
||||
keyof (Mark _ _ _ (rec:recs)) = Mk rec
|
||||
keyof (Passive _ rec _) = Ps rec
|
||||
keyof _ = Useless
|
||||
|
||||
|
||||
{-- Initializing agenda -------------------------------------------------------
|
||||
makePreItems:
|
||||
------------------------------------------------------------------------------}
|
||||
|
||||
makePreItems :: (Eq c, Ord i) => CFG.Grammar n (Edge (c, l)) i t -> [Item n c l]
|
||||
makePreItems cfchart
|
||||
= [ PreMCFG (fun, cat) [(lbl, MCFG.makeRange (i, j))] (symToRec beta) |
|
||||
CFG.Rule (Edge i j (cat,lbl)) beta fun <- cfchart ]
|
||||
|
||||
|
||||
prePredict :: (Ord n, Ord c, Ord l) => MCFG.Grammar n c l t -> [Item n c l]
|
||||
prePredict mcfg =
|
||||
[ Pre (f, nt) [] (getLables lins) (replicate (nrOfCats (head lins)) []) |
|
||||
MCFG.Rule nt nts lins f <- mcfg ]
|
||||
|
||||
|
||||
{-- Inference rules ---------------------------------------------------------
|
||||
prePredict :
|
||||
preCombine :
|
||||
markPredict:
|
||||
markCombine:
|
||||
convert :
|
||||
----------------------------------------------------------------------------}
|
||||
|
||||
preCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
preCombine chart (Pre head rec (l:ls) recs) =
|
||||
[ Pre head (rec ++ [(l, r)]) ls recs'' |
|
||||
PreMCFG head [(l, r)] recs' <- chartLookup chart (Pm head l),
|
||||
recs'' <- solutions (unifyRangeRecs recs recs') ]
|
||||
preCombine chart (PreMCFG head [(l, r)] recs) =
|
||||
[ Pre head (rec ++ [(l, r)]) ls recs'' |
|
||||
Pre head rec (l:ls) recs' <- chartLookup chart (Pr head l),
|
||||
recs'' <- solutions (unifyRangeRecs recs recs') ]
|
||||
preCombine _ _ = []
|
||||
|
||||
|
||||
markPredict :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
markPredict _ (Pre (n, c) rec [] recs) = [Mark (n, c) rec [] recs]
|
||||
markPredict _ _ = []
|
||||
|
||||
|
||||
markCombine :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
markCombine chart (Mark (f, c) rec mRec (r:recs)) =
|
||||
[ Mark (f, c) rec (mRec ++ r) recs |
|
||||
Passive _ r _ <- chartLookup chart (Ps r)]
|
||||
markCombine chart (Passive _ r _) =
|
||||
[ Mark (f, c) rec (mRec++r) recs |
|
||||
Mark (f, c) rec mRec (r:recs) <- chartLookup chart (Mk r) ]
|
||||
markCombine _ _ = []
|
||||
|
||||
|
||||
convert :: (Ord n, Ord c, Ord l) => ParseChart (Item n c l) (AKey n c l)
|
||||
-> Item n c l -> [Item n c l]
|
||||
convert _ (Mark (f, c) r rec []) = [Passive (f, c) r rec]
|
||||
convert _ _ = []
|
||||
|
||||
|
||||
{-- Help functions ----------------------------------------------------------------
|
||||
getRHS :
|
||||
getLables:
|
||||
symToRec :
|
||||
----------------------------------------------------------------------------------}
|
||||
|
||||
-- FULKOD !
|
||||
nrOfCats :: Eq c => MCFG.Lin c l t -> Int
|
||||
nrOfCats (MCFG.Lin l syms) = length $ nub [(c, i) | Cat (c, l, i) <- syms]
|
||||
|
||||
|
||||
--
|
||||
getLables :: LinRec c l t -> [l]
|
||||
getLables lins = [l | MCFG.Lin l syms <- lins]
|
||||
|
||||
|
||||
--
|
||||
symToRec :: Ord i => [Symbol (Edge (c, l), i) d] -> [[(l, MCFG.Range)]]
|
||||
symToRec beta = map makeLblRng $ groupBy (\(_, d) (_, d') -> (d == d'))
|
||||
$ sortBy sBd [(Edge i j (c, l) , d) | Cat (Edge i j (c, l), d)
|
||||
<- beta]
|
||||
where makeLblRng edges = [(l, (MCFG.makeRange (i, j))) | (Edge i j (_, l), _)
|
||||
<- edges]
|
||||
sBd (_, d) (_, d')
|
||||
| d < d' = LT
|
||||
| d > d' = GT
|
||||
| otherwise = EQ
|
||||
@@ -5,9 +5,9 @@
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/14 18:38:36 $
|
||||
-- > CVS $Date: 2005/04/20 12:49:45 $
|
||||
-- > CVS $Author: peb $
|
||||
-- > CVS $Revision: 1.18 $
|
||||
-- > CVS $Revision: 1.19 $
|
||||
--
|
||||
-- (Description of the module)
|
||||
-----------------------------------------------------------------------------
|
||||
@@ -69,7 +69,7 @@ parseStringC opts0 sg cat s
|
||||
let opts = unionOptions opts0 $ stateOptions sg
|
||||
pm = maybe "" id $ getOptVal opts0 useParser -- -parser=pm
|
||||
tok = customOrDefault opts useTokenizer customTokenizer sg
|
||||
ts <- return $ New.parse pm (pInfo sg) (absId sg) cat (tok s)
|
||||
ts <- checkErr $ New.parse pm (pInfo sg) (absId sg) cat (tok s)
|
||||
ts' <- mapM (checkErr . annotate (stateGrammarST sg) . refreshMetas []) ts
|
||||
return $ optIntOrAll opts flagNumber ts'
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
följande är en föreslagen hierarkisk modulstruktur för GF 2.2
|
||||
|
||||
katalogen src kommer att innehålla (åtminstone) följande:
|
||||
* katalogen src kommer att innehålla (åtminstone) följande:
|
||||
- GF.hs modulen Main
|
||||
- GF/ resten av Haskell-filerna
|
||||
- JavaGUI/ java-filer
|
||||
@@ -12,249 +12,65 @@ katalogen src kommer att inneh
|
||||
- run-haddock.csh
|
||||
- check-haddock.perl
|
||||
|
||||
modifiera gärna strukturen och kommentarerna nedan
|
||||
----------------------------------------------------------------------
|
||||
|
||||
GF
|
||||
* struktur för haskell-filer:
|
||||
|
||||
GF/
|
||||
GFModes - flyttas till Shell??
|
||||
GF.Formalism (finns redan)
|
||||
GF.Conversion (...)
|
||||
GF.Parsing (heter nu GF.NewParsing, bör byta namn)
|
||||
GF.System (finns redan, för filer som har med
|
||||
operativsystemet att göra, t.ex. Tracing och Arch)
|
||||
|
||||
API/
|
||||
API
|
||||
BatchTranslate
|
||||
GrammarToHaskell
|
||||
IOGrammar
|
||||
MyParser - obsolet?
|
||||
filerna GF.NewParsing.GeneralChart och GF.NewParsing.IncrementalChart
|
||||
flyttas och byter namn till GF.Data.GeneralDeduction och GF.Data.IncrementalDeduction
|
||||
|
||||
CF/ - bör så småningom försvinna
|
||||
(ersättas med mer generell CFG-datatyp)
|
||||
CF
|
||||
CFIdent
|
||||
CFtoGrammar
|
||||
CFtoSRG
|
||||
CanonToCF
|
||||
ChartParser - obsolet.
|
||||
EBNF - ta bort parserkombinatorerna -- skapa en bnfc-fil
|
||||
PPrCF
|
||||
PrLBNF
|
||||
Profile
|
||||
vart ska filerna GFModes, Help, HelpFile, Today flyttas?
|
||||
förslag: Help, HelpFile, Today -> GF.System
|
||||
|
||||
Canon/
|
||||
AbsGFC [1/2 - AUTO]
|
||||
CMacros
|
||||
CanonToGrammar
|
||||
GFC
|
||||
GetGFC
|
||||
Look
|
||||
MkGFC
|
||||
PrExp
|
||||
Share
|
||||
Unlex
|
||||
LexGFC [AUTO]
|
||||
ParGFC [AUTO]
|
||||
PrintGFC [1/2 - AUTO]
|
||||
SkelGFC [AUTO]
|
||||
TestGFC [AUTO]
|
||||
api -> GF.API
|
||||
cf -> GF.CF
|
||||
canonical -> GF.Canon
|
||||
compile -> GF.Compile
|
||||
|
||||
[GFC.cf] bnfc-fil
|
||||
[ParGFC.y] [AUTO] happy-fil
|
||||
[LexGFC.x] [AUTO] alex-fil
|
||||
infra -> GF.Data (datatyper, algoritmer - helst ej direkt beroende av GF)
|
||||
GF.Infra (GF-infrastruktur)
|
||||
GF.Text (t.ex. olika språk, teckenkodningar)
|
||||
|
||||
Compile/
|
||||
CheckGrammar
|
||||
Compile
|
||||
Extend
|
||||
GetGrammar
|
||||
GrammarToCanon
|
||||
MkResource
|
||||
MkUnion
|
||||
ModDeps
|
||||
NewRename
|
||||
Optimize
|
||||
PGrammar
|
||||
PrOld
|
||||
Rebuild
|
||||
RemoveLiT
|
||||
Rename
|
||||
ShellState
|
||||
Update
|
||||
(...) -> GF.Fudgets (alla filer som har med fudgets att göra)
|
||||
grammar -> GF.Grammar
|
||||
cfgm -> GF.CFGM
|
||||
source -> GF.Source
|
||||
shell -> GF.Shell
|
||||
speech -> GF.Speech
|
||||
translate -> GF.Translate
|
||||
useGrammar -> GF.UseGrammar
|
||||
visuali... -> GF.Visualization
|
||||
|
||||
Data/
|
||||
Assoc
|
||||
Glue
|
||||
Map - slås ihop med RedBlackSet
|
||||
OrdMap2 - obsolet - använd Assoc istället
|
||||
OrdSet - obsolet - använd SortedList istället
|
||||
RedBlack \ slås samman
|
||||
RedBlackSet /
|
||||
SharedString [AUTO?]
|
||||
SortedList
|
||||
Trie \ slås samman
|
||||
Trie2 /
|
||||
Zipper
|
||||
CheckM
|
||||
ErrM
|
||||
GenneralInduction
|
||||
IncrementalInduction
|
||||
parsers -> filerna (ParGF och ParGFC) flyttas till där GF.cf och GFC.cf finns
|
||||
|
||||
Fudgets/
|
||||
EventF
|
||||
FudgetOps
|
||||
UnicodeF
|
||||
WriteF
|
||||
CommandF
|
||||
util -> Extras (kanske på toppnivå - inte GF.Extras)
|
||||
|
||||
Grammar/
|
||||
AbsCompute
|
||||
Abstract
|
||||
AppPredefined
|
||||
Compute
|
||||
Grammar
|
||||
Lockfield
|
||||
LookAbs
|
||||
Lookup
|
||||
MMacros
|
||||
Macros
|
||||
PatternMatch
|
||||
PrGrammar
|
||||
Refresh
|
||||
ReservedWords
|
||||
TC
|
||||
TypeCheck
|
||||
Unify
|
||||
Values
|
||||
|
||||
CFGM/
|
||||
AbsCFG [AUTO]
|
||||
LexCFG [AUTO]
|
||||
ParCFG [AUTO]
|
||||
PrintCFG [AUTO]
|
||||
PrintCFGrammar
|
||||
* java-katalogen byter namn:
|
||||
|
||||
[CFG.cf] bnfc-fil
|
||||
[ParCFG.y] [AUTO] happy-fil
|
||||
[LexCFG.x] [AUTO] alex-fil
|
||||
java -> JavaGUI
|
||||
|
||||
Source/
|
||||
AbsGF [AUTO]
|
||||
LexGF [AUTO]
|
||||
ParGF [AUTO]
|
||||
PrintGF [AUTO]
|
||||
SkelGF [AUTO]
|
||||
TestGF [AUTO]
|
||||
SourceToGrammar
|
||||
GrammarToSource
|
||||
|
||||
[GF.cf] bnfc-fil
|
||||
[ParGF.y] [AUTO] happy-fil
|
||||
[LexGF.x] [AUTO] alex-fil
|
||||
* haddock samlas på ett ställe:
|
||||
|
||||
Infra/
|
||||
Comments
|
||||
Ident
|
||||
Modules
|
||||
Operations
|
||||
Option
|
||||
Parsers - nästan obsolet (används bara i EBNF)
|
||||
ReadFiles
|
||||
Str
|
||||
UseIO
|
||||
haddock-check.perl -> haddock/check-haddock.perl
|
||||
haddock-script.csh -> haddock/run-haddock.csh
|
||||
haddock-resources/ -> haddock/resources/
|
||||
haddock/ -> haddock/html
|
||||
|
||||
Formalism/
|
||||
Conversion/
|
||||
Parsing/ dela upp i Grammar och Parsing?
|
||||
(då måste nuvarande Grammar byta namn)
|
||||
CFGrammar -> Grammar
|
||||
CFParserGeneral
|
||||
CFParserIncremental
|
||||
ConvertGFCtoMCFG -> Grammar
|
||||
ConvertGrammar -> Grammar
|
||||
ConvertMCFGtoCFG -> Grammar
|
||||
GeneralChart
|
||||
GrammarTypes -> Grammar
|
||||
IncrementalChart
|
||||
MCFGrammar -> Grammar
|
||||
MCFParserBasic
|
||||
MCFRange - obsolet
|
||||
ParseCF
|
||||
ParseCFG
|
||||
ParseGFC
|
||||
ParseMCFG
|
||||
Parser
|
||||
PrintParser
|
||||
PrintSimplifiedTerm
|
||||
|
||||
Shell/
|
||||
CommandL
|
||||
Commands
|
||||
JGF
|
||||
PShell
|
||||
Shell
|
||||
ShellCommands
|
||||
SubShell
|
||||
TeachYourself
|
||||
* kataloger som kan tas bort?
|
||||
|
||||
Speech/
|
||||
PrGSL
|
||||
PrJSGF
|
||||
SRG
|
||||
TransformCFG
|
||||
|
||||
System/
|
||||
Arch
|
||||
ArchEdit
|
||||
Tracing
|
||||
|
||||
Text/
|
||||
Arabic
|
||||
Devanagari
|
||||
Ethiopic
|
||||
ExtendedArabic
|
||||
ExtraDiacritics
|
||||
Greek
|
||||
Hebrew
|
||||
Hiragana
|
||||
LatinASupplement
|
||||
OCSCyrillic
|
||||
Russian
|
||||
Tamil
|
||||
Text
|
||||
UTF8
|
||||
Unicode
|
||||
|
||||
Translate/
|
||||
GFT
|
||||
|
||||
UseGrammar/
|
||||
Custom
|
||||
Editing
|
||||
Generate
|
||||
GetTree
|
||||
Information
|
||||
Linear
|
||||
MoreCustom - obsolet?
|
||||
Morphology
|
||||
Paraphrases
|
||||
Parsing
|
||||
Randomized
|
||||
RealMoreCustom - obsolet?
|
||||
Session
|
||||
TeachYourself
|
||||
Tokenize
|
||||
Transfer
|
||||
|
||||
Util/ byta namn till Extra?
|
||||
Today [AUTO]
|
||||
HelpFile [AUTO]
|
||||
AlphaConvGF
|
||||
GFDoc
|
||||
Htmls
|
||||
MkHelpFile
|
||||
HelpFile byta namn till HelpFile.txt?
|
||||
|
||||
[mkHelpFile.perl] ersättning för MkHelpFile?
|
||||
[mktoday.sh]
|
||||
|
||||
Visualization/
|
||||
VisualizeGrammar
|
||||
for-xxx (obsoleta)
|
||||
haddock
|
||||
newparsing (tom)
|
||||
notrace (tom)
|
||||
trace (tom)
|
||||
parsers (tom efter flytt av filer)
|
||||
old-stuff (obsolet)
|
||||
GF.OldParsing (obsolet)
|
||||
|
||||
Reference in New Issue
Block a user