mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-11 13:59:31 -06:00
424 lines
15 KiB
Haskell
424 lines
15 KiB
Haskell
----------------------------------------------------------------------
|
|
-- |
|
|
-- Maintainer : PL
|
|
-- Stability : (stable)
|
|
-- Portability : (portable)
|
|
--
|
|
-- > CVS $Date: 2005/05/13 12:40:19 $
|
|
-- > CVS $Author: peb $
|
|
-- > CVS $Revision: 1.6 $
|
|
--
|
|
-- Basic type declarations and functions for grammar formalisms
|
|
-----------------------------------------------------------------------------
|
|
|
|
|
|
module GF.Formalism.Utilities where
|
|
|
|
import Control.Monad
|
|
import Data.Array
|
|
import Data.List (groupBy)
|
|
|
|
import GF.Data.SortedList
|
|
import GF.Data.Assoc
|
|
import GF.Data.Utilities (sameLength, foldMerge, splitBy)
|
|
|
|
import GF.Infra.PrintClass
|
|
|
|
------------------------------------------------------------
|
|
-- * symbols
|
|
|
|
data Symbol c t = Cat c | Tok t
|
|
deriving (Eq, Ord, Show)
|
|
|
|
symbol :: (c -> a) -> (t -> a) -> Symbol c t -> a
|
|
symbol fc ft (Cat cat) = fc cat
|
|
symbol fc ft (Tok tok) = ft tok
|
|
|
|
mapSymbol :: (c -> d) -> (t -> u) -> Symbol c t -> Symbol d u
|
|
mapSymbol fc ft = symbol (Cat . fc) (Tok . ft)
|
|
|
|
filterCats :: [Symbol c t] -> [c]
|
|
filterCats syms = [ cat | Cat cat <- syms ]
|
|
|
|
filterToks :: [Symbol c t] -> [t]
|
|
filterToks syms = [ tok | Tok tok <- syms ]
|
|
|
|
------------------------------------------------------------
|
|
-- * edges
|
|
|
|
data Edge s = Edge Int Int s
|
|
deriving (Eq, Ord, Show)
|
|
|
|
instance Functor Edge where
|
|
fmap f (Edge i j s) = Edge i j (f s)
|
|
|
|
|
|
------------------------------------------------------------
|
|
-- * representaions of input tokens
|
|
|
|
data Input t = MkInput { inputEdges :: [Edge t],
|
|
inputBounds :: (Int, Int),
|
|
inputFrom :: Array Int (Assoc t [Int]),
|
|
inputTo :: Array Int (Assoc t [Int]),
|
|
inputToken :: Assoc t [(Int, Int)]
|
|
}
|
|
|
|
makeInput :: Ord t => [Edge t] -> Input t
|
|
input :: Ord t => [t] -> Input t
|
|
inputMany :: Ord t => [[t]] -> Input t
|
|
|
|
instance Show t => Show (Input t) where
|
|
show input = "makeInput " ++ show (inputEdges input)
|
|
|
|
----------
|
|
|
|
makeInput inEdges | null inEdges = input []
|
|
| otherwise = MkInput inEdges inBounds inFrom inTo inToken
|
|
where inBounds = foldr1 minmax [ (i, j) | Edge i j _ <- inEdges ]
|
|
where minmax (a, b) (a', b') = (min a a', max b b')
|
|
inFrom = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds $
|
|
[ (i, [(tok, j)]) | Edge i j tok <- inEdges ]
|
|
inTo = fmap (accumAssoc id) $ accumArray (<++>) [] inBounds
|
|
[ (j, [(tok, i)]) | Edge i j tok <- inEdges ]
|
|
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
|
|
|
|
input toks = MkInput inEdges inBounds inFrom inTo inToken
|
|
where inEdges = zipWith3 Edge [0..] [1..] toks
|
|
inBounds = (0, length toks)
|
|
inFrom = listArray inBounds $
|
|
[ listAssoc [(tok, [j])] | (tok, j) <- zip toks [1..] ] ++ [ listAssoc [] ]
|
|
inTo = listArray inBounds $
|
|
[ listAssoc [] ] ++ [ listAssoc [(tok, [i])] | (tok, i) <- zip toks [0..] ]
|
|
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
|
|
|
|
inputMany toks = MkInput inEdges inBounds inFrom inTo inToken
|
|
where inEdges = [ Edge i j t | (i, j, ts) <- zip3 [0..] [1..] toks, t <- ts ]
|
|
inBounds = (0, length toks)
|
|
inFrom = listArray inBounds $
|
|
[ listAssoc [ (t, [j]) | t <- nubsort ts ] | (ts, j) <- zip toks [1..] ]
|
|
++ [ listAssoc [] ]
|
|
inTo = listArray inBounds $
|
|
[ listAssoc [] ] ++
|
|
[ listAssoc [ (t, [i]) | t <- nubsort ts ] | (ts, i) <- zip toks [0..] ]
|
|
inToken = accumAssoc id [ (tok, (i, j)) | Edge i j tok <- inEdges ]
|
|
|
|
|
|
------------------------------------------------------------
|
|
-- * representations of syntactical analyses
|
|
|
|
-- ** charts as finite maps over edges
|
|
|
|
-- | The values of the chart, a list of key-daughters pairs,
|
|
-- has unique keys. In essence, it is a map from 'n' to daughters.
|
|
-- The daughters should be a set (not necessarily sorted) of rhs's.
|
|
type SyntaxChart n e = Assoc e [SyntaxNode n [e]]
|
|
|
|
data SyntaxNode n e = SMeta
|
|
| SNode n [e]
|
|
| SString String
|
|
| SInt Integer
|
|
| SFloat Double
|
|
deriving (Eq,Ord)
|
|
|
|
groupSyntaxNodes :: Ord n => [SyntaxNode n e] -> [SyntaxNode n [e]]
|
|
groupSyntaxNodes [] = []
|
|
groupSyntaxNodes (SNode n0 es0:xs) = (SNode n0 (es0:ess)) : groupSyntaxNodes xs'
|
|
where
|
|
(ess,xs') = span xs
|
|
|
|
span [] = ([],[])
|
|
span xs@(SNode n es:xs')
|
|
| n0 == n = let (ess,xs) = span xs' in (es:ess,xs)
|
|
| otherwise = ([],xs)
|
|
groupSyntaxNodes (SString s:xs) = (SString s) : groupSyntaxNodes xs
|
|
groupSyntaxNodes (SInt n:xs) = (SInt n) : groupSyntaxNodes xs
|
|
groupSyntaxNodes (SFloat f:xs) = (SFloat f) : groupSyntaxNodes xs
|
|
|
|
-- better(?) representation of forests:
|
|
-- data Forest n = F (SMap n (SList [Forest n])) Bool
|
|
-- ==
|
|
-- type Forest n = GeneralTrie n (SList [Forest n]) Bool
|
|
-- (the Bool == isMeta)
|
|
|
|
-- ** syntax forests
|
|
|
|
data SyntaxForest n = FMeta
|
|
| FNode n [[SyntaxForest n]]
|
|
-- ^ The outer list should be a set (not necessarily sorted)
|
|
-- of possible alternatives. Ie. the outer list
|
|
-- is a disjunctive node, and the inner lists
|
|
-- are (conjunctive) concatenative nodes
|
|
| FString String
|
|
| FInt Integer
|
|
| FFloat Double
|
|
deriving (Eq, Ord, Show)
|
|
|
|
instance Functor SyntaxForest where
|
|
fmap f (FNode n forests) = FNode (f n) $ map (map (fmap f)) forests
|
|
fmap _ (FString s) = FString s
|
|
fmap _ (FInt n) = FInt n
|
|
fmap _ (FFloat f) = FFloat f
|
|
fmap _ (FMeta) = FMeta
|
|
|
|
forestName :: SyntaxForest n -> Maybe n
|
|
forestName (FNode n _) = Just n
|
|
forestName _ = Nothing
|
|
|
|
unifyManyForests :: (Monad m, Eq n) => [SyntaxForest n] -> m (SyntaxForest n)
|
|
unifyManyForests = foldM unifyForests FMeta
|
|
|
|
-- | two forests can be unified, if either is 'FMeta', or both have the same parent,
|
|
-- and all children can be unified
|
|
unifyForests :: (Monad m, Eq n) => SyntaxForest n -> SyntaxForest n -> m (SyntaxForest n)
|
|
unifyForests FMeta forest = return forest
|
|
unifyForests forest FMeta = return forest
|
|
unifyForests (FNode name1 children1) (FNode name2 children2)
|
|
| name1 == name2 && not (null children) = return $ FNode name1 children
|
|
where children = [ forests | forests1 <- children1, forests2 <- children2,
|
|
sameLength forests1 forests2,
|
|
forests <- zipWithM unifyForests forests1 forests2 ]
|
|
unifyForests (FString s1) (FString s2)
|
|
| s1 == s2 = return $ FString s1
|
|
unifyForests (FInt n1) (FInt n2)
|
|
| n1 == n2 = return $ FInt n1
|
|
unifyForests (FFloat f1) (FFloat f2)
|
|
| f1 == f2 = return $ FFloat f1
|
|
unifyForests _ _ = fail "forest unification failure"
|
|
|
|
{- måste tänka mer på detta:
|
|
compactForests :: Ord n => [SyntaxForest n] -> SList (SyntaxForest n)
|
|
compactForests = map joinForests . groupBy eqNames . sortForests
|
|
where eqNames f g = forestName f == forestName g
|
|
sortForests = foldMerge mergeForests [] . map return
|
|
mergeForests [] gs = gs
|
|
mergeForests fs [] = fs
|
|
mergeForests fs@(f:fs') gs@(g:gs')
|
|
= case forestName f `compare` forestName g of
|
|
LT -> f : mergeForests fs' gs
|
|
GT -> g : mergeForests fs gs'
|
|
EQ -> f : g : mergeForests fs' gs'
|
|
joinForests fs = case forestName (head fs) of
|
|
Nothing -> FMeta
|
|
Just name -> FNode name $
|
|
compactDaughters $
|
|
concat [ fss | FNode _ fss <- fs ]
|
|
compactDaughters fss = case head fss of
|
|
[] -> [[]]
|
|
[_] -> map return $ compactForests $ concat fss
|
|
_ -> nubsort fss
|
|
-}
|
|
|
|
-- ** syntax trees
|
|
|
|
data SyntaxTree n = TMeta
|
|
| TNode n [SyntaxTree n]
|
|
| TString String
|
|
| TInt Integer
|
|
| TFloat Double
|
|
deriving (Eq, Ord, Show)
|
|
|
|
instance Functor SyntaxTree where
|
|
fmap f (TNode n trees) = TNode (f n) $ map (fmap f) trees
|
|
fmap _ (TString s) = TString s
|
|
fmap _ (TInt n) = TInt n
|
|
fmap _ (TFloat f) = TFloat f
|
|
fmap _ (TMeta) = TMeta
|
|
|
|
treeName :: SyntaxTree n -> Maybe n
|
|
treeName (TNode n _) = Just n
|
|
treeName (TMeta) = Nothing
|
|
|
|
unifyManyTrees :: (Monad m, Eq n) => [SyntaxTree n] -> m (SyntaxTree n)
|
|
unifyManyTrees = foldM unifyTrees TMeta
|
|
|
|
-- | two trees can be unified, if either is 'TMeta',
|
|
-- or both have the same parent, and their children can be unified
|
|
unifyTrees :: (Monad m, Eq n) => SyntaxTree n -> SyntaxTree n -> m (SyntaxTree n)
|
|
unifyTrees TMeta tree = return tree
|
|
unifyTrees tree TMeta = return tree
|
|
unifyTrees (TNode name1 children1) (TNode name2 children2)
|
|
| name1 == name2 && sameLength children1 children2
|
|
= liftM (TNode name1) $ zipWithM unifyTrees children1 children2
|
|
unifyTrees (TString s1) (TString s2)
|
|
| s1 == s2 = return (TString s1)
|
|
unifyTrees (TInt n1) (TInt n2)
|
|
| n1 == n2 = return (TInt n1)
|
|
unifyTrees (TFloat f1) (TFloat f2)
|
|
| f1 == f2 = return (TFloat f1)
|
|
unifyTrees _ _ = fail "tree unification failure"
|
|
|
|
-- ** conversions between representations
|
|
|
|
chart2forests :: (Ord n, Ord e) =>
|
|
SyntaxChart n e -- ^ The complete chart
|
|
-> (e -> Bool) -- ^ When is an edge 'FMeta'?
|
|
-> [e] -- ^ The starting edges
|
|
-> SList (SyntaxForest n) -- ^ The result has unique keys, ie. all 'n' are joined together.
|
|
-- In essence, the result is a map from 'n' to forest daughters
|
|
|
|
-- simplest implementation
|
|
|
|
chart2forests chart isMeta = concatMap (edge2forests [])
|
|
where edge2forests edges edge
|
|
| isMeta edge = [FMeta]
|
|
| edge `elem` edges = []
|
|
| otherwise = map (item2forest (edge:edges)) $ chart ? edge
|
|
item2forest edges (SMeta) = FMeta
|
|
item2forest edges (SNode name children) =
|
|
FNode name $ children >>= mapM (edge2forests edges)
|
|
item2forest edges (SString s) = FString s
|
|
item2forest edges (SInt n) = FInt n
|
|
item2forest edges (SFloat f) = FFloat f
|
|
|
|
{- -before AR inserted peb's patch 8/7/2007, this was:
|
|
|
|
chart2forests chart isMeta = concatMap edge2forests
|
|
where edge2forests edge = if isMeta edge then [FMeta]
|
|
else map item2forest $ chart ? edge
|
|
item2forest (SMeta) = FMeta
|
|
item2forest (SNode name children) = FNode name $ children >>= mapM edge2forests
|
|
item2forest (SString s) = FString s
|
|
item2forest (SInt n) = FInt n
|
|
item2forest (SFloat f) = FFloat f
|
|
|
|
-}
|
|
|
|
{-
|
|
-- more intelligent(?) implementation,
|
|
-- requiring that charts and forests are sorted maps and sorted sets
|
|
chart2forests chart isMeta = es2fs
|
|
where e2fs e = if isMeta e then [FMeta] else map i2f $ chart ? e
|
|
es2fs es = if null metas then fs else FMeta : fs
|
|
where (metas, nonMetas) = splitBy isMeta es
|
|
fs = map i2f $ unionMap (<++>) $ map (chart ?) nonMetas
|
|
i2f (name, children) = FNode name $
|
|
case head children of
|
|
[] -> [[]]
|
|
[_] -> map return $ es2fs $ concat children
|
|
_ -> children >>= mapM e2fs
|
|
-}
|
|
|
|
|
|
forest2trees :: SyntaxForest n -> SList (SyntaxTree n)
|
|
forest2trees (FNode n forests) = map (TNode n) $ forests >>= mapM forest2trees
|
|
forest2trees (FString s) = [TString s]
|
|
forest2trees (FInt n) = [TInt n]
|
|
forest2trees (FFloat f) = [TFloat f]
|
|
forest2trees (FMeta) = [TMeta]
|
|
|
|
----------------------------------------------------------------------
|
|
-- * profiles
|
|
|
|
-- | Pairing a rule name with a profile
|
|
data NameProfile a = Name a [Profile (SyntaxForest a)]
|
|
deriving (Eq, Ord, Show)
|
|
|
|
name2fun :: NameProfile a -> a
|
|
name2fun (Name fun _) = fun
|
|
|
|
-- | A profile is a simple representation of a function on a number of arguments.
|
|
-- We only use lists of profiles
|
|
data Profile a = Unify [Int] -- ^ The Int's are the argument positions.
|
|
-- 'Unify []' will become a metavariable,
|
|
-- 'Unify [a,b]' means that the arguments are equal,
|
|
| Constant a
|
|
deriving (Eq, Ord, Show)
|
|
|
|
instance Functor Profile where
|
|
fmap f (Constant a) = Constant (f a)
|
|
fmap f (Unify xs) = Unify xs
|
|
|
|
-- | a function name where the profile does not contain arguments
|
|
-- (i.e. denoting a constant, not a function)
|
|
constantNameToForest :: NameProfile a -> SyntaxForest a
|
|
constantNameToForest name@(Name fun profile) = FNode fun [map unConstant profile]
|
|
where unConstant (Constant a) = a
|
|
unConstant (Unify []) = FMeta
|
|
unConstant _ = error $ "constantNameToForest: the profile should not contain arguments"
|
|
|
|
-- | profile application; we need some way of unifying a list of arguments
|
|
applyProfile :: ([b] -> a) -> [Profile a] -> [b] -> [a]
|
|
applyProfile unify profile args = map apply profile
|
|
where apply (Unify xs) = unify $ map (args !!) xs
|
|
apply (Constant a) = a
|
|
|
|
-- | monadic profile application
|
|
applyProfileM :: Monad m => ([b] -> m a) -> [Profile a] -> [b] -> m [a]
|
|
applyProfileM unify profile args = mapM apply profile
|
|
where apply (Unify xs) = unify $ map (args !!) xs
|
|
apply (Constant a) = return a
|
|
|
|
-- | profile composition:
|
|
--
|
|
-- > applyProfile u z (ps `composeProfiles` qs) args
|
|
-- > ==
|
|
-- > applyProfile u z ps (applyProfile u z qs args)
|
|
--
|
|
-- compare with function composition
|
|
--
|
|
-- > (p . q) arg
|
|
-- > ==
|
|
-- > p (q arg)
|
|
--
|
|
-- Note that composing an 'Constant' with two or more arguments returns an error
|
|
-- (since 'Unify' can only take arguments) -- this might change in the future, if there is a need.
|
|
composeProfiles :: [Profile a] -> [Profile a] -> [Profile a]
|
|
composeProfiles ps qs = map compose ps
|
|
where compose (Unify [x]) = qs !! x
|
|
compose (Unify xs) = Unify [ y | x <- xs, let Unify ys = qs !! x, y <- ys ]
|
|
compose constant = constant
|
|
|
|
|
|
|
|
------------------------------------------------------------
|
|
-- pretty-printing
|
|
|
|
instance (Print c, Print t) => Print (Symbol c t) where
|
|
prt = symbol prt (simpleShow . prt)
|
|
where simpleShow str = "\"" ++ concatMap mkEsc str ++ "\""
|
|
mkEsc '\\' = "\\\\"
|
|
mkEsc '\"' = "\\\""
|
|
mkEsc '\n' = "\\n"
|
|
mkEsc '\t' = "\\t"
|
|
mkEsc chr = [chr]
|
|
prtList = prtSep " "
|
|
|
|
instance Print t => Print (Input t) where
|
|
prt input = "input " ++ prt (inputEdges input)
|
|
|
|
instance (Print s) => Print (Edge s) where
|
|
prt (Edge i j s) = "[" ++ show i ++ "-" ++ show j ++ ": " ++ prt s ++ "]"
|
|
prtList = prtSep ""
|
|
|
|
instance (Print s) => Print (SyntaxTree s) where
|
|
prt (TNode s trees)
|
|
| null trees = prt s
|
|
| otherwise = "(" ++ prt s ++ prtBefore " " trees ++ ")"
|
|
prt (TString s) = show s
|
|
prt (TInt n) = show n
|
|
prt (TFloat f) = show f
|
|
prt (TMeta) = "?"
|
|
prtList = prtAfter "\n"
|
|
|
|
instance (Print s) => Print (SyntaxForest s) where
|
|
prt (FNode s []) = "(" ++ prt s ++ " - ERROR: null forests)"
|
|
prt (FNode s [[]]) = prt s
|
|
prt (FNode s [forests]) = "(" ++ prt s ++ prtBefore " " forests ++ ")"
|
|
prt (FNode s children) = "{" ++ prtSep " | " [ prt s ++ prtBefore " " forests |
|
|
forests <- children ] ++ "}"
|
|
prt (FString s) = show s
|
|
prt (FInt n) = show n
|
|
prt (FFloat f) = show f
|
|
prt (FMeta) = "?"
|
|
prtList = prtAfter "\n"
|
|
|
|
instance Print a => Print (Profile a) where
|
|
prt (Unify []) = "?"
|
|
prt (Unify args) = prtSep "=" args
|
|
prt (Constant a) = prt a
|
|
|
|
instance Print a => Print (NameProfile a) where
|
|
prt (Name fun profile) = prt fun ++ prt profile
|
|
|
|
|