mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-21 00:52:51 -06:00
added some generation facilities
This commit is contained in:
@@ -14,6 +14,27 @@ Changes in functionality since May 17, 2005, release of GF Version 2.2
|
|||||||
|
|
||||||
<p>
|
<p>
|
||||||
|
|
||||||
|
31/3 (AR) Added flags and options to some commands, to help generation:
|
||||||
|
<ul>
|
||||||
|
<li> <tt>gt -noexpand=NP,V,TV</tt> does not expand these categories,
|
||||||
|
but only generates metavariables for them.
|
||||||
|
<li> <tt>gt -doexpand=NP,V,TV</tt> only expands these categories,
|
||||||
|
and generates metavariables for others.
|
||||||
|
<li> <tt>gr -cf</tt> has the same flags.
|
||||||
|
<li> <tt>l -mark=metacat</tt> marks the metavariables with their categories.
|
||||||
|
<li> <tt>p -fail</tt> marks with <tt>#FAIL</tt> strings that have no parse.
|
||||||
|
<li> <tt>p -ambiguous</tt> marks as <tt>#AMBIGUOUS</tt>
|
||||||
|
strings that have more than one parse.
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
22/12 <b>Release of GF 2.5</b>.
|
||||||
|
|
||||||
|
<p>
|
||||||
|
|
||||||
16/3 (AR) Added two flag values to <tt>pt -transform=X</tt>:
|
16/3 (AR) Added two flag values to <tt>pt -transform=X</tt>:
|
||||||
<tt>nodup</tt> which excludes terms where a constant is duplicated,
|
<tt>nodup</tt> which excludes terms where a constant is duplicated,
|
||||||
and
|
and
|
||||||
|
|||||||
169
src/GF/Grammar/SGrammar.hs
Normal file
169
src/GF/Grammar/SGrammar.hs
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
----------------------------------------------------------------------
|
||||||
|
-- |
|
||||||
|
-- Module : SGrammar
|
||||||
|
-- Maintainer : AR
|
||||||
|
-- Stability : (stable)
|
||||||
|
-- Portability : (portable)
|
||||||
|
--
|
||||||
|
--
|
||||||
|
-- A simple format for context-free abstract syntax used e.g. in
|
||||||
|
-- generation. AR 31\/3\/2006
|
||||||
|
--
|
||||||
|
-- (c) Aarne Ranta 2004 under GNU GPL
|
||||||
|
--
|
||||||
|
-- Purpose: to generate corpora. We use simple types and don't
|
||||||
|
-- guarantee the correctness of bindings\/dependences.
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
module GF.Grammar.SGrammar where
|
||||||
|
|
||||||
|
import GF.Canon.GFC
|
||||||
|
import GF.Grammar.LookAbs
|
||||||
|
import GF.Grammar.PrGrammar
|
||||||
|
import GF.Grammar.Macros
|
||||||
|
import GF.Grammar.Values
|
||||||
|
import GF.Grammar.Grammar
|
||||||
|
import GF.Infra.Ident (Ident)
|
||||||
|
|
||||||
|
import GF.Data.Operations
|
||||||
|
import GF.Data.Zipper
|
||||||
|
import GF.Infra.Option
|
||||||
|
|
||||||
|
import Data.List
|
||||||
|
|
||||||
|
-- (c) Aarne Ranta 2006 under GNU GPL
|
||||||
|
|
||||||
|
|
||||||
|
type SGrammar = BinTree SCat [SRule]
|
||||||
|
type SIdent = String
|
||||||
|
type SRule = (SFun,SType)
|
||||||
|
type SType = ([SCat],SCat)
|
||||||
|
type SCat = SIdent
|
||||||
|
type SFun = (Double,SIdent)
|
||||||
|
|
||||||
|
allRules gr = concat [rs | (c,rs) <- tree2list gr]
|
||||||
|
|
||||||
|
data STree =
|
||||||
|
SApp (SFun,[STree])
|
||||||
|
| SMeta SCat
|
||||||
|
| SString String
|
||||||
|
| SInt Integer
|
||||||
|
| SFloat Double
|
||||||
|
deriving (Show,Eq)
|
||||||
|
|
||||||
|
depth :: STree -> Int
|
||||||
|
depth t = case t of
|
||||||
|
SApp (_,ts@(_:_)) -> maximum (map depth ts) + 1
|
||||||
|
_ -> 1
|
||||||
|
|
||||||
|
type Probs = BinTree Ident Double
|
||||||
|
|
||||||
|
emptyProbs :: Probs
|
||||||
|
emptyProbs = emptyBinTree
|
||||||
|
|
||||||
|
prProbs :: Probs -> String
|
||||||
|
prProbs = unlines . map pr . tree2list where
|
||||||
|
pr (f,p) = prt f ++ "\t" ++ show p
|
||||||
|
|
||||||
|
------------------------------------------
|
||||||
|
-- translate grammar to simpler form and generated trees back
|
||||||
|
|
||||||
|
gr2sgr :: Options -> Probs -> GFCGrammar -> SGrammar
|
||||||
|
gr2sgr opts probs gr = buildTree [(c,norm (noexp c rs)) | rs@((_,(_,c)):_) <- rules] where
|
||||||
|
noe = maybe [] (chunks ',') $ getOptVal opts (aOpt "noexpand")
|
||||||
|
only = maybe [] (chunks ',') $ getOptVal opts (aOpt "doexpand")
|
||||||
|
un = getOptInt opts (aOpt "atoms")
|
||||||
|
rules =
|
||||||
|
prune $
|
||||||
|
groupBy (\x y -> scat x == scat y) $
|
||||||
|
sortBy (\x y -> compare (scat x) (scat y)) $
|
||||||
|
[(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
|
||||||
|
trId (_,f) = let f' = prt f in case lookupTree prt f probs of
|
||||||
|
Ok p -> (p,f')
|
||||||
|
_ -> (2.0, f')
|
||||||
|
trTy ty = case catSkeleton ty of
|
||||||
|
Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
|
||||||
|
_ -> []
|
||||||
|
trCat (m,c) = prt c ---
|
||||||
|
scat (_,(_,c)) = c
|
||||||
|
|
||||||
|
prune rs = maybe rs (\n -> map (onlyAtoms n) rs) $ un
|
||||||
|
|
||||||
|
norm = fillProb
|
||||||
|
|
||||||
|
onlyAtoms n rs =
|
||||||
|
let (rs1,rs2) = partition atom rs
|
||||||
|
in take n rs1 ++ rs2
|
||||||
|
atom = null . fst . snd
|
||||||
|
|
||||||
|
noexp c rs
|
||||||
|
| null only = if elem c noe then [((2.0,'?':c),([],c))] else rs
|
||||||
|
| otherwise = if elem c only then rs else [((2.0,'?':c),([],c))]
|
||||||
|
|
||||||
|
-- for cases where explicit probability is not given (encoded as
|
||||||
|
-- p > 1) divide the remaining mass by the number of such cases
|
||||||
|
|
||||||
|
fillProb :: [SRule] -> [SRule]
|
||||||
|
fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where
|
||||||
|
defa p = if p > 1.0 then def else p
|
||||||
|
def = (1 - sum given) / genericLength nope
|
||||||
|
(nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs]
|
||||||
|
|
||||||
|
-- str2tr :: STree -> Exp
|
||||||
|
str2tr t = case t of
|
||||||
|
SApp ((_,'?':c),[]) -> mkMeta 0 -- from noexpand=c
|
||||||
|
SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts)
|
||||||
|
SMeta _ -> mkMeta 0
|
||||||
|
SString s -> K s
|
||||||
|
SInt i -> EInt i
|
||||||
|
SFloat i -> EFloat i
|
||||||
|
where
|
||||||
|
trId = cn . zIdent
|
||||||
|
|
||||||
|
-- tr2str :: Tree -> STree
|
||||||
|
tr2str (Tr (N (_,at,val,_,_),ts)) = case (at,val) of
|
||||||
|
(AtC (_,f), _) -> SApp ((2.0,prt_ f),map tr2str ts)
|
||||||
|
(AtM _, v) -> SMeta (catOf v)
|
||||||
|
(AtL s, _) -> SString s
|
||||||
|
(AtI i, _) -> SInt i
|
||||||
|
(AtF i, _) -> SFloat i
|
||||||
|
_ -> SMeta "FAILED_TO_GENERATE" ---- err monad!
|
||||||
|
where
|
||||||
|
catOf v = case v of
|
||||||
|
VApp w _ -> catOf w
|
||||||
|
VCn (_,c) -> prt_ c
|
||||||
|
_ -> "FAILED_TO_GENERATE_FROM_META"
|
||||||
|
|
||||||
|
|
||||||
|
------------------------------------------
|
||||||
|
-- to test
|
||||||
|
|
||||||
|
prSTree t = case t of
|
||||||
|
SApp ((_,f),ts) -> f ++ concat (map pr1 ts)
|
||||||
|
SMeta c -> '?':c
|
||||||
|
SString s -> prQuotedString s
|
||||||
|
SInt i -> show i
|
||||||
|
SFloat i -> show i
|
||||||
|
where
|
||||||
|
pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t)
|
||||||
|
pr1 t = prSTree t
|
||||||
|
|
||||||
|
pSRule :: String -> SRule
|
||||||
|
pSRule s = case words s of
|
||||||
|
f : _ : cs -> ((2.0,f),(init cs', last cs'))
|
||||||
|
where cs' = [cs !! i | i <- [0,2..length cs - 1]]
|
||||||
|
_ -> error $ "not a rule" +++ s
|
||||||
|
|
||||||
|
exSgr = map pSRule [
|
||||||
|
"Pred : NP -> VP -> S"
|
||||||
|
,"Compl : TV -> NP -> VP"
|
||||||
|
,"PredVV : VV -> VP -> VP"
|
||||||
|
,"DefCN : CN -> NP"
|
||||||
|
,"ModCN : AP -> CN -> CN"
|
||||||
|
,"john : NP"
|
||||||
|
,"walk : VP"
|
||||||
|
,"love : TV"
|
||||||
|
,"try : VV"
|
||||||
|
,"girl : CN"
|
||||||
|
,"big : AP"
|
||||||
|
]
|
||||||
@@ -34,7 +34,8 @@ import GF.Grammar.LookAbs
|
|||||||
import GF.Grammar.PrGrammar
|
import GF.Grammar.PrGrammar
|
||||||
import GF.Grammar.Macros
|
import GF.Grammar.Macros
|
||||||
import GF.Grammar.Values
|
import GF.Grammar.Values
|
||||||
import GF.Grammar.Grammar -- (Cat,EInt,K)
|
import GF.Grammar.Grammar
|
||||||
|
import GF.Grammar.SGrammar
|
||||||
|
|
||||||
import GF.Infra.Ident
|
import GF.Infra.Ident
|
||||||
import GF.Data.Zipper
|
import GF.Data.Zipper
|
||||||
@@ -54,13 +55,13 @@ timeout = 99
|
|||||||
generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
|
generateRandomTreesProb :: Options -> StdGen -> GFCGrammar -> Probs -> Cat -> [Exp]
|
||||||
generateRandomTreesProb opts gen gr probs cat =
|
generateRandomTreesProb opts gen gr probs cat =
|
||||||
map str2tr $ randomTrees gen gr' cat' where
|
map str2tr $ randomTrees gen gr' cat' where
|
||||||
gr' = gr2sgr gr probs
|
gr' = gr2sgr opts probs gr
|
||||||
cat' = prt $ snd cat
|
cat' = prt $ snd cat
|
||||||
|
|
||||||
-- | check that probabilities attached to a grammar make sense
|
-- | check that probabilities attached to a grammar make sense
|
||||||
checkGrammarProbs :: GFCGrammar -> Probs -> Err Probs
|
checkGrammarProbs :: GFCGrammar -> Probs -> Err Probs
|
||||||
checkGrammarProbs gr probs =
|
checkGrammarProbs gr probs =
|
||||||
err Bad (return . gr2probs) $ checkSGrammar $ gr2sgr gr probs where
|
err Bad (return . gr2probs) $ checkSGrammar $ gr2sgr noOptions probs gr where
|
||||||
gr2probs sgr = buildTree [(zIdent f,p) | (_,rs) <- tree2list sgr, ((p,f),_) <- rs]
|
gr2probs sgr = buildTree [(zIdent f,p) | (_,rs) <- tree2list sgr, ((p,f),_) <- rs]
|
||||||
|
|
||||||
-- | compute the probability of a given tree
|
-- | compute the probability of a given tree
|
||||||
@@ -95,61 +96,9 @@ pProb s = case words s of
|
|||||||
readD :: String -> Double
|
readD :: String -> Double
|
||||||
readD = read
|
readD = read
|
||||||
|
|
||||||
type Probs = BinTree Ident Double
|
|
||||||
|
|
||||||
emptyProbs :: Probs
|
|
||||||
emptyProbs = emptyBinTree
|
|
||||||
|
|
||||||
prProbs :: Probs -> String
|
|
||||||
prProbs = unlines . map pr . tree2list where
|
|
||||||
pr (f,p) = prt f ++ "\t" ++ show p
|
|
||||||
|
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
-- translate grammar to simpler form and generated trees back
|
-- translate grammar to simpler form and generated trees back
|
||||||
|
|
||||||
gr2sgr :: GFCGrammar -> Probs -> SGrammar
|
|
||||||
gr2sgr gr probs = buildTree [(c,fillProb rs) | rs@((_,(_,c)):_) <- rules] where
|
|
||||||
rules =
|
|
||||||
groupBy (\x y -> scat x == scat y) $
|
|
||||||
sortBy (\x y -> compare (scat x) (scat y))
|
|
||||||
[(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
|
|
||||||
trId (_,f) = let f' = prt f in case lookupTree prt f probs of
|
|
||||||
Ok p -> (p,f')
|
|
||||||
_ -> (2.0, f')
|
|
||||||
trTy ty = case catSkeleton ty of
|
|
||||||
Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
|
|
||||||
_ -> []
|
|
||||||
trCat (m,c) = prt c ---
|
|
||||||
scat (_,(_,c)) = c
|
|
||||||
|
|
||||||
str2tr :: STree -> Exp
|
|
||||||
str2tr t = case t of
|
|
||||||
SApp ((_,f),ts) -> mkApp (trId f) (map str2tr ts)
|
|
||||||
SMeta _ -> mkMeta 0
|
|
||||||
SString s -> K s
|
|
||||||
SInt i -> EInt i
|
|
||||||
SFloat i -> EFloat i
|
|
||||||
where
|
|
||||||
trId = cn . zIdent
|
|
||||||
|
|
||||||
type SGrammar = BinTree SCat [SRule]
|
|
||||||
type SIdent = String
|
|
||||||
type SRule = (SFun,SType)
|
|
||||||
type SType = ([SCat],SCat)
|
|
||||||
type SCat = SIdent
|
|
||||||
type SFun = (Double,SIdent)
|
|
||||||
|
|
||||||
allRules gr = concat [rs | (c,rs) <- tree2list gr]
|
|
||||||
|
|
||||||
data STree =
|
|
||||||
SApp (SFun,[STree])
|
|
||||||
-- | SAppN (SIdent,[STree]) -- no probability given
|
|
||||||
| SMeta SCat
|
|
||||||
| SString String
|
|
||||||
| SInt Integer
|
|
||||||
| SFloat Double
|
|
||||||
deriving (Show,Eq)
|
|
||||||
|
|
||||||
probTree :: STree -> Double
|
probTree :: STree -> Double
|
||||||
probTree t = case t of
|
probTree t = case t of
|
||||||
SApp ((p,_),ts) -> p * product (map probTree ts)
|
SApp ((p,_),ts) -> p * product (map probTree ts)
|
||||||
@@ -204,16 +153,8 @@ checkSGrammar = mapMTree chCat where
|
|||||||
Bad $ "illegal probability sum " ++ show s ++ " in " ++ c
|
Bad $ "illegal probability sum " ++ show s ++ " in " ++ c
|
||||||
_ -> return (c,rs)
|
_ -> return (c,rs)
|
||||||
|
|
||||||
-- for cases where explicit probability is not given (encoded as
|
|
||||||
-- p > 1) divide the remaining mass by the number of such cases
|
|
||||||
|
|
||||||
fillProb :: [SRule] -> [SRule]
|
|
||||||
fillProb rs = [((defa p,f),ty) | ((p,f),ty) <- rs] where
|
|
||||||
defa p = if p > 1.0 then def else p
|
|
||||||
def = (1 - sum given) / genericLength nope
|
|
||||||
(nope,given) = partition (> 1.0) [p | ((p,_),_) <- rs]
|
|
||||||
|
|
||||||
|
|
||||||
|
{-
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
-- to test outside GF
|
-- to test outside GF
|
||||||
|
|
||||||
@@ -246,7 +187,7 @@ pSRule s = case words s of
|
|||||||
where cs' = [cs !! i | i <- [0,2..length cs - 1]]
|
where cs' = [cs !! i | i <- [0,2..length cs - 1]]
|
||||||
_ -> error $ "not a rule" +++ s
|
_ -> error $ "not a rule" +++ s
|
||||||
|
|
||||||
exSgr = mkSGrammar $ map pSRule [
|
expSgr = mkSGrammar $ map pSRule [
|
||||||
"0.8 a : A"
|
"0.8 a : A"
|
||||||
,"0.2 b : A"
|
,"0.2 b : A"
|
||||||
,"0.2 n : A -> S -> S"
|
,"0.2 n : A -> S -> S"
|
||||||
@@ -257,3 +198,6 @@ ex1 :: IO ()
|
|||||||
ex1 = do
|
ex1 = do
|
||||||
g <- newStdGen
|
g <- newStdGen
|
||||||
mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S"
|
mapM_ (putStrLn . prSTree) $ randomTrees g exSgr "S"
|
||||||
|
|
||||||
|
-}
|
||||||
|
|
||||||
|
|||||||
@@ -249,6 +249,10 @@ execC co@(comm, opts0) sa@(sh@(st,(h,_,_,_)),a) = checkOptions st co >> case com
|
|||||||
let p = optParseArgErrMsg opts gro x
|
let p = optParseArgErrMsg opts gro x
|
||||||
case p of
|
case p of
|
||||||
Ok (ts,msg)
|
Ok (ts,msg)
|
||||||
|
| oElem (iOpt "fail") opts && null ts -> do
|
||||||
|
putStrLnFlush ("#FAIL:" +++ x) >> changeArg (const $ ATrms ts) sa
|
||||||
|
| oElem (iOpt "ambiguous") opts && length ts > 1 -> do
|
||||||
|
putStrLnFlush ("#AMBIGUOUS:" +++ x) >> changeArg (const $ ATrms ts) sa
|
||||||
| oElem (iOpt "prob") opts -> do
|
| oElem (iOpt "prob") opts -> do
|
||||||
let probs = stateProbs gro
|
let probs = stateProbs gro
|
||||||
let tps = rankByScore [(t,computeProbTree probs t) | t <- ts]
|
let tps = rankByScore [(t,computeProbTree probs t) | t <- ts]
|
||||||
|
|||||||
@@ -203,6 +203,8 @@ txtHelpFile =
|
|||||||
"\n -all as -lines, but also parse empty lines" ++
|
"\n -all as -lines, but also parse empty lines" ++
|
||||||
"\n -prob rank results by probability" ++
|
"\n -prob rank results by probability" ++
|
||||||
"\n -cut stop after first lexing result leading to parser success" ++
|
"\n -cut stop after first lexing result leading to parser success" ++
|
||||||
|
"\n -fail show strings whose parse fails prefixed by #FAIL" ++
|
||||||
|
"\n -ambiguous show strings that have more than one parse prefixed by #AMBIGUOUS" ++
|
||||||
"\n options for selecting parsing method:" ++
|
"\n options for selecting parsing method:" ++
|
||||||
"\n (default)parse using an overgenerating CFG" ++
|
"\n (default)parse using an overgenerating CFG" ++
|
||||||
"\n -cfg parse using a much less overgenerating CFG" ++
|
"\n -cfg parse using a much less overgenerating CFG" ++
|
||||||
@@ -344,11 +346,13 @@ txtHelpFile =
|
|||||||
"\n -lang use the abstract syntax of this grammar" ++
|
"\n -lang use the abstract syntax of this grammar" ++
|
||||||
"\n -number generate (at most) this number of trees" ++
|
"\n -number generate (at most) this number of trees" ++
|
||||||
"\n -noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)" ++
|
"\n -noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)" ++
|
||||||
|
"\n -doexpand only expand these categories (comma-separated, e.g. -doexpand=V,CN)" ++
|
||||||
"\n examples:" ++
|
"\n examples:" ++
|
||||||
"\n gt -depth=10 -cat=NP -- generate all NP's to depth 10 " ++
|
"\n gt -depth=10 -cat=NP -- generate all NP's to depth 10 " ++
|
||||||
"\n gt (PredVP ? (NegVG ?)) -- generate all trees of this form" ++
|
"\n gt (PredVP ? (NegVG ?)) -- generate all trees of this form" ++
|
||||||
"\n gt -cat=S -tr | l -- generate and linearize" ++
|
"\n gt -cat=S -tr | l -- generate and linearize" ++
|
||||||
"\n gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized \"?0 +NP\"" ++
|
"\n gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized \"?0 +NP\"" ++
|
||||||
|
"\n gt | l | p -lines -ambiguous | grep \"#AMBIGUOUS\" -- show ambiguous strings" ++
|
||||||
"\n" ++
|
"\n" ++
|
||||||
"\nma, morphologically_analyse: ma String" ++
|
"\nma, morphologically_analyse: ma String" ++
|
||||||
"\n Runs morphological analysis on each word in String and displays" ++
|
"\n Runs morphological analysis on each word in String and displays" ++
|
||||||
|
|||||||
@@ -180,11 +180,11 @@ optionsOfCommand co = case co of
|
|||||||
CTransformGrammar _ -> flags "printer"
|
CTransformGrammar _ -> flags "printer"
|
||||||
CConvertLatex _ -> none
|
CConvertLatex _ -> none
|
||||||
CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer mark"
|
CLinearize _ -> both "utf8 table struct record all multi" "lang number unlexer mark"
|
||||||
CParse -> both "cut new newer cfg mcfg n ign raw v lines all prob"
|
CParse -> both "ambiguous fail cut new newer cfg mcfg n ign raw v lines all prob"
|
||||||
"cat lang lexer parser number rawtrees"
|
"cat lang lexer parser number rawtrees"
|
||||||
CTranslate _ _ -> opts "cat lexer parser"
|
CTranslate _ _ -> opts "cat lexer parser"
|
||||||
CGenerateRandom -> both "cf prob" "cat lang number depth"
|
CGenerateRandom -> both "cf prob" "cat lang number depth atoms noexpand doexpand"
|
||||||
CGenerateTrees -> both "metas" "atoms depth alts cat lang number noexpand"
|
CGenerateTrees -> both "metas" "atoms depth alts cat lang number noexpand doexpand"
|
||||||
CPutTerm -> flags "transform number"
|
CPutTerm -> flags "transform number"
|
||||||
CTreeBank -> opts "c xml trees"
|
CTreeBank -> opts "c xml trees"
|
||||||
CLookupTreebank -> both "assocs raw strings trees" "treebank"
|
CLookupTreebank -> both "assocs raw strings trees" "treebank"
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ import GF.Grammar.PrGrammar
|
|||||||
import GF.Grammar.Macros
|
import GF.Grammar.Macros
|
||||||
import GF.Grammar.Values
|
import GF.Grammar.Values
|
||||||
import GF.Grammar.Grammar (Cat)
|
import GF.Grammar.Grammar (Cat)
|
||||||
|
import GF.Grammar.SGrammar
|
||||||
import GF.Data.Operations
|
import GF.Data.Operations
|
||||||
import GF.Data.Zipper
|
import GF.Data.Zipper
|
||||||
import GF.Infra.Option
|
import GF.Infra.Option
|
||||||
@@ -43,59 +43,11 @@ import Data.List
|
|||||||
generateTrees :: Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp]
|
generateTrees :: Options -> GFCGrammar -> Cat -> Int -> Maybe Int -> Maybe Tree -> [Exp]
|
||||||
generateTrees opts gr cat n mn mt = map str2tr $ generate gr' ifm cat' n mn mt'
|
generateTrees opts gr cat n mn mt = map str2tr $ generate gr' ifm cat' n mn mt'
|
||||||
where
|
where
|
||||||
gr' = gr2sgr noe ats gr
|
gr' = gr2sgr opts emptyProbs gr
|
||||||
cat' = prt $ snd cat
|
cat' = prt $ snd cat
|
||||||
mt' = maybe Nothing (return . tr2str) mt
|
mt' = maybe Nothing (return . tr2str) mt
|
||||||
ifm = oElem withMetas opts
|
ifm = oElem withMetas opts
|
||||||
ats = getOptInt opts (aOpt "atoms")
|
|
||||||
noe = maybe [] (chunks ',') $ getOptVal opts (aOpt "noexpand")
|
|
||||||
|
|
||||||
------------------------------------------
|
|
||||||
-- translate grammar to simpler form and generated trees back
|
|
||||||
|
|
||||||
gr2sgr :: [SIdent] -> Maybe Int -> GFCGrammar -> SGrammar
|
|
||||||
gr2sgr noe un gr = buildTree [(c,noexp c rs) | rs@((_,(_,c)):_) <- prune rules] where
|
|
||||||
rules =
|
|
||||||
groupBy (\x y -> scat x == scat y) $
|
|
||||||
sortBy (\x y -> compare (scat x) (scat y))
|
|
||||||
[(trId f, ty') | (f,ty) <- funRulesOf gr, ty' <- trTy ty]
|
|
||||||
trId = prt . snd
|
|
||||||
trTy ty = case catSkeleton ty of
|
|
||||||
Ok (mcs,mc) -> [(map trCat mcs, trCat mc)]
|
|
||||||
_ -> []
|
|
||||||
trCat (m,c) = prt c ---
|
|
||||||
scat (_,(_,c)) = c
|
|
||||||
|
|
||||||
prune rs = maybe rs (\n -> map (onlyAtoms n) rs) $ un
|
|
||||||
onlyAtoms n rs =
|
|
||||||
let (rs1,rs2) = partition atom rs
|
|
||||||
in take n rs1 ++ rs2
|
|
||||||
atom = null . fst . snd
|
|
||||||
|
|
||||||
noexp c rs = if elem c noe then [('?':c,([],c))] else rs
|
|
||||||
|
|
||||||
-- str2tr :: STree -> Exp
|
|
||||||
str2tr t = case t of
|
|
||||||
SApp ('?':c,[]) -> mkMeta 0 -- from noexpand=c
|
|
||||||
SApp (f,ts) -> mkApp (trId f) (map str2tr ts)
|
|
||||||
SMeta _ -> mkMeta 0
|
|
||||||
---- SString s -> K s
|
|
||||||
where
|
|
||||||
trId = cn . zIdent
|
|
||||||
|
|
||||||
-- tr2str :: Tree -> STree
|
|
||||||
tr2str (Tr (N (_,at,val,_,_),ts)) = case (at,val) of
|
|
||||||
(AtC (_,f), _) -> SApp (prt_ f,map tr2str ts)
|
|
||||||
(AtM _, v) -> SMeta (catOf v)
|
|
||||||
(AtL s, _) -> SString s
|
|
||||||
(AtI i, _) -> SInt i
|
|
||||||
(AtF i, _) -> SFloat i
|
|
||||||
_ -> SMeta "FAILED_TO_GENERATE" ---- err monad!
|
|
||||||
where
|
|
||||||
catOf v = case v of
|
|
||||||
VApp w _ -> catOf w
|
|
||||||
VCn (_,c) -> prt_ c
|
|
||||||
_ -> "FAILED_TO_GENERATE_FROM_META"
|
|
||||||
|
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
-- do the main thing with a simpler data structure
|
-- do the main thing with a simpler data structure
|
||||||
@@ -139,58 +91,3 @@ generate gr ifm cat i mn mt = case mt of
|
|||||||
SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)]
|
SApp (f,ts) -> [SApp (f,ts') | ts' <- combinations (map genM ts)]
|
||||||
SMeta k -> gen k
|
SMeta k -> gen k
|
||||||
_ -> [t]
|
_ -> [t]
|
||||||
|
|
||||||
type SGrammar = BinTree SCat [SRule]
|
|
||||||
type SIdent = String
|
|
||||||
type SRule = (SFun,SType)
|
|
||||||
type SType = ([SCat],SCat)
|
|
||||||
type SCat = SIdent
|
|
||||||
type SFun = SIdent
|
|
||||||
|
|
||||||
allRules gr = concat [rs | (c,rs) <- tree2list gr]
|
|
||||||
|
|
||||||
data STree =
|
|
||||||
SApp (SFun,[STree])
|
|
||||||
| SMeta SCat
|
|
||||||
| SString String
|
|
||||||
| SInt Integer
|
|
||||||
| SFloat Double
|
|
||||||
deriving (Show,Eq)
|
|
||||||
|
|
||||||
depth :: STree -> Int
|
|
||||||
depth t = case t of
|
|
||||||
SApp (_,ts@(_:_)) -> maximum (map depth ts) + 1
|
|
||||||
_ -> 1
|
|
||||||
|
|
||||||
------------------------------------------
|
|
||||||
-- to test
|
|
||||||
|
|
||||||
prSTree t = case t of
|
|
||||||
SApp (f,ts) -> f ++ concat (map pr1 ts)
|
|
||||||
SMeta c -> '?':c
|
|
||||||
SString s -> prQuotedString s
|
|
||||||
SInt i -> show i
|
|
||||||
SFloat i -> show i
|
|
||||||
where
|
|
||||||
pr1 t@(SApp (_,ts)) = ' ' : (if null ts then id else prParenth) (prSTree t)
|
|
||||||
pr1 t = prSTree t
|
|
||||||
|
|
||||||
pSRule :: String -> SRule
|
|
||||||
pSRule s = case words s of
|
|
||||||
f : _ : cs -> (f,(init cs', last cs'))
|
|
||||||
where cs' = [cs !! i | i <- [0,2..length cs - 1]]
|
|
||||||
_ -> error $ "not a rule" +++ s
|
|
||||||
|
|
||||||
exSgr = map pSRule [
|
|
||||||
"Pred : NP -> VP -> S"
|
|
||||||
,"Compl : TV -> NP -> VP"
|
|
||||||
,"PredVV : VV -> VP -> VP"
|
|
||||||
,"DefCN : CN -> NP"
|
|
||||||
,"ModCN : AP -> CN -> CN"
|
|
||||||
,"john : NP"
|
|
||||||
,"walk : VP"
|
|
||||||
,"love : TV"
|
|
||||||
,"try : VV"
|
|
||||||
,"girl : CN"
|
|
||||||
,"big : AP"
|
|
||||||
]
|
|
||||||
|
|||||||
@@ -174,6 +174,8 @@ p, parse: p String
|
|||||||
-all as -lines, but also parse empty lines
|
-all as -lines, but also parse empty lines
|
||||||
-prob rank results by probability
|
-prob rank results by probability
|
||||||
-cut stop after first lexing result leading to parser success
|
-cut stop after first lexing result leading to parser success
|
||||||
|
-fail show strings whose parse fails prefixed by #FAIL
|
||||||
|
-ambiguous show strings that have more than one parse prefixed by #AMBIGUOUS
|
||||||
options for selecting parsing method:
|
options for selecting parsing method:
|
||||||
(default)parse using an overgenerating CFG
|
(default)parse using an overgenerating CFG
|
||||||
-cfg parse using a much less overgenerating CFG
|
-cfg parse using a much less overgenerating CFG
|
||||||
@@ -315,11 +317,13 @@ gt, generate_trees: gt Tree?
|
|||||||
-lang use the abstract syntax of this grammar
|
-lang use the abstract syntax of this grammar
|
||||||
-number generate (at most) this number of trees
|
-number generate (at most) this number of trees
|
||||||
-noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)
|
-noexpand don't expand these categories (comma-separated, e.g. -noexpand=V,CN)
|
||||||
|
-doexpand only expand these categories (comma-separated, e.g. -doexpand=V,CN)
|
||||||
examples:
|
examples:
|
||||||
gt -depth=10 -cat=NP -- generate all NP's to depth 10
|
gt -depth=10 -cat=NP -- generate all NP's to depth 10
|
||||||
gt (PredVP ? (NegVG ?)) -- generate all trees of this form
|
gt (PredVP ? (NegVG ?)) -- generate all trees of this form
|
||||||
gt -cat=S -tr | l -- generate and linearize
|
gt -cat=S -tr | l -- generate and linearize
|
||||||
gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized "?0 +NP"
|
gt -noexpand=NP | l -mark=metacat -- the only NP is meta, linearized "?0 +NP"
|
||||||
|
gt | l | p -lines -ambiguous | grep "#AMBIGUOUS" -- show ambiguous strings
|
||||||
|
|
||||||
ma, morphologically_analyse: ma String
|
ma, morphologically_analyse: ma String
|
||||||
Runs morphological analysis on each word in String and displays
|
Runs morphological analysis on each word in String and displays
|
||||||
|
|||||||
Reference in New Issue
Block a user