forked from GitHub/gf-core
Morphological analysis and glueing.
This commit is contained in:
@@ -152,3 +152,15 @@ redirectIdent n f@(CIQ _ c) = CIQ n c
|
|||||||
|
|
||||||
ciq n f = CIQ n f
|
ciq n f = CIQ n f
|
||||||
|
|
||||||
|
wordsInTerm :: Term -> [String]
|
||||||
|
wordsInTerm trm = filter (not . null) $ case trm of
|
||||||
|
K (KS s) -> [s]
|
||||||
|
S c _ -> wo c
|
||||||
|
R rs -> concat [wo t | Ass _ t <- rs]
|
||||||
|
T _ cs -> concat [wo t | Cas _ t <- cs]
|
||||||
|
C s t -> wo s ++ wo t
|
||||||
|
FV ts -> concatMap wo ts
|
||||||
|
K (KP ss vs) -> ss ++ concat [s ++ t | Var s t <- vs]
|
||||||
|
P t _ -> wo t --- not needed ?
|
||||||
|
_ -> []
|
||||||
|
where wo = wordsInTerm
|
||||||
|
|||||||
@@ -12,6 +12,9 @@ import qualified Modules as M
|
|||||||
prCanonModInfo :: CanonModule -> String
|
prCanonModInfo :: CanonModule -> String
|
||||||
prCanonModInfo = prt . info2mod
|
prCanonModInfo = prt . info2mod
|
||||||
|
|
||||||
|
prCanon :: CanonGrammar -> String
|
||||||
|
prCanon = unlines . map prCanonModInfo . M.modules
|
||||||
|
|
||||||
canon2grammar :: Canon -> CanonGrammar
|
canon2grammar :: Canon -> CanonGrammar
|
||||||
canon2grammar (Gr modules) = M.MGrammar $ map mod2info modules where
|
canon2grammar (Gr modules) = M.MGrammar $ map mod2info modules where
|
||||||
mod2info m = case m of
|
mod2info m = case m of
|
||||||
|
|||||||
@@ -79,4 +79,5 @@ oldLexer = map change . L.tokens where
|
|||||||
(L.PT p (L.TS s)) | elem s new -> (L.PT p (L.TV (init s ++ "Z")))
|
(L.PT p (L.TS s)) | elem s new -> (L.PT p (L.TV (init s ++ "Z")))
|
||||||
_ -> t
|
_ -> t
|
||||||
new = words $ "abstract concrete interface incomplete " ++
|
new = words $ "abstract concrete interface incomplete " ++
|
||||||
"instance out open resource reuse transfer with"
|
"instance out open resource reuse transfer with where"
|
||||||
|
|
||||||
|
|||||||
@@ -76,11 +76,11 @@ renameIdentTerm :: Status -> Term -> Err Term
|
|||||||
renameIdentTerm env@(act,imps) t =
|
renameIdentTerm env@(act,imps) t =
|
||||||
errIn ("atomic term" +++ prt t +++ "given" +++ unwords (map (prt . fst) qualifs)) $
|
errIn ("atomic term" +++ prt t +++ "given" +++ unwords (map (prt . fst) qualifs)) $
|
||||||
case t of
|
case t of
|
||||||
|
Vr (IC "Int") -> return $ Q cPredefAbs cInt -- Int and String are predefined cats
|
||||||
|
Vr (IC "String") -> return $ Q cPredefAbs cString
|
||||||
Vr c -> do
|
Vr c -> do
|
||||||
f <- lookupTreeMany prt opens c
|
f <- lookupTreeMany prt opens c
|
||||||
return $ f c
|
return $ f c
|
||||||
Vr (IC "Int") -> return $ Q cPredefAbs cInt -- Int and String are predefined cats
|
|
||||||
Vr (IC "String") -> return $ Q cPredefAbs cString
|
|
||||||
Cn c -> do
|
Cn c -> do
|
||||||
f <- lookupTreeMany prt opens c
|
f <- lookupTreeMany prt opens c
|
||||||
return $ f c
|
return $ f c
|
||||||
|
|||||||
@@ -133,7 +133,7 @@ updateShellState opts sh (gr,(sgr,rts)) = do
|
|||||||
canModules = cgr,
|
canModules = cgr,
|
||||||
srcModules = src,
|
srcModules = src,
|
||||||
cfs = zip concrs cfs,
|
cfs = zip concrs cfs,
|
||||||
morphos = zip concrs (repeat emptyMorpho),
|
morphos = zip concrs (map (mkMorpho cgr) concrs),
|
||||||
gloptions = options (M.allFlags src), ---- canModules
|
gloptions = options (M.allFlags src), ---- canModules
|
||||||
readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts,
|
readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts,
|
||||||
absCats = csi,
|
absCats = csi,
|
||||||
|
|||||||
@@ -62,15 +62,20 @@ prContext co = unwords $ map prParenth [prt x +++ ":" +++ prt t | (x,t) <- co]
|
|||||||
|
|
||||||
instance Print A.Exp where prt = C.printTree
|
instance Print A.Exp where prt = C.printTree
|
||||||
instance Print A.Term where prt = C.printTree
|
instance Print A.Term where prt = C.printTree
|
||||||
instance Print A.Patt where prt = C.printTree
|
|
||||||
instance Print A.Case where prt = C.printTree
|
instance Print A.Case where prt = C.printTree
|
||||||
instance Print A.Atom where prt = C.printTree
|
instance Print A.Atom where prt = C.printTree
|
||||||
instance Print A.CIdent where prt = C.printTree
|
|
||||||
instance Print A.CType where prt = C.printTree
|
instance Print A.CType where prt = C.printTree
|
||||||
instance Print A.Label where prt = C.printTree
|
instance Print A.Label where prt = C.printTree
|
||||||
instance Print A.Module where prt = C.printTree
|
instance Print A.Module where prt = C.printTree
|
||||||
instance Print A.Sort where prt = C.printTree
|
instance Print A.Sort where prt = C.printTree
|
||||||
|
|
||||||
|
instance Print A.Patt where
|
||||||
|
prt = C.printTree
|
||||||
|
prt_ = prPatt
|
||||||
|
|
||||||
|
instance Print A.CIdent where
|
||||||
|
prt = C.printTree
|
||||||
|
prt_ (A.CIQ _ c) = prt c
|
||||||
|
|
||||||
-- printing values and trees in editing
|
-- printing values and trees in editing
|
||||||
|
|
||||||
@@ -183,6 +188,15 @@ prExp e = case e of
|
|||||||
App _ _ -> prParenth $ prExp e
|
App _ _ -> prParenth $ prExp e
|
||||||
_ -> pr1 e
|
_ -> pr1 e
|
||||||
|
|
||||||
|
prPatt :: A.Patt -> String
|
||||||
|
prPatt p = case p of
|
||||||
|
A.PC c ps -> prt_ c +++ unwords (map pr1 ps)
|
||||||
|
_ -> prt p --- PR
|
||||||
|
where
|
||||||
|
pr1 p = case p of
|
||||||
|
A.PC _ (_:_) -> prParenth $ prPatt p
|
||||||
|
_ -> prPatt p
|
||||||
|
|
||||||
-- option -strip strips qualifications
|
-- option -strip strips qualifications
|
||||||
prTermOpt opts = if oElem nostripQualif opts then prt else prExp
|
prTermOpt opts = if oElem nostripQualif opts then prt else prExp
|
||||||
|
|
||||||
|
|||||||
@@ -172,6 +172,8 @@ data Exp =
|
|||||||
| EConcat Exp Exp
|
| EConcat Exp Exp
|
||||||
| EGlue Exp Exp
|
| EGlue Exp Exp
|
||||||
| ELet [LocDef] Exp
|
| ELet [LocDef] Exp
|
||||||
|
| ELetb [LocDef] Exp
|
||||||
|
| EWhere Exp [LocDef]
|
||||||
| EEqs [Equation]
|
| EEqs [Equation]
|
||||||
| ELString LString
|
| ELString LString
|
||||||
| ELin Ident
|
| ELin Ident
|
||||||
|
|||||||
@@ -321,6 +321,8 @@ instance Print Exp where
|
|||||||
EConcat exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["++"] , prt 0 exp])
|
EConcat exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["++"] , prt 0 exp])
|
||||||
EGlue exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["+"] , prt 0 exp])
|
EGlue exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["+"] , prt 0 exp])
|
||||||
ELet locdefs exp -> prPrec i 0 (concat [["let"] , ["{"] , prt 0 locdefs , ["}"] , ["in"] , prt 0 exp])
|
ELet locdefs exp -> prPrec i 0 (concat [["let"] , ["{"] , prt 0 locdefs , ["}"] , ["in"] , prt 0 exp])
|
||||||
|
ELetb locdefs exp -> prPrec i 0 (concat [["let"] , prt 0 locdefs , ["in"] , prt 0 exp])
|
||||||
|
EWhere exp locdefs -> prPrec i 0 (concat [prt 1 exp , ["where"] , ["{"] , prt 0 locdefs , ["}"]])
|
||||||
EEqs equations -> prPrec i 0 (concat [["fn"] , ["{"] , prt 0 equations , ["}"]])
|
EEqs equations -> prPrec i 0 (concat [["fn"] , ["{"] , prt 0 equations , ["}"]])
|
||||||
ELString lstring -> prPrec i 4 (concat [prt 0 lstring])
|
ELString lstring -> prPrec i 4 (concat [prt 0 lstring])
|
||||||
ELin id -> prPrec i 2 (concat [["Lin"] , prt 0 id])
|
ELin id -> prPrec i 2 (concat [["Lin"] , prt 0 id])
|
||||||
|
|||||||
@@ -357,6 +357,8 @@ transExp x = case x of
|
|||||||
where
|
where
|
||||||
tryLoc (c,(mty,Just e)) = return (c,(mty,e))
|
tryLoc (c,(mty,Just e)) = return (c,(mty,e))
|
||||||
tryLoc (c,_) = Bad $ "local definition of" +++ GP.prt c +++ "without value"
|
tryLoc (c,_) = Bad $ "local definition of" +++ GP.prt c +++ "without value"
|
||||||
|
ELetb defs exp -> transExp $ ELet defs exp
|
||||||
|
EWhere exp defs -> transExp $ ELet defs exp
|
||||||
|
|
||||||
ELString (LString str) -> return $ G.K str
|
ELString (LString str) -> return $ G.K str
|
||||||
ELin id -> liftM G.LiT $ transIdent id
|
ELin id -> liftM G.LiT $ transIdent id
|
||||||
|
|||||||
@@ -21,11 +21,13 @@ import CFIdent
|
|||||||
|
|
||||||
---- import CFtoGrammar
|
---- import CFtoGrammar
|
||||||
import PPrCF
|
import PPrCF
|
||||||
|
import PrLBNF
|
||||||
import PrGrammar
|
import PrGrammar
|
||||||
|
import MkGFC
|
||||||
|
|
||||||
import Zipper
|
import Zipper
|
||||||
|
|
||||||
----import Morphology
|
import Morphology
|
||||||
-----import GrammarToHaskell
|
-----import GrammarToHaskell
|
||||||
-----import GrammarToCanon (showCanon, showCanonOpt)
|
-----import GrammarToCanon (showCanon, showCanonOpt)
|
||||||
-----import qualified GrammarToGFC as GFC
|
-----import qualified GrammarToGFC as GFC
|
||||||
@@ -141,16 +143,16 @@ customGrammarParser =
|
|||||||
customGrammarPrinter =
|
customGrammarPrinter =
|
||||||
customData "Grammar printers, selected by option -printer=x" $
|
customData "Grammar printers, selected by option -printer=x" $
|
||||||
[
|
[
|
||||||
---- (strCI "gf", prt) -- DEFAULT
|
(strCI "gfc", prCanon . stateGrammarST) -- DEFAULT
|
||||||
(strCI "cf", prCF . stateCF)
|
,(strCI "cf", prCF . stateCF)
|
||||||
|
,(strCI "lbnf", prLBNF . stateCF)
|
||||||
|
,(strCI "morpho", prMorpho . stateMorpho)
|
||||||
|
,(strCI "opts", prOpts . stateOptions)
|
||||||
{- ----
|
{- ----
|
||||||
(strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT
|
(strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT
|
||||||
,(strCI "canon", showCanon "Lang" . stateGrammarST)
|
,(strCI "canon", showCanon "Lang" . stateGrammarST)
|
||||||
,(strCI "gfc", GFC.showGFC . stateGrammarST)
|
,(strCI "gfc", GFC.showGFC . stateGrammarST)
|
||||||
,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST)
|
,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST)
|
||||||
,(strCI "morpho", prMorpho . stateMorpho)
|
|
||||||
,(strCI "opts", prOpts . stateOptions)
|
|
||||||
-}
|
-}
|
||||||
-- add your own grammar printers here
|
-- add your own grammar printers here
|
||||||
--- also include printing via grammar2syntax!
|
--- also include printing via grammar2syntax!
|
||||||
@@ -236,6 +238,7 @@ customTokenizer =
|
|||||||
,(strCI "chars", const $ map (tS . singleton))
|
,(strCI "chars", const $ map (tS . singleton))
|
||||||
,(strCI "code", const $ lexHaskell)
|
,(strCI "code", const $ lexHaskell)
|
||||||
,(strCI "text", const $ lexText)
|
,(strCI "text", const $ lexText)
|
||||||
|
,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr))
|
||||||
---- ,(strCI "codelit", lexHaskellLiteral . stateIsWord)
|
---- ,(strCI "codelit", lexHaskellLiteral . stateIsWord)
|
||||||
---- ,(strCI "textlit", lexTextLiteral . stateIsWord)
|
---- ,(strCI "textlit", lexTextLiteral . stateIsWord)
|
||||||
,(strCI "codeC", const $ lexC2M)
|
,(strCI "codeC", const $ lexC2M)
|
||||||
@@ -253,7 +256,8 @@ customUntokenizer =
|
|||||||
,(strCI "textlit", const $ formatAsTextLit)
|
,(strCI "textlit", const $ formatAsTextLit)
|
||||||
,(strCI "codelit", const $ formatAsCodeLit)
|
,(strCI "codelit", const $ formatAsCodeLit)
|
||||||
,(strCI "concat", const $ concat . words)
|
,(strCI "concat", const $ concat . words)
|
||||||
,(strCI "bind", const $ performBinds)
|
,(strCI "glue", const $ performBinds)
|
||||||
|
,(strCI "bind", const $ performBinds) -- backward compat
|
||||||
-- add your own untokenizers here
|
-- add your own untokenizers here
|
||||||
]
|
]
|
||||||
++ moreCustomUntokenizer
|
++ moreCustomUntokenizer
|
||||||
|
|||||||
@@ -159,7 +159,7 @@ linearizeToStrss gr mk e = do
|
|||||||
R rs <- linearizeToRecord gr mk e ----
|
R rs <- linearizeToRecord gr mk e ----
|
||||||
t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
|
t <- lookupErr linLab0 [(r,s) | Ass r s <- rs]
|
||||||
return $ map strsFromTerm $ allInTable t
|
return $ map strsFromTerm $ allInTable t
|
||||||
|
-}
|
||||||
|
|
||||||
-- the value is a list of strings, not forgetting their arguments
|
-- the value is a list of strings, not forgetting their arguments
|
||||||
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
|
allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]]
|
||||||
@@ -168,9 +168,6 @@ allLinsOfFun gr f = do
|
|||||||
allLinValues t
|
allLinValues t
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-}
|
|
||||||
|
|
||||||
-- returns printname if one exists; otherwise linearizes with metas
|
-- returns printname if one exists; otherwise linearizes with metas
|
||||||
|
|
||||||
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
|
printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String
|
||||||
|
|||||||
@@ -3,8 +3,14 @@ module Morphology where
|
|||||||
import AbsGFC
|
import AbsGFC
|
||||||
import GFC
|
import GFC
|
||||||
import PrGrammar
|
import PrGrammar
|
||||||
|
import CMacros
|
||||||
|
import LookAbs
|
||||||
|
import Ident
|
||||||
|
import qualified Macros as M
|
||||||
|
import Linear
|
||||||
|
|
||||||
import Operations
|
import Operations
|
||||||
|
import Glue
|
||||||
|
|
||||||
import Char
|
import Char
|
||||||
import List (sortBy, intersperse)
|
import List (sortBy, intersperse)
|
||||||
@@ -40,35 +46,33 @@ appMorphoOnly m s = (s, ms) where
|
|||||||
isKnownWord :: Morpho -> String -> Bool
|
isKnownWord :: Morpho -> String -> Bool
|
||||||
isKnownWord mo = not . null . snd . appMorphoOnly mo
|
isKnownWord mo = not . null . snd . appMorphoOnly mo
|
||||||
|
|
||||||
mkMorpho :: CanonGrammar -> Morpho
|
mkMorpho :: CanonGrammar -> Ident -> Morpho
|
||||||
mkMorpho gr = emptyMorpho ----
|
---- mkMorpho gr = emptyMorpho ----
|
||||||
{- ----
|
mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where
|
||||||
mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where
|
|
||||||
mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
|
mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun
|
||||||
mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
|
mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun
|
||||||
|
|
||||||
-- gather forms of lexical items
|
-- gather forms of lexical items
|
||||||
allLins fun = errVal [] $ do
|
allLins fun@(m,f) = errVal [] $ do
|
||||||
ts <- allLinsOfFun gr fun
|
ts <- allLinsOfFun gr (CIQ a f)
|
||||||
ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts
|
ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts
|
||||||
return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
|
return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs]
|
||||||
prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps))
|
prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps))
|
||||||
|
|
||||||
-- gather syncategorematic words
|
-- gather syncategorematic words
|
||||||
allSyns fun = errVal [] $ do
|
allSyns fun@(m,f) = errVal [] $ do
|
||||||
tss <- allLinsOfFun gr fun
|
tss <- allLinsOfFun gr (CIQ a f)
|
||||||
let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
|
let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs]
|
||||||
return $ concat $ map wordsInTerm ss
|
return $ concat $ map wordsInTerm ss
|
||||||
prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f)
|
prSyn f s = (s, "+<syncategorematic>" ++ tagPrt f)
|
||||||
|
|
||||||
-- all words, Left from lexical rules and Right syncategorematic
|
-- all words, Left from lexical rules and Right syncategorematic
|
||||||
allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where
|
allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where
|
||||||
allFuns = allFunsWithValCat ab
|
allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]]
|
||||||
lookType = errVal [] . liftM (:[]) . lookupFunType ab
|
lexRole t = case M.typeForm t of
|
||||||
lexRole t = case typeForm t of
|
|
||||||
Ok ([],_,_) -> Left
|
Ok ([],_,_) -> Left
|
||||||
_ -> Right
|
_ -> Right
|
||||||
-}
|
|
||||||
|
|
||||||
-- printing full-form lexicon and results
|
-- printing full-form lexicon and results
|
||||||
|
|
||||||
@@ -82,8 +86,8 @@ prMorphoAnalysisShort :: (String,[String]) -> String
|
|||||||
prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
|
prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where
|
||||||
w' = if null fs then w +++ "*" else ""
|
w' = if null fs then w +++ "*" else ""
|
||||||
|
|
||||||
tagPrt :: Print a => a -> String
|
tagPrt :: Print a => (a,a) -> String
|
||||||
tagPrt = ("+" ++) . prt --- could look up print name in grammar
|
tagPrt (m,c) = "+" ++ prt c --- module name
|
||||||
|
|
||||||
-- print all words recognized
|
-- print all words recognized
|
||||||
|
|
||||||
@@ -99,6 +103,15 @@ prFullForm :: Morpho -> String
|
|||||||
prFullForm = unlines . map prOne . tree2list where
|
prFullForm = unlines . map prOne . tree2list where
|
||||||
prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)
|
prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps)
|
||||||
|
|
||||||
|
-- using Huet's unglueing method to find word boundaries
|
||||||
|
---- it would be much better to use a trie also for morphological analysis,
|
||||||
|
---- so this is for the sake of experiment
|
||||||
|
---- Moreover, we should specify the cases in which this happens - not all words
|
||||||
|
|
||||||
|
decomposeWords :: Morpho -> String -> [String]
|
||||||
|
decomposeWords mo s = errVal (words s) $
|
||||||
|
decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s
|
||||||
|
|
||||||
-- auxiliaries
|
-- auxiliaries
|
||||||
|
|
||||||
mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b])
|
mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b])
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
module Today where today = "Fri Nov 7 16:15:47 CET 2003"
|
module Today where today = "Mon Nov 10 09:43:09 CET 2003"
|
||||||
|
|||||||
Reference in New Issue
Block a user