From 249d506f58a8b5f8ef87295ab3dde2d13ddd3885 Mon Sep 17 00:00:00 2001 From: aarne Date: Mon, 10 Nov 2003 07:55:45 +0000 Subject: [PATCH] Morphological analysis and glueing. --- src/GF/Canon/CMacros.hs | 12 +++++++++ src/GF/Canon/MkGFC.hs | 3 +++ src/GF/Compile/GetGrammar.hs | 3 ++- src/GF/Compile/Rename.hs | 4 +-- src/GF/Compile/ShellState.hs | 2 +- src/GF/Grammar/PrGrammar.hs | 18 +++++++++++-- src/GF/Source/AbsGF.hs | 2 ++ src/GF/Source/PrintGF.hs | 2 ++ src/GF/Source/SourceToGrammar.hs | 2 ++ src/GF/UseGrammar/Custom.hs | 18 ++++++++----- src/GF/UseGrammar/Linear.hs | 5 +--- src/GF/UseGrammar/Morphology.hs | 45 ++++++++++++++++++++------------ src/Today.hs | 2 +- 13 files changed, 84 insertions(+), 34 deletions(-) diff --git a/src/GF/Canon/CMacros.hs b/src/GF/Canon/CMacros.hs index 17433e48b..d32f639b4 100644 --- a/src/GF/Canon/CMacros.hs +++ b/src/GF/Canon/CMacros.hs @@ -152,3 +152,15 @@ redirectIdent n f@(CIQ _ c) = CIQ n c ciq n f = CIQ n f +wordsInTerm :: Term -> [String] +wordsInTerm trm = filter (not . null) $ case trm of + K (KS s) -> [s] + S c _ -> wo c + R rs -> concat [wo t | Ass _ t <- rs] + T _ cs -> concat [wo t | Cas _ t <- cs] + C s t -> wo s ++ wo t + FV ts -> concatMap wo ts + K (KP ss vs) -> ss ++ concat [s ++ t | Var s t <- vs] + P t _ -> wo t --- not needed ? + _ -> [] + where wo = wordsInTerm diff --git a/src/GF/Canon/MkGFC.hs b/src/GF/Canon/MkGFC.hs index 7547280a9..8f1e46b21 100644 --- a/src/GF/Canon/MkGFC.hs +++ b/src/GF/Canon/MkGFC.hs @@ -12,6 +12,9 @@ import qualified Modules as M prCanonModInfo :: CanonModule -> String prCanonModInfo = prt . info2mod +prCanon :: CanonGrammar -> String +prCanon = unlines . map prCanonModInfo . M.modules + canon2grammar :: Canon -> CanonGrammar canon2grammar (Gr modules) = M.MGrammar $ map mod2info modules where mod2info m = case m of diff --git a/src/GF/Compile/GetGrammar.hs b/src/GF/Compile/GetGrammar.hs index a9a40ee06..32efb960b 100644 --- a/src/GF/Compile/GetGrammar.hs +++ b/src/GF/Compile/GetGrammar.hs @@ -79,4 +79,5 @@ oldLexer = map change . L.tokens where (L.PT p (L.TS s)) | elem s new -> (L.PT p (L.TV (init s ++ "Z"))) _ -> t new = words $ "abstract concrete interface incomplete " ++ - "instance out open resource reuse transfer with" + "instance out open resource reuse transfer with where" + diff --git a/src/GF/Compile/Rename.hs b/src/GF/Compile/Rename.hs index 49e08ab6e..3a0bf5c52 100644 --- a/src/GF/Compile/Rename.hs +++ b/src/GF/Compile/Rename.hs @@ -76,11 +76,11 @@ renameIdentTerm :: Status -> Term -> Err Term renameIdentTerm env@(act,imps) t = errIn ("atomic term" +++ prt t +++ "given" +++ unwords (map (prt . fst) qualifs)) $ case t of + Vr (IC "Int") -> return $ Q cPredefAbs cInt -- Int and String are predefined cats + Vr (IC "String") -> return $ Q cPredefAbs cString Vr c -> do f <- lookupTreeMany prt opens c return $ f c - Vr (IC "Int") -> return $ Q cPredefAbs cInt -- Int and String are predefined cats - Vr (IC "String") -> return $ Q cPredefAbs cString Cn c -> do f <- lookupTreeMany prt opens c return $ f c diff --git a/src/GF/Compile/ShellState.hs b/src/GF/Compile/ShellState.hs index ad1566f1f..4d0c8b260 100644 --- a/src/GF/Compile/ShellState.hs +++ b/src/GF/Compile/ShellState.hs @@ -133,7 +133,7 @@ updateShellState opts sh (gr,(sgr,rts)) = do canModules = cgr, srcModules = src, cfs = zip concrs cfs, - morphos = zip concrs (repeat emptyMorpho), + morphos = zip concrs (map (mkMorpho cgr) concrs), gloptions = options (M.allFlags src), ---- canModules readFiles = [ft | ft@(f,_) <- readFiles sh, notInrts f] ++ rts, absCats = csi, diff --git a/src/GF/Grammar/PrGrammar.hs b/src/GF/Grammar/PrGrammar.hs index 607b766da..2b5648d8a 100644 --- a/src/GF/Grammar/PrGrammar.hs +++ b/src/GF/Grammar/PrGrammar.hs @@ -62,15 +62,20 @@ prContext co = unwords $ map prParenth [prt x +++ ":" +++ prt t | (x,t) <- co] instance Print A.Exp where prt = C.printTree instance Print A.Term where prt = C.printTree -instance Print A.Patt where prt = C.printTree instance Print A.Case where prt = C.printTree instance Print A.Atom where prt = C.printTree -instance Print A.CIdent where prt = C.printTree instance Print A.CType where prt = C.printTree instance Print A.Label where prt = C.printTree instance Print A.Module where prt = C.printTree instance Print A.Sort where prt = C.printTree +instance Print A.Patt where + prt = C.printTree + prt_ = prPatt + +instance Print A.CIdent where + prt = C.printTree + prt_ (A.CIQ _ c) = prt c -- printing values and trees in editing @@ -183,6 +188,15 @@ prExp e = case e of App _ _ -> prParenth $ prExp e _ -> pr1 e +prPatt :: A.Patt -> String +prPatt p = case p of + A.PC c ps -> prt_ c +++ unwords (map pr1 ps) + _ -> prt p --- PR + where + pr1 p = case p of + A.PC _ (_:_) -> prParenth $ prPatt p + _ -> prPatt p + -- option -strip strips qualifications prTermOpt opts = if oElem nostripQualif opts then prt else prExp diff --git a/src/GF/Source/AbsGF.hs b/src/GF/Source/AbsGF.hs index 8acf35349..28313c5ce 100644 --- a/src/GF/Source/AbsGF.hs +++ b/src/GF/Source/AbsGF.hs @@ -172,6 +172,8 @@ data Exp = | EConcat Exp Exp | EGlue Exp Exp | ELet [LocDef] Exp + | ELetb [LocDef] Exp + | EWhere Exp [LocDef] | EEqs [Equation] | ELString LString | ELin Ident diff --git a/src/GF/Source/PrintGF.hs b/src/GF/Source/PrintGF.hs index b406f1935..e4238ceff 100644 --- a/src/GF/Source/PrintGF.hs +++ b/src/GF/Source/PrintGF.hs @@ -321,6 +321,8 @@ instance Print Exp where EConcat exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["++"] , prt 0 exp]) EGlue exp0 exp -> prPrec i 0 (concat [prt 1 exp0 , ["+"] , prt 0 exp]) ELet locdefs exp -> prPrec i 0 (concat [["let"] , ["{"] , prt 0 locdefs , ["}"] , ["in"] , prt 0 exp]) + ELetb locdefs exp -> prPrec i 0 (concat [["let"] , prt 0 locdefs , ["in"] , prt 0 exp]) + EWhere exp locdefs -> prPrec i 0 (concat [prt 1 exp , ["where"] , ["{"] , prt 0 locdefs , ["}"]]) EEqs equations -> prPrec i 0 (concat [["fn"] , ["{"] , prt 0 equations , ["}"]]) ELString lstring -> prPrec i 4 (concat [prt 0 lstring]) ELin id -> prPrec i 2 (concat [["Lin"] , prt 0 id]) diff --git a/src/GF/Source/SourceToGrammar.hs b/src/GF/Source/SourceToGrammar.hs index 4c4bc93a6..53681104c 100644 --- a/src/GF/Source/SourceToGrammar.hs +++ b/src/GF/Source/SourceToGrammar.hs @@ -357,6 +357,8 @@ transExp x = case x of where tryLoc (c,(mty,Just e)) = return (c,(mty,e)) tryLoc (c,_) = Bad $ "local definition of" +++ GP.prt c +++ "without value" + ELetb defs exp -> transExp $ ELet defs exp + EWhere exp defs -> transExp $ ELet defs exp ELString (LString str) -> return $ G.K str ELin id -> liftM G.LiT $ transIdent id diff --git a/src/GF/UseGrammar/Custom.hs b/src/GF/UseGrammar/Custom.hs index 1048aab95..4d5eb8122 100644 --- a/src/GF/UseGrammar/Custom.hs +++ b/src/GF/UseGrammar/Custom.hs @@ -21,11 +21,13 @@ import CFIdent ---- import CFtoGrammar import PPrCF +import PrLBNF import PrGrammar +import MkGFC import Zipper -----import Morphology +import Morphology -----import GrammarToHaskell -----import GrammarToCanon (showCanon, showCanonOpt) -----import qualified GrammarToGFC as GFC @@ -141,16 +143,16 @@ customGrammarParser = customGrammarPrinter = customData "Grammar printers, selected by option -printer=x" $ [ ----- (strCI "gf", prt) -- DEFAULT - (strCI "cf", prCF . stateCF) - + (strCI "gfc", prCanon . stateGrammarST) -- DEFAULT + ,(strCI "cf", prCF . stateCF) + ,(strCI "lbnf", prLBNF . stateCF) + ,(strCI "morpho", prMorpho . stateMorpho) + ,(strCI "opts", prOpts . stateOptions) {- ---- (strCI "gf", prt . st2grammar . stateGrammarST) -- DEFAULT ,(strCI "canon", showCanon "Lang" . stateGrammarST) ,(strCI "gfc", GFC.showGFC . stateGrammarST) ,(strCI "canonOpt",showCanonOpt "Lang" . stateGrammarST) - ,(strCI "morpho", prMorpho . stateMorpho) - ,(strCI "opts", prOpts . stateOptions) -} -- add your own grammar printers here --- also include printing via grammar2syntax! @@ -236,6 +238,7 @@ customTokenizer = ,(strCI "chars", const $ map (tS . singleton)) ,(strCI "code", const $ lexHaskell) ,(strCI "text", const $ lexText) + ,(strCI "unglue", \gr -> map tS . decomposeWords (stateMorpho gr)) ---- ,(strCI "codelit", lexHaskellLiteral . stateIsWord) ---- ,(strCI "textlit", lexTextLiteral . stateIsWord) ,(strCI "codeC", const $ lexC2M) @@ -253,7 +256,8 @@ customUntokenizer = ,(strCI "textlit", const $ formatAsTextLit) ,(strCI "codelit", const $ formatAsCodeLit) ,(strCI "concat", const $ concat . words) - ,(strCI "bind", const $ performBinds) + ,(strCI "glue", const $ performBinds) + ,(strCI "bind", const $ performBinds) -- backward compat -- add your own untokenizers here ] ++ moreCustomUntokenizer diff --git a/src/GF/UseGrammar/Linear.hs b/src/GF/UseGrammar/Linear.hs index a46200b36..c439d62b2 100644 --- a/src/GF/UseGrammar/Linear.hs +++ b/src/GF/UseGrammar/Linear.hs @@ -159,7 +159,7 @@ linearizeToStrss gr mk e = do R rs <- linearizeToRecord gr mk e ---- t <- lookupErr linLab0 [(r,s) | Ass r s <- rs] return $ map strsFromTerm $ allInTable t - +-} -- the value is a list of strings, not forgetting their arguments allLinsOfFun :: CanonGrammar -> CIdent -> Err [[(Label,[([Patt],Term)])]] @@ -168,9 +168,6 @@ allLinsOfFun gr f = do allLinValues t - --} - -- returns printname if one exists; otherwise linearizes with metas printOrLinearize :: CanonGrammar -> Ident -> A.Fun -> String diff --git a/src/GF/UseGrammar/Morphology.hs b/src/GF/UseGrammar/Morphology.hs index 102e41340..c8f00615a 100644 --- a/src/GF/UseGrammar/Morphology.hs +++ b/src/GF/UseGrammar/Morphology.hs @@ -3,8 +3,14 @@ module Morphology where import AbsGFC import GFC import PrGrammar +import CMacros +import LookAbs +import Ident +import qualified Macros as M +import Linear import Operations +import Glue import Char import List (sortBy, intersperse) @@ -40,35 +46,33 @@ appMorphoOnly m s = (s, ms) where isKnownWord :: Morpho -> String -> Bool isKnownWord mo = not . null . snd . appMorphoOnly mo -mkMorpho :: CanonGrammar -> Morpho -mkMorpho gr = emptyMorpho ---- -{- ---- -mkMorpho gr = mkMorphoTree $ concat $ map mkOne $ allItems where +mkMorpho :: CanonGrammar -> Ident -> Morpho +---- mkMorpho gr = emptyMorpho ---- +mkMorpho gr a = mkMorphoTree $ concat $ map mkOne $ allItems where + mkOne (Left (fun,c)) = map (prOne fun c) $ allLins fun mkOne (Right (fun,_)) = map (prSyn fun) $ allSyns fun -- gather forms of lexical items - allLins fun = errVal [] $ do - ts <- allLinsOfFun gr fun + allLins fun@(m,f) = errVal [] $ do + ts <- allLinsOfFun gr (CIQ a f) ss <- mapM (mapPairsM (mapPairsM (return . wordsInTerm))) ts return [(p,s) | (p,fs) <- concat $ map snd $ concat ss, s <- fs] - prOne f c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map tagPrt ps)) + prOne (_,f) c (ps,s) = (s, prt f +++ tagPrt c ++ concat (map prt_ ps)) -- gather syncategorematic words - allSyns fun = errVal [] $ do - tss <- allLinsOfFun gr fun + allSyns fun@(m,f) = errVal [] $ do + tss <- allLinsOfFun gr (CIQ a f) let ss = [s | ts <- tss, (_,fs) <- ts, (_,s) <- fs] return $ concat $ map wordsInTerm ss prSyn f s = (s, "+" ++ tagPrt f) -- all words, Left from lexical rules and Right syncategorematic - allItems = [lexRole t (f,c) | (f,c) <- allFuns, t <- lookType f] where - allFuns = allFunsWithValCat ab - lookType = errVal [] . liftM (:[]) . lookupFunType ab - lexRole t = case typeForm t of + allItems = [lexRole t (f,c) | (f,c,t) <- allFuns] where + allFuns = [(f,c,t) | (f,t) <- funRulesOf gr, Ok c <- [M.valCat t]] + lexRole t = case M.typeForm t of Ok ([],_,_) -> Left _ -> Right --} -- printing full-form lexicon and results @@ -82,8 +86,8 @@ prMorphoAnalysisShort :: (String,[String]) -> String prMorphoAnalysisShort (w,fs) = prBracket (w' ++ prTList "/" fs) where w' = if null fs then w +++ "*" else "" -tagPrt :: Print a => a -> String -tagPrt = ("+" ++) . prt --- could look up print name in grammar +tagPrt :: Print a => (a,a) -> String +tagPrt (m,c) = "+" ++ prt c --- module name -- print all words recognized @@ -99,6 +103,15 @@ prFullForm :: Morpho -> String prFullForm = unlines . map prOne . tree2list where prOne (s,ps) = s ++ " : " ++ unwords (intersperse "/" ps) +-- using Huet's unglueing method to find word boundaries +---- it would be much better to use a trie also for morphological analysis, +---- so this is for the sake of experiment +---- Moreover, we should specify the cases in which this happens - not all words + +decomposeWords :: Morpho -> String -> [String] +decomposeWords mo s = errVal (words s) $ + decomposeSimple (tcompileSimple (map fst $ tree2list mo)) s + -- auxiliaries mkMorphoTree :: (Ord a, Eq b) => [(a,b)] -> BinTree (a,[b]) diff --git a/src/Today.hs b/src/Today.hs index 4d0cffe81..719a3da7d 100644 --- a/src/Today.hs +++ b/src/Today.hs @@ -1 +1 @@ -module Today where today = "Fri Nov 7 16:15:47 CET 2003" +module Today where today = "Mon Nov 10 09:43:09 CET 2003"