mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 03:09:33 -06:00
The following are the outcomes:
- Predef.nonExist is fully supported by both the Haskell and the C runtimes
- Predef.BIND is now an internal compiler defined token. For now
it behaves just as usual for the Haskell runtime, i.e. it generates &+.
However, the special treatment will let us to handle it properly in
the C runtime.
- This required a major change in the PGF format since both
nonExist and BIND may appear inside 'pre' and this was not supported
before.
67 lines
2.4 KiB
Haskell
67 lines
2.4 KiB
Haskell
module PGF.Morphology(Lemma,Analysis,Morpho,
|
|
buildMorpho,isInMorpho,
|
|
lookupMorpho,fullFormLexicon,
|
|
morphoMissing,morphoKnown,morphoClassify,
|
|
missingWordMsg) where
|
|
|
|
import PGF.CId
|
|
import PGF.Data
|
|
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Set as Set
|
|
import qualified Data.IntMap as IntMap
|
|
import Data.Array.IArray
|
|
import Data.List (intersperse)
|
|
import Data.Char (isDigit) ----
|
|
|
|
-- these 4 definitions depend on the datastructure used
|
|
|
|
type Lemma = CId
|
|
type Analysis = String
|
|
|
|
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
|
|
|
|
buildMorpho :: PGF -> Language -> Morpho
|
|
buildMorpho pgf lang = Morpho $
|
|
case Map.lookup lang (concretes pgf) of
|
|
Just pinfo -> collectWords pinfo
|
|
Nothing -> Map.empty
|
|
|
|
collectWords pinfo = Map.fromListWith (++)
|
|
[(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo)
|
|
, fid <- [s..e]
|
|
, PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo))
|
|
, let CncFun fun lins = cncfuns pinfo ! funid
|
|
, (l,seqid) <- assocs lins
|
|
, sym <- elems (sequences pinfo ! seqid)
|
|
, t <- sym2tokns sym]
|
|
where
|
|
sym2tokns (SymKS t) = [t]
|
|
sym2tokns (SymKP ts alts) = concat (map sym2tokns ts ++ [sym2tokns sym | (syms,ps) <- alts, sym <- syms])
|
|
sym2tokns _ = []
|
|
|
|
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
|
|
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
|
|
|
|
isInMorpho :: Morpho -> String -> Bool
|
|
isInMorpho (Morpho mo) s = maybe False (const True) $ Map.lookup s mo
|
|
|
|
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
|
|
fullFormLexicon (Morpho mo) = Map.toList mo
|
|
|
|
morphoMissing :: Morpho -> [String] -> [String]
|
|
morphoMissing = morphoClassify False
|
|
|
|
morphoKnown :: Morpho -> [String] -> [String]
|
|
morphoKnown = morphoClassify True
|
|
|
|
morphoClassify :: Bool -> Morpho -> [String] -> [String]
|
|
morphoClassify k mo ws = [w | w <- ws, k /= null (lookupMorpho mo w), notLiteral w] where
|
|
notLiteral w = not (all isDigit w) ---- should be defined somewhere
|
|
|
|
missingWordMsg :: Morpho -> [String] -> String
|
|
missingWordMsg morpho ws = case morphoMissing morpho ws of
|
|
[] -> ", but all words are known"
|
|
ws -> "; unknown words: " ++ unwords ws
|
|
|