Files
gf-core/src/runtime/haskell/PGF/Morphology.hs
kr.angelov 426bc49a52 a major refactoring in the C and the Haskell runtimes. Note incompatible change in the PGF format!!!
The following are the outcomes:

   - Predef.nonExist is fully supported by both the Haskell and the C runtimes

   - Predef.BIND is now an internal compiler defined token. For now
     it behaves just as usual for the Haskell runtime, i.e. it generates &+.
     However, the special treatment will let us to handle it properly in 
     the C runtime.

   - This required a major change in the PGF format since both 
     nonExist and BIND may appear inside 'pre' and this was not supported
     before.
2013-09-27 15:09:48 +00:00

67 lines
2.4 KiB
Haskell

module PGF.Morphology(Lemma,Analysis,Morpho,
buildMorpho,isInMorpho,
lookupMorpho,fullFormLexicon,
morphoMissing,morphoKnown,morphoClassify,
missingWordMsg) where
import PGF.CId
import PGF.Data
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.IntMap as IntMap
import Data.Array.IArray
import Data.List (intersperse)
import Data.Char (isDigit) ----
-- these 4 definitions depend on the datastructure used
type Lemma = CId
type Analysis = String
newtype Morpho = Morpho (Map.Map String [(Lemma,Analysis)])
buildMorpho :: PGF -> Language -> Morpho
buildMorpho pgf lang = Morpho $
case Map.lookup lang (concretes pgf) of
Just pinfo -> collectWords pinfo
Nothing -> Map.empty
collectWords pinfo = Map.fromListWith (++)
[(t, [(fun,lbls ! l)]) | (CncCat s e lbls) <- Map.elems (cnccats pinfo)
, fid <- [s..e]
, PApply funid _ <- maybe [] Set.toList (IntMap.lookup fid (productions pinfo))
, let CncFun fun lins = cncfuns pinfo ! funid
, (l,seqid) <- assocs lins
, sym <- elems (sequences pinfo ! seqid)
, t <- sym2tokns sym]
where
sym2tokns (SymKS t) = [t]
sym2tokns (SymKP ts alts) = concat (map sym2tokns ts ++ [sym2tokns sym | (syms,ps) <- alts, sym <- syms])
sym2tokns _ = []
lookupMorpho :: Morpho -> String -> [(Lemma,Analysis)]
lookupMorpho (Morpho mo) s = maybe [] id $ Map.lookup s mo
isInMorpho :: Morpho -> String -> Bool
isInMorpho (Morpho mo) s = maybe False (const True) $ Map.lookup s mo
fullFormLexicon :: Morpho -> [(String,[(Lemma,Analysis)])]
fullFormLexicon (Morpho mo) = Map.toList mo
morphoMissing :: Morpho -> [String] -> [String]
morphoMissing = morphoClassify False
morphoKnown :: Morpho -> [String] -> [String]
morphoKnown = morphoClassify True
morphoClassify :: Bool -> Morpho -> [String] -> [String]
morphoClassify k mo ws = [w | w <- ws, k /= null (lookupMorpho mo w), notLiteral w] where
notLiteral w = not (all isDigit w) ---- should be defined somewhere
missingWordMsg :: Morpho -> [String] -> String
missingWordMsg morpho ws = case morphoMissing morpho ws of
[] -> ", but all words are known"
ws -> "; unknown words: " ++ unwords ws