mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-11 05:49:31 -06:00
A Russian dictionary generated from a wordlist created by the FreeLing project. The accompanying converter can be used to convert other wordlists in EAGLES format to GF grammars.
239 lines
7.9 KiB
Haskell
239 lines
7.9 KiB
Haskell
-- Copyright (C) 2011 Nikita Frolov
|
|
|
|
-- An early version of the parser that requires somewhat more memory. Kept for
|
|
-- nostalgic reasons.
|
|
|
|
module EaglesParser where
|
|
|
|
import qualified Data.Text as T
|
|
import Data.List
|
|
import qualified Data.Map as M
|
|
|
|
type Forms = M.Map Tag T.Text
|
|
|
|
data Tag = A Case Number Gender Animacy Form Degree Extra Obscene
|
|
| Adv Degree Extra Obscene
|
|
| AdvPron Extra
|
|
| Ord Case Number Gender Animacy
|
|
| AdjPron Case Number Gender Animacy Extra
|
|
| Frag Extra
|
|
| Conj Extra
|
|
| Inter Extra Obscene
|
|
| Num Case Number Gender Animacy Extra
|
|
| Part Extra
|
|
| Prep Extra
|
|
| N Case Number Gender Animacy Name Extra Obscene
|
|
| Pron Case Number Gender Animacy Extra
|
|
| V Mood Number Gender Tense Person Aspect Voice Trans Extra Obscene
|
|
| P Case Number Gender Tense Form Aspect Voice Trans Extra Obscene
|
|
deriving (Show, Ord, Eq)
|
|
|
|
parseTag :: T.Text -> Tag
|
|
parseTag tag = case (T.unpack tag) of {
|
|
('A':c:n:g:a:f:cmp:e:o:[]) -> A (readCase c) (readNumber n)
|
|
(readGender g) (readAnimacy a)
|
|
(readForm f) (readDegree cmp)
|
|
(readExtra e) (readObscene o) ;
|
|
('D':cmp:e:o:[]) -> Adv (readDegree cmp)
|
|
(readExtra e) (readObscene o) ;
|
|
('P':e:[]) -> AdvPron (readExtra e) ;
|
|
('Y':c:n:g:a:[]) -> Ord (readCase c) (readNumber n)
|
|
(readGender g) (readAnimacy a) ;
|
|
('R':c:n:g:a:e:[]) -> AdjPron (readCase c) (readNumber n)
|
|
(readGender g) (readAnimacy a) (readExtra e) ;
|
|
('M':e:[]) -> Frag (readExtra e) ;
|
|
('C':e:[]) -> Conj (readExtra e) ;
|
|
('J':e:o:[]) -> Inter (readExtra e) (readObscene o) ;
|
|
('Z':c:n:g:a:e:[]) -> Num (readCase c) (readNumber n)
|
|
(readGender g) (readAnimacy a) (readExtra e) ;
|
|
('T':e:[]) -> Part (readExtra e) ;
|
|
('B':e:[]) -> Prep (readExtra e) ;
|
|
('N':_:c:n:g:a:name:e:o:_:[]) -> N (readCase c) (readNumber n)
|
|
(readGender g) (readAnimacy a)
|
|
(readName name)
|
|
(readExtra e) (readObscene o) ;
|
|
('N':_:c:n:g:a:name:e:o:[]) -> N (readCase c) (readNumber n)
|
|
(readGender g) (readAnimacy a)
|
|
(readName name)
|
|
(readExtra e) (readObscene o) ;
|
|
('E':c:n:g:a:e:[]) -> Pron (readCase c) (readNumber n)
|
|
(readGender g) (readAnimacy a) (readExtra e) ;
|
|
('V':m:n:g:t:'P':p:a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
|
|
(readGender g) (readTense t)
|
|
(readPerson p) (readAspect a)
|
|
(readVoice v) (readTrans tr)
|
|
(readExtra e) (readObscene o) ;
|
|
('V':m:n:g:t:'0':a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
|
|
(readGender g) (readTense t)
|
|
NP (readAspect a)
|
|
(readVoice v) (readTrans tr)
|
|
(readExtra e) (readObscene o) ;
|
|
('V':m:n:g:t:p:a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
|
|
(readGender g) (readTense t)
|
|
(readPerson p) (readAspect a)
|
|
(readVoice v) (readTrans tr)
|
|
(readExtra e) (readObscene o) ;
|
|
('Q':c:n:g:t:f:a:v:tr:e:o:[]) -> P (readCase c) (readNumber n)
|
|
(readGender g) (readTense t)
|
|
(readForm f) (readAspect a)
|
|
(readVoice v) (readTrans tr)
|
|
(readExtra e) (readObscene o) ;
|
|
_ -> error $ "Parse error: " ++ T.unpack tag }
|
|
|
|
data Case = Nom | Gen | Dat | Acc | Inst | Prepos | Partit | Loc | Voc | NC
|
|
deriving (Show, Ord, Eq)
|
|
|
|
readCase 'N' = Nom
|
|
readCase 'G' = Gen
|
|
readCase 'D' = Dat
|
|
readCase 'F' = Acc
|
|
readCase 'C' = Inst
|
|
readCase 'O' = Prepos
|
|
readCase 'P' = Partit
|
|
readCase 'L' = Loc
|
|
readCase 'V' = Voc
|
|
readCase '0' = NC
|
|
|
|
data Number = Sg | Pl | NN deriving (Show, Ord, Eq)
|
|
|
|
readNumber 'S' = Sg
|
|
readNumber 'P' = Pl
|
|
readNumber '0' = NN
|
|
|
|
data Gender = Masc | Fem | Neut | Common | NG deriving (Show, Ord, Eq)
|
|
|
|
readGender 'F' = Fem
|
|
readGender 'M' = Masc
|
|
readGender 'A' = Neut
|
|
readGender 'C' = Common
|
|
readGender '0' = NG
|
|
|
|
data Animacy = Animate | Inanimate | NA deriving (Show, Ord, Eq)
|
|
|
|
readAnimacy 'A' = Animate
|
|
readAnimacy 'I' = Inanimate
|
|
readAnimacy '0' = NA
|
|
|
|
data Form = Short | Full | NF deriving (Show, Ord, Eq)
|
|
|
|
readForm 'S' = Short
|
|
readForm 'F' = Full
|
|
readForm '0' = NF
|
|
|
|
data Degree = Pos | Comp | Super | ND deriving (Show, Ord, Eq)
|
|
|
|
readDegree 'E' = Super
|
|
readDegree 'C' = Comp
|
|
readDegree 'P' = Pos
|
|
readDegree '0' = ND
|
|
|
|
data Extra = Introductory | Difficult | Distorted | Predicative
|
|
| Colloquial | Rare | Abbreviation | Obsolete | NE deriving (Show, Ord, Eq)
|
|
|
|
readExtra 'P' = Introductory
|
|
readExtra 'D' = Difficult
|
|
readExtra 'V' = Distorted
|
|
readExtra 'R' = Predicative
|
|
readExtra 'I' = Colloquial
|
|
readExtra 'A' = Rare
|
|
readExtra 'B' = Abbreviation
|
|
readExtra 'E' = Obsolete
|
|
readExtra '0' = NE
|
|
|
|
data Obscene = Obscene | NO deriving (Show, Ord, Eq)
|
|
|
|
readObscene 'H' = Obscene
|
|
readObscene '0' = NO
|
|
|
|
data Name = Topo | Proper | Patro | Family | NNa deriving (Show, Ord, Eq)
|
|
|
|
readName 'G' = Topo
|
|
readName 'N' = Proper
|
|
readName 'S' = Patro
|
|
readName 'F' = Family
|
|
readName '0' = NNa
|
|
|
|
data Mood = Gerund | Inf | Ind | Imp | NM deriving (Show, Ord, Eq)
|
|
|
|
readMood 'G' = Gerund
|
|
readMood 'I' = Inf
|
|
readMood 'D' = Ind
|
|
readMood 'M' = Imp
|
|
readMood '0' = NM
|
|
|
|
data Tense = Pres | Fut | Past | NT deriving (Show, Ord, Eq)
|
|
|
|
readTense 'P' = Pres
|
|
readTense 'F' = Fut
|
|
readTense 'S' = Past
|
|
readTense '0' = NT
|
|
|
|
data Person = P1 | P2 | P3 | NP deriving (Show, Ord, Eq)
|
|
|
|
readPerson '1' = P1
|
|
readPerson '2' = P2
|
|
readPerson '3' = P3
|
|
|
|
data Aspect = Perf | Imperf | NAs deriving (Show, Ord, Eq)
|
|
|
|
readAspect 'F' = Perf
|
|
readAspect 'N' = Imperf
|
|
readAspect '0' = NAs
|
|
|
|
data Voice = Act | Pass | NV deriving (Show, Ord, Eq)
|
|
|
|
readVoice 'A' = Act
|
|
readVoice 'S' = Pass
|
|
readVoice '0' = NV
|
|
|
|
data Trans = Trans | Intrans | NTr deriving (Show, Ord, Eq)
|
|
|
|
readTrans 'M' = Trans
|
|
readTrans 'A' = Intrans
|
|
readTrans '0' = NTr
|
|
|
|
isOpenCat :: Tag -> Bool
|
|
isOpenCat (A _ _ _ _ _ _ _ _) = True
|
|
isOpenCat (N _ _ _ _ _ _ _) = True
|
|
isOpenCat (V _ _ _ _ _ _ _ _ _ _) = True
|
|
isOpenCat (Adv _ _ _) = True
|
|
isOpenCat _ = False
|
|
|
|
noun :: Forms -> (Case, Number) -> T.Text
|
|
noun forms (c, n) = findForm matchNoun forms
|
|
where matchNoun (N c' n' _ _ _ _ _) = c == c' && n == n'
|
|
matchNoun _ = False
|
|
|
|
adj :: Forms -> Degree -> T.Text
|
|
adj forms d = findForm matchAdj forms
|
|
where matchAdj (A _ _ _ _ _ d' _ _) = d == d
|
|
matchAdj _ = False
|
|
|
|
verbPres :: Forms -> (Number, Person) -> T.Text
|
|
verbPres forms (n, p) = findForm matchPres forms
|
|
where matchPres (V Ind n' _ Pres p' _ Act _ _ _) = n == n' && p == p'
|
|
matchPres _ = False
|
|
|
|
verbPast :: Forms -> (Number, Gender) -> T.Text
|
|
verbPast forms (n, g) = findForm matchPast forms
|
|
where matchPast (V Ind n' g' Past _ _ Act _ _ _) = n == n' && g == g'
|
|
matchPast _ = False
|
|
|
|
verbImp :: Forms -> T.Text
|
|
verbImp forms = findForm matchImp forms
|
|
where matchImp (V Imp _ _ _ _ _ _ _ _ _) = True
|
|
matchImp _ = False
|
|
|
|
verbInf :: Forms -> T.Text
|
|
verbInf forms = findForm matchInf forms
|
|
where matchInf (V Inf _ _ _ _ _ _ _ _ _) = True
|
|
matchInf _ = False
|
|
|
|
adv :: Forms -> T.Text
|
|
adv forms = findForm matchAdv forms
|
|
where matchAdv (Adv d _ _) = d == Pos
|
|
matchAdv _ = False
|
|
|
|
findForm match forms = case find match (M.keys forms) of
|
|
Just tag -> forms M.! tag
|
|
Nothing -> findForm (\ _ -> True) forms |