mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-04 16:52:50 -06:00
A Russian dictionary
A Russian dictionary generated from a wordlist created by the FreeLing project. The accompanying converter can be used to convert other wordlists in EAGLES format to GF grammars.
This commit is contained in:
239
contrib/eaglesconv/EaglesParser.hs
Normal file
239
contrib/eaglesconv/EaglesParser.hs
Normal file
@@ -0,0 +1,239 @@
|
||||
-- Copyright (C) 2011 Nikita Frolov
|
||||
|
||||
-- An early version of the parser that requires somewhat more memory. Kept for
|
||||
-- nostalgic reasons.
|
||||
|
||||
module EaglesParser where
|
||||
|
||||
import qualified Data.Text as T
|
||||
import Data.List
|
||||
import qualified Data.Map as M
|
||||
|
||||
type Forms = M.Map Tag T.Text
|
||||
|
||||
data Tag = A Case Number Gender Animacy Form Degree Extra Obscene
|
||||
| Adv Degree Extra Obscene
|
||||
| AdvPron Extra
|
||||
| Ord Case Number Gender Animacy
|
||||
| AdjPron Case Number Gender Animacy Extra
|
||||
| Frag Extra
|
||||
| Conj Extra
|
||||
| Inter Extra Obscene
|
||||
| Num Case Number Gender Animacy Extra
|
||||
| Part Extra
|
||||
| Prep Extra
|
||||
| N Case Number Gender Animacy Name Extra Obscene
|
||||
| Pron Case Number Gender Animacy Extra
|
||||
| V Mood Number Gender Tense Person Aspect Voice Trans Extra Obscene
|
||||
| P Case Number Gender Tense Form Aspect Voice Trans Extra Obscene
|
||||
deriving (Show, Ord, Eq)
|
||||
|
||||
parseTag :: T.Text -> Tag
|
||||
parseTag tag = case (T.unpack tag) of {
|
||||
('A':c:n:g:a:f:cmp:e:o:[]) -> A (readCase c) (readNumber n)
|
||||
(readGender g) (readAnimacy a)
|
||||
(readForm f) (readDegree cmp)
|
||||
(readExtra e) (readObscene o) ;
|
||||
('D':cmp:e:o:[]) -> Adv (readDegree cmp)
|
||||
(readExtra e) (readObscene o) ;
|
||||
('P':e:[]) -> AdvPron (readExtra e) ;
|
||||
('Y':c:n:g:a:[]) -> Ord (readCase c) (readNumber n)
|
||||
(readGender g) (readAnimacy a) ;
|
||||
('R':c:n:g:a:e:[]) -> AdjPron (readCase c) (readNumber n)
|
||||
(readGender g) (readAnimacy a) (readExtra e) ;
|
||||
('M':e:[]) -> Frag (readExtra e) ;
|
||||
('C':e:[]) -> Conj (readExtra e) ;
|
||||
('J':e:o:[]) -> Inter (readExtra e) (readObscene o) ;
|
||||
('Z':c:n:g:a:e:[]) -> Num (readCase c) (readNumber n)
|
||||
(readGender g) (readAnimacy a) (readExtra e) ;
|
||||
('T':e:[]) -> Part (readExtra e) ;
|
||||
('B':e:[]) -> Prep (readExtra e) ;
|
||||
('N':_:c:n:g:a:name:e:o:_:[]) -> N (readCase c) (readNumber n)
|
||||
(readGender g) (readAnimacy a)
|
||||
(readName name)
|
||||
(readExtra e) (readObscene o) ;
|
||||
('N':_:c:n:g:a:name:e:o:[]) -> N (readCase c) (readNumber n)
|
||||
(readGender g) (readAnimacy a)
|
||||
(readName name)
|
||||
(readExtra e) (readObscene o) ;
|
||||
('E':c:n:g:a:e:[]) -> Pron (readCase c) (readNumber n)
|
||||
(readGender g) (readAnimacy a) (readExtra e) ;
|
||||
('V':m:n:g:t:'P':p:a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
|
||||
(readGender g) (readTense t)
|
||||
(readPerson p) (readAspect a)
|
||||
(readVoice v) (readTrans tr)
|
||||
(readExtra e) (readObscene o) ;
|
||||
('V':m:n:g:t:'0':a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
|
||||
(readGender g) (readTense t)
|
||||
NP (readAspect a)
|
||||
(readVoice v) (readTrans tr)
|
||||
(readExtra e) (readObscene o) ;
|
||||
('V':m:n:g:t:p:a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
|
||||
(readGender g) (readTense t)
|
||||
(readPerson p) (readAspect a)
|
||||
(readVoice v) (readTrans tr)
|
||||
(readExtra e) (readObscene o) ;
|
||||
('Q':c:n:g:t:f:a:v:tr:e:o:[]) -> P (readCase c) (readNumber n)
|
||||
(readGender g) (readTense t)
|
||||
(readForm f) (readAspect a)
|
||||
(readVoice v) (readTrans tr)
|
||||
(readExtra e) (readObscene o) ;
|
||||
_ -> error $ "Parse error: " ++ T.unpack tag }
|
||||
|
||||
data Case = Nom | Gen | Dat | Acc | Inst | Prepos | Partit | Loc | Voc | NC
|
||||
deriving (Show, Ord, Eq)
|
||||
|
||||
readCase 'N' = Nom
|
||||
readCase 'G' = Gen
|
||||
readCase 'D' = Dat
|
||||
readCase 'F' = Acc
|
||||
readCase 'C' = Inst
|
||||
readCase 'O' = Prepos
|
||||
readCase 'P' = Partit
|
||||
readCase 'L' = Loc
|
||||
readCase 'V' = Voc
|
||||
readCase '0' = NC
|
||||
|
||||
data Number = Sg | Pl | NN deriving (Show, Ord, Eq)
|
||||
|
||||
readNumber 'S' = Sg
|
||||
readNumber 'P' = Pl
|
||||
readNumber '0' = NN
|
||||
|
||||
data Gender = Masc | Fem | Neut | Common | NG deriving (Show, Ord, Eq)
|
||||
|
||||
readGender 'F' = Fem
|
||||
readGender 'M' = Masc
|
||||
readGender 'A' = Neut
|
||||
readGender 'C' = Common
|
||||
readGender '0' = NG
|
||||
|
||||
data Animacy = Animate | Inanimate | NA deriving (Show, Ord, Eq)
|
||||
|
||||
readAnimacy 'A' = Animate
|
||||
readAnimacy 'I' = Inanimate
|
||||
readAnimacy '0' = NA
|
||||
|
||||
data Form = Short | Full | NF deriving (Show, Ord, Eq)
|
||||
|
||||
readForm 'S' = Short
|
||||
readForm 'F' = Full
|
||||
readForm '0' = NF
|
||||
|
||||
data Degree = Pos | Comp | Super | ND deriving (Show, Ord, Eq)
|
||||
|
||||
readDegree 'E' = Super
|
||||
readDegree 'C' = Comp
|
||||
readDegree 'P' = Pos
|
||||
readDegree '0' = ND
|
||||
|
||||
data Extra = Introductory | Difficult | Distorted | Predicative
|
||||
| Colloquial | Rare | Abbreviation | Obsolete | NE deriving (Show, Ord, Eq)
|
||||
|
||||
readExtra 'P' = Introductory
|
||||
readExtra 'D' = Difficult
|
||||
readExtra 'V' = Distorted
|
||||
readExtra 'R' = Predicative
|
||||
readExtra 'I' = Colloquial
|
||||
readExtra 'A' = Rare
|
||||
readExtra 'B' = Abbreviation
|
||||
readExtra 'E' = Obsolete
|
||||
readExtra '0' = NE
|
||||
|
||||
data Obscene = Obscene | NO deriving (Show, Ord, Eq)
|
||||
|
||||
readObscene 'H' = Obscene
|
||||
readObscene '0' = NO
|
||||
|
||||
data Name = Topo | Proper | Patro | Family | NNa deriving (Show, Ord, Eq)
|
||||
|
||||
readName 'G' = Topo
|
||||
readName 'N' = Proper
|
||||
readName 'S' = Patro
|
||||
readName 'F' = Family
|
||||
readName '0' = NNa
|
||||
|
||||
data Mood = Gerund | Inf | Ind | Imp | NM deriving (Show, Ord, Eq)
|
||||
|
||||
readMood 'G' = Gerund
|
||||
readMood 'I' = Inf
|
||||
readMood 'D' = Ind
|
||||
readMood 'M' = Imp
|
||||
readMood '0' = NM
|
||||
|
||||
data Tense = Pres | Fut | Past | NT deriving (Show, Ord, Eq)
|
||||
|
||||
readTense 'P' = Pres
|
||||
readTense 'F' = Fut
|
||||
readTense 'S' = Past
|
||||
readTense '0' = NT
|
||||
|
||||
data Person = P1 | P2 | P3 | NP deriving (Show, Ord, Eq)
|
||||
|
||||
readPerson '1' = P1
|
||||
readPerson '2' = P2
|
||||
readPerson '3' = P3
|
||||
|
||||
data Aspect = Perf | Imperf | NAs deriving (Show, Ord, Eq)
|
||||
|
||||
readAspect 'F' = Perf
|
||||
readAspect 'N' = Imperf
|
||||
readAspect '0' = NAs
|
||||
|
||||
data Voice = Act | Pass | NV deriving (Show, Ord, Eq)
|
||||
|
||||
readVoice 'A' = Act
|
||||
readVoice 'S' = Pass
|
||||
readVoice '0' = NV
|
||||
|
||||
data Trans = Trans | Intrans | NTr deriving (Show, Ord, Eq)
|
||||
|
||||
readTrans 'M' = Trans
|
||||
readTrans 'A' = Intrans
|
||||
readTrans '0' = NTr
|
||||
|
||||
isOpenCat :: Tag -> Bool
|
||||
isOpenCat (A _ _ _ _ _ _ _ _) = True
|
||||
isOpenCat (N _ _ _ _ _ _ _) = True
|
||||
isOpenCat (V _ _ _ _ _ _ _ _ _ _) = True
|
||||
isOpenCat (Adv _ _ _) = True
|
||||
isOpenCat _ = False
|
||||
|
||||
noun :: Forms -> (Case, Number) -> T.Text
|
||||
noun forms (c, n) = findForm matchNoun forms
|
||||
where matchNoun (N c' n' _ _ _ _ _) = c == c' && n == n'
|
||||
matchNoun _ = False
|
||||
|
||||
adj :: Forms -> Degree -> T.Text
|
||||
adj forms d = findForm matchAdj forms
|
||||
where matchAdj (A _ _ _ _ _ d' _ _) = d == d
|
||||
matchAdj _ = False
|
||||
|
||||
verbPres :: Forms -> (Number, Person) -> T.Text
|
||||
verbPres forms (n, p) = findForm matchPres forms
|
||||
where matchPres (V Ind n' _ Pres p' _ Act _ _ _) = n == n' && p == p'
|
||||
matchPres _ = False
|
||||
|
||||
verbPast :: Forms -> (Number, Gender) -> T.Text
|
||||
verbPast forms (n, g) = findForm matchPast forms
|
||||
where matchPast (V Ind n' g' Past _ _ Act _ _ _) = n == n' && g == g'
|
||||
matchPast _ = False
|
||||
|
||||
verbImp :: Forms -> T.Text
|
||||
verbImp forms = findForm matchImp forms
|
||||
where matchImp (V Imp _ _ _ _ _ _ _ _ _) = True
|
||||
matchImp _ = False
|
||||
|
||||
verbInf :: Forms -> T.Text
|
||||
verbInf forms = findForm matchInf forms
|
||||
where matchInf (V Inf _ _ _ _ _ _ _ _ _) = True
|
||||
matchInf _ = False
|
||||
|
||||
adv :: Forms -> T.Text
|
||||
adv forms = findForm matchAdv forms
|
||||
where matchAdv (Adv d _ _) = d == Pos
|
||||
matchAdv _ = False
|
||||
|
||||
findForm match forms = case find match (M.keys forms) of
|
||||
Just tag -> forms M.! tag
|
||||
Nothing -> findForm (\ _ -> True) forms
|
||||
Reference in New Issue
Block a user