Files
gf-core/contrib/eaglesconv/EaglesParser.hs
Nick Frolov 2ff1d34c86 A Russian dictionary
A Russian dictionary generated from a wordlist created by the FreeLing
project. The accompanying converter can be used to convert other wordlists in
EAGLES format to GF grammars.
2011-12-31 02:36:24 +00:00

239 lines
7.9 KiB
Haskell

-- Copyright (C) 2011 Nikita Frolov
-- An early version of the parser that requires somewhat more memory. Kept for
-- nostalgic reasons.
module EaglesParser where
import qualified Data.Text as T
import Data.List
import qualified Data.Map as M
type Forms = M.Map Tag T.Text
data Tag = A Case Number Gender Animacy Form Degree Extra Obscene
| Adv Degree Extra Obscene
| AdvPron Extra
| Ord Case Number Gender Animacy
| AdjPron Case Number Gender Animacy Extra
| Frag Extra
| Conj Extra
| Inter Extra Obscene
| Num Case Number Gender Animacy Extra
| Part Extra
| Prep Extra
| N Case Number Gender Animacy Name Extra Obscene
| Pron Case Number Gender Animacy Extra
| V Mood Number Gender Tense Person Aspect Voice Trans Extra Obscene
| P Case Number Gender Tense Form Aspect Voice Trans Extra Obscene
deriving (Show, Ord, Eq)
parseTag :: T.Text -> Tag
parseTag tag = case (T.unpack tag) of {
('A':c:n:g:a:f:cmp:e:o:[]) -> A (readCase c) (readNumber n)
(readGender g) (readAnimacy a)
(readForm f) (readDegree cmp)
(readExtra e) (readObscene o) ;
('D':cmp:e:o:[]) -> Adv (readDegree cmp)
(readExtra e) (readObscene o) ;
('P':e:[]) -> AdvPron (readExtra e) ;
('Y':c:n:g:a:[]) -> Ord (readCase c) (readNumber n)
(readGender g) (readAnimacy a) ;
('R':c:n:g:a:e:[]) -> AdjPron (readCase c) (readNumber n)
(readGender g) (readAnimacy a) (readExtra e) ;
('M':e:[]) -> Frag (readExtra e) ;
('C':e:[]) -> Conj (readExtra e) ;
('J':e:o:[]) -> Inter (readExtra e) (readObscene o) ;
('Z':c:n:g:a:e:[]) -> Num (readCase c) (readNumber n)
(readGender g) (readAnimacy a) (readExtra e) ;
('T':e:[]) -> Part (readExtra e) ;
('B':e:[]) -> Prep (readExtra e) ;
('N':_:c:n:g:a:name:e:o:_:[]) -> N (readCase c) (readNumber n)
(readGender g) (readAnimacy a)
(readName name)
(readExtra e) (readObscene o) ;
('N':_:c:n:g:a:name:e:o:[]) -> N (readCase c) (readNumber n)
(readGender g) (readAnimacy a)
(readName name)
(readExtra e) (readObscene o) ;
('E':c:n:g:a:e:[]) -> Pron (readCase c) (readNumber n)
(readGender g) (readAnimacy a) (readExtra e) ;
('V':m:n:g:t:'P':p:a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
(readGender g) (readTense t)
(readPerson p) (readAspect a)
(readVoice v) (readTrans tr)
(readExtra e) (readObscene o) ;
('V':m:n:g:t:'0':a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
(readGender g) (readTense t)
NP (readAspect a)
(readVoice v) (readTrans tr)
(readExtra e) (readObscene o) ;
('V':m:n:g:t:p:a:v:tr:e:o:[]) -> V (readMood m) (readNumber n)
(readGender g) (readTense t)
(readPerson p) (readAspect a)
(readVoice v) (readTrans tr)
(readExtra e) (readObscene o) ;
('Q':c:n:g:t:f:a:v:tr:e:o:[]) -> P (readCase c) (readNumber n)
(readGender g) (readTense t)
(readForm f) (readAspect a)
(readVoice v) (readTrans tr)
(readExtra e) (readObscene o) ;
_ -> error $ "Parse error: " ++ T.unpack tag }
data Case = Nom | Gen | Dat | Acc | Inst | Prepos | Partit | Loc | Voc | NC
deriving (Show, Ord, Eq)
readCase 'N' = Nom
readCase 'G' = Gen
readCase 'D' = Dat
readCase 'F' = Acc
readCase 'C' = Inst
readCase 'O' = Prepos
readCase 'P' = Partit
readCase 'L' = Loc
readCase 'V' = Voc
readCase '0' = NC
data Number = Sg | Pl | NN deriving (Show, Ord, Eq)
readNumber 'S' = Sg
readNumber 'P' = Pl
readNumber '0' = NN
data Gender = Masc | Fem | Neut | Common | NG deriving (Show, Ord, Eq)
readGender 'F' = Fem
readGender 'M' = Masc
readGender 'A' = Neut
readGender 'C' = Common
readGender '0' = NG
data Animacy = Animate | Inanimate | NA deriving (Show, Ord, Eq)
readAnimacy 'A' = Animate
readAnimacy 'I' = Inanimate
readAnimacy '0' = NA
data Form = Short | Full | NF deriving (Show, Ord, Eq)
readForm 'S' = Short
readForm 'F' = Full
readForm '0' = NF
data Degree = Pos | Comp | Super | ND deriving (Show, Ord, Eq)
readDegree 'E' = Super
readDegree 'C' = Comp
readDegree 'P' = Pos
readDegree '0' = ND
data Extra = Introductory | Difficult | Distorted | Predicative
| Colloquial | Rare | Abbreviation | Obsolete | NE deriving (Show, Ord, Eq)
readExtra 'P' = Introductory
readExtra 'D' = Difficult
readExtra 'V' = Distorted
readExtra 'R' = Predicative
readExtra 'I' = Colloquial
readExtra 'A' = Rare
readExtra 'B' = Abbreviation
readExtra 'E' = Obsolete
readExtra '0' = NE
data Obscene = Obscene | NO deriving (Show, Ord, Eq)
readObscene 'H' = Obscene
readObscene '0' = NO
data Name = Topo | Proper | Patro | Family | NNa deriving (Show, Ord, Eq)
readName 'G' = Topo
readName 'N' = Proper
readName 'S' = Patro
readName 'F' = Family
readName '0' = NNa
data Mood = Gerund | Inf | Ind | Imp | NM deriving (Show, Ord, Eq)
readMood 'G' = Gerund
readMood 'I' = Inf
readMood 'D' = Ind
readMood 'M' = Imp
readMood '0' = NM
data Tense = Pres | Fut | Past | NT deriving (Show, Ord, Eq)
readTense 'P' = Pres
readTense 'F' = Fut
readTense 'S' = Past
readTense '0' = NT
data Person = P1 | P2 | P3 | NP deriving (Show, Ord, Eq)
readPerson '1' = P1
readPerson '2' = P2
readPerson '3' = P3
data Aspect = Perf | Imperf | NAs deriving (Show, Ord, Eq)
readAspect 'F' = Perf
readAspect 'N' = Imperf
readAspect '0' = NAs
data Voice = Act | Pass | NV deriving (Show, Ord, Eq)
readVoice 'A' = Act
readVoice 'S' = Pass
readVoice '0' = NV
data Trans = Trans | Intrans | NTr deriving (Show, Ord, Eq)
readTrans 'M' = Trans
readTrans 'A' = Intrans
readTrans '0' = NTr
isOpenCat :: Tag -> Bool
isOpenCat (A _ _ _ _ _ _ _ _) = True
isOpenCat (N _ _ _ _ _ _ _) = True
isOpenCat (V _ _ _ _ _ _ _ _ _ _) = True
isOpenCat (Adv _ _ _) = True
isOpenCat _ = False
noun :: Forms -> (Case, Number) -> T.Text
noun forms (c, n) = findForm matchNoun forms
where matchNoun (N c' n' _ _ _ _ _) = c == c' && n == n'
matchNoun _ = False
adj :: Forms -> Degree -> T.Text
adj forms d = findForm matchAdj forms
where matchAdj (A _ _ _ _ _ d' _ _) = d == d
matchAdj _ = False
verbPres :: Forms -> (Number, Person) -> T.Text
verbPres forms (n, p) = findForm matchPres forms
where matchPres (V Ind n' _ Pres p' _ Act _ _ _) = n == n' && p == p'
matchPres _ = False
verbPast :: Forms -> (Number, Gender) -> T.Text
verbPast forms (n, g) = findForm matchPast forms
where matchPast (V Ind n' g' Past _ _ Act _ _ _) = n == n' && g == g'
matchPast _ = False
verbImp :: Forms -> T.Text
verbImp forms = findForm matchImp forms
where matchImp (V Imp _ _ _ _ _ _ _ _ _) = True
matchImp _ = False
verbInf :: Forms -> T.Text
verbInf forms = findForm matchInf forms
where matchInf (V Inf _ _ _ _ _ _ _ _ _) = True
matchInf _ = False
adv :: Forms -> T.Text
adv forms = findForm matchAdv forms
where matchAdv (Adv d _ _) = d == Pos
matchAdv _ = False
findForm match forms = case find match (M.keys forms) of
Just tag -> forms M.! tag
Nothing -> findForm (\ _ -> True) forms