mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-23 19:42:50 -06:00
remove the obsolete parse grammar
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +0,0 @@
|
|||||||
--# -path=.:oald:alltenses

-- Concrete syntax English of EnglishAbs.
-- It inherits everything from ParseEng, and everything from
-- OaldStructuralEng except the prepositions and conjunctions listed in
-- the restriction below.
concrete English of EnglishAbs =
  ParseEng,
  OaldStructuralEng - [
    above_Prep, after_Prep, and_Conj, before_Prep, behind_Prep,
    between_Prep, during_Prep, except_Prep, for_Prep, from_Prep,
    in_Prep, on_Prep, or_Conj, through_Prep, to_Prep, under_Prep,
    with_Prep, without_Prep
  ] ;

-- Disabled inheritance, kept for reference:
--  OaldEng - [everywhere_Adv,have_V2,here_Adv,quite_Adv,somewhere_Adv,there_Adv] ;
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
-- Abstract syntax EnglishAbs.
-- It inherits everything from ParseEngAbs, and everything from
-- OaldStructural except the prepositions and conjunctions listed in the
-- restriction below.
abstract EnglishAbs =
  ParseEngAbs,
  OaldStructural - [
    above_Prep, after_Prep, and_Conj, before_Prep, behind_Prep,
    between_Prep, during_Prep, except_Prep, for_Prep, from_Prep,
    in_Prep, on_Prep, or_Conj, through_Prep, to_Prep, under_Prep,
    with_Prep, without_Prep
  ] ;

-- Disabled inheritance, kept for reference:
--  Oald - [everywhere_Adv,have_V2,here_Adv,quite_Adv,somewhere_Adv,there_Adv] ;
|
|
||||||
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
-- Abstract syntax Parse: the union of the fourteen modules listed below,
-- each inherited in full (no restriction lists) and with no additions.
abstract Parse =
  Noun, Verb, Adjective, Adverb, Numeral,
  Sentence, Question, Relative, Conjunction,
  Phrase, Text,
  Structural, Idiom, Tense ;
|
|
||||||
@@ -1,167 +0,0 @@
|
|||||||
--# -path=.:oald:alltenses

-- Concrete syntax ParseEng of ParseEngAbs.
--
-- The module inherits the listed English modules, leaving out the
-- functions that are re-linearized below (ComplVS, IdRP, RelSlash,
-- everybody_NP, somebody_NP, every_Det, only_Predet), and then adds
-- linearizations for the functions declared in ParseEngAbs.
concrete ParseEng of ParseEngAbs =
  NounEng,
  VerbEng - [ComplVS],
  AdjectiveEng,
  AdverbEng,
  NumeralEng,
  SentenceEng,      --- - [UseCl, UseQCl, UseRCl],
  QuestionEng,
  RelativeEng - [IdRP, RelSlash],
  ConjunctionEng,
  PhraseEng,        --- - [UttImpSg, UttImpPl],
  TextX - [Pol, PNeg, PPos],
  TenseX - [Pol, PNeg, PPos],
  GrammarEng [Pol, PNeg, PPos],
  StructuralEng - [
    above_Prep, everywhere_Adv, everybody_NP,
    every_Det, only_Predet, somebody_NP
  ],
  IdiomEng,

  ExtraEng - [
    UncNegCl, UncNegQCl, UncNegRCl, UncNegImpSg, UncNegImpPl,
    StrandRelSlash,
    that_RP
  ],

  LexiconEng [
    N3,  distance_N3,
    A2,  married_A2,
    VQ,  wonder_VQ,
    V2A, paint_V2A,
    V2Q, ask_V2Q,
    V2V, beg_V2V,
    V2S, answer_V2S,
    VA,  become_VA
  ],
  OaldEng - [
    everywhere_Adv, here_Adv, quite_Adv, somewhere_Adv, there_Adv, have_V2
  ]

  ** open ParadigmsEng, ResEng, MorphoEng, NounEng, ParamX, Prelude in {

flags
  startcat = Phr ;
  unlexer = text ;
  lexer = text ;

--
-- * Overrides of functions from the common API
--

lin
  -- Complement sentence with or without "that":
  -- "hope that he runs" and "hope he runs".
  ComplVS v s = variants {
      VerbEng.ComplVS v s ;
      ComplBareVS v s
    } ;

{-
--- this can now be done by just using ExtraEng.UncNeg : Pol

-- Allow both contracted and uncontracted negated clauses.
lin UseCl t p cl =
  case p.p of {
    Pos => SentenceEng.UseCl t p cl ;
    Neg => variants { SentenceEng.UseCl t p cl ; UncNegCl t p cl }
  } ;

lin UseQCl t p cl =
  case p.p of {
    Pos => SentenceEng.UseQCl t p cl ;
    Neg => variants { SentenceEng.UseQCl t p cl ; UncNegQCl t p cl }
  } ;

lin UseRCl t p cl =
  case p.p of {
    Pos => SentenceEng.UseRCl t p cl ;
    Neg => variants { SentenceEng.UseRCl t p cl ; UncNegRCl t p cl }
  } ;

lin UttImpSg p i =
  case p.p of {
    CPos   => PhraseEng.UttImpSg p i ;
    CNeg _ => variants { PhraseEng.UttImpSg p i ; UncNegImpSg p i }
  } ;

lin UttImpPl p i =
  case p.p of {
    CPos   => PhraseEng.UttImpPl p i ;
    CNeg _ => variants { PhraseEng.UttImpPl p i ; UncNegImpPl p i }
  } ;
-}

  -- Relative clauses come in two forms:
  --   pied piping: "at which we are looking"
  --   stranding:   "that he looks at"
  -- EmptyRelSlash is left out on purpose: it would give a
  -- meta-variable for the RP.
  RelSlash rp slash = variants {
      RelativeEng.RelSlash rp slash ;
      StrandRelSlash rp slash
    } ;

  -- Relative pronoun: "who"/"which" as well as "that".
  -- The order of the table branches is significant and is kept as is.
  IdRP = {
      s = table {
        RC _ (NCase Gen) => "whose" ;
        RC Neutr _ => variants {
            "which" ;
            "that" ;
            "who"     -- for dictionary entries with the wrong gender
          } ;
        RC _ NPAcc => variants {
            "whom" ;
            "that" ;
            "who"     -- incorrect but common
          } ;
        RC _ (NCase Nom) => variants { "who" ; "that" } ;
        RPrep _ => variants {
            "which" ;
            "whom" ;
            "who"     -- incorrect but common
          }
      } ;
      a = RNoAg
    } ;

  everybody_NP = variants {
      regNP "everybody" singular ;
      regNP "everyone" singular
    } ;
  somebody_NP = variants {
      regNP "somebody" singular ;
      regNP "someone" singular
    } ;

  every_Det = variants {
      mkDeterminer singular "every" ;
      mkDeterminer singular "each"
    } ;

  only_Predet = variants { ss "only" ; ss "just" } ;

--
-- English-specific additions
--

-- Syntactic additions

lin
  -- Present participle of the verb in front of the noun.
  VerbCN v cn = {
      s = \\n,c => v.s ! VPresPart ++ cn.s ! n ! c ;
      g = cn.g
    } ;

  -- <num> "of" <np>; agreement is third person with the number of num.
  NumOfNP num np = {
      s = \\c => num.s ! Nom ++ "of" ++ np.s ! c ;
      a = agrP3 num.n
    } ;

  -- <ad.s> <cn, singular> <ad.p> <np, nominative>
  CAdvNP ad cn np = {
      s = \\c => ad.s ++ cn.s ! Sg ! npcase2case c ++ ad.p ++ np.s ! npNom ;
      a = agrP3 Sg
    } ;

  -- <ad.s> <cn, singular> <ad.p> <slash.s> <slash.c2>
  CAdvSSlash ad cn slash = {
      s = \\c => ad.s ++ cn.s ! Sg ! npcase2case c ++ ad.p ++ slash.s ++ slash.c2 ;
      a = agrP3 Sg
    } ;

--  CompCN cn = { s = \\a => let n = (fromAgr a).n
--                            in IndefArt.s ! False ! n ++ cn.s ! n ! Acc} ;

-- Lexical additions

lin
  a8few_Det      = mkDeterminer plural ["a few"] ;
  another_Predet = ss "another" ;
  any_Predet     = ss "any" ;
  anybody_NP     = variants {
      regNP "anybody" singular ;
      regNP "anyone" singular
    } ;
  anything_NP    = regNP "anything" singular ;
  both_Det       = mkDeterminer plural "both" ;
  either_Det     = mkDeterminer singular "either" ;
  exactly_AdN    = ss "exactly" ;
  most_Det       = mkDeterminer plural "most" ;
  neither_Det    = mkDeterminer singular "neither" ;
  only_AdV       = mkAdV "only" ;

  -- Auxiliary-type VV; the present participle has no form at all.
  should_VV = {
      s = table {
        VVF VInf      => ["ought to"] ;
        VVF VPres     => "should" ;
        VVF VPPart    => ["ought to"] ;
        VVF VPresPart => variants {} ;   -- FIXME: "shoulding" ?
        VVF VPast     => ["should have"] ;
        VVPastNeg     => ["shouldn't have"] ;
        VVPresNeg     => "shouldn't"
      } ;
      typ = VVAux
    } ;

  several_Det    = mkDeterminer plural "several" ;

} ;
|
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
-- Abstract syntax ParseEngAbs.
--
-- Inherits Parse (minus above_Prep), ExtraEngAbs (minus the functions
-- listed), the listed categories and words from Lexicon, and Oald (minus
-- the words listed), and then declares a few additional functions.
abstract ParseEngAbs =
  Parse - [above_Prep],
  ExtraEngAbs - [
    UncNegCl, UncNegQCl, UncNegRCl, UncNegImpSg, UncNegImpPl,
    StrandRelSlash,
    that_RP
  ],

  Lexicon [
    N3,  distance_N3,
    A2,  married_A2,
    VQ,  wonder_VQ,
    V2A, paint_V2A,
    V2Q, ask_V2Q,
    V2V, beg_V2V,
    V2S, answer_V2S,
    VA,  become_VA
  ],
  Oald - [
    everywhere_Adv, here_Adv, quite_Adv, somewhere_Adv, there_Adv, have_V2
  ]

  ** {

-- Syntactic additions

fun
  VerbCN     : V -> CN -> CN ;               -- running man
  NumOfNP    : Num -> NP -> NP ;             -- ten of the dogs
  CAdvNP     : CAdv -> CN -> NP -> NP ;      -- more wine than the professor
  CAdvSSlash : CAdv -> CN -> SSlash -> NP ;  -- more wine than the professor drank

--fun CompCN : CN -> Comp ; -- "(every man is) a dog", "(all men are) dogs"

-- Lexical additions

fun
  a8few_Det      : Det ;
  another_Predet : Predet ;
  any_Predet     : Predet ;
  anybody_NP     : NP ;
  anything_NP    : NP ;
  both_Det       : Det ;
  either_Det     : Det ;
  exactly_AdN    : AdN ;
  most_Det       : Det ;
  neither_Det    : Det ;
  only_AdV       : AdV ;
  should_VV      : VV ;
  several_Det    : Det ;

}
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,184 +0,0 @@
|
|||||||
-- English lexicon for GF, produced from:
|
|
||||||
-- Oxford advanced learner's dictionary of current English:
|
|
||||||
-- expanded 'computer usable' version compiled by Roger Mitton
|
|
||||||
-- The computer usable version is transcribed from:
|
|
||||||
-- Oxford advanced learner's dictionary of current English
|
|
||||||
-- A.S. Hornby ; with the assistance of A.P. Cowie [and] J. Windsor Lewis.
|
|
||||||
-- 3rd. ed., London : Oxford University Press, 1974.
|
|
||||||
-- Distributed as 'dict0710' by:
|
|
||||||
-- Oxford Text Archive
|
|
||||||
-- Oxford University Computing Services
|
|
||||||
-- 13 Banbury Road
|
|
||||||
-- Oxford
|
|
||||||
-- OX2 6NN
|
|
||||||
-- Under these conditions:
|
|
||||||
-- Freely available for non-commercial use provided that this header is
|
|
||||||
-- included in its entirety with any copy distributed.
|
|
||||||
--
|
|
||||||
-- GF version generated by asc2gf, Bjorn Bringert Nov 2008
|
|
||||||
-- based on asc2lex, Matthew Purver Nov 2001
|
|
||||||
-- http://www.stanford.edu/~mpurver/software.html
|
|
||||||
|
|
||||||
abstract OaldStructural = Cat ** {
|
|
||||||
fun abaft_Prep : Prep;
|
|
||||||
fun aboard_Prep : Prep;
|
|
||||||
fun about_Prep : Prep;
|
|
||||||
fun above_Prep : Prep;
|
|
||||||
fun according_as_Conj : Conj;
|
|
||||||
fun according_to_Prep : Prep;
|
|
||||||
fun across_Prep : Prep;
|
|
||||||
fun afore_Prep : Prep;
|
|
||||||
fun after_Conj : Conj;
|
|
||||||
fun after_Prep : Prep;
|
|
||||||
fun against_Prep : Prep;
|
|
||||||
fun agin_Prep : Prep;
|
|
||||||
fun albeit_Conj : Conj;
|
|
||||||
fun along_Prep : Prep;
|
|
||||||
fun alongside_Prep : Prep;
|
|
||||||
fun although_Conj : Conj;
|
|
||||||
fun amid_Prep : Prep;
|
|
||||||
fun amidst_Prep : Prep;
|
|
||||||
fun among_Prep : Prep;
|
|
||||||
fun amongst_Prep : Prep;
|
|
||||||
fun an_Conj : Conj;
|
|
||||||
fun and_Conj : Conj;
|
|
||||||
fun anent_Prep : Prep;
|
|
||||||
fun around_Prep : Prep;
|
|
||||||
fun as_Conj : Conj;
|
|
||||||
fun aslant_Prep : Prep;
|
|
||||||
fun astride_Prep : Prep;
|
|
||||||
fun at_Prep : Prep;
|
|
||||||
fun athwart_Prep : Prep;
|
|
||||||
fun bar_Prep : Prep;
|
|
||||||
fun barring_Prep : Prep;
|
|
||||||
fun because_Conj : Conj;
|
|
||||||
fun before_Conj : Conj;
|
|
||||||
fun before_Prep : Prep;
|
|
||||||
fun behind_Prep : Prep;
|
|
||||||
fun below_Prep : Prep;
|
|
||||||
fun beneath_Prep : Prep;
|
|
||||||
fun beside_Prep : Prep;
|
|
||||||
fun besides_Prep : Prep;
|
|
||||||
fun between_Prep : Prep;
|
|
||||||
fun betwixt_Prep : Prep;
|
|
||||||
fun beyond_Prep : Prep;
|
|
||||||
fun but_Conj : Conj;
|
|
||||||
fun but_Prep : Prep;
|
|
||||||
fun by_Prep : Prep;
|
|
||||||
fun circa_Prep : Prep;
|
|
||||||
fun concerning_Prep : Prep;
|
|
||||||
fun considering_Prep : Prep;
|
|
||||||
fun cos_Conj : Conj;
|
|
||||||
fun despite_Prep : Prep;
|
|
||||||
fun directly_Conj : Conj;
|
|
||||||
fun down_Prep : Prep;
|
|
||||||
fun during_Prep : Prep;
|
|
||||||
fun either_Conj : Conj;
|
|
||||||
fun ere_Prep : Prep;
|
|
||||||
fun except_Conj : Conj;
|
|
||||||
fun except_Prep : Prep;
|
|
||||||
fun excepting_Prep : Prep;
|
|
||||||
fun failing_Prep : Prep;
|
|
||||||
fun for_Conj : Conj;
|
|
||||||
fun for_Prep : Prep;
|
|
||||||
fun forasmuch_as_Conj : Conj;
|
|
||||||
fun from_Prep : Prep;
|
|
||||||
fun howbeit_Conj : Conj;
|
|
||||||
fun if_Conj : Conj;
|
|
||||||
fun immediately_Conj : Conj;
|
|
||||||
fun in_Prep : Prep;
|
|
||||||
fun inside_Prep : Prep;
|
|
||||||
fun instantly_Conj : Conj;
|
|
||||||
fun into_Prep : Prep;
|
|
||||||
fun less_Prep : Prep;
|
|
||||||
fun lest_Conj : Conj;
|
|
||||||
fun like_Conj : Conj;
|
|
||||||
fun like_Prep : Prep;
|
|
||||||
fun likewise_Conj : Conj;
|
|
||||||
fun mid_Prep : Prep;
|
|
||||||
fun midst_Prep : Prep;
|
|
||||||
fun minus_Prep : Prep;
|
|
||||||
fun near_Prep : Prep;
|
|
||||||
fun neath_Prep : Prep;
|
|
||||||
fun neither_Conj : Conj;
|
|
||||||
fun nevertheless_Conj : Conj;
|
|
||||||
fun next_Prep : Prep;
|
|
||||||
fun nigh_Prep : Prep;
|
|
||||||
fun nigher_Prep : Prep;
|
|
||||||
fun nighest_Prep : Prep;
|
|
||||||
fun nisi_Conj : Conj;
|
|
||||||
fun nor_Conj : Conj;
|
|
||||||
fun notwithstanding_Conj : Conj;
|
|
||||||
fun notwithstanding_Prep : Prep;
|
|
||||||
fun now_Conj : Conj;
|
|
||||||
fun o'er_Prep : Prep;
|
|
||||||
fun of_Prep : Prep;
|
|
||||||
fun off_Prep : Prep;
|
|
||||||
fun on_Prep : Prep;
|
|
||||||
fun on_to_Prep : Prep;
|
|
||||||
fun only_Conj : Conj;
|
|
||||||
fun onto_Prep : Prep;
|
|
||||||
fun or_Conj : Conj;
|
|
||||||
fun otherwise_Conj : Conj;
|
|
||||||
fun outside_Prep : Prep;
|
|
||||||
fun over_Prep : Prep;
|
|
||||||
fun past_Prep : Prep;
|
|
||||||
fun pending_Prep : Prep;
|
|
||||||
fun per_Prep : Prep;
|
|
||||||
fun plus_Prep : Prep;
|
|
||||||
fun provided_Conj : Conj;
|
|
||||||
fun providing_Conj : Conj;
|
|
||||||
fun qua_Conj : Conj;
|
|
||||||
fun qua_Prep : Prep;
|
|
||||||
fun re_Prep : Prep;
|
|
||||||
fun respecting_Prep : Prep;
|
|
||||||
fun round_Prep : Prep;
|
|
||||||
fun sans_Prep : Prep;
|
|
||||||
fun save_Prep : Prep;
|
|
||||||
fun saving_Prep : Prep;
|
|
||||||
fun since_Conj : Conj;
|
|
||||||
fun since_Prep : Prep;
|
|
||||||
fun so_Conj : Conj;
|
|
||||||
fun supposing_Conj : Conj;
|
|
||||||
fun than_Conj : Conj;
|
|
||||||
fun that_Conj : Conj;
|
|
||||||
fun tho'_Conj : Conj;
|
|
||||||
fun though_Conj : Conj;
|
|
||||||
fun thro'_Prep : Prep;
|
|
||||||
fun through_Prep : Prep;
|
|
||||||
fun throughout_Prep : Prep;
|
|
||||||
fun thru_Prep : Prep;
|
|
||||||
fun till_Conj : Conj;
|
|
||||||
fun till_Prep : Prep;
|
|
||||||
fun to_Prep : Prep;
|
|
||||||
fun touching_Prep : Prep;
|
|
||||||
fun toward_Prep : Prep;
|
|
||||||
fun towards_Prep : Prep;
|
|
||||||
fun tween_Prep : Prep;
|
|
||||||
fun twixt_Prep : Prep;
|
|
||||||
fun under_Prep : Prep;
|
|
||||||
fun underneath_Prep : Prep;
|
|
||||||
fun unless_Conj : Conj;
|
|
||||||
fun unlike_Prep : Prep;
|
|
||||||
fun until_Conj : Conj;
|
|
||||||
fun until_Prep : Prep;
|
|
||||||
fun unto_Prep : Prep;
|
|
||||||
fun up_Prep : Prep;
|
|
||||||
fun upon_Prep : Prep;
|
|
||||||
fun versus_Prep : Prep;
|
|
||||||
fun via_Prep : Prep;
|
|
||||||
fun vice_Prep : Prep;
|
|
||||||
fun vis_à_vis_Prep : Prep;
|
|
||||||
fun wanting_Prep : Prep;
|
|
||||||
fun when_Conj : Conj;
|
|
||||||
fun whencesoever_Conj : Conj;
|
|
||||||
fun whenever_Conj : Conj;
|
|
||||||
fun whereas_Conj : Conj;
|
|
||||||
fun whether_Conj : Conj;
|
|
||||||
fun while_Conj : Conj;
|
|
||||||
fun whilst_Conj : Conj;
|
|
||||||
fun with_Prep : Prep;
|
|
||||||
fun within_Prep : Prep;
|
|
||||||
fun without_Prep : Prep;
|
|
||||||
fun yet_Conj : Conj;
|
|
||||||
}
|
|
||||||
@@ -1,185 +0,0 @@
|
|||||||
-- English lexicon for GF, produced from:
|
|
||||||
-- Oxford advanced learner's dictionary of current English:
|
|
||||||
-- expanded 'computer usable' version compiled by Roger Mitton
|
|
||||||
-- The computer usable version is transcribed from:
|
|
||||||
-- Oxford advanced learner's dictionary of current English
|
|
||||||
-- A.S. Hornby ; with the assistance of A.P. Cowie [and] J. Windsor Lewis.
|
|
||||||
-- 3rd. ed., London : Oxford University Press, 1974.
|
|
||||||
-- Distributed as 'dict0710' by:
|
|
||||||
-- Oxford Text Archive
|
|
||||||
-- Oxford University Computing Services
|
|
||||||
-- 13 Banbury Road
|
|
||||||
-- Oxford
|
|
||||||
-- OX2 6NN
|
|
||||||
-- Under these conditions:
|
|
||||||
-- Freely available for non-commercial use provided that this header is
|
|
||||||
-- included in its entirety with any copy distributed.
|
|
||||||
--
|
|
||||||
-- GF version generated by asc2gf, Bjorn Bringert Nov 2008
|
|
||||||
-- based on asc2lex, Matthew Purver Nov 2001
|
|
||||||
-- http://www.stanford.edu/~mpurver/software.html
|
|
||||||
|
|
||||||
--# -path=.:alltenses
|
|
||||||
concrete OaldStructuralEng of OaldStructural = CatEng ** open ParadigmsEng in {
|
|
||||||
lin abaft_Prep = mkPrep "abaft";
|
|
||||||
lin aboard_Prep = mkPrep "aboard";
|
|
||||||
lin about_Prep = mkPrep "about";
|
|
||||||
lin above_Prep = mkPrep "above";
|
|
||||||
lin according_as_Conj = mkConj "according as";
|
|
||||||
lin according_to_Prep = mkPrep "according to";
|
|
||||||
lin across_Prep = mkPrep "across";
|
|
||||||
lin afore_Prep = mkPrep "afore";
|
|
||||||
lin after_Conj = mkConj "after";
|
|
||||||
lin after_Prep = mkPrep "after";
|
|
||||||
lin against_Prep = mkPrep "against";
|
|
||||||
lin agin_Prep = mkPrep "agin";
|
|
||||||
lin albeit_Conj = mkConj "albeit";
|
|
||||||
lin along_Prep = mkPrep "along";
|
|
||||||
lin alongside_Prep = mkPrep "alongside";
|
|
||||||
lin although_Conj = mkConj "although";
|
|
||||||
lin amid_Prep = mkPrep "amid";
|
|
||||||
lin amidst_Prep = mkPrep "amidst";
|
|
||||||
lin among_Prep = mkPrep "among";
|
|
||||||
lin amongst_Prep = mkPrep "amongst";
|
|
||||||
lin an_Conj = mkConj "an";
|
|
||||||
lin and_Conj = mkConj "and";
|
|
||||||
lin anent_Prep = mkPrep "anent";
|
|
||||||
lin around_Prep = mkPrep "around";
|
|
||||||
lin as_Conj = mkConj "as";
|
|
||||||
lin aslant_Prep = mkPrep "aslant";
|
|
||||||
lin astride_Prep = mkPrep "astride";
|
|
||||||
lin at_Prep = mkPrep "at";
|
|
||||||
lin athwart_Prep = mkPrep "athwart";
|
|
||||||
lin bar_Prep = mkPrep "bar";
|
|
||||||
lin barring_Prep = mkPrep "barring";
|
|
||||||
lin because_Conj = mkConj "because";
|
|
||||||
lin before_Conj = mkConj "before";
|
|
||||||
lin before_Prep = mkPrep "before";
|
|
||||||
lin behind_Prep = mkPrep "behind";
|
|
||||||
lin below_Prep = mkPrep "below";
|
|
||||||
lin beneath_Prep = mkPrep "beneath";
|
|
||||||
lin beside_Prep = mkPrep "beside";
|
|
||||||
lin besides_Prep = mkPrep "besides";
|
|
||||||
lin between_Prep = mkPrep "between";
|
|
||||||
lin betwixt_Prep = mkPrep "betwixt";
|
|
||||||
lin beyond_Prep = mkPrep "beyond";
|
|
||||||
lin but_Conj = mkConj "but";
|
|
||||||
lin but_Prep = mkPrep "but";
|
|
||||||
lin by_Prep = mkPrep "by";
|
|
||||||
lin circa_Prep = mkPrep "circa";
|
|
||||||
lin concerning_Prep = mkPrep "concerning";
|
|
||||||
lin considering_Prep = mkPrep "considering";
|
|
||||||
lin cos_Conj = mkConj "cos";
|
|
||||||
lin despite_Prep = mkPrep "despite";
|
|
||||||
lin directly_Conj = mkConj "directly";
|
|
||||||
lin down_Prep = mkPrep "down";
|
|
||||||
lin during_Prep = mkPrep "during";
|
|
||||||
lin either_Conj = mkConj "either";
|
|
||||||
lin ere_Prep = mkPrep "ere";
|
|
||||||
lin except_Conj = mkConj "except";
|
|
||||||
lin except_Prep = mkPrep "except";
|
|
||||||
lin excepting_Prep = mkPrep "excepting";
|
|
||||||
lin failing_Prep = mkPrep "failing";
|
|
||||||
lin for_Conj = mkConj "for";
|
|
||||||
lin for_Prep = mkPrep "for";
|
|
||||||
lin forasmuch_as_Conj = mkConj "forasmuch as";
|
|
||||||
lin from_Prep = mkPrep "from";
|
|
||||||
lin howbeit_Conj = mkConj "howbeit";
|
|
||||||
lin if_Conj = mkConj "if";
|
|
||||||
lin immediately_Conj = mkConj "immediately";
|
|
||||||
lin in_Prep = mkPrep "in";
|
|
||||||
lin inside_Prep = mkPrep "inside";
|
|
||||||
lin instantly_Conj = mkConj "instantly";
|
|
||||||
lin into_Prep = mkPrep "into";
|
|
||||||
lin less_Prep = mkPrep "less";
|
|
||||||
lin lest_Conj = mkConj "lest";
|
|
||||||
lin like_Conj = mkConj "like";
|
|
||||||
lin like_Prep = mkPrep "like";
|
|
||||||
lin likewise_Conj = mkConj "likewise";
|
|
||||||
lin mid_Prep = mkPrep "mid";
|
|
||||||
lin midst_Prep = mkPrep "midst";
|
|
||||||
lin minus_Prep = mkPrep "minus";
|
|
||||||
lin near_Prep = mkPrep "near";
|
|
||||||
lin neath_Prep = mkPrep "'neath";
|
|
||||||
lin neither_Conj = mkConj "neither";
|
|
||||||
lin nevertheless_Conj = mkConj "nevertheless";
|
|
||||||
lin next_Prep = mkPrep "next";
|
|
||||||
lin nigh_Prep = mkPrep "nigh";
|
|
||||||
lin nigher_Prep = mkPrep "nigher";
|
|
||||||
lin nighest_Prep = mkPrep "nighest";
|
|
||||||
lin nisi_Conj = mkConj "nisi";
|
|
||||||
lin nor_Conj = mkConj "nor";
|
|
||||||
lin notwithstanding_Conj = mkConj "notwithstanding";
|
|
||||||
lin notwithstanding_Prep = mkPrep "notwithstanding";
|
|
||||||
lin now_Conj = mkConj "now";
|
|
||||||
lin o'er_Prep = mkPrep "o'er";
|
|
||||||
lin of_Prep = mkPrep "of";
|
|
||||||
lin off_Prep = mkPrep "off";
|
|
||||||
lin on_Prep = mkPrep "on";
|
|
||||||
lin on_to_Prep = mkPrep "on to";
|
|
||||||
lin only_Conj = mkConj "only";
|
|
||||||
lin onto_Prep = mkPrep "onto";
|
|
||||||
lin or_Conj = mkConj "or";
|
|
||||||
lin otherwise_Conj = mkConj "otherwise";
|
|
||||||
lin outside_Prep = mkPrep "outside";
|
|
||||||
lin over_Prep = mkPrep "over";
|
|
||||||
lin past_Prep = mkPrep "past";
|
|
||||||
lin pending_Prep = mkPrep "pending";
|
|
||||||
lin per_Prep = mkPrep "per";
|
|
||||||
lin plus_Prep = mkPrep "plus";
|
|
||||||
lin provided_Conj = mkConj "provided";
|
|
||||||
lin providing_Conj = mkConj "providing";
|
|
||||||
lin qua_Conj = mkConj "qua";
|
|
||||||
lin qua_Prep = mkPrep "qua";
|
|
||||||
lin re_Prep = mkPrep "re";
|
|
||||||
lin respecting_Prep = mkPrep "respecting";
|
|
||||||
lin round_Prep = mkPrep "round";
|
|
||||||
lin sans_Prep = mkPrep "sans";
|
|
||||||
lin save_Prep = mkPrep "save";
|
|
||||||
lin saving_Prep = mkPrep "saving";
|
|
||||||
lin since_Conj = mkConj "since";
|
|
||||||
lin since_Prep = mkPrep "since";
|
|
||||||
lin so_Conj = mkConj "so";
|
|
||||||
lin supposing_Conj = mkConj "supposing";
|
|
||||||
lin than_Conj = mkConj "than";
|
|
||||||
lin that_Conj = mkConj "that";
|
|
||||||
lin tho'_Conj = mkConj "tho'";
|
|
||||||
lin though_Conj = mkConj "though";
|
|
||||||
lin thro'_Prep = mkPrep "thro'";
|
|
||||||
lin through_Prep = mkPrep "through";
|
|
||||||
lin throughout_Prep = mkPrep "throughout";
|
|
||||||
lin thru_Prep = mkPrep "thru";
|
|
||||||
lin till_Conj = mkConj "till";
|
|
||||||
lin till_Prep = mkPrep "till";
|
|
||||||
lin to_Prep = mkPrep "to";
|
|
||||||
lin touching_Prep = mkPrep "touching";
|
|
||||||
lin toward_Prep = mkPrep "toward";
|
|
||||||
lin towards_Prep = mkPrep "towards";
|
|
||||||
lin tween_Prep = mkPrep "'tween";
|
|
||||||
lin twixt_Prep = mkPrep "'twixt";
|
|
||||||
lin under_Prep = mkPrep "under";
|
|
||||||
lin underneath_Prep = mkPrep "underneath";
|
|
||||||
lin unless_Conj = mkConj "unless";
|
|
||||||
lin unlike_Prep = mkPrep "unlike";
|
|
||||||
lin until_Conj = mkConj "until";
|
|
||||||
lin until_Prep = mkPrep "until";
|
|
||||||
lin unto_Prep = mkPrep "unto";
|
|
||||||
lin up_Prep = mkPrep "up";
|
|
||||||
lin upon_Prep = mkPrep "upon";
|
|
||||||
lin versus_Prep = mkPrep "versus";
|
|
||||||
lin via_Prep = mkPrep "via";
|
|
||||||
lin vice_Prep = mkPrep "vice";
|
|
||||||
lin vis_à_vis_Prep = mkPrep "vis-à-vis";
|
|
||||||
lin wanting_Prep = mkPrep "wanting";
|
|
||||||
lin when_Conj = mkConj "when";
|
|
||||||
lin whencesoever_Conj = mkConj "whencesoever";
|
|
||||||
lin whenever_Conj = mkConj "whenever";
|
|
||||||
lin whereas_Conj = mkConj "whereas";
|
|
||||||
lin whether_Conj = mkConj "whether";
|
|
||||||
lin while_Conj = mkConj "while";
|
|
||||||
lin whilst_Conj = mkConj "whilst";
|
|
||||||
lin with_Prep = mkPrep "with";
|
|
||||||
lin within_Prep = mkPrep "within";
|
|
||||||
lin without_Prep = mkPrep "without";
|
|
||||||
lin yet_Conj = mkConj "yet";
|
|
||||||
}
|
|
||||||
@@ -1,453 +0,0 @@
|
|||||||
#! /usr/bin/perl -w
|
|
||||||
#
|
|
||||||
# Perl script to process OALD machine-readable ASCII file
|
|
||||||
# into a GF lexicon
|
|
||||||
#
|
|
||||||
# Usage: ./asc2gf < ascii_0710-1.txt
|
|
||||||
#
|
|
||||||
# Bjorn Bringert 2008,
|
|
||||||
# based on asc2lex by
|
|
||||||
# Matthew Purver, 11/2001
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
|
|
||||||
# Set of irregular verb names read from IrregEng.gf; the keys are what is
# left of each matching line after the substitution below, the values are 1.
my %irregular_verbs = ();
# NOTE(review): not referenced in this part of the script; presumably
# filled by add_word further down -- confirm.
my %words = ();

# Path of the irregular verb module, relative to the working directory.
my $irreg_eng = "../../english/IrregEng.gf";

# Read the known irregular verbs.  The three-argument open with a lexical
# handle keeps the file name from ever being parsed as an open mode, and
# $! reports why the open failed.
open(my $irreg_fh, '<', $irreg_eng)
    or die "Could not open $irreg_eng: $!\n";
while (<$irreg_fh>) {
    # On a line containing "<name>_V = ...", replace that part (and any
    # whitespace directly in front of it) with <name>.  Text before the
    # match is left in place, so the key is whatever remains of the line.
    if (s/\s*([a-z\d]+)_V\s*=.*/$1/) {
        chomp;
        $irregular_verbs{$_} = 1;
    }
}
close $irreg_fh;

print "Known irregular verbs from $irreg_eng:\n";
# Sorted so that the listing is identical from run to run.
print join(",", sort keys %irregular_verbs) . "\n";


# skip header section
while ( <STDIN> ) {
    last if /<\/TEIHEADER>/;
}
|
|
||||||
|
|
||||||
# read a line from stdin
|
|
||||||
while ( my $line = <STDIN> ) {
|
|
||||||
|
|
||||||
# remove SGML tags
|
|
||||||
$line =~ s/<[^<>]+>//g;
|
|
||||||
|
|
||||||
# split line into fields according to spec (line may be empty now)
|
|
||||||
if ( $line =~ /^(.{23}).{23}(.{23}).{1}(.{58})$/ ) {
|
|
||||||
|
|
||||||
my ( $word, $pos, $cat ) = ( $1, $2, $3 );
|
|
||||||
|
|
||||||
# trim white space
|
|
||||||
for ( ( $word, $pos, $cat ) ) {
|
|
||||||
s/\s*$//;
|
|
||||||
}
|
|
||||||
|
|
||||||
# make word lower-case
|
|
||||||
$word =~ tr/A-Z/a-z/; # lower case
|
|
||||||
|
|
||||||
# translate OALD diacritics
|
|
||||||
$word =~ s/~n/ñ/g;
|
|
||||||
$word =~ s/<c/ç/g;
|
|
||||||
$word =~ s/"a/ä/g;
|
|
||||||
$word =~ s/"o/ö/g;
|
|
||||||
$word =~ s/"u/ü/g;
|
|
||||||
$word =~ s/"i/ï/g;
|
|
||||||
$word =~ s/\^a/â/g;
|
|
||||||
$word =~ s/\^e/ê/g;
|
|
||||||
$word =~ s/\^o/ô/g;
|
|
||||||
$word =~ s/`a/à/g;
|
|
||||||
$word =~ s/`e/è/g;
|
|
||||||
$word =~ s/_e/é/g;
|
|
||||||
|
|
||||||
# make legal identifier
|
|
||||||
# Note: in theory this could cause clashes, but I don't think it does
|
|
||||||
# with the OALD.
|
|
||||||
my $name = $word;
|
|
||||||
$name =~ s/ /_/g; # space -> _
|
|
||||||
$name =~ s/-/_/g; # - -> _
|
|
||||||
$name =~ s/\./_/g; # . -> _
|
|
||||||
$name =~ s/^'//; # drop initial '
|
|
||||||
|
|
||||||
|
|
||||||
# get PoS & subcat info
|
|
||||||
my @pos = split( /,/, $pos );
|
|
||||||
$cat =~ s/,/\',\'/g;
|
|
||||||
( $cat = "\'$cat\'" ) unless ( $cat eq '' );
|
|
||||||
|
|
||||||
foreach ( @pos ) {
|
|
||||||
my ( $pcode, $infl, $freq )=split(//);
|
|
||||||
|
|
||||||
# for verbs, get inflected forms
|
|
||||||
if ( $pcode =~ /^[GHIJ]/ ) {
|
|
||||||
$pos = 'verb';
|
|
||||||
my ($vbz, $vbg, $vbd);
|
|
||||||
|
|
||||||
# if this is a root form, work out the inflected forms
|
|
||||||
if ( $infl =~ /^\d/ ) {
|
|
||||||
if ( $infl == 0 ) {
|
|
||||||
( $vbz = $word ) =~ s/$/s/;
|
|
||||||
( $vbg = $word ) =~ s/$/ing/;
|
|
||||||
( $vbd = $word ) =~ s/$/ed/;
|
|
||||||
}
|
|
||||||
elsif ( $infl == 1 ) {
|
|
||||||
( $vbz = $word ) =~ s/$/es/;
|
|
||||||
( $vbg = $word ) =~ s/$/ing/;
|
|
||||||
( $vbd = $word ) =~ s/$/ed/;
|
|
||||||
}
|
|
||||||
elsif ( $infl == 2 ) {
|
|
||||||
( $vbz = $word ) =~ s/e$/es/;
|
|
||||||
( $vbg = $word ) =~ s/e$/ing/;
|
|
||||||
( $vbd = $word ) =~ s/e$/ed/;
|
|
||||||
}
|
|
||||||
elsif ( $infl == 3 ) {
|
|
||||||
( $vbz = $word ) =~ s/y$/ies/;
|
|
||||||
( $vbg = $word ) =~ s/y$/ying/;
|
|
||||||
( $vbd = $word ) =~ s/y$/ied/;
|
|
||||||
}
|
|
||||||
elsif ( $infl == 4 ) {
|
|
||||||
( $vbz = $word ) =~ s/$/s/;
|
|
||||||
( $vbg = $word ) =~ s/(\w)$/$1$1ing/;
|
|
||||||
( $vbd = $word ) =~ s/(\w)$/$1$1ed/;
|
|
||||||
}
|
|
||||||
elsif ( $infl == 5 ) {
|
|
||||||
# for irregulars, just mark as such for now, we'll guess later
|
|
||||||
$vbz = 'IRREG';
|
|
||||||
$vbg = 'IRREG';
|
|
||||||
$vbd = 'IRREG';
|
|
||||||
}
|
|
||||||
|
|
||||||
my $lin = "mkV \"$word\" \"$vbz\" \"$vbd\" \"$vbd\" \"$vbg\"";
|
|
||||||
|
|
||||||
# try to use a verb from IrregEng
|
|
||||||
if ( $infl == 5 ) {
|
|
||||||
for (my $i = 0; $i < length($word) - 1; $i++) {
|
|
||||||
my $suffix = substr($word, $i);
|
|
||||||
if ($irregular_verbs{$suffix}) {
|
|
||||||
if ($i == 0) {
|
|
||||||
$lin = "IrregEng.${name}_V";
|
|
||||||
} else {
|
|
||||||
my $prefix = substr($word, 0, $i);
|
|
||||||
$lin = "mkV \"$prefix\" IrregEng.${suffix}_V";
|
|
||||||
}
|
|
||||||
last;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($pcode eq 'G') {
|
|
||||||
#add_word("${name}_VX", "mkVX ($lin)");
|
|
||||||
print STDERR "Ignoring anomalous verb: $name\n";
|
|
||||||
}
|
|
||||||
if ($pcode eq 'I' || $pcode eq 'J') {
|
|
||||||
add_word("${name}_V", "$lin");
|
|
||||||
}
|
|
||||||
if ($pcode eq 'H' || $pcode eq 'J') {
|
|
||||||
add_word("${name}_V2", "mkV2 ($lin)");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# if this is an inflected form, save for guessing irregulars later
|
|
||||||
elsif ( $infl =~ /^a/ ) {
|
|
||||||
#push( @vbz, $word );
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^b/ ) {
|
|
||||||
#push( @vbg, $word );
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^c/ ) {
|
|
||||||
#push( @vbd, $word );
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^d/ ) {
|
|
||||||
#push( @vbn, $word );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# for nouns, get plural form
|
|
||||||
elsif( $pcode =~ /^[KLMNY]/ ) {
|
|
||||||
$pos = 'noun';
|
|
||||||
$pcode =~ s/^K/count/;
|
|
||||||
$pcode =~ s/^L/mass/;
|
|
||||||
$pcode =~ s/^M/both/;
|
|
||||||
$pcode =~ s/^N/proper/;
|
|
||||||
if ( $pcode =~ /^Y/ ) {
|
|
||||||
$pcode = 'count' if $infl =~ /^[>\)\]]/;
|
|
||||||
$pcode = 'mass' if $infl =~ /^\}/;
|
|
||||||
$pcode = 'proper' if $infl =~ /^[:=~]/;
|
|
||||||
}
|
|
||||||
# if this is a singular form, work out plural form
|
|
||||||
unless ( $infl =~ /^j/ ) {
|
|
||||||
my $pl = '-';
|
|
||||||
if ( $infl eq '6' ) {
|
|
||||||
( $pl = $word ) =~ s/$/s/;
|
|
||||||
}
|
|
||||||
elsif ( $infl eq '7' ) {
|
|
||||||
( $pl = $word ) =~ s/$/es/;
|
|
||||||
}
|
|
||||||
elsif ( $infl eq '8' ) {
|
|
||||||
( $pl = $word ) =~ s/y$/ies/;
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^[9k\]]/ ) {
|
|
||||||
$pl = $word;
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^i/ ) {
|
|
||||||
# for irregulars, let's just make a guess and mark with '*'
|
|
||||||
# this could be done better, as for verbs, but I can't be bothered now
|
|
||||||
$pl = $word;
|
|
||||||
( $pl =~ s/^((wo)?m)an/$1en\*/ ) or
|
|
||||||
( $pl =~ s/man(-|$)/men$1\*/ ) or
|
|
||||||
( $pl =~ s/-in-law/s-in-law\*/ ) or
|
|
||||||
( $pl =~ s/um$/a\*/ ) or
|
|
||||||
( $pl =~ s/us$/i\*/ ) or
|
|
||||||
( $pl =~ s/a$/ae\*/ ) or
|
|
||||||
( $pl =~ s/on$/a\*/ ) or
|
|
||||||
( $pl =~ s/is$/es\*/ ) or
|
|
||||||
( $pl =~ s/o$/i\*/ ) or
|
|
||||||
( $pl =~ s/child$/children\*/ ) or
|
|
||||||
( $pl =~ s/oot$/eet\*/ ) or
|
|
||||||
( $pl =~ s/ooth$/eeth\*/ ) or
|
|
||||||
( $pl =~ s/([lm])ouse$/$1ice\*/ ) or
|
|
||||||
( $pl =~ s/f(e)?$/ves\*/ ) or
|
|
||||||
( $pl =~ s/[ei]x$/ices\*/ ) or
|
|
||||||
( $pl =~ s/eau$/eaux\*/ ) or
|
|
||||||
( $pl = 'IRREG' );
|
|
||||||
}
|
|
||||||
# if plural-only, swap root form & plural
|
|
||||||
elsif ( $infl =~ /^\)/ ) {
|
|
||||||
$pl = $word;
|
|
||||||
$word = '-';
|
|
||||||
}
|
|
||||||
( $infl =~ s/^[:l]/per/ ) or ( $infl =~ s/^[mn]/loc/ ) or ( $infl = '_' );
|
|
||||||
|
|
||||||
my $comment = "";
|
|
||||||
if ( $word eq '-' ) {
|
|
||||||
$comment .= " {- FIXME: no singular form -}";
|
|
||||||
}
|
|
||||||
if ( $pl eq '-' ) {
|
|
||||||
$comment .= " {- FIXME: no plural form -}";
|
|
||||||
}
|
|
||||||
if ( $pl =~ s/\*$// ) {
|
|
||||||
$comment .= " {- FIXME: guessed plural form -}";
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( $pcode eq 'proper' ) {
|
|
||||||
add_word("${name}_PN", "mkPN \"$word\"");
|
|
||||||
} else {
|
|
||||||
add_word("${name}_N", "mkN \"$word\" \"$pl\"$comment");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# for adjectives, get comparative & superlative forms
|
|
||||||
elsif( $pcode =~ /^O/ ) {
|
|
||||||
$pos = 'adj';
|
|
||||||
# if this is root form, work out inflected forms
|
|
||||||
unless ( $infl =~ /^[rs]/ ) {
|
|
||||||
my ($comp, $sup);
|
|
||||||
if ( $infl =~ /^[Apqt]/ ) {
|
|
||||||
$comp = $sup = '-';
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^B/ ) {
|
|
||||||
( $comp = $word ) =~ s/$/r/;
|
|
||||||
( $sup = $word ) =~ s/$/st/;
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^C/ ) {
|
|
||||||
( $comp = $word ) =~ s/$/er/;
|
|
||||||
( $sup = $word ) =~ s/$/est/;
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^D/ ) {
|
|
||||||
( $comp = $word ) =~ s/y$/ier/;
|
|
||||||
( $sup = $word ) =~ s/y$/iest/;
|
|
||||||
}
|
|
||||||
elsif ( $infl =~ /^E/ ) {
|
|
||||||
# for irregulars, let's just have a guess and mark with '*'
|
|
||||||
# (there aren't very many of these)
|
|
||||||
( $comp = $word ) =~ s/(\w)$/$1$1er\*/;
|
|
||||||
( $sup = $word ) =~ s/(\w)$/$1$1est\*/;
|
|
||||||
}
|
|
||||||
$infl =~ s/^[ABCDE]/normal/;
|
|
||||||
$infl =~ s/^p/pred/;
|
|
||||||
$infl =~ s/^q/attr/;
|
|
||||||
$infl =~ s/^t/affix/;
|
|
||||||
|
|
||||||
if ( $comp eq '-' ) {
|
|
||||||
add_word("${name}_A", "compoundA (mkA \"$word\")");
|
|
||||||
} else {
|
|
||||||
add_word("${name}_A", "mkA \"$word\" \"$comp\"");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# adverb
|
|
||||||
elsif( $pcode =~ /^P/ ) {
|
|
||||||
$pos = 'adv';
|
|
||||||
$infl =~ s/^[u\+]/normal/;
|
|
||||||
$infl =~ s/^w/whrel/;
|
|
||||||
$infl =~ s/^v/whq/;
|
|
||||||
add_word("${name}_Adv", "mkAdv \"$word\"");
|
|
||||||
}
|
|
||||||
# pronoun
|
|
||||||
elsif( $pcode =~ s/^Q/_/ ) {
|
|
||||||
$pos = 'pron';
|
|
||||||
$infl =~ s/^x/normal/;
|
|
||||||
$infl =~ s/^y/whq/;
|
|
||||||
$infl =~ s/^z/whrel/;
|
|
||||||
my $class = '_';
|
|
||||||
# reflexive pronouns
|
|
||||||
if ( ( $word =~ /self$/ ) or
|
|
||||||
( $word =~ /selves$/ ) ) {
|
|
||||||
$pcode = 'acc';
|
|
||||||
}
|
|
||||||
# accusative personal pronouns
|
|
||||||
if ( ( $word =~ /^him/ ) or
|
|
||||||
( $word =~ /^her/ ) or
|
|
||||||
( $word =~ /^them/ ) or
|
|
||||||
( $word eq 'us' ) or
|
|
||||||
( $word eq 'thee' ) or
|
|
||||||
( $word eq 'me' ) ) {
|
|
||||||
$pcode = 'acc';
|
|
||||||
$class = 'per';
|
|
||||||
}
|
|
||||||
# nominative personal pronouns
|
|
||||||
if ( ( $word eq 'he' ) or
|
|
||||||
( $word eq 'she' ) or
|
|
||||||
( $word eq 'they' ) or
|
|
||||||
( $word eq 'we' ) or
|
|
||||||
( $word eq 'thou' ) or
|
|
||||||
( $word eq 'i' ) ) {
|
|
||||||
$pcode = 'nom';
|
|
||||||
$class = 'per';
|
|
||||||
}
|
|
||||||
# other personal pronouns
|
|
||||||
if ( ( $word =~ /.+one/ ) or
|
|
||||||
( $word =~ /one.+/ ) or
|
|
||||||
( $word =~ /body/ ) or
|
|
||||||
( $word =~ /^you/ ) or
|
|
||||||
( $word =~ /^who/ ) ) {
|
|
||||||
$class = 'per';
|
|
||||||
}
|
|
||||||
# non-personal pronouns
|
|
||||||
if ( $word =~ /thing/ ) {
|
|
||||||
$class = 'nper';
|
|
||||||
}
|
|
||||||
# otherwise case/person info will be '_' (anon variable)
|
|
||||||
# add full spec to @pron array
|
|
||||||
#push( @pron, "$pos( \'$word\', $pcode, $infl, $class ).\n" );
|
|
||||||
}
|
|
||||||
# for determiners, leave anon variable as placeholder for semantics
|
|
||||||
elsif( $pcode =~ /^[RS]/ ) {
|
|
||||||
$pos = 'det';
|
|
||||||
$pcode =~ s/^R/def/;
|
|
||||||
$pcode =~ s/^S/indef/;
|
|
||||||
#add_word("${name}_Det","mkDeterminer \"$word\"");
|
|
||||||
}
|
|
||||||
# for prepositions - nothing to say
|
|
||||||
elsif( $pcode =~ s/^T/prep/ ) {
|
|
||||||
$pos = 'prep';
|
|
||||||
add_word("${name}_Prep","mkPrep \"$word\"");
|
|
||||||
}
|
|
||||||
# for conjunctions - nothing to say
|
|
||||||
elsif( $pcode =~ s/^V/conj/ ) {
|
|
||||||
$pos = 'conj';
|
|
||||||
add_word("${name}_Conj","mkConj \"$word\"");
|
|
||||||
}
|
|
||||||
# for miscellaneous, leave '-' as placeholder for illocutionary info
|
|
||||||
elsif( $pcode =~ /^[UWXZ]/ ) {
|
|
||||||
$pos = 'misc';
|
|
||||||
#push( @prefix, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^U/prefix/ );
|
|
||||||
#push( @interj, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^W/interj/ );
|
|
||||||
#push( @partcl, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^X/partcl/ );
|
|
||||||
#push( @unknown, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^Z/unknown/ );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Output files: one abstract/concrete GF module pair per lexicon.
my $absfile = "Oald.gf";
my $cncfile = "OaldEng.gf";
my $abs_structfile = "OaldStructural.gf";
my $cnc_structfile = "OaldStructuralEng.gf";

# Open all four outputs up front and stop at once if any of them cannot be
# created; otherwise every print below would be silently discarded and the
# script would still report success at the end.
open(my $abs_fh, '>', $absfile)
    or die "Cannot open $absfile for writing: $!\n";
open(my $cnc_fh, '>', $cncfile)
    or die "Cannot open $cncfile for writing: $!\n";
open(my $abs_struct_fh, '>', $abs_structfile)
    or die "Cannot open $abs_structfile for writing: $!\n";
open(my $cnc_struct_fh, '>', $cnc_structfile)
    or die "Cannot open $cnc_structfile for writing: $!\n";

# Provenance/licence comment copied to the top of every generated module.
my $header = "-- English lexicon for GF, produced from:\n"
    . "-- Oxford advanced learner's dictionary of current English:\n"
    . "-- expanded 'computer usable' version compiled by Roger Mitton\n"
    . "-- The computer usable version is transcribed from:\n"
    . "-- Oxford advanced learner's dictionary of current English\n"
    . "-- A.S. Hornby ; with the assistance of A.P. Cowie [and] J. Windsor Lewis.\n"
    . "-- 3rd. ed., London : Oxford University Press, 1974.\n"
    . "-- Distributed as 'dict0710' by:\n"
    . "-- Oxford Text Archive\n"
    . "-- Oxford University Computing Services\n"
    . "-- 13 Banbury Road\n"
    . "-- Oxford\n"
    . "-- OX2 6NN\n"
    . "-- Under these conditions:\n"
    . "-- Freely available for non-commercial use provided that this header is\n"
    . "-- included in its entirety with any copy distributed.\n"
    . "--\n"
    . "-- GF version generated by asc2gf, Bjorn Bringert Nov 2008\n"
    . "-- based on asc2lex, Matthew Purver Nov 2001\n"
    . "-- http://www.stanford.edu/~mpurver/software.html\n"
    . "\n";

# Module preambles.
print {$abs_fh} $header;
print {$abs_fh} "abstract Oald = Cat ** {\n";

print {$cnc_fh} $header;
print {$cnc_fh} "--# -path=.:alltenses\n";
print {$cnc_fh} "concrete OaldEng of Oald = CatEng ** open ParadigmsEng, IrregEng in {\n";

print {$abs_struct_fh} $header;
print {$abs_struct_fh} "abstract OaldStructural = Cat ** {\n";

print {$cnc_struct_fh} $header;
print {$cnc_struct_fh} "--# -path=.:alltenses\n";
print {$cnc_struct_fh} "concrete OaldStructuralEng of OaldStructural = CatEng ** open ParadigmsEng in {\n";

# One `fun`/`lin` pair per collected word, in sorted name order.
foreach my $name (sort (keys %words)) {
    # The GF category is the suffix after the last underscore (e.g. "_V2").
    (my $cat = $name) =~ s/.*_([A-Z][A-Za-z\d]*)$/$1/;
    my $lin = $words{$name};
    # NOTE(review): the anchors bind only to the first and last alternatives,
    # so this is true for any category that starts with "A", contains "N" or
    # "V", or ends in "V2" (e.g. Adv and PN as well as A, N, V, V2). The
    # pattern is left unchanged so the generated modules keep their current
    # contents; confirm the intended set before tightening it.
    if ( $cat =~ /^(A)|(N)|(V)|(V2)$/ ) {
        print {$abs_fh} "fun $name : $cat;\n";
        print {$cnc_fh} "lin $name = $lin;\n";
    } else {
        print {$abs_struct_fh} "fun $name : $cat;\n";
        print {$cnc_struct_fh} "lin $name = $lin;\n";
    }
}

# Close the module bodies.
print {$abs_fh} "}";
print {$cnc_fh} "}";

print {$abs_struct_fh} "}";
print {$cnc_struct_fh} "}";

# Buffered write errors (e.g. a full disk) only surface at close time.
close($abs_struct_fh) or die "Error writing $abs_structfile: $!\n";
close($cnc_struct_fh) or die "Error writing $cnc_structfile: $!\n";

close($abs_fh) or die "Error writing $absfile: $!\n";
close($cnc_fh) or die "Error writing $cncfile: $!\n";

print "\nWrote open lexicon to $absfile and $cncfile\n";
print "Wrote closed lexicon to $abs_structfile and $cnc_structfile\n";
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Register the GF linearization $lin under the function name $name in %words.
# The first definition of a name wins; any later one is reported on STDERR
# and dropped.
sub add_word {
    my ($name,$lin) = @_;
    unless (exists $words{$name}) {
        return $words{$name} = $lin;
    }
    return print STDERR "Duplicate word: $name\n";
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
This directory contains the Oxford Advanced Learner's Dictionary of Current English
|
|
||||||
(expanded computer-usable version), available from the Oxford Text Archive (http://ota.ahds.ac.uk).
|
|
||||||
|
|
||||||
It has a flat structure but contains part-of-speech, verb subcategorisation & pronunciation info.
|
|
||||||
|
|
||||||
Files:
|
|
||||||
ascii_0710-1.txt the original plain ASCII version of the OALD
|
|
||||||
ascii_0710-2.txt the information to go with it
|
|
||||||
asc2lex a Perl script to process ASCII -> Prolog
|
|
||||||
lexicon2.pl the resulting Prolog version, hand-corrected for irregulars etc.
|
|
||||||
|
|
||||||
Matthew Purver, Jan 2001
|
|
||||||
@@ -1,320 +0,0 @@
|
|||||||
#! /usr/bin/perl
|
|
||||||
#
|
|
||||||
# Perl script to process OALD machine-readable ASCII file
|
|
||||||
# into a Prolog-readable lexicon usable by SHARDS
|
|
||||||
#
|
|
||||||
# Usage: ./asc2lex < ascii_0710-1.txt [> OUTPUT.PL]
|
|
||||||
#
|
|
||||||
# Matthew Purver, 11/2001
|
|
||||||
|
|
||||||
# Banner comment that heads the generated Prolog file.
print "% Prolog lexicon for SHARDS, from OALD machine-readable dictionary\n",
      "% Produced by asc2lex, Matthew Purver 11/2001\n\n";

# Discard input up to and including the line that closes the TEI header.
while ( <STDIN> ) {
    next unless /<\/TEIHEADER>/;
    last;
}
|
|
||||||
|
|
||||||
# Main pass: read one dictionary entry per line from stdin and append a
# Prolog fact for each of its part-of-speech codes to the per-category
# arrays (@verb, @noun, @adj, ...), which are printed after the loop.
while ( $line = <STDIN> ) {

    # remove SGML tags
    $line =~ s/<[^<>]+>//g;

    # split line into fields according to spec (line may be empty now)
    if ( $line =~ /^(.{23}).{23}(.{23}).{1}(.{58})$/ ) {

        # trim trailing white space from each field
        for ( ( $word, $pos, $cat ) = ( $1, $2, $3 ) ) {
            s/\s*$//;
        }

        # make word lower-case atomic string
        $word =~ s/\'/\\\'/g;    # ' -> \'
        $word =~ s/\"/\\\"/g;    # " -> \"
        $word =~ tr/A-Z/a-z/;    # lower case

        # get PoS & subcat info
        @pos = split( /,/, $pos );
        $cat =~ s/,/\',\'/g;
        ( $cat = "\'$cat\'" ) unless ( $cat eq '' );

        # set up Prolog-style string & put into array;
        # each PoS code is split into its individual characters
        foreach ( @pos ) {
            ( $pcode, $infl, $freq )=split(//);

            # for verbs, get inflected forms
            if ( $pcode =~ /^[GHIJ]/ ) {
                $pos = 'verb';
                $pcode =~ s/^G/unknown/;
                $pcode =~ s/^H/tran/;
                $pcode =~ s/^I/intran/;
                $pcode =~ s/^J/_/;

                # if this is a root form, work out the inflected forms
                if ( $infl =~ /^\d/ ) {
                    if ( $infl == 0 ) {
                        ( $vbz = $word ) =~ s/$/s/;
                        ( $vbg = $word ) =~ s/$/ing/;
                        ( $vbd = $word ) =~ s/$/ed/;
                    }
                    elsif ( $infl == 1 ) {
                        ( $vbz = $word ) =~ s/$/es/;
                        ( $vbg = $word ) =~ s/$/ing/;
                        ( $vbd = $word ) =~ s/$/ed/;
                    }
                    elsif ( $infl == 2 ) {
                        ( $vbz = $word ) =~ s/e$/es/;
                        ( $vbg = $word ) =~ s/e$/ing/;
                        ( $vbd = $word ) =~ s/e$/ed/;
                    }
                    elsif ( $infl == 3 ) {
                        ( $vbz = $word ) =~ s/y$/ies/;
                        ( $vbg = $word ) =~ s/y$/ying/;
                        ( $vbd = $word ) =~ s/y$/ied/;
                    }
                    elsif ( $infl == 4 ) {
                        ( $vbz = $word ) =~ s/$/s/;
                        ( $vbg = $word ) =~ s/(\w)$/$1$1ing/;
                        ( $vbd = $word ) =~ s/(\w)$/$1$1ed/;
                    }
                    elsif ( $infl == 5 ) {
                        # for irregulars, just mark as such for now, we'll guess later
                        $vbz = 'IRREG';
                        $vbg = 'IRREG';
                        $vbd = 'IRREG';
                    }
                    # add the full spec to @verb array
                    # ($vbd fills both the past and the past-participle slot)
                    push( @verb,
                        "$pos( \'$word\', \'$vbz\', \'$vbg\', \'$vbd\', \'$vbd\', $pcode, [$cat] ).\n" );
                }
                # if this is an inflected form, save for guessing irregulars later
                elsif ( $infl =~ /^a/ ) {
                    push( @vbz, $word );
                }
                elsif ( $infl =~ /^b/ ) {
                    push( @vbg, $word );
                }
                elsif ( $infl =~ /^c/ ) {
                    push( @vbd, $word );
                }
                elsif ( $infl =~ /^d/ ) {
                    push( @vbn, $word );
                }
            }
            # for nouns, get plural form
            elsif( $pcode =~ /^[KLMNY]/ ) {
                $pos = 'noun';
                $pcode =~ s/^K/count/;
                $pcode =~ s/^L/mass/;
                $pcode =~ s/^M/both/;
                $pcode =~ s/^N/proper/;
                if ( $pcode =~ /^Y/ ) {
                    $pcode = 'count' if $infl =~ /^[>\)\]]/;
                    $pcode = 'mass' if $infl =~ /^\}/;
                    $pcode = 'proper' if $infl =~ /^[:=~]/;
                }
                # if this is a singular form, work out plural form
                unless ( $infl =~ /^j/ ) {
                    # Singular form to emit. A private copy is used because
                    # $word is shared by every PoS code on this line and must
                    # not be overwritten by the plural-only case below.
                    my $sg = $word;
                    $pl = '-';
                    # $infl is a single character, so compare it as a string
                    if ( $infl eq '6' ) {
                        ( $pl = $word ) =~ s/$/s/;
                    }
                    elsif ( $infl eq '7' ) {
                        ( $pl = $word ) =~ s/$/es/;
                    }
                    elsif ( $infl eq '8' ) {
                        ( $pl = $word ) =~ s/y$/ies/;
                    }
                    elsif ( $infl =~ /^[9k\]]/ ) {
                        $pl = $word;
                    }
                    elsif ( $infl =~ /^i/ ) {
                        # for irregulars, let's just make a guess and mark with '*'
                        # this could be done better, as for verbs, but I can't be bothered now
                        # (the first rule that applies wins; 'IRREG' if none does)
                        $pl = $word;
                        ( $pl =~ s/^((wo)?m)an/$1en\*/ ) or
                        ( $pl =~ s/man(-|$)/men$1\*/ ) or
                        ( $pl =~ s/-in-law/s-in-law\*/ ) or
                        ( $pl =~ s/um$/a\*/ ) or
                        ( $pl =~ s/us$/i\*/ ) or
                        ( $pl =~ s/a$/ae\*/ ) or
                        ( $pl =~ s/on$/a\*/ ) or
                        ( $pl =~ s/is$/es\*/ ) or
                        ( $pl =~ s/o$/i\*/ ) or
                        ( $pl =~ s/child$/children\*/ ) or
                        ( $pl =~ s/oot$/eet\*/ ) or
                        ( $pl =~ s/ooth$/eeth\*/ ) or
                        ( $pl =~ s/([lm])ouse$/$1ice\*/ ) or
                        ( $pl =~ s/f(e)?$/ves\*/ ) or
                        ( $pl =~ s/[ei]x$/ices\*/ ) or
                        ( $pl =~ s/eau$/eaux\*/ ) or
                        ( $pl = 'IRREG' );
                    }
                    # if plural-only, swap root form & plural
                    elsif ( $infl =~ /^\)/ ) {
                        $pl = $word;
                        $sg = '-';
                    }
                    # and add full spec to @noun array
                    ( $infl =~ s/^[:l]/per/ ) or ( $infl =~ s/^[mn]/loc/ ) or ( $infl = '_' );
                    push( @noun, "$pos( \'$sg\', \'$pl\', $pcode, $infl ).\n" );
                }
            }
            # for adjectives, get comparative & superlative forms
            elsif( $pcode =~ /^O/ ) {
                $pos = 'adj';
                # if this is root form, work out inflected forms
                unless ( $infl =~ /^[rs]/ ) {
                    # NOTE(review): for an inflection code outside [ABCDEpqt]
                    # none of the branches below assigns $comp/$sup, so they
                    # keep whatever the previous adjective set.
                    if ( $infl =~ /^[Apqt]/ ) {
                        $comp = $sup = '-';
                    }
                    elsif ( $infl =~ /^B/ ) {
                        ( $comp = $word ) =~ s/$/r/;
                        ( $sup = $word ) =~ s/$/st/;
                    }
                    elsif ( $infl =~ /^C/ ) {
                        ( $comp = $word ) =~ s/$/er/;
                        ( $sup = $word ) =~ s/$/est/;
                    }
                    elsif ( $infl =~ /^D/ ) {
                        ( $comp = $word ) =~ s/y$/ier/;
                        ( $sup = $word ) =~ s/y$/iest/;
                    }
                    elsif ( $infl =~ /^E/ ) {
                        # for irregulars, let's just have a guess and mark with '*'
                        # (there aren't very many of these)
                        ( $comp = $word ) =~ s/(\w)$/$1$1er\*/;
                        ( $sup = $word ) =~ s/(\w)$/$1$1est\*/;
                    }
                    $infl =~ s/^[ABCDE]/normal/;
                    $infl =~ s/^p/pred/;
                    $infl =~ s/^q/attr/;
                    $infl =~ s/^t/affix/;
                    # and add full spec to @adj array
                    push( @adj, "$pos( \'$word\', \'$comp\', \'$sup\', $infl ).\n" );
                }
            }
            # for adverbs, just add all info to @adv array
            elsif( $pcode =~ /^P/ ) {
                $pos = 'adv';
                $infl =~ s/^[u\+]/normal/;
                $infl =~ s/^w/whrel/;
                $infl =~ s/^v/whq/;
                push( @adv, "$pos( \'$word\', $infl ).\n" );
            }
            # for pronouns, work out some case/person info
            elsif( $pcode =~ s/^Q/_/ ) {
                $pos = 'pron';
                $infl =~ s/^x/normal/;
                $infl =~ s/^y/whq/;
                $infl =~ s/^z/whrel/;
                $class = '_';
                # reflexive pronouns
                if ( ( $word =~ /self$/ ) or
                     ( $word =~ /selves$/ ) ) {
                    $pcode = 'acc';
                }
                # accusative personal pronouns
                if ( ( $word =~ /^him/ ) or
                     ( $word =~ /^her/ ) or
                     ( $word =~ /^them/ ) or
                     ( $word eq 'us' ) or
                     ( $word eq 'thee' ) or
                     ( $word eq 'me' ) ) {
                    $pcode = 'acc';
                    $class = 'per';
                }
                # nominative personal pronouns
                if ( ( $word eq 'he' ) or
                     ( $word eq 'she' ) or
                     ( $word eq 'they' ) or
                     ( $word eq 'we' ) or
                     ( $word eq 'thou' ) or
                     ( $word eq 'i' ) ) {
                    $pcode = 'nom';
                    $class = 'per';
                }
                # other personal pronouns
                if ( ( $word =~ /.+one/ ) or
                     ( $word =~ /one.+/ ) or
                     ( $word =~ /body/ ) or
                     ( $word =~ /^you/ ) or
                     ( $word =~ /^who/ ) ) {
                    $class = 'per';
                }
                # non-personal pronouns
                if ( $word =~ /thing/ ) {
                    $class = 'nper';
                }
                # otherwise case/person info will be '_' (anon variable)
                # add full spec to @pron array
                push( @pron, "$pos( \'$word\', $pcode, $infl, $class ).\n" );
            }
            # for determiners, leave anon variable as placeholder for semantics
            elsif( $pcode =~ /^[RS]/ ) {
                $pos = 'det';
                $pcode =~ s/^R/def/;
                $pcode =~ s/^S/indef/;
                push( @det, "$pos( \'$word\', $pcode, _ ).\n" );
            }
            # for prepositions - nothing to say
            elsif( $pcode =~ s/^T/prep/ ) {
                $pos = 'prep';
                push( @prep, "$pos( \'$word\', $pcode ).\n" );
            }
            # for conjunctions - nothing to say
            elsif( $pcode =~ s/^V/conj/ ) {
                $pos = 'conj';
                push( @conj, "$pos( \'$word\', $pcode ).\n" );
            }
            # for miscellaneous, leave '-' as placeholder for illocutionary info
            elsif( $pcode =~ /^[UWXZ]/ ) {
                $pos = 'misc';
                push( @prefix, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^U/prefix/ );
                push( @interj, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^W/interj/ );
                push( @partcl, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^X/partcl/ );
                push( @unknown, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^Z/unknown/ );
            }
        }
    }
}
|
|
||||||
|
|
||||||
# now have a guess at irregular verb forms (marking the best guess with '*')
#
# Every @verb entry whose inflected forms are still the 'IRREG' placeholder
# gets them replaced, in place, by the closest-matching forms collected in
# @vbz, @vbg, @vbd and @vbn during the main pass.
foreach my $entry ( @verb ) {
    next unless $entry =~ /verb\( \'([^\']+)\', \'IRREG/;
    my $word = $1;
    my $vbz = findbest( $word, @vbz );
    my $vbg = findbest( $word, @vbg );
    my $vbd = findbest( $word, @vbd );
    my $vbn = findbest( $word, @vbn );
    # \Q...\E: the headword is literal text, not a pattern, so any regex
    # metacharacter in it must not be interpreted.
    $entry =~ s/(\Q$word\E\', \')IRREG(\', \')IRREG(\', \')IRREG(\', \')IRREG/\*$1$vbz$2$vbg$3$vbd$4$vbn/;
}
|
|
||||||
|
|
||||||
# Emit the collected facts grouped by part of speech, in a fixed category
# order, with one blank line between consecutive groups.
my @fact_groups = (
    \@verb,   \@noun,   \@adj,    \@adv,
    \@pron,   \@det,    \@prep,   \@conj,
    \@prefix, \@interj, \@partcl, \@unknown,
);
print join( "\n", map { join( '', @$_ ) } @fact_groups );
|
|
||||||
|
|
||||||
|
|
||||||
# find closest string match
# similarity measure is just the length of identical prefix
# prefer shorter strings in the case of equal similarity
#
# Arguments: $word  - the headword to match against
#            @array - candidate strings
# Returns:   the candidate sharing the longest prefix with $word (the
#            shorter one on a tie, the earlier one if lengths also tie),
#            or undef when @array is empty.
#
# All working state is lexical, so a call can never return a candidate
# left over from an earlier call.
sub findbest
{
    my ( $word, @array ) = @_;

    my $best;
    # One more than the longest shared prefix seen so far; 0 means that no
    # candidate has been examined yet.
    my $bestlen = 0;
    foreach my $test ( @array ) {
        # Tie with the current best on prefix length: keep the shorter one.
        # Skipped for the first candidate, where there is no best yet and
        # $bestlen-1 would be a negative substr length.
        if ( ( $bestlen > 0 ) &&
             ( substr( $word, 0, $bestlen-1 ) eq substr( $test, 0, $bestlen-1 ) ) &&
             ( length( $test ) < length( $best ) ) ) {
            $best = $test;
        }
        # Strictly longer shared prefix: this candidate becomes the best.
        while ( ( substr( $word, 0, $bestlen ) eq substr( $test, 0, $bestlen ) ) &&
                ( $bestlen <= length( $test ) ) ) {
            $bestlen++;
            $best = $test;
        }
    }
    return $best;
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,39 +0,0 @@
|
|||||||
youPl_Pron 0.04
|
|
||||||
youPol_Pron 0.04
|
|
||||||
|
|
||||||
UttS 0.6
|
|
||||||
UttQS 0.2
|
|
||||||
UttImp 0.1
|
|
||||||
|
|
||||||
NoPConj 0.8
|
|
||||||
NoVoc 0.98
|
|
||||||
|
|
||||||
PredVP 0.9
|
|
||||||
|
|
||||||
DetCN 0.8
|
|
||||||
UsePron 0.1
|
|
||||||
something_NP 0.01
|
|
||||||
somebody_NP 0.01
|
|
||||||
everything_NP 0.01
|
|
||||||
everybody_NP 0.01
|
|
||||||
|
|
||||||
SlashV2 0.8
|
|
||||||
|
|
||||||
UseV 0.4
|
|
||||||
ComplSlash 0.4
|
|
||||||
ComplVQ 0.02
|
|
||||||
ComplVS 0.02
|
|
||||||
ComplVA 0.02
|
|
||||||
|
|
||||||
DetQuant 0.8
|
|
||||||
|
|
||||||
ASimul 0.8
|
|
||||||
TFut 0.1
|
|
||||||
TCond 0.1
|
|
||||||
PPos 0.7
|
|
||||||
|
|
||||||
ApposCN 0.01
|
|
||||||
|
|
||||||
ExistNP 0.0001
|
|
||||||
UseCopula 0.01
|
|
||||||
ConjS 0.1
|
|
||||||
Reference in New Issue
Block a user