next-lib renamed to lib, lib to old-lib

2009-06-22 15:39:08 +00:00
parent 90bd07b1cf
commit 2116f41bc2
1433 changed files with 8 additions and 8 deletions
--- a/lib/src/parse/BigLexEng.gf
+++ b/lib/src/parse/BigLexEng.gf
--- a/lib/src/parse/BigLexEngAbs.gf
+++ b/lib/src/parse/BigLexEngAbs.gf
--- a/lib/src/parse/Parse.gf
+++ b/lib/src/parse/Parse.gf
@@ -0,0 +1,14 @@
+abstract Parse = 
+  Noun,
+  Verb, 
+  Adjective,
+  Adverb,
+  Numeral,
+  Sentence, 
+  Question,
+  Relative,
+  Conjunction,
+  Phrase,
+  Text,
+  Structural,
+  Idiom ;
--- a/lib/src/parse/ParseEng.gf
+++ b/lib/src/parse/ParseEng.gf
@@ -0,0 +1,161 @@
+--# -path=.:oald:alltenses
+
+concrete ParseEng of ParseEngAbs = 
+  NounEng, 
+  VerbEng - [ComplVS], 
+  AdjectiveEng,
+  AdverbEng,
+  NumeralEng,
+  SentenceEng - [UseCl, UseQCl, UseRCl],
+  QuestionEng,
+  RelativeEng - [IdRP, RelSlash],
+  ConjunctionEng,
+  PhraseEng - [UttImpSg, UttImpPl],
+  TextX,
+  StructuralEng - [everybody_NP, every_Det, only_Predet, somebody_NP],
+  IdiomEng,
+
+  ExtraEng - [
+   UncNegCl, UncNegQCl, UncNegRCl, UncNegImpSg, UncNegImpPl,
+   StrandRelSlash,
+   that_RP
+  ],
+
+  LexiconEng [N3, distance_N3, 
+              A2, married_A2,
+	      VQ, wonder_VQ, 
+	      V2A, paint_V2A, 
+	      V2Q, ask_V2Q,
+	      V2V, beg_V2V,
+	      V2S, answer_V2S,
+	      VA, become_VA],
+  OaldEng - [everywhere_Adv, here_Adv, quite_Adv, somewhere_Adv, there_Adv]
+
+  ** open ParadigmsEng, ResEng, MorphoEng, NounEng, ParamX, Prelude in {
+
+flags startcat = Phr ; unlexer = text ; lexer = text ;
+
+--
+-- * Overridden things from the common API
+--
+
+-- Allow both "hope that he runs" and "hope he runs".
+lin ComplVS v s = variants { VerbEng.ComplVS v s; ComplBareVS v s } ;
+
+-- Allow both contracted and uncontracted negated clauses.
+lin UseCl t p cl = 
+      case p.p of {
+	Pos => SentenceEng.UseCl t p cl;
+	Neg => variants { SentenceEng.UseCl t p cl; UncNegCl t p cl }
+      } ;
+
+lin UseQCl t p cl = 
+      case p.p of {
+	Pos => SentenceEng.UseQCl t p cl;
+	Neg => variants { SentenceEng.UseQCl t p cl; UncNegQCl t p cl }
+      } ;
+
+lin UseRCl t p cl = 
+      case p.p of {
+	Pos => SentenceEng.UseRCl t p cl;
+	Neg => variants { SentenceEng.UseRCl t p cl; UncNegRCl t p cl }
+      } ;
+
+lin UttImpSg p i = 
+      case p.p of {
+	Pos => PhraseEng.UttImpSg p i;
+	Neg => variants { PhraseEng.UttImpSg p i ; UncNegImpSg p i }
+      } ;
+
+lin UttImpPl p i = 
+      case p.p of {
+	Pos => PhraseEng.UttImpPl p i;
+	Neg => variants { PhraseEng.UttImpPl p i ; UncNegImpPl p i }
+      } ;
+
+-- Two different forms of relative clauses:
+-- Pied piping: "at which we are looking". 
+-- Stranding: "that he looks at"
+-- EmptyRelSlash is not used here, since it would give 
+-- a meta-variable for the RP.
+
+lin RelSlash rp slash = variants { RelativeEng.RelSlash rp slash; StrandRelSlash rp slash } ;
+
+
+-- Allow both "who"/"which" and "that"
+lin IdRP = 
+     { s = table {
+        RC _ Gen    => "whose" ; 
+        RC Neutr _  => variants { "which"; "that"; {- for dictionary entries with the wrong gender -} "who" } ;
+        RC _ Acc    => variants { "whom"; "that"; {- incorrect but common -} "who" } ;
+        RC _ Nom    => variants { "who" ; "that" } ;
+        RPrep _     => variants { "which"; "whom"; {- incorrect but common -}  "who" }
+        } ;
+      a = RNoAg
+      } ;
+
+lin everybody_NP = variants { regNP "everybody" singular; regNP "everyone" singular } ;
+lin somebody_NP = variants { regNP "somebody" singular; regNP "someone" singular } ;
+
+lin every_Det = variants { mkDeterminer singular "every"; mkDeterminer singular "each" };
+
+lin only_Predet = variants { ss "only"; ss "just" };
+
+
+--
+-- English-specific additions
+--
+
+-- Syntactic additions
+
+lin
+    VerbCN v cn = {s = \\n,c => v.s ! VPresPart ++ cn.s ! n ! c; g = cn.g};
+
+    NumOfNP num np = {
+      s = \\c => num.s ! Nom ++ "of" ++ np.s ! c ; 
+      a = agrP3 num.n
+      } ;
+
+    CAdvNP ad cn np = {
+      s = \\c => ad.s ++ cn.s ! Sg ! c ++ ad.p ++ np.s ! Nom ; 
+      a = agrP3 Sg
+      } ;
+
+    CAdvSSlash ad cn slash = {
+      s = \\c => ad.s ++ cn.s ! Sg ! c ++ ad.p ++ slash.s ++ slash.c2;
+      a = agrP3 Sg
+      } ;
+
+    CompCN cn = { s = \\a => let n = (fromAgr a).n
+		              in IndefArt.s ! False ! n ++ cn.s ! n ! Acc} ;
+
+-- Lexical additions
+
+lin
+    a8few_Det = mkDeterminer plural ["a few"];
+    another_Predet = ss "another" ;
+    any_Predet = ss "any" ;
+    anybody_NP = variants { regNP "anybody" singular; regNP "anyone" singular };
+    anything_NP = regNP "anything" singular;
+    both_Det = mkDeterminer plural "both";
+    either_Det = mkDeterminer singular "either" ;
+    exactly_AdN = ss "exactly" ;
+    most_Det = mkDeterminer plural "most";
+    neither_Det = mkDeterminer singular "neither" ;
+    only_AdV = mkAdV "only" ;
+    should_VV = {
+      s = table {
+	VVF VInf => ["ought to"] ;
+	VVF VPres => "should" ;
+	VVF VPPart => ["ought to"] ;
+	VVF VPresPart => variants {} ; -- FIXME: "shoulding" ?
+	VVF VPast => ["should have"] ;
+	VVPastNeg => ["shouldn't have"] ;
+	VVPresNeg => "shouldn't"
+	} ;
+      isAux = True
+    } ;
+    several_Det = mkDeterminer plural "several" ;
+
+
+} ;
--- a/lib/src/parse/ParseEngAbs.gf
+++ b/lib/src/parse/ParseEngAbs.gf
@@ -0,0 +1,48 @@
+abstract ParseEngAbs = 
+  Parse, 
+  ExtraEngAbs - [
+   UncNegCl, UncNegQCl, UncNegRCl, UncNegImpSg, UncNegImpPl,
+   StrandRelSlash,
+   that_RP
+  ],
+
+  Lexicon [N3, distance_N3, 
+	   A2, married_A2,
+	      VQ, wonder_VQ, 
+	      V2A, paint_V2A, 
+	      V2Q, ask_V2Q,
+	      V2V, beg_V2V,
+	      V2S, answer_V2S,
+	      VA, become_VA],
+  Oald - [everywhere_Adv, here_Adv, quite_Adv, somewhere_Adv, there_Adv]
+
+  ** {
+
+-- Syntactic additions
+
+fun VerbCN : V -> CN -> CN ; -- running man
+
+fun NumOfNP : Num -> NP -> NP ; -- ten of the dogs
+
+fun CAdvNP  : CAdv -> CN -> NP -> NP ; -- more wine than the professor
+fun CAdvSSlash  : CAdv -> CN -> SSlash -> NP ; -- more wine than the professor drank
+
+fun CompCN : CN -> Comp ; -- "(every man is) a dog", "(all men are) dogs"
+
+-- Lexical additions
+
+fun a8few_Det : Det ;
+fun another_Predet : Predet ;
+fun any_Predet : Predet ;
+fun anybody_NP : NP ;
+fun anything_NP : NP ;
+fun both_Det : Det ;
+fun either_Det : Det ;
+fun exactly_AdN : AdN ;
+fun most_Det : Det ;
+fun neither_Det : Det ;
+fun only_AdV : AdV ;
+fun should_VV : VV ;
+fun several_Det : Det ;
+
+}
--- a/lib/src/parse/oald/OaldStructural.gf
+++ b/lib/src/parse/oald/OaldStructural.gf
@@ -0,0 +1,184 @@
+-- English lexicon for GF, produced from:
+-- Oxford advanced learner's dictionary of current English:
+--    expanded 'computer usable' version compiled by Roger Mitton
+-- The computer usable version is transcribed from:
+--   Oxford advanced learner's dictionary of current English
+--   A.S. Hornby ; with the assistance of A.P. Cowie [and] J. Windsor Lewis.
+--   3rd. ed., London : Oxford University Press, 1974.
+-- Distributed as 'dict0710' by:
+--   Oxford Text Archive
+--   Oxford University Computing Services
+--   13 Banbury Road
+--   Oxford
+--   OX2 6NN
+-- Under these conditions:
+--   Freely available for non-commercial use provided that this header is
+--   included in its entirety with any copy distributed.
+--
+-- GF version generated by asc2gf, Bjorn Bringert Nov 2008
+--    based on asc2lex, Matthew Purver Nov 2001
+--    http://www.stanford.edu/~mpurver/software.html
+
+abstract OaldStructural = Cat ** {
+fun abaft_Prep : Prep;
+fun aboard_Prep : Prep;
+fun about_Prep : Prep;
+fun above_Prep : Prep;
+fun according_as_Conj : Conj;
+fun according_to_Prep : Prep;
+fun across_Prep : Prep;
+fun afore_Prep : Prep;
+fun after_Conj : Conj;
+fun after_Prep : Prep;
+fun against_Prep : Prep;
+fun agin_Prep : Prep;
+fun albeit_Conj : Conj;
+fun along_Prep : Prep;
+fun alongside_Prep : Prep;
+fun although_Conj : Conj;
+fun amid_Prep : Prep;
+fun amidst_Prep : Prep;
+fun among_Prep : Prep;
+fun amongst_Prep : Prep;
+fun an_Conj : Conj;
+fun and_Conj : Conj;
+fun anent_Prep : Prep;
+fun around_Prep : Prep;
+fun as_Conj : Conj;
+fun aslant_Prep : Prep;
+fun astride_Prep : Prep;
+fun at_Prep : Prep;
+fun athwart_Prep : Prep;
+fun bar_Prep : Prep;
+fun barring_Prep : Prep;
+fun because_Conj : Conj;
+fun before_Conj : Conj;
+fun before_Prep : Prep;
+fun behind_Prep : Prep;
+fun below_Prep : Prep;
+fun beneath_Prep : Prep;
+fun beside_Prep : Prep;
+fun besides_Prep : Prep;
+fun between_Prep : Prep;
+fun betwixt_Prep : Prep;
+fun beyond_Prep : Prep;
+fun but_Conj : Conj;
+fun but_Prep : Prep;
+fun by_Prep : Prep;
+fun circa_Prep : Prep;
+fun concerning_Prep : Prep;
+fun considering_Prep : Prep;
+fun cos_Conj : Conj;
+fun despite_Prep : Prep;
+fun directly_Conj : Conj;
+fun down_Prep : Prep;
+fun during_Prep : Prep;
+fun either_Conj : Conj;
+fun ere_Prep : Prep;
+fun except_Conj : Conj;
+fun except_Prep : Prep;
+fun excepting_Prep : Prep;
+fun failing_Prep : Prep;
+fun for_Conj : Conj;
+fun for_Prep : Prep;
+fun forasmuch_as_Conj : Conj;
+fun from_Prep : Prep;
+fun howbeit_Conj : Conj;
+fun if_Conj : Conj;
+fun immediately_Conj : Conj;
+fun in_Prep : Prep;
+fun inside_Prep : Prep;
+fun instantly_Conj : Conj;
+fun into_Prep : Prep;
+fun less_Prep : Prep;
+fun lest_Conj : Conj;
+fun like_Conj : Conj;
+fun like_Prep : Prep;
+fun likewise_Conj : Conj;
+fun mid_Prep : Prep;
+fun midst_Prep : Prep;
+fun minus_Prep : Prep;
+fun near_Prep : Prep;
+fun neath_Prep : Prep;
+fun neither_Conj : Conj;
+fun nevertheless_Conj : Conj;
+fun next_Prep : Prep;
+fun nigh_Prep : Prep;
+fun nigher_Prep : Prep;
+fun nighest_Prep : Prep;
+fun nisi_Conj : Conj;
+fun nor_Conj : Conj;
+fun notwithstanding_Conj : Conj;
+fun notwithstanding_Prep : Prep;
+fun now_Conj : Conj;
+fun o'er_Prep : Prep;
+fun of_Prep : Prep;
+fun off_Prep : Prep;
+fun on_Prep : Prep;
+fun on_to_Prep : Prep;
+fun only_Conj : Conj;
+fun onto_Prep : Prep;
+fun or_Conj : Conj;
+fun otherwise_Conj : Conj;
+fun outside_Prep : Prep;
+fun over_Prep : Prep;
+fun past_Prep : Prep;
+fun pending_Prep : Prep;
+fun per_Prep : Prep;
+fun plus_Prep : Prep;
+fun provided_Conj : Conj;
+fun providing_Conj : Conj;
+fun qua_Conj : Conj;
+fun qua_Prep : Prep;
+fun re_Prep : Prep;
+fun respecting_Prep : Prep;
+fun round_Prep : Prep;
+fun sans_Prep : Prep;
+fun save_Prep : Prep;
+fun saving_Prep : Prep;
+fun since_Conj : Conj;
+fun since_Prep : Prep;
+fun so_Conj : Conj;
+fun supposing_Conj : Conj;
+fun than_Conj : Conj;
+fun that_Conj : Conj;
+fun tho'_Conj : Conj;
+fun though_Conj : Conj;
+fun thro'_Prep : Prep;
+fun through_Prep : Prep;
+fun throughout_Prep : Prep;
+fun thru_Prep : Prep;
+fun till_Conj : Conj;
+fun till_Prep : Prep;
+fun to_Prep : Prep;
+fun touching_Prep : Prep;
+fun toward_Prep : Prep;
+fun towards_Prep : Prep;
+fun tween_Prep : Prep;
+fun twixt_Prep : Prep;
+fun under_Prep : Prep;
+fun underneath_Prep : Prep;
+fun unless_Conj : Conj;
+fun unlike_Prep : Prep;
+fun until_Conj : Conj;
+fun until_Prep : Prep;
+fun unto_Prep : Prep;
+fun up_Prep : Prep;
+fun upon_Prep : Prep;
+fun versus_Prep : Prep;
+fun via_Prep : Prep;
+fun vice_Prep : Prep;
+fun vis_à_vis_Prep : Prep;
+fun wanting_Prep : Prep;
+fun when_Conj : Conj;
+fun whencesoever_Conj : Conj;
+fun whenever_Conj : Conj;
+fun whereas_Conj : Conj;
+fun whether_Conj : Conj;
+fun while_Conj : Conj;
+fun whilst_Conj : Conj;
+fun with_Prep : Prep;
+fun within_Prep : Prep;
+fun without_Prep : Prep;
+fun yet_Conj : Conj;
+}
--- a/lib/src/parse/oald/OaldStructuralEng.gf
+++ b/lib/src/parse/oald/OaldStructuralEng.gf
@@ -0,0 +1,185 @@
+-- English lexicon for GF, produced from:
+-- Oxford advanced learner's dictionary of current English:
+--    expanded 'computer usable' version compiled by Roger Mitton
+-- The computer usable version is transcribed from:
+--   Oxford advanced learner's dictionary of current English
+--   A.S. Hornby ; with the assistance of A.P. Cowie [and] J. Windsor Lewis.
+--   3rd. ed., London : Oxford University Press, 1974.
+-- Distributed as 'dict0710' by:
+--   Oxford Text Archive
+--   Oxford University Computing Services
+--   13 Banbury Road
+--   Oxford
+--   OX2 6NN
+-- Under these conditions:
+--   Freely available for non-commercial use provided that this header is
+--   included in its entirety with any copy distributed.
+--
+-- GF version generated by asc2gf, Bjorn Bringert Nov 2008
+--    based on asc2lex, Matthew Purver Nov 2001
+--    http://www.stanford.edu/~mpurver/software.html
+
+--# -path=.:alltenses
+concrete OaldStructuralEng of OaldStructural = CatEng ** open ParadigmsEng in {
+lin abaft_Prep = mkPrep "abaft";
+lin aboard_Prep = mkPrep "aboard";
+lin about_Prep = mkPrep "about";
+lin above_Prep = mkPrep "above";
+lin according_as_Conj = mkConj "according as";
+lin according_to_Prep = mkPrep "according to";
+lin across_Prep = mkPrep "across";
+lin afore_Prep = mkPrep "afore";
+lin after_Conj = mkConj "after";
+lin after_Prep = mkPrep "after";
+lin against_Prep = mkPrep "against";
+lin agin_Prep = mkPrep "agin";
+lin albeit_Conj = mkConj "albeit";
+lin along_Prep = mkPrep "along";
+lin alongside_Prep = mkPrep "alongside";
+lin although_Conj = mkConj "although";
+lin amid_Prep = mkPrep "amid";
+lin amidst_Prep = mkPrep "amidst";
+lin among_Prep = mkPrep "among";
+lin amongst_Prep = mkPrep "amongst";
+lin an_Conj = mkConj "an";
+lin and_Conj = mkConj "and";
+lin anent_Prep = mkPrep "anent";
+lin around_Prep = mkPrep "around";
+lin as_Conj = mkConj "as";
+lin aslant_Prep = mkPrep "aslant";
+lin astride_Prep = mkPrep "astride";
+lin at_Prep = mkPrep "at";
+lin athwart_Prep = mkPrep "athwart";
+lin bar_Prep = mkPrep "bar";
+lin barring_Prep = mkPrep "barring";
+lin because_Conj = mkConj "because";
+lin before_Conj = mkConj "before";
+lin before_Prep = mkPrep "before";
+lin behind_Prep = mkPrep "behind";
+lin below_Prep = mkPrep "below";
+lin beneath_Prep = mkPrep "beneath";
+lin beside_Prep = mkPrep "beside";
+lin besides_Prep = mkPrep "besides";
+lin between_Prep = mkPrep "between";
+lin betwixt_Prep = mkPrep "betwixt";
+lin beyond_Prep = mkPrep "beyond";
+lin but_Conj = mkConj "but";
+lin but_Prep = mkPrep "but";
+lin by_Prep = mkPrep "by";
+lin circa_Prep = mkPrep "circa";
+lin concerning_Prep = mkPrep "concerning";
+lin considering_Prep = mkPrep "considering";
+lin cos_Conj = mkConj "cos";
+lin despite_Prep = mkPrep "despite";
+lin directly_Conj = mkConj "directly";
+lin down_Prep = mkPrep "down";
+lin during_Prep = mkPrep "during";
+lin either_Conj = mkConj "either";
+lin ere_Prep = mkPrep "ere";
+lin except_Conj = mkConj "except";
+lin except_Prep = mkPrep "except";
+lin excepting_Prep = mkPrep "excepting";
+lin failing_Prep = mkPrep "failing";
+lin for_Conj = mkConj "for";
+lin for_Prep = mkPrep "for";
+lin forasmuch_as_Conj = mkConj "forasmuch as";
+lin from_Prep = mkPrep "from";
+lin howbeit_Conj = mkConj "howbeit";
+lin if_Conj = mkConj "if";
+lin immediately_Conj = mkConj "immediately";
+lin in_Prep = mkPrep "in";
+lin inside_Prep = mkPrep "inside";
+lin instantly_Conj = mkConj "instantly";
+lin into_Prep = mkPrep "into";
+lin less_Prep = mkPrep "less";
+lin lest_Conj = mkConj "lest";
+lin like_Conj = mkConj "like";
+lin like_Prep = mkPrep "like";
+lin likewise_Conj = mkConj "likewise";
+lin mid_Prep = mkPrep "mid";
+lin midst_Prep = mkPrep "midst";
+lin minus_Prep = mkPrep "minus";
+lin near_Prep = mkPrep "near";
+lin neath_Prep = mkPrep "'neath";
+lin neither_Conj = mkConj "neither";
+lin nevertheless_Conj = mkConj "nevertheless";
+lin next_Prep = mkPrep "next";
+lin nigh_Prep = mkPrep "nigh";
+lin nigher_Prep = mkPrep "nigher";
+lin nighest_Prep = mkPrep "nighest";
+lin nisi_Conj = mkConj "nisi";
+lin nor_Conj = mkConj "nor";
+lin notwithstanding_Conj = mkConj "notwithstanding";
+lin notwithstanding_Prep = mkPrep "notwithstanding";
+lin now_Conj = mkConj "now";
+lin o'er_Prep = mkPrep "o'er";
+lin of_Prep = mkPrep "of";
+lin off_Prep = mkPrep "off";
+lin on_Prep = mkPrep "on";
+lin on_to_Prep = mkPrep "on to";
+lin only_Conj = mkConj "only";
+lin onto_Prep = mkPrep "onto";
+lin or_Conj = mkConj "or";
+lin otherwise_Conj = mkConj "otherwise";
+lin outside_Prep = mkPrep "outside";
+lin over_Prep = mkPrep "over";
+lin past_Prep = mkPrep "past";
+lin pending_Prep = mkPrep "pending";
+lin per_Prep = mkPrep "per";
+lin plus_Prep = mkPrep "plus";
+lin provided_Conj = mkConj "provided";
+lin providing_Conj = mkConj "providing";
+lin qua_Conj = mkConj "qua";
+lin qua_Prep = mkPrep "qua";
+lin re_Prep = mkPrep "re";
+lin respecting_Prep = mkPrep "respecting";
+lin round_Prep = mkPrep "round";
+lin sans_Prep = mkPrep "sans";
+lin save_Prep = mkPrep "save";
+lin saving_Prep = mkPrep "saving";
+lin since_Conj = mkConj "since";
+lin since_Prep = mkPrep "since";
+lin so_Conj = mkConj "so";
+lin supposing_Conj = mkConj "supposing";
+lin than_Conj = mkConj "than";
+lin that_Conj = mkConj "that";
+lin tho'_Conj = mkConj "tho'";
+lin though_Conj = mkConj "though";
+lin thro'_Prep = mkPrep "thro'";
+lin through_Prep = mkPrep "through";
+lin throughout_Prep = mkPrep "throughout";
+lin thru_Prep = mkPrep "thru";
+lin till_Conj = mkConj "till";
+lin till_Prep = mkPrep "till";
+lin to_Prep = mkPrep "to";
+lin touching_Prep = mkPrep "touching";
+lin toward_Prep = mkPrep "toward";
+lin towards_Prep = mkPrep "towards";
+lin tween_Prep = mkPrep "'tween";
+lin twixt_Prep = mkPrep "'twixt";
+lin under_Prep = mkPrep "under";
+lin underneath_Prep = mkPrep "underneath";
+lin unless_Conj = mkConj "unless";
+lin unlike_Prep = mkPrep "unlike";
+lin until_Conj = mkConj "until";
+lin until_Prep = mkPrep "until";
+lin unto_Prep = mkPrep "unto";
+lin up_Prep = mkPrep "up";
+lin upon_Prep = mkPrep "upon";
+lin versus_Prep = mkPrep "versus";
+lin via_Prep = mkPrep "via";
+lin vice_Prep = mkPrep "vice";
+lin vis_à_vis_Prep = mkPrep "vis-à-vis";
+lin wanting_Prep = mkPrep "wanting";
+lin when_Conj = mkConj "when";
+lin whencesoever_Conj = mkConj "whencesoever";
+lin whenever_Conj = mkConj "whenever";
+lin whereas_Conj = mkConj "whereas";
+lin whether_Conj = mkConj "whether";
+lin while_Conj = mkConj "while";
+lin whilst_Conj = mkConj "whilst";
+lin with_Prep = mkPrep "with";
+lin within_Prep = mkPrep "within";
+lin without_Prep = mkPrep "without";
+lin yet_Conj = mkConj "yet";
+}
--- a/lib/src/parse/oald/asc2gf
+++ b/lib/src/parse/oald/asc2gf
@@ -0,0 +1,453 @@
+#! /usr/bin/perl -w
+#
+# Perl script to process OALD machine-readable ASCII file
+# into a GF lexicon
+#
+# Usage: ./asc2gf < ascii_0710-1.txt
+#
+# Bjorn Bringert 2008,
+# based on asc2lex by
+# Matthew Purver, 11/2001
+
+use strict;
+
+my %irregular_verbs = ();
+my %words = ();
+
+my $irreg_eng = "../../english/IrregEng.gf";
+
+# Collect the names of all verbs defined in IrregEng.gf (lines of the form
+# "name_V = ...") into %irregular_verbs, so that OALD verbs marked as
+# irregular can later be linked to the hand-written paradigms.
+open(IRREG_ENG, '<', $irreg_eng) or die "Could not open $irreg_eng: $!\n";
+while (<IRREG_ENG>) {
+  # Capture just the identifier in front of "_V =". Rewriting $_ in place
+  # would keep any text that precedes the match as part of the hash key.
+  if (/([a-z\d]+)_V\s*=/) {
+    $irregular_verbs{$1} = 1;
+  }
+}
+close IRREG_ENG;
+
+print "Known irregular verbs from $irreg_eng:\n";
+print join(",", keys %irregular_verbs) . "\n";
+
+
+# skip header section
+while ( <STDIN> ) {
+    last if /<\/TEIHEADER>/;
+}
+
+# read a line from stdin
+while ( my $line = <STDIN> ) {
+
+    # remove SGML tags
+    $line =~ s/<[^<>]+>//g;
+
+    # split line into fields according to spec (line may be empty now)
+    if ( $line =~ /^(.{23}).{23}(.{23}).{1}(.{58})$/ ) {
+
+        my ( $word, $pos, $cat ) = ( $1, $2, $3 );
+
+  	# trim white space
+	for ( ( $word, $pos, $cat ) ) {
+	    s/\s*$//;
+	}
+
+	# make word lower-case
+	$word =~ tr/A-Z/a-z/;   # lower case
+
+	# translate OALD diacritics
+	$word =~ s/~n/ñ/g;
+	$word =~ s/<c/ç/g;
+	$word =~ s/"a/ä/g;
+	$word =~ s/"o/ö/g;
+	$word =~ s/"u/ü/g;
+	$word =~ s/"i/ï/g;
+	$word =~ s/\^a/â/g;
+	$word =~ s/\^e/ê/g;
+	$word =~ s/\^o/ô/g;
+	$word =~ s/`a/à/g;
+	$word =~ s/`e/è/g;
+	$word =~ s/_e/é/g;
+
+	# make legal identifier
+	# Note: in theory this could cause clashes, but I don't think it does
+	# with the OALD.
+	my $name = $word;
+	$name =~ s/ /_/g;   # space -> _
+	$name =~ s/-/_/g;   # - -> _
+	$name =~ s/\./_/g;  # . -> _
+	$name =~ s/^'//;    # drop initial '
+
+
+	# get PoS & subcat info
+	my @pos = split( /,/, $pos );
+	$cat =~ s/,/\',\'/g;
+	( $cat = "\'$cat\'" ) unless ( $cat eq '' );
+
+	foreach ( @pos ) {
+	    my ( $pcode, $infl, $freq )=split(//);
+
+	    # for verbs, get inflected forms
+	    if ( $pcode =~ /^[GHIJ]/ ) {
+		$pos = 'verb';
+		my ($vbz, $vbg, $vbd);
+
+		# if this is a root form, work out the inflected forms
+		if ( $infl =~ /^\d/ ) {
+		    if ( $infl == 0 ) {
+			( $vbz = $word ) =~ s/$/s/;
+			( $vbg = $word ) =~ s/$/ing/;
+			( $vbd = $word ) =~ s/$/ed/;
+		    }
+		    elsif ( $infl == 1 ) {
+			( $vbz = $word ) =~ s/$/es/;
+			( $vbg = $word ) =~ s/$/ing/;
+			( $vbd = $word ) =~ s/$/ed/;
+		    }
+		    elsif ( $infl == 2 ) {
+			( $vbz = $word ) =~ s/e$/es/;
+			( $vbg = $word ) =~ s/e$/ing/;
+			( $vbd = $word ) =~ s/e$/ed/;
+		    }
+		    elsif ( $infl == 3 ) {
+			( $vbz = $word ) =~ s/y$/ies/;
+			( $vbg = $word ) =~ s/y$/ying/;
+			( $vbd = $word ) =~ s/y$/ied/;
+		    }
+		    elsif ( $infl == 4 ) {
+			( $vbz = $word ) =~ s/$/s/;
+			( $vbg = $word ) =~ s/(\w)$/$1$1ing/;
+			( $vbd = $word ) =~ s/(\w)$/$1$1ed/;
+		    }
+		    elsif ( $infl == 5 ) {
+			# for irregulars, just mark as such for now, we'll guess later
+			$vbz = 'IRREG';
+			$vbg = 'IRREG';
+			$vbd = 'IRREG';
+		    }
+
+		    my $lin = "mkV \"$word\" \"$vbz\" \"$vbd\" \"$vbd\" \"$vbg\"";		    
+
+		    # try to use a verb from IrregEng
+		    if ( $infl == 5 ) {
+		      for (my $i = 0; $i < length($word) - 1; $i++) {
+			my $suffix = substr($word, $i);
+			if ($irregular_verbs{$suffix}) {
+			  if ($i == 0) {
+			    $lin = "IrregEng.${name}_V";
+			  } else {
+			    my $prefix = substr($word, 0, $i);
+			    $lin = "mkV \"$prefix\" IrregEng.${suffix}_V";
+			  }
+			  last;
+			}
+		      }
+		    }
+
+		    if ($pcode eq 'G') {
+		      #add_word("${name}_VX", "mkVX ($lin)");
+		      print STDERR "Ignoring anomalous verb: $name\n";
+		    } 
+		    if ($pcode eq 'I' || $pcode eq 'J') {
+		      add_word("${name}_V", "$lin");
+		    }
+		    if ($pcode eq 'H' || $pcode eq 'J') {
+		      add_word("${name}_V2", "mkV2 ($lin)");
+		    }
+		}
+		# if this is an inflected form, save for guessing irregulars later
+		elsif ( $infl =~ /^a/ ) {
+		    #push( @vbz, $word );
+		}
+		elsif ( $infl =~ /^b/ ) {
+		    #push( @vbg, $word );
+		}
+		elsif ( $infl =~ /^c/ ) {
+		    #push( @vbd, $word );
+		}
+		elsif ( $infl =~ /^d/ ) {
+		    #push( @vbn, $word );
+		}
+	    }
+	    # for nouns, get plural form
+	    elsif( $pcode =~ /^[KLMNY]/ ) {
+		$pos = 'noun';
+		$pcode =~ s/^K/count/;
+		$pcode =~ s/^L/mass/;
+		$pcode =~ s/^M/both/;
+		$pcode =~ s/^N/proper/;
+		if ( $pcode =~ /^Y/ ) {
+		    $pcode = 'count' if $infl =~ /^[>\)\]]/;
+		    $pcode = 'mass' if $infl =~ /^\}/;
+		    $pcode = 'proper' if $infl =~ /^[:=~]/;
+		}
+		# if this is a singular form, work out plural form
+		unless ( $infl =~ /^j/ ) {
+		    # Singular form to emit. It lives in its own variable because $word
+		    # is shared by every PoS entry of this dictionary line; overwriting
+		    # it for a plural-only noun would corrupt the entries that follow.
+		    my $sg = $word;
+		    my $pl = '-';
+		    if ( $infl eq '6' ) {
+			( $pl = $word ) =~ s/$/s/;
+		    }
+		    elsif ( $infl eq '7' ) {
+			( $pl = $word ) =~ s/$/es/;
+		    }
+		    elsif ( $infl eq '8' ) {
+			( $pl = $word ) =~ s/y$/ies/;
+		    }
+		    elsif ( $infl =~ /^[9k\]]/ ) {
+			$pl = $word;
+		    }
+		    elsif ( $infl =~ /^i/ ) {
+			# for irregulars, let's just make a guess and mark with '*'
+			# this could be done better, as for verbs, but I can't be bothered now
+			$pl = $word;
+			( $pl =~ s/^((wo)?m)an/$1en\*/ ) or
+			    ( $pl =~ s/man(-|$)/men$1\*/ ) or
+			      ( $pl =~ s/-in-law/s-in-law\*/ ) or
+			      ( $pl =~ s/um$/a\*/ ) or
+			      ( $pl =~ s/us$/i\*/ ) or
+			      ( $pl =~ s/a$/ae\*/ ) or
+			      ( $pl =~ s/on$/a\*/ ) or
+			      ( $pl =~ s/is$/es\*/ ) or
+			      ( $pl =~ s/o$/i\*/ ) or
+			      ( $pl =~ s/child$/children\*/ ) or
+			      ( $pl =~ s/oot$/eet\*/ ) or
+			      ( $pl =~ s/ooth$/eeth\*/ ) or
+			      ( $pl =~ s/([lm])ouse$/$1ice\*/ ) or
+			      ( $pl =~ s/f(e)?$/ves\*/ ) or
+			      ( $pl =~ s/[ei]x$/ices\*/ ) or
+			      ( $pl =~ s/eau$/eaux\*/ ) or
+			      ( $pl = 'IRREG' );
+		    }
+		    # if plural-only, swap root form & plural
+		    elsif ( $infl =~ /^\)/ ) {
+			$pl = $word;
+			$sg = '-';
+		    }
+		    ( $infl =~ s/^[:l]/per/ ) or ( $infl =~ s/^[mn]/loc/ ) or ( $infl = '_' );
+
+		    my $comment = "";
+		    if ( $sg eq '-' ) {
+		      $comment .= " {- FIXME: no singular form -}";
+		    } 
+		    if ( $pl eq '-' ) {
+		      $comment .= " {- FIXME: no plural form -}";
+		    }
+		    # The guess marker is not always the last character (e.g.
+		    # "man-of-war" becomes "men*-of-war"), so remove it wherever it is;
+		    # each rule above inserts at most one marker.
+		    if ( $pl =~ s/\*// ) {
+		      $comment .= " {- FIXME: guessed plural form -}";
+		    }
+
+		    if ( $pcode eq 'proper' ) {
+		      add_word("${name}_PN", "mkPN \"$sg\"");
+		    } else {
+		      add_word("${name}_N", "mkN \"$sg\" \"$pl\"$comment");
+		    }
+		}
+	    }
+	    # for adjectives, get comparative & superlative forms
+	    elsif( $pcode =~ /^O/ ) {
+		$pos = 'adj';
+		# if this is root form, work out inflected forms
+		unless ( $infl =~ /^[rs]/ ) {
+		    my ($comp, $sup);
+		    if ( $infl =~ /^[Apqt]/ ) {
+			$comp = $sup = '-';
+		    }
+		    elsif ( $infl =~ /^B/ ) {
+			( $comp = $word ) =~ s/$/r/;
+			( $sup = $word ) =~ s/$/st/;
+		    }
+		    elsif ( $infl =~ /^C/ ) {
+			( $comp = $word ) =~ s/$/er/;
+			( $sup = $word ) =~ s/$/est/;
+		    }
+		    elsif ( $infl =~ /^D/ ) {
+			( $comp = $word ) =~ s/y$/ier/;
+			( $sup = $word ) =~ s/y$/iest/;
+		    }
+		    elsif ( $infl =~ /^E/ ) {
+			# for irregulars, let's just have a guess and mark with '*'
+			# (there aren't very many of these)
+			( $comp = $word ) =~ s/(\w)$/$1$1er\*/;
+			( $sup = $word ) =~ s/(\w)$/$1$1est\*/;
+		    }
+		    $infl =~ s/^[ABCDE]/normal/;
+		    $infl =~ s/^p/pred/;
+		    $infl =~ s/^q/attr/;
+		    $infl =~ s/^t/affix/;
+
+		    if ( $comp eq '-' ) {
+		      add_word("${name}_A", "compoundA (mkA \"$word\")");
+		    } else {
+		      # The '*' only flags a guessed form: keep it out of the GF string
+		      # literal and record the guess as a FIXME comment instead, the
+		      # same way guessed noun plurals are reported.
+		      my $comment = ( $comp =~ s/\*$// ) ? " {- FIXME: guessed comparative form -}" : "";
+		      add_word("${name}_A", "mkA \"$word\" \"$comp\"$comment");
+		    }
+		}
+	    }
+	    # adverb
+	    elsif( $pcode =~ /^P/ ) {
+		$pos = 'adv';
+		$infl =~ s/^[u\+]/normal/;
+		$infl =~ s/^w/whrel/;
+		$infl =~ s/^v/whq/;
+		add_word("${name}_Adv", "mkAdv \"$word\"");
+	    }
+	    # pronoun
+	    elsif( $pcode =~ s/^Q/_/ ) {
+		$pos = 'pron';
+		$infl =~ s/^x/normal/;
+		$infl =~ s/^y/whq/;
+		$infl =~ s/^z/whrel/;
+		my $class = '_';
+		# reflexive pronouns
+		if ( ( $word =~ /self$/ ) or 
+		     ( $word =~ /selves$/ ) ) {
+		    $pcode = 'acc';
+		}
+		# accusative personal pronouns
+		if ( ( $word =~ /^him/ ) or
+		     ( $word =~ /^her/ ) or
+		     ( $word =~ /^them/ ) or
+		     ( $word eq 'us' ) or
+		     ( $word eq 'thee' ) or
+		     ( $word eq 'me' ) ) {
+		    $pcode = 'acc';
+		    $class = 'per';
+		}
+		# nominative personal pronouns
+		if ( ( $word eq 'he' ) or
+		     ( $word eq 'she' ) or
+		     ( $word eq 'they' ) or
+		     ( $word eq 'we' ) or
+		     ( $word eq 'thou' ) or
+		     ( $word eq 'i' ) ) {
+		    $pcode = 'nom';
+		    $class = 'per';
+		}
+		# other personal pronouns
+		if ( ( $word =~ /.+one/ ) or
+		     ( $word =~ /one.+/ ) or
+		     ( $word =~ /body/ ) or
+		     ( $word =~ /^you/ ) or
+		     ( $word =~ /^who/ ) ) {
+		    $class = 'per';
+		}
+		# non-personal pronouns
+		if ( $word =~ /thing/ ) {
+		    $class = 'nper';
+		}
+		# otherwise case/person info will be '_' (anon variable)
+		# add full spec to @pron array
+		#push( @pron, "$pos( \'$word\', $pcode, $infl, $class ).\n" );
+	    }
+	    # for determiners, leave anon variable as placeholder for semantics
+	    elsif( $pcode =~ /^[RS]/ ) {
+		$pos = 'det';
+		$pcode =~ s/^R/def/;
+		$pcode =~ s/^S/indef/;
+		#add_word("${name}_Det","mkDeterminer \"$word\"");
+	    }
+	    # for prepositions - nothing to say
+	    elsif( $pcode =~ s/^T/prep/ ) {
+		$pos = 'prep';
+		add_word("${name}_Prep","mkPrep \"$word\"");
+	    }
+	    # for conjunctions - nothing to say
+	    elsif( $pcode =~ s/^V/conj/ ) {
+		$pos = 'conj';
+		add_word("${name}_Conj","mkConj \"$word\"");
+	    }
+	    # for miscellaneous, leave '-' as placeholder for illocutionary info
+	    elsif( $pcode =~ /^[UWXZ]/ ) {
+		$pos = 'misc';
+		#push( @prefix, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^U/prefix/ );
+		#push( @interj, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^W/interj/ );
+		#push( @partcl, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^X/partcl/ );
+		#push( @unknown, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^Z/unknown/ );
+	    }
+	}
+    }
+}
+
+my $absfile = "Oald.gf";
+my $cncfile = "OaldEng.gf";
+my $abs_structfile = "OaldStructural.gf";
+my $cnc_structfile = "OaldStructuralEng.gf";
+
+# Open all four output files up front and stop on the first failure, so the
+# script never reports success for a lexicon it could not write.
+open (ABS, '>', $absfile) or die "Could not open $absfile for writing: $!\n";
+open (CNC, '>', $cncfile) or die "Could not open $cncfile for writing: $!\n";
+
+open (ABS_STRUCTURAL, '>', $abs_structfile) or die "Could not open $abs_structfile for writing: $!\n";
+open (CNC_STRUCTURAL, '>', $cnc_structfile) or die "Could not open $cnc_structfile for writing: $!\n";
+
+
+
+# print a nice comment at the top
+my $header = "-- English lexicon for GF, produced from:\n"
+           . "-- Oxford advanced learner's dictionary of current English:\n"
+           . "--    expanded 'computer usable' version compiled by Roger Mitton\n"
+           . "-- The computer usable version is transcribed from:\n"
+           . "--   Oxford advanced learner's dictionary of current English\n"
+           . "--   A.S. Hornby ; with the assistance of A.P. Cowie [and] J. Windsor Lewis.\n"
+           . "--   3rd. ed., London : Oxford University Press, 1974.\n"
+           . "-- Distributed as 'dict0710' by:\n"
+           . "--   Oxford Text Archive\n"
+           . "--   Oxford University Computing Services\n"
+           . "--   13 Banbury Road\n"
+           . "--   Oxford\n"
+           . "--   OX2 6NN\n"
+           . "-- Under these conditions:\n"
+           . "--   Freely available for non-commercial use provided that this header is\n"
+           . "--   included in its entirety with any copy distributed.\n"
+           . "--\n"
+           . "-- GF version generated by asc2gf, Bjorn Bringert Nov 2008\n"
+           . "--    based on asc2lex, Matthew Purver Nov 2001\n"
+           . "--    http://www.stanford.edu/~mpurver/software.html\n"
+           . "\n";
+print ABS $header;
+print ABS "abstract Oald = Cat ** {\n";
+
+print CNC $header;
+print CNC "--# -path=.:alltenses\n";
+print CNC "concrete OaldEng of Oald = CatEng ** open ParadigmsEng, IrregEng in {\n";
+
+print ABS_STRUCTURAL $header;
+print ABS_STRUCTURAL "abstract OaldStructural = Cat ** {\n";
+
+print CNC_STRUCTURAL $header;
+print CNC_STRUCTURAL "--# -path=.:alltenses\n";
+print CNC_STRUCTURAL "concrete OaldStructuralEng of OaldStructural = CatEng ** open ParadigmsEng in {\n";
+
+# Emit every collected entry, in sorted order, as a "fun" line in the abstract
+# syntax and a "lin" line in the concrete syntax. Open-class categories go to
+# Oald/OaldEng, everything else (Prep, Conj) to OaldStructural/OaldStructuralEng.
+foreach my $name (sort (keys %words)) {
+  # The category is the suffix after the last underscore, e.g. "V2" in "eat_V2".
+  (my $cat = $name) =~ s/.*_([A-Z][A-Za-z\d]*)$/$1/;
+  my $lin = $words{$name};
+  # The alternation is grouped so that both anchors apply to every category.
+  # Adv and PN are listed explicitly: they belong to the open lexicon as well.
+  if ( $cat =~ /^(?:A|Adv|N|PN|V|V2)$/ ) {
+    print ABS "fun $name : $cat;\n";
+    print CNC "lin $name = $lin;\n";
+  } else {
+    print ABS_STRUCTURAL "fun $name : $cat;\n";
+    print CNC_STRUCTURAL "lin $name = $lin;\n";
+  }
+}
+
+print ABS "}";
+print CNC "}";
+
+print ABS_STRUCTURAL "}";
+print CNC_STRUCTURAL "}";
+
+# Buffered write errors (e.g. a full disk) only surface when the handle is
+# closed, so check every close before announcing success below.
+close(ABS_STRUCTURAL) or die "Could not close $abs_structfile: $!\n";
+close(CNC_STRUCTURAL) or die "Could not close $cnc_structfile: $!\n";
+
+close(ABS) or die "Could not close $absfile: $!\n";
+close(CNC) or die "Could not close $cncfile: $!\n";
+
+print "\nWrote open lexicon to $absfile and $cncfile\n";
+print "Wrote closed lexicon to $abs_structfile and $cnc_structfile\n";
+
+
+
+# Record the linearization $lin for the lexicon entry $name in %words.
+# The first definition of a name is kept; any later one is only reported
+# on STDERR and otherwise ignored.
+sub add_word {
+  my ($entry, $definition) = @_;
+  if (not exists $words{$entry}) {
+    $words{$entry} = $definition;
+  } else {
+    print STDERR "Duplicate word: $entry\n";
+  }
+}
--- a/lib/src/parse/oald/src/README
+++ b/lib/src/parse/oald/src/README
@@ -0,0 +1,12 @@
+This directory contains the Oxford Advanced Learner's Dictionary of Current English 
+(expanded computer-usable version), available from the Oxford Text Archive (http://ota.ahds.ac.uk).
+
+It has a flat structure but contains part-of-speech, verb subcategorisation & pronunciation info.
+
+Files:
+ascii_0710-1.txt	the original plain ASCII version of the OALD
+ascii_0710-2.txt	the information to go with it
+asc2lex			a Perl script to process ASCII -> Prolog
+lexicon2.pl		the resulting Prolog version, hand-corrected for irregulars etc.
+
+Matthew Purver, Jan 2001
--- a/lib/src/parse/oald/src/asc2lex
+++ b/lib/src/parse/oald/src/asc2lex
@@ -0,0 +1,320 @@
+#! /usr/bin/perl
+#
+# Perl script to process OALD machine-readable ASCII file
+# into a Prolog-readable lexicon usable by SHARDS
+#
+# Usage: ./asc2lex < ascii_0710-1.txt [> OUTPUT.PL]
+#
+# Matthew Purver, 11/2001
+
+# Emit the two-line Prolog comment banner that heads the generated lexicon.
+print "% Prolog lexicon for SHARDS, from OALD machine-readable dictionary\n",
+      "% Produced by asc2lex, Matthew Purver 11/2001\n\n";
+
+# Consume stdin up to and including the line that holds the closing
+# TEIHEADER tag, so that the next read starts after the header section.
+while ( defined( $_ = <STDIN> ) ) {
+    if ( /<\/TEIHEADER>/ ) {
+        last;
+    }
+}
+
+# main pass: read the rest of stdin one line at a time and sort each entry into a per-PoS array
+while ( $line = <STDIN> ) {
+
+    # remove SGML tags
+    $line =~ s/<[^<>]+>//g;
+
+    # split line into fixed-width fields: 23 chars word, 23 skipped, 23 PoS codes, 1 skipped, 58 subcat codes (line may be empty now)
+    if ( $line =~ /^(.{23}).{23}(.{23}).{1}(.{58})$/ ) {
+
+  	# trim trailing white space from all three fields (the loop variable aliases each of them in turn)
+	for ( ( $word, $pos, $cat ) = ( $1, $2, $3 ) ) {
+	    s/\s*$//;
+	}
+
+	# make word lower-case atomic string (quotes are backslash-escaped first)
+	$word =~ s/\'/\\\'/g;   # ' -> \'
+	$word =~ s/\"/\\\"/g;   # " -> \"
+	$word =~ tr/A-Z/a-z/;   # lower case
+
+	# get PoS & subcat info: PoS codes are comma-separated; each comma-separated subcat code gets single-quoted
+	@pos = split( /,/, $pos );
+	$cat =~ s/,/\',\'/g;
+	( $cat = "\'$cat\'" ) unless ( $cat eq '' );
+
+	# set up Prolog-style string & put into array (one pass per PoS code of this entry)
+	foreach ( @pos ) {
+	    ( $pcode, $infl, $freq )=split(//);  # one character each: PoS code, inflection code, third character ($freq is not used below)
+	    # for verbs, get inflected forms
+	    if ( $pcode =~ /^[GHIJ]/ ) {
+		$pos = 'verb';
+		$pcode =~ s/^G/unknown/;
+		$pcode =~ s/^H/tran/;
+		$pcode =~ s/^I/intran/;
+		$pcode =~ s/^J/_/;
+		# if this is a root form (digit inflection code), work out the inflected forms
+		if ( $infl =~ /^\d/ ) {
+		    if ( $infl == 0 ) {
+			( $vbz = $word ) =~ s/$/s/;
+			( $vbg = $word ) =~ s/$/ing/;
+			( $vbd = $word ) =~ s/$/ed/;
+		    }
+		    elsif ( $infl == 1 ) {
+			( $vbz = $word ) =~ s/$/es/;
+			( $vbg = $word ) =~ s/$/ing/;
+			( $vbd = $word ) =~ s/$/ed/;
+		    }
+		    elsif ( $infl == 2 ) {
+			( $vbz = $word ) =~ s/e$/es/;
+			( $vbg = $word ) =~ s/e$/ing/;
+			( $vbd = $word ) =~ s/e$/ed/;
+		    }
+		    elsif ( $infl == 3 ) {
+			( $vbz = $word ) =~ s/y$/ies/;
+			( $vbg = $word ) =~ s/y$/ying/;
+			( $vbd = $word ) =~ s/y$/ied/;
+		    }
+		    elsif ( $infl == 4 ) {
+			( $vbz = $word ) =~ s/$/s/;
+			( $vbg = $word ) =~ s/(\w)$/$1$1ing/;  # double the final character
+			( $vbd = $word ) =~ s/(\w)$/$1$1ed/;
+		    }
+		    elsif ( $infl == 5 ) {
+			# for irregulars, just mark as such for now, we'll guess later
+			$vbz = 'IRREG';
+			$vbg = 'IRREG';
+			$vbd = 'IRREG';
+		    }
+		    # add the full spec to @verb array ($vbd fills both of the last two form slots)
+		    push( @verb, 
+	  "$pos( \'$word\', \'$vbz\', \'$vbg\', \'$vbd\', \'$vbd\', $pcode, [$cat] ).\n" );
+		}
+		# if this is an inflected form (codes a-d), save for guessing irregulars later
+		elsif ( $infl =~ /^a/ ) {
+		    push( @vbz, $word );
+		}
+		elsif ( $infl =~ /^b/ ) {
+		    push( @vbg, $word );
+		}
+		elsif ( $infl =~ /^c/ ) {
+		    push( @vbd, $word );
+		}
+		elsif ( $infl =~ /^d/ ) {
+		    push( @vbn, $word );
+		}
+	    }
+	    # for nouns, get plural form
+	    elsif( $pcode =~ /^[KLMNY]/ ) {
+		$pos = 'noun';
+		$pcode =~ s/^K/count/;
+		$pcode =~ s/^L/mass/;
+		$pcode =~ s/^M/both/;
+		$pcode =~ s/^N/proper/;
+		if ( $pcode =~ /^Y/ ) {  # for code Y the noun class is derived from the inflection code instead
+		    $pcode = 'count' if $infl =~ /^[>\)\]]/;
+		    $pcode = 'mass' if $infl =~ /^\}/;
+		    $pcode = 'proper' if $infl =~ /^[:=~]/;
+		}
+		# if this is a singular form, work out plural form (entries with inflection code j are skipped)
+		unless ( $infl =~ /^j/ ) {
+		    $pl = '-';
+		    if ( $infl == 6 ) {  # numeric compare: a non-numeric code numifies to 0 and so matches none of 6/7/8
+			( $pl = $word ) =~ s/$/s/;
+		    }
+		    elsif ( $infl == 7 ) {
+			( $pl = $word ) =~ s/$/es/;
+		    }
+		    elsif ( $infl == 8 ) {
+			( $pl = $word ) =~ s/y$/ies/;
+		    }
+		    elsif ( $infl =~ /^[9k\]]/ ) {  # plural is identical to the entry word
+			$pl = $word;
+		    }
+		    elsif ( $infl =~ /^i/ ) {
+			# for irregulars, let's just make a guess and mark with '*'
+			# this could be done better, as for verbs, but I can't be bothered now
+			$pl = $word;
+  			( $pl =~ s/^((wo)?m)an/$1en\*/ ) or
+  			    ( $pl =~ s/man(-|$)/men$1\*/ ) or
+  			      ( $pl =~ s/-in-law/s-in-law\*/ ) or
+  			      ( $pl =~ s/um$/a\*/ ) or
+  			      ( $pl =~ s/us$/i\*/ ) or
+  			      ( $pl =~ s/a$/ae\*/ ) or
+  			      ( $pl =~ s/on$/a\*/ ) or
+  			      ( $pl =~ s/is$/es\*/ ) or
+  			      ( $pl =~ s/o$/i\*/ ) or
+  			      ( $pl =~ s/child$/children\*/ ) or
+  			      ( $pl =~ s/oot$/eet\*/ ) or
+  			      ( $pl =~ s/ooth$/eeth\*/ ) or
+  			      ( $pl =~ s/([lm])ouse$/$1ice\*/ ) or
+  			      ( $pl =~ s/f(e)?$/ves\*/ ) or
+  			      ( $pl =~ s/[ei]x$/ices\*/ ) or
+  			      ( $pl =~ s/eau$/eaux\*/ ) or
+  			      ( $pl = 'IRREG' );  # first rule that applies wins; 'IRREG' when none does
+		    }
+		    # if plural-only, swap root form & plural
+		    elsif ( $infl =~ /^\)/ ) {
+			$pl = $word;
+			$word = '-';
+		    }
+		    # map the inflection code to per / loc / _ and add full spec to @noun array
+		    ( $infl =~ s/^[:l]/per/ ) or ( $infl =~ s/^[mn]/loc/ ) or ( $infl = '_' );
+		    push( @noun, "$pos( \'$word\', \'$pl\', $pcode, $infl ).\n" )
+		}
+	    }
+	    # for adjectives, get comparative & superlative forms
+	    elsif( $pcode =~ /^O/ ) {
+		$pos = 'adj';
+		# if this is root form (inflection code is not r or s), work out inflected forms
+		unless ( $infl =~ /^[rs]/ ) {
+		    if ( $infl =~ /^[Apqt]/ ) {
+			$comp = $sup = '-';
+		    }
+		    elsif ( $infl =~ /^B/ ) {
+			( $comp = $word ) =~ s/$/r/;
+			( $sup = $word ) =~ s/$/st/;
+		    }
+		    elsif ( $infl =~ /^C/ ) {
+			( $comp = $word ) =~ s/$/er/;
+			( $sup = $word ) =~ s/$/est/;
+		    }
+		    elsif ( $infl =~ /^D/ ) {
+			( $comp = $word ) =~ s/y$/ier/;
+			( $sup = $word ) =~ s/y$/iest/;
+		    }
+		    elsif ( $infl =~ /^E/ ) {
+			# for irregulars, let's just have a guess and mark with '*'
+			# (there aren't very many of these)
+			( $comp = $word ) =~ s/(\w)$/$1$1er\*/;
+			( $sup = $word ) =~ s/(\w)$/$1$1est\*/;
+		    }
+		    $infl =~ s/^[ABCDE]/normal/;
+		    $infl =~ s/^p/pred/;
+		    $infl =~ s/^q/attr/;
+		    $infl =~ s/^t/affix/;
+		    # and add full spec to @adj array
+		    push( @adj, "$pos( \'$word\', \'$comp\', \'$sup\', $infl ).\n" );  # NOTE(review): $comp/$sup are globals; a code outside [ApqtBCDE] would reuse the previous entry's values - confirm such codes cannot occur
+		}
+	    }
+	    # for adverbs, just add all info to @adv array
+	    elsif( $pcode =~ /^P/ ) {
+		$pos = 'adv';
+		$infl =~ s/^[u\+]/normal/;
+		$infl =~ s/^w/whrel/;
+		$infl =~ s/^v/whq/;
+		push( @adv, "$pos( \'$word\', $infl ).\n" );
+	    }
+	    # for pronouns, work out some case/person info
+	    elsif( $pcode =~ s/^Q/_/ ) {  # the substitution doubles as the test; case starts out as '_'
+		$pos = 'pron';
+		$infl =~ s/^x/normal/;
+		$infl =~ s/^y/whq/;
+		$infl =~ s/^z/whrel/;
+		$class = '_';
+		# reflexive pronouns
+		if ( ( $word =~ /self$/ ) or 
+		     ( $word =~ /selves$/ ) ) {
+		    $pcode = 'acc';
+		}
+		# accusative personal pronouns
+		if ( ( $word =~ /^him/ ) or
+		     ( $word =~ /^her/ ) or
+		     ( $word =~ /^them/ ) or
+		     ( $word eq 'us' ) or
+		     ( $word eq 'thee' ) or
+		     ( $word eq 'me' ) ) {
+		    $pcode = 'acc';
+		    $class = 'per';
+		}
+		# nominative personal pronouns
+		if ( ( $word eq 'he' ) or
+		     ( $word eq 'she' ) or
+		     ( $word eq 'they' ) or
+		     ( $word eq 'we' ) or
+		     ( $word eq 'thou' ) or
+		     ( $word eq 'i' ) ) {
+		    $pcode = 'nom';
+		    $class = 'per';
+		}
+		# other personal pronouns
+		if ( ( $word =~ /.+one/ ) or
+		     ( $word =~ /one.+/ ) or
+		     ( $word =~ /body/ ) or
+		     ( $word =~ /^you/ ) or
+		     ( $word =~ /^who/ ) ) {
+		    $class = 'per';
+		}
+		# non-personal pronouns (checked last, so this overrides any 'per' set above)
+		if ( $word =~ /thing/ ) {
+		    $class = 'nper';
+		}
+		# otherwise case/person info will be '_' (anon variable)
+		# add full spec to @pron array
+		push( @pron, "$pos( \'$word\', $pcode, $infl, $class ).\n" );
+	    }
+	    # for determiners, leave anon variable as placeholder for semantics
+	    elsif( $pcode =~ /^[RS]/ ) {
+		$pos = 'det';
+		$pcode =~ s/^R/def/;
+		$pcode =~ s/^S/indef/;
+		push( @det, "$pos( \'$word\', $pcode, _ ).\n" );
+	    }
+	    # for prepositions - nothing to say
+	    elsif( $pcode =~ s/^T/prep/ ) {
+		$pos = 'prep';
+		push( @prep, "$pos( \'$word\', $pcode ).\n" );
+	    }
+	    # for conjunctions - nothing to say
+	    elsif( $pcode =~ s/^V/conj/ ) {
+		$pos = 'conj';
+		push( @conj, "$pos( \'$word\', $pcode ).\n" );
+	    }
+	    # for miscellaneous, leave '-' as placeholder for illocutionary info
+	    elsif( $pcode =~ /^[UWXZ]/ ) {
+		$pos = 'misc';
+		push( @prefix, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^U/prefix/ );  # each substitution both tests and renames the code
+		push( @interj, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^W/interj/ );
+		push( @partcl, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^X/partcl/ );
+		push( @unknown, "$pos( \'$word\', $pcode, '-' ).\n" ) if ( $pcode =~ s/^Z/unknown/ );
+	    }
+	}
+    }
+}
+
+# now have a guess at irregular verb forms (marking the best guess with '*')
+#
+# Every @verb entry whose inflected forms are still the 'IRREG' placeholder
+# gets each of its four placeholders replaced by the closest match, chosen by
+# findbest, from the inflected forms collected in @vbz, @vbg, @vbd and @vbn.
+# A '*' is put in front of the root word to flag the entry as a guess.
+foreach my $entry ( @verb ) {
+    next unless $entry =~ /verb\( \'([^\']+)\', \'IRREG/;
+
+    my $root = $1;
+    my $vbz  = findbest( $root, @vbz );
+    my $vbg  = findbest( $root, @vbg );
+    my $vbd  = findbest( $root, @vbd );
+    my $vbn  = findbest( $root, @vbn );
+
+    # \Q...\E: the root word is data, not a pattern, so any regex
+    # metacharacter in it must be matched literally.
+    $entry =~ s/(\Q$root\E\', \')IRREG(\', \')IRREG(\', \')IRREG(\', \')IRREG/\*$1$vbz$2$vbg$3$vbd$4$vbn/;
+}
+
+# Write out the collected entries grouped by part of speech, with a single
+# newline between consecutive groups and nothing after the last one.
+print join( "\n",
+            map { join( '', @$_ ) }
+                \@verb, \@noun, \@adj, \@adv,
+                \@pron, \@det, \@prep, \@conj,
+                \@prefix, \@interj, \@partcl, \@unknown );
+
+
+# find closest string match
+#
+# Arguments: the root word, followed by the list of candidate strings.
+# Returns the candidate that shares the longest identical prefix with the
+# root word.  Among candidates with equally long prefixes the shortest one
+# wins, and among equally short ones the earliest in the list.  A candidate
+# is returned even when not a single leading character matches.
+# Returns undef when the candidate list is empty.
+#
+# All working variables are lexical, so a call neither depends on nor
+# disturbs the outcome of any other call.
+sub findbest 
+{
+    my ( $word, @array ) = @_;
+
+    my $best;
+    my $bestlen = -1;    # below any real prefix length: the first candidate always wins
+    foreach my $test ( @array ) {
+        # count the leading characters $test has in common with $word
+        my $limit = length( $word ) < length( $test ) ? length( $word ) : length( $test );
+        my $len = 0;
+        $len++ while ( $len < $limit ) &&
+                     ( substr( $word, $len, 1 ) eq substr( $test, $len, 1 ) );
+
+        # a longer prefix always wins; an equal one only if the string is shorter
+        if ( ( $len > $bestlen ) ||
+             ( ( $len == $bestlen ) && ( length( $test ) < length( $best ) ) ) ) {
+            $best    = $test;
+            $bestlen = $len;
+        }
+    }
+    return $best;
+}
--- a/lib/src/parse/oald/src/ascii_0710-1.txt
+++ b/lib/src/parse/oald/src/ascii_0710-1.txt
--- a/lib/src/parse/oald/src/ascii_0710-2.txt
+++ b/lib/src/parse/oald/src/ascii_0710-2.txt
--- a/lib/src/parse/oald/src/lexicon2.pl
+++ b/lib/src/parse/oald/src/lexicon2.pl