diff --git a/lib/resource/doc/gf-resource.html b/lib/resource/doc/gf-resource.html index 5e02a5320..29a1ffde3 100644 --- a/lib/resource/doc/gf-resource.html +++ b/lib/resource/doc/gf-resource.html @@ -9,7 +9,7 @@
-Second Version, Gothenburg, 18 February 2005
+Second Version, Gothenburg, 1 March 2005
First Draft, Gothenburg, 7 February 2005
@@ -81,13 +81,25 @@ success - libraries are another half.
- + Even x = + let jämn = case-To use a library function for Swedish definite phrases: +To use library functions for syntax and morphology:of { + => "jämn" ; + => "jämnt" ; + => "jämna" + } + in + {s = table { + Main => x.s ! Nom ++ "är" ++ jämn ; + Inv => "är" ++ x.s ! Nom ++ jämn ; + Sub => x.s ! Nom ++ "är" ++ jämn + } + }
- + Even = predA (regA "jämn") ;@@ -197,8 +209,8 @@ or any other flavour, including anaphora and discourse.
-But we do not believe semantics can be given once and -for all for a natural language. +But we do not try to give semantics once and +for all for the whole language.
@@ -246,7 +258,7 @@ The current GF Resource Project covers ten languages:
+
++ +Språkdata Seminar, Gothenburg, 1 March 2005 + +
+ +Aarne Ranta + +
+ +aarne@cs.chalmers.se +
+ +Swedish morphology and lexicon in GF + +
+ +Syntax case study: Swedish sentence structure + +
+ +Danish and Norwegian through parametrization + + + + +
+ +Paradigms: set of functions for extending the lexicon. + + + +
+ N = {s : Number => Species => Case => Str ; g : Gender} ;
+
+where
++ param + Species = Indef | Def ; + Number = Sg | Pl ; + Case = Nom | Gen ; ++ + + + +
+ bil =
+ {s = table {
+ Sg => table {
+ Indef => table {Nom => "bil" ; Gen => "bils" } ;
+ Def => table {Nom => "bilen" ; Gen => "bilens" }
+ } ;
+ Pl => table {
+ Indef => table {Nom => "bilar" ; Gen => "bilars" } ;
+ Def => table {Nom => "bilarna" ; Gen => "bilarnas" }
+ }
+ } ;
+ g = Utr
+ }
+
+
+
+
++ +Thus do not write +
+ gran =
+ {s = table {
+ Sg => table {
+ Indef => table {Nom => "gran" ; Gen => "grans" } ;
+ Def => table {Nom => "granen" ; Gen => "granens" }
+ } ;
+ Pl => table {
+ Indef => table {Nom => "granar" ; Gen => "granars" } ;
+ Def => table {Nom => "granarna" ; Gen => "granarnas" }
+ }
+ } ;
+ g = Utr
+ }
+
+
+
+
+
+
+ decl2 : Str -> N = \bil ->
+ {s = table {
+ Sg => table {
+ Indef => table {Nom => bil + "" ; Gen => bil + "s" } ;
+ Def => table {Nom => bil + "en" ; Gen => bil + "ens" }
+ } ;
+ Pl => table {
+ Indef => table {Nom => bil + "ar" ; Gen => bil + "ars" } ;
+ Def => table {Nom => bil + "arna" ; Gen => bil + "arnas" }
+ }
+ } ;
+ g = Utr
+ }
+
+This function can be used over and over again:
++ bil = decl2 "bil" ; + gran = decl2 "gran" ; + dag = decl2 "dag" ; ++ + + + +
+ +First define (for each word class) a worst-case function: +
+ mkN : (apa,apan,apor,aporna : Str) -> N = \apa,apan,apor,aporna ->
+ {s = table {
+ Sg => table {
+ Indef => mkCase apa ;
+ Def => mkCase apan
+ } ;
+ Pl => table {
+ Indef => mkCase apor ;
+ Def => mkCase aporna
+ }
+ } ;
+ g = case last apan of {
+ "n" => Utr ;
+ _ => Neutr
+ }
+ } ;
+
+where we uniformly produce the genitive by
+
+ mkCase : Str -> Case => Str = \f -> table {
+ Nom => f ;
+ Gen => f + case last f of {
+ "s" | "x" => [] ;
+ _ => "s"
+ }
+ } ;
+
+
+
+
+
++ decl1 : Str -> N = \apa -> let ap = init apa in + mkN apa (apa + "n") (ap + "or") (ap + "orna") ; + + decl2 : Str -> N = \bil -> + mkN bil (bil + "en") (bil + "ar") (bil + "arna") ; + + decl3 : Str -> N = \fil -> + mkN fil (fil + "en") (fil + "er") (fil + "erna") ; + + decl4 : Str -> N = \rike -> + mkN rike (rike + "t") (rike + "n") (rike + "na") ; + + decl5 : Str -> N = \lik -> + mkN lik (lik + "et") lik (lik + "en") ; ++ + + + + +
+ gosse - gossar -- 211 + nyckel - nycklar -- 231 + seger - segrar -- 232 + öken - öknar -- 233 + hummer - humrar -- 238 + kam - kammar -- 241 + mun - munnar -- 243 ++and many more (S. Hellberg, The Morphology of Present-Day Swedish, +Almqvist & Wiksell, Stockholm, 1978). In addition, we have at least +
+ mås - mås -- genitive form without s + sax - sax ++ + + + + + +
+ +A much more efficient method is the one used in +dictionaries: give two (or more) forms instead of one. +Our "dictionary heuristic function" covers the following cases: +
+ flicka - flickor + kor - kor (koret) + ko - kor (kon) + ros - rosor (rosen) + bil - bilar + nyckel - nycklar + hummer - humrar + rike - riken + lik - lik (liket) + lärare - lärare (läraren) ++ + + + + + +
+reg2Noun : Str -> Str -> Subst = \bil,bilar ->
+ let
+ l = last bil ;
+ b = Predef.tk 2 bil ;
+ ar = Predef.dp 2 bilar
+ in
+ case ar of {
+ "or" => case l of {
+ "a" => decl1Noun bil ;
+ "r" => sLik bil ;
+ "o" => mkNoun bil (bil + "n") bilar (bilar + "na") ;
+ _ => mkNoun bil (bil + "en") bilar (bilar + "na")
+ } ;
+ "ar" => ifTok Subst (Predef.tk 2 bilar) bil
+ (decl2Noun bil)
+ (case l of {
+ "e" => decl2Noun bil ;
+ _ => mkNoun bil (bil + "n") bilar (bilar + "na")
+ }
+ ) ;
+ "er" => decl3Noun bil ;
+ "en" => ifTok Subst bil bilar (sLik bil) (sRike bil) ; -- ben-ben
+ _ => ifTok Subst bil bilar (
+ case Predef.dp 3 bil of {
+ "are" => sKikare (init bil) ;
+ _ => decl5Noun bil
+ }
+ )
+ (decl5Noun bil) --- rest case with lots of garbage
+ } ;
+
+
+
+
+
+ > cc mk2N "öken" "öknar"
+ {s = table Number {
+ Sg => table {
+ Indef => table Case {
+ Nom => "öken" ;
+ Gen => "ökens"
+ } ;
+ Def => table Case {
+ Nom => "ökenn" ;
+ Gen => "ökenns"
+ }
+ ...
+ }
+
+Use the worst-case function if the heuristic does not work:
++ mkN "öken" "öknen" "öknar" "öknarna" ++ + + + +
+ mkN : (apa,apan,apor,aporna : Str) -> N ; + mk2N : (nyckel,nycklar : Str) -> N ; + + mkV : (supa,super,sup,söp,supit,supen : Str) -> V ; + regV : (tala : Str) -> V ; + mk2V : (leka,leker : Str) -> V ; + irregV : (dricka, drack, druckit : Str) -> V ; ++Construction functions for subcategorization. +
+ mkV2 : V -> Preposition -> V2 ; + dirV2 : V -> V2 ; + mkV3 : V -> Preposition -> Preposition -> V3 ; ++ + + + +
+ paradigm decl1
+ = ap+"a"
+ {ap+"a" & ap+"or" };
+
+For instance, if you find klocka and klockor, add
++ klocka_N = decl1 "klocka" ; ++to the lexicon. + +
+ +The notation for extraction and its implementation are +developed by Markus Forsberg and Harald Hammarström. + + + + + +
+ +Solution: restrict stem with a regular expression +
+ paradigm decl1 [ap : char* vowel char*]
+ = ap+"a"
+ {ap+"a" & ap+"or" };
+
+In general, exclude stems shorter than 3 characters.
+
++ +It is necessary to check the results manually. + + + + +
+ paradigm vEI [sm:OneOrMore, t:OneOrMore]
+ = sm+"i"+t+"a"
+ {sm+"i"+t+"a" & (sm+"e"+t | sm+"i"+t+"it")} ;
+
+For rare patterns, it is more productive to build the
+corresponding part of the lexicon manually.
+
+
+
+
++ +Uses the + +Functional Morphology +format, which can be translated to GF, XFST, LEXC, +MySQL,... + +
+ +FM's "native" analysis engine is based on a trie +and includes compound analysis using algorithms +from G. Huet's +Zen Toolkit. +Analysis speed is 12,000 words per minute +with compound analysis, 50,000 without +(on an Intel M 1.5 GHz laptop). + + + + + +