Merge branch 'master' of https://github.com/GrammaticalFramework/gf-rgl

2021-07-02 21:29:12 +02:00
parent c4d72d1bae a3701d1a48
commit 45e99c5e40
24 changed files with 61482 additions and 62253 deletions
--- a/src/abstract/Extend.gf
+++ b/src/abstract/Extend.gf
@@ -47,6 +47,7 @@ abstract Extend = Cat ** {
    PredVPS    : NP   -> VPS -> S ;          -- she [has walked and won't sleep]
    SQuestVPS  : NP   -> VPS -> QS ;         -- has she walked
    QuestVPS   : IP   -> VPS -> QS ;         -- who has walked
+    RelVPS     : RP   -> VPS -> RS ;         -- which won't sleep

 -- existentials that work in the absence of Cl
    ExistS     : Temp -> Pol -> NP -> S ;    -- there was a party
@@ -286,4 +287,6 @@ fun UseDAP     : DAP -> NP ;
    UseDAPMasc : DAP -> NP ;
    UseDAPFem  : DAP -> NP ;

+cat X ; -- for words that are difficult to classify, mainly for MorphoDict
+
 }
--- a/src/common/ExtendFunctor.gf
+++ b/src/common/ExtendFunctor.gf
@@ -4,6 +4,8 @@ lincat
  RNP = Grammar.NP ;
  RNPList = Grammar.ListNP ;

+  X = {s : Str} ; -- for words that are difficult to classify, mainly for MorphoDict
+
 lin
  BaseVPS = variants {} ;
  ConsVPS = variants {} ;
@@ -26,6 +28,7 @@ lin
  MkVPS vp = variants {} ;     -- Temp -> Pol -> VP -> VPS ; -- hasn't slept
  ConjVPS = variants {} ;     -- Conj -> [VPS] -> VPS ; -- has walked and won't sleep
  PredVPS = variants {} ;     -- NP -> VPS -> S ; -- has walked and won't sleep
+  RelVPS = variants {} ;      -- RP -> VPS -> RS ; -- which won't sleep
  MkVPI vp = variants {} ;     -- Temp -> Pol -> VP -> VPI ; -- to sleep / hasn't slept
  ConjVPI = variants {} ;     -- Conj -> [VPI] -> VPI ; -- has walked and won't sleep
  ComplVPIVV = variants {} ;     -- VV -> VPI -> VP ; -- want to sleep and to walk
--- a/src/english/ExtendEng.gf
+++ b/src/english/ExtendEng.gf
@@ -5,7 +5,7 @@ concrete ExtendEng of Extend =
  [
    VPS, ListVPS, VPI, ListVPI, VPS2, ListVPS2, VPI2, ListVPI2, RNP, RNPList,
    AdAdV, AdjAsCN, AdjAsNP, ApposNP, AdvIsNP,
-    MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS, QuestVPS, SQuestVPS,
+    MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS, QuestVPS, SQuestVPS, RelVPS,
    MkVPI, BaseVPI, ConsVPI, ConjVPI, ComplVPIVV,
    MkVPS2, BaseVPS2, ConsVPS2, ConjVPS2, ComplVPS2, ReflVPS2,
    MkVPI2, BaseVPI2, ConsVPI2, ConjVPI2, ComplVPI2,
@@ -127,6 +127,11 @@ concrete ExtendEng of Extend =
    QuestVPS ip vps = let vp = vps.s ! oDir ! toAgr ip.n P3 Neutr  in {
      s = \\q => ip.s ! npNom ++ vp.fin ++ vp.inf
      } ;
+    RelVPS rp vps = {
+      s = \\agr => let vp = vps.s ! oDir ! agr in
+           rp.s ! RC (fromAgr agr).g npNom ++ vp.fin ++ vp.inf ;
+      c = npNom ;
+      } ;


    MkVPI vp = mkVPI (lin VP vp) ;
@@ -177,7 +182,7 @@ concrete ExtendEng of Extend =
        let
          verb = vp.s ! t.t ! t.a ! p.p ! o ! a ; -- choice of Order determines aux or not
          compl = vp.s2 ! a ++ vp.ext
-        in {fin = verb.aux ;
+        in {fin = verb.aux ++ t.s ++ p.s ;
            inf = verb.adv ++ vp.ad ! a ++ verb.fin ++ verb.inf ++ vp.p ++ compl} ;
      } ;

--- a/src/english/IrregEng.gf
+++ b/src/english/IrregEng.gf
@@ -13,7 +13,8 @@ flags optimize=values ;
  bend_V = irregV "bend" "bent" "bent" ;
  beset_V = irregV "beset" "beset" "beset" ;
  bet_V = irregDuplV "bet" "bet" "bet" ;
-  bid_V = irregDuplV "bid" (variants {"bid" ; "bade"}) (variants {"bid" ; "bidden"}) ;
+  bid_V = irregDuplV "bid" "bid" "bid" ;
+  bid_bade_V = irregDuplV "bid" "bade" "bidden" ;
  bind_V = irregV "bind" "bound" "bound" ;
  bite_V = irregV "bite" "bit" "bitten" ;
  bleed_V = irregV "bleed" "bled" "bled" ;
@@ -23,7 +24,7 @@ flags optimize=values ;
  bring_V = irregV "bring" "brought" "brought" ;
  broadcast_V = irregV "broadcast" "broadcast" "broadcast" ;
  build_V = irregV "build" "built" "built" ;
-  burn_V = irregV "burn" (variants {"burned" ; "burnt"}) (variants {"burned" ; "burnt"}) ;
+  burn_V = irregV "burn" "burnt" "burnt" ;
  burst_V = irregV "burst" "burst" "burst" ;
  buy_V = irregV "buy" "bought" "bought" ;
  cast_V = irregV "cast" "cast" "cast" ;
@@ -36,10 +37,10 @@ flags optimize=values ;
  cut_V = irregDuplV "cut" "cut" "cut" ;
  deal_V = irregV "deal" "dealt" "dealt" ;
  dig_V = irregDuplV "dig" "dug" "dug" ;
-  dive_V = irregV "dive" (variants {"dived" ; "dove"}) "dived" ;
+  dive_V = irregV "dive" "dove" "dived" ;
  do_V = mk5V "do" "does" "did" "done" "doing" ;
  draw_V = irregV "draw" "drew" "drawn" ;
-  dream_V = irregV "dream" (variants {"dreamed" ; "dreamt"}) (variants {"dreamed" ; "dreamt"}) ;
+  dream_V = irregV "dream" "dreamt" "dreamt" ;
  drive_V = irregV "drive" "drove" "driven" ;
  drink_V = irregV "drink" "drank" "drunk" ;
  eat_V = irregV "eat" "ate" "eaten" ;
@@ -75,27 +76,27 @@ flags optimize=values ;
  know_V = irregV "know" "knew" "known" ;
  lay_V = irregV "lay" "laid" "laid" ;
  lead_V = irregV "lead" "led" "led" ;
-  leap_V = irregV "leap" (variants {"leaped" ; "lept"}) (variants {"leaped" ; "lept"}) ;
-  learn_V = irregV "learn" (variants {"learned" ; "learnt"}) (variants {"learned" ; "learnt"}) ;
+  leap_V = irregV "leap" "lept" "lept" ;
+  learn_V = irregV "learn" "learnt" "learnt" ;
  leave_V = irregV "leave" "left" "left" ;
  lend_V = irregV "lend" "lent" "lent" ;
  let_V = irregDuplV "let" "let" "let" ;
  lie_V = irregV "lie" "lay" "lain" ;
-  light_V = irregV "light" (variants {"lighted" ; "lit"}) "lighted" ;
+  light_V = irregV "light" "lit" "lit" ;
  lose_V = irregV "lose" "lost" "lost" ;
  make_V = irregV "make" "made" "made" ;
  mean_V = irregV "mean" "meant" "meant" ;
  meet_V = irregV "meet" "met" "met" ;
-  misspell_V = irregV "misspell" (variants {"misspelled" ; "misspelt"}) (variants {"misspelled" ; "misspelt"}) ;
+  misspell_V = irregV "misspell" "misspelt" "misspelt" ;
  mistake_V = irregV "mistake" "mistook" "mistaken" ;
-  mow_V = irregV "mow" "mowed" (variants {"mowed" ; "mown"}) ;
+  mow_V = irregV "mow" "mowed" "mown" ;
  overcome_V = irregV "overcome" "overcame" "overcome" ;
  overdo_V = mk5V "overdo" "overdoes" "overdid" "overdone" "overdoing" ;
  overtake_V = irregV "overtake" "overtook" "overtaken" ;
  overthrow_V = irregV "overthrow" "overthrew" "overthrown" ;
  pay_V = irregV "pay" "paid" "paid" ;
  plead_V = irregV "plead" "pled" "pled" ;
-  prove_V = irregV "prove" "proved" (variants {"proved" ; "proven"}) ;
+  prove_V = irregV "prove" "proved" "proven" ;
  put_V = irregDuplV "put" "put" "put" ;
  quit_V = irregDuplV "quit" "quit" "quit" ;
  read_V = irregV "read" "read" "read" ;
@@ -104,22 +105,22 @@ flags optimize=values ;
  ring_V = irregV "ring" "rang" "rung" ;
  rise_V = irregV "rise" "rose" "risen" ;
  run_V = irregDuplV "run" "ran" "run" ;
-  saw_V = irregV "saw" "sawed" (variants {"sawed" ; "sawn"}) ;
+  saw_V = irregV "saw" "sawed" "sawn" ;
  say_V = irregV "say" "said" "said" ;
  see_V = irregV "see" "saw" "seen" ;
  seek_V = irregV "seek" "sought" "sought" ;
  sell_V = irregV "sell" "sold" "sold" ;
  send_V = irregV "send" "sent" "sent" ;
  set_V = irregDuplV "set" "set" "set" ;
-  sew_V = irregV "sew" "sewed" (variants {"sewed" ; "sewn"}) ;
+  sew_V = irregV "sew" "sewed" "sewn" ;
  shake_V = irregV "shake" "shook" "shaken" ;
-  shave_V = irregV "shave" "shaved" (variants {"shaved" ; "shaven"}) ;
+  shave_V = irregV "shave" "shaved" "shaven" ;
  shear_V = irregV "shear" "shore" "shorn" ;
  shed_V = irregDuplV "shed" "shed" "shed" ;
  shine_V = irregV "shine" "shone" "shone" ;
-  shoe_V = irregV "shoe" "shoed" (variants {"shoed" ; "shod"}) ;
+  shoe_V = irregV "shoe" "shoed" "shod" ;
  shoot_V = irregV "shoot" "shot" "shot" ;
-  show_V = irregV "show" "showed" (variants {"showed" ; "shown"}) ;
+  show_V = irregV "show" "showed" "shown" ;
  shrink_V = irregV "shrink" "shrank" "shrunk" ;
  shut_V = irregDuplV "shut" "shut" "shut" ;
  sing_V = irregV "sing" "sang" "sung" ;
@@ -131,16 +132,18 @@ flags optimize=values ;
  sling_V = irregV "sling" "slung" "slung" ;
  slit_V = irregDuplV "slit" "slit" "slit" ;
  smite_V = irregV "smite" "smote" "smitten" ;
-  sow_V = irregV "sow" "sowed" (variants {"sowed" ; "sown"}) ;
+  sow_V = irregV "sow" "sowed" "sown" ;
  speak_V = irregV "speak" "spoke" "spoken" ;
  speed_V = irregV "speed" "sped" "sped" ;
  spend_V = irregV "spend" "spent" "spent" ;
-  spill_V = irregV "spill" (variants {"spilled" ; "spilt"}) (variants {"spilled" ; "spilt"}) ;
+  spill_V = irregV "spill" "spilt" "spilt" ;
  spin_V = irregDuplV "spin" "spun" "spun" ;
-  spit_V = irregDuplV "spit" (variants {"spit" ; "spat"}) "spit" ;
+  spit_V = irregDuplV "spit" "spit" "spit" ;
+  spit_spat_V = irregDuplV "spit" "spat" "spit" ;
  split_V = irregDuplV "split" "split" "split" ;
  spread_V = irregV "spread" "spread" "spread" ;
-  spring_V = irregV "spring" (variants {"sprang" ; "sprung"}) "sprung" ;
+  spring_V = irregV "spring" "sprang" "sprung" ;
+  spring_sprung_V = irregV "spring" "sprung" "sprung" ;
  stand_V = irregV "stand" "stood" "stood" ;
  steal_V = irregV "steal" "stole" "stolen" ;
  stick_V = irregV "stick" "stuck" "stuck" ;
@@ -152,7 +155,7 @@ flags optimize=values ;
  strive_V = irregV "strive" "strove" "striven" ;
  swear_V = irregV "swear" "swore" "sworn" ;
  sweep_V = irregV "sweep" "swept" "swept" ;
-  swell_V = irregV "swell" "swelled" (variants {"swelled" ; "swollen"}) ;
+  swell_V = irregV "swell" "swelled" "swollen" ;
  swim_V = irregDuplV "swim" "swam" "swum" ;
  swing_V = irregV "swing" "swung" "swung" ;
  take_V = irregV "take" "took" "taken" ;
@@ -160,7 +163,7 @@ flags optimize=values ;
  tear_V = irregV "tear" "tore" "torn" ;
  tell_V = irregV "tell" "told" "told" ;
  think_V = irregV "think" "thought" "thought" ;
-  thrive_V = irregV "thrive" (variants {"thrived" ; "throve"}) "thrived" ;
+  thrive_V = irregV "thrive" "throve" "thrived" ;
  throw_V = irregV "throw" "threw" "thrown" ;
  thrust_V = irregV "thrust" "thrust" "thrust" ;
  tread_V = irregV "tread" "trod" "trodden" ;
@@ -169,7 +172,7 @@ flags optimize=values ;
  upset_V = irregDuplV "upset" "upset" "upset" ;
  wake_V = irregV "wake" "woke" "woken" ;
  wear_V = irregV "wear" "wore" "worn" ;
-  weave_V = irregV "weave" (variants {"weaved" ; "wove"}) (variants {"weaved" ; "woven"}) ;
+  weave_V = irregV "weave" "wove" "woven" ;
  wed_V = irregDuplV "wed" "wed" "wed" ;
  weep_V = irregV "weep" "wept" "wept" ;
  wind_V = irregV "wind" "wound" "wound" ;
--- a/src/english/IrregEngAbs.gf
+++ b/src/english/IrregEngAbs.gf
@@ -9,6 +9,7 @@ fun
  beset_V : V ;
  bet_V : V ;
  bid_V : V ;
+  bid_bade_V : V ;
  bind_V : V ;
  bite_V : V ;
  bleed_V : V ;
@@ -133,9 +134,11 @@ fun
  spill_V : V ;
  spin_V : V ;
  spit_V : V ;
+  spit_spat_V : V ;
  split_V : V ;
  spread_V : V ;
  spring_V : V ;
+  spring_sprung_V : V ;
  stand_V : V ;
  steal_V : V ;
  stick_V : V ;
--- a/src/finnish/NewDictFin.gf
+++ b/src/finnish/NewDictFin.gf
@@ -19885,7 +19885,7 @@ lin moniaalle_Adv = mkAdv {s = c99 "moniaalle"} ;
 lin moniaalta_Adv = mkAdv {s = c99 "moniaalta"} ;
 lin monialaistua_V = mkV {s = c52 "monialaistua"} ;
 lin monias_N = mkN {s = d41 "monias"} ;
-lin moni_ilmeinen_N = mkN {s = d18 "moni-ilmeinen"} ;
+lin moni_ilmeinen_N = mkN {s = d38 "moni-ilmeinen"} ;
 lin monijumalaisuus_N = mkN {s = d40 "monijumalaisuus"} ;
 lin monikansainen_N = mkN {s = d38 "monikansainen"} ;
 lin monikko_N = mkN {s = d04A "monikko"} ;
@@ -41387,7 +41387,7 @@ lin ykkönen_N = mkN {s = d38 "ykkönen"} ;
 lin yks_Adv = mkAdv {s = c99 "yks"} ;
 lin ykseys_N = mkN {s = d40 "ykseys"} ;
 lin yksi_N = mkN {s = d31 "yksi"} ;
-lin yksi_ilmeinen_N = mkN {s = d18 "yksi-ilmeinen"} ;
+lin yksi_ilmeinen_N = mkN {s = d38 "yksi-ilmeinen"} ;
 lin yksiin_Adv = mkAdv {s = c99 "yksiin"} ;
 lin yksijumalaisuus_N = mkN {s = d40 "yksijumalaisuus"} ;
 lin yksikkö_N = mkN {s = d04A "yksikkö"} ;
--- a/src/finnish/ParadigmsFin.gf
+++ b/src/finnish/ParadigmsFin.gf
@@ -159,6 +159,8 @@ oper
     s = \\c => (StemFin.snoun2nounBind veri).s ! NCase n Gen + paine.s ! c
     } ;

+   foreignN : Str -> N ; -- foreign word without Finnish alternations, e.g. sake/saken/sakeja
+
 -- Nouns used as functions need a case, of which the default is
 -- the genitive.

@@ -667,7 +669,8 @@ mkVS = overload {

  mkPN_1 : Str -> PN = \s -> lin PN (snoun2spn (mk1N s)) ;

-  foreignPN : Str -> PN = \s -> (lin PN (snoun2spn (nforms2snoun (noun s)))) where {
+  foreignPN : Str -> PN = \s -> mkPN (foreignN s) ;
+  foreignN : Str -> N = \s -> (lin N (nforms2snoun (noun s))) where {
    noun : Str -> NForms = \s -> case s of {
      _ + "i" => dPaatti s (s + "n") ;
      _ + "e" => dNukke s (s + "n") ;
--- a/src/french/IdiomFre.gf
+++ b/src/french/IdiomFre.gf
@@ -11,6 +11,10 @@ concrete IdiomFre of Idiom = CatFre **
    ExistNP np = 
      mkClause "il" True False (agrP3 Masc Sg)
        (insertClit3 "y" (insertComplement (\\_ => (np.s ! Acc).ton) (predV avoir_V))) ;
+	
+    ExistNPAdv np adv = 
+      mkClause "il" True False (agrP3 Masc Sg)
+        (insertAdv adv.s (insertClit3 "y" (insertComplement (\\_ => (np.s ! Acc).ton) (predV avoir_V)))) ;

    ExistIP ip = {
      s = \\t,a,p,_ => 
--- a/src/german/VerbGer.gf
+++ b/src/german/VerbGer.gf
@@ -144,10 +144,10 @@ concrete VerbGer of Verb = CatGer ** open Prelude, ResGer, Coordination in {
    CompAdv a = {s = \\_ => a.s ; ext = []} ;

    CompCN cn = {s = \\a => case numberAgr a of {
-        Sg => "ein" + pronEnding ! GSg cn.g ! Nom ++ cn.s ! Strong ! Sg ! Nom ;
-        Pl => cn.s ! Strong ! Pl ! Nom
+        Sg => "ein" + pronEnding ! GSg cn.g ! Nom ++ cn.s ! Strong ! Sg ! Nom ++ cn.rc ! Sg ; ---
+        Pl => cn.s ! Strong ! Pl ! Nom ++ cn.rc ! Pl ---
        } ;
-		ext = []
+		ext = cn.adv ++ cn.ext
      } ;

    AdvVP vp adv = insertAdv adv.s vp ;
--- a/src/italian/IdiomIta.gf
+++ b/src/italian/IdiomIta.gf
@@ -23,6 +23,14 @@ concrete IdiomIta of Idiom = CatIta **
        (insertClit3 (elision "ci" "c'" "ci") 
          (insertComplement (\\_ => (np.s ! Nom).ton) 
            (predV copula))) ;
+	    
+    ExistNPAdv np adv =
+      let npa = complAgr np.a in
+      mkClause [] True False (agrP3 npa.g npa.n)
+      (insertAdv adv.s 
+        (insertClit3 (elision "ci" "c'" "ci") 
+          (insertComplement (\\_ => (np.s ! Nom).ton) 
+            (predV copula)))) ;

    ExistIP ip = {
      s = \\t,a,p,_ =>
--- a/src/morphodict/MkMorphodict.hs
+++ b/src/morphodict/MkMorphodict.hs
@@ -5,7 +5,9 @@ import PGF
 import qualified Data.Map as M
 import Data.Char
 import Data.List
+import Safe
 import System.Environment (getArgs)
+import Debug.Trace

 -- AR 2020-02-28

@@ -17,86 +19,164 @@ import System.Environment (getArgs)
 --  - functionname = baseform_category, with exceptions
 --     - variant inflection tables: lie_1_V, lie_2_V
 --     - words that have non-ident characters: 'bird\'s-eye_A'
--     - words that start with non-letters: W_'tween_Adv

 -- example:
 --   gf -make ../english/DictEng.gf
--   runghc MkMorphodict.hs DictEngAbs.pgf MorphoDictEng
+--   runghc MkMorphodict.hs pgf MorphoDictEng.config DictEngAbs.pgf MorphoDictEng
 -- 64923 ->  56599 functions

-usage = "MkMorphodict <pgf> <outfile>"
+usage = "runghc MkMorphodict (raw|pgf) <configfile> <datafile> <outfile>"

 main = do
-  pgfile:outfile:_ <- getArgs
-  pgf <- readPGF pgfile
-  config <- readFile (outfile ++ ".config") >>= return . mkConfig
-  
-  let (absrules,cncrules) = mkMorphoDict (MDEnv pgf config (head (languages pgf)))
-  
+  xx <- getArgs
+  if length xx /= 4
+    then do
+      putStrLn "Usage:"
+      putStrLn usage
+      putStrLn $ "Got instead: " ++ show xx
+    else do
+      let mode:configfile:datafile:outfile:_ = xx
+      config <- readFile configfile >>= return . mkConfig
+
+      rawdata <- case mode of
+        "pgf" -> pgfFile2rawData config datafile
+        "raw" -> readFile datafile >>= return . map getRawData . filter (not . null) . lines
+        _ -> error $ "Expected mode (pgf|raw), got " ++ mode
+      rawdata2gf config rawdata outfile
+
+
+rawdata2gf config rawdata outfile = do
+
+  let env = MDEnv rawdata config
+  let (absrules,cncrules) = mkMorphoDict env
+
  absheader <- readFile (outfile ++ "Abs.header")
  cncheader <- readFile (outfile ++ ".header")
-  
+
  writeFile (outfile ++ "Abs.gf") absheader
-  appendFile (outfile ++ "Abs.gf") $ unlines absrules
+  appendFile (outfile ++ "Abs.gf") $ unlines $ sort absrules
  appendFile (outfile ++ "Abs.gf") "}"
-  
+
  writeFile (outfile ++ ".gf") cncheader
-  appendFile (outfile ++ ".gf") $ unlines cncrules
+  appendFile (outfile ++ ".gf") $ unlines $ sort cncrules
  appendFile (outfile ++ ".gf") "}"

+-- one way to get raw data from a dictionary pgf
+-- another way, more controllod, is to write a wrapper grammar with a function, for each category, to generate a RawData entry
+pgfFile2rawData config pgffile = do
+  pgf    <- readPGF pgffile

-type Cat  = CId
+  let cats = nub [c | (c,(_,_,_)) <- M.assocs config]
+  let lang:_ = languages pgf
+
+  return [
+    (cat, map snd lin) |
+        cat  <- cats,
+        f    <- functionsByCat pgf (mkCId cat),
+        lin  <- tabularLinearizes pgf lang (mkApp f [])
+    ]
+
+type Cat  = String
+type Fun  = String
 type Oper = String
-type Config = M.Map Cat (Cat,Oper,[Int])
+type Config = M.Map Cat (Cat,Oper,([Int],[Int])) -- lin word_Cat = Oper str_i1 str_i2 ... str_in  featj1 ... featjn ;

 data MDEnv = MDEnv {
-  pgf    :: PGF,
-  config :: Config,
-  lang   :: Language
+  rawdata :: [RawData],
+  config  :: Config
  }

-mkConfig :: String -> Config
+mkConfig :: String -> Config  -- N : N mkN 0 2 4 6 # 9
 mkConfig ls = M.fromList [(c,i) | Left (c,i) <- map mkOne (lines ls)]
 where
  mkOne s = case words s of
-    "--":_                 -> Right s 
-    cat:":":tcat:oper:ints -> Left (mkCId cat,(mkCId tcat,oper,map read ints))
+    "--":_                 -> Right s
+    cat:":":tcat:oper:ints -> Left (cat,(tcat,oper,mkArgs ints))
    _ -> Right s
+  mkArgs ints = case break (=="#") ints of
+    (ss,[])   -> (map read'  ss, [])
+    (ss,_:fs) -> (map read' ss, map read' fs)
+  read' a = readNote [] a -- Safe.readNote provides better error message

-mkMorphoDict :: MDEnv -> ([String],[String])
+getRawData s = case words s of
+  c:cs -> (c,cs)
+
+type RawData  = (String,[String])                              -- old cat name, forms and features
+type RawRule  = (([String],Cat), (Oper, ([String],[String])))  -- parts of fun name, new category, oper, arguments
+type RuleData = ((Fun,    Cat),  (Oper, ([String],[String])))  -- final fun name, cat, oper, args
+
+mkMorphoDict :: MDEnv -> ([String],[String])  -- fun rules, lin rules
 mkMorphoDict env =
  unzip $
  map splitRule $
-  findCompounds $
+----  findCompounds $ -- let us not care about compounds for the time being, but include them if they are given
  nameFunctions $
  mergeRules $
-  concatMap findRules cats
+  findRules $
+  rawdata env
 where
-  splitRule (fun,(cat,lin)) = (unwords ["fun",fun,":",showCId cat,";"], unwords ["lin",fun,"=", unwords lin,";"])
+  splitRule ((fun,cat),(oper,(forms,feats))) =
+    (unwords ["fun",fun,":",cat,";"], unwords ["lin",fun,"=", unwords (oper : map quote forms ++ feats),";"])

  cats = nub [c | (c,(_,_,_)) <- M.assocs (config env)]

-  findRules cat = [
-    ([snd (lin !! head ints), showCId c], (c, op : appSig ints (map snd lin))) |  --- head ints is the base form in smart paradigms
-      f    <- functionsByCat (pgf env) cat,
-      lin  <- tabularLinearizes (pgf env) (lang env) (mkApp f []), -- [[(String, String)]]
-      Just (c,op,ints) <- [M.lookup cat (config env)]
-   ] 
+  findRules :: [RawData] -> [RawRule]
+  findRules raws = [
+    (([lemma],newcat),(oper, appSig sig args)) |
+        (oldcat,args) <- raws,
+        Just (newcat, oper, sig) <- [M.lookup oldcat (config env)],
+        let lemma = args `at` head (fst sig)
+   ]

-  appSig ints forms = [forms !! i | i <- ints]
+  appSig (ints,feats) args =
+    -- If there's wrong number in config file, uncomment the line below to see which number it should be
+    -- trace (intercalate "\n" $ map show (zip [0..] args)) $
+    ([args `at` i | i <- ints], [args `at` i | i <- feats])

+  mergeRules :: [RawRule] -> [RawRule]
  mergeRules = map head . groupBy (\x y -> snd x == snd y) . sortOn snd

+  nameFunctions :: [RawRule] -> [RuleData]
  nameFunctions = expandNames . sortOn fst

+  expandNames :: [RawRule] -> [RuleData]
  expandNames fls = case fls of
-    (f,l):fls2 -> case span ((==f) . fst) fls2 of
-      ([],_) -> (mkFun f,l) : expandNames fls2
+    (f@(w,c),l) : fls2 -> case span ((==f) . fst) fls2 of
+      ([],_) -> ((mkFun (w ++ [c]),c),l) : expandNames fls2
      (fls1,fls3) -> renames ((f,l):fls1) ++ expandNames fls3
    _ -> []

-  renames fls = [(mkFun (init f ++ [show i,last f]),l) | (i,(f,l)) <- zip [1..] fls]
+  renames :: [RawRule] -> [RuleData]
+---  renames fls = [((mkFun (f ++ [show i,c]),c),l) | (i,((f,c),l)) <- zip [1..] fls] -- disambiguate with int
+  renames fls = [((mkFun (f ++ fs ++ [c]),c),l) | (i,(((f,c),l),fs)) <- zip [1..] (zip fls (minimize fls))] -- disambiguate with different forms

+  minimize :: [RawRule] -> [[String]]
+  minimize fls = shrink [ws ++ fs | (_,(_,(_:ws,fs))) <- fls]
+
+  shrink fls = case fls of
+    fl@(_:_):_ | all ((==head fl) . head) fls -> shrink (map tail fls)
+    fl@(_:_):_ | all ((==last fl) . last) fls -> shrink (map init fls)
+    _ -> shrinkMore fls
+
+  shrinkMore fls = case fls of
+    _ | length (nub (map init fls)) == length fls -> shrinkMore (map init fls)
+    _ | length (nub (map tail fls)) == length fls -> shrinkMore (map tail fls)
+    _ -> fls
+
+-- >>> mkFun ["hello", "world", "hello friends", "hello-all"]
+-- "hello_world_hello_friends_hello_all"
+mkFun :: [String] -> String                          -- if word contains space or hyphen, replace with underscore
+mkFun = showCId . mkCId . concat . intersperse "_" . concatMap (words . removeHyphen)
+  where
+    removeHyphen [] = []
+    removeHyphen ['-'] = ['-'] -- If hyphen is the last character, it's usually meaningful, leave it
+    removeHyphen ('-':cs) = ' ' : removeHyphen cs
+    removeHyphen (c:cs) = c : removeHyphen cs
+
+quote s = "\"" ++ s ++ "\""
+
+{- ---- let us ignore this
+  findCompounds :: [RuleData] -> [RuleData]
  findCompounds = getCompounds . sortOn cat_orthrevforms

  cat_orthrevforms (_,(cat,_:forms)) = (cat,[map (!!i) fss | let fss = map reverse forms, i <- [0..minimum (map length fss) - 1]])
@@ -105,8 +185,9 @@ mkMorphoDict env =
  revstem = head . snd . cat_revforms
  wforms (_,(_,_:forms)) = forms

+  getCompounds :: [RuleData] -> [RuleData]
  getCompounds fls = case fls of
-    fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1 
+    fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
    fl : fls2 -> case span (\x -> and [isPrefixOf (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms x)]) fls2 of
      ([],_:_) -> markWith fl [] : getCompounds fls2
      (fls1,fls3) -> markWith fl [] : map (markCompound fl) fls1 ++ getCompounds fls3
@@ -117,26 +198,14 @@ mkMorphoDict env =
      True  -> markWith fl1 [";","--","compound",(fst fl)]
      False -> markWith fl1 [";","--","notcompound",(fst fl)]

-  markWith (f,(c,op:ws)) xs = (f,(c,op : map quote ws ++ xs))
+  markWith (f,(c,op:ws)) xs = (f,(c,op : map quote ws ++ xs)) ---- TODO only quote string args, not features

  isPrefixWord x xy =
-    length suff > 1 &&
-    any (\c -> elem c "-0123456789aeiouyåäö") suff &&
-    isPrefixOf x xy
+    length suff > 1 &&                                ---- compound first part must be at least two letters long
+    any (\c -> elem c "-0123456789aeiouyåäö") suff && ---- must contain a vowel or a digit
+    isPrefixOf x xy                                   ---- and of course be a prefix
   where
     suff = drop (length x) xy
-
-mkFun = quoteIf . concat . intersperse "_"
-quoteIf s = case s of
-  _ | any (\c -> not (isAlphaNum c || elem c "_'")) s -> "'" ++ unSgQuote s ++ "'"
-  c:_ | not (isAlpha c) -> "W_" ++ s
-  _ -> s
- where
-  unSgQuote s = case s of
-    '\'':cs -> "\\\'" ++ unSgQuote cs
-    c:cs -> c : unSgQuote cs
-    _ -> s
+-}


-quote s = "\"" ++ s ++ "\""
-
--- a/src/morphodict/MorphoDictFin.config
+++ b/src/morphodict/MorphoDictFin.config
@@ -0,0 +1,6 @@
+N : N mkN 0 1 2 4 7 13 14 16 17 19
+A : A mkA' 0 1 2 4 7 13 14 16 17 19
+V : V mkV  0 17 19 22 43 49 23 25 31 58 94 37
+V2 : V mkV 0 17 19 22 43 49 23 25 31 58 94 37
+Adv : Adv mkAdv 0
+Prep : Prep mkPrep 0
--- a/src/morphodict/MorphoDictFin.gf
+++ b/src/morphodict/MorphoDictFin.gf
--- a/src/morphodict/MorphoDictFin.header
+++ b/src/morphodict/MorphoDictFin.header
@@ -0,0 +1,13 @@
+concrete MorphoDictFin of MorphoDictFinAbs = CatFin ** open
+  ParadigmsFin,
+--  MorphoFin,
+  Kotus
+--  Prelude
+ in {
+
+-- extracted from http://kaino.kotus.fi/sanat/nykysuomi/, licensed under LGPL
+
+flags coding = utf8 ;
+
+oper mkA' : (x1,_,_,_,_,_,_,_,_,x10 : Str) -> A = \a,b,c,d,e,f,g,h,i,j -> mkA (mkN a b c d e f g h i j) ; -- Need a single worst-case paradigm for how config is implemented
+
--- a/src/morphodict/MorphoDictFinAbs.gf
+++ b/src/morphodict/MorphoDictFinAbs.gf
--- a/src/morphodict/MorphoDictFinAbs.header
+++ b/src/morphodict/MorphoDictFinAbs.header
@@ -0,0 +1,4 @@
+abstract MorphoDictFinAbs =
+  Cat [N,A,V,Adv,Prep] **
+{
+
--- a/src/morphodict/README
+++ b/src/morphodict/README
@@ -1,38 +0,0 @@
-MkMorphoDict: Extracting a minimal morphological dictionary from an existing GF dictionary.
-
-Aarne Ranta 2020-03-02
-
-principles:
-
-There should be a single source for each lemgram (i.e. inflection table of a word)
-Functions names should be easy to guess: baseform_Category (but avoiding accidental errors if this is not a unique key)
-
-Hence,
-
-Functions are 1-to-1 with lemgrams, i.e. inflection tables, thus
-     - no sense distinctions
-     - no subcategorizations
-     - no variants
-
-Functionname = baseform_category, with exceptions
-     - same baseform_Category, different inflection tables: lie_1_V, lie_2_V
-     - words that have non-ident characters: 'bird\'s-eye_A'
-     - words that start with non-letters: W_'tween_Adv
-
-Example run, English:
-
-   gf -make ../english/DictEng.gf
-   runghc MkMorphodict.hs DictEngAbs.pgf MorphoDictEng
-
-Result: 64923 ->  56599 functions, of which 21679 could be compounds
-
-Swedish, using a dump of SALDO (not available in these sources)
-
-  cd saldo/
-  runghc SaldoGF.hs
-  # combine abs.tmp with Saldo.header to obtain Saldo.gf
-  # combine cnc.tmp with SaldoSwe.header to obtain SaldoSwe.gf
-  gf -make SaldoSwe.gf
-  cd ..
-  runghc MkMorphodict.hs saldo/Saldo.pgf MorphoDictSwe
-
--- a/src/morphodict/README.md
+++ b/src/morphodict/README.md
@@ -0,0 +1,171 @@
+# morphodict: purely morphological unilingual dictionaries
+
+Aarne Ranta 2020-03-02 -- 2021-05-27
+
+UNDER CONSTRUCTION, INCOMPLETE AND BUGGY
+
+## The vision
+
+Vision 1: if you need the noun "stjärna" in Swedish, you will find it
+as `MorphoDictSwe.stjärna_N`.
+
+Vision 2: if you analyse a Swedish text that contains the word "stjärnornas", it will be returned as `MorphoDictSwe.stjärna_N`.
+
+Vision 3: this will work for all words of Swedish and all other RGL languages. Only seldom will you need `ParadigmsSwe`.
+
+
+## What is contained
+
+The guiding principle is to provide a single source for each *lemgram* (i.e. linearization records, i.e. inflection table plus inherent features).
+Functions names should be easy to guess:
+- `baseform_Category`
+
+Baseforms that have many different lemgrams are an exception.
+They should be disambiguated by adding the differing forms, as in
+- `lie_lay_V` ("lie, lay, lain") 
+- `lie_lied_V` ("lie, lied lied")
+
+Such distinctions are made in all cases where there are alternative inflections, even if there is no sense distinction:
+- `learn_learned_V` ("learn, learned, learned")
+- `learn_learnt_V` ("learn, learnt, learnt")
+
+Hence,
+- no `variants` should appear in the MorphoDict
+- no entries should be duplicated if their lemgrams are the same
+- hence, in particular, sense distinctions do not result in different entries
+
+The dictionary will also exclude *multiwords* consisting of several tokens.
+Most of the time, even *compounds* written as single tokens should be excluded.
+However, as the status of a compound is not always clear, and since they do not create spurious morphological analyses, they can be tolerated, in particular if extracted from legacy sources.
+
+Since multiwords and compounds are excluded, `Paradigms` and `MakeStructural` should for each language provide API functions for easy definitions of them, preferably of the form
+```
+ mkC : Str -> C -> C
+```
+The situation when this is not enough is when separate functions are needed for gluing and concatenation compounds.
+
+*Open question*: what to do with compound prepositions that are common in e.g. English?
+The above principles imply
+```
+ according_to_Prep = mkPrep "according" to_Prep
+```
+defined *outside* `MorphoDictEng`, so that `mkPrep` comes from `ParadigmsEng` and `to_Prep` from `MorphoDictEng`.
+This may sound like against tradition, but follows the general guidelines of morphological dictionaries.
+
+
+## Relevant categories 
+
+In addition to sense distinctions, MorphoDict ignores subcategorizations.
+One reason is that, just like senses (although in a lesser degree), they are open-ended and sometimes vague.
+Another reason is that different subcategory variants overload morphological analysis.
+
+The most numerous categories to be addressed are content words:
+- `A`
+- `Adv`
+- `Interj`
+- `N`
+- `PN`
+- `Symb`
+- `V`
+
+In addition, structural words should appear here with their native lemma names:
+- `Conj`
+- `Det`
+- `IAdv`
+- `IDet`
+- `IP`
+- `NP` (special NP-like "pronouns", such as "somebody")
+- `Prep`
+- `Pron` (in the RGL only covering personal pronouns)
+- `Punct`
+- `Subj`
+
+Additional language-specific categories can be included if the reasons are clear.
+They must then be importable from the `Paradigms` module for that language, together with `mk` functions.
+The `Extend` module may also put them in use in syntax.
+
+Following the model of Universal Tagset, we add a category `X` for unspecified words in `Extend`, with the linearization type `{s : Str}`.
+Hence it can only be used for uninflected strings with unclear status.
+
+## Naming
+
+As stated before,
+- `functionname` = `baseform_category` if there is a unique lemgram
+- = `baseform_number_category` if there is a need to disambiguate
+
+The disambiguation numbering should reflect the frequency or probability of the lemgram, but this is just a recommendation, since the frequency is not always known.
+
+The baseform should be the native alphabet baseform in Unicode letters, which is as such a valid GF identifier.
+However, if the word contains characters that are not legal in identifiers, the function name should be simply included in single quotes, rather than inventing transliterations.
+If function names are formed by the API function `PGF.mkCId`, these conventions are automatically followed.
+
+
+## Coding conventions
+
+To enable easy ocular and automatic inspection,
+- write one entry per line, each prefixed by `fun` or `lin` keyword
+- sort the entries alphabetically
+- use paradigms with enough many arguments to make the characteristic forms explicit
+
+To guarantee compatibility with the rest of the RGL and application grammars,
+- paradigms used should be imported from `Paradigms` and `MakeStructural` rather than defined in `MorphoDict` itself
+- import of *low-level modules* such as `Res` should be avoided
+- `MorphoDict` should be self-contained, i.e. not inherit from other modules such as `Structural` or `Irreg`. But it is OK to `open` them in a qualified mode to use when defining linearizations.
+
+
+
+## Bootstrapping with `MkMorphoDict`
+
+Example run, English:
+```
+  gf -make ../english/DictEng.gf
+  runghc MkMorphodict.hs pgf MorphoDictEng.config DictEngAbs.pgf MorphoDictEng
+  ```
+Or, if you have raw data from another source, of the format "N woman women", you can do
+```
+  runghc MkMorphodict.hs raw MorphoDictEng.config raw_words_eng.txt MorphoDictEng
+  ```
+The script needs a *configuration file* mapping legacy categories and forms lists to parts of GF code:
+```
+  N : N mkN 0 2
+  A : A mkA 0 2 4 6
+  V : V mkV 0 4 2
+  V2 : V mkV 0 4 2
+  Adv : Adv mkAdv 0
+  Prep : Prep mkPrep 0
+```
+In addition, it needs *header files* containing lines to be prefixed to the generated files:
+```
+  concrete MorphoDictEng of MorphoDictEngAbs =
+    CatEng [N,A,V,Adv,Prep] **
+    open
+      ParadigmsEng
+    in
+   {
+```
+```
+  abstract MorphoDictEngAbs =
+    Cat [N,A,V,Adv,Prep] **
+  {
+```
+For more details, we refer to `MkMorphodict.hs` for the time being.
+
+If the config and header files are sound, the script produces compilable GF files.
+They also mostly comply to the guidelines given in this document.
+
+Some things TODO:
+- deal with multiwords such as "more regular" generated by Paradigms
+- use references to native Irreg files instead of very long smart paradigms
+- support increments in addition to overwrites
+
+
+
+## Things to do
+
+To support the construction of a `MorphoDict`, the following should be provided in `Paradigms`:
+- explicit smart paradigms with characteristic forms and inherent features for each category
+- API constants for all inherent features that are needed
+- compound-constructing functions for all categories that need them
+- the extra categories that one wants to include in that language
+
+
--- a/src/morphodict/morphodict.cabal
+++ b/src/morphodict/morphodict.cabal
@@ -0,0 +1,20 @@
+name: morphodict
+version: 0.1
+homepage: https://github.com/GrammaticalFramework/gf-rgl/tree/master/src/morphodict
+author: Aarne Ranta
+category: Natural Language Processing
+build-type: Simple
+extra-source-files: README.md
+cabal-version: >=1.10
+
+executable MkMorphoDict
+  hs-source-dirs:
+    .
+  main-is: MkMorphoDict.hs
+  other-modules:
+  build-depends:
+    base,
+    containers,
+    safe,
+    gf
+  default-language: Haskell2010
--- a/src/morphodict/stack.yaml
+++ b/src/morphodict/stack.yaml
@@ -0,0 +1,16 @@
+resolver: lts-12.26
+
+packages:
+- .
+
+# so that `stack build --copy-bins` puts bin here
+local-bin-path: .
+
+extra-deps:
+- gf-3.10
+- cgi-3001.3.0.3 # dependency of gf
+
+flags:
+     # this excludes PGF2 module in gf package
+  gf:
+    c-runtime: false
--- a/src/morphodict/stack.yaml.lock
+++ b/src/morphodict/stack.yaml.lock
@@ -0,0 +1,26 @@
+# This file was autogenerated by Stack.
+# You should not edit this file by hand.
+# For more information, please see the documentation at:
+#   https://docs.haskellstack.org/en/stable/lock_files
+
+packages:
+- completed:
+    hackage: gf-3.10@sha256:6f851dfaab5e1f9d4f3796515b86f78806a2bb305136a902713dfc2b92d9cfb0,8477
+    pantry-tree:
+      size: 64924
+      sha256: 66332577ff42a42eed767f451f53266e1020b72749cdcdf7387933615d5de091
+  original:
+    hackage: gf-3.10
+- completed:
+    hackage: cgi-3001.3.0.3@sha256:4f3768d09e4a6620642588cab2e99d83c1b6b542dad6147d0af9532170036115,2076
+    pantry-tree:
+      size: 667
+      sha256: 65f6fd4574cffd1e5e2490c133b7ba58fd2fea0a65d81f1fa6fe14f08025629b
+  original:
+    hackage: cgi-3001.3.0.3
+snapshots:
+- completed:
+    size: 509471
+    url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/12/26.yaml
+    sha256: 95f014df58d0679b1c4a2b7bf2b652b61da8d30de5f571abb0d59015ef678646
+  original: lts-12.26
--- a/src/morphodict/utils/only_homonyms.sh
+++ b/src/morphodict/utils/only_homonyms.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+USAGE="usage: ./remove_sense_distinctions.sh <concrete syntax file>"
+NOTE="This is not extremely useful, it will just create a file with only those entries that are homonymous in dictionary form, but differ in other forms. The purpose of the file is for you to look at/do small experiments with. The real job is done in MkMorphoDict.hs."
+
+# String manipulation
+CONC=$1                      # e.g. MorphoDictFin.gf
+BAK="$CONC.bak"              # e.g. MorphoDictFin.gf.bak
+
+NAME=`echo $CONC | cut -f 1 -d '.'` # e.g. MorphoDictFin
+ABS="${NAME}Abs.gf"          # e.g. MorphoDictFinAbs.gf
+CONC_HEADER="$NAME.header"   # e.g. MorphoDictFin.header
+ABS_HEADER="${NAME}Abs.header"  # e.g. MorphoDictFinAbs.header
+
+find_duplicates() {
+    echo "Putting (temporarily) only homonyms in $CONC"
+    echo "cat $CONC_HEADER > $CONC"
+    cat $CONC_HEADER > $CONC
+    DUPLS=`cut -f 2 -d ' ' /tmp/$CONC  \
+         | sort | uniq -c | sort -nr \
+         | egrep "^ +1?[2-9][0-9]? [a-zåäö]+_" \
+         | tr -d '[0-9][A-ZÅÄÖ]'`
+    for d in $DUPLS
+    do
+        grep "lin $d" $BAK >> $CONC
+    done
+    echo "}" >> $CONC
+}
+
+remove_numbers() {
+    echo "cp $CONC{,.bak}"
+    cp $CONC{,.bak}
+    echo "cat $CONC | sed -E 's/_[0-9]_/_/g' | uniq > /tmp/$CONC"
+    cat $CONC | sed -E 's/_[0-9]_/_/g' | uniq > /tmp/$CONC
+    echo "Done removing numbers."
+}
+
+#### Action starts here
+
+echo $NOTE
+
+if [[ $CONC == *"Abs.gf" ]]
+  then
+    echo $USAGE
+  else
+    remove_numbers
+    find_duplicates
+    # echo "gf -v=0 -make $CONC"
+    # gf -v=0 -make $CONC
+    echo "$CONC contains now only homonyms. Original file is found in $BAK."
+fi
--- a/src/swedish/ExtendSwe.gf
+++ b/src/swedish/ExtendSwe.gf
@@ -7,7 +7,7 @@ concrete ExtendSwe of Extend = CatSwe **
    StrandRelSlash, EmptyRelSlash, StrandQuestSlash,
    PassVPSlash, PassAgentVPSlash, UttVPShort, ByVP, InOrderToVP,
    MkVPI, BaseVPI, ConsVPI, ConjVPI, ComplVPIVV,
-    MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS,
+    MkVPS, BaseVPS, ConsVPS, ConjVPS, PredVPS, RelVPS,
    MkVPS2, ConjVPS2, ComplVPS2, ReflVPS2, MkVPI2, ConjVPI2, ComplVPI2,
    ICompAP,ProDrop,EmbedSSlash,
    AdAdV, PositAdVAdj, GerundCN, GerundNP, GerundAdv, PresPartAP, PastPartAP, PastPartAgentAP,
@@ -94,22 +94,6 @@ in {
              {c2 = a2.c2} ; -- has the right c2


-    N2VPSlash n2 =
-      let vp : CatSwe.VP = UseComp (CompCN (UseN2 n2)) ;
-          dummyVPS : VPSlash = SlashV2a (P.mkV2 "dummy") ;
-      in dummyVPS **  -- has necessary fields for VPSlash
-               vp **  -- has all the right fields except for c2
-              {c2 = n2.c2} ; -- has the right c2
-
-
-
-    A2VPSlash a2 =
-      let vp : CatSwe.VP = UseComp (CompAP (UseA2 a2)) ;
-          dummyVPS : VPSlash = SlashV2a (P.mkV2 "dummy") ;
-      in dummyVPS **  -- has necessary fields for VPSlash
-               vp **  -- has all the right fields except for c2
-              {c2 = a2.c2} ; -- has the right c2
-
  lin UttVPShort vp = {s = infVP vp (agrP3 Utr Sg)} ;

  lincat
@@ -148,6 +132,16 @@ in {
            }
        } ;

+    RelVPS rp vps = {
+      s = \\ag,rcase =>
+        let agr = case rp.a of {  -- RP's agr may override in the regular RelativeScand, is this true with VPS too?
+                    RNoAg => ag ;
+                    RAg g n p => {g = g ; n = n ; p = p}
+                  } ;
+         in rp.s ! ag.g ! ag.n ! rcase ++ vps.s ! Sub ! agr ;
+      c = NPNom
+      } ;
+
    MkVPS t p vp = {
      s = \\o,a =>
            let
--- a/src/swedish/ParadigmsSwe.gf
+++ b/src/swedish/ParadigmsSwe.gf
@@ -44,6 +44,7 @@ oper

  utrum     : Gender ;  -- the "en" gender
  neutrum   : Gender ;  -- the "ett" gender
+  neuter    : Gender ;  -- synonym of neutrum 

 -- To abstract over number names, we define the following.

@@ -151,6 +152,7 @@ oper
    mkPN : (jesus,jesu : Str) -> Gender -> PN -- irregular genitive
    } ;

+    geoPN : Str -> PN ;  -- neuter, with identical genitive if ends in a vowel

 --2 Adjectives

@@ -348,6 +350,7 @@ oper
  Case = CommonScand.Case ;
  utrum = Utr ; 
  neutrum = Neutr ;
+  neuter = Neutr ;
  singular = Sg ;
  plural = Pl ;
  nominative = Nom ;
@@ -525,6 +528,14 @@ oper
      {s = table {Nom => jesus ; Gen => jesu} ; g = g ; lock_PN = <>} ;
    } ;

+  geoPN name =
+    let names : Str = case name of {
+      _ + ("a"|"e"|"i"|"o"|"u"|"y"|"å"|"ä"|"ö") => name ;
+      _ + "s" => name ;
+      _ => name + "s"
+    } in
+    mkPN name names neutrum ;
+
  regPN n = regGenPN n utrum ;
  regGenPN n g = {s = \\c => mkCase c n ; g = g} ** {lock_PN = <>} ;
  nounPN n = {s = n.s ! singular ! Indef ; g = n.g ; lock_PN = <>} ;