forked from GitHub/gf-core
new DictEngFin in finnish/stemmed. Only 9k lemmas missing.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -2,12 +2,21 @@ import qualified Data.Set as S
|
|||||||
|
|
||||||
-- comment out words that are predefined in another lexicon
|
-- comment out words that are predefined in another lexicon
|
||||||
-- runghc ElimPredef.hs <DictEngFin.gf
|
-- runghc ElimPredef.hs <DictEngFin.gf
|
||||||
|
-- removeFile = "predef.txt"
|
||||||
|
-- removeMsg = "PREDEF"
|
||||||
|
|
||||||
|
-- also used for temporarily eliminating whatever from compilation
|
||||||
|
--removeFile = "commentOut"
|
||||||
|
--removeMsg = "POSTPONE"
|
||||||
|
|
||||||
|
-- File listing the words to comment out; only the first word of each line is used.
removeFile :: FilePath
removeFile = "t-nouns"
|
||||||
|
-- Tag written right after the leading "--" on every line that gets commented out.
removeMsg :: String
removeMsg = "PLURNOUN"
|
||||||
|
|
||||||
main = do
|
main = do
|
||||||
predefs <- readFile "predef.txt" >>= return . S.fromList . map (head . words) . lines
|
-- Collect the first word of every non-blank line of removeFile; blank lines
-- are skipped rather than crashing on the partial `head`.
predefs <- fmap (S.fromList . concatMap (take 1 . words) . lines) (readFile removeFile)
|
||||||
interact (unlines . map (elimPredef predefs) . lines)
|
interact (unlines . map (elimPredef predefs) . lines)
|
||||||
|
|
||||||
elimPredef predefs line = case words line of
|
elimPredef predefs line = case words line of
|
||||||
w:_ | S.member w predefs -> "--PREDEF " ++ line
|
w:_ | S.member w predefs -> "--" ++ removeMsg ++ " " ++ line
|
||||||
_ -> line
|
_ -> line
|
||||||
|
|
||||||
|
|||||||
@@ -655,12 +655,18 @@ oper
|
|||||||
A2V : Type = A2 ;
|
A2V : Type = A2 ;
|
||||||
|
|
||||||
mkV0 v = v ** {lock_V = <>} ;
|
mkV0 v = v ** {lock_V = <>} ;
|
||||||
|
mkV2Sbare : V -> V2S = \v -> mkV2S v (casePrep allative) ; ----
|
||||||
|
|
||||||
mkV2S v p = mk2V2 v p ** {lock_V2S = <>} ;
|
mkV2S v p = mk2V2 v p ** {lock_V2S = <>} ;
|
||||||
|
mkV2Vbare : V -> V2V = \v -> mkV2V v (casePrep partitive) ; ----
|
||||||
mkV2V v p = mkV2Vf v p infIllat ;
|
mkV2V v p = mkV2Vf v p infIllat ;
|
||||||
mkV2Vf v p f = mk2V2 v p ** {vi = f ; lock_V2V = <>} ;
|
mkV2Vf v p f = mk2V2 v p ** {vi = f ; lock_V2V = <>} ;
|
||||||
|
|
||||||
|
mkVAbare : V -> VA = \v -> mkVA v (casePrep partitive) ; ----
|
||||||
mkVA v p = v ** {c2 = p ; lock_VA = <>} ;
|
mkVA v p = v ** {c2 = p ; lock_VA = <>} ;
|
||||||
|
mkV2Abare : V -> V2A = \v -> mkV2A v (casePrep partitive) (casePrep translative) ;
|
||||||
mkV2A v p q = v ** {c2 = p ; c3 = q ; lock_V2A = <>} ;
|
mkV2A v p q = v ** {c2 = p ; c3 = q ; lock_V2A = <>} ;
|
||||||
|
mkV2Qbare : V -> V2Q = \v -> mkV2Q v (casePrep ablative) ; ----
|
||||||
mkV2Q v p = mk2V2 v p ** {lock_V2Q = <>} ;
|
mkV2Q v p = mk2V2 v p ** {lock_V2Q = <>} ;
|
||||||
|
|
||||||
mkAS v = v ** {lock_A = <>} ;
|
mkAS v = v ** {lock_A = <>} ;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
--# -path=.:..:../../abstract:../../common:../../english:../kotus
|
--# -path=.:..:../../abstract:../../common:../../english:../kotus
|
||||||
|
|
||||||
resource WNKotus = open Kotus, MorphoFin, ParadigmsFin, Prelude in {
|
resource WNKotus = open Kotus, MorphoFin, ParadigmsFin, CatFin, StemFin, Prelude in {
|
||||||
|
|
||||||
-- interpretations of paradigms in KOTUS word list, used in DictFin built with the Finnish Wordnet
|
-- interpretations of paradigms in KOTUS word list, used in DictFin built with the Finnish Wordnet
|
||||||
|
|
||||||
@@ -10,42 +10,130 @@ oper
|
|||||||
|
|
||||||
-- lexicon constructors
|
-- lexicon constructors
|
||||||
|
|
||||||
compoundN : Str -> NForms -> N
|
separateN : Str -> N -> N = \s,n -> mkN (s + "_") n ;
|
||||||
|
|
||||||
|
compoundN : Str -> NForms -> N = \s,nf -> lin N (mkStrN s (nforms2snoun nf)) ;
|
||||||
|
|
||||||
|
compoundA : Str -> NForms -> N = \s,nf -> lin N (mkStrN s (nforms2snoun nf)) ; --- NOTE(review): same type and body as compoundN (result is N, not A) - confirm this is intended
|
||||||
|
|
||||||
|
compoundAdv = overload {
|
||||||
|
compoundAdv : Str -> NForms -> Adv = \s,nf -> mkAdv (s + nf ! 0) ;
|
||||||
|
compoundAdv : Str -> Str -> Adv = \s,t -> mkAdv (s + t) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
compoundV : Str -> VForms -> V = \s,vf -> mkV (lin VK {s = table (Predef.Ints 11) {f => s + vf ! f}}) ;
|
||||||
|
|
||||||
mkWN = overload {
|
mkWN = overload {
|
||||||
mkWN : (_ : Str) -> N = \s -> mkN s ;
|
mkWN : (_ : Str) -> N = \s -> mkN s ;
|
||||||
mkWN : (_,_ : Str) -> N = \s,p -> mkN (s ++ p) ;
|
mkWN : (_,_ : Str) -> N = \s,t -> separateN s (mkN t);
|
||||||
mkWN : (_,_,_ : Str) -> N = \s,p,q -> mkN (s ++ p ++ q) ;
|
mkWN : (_ : NForms) -> N = \nf -> lin N (nforms2snoun nf) ;
|
||||||
mkWN : (_,_,_,_ : Str) -> N = \s,p,q,r -> mkN (s ++ p ++ q ++ r) ;
|
mkWN : NForms -> Str -> N = \s,t -> separateN t (lin N (nforms2snoun s)) ;
|
||||||
mkWN : (_,_,_,_,_ : Str) -> N = \s,p,q,r,x -> mkN (s ++ p ++ q ++ r ++ x) ;
|
mkWN : NForms -> Str -> Str -> N = \s,t,u -> separateN (t ++ u) (lin N (nforms2snoun s)) ;
|
||||||
mkWN : (_,_,_,_,_,_ : Str) -> N = \s,p,q,r,x,y -> mkN (s ++ p ++ q ++ r ++ x ++ y) ;
|
mkWN : (_ : N) -> N = \n -> n ;
|
||||||
mkWN : (_,_,_,_,_,_,_ : Str) -> N = \s,p,q,r,x,y,z -> mkN (s ++ p ++ q ++ r ++ x ++ y ++ z) ;
|
mkWN : N -> Str -> N = \n,s -> separateN s n ; --- emansipaation kannattaja
|
||||||
mkWN : (_,_,_,_,_,_,_,_ : Str) -> N = \s,p,q,r,x,y,z,u -> mkN (s ++ p ++ q ++ r ++ x ++ y ++ z ++ u) ;
|
mkWN : N -> (_,_ : Str) -> N = \n,s,t -> separateN (s ++ t) n ; --- silmäluomien synnynnäinen puuttuminen
|
||||||
mkWN : (_,_,_,_,_,_,_,_,_ : Str) -> N = \s,p,q,r,x,y,z,u,v -> mkN (s ++ p ++ q ++ r ++ x ++ y ++ z ++ u ++ v) ;
|
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
|
||||||
mkWA = overload {
|
mkWA = overload {
|
||||||
mkWA : (_ : Str) -> A = \s -> mkA s ;
|
mkWA : (_ : Str) -> A = \s -> mkA s ;
|
||||||
mkWA : (_,_ : Str) -> A = \s,p -> mkA (s ++ p) ;
|
mkWA : (_,_ : Str) -> A = \s,t -> mkA (separateN s (mkN t));
|
||||||
mkWA : (_,_,_ : Str) -> A = \s,p,q -> mkA (s ++ p ++ q) ;
|
mkWA : (_ : NForms) -> A = \nf -> mkA (lin N (nforms2snoun nf)) ;
|
||||||
} ;
|
mkWA : NForms -> Str -> A = \s,t -> mkA (separateN t (lin N (nforms2snoun s))) ; --- NForms variant: t is attached through separateN, i.e. as (t + "_")
|
||||||
|
mkWA : (_ : N) -> A = \n -> mkA n ;
|
||||||
mkWV = overload {
|
mkWA : N -> Str -> A = \n,s -> mkA (separateN s n) ; --- emansipaation kannattaja
|
||||||
mkWV : (_ : Str) -> V = \s -> mkV s ;
|
mkWA : N -> (_,_ : Str) -> A = \n,s,t -> mkA (separateN (s ++ t) n) ; --- silmäluomien synnynnäinen puuttuminen
|
||||||
mkWV : (_,_ : Str) -> V = \s,p -> partV (mkV s) p ;
|
|
||||||
mkWV : (_,_,_ : Str) -> V = \s,p,q -> partV (mkV s) (p ++ q) ;
|
|
||||||
mkWV : (_,_,_,_ : Str) -> V = \s,p,q,r -> partV (mkV s) (p ++ q ++ r) ;
|
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
mkWAdv = overload {
|
mkWAdv = overload {
|
||||||
mkWAdv : (_ : Str) -> WAdv = \s -> ParadigmsEng.mkAdv s ;
|
mkWAdv : (_ : Str) -> Adv = \s -> mkAdv s ;
|
||||||
mkWAdv : (_,_ : Str) -> WAdv = \s,p -> ParadigmsEng.mkAdv (s ++ p) ;
|
mkWAdv : (_ : Adv) -> Adv = \a -> a ;
|
||||||
mkWAdv : (_,_,_ : Str) -> WAdv = \s,p,q -> ParadigmsEng.mkAdv (s ++ p ++ q) ;
|
mkWAdv : NForms -> Adv = \nf -> mkAdv (nf ! 0) ;
|
||||||
mkWAdv : (_,_,_,_ : Str) -> WAdv = \s,p,q,r -> ParadigmsEng.mkAdv (s ++ p ++ q ++ r) ;
|
mkWAdv : Adv -> Str -> Adv = \a,s -> mkAdv (s ++ a.s) ;
|
||||||
mkWAdv : (_,_,_,_,_ : Str) -> WAdv = \s,p,q,r,s -> ParadigmsEng.mkAdv (s ++ p ++ q ++ r ++ s) ;
|
mkWAdv : (_,_ : Str) -> Adv = \s,p -> mkAdv (s ++ p) ;
|
||||||
|
mkWAdv : (_,_,_ : Str) -> Adv = \s,p,q -> mkAdv (s ++ p ++ q) ;
|
||||||
} ;
|
} ;
|
||||||
|
|
||||||
|
mkWV = overload {
|
||||||
|
mkWV : (_ : Str) -> V = \s -> mkV s ;
|
||||||
|
mkWV : (_ : VForms) -> V = \vf -> mkV (lin VK {s = vf}) ;
|
||||||
|
mkWV : (_ : V) -> V = \v -> v ;
|
||||||
|
mkWV : VForms -> Str -> V = \vf,s -> mkV (mkV (lin VK {s = vf})) s ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWV2 = overload {
|
||||||
|
mkWV2 : (_ : Str) -> V2 = \s -> mkV2 s ;
|
||||||
|
mkWV2 : (_ : VForms) -> V2 = \vf -> mkV2 (lin VK {s = vf}) ;
|
||||||
|
mkWV2 : (_ : V) -> V2 = \v -> mkV2 v ;
|
||||||
|
mkWV2 : VForms -> Str -> V2 = \vf,s -> mkV2 (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWV3 = overload {
|
||||||
|
mkWV3 : (_ : Str) -> V3 = \s -> dirdirV3 (mkV s) ;
|
||||||
|
mkWV3 : (_ : VForms) -> V3 = \vf -> dirdirV3 (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWV3 : (_ : V) -> V3 = \v -> dirdirV3 v ;
|
||||||
|
mkWV3 : VForms -> Str -> V3 = \vf,s -> dirdirV3 (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
|
||||||
|
mkWVV = overload {
|
||||||
|
mkWVV : (_ : Str) -> VV = \s -> mkVV (mkV s) ;
|
||||||
|
mkWVV : (_ : VForms) -> VV = \vf -> mkVV (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWVV : (_ : V) -> VV = \v -> mkVV v ;
|
||||||
|
mkWVV : VForms -> Str -> VV = \vf,s -> mkVV (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWVS = overload {
|
||||||
|
mkWVS : (_ : Str) -> VS = \s -> mkVS (mkV s) ;
|
||||||
|
mkWVS : (_ : VForms) -> VS = \vf -> mkVS (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWVS : (_ : V) -> VS = \v -> mkVS v ;
|
||||||
|
mkWVS : VForms -> Str -> VS = \vf,s -> mkVS (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWVQ = overload {
|
||||||
|
mkWVQ : (_ : Str) -> VQ = \s -> mkVQ (mkV s) ;
|
||||||
|
mkWVQ : (_ : VForms) -> VQ = \vf -> mkVQ (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWVQ : (_ : V) -> VQ = \v -> mkVQ v ;
|
||||||
|
mkWVQ : VForms -> Str -> VQ = \vf,s -> mkVQ (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWV2V = overload {
|
||||||
|
mkWV2V : (_ : Str) -> V2V = \s -> mkV2Vbare (mkV s) ;
|
||||||
|
mkWV2V : (_ : VForms) -> V2V = \vf -> mkV2Vbare (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWV2V : (_ : V) -> V2V = \v -> mkV2Vbare v ;
|
||||||
|
mkWV2V : VForms -> Str -> V2V = \vf,s -> mkV2Vbare (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWVA = overload {
|
||||||
|
mkWVA : (_ : Str) -> VA = \s -> mkVAbare (mkV s) ;
|
||||||
|
mkWVA : (_ : VForms) -> VA = \vf -> mkVAbare (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWVA : (_ : V) -> VA = \v -> mkVAbare v ;
|
||||||
|
mkWVA : VForms -> Str -> VA = \vf,s -> mkVAbare (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWV2A = overload {
|
||||||
|
mkWV2A : (_ : Str) -> V2A = \s -> mkV2Abare (mkV s) ;
|
||||||
|
mkWV2A : (_ : VForms) -> V2A = \vf -> mkV2Abare (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWV2A : (_ : V) -> V2A = \v -> mkV2Abare v ;
|
||||||
|
mkWV2A : VForms -> Str -> V2A = \vf,s -> mkV2Abare (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWV2Q = overload {
|
||||||
|
mkWV2Q : (_ : Str) -> V2Q = \s -> mkV2Qbare (mkV s) ;
|
||||||
|
mkWV2Q : (_ : VForms) -> V2Q = \vf -> mkV2Qbare (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWV2Q : (_ : V) -> V2Q = \v -> mkV2Qbare v ;
|
||||||
|
mkWV2Q : VForms -> Str -> V2Q = \vf,s -> mkV2Qbare (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWV2S = overload {
|
||||||
|
mkWV2S : (_ : Str) -> V2S = \s -> mkV2Sbare (mkV s) ;
|
||||||
|
mkWV2S : (_ : VForms) -> V2S = \vf -> mkV2Sbare (mkV (lin VK {s = vf})) ;
|
||||||
|
mkWV2S : (_ : V) -> V2S = \v -> mkV2Sbare v ;
|
||||||
|
mkWV2S : VForms -> Str -> V2S = \vf,s -> mkV2Sbare (mkV (mkV (lin VK {s = vf})) s) ;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
mkWAdV : Str -> AdV = \s -> lin AdV (ss s) ;
|
||||||
|
mkWAdA : Str -> AdA = \s -> lin AdA (ss s) ;
|
||||||
|
mkWAdN : Str -> AdN = \s -> lin AdN (ss s) ;
|
||||||
|
|
||||||
-- kotus paradigms
|
-- kotus paradigms
|
||||||
|
|
||||||
@@ -232,12 +320,7 @@ oper
|
|||||||
} ;
|
} ;
|
||||||
k49A : Str -> NForms -- 11 vemmel
|
k49A : Str -> NForms -- 11 vemmel
|
||||||
= \s -> dPiennar s (strongGrade (init s) + "len") ;
|
= \s -> dPiennar s (strongGrade (init s) + "len") ;
|
||||||
{-
|
|
||||||
k50 : Str -> NForms -- 520 vääräsääri
|
|
||||||
= \s -> ;
|
|
||||||
k51 : Str -> NForms -- 62 vierasmies
|
|
||||||
= \s -> ;
|
|
||||||
-}
|
|
||||||
k52 : Str -> VForms -- 667 ärjyä
|
k52 : Str -> VForms -- 667 ärjyä
|
||||||
= \s -> cHukkua s (init s + "n") ;
|
= \s -> cHukkua s (init s + "n") ;
|
||||||
k52A : Str -> VForms -- 1568 öljyyntyä
|
k52A : Str -> VForms -- 1568 öljyyntyä
|
||||||
@@ -349,5 +432,14 @@ oper
|
|||||||
kccompoundNK : (Str -> NForms) -> Str -> Str -> NForms = \d,x,y ->
|
kccompoundNK : (Str -> NForms) -> Str -> Str -> NForms = \d,x,y ->
|
||||||
let ys = d y in \\v => x + ys ! v ;
|
let ys = d y in \\v => x + ys ! v ;
|
||||||
|
|
||||||
|
---- remnants of erroneous annotations
|
||||||
|
|
||||||
|
k50 : Str -> N ---- Forms -- 520 vääräsääri
|
||||||
|
= \s -> mkN s ;
|
||||||
|
k51 : Str -> N ---- Forms -- 62 vierasmies
|
||||||
|
= \s -> mkN s ;
|
||||||
|
kH1 : Str -> N ---- Forms -- remnant of homonym information
|
||||||
|
= \s -> mkN s ;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
11
lib/src/finnish/stemmed/log.txt
Normal file
11
lib/src/finnish/stemmed/log.txt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
AR 28/3/2013
|
||||||
|
|
||||||
|
26/3 Morphology from Kotus.
|
||||||
|
|
||||||
|
27/3 Senses from Princeton.
|
||||||
|
|
||||||
|
27/3
|
||||||
|
Designed new paradigms. Filtered problematic/illegal things (PLURNOUN, ILLEGALVERB, POSTPONE, TODO).
|
||||||
|
Just 9035 lemmas missing now.
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user