forked from GitHub/gf-core
finnish kotus word list
This commit is contained in:
41
examples/uusisuomi/kotus/Kotus.hs
Normal file
41
examples/uusisuomi/kotus/Kotus.hs
Normal file
@@ -0,0 +1,41 @@
|
||||
import List
|
||||
import Char
|
||||
|
||||
-- | Path of the input file that 'main' reads, one entry per line.
kotus :: FilePath
kotus = "sanat.xxmmll"
|
||||
|
||||
-- | Read the word list named by 'kotus', analyse every line, and print one
-- rule skeleton per paradigm (as produced by 'treat') to standard output.
main :: IO ()
main = do
  entries <- fmap lines (readFile kotus)
  -- mapM_ rather than mapM: only the printing matters, so there is no need
  -- to build and return a list of unit results.
  mapM_ putStrLn (treat (map analyse entries))
|
||||
|
||||
-- | Turn analysed @(paradigm, word)@ pairs into rule texts: one string from
-- 'mkRule' for every summary returned by 'paradigms'.
treat :: [(String, String)] -> [String]
treat entries = [mkRule summary | summary <- paradigms entries]
|
||||
|
||||
-- | Render the skeleton of one rule from a paradigm summary.
--
-- The argument is @((paradigm, example), count)@.  The output consists of a
-- signature line naming the paradigm, with the count and the example word in
-- a trailing comment, followed by an empty definition stub.
--
-- The first two characters of the paradigm key are parsed as a number with
-- 'read', so they must be digits.  Keys below 52 get the prefix @d@ and the
-- @NForms@ result type; all others get @c@ and @VForms@ (presumably nominal
-- versus verbal paradigms -- confirm against the word list documentation).
mkRule :: Show a => ((String, String), a) -> String
mkRule ((pa, ex), nu) =
  concat
    [ " ", prefix, pa, " : Str -> ", category, "Forms -- "
    , show nu, " ", ex, "\n = \\s -> ;"
    ]
  where
    (prefix, category)
      | read (take 2 pa) < (52 :: Integer) = ("d", "N")
      | otherwise = ("c", "V")
|
||||
|
||||
-- | Sort the @(paradigm, word)@ pairs, group them by paradigm key with
-- 'groupByFst', and summarise every group with 'info'.
paradigms :: [(String, String)] -> [((String, String), Int)]
paradigms entries = [info grp | grp <- groupByFst (sort entries)]
|
||||
|
||||
-- | Summarise a group as its last member paired with the group size.
-- The first component is undefined for an empty list ('last' is partial).
info :: [a] -> (a, Int)
info members = (representative, size)
  where
    representative = last members
    size = length members
|
||||
--info = last
|
||||
|
||||
-- | Split a list of pairs into runs of adjacent pairs that share the same
-- first component.  Only neighbouring elements are merged.
groupByFst :: Eq a => [(a, b)] -> [[(a, b)]]
groupByFst pairs = groupBy sameKey pairs
  where
    sameKey (k1, _) (k2, _) = k1 == k2
|
||||
|
||||
-- <st><s>aaloe</s><t><tn>3</tn></t></st>
|
||||
-- <st><s>vuoksi</s><hn>1</hn><t><tn>7</tn></t></st>
|
||||
-- <st><s>visiitti</s><t><tn>5</tn><av>C</av></t></st>
|
||||
|
||||
-- | Pull the paradigm key and the word out of one line of the word list.
--
-- The parsing is purely positional and follows the sample lines quoted in
-- the comments above this definition; it is not a general XML parser.
-- Returns @(paradigm, word)@, where the paradigm is the number padded with
-- leading zeros to two digits, followed by @A@ when an @av@ tag comes
-- directly after the number's closing tag.
analyse :: String -> (String, String)
analyse line = (paradigm, lemma)
  where
    -- Skip the first 7 characters (the length of "<st><s>" in the samples);
    -- the word runs up to the next '<'.
    (lemma, afterLemma) = break (== '<') (drop 7 line)
    -- Jump to the first 't' after the word, then skip 6 characters (the
    -- length of "t><tn>" in the samples) to reach the number.
    (digits, afterDigits) =
      span isDigit (drop 6 (dropWhile (/= 't') afterLemma))
    padding = replicate (2 - length digits) '0'
    -- Skipping 6 more characters ("</tn><" in the samples) lands on the
    -- name of the following tag.
    gradation = if "av" `isPrefixOf` drop 6 afterDigits then "A" else ""
    paradigm = padding ++ digits ++ gradation
|
||||
|
||||
-- | True when @cs@ is a prefix of @s@, or of @s@ with its first element
-- dropped.
sub :: Eq a => [a] -> [a] -> Bool
sub cs s = any (isPrefixOf cs) [s, drop 1 s]
|
||||
|
||||
28
examples/uusisuomi/kotus/kotus-sanalista.dtd
Normal file
28
examples/uusisuomi/kotus/kotus-sanalista.dtd
Normal file
@@ -0,0 +1,28 @@
|
||||
<!--
|
||||
Copyright (C) Kotimaisten kielten tutkimuskeskus 2006
|
||||
Kotimaisten kielten tutkimuskeskuksen nykysuomen sanalista, versio 1
|
||||
Julkaistu 15.12.2006
|
||||
|
||||
Sanalista julkaistaan GNU LGPL -lisenssillä.
|
||||
Lisenssiteksti luettavissa osoitteessa http://www.gnu.org/licenses/lgpl.html
|
||||
Listaan perustuvien sovellusten mukana on aina toimitettava alkuperäinen
|
||||
sanalista lukukelpoisessa muodossaan.
|
||||
-->
|
||||
|
||||
<!-- Root element: a sequence of zero or more st entries. -->
<!ELEMENT kotus-sanalista (st*) >
|
||||
|
||||
<!-- One entry: exactly one s, an optional hn, then zero or more t elements, in that order. -->
<!ELEMENT st (s, hn?, t*) >
|
||||
|
||||
<!-- Text-only element; in the sample entries quoted in Kotus.hs it holds the word itself. -->
<!ELEMENT s (#PCDATA) >
|
||||
|
||||
<!-- Text-only element; a number in the sample entry quoted in Kotus.hs (presumably a homonym index; confirm). -->
<!ELEMENT hn (#PCDATA) >
|
||||
|
||||
<!-- Zero or more repetitions of a tn followed by an optional av. -->
<!ELEMENT t (tn, av?)* >
|
||||
<!-- Optional free-text attribute taivutus on t (presumably an inflection note; confirm). -->
<!ATTLIST t taivutus CDATA #IMPLIED>
|
||||
|
||||
<!-- Text-only element; a number in the sample entries quoted in Kotus.hs, which the script uses as the paradigm key. -->
<!ELEMENT tn (#PCDATA) >
|
||||
|
||||
<!-- Text-only element; a single letter in the sample entry quoted in Kotus.hs (presumably a gradation class; confirm). -->
<!ELEMENT av (#PCDATA) >
|
||||
<!-- Optional free-text attribute astevaihtelu on av. -->
<!ATTLIST av astevaihtelu CDATA #IMPLIED>
|
||||
|
||||
|
||||
94125
examples/uusisuomi/kotus/kotus-sanalista_v1.xml
Normal file
94125
examples/uusisuomi/kotus/kotus-sanalista_v1.xml
Normal file
File diff suppressed because it is too large
Load Diff
44360
examples/uusisuomi/kotus/sanat.xxmmll
Normal file
44360
examples/uusisuomi/kotus/sanat.xxmmll
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user