From 009f05ea5630b0cf27a25c4a200cb8a0b8c37cb6 Mon Sep 17 00:00:00 2001 From: aarne Date: Thu, 15 May 2008 11:46:27 +0000 Subject: [PATCH] IIT updates to Hindi --- lib/resource-1.4/hdeva.hs | 145 +++++++++++++++++++++++++++++++ lib/resource-1.4/hindi/ResHin.gf | 33 ++++--- 2 files changed, 164 insertions(+), 14 deletions(-) create mode 100644 lib/resource-1.4/hdeva.hs diff --git a/lib/resource-1.4/hdeva.hs b/lib/resource-1.4/hdeva.hs new file mode 100644 index 000000000..7976c93f1 --- /dev/null +++ b/lib/resource-1.4/hdeva.hs @@ -0,0 +1,145 @@ +import System + +main = do + s <- getContents + let ofile = "devaout.html" + writeFile ofile "\n

\n" + appendFile ofile $ udevap s + appendFile ofile "\n

\n" + system ("open " ++ ofile) + +---main = interact udeva + +udevap :: String -> String +udevap = unlines . map (unwords . ("

":). map udevaWord . words) . lines + +udeva :: String -> String +udeva = unlines . map (unwords . map udevaWord . words) . lines + +udevaGF :: String -> String +udevaGF s = case s of + '"':cs -> let (w,q:rest) = span (/='"') cs in '"' : udevaWord w ++ [q] ++ udevaGF rest + c :cs -> c : udevaGF cs + _ -> s + +udevaWord = encodeUTF8 . str2deva + +str2deva :: String -> String +str2deva s = map toEnum $ case chop s of + c:cs -> encodeInit c : map encode cs + _ -> [] + +chop s = case s of + ['-'] -> [s] + '-' :cs -> let (c:r) = chop cs in ('-':c) : r -- to force initial vowel + '+' :cs -> let (c:r) = chop cs in ('+':c) : r -- to force non-initial vowel + v:':':cs -> [v,':'] : chop cs + v:'.':cs -> [v,'.'] : chop cs + c:'a':cs -> [c] : chop cs + c :cs -> [c] : chop cs + _ -> [] + +encodeInit :: String -> Int +encodeInit s = case s of + '+':c -> encode c + '-':c -> encodeInit c + "a" -> 0x0905 + "a:" -> 0x0906 + "i" -> 0x0907 + "i:" -> 0x0908 + "u" -> 0x0909 + "u:" -> 0x090a + "r:" -> 0x090b + "e" -> 0x090f + "E" -> 0x0910 + "o" -> 0x0913 + "O" -> 0x0914 + _ -> encode s + +encode :: String -> Int +encode s = case s of + "k" -> 0x0915 + "K" -> 0x0916 + "g" -> 0x0917 + "G" -> 0x0918 + "N:" -> 0x0919 + + "c" -> 0x091a + "C" -> 0x091b + "j" -> 0x091c + "J" -> 0x091d + "n:" -> 0x091e + + "t." -> 0x091f + "T." -> 0x0920 + "d." -> 0x0921 + "D." -> 0x0922 + "n." -> 0x0923 + + "t" -> 0x0924 + "T" -> 0x0925 + "d" -> 0x0926 + "D" -> 0x0927 + "n" -> 0x0928 + + "p" -> 0x092a + "P" -> 0x092b + "b" -> 0x092c + "B" -> 0x092d + "m" -> 0x092e + + "y" -> 0x092f + "r" -> 0x0930 + "l" -> 0x0932 + "v" -> 0x0935 + + "S" -> 0x0936 + "s." -> 0x0937 + "s" -> 0x0938 + "h" -> 0x0939 + + "z" -> 0x095b + "R" -> 0x095c + + "a:" -> 0x093e + "i" -> 0x093f + "i:" -> 0x0940 + "u" -> 0x0941 + "u:" -> 0x0942 + "r:" -> 0x0943 + "e" -> 0x0947 + "E" -> 0x0948 + "o" -> 0x094b + "O" -> 0x094c + + "~" -> 0x0901 + "*" -> 0x0902 + + " " -> space + "\n" -> fromEnum '\n' + + '-':c -> encodeInit c + '+':c -> encode c + + _ -> 0x093e --- a: + + +space = fromEnum ' ' + + +encodeUTF8 :: String -> String +encodeUTF8 "" = "" +encodeUTF8 (c:cs) = + if c > '\x0000' && c < '\x0080' then + c : encodeUTF8 cs + else if c < toEnum 0x0800 then + let i = fromEnum c + in toEnum (0xc0 + i `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + encodeUTF8 cs + else + let i = fromEnum c + in toEnum (0xe0 + i `div` 0x1000) : + toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) : + toEnum (0x80 + i `mod` 0x40) : + encodeUTF8 cs diff --git a/lib/resource-1.4/hindi/ResHin.gf b/lib/resource-1.4/hindi/ResHin.gf index f59442ad6..b8ed909d4 100644 --- a/lib/resource-1.4/hindi/ResHin.gf +++ b/lib/resource-1.4/hindi/ResHin.gf @@ -16,21 +16,27 @@ resource ResHin = ParamX ** open Prelude in { oper Noun = {s : Number => Case => Str ; g : Gender} ; - mkNoun : (x1,_,_,_,_,x6 : Str) -> Gender -> Noun = \sd,so,sv,pd,po,pv,g -> { + mkNoun : (x1,_,_,_,_,x6 : Str) -> Gender -> Noun = + \sd,so,sv,pd,po,pv,g -> { s = table Number [table Case [sd;so;sv] ; table Case [pd;po;pv]] ; g = g } ; reggNoun : Str -> Gender -> Noun = \s,g -> case of { - <-(_ + ("+a:" | "+i:")), Fem> => mkNoun s s s (s + "+e~") (s + "+o~") (s + "+o") Fem ; + <-(_ + ("a:" | "i:")), Fem> => + mkNoun s s s (s + "e~") (s + "o~") (s + "o") Fem ; _ => regNoun s ** {g = g} } ; regNoun : Str -> Noun = \s -> case s of { - x + "+iya:" => mkNoun s s s (x + "+iya:~") (x + "+iyo*") (x + "+iyo") Fem ; - x + "+a:" => mkNoun s (x + "+e") (x + "+e") (x + "+e") (x + "+o*") (x + "+o") Masc ; - x + "+i:" => mkNoun s s s (x + "+iya:~") (x + "+iyo*") (x + "+iyo") Fem ; - _ => mkNoun s s s s (s + "+o*") (s + "+o") Masc + x + "iya:" => + mkNoun s s s (x + "iya:~") (x + "iyo*") (x + "iyo") Fem ; + x + "a:" => + mkNoun s (x + "e") (x + "e") (x + "e") (x + "o*") (x + "o") Masc ; + x + "i:" => + mkNoun s s s (x + "iya:~") (x + "iyo*") (x + "iyo") Fem ; + _ => + mkNoun s s s s (s + "o*") (s + "o") Masc } ; @@ -45,7 +51,7 @@ resource ResHin = ParamX ** open Prelude in { } ; regAdjective : Str -> Adjective = \s -> case s of { - acch + "+a:" => mkAdjective s (acch + "+e") (acch + "+i:") ; + acch + "a:" => mkAdjective s (acch + "e") (acch + "i:") ; _ => mkAdjective s s s } ; @@ -98,19 +104,18 @@ resource ResHin = ParamX ** open Prelude in { regVerb : Str -> Verb = \cal -> let caly : Str = case cal of { - _ + ("+a:" | "+e") => cal + "+y" ; - c + "+u:" => c + "+uy" ; - c + "+i:" => c + "+iy" ; ---- c + v@("+u" | "+i") + ":" => c + v + "+y" ; + _ + ("a:" | "e") => cal + "y" ; + c + "u:" => c + "uy" ; + c + "i:" => c + "iy" ; _ => cal } in mkVerb (cal + "na:") cal (cal + "ta:") (cal + "te") (cal + "ti:") (cal + "ti:") - (caly + "+a:") (caly + "+e") (caly + "+i:") (caly + "+i:*") - (caly + "+u:~") (caly + "+e") (caly + "+o") (caly + "+e*") - (caly + "+i-e") ; + (caly + "a:") (caly + "e") (caly + "i:") (caly + "i:*") + (caly + "u:~") (caly + "e") (caly + "o") (caly + "e*") + (caly + "i-e") ; param CTense = CPresent | CPast | CFuture ;