diff --git a/lib/resource-1.4/hindi/LexiconHin.gf b/lib/resource-1.4/hindi/LexiconHin.gf index e7b932de8..b0e4b7286 100644 --- a/lib/resource-1.4/hindi/LexiconHin.gf +++ b/lib/resource-1.4/hindi/LexiconHin.gf @@ -20,7 +20,7 @@ concrete LexiconHin of Lexicon = CatHin ** -- become_VA = mkVA (irregV "become" "became" "become") ; -- beer_N = regN "beer" ; -- beg_V2V = mkV2V (regDuplV "beg") noPrep toP ; - big_A = mkA "baRa:" ; + big_A = mkA "baRA" ; -- bike_N = regN "bike" ; -- bird_N = regN "bird" ; -- black_A = regADeg "black" ; @@ -29,8 +29,8 @@ concrete LexiconHin of Lexicon = CatHin ** -- book_N = regN "book" ; -- boot_N = regN "boot" ; -- boss_N = mkN human (regN "boss") ; - boy_N = mkN "laRka:" ; - bread_N = mkN "roTi:" ; + boy_N = mkN "lar.kA" ; + bread_N = mkN "rot.I" ; -- break_V2 = dirV2 (irregV "break" "broke" "broken") ; -- broad_A = regADeg "broad" ; -- brother_N2 = mkN2 (mkN masculine (mkN "brother")) (mkPrep "of") ; @@ -66,7 +66,7 @@ concrete LexiconHin of Lexicon = CatHin ** -- door_N = regN "door" ; -- drink_V2 = dirV2 (irregV "drink" "drank" "drunk") ; -- easy_A2V = mkA2V (regA "easy") forP ; - eat_V2 = mkV2 "Ka:" ; + eat_V2 = mkV2 "KA" ; -- empty_A = regADeg "empty" ; -- enemy_N = regN "enemy" ; -- factory_N = regN "factory" ; @@ -81,10 +81,10 @@ concrete LexiconHin of Lexicon = CatHin ** -- fruit_N = regN "fruit" ; -- fun_AV = mkAV (regA "fun") ; -- garden_N = regN "garden" ; - girl_N = mkN "laRki:" ; + girl_N = mkN "lar.kI" ; -- glove_N = regN "glove" ; -- gold_N = regN "gold" ; - good_A = mkA "acCa:" ; + good_A = mkA "a-cCA" ; go_V = mkV "cal" ; -- green_A = regADeg "green" ; -- harbour_N = regN "harbour" ; @@ -109,7 +109,7 @@ concrete LexiconHin of Lexicon = CatHin ** -- leave_V2 = dirV2 (irregV "leave" "left" "left") ; -- like_V2 = dirV2 (regV "like") ; -- listen_V2 = prepV2 (regV "listen") toP ; - live_V = mkV "Cu:" ; ---- touch + live_V = mkV "CU" ; ---- touch -- long_A = regADeg "long" ; -- lose_V2 = dirV2 (irregV "lose" "lost" "lost") ; -- love_N = regN "love" ; @@ -143,7 +143,7 @@ concrete LexiconHin of Lexicon = CatHin ** -- radio_N = regN "radio" ; -- rain_V0 = mkV0 (regV "rain") ; -- read_V2 = dirV2 (irregV "read" "read" "read") ; - red_A = mkA "la:l" ; + red_A = mkA "lAl" ; -- religion_N = regN "religion" ; -- restaurant_N = regN "restaurant" ; -- river_N = regN "river" ; @@ -315,7 +315,7 @@ concrete LexiconHin of Lexicon = CatHin ** -- laugh_V = regV "laugh" ; -- lie_V = IrregHin.lie_V ; -- play_V = regV "play" ; - sew_V = mkV "si:" ; + sew_V = mkV "sI" ; -- sing_V = IrregHin.sing_V ; -- sit_V = IrregHin.sit_V ; -- smell_V = regV "smell" ; @@ -332,7 +332,7 @@ concrete LexiconHin of Lexicon = CatHin ** -- cut_V2 = dirV2 IrregHin.cut_V ; -- fear_V2 = dirV2 (regV "fear") ; -- fight_V2 = dirV2 fight_V ; - hit_V2 = mkV2 (mkV "ma:r") "ko" ; + hit_V2 = mkV2 (mkV "mAr") "ko" ; -- hold_V2 = dirV2 hold_V ; -- hunt_V2 = dirV2 (regV "hunt") ; -- kill_V2 = dirV2 (regV "kill") ; diff --git a/lib/resource-1.4/hindi/ResHin.gf b/lib/resource-1.4/hindi/ResHin.gf index 5e7dcbb26..18b8a7308 100644 --- a/lib/resource-1.4/hindi/ResHin.gf +++ b/lib/resource-1.4/hindi/ResHin.gf @@ -23,20 +23,20 @@ resource ResHin = ParamX ** open Prelude in { } ; reggNoun : Str -> Gender -> Noun = \s,g -> case of { - <-(_ + ("a:" | "i:")), Fem> => - mkNoun s s s (s + "e~") (s + "o~") (s + "o") Fem ; + <-(_ + ("A" | "I")), Fem> => + mkNoun s s s (s + "eM") (s + "oM") (s + "o") Fem ; _ => regNoun s ** {g = g} } ; regNoun : Str -> Noun = \s -> case s of { - x + "iya:" => - mkNoun s s s (x + "iya:~") (x + "iyo*") (x + "iyo") Fem ; - x + "a:" => - mkNoun s (x + "e") (x + "e") (x + "e") (x + "o*") (x + "o") Masc ; - x + "i:" => - mkNoun s s s (x + "iya:~") (x + "iyo*") (x + "iyo") Fem ; + x + "iyA" => + mkNoun s s s (x + "iyAM") (x + "iyoN") (x + "iyo") Fem ; + x + "A" => + mkNoun s (x + "e") (x + "e") (x + "e") (x + "oN") (x + "o") Masc ; + x + "I" => + mkNoun s s s (x + "iyAM") (x + "iyoN") (x + "iyo") Fem ; _ => - mkNoun s s s s (s + "o*") (s + "o") Masc + mkNoun s s s s (s + "oN") (s + "o") Masc } ; @@ -51,7 +51,7 @@ resource ResHin = ParamX ** open Prelude in { } ; regAdjective : Str -> Adjective = \s -> case s of { - acch + "a:" => mkAdjective s (acch + "e") (acch + "i:") ; + acch + "A" => mkAdjective s (acch + "e") (acch + "I") ; _ => mkAdjective s s s } ; @@ -76,7 +76,7 @@ resource ResHin = ParamX ** open Prelude in { \inf,stem,ims,imp,ifs,ifp,pms,pmp,pfs,pfp,ss1,ss2,sp2,sp3,r -> { s = let ga : Number -> Gender -> Str = \n,g -> - (regAdjective "ga:").s ! g ! n ! Dir + (regAdjective "gA").s ! g ! n ! Dir in table { VInf => inf ; VStem => stem ; @@ -99,48 +99,48 @@ resource ResHin = ParamX ** open Prelude in { VAbs => stem + "kar" ; --- ke VReq => r ; VImp => sp2 ; - VReqFut => stem + "i-ega:" + VReqFut => stem + "ie-gA" } } ; regVerb : Str -> Verb = \cal -> let caly : Str = case cal of { - _ + ("a:" | "e") => cal + "y" ; - c + "u:" => c + "uy" ; - c + "i:" => c + "iy" ; + _ + ("A" | "e") => cal + "y" ; + c + "U" => c + "uy" ; + c + "I" => c + "iy" ; _ => cal } in mkVerb - (cal + "na:") cal - (cal + "ta:") (cal + "te") (cal + "ti:") (cal + "ti:") - (caly + "a:") (caly + "e") (caly + "i:") (caly + "i:*") - (caly + "u:~") (caly + "e") (caly + "o") (caly + "e*") - (caly + "i-e") ; + (cal + "nA") cal + (cal + "tA") (cal + "te") (cal + "tI") (cal + "tI") + (caly + "A") (caly + "e") (caly + "I") (caly + "IN") + (caly + "UM") (caly + "e") (caly + "o") (caly + "eN") + (caly + "ie-") ; param CTense = CPresent | CPast | CFuture ; oper copula : CTense -> Number -> Person -> Gender -> Str = \t,n,p,g -> case of { - => "hu:~" ; + => "hUM" ; => "hE" ; => "hE" ; - => "hE*" ; + => "hEN" ; => "ho" ; - => "hE*" ; - => "Ta:" ; - => "Ti:" ; + => "hEN" ; + => "TA" ; + => "TI" ; => "Te" ; - => "Ti:*" ; - => "hu:*ga:" ; - => "hu:*gi:" ; - => "hoga:" ; - => "hogi:" ; + => "TIN" ; + => "hUNgA" ; + => "hUNgI" ; + => "hogA" ; + => "hogI" ; => "hoge" ; - => "ho*ge" ; + => "hoNge" ; => "hogi:" ; - => "ho*gi:" + => "hoNgi:" } ; param @@ -148,12 +148,12 @@ resource ResHin = ParamX ** open Prelude in { oper personalPronoun : Person -> Number -> {s : PronCase => Str} = \p,n -> case of { - => {s = table PronCase ["mE*" ; "muJ" ; "muJe" ; "mera:"]} ; - => {s = table PronCase ["ham" ; "ham" ; "hame*" ; "hama:ra:"]} ; - => {s = table PronCase ["tu:" ; "tuJ" ; "tuJe" ; "tera:"]} ; - => {s = table PronCase ["tum" ; "tum" ; "tumhe*" ; "tumha:ra:"]} ; - => {s = table PronCase ["vah" ; "us" ; "use" ; "uska:"]} ; - => {s = table PronCase ["ve" ; "un" ; "unhe*" ; "unka:"]} + => {s = table PronCase ["mEN" ; "muJ" ; "muJe" ; "merA"]} ; + => {s = table PronCase ["ham" ; "ham" ; "hameN" ; "hamArA"]} ; + => {s = table PronCase ["tU" ; "tuJ" ; "tuJe" ; "terA"]} ; + => {s = table PronCase ["tum" ; "tum" ; "tumheN" ; "tumhArA"]} ; + => {s = table PronCase ["vah" ; "u-s" ; "u-se" ; "u-skA"]} ; + => {s = table PronCase ["ve" ; "u-n" ; "u-nheN" ; "u-nkA"]} } ; -- the Hindi verb phrase @@ -198,7 +198,7 @@ resource ResHin = ParamX ** open Prelude in { s = \\b,vh => let na = if_then_Str b [] "na" ; - nahim = if_then_Str b [] "nahi:*" ; + nahim = if_then_Str b [] "nahIN" ; in case vh of { VPTense VPGenPres (Ag g n p) => @@ -228,7 +228,7 @@ resource ResHin = ParamX ** open Prelude in { } ; raha : Gender -> Number -> Str = \g,n -> - (regAdjective "raha:").s ! g ! n ! Dir ; + (regAdjective "rahA").s ! g ! n ! Dir ; VPHSlash = VPH ** {c2 : Compl} ; diff --git a/lib/resource-1.4/hindi/StructuralHin.gf b/lib/resource-1.4/hindi/StructuralHin.gf index 3f647ee1d..14ffe6a5d 100644 --- a/lib/resource-1.4/hindi/StructuralHin.gf +++ b/lib/resource-1.4/hindi/StructuralHin.gf @@ -51,7 +51,7 @@ concrete StructuralHin of Structural = CatHin ** -- if_Subj = ss "if" ; -- in8front_Prep = ss ["in front of"] ; -- i_Pron = mkNP "I" "me" "my" Sg P1 Masc ; - in_Prep = ss "me*" ; + in_Prep = ss "meN" ; -- it_Pron = mkNP "it" "it" "its" Sg P3 Neutr ; -- less_CAdv = ss "less" ; -- many_Det = mkDeterminer Pl "many" ; diff --git a/src-3.0/GF/Text/Transliterations.hs b/src-3.0/GF/Text/Transliterations.hs index 28f653dcf..05e10dc98 100644 --- a/src-3.0/GF/Text/Transliterations.hs +++ b/src-3.0/GF/Text/Transliterations.hs @@ -23,7 +23,8 @@ characterTable = unlines . map prOne . Map.assocs . trans_from_unicode where data Transliteration = Trans { trans_to_unicode :: Map.Map String Int, - trans_from_unicode :: Map.Map Int String + trans_from_unicode :: Map.Map Int String, + invisible_chars :: [String] } appTransToUnicode :: Transliteration -> String -> String @@ -32,6 +33,7 @@ appTransToUnicode trans = map (\c -> maybe c (return . toEnum) $ Map.lookup c (trans_to_unicode trans) ) . + filter (flip notElem (invisible_chars trans)) . unchar appTransFromUnicode :: Transliteration -> String -> String @@ -46,9 +48,10 @@ appTransFromUnicode trans = -- conventions: -- each character is either [letter] or [letter+nonletter] -- when using a sparse range of unicodes, mark missing codes as "-" in transliterations +-- characters can be invisible: ignored in translation to unicode mkTransliteration :: [String] -> [Int] -> Transliteration -mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) +mkTransliteration ts us = Trans (Map.fromList (tzip ts us)) (Map.fromList (uzip us ts)) [] where tzip ts us = [(t,u) | (t,u) <- zip ts us, t /= "-"] uzip us ts = [(u,t) | (u,t) <- zip us ts, t /= "-"] @@ -75,9 +78,9 @@ transThai = mkTransliteration allTrans allCodes where allCodes = [0x0e00 .. 0x0e7f] transDevanagari :: Transliteration -transDevanagari = mkTransliteration allTrans allCodes where +transDevanagari = (mkTransliteration allTrans allCodes){invisible_chars = ["a"]} where allTrans = words $ - "~ * - - " ++ + "M N - - " ++ "a- A- i- I- u- U- R- - - - e- E- - - o- O- " ++ "k K g G N: c C j J n: t. T. d. D. n. t " ++ "T d D n - p P b B m y r - l - - v " ++