From f33059ae39a1fd90f62f953c910d4c2a00589c51 Mon Sep 17 00:00:00 2001
From: aarne <aarne@chalmers.se>
Date: Fri, 31 May 2013 16:25:42 +0000
Subject: [PATCH] Prasad's sanskrit transliteration ; MiniresourceSan now
 compiles but is mostly incorrect due to missing paradigms

---
 examples/miniresource/MiniresourceSan.gf | 196 +++++++++++------------
 src/compiler/GF/Text/Transliterations.hs |  15 +-
 2 files changed, 109 insertions(+), 102 deletions(-)

diff --git a/examples/miniresource/MiniresourceSan.gf b/examples/miniresource/MiniresourceSan.gf
index aa3bd953c..dbeecebf2 100644
--- a/examples/miniresource/MiniresourceSan.gf
+++ b/examples/miniresource/MiniresourceSan.gf
@@ -5,102 +5,89 @@ concrete MiniresourceSan of Miniresource = open Prelude in {
   flags coding = utf8;
 
   lincat
-    S   = {s : Str} ; 
+    S  = {s : Str} ; 
     Cl = {s : Bool => Str} ; 
     NP = NounPhrase ;  
-      -- {s : Str} ;     
     VP = VerbPhrase ;  
-      -- {verb : Verb ; compl : Str} ;  
-    AP = {s : Str; monoSyl: Bool} ;
-    CN = Noun ;        -- {s : Str; c : Str} ;
-    Det = {s : Str ; n : Number} ;
-    N = Noun ;         -- {s : Str; c : Str} ;
-    A = Adj ;          -- {s : Str; monoSyl: Bool} ; 
-    V = Verb;          -- {s : Str ; pp,ds,dp,ep : Str ; neg : Str}
-    V2 = Verb ;
+    AP = Adj ;
+    CN = Noun ;
+    Det = {s : Gender => Case => Str ; n : Number} ;
+    N = Noun ; 
+    A = Adj ;
+    V = Verb;
+    V2 = Verb ** {c : Case} ;
     AdA = {s : Str} ; 
     Pol = {s : Str ; b : Bool} ;
     Tense = {s : Str} ;
-    Conj = {s : SForm => Str} ;    
+    Conj = {s : Str} ;    
+
 
   lin
+
     UseCl t p cl = {s = t.s ++ p.s ++ cl.s ! p.b} ; 
 
     PredVP np vp = {
-       s  = \\p => np.s ++ neg p ++ vp.verb.s ++ vp.compl
+       s  = \\p => np.s ! Nom ++ neg p ++ vp.compl ! np.a ++ vp.verb.s ! VPres np.a.n np.a.p
        } ;
 
     ComplV2 v2 np = {
      verb  = v2 ;
-     compl = np.s
+     compl = \\_ => np.s ! v2.c
      } ; 
 
     UseV v = {
       verb = v ; 
-      compl = []
+      compl = \\_ => []
       } ; 
 
-    DetCN det cn = case det.n of {
-            Sg => {s = det.s ++ cn.c ++ cn.s ; n = Sg } ;
-            Pl => {s = det.s ++ "些" ++ cn.s ; n = Pl }             
+    DetCN det cn = {
+      s = \\c => det.s ! cn.g ! c ++ cn.s ! det.n ! c ; 
+      a = agr cn.g det.n P3
       } ;
       
-    ModCN ap cn = case ap.monoSyl of {
-            True => {s = ap.s ++ cn.s ; c = cn.c} ;
-            False => {s = ap.s ++ "的" ++ cn.s ; c = cn.c} 
-            } ;
+    ModCN ap cn = {s = \\n,c => ap.s ! cn.g ! n ! c ++ cn.s ! n ! c ; g = cn.g} ;
 
     CompAP ap = {
       verb = copula ;
-      compl = ap.s ++ "的"
+      compl = \\a => ap.s ! a.g ! a.n ! Nom 
       } ;
 
     AdAP ada ap = {
-      s = ada.s ++ ap.s ;
-      monoSyl = False
+      s = \\g,n,c => ada.s ++ ap.s ! g ! n ! c
       } ;
 
     ConjNP co x y = {
-      s = x.s ++ co.s ! Phr NPhrase ++ y.s
+      s = \\c => x.s ! c ++ co.s ++ y.s ! c ;
+      a = y.a ----
       } ;
 
-    ConjS  co x y = {s = x.s ++ co.s ! Sent ++ y.s} ;  
+    ConjS  co x y = {s = x.s ++ co.s ++ y.s} ;  
 
     UseN n = n ;
     UseA adj = adj ;
 
-    a_Det = mkDet "一" Sg ;
-    every_Det = mkDet "每" Sg ;        
-    the_Det = mkDet "那" Sg ;
+    a_Det = mkDet "" Sg ;
+    every_Det = mkDet "प्रति" Sg ;        
+    the_Det = mkDet "" Sg ;
 
-    this_Det = mkDet "这" Sg ;
-    these_Det = mkDet "这" Pl ;
-    that_Det = mkDet "那" Sg ;
-    those_Det = mkDet "那" Pl ;
+    this_Det = mkDet "एतद्" Sg ;
+--    these_Det = mkDet "这" Pl ;
+    that_Det = mkDet "तद्" Sg ;
+--    those_Det = mkDet "那" Pl ;
 
-    i_NP = pronNP "我" ;
-    youSg_NP = pronNP "你" ;
-    he_NP = pronNP "他" ;
-    she_NP = pronNP "她" ;
-    we_NP = pronNP "我们" ;
-    youPl_NP = pronNP "你们" ;
-    they_NP = pronNP "他们" ;
+    i_NP = pronNP "" Sg P1 ;
+    youSg_NP = pronNP "" Sg P2 ;
+    he_NP = pronNP "" Sg P3 ;
+    she_NP = pronNP "" Sg P3 ;
+    we_NP = pronNP "" Pl P1 ;
+    youPl_NP = pronNP "" Pl P2 ;
+    they_NP = pronNP "" Pl P3 ;
 
-    very_AdA = ss (word "非常") ;    
+    very_AdA = ss "अति" ;    
     
-    and_Conj = {s = table {
-                    Phr NPhrase => "和" ;
-                    Phr APhrase => "而" ;
-                    Phr VPhrase => "又" ;
-                    Sent =>  []
-                          }
-                } ;
+    and_Conj = {s = "च"} ;  -- bare token, like or_Conj: no whitespace inside the literal
 
-    or_Conj  = {s = table {
-                    Phr _ => "或" ;
-                    Sent => word "还是"
-                          }
-                } ;
+    or_Conj  = {s = "अथवा"} ;
 
     Pos  = {s = [] ; b = True} ;
     Neg  = {s = [] ; b = False} ;
@@ -110,17 +97,24 @@ concrete MiniresourceSan of Miniresource = open Prelude in {
 -- module TestChi
 
 lin
-  man_N = mkN "男人" ;  
-  woman_N = mkN "女人" ;
-  house_N = mkN "房子" ;
-  tree_N = mkN "树" "棵";
-  big_A = mkA "大" ;
-  small_A = mkA "小" ;
-  green_A = mkA "绿" ;
-  walk_V = mkV "走" ;
-  arrive_V = mkV "到" ;
-  love_V2 = mkV2 "爱" ;
-  please_V2 = mkV2 "麻烦" ;
+  man_N = mkN "नरः" ;  
+  woman_N = mkN "स्त्री" ;
+  house_N = mkN "गृहं" ;
+  tree_N = mkN "वृक्षः" ;  -- nominative singular, as for man_N: ramaNoun drops the last character
+  big_A = mkA "महाकायः" ;    -- mkA goes through ramaNoun, which drops the final character,
+  small_A = mkA "अल्पः" ;      -- so the nominative singular in -ः is passed, as for man_N
+  green_A = mkA "हरितः" ;
+  walk_V = mkV "गम्" ;
+  arrive_V = mkV "अभि-उपा-गम्" ;
+  love_V2 = mkV2 "कम्" ;
+  please_V2 = mkV2 "प्री" ;
+
+
+
+
+
+
+
 
 -- module ResSan
 
@@ -132,15 +126,19 @@ param
     Gender = Masc | Fem | Neutr ;
     Person = P3 | P2 | P1 ;
 
-    Agr = Ag Gender Number Person ;
 
     VForm = VPres Number Person ;
 
+oper
+    Agr = {g : Gender ; n : Number ; p : Person} ;  -- agreement features as a record
+    -- agr packs gender, number and person into an Agr record
+    agr : Gender -> Number -> Person -> Agr = \g,n,p -> {g = g ; n = n ; p = p} ;
+
 -- parts of speech
 
 oper
 
-  VerbPhrase = {verb : Verb ; compl : Str} ;
+  VerbPhrase = {verb : Verb ; compl : Agr => Str} ;
   NounPhrase = {s : Case => Str ; a : Agr} ; 
 
 -- for morphology
@@ -150,7 +148,7 @@ oper
   Verb : Type = {s : VForm => Str} ;
 
   mkNoun : (s1,_,_,_,_,_,_,_, _,_,_, _,_,_,_,_,s17 : Str) -> Gender -> Noun = 
-    \snon,sacc,sins,sdat,sabl,sgen,sloc,svoc,
+    \snom,sacc,sins,sdat,sabl,sgen,sloc,svoc,
      dnomaccvoc,dinsdatabl,dgenloc,
      pnomvoc,pacc,pins,pdatabl,pgen,ploc, 
      gen -> {
@@ -159,7 +157,7 @@ oper
            Nom => snom ; Acc => sacc ; Ins => sins ; Dat => sdat ; Abl => sabl ; Gen => sgen ; Loc => sloc ; Voc => svoc
            } ;
         Dl => table {
-           Nom | Voc => dnomaccvoc ; Ins | Dat | Abl => dinsdatabl ; Gen | Loc => dgenloc 
+           Nom | Acc | Voc => dnomaccvoc ; Ins | Dat | Abl => dinsdatabl ; Gen | Loc => dgenloc 
            } ;
         Pl => table {
            Nom | Voc => pnomvoc ; Acc => pacc ; Ins => pins ; Dat | Abl => pdatabl ; Gen => pgen ; Loc => ploc
@@ -170,12 +168,12 @@ oper
 
   endingNoun : Str -> (s1,_,_,_,_,_,_,_, _,_,_, _,_,_,_,_,s17 : Str) -> Gender -> Noun = 
     \stem,
-     snon,sacc,sins,sdat,sabl,sgen,sloc,svoc,
+     snom,sacc,sins,sdat,sabl,sgen,sloc,svoc,
      dnomaccvoc,dinsdatabl,dgenloc,
      pnomvoc,pacc,pins,pdatabl,pgen,ploc, 
      gen -> 
        mkNoun
-         (stemm + snon) (stem + sacc) (stem + sins) (stem + sdat) (stem + sabl) (stem + sgen) (stem + sloc) (stem + svoc) 
+         (stem + snom) (stem + sacc) (stem + sins) (stem + sdat) (stem + sabl) (stem + sgen) (stem + sloc) (stem + svoc) 
          (stem + dnomaccvoc) (stem + dinsdatabl) (stem + dgenloc) 
          (stem + pnomvoc) (stem + pacc) (stem + pins) (stem + pdatabl) (stem + pgen) (stem + ploc) 
          gen ;
@@ -184,11 +182,14 @@ oper
   ramaNoun : Str -> Noun = \rama ->
     let ram = init rama in
     endingNoun ram
-      "aH" "amx" "eNe" "a:ya" "a:tx" "asxya" "e" "a"
-      "o+" "a:t'xya:mx" "ayo:" 
-      "a:H" "a:nx" "e+H" "e:t'yaH" "a:Na:ma" "e:Su" 
+      "ः" "म्" "ेण" "ाय" "ात्" "स्य" "े" ""    -- sg: nom acc ins dat abl gen loc voc
+      "ौ" "ाभ्याम्" "योः"    -- du: nom/acc/voc, ins/dat/abl, gen/loc
+      "ाः" "ान्" "ैः" "ेभ्यः" "ाणाम्" "ेषु"    -- pl: nom/voc acc ins dat/abl gen loc
       Masc ;
 
+  mkAdj : (m,f,n : Noun) -> Adj = \masc,fem,neut -> {s = table {Masc => masc.s ; Fem => fem.s ; Neutr => neut.s}} ;  -- one noun table per gender
+
+
   mkVerb : (s1,_,_,_,_,_,_,_,s9 : Str) -> Verb = 
     \s3,s2,s1,d3,d2,d1,p3,p2,p1 -> {
       s = table {
@@ -211,58 +212,51 @@ oper
 
   patVerb : Str -> Verb = \pat ->
     endingVerb pat
-      "ita" "isa" "ima" "ataH" "at'aH" "avaH" "inxta" "at'a" "a:maH" ;
+      "ति" "सि" "ामि" "तः" "थः" "ावः" "न्ति" "थ" "ामः" ;  -- sg3 sg2 sg1 du3 du2 du1 pl3 pl2 pl1; stem written with inherent a (assumes endingVerb glues stem + ending -- TODO confirm)
 
 
+  copula : Verb = {s = table {_ => []}} ;  -- zero copula: the empty string for every verb form
+
+  neg : Bool -> Str = \pos -> table {True => [] ; False => "न"} ! pos ;  -- negative particle only when the polarity is False
 
-{-
-  neg : Bool -> Str = \b -> case b of {True => [] ; False => "不"} ;
 
 -- for structural words
 
-  mkDet : Str -> Number -> {s : Str ; n : Number} = \s,n -> {
-    s = word s ;
+  mkDet : Str -> Number -> {s : Gender => Case => Str ; n : Number} = \s,n -> {
+    s = \\_,_ => s ;
     n = n
     } ;
 
-  pronNP : (s : Str) -> NounPhrase = \s -> {
-    s = word s
+  pronNP : (s : Str) -> Number -> Person -> NounPhrase = \s,n,p -> {
+    s = \\_ => s ;
+    a = agr Masc n p
     } ;
     
--- Write the characters that constitute a word separately. 
--- This enables straightforward tokenization.
 
-  bword : Str -> Str -> Str = \x,y -> x ++ y ; 
-  -- change to x + y to treat words as single tok ens
 
-  word : Str -> Str = \s -> case s of {
-      x@? + y@? + z@? + u@? => bword x (bword y (bword z u)) ;
-      x@? + y@? + z@? => bword x (bword y z) ;
-      x@? + y@? => bword x y ;
-      _ => s
-      } ;
-
--- module ParadigmsChi
+-- module ParadigmsSan
 
 oper
   mkN = overload {
     mkN : (man : Str) -> N 
-      = \n -> lin N (mkNoun n "个") ;  
-    mkN : (man : Str) -> Str -> N 
-      = \n,c -> lin N (mkNoun n c)
+      = \s -> lin N (ramaNoun s) ;
     } ;  
       
   mkA : (small : Str) -> A 
-      = \a -> lin A (mkAdj a) ;
+      = \s -> let n = ramaNoun s in lin A (mkAdj n n n) ;
       
   mkV : (walk : Str) -> V 
-      = \s -> lin V (mkVerb s) ; 
+      = \s -> lin V (patVerb s) ; 
 
   mkV2 = overload {
     mkV2 : (love : Str) -> V2 
-      = \love -> lin V2 (mkVerb love) ;
-    mkV2 : (love : V) -> V2 
-      = \love -> lin V2 love ;
+      = \love -> lin V2 (mkV love ** {c = Acc}) ;
+---    mkV2 : (love : V) -> V2 
+---      = \love -> lin V2 love ;
    } ;
--}
 
 }
+
+
+
+
+
diff --git a/src/compiler/GF/Text/Transliterations.hs b/src/compiler/GF/Text/Transliterations.hs
index 5eb64ec17..bd054c25f 100644
--- a/src/compiler/GF/Text/Transliterations.hs
+++ b/src/compiler/GF/Text/Transliterations.hs
@@ -43,6 +43,7 @@ allTransliterations = Map.fromAscList [
   ("greek", transGreek),
   ("hebrew", transHebrew),
   ("persian", transPersian),
+  ("sanskrit", transSanskrit),
   ("sindhi", transSindhi),
   ("nepali", transNepali),
   ("telugu", transTelugu),
@@ -290,4 +291,16 @@ allTrans = words $
 	" -   -   -   -   -   -  -   -   f.  f-  f'  f(  f)  f  f?  f*"++
 	" p.  p-  p'  p(  p)  p  p?  p*" 	
 allCodes = [0x1200..0x1357]
- 
\ No newline at end of file
+ 
+-- Sanskrit transliteration table (by Prasad 31/5/2013); "a" is registered as an invisible character.
+transSanskrit :: Transliteration
+transSanskrit = (mkTransliteration "Sanskrit" sanTrans sanCodes) {invisible_chars = ["a"]} where
+  sanCodes = [0x0900 .. 0x097f]
+  sanTrans = words $ concat [   -- 16 entries per row
+    "-  n~ m. h. - A A: I I: U U: R. L. - - E: ",
+    "E+ - O O: O+ k k' g g' n- c c' j j' n* T ",
+    "T' D D' N t t' d d' n - p p' b b' m y ",
+    "r - l L - v s* S s h - - - v- a: i ",
+    "i: u u: r. r.: - e e: e+ - o o: o+ a_ - - ",
+    "o~  -  -  -  -  - - -  q x G  z  R  R'  f  -  ",
+    "R.: L.: l. l.: p, p.  N0 N1 N2 N3 N4 N5 N6 N7 N8 N9 "]