mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-09 03:02:50 -06:00
GF/src is now for 2.9, and the new sources are in src-3.0 - keep it this way until the release of GF 3
This commit is contained in:
97
src-3.0/GF/Text/Devanagari.hs
Normal file
97
src-3.0/GF/Text/Devanagari.hs
Normal file
@@ -0,0 +1,97 @@
|
||||
----------------------------------------------------------------------
|
||||
-- |
|
||||
-- Module : Devanagari
|
||||
-- Maintainer : (Maintainer)
|
||||
-- Stability : (stable)
|
||||
-- Portability : (portable)
|
||||
--
|
||||
-- > CVS $Date: 2005/04/21 16:23:34 $
|
||||
-- > CVS $Author: bringert $
|
||||
-- > CVS $Revision: 1.5 $
|
||||
--
|
||||
-- Converts text written in an ad hoc ASCII transliteration into Devanagari Unicode characters (U+0901..U+0970).
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
module GF.Text.Devanagari (mkDevanagari) where
|
||||
|
||||
-- | Convert a transliterated string to its character-level output.
--
-- The input is first split into two-character codes by
-- 'adHocToDigraphWord'; every code is then mapped to a single character
-- by 'digraphWordToUnicode'.
mkDevanagari :: String -> String
mkDevanagari input = digraphWordToUnicode (adHocToDigraphWord input)
|
||||
|
||||
-- | Split a transliterated string into a list of two-character codes.
--
-- Clauses are tried top to bottom and their order is significant: the
-- more specific patterns (doubled vowels, vowel pairs, aspirated
-- consonants) must be tested before the generic fallbacks.
--
-- Conventions of the emitted codes, as used by the clauses below:
--
-- * @('\\\\', c)@ carries the character @c@ through verbatim (spaces and
--   markup between @<@ and @>@).
-- * @(' ', c)@ is a single-letter code.
-- * @(c, 'H')@ is a consonant followed by @H@ (aspirated).
-- * @(v, ':')@ is a doubled (lengthened) vowel.
-- * @(' ', '^')@ is emitted after a consonant that is not followed by a
--   vowel, unless it is at the end of the input or before a space.
adHocToDigraphWord :: String -> [(Char, Char)]
adHocToDigraphWord = go
  where
    -- Codes that are emitted from several clauses.
    escapedSpace = ('\\', ' ')
    vowelless = (' ', '^')
    single c = (' ', c)
    aspirate c = (c, 'H')

    go [] = []
    -- Markup is copied through by 'spoolMarkup' until the closing '>'.
    go ('<' : rest) = ('\\', '<') : spoolMarkup rest
    -- A space is passed through unchanged.
    go (' ' : rest) = escapedSpace : go rest

    -- The current character is a vowel: it is capitalised with 'cap'.
    go (v : w : rest)
      -- The same vowel twice means lengthening.
      | v == w && isVowel v = (cap v, ':') : go rest
      -- Two different vowels form one code.
      | isVowel v && isVowel w = (cap v, cap w) : go rest
    go (v : rest)
      | isVowel v = single (cap v) : go rest

    -- The current character is a consonant followed by 'H' (aspirated).
    go (k : 'H' : v : w : rest)
      | v == w && isVowel v = aspirate k : (v, ':') : go rest
      | isVowel v && isVowel w = aspirate k : (v, w) : go rest
    -- A single 'a' after the consonant is inherent and emits nothing.
    go (k : 'H' : 'a' : rest) = aspirate k : go rest
    go (k : 'H' : v : rest)
      | isVowel v = aspirate k : single v : go rest
    -- No vowelless mark before a space or at the end of the input.
    go (k : 'H' : ' ' : rest) = aspirate k : escapedSpace : go rest
    go [k, 'H'] = [aspirate k]
    go (k : 'H' : rest) = aspirate k : vowelless : go rest

    -- The current character is an unaspirated consonant.
    go (k : v : w : rest)
      | v == w && isVowel v = single k : (v, ':') : go rest
      | isVowel v && isVowel w = single k : (v, w) : go rest
    -- A single 'a' after the consonant is inherent and emits nothing.
    go (k : 'a' : rest) = single k : go rest
    go (k : v : rest)
      | isVowel v = single k : single v : go rest
    -- No vowelless mark before a space or at the end of the input.
    go (k : ' ' : rest) = single k : escapedSpace : go rest
    go [k] = [single k]
    -- 'M' (anusvara) is vowelless but takes no vowelless mark.
    go ('M' : rest) = single 'M' : go rest
    go (k : rest) = single k : vowelless : go rest
|
||||
|
||||
-- | True for the characters treated as vowels by the transliteration:
-- @a@, @e@, @i@, @o@, @u@ and the length mark @:@.
isVowel :: Char -> Bool
isVowel = (`elem` "aeiou:")
|
||||
-- | Upper-case one of the five vowel letters @a e i o u@; every other
-- character is returned unchanged.
cap :: Char -> Char
cap ch = maybe ch id (lookup ch (zip "aeiou" "AEIOU"))
|
||||
|
||||
-- | Copy markup through verbatim, one character per code, until the
-- closing @>@ is seen; transliteration then resumes with
-- 'adHocToDigraphWord'.
--
-- Each character @c@ is emitted as @('\\\\', c)@.
--
-- If the input ends before a closing @>@ is found, the output simply
-- ends there instead of failing with a non-exhaustive pattern match.
spoolMarkup :: String -> [(Char, Char)]
spoolMarkup s = case s of
  -- Unterminated markup: nothing more to emit.
  [] -> []
  '>' : cs -> ('\\', '>') : adHocToDigraphWord cs
  c1 : cs -> ('\\', c1) : spoolMarkup cs
|
||||
|
||||
|
||||
-- | Translate a list of two-character codes to a string, one output
-- character per code, using 'digraphToUnicode'.
digraphWordToUnicode :: [(Char, Char)] -> String
digraphWordToUnicode codes = [digraphToUnicode code | code <- codes]
|
||||
|
||||
-- | Translate one two-character code to a single character.
--
-- The code is looked up in the table obtained by pairing
-- 'allDevanagariCodes' with 'allDevanagari'. A code that is not in the
-- table yields its second component unchanged.
digraphToUnicode :: (Char, Char) -> Char
digraphToUnicode code@(_, fallback) = maybe fallback id (lookup code table)
  where
    table = zip allDevanagariCodes allDevanagari
|
||||
|
||||
-- | The transliteration table as one flat string of consecutive
-- two-character codes. It is cut into pairs by 'mkPairs' (see
-- 'allDevanagariCodes') and zipped position by position against
-- 'allDevanagari' in 'digraphToUnicode', so the n-th code stands for the
-- n-th character of 'allDevanagari'.
--
-- Whitespace inside the literal is significant: single-letter codes are
-- written with a leading space. Do not reformat this string.
-- NOTE(review): the @__@ entries look like placeholders for positions
-- with no transliteration -- confirm.
digraphedDevanagari = " ~ M ;__ AA: II: UU:RoLoEvE~ EE:AvA~ OAU kkH ggHNG ccH jjH \241 TTH DDH N ttH ddH nn. ppH bbH m y rr. l LL. v \231 S s h____ .-Sa: ii: uu:ror:eve~ eaiava~ oau ^____OM | -dddu______ Q X G zD.RH fy.R:L:mrmR#I#d#0#1#2#3#4#5#6#7#8#9#o"
|
||||
|
||||
-- | All two-character transliteration codes, in table order: the string
-- 'digraphedDevanagari' cut into consecutive pairs by 'mkPairs'.
allDevanagariCodes :: [(Char, Char)]
allDevanagariCodes = mkPairs digraphedDevanagari
|
||||
|
||||
-- | The target characters: every code point from U+0901 to U+0970
-- inclusive, in ascending order.
allDevanagari :: String
allDevanagari = ['\x0901' .. '\x0970']
|
||||
|
||||
-- | Cut a string into consecutive, non-overlapping pairs of characters:
-- @mkPairs "abcd" == [('a','b'),('c','d')]@.
--
-- The function is total: if the string has odd length, the final
-- unpaired character is dropped rather than causing a pattern-match
-- failure.
mkPairs :: String -> [(Char, Char)]
mkPairs (c1 : c2 : rest) = (c1, c2) : mkPairs rest
-- Empty input, or a single dangling character with no partner.
mkPairs _ = []
|
||||
|
||||
Reference in New Issue
Block a user