mirror of
https://github.com/GrammaticalFramework/gf-rgl.git
synced 2026-07-02 03:58:34 -06:00
partial implementation of Ancient Greek RGL
This commit is contained in:
@@ -0,0 +1,407 @@
|
||||
--# -path=.:../abstract:../common:../prelude
|
||||
|
||||
--1 Greek auxiliary operations. -- NOT USED. Remember old version; sound law SL etc
|
||||
|
||||
resource AccentsGrc = {
|
||||
|
||||
flags
|
||||
optimize = noexpand ;
|
||||
|
||||
oper
|
||||
|
||||
-- Accent shift and accent change: the following functions are applied to
|
||||
-- the stem in certain cases:
|
||||
|
||||
-- a) when the Pl Gen ending w~n is added, the accent in the stem
|
||||
-- must be removed using dropAccent:
|
||||
|
||||
dropAccent : Str -> Str = \str -> case str of {
|
||||
x + ("'" | "`" | "~") + z => x + z ;
|
||||
x + "=" + z => x + "-" + z ;
|
||||
_ => str
|
||||
} ;
|
||||
|
||||
|
||||
|
||||
{- Redesign: ------------------------------------------------------------------
|
||||
|
||||
Try to extract the patterns of vowel lengths and accent position
|
||||
and do the paradigms with less pattern matching.
|
||||
|
||||
We need the general rule:
|
||||
1. the accent position is taken from SgNom, and only moved on demand.
|
||||
2. a shift is demanded if a an ending with a long vowel is added and
|
||||
the accent position is on the 3rd last vowel.
|
||||
3. if an ending with accent is added, the accent in the stem has to be dropped.
|
||||
|
||||
-- Declension I,II: Word`stock'+NomSgEnding => wordkind includes ending.
|
||||
Then from lists of endings *without accent* one takes the accent position
|
||||
from the user-provided form(s), and given the vowel length of the ending,
|
||||
determines the accent position of other forms
|
||||
-- Declension III:
|
||||
Wordstem+Ending (wordstem+os=GenSg) => NomSg may have empty ending
|
||||
Then the accent position is taken from the NomSg.
|
||||
-}
|
||||
|
||||
oper
|
||||
Position : PType = Predef.Ints 3 ;
|
||||
|
||||
WPat = { syl : Syllability ;
|
||||
v : VPat * VPat * VPat ;
|
||||
s : Str * Str * Str * Str;
|
||||
acnt : Accent
|
||||
} ;
|
||||
param
|
||||
Accent = Acute Position | Circum Position | NoAccent ;
|
||||
Syllability = Mono | Bi | Many ;
|
||||
|
||||
Vowel = A | E | I | O | U | AU | EU | OU | AI | EI | OI | UI |
|
||||
-- vowel with spirtus asper/lenis:
|
||||
Aa | Ea | Ia | Oa | Ua | AUa | EUa | OUa | AIa | EIa | OIa | UIa |
|
||||
Al | El | Il | Ol | Ul | AUl | EUl | OUl | AIl | EIl | OIl | UIl |
|
||||
-- vowel with iota subscriptum and spiritus
|
||||
Ai | Ei | Oi | Aia | Eia | Oia | Ail | Eil | Oil
|
||||
;
|
||||
|
||||
VPat = NoVowel | Short Vowel | Long Vowel ; -- 1+45+45 = 91 possibilities
|
||||
|
||||
Spirit = Asper | Lenis | NoSpirit ;
|
||||
|
||||
oper
|
||||
removeLength : Str -> Str = \str -> case str of {
|
||||
"a_" | "a." => "a" ; "i_" | "i." => "i" ; "y_" | "y." => "y" ; x => x } ;
|
||||
|
||||
toStr : VPat -> Str = \vpat -> case vpat of {
|
||||
Short A => "a" ;
|
||||
Short E => "e" ;
|
||||
Short I => "i" ;
|
||||
Short O => "o" ;
|
||||
Short U => "y" ;
|
||||
Long A => "a_" ; -- maybe better "a", since the unicode-table has not a_'
|
||||
Long E => "h" ;
|
||||
Long I => "i_" ;
|
||||
Long O => "w" ;
|
||||
Long U => "y_" ;
|
||||
Long AU => "ay" ;
|
||||
Long EU => "ey" ;
|
||||
Long OU => "oy" ;
|
||||
Long AI => "ai" ;
|
||||
Long EI => "ei" ;
|
||||
Long OI => "oi" ;
|
||||
Long UI => "yi" ;
|
||||
Short AI => "ai" ;
|
||||
Short OI => "oi" ;
|
||||
-- vowels and diphthongs with spriritus asper:
|
||||
Short Aa => "a(" ;
|
||||
Short Ea => "e(" ;
|
||||
Short Ia => "i(" ;
|
||||
Short Oa => "o(" ;
|
||||
Short Ua => "y(" ;
|
||||
-- Long Aa
|
||||
Long Ea => "h(" ;
|
||||
-- Long Ia
|
||||
Long Oa => "w(" ;
|
||||
-- Long Ua
|
||||
Long AUa => "ay(" ;
|
||||
Long EUa => "ey(" ;
|
||||
Long OUa => "oy(" ;
|
||||
Long AIa => "ai(" ;
|
||||
Long EIa => "ei(" ;
|
||||
Long OIa => "oi(" ;
|
||||
Long UIa => "yi(" ;
|
||||
-- vowels and diphthongs with spriritus lenis:
|
||||
Short Al => "a)" ;
|
||||
Short El => "e)" ;
|
||||
Short Il => "i)" ;
|
||||
Short Ol => "o)" ;
|
||||
Short Ul => "y)" ;
|
||||
-- Long Al
|
||||
Long El => "h)" ;
|
||||
-- Long Il
|
||||
Long Ol => "w)" ;
|
||||
-- Long Ul
|
||||
Long AUl => "ay)" ;
|
||||
Long EUl => "ey)" ;
|
||||
Long OUl => "oy)" ;
|
||||
Long AIl => "ai)" ;
|
||||
Long EIl => "ei)" ;
|
||||
Long OIl => "oi)" ;
|
||||
Long UIl => "yi)" ;
|
||||
-- vowels with iota subscriptum, and spirits
|
||||
Long Ai => "a|" ;
|
||||
Long Ei => "e|" ;
|
||||
Long Oi => "o|" ;
|
||||
Long Aia => "a|(" ;
|
||||
Long Eia => "h|(" ;
|
||||
Long Oia => "w|(" ;
|
||||
Long Ail => "a|)" ;
|
||||
Long Eil => "h|)" ;
|
||||
Long Oil => "w|)" ;
|
||||
NoVowel => "" ; -- TODO: trema etc. perhaps I2 = "i-"
|
||||
_ => "*" -- ca.30 possibilities
|
||||
} ;
|
||||
|
||||
toVPat : Str -> VPat = \str -> case str of {
|
||||
("a."|"a") => Short A ;
|
||||
("i."|"i"|"i-"|"i=") => Short I ;
|
||||
("y."|"y"|"y-"|"y=") => Short U ;
|
||||
"a_" => Long A ;
|
||||
"i_" => Long I ;
|
||||
"y_" => Long U ;
|
||||
"a" => Short A ;
|
||||
"e" => Short E ;
|
||||
"i" => Short I ;
|
||||
"o" => Short O ;
|
||||
"y" => Short U ;
|
||||
"a" => Long A ; -- maybe better "a", since the unicode-table has not a_'
|
||||
"h" => Long E ;
|
||||
"i" => Long I ;
|
||||
"w" => Long O ;
|
||||
"y" => Long U ;
|
||||
"ay" => Long AU ;
|
||||
"ey" => Long EU ;
|
||||
"oy" => Long OU ;
|
||||
"ai" => Long AI ;
|
||||
"ei" => Long EI ;
|
||||
"oi" => Long OI ;
|
||||
"yi" => Long UI ;
|
||||
-- vowels and diphthongs with spriritus asper:
|
||||
"a(" => Short Aa ;
|
||||
"e(" => Short Ea ;
|
||||
"i(" => Short Ia ;
|
||||
"o(" => Short Oa ;
|
||||
"y(" => Short Ua ;
|
||||
-- Long Aa
|
||||
"h(" => Long Ea ;
|
||||
-- Long Ia
|
||||
"w(" => Long Oa ;
|
||||
-- Long Ua
|
||||
"ay(" => Long AUa ;
|
||||
"ey(" => Long EUa ;
|
||||
"oy(" => Long OUa ;
|
||||
"ai(" => Long AIa ;
|
||||
"ei(" => Long EIa ;
|
||||
"oi(" => Long OIa ;
|
||||
"yi(" => Long UIa ;
|
||||
-- vowels and diphthongs with spriritus lenis:
|
||||
"a)" => Short Al ;
|
||||
"e)" => Short El ;
|
||||
"i)" => Short Il ;
|
||||
"o)" => Short Ol ;
|
||||
"y)" => Short Ul ;
|
||||
-- Long Al
|
||||
"h)" => Long El ;
|
||||
-- Long Il
|
||||
"w)" => Long Ol ;
|
||||
-- Long Ul
|
||||
"ay)" => Long AUl ;
|
||||
"ey)" => Long EUl ;
|
||||
"oy)" => Long OUl ;
|
||||
"ai)" => Long AIl ;
|
||||
"ei)" => Long EIl ;
|
||||
"oi)" => Long OIl ;
|
||||
"yi)" => Long UIl ;
|
||||
-- vowels with iota subscriptum, and spirits
|
||||
"a|" => Long Ai ;
|
||||
"e|" => Long Ei ;
|
||||
"o|" => Long Oi ;
|
||||
"a|(" => Long Aia ;
|
||||
"h|(" => Long Eia ;
|
||||
"w|(" => Long Oia ;
|
||||
"a|)" => Long Ail ;
|
||||
"h|)" => Long Eil ;
|
||||
"w|)" => Long Oil ;
|
||||
_ => NoVowel -- TODO: trema etc. perhaps I2 = "i-"
|
||||
} ;
|
||||
|
||||
-- accent ~ can only be on long vowels, hence y~ must become <Long U, Circum> etc:
|
||||
|
||||
toVPat2 : Str -> Str -> VPat = \o,z ->
|
||||
case o of { (#shortV|#restV) => case z of {"~"+_ => toVPat (removeLength o + "_") ;
|
||||
_ => toVPat o} ;
|
||||
_ => toVPat o } ;
|
||||
|
||||
toAcntPos : Str -> Accent = \str ->
|
||||
case str of { -- we don't check for double accent; the leftmost accent counts
|
||||
-- 3 syllables
|
||||
("'"|"('"|")'") + _ + "$" + _ + "$" + _ => Acute 1 ;
|
||||
("~"|"(~"|")~") + _ + "$" + _ + "$" + _ => Circum 1 ;
|
||||
_ + "$" + "'" + _ + "$" + _ => Acute 2 ;
|
||||
_ + "$" + "~" + _ + "$" + _ => Circum 2 ;
|
||||
_ + "$" + _ + "$" + "'" + _ => Acute 3 ;
|
||||
_ + "$" + _ + "$" + "~" + _ => Circum 3 ;
|
||||
-- 2 syllables
|
||||
("'"|"('"|")'") + _ + "$" + _ => Acute 2 ;
|
||||
("~"|"(~"|")~") + _ + "$" + _ => Circum 2 ;
|
||||
_ + "$" + "'" + _ => Acute 3 ;
|
||||
_ + "$" + "~" + _ => Circum 3 ;
|
||||
-- 1 syllable
|
||||
("'"|"('"|")'") + _ => Acute 3 ;
|
||||
("~"|"(~"|")~") + _ => Circum 3 ;
|
||||
-(_ + ("'" | "~" | "`") + _) => NoAccent ;
|
||||
_ => Predef.error "Error: double accent"
|
||||
} ;
|
||||
|
||||
-- The order of the patterns in the pattern alternatives is important since
|
||||
-- diphthongs match against vowel+vowel, and short vowels.
|
||||
-- (Here "ai", "oi" count as long; for end syllables, this is adjusted in addAccent.)
|
||||
|
||||
wpat : Str -> WPat = \str ->
|
||||
let T = "$" ;
|
||||
da = dropAccent
|
||||
in
|
||||
case str of {
|
||||
-- monosyllabic words:
|
||||
y@(("r(" | "") + #nonvowels)
|
||||
+ o@((#diphthong | #longV | #shortV | #restV) + (#spirit | ""))
|
||||
+ z@#nonvowels
|
||||
=> { syl = Mono ;
|
||||
v = <NoVowel, NoVowel, toVPat2 o z> ;
|
||||
acnt = toAcntPos z ;
|
||||
s = <[] , [] , y , da z>
|
||||
} ;
|
||||
-- bisyllabic words:
|
||||
x@(("r(" | "") + #nonvowels)
|
||||
+ e@((#diphthong | #longV | #shortV | #restV) + (#spirit | ""))
|
||||
+ y@#nonvowels
|
||||
+ o@(#diphthong | #longV | #shortV | #restV)
|
||||
+ z@#nonvowels
|
||||
=> { syl = Bi ;
|
||||
v = <NoVowel , toVPat2 e y, toVPat2 o z> ;
|
||||
acnt = toAcntPos (y + T + z) ;
|
||||
s = <[] , x , da y , da z>
|
||||
} ;
|
||||
-- manysyllabic words:
|
||||
r@_
|
||||
+ a@((#diphthong | #longV | #shortV | #restV) + (#spirit | ""))
|
||||
+ x@#nonvowels
|
||||
+ e@(#diphthong | #longV | #shortV | #restV)
|
||||
+ y@#nonvowels
|
||||
+ o@(#diphthong | #longV | #shortV | #restV)
|
||||
+ z@#nonvowels
|
||||
=> { syl = Many ;
|
||||
v = <toVPat2 a x, toVPat2 e y, toVPat2 o z> ;
|
||||
acnt = toAcntPos (x + T + y + T + z) ;
|
||||
s = <r , da x , da y , da z>
|
||||
} ;
|
||||
_ => -- Predef.error "vowel/accent pattern not recognized"
|
||||
{ syl = Mono ;
|
||||
v = <NoVowel,NoVowel,NoVowel> ;
|
||||
acnt= NoAccent ;
|
||||
s = <"BUGGY","VOWEL","ACCENT","PATTERN">
|
||||
}
|
||||
} ;
|
||||
|
||||
-- Add an accent to an unaccentuated string; includes necessary accent changes and shifts
|
||||
-- (We don't check that the given string has enough vowels.)
|
||||
|
||||
merge : (Str * Str * Str * Str) -> (Str * Str * Str) -> Str =
|
||||
\cs,vs -> cs.p1 + vs.p1 + cs.p2 + vs.p2 + cs.p3 + vs.p3 + cs.p4 ;
|
||||
|
||||
glue : (Str * Str * Str * Str )-> (VPat * VPat * VPat) -> Str =
|
||||
\cs,vs -> cs.p1 + (toStr vs.p1) + cs.p2 + (toStr vs.p2) + cs.p3 + (toStr vs.p3) + cs.p4 ;
|
||||
|
||||
addAccent0 : Accent -> Str -> Str = \accent,str -> str ; -- for testing
|
||||
|
||||
-- addAccent inserts the given accent = (accnt pos) to the vowel at the position,
|
||||
-- and for pos=1=3rd last and last vowel is short, puts the accent to 2nd last:
|
||||
addAccent : Accent -> Str -> Str = \accent,str ->
|
||||
let
|
||||
w = wpat str ;
|
||||
v1 = toStr w.v.p1 ; -- third last vowel
|
||||
v2 = toStr w.v.p2 ; -- second last vowel
|
||||
v3 = toStr w.v.p3 ; -- last vowel
|
||||
v2s = removeLength v2 ;
|
||||
v3s = removeLength v3 ;
|
||||
in
|
||||
case accent of {
|
||||
Acute 3 => merge w.s <v1, v2, v3 + "'"> ;
|
||||
Circum 3 => merge w.s <v1, v2, v3s + "~"> ;
|
||||
Acute 2 => case <w.v.p2, w.v.p3> of { -- BR 9 4
|
||||
<Long u2, Short u3> => merge w.s <v1, v2s + "~", v3> ;
|
||||
_ => merge w.s <v1, v2 + "'", v3>
|
||||
} ;
|
||||
Circum 2 => merge w.s <v1, v2s + "~", v3> ; -- change to ' for v2 short?
|
||||
Acute 1 => case <w.v.p3, w.s.p4> of {
|
||||
<Short _, _> => merge w.s <v1 + "'", v2, v3> ; -- BR 9 1.b
|
||||
<Long AI,""> => merge w.s <v1 + "'", v2, v3> ; -- BR 29.7 declension ; conjug?
|
||||
<Long OI,""> => merge w.s <v1 + "'", v2, v3> ; -- BR 29.7 declension ; conjug?
|
||||
_ => merge w.s <v1, v2 + "'", v3> -- sometimes changes to "~" needed?
|
||||
} ;
|
||||
_ => -- Predef.error ("Illegal accentuation for " ++ str)
|
||||
str
|
||||
} ;
|
||||
|
||||
-- from accent position in stem to accent position in (stem+long ending): ?
|
||||
-- stempos1,stempos2,stempos3 => (stem+end)pos1,(stem+end)pos2,(stem+end)pos3
|
||||
|
||||
addAccent' : Accent -> Str -> Str = \accent,str ->
|
||||
let
|
||||
accent' = case accent of {Acute 2 => Acute 1 ;
|
||||
Circum 2 => Acute 1 ;
|
||||
Acute 3 => Acute 2 ;
|
||||
Circum 3 => Circum 2 ;
|
||||
_ => accent}
|
||||
in addAccent accent' str ;
|
||||
-}
|
||||
-- --------------------------------------------------------------------------------
|
||||
|
||||
{- Sound laws on structured string (Example)
|
||||
|
||||
-- Problem:
|
||||
-- In addNEnding, an accent on the ending overwrites accents on the stem.
|
||||
-- But if vowel contraction <e',ei> => <[],ei~> deletes the final vowel of
|
||||
-- the stem, one would need to shift stem parts to the right:
|
||||
-- c1+v1+c2+v2+c3+v3+[]++[]+v+c2 => c1+v1+c2+v2+c3+[]+[]++[]+(v3+v)+c2
|
||||
-- to avoid the adjacent []+[]. But c1 may contain vowels, and the number of
|
||||
-- syllables is not exactly known!
|
||||
-- If we use c1+v1+c2+v2+c3+v3+[]++[]+v+c2 => c1+v1+c2+v2+c3+(v3+v)+[]++[]+[]+c2,
|
||||
-- as with <e',ei> => <ei~,[]>, we have to adjust the accent in the stem. (ToDo)
|
||||
-- (could also turn the ending to c2+[]+[], but need not).
|
||||
|
||||
-- cV2 : soundlaw = \s2 -> <"", contractVowels s2.p1 s2.p2> ; -- deprec.
|
||||
|
||||
-- Test
|
||||
SL : Soundlaw = \we -> let S = we.p1 ;
|
||||
E = we.p2 ;
|
||||
sv3 : Str = S.v.p3 + case S.a of { Acute 3 => "'" ;
|
||||
Circum 3 => "~" ;
|
||||
_ => "" } ;
|
||||
sc4 = S.c.p4 ;
|
||||
ec1 = E.c.p1 ;
|
||||
ev = E.v ;
|
||||
in case <sv3,sc4,ec1,ev> of {
|
||||
<_,"","",_> => let vs = cV2 <sv3,ev> ; -- contractVowels
|
||||
S2 = substV3 S vs.p1 ;
|
||||
E2 = toNEnding (vs.p2 + E.c.p2)
|
||||
in <S2, E2> ;
|
||||
<_,?+_,_+("m"|"s"|"t"|"v"),_> =>
|
||||
let cs = mC2 <sc4,ec1> ;
|
||||
S2 = substC4 S cs.p1 ;
|
||||
E2 = substC E <cs.p2,E.c.p2>
|
||||
in <S2, E2> ;
|
||||
_ => we } ;
|
||||
|
||||
substV3 : Word -> Str -> Word = -- assume that w.v.p3 and u are not "" !!
|
||||
\w,u -> { a = case u of { _+"~" => Circum 3 ; _+"'" => Acute 3 ; _ => w.a } ;
|
||||
s = w.s ;
|
||||
v = <w.v.p1, w.v.p2, dropAccent u> ; -- We need u=/=[], else w.v.p1 prevents shifting
|
||||
l = <w.l.p1, w.l.p2, vowelLength (dropAccent u)> ; -- TODO: short "ai" etc
|
||||
c = w.c } ;
|
||||
substC : NEnding -> (Str*Str) -> NEnding =
|
||||
\w,cs -> { a = w.a ; v = w.v ; l = w.l ; c = cs } ;
|
||||
|
||||
-- expl: cc SL <toWord "nanoga'", toNEnding "wn">
|
||||
|
||||
toStrN3 : Word -> Str -> Str =
|
||||
\w,e -> let we = SL (adjustAccent <w, toNEnding e>)
|
||||
in toStrT (concat we) ;
|
||||
|
||||
-- toStrN3 : Word -> Str -> Str = \w,e -> let we = (SL <w, toNEnding e>)
|
||||
-- in toStr (addNEnding we.p1 we.p2) ;
|
||||
|
||||
-- cc let we = (SL <toWord "gene'", toNEnding "i">) in toStr (addNEnding we.p1 we.p2)
|
||||
-- cc toStr (addNEnding (SL <toWord "gene'", toNEnding "i">).p1 (SL <toWord "gene'", toNEnding "i">).p2)
|
||||
-}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user