forked from GitHub/gf-rgl
408 lines
15 KiB
Plaintext
408 lines
15 KiB
Plaintext
--# -path=.:../abstract:../common:../prelude
|
|
|
|
--1 Greek auxiliary operations. -- NOT USED. Remember old version; sound law SL etc
|
|
|
|
resource AccentsGrc = {
|
|
|
|
flags
|
|
optimize = noexpand ;
|
|
|
|
oper
|
|
|
|
-- Accent shift and accent change: the following functions are applied to
|
|
-- the stem in certain cases:
|
|
|
|
-- a) when the Pl Gen ending w~n is added, the accent in the stem
|
|
-- must be removed using dropAccent:
|
|
|
|
dropAccent : Str -> Str = \str -> case str of {
|
|
x + ("'" | "`" | "~") + z => x + z ;
|
|
x + "=" + z => x + "-" + z ;
|
|
_ => str
|
|
} ;
|
|
|
|
|
|
|
|
{- Redesign: ------------------------------------------------------------------
|
|
|
|
Try to extract the patterns of vowel lengths and accent position
|
|
and do the paradigms with less pattern matching.
|
|
|
|
We need the general rule:
|
|
1. the accent position is taken from SgNom, and only moved on demand.
|
|
2. a shift is demanded if a an ending with a long vowel is added and
|
|
the accent position is on the 3rd last vowel.
|
|
3. if an ending with accent is added, the accent in the stem has to be dropped.
|
|
|
|
-- Declension I,II: Word`stock'+NomSgEnding => wordkind includes ending.
|
|
Then from lists of endings *without accent* one takes the accent position
|
|
from the user-provided form(s), and given the vowel length of the ending,
|
|
determines the accent position of other forms
|
|
-- Declension III:
|
|
Wordstem+Ending (wordstem+os=GenSg) => NomSg may have empty ending
|
|
Then the accent position is taken from the NomSg.
|
|
-}
|
|
|
|
oper
|
|
Position : PType = Predef.Ints 3 ;
|
|
|
|
WPat = { syl : Syllability ;
|
|
v : VPat * VPat * VPat ;
|
|
s : Str * Str * Str * Str;
|
|
acnt : Accent
|
|
} ;
|
|
param
|
|
Accent = Acute Position | Circum Position | NoAccent ;
|
|
Syllability = Mono | Bi | Many ;
|
|
|
|
Vowel = A | E | I | O | U | AU | EU | OU | AI | EI | OI | UI |
|
|
-- vowel with spirtus asper/lenis:
|
|
Aa | Ea | Ia | Oa | Ua | AUa | EUa | OUa | AIa | EIa | OIa | UIa |
|
|
Al | El | Il | Ol | Ul | AUl | EUl | OUl | AIl | EIl | OIl | UIl |
|
|
-- vowel with iota subscriptum and spiritus
|
|
Ai | Ei | Oi | Aia | Eia | Oia | Ail | Eil | Oil
|
|
;
|
|
|
|
VPat = NoVowel | Short Vowel | Long Vowel ; -- 1+45+45 = 91 possibilities
|
|
|
|
Spirit = Asper | Lenis | NoSpirit ;
|
|
|
|
oper
|
|
removeLength : Str -> Str = \str -> case str of {
|
|
"a_" | "a." => "a" ; "i_" | "i." => "i" ; "y_" | "y." => "y" ; x => x } ;
|
|
|
|
toStr : VPat -> Str = \vpat -> case vpat of {
|
|
Short A => "a" ;
|
|
Short E => "e" ;
|
|
Short I => "i" ;
|
|
Short O => "o" ;
|
|
Short U => "y" ;
|
|
Long A => "a_" ; -- maybe better "a", since the unicode-table has not a_'
|
|
Long E => "h" ;
|
|
Long I => "i_" ;
|
|
Long O => "w" ;
|
|
Long U => "y_" ;
|
|
Long AU => "ay" ;
|
|
Long EU => "ey" ;
|
|
Long OU => "oy" ;
|
|
Long AI => "ai" ;
|
|
Long EI => "ei" ;
|
|
Long OI => "oi" ;
|
|
Long UI => "yi" ;
|
|
Short AI => "ai" ;
|
|
Short OI => "oi" ;
|
|
-- vowels and diphthongs with spriritus asper:
|
|
Short Aa => "a(" ;
|
|
Short Ea => "e(" ;
|
|
Short Ia => "i(" ;
|
|
Short Oa => "o(" ;
|
|
Short Ua => "y(" ;
|
|
-- Long Aa
|
|
Long Ea => "h(" ;
|
|
-- Long Ia
|
|
Long Oa => "w(" ;
|
|
-- Long Ua
|
|
Long AUa => "ay(" ;
|
|
Long EUa => "ey(" ;
|
|
Long OUa => "oy(" ;
|
|
Long AIa => "ai(" ;
|
|
Long EIa => "ei(" ;
|
|
Long OIa => "oi(" ;
|
|
Long UIa => "yi(" ;
|
|
-- vowels and diphthongs with spriritus lenis:
|
|
Short Al => "a)" ;
|
|
Short El => "e)" ;
|
|
Short Il => "i)" ;
|
|
Short Ol => "o)" ;
|
|
Short Ul => "y)" ;
|
|
-- Long Al
|
|
Long El => "h)" ;
|
|
-- Long Il
|
|
Long Ol => "w)" ;
|
|
-- Long Ul
|
|
Long AUl => "ay)" ;
|
|
Long EUl => "ey)" ;
|
|
Long OUl => "oy)" ;
|
|
Long AIl => "ai)" ;
|
|
Long EIl => "ei)" ;
|
|
Long OIl => "oi)" ;
|
|
Long UIl => "yi)" ;
|
|
-- vowels with iota subscriptum, and spirits
|
|
Long Ai => "a|" ;
|
|
Long Ei => "e|" ;
|
|
Long Oi => "o|" ;
|
|
Long Aia => "a|(" ;
|
|
Long Eia => "h|(" ;
|
|
Long Oia => "w|(" ;
|
|
Long Ail => "a|)" ;
|
|
Long Eil => "h|)" ;
|
|
Long Oil => "w|)" ;
|
|
NoVowel => "" ; -- TODO: trema etc. perhaps I2 = "i-"
|
|
_ => "*" -- ca.30 possibilities
|
|
} ;
|
|
|
|
toVPat : Str -> VPat = \str -> case str of {
|
|
("a."|"a") => Short A ;
|
|
("i."|"i"|"i-"|"i=") => Short I ;
|
|
("y."|"y"|"y-"|"y=") => Short U ;
|
|
"a_" => Long A ;
|
|
"i_" => Long I ;
|
|
"y_" => Long U ;
|
|
"a" => Short A ;
|
|
"e" => Short E ;
|
|
"i" => Short I ;
|
|
"o" => Short O ;
|
|
"y" => Short U ;
|
|
"a" => Long A ; -- maybe better "a", since the unicode-table has not a_'
|
|
"h" => Long E ;
|
|
"i" => Long I ;
|
|
"w" => Long O ;
|
|
"y" => Long U ;
|
|
"ay" => Long AU ;
|
|
"ey" => Long EU ;
|
|
"oy" => Long OU ;
|
|
"ai" => Long AI ;
|
|
"ei" => Long EI ;
|
|
"oi" => Long OI ;
|
|
"yi" => Long UI ;
|
|
-- vowels and diphthongs with spriritus asper:
|
|
"a(" => Short Aa ;
|
|
"e(" => Short Ea ;
|
|
"i(" => Short Ia ;
|
|
"o(" => Short Oa ;
|
|
"y(" => Short Ua ;
|
|
-- Long Aa
|
|
"h(" => Long Ea ;
|
|
-- Long Ia
|
|
"w(" => Long Oa ;
|
|
-- Long Ua
|
|
"ay(" => Long AUa ;
|
|
"ey(" => Long EUa ;
|
|
"oy(" => Long OUa ;
|
|
"ai(" => Long AIa ;
|
|
"ei(" => Long EIa ;
|
|
"oi(" => Long OIa ;
|
|
"yi(" => Long UIa ;
|
|
-- vowels and diphthongs with spriritus lenis:
|
|
"a)" => Short Al ;
|
|
"e)" => Short El ;
|
|
"i)" => Short Il ;
|
|
"o)" => Short Ol ;
|
|
"y)" => Short Ul ;
|
|
-- Long Al
|
|
"h)" => Long El ;
|
|
-- Long Il
|
|
"w)" => Long Ol ;
|
|
-- Long Ul
|
|
"ay)" => Long AUl ;
|
|
"ey)" => Long EUl ;
|
|
"oy)" => Long OUl ;
|
|
"ai)" => Long AIl ;
|
|
"ei)" => Long EIl ;
|
|
"oi)" => Long OIl ;
|
|
"yi)" => Long UIl ;
|
|
-- vowels with iota subscriptum, and spirits
|
|
"a|" => Long Ai ;
|
|
"e|" => Long Ei ;
|
|
"o|" => Long Oi ;
|
|
"a|(" => Long Aia ;
|
|
"h|(" => Long Eia ;
|
|
"w|(" => Long Oia ;
|
|
"a|)" => Long Ail ;
|
|
"h|)" => Long Eil ;
|
|
"w|)" => Long Oil ;
|
|
_ => NoVowel -- TODO: trema etc. perhaps I2 = "i-"
|
|
} ;
|
|
|
|
-- accent ~ can only be on long vowels, hence y~ must become <Long U, Circum> etc:
|
|
|
|
toVPat2 : Str -> Str -> VPat = \o,z ->
|
|
case o of { (#shortV|#restV) => case z of {"~"+_ => toVPat (removeLength o + "_") ;
|
|
_ => toVPat o} ;
|
|
_ => toVPat o } ;
|
|
|
|
toAcntPos : Str -> Accent = \str ->
|
|
case str of { -- we don't check for double accent; the leftmost accent counts
|
|
-- 3 syllables
|
|
("'"|"('"|")'") + _ + "$" + _ + "$" + _ => Acute 1 ;
|
|
("~"|"(~"|")~") + _ + "$" + _ + "$" + _ => Circum 1 ;
|
|
_ + "$" + "'" + _ + "$" + _ => Acute 2 ;
|
|
_ + "$" + "~" + _ + "$" + _ => Circum 2 ;
|
|
_ + "$" + _ + "$" + "'" + _ => Acute 3 ;
|
|
_ + "$" + _ + "$" + "~" + _ => Circum 3 ;
|
|
-- 2 syllables
|
|
("'"|"('"|")'") + _ + "$" + _ => Acute 2 ;
|
|
("~"|"(~"|")~") + _ + "$" + _ => Circum 2 ;
|
|
_ + "$" + "'" + _ => Acute 3 ;
|
|
_ + "$" + "~" + _ => Circum 3 ;
|
|
-- 1 syllable
|
|
("'"|"('"|")'") + _ => Acute 3 ;
|
|
("~"|"(~"|")~") + _ => Circum 3 ;
|
|
-(_ + ("'" | "~" | "`") + _) => NoAccent ;
|
|
_ => Predef.error "Error: double accent"
|
|
} ;
|
|
|
|
-- The order of the patterns in the pattern alternatives is important since
|
|
-- diphthongs match against vowel+vowel, and short vowels.
|
|
-- (Here "ai", "oi" count as long; for end syllables, this is adjusted in addAccent.)
|
|
|
|
wpat : Str -> WPat = \str ->
|
|
let T = "$" ;
|
|
da = dropAccent
|
|
in
|
|
case str of {
|
|
-- monosyllabic words:
|
|
y@(("r(" | "") + #nonvowels)
|
|
+ o@((#diphthong | #longV | #shortV | #restV) + (#spirit | ""))
|
|
+ z@#nonvowels
|
|
=> { syl = Mono ;
|
|
v = <NoVowel, NoVowel, toVPat2 o z> ;
|
|
acnt = toAcntPos z ;
|
|
s = <[] , [] , y , da z>
|
|
} ;
|
|
-- bisyllabic words:
|
|
x@(("r(" | "") + #nonvowels)
|
|
+ e@((#diphthong | #longV | #shortV | #restV) + (#spirit | ""))
|
|
+ y@#nonvowels
|
|
+ o@(#diphthong | #longV | #shortV | #restV)
|
|
+ z@#nonvowels
|
|
=> { syl = Bi ;
|
|
v = <NoVowel , toVPat2 e y, toVPat2 o z> ;
|
|
acnt = toAcntPos (y + T + z) ;
|
|
s = <[] , x , da y , da z>
|
|
} ;
|
|
-- manysyllabic words:
|
|
r@_
|
|
+ a@((#diphthong | #longV | #shortV | #restV) + (#spirit | ""))
|
|
+ x@#nonvowels
|
|
+ e@(#diphthong | #longV | #shortV | #restV)
|
|
+ y@#nonvowels
|
|
+ o@(#diphthong | #longV | #shortV | #restV)
|
|
+ z@#nonvowels
|
|
=> { syl = Many ;
|
|
v = <toVPat2 a x, toVPat2 e y, toVPat2 o z> ;
|
|
acnt = toAcntPos (x + T + y + T + z) ;
|
|
s = <r , da x , da y , da z>
|
|
} ;
|
|
_ => -- Predef.error "vowel/accent pattern not recognized"
|
|
{ syl = Mono ;
|
|
v = <NoVowel,NoVowel,NoVowel> ;
|
|
acnt= NoAccent ;
|
|
s = <"BUGGY","VOWEL","ACCENT","PATTERN">
|
|
}
|
|
} ;
|
|
|
|
-- Add an accent to an unaccentuated string; includes necessary accent changes and shifts
|
|
-- (We don't check that the given string has enough vowels.)
|
|
|
|
merge : (Str * Str * Str * Str) -> (Str * Str * Str) -> Str =
|
|
\cs,vs -> cs.p1 + vs.p1 + cs.p2 + vs.p2 + cs.p3 + vs.p3 + cs.p4 ;
|
|
|
|
glue : (Str * Str * Str * Str )-> (VPat * VPat * VPat) -> Str =
|
|
\cs,vs -> cs.p1 + (toStr vs.p1) + cs.p2 + (toStr vs.p2) + cs.p3 + (toStr vs.p3) + cs.p4 ;
|
|
|
|
addAccent0 : Accent -> Str -> Str = \accent,str -> str ; -- for testing
|
|
|
|
-- addAccent inserts the given accent = (accnt pos) to the vowel at the position,
|
|
-- and for pos=1=3rd last and last vowel is short, puts the accent to 2nd last:
|
|
addAccent : Accent -> Str -> Str = \accent,str ->
|
|
let
|
|
w = wpat str ;
|
|
v1 = toStr w.v.p1 ; -- third last vowel
|
|
v2 = toStr w.v.p2 ; -- second last vowel
|
|
v3 = toStr w.v.p3 ; -- last vowel
|
|
v2s = removeLength v2 ;
|
|
v3s = removeLength v3 ;
|
|
in
|
|
case accent of {
|
|
Acute 3 => merge w.s <v1, v2, v3 + "'"> ;
|
|
Circum 3 => merge w.s <v1, v2, v3s + "~"> ;
|
|
Acute 2 => case <w.v.p2, w.v.p3> of { -- BR 9 4
|
|
<Long u2, Short u3> => merge w.s <v1, v2s + "~", v3> ;
|
|
_ => merge w.s <v1, v2 + "'", v3>
|
|
} ;
|
|
Circum 2 => merge w.s <v1, v2s + "~", v3> ; -- change to ' for v2 short?
|
|
Acute 1 => case <w.v.p3, w.s.p4> of {
|
|
<Short _, _> => merge w.s <v1 + "'", v2, v3> ; -- BR 9 1.b
|
|
<Long AI,""> => merge w.s <v1 + "'", v2, v3> ; -- BR 29.7 declension ; conjug?
|
|
<Long OI,""> => merge w.s <v1 + "'", v2, v3> ; -- BR 29.7 declension ; conjug?
|
|
_ => merge w.s <v1, v2 + "'", v3> -- sometimes changes to "~" needed?
|
|
} ;
|
|
_ => -- Predef.error ("Illegal accentuation for " ++ str)
|
|
str
|
|
} ;
|
|
|
|
-- from accent position in stem to accent position in (stem+long ending): ?
|
|
-- stempos1,stempos2,stempos3 => (stem+end)pos1,(stem+end)pos2,(stem+end)pos3
|
|
|
|
addAccent' : Accent -> Str -> Str = \accent,str ->
|
|
let
|
|
accent' = case accent of {Acute 2 => Acute 1 ;
|
|
Circum 2 => Acute 1 ;
|
|
Acute 3 => Acute 2 ;
|
|
Circum 3 => Circum 2 ;
|
|
_ => accent}
|
|
in addAccent accent' str ;
|
|
-}
|
|
-- --------------------------------------------------------------------------------
|
|
|
|
{- Sound laws on structured string (Example)
|
|
|
|
-- Problem:
|
|
-- In addNEnding, an accent on the ending overwrites accents on the stem.
|
|
-- But if vowel contraction <e',ei> => <[],ei~> deletes the final vowel of
|
|
-- the stem, one would need to shift stem parts to the right:
|
|
-- c1+v1+c2+v2+c3+v3+[]++[]+v+c2 => c1+v1+c2+v2+c3+[]+[]++[]+(v3+v)+c2
|
|
-- to avoid the adjacent []+[]. But c1 may contain vowels, and the number of
|
|
-- syllables is not exactly known!
|
|
-- If we use c1+v1+c2+v2+c3+v3+[]++[]+v+c2 => c1+v1+c2+v2+c3+(v3+v)+[]++[]+[]+c2,
|
|
-- as with <e',ei> => <ei~,[]>, we have to adjust the accent in the stem. (ToDo)
|
|
-- (could also turn the ending to c2+[]+[], but need not).
|
|
|
|
-- cV2 : soundlaw = \s2 -> <"", contractVowels s2.p1 s2.p2> ; -- deprec.
|
|
|
|
-- Test
|
|
SL : Soundlaw = \we -> let S = we.p1 ;
|
|
E = we.p2 ;
|
|
sv3 : Str = S.v.p3 + case S.a of { Acute 3 => "'" ;
|
|
Circum 3 => "~" ;
|
|
_ => "" } ;
|
|
sc4 = S.c.p4 ;
|
|
ec1 = E.c.p1 ;
|
|
ev = E.v ;
|
|
in case <sv3,sc4,ec1,ev> of {
|
|
<_,"","",_> => let vs = cV2 <sv3,ev> ; -- contractVowels
|
|
S2 = substV3 S vs.p1 ;
|
|
E2 = toNEnding (vs.p2 + E.c.p2)
|
|
in <S2, E2> ;
|
|
<_,?+_,_+("m"|"s"|"t"|"v"),_> =>
|
|
let cs = mC2 <sc4,ec1> ;
|
|
S2 = substC4 S cs.p1 ;
|
|
E2 = substC E <cs.p2,E.c.p2>
|
|
in <S2, E2> ;
|
|
_ => we } ;
|
|
|
|
substV3 : Word -> Str -> Word = -- assume that w.v.p3 and u are not "" !!
|
|
\w,u -> { a = case u of { _+"~" => Circum 3 ; _+"'" => Acute 3 ; _ => w.a } ;
|
|
s = w.s ;
|
|
v = <w.v.p1, w.v.p2, dropAccent u> ; -- We need u=/=[], else w.v.p1 prevents shifting
|
|
l = <w.l.p1, w.l.p2, vowelLength (dropAccent u)> ; -- TODO: short "ai" etc
|
|
c = w.c } ;
|
|
substC : NEnding -> (Str*Str) -> NEnding =
|
|
\w,cs -> { a = w.a ; v = w.v ; l = w.l ; c = cs } ;
|
|
|
|
-- expl: cc SL <toWord "nanoga'", toNEnding "wn">
|
|
|
|
toStrN3 : Word -> Str -> Str =
|
|
\w,e -> let we = SL (adjustAccent <w, toNEnding e>)
|
|
in toStrT (concat we) ;
|
|
|
|
-- toStrN3 : Word -> Str -> Str = \w,e -> let we = (SL <w, toNEnding e>)
|
|
-- in toStr (addNEnding we.p1 we.p2) ;
|
|
|
|
-- cc let we = (SL <toWord "gene'", toNEnding "i">) in toStr (addNEnding we.p1 we.p2)
|
|
-- cc toStr (addNEnding (SL <toWord "gene'", toNEnding "i">).p1 (SL <toWord "gene'", toNEnding "i">).p2)
|
|
-}
|
|
|
|
}
|