Pinyin versions of Chinese, named Cmn; mostly automatically generated

This commit is contained in:
aarne
2012-10-15 12:51:55 +00:00
parent 71c1a06ff8
commit 20ff29a8d6
28 changed files with 1812 additions and 2 deletions

View File

@@ -1,5 +1,5 @@
lang1 = "Eng"
lang2 = "Tha"
lang2 = "Chi"
-- to write a comparison for two languages

View File

@@ -230,3 +230,64 @@ Lexicon and Structural checked and completed by Jolene. Some open issues found b
GF/lib/src/chinese/ complete and compilable! Added to darcs by AR.
Issues from
Yip-Po Ching and Don Rimmington,
Basic Chinese. A Grammar and Workbook,
Routledge,
London and New York,
2009.
p. 4 the dun-comma in lists
p. 28 "who is X" vs. "X is who"
p. 38 er -> liang before a measure word
p. 41 ordinals require measure words: di yi ge xuesheng
p. 44 how many - duoshao vs. yi
p. 63 possessive precedes indefinite plural: wo de hen duo pengyou "many of my friends"
p. 94 degree + adjective + de + noun: hen da de wuzi
p. 95 no copula in adjectival predication
p. 96 adjectives negated by bu
p. 97 AB -> AABB reduplication
p. 104 non-gradable adjectives require shi: zhei tiao yu shi huo de
p. 106 it was Chinese that I studied
p. 116 I did it better than you
p. 128 disyllabic place words
p. 155-158 mapping tenses to aspects
p. 168 "can": hui/neng
p. 174 negation and tense
p. 185 yes/no
p. 186 alternation questions
p. 197 "or" haishi/huozhe
p. 206 "please" in imperative
p. 207 "let's" zanmen he yi bei ba
p. 242 coverbs
p. 255 disyllabic prepositions

View File

@@ -48,7 +48,7 @@ whatSg_IP, whatPl_IP = mkIPL " 什么" ;
where_IAdv = mkIAdvL "哪里" ;
when_IAdv = mkIAdvL "什么时候" ;
how_IAdv = mkIAdvL "如何" ;
all_Predet = ss "所有" ;
all_Predet = ssword "所有" ;
many_Det = mkDet "多" Pl ;
someSg_Det = mkDet (word "一些") Sg ;
somePl_Det = mkDet (word "一些") Sg ;

View File

@@ -0,0 +1,26 @@
-- Pinyin (Cmn) linearizations for the Adjective module. Every rule except
-- PositA and UseA2 wraps a concatenation of argument strings in complexAP
-- (not defined in this module; presumably supplied by the opened ResCmn).
concrete AdjectiveCmn of Adjective = CatCmn ** open ResCmn, Prelude in {
lin
-- Positive degree: the lexical adjective record is passed through unchanged.
PositA a = a ;
-- Comparison with a standard: marker and standard precede the adjective.
-- The disabled variant below placed the adjective first.
--ComparA a np = complexAP (a.s ++ than_s ++ np.s) ;
ComparA a np = complexAP (than_s ++ np.s ++ a.s) ;
-- Comparative without a standard: geng_s followed by the adjective.
UseComparA a = complexAP (geng_s ++ a.s) ;
-- An ordinal used as an adjectival phrase.
AdjOrd ord = complexAP ord.s ;
-- Comparison adverb: adjective phrase, both parts of the adverb (s, p), then the standard.
CAdvAP ad ap np = complexAP (ap.s ++ ad.s ++ ad.p ++ np.s) ;
-- Two-place adjective: the complement is attached through the adjective's own c2.
ComplA2 a np = complexAP (a.s ++ appPrep a.c2 np.s) ;
-- Reflexive variant of ComplA2: the complement is reflPron.
ReflA2 a = complexAP (a.s ++ appPrep a.c2 reflPron) ;
-- Adjective phrase followed by a sentential complement.
SentAP ap sc = complexAP (ap.s ++ sc.s) ;
-- Adjective modifier placed before the adjective phrase.
AdAP ada ap = complexAP (ada.s ++ ap.s) ;
-- Two-place adjective used without a complement: the record is reused as is.
UseA2 a = a ;
}

View File

@@ -0,0 +1,19 @@
-- Pinyin (Cmn) linearizations for the Adverb module. Each rule yields a string
-- plus an advType tag (ATManner or ATPlace here), matching the Adv lincat
-- {s : Str ; advType : AdvType}.
concrete AdverbCmn of Adverb = CatCmn **
open ResCmn, Prelude in {
lin
-- Adjective used as a manner adverb: same string, tagged ATManner.
PositAdvAdj a = {s = a.s ; advType = ATManner} ;
-- Prepositional phrase; always tagged ATPlace for now (see the open question in the trailing note).
PrepNP prep np = ss (appPrep prep np.s) ** {advType = ATPlace} ; --- should depend on prep, np ? or treat in ExtraCmn ?
-- Comparative adverbs: adjective, both parts of the comparison adverb, then the NP or sentence standard.
ComparAdvAdj cadv a np = ss (a.s ++ cadv.s ++ cadv.p ++ np.s) ** {advType = ATManner} ;
ComparAdvAdjS cadv a s = ss (a.s ++ cadv.s ++ cadv.p ++ s.s) ** {advType = ATManner} ;
-- Modified adverb. The second argument `ad` is the one carrying advType, i.e. the
-- adverb; the first argument `adv` is the modifier, and it is emitted AFTER the adverb.
-- NOTE(review): AdAP in AdjectiveCmn puts the modifier first (ada.s ++ ap.s); confirm
-- that the opposite order here is intended and not a slip caused by the swapped names.
AdAdv adv ad = ss (ad.s ++ adv.s) ** {advType = ad.advType} ;
-- Subordinate clause: the sentence is wrapped between the two parts of the subjunction.
SubjS subj s = ss (subj.prePart ++ s.s ++ subj.sufPart) ** {advType = ATManner} ;
-- Comparison adverb turned into a numeral modifier; marked as provisional by the original author.
AdnCAdv cadv = ss (cadv.s ++ conjThat) ** {advType = ATManner} ; -----
}

View File

@@ -0,0 +1,3 @@
--# -path=.:../abstract:../common:prelude
-- Top-level pinyin (Cmn) grammar: the concrete syntax of AllChiAbs, assembled
-- from LangCmn and ExtraCmn with no rules of its own.
concrete AllCmn of AllChiAbs = LangCmn, ExtraCmn ;

View File

@@ -0,0 +1,84 @@
-- Linearization types for the pinyin (Cmn) grammar. CommonX is inherited
-- except for Tense, Temp and Adv, which are redefined in this module.
-- Record types such as Clause, VP, Adj, Noun, NP, Determiner, Verb and
-- Preposition are not defined here; they come from the opened ResCmn.
concrete CatCmn of Cat = CommonX - [Tense, Temp, Adv] ** open ResCmn, Prelude in {
lincat
-- Tensed/Untensed
-- Finished sentences are plain strings; SSlash also records the pending preposition.
S = {s : Str} ;
QS = {s : Str} ;
RS = {s : Str} ;
SSlash = {s : Str ; c2 : Preposition} ;
-- Sentence
Cl = Clause ; -- {s : Polarity => Aspect => Str ; np: Str ; vp: Polarity => Aspect => Str} ;
ClSlash = Clause ** {c2 : Preposition} ;
-- Imperatives vary by polarity only.
Imp = {s : Polarity => Str} ;
-- Question
-- Question clauses vary by polarity and aspect.
QCl = {s : Polarity => Aspect => Str} ;
IP = {s : Str} ;
IComp = {s : Str} ;
IDet, IQuant = {s : Str} ;
-- Relative
RCl = {s : Polarity => Aspect => Str} ;
RP = {s : Str} ;
-- Verb
-- VP and Comp share one record type; VPSlash adds the preposition of the missing object.
VP = ResCmn.VP ;
Comp = ResCmn.VP ;
VPSlash = ResCmn.VP ** {c2 : Preposition} ;
-- Adjective
AP = ResCmn.Adj ;
-- Noun
CN = ResCmn.Noun ;
NP, Pron = ResCmn.NP ;
Det, Quant = Determiner ;
Predet = {s : Str} ; ----
Ord = {s : Str} ;
-- numType and advType are tags stored alongside the string.
Num = {s : Str ; numType : NumType} ;
Adv = {s : Str ; advType : AdvType} ;
-- Numeral
Numeral, Card, Digits = {s : Str} ;
-- Structural
-- A conjunction provides a pair of strings (s1, s2) for each ConjForm.
Conj = {s : ConjForm => {s1,s2 : Str}} ;
-- A subjunction has a part placed before and a part placed after its clause.
Subj = {prePart : Str ; sufPart : Str} ;
Prep = Preposition ;
-- Open lexical classes, e.g. Lexicon
-- Verbs with objects extend Verb with one (c2) or two (c2, c3) prepositions.
V, VS, VQ, VA = Verb ;
V2, V2Q, V2S = Verb ** {c2 : Preposition} ;
V3, V2A, V2V = Verb ** {c2, c3 : Preposition} ;
VV = Verb ;
A = ResCmn.Adj ;
A2 = ResCmn.Adj ** {c2 : Preposition} ;
N = ResCmn.Noun ;
N2 = ResCmn.Noun ** {c2 : Preposition} ;
N3 = ResCmn.Noun ** {c2,c3 : Preposition} ;
PN = ResCmn.NP ;
-- overridden
-- Replacements for the CommonX categories excluded in the header: both carry an Aspect in field t.
Temp = {s : Str ; t : Aspect} ;
Tense = {s : Str ; t : Aspect} ;
}

View File

@@ -0,0 +1,34 @@
-- Pinyin (Cmn) coordination rules. Lists are kept as two-string records
-- {s1,s2} and combined with the helpers of the opened Coordination module
-- (conjunctDistrSS, twoSS, consrSS).
concrete ConjunctionCmn of Conjunction = CatCmn ** open ResCmn, Prelude, Coordination in {
lin
-- The conjunction form is chosen per category: CSent for S, Adv and RS,
-- CPhr CNPhrase for NP, CPhr CAPhrase for AP.
ConjS c = conjunctDistrSS (c.s ! CSent) ;
-- Coordinated adverbs are always tagged ATPlace; the original author marked this as uncertain.
ConjAdv c as = conjunctDistrSS (c.s ! CSent) as ** {advType = ATPlace} ; ---- ??
ConjNP c = conjunctDistrSS (c.s ! CPhr CNPhrase) ;
-- Coordinated adjective phrases are marked monoSyl = False.
ConjAP c as = conjunctDistrSS (c.s ! CPhr CAPhrase) as ** {monoSyl = False} ;
ConjRS c = conjunctDistrSS (c.s ! CSent) ;
-- These fun's are generated from the list cat's.
-- Base* builds a two-element list; Cons* prepends an element, separated by thcomma.
BaseS = twoSS ;
ConsS = consrSS thcomma ;
BaseAdv = twoSS ;
ConsAdv = consrSS thcomma ;
BaseNP = twoSS ;
ConsNP = consrSS thcomma ;
BaseAP = twoSS ;
ConsAP = consrSS thcomma ;
BaseRS = twoSS ;
ConsRS = consrSS thcomma ;
lincat
-- All list categories share the same two-string shape.
[S] = {s1,s2 : Str} ;
[Adv] = {s1,s2 : Str} ;
[NP] = {s1,s2 : Str} ;
[AP] = {s1,s2 : Str} ;
[RS] = {s1,s2 : Str} ;
oper
-- Separator between list elements; currently the empty string.
thcomma : Str = [] ; ---- should be a space
}

View File

@@ -0,0 +1,7 @@
-- Pinyin (Cmn) concrete syntax for ExtraChiAbs. It currently contributes only
-- the linearization type of the Aspect category.
concrete ExtraCmn of ExtraChiAbs = CatCmn **
open ResCmn, Prelude in {
lincat
-- An aspect is a string paired with a value of the ResCmn.Aspect parameter type.
Aspect = {s : Str ; a : ResCmn.Aspect} ;
}

View File

@@ -0,0 +1,22 @@
--# -path=.:../abstract:../common:prelude
-- Pinyin (Cmn) concrete syntax of Grammar, assembled from the per-category
-- modules listed below; it adds only compiler flags.
concrete GrammarCmn of Grammar =
NounCmn,
VerbCmn,
AdjectiveCmn,
AdverbCmn,
NumeralCmn,
SentenceCmn,
QuestionCmn,
RelativeCmn,
ConjunctionCmn,
PhraseCmn,
TextCmn,
StructuralCmn,
IdiomCmn,
TenseCmn
** {
-- Start category is Phr; lexer and unlexer are both set to text.
flags startcat = Phr ; unlexer = text ; lexer = text ;
} ;

View File

@@ -0,0 +1,27 @@
-- Pinyin (Cmn) linearizations of idiomatic constructions. Most rules build a
-- clause with mkClause; rules whose trailing comment starts with ---- were
-- marked provisional by the original author.
concrete IdiomCmn of Idiom = CatCmn ** open Prelude, ResCmn in {
lin
---- formal subject, e.g. it is hot ?? now empty subject
ImpersCl vp = mkClause [] vp ;
--can be empty, or ImpersCl vp = mkClause "天" vp ; but "天" only used to describe weather(e.g. it's raining)
---- one wants to learn Chinese ?? now empty subject
GenericCl vp = mkClause [] vp ;
-- GenericCl vp = mkClause "有人" vp ; (meaning: there is a person)
---- it is John who did it
-- Cleft on a noun phrase: the NP is the subject, copula the verb, the relative sentence the complement.
CleftNP np rs = mkClause np.s copula rs.s ;
-- Cleft on an adverb: the sentence is inserted as object of a copula verb phrase.
CleftAdv ad s = mkClause ad.s (insertObj s (predV copula)) ; ---- it is here she slept
-- Existentials: empty subject, a regular verb built from you_s, and the NP/IP string as complement.
ExistNP np = mkClause [] (regVerb you_s) np.s ; ---- infl of you
-- Only the s field of the clause is kept for the interrogative variant.
ExistIP ip = {s = (mkClause [] (regVerb you_s) ip.s).s} ; ---- infl of you
-- Progressive: the verb phrase is returned unchanged.
ProgrVP vp = vp ; ----
-- First-person plural imperative: just the infVP form of the verb phrase.
ImpPl1 vp = ss (infVP vp) ; ----
}

View File

@@ -0,0 +1,11 @@
--# -path=.:../abstract:../common:../prelude
-- Pinyin (Cmn) concrete syntax of Lang: the grammar (GrammarCmn) together
-- with the lexicon (LexiconCmn).
concrete LangCmn of Lang =
GrammarCmn,
LexiconCmn
** {
-- NOTE(review): the unlexer here is concat while GrammarCmn uses text, and this
-- file's path pragma names ../prelude where GrammarCmn and AllCmn name prelude;
-- confirm both differences are intended for the pinyin output.
flags startcat = Phr ; unlexer = concat ; lexer = text ;
} ;

View File

@@ -0,0 +1,458 @@
-- Pinyin (Cmn) lexicon. The file was generated from the character lexicon by
-- replacing every character with the FIRST reading listed for it, which gives
-- a wrong syllable for characters with several readings. The readings that
-- were wrong for the word at hand have been corrected by hand:
--   * classifiers: "qi2" -> "zhi1", "kuai1" -> "kuai4", "pian1" -> "pian4",
--     and "shan1" -> "shan4" for doors and windows;
--   * words: kan4 (see/watch), hao4qi2, yue1han4, zhong4, tou2fa4, du4zi3,
--     bei4, xin1zang4, da3 (da3jia4, da3lie4, da3, da3po4), luo4xia4,
--     shuo1 (say/speak/talk, previously identical to "shui4" = sleep),
--     jie2bing1, shi2tou2, di4qiu2, guang1hua2, hui2da2, ping2guo3,
--     piao4liang4, zi4xing2che1, lan2, zhong4yao4, yin1yue4, hao4ma3, du2,
--     xue2xiao4, lu:3xing2, di4tan3, jiao4tang2, di4ban3, zong1jiao4,
--     jian3dan1, jie2hun1.
-- Citation tones on neutral-tone syllables (e.g. "zi3"), regionally varying
-- readings and commented-out entries are left as generated.
-- Convention: mkN takes the noun and an optional classifier; [] means the
-- noun is used without a classifier.
concrete LexiconCmn of Lexicon = CatCmn **
open ParadigmsCmn, ResCmn, Prelude in {
flags
coding = utf8 ;
lin
-- LexiconCmn
man_N = mkN "nan2ren2" "ge4"; -- "nanren" "ge" first being noun, second is classifier(counter)
woman_N = mkN "nu:3ren2" "ge4"; -- "nvren" "ge" classifier behaves like the "cup" in "cup of tea"
house_N = mkN "fang2zi3" "jian1"; -- "fangzi" "jian"
tree_N = mkN "shu4" "ke1"; -- "shu" "ke"
big_A = mkA "da4" ; -- "da"
small_A = mkA "xiao3" ; -- "xiao"
green_A = mkA "lu:4" ; -- "lv"
walk_V = mkV "zou3" ; -- "zou"
sleep_V = mkV "shui4" ; -- "shui"
---- arrive_V = mkV "dao4" "le" [] [] "guo4"; -- "dao"
love_V2 = mkV2 "ai4" ; -- "ai"
watch_V2 = mkV2 "kan4" ; -- "kan"
--- please2_V2 = mkV "ma2fan2" ; -- "mafan"
--- believe_VS = mkV "xiang1xin4" ; -- "xiangxin"
know_VS = mkV "zhi1dao4" ; -- "zhidao"
wonder_VQ = mkV "hao4qi2" ; -- "haoqi"
john_PN = mkPN "yue1han4" ; -- "yuehan"
--- mary_PN = mkPN "ma3li2" ; -- "mali"
-- Swadesh
--big_A = mkA "da4" ;
long_A = mkA "chang2" ;
wide_A = mkA "kuan1" ;
thick_A = mkA "hou4" ;
heavy_A = mkA "zhong4" ;
--small_A = mkA "xiao3" ;
short_A = mkA "duan3" ;
narrow_A = mkA "zhai3" ;
thin_A = mkA "bo2" ; -- [mark] for person mkA "shou4"
--woman_N = mkN "nu:3ren2" ;
--man(adult_N = mkN "nan2ren2" ;
--man(human_N = mkN "ren2" ;
child_N = mkN "hai2zi3" ;
wife_N = mkN "qi1zi3" ;
husband_N = mkN "zhang4fu1" ;
--father_N = mkN "fu4qin1" ;
animal_N = mkN "dong4wu4" "zhi1"; -- [mark] added classifier for nouns
fish_N = mkN "yu2" "tiao2";
bird_N = mkN "niao3" "zhi1";
dog_N = mkN "gou3" "zhi1";
louse_N = mkN "shi1" "zhi1";
snake_N = mkN "she2" "tiao2";
worm_N = mkN "chong2" "zhi1";
--tree_N = mkN "shu4" ;
forest_N = mkN "sen1lin2" "pian4";
stick_N = mkN "shu4zhi1" ;
fruit_N = mkN "shui3guo3" ;
seed_N = mkN "zhong3zi3" "li4";
leaf_N = mkN "ye4zi3" "pian4"; -- [mark] "ye4" -> "ye4zi3" , "ye4" is often treated as morpheme
root_N = mkN "shu4gen1" ; -- [mark] "gen1" --> "shu4gen1"(tree root)
bark_N = mkN "shu4pi2" "kuai4";
flower_N = mkN "hua1" "duo3";
grass_N = mkN "cao3" "ke1";
rope_N = mkN "sheng2" "gen1";
skin_N = mkN "pi2" "kuai4";
meat_N = mkN "rou4" "kuai4";
blood_N = mkN "xie3" "di1"; -- [mark] several classifiers, "di1"(drop), "tan1"(puddle)
bone_N = mkN "gu3tou2" "kuai4"; -- [mark] "gu3" -> "gu3tou2" , "gu3" is often treated as morpheme
fat_N = mkN "zhi1fang2" "dui1"; -- [mark] often without classifier
egg_N = mkN "dan4" "ke1";
horn_N = mkN "jiao3" "gen1";
tail_N = mkN "wei3ba1" "tiao2"; -- [mark] "wei3" -> "wei3ba1" , "wei3" is often treated as morpheme, or if stands alone, it is a classifier itself
feather_N = mkN "yu3mao2" "gen1";
hair_N = mkN "tou2fa4" "gen1"; -- [mark] several classifiers , "gen1"(single hair), "ba3"(several hairs)
head_N = mkN "tou2" "ke1";
ear_N = mkN "er3duo3" "zhi1";
eye_N = mkN "yan3jing1" "zhi1";
nose_N = mkN "bi2zi3" ;
mouth_N = mkN "zui3" "zhang1";
tooth_N = mkN "ya2chi3" "ke1";
tongue_N = mkN "she2" "gen1";
fingernail_N = mkN "zhi3jia3" "pian4";
foot_N = mkN "jiao3" "zhi1";
leg_N = mkN "tui3" "tiao2";
knee_N = mkN "xi1gai4" ; -- [mark] "xi1" -> "xi1gai4"
hand_N = mkN "shou3" "zhi1";
wing_N = mkN "chi4bang3" "zhi1"; -- [mark] "yi4" -> "chi4bang3", "chi4bang3" is the common form for wing.
belly_N = mkN "du4zi3" ;
guts_N = mkN "chang2zi3" "gen1";
neck_N = mkN "bo2zi3" ;
back_N = mkN "bei4" ;
breast_N = mkN "xiong1" ;
heart_N = mkN "xin1zang4" "ke1";
liver_N = mkN "gan1" ;
drink_V2 = mkV2 "he1" ;
eat_V2 = mkV2 "chi1" ;
bite_V2 = mkV2 "yao3" ;
suck_V2 = mkV2 "xi1" ;
spit_V = mkV "tu3" ;
vomit_V = mkV "ou3" ;
blow_V = mkV "chui1" ;
breathe_V = mkV "hu1xi1" ;
laugh_V = mkV "xiao4" ;
see_V2 = mkV2 "kan4" ;
hear_V2 = mkV2 "ting1" ;
--know_V = mkV "zhi1dao4" ;
think_V = mkV "xiang3" ;
smell_V = mkV "wen2" ; -- [mark] "xiu4" -> "wen2", "wen2" is the common form for smell.
fear_V2 = mkV2 "pa4" ;
--sleep_V = mkV "shui4" ;
live_V = mkV "huo2" ;
die_V = mkV "si3" ;
kill_V2 = mkV2 "sha1" ;
fight_V2 = mkV2 "da3jia4" ; -- [mark] "chao3jia4" -> "da3jia4", "chao3jia4" = quarrel, argue
hunt_V2 = mkV2 "da3lie4" ; -- [mark] "da3lie4" is iv, can't think of proper translation in v2 form for hunt
hit_V2 = mkV2 "da3" ;
cut_V2 = mkV2 "ge1" ;
split_V2 = mkV2 "pi1kai1" ;
stab_V2 = mkV2 "ci4" ;
scratch_V2 = mkV2 "sao1" ;
dig_V = mkV "wa1" ;
swim_V = mkV "you2yong3" ;
fly_V = mkV "fei1" ;
--walk_V = mkV "zou3" ;
come_V = mkV "lai2" ;
lie_V = mkV "tang3" ;
sit_V = mkV "zuo4" ;
stand_V = mkV "zhan4" ;
turn_V = mkV "zhuan3" ;
fall_V = mkV "luo4xia4" ;
hold_V2 = mkV2 "wo4" ;
squeeze_V2 = mkV2 "ji3" ;
rub_V2 = mkV2 "rou2" ;
wash_V2 = mkV2 "xi3" ;
wipe_V2 = mkV2 "ca1" ;
pull_V2 = mkV2 "la1" ;
push_V2 = mkV2 "tui1" ;
throw_V2 = mkV2 "reng1" ;
tie_V2 = mkV2 "bang3" ;
sew_V = mkV "feng2" ;
count_V2 = mkV2 "shu3" ;
say_VS = mkVS (mkV "shuo1") ;
sing_V = mkV "chang4" ;
play_V = mkV "wan2" ;
float_V = mkV "fu2" ;
flow_V = mkV "liu2" ;
freeze_V = mkV "jie2bing1" ;
swell_V = mkV "peng2zhang4" ;
sun_N = mkN "tai4yang2" ;
moon_N = mkN "yue4liang4" ;
star_N = mkN "xing1xing1" "ke1";
water_N = mkN "shui3" "di1";
rain_N = mkN "yu3" "chang3";
river_N = mkN "he2" "tiao2";
lake_N = mkN "hu2" ;
sea_N = mkN "hai3" "pian4";
salt_N = mkN "yan2" "ping2";
stone_N = mkN "shi2tou2" "kuai4";
sand_N = mkN "sha1" "li4";
dust_N = mkN "chen2tu3" [];
earth_N = mkN "di4qiu2" ;
cloud_N = mkN "yun2" "duo3";
fog_N = mkN "wu4" "chang3";
sky_N = mkN "tian1kong1" "pian4";
wind_N = mkN "feng1" "zhen4";
snow_N = mkN "xue3" "chang3";
ice_N = mkN "bing1" "kuai4";
smoke_N = mkN "yan1" "zhen4";
fire_N = mkN "huo3" "chang3";
ashes_N = mkN "hui1" [];
burn_V = mkV "shao1" ;
road_N = mkN "lu4" "tiao2";
mountain_N = mkN "shan1" "zuo4";
red_A = mkA "hong2" ;
--green_A = mkA "lu:4" ;
yellow_A = mkA "huang2" ;
white_A = mkA "bai2" ;
black_A = mkA "hei1" ;
night_N = mkN "ye4wan3" ; -- [mark] "ye4wan3" 's classifier is "ge4"
day_N = mkN "bai2tian1" []; -- [mark] "bai2tian1" -> "tian1", "tian1" itself is classifier
year_N = mkN "nian2" [] ; -- [mark] "nian2" itself is classifier
warm_A = mkA "wen1nuan3" ;
cold_A = mkA "leng3" ;
full_A = mkA "man3" ;
new_A = mkA "xin1" ;
old_A = mkA "lao3" ; -- [mark] "lao3" for person, "jiu4" for things
good_A = mkA "hao3" ;
bad_A = mkA "huai4" ;
rotten_A = mkA "lan4" ;
dirty_A = mkA "zang1" ;
straight_A = mkA "zhi2" ;
round_A = mkA "yuan2" ;
sharp_A = mkA "jian1" ;
dull_A = mkA "dun4" ;
smooth_A = mkA "guang1hua2" ;
wet_A = mkA "shi1" ;
dry_A = mkA "gan1" ;
correct_A = mkA "dui4" ;
near_A = mkA "jin4" ;
far_A = mkA "yuan3" ;
left_Ord = ss "zuo3" ;
right_Ord = ss "you4" ;
name_N = mkN "ming2zi4" ; -- [mark] "ming2" --> "ming2zi4"
-- HSK
add_V3 = mkV3 "jia1" ;
airplane_N = mkN "fei1ji1" "jia4";
already_Adv = mkAdv "yi3jing1" ;
answer_V2S = mkV2S (mkV "hui2da2") ;
apple_N = mkN "ping2guo3" ;
art_N = mkN "yi4shu4" []; -- [mark] usually without classifier
ask_V2Q = mkV2Q (mkV "wen4") ;
bank_N = mkN "yin2hang2" "jian1";
beautiful_A = mkA "piao4liang4" ;
become_VA = mkV "bian4" ;
beer_N = mkN "pi2jiu3" "bei1";
bike_N = mkN "zi4xing2che1" "tai2";
blue_A = mkA "lan2" ;
boat_N = mkN "chuan2" "sao1";
book_N = mkN "shu1" "ben3";
bread_N = mkN "mian4bao1" ;
buy_V2 = mkV2 "mai3" ;
cap_N = mkN "mao4zi3" "ding3";
car_N = mkN "qi4che1" "tai2";
chair_N = mkN "yi3zi3" "ba3";
city_N = mkN "cheng2shi4" ; -- [mark] "shi4" --> "cheng2shi4"
clean_A = mkA "gan1jing4" ;
coat_N = mkN "yi1fu2" "jian4";
country_N = mkN "guo2jia1" ; -- [mark] "guo2" --> "guo2jia1"
cow_N = mkN "niu2" "tou2";
do_V2 = mkV2 "zuo4" ;
doctor_N = mkN "yi1sheng1" "ming2";
door_N = mkN "men2" "shan4";
---easy_A2V = mkA "rong2yi4" ;
factory_N = mkN "gong1chang3" "jian1";
far_Adv = mkAdv "yuan3" ;
---father_N2 = mkN2 "fu4qin1" ;
fear_VS = mkVS (mkV "pa4") ;
find_V2 = mkV2 "fa1xian4" ;
forget_V2 = mkV2 "wang4" ;
friend_N = mkN "peng2you3" ;
girl_N = mkN "gu1niang2" ;
give_V3 = mkV3 "gei3" ;
go_V = mkV "qu4" ;
--go_N = mkN "wang3" ;
grammar_N = mkN "yu3fa3" ;
hat_N = mkN "mao4zi3" "ding3";
--take_N = mkN "dai4" ;
--have_N = mkN "you3" ;
--take_N = mkN "dai4" ;
--have_N = mkN "you3" ;
hill_N = mkN "shan1" "zuo4";
hope_VS = mkV "xi1wang4" ;
horse_N = mkN "ma3" "pi1";
hot_A = mkA "re4" ;
--how many_N = mkN "duo1shao3" ;
important_A = mkA "zhong4yao4" ;
--heavy_A = mkA "chong2" ;
industry_N = mkN "gong1ye4" [];
jump_V = mkV "tiao4" ;
know_V2 = mkV2 "zhi1dao4" ;
know_VQ = mkV "zhi1dao4" ;
lamp_N = mkN "deng1" "zhan3";
language_N = mkN "yu3yan2" "zhong3";
learn_V2 = mkV2 "xue2" ;
--leave_N = mkN "li2kai1" ;
leave_V2 = mkV2 "li2kai1" ; --[mark] "li2" --> "li2kai1", "li2" itself is either a morpheme, or a marker indicating distance
--walk_N = mkN "zou3" ;
like_V2 = mkV2 "xi3huan1" ;
--be willing_N = mkN "yuan4yi4" ;
listen_V2 = mkV2 "ting1" ;
lose_V2 = mkV2 "diu1" ;
--love_N = mkN "xi3huan1" ;
love_V2 = mkV2 "ai4" ;
---mother_N2 = mkN2 "ma1" ;
music_N = mkN "yin1yue4" [] ; -- [mark] usually without classifier
newspaper_N = mkN "bao4zhi3" "zhang1"; --[mark] "bao4" --> "bao4zhi3"
now_Adv = mkAdv "xian4zai4" ;
number_N = mkN "hao4ma3" ; -- [mark] "hao4" --> "hao4ma3"
open_V2 = mkV2 "kai1" ;
paint_V2A = mkV2A (mkV "hua4") ;
paper_N = mkN "zhi3" "zhang1";
--place_N = mkN "defang1" ;
--part_N = mkN "bu4fen1" ;
pen_N = mkN "bi3" "zhi1";
--pen_N = mkN "gang1bi3" "qi2";
person_N = mkN "ren2" ;
--beat_N = mkN "da2" ;
--pull_N = mkN "la1" ;
play_V2 = mkV2 "wan2" ; --[mark] "wan2er2" --> "wan2"
--perform_N = mkN "biao3yan3" ;
--have_N = mkN "you3" ;
put_V2 = mkV2 "bai3" ;
--let go_N = mkN "fang4" ;
question_N = mkN "wen4ti2" ;
--be enough_N = mkN "gou4" ;
--very_N = mkN "hen3" ;
rain_V0 = mkV "xia4yu3";
read_V2 = mkV2 "du2" ;
reason_N = mkN "dao4li3" ;
restaurant_N = mkN "fan4dian4" "jian1";
--appropriate_N = mkN "ge3shi4" ;
--correct_N = mkN "zheng1que4" ;
--run_N = mkN "pao3bu4" ;
run_V = mkV "pao3" ;
--road_N = mkN "dao4" ;
school_N = mkN "xue2xiao4" "suo3";
science_N = mkN "ke1xue2" []; -- [mark] usually without classifier
sell_V3 = mkV3 "mai4" ;
--go_N = mkN "qu4" ;
send_V3 = mkV3 "ji4" ;
--clap_N = mkN "pai1" ;
sheep_N = mkN "yang2" "zhi1";
ship_N = mkN "chuan2" "sao1";
shoe_N = mkN "xie2" "zhi1";
shop_N = mkN "shang1dian4" "jian1";
--of that kind_N = mkN "na3yang4" ;
--what_N = mkN "shen2ma" ;
song_N = mkN "ge1" "shou3";
--tell_N = mkN "jiang3" ;
speak_V2 = mkV2 "shuo1" ;
--road_N = mkN "dao4" ;
--live_N = mkN "zhu4" ;
--act as_N = mkN "dang1" ;
student_N = mkN "xue2sheng1" "ming2" ;
table_N = mkN "zhuo1zi3" "zhang1";
--word_N = mkN "hua4" ;
talk_V3 = mkV3 "shuo1" ;
--talk_N = mkN "tan2" ;
--education_N = mkN "jiao1yu4" ;
teach_V2 = mkV2 "jiao1" ;
--coach_N = mkN "fu3dao3" ;
--teacher_N = mkN "xian1sheng1" ;
--master worker_N = mkN "shi1fu4" ;
teacher_N = mkN "lao3shi1" "ming2";
television_N = mkN "dian4shi4" "tai2";
--that_N = mkN "na3ge4" ;
--that_N = mkN "na3" ;
--from_N = mkN "cong1" ;
--by way of_N = mkN "tong1guo4" ;
--towards_N = mkN "xiang4" ;
--and_N = mkN "he2" ;
today_Adv = mkAdv "jin1tian1" ;
--now_N = mkN "xian4zai4" ;
--also_N = mkN "ye3" ;
train_N = mkN "huo3che1" "liang4";
travel_V = mkV "lu:3xing2" ;
--below_N = mkN "xia4bian1" ;
--understand_N = mkN "lejie3" ;
understand_V2 = mkV2 "dong3" ;
--recognize_N = mkN "ren4shi2" ;
--open up_N = mkN "tong1" ;
university_N = mkN "da4xue2" "suo3";
wait_V2 = mkV2 "deng3" ;
--need_N = mkN "xu1yao1" ;
watch_V2 = mkV2 "kan4" ;
--time_N = mkN "shi2hou4" ;
--wait_N = mkN "deng3" ;
--what_N = mkN "shen2ma" ;
--how_N = mkN "zen3ma" ;
win_V2 = mkV2 "ying2" ;
--window_N = mkN "chuang1hu4" ;
window_N = mkN "chuang1" "shan4";
wine_N = mkN "jiu3" "ping2";
--leave_N = mkN "li2" ;
--do_N = mkN "zuo4" ;
write_V2 = mkV2 "xie3" ;
young_A = mkA "nian2qing1" ;
-- from Google
apartment_N = mkN "gong1yu4" "jian1" ;
baby_N = mkN "ying1er2" ;
boot_N = mkN "xue1zi3" "zhi1" ; -- [mark] "ji1" --> "xue1zi3"
boss_N = mkN "lao3ban3" ;
boy_N = mkN "nan2hai2" ;
brother_N2 = mkN2 "di4di4" ;
butter_N = mkN "huang2you2" "he2";
camera_N = mkN "she4xiang4tou2" ;
carpet_N = mkN "di4tan3" "zhang1";
cat_N = mkN "mao1" "zhi1" ;
ceiling_N = mkN "tian1hua1ban3" ; --[mark] "tian1hua1ban3shang4" --> "tian1hua1ban3"
cheese_N = mkN "nai3lao4" "kuai4";
church_N = mkN "jiao4tang2" "suo3" ;
computer_N = mkN "ji4suan4ji1" "tai2" ;
cousin_N = mkN "biao3di4" ;
-- NOTE(review): the leading toneless "de" below was produced by the generator; confirm
-- which character it stands for before changing it.
distance_N3 = mkN3 (mkN "deju4li2") emptyPrep emptyPrep ; ----
enemy_N = mkN "di2ren2" ;
father_N2 = mkN2 "fu4qin1" ;
floor_N = mkN "di4ban3" ; -- [mark] floor "di4ban3"(have you cleaned the floor) "lou2ceng2"(which floor do you live?)
fridge_N = mkN "bing1xiang1" "tai2" ;
garden_N = mkN "hua1yuan2" "zuo4";
glove_N = mkN "shou3tao4" "fu4";
gold_N = mkN "jin1zi3" "kuai4"; -- [mark] also without classifier
harbour_N = mkN "hai3gang3" ;
iron_N = mkN "tie3" "kuai4"; -- [mark] also without classifier
king_N = mkN "guo2wang2" ;
leather_N = mkN "pi2ge2" "kuai4";
love_N = mkN "ai4" [] ; -- [mark] "kuai1" --> [], often without classifier
milk_N = mkN "niu2nai3" "bei1" ; -- [mark] "nai3" --> "niu2nai3", which literaly means cow milk
mother_N2 = mkN2 "mu3qin1";
oil_N = mkN "you2" "tong3" ; -- [mark] rewritten
peace_N = mkN "he2ping2" []; -- [mark] often without classifier
planet_N = mkN "xing1qiu2" ;
plastic_N = mkN "su4liao4" "kuai4"; -- [mark] "su4liao4zhi4cheng2" --> "su4liao4" often without classifier
policeman_N = mkN "jing3cha2" "ming2" ;
priest_N = mkN "mu4shi1" "wei4";
queen_N = mkN "nu:3wang2" ;
radio_N = mkN "shou1yin1ji1" "tai2" ;
religion_N = mkN "zong1jiao4" []; -- [mark] also without classifier
rock_N = mkN "shi2tou2" "kuai4" ;
roof_N = mkN "wu1ding3" ;
rubber_N = mkN "xiang4jiao1" "kuai4";
rule_N = mkN "gui1ze2" "xiang4"; -- [mark] "yuan2ze2" --> "gui1ze2"
shirt_N = mkN "chen4shan1" "jian4" ;
silver_N = mkN "yin2zi3" "kuai4"; -- [mark] "yin2" --> "yin2zi3"
sister_N = mkN "mei4mei4" ;
sock_N = mkN "wa4zi3" "zhi1";
steel_N = mkN "gang1" "kuai4";
stove_N = mkN "lu2zi3" ;
village_N = mkN "cun1zhuang1" "zuo4";
war_N = mkN "zhan4zheng1" "chang3" ; -- [mark] rewritten
wood_N = mkN "mu4tou2" "kuai4" ; -- [mark] "mu4" --> "mu4tou2"
-- just missing
lin
alas_Interj = ssword "ai1" ;
beg_V2V = mkV2V (mkV "qi3qiu2") ; -- beg him to do something
break_V2 = mkV2 "da3po4" ;
broad_A = mkA "kuan1" ;
brown_A = mkA "zong1" ;
clever_A = mkA "cong1ming2" ;
close_V2 = mkV2 "guan1bi4" ;
easy_A2V = mkA2 "jian3dan1" ;
empty_A = mkA "kong1" ;
fun_AV = mkA "you3qu4" ;
hate_V2 = mkV2 "tao3yan4" ;
married_A2 = mkA2 "jie2hun1" ;
paris_PN = mkPN "ba1li2" ;
probable_AS = mkA "ke3neng2" ;
ready_A = mkA "zhun3bei4hao3" ; -- [mark] "zhun3bei4hao3": 准备(v) + 好(adj,complement)
seek_V2 = mkV2 "xun2qiu2" ;
stop_V = mkV "ting2zhi3" ;
stupid_A = mkA "ben4" ;
switch8off_V2 = mkV2 "guan1" ;
switch8on_V2 = mkV2 "kai1" ;
ugly_A = mkA "chou3" ;
uncertain_A = mkA "bu4que4ding4" ; -- [mark] "bu4que4ding4": 不("un-") + 确定("certain")
}

View File

@@ -0,0 +1,15 @@
import qualified Data.Map as Map
import Pinyin (c2pMap, useMapGF, mkList)
-- | Regenerate the pinyin (Cmn) grammar modules from their Chi counterparts:
-- load the character-to-pinyin table once, then convert each listed module.
main = do
  table <- fmap (c2pMap . mkList . words) (readFile pinyinFile)
  mapM_ (mkPinyin table) modules
  where
    -- Base names of the modules that contain string literals to convert.
    modules = ["Lexicon", "Numeral", "Res", "Structural"]

-- | Location of the character/pinyin table, relative to the working directory.
pinyinFile = "../pinyin.txt"

-- | Read @<mo>Chi.gf@, run it through 'useMapGF' with the table @ma@ and
-- write the result to @<mo>Cmn.gf@.
mkPinyin ma mo =
  readFile source >>= writeFile target . useMapGF ma
  where
    source = mo ++ "Chi.gf"
    target = mo ++ "Cmn.gf"

View File

@@ -0,0 +1,87 @@
-- Pinyin (Cmn) linearizations for the Noun module: noun phrases, determiners,
-- numerals used as determiners, and common-noun modification. In the rules
-- below cn.s is the noun string and cn.c the string placed between a
-- determiner and the noun (the classifier, going by the lexicon's comments).
concrete NounCmn of Noun = CatCmn ** open ResCmn, Prelude in {
lin
-- Determiner + noun; what stands between them depends on the determiner type.
DetCN det cn = case det.detType of {
DTFull Sg => {s = det.s ++ cn.c ++ cn.s} ; -- this house
DTFull Pl => {s = det.s ++ xie_s ++ cn.s} ; -- these houses
DTNum => {s = det.s ++ cn.c ++ cn.s} ; -- (these) five houses
DTPoss => {s = det.s ++ cn.s} -- our (five) houses
} ;
-- Proper names, pronouns and bare determiners are used as noun phrases unchanged.
UsePN pn = pn ;
UsePron p = p ;
DetNP det = det ;
-- Modifier, possessive_s, then the noun phrase.
PredetNP pred np = mkNP (pred.s ++ possessive_s ++ np.s) ;
PPartNP np v2 = mkNP ((predV v2).verb.s ++ possessive_s ++ np.s) ; ---- ??
AdvNP np adv = mkNP (adv.s ++ possessive_s ++ np.s) ;
-- Quantifier + number. A full numeral makes the determiner DTNum; otherwise a
-- possessive quantifier stays DTPoss and anything else becomes DTFull with the
-- number carried by NTVoid.
DetQuant quant num = {
s = quant.s ++ num.s ;
detType = case num.numType of {
NTFull => DTNum ; -- five
NTVoid n => case quant.detType of {
DTPoss => DTPoss ; -- our
_ => DTFull n -- these/this
}
}
} ;
-- Same as DetQuant, with the ordinal appended to the string.
DetQuantOrd quant num ord = {
s = quant.s ++ num.s ++ ord.s ;
detType = case num.numType of {
NTFull => DTNum ; -- five
NTVoid n => case quant.detType of {
DTPoss => DTPoss ; -- our
_ => DTFull n -- these/this
}
}
} ;
-- Possessive determiner: the pronoun followed by possessive_s.
PossPron p = {
s = p.s ++ possessive_s ;
detType = DTPoss
} ;
-- Grammatical number without a numeral: empty string, number kept in NTVoid.
NumSg = {s = [] ; numType = NTVoid Sg} ;
NumPl = {s = [] ; numType = NTVoid Pl} ;
-- Cardinals and digit sequences count as full numerals.
NumCard n = n ** {numType = NTFull} ;
NumDigits d = d ** {numType = NTFull} ;
-- Ordinals are formed by prefixing ordinal_s.
OrdDigits d = {s = ordinal_s ++ d.s} ;
-- NOTE(review): hasC is set here and in AdNum, but the Card lincat in CatCmn is
-- {s : Str}; confirm whether the field is still read anywhere.
NumNumeral numeral = numeral ** {hasC = True} ;
OrdNumeral numeral = {s = ordinal_s ++ numeral.s} ;
AdNum adn num = {s = adn.s ++ num.s ; hasC = True} ;
-- Superlative ordinal: superlative_s before the adjective.
OrdSuperl a = {s = superlative_s ++ a.s} ;
DefArt = mkDet the_s ;
IndefArt = mkDet yi_s ; ---- in the plural ?
-- Mass nouns and lexical nouns are passed through unchanged.
MassNP cn = cn ;
UseN n = n ;
UseN2 n = n ;
-- Projections of a three-place noun: keep c2, or move c3 into c2.
Use2N3 f = {s = f.s ; c = f.c ; c2 = f.c2} ;
Use3N3 f = {s = f.s ; c = f.c ; c2 = f.c3} ;
-- Complements are placed before the head noun.
ComplN2 f x = {s = appPrep f.c2 x.s ++ f.s ; c = f.c} ;
ComplN3 f x = {s = appPrep f.c2 x.s ++ f.s ; c = f.c ; c2 = f.c3} ;
-- Adjectival modification: possessive_s is inserted unless the adjective is marked monoSyl.
AdjCN ap cn = case ap.monoSyl of {
True => {s = ap.s ++ cn.s ; c = cn.c} ;
False => {s = ap.s ++ possessive_s ++ cn.s ; c = cn.c}
} ;
-- Relative clauses, adverbs, sentences and appositions all precede the noun.
RelCN cn rs = {s = rs.s ++ cn.s ; c = cn.c} ;
AdvCN cn ad = {s = ad.s ++ possessive_s ++ cn.s ; c = cn.c} ;
SentCN cn cs = {s = cs.s ++ cn.s ; c = cn.c} ;
ApposCN cn np = {s = np.s ++ cn.s ; c = cn.c} ;
RelNP np rs = mkNP (rs.s ++ np.s) ;
}

View File

@@ -0,0 +1,143 @@
-- Pinyin (Cmn) numerals. Sub100 and Sub1000 are tables indexed by the position
-- the number occupies (Bform / Qform) and carry a Zero flag (inh) that decides
-- whether "ling2" or "wan4" has to be inserted when parts are combined.
--
-- Two fixes relative to the generated version:
--   * pot1to19: the shi0 form dropped the unit digit (d.s), unlike every other
--     form of the rule and unlike pot111's shi0 form;
--   * pot2plus: the first argument of mkD was the empty string, and since mkD
--     only uses its first argument the words for "hundred" and "ten" vanished
--     from compound numerals, although pot2 spells them out.
concrete NumeralCmn of Numeral = CatCmn ** open ResCmn, Prelude in {
flags coding = utf8 ;
-- Forms of a number below 1000: as hundreds (bai, bai0) or as a multiple of
-- ten thousand (shiwan, shiwan0).
param Qform = bai | bai0 | shiwan | shiwan0 ;
-- Forms of a number below 100: as tens (shi, shi0) or as a multiple of ten
-- thousand (wan, wan0).
param Bform = shi | shi0 | wan | wan0 ;
param Zero = zero | nozero ;
-- "ling2" is inserted when either flag is zero, nothing when both are nozero.
oper ling : Zero * Zero => Str =
table {<zero,z> => "ling2" ;
<z,zero> => "ling2" ;
<nozero,nozero> => []} ;
-- "wan4" is inserted only for the zero flag.
oper Wan : Zero => Str =
table {zero => "wan4" ;
nozero => []} ;
-- Only the first argument is used; the second (the "formal" variant) is discarded.
oper mkD : Str -> Str -> Str = \x,_ -> word x ; -- hiding the "formal" version
--lincat Numeral = {s : Str} ;
lincat Digit = {s : Str} ;
lincat Sub10 = {s : Str} ;
lincat Sub100 = {inh : Zero ; s : Bform => Str} ;
lincat Sub1000 = {inh : Zero ; s : Qform => Str} ;
lincat Sub1000000 = {s : Str} ;
lin num x0 =
{s = x0.s} ;
-- 一二三四五六七八九十一百千
--
-- Digits 2-9.
lin n2 =
{s = mkD "er4" "er4"} ;
lin n3 =
{s = mkD "san1" "san1"} ;
lin n4 =
{s = mkD "si4" "si4"} ;
lin n5 =
{s = mkD "wu3" "wu3"} ;
lin n6 =
{s = mkD "liu4" "liu4"} ;
lin n7 =
{s = mkD "qi1" "qi1"} ;
lin n8 =
{s = mkD "ba1" "ba1"} ;
lin n9 =
{s = mkD "jiu3" "jiu3"} ;
-- 1 as a number below ten.
lin pot01 =
{s = mkD "yi1" "yi1"} ;
lin pot0 d =
{s = d.s} ;
-- 10
lin pot110 =
{inh = nozero ;
s = table {
shi => mkD "yi1shi2" "yi1shi2" ;
shi0 => mkD "yi1shi2" "yi1shi2" ;
wan => mkD "yi1wan4" "yi1wan4" ;
wan0 => mkD "yi1wan4" "yi1wan4"}} ;
-- 11
lin pot111 =
{inh = nozero ;
s = table {
shi => mkD "shi2yi1" "shi2yi1" ;
shi0 => mkD "yi1shi2yi1" "yi1shi2yi1" ;
wan => mkD "shi2yi1wan4" "shi2yi1wan4" ;
wan0 => mkD "shi2yi1wan4" "shi2yi1wan4"}} ;
-- 12-19: every form ends in (or contains) the unit digit d.
lin pot1to19 d =
{inh = nozero ;
s = table {
shi => mkD "yi1shi2" "yi1shi2" ++ d.s ;
shi0 => mkD "yi1shi2" "yi1shi2" ++ d.s ; -- fixed: the digit was missing here
wan => mkD "yi1wan4" "yi1wan4" ++ d.s ++ mkD "qian1" "qian1" ;
wan0 => mkD "yi1wan4" "yi1wan4" ++ d.s ++ mkD "qian1" "qian1"}} ;
-- A single digit used as a number below 100.
lin pot0as1 n =
{inh = zero ;
s = table {
shi => n.s ;
shi0 => n.s ;
wan => n.s ++ mkD "qian1" "qian1" ;
wan0 => n.s ++ mkD "qian1" "qian1"}} ;
-- Round tens: d * 10.
lin pot1 d =
{inh = zero ;
s = table {
shi => d.s ++ mkD "shi2" "shi2" ;
shi0 => d.s ++ mkD "shi2" "shi2" ;
wan0 => d.s ++ "wan4" ;
wan => d.s ++ "wan4"}} ;
-- Tens plus units: d * 10 + e.
lin pot1plus d e =
{inh = nozero ;
s = table {
shi => d.s ++ mkD "shi2" "shi2" ++ e.s ;
shi0 => d.s ++ mkD "shi2" "shi2" ++ e.s ;
wan => d.s ++ "wan4" ++ e.s ++ mkD "qian1" "qian1" ;
wan0 => d.s ++ "wan4" ++ e.s ++ mkD "qian1" "qian1"}} ;
-- A number below 100 used as a number below 1000: forms are re-indexed.
lin pot1as2 n =
{inh = zero ;
s = table {
bai => n.s ! shi ;
bai0 => n.s ! shi ;
shiwan => n.s ! wan ;
shiwan0 => n.s ! wan0}} ;
-- Round hundreds: d * 100.
lin pot2 d =
{inh = zero ;
s = table {
bai => d.s ++ mkD "bai3" "bai3" ;
bai0 => d.s ++ mkD "bai3" "bai3" ;
shiwan0 => d.s ++ mkD "shi2wan4" "shi2wan4" ;
shiwan => d.s ++ mkD "shi2wan4" "shi2wan4"}} ;
-- Hundreds plus a number below 100: d * 100 + e. The hundred/ten word is now
-- spelled out (fixed: the first mkD argument used to be empty).
lin pot2plus d e =
{inh = nozero ;
s = table {
bai => d.s ++ mkD "bai3" "bai3" ++ (ling ! <e.inh,e.inh>) ++ e.s ! shi0 ;
bai0 => d.s ++ mkD "bai3" "bai3" ++ (ling ! <e.inh,e.inh>) ++ e.s ! shi0 ;
shiwan => d.s ++ mkD "shi2" "shi2" ++ (Wan ! (e.inh)) ++ e.s ! wan ;
shiwan0 => d.s ++ mkD "shi2" "shi2" ++ (Wan ! (e.inh)) ++ e.s ! wan0}} ;
-- A number below 1000 used as a number below a million.
lin pot2as3 n =
{s = n.s ! bai} ;
-- Round thousands: n * 1000.
lin pot3 n =
{s = n.s ! shiwan} ;
-- Thousands plus a number below 1000: n * 1000 + m, with "ling2" inserted per the two flags.
lin pot3plus n m =
{s = (n.s ! shiwan0) ++ (ling ! <n.inh,m.inh>) ++ m.s ! bai0} ;
-- numerals as sequences of digits
lincat
Dig = SS ;
lin
IDig d = d ;
IIDig d i = ss (d.s ++ i.s) ;
D_0 = ss "0" ;
D_1 = ss "1" ;
D_2 = ss "2" ;
D_3 = ss "3" ;
D_4 = ss "4" ;
D_5 = ss "5" ;
D_6 = ss "6" ;
D_7 = ss "7" ;
D_8 = ss "8" ;
D_9 = ss "9" ;
}

View File

@@ -0,0 +1,118 @@
-- Lexical paradigms for the pinyin (Cmn) grammar: constructor functions that
-- the lexicon uses to build entries of each open category from strings.
-- Lines starting with ---- are disabled variants kept by the original author.
resource ParadigmsCmn = open CatCmn, ResCmn, Prelude in {
-- The coding flag is declared twice (with different spacing).
flags coding = utf8 ;
flags coding=utf8;
oper
-- Nouns: with one argument the classifier defaults to ge_s; the second
-- argument gives an explicit classifier.
mkN = overload {
mkN : (man : Str) -> N
= \n -> lin N (regNoun n ge_s) ;
mkN : (man : Str) -> Str -> N
= \n,c -> lin N (regNoun n c)
} ;
-- Relational nouns: classifier ge_s and an empty complement preposition.
mkN2 = overload {
mkN2 : Str -> N2
= \n -> lin N2 (regNoun n ge_s ** {c2 = emptyPrep}) ; ---- possessive ?
---- mkN2 : N -> Str -> N2
---- = \n,p -> lin N2 (n ** {c2 = mkPrep p}) ;
} ;
-- Three-place nouns: an existing noun plus the two complement prepositions.
mkN3 : N -> Preposition -> Preposition -> N3
= \n,p,q -> lin N3 (n ** {c2 = p ; c3 = q}) ;
-- Proper names: the string passed through word.
mkPN : (john : Str) -> PN
= \s -> lin PN {s = word s} ;
-- Adjectives: simpleAdj for the one-argument form, mkAdj with an explicit Bool otherwise.
mkA = overload {
mkA : (small : Str) -> A
= \a -> lin A (simpleAdj a) ;
mkA : (small : Str) -> Bool -> A
= \a,b -> lin A (mkAdj a b) ;
} ;
mkA2 : Str -> A2 = \a -> lin A2 (simpleAdj a ** {c2 = emptyPrep}) ;
-- Verbs: regVerb for the one-argument form. The longer forms pass four further
-- strings to mkVerb, followed by either the default neg_s or an explicit sixth string.
mkV = overload {
mkV : (walk : Str) -> V
= \walk -> lin V (regVerb walk) ;
mkV : (arrive : Str) -> Str -> Str -> Str -> Str -> V
= \arrive,pp,ds,dp,ep -> lin V (mkVerb arrive pp ds dp ep neg_s) ;
mkV : (arrive : Str) -> Str -> Str -> Str -> Str -> Str -> V
= \arrive,pp,ds,dp,ep,neg -> lin V (mkVerb arrive pp ds dp ep neg) ;
} ;
-- Verbs with objects: every object preposition defaults to emptyPrep.
mkV2 : Str -> V2
= \s -> lin V2 (regVerb s ** {c2 = emptyPrep}) ;
mkV3 = overload {
mkV3 : Str -> V3
= \s -> lin V3 (regVerb s ** {c2,c3 = emptyPrep}) ;
mkV3 : V -> V3
= \s -> lin V3 (s ** {c2,c3 = emptyPrep}) ;
---- mkV3 : V -> Str -> Str -> V3
---- = \v,p,q -> lin V3 (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ;
} ;
mkVV : Str -> VV = ----
\v -> lin VV (regVerb v) ;
-- The following constructors only re-tag an existing V as another verb
-- category, adding empty prepositions where that category requires them.
mkVQ : V -> VQ =
\v -> lin VQ v ;
mkVS : V -> VS =
\v -> lin VS v ;
mkVA : V -> VA =
\v -> lin VA v ;
mkV2Q : V -> V2Q =
\v -> lin V2Q (v ** {c2 = emptyPrep}) ;
---- mkV2Q : V -> Str -> V2Q =
---- \v,p -> lin V2Q (v ** {c2 = mkPrep p}) ;
mkV2V : V -> V2V =
\v -> lin V2V (v ** {c2 = emptyPrep ; c3 = emptyPrep}) ;
---- mkV2V : V -> Str -> Str -> V2V =
---- \v,p,q -> lin V2V (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ;
mkV2S : V -> V2S =
\v -> lin V2S (v ** {c2 = emptyPrep}) ;
---- mkV2S : V -> Str -> V2S =
---- \v,p -> lin V2S (v ** {c2 = mkPrep p}) ;
mkV2A : V -> V2A
= \v -> lin V2A (v ** {c2 = emptyPrep ; c3 = emptyPrep}) ;
---- mkV2A : V -> Str -> Str -> V2A
---- = \v,p,q -> lin V2A (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ;
-- Adverbs: the one-argument form tags the adverb ATPlace; pass an AdvType
-- (see the named constants below) to choose another type.
mkAdv = overload {
mkAdv : Str -> Adv
= \s -> lin Adv {s = word s ; advType = ATPlace} ;
mkAdv : Str -> AdvType -> Adv
= \s,at -> lin Adv {s = word s ; advType = at} ;
} ;
-- Re-export of the adverb type and its three values under readable names.
AdvType : Type
= ResCmn.AdvType ;
placeAdvType : AdvType
= ATPlace ;
timeAdvType : AdvType
= ATTime ;
mannerAdvType : AdvType
= ATManner ;
-- Prepositions: one or two strings handed to ResCmn.mkPreposition; the
-- one-argument form passes [] as the second string.
mkPrep = overload { ---- is this the right order of the fields?
mkPrep : Str -> Preposition
= \s -> ResCmn.mkPreposition s [] ;
mkPrep : Str -> Str -> Preposition
= \s,t -> ResCmn.mkPreposition s t ;
} ;
-- The preposition built from the empty string, used as the default everywhere above.
emptyPrep : Preposition = mkPrep [] ;
}

View File

@@ -0,0 +1,27 @@
-- Pinyin (Cmn) linearizations for the Phrase module: how utterances are built
-- from sentences, questions, imperatives and bare constituents.
concrete PhraseCmn of Phrase = CatCmn ** open Prelude, ResCmn in {
lin
-- A phrase is the phrasal conjunction, the utterance and the vocative, in that order.
PhrUtt pconj utt voc = {s = pconj.s ++ utt.s ++ voc.s} ;
-- Sentences and questions are already plain strings.
UttS s = s ;
UttQS qs = qs ;
-- The three imperative variants are identical: the polarity string, then the
-- imperative form selected by that polarity.
UttImpSg pol imp = {s = pol.s ++ imp.s ! pol.p} ;
UttImpPl pol imp = {s = pol.s ++ imp.s ! pol.p} ;
UttImpPol pol imp = {s = pol.s ++ imp.s ! pol.p} ; --- add politeness here?
-- Bare constituents are used as utterances unchanged.
UttIP ip = ip ;
UttIAdv iadv = iadv ;
UttNP np = np ;
UttCN cn = cn ;
UttAP ap = ap ;
UttCard x = x ;
-- A verb phrase is uttered in its infVP form.
UttVP vp = ss (infVP vp) ;
UttAdv adv = adv ;
NoPConj = {s = []} ;
-- Phrasal conjunction: the second string (s2) of the conjunction's CSent form.
PConjConj conj = ss (conj.s ! CSent).s2 ;
NoVoc = {s = []} ;
VocNP np = {s = np.s} ; ---- ??
}

View File

@@ -0,0 +1,64 @@
module Pinyin where
import Numeric
import qualified Data.Map as Map
import System
-- AR 3/10/2012
-- Chinese unicode - character - pinyin conversions
-- character data from http://www.linguanaut.com/chinese_alphabet2.htm
-- | Command-line entry point. The first argument selects a conversion that is
-- applied to standard input; with any other (or no) argument the parsed table
-- is printed, one entry per line.
main = do
  args <- getArgs
  table <- fmap (mkList . words) (readFile "pinyin.txt")
  -- Word-by-word conversion through a map built from the table.
  let through build = interact (useMap (build table))
  case args of
    "c2p" : _    -> through c2pMap                       -- Chinese char to Pinyin (all results)
    "p2c" : _    -> through p2cMap                       -- Pinyin to Chinese char (all results)
    "c2u" : _    -> through c2uMap                       -- Chinese char to Unicode hex
    "u2c" : _    -> through u2cMap                       -- Unicode hex to Chinese char
    "c2pGF" : _  -> interact (useMapGF (c2pMap table))   -- char to pinyin (first result) in string literals (e.g. in GF files)
    "p2cTry" : _ -> interact (tryUseMap (p2cMap table))  -- pinyin to char, trying syllable with all tone marks
    _            -> mapM_ (putStrLn . printOne) table
-- | Pair up the whitespace-separated tokens of the data file: each character
-- token is followed by a token of slash-separated readings. The result maps
-- the hex code point of the token's first character to the token and its
-- readings. A trailing unpaired token is dropped.
mkList (char : readings : rest) =
  (Numeric.showHex (fromEnum (head char)) "", (char, chop readings)) : mkList rest
mkList _ = []
-- | Render one table entry as a tab-separated line: hex code, character,
-- space-separated readings.
printOne (hex, (char, readings)) = concat [hex, "\t", char, "\t", unwords readings]
-- | Split a slash-separated list of readings into its parts.
chop readings = words [if ch == '/' then ' ' else ch | ch <- readings]
-- | Translate every whitespace-separated word through the map; words that
-- are not in the map are replaced by "NONE".
useMap :: Map.Map String String -> String -> String
useMap table input = useMapWith words unwords (\_ -> "NONE") table input
-- | For each input word, look up the word itself and the word followed by
-- each of the digits 1..4, and print every hit as @value (key)@. One output
-- line per input word; a word without hits gives an empty line.
tryUseMap :: Map.Map String String -> String -> String
tryUseMap m input = unlines [candidates syllable | syllable <- words input]
  where
    candidates syllable = unwords (concatMap hit (keysFor syllable))
    hit key = case Map.lookup key m of
      Just chars -> [chars ++ " (" ++ key ++ ")"]
      Nothing    -> []
    keysFor syllable = syllable : map ((syllable ++) . show) [1 .. 4]
-- | Generic word-by-word translation: split the input, replace every piece
-- by its map value (or by the fallback applied to the piece), and join the
-- results again.
useMapWith :: (String -> [String]) -> ([String] -> String) -> (String -> String) -> Map.Map String String -> String -> String
useMapWith split join fallback table input =
  join [Map.findWithDefault (fallback piece) piece table | piece <- split input]
-- | Convert GF source text from Chinese characters to pinyin.
--
-- Outside string literals the text is copied, except that the language code
-- @Chi@ is rewritten to @Cmn@ (but @Chin...@, as in Chinese/China, is kept).
-- Inside double-quoted string literals every character found in the map is
-- replaced by the first of its space-separated readings. A character that
-- is not in the map, or whose entry has no readings, is copied unchanged
-- (previously an empty entry crashed on @head@).
-- Escaped quotes inside literals are not recognized.
useMapGF :: Map.Map String String -> String -> String
useMapGF m = outside
  where
    -- scanning ordinary source text
    outside ('C':'h':'i':'n':cs) = "Chin" ++ outside cs  -- don't change Chinese, China
    outside ('C':'h':'i':cs)     = "Cmn" ++ outside cs   -- to change language code Chi to Cmn
    outside ('"':cs)             = '"' : inside cs
    outside (c:cs)               = c : outside cs
    outside []                   = []

    -- scanning the contents of a string literal
    inside ('"':cs) = '"' : outside cs
    inside (c:cs)   = firstReading c ++ inside cs
    inside []       = []

    -- first listed reading of a character, or the character itself
    firstReading c = case fmap words (Map.lookup [c] m) of
      Just (reading : _) -> reading
      _                  -> [c]
-- Lookup tables built from the parsed entries (hex code, (character, readings)).

-- | Character to all of its readings, space-separated.
c2pMap entries =
  Map.fromList (map (\(_, (char, readings)) -> (char, unwords readings)) entries)

-- | Reading to characters; store all chars with the same pinyin.
p2cMap entries =
  Map.fromListWith (++)
    (concatMap (\(_, (char, readings)) -> [(reading, char) | reading <- readings]) entries)

-- | Character to hex code.
c2uMap entries = Map.fromList (map (\(hex, (char, _)) -> (char, hex)) entries)

-- | Hex code to character.
u2cMap entries = Map.fromList (map (\(hex, (char, _)) -> (hex, char)) entries)

View File

@@ -0,0 +1,40 @@
concrete QuestionCmn of Question = CatCmn **
  open ResCmn, Prelude in {

  flags optimize=all_subs ;

  lin
    -- Yes/no question: the clause followed by the question particle.
    QuestCl clause = {
      s = \\pol,asp => clause.s ! pol ! asp ++ question_s
      } ; --- plus redup questions

    -- Subject question: the interrogative stands in subject position.
    QuestVP ip vp = {
      s = \\pol,asp =>
        ip.s ++ vp.prePart ++ useVerb vp.verb ! pol ! asp ++ vp.compl
      } ;

    -- Object question: the slash clause (with its preposition wrapped
    -- around the subject), then possessive_s, di_s and the interrogative.
    QuestSlash ip slash = {
      s = \\pol,asp =>
        slash.c2.prepPre ++ slash.np ++ slash.c2.prepMain ++
        slash.vp ! pol ! asp ++
        possessive_s ++ di_s ++ ip.s
      } ;

    -- Adverbial question: the interrogative adverb goes between the
    -- subject and the verb phrase.
    QuestIAdv iadv clause = {
      s = \\pol,asp => clause.np ++ iadv.s ++ clause.vp ! pol ! asp
      } ;

    -- Polarity and aspect are ignored here.
    QuestIComp icomp np = {s = \\pol,asp => np.s ++ icomp.s} ; ---- order

    PrepIP prep ip = ss (appPrep prep ip.s) ;
    AdvIP ip adv = ss (adv.s ++ possessive_s ++ ip.s) ; ---- adding de

    -- determiner, then the noun's c field, then the noun
    IdetCN idet cn = {s = idet.s ++ cn.c ++ cn.s} ; ---- number?
    IdetIP idet = idet ;
    IdetQuant iquant num = ss (iquant.s ++ num.s) ; ----

    AdvIAdv iadv adv = ss (adv.s ++ iadv.s) ;

    CompIAdv iadv = ss (zai_s ++ iadv.s) ;
    CompIP ip = ss (copula_s ++ ip.s) ;
}

View File

@@ -0,0 +1,12 @@
concrete RelativeCmn of Relative = CatCmn ** open ResCmn, Prelude in {

  lin
    -- A whole clause used as a relative: the clause plus the relative particle.
    RelCl clause = {
      s = \\pol,asp => clause.s ! pol ! asp ++ relative_s
      } ; ---- ??

    -- Subject relative: the verb phrase first, the relative pronoun last.
    RelVP rp vp = {
      s = \\pol,asp =>
        vp.prePart ++ useVerb vp.verb ! pol ! asp ++ vp.compl ++ rp.s
      } ; ---- ??

    -- Object relative: the slash clause, then its preposition applied to
    -- the relative pronoun.
    RelSlash rp slash = {
      s = \\pol,asp => slash.s ! pol ! asp ++ appPrep slash.c2 rp.s
      } ;

    FunRP prep np rp = ss (appPrep prep np.s ++ rp.s) ; ---- ??

    -- The plain relative pronoun is the relative particle itself.
    IdRP = ss relative_s ;
}

View File

@@ -0,0 +1,223 @@
--# -path=.:../abstract:../common:../../prelude
--1 Chinese (Mandarin, pinyin) auxiliary operations.
--
---- This module contains operations that are needed to make the
---- resource syntax work. To define everything that is needed to
---- implement $Test$, it moreover contains regular lexical
---- patterns needed for $Lex$.
--
resource ResCmn = ParamX ** open Prelude in {
flags coding = utf8 ;
oper
-- strings ----
-- Function words, particles and punctuation used by the syntax modules,
-- written in tone-numbered pinyin. Entries set to defaultStr/emptyStr are
-- placeholders that linearize to nothing.
-- NOTE(review): this file was mostly generated automatically, and the
-- converter takes the first listed reading of each character, so some tone
-- digits look doubtful (e.g. question_s "ma3", deVAdv_s "de2", and the_s
-- "na3", which is the reading of "which" rather than "that" (na4)).
-- Confirm with a native speaker before relying on them.
defaultStr = "" ;
than_s = "bi3" ;
progressive_s = defaultStr ;
possessive_s = "de" ; -- also used for AP + NP
deAdvV_s = "de" ; -- between Adv and V
deVAdv_s = "de2" ; -- between V and Adv
imperneg_s = neg_s ;
conjThat = emptyStr ; ----
reflPron = word "zi4ji3" ; -- pron + refl
passive_s = defaultStr ;
relative_s = possessive_s ; -- relative
superlative_s = "zui4" ; -- superlative, sup + adj + de
zai_s = "zai4" ; -- copula for place
you_s = "you3" ; -- to have
copula_s = "shi4" ;
exist_s = word "cun2zai4" ;
neg_s = "bu4" ;
question_s = "ma3" ;
yi_s = "yi1" ;
ordinal_s = "di4" ;
xie_s = "xie1" ;
the_s = "na3" ;
geng_s = "geng1" ; -- more, in comparison
-- verb form of zai: no aspect markers, negated with "bu4"
zai_V = mkVerb "zai4" [] [] [] [] "bu4" ;
fullstop_s = "." ;
questmark_s = "?" ;
exclmark_s = "!" ;
ge_s = "ge4" ;
di_s = "shi4" ; -- used in QuestSlash
emptyStr = [] ;
-- Write the characters that constitute a word separately. This enables straightforward tokenization.
-- bword joins two pieces; with `+` they are glued into one token.
bword : Str -> Str -> Str = \x,y -> x + y ; -- change to x ++ y to treat words as separate tokens
-- word splits a string of exactly two, three or four characters (each `?`
-- matches one character) and rejoins the pieces with bword; any other
-- string is returned unchanged. With the current definition of bword the
-- result is therefore always the input string as a single token.
-- NOTE(review): the 2-4 character cases were presumably meant for Chinese
-- characters; on pinyin they would cut inside syllables if bword were
-- switched to `++` - confirm before changing bword.
word : Str -> Str = \s -> case s of {
x@? + y@? + z@? + u@? => bword x (bword y (bword z u)) ;
x@? + y@? + z@? => bword x (bword y z) ;
x@? + y@? => bword x y ;
_ => s
} ;
-- ssword wraps the result of word in a record with a single s field.
ssword : Str -> SS = \s -> ss (word s) ;
------------------------------------------------ from Jolene
-- parameters
param
-- verbal aspect; useVerb selects the aspect marker by this value
Aspect = APlain | APerf | ADurStat | ADurProg | AExper ; ---- APlain added by AR
-- conjunction form: phrase-level (by phrase type) or sentence-level
ConjForm = CPhr CPosType | CSent;
CPosType = CAPhrase | CNPhrase | CVPhrase ;
DeForm = DeNoun | NdNoun ; -- parameter created for noun with/without the particle "de"
-- adverb class; AdvVP in VerbCmn places manner adverbs differently from the others
AdvType = ATPlace | ATTime | ATManner ;
-- parts of speech
oper
-- Verb phrase: infVP linearizes it as prePart ++ verb ++ compl.
VP = {verb : Verb ; compl : Str ; prePart : Str} ;
NP = {s : Str} ;
-- for morphology
-- Noun: s is the noun itself; c is a second string kept with it
-- (presumably the classifier / measure word - confirm against NounCmn).
Noun : Type = {s : Str; c : Str} ;
Adj : Type = {s : Str; monoSyl: Bool} ;
-- Verb: s is the base form; pp, ds, dp, ep are the markers that useVerb
-- combines with it for APerf, ADurStat, ADurProg and AExper; neg is the
-- verb's own negation word.
Verb : Type = {s : Str ; pp,ds,dp,ep : Str ; neg : Str} ;
regNoun : Str -> Str -> Noun = \s,c -> {s = word s ; c = word c};
mkAdj : Str -> Bool -> Adj = \s,b -> {s = word s ; monoSyl = b};
-- complexAP takes an already built string (no `word`) and marks it non-monosyllabic.
complexAP : Str -> Adj = \s -> {s = s ; monoSyl = False} ;
-- simpleAdj marks a string as monosyllabic only when it is exactly one
-- character long (`?` matches a single character).
-- NOTE(review): a pinyin syllable is several characters long, so this test
-- looks like a leftover from the character-based grammar and probably
-- never yields True here - confirm.
simpleAdj : Str -> Adj = \s -> case s of {
? => mkAdj s True ; -- monosyllabic
_ => mkAdj s False
} ;
-- The copula: no aspect markers, negated with "bu4".
copula : Verb = mkVerb "shi4" [] [] [] [] "bu4" ;
-- Regular verb: fixed aspect markers "le", "zhao1", "zai4", "guo4" and negation "mei2".
regVerb : (walk : Str) -> Verb = \v ->
mkVerb v "le" "zhao1" "zai4" "guo4" "mei2" ;
-- Full verb constructor; only the base form is passed through `word`.
mkVerb : (v : Str) -> (pp,ds,dp,ep,neg : Str) -> Verb = \v,pp,ds,dp,ep,neg ->
{s = word v ; pp = pp ; ds = ds ; dp = dp ; ep = ep ; neg = neg} ;
-- Inflection table of a verb over polarity and aspect.
-- Positive forms attach the verb's aspect marker: after the verb for
-- APerf, ADurStat and AExper, before it for ADurProg; APlain is the bare verb.
-- Negative forms: APlain, ADurProg and AExper use the verb's own negation
-- word (v.neg); APerf and ADurStat use the literal "bu4" regardless of
-- v.neg, and negative ADurStat also omits the marker v.ds.
useVerb : Verb -> Polarity => Aspect => Str = \v ->
table {
Pos => table {
APlain => v.s ;
APerf => v.s ++ v.pp ;
ADurStat => v.s ++ v.ds ;
ADurProg => v.dp ++ v.s ;
AExper => v.s ++ v.ep
} ;
Neg => table {
APlain => v.neg ++ v.s ; --- neg?
APerf => "bu4" ++ v.s ++ v.pp ;
ADurStat => "bu4" ++ v.s ;
ADurProg => v.neg ++ v.dp ++ v.s ; -- mei or bu
AExper => v.neg ++ v.s ++ v.ep
}
} ;
-- Bare (aspect-less, positive) form of a verb phrase:
-- pre-verbal part, verb, complement.
infVP : VP -> Str = \vp -> vp.prePart ++ vp.verb.s ++ vp.compl ;

-- A verb phrase consisting of the verb alone.
predV : Verb -> VP = \v -> {
  verb = v ;
  compl = [] ;
  prePart = [] ;
  } ;

-- Add a complement in front of the complements already present.
insertObj : NP -> VP -> VP = \np,vp -> {
  verb = vp.verb ;
  compl = np.s ++ vp.compl ;
  prePart = vp.prePart
  } ;

-- Add a complement after the complements already present.
insertObjPost : NP -> VP -> VP = \np,vp -> {
  verb = vp.verb ;
  compl = vp.compl ++ np.s ;
  prePart = vp.prePart
  } ;

-- Add a pre-verbal adverbial. The new adverbial is put in front of any
-- pre-verbal material the verb phrase already has; previously that
-- material was overwritten, so a second adverb silently removed the first.
insertAdv : SS -> VP -> VP = \adv,vp -> {
  verb = vp.verb ;
  compl = vp.compl ;
  prePart = adv.s ++ vp.prePart
  } ;

-- Extraposed material goes last in the complement.
insertExtra : SS -> VP -> VP = \ext,vp ->
  insertObjPost ext vp ;
-- clauses: keep np and vp separate to enable insertion of IAdv
-- s is the whole clause; np is the subject alone; vp is the predicate
-- including its complement. QuestIAdv puts an adverb between np and vp.
Clause : Type = {
s : Polarity => Aspect => Str ;
np : Str;
vp : Polarity => Aspect => Str
} ;
-- Clause constructors, all reducing to mkClauseCompl:
--   subject + verb; subject + ready-made predicate table + complement;
--   subject + verb + object; subject + verb phrase.
mkClause = overload {
mkClause : Str -> Verb -> Clause = \np,v -> mkClauseCompl np (useVerb v) [] ;
mkClause : Str -> (Polarity => Aspect => Str) -> Str -> Clause = mkClauseCompl ;
mkClause : Str -> Verb -> Str -> Clause = \subj,verb,obj ->
mkClauseCompl subj (useVerb verb) obj ;
-- for a VP, the pre-verbal part is placed before the inflected verb
mkClause : Str -> VP -> Clause = \np,vp ->
mkClauseCompl np (\\p,a => vp.prePart ++ useVerb vp.verb ! p ! a) vp.compl ;
} ;
-- Builds the clause record; the complement is appended after the predicate
-- both in the full string s and in the separate vp field.
mkClauseCompl : Str -> (Polarity => Aspect => Str) -> Str -> Clause = \np,vp,compl -> {
s = \\p,a => np ++ vp ! p ! a ++ compl ;
np = np ;
vp = \\p,a => vp ! p ! a ++ compl
} ;
-- for structural words
param
DetType = DTFull Number | DTNum | DTPoss ; -- this, these, five, our
NumType = NTFull | NTVoid Number ; -- five, sg, pl
oper
Determiner = {s : Str ; detType : DetType} ;
-- Determiner constructors: the string is used as given (no `word`);
-- without a second argument the determiner is a full singular one.
mkDet = overload {
mkDet : Str -> Determiner = \s -> {s = s ; detType = DTFull Sg} ;
mkDet : Str -> Number -> Determiner = \s,n -> {s = s ; detType = DTFull n} ;
mkDet : Str -> DetType -> Determiner = \s,d -> {s = s ; detType = d} ;
} ;
mkQuant : Str -> {s : Str} = ss ;
pronNP : (s : Str) -> NP = \s -> {
s = word s
} ;
-- Two-part preposition: the FIRST argument is the main part, which
-- appPrep puts after the noun phrase; the SECOND is the part put before it.
mkPreposition : Str -> Str -> Preposition = \s,b -> {
prepMain = word s ;
prepPre = word b
} ;
-- Two-part subjunction: first argument prePart, second argument sufPart.
mkSubj : Str -> Str -> {prePart : Str ; sufPart : Str} = \p,s -> {
prePart = word p ;
sufPart = word s
} ;
Preposition = {prepMain : Str ; prepPre : Str} ;
-- added by AR
mkNP : Str -> NP = ss ;
-- Wrap a preposition around a string: prepPre ++ string ++ prepMain.
appPrep : Preposition -> Str -> Str = \prep,s ->
prep.prepPre ++ s ++ prep.prepMain ;
}

View File

@@ -0,0 +1,45 @@
-- Sentence-level rules: predication, imperatives, slash clauses,
-- embedding, and fixing polarity and aspect of clauses.
concrete SentenceCmn of Sentence = CatCmn **
open Prelude, ResCmn in {
flags optimize=all_subs ;
lin
PredVP np vp = mkClause np.s vp ;
PredSCVP sc vp = mkClause sc.s vp ;
-- Imperative: the bare verb phrase, prefixed with neg_s when negative.
ImpVP vp = {
s = table {
Pos => infVP vp ;
Neg => neg_s ++ infVP vp
}
} ;
-- Slash clause from a slash verb phrase: built like the VP case of
-- mkClause, keeping the complement preposition c2 of the verb phrase.
SlashVP np vp =
mkClauseCompl np.s (\\p,a => vp.prePart ++ useVerb vp.verb ! p ! a) vp.compl
** {c2 = vp.c2} ;
SlashVS np vs sslash = <mkClause np.s vs sslash.s : Clause> ** {c2 = sslash.c2} ;
-- yet another reason for discontinuity of clauses
-- The adverb is put in front of the predicate, after the subject.
AdvSlash slash adv =
mkClause slash.np (<\\p,a => adv.s ++ slash.vp ! p ! a : Polarity => Aspect => Str>) []
** {c2 = slash.c2} ;
SlashPrep cl prep = cl ** {c2 = prep} ;
EmbedS s = ss (conjThat ++ s.s) ;
EmbedQS qs = qs ;
EmbedVP vp = ss (infVP vp) ;
-- Polarity comes from p.p and aspect from t.t; the strings t.s and p.s
-- are not included in the result.
UseCl t p cl = {s = cl.s ! p.p ! t.t} ;
UseQCl t p cl = {s = cl.s ! p.p ! t.t} ;
UseRCl t p cl = {s = cl.s ! p.p ! t.t} ;
UseSlash t p cl = {s = cl.s ! p.p ! t.t ; c2 = cl.c2} ;
AdvS a s = ss (a.s ++ s.s) ;
RelS s r = ss (s.s ++ r.s) ;
}

View File

@@ -0,0 +1,164 @@
concrete StructuralCmn of Structural = CatCmn **
open ParadigmsCmn, ResCmn, Prelude in {
flags coding = utf8 ;
-- Basic structural words, in tone-numbered pinyin.
-- NOTE(review): the pinyin was generated automatically from the character
-- grammar; only the readings that caused wrong or colliding output are
-- corrected here, other tone digits still need a native-speaker check.
lin
  every_Det = mkDet "mei3" Sg ;
  this_Quant = mkDet "zhe4" ;
  -- "that" is na4; "na3" is the interrogative "which" and made
  -- there_Adv identical to where_IAdv.
  that_Quant = mkDet "na4" ;
  i_Pron = pronNP "wo3" ;
  youSg_Pron = pronNP "ni3" ;
  he_Pron = pronNP "ta1" ;
  she_Pron = pronNP "ta1" ;
  we_Pron = pronNP "wo3men" ;
  youPl_Pron = pronNP "ni3men" ;
  they_Pron = pronNP "ta1men" ;
  very_AdA = ssword "fei1chang2" ;
  by8means_Prep = mkPrep "pang2bian1" [] ;
  in_Prep = mkPrep "li3" [] ;
  possess_Prep = mkPrep "de" [] ;
  -- two-part preposition: "he2" before the noun phrase, "yi1qi3" after it
  with_Prep = mkPrep "yi1qi3" "he2" ;
  and_Conj = {s = table {
      CPhr CNPhrase => mkConjForm "he2" ;
      CPhr CAPhrase => mkConjForm "er2" ;
      CPhr CVPhrase => mkConjForm "you4" ;
      CSent => mkConjForm []
      }
    } ;
  or_Conj = {s = table {
      CPhr _ => mkConjForm "huo4" ;
      CSent => mkConjForm "hai2shi4"
      }
    } ;
  although_Subj = mkSubj "sui1ran2" "dan4" ;
  because_Subj = mkSubj "yin1wei2" "suo3yi3" ;
  when_Subj = mkSubj [] "deshi2hou4" ;
  here_Adv = mkAdv "zhe4li3" ;
  there_Adv = mkAdv "na4li3" ;       -- was "na3li3", the same string as where_IAdv
  whoSg_IP, whoPl_IP = mkIPL "shei2" ;
  -- the stray leading space inside the token has been removed
  whatSg_IP, whatPl_IP = mkIPL "shen2ma" ;
  where_IAdv = mkIAdvL "na3li3" ;
  when_IAdv = mkIAdvL "shen2mashi2hou4" ;
  how_IAdv = mkIAdvL "ru2he2" ;
  all_Predet = ssword "suo3you3" ;
  many_Det = mkDet "duo1" Pl ;
  someSg_Det = mkDet (word "yi1xie1") Sg ;
  -- NOTE(review): somePl_Det is marked Sg like someSg_Det; confirm this is intended
  somePl_Det = mkDet (word "yi1xie1") Sg ;
  few_Det = mkDet "shao3" Pl ;
  other_A = mkA "qi2ta1" ;
oper
-- Lexical helpers for the closed-class categories below: all of them are
-- the same operation, ssword (a record with one s field holding the word).
mkIPL, mkIAdvL, mkAdA, mkIDetL, mkPConjL, mkCAdv, mkIQuant = ssword ;
-- hsk
-- Entries whose commented-out form starts with "---" are disabled; several
-- of them are defined again in the "just missing" section further down.
-- NOTE(review): tone digits were generated automatically (first listed
-- reading of each character) and have not all been checked.
lin
above_Prep = mkPrep "shang4bian1" ;
after_Prep = mkPrep "yi3hou4" ;
under_Prep = mkPrep "xia4" ;
why_IAdv = mkIAdvL "wei2shen2ma" ;
too_AdA = mkAdA "tai4" ;
before_Prep = mkPrep "cong1qian2" ; --s
between_Prep = mkPrep "zhi1jian1" ; --s
but_PConj = mkPConjL "dan4shi4" ; --s
-- modal verbs: no aspect markers, negated with "bu4"
can_VV = mkVerb "neng2" [] [] [] [] "bu4" ;
must_VV = mkVerb "bi4xu1" [] [] [] [] "bu4" ; ---- False "bu4neng2"
want_VV = mkVerb "xiang3" [] [] [] [] "bu4" ;
-- NOTE(review): uses mkV where the neighbouring modal verbs use mkVerb - confirm
can8know_VV = mkV "hui4" [] [] [] [] "bu4" ; ----
-- two-part preposition: "yi3wai4" before the noun phrase, "chu2le" after it
except_Prep = mkPrep "chu2le" "yi3wai4" ; --s
for_Prep = mkPrep "wei2le" ; --s
from_Prep = mkPrep "cong1" ; --s
---how8many_IDet = mkIDet "ji1" ; --s
---how8much_IDet = mkIDet "duo1shao3" ; --s
in8front_Prep = mkPrep "qian2bian1" ; --s
it_Pron = pronNP "ta1" ; --s
---less_CAdv = mkCAdv "shao3" ; --s
much_Det = mkDet "duo1" Sg ; --s
---more_CAdv = mkCAdv "geng1" ; --s
---most_Predet = mkPredet "zui4" ; --s
no_Quant = mkDet "bu4" ; --s
not_Predet = ssword "bu4" ;
---only_Predet = mkPredet "qi2" ; --s
otherwise_PConj = mkPConjL "hai2shi4" ; --s
to_Prep = mkPrep "wang3" ; --s
---which_IQuant = mkIQuant "na3" ; --s
have_V2 = mkV2 "you3" ;
-- "yes" is the copula and "no" the negation word
yes_Utt = ss copula_s ;
no_Utt = ss neg_s ;
oper
  -- One form of a conjunction: nothing before the first conjunct (s1 is
  -- empty), the given word between the conjuncts (s2).
  mkConjForm : Str -> {s1,s2 : Str} =
    \conjWord -> {s1 = [] ; s2 = word conjWord} ;
-- manually by AR
lin
always_AdV = ssword "yi1zhi2" ;
-- partitive: the possessive particle as a preposition
part_Prep = mkPrep possessive_s ;
language_title_Utt = ssword "zhong1wen2" ;
please_Voc = ss "qing3" ;
-- built with mkAdA, i.e. a plain one-field record
quite_Adv = mkAdA "de2hen3" ;
-- just missing
-- NOTE(review): the pinyin was generated automatically; only the "there"
-- forms are corrected here (na4, matching the structure of the "here"
-- forms), other tone digits still need a native-speaker check.
lin
  almost_AdA = ssword "ji1hu1" ;
  almost_AdN = ssword "ji1hu1" ;
  --as_CAdv = ssword "shen2ma" ; -- as good as X
  at_least_AdN = ssword "zui4shao3" ; -- at least five
  at_most_AdN = ssword "zui4duo1" ;
  behind_Prep = mkPrep "hou4mian4" "zai4" ;
  --both7and_DConj = ssword "shen2ma" ; -- both - and
  by8agent_Prep = mkPrep "bei4" ; -- by for agent in passive
  -- [mark] 被
  during_Prep = mkPrep "qi1jian1" "zai4" ; -- [mark] often equivalent to nothing
  -- translation for "he swam during this summer. " and "he swam this summer." are often the same
  --either7or_DConj = ssword "shen2ma" ;
  everybody_NP = ssword "mei3ge4ren2" ; -- [mark] "mei3ge4ren2": 每(every)+个(classifier)+人(person)
  everything_NP = ssword "mei3jian4shi4" ; -- [mark] "mei3jian4shi4": 每(every)+件(classifier)+事(thing)
  everywhere_Adv = mkAdv "dao4chu3" ;
  here7from_Adv = mkAdv "cong1zhe4li3" ; -- from here
  here7to_Adv = mkAdv "dao4zhe4li3" ; -- to here
  -- [mark] "cong1zhe4li3" 从(from) 这里(here)
  -- "dao4zhe4li3" 到( to ) 这里(here)
  how8many_IDet = ssword "duo1shao3" ;
  how8much_IAdv = ssword "duo1shao3" ;
  if_Subj = mkSubj "ru2guo3" "jiu4" ; -- [mark] "jiu4" often comes between NP and VP
  --less_CAdv = ssword "shen2ma" ; -- less good than
  --more_CAdv = ssword "shen2ma" ;
  most_Predet = ssword "da4duo1shu3" ;
  nobody_NP = ssword "mei2ren2" ;
  nothing_NP = ssword "mei2you3shen2ma" ;
  on_Prep = mkPrep "shang4" "zai4" ;
  only_Predet = ssword "qi2you3" ; -- only John
  so_AdA = ssword "ru2ci3" ;
  somebody_NP = ssword "mou3ren2" ;
  something_NP = ssword "mou3shi4" ; -- [mark] in sent, it depends on the context
  somewhere_Adv = mkAdv "mou3chu3" ;
  that_Subj = mkSubj [] ", " ; -- that + S [mark] comma
  -- "there" is na4li3 (那里); na3li3 is "where" (哪里)
  there7from_Adv = mkAdv "cong1na4li3" ; -- from there
  there7to_Adv = mkAdv "dao4na4li3" ; -- to there
  therefore_PConj = ssword "yin1ci3" ;
  through_Prep = mkPrep "tong1guo4" ;
  which_IQuant = ssword [] ; -- [mark] in sent, it depends on the context
  without_Prep = mkPrep "mei2you3" [] ;
  youPol_Pron = ssword "nin2" ; -- polite you
}

View File

@@ -0,0 +1,36 @@
--# -path=.:../abstract:../common
concrete SymbolCmn of Symbol = CatCmn ** open Prelude, ResCmn in {

  flags coding = utf8 ;

  lin
    -- Symbols and literals used as proper names: taken over unchanged.
    SymbPN  sym = sym ;
    IntPN   int = int ;
    FloatPN flt = flt ;
    NumPN   num = num ;

    -- Noun followed by an integer; the noun's c field is kept.
    CNIntNP cn int = {
      s = cn.s ++ int.s ;
      c = cn.c
      } ;

    -- Determiner, noun and a list of symbols, in that order.
    CNSymbNP det cn syms = ss (det.s ++ cn.s ++ syms.s) ; ----

    -- Noun followed by a number; the noun's c field is kept.
    CNNumNP cn num = {
      s = cn.s ++ num.s ;
      c = cn.c
      } ;

    -- Symbols used as sentence, numeral or ordinal: taken over unchanged.
    SymbS   sym = sym ;
    SymbNum sym = sym ;
    SymbOrd sym = sym ;

  lincat
    Symb, [Symb] = SS ;

  lin
    MkSymb str = str ;

    -- Symbol lists are joined with an empty infix.
    BaseSymb = infixSS "" ;
    ConsSymb = infixSS "" ;
}

View File

@@ -0,0 +1,13 @@
concrete TenseCmn of Tense =
  CatCmn [Tense,Temp], TenseX [Ant,Pol,AAnter,ASimul,PNeg,PPos] ** open ResCmn in {

  lin
    -- Tense plus anteriority: the strings are concatenated, the aspect is
    -- taken from the tense alone.
    TTAnt tense ant = {s = tense.s ++ ant.s ; t = tense.t} ;

    ---- ??
    -- Each abstract tense is mapped to one aspect value and contributes no
    -- string of its own.
    TPres = {s = [] ; t = APlain} ;
    TPast = {s = [] ; t = APerf} ;
    TFut  = {s = [] ; t = ADurProg} ;
    TCond = {s = [] ; t = ADurStat} ;
}

View File

@@ -0,0 +1,11 @@
concrete TextCmn of Text = CommonX - [Temp,Tense,Adv] ** open ResCmn in {

  -- Texts are phrases separated by the punctuation strings defined in
  -- ResCmn (fullstop_s, questmark_s, exclmark_s), each as a token of its own.
  lin
    TEmpty = {s = []} ;
    TFullStop  phr rest = {s = phr.s ++ fullstop_s  ++ rest.s} ;
    TQuestMark phr rest = {s = phr.s ++ questmark_s ++ rest.s} ;
    TExclMark  phr rest = {s = phr.s ++ exclmark_s  ++ rest.s} ;
}

View File

@@ -0,0 +1,60 @@
-- Verb phrase construction: complementation, adverbial modification and
-- copular predication, built from the ResCmn operations predV, insertObj
-- and insertAdv.
concrete VerbCmn of Verb = CatCmn ** open ResCmn, Prelude in {
flags optimize=all_subs ;
lin
UseV = predV ;
-- Slash verb phrases carry in c2 the preposition of the missing complement.
SlashV2a v = predV v ** {c2 = v.c2} ;
Slash2V3 v np = insertObj np (predV v) ** {c2 = v.c3} ; ---- to check arg order
Slash3V3 v np = insertObj np (predV v) ** {c2 = v.c2} ;
SlashV2A v ap = insertObj ap (predV v) ** {c2 = v.c2} ;
SlashV2V v vp = insertObj (mkNP (infVP vp)) (predV v) ** {c2 = v.c2} ;
SlashV2S v s = insertObj s (predV v) ** {c2 = v.c2} ;
SlashV2Q v q = insertObj q (predV v) ** {c2 = v.c2} ;
-- Verb-phrase complement: the embedded verb and its complement follow the
-- main verb; the embedded pre-verbal part stays in front of the main verb.
ComplVV v vp = {
verb = v ;
compl = vp.verb.s ++ vp.compl ;
prePart = vp.prePart
} ;
ComplVS v s = insertObj s (predV v) ;
ComplVQ v q = insertObj q (predV v) ;
ComplVA v ap = insertObj ap (predV v) ;
-- Fill the slash: the noun phrase is wrapped in the stored preposition
-- and put in front of the existing complements.
ComplSlash vp np = insertObj (mkNP (appPrep vp.c2 np.s)) vp ;
UseComp comp = comp ;
SlashVV v vp = ---- too simple?
insertObj (mkNP (infVP vp)) (predV v) ** {c2 = vp.c2} ;
SlashV2VNP v np vp =
insertObj np
(insertObj (mkNP (infVP vp)) (predV v)) ** {c2 = vp.c2} ;
-- Manner adverbs follow the verb, introduced by deVAdv_s; all other
-- adverbs go before the verb, prefixed with the base form of zai_V.
AdvVP vp adv = case adv.advType of {
ATManner => insertObj (ss (deVAdv_s ++ adv.s)) vp ; -- he sleeps well
_ => insertAdv (ss (zai_V.s ++ adv.s)) vp -- he sleeps in the house / today
} ;
AdVVP adv vp = insertAdv adv vp ;
ReflVP vp = insertObj (mkNP reflPron) vp ;
-- passive_s is inserted as if it were an object
PassV2 v = insertObj (mkNP passive_s) (predV v) ; ----
-- Copular complements: adjective, noun phrase and common noun use the
-- copula; adverbs use zai_V.
CompAP ap = insertObj (mkNP ap.s) (predV copula) ; ---- hen / bu
CompNP np = insertObj np (predV copula) ; ----
CompCN cn = insertObj cn (predV copula) ; ----
CompAdv adv = insertObj adv (predV zai_V) ;
}