diff --git a/lib/doc/Compare.hs b/lib/doc/Compare.hs index 1965fe96b..3f4f13777 100644 --- a/lib/doc/Compare.hs +++ b/lib/doc/Compare.hs @@ -1,5 +1,5 @@ lang1 = "Eng" -lang2 = "Tha" +lang2 = "Chi" -- to write a comparison for two languages diff --git a/lib/src/chinese/README b/lib/src/chinese/README index 0369c9296..a480c8f2c 100644 --- a/lib/src/chinese/README +++ b/lib/src/chinese/README @@ -230,3 +230,64 @@ Lexicon and Structural checked and completed by Jolene. Some open issues found b GF/lib/src/chinese/ complete and compilable! Added to darcs by AR. +Issues from + + Yip-Po Ching and Don Rimmington, + Basic Chinese. A Grammar and Workbook, + Routledge, + London and New York, + 2009. + +p. 4 the dun-comma in lists + +p. 28 "who is X" vs. "X is who" + +p. 38 er -> liang before a measure word + +p. 41 ordinals require measure words: di yi ge xuesheng + +p. 44 how many - duoshao vs. yi + +p. 63 possessive precedes indefinite plural: wo de hen duo pengyou "many of my friends" + +p. 94 degree + adjective + de + noun: hen da de wuzi + +p. 95 no copula in adjectival predication + +p. 96 adjectives negated by bu + +p. 97 AB -> AABB reduplication + +p. 104 non-gradable adjectives require shi: zhei tiao yu shi huo de + +p. 106 it was Chinese that I studied + +p. 116 I did it better than you + +p. 128 disyllabic place words + +p. 155-158 mapping tenses to aspects + +p. 168 "can": hui/neng + +p. 174 negation and tense + +p. 185 yes/no + +p. 186 alternation questions + +p. 197 "or" haishi/huozhe + +p. 206 "please" in imperative + +p. 207 "let's" zanmen he yi bei ba + +p. 242 coverbs + +p. 
255 disyllabic prepositions + + + + + + diff --git a/lib/src/chinese/StructuralChi.gf b/lib/src/chinese/StructuralChi.gf index c346ef02f..844c9de39 100644 --- a/lib/src/chinese/StructuralChi.gf +++ b/lib/src/chinese/StructuralChi.gf @@ -48,7 +48,7 @@ whatSg_IP, whatPl_IP = mkIPL " 什么" ; where_IAdv = mkIAdvL "哪里" ; when_IAdv = mkIAdvL "什么时候" ; how_IAdv = mkIAdvL "如何" ; -all_Predet = ss "所有" ; +all_Predet = ssword "所有" ; many_Det = mkDet "多" Pl ; someSg_Det = mkDet (word "一些") Sg ; somePl_Det = mkDet (word "一些") Sg ; diff --git a/lib/src/chinese/pinyin/AdjectiveCmn.gf b/lib/src/chinese/pinyin/AdjectiveCmn.gf new file mode 100644 index 000000000..a765efe20 --- /dev/null +++ b/lib/src/chinese/pinyin/AdjectiveCmn.gf @@ -0,0 +1,26 @@ +concrete AdjectiveCmn of Adjective = CatCmn ** open ResCmn, Prelude in { + + lin + + PositA a = a ; + + --ComparA a np = complexAP (a.s ++ than_s ++ np.s) ; + ComparA a np = complexAP (than_s ++ np.s ++ a.s) ; + + UseComparA a = complexAP (geng_s ++ a.s) ; + + AdjOrd ord = complexAP ord.s ; + + CAdvAP ad ap np = complexAP (ap.s ++ ad.s ++ ad.p ++ np.s) ; + + ComplA2 a np = complexAP (a.s ++ appPrep a.c2 np.s) ; + + ReflA2 a = complexAP (a.s ++ appPrep a.c2 reflPron) ; + + SentAP ap sc = complexAP (ap.s ++ sc.s) ; + + AdAP ada ap = complexAP (ada.s ++ ap.s) ; + + UseA2 a = a ; + +} diff --git a/lib/src/chinese/pinyin/AdverbCmn.gf b/lib/src/chinese/pinyin/AdverbCmn.gf new file mode 100644 index 000000000..61cc78918 --- /dev/null +++ b/lib/src/chinese/pinyin/AdverbCmn.gf @@ -0,0 +1,19 @@ +concrete AdverbCmn of Adverb = CatCmn ** + open ResCmn, Prelude in { + + lin + PositAdvAdj a = {s = a.s ; advType = ATManner} ; + + PrepNP prep np = ss (appPrep prep np.s) ** {advType = ATPlace} ; --- should depend on prep, np ? or treat in ExtraCmn ? 
+ + ComparAdvAdj cadv a np = ss (a.s ++ cadv.s ++ cadv.p ++ np.s) ** {advType = ATManner} ; + + ComparAdvAdjS cadv a s = ss (a.s ++ cadv.s ++ cadv.p ++ s.s) ** {advType = ATManner} ; + + AdAdv adv ad = ss (ad.s ++ adv.s) ** {advType = ad.advType} ; + + SubjS subj s = ss (subj.prePart ++ s.s ++ subj.sufPart) ** {advType = ATManner} ; + + AdnCAdv cadv = ss (cadv.s ++ conjThat) ** {advType = ATManner} ; ----- + +} diff --git a/lib/src/chinese/pinyin/AllCmn.gf b/lib/src/chinese/pinyin/AllCmn.gf new file mode 100644 index 000000000..060e86afa --- /dev/null +++ b/lib/src/chinese/pinyin/AllCmn.gf @@ -0,0 +1,3 @@ +--# -path=.:../abstract:../common:prelude + +concrete AllCmn of AllChiAbs = LangCmn, ExtraCmn ; diff --git a/lib/src/chinese/pinyin/CatCmn.gf b/lib/src/chinese/pinyin/CatCmn.gf new file mode 100644 index 000000000..6e7b3ee72 --- /dev/null +++ b/lib/src/chinese/pinyin/CatCmn.gf @@ -0,0 +1,84 @@ +concrete CatCmn of Cat = CommonX - [Tense, Temp, Adv] ** open ResCmn, Prelude in { + + lincat + +-- Tensed/Untensed + + S = {s : Str} ; + QS = {s : Str} ; + RS = {s : Str} ; + SSlash = {s : Str ; c2 : Preposition} ; + +-- Sentence + + Cl = Clause ; -- {s : Polarity => Aspect => Str ; np: Str ; vp: Polarity => Aspect => Str} ; + + ClSlash = Clause ** {c2 : Preposition} ; + + Imp = {s : Polarity => Str} ; + +-- Question + + QCl = {s : Polarity => Aspect => Str} ; + IP = {s : Str} ; + IComp = {s : Str} ; + IDet, IQuant = {s : Str} ; + +-- Relative + + RCl = {s : Polarity => Aspect => Str} ; + RP = {s : Str} ; + +-- Verb + + VP = ResCmn.VP ; + Comp = ResCmn.VP ; + VPSlash = ResCmn.VP ** {c2 : Preposition} ; + +-- Adjective + + AP = ResCmn.Adj ; + +-- Noun + + CN = ResCmn.Noun ; + NP, Pron = ResCmn.NP ; + Det, Quant = Determiner ; + Predet = {s : Str} ; ---- + Ord = {s : Str} ; + Num = {s : Str ; numType : NumType} ; + + Adv = {s : Str ; advType : AdvType} ; + +-- Numeral + + Numeral, Card, Digits = {s : Str} ; + +-- Structural + + Conj = {s : ConjForm => {s1,s2 : Str}} ; + 
Subj = {prePart : Str ; sufPart : Str} ; + Prep = Preposition ; + +-- Open lexical classes, e.g. Lexicon + + V, VS, VQ, VA = Verb ; + V2, V2Q, V2S = Verb ** {c2 : Preposition} ; + V3, V2A, V2V = Verb ** {c2, c3 : Preposition} ; + VV = Verb ; + + A = ResCmn.Adj ; + A2 = ResCmn.Adj ** {c2 : Preposition} ; + + N = ResCmn.Noun ; + N2 = ResCmn.Noun ** {c2 : Preposition} ; + N3 = ResCmn.Noun ** {c2,c3 : Preposition} ; + PN = ResCmn.NP ; + +-- overridden + + Temp = {s : Str ; t : Aspect} ; + Tense = {s : Str ; t : Aspect} ; + + +} diff --git a/lib/src/chinese/pinyin/ConjunctionCmn.gf b/lib/src/chinese/pinyin/ConjunctionCmn.gf new file mode 100644 index 000000000..8a598e20c --- /dev/null +++ b/lib/src/chinese/pinyin/ConjunctionCmn.gf @@ -0,0 +1,34 @@ +concrete ConjunctionCmn of Conjunction = CatCmn ** open ResCmn, Prelude, Coordination in { + + lin + + ConjS c = conjunctDistrSS (c.s ! CSent) ; + ConjAdv c as = conjunctDistrSS (c.s ! CSent) as ** {advType = ATPlace} ; ---- ?? + ConjNP c = conjunctDistrSS (c.s ! CPhr CNPhrase) ; + ConjAP c as = conjunctDistrSS (c.s ! CPhr CAPhrase) as ** {monoSyl = False} ; + ConjRS c = conjunctDistrSS (c.s ! CSent) ; + +-- These fun's are generated from the list cat's. 
+ + BaseS = twoSS ; + ConsS = consrSS thcomma ; + BaseAdv = twoSS ; + ConsAdv = consrSS thcomma ; + BaseNP = twoSS ; + ConsNP = consrSS thcomma ; + BaseAP = twoSS ; + ConsAP = consrSS thcomma ; + BaseRS = twoSS ; + ConsRS = consrSS thcomma ; + + lincat + [S] = {s1,s2 : Str} ; + [Adv] = {s1,s2 : Str} ; + [NP] = {s1,s2 : Str} ; + [AP] = {s1,s2 : Str} ; + [RS] = {s1,s2 : Str} ; + + oper + thcomma : Str = [] ; ---- should be a space + +} diff --git a/lib/src/chinese/pinyin/ExtraCmn.gf b/lib/src/chinese/pinyin/ExtraCmn.gf new file mode 100644 index 000000000..6c6cfc1ba --- /dev/null +++ b/lib/src/chinese/pinyin/ExtraCmn.gf @@ -0,0 +1,7 @@ +concrete ExtraCmn of ExtraChiAbs = CatCmn ** + open ResCmn, Prelude in { + + lincat + Aspect = {s : Str ; a : ResCmn.Aspect} ; + +} diff --git a/lib/src/chinese/pinyin/GrammarCmn.gf b/lib/src/chinese/pinyin/GrammarCmn.gf new file mode 100644 index 000000000..544ab97cc --- /dev/null +++ b/lib/src/chinese/pinyin/GrammarCmn.gf @@ -0,0 +1,22 @@ +--# -path=.:../abstract:../common:prelude + +concrete GrammarCmn of Grammar = + NounCmn, + VerbCmn, + AdjectiveCmn, + AdverbCmn, + NumeralCmn, + SentenceCmn, + QuestionCmn, + RelativeCmn, + ConjunctionCmn, + PhraseCmn, + TextCmn, + StructuralCmn, + IdiomCmn, + TenseCmn + ** { + +flags startcat = Phr ; unlexer = text ; lexer = text ; + +} ; diff --git a/lib/src/chinese/pinyin/IdiomCmn.gf b/lib/src/chinese/pinyin/IdiomCmn.gf new file mode 100644 index 000000000..8ff954f66 --- /dev/null +++ b/lib/src/chinese/pinyin/IdiomCmn.gf @@ -0,0 +1,27 @@ +concrete IdiomCmn of Idiom = CatCmn ** open Prelude, ResCmn in { + + lin + ---- formal subject, e.g. it is hot ?? now empty subject + ImpersCl vp = mkClause [] vp ; + --can be empty, or ImpersCl vp = mkClause "天" vp ; but "天" only used to describe weather(e.g. it's raining) + + ---- one wants to learn Chinese ?? 
now empty subject + GenericCl vp = mkClause [] vp ; + -- GenericCl vp = mkClause "有人" vp ; (meaning: there is a person) + + ---- it is John who did it + CleftNP np rs = mkClause np.s copula rs.s ; + + CleftAdv ad s = mkClause ad.s (insertObj s (predV copula)) ; ---- it is here she slept + + ExistNP np = mkClause [] (regVerb you_s) np.s ; ---- infl of you + + ExistIP ip = {s = (mkClause [] (regVerb you_s) ip.s).s} ; ---- infl of you + + ProgrVP vp = vp ; ---- + + ImpPl1 vp = ss (infVP vp) ; ---- + +} + + diff --git a/lib/src/chinese/pinyin/LangCmn.gf b/lib/src/chinese/pinyin/LangCmn.gf new file mode 100644 index 000000000..992ad6c1c --- /dev/null +++ b/lib/src/chinese/pinyin/LangCmn.gf @@ -0,0 +1,11 @@ +--# -path=.:../abstract:../common:../prelude + + +concrete LangCmn of Lang = + GrammarCmn, + LexiconCmn + ** { + +flags startcat = Phr ; unlexer = concat ; lexer = text ; + +} ; diff --git a/lib/src/chinese/pinyin/LexiconCmn.gf b/lib/src/chinese/pinyin/LexiconCmn.gf new file mode 100644 index 000000000..3effed0cb --- /dev/null +++ b/lib/src/chinese/pinyin/LexiconCmn.gf @@ -0,0 +1,458 @@ +concrete LexiconCmn of Lexicon = CatCmn ** + open ParadigmsCmn, ResCmn, Prelude in { + +flags + coding = utf8 ; + +lin + +-- LexiconCmn + + man_N = mkN "nan2ren2" "ge4"; -- "nanren" "ge" first being noun, second is classifier(counter) + woman_N = mkN "nu:3ren2" "ge4"; -- "nvren" "ge" classifier behaves like the "cup" in "cup of tea" + house_N = mkN "fang2zi3" "jian1"; -- "fangzi" "jian" + tree_N = mkN "shu4" "ke1"; -- "shu" "ke" + big_A = mkA "da4" ; -- "da" + small_A = mkA "xiao3" ; -- "xiao" + green_A = mkA "lu:4" ; -- "lv" + walk_V = mkV "zou3" ; -- "zou" + sleep_V = mkV "shui4" ; -- "shui" +---- arrive_V = mkV "dao4" "le" [] [] "guo4"; -- "dao" + love_V2 = mkV2 "ai4" ; -- "ai" + watch_V2 = mkV2 "kan1" ; -- "kan" +--- please2_V2 = mkV "ma2fan2" ; -- "mafan" +--- believe_VS = mkV "xiang1xin4" ; -- "xiangxin" + know_VS = mkV "zhi1dao4" ; -- "zhidao" + wonder_VQ = mkV "hao4qi2" ; -- 
"haoqi" + john_PN = mkPN "yue1han4" ; -- "yuehan" +--- mary_PN = mkPN "ma3li2" ; -- "mali" + + +-- Swadesh + +--big_A = mkA "da4" ; +long_A = mkA "chang2" ; +wide_A = mkA "kuan1" ; +thick_A = mkA "hou4" ; +heavy_A = mkA "chong2" ; +--small_A = mkA "xiao3" ; +short_A = mkA "duan3" ; +narrow_A = mkA "zhai3" ; +thin_A = mkA "bo2" ; -- [mark] for person mkA "shou4" +--woman_N = mkN "nu:3ren2" ; +--man(adult_N = mkN "nan2ren2" ; +--man(human_N = mkN "ren2" ; +child_N = mkN "hai2zi3" ; +wife_N = mkN "qi1zi3" ; +husband_N = mkN "zhang4fu1" ; +--father_N = mkN "fu4qin1" ; +animal_N = mkN "dong4wu4" "qi2"; -- [mark] added classifier for nouns +fish_N = mkN "yu2" "tiao2"; +bird_N = mkN "niao3" "qi2"; +dog_N = mkN "gou3" "qi2"; +louse_N = mkN "shi1" "qi2"; +snake_N = mkN "she2" "tiao2"; +worm_N = mkN "chong2" "qi2"; +--tree_N = mkN "shu4" ; +forest_N = mkN "sen1lin2" "pian1"; +stick_N = mkN "shu4zhi1" ; +fruit_N = mkN "shui3guo3" ; +seed_N = mkN "zhong3zi3" "li4"; +leaf_N = mkN "ye4zi3" "pian1"; -- [mark] "ye4" -> "ye4zi3" , "ye4" is often treated as morpheme +root_N = mkN "shu4gen1" ; -- [mark] "gen1" --> "shu4gen1"(tree root) +bark_N = mkN "shu4pi2" "kuai1"; +flower_N = mkN "hua1" "duo3"; +grass_N = mkN "cao3" "ke1"; +rope_N = mkN "sheng2" "gen1"; +skin_N = mkN "pi2" "kuai1"; +meat_N = mkN "rou4" "kuai1"; +blood_N = mkN "xie3" "di1"; -- [mark] several classifiers, "di1"(drop), "tan1"(puddle) +bone_N = mkN "gu3tou2" "kuai1"; -- [mark] "gu3" -> "gu3tou2" , "gu3" is often treated as morpheme +fat_N = mkN "zhi1fang2" "dui1"; -- [mark] often without classifier +egg_N = mkN "dan4" "ke1"; +horn_N = mkN "jiao3" "gen1"; +tail_N = mkN "wei3ba1" "tiao2"; -- [mark] "wei3" -> "wei3ba1" , "wei3" is often treated as morpheme, or if stands alone, it is a classifier itself +feather_N = mkN "yu3mao2" "gen1"; +hair_N = mkN "tou2fa1" "gen1"; -- [mark] several classifiers , "gen1"(single hair), "ba3"(several hairs) +head_N = mkN "tou2" "ke1"; +ear_N = mkN "er3duo3" "qi2"; +eye_N = mkN 
"yan3jing1" "qi2"; +nose_N = mkN "bi2zi3" ; +mouth_N = mkN "zui3" "zhang1"; +tooth_N = mkN "ya2chi3" "ke1"; +tongue_N = mkN "she2" "gen1"; +fingernail_N = mkN "zhi3jia3" "pian1"; +foot_N = mkN "jiao3" "qi2"; +leg_N = mkN "tui3" "tiao2"; +knee_N = mkN "xi1gai4" ; -- [mark] "xi1" -> "xi1gai4" +hand_N = mkN "shou3" "qi2"; +wing_N = mkN "chi4bang3" "qi2"; -- [mark] "yi4" -> "chi4bang3", "chi4bang3" is the common form for wing. +belly_N = mkN "du3zi3" ; +guts_N = mkN "chang2zi3" "gen1"; +neck_N = mkN "bo2zi3" ; +back_N = mkN "bei1" ; +breast_N = mkN "xiong1" ; +heart_N = mkN "xin1zang1" "ke1"; +liver_N = mkN "gan1" ; +drink_V2 = mkV2 "he1" ; +eat_V2 = mkV2 "chi1" ; +bite_V2 = mkV2 "yao3" ; +suck_V2 = mkV2 "xi1" ; +spit_V = mkV "tu3" ; +vomit_V = mkV "ou3" ; +blow_V = mkV "chui1" ; +breathe_V = mkV "hu1xi1" ; +laugh_V = mkV "xiao4" ; +see_V2 = mkV2 "kan1" ; +hear_V2 = mkV2 "ting1" ; +--know_V = mkV "zhi1dao4" ; +think_V = mkV "xiang3" ; +smell_V = mkV "wen2" ; -- [mark] "xiu4" -> "wen2", "wen2" is the common form for smell. 
+fear_V2 = mkV2 "pa4" ; +--sleep_V = mkV "shui4" ; +live_V = mkV "huo2" ; +die_V = mkV "si3" ; +kill_V2 = mkV2 "sha1" ; +fight_V2 = mkV2 "da2jia4" ; -- [mark] "chao3jia4" -> "da2jia4", "chao3jia4" = quarrel, argue +hunt_V2 = mkV2 "da2lie4" ; -- [mark] "da2lie4" is iv, can't think of proper translation in v2 form for hunt +hit_V2 = mkV2 "da2" ; +cut_V2 = mkV2 "ge1" ; +split_V2 = mkV2 "pi1kai1" ; +stab_V2 = mkV2 "ci4" ; +scratch_V2 = mkV2 "sao1" ; +dig_V = mkV "wa1" ; +swim_V = mkV "you2yong3" ; +fly_V = mkV "fei1" ; +--walk_V = mkV "zou3" ; +come_V = mkV "lai2" ; +lie_V = mkV "tang3" ; +sit_V = mkV "zuo4" ; +stand_V = mkV "zhan4" ; +turn_V = mkV "zhuan3" ; +fall_V = mkV "la4xia4" ; +hold_V2 = mkV2 "wo4" ; +squeeze_V2 = mkV2 "ji3" ; +rub_V2 = mkV2 "rou2" ; +wash_V2 = mkV2 "xi3" ; +wipe_V2 = mkV2 "ca1" ; +pull_V2 = mkV2 "la1" ; +push_V2 = mkV2 "tui1" ; +throw_V2 = mkV2 "reng1" ; +tie_V2 = mkV2 "bang3" ; +sew_V = mkV "feng2" ; +count_V2 = mkV2 "shu3" ; +say_VS = mkVS (mkV "shui4") ; +sing_V = mkV "chang4" ; +play_V = mkV "wan2" ; +float_V = mkV "fu2" ; +flow_V = mkV "liu2" ; +freeze_V = mkV "jie1bing1" ; +swell_V = mkV "peng2zhang4" ; +sun_N = mkN "tai4yang2" ; +moon_N = mkN "yue4liang4" ; +star_N = mkN "xing1xing1" "ke1"; +water_N = mkN "shui3" "di1"; +rain_N = mkN "yu3" "chang3"; +river_N = mkN "he2" "tiao2"; +lake_N = mkN "hu2" ; +sea_N = mkN "hai3" "pian1"; +salt_N = mkN "yan2" "ping2"; +stone_N = mkN "dan4tou2" "kuai1"; +sand_N = mkN "sha1" "li4"; +dust_N = mkN "chen2tu3" []; +earth_N = mkN "deqiu2" ; +cloud_N = mkN "yun2" "duo3"; +fog_N = mkN "wu4" "chang3"; +sky_N = mkN "tian1kong1" "pian1"; +wind_N = mkN "feng1" "zhen4"; +snow_N = mkN "xue3" "chang3"; +ice_N = mkN "bing1" "kuai1"; +smoke_N = mkN "yan1" "zhen4"; +fire_N = mkN "huo3" "chang3"; +ashes_N = mkN "hui1" []; +burn_V = mkV "shao1" ; +road_N = mkN "lu4" "tiao2"; +mountain_N = mkN "shan1" "zuo4"; +red_A = mkA "hong2" ; +--green_A = mkA "lu:4" ; +yellow_A = mkA "huang2" ; +white_A = mkA "bai2" ; +black_A = 
mkA "hei1" ; +night_N = mkN "ye4wan3" ; -- [mark] "ye4wan3" 's classifier is "ge4" +day_N = mkN "bai2tian1" []; -- [mark] "bai2tian1" -> "tian1", "tian1" itself is classifier +year_N = mkN "nian2" [] ; -- [mark] "nian2" itself is classifier +warm_A = mkA "wen1nuan3" ; +cold_A = mkA "leng3" ; +full_A = mkA "man3" ; +new_A = mkA "xin1" ; +old_A = mkA "lao3" ; -- [mark] "lao3" for person, "jiu4" for things +good_A = mkA "hao3" ; +bad_A = mkA "huai4" ; +rotten_A = mkA "lan4" ; +dirty_A = mkA "zang1" ; +straight_A = mkA "zhi2" ; +round_A = mkA "yuan2" ; +sharp_A = mkA "jian1" ; +dull_A = mkA "dun4" ; +smooth_A = mkA "guang1gu3" ; +wet_A = mkA "shi1" ; +dry_A = mkA "gan1" ; +correct_A = mkA "dui4" ; +near_A = mkA "jin4" ; +far_A = mkA "yuan3" ; +left_Ord = ss "zuo3" ; +right_Ord = ss "you4" ; +name_N = mkN "ming2zi4" ; -- [mark] "ming2" --> "ming2zi4" + +-- HSK + +add_V3 = mkV3 "jia1" ; +airplane_N = mkN "fei1ji1" "jia4"; +already_Adv = mkAdv "yi3jing1" ; +answer_V2S = mkV2S (mkV "hui2da1") ; +apple_N = mkN "pin2guo3" ; +art_N = mkN "yi4shu4" []; -- [mark] usually without classifier +ask_V2Q = mkV2Q (mkV "wen4") ; +bank_N = mkN "yin2hang2" "jian1"; +beautiful_A = mkA "piao1liang4" ; +become_VA = mkV "bian4" ; +beer_N = mkN "pi2jiu3" "bei1"; +bike_N = mkN "zi4hang2che1" "tai2"; +blue_A = mkA "la" ; +boat_N = mkN "chuan2" "sao1"; +book_N = mkN "shu1" "ben3"; +bread_N = mkN "mian4bao1" ; +buy_V2 = mkV2 "mai3" ; +cap_N = mkN "mao4zi3" "ding3"; +car_N = mkN "qi4che1" "tai2"; +chair_N = mkN "yi3zi3" "ba3"; +city_N = mkN "cheng2shi4" ; -- [mark] "shi4" --> "cheng2shi4" +clean_A = mkA "gan1jing4" ; +coat_N = mkN "yi1fu2" "jian4"; +country_N = mkN "guo2jia1" ; -- [mark] "guo2" --> "guo2jia1" +cow_N = mkN "niu2" "tou2"; +do_V2 = mkV2 "zuo4" ; +doctor_N = mkN "yi1sheng1" "ming2"; +door_N = mkN "men2" "shan1"; +---easy_A2V = mkA "rong2yi4" ; +factory_N = mkN "gong1chang3" "jian1"; +far_Adv = mkAdv "yuan3" ; +---father_N2 = mkN2 "fu4qin1" ; +fear_VS = mkVS (mkV "pa4") ; +find_V2 = 
mkV2 "fa1xian4" ; +forget_V2 = mkV2 "wang4" ; +friend_N = mkN "peng2you3" ; +girl_N = mkN "gu1niang2" ; +give_V3 = mkV3 "gei3" ; +go_V = mkV "qu4" ; +--go_N = mkN "wang3" ; +grammar_N = mkN "yu3fa3" ; +hat_N = mkN "mao4zi3" "ding3"; +--take_N = mkN "dai4" ; +--have_N = mkN "you3" ; +--take_N = mkN "dai4" ; +--have_N = mkN "you3" ; +hill_N = mkN "shan1" "zuo4"; +hope_VS = mkV "xi1wang4" ; +horse_N = mkN "ma3" "pi1"; +hot_A = mkA "re4" ; +--how many_N = mkN "duo1shao3" ; +important_A = mkA "chong2yao1" ; +--heavy_A = mkA "chong2" ; +industry_N = mkN "gong1ye4" []; +jump_V = mkV "tiao4" ; +know_V2 = mkV2 "zhi1dao4" ; +know_VQ = mkV "zhi1dao4" ; +lamp_N = mkN "deng1" "zhan3"; +language_N = mkN "yu3yan2" "zhong3"; +learn_V2 = mkV2 "xue2" ; +--leave_N = mkN "li2kai1" ; +leave_V2 = mkV2 "li2kai1" ; --[mark] "li2" --> "li2kai1", "li2" itself is either a morpheme, or a marker indicating distance +--walk_N = mkN "zou3" ; +like_V2 = mkV2 "xi3huan1" ; +--be willing_N = mkN "yuan4yi4" ; +listen_V2 = mkV2 "ting1" ; +lose_V2 = mkV2 "diu1" ; +--love_N = mkN "xi3huan1" ; +love_V2 = mkV2 "ai4" ; +---mother_N2 = mkN2 "ma1" ; +music_N = mkN "yin1le4" [] ; -- [mark] usually without classifier +newspaper_N = mkN "bao4zhi3" "zhang1"; --[mark] "bao4" --> "bao4zhi3" +now_Adv = mkAdv "xian4zai4" ; +number_N = mkN "hao2ma3" ; -- [mark] "hao2" --> "hao2ma3" +open_V2 = mkV2 "kai1" ; +paint_V2A = mkV2A (mkV "hua4") ; +paper_N = mkN "zhi3" "zhang1"; +--place_N = mkN "defang1" ; +--part_N = mkN "bu4fen1" ; +pen_N = mkN "bi3" "qi2"; +--pen_N = mkN "gang1bi3" "qi2"; +person_N = mkN "ren2" ; +--beat_N = mkN "da2" ; +--pull_N = mkN "la1" ; +play_V2 = mkV2 "wan2" ; --[mark] "wan2er2" --> "wan2" +--perform_N = mkN "biao3yan3" ; +--have_N = mkN "you3" ; +put_V2 = mkV2 "bai3" ; +--let go_N = mkN "fang4" ; +question_N = mkN "wen4ti2" ; + +--be enough_N = mkN "gou4" ; +--very_N = mkN "hen3" ; +rain_V0 = mkV "xia4yu3"; +read_V2 = mkV2 "dou4" ; +reason_N = mkN "dao4li3" ; +restaurant_N = mkN "fan4dian4" 
"jian1"; +--appropriate_N = mkN "ge3shi4" ; +--correct_N = mkN "zheng1que4" ; +--run_N = mkN "pao3bu4" ; +run_V = mkV "pao3" ; +--road_N = mkN "dao4" ; +school_N = mkN "xue2jiao4" "suo3"; +science_N = mkN "ke1xue2" []; -- [mark] usually without classifier +sell_V3 = mkV3 "mai4" ; +--go_N = mkN "qu4" ; +send_V3 = mkV3 "ji4" ; +--clap_N = mkN "pai1" ; +sheep_N = mkN "yang2" "qi2"; +ship_N = mkN "chuan2" "sao1"; +shoe_N = mkN "xie2" "qi2"; +shop_N = mkN "shang1dian4" "jian1"; +--of that kind_N = mkN "na3yang4" ; +--what_N = mkN "shen2ma" ; +song_N = mkN "ge1" "shou3"; +--tell_N = mkN "jiang3" ; +speak_V2 = mkV2 "shui4" ; +--road_N = mkN "dao4" ; +--live_N = mkN "zhu4" ; +--act as_N = mkN "dang1" ; +student_N = mkN "xue2sheng1" "ming2" ; +table_N = mkN "zhuo1zi3" "zhang1"; +--word_N = mkN "hua4" ; +talk_V3 = mkV3 "shui4" ; +--talk_N = mkN "tan2" ; +--education_N = mkN "jiao1yu4" ; +teach_V2 = mkV2 "jiao1" ; +--coach_N = mkN "fu3dao3" ; +--teacher_N = mkN "xian1sheng1" ; +--master worker_N = mkN "shi1fu4" ; +teacher_N = mkN "lao3shi1" "ming2"; +television_N = mkN "dian4shi4" "tai2"; +--that_N = mkN "na3ge4" ; +--that_N = mkN "na3" ; +--from_N = mkN "cong1" ; +--by way of_N = mkN "tong1guo4" ; +--towards_N = mkN "xiang4" ; +--and_N = mkN "he2" ; +today_Adv = mkAdv "jin1tian1" ; +--now_N = mkN "xian4zai4" ; +--also_N = mkN "ye3" ; +train_N = mkN "huo3che1" "liang4"; +travel_V = mkV "lu:3hang2" ; +--below_N = mkN "xia4bian1" ; +--understand_N = mkN "lejie3" ; +understand_V2 = mkV2 "dong3" ; +--recognize_N = mkN "ren4shi2" ; +--open up_N = mkN "tong1" ; +university_N = mkN "da4xue2" "suo3"; +wait_V2 = mkV2 "deng3" ; +--need_N = mkN "xu1yao1" ; +watch_V2 = mkV2 "kan1" ; +--time_N = mkN "shi2hou4" ; +--wait_N = mkN "deng3" ; +--what_N = mkN "shen2ma" ; +--how_N = mkN "zen3ma" ; +win_V2 = mkV2 "ying2" ; +--window_N = mkN "chuang1hu4" ; +window_N = mkN "chuang1" "shan1"; +wine_N = mkN "jiu3" "ping2"; +--leave_N = mkN "li2" ; +--do_N = mkN "zuo4" ; +write_V2 = mkV2 "xie3" ; 
+young_A = mkA "nian2qing1" ; + +-- from Google + +apartment_N = mkN "gong1yu4" "jian1" ; +baby_N = mkN "ying1er2" ; +boot_N = mkN "xue1zi3" "qi2" ; -- [mark] "ji1" --> "xue1zi3" +boss_N = mkN "lao3ban3" ; +boy_N = mkN "nan2hai2" ; +brother_N2 = mkN2 "di4di4" ; +butter_N = mkN "huang2you2" "he2"; +camera_N = mkN "she4xiang4tou2" ; +carpet_N = mkN "detan3" "zhang1"; +cat_N = mkN "mao1" "qi2" ; +ceiling_N = mkN "tian1hua1ban3" ; --[mark] "tian1hua1ban3shang4" --> "tian1hua1ban3" +cheese_N = mkN "nai3lao4" "kuai1"; +church_N = mkN "jiao1tang2" "suo3" ; +computer_N = mkN "ji4suan4ji1" "tai2" ; +cousin_N = mkN "biao3di4" ; +distance_N3 = mkN3 (mkN "deju4li2") emptyPrep emptyPrep ; ---- +enemy_N = mkN "di2ren2" ; +father_N2 = mkN2 "fu4qin1" ; +floor_N = mkN "deban3" ; -- [mark] floor "deban3"(have you cleaned the floor) "lou2ceng2"(which floor do you live?) +fridge_N = mkN "bing1xiang1" "tai2" ; +garden_N = mkN "hua1yuan2" "zuo4"; +glove_N = mkN "shou3tao4" "fu4"; +gold_N = mkN "jin1zi3" "kuai1"; -- [mark] also without classifier +harbour_N = mkN "hai3gang3" ; +iron_N = mkN "tie3" "kuai1"; -- [mark] also without classifier +king_N = mkN "guo2wang2" ; +leather_N = mkN "pi2ge2" "kuai1"; +love_N = mkN "ai4" [] ; -- [mark] "kuai1" --> [], often without classifier +milk_N = mkN "niu2nai3" "bei1" ; -- [mark] "nai3" --> "niu2nai3", which literaly means cow milk +mother_N2 = mkN2 "mu3qin1"; +oil_N = mkN "you2" "tong3" ; -- [mark] rewritten +peace_N = mkN "he2ping2" []; -- [mark] often without classifier +planet_N = mkN "xing1qiu2" ; +plastic_N = mkN "su4liao4" "kuai1"; -- [mark] "su4liao4zhi4cheng2" --> "su4liao4" often without classifier +policeman_N = mkN "jing3cha2" "ming2" ; +priest_N = mkN "mu4shi1" "wei4"; +queen_N = mkN "nu:3wang2" ; +radio_N = mkN "shou1yin1ji1" "tai2" ; +religion_N = mkN "zong1jiao1" []; -- [mark] also without classifier +rock_N = mkN "dan4tou2" "kuai1" ; +roof_N = mkN "wu1ding3" ; +rubber_N = mkN "xiang4jiao1" "kuai1"; +rule_N = mkN "gui1ze2" "xiang4"; 
-- [mark] "yuan2ze2" --> "gui1ze2" +shirt_N = mkN "chen4shan1" "jian4" ; +silver_N = mkN "yin2zi3" "kuai1"; -- [mark] "yin2" --> "yin2zi3" +sister_N = mkN "mei4mei4" ; +sock_N = mkN "wa4zi3" "qi2"; +steel_N = mkN "gang1" "kuai1"; +stove_N = mkN "lu2zi3" ; +village_N = mkN "cun1zhuang1" "zuo4"; +war_N = mkN "zhan4zheng1" "chang3" ; -- [mark] rewritten +wood_N = mkN "mu4tou2" "kuai1" ; -- [mark] "mu4" --> "mu4tou2" + + +-- just missing + +lin + alas_Interj = ssword "ai1" ; + beg_V2V = mkV2V (mkV "qi3qiu2") ; -- beg him to do something + break_V2 = mkV2 "da2po4" ; + broad_A = mkA "kuan1" ; + brown_A = mkA "zong1" ; + clever_A = mkA "cong1ming2" ; + close_V2 = mkV2 "guan1bi4" ; + easy_A2V = mkA2 "jian3chan2" ; + empty_A = mkA "kong1" ; + fun_AV = mkA "you3qu4" ; + hate_V2 = mkV2 "tao3yan4" ; + married_A2 = mkA2 "jie1hun1" ; + paris_PN = mkPN "ba1li2" ; + probable_AS = mkA "ke3neng2" ; + ready_A = mkA "zhun3bei4hao3" ; -- [mark] "zhun3bei4hao3": 准备(v) + 好(adj,complement) + seek_V2 = mkV2 "xun2qiu2" ; + stop_V = mkV "ting2zhi3" ; + stupid_A = mkA "ben4" ; + switch8off_V2 = mkV2 "guan1" ; + switch8on_V2 = mkV2 "kai1" ; + ugly_A = mkA "chou3" ; + uncertain_A = mkA "bu4que4ding4" ; -- [mark] "bu4que4ding4": 不("un-") + 确定("certain") + + + +} diff --git a/lib/src/chinese/pinyin/MkPinyin.hs b/lib/src/chinese/pinyin/MkPinyin.hs new file mode 100644 index 000000000..d7bb76242 --- /dev/null +++ b/lib/src/chinese/pinyin/MkPinyin.hs @@ -0,0 +1,15 @@ +import qualified Data.Map as Map +import Pinyin (c2pMap, useMapGF, mkList) + +main = do + s <- readFile pinyinFile + let m = c2pMap (mkList (words s)) + mapM_ (mkPinyin m) ["Lexicon", "Numeral", "Res", "Structural"] + return () + +pinyinFile = "../pinyin.txt" + +mkPinyin ma mo = do + s <- readFile (mo ++ "Chi.gf") + writeFile (mo ++ "Cmn.gf") (useMapGF ma s) + diff --git a/lib/src/chinese/pinyin/NounCmn.gf b/lib/src/chinese/pinyin/NounCmn.gf new file mode 100644 index 000000000..b0ebf3425 --- /dev/null +++ 
b/lib/src/chinese/pinyin/NounCmn.gf @@ -0,0 +1,87 @@ +concrete NounCmn of Noun = CatCmn ** open ResCmn, Prelude in { + + lin + DetCN det cn = case det.detType of { + DTFull Sg => {s = det.s ++ cn.c ++ cn.s} ; -- this house + DTFull Pl => {s = det.s ++ xie_s ++ cn.s} ; -- these houses + DTNum => {s = det.s ++ cn.c ++ cn.s} ; -- (these) five houses + DTPoss => {s = det.s ++ cn.s} -- our (five) houses + } ; + UsePN pn = pn ; + UsePron p = p ; + + DetNP det = det ; + + PredetNP pred np = mkNP (pred.s ++ possessive_s ++ np.s) ; + + PPartNP np v2 = mkNP ((predV v2).verb.s ++ possessive_s ++ np.s) ; ---- ?? + + AdvNP np adv = mkNP (adv.s ++ possessive_s ++ np.s) ; + + DetQuant quant num = { + s = quant.s ++ num.s ; + detType = case num.numType of { + NTFull => DTNum ; -- five + NTVoid n => case quant.detType of { + DTPoss => DTPoss ; -- our + _ => DTFull n -- these/this + } + } + } ; + + DetQuantOrd quant num ord = { + s = quant.s ++ num.s ++ ord.s ; + detType = case num.numType of { + NTFull => DTNum ; -- five + NTVoid n => case quant.detType of { + DTPoss => DTPoss ; -- our + _ => DTFull n -- these/this + } + } + } ; + + PossPron p = { + s = p.s ++ possessive_s ; + detType = DTPoss + } ; + + NumSg = {s = [] ; numType = NTVoid Sg} ; + NumPl = {s = [] ; numType = NTVoid Pl} ; + + NumCard n = n ** {numType = NTFull} ; + NumDigits d = d ** {numType = NTFull} ; + OrdDigits d = {s = ordinal_s ++ d.s} ; + + NumNumeral numeral = numeral ** {hasC = True} ; + OrdNumeral numeral = {s = ordinal_s ++ numeral.s} ; + + AdNum adn num = {s = adn.s ++ num.s ; hasC = True} ; + + OrdSuperl a = {s = superlative_s ++ a.s} ; + + DefArt = mkDet the_s ; + IndefArt = mkDet yi_s ; ---- in the plural ? 
+ + MassNP cn = cn ; + + UseN n = n ; + UseN2 n = n ; + Use2N3 f = {s = f.s ; c = f.c ; c2 = f.c2} ; + Use3N3 f = {s = f.s ; c = f.c ; c2 = f.c3} ; + + ComplN2 f x = {s = appPrep f.c2 x.s ++ f.s ; c = f.c} ; + ComplN3 f x = {s = appPrep f.c2 x.s ++ f.s ; c = f.c ; c2 = f.c3} ; + + AdjCN ap cn = case ap.monoSyl of { + True => {s = ap.s ++ cn.s ; c = cn.c} ; + False => {s = ap.s ++ possessive_s ++ cn.s ; c = cn.c} + } ; + + RelCN cn rs = {s = rs.s ++ cn.s ; c = cn.c} ; + AdvCN cn ad = {s = ad.s ++ possessive_s ++ cn.s ; c = cn.c} ; + SentCN cn cs = {s = cs.s ++ cn.s ; c = cn.c} ; + ApposCN cn np = {s = np.s ++ cn.s ; c = cn.c} ; + + RelNP np rs = mkNP (rs.s ++ np.s) ; + +} diff --git a/lib/src/chinese/pinyin/NumeralCmn.gf b/lib/src/chinese/pinyin/NumeralCmn.gf new file mode 100644 index 000000000..d56453314 --- /dev/null +++ b/lib/src/chinese/pinyin/NumeralCmn.gf @@ -0,0 +1,143 @@ +concrete NumeralCmn of Numeral = CatCmn ** open ResCmn, Prelude in { + +flags coding = utf8 ; + + +param Qform = bai | bai0 | shiwan | shiwan0 ; +param Bform = shi | shi0 | wan | wan0 ; +param Zero = zero | nozero ; +oper ling : Zero * Zero => Str = + table {<zero,z> => "ling2" ; + <z,zero> => "ling2" ; + <nozero,nozero> => []} ; +oper Wan : Zero => Str = + table {zero => "wan4" ; + nozero => []} ; + +oper mkD : Str -> Str -> Str = \x,_ -> word x ; -- hiding the "formal" version + +--lincat Numeral = {s : Str} ; +lincat Digit = {s : Str} ; +lincat Sub10 = {s : Str} ; +lincat Sub100 = {inh : Zero ; s : Bform => Str} ; +lincat Sub1000 = {inh : Zero ; s : Qform => Str} ; +lincat Sub1000000 = {s : Str} ; +lin num x0 = + {s = x0.s} ; + +-- 一二三四五六七八九十一百千 +-- + +lin n2 = + {s = mkD "er4" "er4"} ; +lin n3 = + {s = mkD "san1" "san1"} ; +lin n4 = + {s = mkD "si4" "si4"} ; +lin n5 = + {s = mkD "wu3" "wu3"} ; +lin n6 = + {s = mkD "liu4" "liu4"} ; +lin n7 = + {s = mkD "qi1" "qi1"} ; +lin n8 = + {s = mkD "ba1" "ba1"} ; +lin n9 = + {s = mkD "jiu3" "jiu3"} ; +lin pot01 = + {s = mkD "yi1" "yi1"} ; +lin pot0 d = + {s = d.s} ; +lin pot110 = 
+ {inh = nozero ; + s = table { + shi => mkD "yi1shi2" "yi1shi2" ; + shi0 => mkD "yi1shi2" "yi1shi2" ; + wan => mkD "yi1wan4" "yi1wan4" ; + wan0 => mkD "yi1wan4" "yi1wan4"}} ; +lin pot111 = + {inh = nozero ; + s = table { + shi => mkD "shi2yi1" "shi2yi1" ; + shi0 => mkD "yi1shi2yi1" "yi1shi2yi1" ; + wan => mkD "shi2yi1wan4" "shi2yi1wan4" ; + wan0 => mkD "shi2yi1wan4" "shi2yi1wan4"}} ; +lin pot1to19 d = + {inh = nozero ; + s = table { + shi => mkD "yi1shi2" "yi1shi2" ++ d.s ; + shi0 => mkD "yi1shi2" "yi1shi2" ; + wan => mkD "yi1wan4" "yi1wan4" ++ d.s ++ mkD "qian1" "qian1" ; + wan0 => mkD "yi1wan4" "yi1wan4" ++ d.s ++ mkD "qian1" "qian1"}} ; +lin pot0as1 n = + {inh = zero ; + s = table { + shi => n.s ; + shi0 => n.s ; + wan => n.s ++ mkD "qian1" "qian1" ; + wan0 => n.s ++ mkD "qian1" "qian1"}} ; +lin pot1 d = + {inh = zero ; + s = table { + shi => d.s ++ mkD "shi2" "shi2" ; + shi0 => d.s ++ mkD "shi2" "shi2" ; + wan0 => d.s ++ "wan4" ; + wan => d.s ++ "wan4"}} ; +lin pot1plus d e = + {inh = nozero ; + s = table { + shi => d.s ++ mkD "shi2" "shi2" ++ e.s ; + shi0 => d.s ++ mkD "shi2" "shi2" ++ e.s ; + wan => d.s ++ "wan4" ++ e.s ++ mkD "qian1" "qian1" ; + wan0 => d.s ++ "wan4" ++ e.s ++ mkD "qian1" "qian1"}} ; +lin pot1as2 n = + {inh = zero ; + s = table { + bai => n.s ! shi ; + bai0 => n.s ! shi ; + shiwan => n.s ! wan ; + shiwan0 => n.s ! wan0}} ; +lin pot2 d = + {inh = zero ; + s = table { + bai => d.s ++ mkD "bai3" "bai3" ; + bai0 => d.s ++ mkD "bai3" "bai3" ; + shiwan0 => d.s ++ mkD "shi2wan4" "shi2wan4" ; + shiwan => d.s ++ mkD "shi2wan4" "shi2wan4"}} ; +lin pot2plus d e = + {inh = nozero ; + s = table { + bai => d.s ++ mkD "bai3" "bai3" ++ (ling ! <e.inh,e.inh>) ++ e.s ! shi0 ; + bai0 => d.s ++ mkD "bai3" "bai3" ++ (ling ! <e.inh,e.inh>) ++ e.s ! shi0 ; + shiwan => d.s ++ mkD "shi2" "shi2" ++ (Wan ! (e.inh)) ++ e.s ! wan ; + shiwan0 => d.s ++ mkD "shi2" "shi2" ++ (Wan ! (e.inh)) ++ e.s ! wan0}} ; +lin pot2as3 n = + {s = n.s ! bai} ; +lin pot3 n = + {s = n.s ! 
shiwan} ; +lin pot3plus n m = + {s = (n.s ! shiwan0) ++ (ling ! <n.inh,m.inh>) ++ m.s ! bai0} ; + + +-- numerals as sequences of digits + + lincat + Dig = SS ; + + lin + IDig d = d ; + + IIDig d i = ss (d.s ++ i.s) ; + + D_0 = ss "0" ; + D_1 = ss "1" ; + D_2 = ss "2" ; + D_3 = ss "3" ; + D_4 = ss "4" ; + D_5 = ss "5" ; + D_6 = ss "6" ; + D_7 = ss "7" ; + D_8 = ss "8" ; + D_9 = ss "9" ; + +} diff --git a/lib/src/chinese/pinyin/ParadigmsCmn.gf b/lib/src/chinese/pinyin/ParadigmsCmn.gf new file mode 100644 index 000000000..72933a8bc --- /dev/null +++ b/lib/src/chinese/pinyin/ParadigmsCmn.gf @@ -0,0 +1,118 @@ +resource ParadigmsCmn = open CatCmn, ResCmn, Prelude in { + +flags coding = utf8 ; + +flags coding=utf8; +oper + mkN = overload { + mkN : (man : Str) -> N + = \n -> lin N (regNoun n ge_s) ; + mkN : (man : Str) -> Str -> N + = \n,c -> lin N (regNoun n c) + } ; + + mkN2 = overload { + mkN2 : Str -> N2 + = \n -> lin N2 (regNoun n ge_s ** {c2 = emptyPrep}) ; ---- possessive ? +---- mkN2 : N -> Str -> N2 +---- = \n,p -> lin N2 (n ** {c2 = mkPrep p}) ; + } ; + + mkN3 : N -> Preposition -> Preposition -> N3 + = \n,p,q -> lin N3 (n ** {c2 = p ; c3 = q}) ; + + + mkPN : (john : Str) -> PN + = \s -> lin PN {s = word s} ; + + mkA = overload { + mkA : (small : Str) -> A + = \a -> lin A (simpleAdj a) ; + mkA : (small : Str) -> Bool -> A + = \a,b -> lin A (mkAdj a b) ; + } ; + + mkA2 : Str -> A2 = \a -> lin A2 (simpleAdj a ** {c2 = emptyPrep}) ; + + mkV = overload { + mkV : (walk : Str) -> V + = \walk -> lin V (regVerb walk) ; + mkV : (arrive : Str) -> Str -> Str -> Str -> Str -> V + = \arrive,pp,ds,dp,ep -> lin V (mkVerb arrive pp ds dp ep neg_s) ; + mkV : (arrive : Str) -> Str -> Str -> Str -> Str -> Str -> V + = \arrive,pp,ds,dp,ep,neg -> lin V (mkVerb arrive pp ds dp ep neg) ; + } ; + + + mkV2 : Str -> V2 + = \s -> lin V2 (regVerb s ** {c2 = emptyPrep}) ; + + mkV3 = overload { + mkV3 : Str -> V3 + = \s -> lin V3 (regVerb s ** {c2,c3 = emptyPrep}) ; + mkV3 : V -> V3 + = \s -> lin V3 (s ** 
{c2,c3 = emptyPrep}) ; +---- mkV3 : V -> Str -> Str -> V3 +---- = \v,p,q -> lin V3 (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ; + } ; + + mkVV : Str -> VV = ---- + \v -> lin VV (regVerb v) ; + + mkVQ : V -> VQ = + \v -> lin VQ v ; + + mkVS : V -> VS = + \v -> lin VS v ; + + mkVA : V -> VA = + \v -> lin VA v ; + + mkV2Q : V -> V2Q = + \v -> lin V2Q (v ** {c2 = emptyPrep}) ; +---- mkV2Q : V -> Str -> V2Q = +---- \v,p -> lin V2Q (v ** {c2 = mkPrep p}) ; + + mkV2V : V -> V2V = + \v -> lin V2V (v ** {c2 = emptyPrep ; c3 = emptyPrep}) ; +---- mkV2V : V -> Str -> Str -> V2V = +---- \v,p,q -> lin V2V (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ; + + mkV2S : V -> V2S = + \v -> lin V2S (v ** {c2 = emptyPrep}) ; +---- mkV2S : V -> Str -> V2S = +---- \v,p -> lin V2S (v ** {c2 = mkPrep p}) ; + + mkV2A : V -> V2A + = \v -> lin V2A (v ** {c2 = emptyPrep ; c3 = emptyPrep}) ; +---- mkV2A : V -> Str -> Str -> V2A +---- = \v,p,q -> lin V2A (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ; + + mkAdv = overload { + mkAdv : Str -> Adv + = \s -> lin Adv {s = word s ; advType = ATPlace} ; + mkAdv : Str -> AdvType -> Adv + = \s,at -> lin Adv {s = word s ; advType = at} ; + } ; + + AdvType : Type + = ResCmn.AdvType ; + placeAdvType : AdvType + = ATPlace ; + timeAdvType : AdvType + = ATTime ; + mannerAdvType : AdvType + = ATManner ; + + mkPrep = overload { ---- is this the right order of the fields? + mkPrep : Str -> Preposition + = \s -> ResCmn.mkPreposition s [] ; + mkPrep : Str -> Str -> Preposition + = \s,t -> ResCmn.mkPreposition s t ; + } ; + + emptyPrep : Preposition = mkPrep [] ; + + +} + diff --git a/lib/src/chinese/pinyin/PhraseCmn.gf b/lib/src/chinese/pinyin/PhraseCmn.gf new file mode 100644 index 000000000..5401cfe74 --- /dev/null +++ b/lib/src/chinese/pinyin/PhraseCmn.gf @@ -0,0 +1,27 @@ +concrete PhraseCmn of Phrase = CatCmn ** open Prelude, ResCmn in { + + lin + PhrUtt pconj utt voc = {s = pconj.s ++ utt.s ++ voc.s} ; + + UttS s = s ; + UttQS qs = qs ; + UttImpSg pol imp = {s = pol.s ++ imp.s ! 
pol.p} ; + UttImpPl pol imp = {s = pol.s ++ imp.s ! pol.p} ; + UttImpPol pol imp = {s = pol.s ++ imp.s ! pol.p} ; --- add politeness here? + + UttIP ip = ip ; + UttIAdv iadv = iadv ; + UttNP np = np ; + UttCN cn = cn ; + UttAP ap = ap ; + UttCard x = x ; + UttVP vp = ss (infVP vp) ; + UttAdv adv = adv ; + + NoPConj = {s = []} ; + PConjConj conj = ss (conj.s ! CSent).s2 ; + + NoVoc = {s = []} ; + VocNP np = {s = np.s} ; ---- ?? + +} diff --git a/lib/src/chinese/pinyin/Pinyin.hs b/lib/src/chinese/pinyin/Pinyin.hs new file mode 100644 index 000000000..131517ed2 --- /dev/null +++ b/lib/src/chinese/pinyin/Pinyin.hs @@ -0,0 +1,64 @@ +module Pinyin where + +import Numeric +import qualified Data.Map as Map +import System + +-- AR 3/10/2012 +-- Chinese unicode - character - pinyin conversions +-- character data from http://www.linguanaut.com/chinese_alphabet2.htm + +main = do + xs <- getArgs + s <- readFile "pinyin.txt" + let ws = mkList (words s) + case xs of + "c2p":_ -> interact (useMap (c2pMap ws)) -- Chinese char to Pinyin (all results) + "p2c":_ -> interact (useMap (p2cMap ws)) -- Pinyin to Chinese char (all results) + "c2u":_ -> interact (useMap (c2uMap ws)) -- Chinese char to Unicode hex + "u2c":_ -> interact (useMap (u2cMap ws)) -- Unicode hex to Chinese char + "c2pGF":_ -> interact (useMapGF (c2pMap ws)) -- char to pinyin (first result) in string literals (e.g. in GF files) + "p2cTry":_ -> interact (tryUseMap (p2cMap ws)) -- pinyin to char, trying syllable with all tone marks + + _ -> mapM_ (putStrLn . printOne) ws + +mkList ws = case ws of + c:w:ws -> (head (map (flip Numeric.showHex "" . fromEnum) c), (c, chop w)) : mkList ws + _ -> [] + +printOne (u,(c,ws)) = u ++ "\t" ++ c ++ "\t" ++ unwords ws + +chop = words . map unslash + where + unslash '/' = ' ' + unslash c = c + +useMap :: Map.Map String String -> String -> String +useMap = useMapWith words unwords (const "NONE") + +tryUseMap :: Map.Map String String -> String -> String +tryUseMap m = unlines . 
map try . words where + try w = unwords [c ++ " (" ++ w2 ++ ")" | w2 <- alts w, Just c <- [Map.lookup w2 m]] + alts w = w : [w ++ show i | i <- [1 .. 4]] + +useMapWith :: (String -> [String]) -> ([String] -> String) -> (String -> String) -> Map.Map String String -> String -> String +useMapWith ws uws deft m = uws . map (\w -> maybe (deft w) id (Map.lookup w m)) . ws + +useMapGF m s = case s of + 'C':'h':'i':'n':cs -> "Chin" ++ useMapGF m cs -- don't change Chinese, China + 'C':'h':'i' :cs -> "Cmn" ++ useMapGF m cs -- to change language code Chi to Cmn + '"':cs -> '"':convert cs + c :cs -> c :useMapGF m cs + _ -> s + where + convert cs = case cs of + '"':s -> '"' : useMapGF m s + c :s -> maybe [c] (head . words) (Map.lookup [c] m) ++ convert s + _ -> cs + +c2pMap ws = Map.fromList [(c,unwords ps) | (_,(c,ps)) <- ws] +p2cMap ws = Map.fromListWith (++) [(p,c) | (_,(c,ps)) <- ws, p <- ps] -- store all chars with the same pinyin +c2uMap ws = Map.fromList [(c,u) | (u,(c,_)) <- ws] +u2cMap ws = Map.fromList [(u,c) | (u,(c,_)) <- ws] + + diff --git a/lib/src/chinese/pinyin/QuestionCmn.gf b/lib/src/chinese/pinyin/QuestionCmn.gf new file mode 100644 index 000000000..88f2b421d --- /dev/null +++ b/lib/src/chinese/pinyin/QuestionCmn.gf @@ -0,0 +1,40 @@ +concrete QuestionCmn of Question = CatCmn ** + open ResCmn, Prelude in { + + flags optimize=all_subs ; + + lin + + QuestCl cl = {s = \\p,a => cl.s ! p ! a ++ question_s} ; --- plus redup questions + + QuestVP ip vp = { + s = \\p,a => ip.s ++ vp.prePart ++ useVerb vp.verb ! p ! a ++ vp.compl + } ; + + QuestSlash ip cls = { + s =\\p,a => cls.c2.prepPre ++ cls.np ++ cls.c2.prepMain ++ cls.vp ! p ! a ++ + possessive_s ++ di_s ++ ip.s + } ; + + QuestIAdv iadv cl = {s = \\p,a => cl.np ++ iadv.s ++ cl.vp ! p ! 
a} ; + + QuestIComp icomp np = {s = \\p,a => np.s ++ icomp.s} ; ---- order + + PrepIP p ip = ss (appPrep p ip.s) ; + + AdvIP ip adv = ss (adv.s ++ possessive_s ++ ip.s) ; ---- adding de + + IdetCN det cn = {s = det.s ++ cn.c ++ cn.s} ; ---- number? + + IdetIP idet = idet ; + + IdetQuant iquant num = ss (iquant.s ++ num.s) ; ---- + + AdvIAdv i a = ss (a.s ++ i.s) ; + + CompIAdv a = ss (zai_s ++ a.s) ; + + CompIP ip = ss (copula_s ++ ip.s) ; + +} + diff --git a/lib/src/chinese/pinyin/RelativeCmn.gf b/lib/src/chinese/pinyin/RelativeCmn.gf new file mode 100644 index 000000000..221e91466 --- /dev/null +++ b/lib/src/chinese/pinyin/RelativeCmn.gf @@ -0,0 +1,12 @@ +concrete RelativeCmn of Relative = CatCmn ** open ResCmn, Prelude in { + + lin + RelCl cl = {s = \\p,a => cl.s ! p ! a ++ relative_s} ; ---- ?? + RelVP rp vp = { + s = \\p,a => vp.prePart ++ useVerb vp.verb ! p ! a ++ vp.compl ++ rp.s + } ; ---- ?? + RelSlash rp slash = {s = \\p,a => slash.s ! p ! a ++ appPrep slash.c2 rp.s} ; + FunRP p np rp = ss (appPrep p np.s ++ rp.s) ; ---- ?? + IdRP = ss relative_s ; + +} diff --git a/lib/src/chinese/pinyin/ResCmn.gf b/lib/src/chinese/pinyin/ResCmn.gf new file mode 100644 index 000000000..f448e582d --- /dev/null +++ b/lib/src/chinese/pinyin/ResCmn.gf @@ -0,0 +1,223 @@ +--# -path=.:../abstract:../common:../../prelude + +--1 Thai auxiliary operations. +-- +---- This module contains operations that are needed to make the +---- resource syntax work. To define everything that is needed to +---- implement $Test$, it moreover contains regular lexical +---- patterns needed for $Lex$. 
+-- +resource ResCmn = ParamX ** open Prelude in { + + flags coding = utf8 ; + + oper + +-- strings ---- + + defaultStr = "" ; + + than_s = "bi3" ; + progressive_s = defaultStr ; + possessive_s = "de" ; -- also used for AP + NP + deAdvV_s = "de" ; -- between Adv and V + deVAdv_s = "de2" ; -- between V and Adv + imperneg_s = neg_s ; + conjThat = emptyStr ; ---- + reflPron = word "zi4ji3" ; -- pron + refl + passive_s = defaultStr ; + relative_s = possessive_s ; -- relative + superlative_s = "zui4" ; -- superlative, sup + adj + de + zai_s = "zai4" ; -- copula for place + you_s = "you3" ; -- to have + + copula_s = "shi4" ; + exist_s = word "cun2zai4" ; + neg_s = "bu4" ; + question_s = "ma3" ; + yi_s = "yi1" ; + ordinal_s = "di4" ; + xie_s = "xie1" ; + the_s = "na3" ; + geng_s = "geng1" ; -- more, in comparison + + zai_V = mkVerb "zai4" [] [] [] [] "bu4" ; + fullstop_s = "." ; + questmark_s = "?" ; + exclmark_s = "!" ; + ge_s = "ge4" ; + di_s = "shi4" ; -- used in QuestSlash + + emptyStr = [] ; + + +-- Write the characters that constitute a word separately. This enables straightforward tokenization. + + bword : Str -> Str -> Str = \x,y -> x + y ; -- change to x ++ y to treat words as separate tokens + + word : Str -> Str = \s -> case s of { + x@? + y@? + z@? + u@? => bword x (bword y (bword z u)) ; + x@? + y@? + z@? => bword x (bword y z) ; + x@? + y@? 
=> bword x y ; + _ => s + } ; + + ssword : Str -> SS = \s -> ss (word s) ; + +------------------------------------------------ from Jolene + +-- parameters + +param + Aspect = APlain | APerf | ADurStat | ADurProg | AExper ; ---- APlain added by AR + ConjForm = CPhr CPosType | CSent; + CPosType = CAPhrase | CNPhrase | CVPhrase ; + DeForm = DeNoun | NdNoun ; -- parameter created for noun with/out particle "de" + + AdvType = ATPlace | ATTime | ATManner ; + +-- parts of speech + +oper + + VP = {verb : Verb ; compl : Str ; prePart : Str} ; + NP = {s : Str} ; + +-- for morphology + + Noun : Type = {s : Str; c : Str} ; + Adj : Type = {s : Str; monoSyl: Bool} ; + Verb : Type = {s : Str ; pp,ds,dp,ep : Str ; neg : Str} ; + + regNoun : Str -> Str -> Noun = \s,c -> {s = word s ; c = word c}; + + mkAdj : Str -> Bool -> Adj = \s,b -> {s = word s ; monoSyl = b}; + + complexAP : Str -> Adj = \s -> {s = s ; monoSyl = False} ; + + simpleAdj : Str -> Adj = \s -> case s of { + ? => mkAdj s True ; -- monosyllabic; NOTE(review): "?" matches exactly one character, so a pinyin syllable such as "da4" falls through to False - confirm this is intended + _ => mkAdj s False + } ; + + copula : Verb = mkVerb "shi4" [] [] [] [] "bu4" ; + + regVerb : (walk : Str) -> Verb = \v -> + mkVerb v "le" "zhao1" "zai4" "guo4" "mei2" ; + + mkVerb : (v : Str) -> (pp,ds,dp,ep,neg : Str) -> Verb = \v,pp,ds,dp,ep,neg -> + {s = word v ; pp = pp ; ds = ds ; dp = dp ; ep = ep ; neg = neg} ; + + useVerb : Verb -> Polarity => Aspect => Str = \v -> + table { + Pos => table { + APlain => v.s ; + APerf => v.s ++ v.pp ; + ADurStat => v.s ++ v.ds ; + ADurProg => v.dp ++ v.s ; + AExper => v.s ++ v.ep + } ; + Neg => table { + APlain => v.neg ++ v.s ; --- neg? 
+ APerf => "bu4" ++ v.s ++ v.pp ; + ADurStat => "bu4" ++ v.s ; + ADurProg => v.neg ++ v.dp ++ v.s ; -- mei or bu + AExper => v.neg ++ v.s ++ v.ep + } + } ; + + infVP : VP -> Str = \vp -> vp.prePart ++ vp.verb.s ++ vp.compl ; + + predV : Verb -> VP = \v -> { + verb = v ; + compl = [] ; + prePart = [] ; + } ; + + insertObj : NP -> VP -> VP = \np,vp -> { + verb = vp.verb ; + compl = np.s ++ vp.compl ; + prePart = vp.prePart + } ; + + insertObjPost : NP -> VP -> VP = \np,vp -> { + verb = vp.verb ; + compl = vp.compl ++ np.s ; + prePart = vp.prePart + } ; + + insertAdv : SS -> VP -> VP = \adv,vp -> { + verb = vp.verb ; + compl = vp.compl ; + prePart = adv.s ++ vp.prePart -- new adverb first; keep pre-verbal material already on the vp instead of overwriting it + } ; + + insertExtra : SS -> VP -> VP = \ext,vp -> + insertObjPost ext vp ; + +-- clauses: keep np and vp separate to enable insertion of IAdv + + Clause : Type = { + s : Polarity => Aspect => Str ; + np : Str; + vp : Polarity => Aspect => Str + } ; + + + mkClause = overload { + mkClause : Str -> Verb -> Clause = \np,v -> mkClauseCompl np (useVerb v) [] ; + mkClause : Str -> (Polarity => Aspect => Str) -> Str -> Clause = mkClauseCompl ; + mkClause : Str -> Verb -> Str -> Clause = \subj,verb,obj -> + mkClauseCompl subj (useVerb verb) obj ; + mkClause : Str -> VP -> Clause = \np,vp -> + mkClauseCompl np (\\p,a => vp.prePart ++ useVerb vp.verb ! p ! a) vp.compl ; + } ; + + mkClauseCompl : Str -> (Polarity => Aspect => Str) -> Str -> Clause = \np,vp,compl -> { + s = \\p,a => np ++ vp ! p ! a ++ compl ; + np = np ; + vp = \\p,a => vp ! p ! 
a ++ compl + } ; + + +-- for structural words + +param + DetType = DTFull Number | DTNum | DTPoss ; -- this, these, five, our + NumType = NTFull | NTVoid Number ; -- five, sg, pl + +oper + Determiner = {s : Str ; detType : DetType} ; + + mkDet = overload { + mkDet : Str -> Determiner = \s -> {s = s ; detType = DTFull Sg} ; + mkDet : Str -> Number -> Determiner = \s,n -> {s = s ; detType = DTFull n} ; + mkDet : Str -> DetType -> Determiner = \s,d -> {s = s ; detType = d} ; + } ; + + mkQuant : Str -> {s : Str} = ss ; + + pronNP : (s : Str) -> NP = \s -> { + s = word s + } ; + + mkPreposition : Str -> Str -> Preposition = \s,b -> { + prepMain = word s ; + prepPre = word b + } ; + + mkSubj : Str -> Str -> {prePart : Str ; sufPart : Str} = \p,s -> { + prePart = word p ; + sufPart = word s + } ; + + Preposition = {prepMain : Str ; prepPre : Str} ; + +-- added by AR + + mkNP : Str -> NP = ss ; + + appPrep : Preposition -> Str -> Str = \prep,s -> + prep.prepPre ++ s ++ prep.prepMain ; + +} diff --git a/lib/src/chinese/pinyin/SentenceCmn.gf b/lib/src/chinese/pinyin/SentenceCmn.gf new file mode 100644 index 000000000..a694980fb --- /dev/null +++ b/lib/src/chinese/pinyin/SentenceCmn.gf @@ -0,0 +1,45 @@ +concrete SentenceCmn of Sentence = CatCmn ** + open Prelude, ResCmn in { + + flags optimize=all_subs ; + + lin + + PredVP np vp = mkClause np.s vp ; + + PredSCVP sc vp = mkClause sc.s vp ; + + ImpVP vp = { + s = table { + Pos => infVP vp ; + Neg => neg_s ++ infVP vp + } + } ; + + SlashVP np vp = + mkClauseCompl np.s (\\p,a => vp.prePart ++ useVerb vp.verb ! p ! a) vp.compl + ** {c2 = vp.c2} ; + + SlashVS np vs sslash = mkClauseCompl np.s (useVerb vs) sslash.s ** {c2 = sslash.c2} ; -- subject, sentence-complement verb, then the slash sentence as complement + + + -- yet another reason for discontinuity of clauses + AdvSlash slash adv = + mkClause slash.np (<\\p,a => adv.s ++ slash.vp ! p ! 
a : Polarity => Aspect => Str>) [] + ** {c2 = slash.c2} ; + + SlashPrep cl prep = cl ** {c2 = prep} ; + + EmbedS s = ss (conjThat ++ s.s) ; + EmbedQS qs = qs ; + EmbedVP vp = ss (infVP vp) ; + + UseCl t p cl = {s = cl.s ! p.p ! t.t} ; + UseQCl t p cl = {s = cl.s ! p.p ! t.t} ; + UseRCl t p cl = {s = cl.s ! p.p ! t.t} ; + UseSlash t p cl = {s = cl.s ! p.p ! t.t ; c2 = cl.c2} ; + + AdvS a s = ss (a.s ++ s.s) ; + + RelS s r = ss (s.s ++ r.s) ; +} diff --git a/lib/src/chinese/pinyin/StructuralCmn.gf b/lib/src/chinese/pinyin/StructuralCmn.gf new file mode 100644 index 000000000..ab831b8da --- /dev/null +++ b/lib/src/chinese/pinyin/StructuralCmn.gf @@ -0,0 +1,164 @@ +concrete StructuralCmn of Structural = CatCmn ** + open ParadigmsCmn, ResCmn, Prelude in { + + flags coding = utf8 ; + +lin + every_Det = mkDet "mei3" Sg ; + + this_Quant = mkDet "zhe4" ; + that_Quant = mkDet "na3" ; + + i_Pron = pronNP "wo3" ; + youSg_Pron = pronNP "ni3" ; + he_Pron = pronNP "ta1" ; + she_Pron = pronNP "ta1" ; + we_Pron = pronNP "wo3men" ; + youPl_Pron = pronNP "ni3men" ; + they_Pron = pronNP "ta1men" ; + + very_AdA = ssword "fei1chang2" ; + + by8means_Prep = mkPrep "pang2bian1" [] ; + in_Prep = mkPrep "li3" []; + possess_Prep = mkPrep "de" []; + with_Prep = mkPrep "yi1qi3" "he2"; + +and_Conj = {s = table { + CPhr CNPhrase => mkConjForm "he2" ; + CPhr CAPhrase => mkConjForm "er2" ; + CPhr CVPhrase => mkConjForm "you4" ; + CSent => mkConjForm [] + } + } ; + or_Conj = {s = table { + CPhr _ => mkConjForm "huo4" ; + CSent => mkConjForm "hai2shi4" + } + } ; + + although_Subj = mkSubj "sui1ran2" "dan4"; + because_Subj = mkSubj "yin1wei2" "suo3yi3" ; + when_Subj = mkSubj [] "deshi2hou4" ; + +here_Adv = mkAdv "zhe4li3" ; +there_Adv = mkAdv "na3li3" ; +whoSg_IP, whoPl_IP = mkIPL "shei2" ; +whatSg_IP, whatPl_IP = mkIPL " shen2ma" ; +where_IAdv = mkIAdvL "na3li3" ; +when_IAdv = mkIAdvL "shen2mashi2hou4" ; +how_IAdv = mkIAdvL "ru2he2" ; +all_Predet = ssword "suo3you3" ; +many_Det = mkDet "duo1" Pl ; 
+someSg_Det = mkDet (word "yi1xie1") Sg ; +somePl_Det = mkDet (word "yi1xie1") Sg ; +few_Det = mkDet "shao3" Pl ; +other_A = mkA "qi2ta1" ; + +oper + mkIPL, mkIAdvL, mkAdA, mkIDetL, mkPConjL, mkCAdv, mkIQuant = ssword ; -- all of these are plain aliases of ssword + +-- hsk + +lin + + +above_Prep = mkPrep "shang4bian1" ; +after_Prep = mkPrep "yi3hou4" ; +under_Prep = mkPrep "xia4" ; +why_IAdv = mkIAdvL "wei2shen2ma" ; +too_AdA = mkAdA "tai4" ; + +before_Prep = mkPrep "cong1qian2" ; --s +between_Prep = mkPrep "zhi1jian1" ; --s +but_PConj = mkPConjL "dan4shi4" ; --s + + + can_VV = mkVerb "neng2" [] [] [] [] "bu4" ; + must_VV = mkVerb "bi4xu1" [] [] [] [] "bu4" ; ---- False "bu4neng2" + want_VV = mkVerb "xiang3" [] [] [] [] "bu4" ; + +can8know_VV = mkV "hui4" [] [] [] [] "bu4" ; ---- + + +except_Prep = mkPrep "yi3wai4" "chu2le" ; --s chu2le X yi3wai4: the second argument is the part placed before the noun +for_Prep = mkPrep [] "wei2le" ; --s placed before the noun +from_Prep = mkPrep [] "cong1" ; --s placed before the noun, as in here7from_Adv +---how8many_IDet = mkIDet "ji1" ; --s +---how8much_IDet = mkIDet "duo1shao3" ; --s +in8front_Prep = mkPrep "qian2bian1" ; --s +it_Pron = pronNP "ta1" ; --s +---less_CAdv = mkCAdv "shao3" ; --s +much_Det = mkDet "duo1" Sg ; --s +---more_CAdv = mkCAdv "geng1" ; --s +---most_Predet = mkPredet "zui4" ; --s +no_Quant = mkDet "bu4" ; --s +not_Predet = ssword "bu4" ; +---only_Predet = mkPredet "qi2" ; --s +otherwise_PConj = mkPConjL "hai2shi4" ; --s +to_Prep = mkPrep [] "wang3" ; --s placed before the noun +---which_IQuant = mkIQuant "na3" ; --s + +have_V2 = mkV2 "you3" ; + +yes_Utt = ss copula_s ; +no_Utt = ss neg_s ; + +oper + mkConjForm : Str -> {s1,s2 : Str} = \s -> {s1 = [] ; s2 = word s} ; -- s1 is always empty; s2 carries the conjunction word + +-- manually by AR + +lin + always_AdV = ssword "yi1zhi2" ; + part_Prep = mkPrep possessive_s ; + language_title_Utt = ssword "zhong1wen2" ; + please_Voc = ss "qing3" ; + quite_Adv = mkAdA "de2hen3" ; + +-- just missing + +lin +almost_AdA = ssword "ji1hu1" ; +almost_AdN = ssword "ji1hu1" ; +--as_CAdv = ssword "shen2ma" ; -- as good as X +at_least_AdN = ssword "zui4shao3" ; -- at least five +at_most_AdN = ssword "zui4duo1" ; +behind_Prep = mkPrep 
"hou4mian4" "zai4"; +--both7and_DConj = ssword "shen2ma" ; -- both - and +by8agent_Prep = mkPrep [] "bei4" ; -- by for agent in passive; placed before the agent noun + -- [mark] 被 +during_Prep = mkPrep "qi1jian1" "zai4" ; -- [mark] often equivalent to nothing + -- translation for "he swam during this summer. " and "he swam this summer." are often the same +--either7or_DConj = ssword "shen2ma" ; +everybody_NP = ssword "mei3ge4ren2" ; -- [mark] "mei3ge4ren2": 每(every)+个(classifier)+人(person) +everything_NP = ssword "mei3jian4shi4" ; -- [mark] "mei3jian4shi4": 每(every)+件(classifier)+事(thing) +everywhere_Adv = mkAdv "dao4chu3" ; +here7from_Adv = mkAdv "cong1zhe4li3" ; -- from here +here7to_Adv = mkAdv "dao4zhe4li3" ; -- to here + -- [mark] "cong1zhe4li3" 从(from) 这里(here) + -- "dao4zhe4li3" 到( to ) 这里(here) +how8many_IDet = ssword "duo1shao3" ; +how8much_IAdv = ssword "duo1shao3" ; +if_Subj = mkSubj "ru2guo3" "jiu4" ; -- [mark] "jiu4" often comes between NP and VP +--less_CAdv = ssword "shen2ma" ; -- less good than +--more_CAdv = ssword "shen2ma" ; +most_Predet = ssword "da4duo1shu3" ; +nobody_NP = ssword "mei2ren2" ; +nothing_NP = ssword "mei2you3shen2ma" ; +on_Prep = mkPrep "shang4" "zai4" ; +only_Predet = ssword "qi2you3" ; -- only John +so_AdA = ssword "ru2ci3" ; +somebody_NP = ssword "mou3ren2" ; +something_NP = ssword "mou3shi4" ; -- [mark] in sent, it depends on the context +somewhere_Adv = mkAdv "mou3chu3" ; +that_Subj = mkSubj [] ", " ; -- that + S [mark] comma +there7from_Adv = mkAdv "cong1na3li3" ; -- from there +there7to_Adv = mkAdv "dao4na3li3" ; +therefore_PConj = ssword "yin1ci3" ; +through_Prep = mkPrep [] "tong1guo4" ; -- placed before the noun +which_IQuant = ssword [] ; -- [mark] in sent, it depends on the context +without_Prep = mkPrep [] "mei2you3"; -- placed before the noun +youPol_Pron = ssword "nin2" ; -- polite you + +} diff --git a/lib/src/chinese/pinyin/SymbolCmn.gf b/lib/src/chinese/pinyin/SymbolCmn.gf new file mode 100644 index 000000000..8ba32f931 --- /dev/null +++ b/lib/src/chinese/pinyin/SymbolCmn.gf @@ -0,0 +1,36 @@ +--# 
-path=.:../abstract:../common + +concrete SymbolCmn of Symbol = CatCmn ** open Prelude, ResCmn in { + + flags coding = utf8; + + lin + SymbPN i = i ; + IntPN i = i ; + FloatPN i = i ; + NumPN i = i ; + CNIntNP cn i = { + s = cn.s ++ i.s ; + c = cn.c + } ; + CNSymbNP det cn xs = ss (det.s ++ cn.s ++ xs.s) ; ---- + CNNumNP cn i = { + s = cn.s ++ i.s ; + c = cn.c + } ; + + SymbS sy = sy ; + SymbNum sy = sy ; + SymbOrd sy = sy ; + +lincat + + Symb, [Symb] = SS ; + +lin + MkSymb s = s ; + + BaseSymb = infixSS "" ; + ConsSymb = infixSS "" ; + +} diff --git a/lib/src/chinese/pinyin/TenseCmn.gf b/lib/src/chinese/pinyin/TenseCmn.gf new file mode 100644 index 000000000..787b6a54a --- /dev/null +++ b/lib/src/chinese/pinyin/TenseCmn.gf @@ -0,0 +1,13 @@ +concrete TenseCmn of Tense = + CatCmn [Tense,Temp], TenseX [Ant,Pol,AAnter,ASimul,PNeg,PPos] ** open ResCmn in { + + lin + TTAnt t a = {s = t.s ++ a.s ; t = t.t} ; -- anteriority contributes only its string; the aspect value comes from the tense + + ---- ?? provisional mapping of the four tenses onto Aspect values of ResCmn + TPres = {s = [] ; t = APlain} ; + TPast = {s = [] ; t = APerf} ; + TFut = {s = [] ; t = ADurProg} ; + TCond = {s = [] ; t = ADurStat} ; + +} diff --git a/lib/src/chinese/pinyin/TextCmn.gf b/lib/src/chinese/pinyin/TextCmn.gf new file mode 100644 index 000000000..fad1ec21b --- /dev/null +++ b/lib/src/chinese/pinyin/TextCmn.gf @@ -0,0 +1,11 @@ +concrete TextCmn of Text = CommonX - [Temp,Tense,Adv] ** open ResCmn in { + +-- Punctuation marks are taken from ResCmn (fullstop_s, questmark_s, exclmark_s) and joined to the sentences with ++. 
+ + lin + TEmpty = {s = []} ; + TFullStop x xs = {s = x.s ++ fullstop_s ++ xs.s} ; + TQuestMark x xs = {s = x.s ++ questmark_s ++ xs.s} ; + TExclMark x xs = {s = x.s ++ exclmark_s ++ xs.s} ; + +} diff --git a/lib/src/chinese/pinyin/VerbCmn.gf b/lib/src/chinese/pinyin/VerbCmn.gf new file mode 100644 index 000000000..b4703e324 --- /dev/null +++ b/lib/src/chinese/pinyin/VerbCmn.gf @@ -0,0 +1,60 @@ +concrete VerbCmn of Verb = CatCmn ** open ResCmn, Prelude in { + + flags optimize=all_subs ; + + lin + UseV = predV ; -- verb alone: predV gives empty compl and prePart + + SlashV2a v = predV v ** {c2 = v.c2} ; + + Slash2V3 v np = insertObj np (predV v) ** {c2 = v.c3} ; ---- to check arg order + Slash3V3 v np = insertObj np (predV v) ** {c2 = v.c2} ; + + SlashV2A v ap = insertObj ap (predV v) ** {c2 = v.c2} ; + + SlashV2V v vp = insertObj (mkNP (infVP vp)) (predV v) ** {c2 = v.c2} ; + SlashV2S v s = insertObj s (predV v) ** {c2 = v.c2} ; + SlashV2Q v q = insertObj q (predV v) ** {c2 = v.c2} ; + + ComplVV v vp = { + verb = v ; + compl = vp.verb.s ++ vp.compl ; -- embedded verb in its plain form (vp.verb.s), followed by its complement + prePart = vp.prePart + } ; + + ComplVS v s = insertObj s (predV v) ; + ComplVQ v q = insertObj q (predV v) ; + ComplVA v ap = insertObj ap (predV v) ; + + ComplSlash vp np = insertObj (mkNP (appPrep vp.c2 np.s)) vp ; -- the object is wrapped in the verb's c2 preposition before insertion + + UseComp comp = comp ; + + SlashVV v vp = ---- too simple? 
+ insertObj (mkNP (infVP vp)) (predV v) ** {c2 = vp.c2} ; + + SlashV2VNP v np vp = + insertObj np + (insertObj (mkNP (infVP vp)) (predV v)) ** {c2 = vp.c2} ; + + AdvVP vp adv = case adv.advType of { + ATManner => insertObj (ss (deVAdv_s ++ adv.s)) vp ; -- he sleeps well: deVAdv_s + adverb is added to compl + _ => insertAdv (ss (zai_V.s ++ adv.s)) vp -- he sleeps in the house / today: zai_V.s + adverb goes to prePart + } ; + + AdVVP adv vp = insertAdv adv vp ; + + ReflVP vp = insertObj (mkNP reflPron) vp ; + + PassV2 v = insertObj (mkNP passive_s) (predV v) ; ---- + + CompAP ap = insertObj (mkNP ap.s) (predV copula) ; ---- hen / bu + + CompNP np = insertObj np (predV copula) ; ---- + + CompCN cn = insertObj cn (predV copula) ; ---- + + CompAdv adv = insertObj adv (predV zai_V) ; -- adverbial complement uses zai_V instead of the copula + +} +