From 754949f5cc69d937a48c547a636fa9eff228bb13 Mon Sep 17 00:00:00 2001 From: aarne Date: Mon, 15 Oct 2012 08:07:17 +0000 Subject: [PATCH] chinese (Chi) in place and compiles, based on work by Jolene Zhuo Lin Qiqige --- lib/src/Make.hs | 5 +- lib/src/api/CombinatorsChi.gf | 6 + lib/src/api/ConstructorsChi.gf | 3 + lib/src/api/SymbolicChi.gf | 5 + lib/src/api/SyntaxChi.gf | 5 + lib/src/api/TryChi.gf | 12 + lib/src/chinese/AdjectiveChi.gf | 26 ++ lib/src/chinese/AdverbChi.gf | 19 ++ lib/src/chinese/AllChi.gf | 3 + lib/src/chinese/AllChiAbs.gf | 5 + lib/src/chinese/CatChi.gf | 84 ++++++ lib/src/chinese/ConjunctionChi.gf | 34 +++ lib/src/chinese/ExtraChi.gf | 7 + lib/src/chinese/ExtraChiAbs.gf | 4 + lib/src/chinese/GrammarChi.gf | 22 ++ lib/src/chinese/IdiomChi.gf | 27 ++ lib/src/chinese/LangChi.gf | 11 + lib/src/chinese/LexiconChi.gf | 458 ++++++++++++++++++++++++++++++ lib/src/chinese/NounChi.gf | 87 ++++++ lib/src/chinese/NumeralChi.gf | 143 ++++++++++ lib/src/chinese/ParadigmsChi.gf | 118 ++++++++ lib/src/chinese/PhraseChi.gf | 27 ++ lib/src/chinese/QuestionChi.gf | 40 +++ lib/src/chinese/README | 209 ++++++++++++++ lib/src/chinese/RelativeChi.gf | 12 + lib/src/chinese/ResChi.gf | 221 ++++++++++++++ lib/src/chinese/SentenceChi.gf | 45 +++ lib/src/chinese/StructuralChi.gf | 164 +++++++++++ lib/src/chinese/SymbolChi.gf | 36 +++ lib/src/chinese/TenseChi.gf | 13 + lib/src/chinese/TextChi.gf | 11 + lib/src/chinese/VerbChi.gf | 60 ++++ 32 files changed, 1920 insertions(+), 2 deletions(-) create mode 100644 lib/src/api/CombinatorsChi.gf create mode 100644 lib/src/api/ConstructorsChi.gf create mode 100644 lib/src/api/SymbolicChi.gf create mode 100644 lib/src/api/SyntaxChi.gf create mode 100644 lib/src/api/TryChi.gf create mode 100644 lib/src/chinese/AdjectiveChi.gf create mode 100644 lib/src/chinese/AdverbChi.gf create mode 100644 lib/src/chinese/AllChi.gf create mode 100644 lib/src/chinese/AllChiAbs.gf create mode 100644 lib/src/chinese/CatChi.gf create mode 100644 
lib/src/chinese/ConjunctionChi.gf create mode 100644 lib/src/chinese/ExtraChi.gf create mode 100644 lib/src/chinese/ExtraChiAbs.gf create mode 100644 lib/src/chinese/GrammarChi.gf create mode 100644 lib/src/chinese/IdiomChi.gf create mode 100644 lib/src/chinese/LangChi.gf create mode 100644 lib/src/chinese/LexiconChi.gf create mode 100644 lib/src/chinese/NounChi.gf create mode 100644 lib/src/chinese/NumeralChi.gf create mode 100644 lib/src/chinese/ParadigmsChi.gf create mode 100644 lib/src/chinese/PhraseChi.gf create mode 100644 lib/src/chinese/QuestionChi.gf create mode 100644 lib/src/chinese/README create mode 100644 lib/src/chinese/RelativeChi.gf create mode 100644 lib/src/chinese/ResChi.gf create mode 100644 lib/src/chinese/SentenceChi.gf create mode 100644 lib/src/chinese/StructuralChi.gf create mode 100644 lib/src/chinese/SymbolChi.gf create mode 100644 lib/src/chinese/TenseChi.gf create mode 100644 lib/src/chinese/TextChi.gf create mode 100644 lib/src/chinese/VerbChi.gf diff --git a/lib/src/Make.hs b/lib/src/Make.hs index db4357813..e4f954f3e 100644 --- a/lib/src/Make.hs +++ b/lib/src/Make.hs @@ -33,6 +33,7 @@ langsCoding = [ (("arabic", "Ara"),""), (("bulgarian","Bul"),""), (("catalan", "Cat"),"Romance"), + (("chinese", "Chi"),""), (("danish", "Dan"),"Scand"), (("dutch", "Dut"),""), (("english", "Eng"),""), @@ -73,7 +74,7 @@ langsLangAll = langs langsLang = langs `except` langsIncomplete -- languagues that have notpresent marked -langsPresent = langsLang `except` ["Jpn","Nep","Pes","Snd","Tha","Thb"] +langsPresent = langsLang `except` ["Chi","Jpn","Nep","Pes","Snd","Tha","Thb"] -- languages for which Lang can be compiled but which are incomplete langsIncomplete = ["Amh","Ara","Lat","Mlt","Tur","Thb"] @@ -82,7 +83,7 @@ langsIncomplete = ["Amh","Ara","Lat","Mlt","Tur","Thb"] langsAPI = langsLang `except` langsIncomplete -- languages for which to compile Symbolic -langsSymbolic = langsLang `except` (langsIncomplete ++ 
["Afr","Hin","Ina","Jpn","Nep","Pnb","Rus", "Snd", "Thb"]) +langsSymbolic = langsLang `except` (langsIncomplete ++ ["Afr","Chi","Hin","Ina","Jpn","Nep","Pnb","Rus", "Snd", "Thb"]) -- languages for which to compile minimal Syntax langsMinimal = langs `only` ["Ara","Eng","Bul","Rus"] diff --git a/lib/src/api/CombinatorsChi.gf b/lib/src/api/CombinatorsChi.gf new file mode 100644 index 000000000..68fe0e699 --- /dev/null +++ b/lib/src/api/CombinatorsChi.gf @@ -0,0 +1,6 @@ +--# -path=.:alltenses:prelude + +resource CombinatorsChi = Combinators with + (Cat = CatChi), + (Structural = StructuralChi), + (Constructors = ConstructorsChi) ; diff --git a/lib/src/api/ConstructorsChi.gf b/lib/src/api/ConstructorsChi.gf new file mode 100644 index 000000000..32a41d419 --- /dev/null +++ b/lib/src/api/ConstructorsChi.gf @@ -0,0 +1,3 @@ +--# -path=.:alltenses:prelude + +resource ConstructorsChi = Constructors with (Grammar = GrammarChi) ; diff --git a/lib/src/api/SymbolicChi.gf b/lib/src/api/SymbolicChi.gf new file mode 100644 index 000000000..0d1e17968 --- /dev/null +++ b/lib/src/api/SymbolicChi.gf @@ -0,0 +1,5 @@ +--# -path=.:alltenses + +resource SymbolicChi = Symbolic with + (Symbol = SymbolChi), + (Grammar = GrammarChi) ; diff --git a/lib/src/api/SyntaxChi.gf b/lib/src/api/SyntaxChi.gf new file mode 100644 index 000000000..35ffe2a62 --- /dev/null +++ b/lib/src/api/SyntaxChi.gf @@ -0,0 +1,5 @@ +--# -path=.:./alltenses:../prelude + +instance SyntaxChi of Syntax = + ConstructorsChi, CatChi, StructuralChi, CombinatorsChi ; + diff --git a/lib/src/api/TryChi.gf b/lib/src/api/TryChi.gf new file mode 100644 index 000000000..18dd225ae --- /dev/null +++ b/lib/src/api/TryChi.gf @@ -0,0 +1,12 @@ +--# -path=.:alltenses + +resource TryChi = SyntaxChi, LexiconChi, ParadigmsChi -[mkAdv, mkDet,mkQuant]** + open (P = ParadigmsChi) in { + + oper + + mkAdv = overload SyntaxChi { + mkAdv : Str -> Adv = P.mkAdv ; + } ; + +} diff --git a/lib/src/chinese/AdjectiveChi.gf b/lib/src/chinese/AdjectiveChi.gf 
new file mode 100644 index 000000000..a3d50bb43 --- /dev/null +++ b/lib/src/chinese/AdjectiveChi.gf @@ -0,0 +1,26 @@ +concrete AdjectiveChi of Adjective = CatChi ** open ResChi, Prelude in { + + lin + + PositA a = a ; + + --ComparA a np = complexAP (a.s ++ than_s ++ np.s) ; + ComparA a np = complexAP (than_s ++ np.s ++ a.s) ; + + UseComparA a = complexAP (geng_s ++ a.s) ; + + AdjOrd ord = complexAP ord.s ; + + CAdvAP ad ap np = complexAP (ap.s ++ ad.s ++ ad.p ++ np.s) ; + + ComplA2 a np = complexAP (a.s ++ appPrep a.c2 np.s) ; + + ReflA2 a = complexAP (a.s ++ appPrep a.c2 reflPron) ; + + SentAP ap sc = complexAP (ap.s ++ sc.s) ; + + AdAP ada ap = complexAP (ada.s ++ ap.s) ; + + UseA2 a = a ; + +} diff --git a/lib/src/chinese/AdverbChi.gf b/lib/src/chinese/AdverbChi.gf new file mode 100644 index 000000000..544a32d80 --- /dev/null +++ b/lib/src/chinese/AdverbChi.gf @@ -0,0 +1,19 @@ +concrete AdverbChi of Adverb = CatChi ** + open ResChi, Prelude in { + + lin + PositAdvAdj a = {s = a.s ; advType = ATManner} ; + + PrepNP prep np = ss (appPrep prep np.s) ** {advType = ATPlace} ; --- should depend on prep, np ? or treat in ExtraChi ? 
+ + ComparAdvAdj cadv a np = ss (a.s ++ cadv.s ++ cadv.p ++ np.s) ** {advType = ATManner} ; + + ComparAdvAdjS cadv a s = ss (a.s ++ cadv.s ++ cadv.p ++ s.s) ** {advType = ATManner} ; + + AdAdv adv ad = ss (adv.s ++ ad.s) ** {advType = ad.advType} ; + + SubjS subj s = ss (subj.prePart ++ s.s ++ subj.sufPart) ** {advType = ATManner} ; + + AdnCAdv cadv = ss (cadv.s ++ conjThat) ** {advType = ATManner} ; ----- + +} diff --git a/lib/src/chinese/AllChi.gf b/lib/src/chinese/AllChi.gf new file mode 100644 index 000000000..f93576eb1 --- /dev/null +++ b/lib/src/chinese/AllChi.gf @@ -0,0 +1,3 @@ +--# -path=.:../abstract:../common:prelude + +concrete AllChi of AllChiAbs = LangChi, ExtraChi ; diff --git a/lib/src/chinese/AllChiAbs.gf b/lib/src/chinese/AllChiAbs.gf new file mode 100644 index 000000000..6a1a87331 --- /dev/null +++ b/lib/src/chinese/AllChiAbs.gf @@ -0,0 +1,5 @@ +--# -path=.:../abstract:../common:prelude + +abstract AllChiAbs = + Lang, + ExtraChiAbs ; diff --git a/lib/src/chinese/CatChi.gf b/lib/src/chinese/CatChi.gf new file mode 100644 index 000000000..71f38ea03 --- /dev/null +++ b/lib/src/chinese/CatChi.gf @@ -0,0 +1,84 @@ +concrete CatChi of Cat = CommonX - [Tense, Temp, Adv] ** open ResChi, Prelude in { + + lincat + +-- Tensed/Untensed + + S = {s : Str} ; + QS = {s : Str} ; + RS = {s : Str} ; + SSlash = {s : Str ; c2 : Preposition} ; + +-- Sentence + + Cl = Clause ; -- {s : Polarity => Aspect => Str ; np: Str ; vp: Polarity => Aspect => Str} ; + + ClSlash = Clause ** {c2 : Preposition} ; + + Imp = {s : Polarity => Str} ; + +-- Question + + QCl = {s : Polarity => Aspect => Str} ; + IP = {s : Str} ; + IComp = {s : Str} ; + IDet, IQuant = {s : Str} ; + +-- Relative + + RCl = {s : Polarity => Aspect => Str} ; + RP = {s : Str} ; + +-- Verb + + VP = ResChi.VP ; + Comp = ResChi.VP ; + VPSlash = ResChi.VP ** {c2 : Preposition} ; + +-- Adjective + + AP = ResChi.Adj ; + +-- Noun + + CN = ResChi.Noun ; + NP, Pron = ResChi.NP ; + Det, Quant = Determiner ; + Predet = {s : 
Str} ; ---- + Ord = {s : Str} ; + Num = {s : Str ; numType : NumType} ; + + Adv = {s : Str ; advType : AdvType} ; + +-- Numeral + + Numeral, Card, Digits = {s : Str} ; + +-- Structural + + Conj = {s : ConjForm => {s1,s2 : Str}} ; + Subj = {prePart : Str ; sufPart : Str} ; + Prep = Preposition ; + +-- Open lexical classes, e.g. Lexicon + + V, VS, VQ, VA = Verb ; + V2, V2Q, V2S = Verb ** {c2 : Preposition} ; + V3, V2A, V2V = Verb ** {c2, c3 : Preposition} ; + VV = Verb ; + + A = ResChi.Adj ; + A2 = ResChi.Adj ** {c2 : Preposition} ; + + N = ResChi.Noun ; + N2 = ResChi.Noun ** {c2 : Preposition} ; + N3 = ResChi.Noun ** {c2,c3 : Preposition} ; + PN = ResChi.NP ; + +-- overridden + + Temp = {s : Str ; t : Aspect} ; + Tense = {s : Str ; t : Aspect} ; + + +} diff --git a/lib/src/chinese/ConjunctionChi.gf b/lib/src/chinese/ConjunctionChi.gf new file mode 100644 index 000000000..3e8d4aa92 --- /dev/null +++ b/lib/src/chinese/ConjunctionChi.gf @@ -0,0 +1,34 @@ +concrete ConjunctionChi of Conjunction = CatChi ** open ResChi, Prelude, Coordination in { + + lin + + ConjS c = conjunctDistrSS (c.s ! CSent) ; + ConjAdv c as = conjunctDistrSS (c.s ! CSent) as ** {advType = ATPlace} ; ---- ?? + ConjNP c = conjunctDistrSS (c.s ! CPhr CNPhrase) ; + ConjAP c as = conjunctDistrSS (c.s ! CPhr CAPhrase) as ** {monoSyl = False} ; + ConjRS c = conjunctDistrSS (c.s ! CSent) ; + +-- These fun's are generated from the list cat's. 
+ + BaseS = twoSS ; + ConsS = consrSS thcomma ; + BaseAdv = twoSS ; + ConsAdv = consrSS thcomma ; + BaseNP = twoSS ; + ConsNP = consrSS thcomma ; + BaseAP = twoSS ; + ConsAP = consrSS thcomma ; + BaseRS = twoSS ; + ConsRS = consrSS thcomma ; + + lincat + [S] = {s1,s2 : Str} ; + [Adv] = {s1,s2 : Str} ; + [NP] = {s1,s2 : Str} ; + [AP] = {s1,s2 : Str} ; + [RS] = {s1,s2 : Str} ; + + oper + thcomma : Str = [] ; ---- should be a space + +} diff --git a/lib/src/chinese/ExtraChi.gf b/lib/src/chinese/ExtraChi.gf new file mode 100644 index 000000000..7a38dd3b8 --- /dev/null +++ b/lib/src/chinese/ExtraChi.gf @@ -0,0 +1,7 @@ +concrete ExtraChi of ExtraChiAbs = CatChi ** + open ResChi, Prelude in { + + lincat + Aspect = {s : Str ; a : ResChi.Aspect} ; + +} diff --git a/lib/src/chinese/ExtraChiAbs.gf b/lib/src/chinese/ExtraChiAbs.gf new file mode 100644 index 000000000..91c9f672d --- /dev/null +++ b/lib/src/chinese/ExtraChiAbs.gf @@ -0,0 +1,4 @@ +abstract ExtraChiAbs = Cat ** { + cat + Aspect ; + } ; diff --git a/lib/src/chinese/GrammarChi.gf b/lib/src/chinese/GrammarChi.gf new file mode 100644 index 000000000..a57c24ab9 --- /dev/null +++ b/lib/src/chinese/GrammarChi.gf @@ -0,0 +1,22 @@ +--# -path=.:../abstract:../common:prelude + +concrete GrammarChi of Grammar = + NounChi, + VerbChi, + AdjectiveChi, + AdverbChi, + NumeralChi, + SentenceChi, + QuestionChi, + RelativeChi, + ConjunctionChi, + PhraseChi, + TextChi, + StructuralChi, + IdiomChi, + TenseChi + ** { + +flags startcat = Phr ; unlexer = text ; lexer = text ; + +} ; diff --git a/lib/src/chinese/IdiomChi.gf b/lib/src/chinese/IdiomChi.gf new file mode 100644 index 000000000..cb9a4440f --- /dev/null +++ b/lib/src/chinese/IdiomChi.gf @@ -0,0 +1,27 @@ +concrete IdiomChi of Idiom = CatChi ** open Prelude, ResChi in { + + lin + ---- formal subject, e.g. it is hot ?? now empty subject + ImpersCl vp = mkClause [] vp ; + --can be empty, or ImpersCl vp = mkClause "天" vp ; but "天" only used to describe weather(e.g. 
it's raining) + + ---- one wants to learn Chinese ?? now empty subject + GenericCl vp = mkClause [] vp ; + -- GenericCl vp = mkClause "有人" vp ; (meaning: there is a person) + + ---- it is John who did it + CleftNP np rs = mkClause np.s copula rs.s ; + + CleftAdv ad s = mkClause ad.s (insertObj s (predV copula)) ; ---- it is here she slept + + ExistNP np = mkClause [] (regVerb you_s) np.s ; ---- infl of you + + ExistIP ip = {s = (mkClause [] (regVerb you_s) ip.s).s} ; ---- infl of you + + ProgrVP vp = vp ; ---- + + ImpPl1 vp = ss (infVP vp) ; ---- + +} + + diff --git a/lib/src/chinese/LangChi.gf b/lib/src/chinese/LangChi.gf new file mode 100644 index 000000000..00e5bc351 --- /dev/null +++ b/lib/src/chinese/LangChi.gf @@ -0,0 +1,11 @@ +--# -path=.:../abstract:../common:../prelude + + +concrete LangChi of Lang = + GrammarChi, + LexiconChi + ** { + +flags startcat = Phr ; unlexer = concat ; lexer = text ; + +} ; diff --git a/lib/src/chinese/LexiconChi.gf b/lib/src/chinese/LexiconChi.gf new file mode 100644 index 000000000..3ff8e39b6 --- /dev/null +++ b/lib/src/chinese/LexiconChi.gf @@ -0,0 +1,458 @@ +concrete LexiconChi of Lexicon = CatChi ** + open ParadigmsChi, ResChi, Prelude in { + +flags + coding = utf8 ; + +lin + +-- LexiconCmn + + man_N = mkN "男人" "个"; -- "nanren" "ge" first being noun, second is classifier(counter) + woman_N = mkN "女人" "个"; -- "nvren" "ge" classifier behaves like the "cup" in "cup of tea" + house_N = mkN "房子" "间"; -- "fangzi" "jian" + tree_N = mkN "树" "棵"; -- "shu" "ke" + big_A = mkA "大" ; -- "da" + small_A = mkA "小" ; -- "xiao" + green_A = mkA "绿" ; -- "lv" + walk_V = mkV "走" ; -- "zou" + sleep_V = mkV "睡" ; -- "shui" +---- arrive_V = mkV "到" "了" [] [] "过"; -- "dao" + love_V2 = mkV2 "爱" ; -- "ai" + watch_V2 = mkV2 "看" ; -- "kan" +--- please2_V2 = mkV "麻烦" ; -- "mafan" +--- believe_VS = mkV "相信" ; -- "xiangxin" + know_VS = mkV "知道" ; -- "zhidao" + wonder_VQ = mkV "好奇" ; -- "haoqi" + john_PN = mkPN "约翰" ; -- "yuehan" +--- mary_PN = mkPN "玛丽" ; 
-- "mali" + + +-- Swadesh + +--big_A = mkA "大" ; +long_A = mkA "长" ; +wide_A = mkA "宽" ; +thick_A = mkA "厚" ; +heavy_A = mkA "重" ; +--small_A = mkA "小" ; +short_A = mkA "短" ; +narrow_A = mkA "窄" ; +thin_A = mkA "薄" ; -- [mark] for person mkA "瘦" +--woman_N = mkN "女人" ; +--man(adult_N = mkN "男人" ; +--man(human_N = mkN "人" ; +child_N = mkN "孩子" ; +wife_N = mkN "妻子" ; +husband_N = mkN "丈夫" ; +--father_N = mkN "父亲" ; +animal_N = mkN "动物" "只"; -- [mark] added classifier for nouns +fish_N = mkN "鱼" "条"; +bird_N = mkN "鸟" "只"; +dog_N = mkN "狗" "只"; +louse_N = mkN "虱" "只"; +snake_N = mkN "蛇" "条"; +worm_N = mkN "虫" "只"; +--tree_N = mkN "树" ; +forest_N = mkN "森林" "片"; +stick_N = mkN "树枝" ; +fruit_N = mkN "水果" ; +seed_N = mkN "种子" "粒"; +leaf_N = mkN "叶子" "片"; -- [mark] "叶" -> "叶子" , "叶" is often treated as morpheme +root_N = mkN "树根" ; -- [mark] "根" --> "树根"(tree root) +bark_N = mkN "树皮" "块"; +flower_N = mkN "花" "朵"; +grass_N = mkN "草" "棵"; +rope_N = mkN "绳" "根"; +skin_N = mkN "皮" "块"; +meat_N = mkN "肉" "块"; +blood_N = mkN "血" "滴"; -- [mark] several classifiers, "滴"(drop), "滩"(puddle) +bone_N = mkN "骨头" "块"; -- [mark] "骨" -> "骨头" , "骨" is often treated as morpheme +fat_N = mkN "脂肪" "堆"; -- [mark] often without classifier +egg_N = mkN "蛋" "颗"; +horn_N = mkN "角" "根"; +tail_N = mkN "尾巴" "条"; -- [mark] "尾" -> "尾巴" , "尾" is often treated as morpheme, or if stands alone, it is a classifier itself +feather_N = mkN "羽毛" "根"; +hair_N = mkN "头发" "根"; -- [mark] several classifiers , "根"(single hair), "把"(several hairs) +head_N = mkN "头" "颗"; +ear_N = mkN "耳朵" "只"; +eye_N = mkN "眼睛" "只"; +nose_N = mkN "鼻子" ; +mouth_N = mkN "嘴" "张"; +tooth_N = mkN "牙齿" "颗"; +tongue_N = mkN "舌" "根"; +fingernail_N = mkN "指甲" "片"; +foot_N = mkN "脚" "只"; +leg_N = mkN "腿" "条"; +knee_N = mkN "膝盖" ; -- [mark] "膝" -> "膝盖" +hand_N = mkN "手" "只"; +wing_N = mkN "翅膀" "只"; -- [mark] "翼" -> "翅膀", "翅膀" is the common form for wing. 
+belly_N = mkN "肚子" ; +guts_N = mkN "肠子" "根"; +neck_N = mkN "脖子" ; +back_N = mkN "背" ; +breast_N = mkN "胸" ; +heart_N = mkN "心脏" "颗"; +liver_N = mkN "肝" ; +drink_V2 = mkV2 "喝" ; +eat_V2 = mkV2 "吃" ; +bite_V2 = mkV2 "咬" ; +suck_V2 = mkV2 "吸" ; +spit_V = mkV "吐" ; +vomit_V = mkV "呕" ; +blow_V = mkV "吹" ; +breathe_V = mkV "呼吸" ; +laugh_V = mkV "笑" ; +see_V2 = mkV2 "看" ; +hear_V2 = mkV2 "听" ; +--know_V = mkV "知道" ; +think_V = mkV "想" ; +smell_V = mkV "闻" ; -- [mark] "嗅" -> "闻", "闻" is the common form for smell. +fear_V2 = mkV2 "怕" ; +--sleep_V = mkV "睡" ; +live_V = mkV "活" ; +die_V = mkV "死" ; +kill_V2 = mkV2 "杀" ; +fight_V2 = mkV2 "打架" ; -- [mark] "吵架" -> "打架", "吵架" = quarrel, argue +hunt_V2 = mkV2 "打猎" ; -- [mark] "打猎" is iv, can't think of proper translation in v2 form for hunt +hit_V2 = mkV2 "打" ; +cut_V2 = mkV2 "割" ; +split_V2 = mkV2 "劈开" ; +stab_V2 = mkV2 "刺" ; +scratch_V2 = mkV2 "搔" ; +dig_V = mkV "挖" ; +swim_V = mkV "游泳" ; +fly_V = mkV "飞" ; +--walk_V = mkV "走" ; +come_V = mkV "来" ; +lie_V = mkV "躺" ; +sit_V = mkV "坐" ; +stand_V = mkV "站" ; +turn_V = mkV "转" ; +fall_V = mkV "落下" ; +hold_V2 = mkV2 "握" ; +squeeze_V2 = mkV2 "挤" ; +rub_V2 = mkV2 "揉" ; +wash_V2 = mkV2 "洗" ; +wipe_V2 = mkV2 "擦" ; +pull_V2 = mkV2 "拉" ; +push_V2 = mkV2 "推" ; +throw_V2 = mkV2 "扔" ; +tie_V2 = mkV2 "绑" ; +sew_V = mkV "缝" ; +count_V2 = mkV2 "数" ; +say_VS = mkVS (mkV "说") ; +sing_V = mkV "唱" ; +play_V = mkV "玩" ; +float_V = mkV "浮" ; +flow_V = mkV "流" ; +freeze_V = mkV "结冰" ; +swell_V = mkV "膨胀" ; +sun_N = mkN "太阳" ; +moon_N = mkN "月亮" ; +star_N = mkN "星星" "颗"; +water_N = mkN "水" "滴"; +rain_N = mkN "雨" "场"; +river_N = mkN "河" "条"; +lake_N = mkN "湖" ; +sea_N = mkN "海" "片"; +salt_N = mkN "盐" "瓶"; +stone_N = mkN "石头" "块"; +sand_N = mkN "沙" "粒"; +dust_N = mkN "尘土" []; +earth_N = mkN "地球" ; +cloud_N = mkN "云" "朵"; +fog_N = mkN "雾" "场"; +sky_N = mkN "天空" "片"; +wind_N = mkN "风" "阵"; +snow_N = mkN "雪" "场"; +ice_N = mkN "冰" "块"; +smoke_N = mkN "烟" "阵"; +fire_N = mkN "火" "场"; +ashes_N = mkN "灰" []; 
+burn_V = mkV "烧" ; +road_N = mkN "路" "条"; +mountain_N = mkN "山" "座"; +red_A = mkA "红" ; +--green_A = mkA "绿" ; +yellow_A = mkA "黄" ; +white_A = mkA "白" ; +black_A = mkA "黑" ; +night_N = mkN "夜晚" ; -- [mark] "夜晚" 's classifier is "个" +day_N = mkN "白天" []; -- [mark] "白天" -> "天", "天" itself is classifier +year_N = mkN "年" [] ; -- [mark] "年" itself is classifier +warm_A = mkA "温暖" ; +cold_A = mkA "冷" ; +full_A = mkA "满" ; +new_A = mkA "新" ; +old_A = mkA "老" ; -- [mark] "老" for person, "旧" for things +good_A = mkA "好" ; +bad_A = mkA "坏" ; +rotten_A = mkA "烂" ; +dirty_A = mkA "脏" ; +straight_A = mkA "直" ; +round_A = mkA "圆" ; +sharp_A = mkA "尖" ; +dull_A = mkA "钝" ; +smooth_A = mkA "光滑" ; +wet_A = mkA "湿" ; +dry_A = mkA "干" ; +correct_A = mkA "对" ; +near_A = mkA "近" ; +far_A = mkA "远" ; +left_Ord = ss "左" ; +right_Ord = ss "右" ; +name_N = mkN "名字" ; -- [mark] "名" --> "名字" + +-- HSK + +add_V3 = mkV3 "加" ; +airplane_N = mkN "飞机" "架"; +already_Adv = mkAdv "已经" ; +answer_V2S = mkV2S (mkV "回答") ; +apple_N = mkN "苹果" ; +art_N = mkN "艺术" []; -- [mark] usually without classifier +ask_V2Q = mkV2Q (mkV "问") ; +bank_N = mkN "银行" "间"; +beautiful_A = mkA "漂亮" ; +become_VA = mkV "变" ; +beer_N = mkN "啤酒" "杯"; +bike_N = mkN "自行车" "台"; +blue_A = mkA "蓝" ; +boat_N = mkN "船" "艘"; +book_N = mkN "书" "本"; +bread_N = mkN "面包" ; +buy_V2 = mkV2 "买" ; +cap_N = mkN "帽子" "顶"; +car_N = mkN "汽车" "台"; +chair_N = mkN "椅子" "把"; +city_N = mkN "城市" ; -- [mark] "市" --> "城市" +clean_A = mkA "干净" ; +coat_N = mkN "衣服" "件"; +country_N = mkN "国家" ; -- [mark] "国" --> "国家" +cow_N = mkN "牛" "头"; +do_V2 = mkV2 "做" ; +doctor_N = mkN "医生" "名"; +door_N = mkN "门" "扇"; +---easy_A2V = mkA "容易" ; +factory_N = mkN "工厂" "间"; +far_Adv = mkAdv "远" ; +---father_N2 = mkN2 "父亲" ; +fear_VS = mkVS (mkV "怕") ; +find_V2 = mkV2 "发现" ; +forget_V2 = mkV2 "忘" ; +friend_N = mkN "朋友" ; +girl_N = mkN "姑娘" ; +give_V3 = mkV3 "给" ; +go_V = mkV "去" ; +--go_N = mkN "往" ; +grammar_N = mkN "语法" ; +hat_N = mkN "帽子" "顶"; +--take_N = mkN "带" ; 
+--have_N = mkN "有" ; +--take_N = mkN "带" ; +--have_N = mkN "有" ; +hill_N = mkN "山" "座"; +hope_VS = mkV "希望" ; +horse_N = mkN "马" "匹"; +hot_A = mkA "热" ; +--how many_N = mkN "多少" ; +important_A = mkA "重要" ; +--heavy_A = mkA "重" ; +industry_N = mkN "工业" []; +jump_V = mkV "跳" ; +know_V2 = mkV2 "知道" ; +know_VQ = mkV "知道" ; +lamp_N = mkN "灯" "盏"; +language_N = mkN "语言" "种"; +learn_V2 = mkV2 "学" ; +--leave_N = mkN "离开" ; +leave_V2 = mkV2 "离开" ; --[mark] "离" --> "离开", "离" itself is either a morpheme, or a marker indicating distance +--walk_N = mkN "走" ; +like_V2 = mkV2 "喜欢" ; +--be willing_N = mkN "愿意" ; +listen_V2 = mkV2 "听" ; +lose_V2 = mkV2 "丢" ; +--love_N = mkN "喜欢" ; +love_V2 = mkV2 "爱" ; +---mother_N2 = mkN2 "妈" ; +music_N = mkN "音乐" [] ; -- [mark] usually without classifier +newspaper_N = mkN "报纸" "张"; --[mark] "报" --> "报纸" +now_Adv = mkAdv "现在" ; +number_N = mkN "号码" ; -- [mark] "号" --> "号码" +open_V2 = mkV2 "开" ; +paint_V2A = mkV2A (mkV "画") ; +paper_N = mkN "纸" "张"; +--place_N = mkN "地方" ; +--part_N = mkN "部分" ; +pen_N = mkN "笔" "只"; +--pen_N = mkN "钢笔" "只"; +person_N = mkN "人" ; +--beat_N = mkN "打" ; +--pull_N = mkN "拉" ; +play_V2 = mkV2 "玩" ; --[mark] "玩儿" --> "玩" +--perform_N = mkN "表演" ; +--have_N = mkN "有" ; +put_V2 = mkV2 "摆" ; +--let go_N = mkN "放" ; +question_N = mkN "问题" ; + +--be enough_N = mkN "够" ; +--very_N = mkN "很" ; +rain_V0 = mkV "下雨"; +read_V2 = mkV2 "读" ; +reason_N = mkN "道理" ; +restaurant_N = mkN "饭店" "间"; +--appropriate_N = mkN "合适" ; +--correct_N = mkN "正确" ; +--run_N = mkN "跑步" ; +run_V = mkV "跑" ; +--road_N = mkN "道" ; +school_N = mkN "学校" "所"; +science_N = mkN "科学" []; -- [mark] usually without classifier +sell_V3 = mkV3 "卖" ; +--go_N = mkN "去" ; +send_V3 = mkV3 "寄" ; +--clap_N = mkN "拍" ; +sheep_N = mkN "羊" "只"; +ship_N = mkN "船" "艘"; +shoe_N = mkN "鞋" "只"; +shop_N = mkN "商店" "间"; +--of that kind_N = mkN "那样" ; +--what_N = mkN "什么" ; +song_N = mkN "歌" "首"; +--tell_N = mkN "讲" ; +speak_V2 = mkV2 "说" ; +--road_N = mkN "道" ; +--live_N = 
mkN "住" ; +--act as_N = mkN "当" ; +student_N = mkN "学生" "名" ; +table_N = mkN "桌子" "张"; +--word_N = mkN "话" ; +talk_V3 = mkV3 "说" ; +--talk_N = mkN "谈" ; +--education_N = mkN "教育" ; +teach_V2 = mkV2 "教" ; +--coach_N = mkN "辅导" ; +--teacher_N = mkN "先生" ; +--master worker_N = mkN "师傅" ; +teacher_N = mkN "老师" "名"; +television_N = mkN "电视" "台"; +--that_N = mkN "那个" ; +--that_N = mkN "那" ; +--from_N = mkN "从" ; +--by way of_N = mkN "通过" ; +--towards_N = mkN "向" ; +--and_N = mkN "和" ; +today_Adv = mkAdv "今天" ; +--now_N = mkN "现在" ; +--also_N = mkN "也" ; +train_N = mkN "火车" "辆"; +travel_V = mkV "旅行" ; +--below_N = mkN "下边" ; +--understand_N = mkN "了解" ; +understand_V2 = mkV2 "懂" ; +--recognize_N = mkN "认识" ; +--open up_N = mkN "通" ; +university_N = mkN "大学" "所"; +wait_V2 = mkV2 "等" ; +--need_N = mkN "需要" ; +watch_V2 = mkV2 "看" ; +--time_N = mkN "时候" ; +--wait_N = mkN "等" ; +--what_N = mkN "什么" ; +--how_N = mkN "怎么" ; +win_V2 = mkV2 "赢" ; +--window_N = mkN "窗户" ; +window_N = mkN "窗" "扇"; +wine_N = mkN "酒" "瓶"; +--leave_N = mkN "离" ; +--do_N = mkN "作" ; +write_V2 = mkV2 "写" ; +young_A = mkA "年轻" ; + +-- from Google + +apartment_N = mkN "公寓" "间" ; +baby_N = mkN "婴儿" ; +boot_N = mkN "靴子" "只" ; -- [mark] "机" --> "靴子" +boss_N = mkN "老板" ; +boy_N = mkN "男孩" ; +brother_N2 = mkN2 "弟弟" ; +butter_N = mkN "黄油" "盒"; +camera_N = mkN "摄像头" ; +carpet_N = mkN "地毯" "张"; +cat_N = mkN "猫" "只" ; +ceiling_N = mkN "天花板" ; --[mark] "天花板上" --> "天花板" +cheese_N = mkN "奶酪" "块"; +church_N = mkN "教堂" "所" ; +computer_N = mkN "计算机" "台" ; +cousin_N = mkN "表弟" ; +distance_N3 = mkN3 (mkN "的距离") emptyPrep emptyPrep ; ---- +enemy_N = mkN "敌人" ; +father_N2 = mkN2 "父亲" ; +floor_N = mkN "地板" ; -- [mark] floor "地板"(have you cleaned the floor) "楼层"(which floor do you live?) 
+fridge_N = mkN "冰箱" "台" ; +garden_N = mkN "花园" "座"; +glove_N = mkN "手套" "副"; +gold_N = mkN "金子" "块"; -- [mark] also without classifier +harbour_N = mkN "海港" ; +iron_N = mkN "铁" "块"; -- [mark] also without classifier +king_N = mkN "国王" ; +leather_N = mkN "皮革" "块"; +love_N = mkN "爱" [] ; -- [mark] "块" --> [], often without classifier +milk_N = mkN "牛奶" "杯" ; -- [mark] "奶" --> "牛奶", which literaly means cow milk +mother_N2 = mkN2 "母亲"; +oil_N = mkN "油" "桶" ; -- [mark] rewritten +peace_N = mkN "和平" []; -- [mark] often without classifier +planet_N = mkN "星球" ; +plastic_N = mkN "塑料" "块"; -- [mark] "塑料制成" --> "塑料" often without classifier +policeman_N = mkN "警察" "名" ; +priest_N = mkN "牧师" "位"; +queen_N = mkN "女王" ; +radio_N = mkN "收音机" "台" ; +religion_N = mkN "宗教" []; -- [mark] also without classifier +rock_N = mkN "石头" "块" ; +roof_N = mkN "屋顶" ; +rubber_N = mkN "橡胶" "块"; +rule_N = mkN "规则" "项"; -- [mark] "原则" --> "规则" +shirt_N = mkN "衬衫" "件" ; +silver_N = mkN "银子" "块"; -- [mark] "银" --> "银子" +sister_N = mkN "妹妹" ; +sock_N = mkN "袜子" "只"; +steel_N = mkN "钢" "块"; +stove_N = mkN "炉子" ; +village_N = mkN "村庄" "座"; +war_N = mkN "战争" "场" ; -- [mark] rewritten +wood_N = mkN "木头" "块" ; -- [mark] "木" --> "木头" + + +-- just missing + +lin + alas_Interj = ssword "唉" ; + beg_V2V = mkV2V (mkV "乞求") ; -- beg him to do something + break_V2 = mkV2 "打破" ; + broad_A = mkA "宽" ; + brown_A = mkA "棕" ; + clever_A = mkA "聪明" ; + close_V2 = mkV2 "关闭" ; + easy_A2V = mkA2 "简单" ; + empty_A = mkA "空" ; + fun_AV = mkA "有趣" ; + hate_V2 = mkV2 "讨厌" ; + married_A2 = mkA2 "结婚" ; + paris_PN = mkPN "巴黎" ; + probable_AS = mkA "可能" ; + ready_A = mkA "准备好" ; -- [mark] "准备好": 准备(v) + 好(adj,complement) + seek_V2 = mkV2 "寻求" ; + stop_V = mkV "停止" ; + stupid_A = mkA "笨" ; + switch8off_V2 = mkV2 "关" ; + switch8on_V2 = mkV2 "开" ; + ugly_A = mkA "丑" ; + uncertain_A = mkA "不确定" ; -- [mark] "不确定": 不("un-") + 确定("certain") + + + +} diff --git a/lib/src/chinese/NounChi.gf b/lib/src/chinese/NounChi.gf new file mode 
100644 index 000000000..3fc0b4bdc --- /dev/null +++ b/lib/src/chinese/NounChi.gf @@ -0,0 +1,87 @@ +concrete NounChi of Noun = CatChi ** open ResChi, Prelude in { + + lin + DetCN det cn = case det.detType of { + DTFull Sg => {s = det.s ++ cn.c ++ cn.s} ; -- this house + DTFull Pl => {s = det.s ++ xie_s ++ cn.s} ; -- these houses + DTNum => {s = det.s ++ cn.c ++ cn.s} ; -- (these) five houses + DTPoss => {s = det.s ++ cn.s} -- our (five) houses + } ; + UsePN pn = pn ; + UsePron p = p ; + + DetNP det = det ; + + PredetNP pred np = mkNP (pred.s ++ possessive_s ++ np.s) ; + + PPartNP np v2 = mkNP ((predV v2).verb.s ++ possessive_s ++ np.s) ; ---- ?? + + AdvNP np adv = mkNP (adv.s ++ possessive_s ++ np.s) ; + + DetQuant quant num = { + s = quant.s ++ num.s ; + detType = case num.numType of { + NTFull => DTNum ; -- five + NTVoid n => case quant.detType of { + DTPoss => DTPoss ; -- our + _ => DTFull n -- these/this + } + } + } ; + + DetQuantOrd quant num ord = { + s = quant.s ++ num.s ++ ord.s ; + detType = case num.numType of { + NTFull => DTNum ; -- five + NTVoid n => case quant.detType of { + DTPoss => DTPoss ; -- our + _ => DTFull n -- these/this + } + } + } ; + + PossPron p = { + s = p.s ++ possessive_s ; + detType = DTPoss + } ; + + NumSg = {s = [] ; numType = NTVoid Sg} ; + NumPl = {s = [] ; numType = NTVoid Pl} ; + + NumCard n = n ** {numType = NTFull} ; + NumDigits d = d ** {numType = NTFull} ; + OrdDigits d = {s = ordinal_s ++ d.s} ; + + NumNumeral numeral = numeral ** {hasC = True} ; + OrdNumeral numeral = {s = ordinal_s ++ numeral.s} ; + + AdNum adn num = {s = adn.s ++ num.s ; hasC = True} ; + + OrdSuperl a = {s = superlative_s ++ a.s} ; + + DefArt = mkDet the_s ; + IndefArt = mkDet yi_s ; ---- in the plural ? 
+ + MassNP cn = cn ; + + UseN n = n ; + UseN2 n = n ; + Use2N3 f = {s = f.s ; c = f.c ; c2 = f.c2} ; + Use3N3 f = {s = f.s ; c = f.c ; c2 = f.c3} ; + + ComplN2 f x = {s = appPrep f.c2 x.s ++ f.s ; c = f.c} ; + ComplN3 f x = {s = appPrep f.c2 x.s ++ f.s ; c = f.c ; c2 = f.c3} ; + + AdjCN ap cn = case ap.monoSyl of { + True => {s = ap.s ++ cn.s ; c = cn.c} ; + False => {s = ap.s ++ possessive_s ++ cn.s ; c = cn.c} + } ; + + RelCN cn rs = {s = rs.s ++ cn.s ; c = cn.c} ; + AdvCN cn ad = {s = ad.s ++ possessive_s ++ cn.s ; c = cn.c} ; + SentCN cn cs = {s = cs.s ++ cn.s ; c = cn.c} ; + ApposCN cn np = {s = np.s ++ cn.s ; c = cn.c} ; + + RelNP np rs = mkNP (rs.s ++ np.s) ; + +} diff --git a/lib/src/chinese/NumeralChi.gf b/lib/src/chinese/NumeralChi.gf new file mode 100644 index 000000000..c509ea2cc --- /dev/null +++ b/lib/src/chinese/NumeralChi.gf @@ -0,0 +1,143 @@ +concrete NumeralChi of Numeral = CatChi ** open ResChi, Prelude in { + +flags coding = utf8 ; + + +param Qform = bai | bai0 | shiwan | shiwan0 ; +param Bform = shi | shi0 | wan | wan0 ; +param Zero = zero | nozero ; +oper ling : Zero * Zero => Str = + table {<zero,_> => "零" ; + <_,zero> => "零" ; + <nozero,nozero> => []} ; +oper Wan : Zero => Str = + table {zero => "万" ; + nozero => []} ; + +oper mkD : Str -> Str -> Str = \x,_ -> word x ; -- hiding the "formal" version + +--lincat Numeral = {s : Str} ; +lincat Digit = {s : Str} ; +lincat Sub10 = {s : Str} ; +lincat Sub100 = {inh : Zero ; s : Bform => Str} ; +lincat Sub1000 = {inh : Zero ; s : Qform => Str} ; +lincat Sub1000000 = {s : Str} ; +lin num x0 = + {s = x0.s} ; + +-- 一二三四五六七八九十一百千 +-- + +lin n2 = + {s = mkD "二" "贰"} ; +lin n3 = + {s = mkD "三" "叁"} ; +lin n4 = + {s = mkD "四" "肆"} ; +lin n5 = + {s = mkD "五" "伍"} ; +lin n6 = + {s = mkD "六" "陆"} ; +lin n7 = + {s = mkD "七" "柒"} ; +lin n8 = + {s = mkD "八" "捌"} ; +lin n9 = + {s = mkD "九" "玖"} ; +lin pot01 = + {s = mkD "一" "壹"} ; +lin pot0 d = + {s = d.s} ; +lin pot110 = + {inh = nozero ; + s = table { + shi => mkD "一十" "壹拾" ; + shi0 => mkD 
"一十" "壹拾" ; + wan => mkD "一万" "壹万" ; + wan0 => mkD "一万" "壹万"}} ; +lin pot111 = + {inh = nozero ; + s = table { + shi => mkD "十一" "拾壹" ; + shi0 => mkD "一十一" "壹拾壹" ; + wan => mkD "一万一千" "壹万壹仟" ; + wan0 => mkD "一万一千" "壹万壹仟"}} ; +lin pot1to19 d = + {inh = nozero ; + s = table { + shi => mkD "一十" "壹拾" ++ d.s ; + shi0 => mkD "一十" "壹拾" ++ d.s ; + wan => mkD "一万" "壹万" ++ d.s ++ mkD "千" "仟" ; + wan0 => mkD "一万" "壹万" ++ d.s ++ mkD "千" "仟"}} ; +lin pot0as1 n = + {inh = zero ; + s = table { + shi => n.s ; + shi0 => n.s ; + wan => n.s ++ mkD "千" "仟" ; + wan0 => n.s ++ mkD "千" "仟"}} ; +lin pot1 d = + {inh = zero ; + s = table { + shi => d.s ++ mkD "十" "拾" ; + shi0 => d.s ++ mkD "十" "拾" ; + wan0 => d.s ++ "万" ; + wan => d.s ++ "万"}} ; +lin pot1plus d e = + {inh = nozero ; + s = table { + shi => d.s ++ mkD "十" "拾" ++ e.s ; + shi0 => d.s ++ mkD "十" "拾" ++ e.s ; + wan => d.s ++ "万" ++ e.s ++ mkD "千" "仟" ; + wan0 => d.s ++ "万" ++ e.s ++ mkD "千" "仟"}} ; +lin pot1as2 n = + {inh = zero ; + s = table { + bai => n.s ! shi ; + bai0 => n.s ! shi ; + shiwan => n.s ! wan ; + shiwan0 => n.s ! wan0}} ; +lin pot2 d = + {inh = zero ; + s = table { + bai => d.s ++ mkD "百" "佰" ; + bai0 => d.s ++ mkD "百" "佰" ; + shiwan0 => d.s ++ mkD "十万" "拾万" ; + shiwan => d.s ++ mkD "十万" "拾万"}} ; +lin pot2plus d e = + {inh = nozero ; + s = table { + bai => d.s ++ mkD "百" "佰" ++ (ling ! <e.inh,e.inh>) ++ e.s ! shi0 ; + bai0 => d.s ++ mkD "百" "佰" ++ (ling ! <e.inh,e.inh>) ++ e.s ! shi0 ; + shiwan => d.s ++ mkD "十" "拾" ++ (Wan ! (e.inh)) ++ e.s ! wan ; + shiwan0 => d.s ++ mkD "十" "拾" ++ (Wan ! (e.inh)) ++ e.s ! wan0}} ; +lin pot2as3 n = + {s = n.s ! bai} ; +lin pot3 n = + {s = n.s ! shiwan} ; +lin pot3plus n m = + {s = (n.s ! shiwan0) ++ (ling ! <n.inh,m.inh>) ++ m.s ! 
bai0} ; + + +-- numerals as sequences of digits + + lincat + Dig = SS ; + + lin + IDig d = d ; + + IIDig d i = ss (d.s ++ i.s) ; + + D_0 = ss "0" ; + D_1 = ss "1" ; + D_2 = ss "2" ; + D_3 = ss "3" ; + D_4 = ss "4" ; + D_5 = ss "5" ; + D_6 = ss "6" ; + D_7 = ss "7" ; + D_8 = ss "8" ; + D_9 = ss "9" ; + +} diff --git a/lib/src/chinese/ParadigmsChi.gf b/lib/src/chinese/ParadigmsChi.gf new file mode 100644 index 000000000..68a988f4d --- /dev/null +++ b/lib/src/chinese/ParadigmsChi.gf @@ -0,0 +1,118 @@ +resource ParadigmsChi = open CatChi, ResChi, Prelude in { + +flags coding = utf8 ; + +flags coding=utf8; +oper + mkN = overload { + mkN : (man : Str) -> N + = \n -> lin N (regNoun n ge_s) ; + mkN : (man : Str) -> Str -> N + = \n,c -> lin N (regNoun n c) + } ; + + mkN2 = overload { + mkN2 : Str -> N2 + = \n -> lin N2 (regNoun n ge_s ** {c2 = emptyPrep}) ; ---- possessive ? +---- mkN2 : N -> Str -> N2 +---- = \n,p -> lin N2 (n ** {c2 = mkPrep p}) ; + } ; + + mkN3 : N -> Preposition -> Preposition -> N3 + = \n,p,q -> lin N3 (n ** {c2 = p ; c3 = q}) ; + + + mkPN : (john : Str) -> PN + = \s -> lin PN {s = word s} ; + + mkA = overload { + mkA : (small : Str) -> A + = \a -> lin A (simpleAdj a) ; + mkA : (small : Str) -> Bool -> A + = \a,b -> lin A (mkAdj a b) ; + } ; + + mkA2 : Str -> A2 = \a -> lin A2 (simpleAdj a ** {c2 = emptyPrep}) ; + + mkV = overload { + mkV : (walk : Str) -> V + = \walk -> lin V (regVerb walk) ; + mkV : (arrive : Str) -> Str -> Str -> Str -> Str -> V + = \arrive,pp,ds,dp,ep -> lin V (mkVerb arrive pp ds dp ep neg_s) ; + mkV : (arrive : Str) -> Str -> Str -> Str -> Str -> Str -> V + = \arrive,pp,ds,dp,ep,neg -> lin V (mkVerb arrive pp ds dp ep neg) ; + } ; + + + mkV2 : Str -> V2 + = \s -> lin V2 (regVerb s ** {c2 = emptyPrep}) ; + + mkV3 = overload { + mkV3 : Str -> V3 + = \s -> lin V3 (regVerb s ** {c2,c3 = emptyPrep}) ; + mkV3 : V -> V3 + = \s -> lin V3 (s ** {c2,c3 = emptyPrep}) ; +---- mkV3 : V -> Str -> Str -> V3 +---- = \v,p,q -> lin V3 (v ** {c2 = 
mkPrep p ; c3 = mkPrep q}) ; + } ; + + mkVV : Str -> VV = ---- + \v -> lin VV (regVerb v) ; + + mkVQ : V -> VQ = + \v -> lin VQ v ; + + mkVS : V -> VS = + \v -> lin VS v ; + + mkVA : V -> VA = + \v -> lin VA v ; + + mkV2Q : V -> V2Q = + \v -> lin V2Q (v ** {c2 = emptyPrep}) ; +---- mkV2Q : V -> Str -> V2Q = +---- \v,p -> lin V2Q (v ** {c2 = mkPrep p}) ; + + mkV2V : V -> V2V = + \v -> lin V2V (v ** {c2 = emptyPrep ; c3 = emptyPrep}) ; +---- mkV2V : V -> Str -> Str -> V2V = +---- \v,p,q -> lin V2V (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ; + + mkV2S : V -> V2S = + \v -> lin V2S (v ** {c2 = emptyPrep}) ; +---- mkV2S : V -> Str -> V2S = +---- \v,p -> lin V2S (v ** {c2 = mkPrep p}) ; + + mkV2A : V -> V2A + = \v -> lin V2A (v ** {c2 = emptyPrep ; c3 = emptyPrep}) ; +---- mkV2A : V -> Str -> Str -> V2A +---- = \v,p,q -> lin V2A (v ** {c2 = mkPrep p ; c3 = mkPrep q}) ; + + mkAdv = overload { + mkAdv : Str -> Adv + = \s -> lin Adv {s = word s ; advType = ATPlace} ; + mkAdv : Str -> AdvType -> Adv + = \s,at -> lin Adv {s = word s ; advType = at} ; + } ; + + AdvType : Type + = ResChi.AdvType ; + placeAdvType : AdvType + = ATPlace ; + timeAdvType : AdvType + = ATTime ; + mannerAdvType : AdvType + = ATManner ; + + mkPrep = overload { ---- is this the right order of the fields? + mkPrep : Str -> Preposition + = \s -> ResChi.mkPreposition s [] ; + mkPrep : Str -> Str -> Preposition + = \s,t -> ResChi.mkPreposition s t ; + } ; + + emptyPrep : Preposition = mkPrep [] ; + + +} + diff --git a/lib/src/chinese/PhraseChi.gf b/lib/src/chinese/PhraseChi.gf new file mode 100644 index 000000000..06b4b3ee8 --- /dev/null +++ b/lib/src/chinese/PhraseChi.gf @@ -0,0 +1,27 @@ +concrete PhraseChi of Phrase = CatChi ** open Prelude, ResChi in { + + lin + PhrUtt pconj utt voc = {s = pconj.s ++ utt.s ++ voc.s} ; + + UttS s = s ; + UttQS qs = qs ; + UttImpSg pol imp = {s = pol.s ++ imp.s ! pol.p} ; + UttImpPl pol imp = {s = pol.s ++ imp.s ! pol.p} ; + UttImpPol pol imp = {s = pol.s ++ imp.s ! 
pol.p} ; --- add politeness here? + + UttIP ip = ip ; + UttIAdv iadv = iadv ; + UttNP np = np ; + UttCN cn = cn ; + UttAP ap = ap ; + UttCard x = x ; + UttVP vp = ss (infVP vp) ; + UttAdv adv = adv ; + + NoPConj = {s = []} ; + PConjConj conj = ss (conj.s ! CSent).s2 ; + + NoVoc = {s = []} ; + VocNP np = {s = np.s} ; ---- ?? + +} diff --git a/lib/src/chinese/QuestionChi.gf b/lib/src/chinese/QuestionChi.gf new file mode 100644 index 000000000..7ff9b3af2 --- /dev/null +++ b/lib/src/chinese/QuestionChi.gf @@ -0,0 +1,40 @@ +concrete QuestionChi of Question = CatChi ** + open ResChi, Prelude in { + + flags optimize=all_subs ; + + lin + + QuestCl cl = {s = \\p,a => cl.s ! p ! a ++ question_s} ; --- plus redup questions + + QuestVP ip vp = { + s = \\p,a => ip.s ++ vp.prePart ++ useVerb vp.verb ! p ! a ++ vp.compl + } ; + + QuestSlash ip cls = { + s =\\p,a => cls.c2.prepPre ++ cls.np ++ cls.c2.prepMain ++ cls.vp ! p ! a ++ + possessive_s ++ di_s ++ ip.s + } ; + + QuestIAdv iadv cl = {s = \\p,a => cl.np ++ iadv.s ++ cl.vp ! p ! a} ; + + QuestIComp icomp np = {s = \\p,a => np.s ++ icomp.s} ; ---- order + + PrepIP p ip = ss (appPrep p ip.s) ; + + AdvIP ip adv = ss (adv.s ++ possessive_s ++ ip.s) ; ---- adding de + + IdetCN det cn = {s = det.s ++ cn.c ++ cn.s} ; ---- number? 
+ + IdetIP idet = idet ; + + IdetQuant iquant num = ss (iquant.s ++ num.s) ; ---- + + AdvIAdv i a = ss (a.s ++ i.s) ; + + CompIAdv a = ss (zai_s ++ a.s) ; + + CompIP ip = ss (copula_s ++ ip.s) ; + +} + diff --git a/lib/src/chinese/README b/lib/src/chinese/README new file mode 100644 index 000000000..ead96121b --- /dev/null +++ b/lib/src/chinese/README @@ -0,0 +1,209 @@ +Chinese resource grammar experiment + +(c) Aarne Ranta 2012 + +Idea: bootstrap a complete resource grammar by +- cloning files from another language (Thai) +- extracting lexicon from available sources (Swadesh, HSK list, Google translate) +- fixing the errors in syntax and lexicon +- testing the grammar in applications + +With next to no knowledge of Chinese, but access to web sources, a grammar book + + Claudia Ross and Jing-heng Sheng Ma, + Modern Mandarin Chinese Grammar. A Practical Guide, + Routledge, + London and New York, + 2006. + +and an extended mini resource by Jolene Zhuo Lin Qiqige +and comments on that from Inari Listenmaa. + +The question is how good it will be before the visit to Shanghai in two weeks, +and how much work will be needed to fix everything. But so far I feel like the +guy in Searle's "Chinese Room", http://en.wikipedia.org/wiki/Chinese_room + + +5/10/2012 + +Clone chinese/*Chi.gf from thai/*Tha.gf by + + runghc lib/src/Clone.hs Tha Chi + +This compiles directly, omitting Lexicon and Structural but producing some Thai words, visible with + + pg -words + +Then port parts of examples/extmini/*Cmn.gf appending them to Lexicon, Structural, Paradigms, Res. +Tweak until everything compiles. For simplicity, retain the lincats of Thai. + +Then identify Thai words with 'pg -words', and locate them with 'ma'. Replace them with constants +in ResChi, initialized with defaultStr = "". 17 such constants are needed. StringsChi can be eliminated. +It was a transient part of Tha anyway. + +Copy examples/numerals/chinese.gf to NumeralChi.gf.
+ +We now have 47 words (Chinese characters), + + Lang> pg -words + 0 1 2 3 4 5 6 7 8 9 万 个 他 他们 仟 伍 你 你们 佰 叁 壹 壹万 壹拾 + 壹拾壹 大 女人 她 好奇 小 我 我们 房子 拾 拾万 拾壹 拾壹万 捌 柒 树 棵 + 每 玖 男人 睡 知道 约翰 绿 肆 贰 走 这 那 间 陆 零 非常 + +and thousands of linearizable Cl. With Thai word order so far. And all these blank (defaultStr) +function words. + + 男人 绿 大 非常 每 男人 绿 走 大 + every green man very bigly is being walking bigly + + 约翰 睡 + John sleeps + + 约翰 睡 + John is sleeping + + 他 知道 约翰 走 + he knows that John would have walked + + 每 走 + everyone walks + + 你们 知道 走 + you know that one will walk + + 房子 我们 绿 男人 + greenest house we is being a man + + 这 走 小 + this is being walking smally + +Ca. 2 hours of work has gone to all this. + +Next step: Chinese Swadesh list, http://en.wiktionary.org/wiki/Appendix:Mandarin_Swadesh_list + + -- № English word POS Pinyin IPA notes Traditional Chinese Simplified Chinese + + > let analyse line = case words line of n:eng:ws | all Data.Char.isDigit n -> eng ++ " " ++ last ws ; _ -> "" + > readFile "swadesh.txt" >>= mapM_ (putStrLn . analyse) . lines + +After most of this is done, we have 218 words: + + 一些 万 丈夫 个 云 什么 什么时候 他 他们 仟 伍 你 你们 佰 光滑 冰 冷 刺 割 劈开 + 动物 厚 叁 叶 吃 名 吐 听 吵架 吹 呕 呼吸 和 咬 哪里 唱 喝 嗅 嘴 因为 圆 在...里 + 地球 坏 坐 壹 壹万 壹拾 壹拾壹 多 夜晚 大 天 太阳 头 头发 女人 她 好 好奇 如何 如果 + 妻子 孩子 宽 对 小 少 尖 尘土 尾 山 干 年 心脏 怕 想 我 我们 房子 手 打 打猎 扔 拉 + 拾 拾万 拾壹 拾壹万 指甲 挖 挤 捌 推 揉 握 搔 擦 数 新 星 月亮 杀 来 柒 树 树枝 树皮 + 根 森林 棵 死 每 水 水果 沙 河 洗 活 流 浮 海 温暖 游泳 湖 湿 满 火 灰 烂 烟 爱 牙齿 + 狗 玖 玩 男人 白 白天 皮 盐 直 看 眼睛 睡 知道 短 石 种子 窄 站 笑 红 约翰 绑 结冰 绳 + 绿 缝 羽毛 翼 老 耳朵 肆 肉 肚子 肝 肠子 背 胸 脂肪 脏 脖子 脚 腿 膝 膨胀 舌 花 草 落下 + 薄 虫 虱 蛇 蛋 血 角 谁 贰 走 路 躺 转 近 这 这里 那 那里 重 钝 长 间 陆 雨 雪 零 雾 + 非常 风 飞 骨 鱼 鸟 黄 黑 鼻 + +We have more than a half left: + + Lang> pg -missing -lang=Chi | ? wc -w + 329 _tmpi + +But we get more interesting sentences: + + Lang> gr -number=8 PredVP ? ? 
| l + 白天 满 冷 多 个 好奇 你们 谁 + many cold fuller days wonder who you weren't + + 胸 黄 红 一些 胸 数 + some red breast yellowly is counted + + 头 握 多 头 看 + many heads to be held see themselves + + 丈夫 那里 对 他 擦 + his husbands there correctly are wiping them + + 约翰 躺 + John lies + + 地球 这里 非常 刺 知道 我们 站 + earth very here stabbed knows that we wouldn't have stood + + 多 那里 那里 想 在...里 她 + many there there are being thinking in her + + 尘土 每 个 那里 握 + every dust there holds itself + +Ca. 30 minutes for this phase, 2h30 total. + +Now let's take the missing words and try to look them up in the HSK1 word list. +Now we have 218 left, less than 200 really missing. But potentially some junk. + +Some more words found in Google translate. Guessing mkVA and other complex subcats. +Now we have 126 left, less than 100 really missing. pg -words shows 404 Chinese words: + +…之间 一 一些 万 丈夫 上边 下 不 丢 个 中 为了 为什么 书 买 云 人 什么 什么时候 +今天 从 从前 他 他们 仟 以后 件 伍 会 但是 你 你们 佰 做 光滑 公寓 写 冰 冰箱 冷 +刺 前边 割 劈开 动物 医生 卖 厚 原则上 去 叁 发现 变 只 台 右 叶 号 吃 名 吐 吗 +听 吵架 吸 吹 呕 呼吸 和 和平 咬 哪里 唱 商店 啤酒 喜欢 喝 嗅 嘴 回答 因为 国 国王 +圆 在 在...里 地板 地毯 地球 坏 坐 块 塑料制成 壹 壹万 壹拾 壹拾壹 多 夜晚 大 大学 +天 天花板上 太 太阳 头 头发 女人 女王 奶 奶酪 她 好 好奇 如何 如果 妹妹 妻子 姑娘 +婴儿 存在 学 学校 学生 孩子 它 宗教 宽 寄 对 小 少 尖 尘土 尾 屋顶 山 工业 工厂 +左 已经 市 希望 帽子 干 干净 年 年轻 开 弟弟 往 心脏 必须 忘 怕 想 愿意 懂 我 +我们 或者 战 房子 所 手 手套 打 打猎 扔 报 拉 拾 拾万 拾壹 拾壹万 指甲 挖 挤 +捌 推 揉 握 搔 摄像头 摆 擦 收音机 敌人 教 教堂 数 新 旅行 时间 星 星球 是 是否 +最 月亮 有 朋友 木 机 杀 村庄 来 柒 树 树枝 树皮 根 桌子 森林 棵 椅子 次 歌 死 +母亲 每 比 水 水果 汽车 沙 河 油 洗 活 流 浮 海 海港 温暖 游泳 湖 湿 满 漂亮 火 +火车 灯 灰 炉子 烂 烟 烧 热 爱 父亲 牙齿 牛 牧师 状物 狗 猫 玖 玩 玩儿 现在 +电视 男人 男孩 画 白 白天 的 的橡胶 的距离 皮 皮革 盐 直 看 眼睛 睡 知道 短 +石 石头 离 种 种子 科学 窄 窗 站 笑 笔 等 红 约翰 纸 绑 结冰 给 绳 绿 缝 羊 +羽毛 翼 老 老师 老板 耳朵 肆 肉 肚子 肝 肠子 背 胸 脂肪 脏 脖子 脚 腿 膝 膨胀 +自己 自行车 舌 船 艺术 花 花园 苹果 草 落下 蓝 薄 虫 虱 虽然 蛇 蛋 血 衣服 表弟 + 衬衫 袜子 被 角 警察 计算机 语法 语言 说 读 谁 贰 赢 走 跑 路 跳 躺 转 近 还是 +这 这里 远 道理 那 那里 酒 重 重要 金 钝 钢 铁 银 银行 长 门 问 问题 间 陆 除了… +以外 雨 雪 零 雾 非常 面包 鞋 音乐 风 飞 飞机 饭店 马 骨 鱼 鸟 黄 黄油 黑 鼻 + + +4h work for a compiling grammar with a decently sized lexicon.
The next thing +is to fix the worst bugs, in particular in word order. + + +6/10 + +Started a test set for syntax, with 54 sentences testing predication and noun +phrase formation. The first run, baseline.txt, uses the Thai-based syntax. + +As the first thing, revised the order of determiners in an NP, and the place of +adverbs in VP. Det lincats seem to be too rich, whereas Cl may need to be made +discontinuous, to enable the proper place of IAdv. This is correct in Jolene's +code. + +3h of work today, total 7h. + + +7/10 + +Ported Jolene's lincats everywhere, eliminated Thai-style identifiers. Difficult +to choose default tenses. But the code is nice to work with. + +4h today, total 11h. + + +8/10 + +Refactored clause building with an overloaded ResChi.mkClause. Added existentials with the verb +you_s. Very uncertain on many things, time to call an expert. + +1h today, total 12h. + +Following a suggestion by Thomas Hallgren, divided multicharacter words into as many tokens as +there are characters. This means all division into tokens is performed by the parser, which in a +sense is optimal. To "remove" spaces in linearization, simply use l -unchars; to "insert" spaces +in parsing, use pt -chars | p. All this is done by the oper ResChi.word, which can be changed +to change this behaviour. + + +12/10 + +Went through some open questions with Jolene. Then fixed some parameters for Det and Adv and prepared +Lexicon and Structural for her inspection and completions. + +5h my time today, 3h Jolene's, total 20h. + diff --git a/lib/src/chinese/RelativeChi.gf b/lib/src/chinese/RelativeChi.gf new file mode 100644 index 000000000..f1803957c --- /dev/null +++ b/lib/src/chinese/RelativeChi.gf @@ -0,0 +1,12 @@ +concrete RelativeChi of Relative = CatChi ** open ResChi, Prelude in { + + lin + RelCl cl = {s = \\p,a => cl.s ! p ! a ++ relative_s} ; ---- ?? + RelVP rp vp = { + s = \\p,a => vp.prePart ++ useVerb vp.verb ! p ! a ++ vp.compl ++ rp.s + } ; ---- ??
+ RelSlash rp slash = {s = \\p,a => slash.s ! p ! a ++ appPrep slash.c2 rp.s} ; + FunRP p np rp = ss (appPrep p np.s ++ rp.s) ; ---- ?? + IdRP = ss relative_s ; + +} diff --git a/lib/src/chinese/ResChi.gf b/lib/src/chinese/ResChi.gf new file mode 100644 index 000000000..d24de980e --- /dev/null +++ b/lib/src/chinese/ResChi.gf @@ -0,0 +1,221 @@ +--# -path=.:../abstract:../common:../../prelude + +--1 Chinese auxiliary operations. +-- +---- This module contains operations that are needed to make the +---- resource syntax work. To define everything that is needed to +---- implement $Test$, it moreover contains regular lexical +---- patterns needed for $Lex$. +-- +resource ResChi = ParamX ** open Prelude in { + + flags coding = utf8 ; + + oper + +-- strings ---- + + defaultStr = "" ; + + than_s = "比" ; + progressive_s = defaultStr ; + possessive_s = "的" ; + imperneg_s = neg_s ; + conjThat = emptyStr ; ---- + reflPron = word "自己" ; -- pron + refl + passive_s = defaultStr ; + relative_s = possessive_s ; -- relative + superlative_s = "最" ; -- superlative, sup + adj + de + zai_s = "在" ; -- copula for place + you_s = "有" ; -- to have + + copula_s = "是" ; + exist_s = word "存在" ; + neg_s = "不" ; + question_s = "吗" ; + yi_s = "一" ; + ordinal_s = "第" ; + xie_s = "些" ; + the_s = "那" ; + geng_s = "更" ; -- more, in comparison + + zai_V = mkVerb "在" [] [] [] [] "不" ; + fullstop_s = "。" ; + questmark_s = "?" ; + exclmark_s = "!" ; + ge_s = "个" ; + di_s = "是" ; -- used in QuestSlash + + emptyStr = [] ; + + +-- Write the characters that constitute a word separately. This enables straightforward tokenization. + + bword : Str -> Str -> Str = \x,y -> x ++ y ; -- change to x + y to treat words as single tokens + + word : Str -> Str = \s -> case s of { + x@? + y@? + z@? + u@? => bword x (bword y (bword z u)) ; + x@? + y@? + z@? => bword x (bword y z) ; + x@? + y@?
=> bword x y ; + _ => s + } ; + + ssword : Str -> SS = \s -> ss (word s) ; + +------------------------------------------------ from Jolene + +-- parameters + +param + Aspect = APlain | APerf | ADurStat | ADurProg | AExper ; ---- APlain added by AR + ConjForm = CPhr CPosType | CSent; + CPosType = CAPhrase | CNPhrase | CVPhrase ; + DeForm = DeNoun | NdNoun ; -- parameter created for noun with/out particle "de" + + AdvType = ATPlace | ATTime | ATManner ; + +-- parts of speech + +oper + + VP = {verb : Verb ; compl : Str ; prePart : Str} ; + NP = {s : Str} ; + +-- for morphology + + Noun : Type = {s : Str; c : Str} ; + Adj : Type = {s : Str; monoSyl: Bool} ; + Verb : Type = {s : Str ; pp,ds,dp,ep : Str ; neg : Str} ; + + regNoun : Str -> Str -> Noun = \s,c -> {s = word s ; c = word c}; + + mkAdj : Str -> Bool -> Adj = \s,b -> {s = word s ; monoSyl = b}; + + complexAP : Str -> Adj = \s -> {s = s ; monoSyl = False} ; + + simpleAdj : Str -> Adj = \s -> case s of { + ? => mkAdj s True ; -- monosyllabic + _ => mkAdj s False + } ; + + copula : Verb = mkVerb "是" [] [] [] [] "不" ; + + regVerb : (walk : Str) -> Verb = \v -> + mkVerb v "了" "着" "在" "过" "没" ; + + mkVerb : (v : Str) -> (pp,ds,dp,ep,neg : Str) -> Verb = \v,pp,ds,dp,ep,neg -> + {s = word v ; pp = pp ; ds = ds ; dp = dp ; ep = ep ; neg = neg} ; + + useVerb : Verb -> Polarity => Aspect => Str = \v -> + table { + Pos => table { + APlain => v.s ; + APerf => v.s ++ v.pp ; + ADurStat => v.s ++ v.ds ; + ADurProg => v.dp ++ v.s ; + AExper => v.s ++ v.ep + } ; + Neg => table { + APlain => v.neg ++ v.s ; --- neg?
+ APerf => "不" ++ v.s ++ v.pp ; + ADurStat => "不" ++ v.s ; + ADurProg => v.neg ++ v.dp ++ v.s ; -- mei or bu + AExper => v.neg ++ v.s ++ v.ep + } + } ; + + infVP : VP -> Str = \vp -> vp.prePart ++ vp.verb.s ++ vp.compl ; + + predV : Verb -> VP = \v -> { + verb = v ; + compl = [] ; + prePart = [] ; + } ; + + insertObj : NP -> VP -> VP = \np,vp -> { + verb = vp.verb ; + compl = np.s ++ vp.compl ; + prePart = vp.prePart + } ; + + insertObjPost : NP -> VP -> VP = \np,vp -> { + verb = vp.verb ; + compl = vp.compl ++ np.s ; + prePart = vp.prePart + } ; + + insertAdv : SS -> VP -> VP = \adv,vp -> { -- put adv in the pre-verbal slot + verb = vp.verb ; + compl = vp.compl ; + prePart = adv.s ++ vp.prePart -- prepend: keep pre-verbal material inserted earlier + } ; + + insertExtra : SS -> VP -> VP = \ext,vp -> + insertObjPost ext vp ; + +-- clauses: keep np and vp separate to enable insertion of IAdv + + Clause : Type = { + s : Polarity => Aspect => Str ; + np : Str; + vp : Polarity => Aspect => Str + } ; + + + mkClause = overload { + mkClause : Str -> Verb -> Clause = \np,v -> mkClauseCompl np (useVerb v) [] ; + mkClause : Str -> (Polarity => Aspect => Str) -> Str -> Clause = mkClauseCompl ; + mkClause : Str -> Verb -> Str -> Clause = \subj,verb,obj -> + mkClauseCompl subj (useVerb verb) obj ; + mkClause : Str -> VP -> Clause = \np,vp -> + mkClauseCompl np (\\p,a => vp.prePart ++ useVerb vp.verb ! p ! a) vp.compl ; + } ; + + mkClauseCompl : Str -> (Polarity => Aspect => Str) -> Str -> Clause = \np,vp,compl -> { + s = \\p,a => np ++ vp ! p ! a ++ compl ; + np = np ; + vp = \\p,a => vp ! p !
a ++ compl + } ; + + +-- for structural words + +param + DetType = DTFull Number | DTNum | DTPoss ; -- this, these, five, our + NumType = NTFull | NTVoid Number ; -- five, sg, pl + +oper + Determiner = {s : Str ; detType : DetType} ; + + mkDet = overload { + mkDet : Str -> Determiner = \s -> {s = s ; detType = DTFull Sg} ; + mkDet : Str -> Number -> Determiner = \s,n -> {s = s ; detType = DTFull n} ; + mkDet : Str -> DetType -> Determiner = \s,d -> {s = s ; detType = d} ; + } ; + + mkQuant : Str -> {s : Str} = ss ; + + pronNP : (s : Str) -> NP = \s -> { + s = word s + } ; + + mkPreposition : Str -> Str -> Preposition = \s,b -> { + prepMain = word s ; + prepPre = word b + } ; + + mkSubj : Str -> Str -> {prePart : Str ; sufPart : Str} = \p,s -> { + prePart = word p ; + sufPart = word s + } ; + + Preposition = {prepMain : Str ; prepPre : Str} ; + +-- added by AR + + mkNP : Str -> NP = ss ; + + appPrep : Preposition -> Str -> Str = \prep,s -> + prep.prepPre ++ s ++ prep.prepMain ; + +} diff --git a/lib/src/chinese/SentenceChi.gf b/lib/src/chinese/SentenceChi.gf new file mode 100644 index 000000000..c443d4690 --- /dev/null +++ b/lib/src/chinese/SentenceChi.gf @@ -0,0 +1,45 @@ +concrete SentenceChi of Sentence = CatChi ** + open Prelude, ResChi in { + + flags optimize=all_subs ; + + lin + + PredVP np vp = mkClause np.s vp ; + + PredSCVP sc vp = mkClause sc.s vp ; + + ImpVP vp = { + s = table { + Pos => infVP vp ; + Neg => neg_s ++ infVP vp + } + } ; + + SlashVP np vp = + mkClauseCompl np.s (\\p,a => vp.prePart ++ useVerb vp.verb ! p ! a) vp.compl + ** {c2 = vp.c2} ; + + SlashVS np vs sslash = <mkClause np.s vs sslash.s : Clause> ** {c2 = sslash.c2} ; -- subject + VS verb + embedded slash sentence; typed like AdvSlash below + + + -- yet another reason for discontinuity of clauses + AdvSlash slash adv = + mkClause slash.np (<\\p,a => adv.s ++ slash.vp ! p ! a : Polarity => Aspect => Str>) [] + ** {c2 = slash.c2} ; + + SlashPrep cl prep = cl ** {c2 = prep} ; + + EmbedS s = ss (conjThat ++ s.s) ; + EmbedQS qs = qs ; + EmbedVP vp = ss (infVP vp) ; + + UseCl t p cl = {s = cl.s ! p.p !
t.t} ; + UseQCl t p cl = {s = cl.s ! p.p ! t.t} ; + UseRCl t p cl = {s = cl.s ! p.p ! t.t} ; + UseSlash t p cl = {s = cl.s ! p.p ! t.t ; c2 = cl.c2} ; + + AdvS a s = ss (a.s ++ s.s) ; + + RelS s r = ss (s.s ++ r.s) ; +} diff --git a/lib/src/chinese/StructuralChi.gf b/lib/src/chinese/StructuralChi.gf new file mode 100644 index 000000000..c346ef02f --- /dev/null +++ b/lib/src/chinese/StructuralChi.gf @@ -0,0 +1,164 @@ +concrete StructuralChi of Structural = CatChi ** + open ParadigmsChi, ResChi, Prelude in { + + flags coding = utf8 ; + +lin + every_Det = mkDet "每" Sg ; + + this_Quant = mkDet "这" ; + that_Quant = mkDet "那" ; + + i_Pron = pronNP "我" ; + youSg_Pron = pronNP "你" ; + he_Pron = pronNP "他" ; + she_Pron = pronNP "她" ; + we_Pron = pronNP "我们" ; + youPl_Pron = pronNP "你们" ; + they_Pron = pronNP "他们" ; + + very_AdA = ssword "非常" ; + + by8means_Prep = mkPrep "旁边" [] ; + in_Prep = mkPrep "里" []; + possess_Prep = mkPrep "的" []; + with_Prep = mkPrep "一起" "和"; + +and_Conj = {s = table { + CPhr CNPhrase => mkConjForm "和" ; + CPhr CAPhrase => mkConjForm "而" ; + CPhr CVPhrase => mkConjForm "又" ; + CSent => mkConjForm [] + } + } ; + or_Conj = {s = table { + CPhr _ => mkConjForm "或" ; + CSent => mkConjForm "还是" + } + } ; + + although_Subj = mkSubj "虽然" "但"; + because_Subj = mkSubj "因为" "所以" ; + when_Subj = mkSubj [] "的时候" ; + +here_Adv = mkAdv "这里" ; +there_Adv = mkAdv "那里" ; +whoSg_IP, whoPl_IP = mkIPL "谁" ; +whatSg_IP, whatPl_IP = mkIPL "什么" ; -- no leading blank: word would make it a token of its own +where_IAdv = mkIAdvL "哪里" ; +when_IAdv = mkIAdvL "什么时候" ; +how_IAdv = mkIAdvL "如何" ; +all_Predet = ssword "所有" ; -- split into characters like the other entries +many_Det = mkDet "多" Pl ; +someSg_Det = mkDet (word "一些") Sg ; +somePl_Det = mkDet (word "一些") Sg ; +few_Det = mkDet "少" Pl ; +other_A = mkA "其他" ; + +oper + mkIPL, mkIAdvL, mkAdA, mkIDetL, mkPConjL, mkCAdv, mkIQuant = ssword ; + +-- hsk + +lin + + +above_Prep = mkPrep "上边" ; +after_Prep = mkPrep "以后" ; +under_Prep = mkPrep "下" ; +why_IAdv = mkIAdvL "为什么" ; +too_AdA = mkAdA "太" ; + +before_Prep = mkPrep "从前" ; --s
+between_Prep = mkPrep "之间" ; --s +but_PConj = mkPConjL "但是" ; --s + + + can_VV = mkVerb "能" [] [] [] [] "不" ; + must_VV = mkVerb "必须" [] [] [] [] "不" ; ---- False "不能" + want_VV = mkVerb "想" [] [] [] [] "不" ; + +can8know_VV = mkV "会" [] [] [] [] "不" ; ---- + + +except_Prep = mkPrep "除了" "以外" ; --s +for_Prep = mkPrep "为了" ; --s +from_Prep = mkPrep "从" ; --s +---how8many_IDet = mkIDet "几" ; --s +---how8much_IDet = mkIDet "多少" ; --s +in8front_Prep = mkPrep "前边" ; --s +it_Pron = pronNP "它" ; --s +---less_CAdv = mkCAdv "少" ; --s +much_Det = mkDet "多" Sg ; --s +---more_CAdv = mkCAdv "更" ; --s +---most_Predet = mkPredet "最" ; --s +no_Quant = mkDet "不" ; --s +not_Predet = ssword "不" ; +---only_Predet = mkPredet "只" ; --s +otherwise_PConj = mkPConjL "还是" ; --s +to_Prep = mkPrep "往" ; --s +---which_IQuant = mkIQuant "哪" ; --s + +have_V2 = mkV2 "有" ; + +yes_Utt = ss copula_s ; +no_Utt = ss neg_s ; + +oper + mkConjForm : Str -> {s1,s2 : Str} = \s -> {s1 = [] ; s2 = word s} ; + +-- manually by AR + +lin + always_AdV = ssword "一直" ; + part_Prep = mkPrep possessive_s ; + language_title_Utt = ssword "中文" ; + please_Voc = ss "请" ; + quite_Adv = mkAdA "得很" ; + +-- just missing + +lin +almost_AdA = ssword "几乎" ; +almost_AdN = ssword "几乎" ; +--as_CAdv = ssword "什么" ; -- as good as X +at_least_AdN = ssword "最少" ; -- at least five +at_most_AdN = ssword "最多" ; +behind_Prep = mkPrep "后面" "在"; +--both7and_DConj = ssword "什么" ; -- both - and +by8agent_Prep = mkPrep "被" ; -- by for agent in passive + -- [mark] 被 +during_Prep = mkPrep "期间" "在" ; -- [mark] often equivalent to nothing + -- translation for "he swam during this summer. " and "he swam this summer." 
are often the same +--either7or_DConj = ssword "什么" ; +everybody_NP = ssword "每个人" ; -- [mark] "每个人": 每(every)+个(classifier)+人(person) +everything_NP = ssword "每件事" ; -- [mark] "每件事": 每(every)+件(classifier)+事(thing) +everywhere_Adv = mkAdv "到处" ; +here7from_Adv = mkAdv "从这里" ; -- from here +here7to_Adv = mkAdv "到这里" ; -- to here + -- [mark] "从这里" 从(from) 这里(here) + -- "到这里" 到( to ) 这里(here) +how8many_IDet = ssword "多少" ; +how8much_IAdv = ssword "多少" ; +if_Subj = mkSubj "如果" "就" ; -- [mark] "就" often comes between NP and VP +--less_CAdv = ssword "什么" ; -- less good than +--more_CAdv = ssword "什么" ; +most_Predet = ssword "大多数" ; +nobody_NP = ssword "没人" ; +nothing_NP = ssword "没有什么" ; +on_Prep = mkPrep "上" "在" ; +only_Predet = ssword "只有" ; -- only John +so_AdA = ssword "如此" ; +somebody_NP = ssword "某人" ; +something_NP = ssword "某事" ; -- [mark] in sent, it depends on the context +somewhere_Adv = mkAdv "某处" ; +that_Subj = mkSubj [] "," ; -- that + S [mark] comma; no trailing blank, which word would emit as a separate token +there7from_Adv = mkAdv "从那里" ; -- from there +there7to_Adv = mkAdv "到那里" ; +therefore_PConj = ssword "因此" ; +through_Prep = mkPrep "通过" ; +which_IQuant = ssword [] ; -- [mark] in sent, it depends on the context +without_Prep = mkPrep "没有" []; +youPol_Pron = ssword "您" ; -- polite you + +} diff --git a/lib/src/chinese/SymbolChi.gf b/lib/src/chinese/SymbolChi.gf new file mode 100644 index 000000000..908affa37 --- /dev/null +++ b/lib/src/chinese/SymbolChi.gf @@ -0,0 +1,36 @@ +--# -path=.:../abstract:../common + +concrete SymbolChi of Symbol = CatChi ** open Prelude, ResChi in { + + flags coding = utf8; + + lin + SymbPN i = i ; + IntPN i = i ; + FloatPN i = i ; + NumPN i = i ; + CNIntNP cn i = { + s = cn.s ++ i.s ; + c = cn.c + } ; + CNSymbNP det cn xs = ss (det.s ++ cn.s ++ xs.s) ; ---- + CNNumNP cn i = { + s = cn.s ++ i.s ; + c = cn.c + } ; + + SymbS sy = sy ; + SymbNum sy = sy ; + SymbOrd sy = sy ; + +lincat + + Symb, [Symb] = SS ; + +lin + MkSymb s = s ; + + BaseSymb = infixSS "" ; + ConsSymb = infixSS "" ;
+ +} diff --git a/lib/src/chinese/TenseChi.gf b/lib/src/chinese/TenseChi.gf new file mode 100644 index 000000000..060c9fe3e --- /dev/null +++ b/lib/src/chinese/TenseChi.gf @@ -0,0 +1,13 @@ +concrete TenseChi of Tense = + CatChi [Tense,Temp], TenseX [Ant,Pol,AAnter,ASimul,PNeg,PPos] ** open ResChi in { + + lin + TTAnt t a = {s = t.s ++ a.s ; t = t.t} ; + + ---- ?? + TPres = {s = [] ; t = APlain} ; + TPast = {s = [] ; t = APerf} ; + TFut = {s = [] ; t = ADurProg} ; + TCond = {s = [] ; t = ADurStat} ; + +} diff --git a/lib/src/chinese/TextChi.gf b/lib/src/chinese/TextChi.gf new file mode 100644 index 000000000..acb204ff3 --- /dev/null +++ b/lib/src/chinese/TextChi.gf @@ -0,0 +1,11 @@ +concrete TextChi of Text = CommonX - [Temp,Tense,Adv] ** open ResChi in { + +-- Sentences are followed by the punctuation marks fullstop_s, questmark_s and exclmark_s from ResChi. + + lin + TEmpty = {s = []} ; + TFullStop x xs = {s = x.s ++ fullstop_s ++ xs.s} ; + TQuestMark x xs = {s = x.s ++ questmark_s ++ xs.s} ; + TExclMark x xs = {s = x.s ++ exclmark_s ++ xs.s} ; + +} diff --git a/lib/src/chinese/VerbChi.gf b/lib/src/chinese/VerbChi.gf new file mode 100644 index 000000000..e6ddd0eb8 --- /dev/null +++ b/lib/src/chinese/VerbChi.gf @@ -0,0 +1,60 @@ +concrete VerbChi of Verb = CatChi ** open ResChi, Prelude in { + + flags optimize=all_subs ; + + lin + UseV = predV ; + + SlashV2a v = predV v ** {c2 = v.c2} ; + + Slash2V3 v np = insertObj np (predV v) ** {c2 = v.c3} ; ---- to check arg order + Slash3V3 v np = insertObj np (predV v) ** {c2 = v.c2} ; + + SlashV2A v ap = insertObj ap (predV v) ** {c2 = v.c2} ; + + SlashV2V v vp = insertObj (mkNP (infVP vp)) (predV v) ** {c2 = v.c2} ; + SlashV2S v s = insertObj s (predV v) ** {c2 = v.c2} ; + SlashV2Q v q = insertObj q (predV v) ** {c2 = v.c2} ; + + ComplVV v vp = { + verb = v ; + compl = vp.verb.s ++ vp.compl ; + prePart = vp.prePart + } ; + + ComplVS v s = insertObj s (predV v) ; + ComplVQ v q = insertObj q (predV v) ; + ComplVA v ap = insertObj ap (predV v) ; + + ComplSlash
vp np = insertObj (mkNP (appPrep vp.c2 np.s)) vp ; + + UseComp comp = comp ; + + SlashVV v vp = ---- too simple? + insertObj (mkNP (infVP vp)) (predV v) ** {c2 = vp.c2} ; + + SlashV2VNP v np vp = + insertObj np + (insertObj (mkNP (infVP vp)) (predV v)) ** {c2 = vp.c2} ; + + AdvVP vp adv = case adv.advType of { + ATManner => insertObj adv vp ; -- he sleeps well + _ => insertAdv (ss (zai_V.s ++ adv.s)) vp -- he sleeps in the house / today + } ; + + AdVVP adv vp = insertAdv adv vp ; + + ReflVP vp = insertObj (mkNP reflPron) vp ; + + PassV2 v = insertObj (mkNP passive_s) (predV v) ; ---- + + CompAP ap = insertObj (mkNP ap.s) (predV copula) ; ---- hen / bu + + CompNP np = insertObj np (predV copula) ; ---- + + CompCN cn = insertObj cn (predV copula) ; ---- + + CompAdv adv = insertObj adv (predV zai_V) ; + +} +