diff --git a/lib/resource-1.0/minimal/MinLexicon.gf b/lib/resource-1.0/minimal/MinLexicon.gf new file mode 100644 index 000000000..16954a549 --- /dev/null +++ b/lib/resource-1.0/minimal/MinLexicon.gf @@ -0,0 +1,14 @@ +abstract MinLexicon = Cat ** { + fun + add_V3 : V3 ; + apple_N : N ; + become_VA : VA ; + big_A : A ; + break_V2 : V2 ; + brother_N2 : N2 ; + paint_V2A : V2A ; +-- paris_PN : PN ; + sleep_V : V ; + wonder_VQ : VQ ; + +} diff --git a/lib/resource-1.0/minimal/MinLexiconEng.gf b/lib/resource-1.0/minimal/MinLexiconEng.gf new file mode 100644 index 000000000..c922c309b --- /dev/null +++ b/lib/resource-1.0/minimal/MinLexiconEng.gf @@ -0,0 +1,2 @@ +concrete MinLexiconEng of MinLexicon = MinLexiconI with + (Lexicon = LexiconEng) ; diff --git a/lib/resource-1.0/minimal/MinLexiconFre.gf b/lib/resource-1.0/minimal/MinLexiconFre.gf new file mode 100644 index 000000000..3295f0a7e --- /dev/null +++ b/lib/resource-1.0/minimal/MinLexiconFre.gf @@ -0,0 +1,2 @@ +concrete MinLexiconFre of MinLexicon = MinLexiconI with + (Lexicon = LexiconFre) ; diff --git a/lib/resource-1.0/minimal/MinLexiconI.gf b/lib/resource-1.0/minimal/MinLexiconI.gf new file mode 100644 index 000000000..47763062d --- /dev/null +++ b/lib/resource-1.0/minimal/MinLexiconI.gf @@ -0,0 +1,17 @@ +incomplete concrete MinLexiconI of MinLexicon = Cat ** open Lexicon in { + + lin + + add_V3 = add_V3 ; + apple_N = apple_N ; + become_VA = become_VA ; + big_A = big_A ; + break_V2 = break_V2 ; + brother_N2 = brother_N2 ; + paint_V2A = paint_V2A ; +-- paris_PN = paris_PN ; + say_VS = say_VS ; + sleep_V = sleep_V ; + wonder_VQ = wonder_VQ ; + + } ; diff --git a/lib/resource-1.0/minimal/MinStructural.gf b/lib/resource-1.0/minimal/MinStructural.gf new file mode 100644 index 000000000..26ccda9e7 --- /dev/null +++ b/lib/resource-1.0/minimal/MinStructural.gf @@ -0,0 +1,29 @@ +abstract MinStructural = Cat ** { + + fun + +-- This is an alphabetical list of structural words: just one from each category +-- but so that all variation is created, e.g. all persons for pronouns. + + almost_AdN : AdN ; + always_AdV : AdV ; + and_Conj : Conj ; + but_PConj : PConj ; + can_VV : VV ; + either7or_DConj : DConj ; + he_Pron : Pron ; + here_Adv : Adv ; + how_IAdv : IAdv ; + i_Pron : Pron ; + if_Subj : Subj ; + in_Prep : Prep ; + that_NP : NP ; + we_Pron : Pron ; + whichPl_IDet : IDet ; + whichSg_IDet : IDet ; + whoPl_IP : IP ; + whoSg_IP : IP ; + youSg_Pron : Pron ; + youPl_Pron : Pron ; + +} diff --git a/lib/resource-1.0/minimal/MinStructuralEng.gf b/lib/resource-1.0/minimal/MinStructuralEng.gf new file mode 100644 index 000000000..aadb05899 --- /dev/null +++ b/lib/resource-1.0/minimal/MinStructuralEng.gf @@ -0,0 +1,2 @@ +concrete MinStructuralEng of MinStructural = MinStructuralI with + (Structural = StructuralEng) ; diff --git a/lib/resource-1.0/minimal/MinStructuralFre.gf b/lib/resource-1.0/minimal/MinStructuralFre.gf new file mode 100644 index 000000000..d7557ab5a --- /dev/null +++ b/lib/resource-1.0/minimal/MinStructuralFre.gf @@ -0,0 +1,2 @@ +concrete MinStructuralFre of MinStructural = MinStructuralI with + (Structural = StructuralFre) ; diff --git a/lib/resource-1.0/minimal/MinStructuralI.gf b/lib/resource-1.0/minimal/MinStructuralI.gf new file mode 100644 index 000000000..d3c00d6f3 --- /dev/null +++ b/lib/resource-1.0/minimal/MinStructuralI.gf @@ -0,0 +1,23 @@ +incomplete concrete MinStructuralI of MinStructural = open Structural in { + lin + almost_AdN = almost_AdN ; + always_AdV = always_AdV ; + and_Conj = and_Conj ; + but_PConj = but_PConj ; + can_VV = can_VV ; + either7or_DConj = either7or_DConj ; + he_Pron = he_Pron ; + here_Adv = here_Adv ; + how_IAdv = how_IAdv ; + i_Pron = i_Pron ; + if_Subj = if_Subj ; + in_Prep = in_Prep ; + that_NP = that_NP ; + we_Pron = we_Pron ; + whichPl_IDet = whichPl_IDet ; + whichSg_IDet = whichSg_IDet ; + whoPl_IP = whoPl_IP ; + whoSg_IP = whoSg_IP ; + youSg_Pron = youSg_Pron ; + youPl_Pron = youPl_Pron ; + } ; diff --git a/lib/resource-1.0/minimal/Minimal.gf b/lib/resource-1.0/minimal/Minimal.gf new file mode 100644 index 000000000..1390ffe59 --- /dev/null +++ b/lib/resource-1.0/minimal/Minimal.gf @@ -0,0 +1,21 @@ +--1 Minimal Resource Grammar + +-- This module defines the syntax with a minimal lexicon. +-- This is useful for treebank generation. + +abstract Minimal = + Noun, + Verb, + Adjective, + Adverb, + Numeral, + Sentence, + Question, + Relative, + Conjunction, + Phrase, + Text, + MinStructural, + Idiom, + MinLexicon + ** {} ; diff --git a/lib/resource-1.0/minimal/MinimalEng.gf b/lib/resource-1.0/minimal/MinimalEng.gf new file mode 100644 index 000000000..7499ad927 --- /dev/null +++ b/lib/resource-1.0/minimal/MinimalEng.gf @@ -0,0 +1,22 @@ +--# -path=.:../english:../abstract:../common:prelude + +concrete MinimalEng of Minimal = + NounEng, + VerbEng, + AdjectiveEng, + AdverbEng, + NumeralEng, + SentenceEng, + QuestionEng, + RelativeEng, + ConjunctionEng, + PhraseEng, + TextX, + StructuralEng, + IdiomEng, + LexiconEng + ** { + +flags startcat = Phr ; -- unlexer = text ; lexer = text ; + +} ; diff --git a/lib/resource-1.0/minimal/MinimalFre.gf b/lib/resource-1.0/minimal/MinimalFre.gf new file mode 100644 index 000000000..fc8a2ca65 --- /dev/null +++ b/lib/resource-1.0/minimal/MinimalFre.gf @@ -0,0 +1,22 @@ +--# -path=.:../french:../romance:../abstract:../common:prelude + +concrete MinimalFre of Minimal = + NounFre, + VerbFre, + AdjectiveFre, + AdverbFre, + NumeralFre, + SentenceFre, + QuestionFre, + RelativeFre, + ConjunctionFre, + PhraseFre, + TextX, + StructuralFre, + IdiomFre, + LexiconFre + ** { + +flags startcat = Phr ; -- unlexer = text ; lexer = text ; + +} ; diff --git a/lib/resource-1.0/minimal/MkLex.hs b/lib/resource-1.0/minimal/MkLex.hs new file mode 100644 index 000000000..3260843d5 --- /dev/null +++ b/lib/resource-1.0/minimal/MkLex.hs @@ -0,0 +1,143 @@ +module MkLexicon where + +import Char + +allLines o f = do + s <- readFile f + mapM_ (putStrLn . o) (filter noComm (lines s)) + + +-- discard comments and empty lines + +noComm s = case s of + '-':'-':_ -> False + "" -> False + _ -> True + +-- remove tailing comments + +remTail s = case s of + '-':'-':_ -> [] + c:cs -> c : remTail cs + _ -> s + +-- postfix with category + +postfix p s = takeWhile (not . isSpace) s ++ "_" ++ p + +-- make fun rule + +mkFun s = + let (w,p) = span (/='_') s in + " " ++ s ++ " : " ++ tail p ++ " ;" + +-- make reused lin rule + +mkRLin s = + let w = head (words s) in + " " ++ w ++ " = " ++ w ++ " ;" + +-- make regular lin rule + +mkLin s = + let (w,p) = span (/='_') s in + " " ++ s ++ " = " ++ lin (tail p) w ++ " ;" + where + lin cat w = case cat of + "V2" -> "dirV2 (regV" ++ " \"" ++ w ++ "\")" + 'V':_ -> "mk" ++ cat ++ " (regV" ++ " \"" ++ w ++ "\")" + _ -> "reg" ++ cat ++ " \"" ++ w ++ "\"" + +-- normalize identifiers in Structural + +mkIdent s = case words s of + w:ws -> if obsolete w then "" + else " " ++ (unwords $ mkId (update w) : ws) + _ -> s + where + mkId name@(c:cs) = + let + (x,y) = span isCat cs + in + toLower c : clean x ++ "_" ++ new y + isCat = flip notElem "PDNVCAIS" + clean x = case span isLower x of + (_,[]) -> x + (u,v) -> u ++ "8" ++ map toLower v + new y = case y of + "NumDet" -> "NDet" + _ -> y + obsolete w = elem w $ words "TheseNumNP ThoseNumNP NobodyNP NeitherNor NoDet AnyDet" + update w = case w of + "EitherOr" -> "EitherOrConjD" + "BothAnd" -> "BothAndConjD" + "PhrYes" -> "YesPhr" + "PhrNo" -> "NoPhr" + "WeNumNP" -> "WeNP" + "YeNumNP" -> "YeNP" + "HowManyDet" -> "HowManyIDet" + "MostsDet" -> "MostManyDet" + "WhichDet" -> "WhichOneIDet" + "WhichNDet" -> "WhichManyIDet" + "EverywhereNP" -> "EverywhereAdv" + "SomewhereNP" -> "SomewhereAdv" + "AgentPrep" -> "By8agentPrep" + _ -> w + +-- massage French verbs 9/2/2005 + +freVerb s = case words s of + v:_ -> " " ++ v ++ " : " ++ cat v ++ " ;" + _ -> [] + where + cat v = dropWhile (not . isUpper) v + +-- Swedish verbs 17/2 + +sweVerb s = case words s of + ('v':a:u:[]):verb:_ -> "fun " ++ verb ++ " : V ;\n" ++ + "lin " ++ verb ++ " = " ++ infl a u verb ++ " ;" + _ -> [] + where + infl a u verb = + let + (dne,geb) = span isConsonant $ tail $ reverse verb + (beg,voc,end) = (reverse (tail geb), head geb, reverse dne) + (pret,sup) = (beg++ [toLower a] ++end, beg++ [toLower u] ++ end ++"it") + in + unwords ["irregV", prQuot verb, prQuot pret, prQuot sup] + +prQuot s = "\"" ++ s ++ "\"" + +isConsonant = not . isVowel + +isVowel = flip elem "aeiouyäöå" + +-- Norwegian 13/3 + +groupLines :: [String] -> [String] +groupLines ss = [unwords [a, b, c] | [a,_,b,c,_] <- grps ss] where + grps ls = let (g,rest) = splitAt 5 ls in g:grps rest + +lin2fun s = case words s of + _:fun:_:_ -> " fun " ++ fun ++ " : " ++ cat fun ++ " ;" + _ -> s + where + cat fun = reverse (takeWhile (/='_') (reverse fun)) + +-- filter from a given file those lines whose first word is in a sought-set + +allThose :: [String] -> [String] -> [String] +allThose soughts givens = concatMap seek soughts where + seek w = let s = [line | line <- givens, w':_ <- [words line], w == w'] + in if null s then ["-- " ++ w] else s + +-- do this with files +-- example: getAllThose "abstract/Mtmp" "english/BasicEng.gf" + +getAllThose :: FilePath -> FilePath -> IO () +getAllThose sought given = do + s <- readFile sought + gi <- readFile given + let so = [w | l <- lines s, w:_ <- [words l]] + mapM_ putStrLn $ allThose so $ lines gi diff --git a/lib/resource-1.0/minimal/Tab.hs b/lib/resource-1.0/minimal/Tab.hs new file mode 100644 index 000000000..59e002c17 --- /dev/null +++ b/lib/resource-1.0/minimal/Tab.hs @@ -0,0 +1,58 @@ +module Tab where + +import Data.Map + +type Tree = String ---- +type Bracket = [String] --- + +type Tab = Map [String] [Tree] + + +-- build a Tab from a unilingual treebank file (not XML) + +test file = do + tab <- tb2tab file + tst tab + where + tst tab = do + s <- getLine +-- let ts = analyse tab $ words s + let ts = Prelude.map unwords $ analyseParts tab $ words s + mapM_ putStrLn ts + tst tab + +-- analyse whole inputs +analyse :: Tab -> [String] -> [Tree] +analyse tab s = maybe [] id $ Data.Map.lookup s tab + +-- analyse parts of inputs +analyseParts :: Tab -> [String] -> [Bracket] +analyseParts tab = anap where + anap ws = case ws of + w:vs -> case results ws of + [] -> [w:res | res <- anap vs] + (ts,ws2):_ -> [t:res | t <- ts, res <- anap ws2] + _ -> [[]] + results ws = Prelude.filter (not . Prelude.null . fst) + [(ana ws1,ws2) | i <- [0..length ws], let (ws1,ws2) = splitAt i ws] + ana = analyse tab + + +tb2tab :: FilePath -> IO Tab +tb2tab file = do + ss <- readFile file >>= return . lines + let ps = pairs ss + return $ fromListWith (++) ps + +pairs :: [String] -> [([String],[String])] +pairs xs = case xs of + x:y:ys -> (words y,[x]) : pairs ys + _ -> [] + + + +{- + +ceci - that_NP + +-}