a minimal-lexicon version of resource

2026-05-24 02:12:50 -06:00 · 2006-03-02 17:20:46 +00:00
parent bf872d018e
commit 27467d32fd
13 changed files with 357 additions and 0 deletions
--- a/lib/resource-1.0/minimal/MinLexicon.gf
+++ b/lib/resource-1.0/minimal/MinLexicon.gf
@@ -0,0 +1,14 @@
 abstract MinLexicon = Cat ** {
  fun
  add_V3 : V3 ;
  apple_N : N ;
  become_VA : VA ;
  big_A : A ;
  break_V2 : V2 ;
  brother_N2 : N2 ;
  paint_V2A : V2A ;
 --  paris_PN : PN ;
  sleep_V : V ;
  wonder_VQ : VQ ;
 }
--- a/lib/resource-1.0/minimal/MinLexiconEng.gf
+++ b/lib/resource-1.0/minimal/MinLexiconEng.gf
@@ -0,0 +1,2 @@
 concrete MinLexiconEng of MinLexicon = MinLexiconI with
  (Lexicon = LexiconEng) ;
--- a/lib/resource-1.0/minimal/MinLexiconFre.gf
+++ b/lib/resource-1.0/minimal/MinLexiconFre.gf
@@ -0,0 +1,2 @@
 concrete MinLexiconFre of MinLexicon = MinLexiconI with
  (Lexicon = LexiconFre) ;
--- a/lib/resource-1.0/minimal/MinLexiconI.gf
+++ b/lib/resource-1.0/minimal/MinLexiconI.gf
@@ -0,0 +1,17 @@
 incomplete concrete MinLexiconI of MinLexicon = Cat ** open Lexicon in {
  lin
  add_V3 = add_V3 ;
  apple_N = apple_N ;
  become_VA = become_VA ;
  big_A = big_A ;
  break_V2 = break_V2 ;
  brother_N2 = brother_N2 ;
  paint_V2A = paint_V2A ;
 --  paris_PN = paris_PN ;
  say_VS = say_VS ;
  sleep_V = sleep_V ;
  wonder_VQ = wonder_VQ ;
  } ;
--- a/lib/resource-1.0/minimal/MinStructural.gf
+++ b/lib/resource-1.0/minimal/MinStructural.gf
@@ -0,0 +1,29 @@
 abstract MinStructural = Cat ** {
  fun
 -- This is an alphabetical list of structural words: just one from each category
 -- but so that all variation is created, e.g. all persons for pronouns.
  almost_AdN : AdN ;   
  always_AdV : AdV ;
  and_Conj : Conj ;
  but_PConj : PConj ;
  can_VV : VV ;
  either7or_DConj : DConj ;
  he_Pron : Pron ;
  here_Adv : Adv ;
  how_IAdv : IAdv ;
  i_Pron : Pron ;
  if_Subj : Subj ;
  in_Prep : Prep ;
  that_NP : NP ;
  we_Pron : Pron ;
  whichPl_IDet : IDet ;
  whichSg_IDet : IDet ;
  whoPl_IP : IP ;
  whoSg_IP : IP ;
  youSg_Pron : Pron ;
  youPl_Pron : Pron ;
 }
--- a/lib/resource-1.0/minimal/MinStructuralEng.gf
+++ b/lib/resource-1.0/minimal/MinStructuralEng.gf
@@ -0,0 +1,2 @@
 concrete MinStructuralEng of MinStructural = MinStructuralI with
  (Structural = StructuralEng) ;
--- a/lib/resource-1.0/minimal/MinStructuralFre.gf
+++ b/lib/resource-1.0/minimal/MinStructuralFre.gf
@@ -0,0 +1,2 @@
 concrete MinStructuralFre of MinStructural = MinStructuralI with
  (Structural = StructuralFre) ;
--- a/lib/resource-1.0/minimal/MinStructuralI.gf
+++ b/lib/resource-1.0/minimal/MinStructuralI.gf
@@ -0,0 +1,23 @@
 incomplete concrete MinStructuralI of MinStructural = open Structural in {
 lin
  almost_AdN = almost_AdN ;
  always_AdV = always_AdV ;
  and_Conj = and_Conj ;
  but_PConj = but_PConj ;
  can_VV = can_VV ;
  either7or_DConj = either7or_DConj ;
  he_Pron = he_Pron ;
  here_Adv = here_Adv ;
  how_IAdv = how_IAdv ;
  i_Pron = i_Pron ;
  if_Subj = if_Subj ;
  in_Prep = in_Prep ;
  that_NP = that_NP ;
  we_Pron = we_Pron ;
  whichPl_IDet = whichPl_IDet ;
  whichSg_IDet = whichSg_IDet ;
  whoPl_IP = whoPl_IP ;
  whoSg_IP = whoSg_IP ;
  youSg_Pron = youSg_Pron ;
  youPl_Pron = youPl_Pron ;
  } ;
--- a/lib/resource-1.0/minimal/Minimal.gf
+++ b/lib/resource-1.0/minimal/Minimal.gf
@@ -0,0 +1,21 @@
 --1 Minimal Resource Grammar
 -- This module defines the syntax with a minimal lexicon.
 -- This is useful for treebank generation.
 abstract Minimal = 
  Noun,
  Verb, 
  Adjective,
  Adverb,
  Numeral,
  Sentence, 
  Question,
  Relative,
  Conjunction,
  Phrase,
  Text,
  MinStructural,
  Idiom,
  MinLexicon
  ** {} ;
--- a/lib/resource-1.0/minimal/MinimalEng.gf
+++ b/lib/resource-1.0/minimal/MinimalEng.gf
@@ -0,0 +1,22 @@
 --# -path=.:../english:../abstract:../common:prelude
 concrete MinimalEng of Minimal = 
  NounEng, 
  VerbEng, 
  AdjectiveEng,
  AdverbEng,
  NumeralEng,
  SentenceEng,
  QuestionEng,
  RelativeEng,
  ConjunctionEng,
  PhraseEng,
  TextX,
  StructuralEng,
  IdiomEng,
  LexiconEng
  ** {
 flags startcat = Phr ; -- unlexer = text ; lexer = text ;
 } ;
--- a/lib/resource-1.0/minimal/MinimalFre.gf
+++ b/lib/resource-1.0/minimal/MinimalFre.gf
@@ -0,0 +1,22 @@
 --# -path=.:../french:../romance:../abstract:../common:prelude
 concrete MinimalFre of Minimal = 
  NounFre, 
  VerbFre, 
  AdjectiveFre,
  AdverbFre,
  NumeralFre,
  SentenceFre,
  QuestionFre,
  RelativeFre,
  ConjunctionFre,
  PhraseFre,
  TextX,
  StructuralFre,
  IdiomFre,
  LexiconFre
  ** {
 flags startcat = Phr ; -- unlexer = text ; lexer = text ;
 } ;
--- a/lib/resource-1.0/minimal/MkLex.hs
+++ b/lib/resource-1.0/minimal/MkLex.hs
@@ -0,0 +1,143 @@
 module MkLexicon where
 import Char
 allLines o f = do
  s <- readFile f
  mapM_ (putStrLn . o) (filter noComm (lines s))
 -- discard comments and empty lines
 noComm s = case s of
  '-':'-':_ -> False
  "" -> False
  _ -> True
 -- remove tailing comments
 remTail s = case s of
  '-':'-':_ -> []
  c:cs -> c : remTail cs
  _ -> s
 -- postfix with category
 postfix p s = takeWhile (not . isSpace) s ++ "_" ++ p
 -- make fun rule
 mkFun s = 
  let (w,p) = span (/='_') s in 
  "  " ++ s ++ " : " ++ tail p ++ " ;"
 -- make reused lin rule
 mkRLin s = 
  let w = head (words s) in 
  "  " ++ w ++ " = " ++ w ++ " ;"
 -- make regular lin rule
 mkLin s = 
  let (w,p) = span (/='_') s in 
  "  " ++ s ++ " = " ++ lin (tail p) w ++ " ;"
 where
  lin cat w = case cat of
    "V2"  -> "dirV2 (regV" ++ " \"" ++ w ++ "\")" 
    'V':_ -> "mk" ++ cat ++ " (regV" ++ " \"" ++ w ++ "\")" 
    _     -> "reg" ++ cat ++ " \"" ++ w ++ "\"" 
 -- normalize identifiers in Structural
 mkIdent s = case words s of
  w:ws -> if obsolete w then "" 
          else "  " ++ (unwords $ mkId (update w) : ws) 
  _ -> s
 where
   mkId name@(c:cs) = 
     let 
       (x,y) = span isCat cs 
     in 
     toLower c : clean x ++ "_" ++ new y
   isCat = flip notElem "PDNVCAIS"
   clean x = case span isLower x of
     (_,[]) -> x
     (u,v)  -> u ++ "8" ++ map toLower v
   new y = case y of
     "NumDet" -> "NDet"
     _ -> y
   obsolete w = elem w $ words "TheseNumNP ThoseNumNP NobodyNP NeitherNor NoDet AnyDet"
   update w = case w of
     "EitherOr" -> "EitherOrConjD"
     "BothAnd"  -> "BothAndConjD"
     "PhrYes"   -> "YesPhr"
     "PhrNo"    -> "NoPhr"
     "WeNumNP"    -> "WeNP"
     "YeNumNP"    -> "YeNP"
     "HowManyDet" -> "HowManyIDet"
     "MostsDet" -> "MostManyDet"
     "WhichDet" -> "WhichOneIDet"
     "WhichNDet" -> "WhichManyIDet"
     "EverywhereNP" -> "EverywhereAdv"
     "SomewhereNP" -> "SomewhereAdv"
     "AgentPrep" -> "By8agentPrep"
     _ -> w
 -- massage French verbs 9/2/2005
 freVerb s = case words s of
  v:_ -> "  " ++ v ++ " : " ++ cat v ++ " ;"
  _ -> []
 where
   cat v = dropWhile (not . isUpper) v
 -- Swedish verbs 17/2
 sweVerb s = case words s of
  ('v':a:u:[]):verb:_ -> "fun " ++ verb ++ " : V ;\n" ++ 
                         "lin " ++ verb ++ " = " ++ infl a u verb ++ " ;"
  _ -> []
 where
   infl a u verb = 
     let
       (dne,geb) = span isConsonant $ tail $ reverse verb 
       (beg,voc,end) = (reverse (tail geb), head geb, reverse dne)
       (pret,sup) = (beg++ [toLower a] ++end, beg++ [toLower u] ++ end ++"it")
     in
     unwords ["irregV", prQuot verb, prQuot pret, prQuot sup] 
 prQuot s = "\"" ++ s ++ "\""
 isConsonant = not . isVowel
 isVowel = flip elem "aeiouyäöå"
 -- Norwegian 13/3
 groupLines :: [String] -> [String]
 groupLines ss = [unwords [a, b, c] | [a,_,b,c,_] <- grps ss] where
  grps ls = let (g,rest) = splitAt 5 ls in g:grps rest
 lin2fun s = case words s of
  _:fun:_:_ -> "  fun " ++ fun ++ " : " ++ cat fun ++ " ;"
  _ -> s 
 where
   cat fun = reverse (takeWhile (/='_') (reverse fun))
 -- filter from a given file those lines whose first word is in a sought-set
 allThose :: [String] -> [String] -> [String]
 allThose soughts givens = concatMap seek soughts where
  seek w = let s = [line | line <- givens, w':_ <- [words line], w == w']
           in if null s then ["-- " ++ w] else s
 -- do this with files
 -- example: getAllThose "abstract/Mtmp" "english/BasicEng.gf"
 getAllThose :: FilePath -> FilePath -> IO ()
 getAllThose sought given = do
  s  <- readFile sought
  gi <- readFile given
  let so = [w | l <- lines s, w:_ <- [words l]] 
  mapM_ putStrLn $ allThose so $ lines gi
--- a/lib/resource-1.0/minimal/Tab.hs
+++ b/lib/resource-1.0/minimal/Tab.hs
@@ -0,0 +1,58 @@
 module Tab where
 import Data.Map
 type Tree = String ----
 type Bracket = [String] ---
 type Tab = Map [String] [Tree]
 -- build a Tab from a unilingual treebank file (not XML)
 test file = do
  tab <- tb2tab file
  tst tab
 where
  tst tab = do
    s  <- getLine 
 --    let ts = analyse tab $ words s
    let ts = Prelude.map unwords $ analyseParts tab $ words s
    mapM_ putStrLn ts
    tst tab
 -- analyse whole inputs
 analyse :: Tab -> [String] -> [Tree]
 analyse tab s = maybe [] id $ Data.Map.lookup s tab
 -- analyse parts of inputs
 analyseParts :: Tab -> [String] -> [Bracket]
 analyseParts tab = anap where
  anap ws = case ws of
    w:vs -> case results ws of
      []         -> [w:res | res <- anap vs]
      (ts,ws2):_ -> [t:res | t <- ts, res <- anap ws2]
    _ -> [[]]
  results ws = Prelude.filter (not . Prelude.null . fst) 
    [(ana ws1,ws2) | i <- [0..length ws], let (ws1,ws2) = splitAt i ws]
  ana = analyse tab
 tb2tab :: FilePath -> IO Tab
 tb2tab file = do
  ss <- readFile file >>= return . lines
  let ps = pairs ss
  return $ fromListWith (++) ps
 pairs :: [String] -> [([String],[String])]
 pairs xs = case xs of
  x:y:ys -> (words y,[x]) : pairs ys
  _ -> []
 {-
 ceci - that_NP 
 -}
		`@@ -0,0 +1,2 @@`
							`concrete MinLexiconEng of MinLexicon = MinLexiconI with`
							`(Lexicon = LexiconEng) ;`
		`@@ -0,0 +1,2 @@`
							`concrete MinLexiconFre of MinLexicon = MinLexiconI with`
							`(Lexicon = LexiconFre) ;`
		`@@ -0,0 +1,2 @@`
							`concrete MinStructuralEng of MinStructural = MinStructuralI with`
							`(Structural = StructuralEng) ;`
		`@@ -0,0 +1,2 @@`
							`concrete MinStructuralFre of MinStructural = MinStructuralI with`
							`(Structural = StructuralFre) ;`