mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-05-07 18:22:50 -06:00
changed names of resource-1.3; added a note on homepage on release
This commit is contained in:
164
lib/resource-1.3/minimal/MkLex.hs
Normal file
164
lib/resource-1.3/minimal/MkLex.hs
Normal file
@@ -0,0 +1,164 @@
|
||||
module MkLexicon where
|
||||
|
||||
import Char
|
||||
import qualified Data.Set as S
|
||||
|
||||
-- apply function o to each line in file f
|
||||
|
||||
allLines o f = do
|
||||
s <- readFile f
|
||||
mapM_ (putStrLn . o) (filter noComm (lines s))
|
||||
|
||||
|
||||
-- take each line in file k as a key, and choose those lines in file f
|
||||
-- that match any of the keys (i.e. whose first word matches it)
|
||||
chooseKeys k f = do
|
||||
keys <- readFile k >>= return . (S.fromList . lines)
|
||||
let choose line = case words line of
|
||||
w:_ -> S.member w keys
|
||||
_ -> False
|
||||
old <- readFile f >>= return . lines
|
||||
mapM_ (putStrLn . drop 2) $ filter choose old -- to remove extra indent
|
||||
|
||||
|
||||
-- discard comments and empty lines
|
||||
|
||||
noComm s = case s of
|
||||
'-':'-':_ -> False
|
||||
"" -> False
|
||||
_ -> True
|
||||
|
||||
-- remove tailing comments
|
||||
|
||||
remTail s = case s of
|
||||
'-':'-':_ -> []
|
||||
c:cs -> c : remTail cs
|
||||
_ -> s
|
||||
|
||||
-- postfix with category
|
||||
|
||||
postfix p s = takeWhile (not . isSpace) s ++ "_" ++ p
|
||||
|
||||
-- make fun rule
|
||||
|
||||
mkFun s =
|
||||
let (w,p) = span (/='_') s in
|
||||
" " ++ s ++ " : " ++ tail p ++ " ;"
|
||||
|
||||
-- make reused lin rule
|
||||
|
||||
mkRLin s =
|
||||
let w = head (words s) in
|
||||
" " ++ w ++ " = " ++ w ++ " ;"
|
||||
|
||||
-- make regular lin rule
|
||||
|
||||
mkLin s =
|
||||
let (w,p) = span (/='_') s in
|
||||
" " ++ s ++ " = " ++ lin (tail p) w ++ " ;"
|
||||
where
|
||||
lin cat w = case cat of
|
||||
"V2" -> "dirV2 (regV" ++ " \"" ++ w ++ "\")"
|
||||
'V':_ -> "mk" ++ cat ++ " (regV" ++ " \"" ++ w ++ "\")"
|
||||
_ -> "reg" ++ cat ++ " \"" ++ w ++ "\""
|
||||
|
||||
-- normalize identifiers in Structural
|
||||
|
||||
mkIdent s = case words s of
|
||||
w:ws -> if obsolete w then ""
|
||||
else " " ++ (unwords $ mkId (update w) : ws)
|
||||
_ -> s
|
||||
where
|
||||
mkId name@(c:cs) =
|
||||
let
|
||||
(x,y) = span isCat cs
|
||||
in
|
||||
toLower c : clean x ++ "_" ++ new y
|
||||
isCat = flip notElem "PDNVCAIS"
|
||||
clean x = case span isLower x of
|
||||
(_,[]) -> x
|
||||
(u,v) -> u ++ "8" ++ map toLower v
|
||||
new y = case y of
|
||||
"NumDet" -> "NDet"
|
||||
_ -> y
|
||||
obsolete w = elem w $ words "TheseNumNP ThoseNumNP NobodyNP NeitherNor NoDet AnyDet"
|
||||
update w = case w of
|
||||
"EitherOr" -> "EitherOrConjD"
|
||||
"BothAnd" -> "BothAndConjD"
|
||||
"PhrYes" -> "YesPhr"
|
||||
"PhrNo" -> "NoPhr"
|
||||
"WeNumNP" -> "WeNP"
|
||||
"YeNumNP" -> "YeNP"
|
||||
"HowManyDet" -> "HowManyIDet"
|
||||
"MostsDet" -> "MostManyDet"
|
||||
"WhichDet" -> "WhichOneIDet"
|
||||
"WhichNDet" -> "WhichManyIDet"
|
||||
"EverywhereNP" -> "EverywhereAdv"
|
||||
"SomewhereNP" -> "SomewhereAdv"
|
||||
"AgentPrep" -> "By8agentPrep"
|
||||
_ -> w
|
||||
|
||||
-- massage French verbs 9/2/2005
|
||||
|
||||
freVerb s = case words s of
|
||||
v:_ -> " " ++ v ++ " : " ++ cat v ++ " ;"
|
||||
_ -> []
|
||||
where
|
||||
cat v = dropWhile (not . isUpper) v
|
||||
|
||||
-- Swedish verbs 17/2
|
||||
|
||||
sweVerb s = case words s of
|
||||
('v':a:u:[]):verb:_ -> "fun " ++ verb ++ " : V ;\n" ++
|
||||
"lin " ++ verb ++ " = " ++ infl a u verb ++ " ;"
|
||||
_ -> []
|
||||
where
|
||||
infl a u verb =
|
||||
let
|
||||
(dne,geb) = span isConsonant $ tail $ reverse verb
|
||||
(beg,voc,end) = (reverse (tail geb), head geb, reverse dne)
|
||||
(pret,sup) = (beg++ [toLower a] ++end, beg++ [toLower u] ++ end ++"it")
|
||||
in
|
||||
unwords ["irregV", prQuot verb, prQuot pret, prQuot sup]
|
||||
|
||||
prQuot s = "\"" ++ s ++ "\""
|
||||
|
||||
isConsonant = not . isVowel
|
||||
|
||||
isVowel = flip elem "aeiouyäöå"
|
||||
|
||||
-- Norwegian 13/3
|
||||
|
||||
groupLines :: [String] -> [String]
|
||||
groupLines ss = [unwords [a, b, c] | [a,_,b,c,_] <- grps ss] where
|
||||
grps ls = let (g,rest) = splitAt 5 ls in g:grps rest
|
||||
|
||||
lin2fun s = case words s of
|
||||
_:fun:_:_ -> " fun " ++ fun ++ " : " ++ cat fun ++ " ;"
|
||||
_ -> s
|
||||
where
|
||||
cat fun = reverse (takeWhile (/='_') (reverse fun))
|
||||
|
||||
-- filter from a given file those lines whose first word is in a sought-set
|
||||
|
||||
allThose :: [String] -> [String] -> [String]
|
||||
allThose soughts givens = concatMap seek soughts where
|
||||
seek w = let s = [line | line <- givens, w':_ <- [words line], w == w']
|
||||
in if null s then ["-- " ++ w] else s
|
||||
|
||||
-- do this with files
|
||||
-- example: getAllThose "abstract/Mtmp" "english/BasicEng.gf"
|
||||
|
||||
getAllThose :: FilePath -> FilePath -> IO ()
|
||||
getAllThose sought given = do
|
||||
s <- readFile sought
|
||||
gi <- readFile given
|
||||
let so = [w | l <- lines s, w:_ <- [words l]]
|
||||
mapM_ putStrLn $ allThose so $ lines gi
|
||||
|
||||
|
||||
-- Swadesh 7/3/2006: replace string defs with f = f ;
|
||||
|
||||
reuseLex line = case words line of
|
||||
w : "=" : _ | elem '"' line -> " " ++ w ++ " = " ++ w ++ " ;"
|
||||
_ -> line
|
||||
Reference in New Issue
Block a user