forked from GitHub/gf-rgl
(Morphodict) Automatic whitespace removal
This commit is contained in:
@@ -30,27 +30,27 @@ main = do
|
|||||||
if length xx /= 4
|
if length xx /= 4
|
||||||
then putStrLn usage
|
then putStrLn usage
|
||||||
else do
|
else do
|
||||||
let mode:configfile:datafile:outfile:_ = xx
|
let mode:configfile:datafile:outfile:_ = xx
|
||||||
config <- readFile configfile >>= return . mkConfig
|
config <- readFile configfile >>= return . mkConfig
|
||||||
|
|
||||||
rawdata <- case mode of
|
rawdata <- case mode of
|
||||||
"pgf" -> pgfFile2rawData config datafile
|
"pgf" -> pgfFile2rawData config datafile
|
||||||
"raw" -> readFile datafile >>= return . map getRawData . filter (not . null) . lines
|
"raw" -> readFile datafile >>= return . map getRawData . filter (not . null) . lines
|
||||||
rawdata2gf config rawdata outfile
|
rawdata2gf config rawdata outfile
|
||||||
|
|
||||||
|
|
||||||
rawdata2gf config rawdata outfile = do
|
rawdata2gf config rawdata outfile = do
|
||||||
|
|
||||||
let env = MDEnv rawdata config
|
let env = MDEnv rawdata config
|
||||||
let (absrules,cncrules) = mkMorphoDict env
|
let (absrules,cncrules) = mkMorphoDict env
|
||||||
|
|
||||||
absheader <- readFile (outfile ++ "Abs.header")
|
absheader <- readFile (outfile ++ "Abs.header")
|
||||||
cncheader <- readFile (outfile ++ ".header")
|
cncheader <- readFile (outfile ++ ".header")
|
||||||
|
|
||||||
writeFile (outfile ++ "Abs.gf") absheader
|
writeFile (outfile ++ "Abs.gf") absheader
|
||||||
appendFile (outfile ++ "Abs.gf") $ unlines $ sort absrules
|
appendFile (outfile ++ "Abs.gf") $ unlines $ sort absrules
|
||||||
appendFile (outfile ++ "Abs.gf") "}"
|
appendFile (outfile ++ "Abs.gf") "}"
|
||||||
|
|
||||||
writeFile (outfile ++ ".gf") cncheader
|
writeFile (outfile ++ ".gf") cncheader
|
||||||
appendFile (outfile ++ ".gf") $ unlines $ sort cncrules
|
appendFile (outfile ++ ".gf") $ unlines $ sort cncrules
|
||||||
appendFile (outfile ++ ".gf") "}"
|
appendFile (outfile ++ ".gf") "}"
|
||||||
@@ -68,7 +68,7 @@ pgfFile2rawData config pgffile = do
|
|||||||
cat <- cats,
|
cat <- cats,
|
||||||
f <- functionsByCat pgf (mkCId cat),
|
f <- functionsByCat pgf (mkCId cat),
|
||||||
lin <- tabularLinearizes pgf lang (mkApp f [])
|
lin <- tabularLinearizes pgf lang (mkApp f [])
|
||||||
]
|
]
|
||||||
|
|
||||||
type Cat = String
|
type Cat = String
|
||||||
type Fun = String
|
type Fun = String
|
||||||
@@ -84,7 +84,7 @@ mkConfig :: String -> Config -- N : N mkN 0 2 4 6 # 9
|
|||||||
mkConfig ls = M.fromList [(c,i) | Left (c,i) <- map mkOne (lines ls)]
|
mkConfig ls = M.fromList [(c,i) | Left (c,i) <- map mkOne (lines ls)]
|
||||||
where
|
where
|
||||||
mkOne s = case words s of
|
mkOne s = case words s of
|
||||||
"--":_ -> Right s
|
"--":_ -> Right s
|
||||||
cat:":":tcat:oper:ints -> Left (cat,(tcat,oper,mkArgs ints))
|
cat:":":tcat:oper:ints -> Left (cat,(tcat,oper,mkArgs ints))
|
||||||
_ -> Right s
|
_ -> Right s
|
||||||
mkArgs ints = case break (=="#") ints of
|
mkArgs ints = case break (=="#") ints of
|
||||||
@@ -119,7 +119,7 @@ mkMorphoDict env =
|
|||||||
(oldcat,args) <- raws,
|
(oldcat,args) <- raws,
|
||||||
Just (newcat, oper, sig) <- [M.lookup oldcat (config env)],
|
Just (newcat, oper, sig) <- [M.lookup oldcat (config env)],
|
||||||
let lemma = args !! head (fst sig)
|
let lemma = args !! head (fst sig)
|
||||||
]
|
]
|
||||||
|
|
||||||
appSig (ints,feats) args = ([args !! i | i <- ints], [args !! i | i <- feats])
|
appSig (ints,feats) args = ([args !! i | i <- ints], [args !! i | i <- feats])
|
||||||
|
|
||||||
@@ -153,7 +153,7 @@ mkMorphoDict env =
|
|||||||
_ | length (nub (map tail fls)) == length fls -> shrinkMore (map tail fls)
|
_ | length (nub (map tail fls)) == length fls -> shrinkMore (map tail fls)
|
||||||
_ -> fls
|
_ -> fls
|
||||||
|
|
||||||
|
|
||||||
mkFun = showCId . mkCId . concat . intersperse "_"
|
mkFun = showCId . mkCId . concat . intersperse "_"
|
||||||
|
|
||||||
quote s = "\"" ++ s ++ "\""
|
quote s = "\"" ++ s ++ "\""
|
||||||
@@ -162,7 +162,7 @@ quote s = "\"" ++ s ++ "\""
|
|||||||
|
|
||||||
|
|
||||||
{- ---- let us ignore this
|
{- ---- let us ignore this
|
||||||
findCompounds :: [RuleData] -> [RuleData]
|
findCompounds :: [RuleData] -> [RuleData]
|
||||||
findCompounds = getCompounds . sortOn cat_orthrevforms
|
findCompounds = getCompounds . sortOn cat_orthrevforms
|
||||||
|
|
||||||
cat_orthrevforms (_,(cat,_:forms)) = (cat,[map (!!i) fss | let fss = map reverse forms, i <- [0..minimum (map length fss) - 1]])
|
cat_orthrevforms (_,(cat,_:forms)) = (cat,[map (!!i) fss | let fss = map reverse forms, i <- [0..minimum (map length fss) - 1]])
|
||||||
@@ -171,9 +171,9 @@ quote s = "\"" ++ s ++ "\""
|
|||||||
revstem = head . snd . cat_revforms
|
revstem = head . snd . cat_revforms
|
||||||
wforms (_,(_,_:forms)) = forms
|
wforms (_,(_,_:forms)) = forms
|
||||||
|
|
||||||
getCompounds :: [RuleData] -> [RuleData]
|
getCompounds :: [RuleData] -> [RuleData]
|
||||||
getCompounds fls = case fls of
|
getCompounds fls = case fls of
|
||||||
fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
|
fl : fls1 | length (revstem fl) < 2 -> markWith fl [] : getCompounds fls1
|
||||||
fl : fls2 -> case span (\x -> and [isPrefixOf (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms x)]) fls2 of
|
fl : fls2 -> case span (\x -> and [isPrefixOf (reverse w) (reverse w1) | (w,w1) <- zip (wforms fl) (wforms x)]) fls2 of
|
||||||
([],_:_) -> markWith fl [] : getCompounds fls2
|
([],_:_) -> markWith fl [] : getCompounds fls2
|
||||||
(fls1,fls3) -> markWith fl [] : map (markCompound fl) fls1 ++ getCompounds fls3
|
(fls1,fls3) -> markWith fl [] : map (markCompound fl) fls1 ++ getCompounds fls3
|
||||||
@@ -188,7 +188,7 @@ quote s = "\"" ++ s ++ "\""
|
|||||||
|
|
||||||
isPrefixWord x xy =
|
isPrefixWord x xy =
|
||||||
length suff > 1 && ---- compound first part must be at least two letters long
|
length suff > 1 && ---- compound first part must be at least two letters long
|
||||||
any (\c -> elem c "-0123456789aeiouyåäö") suff && ---- must contain a vowel or a digit
|
any (\c -> elem c "-0123456789aeiouyåäö") suff && ---- must contain a vowel or a digit
|
||||||
isPrefixOf x xy ---- and of course be a prefix
|
isPrefixOf x xy ---- and of course be a prefix
|
||||||
where
|
where
|
||||||
suff = drop (length x) xy
|
suff = drop (length x) xy
|
||||||
|
|||||||
Reference in New Issue
Block a user