forked from GitHub/gf-core
41 lines
1.3 KiB
Haskell
41 lines
1.3 KiB
Haskell
module GF.Text.Clitics (getClitics,getCliticsText) where
|
|
|
|
import Data.List
|
|
|
|
-- AR 6/2/2011
|
|
-- Analyse word as stem+clitic whenever
|
|
-- (1) clitic is in clitic list
|
|
-- (2) either
|
|
-- (a) stem is in Lexicon
|
|
-- (b) stem can be analysed as stem0+clitic0
|
|
--
|
|
-- Examples:
|
|
-- Italian amarmi = amar+mi
|
|
-- Finnish autossanikohan = autossa+ni+kohan
|
|
--
|
|
-- The analysis gives all results, including the case where the whole word is in Lexicon.
|
|
--
|
|
-- Each clitic in the list must be given as a reversed string (e.g. "im" for
-- the clitic "mi"), because matching is done against the reversed word.
|
|
|
|
-- | Find every way to read a word as a stem followed by zero or more clitics.
--
-- Arguments:
--
-- * @isLex@: predicate applied to candidate stems in their normal
--   (non-reversed) spelling; a split is accepted only if its stem satisfies it.
-- * @rclitics@: the allowed clitics, each spelled in reverse.
-- * the word to analyse, in its normal spelling.
--
-- Each result lists the stem first and then the clitics in surface order,
-- all in normal spelling. If the whole word satisfies @isLex@, the
-- one-element analysis is returned first. A word with no analysis yields @[]@.
--
-- Empty strings in @rclitics@ are ignored: stripping an empty clitic leaves
-- the word unchanged, so the search would otherwise recurse forever.
getClitics :: (String -> Bool) -> [String] -> String -> [[String]]
getClitics isLex rclitics = map (reverse . map reverse) . clits . reverse
  where
    -- Only non-empty clitics shorten the word and guarantee termination.
    usable = filter (not . null) rclitics

    -- All analyses of a reversed word; each analysis is itself reversed,
    -- i.e. outermost clitic first and the (reversed) stem last.
    clits rword =
      ifLex rword
        [ rclit : more
        | rclit <- usable
        , Just stem <- [stripPrefix rclit rword]
        , more <- clits stem
        ]

    -- Prepend the "no further clitic" analysis when the remaining word,
    -- turned back to normal spelling, is accepted by the lexicon.
    ifLex w ws
      | isLex (reverse w) = [w] : ws
      | otherwise = ws
|
|
|
|
|
|
-- | Analyse every word of a text with 'getClitics' and return all
-- combinations of the per-word analyses.
--
-- Within one word, the parts of an analysis are joined by spaces with the
-- token @&+@ placed between consecutive parts. The rendered words of one
-- combination are then joined by single spaces. If some word has no
-- analysis, there is no combination and the result is empty.
getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String]
getCliticsText isLex rclitics ws =
  [ unwords choice | choice <- mapM analyses ws ]
  where
    -- Every rendered analysis of a single word.
    analyses w =
      [ unwords (intersperse "&+" parts)
      | parts <- getClitics isLex rclitics w
      ]
|
|
|
|
|
|
-- example
|
|
|
|
-- | Example analyser: 'getClitics' specialised to the sample lexicon
-- 'exlex1' and the sample clitic list 'exclits1'.
getClitics1 :: String -> [[String]]
getClitics1 word = getClitics exlex1 exclits1 word
|
|
-- | Sample lexicon for the example: holds exactly for the listed stems.
exlex1 :: String -> Bool
exlex1 w = w `elem` ["auto", "naise", "rahan", "maa", "maahan", "maahankaan"]
|
|
-- | Sample clitic list for the example. The clitics are written in normal
-- spelling here and reversed on construction.
exclits1 :: [String]
exclits1 = [reverse c | c <- ["ni", "ko", "han", "pas", "nsa", "kin", "kaan"]]
|