mirror of
https://github.com/GrammaticalFramework/gf-core.git
synced 2026-04-22 11:19:32 -06:00
a simple clitic analysis command 'ca'
This commit is contained in:
40
src/compiler/GF/Text/Clitics.hs
Normal file
40
src/compiler/GF/Text/Clitics.hs
Normal file
@@ -0,0 +1,40 @@
|
||||
module GF.Text.Clitics (getClitics,getCliticsText) where
|
||||
|
||||
import Data.List
|
||||
|
||||
-- AR 6/2/2011
-- Analyse a word as stem+clitic whenever
--   (1) the clitic is in the clitic list, and
--   (2) either
--       (a) the stem is in the lexicon, or
--       (b) the stem can itself be analysed as stem0+clitic0.
--
-- Examples:
--   Italian amarmi         = amar+mi
--   Finnish autossanikohan = autossa+ni+kohan
--
-- The analysis gives all results, including the case where the whole word is in the lexicon.
--
-- The clitics in the list are expected to be reversed, i.e. each clitic is
-- spelled backwards (e.g. "im" for "mi").
|
||||
|
||||
-- | Enumerate every way of reading a word as a lexical stem followed by zero
-- or more clitics.
--
-- * @isLex@ decides whether a string (in normal spelling) is a known stem.
-- * @rclitics@ lists the admissible clitics, each spelled backwards.
-- * The last argument is the word to analyse, in normal spelling.
--
-- Each analysis is returned in reading order, stem first. If the unsplit word
-- is itself lexical, that one-element analysis comes first. A word with no
-- analysis yields the empty list.
--
-- Empty strings in @rclitics@ are ignored: stripping an empty clitic leaves
-- the word unchanged, so the search would otherwise never terminate.
getClitics :: (String -> Bool) -> [String] -> String -> [[String]]
getClitics isLex rclitics = map (reverse . map reverse) . analyses . reverse
  where
    -- Only non-empty clitics shorten the remaining word, which is what
    -- guarantees that the recursion below makes progress.
    usable = filter (not . null) rclitics

    -- Work on the reversed word so that a trailing clitic becomes a prefix
    -- that 'stripPrefix' can remove. Analyses are built back to front
    -- (outermost clitic first, stem last) and flipped by the caller.
    analyses rword =
      [ [rword] | isLex (reverse rword) ]
        ++ [ rclit : rest
           | rclit <- usable
           , Just rstem <- [stripPrefix rclit rword]
           , rest <- analyses rstem
           ]
|
||||
|
||||
|
||||
-- | Analyse every word of a text with 'getClitics' and return all readings of
-- the whole text, one string per combination of per-word analyses.
--
-- The first two arguments are passed on to 'getClitics' unchanged; the third
-- is the text as a list of words.
--
-- Within a word, the stem and its clitics are separated by the token @&+@;
-- words are separated by single spaces. Because the readings are the
-- Cartesian product of the per-word analyses, a single word without any
-- analysis makes the whole result empty.
getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String]
getCliticsText isLex rclitics = map unwords . mapM analyseWord
  where
    -- All renderings of one word, each of the form @stem &+ clitic &+ ...@.
    analyseWord = map render . getClitics isLex rclitics
    render = unwords . intersperse "&+"
|
||||
|
||||
|
||||
-- example
|
||||
|
||||
-- | 'getClitics' specialised to the example lexicon 'exlex1' and the example
-- clitic list 'exclits1'.
getClitics1 :: String -> [[String]]
getClitics1 = getClitics exlex1 exclits1
|
||||
-- | Toy lexicon for the example: true exactly for the few stems listed here.
exlex1 :: String -> Bool
exlex1 word = word `elem` ["auto", "naise", "rahan","maa","maahan","maahankaan"]
|
||||
-- | Clitic list for the example; each clitic is stored spelled backwards.
exclits1 :: [String]
exclits1 = [reverse clitic | clitic <- ["ni","ko","han","pas","nsa","kin","kaan"]]
|
||||
Reference in New Issue
Block a user