Class: XapianFu::StopperFactory

Inherits:
Object
  • Object
show all
Defined in:
lib/xapian_fu/stopper_factory.rb

Class Method Summary collapse

Class Method Details

.stop_words_filename(lang) ⇒ Object

Return the full path to the stop words file for the given language


27
28
29
# File 'lib/xapian_fu/stopper_factory.rb', line 27

# Build the path of the stop words file for a language.
#
# The file name is the lower-cased language followed by ".txt", inside the
# "stopwords" directory that sits next to this source file. No check is made
# that the file actually exists.
#
# @param lang [#to_s] language name, e.g. :english
# @return [String] path to the stop words file
def self.stop_words_filename(lang)
  basename = "#{lang.to_s.downcase}.txt"
  File.join(File.dirname(__FILE__), 'stopwords', basename)
end

.stop_words_for(lang) ⇒ Object

Read and parse the stop words file for the given language, returning an array of words


32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/xapian_fu/stopper_factory.rb', line 32

def self.stop_words_for(lang)
  raise UnsupportedStopperLanguage, lang.to_s unless File.exists?(stop_words_filename(lang))
  words = []
  # Open files with correct encoding in Ruby 1.9
  open_args = [stop_words_filename(lang), "r"]
  open_args << { :encoding => "UTF-8" } if String.new.respond_to? :encoding
  open(*open_args) do |f|
    while line = f.readline rescue nil
      words << line.split(" ", 2).first.downcase.strip  unless line =~ /^ +|^$|^\|/
    end
  end
  words
end

.stopper_for(lang) ⇒ Object

Return a SimpleStopper loaded with stop words for the given language


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/xapian_fu/stopper_factory.rb', line 8

# Look up, or build and cache, the stopper for a language.
#
# A Xapian::Stopper instance or +false+ is handed back unchanged. Anything
# else is converted to a string, lower-cased and stripped, and used as the
# key into the @stoppers cache. On a cache miss a Xapian::SimpleStopper is
# filled with the words from stop_words_for and stored under that key.
#
# @param lang [Xapian::Stopper, false, #to_s] a stopper, false, or a language name
# @return [Xapian::Stopper, false] the argument itself, or the stopper for lang
def self.stopper_for(lang)
  # Pass-through cases: nothing to build.
  return lang if lang.is_a?(Xapian::Stopper)
  return false if false.equal?(lang)

  key = lang.to_s.downcase.strip
  @stoppers[key] ||= begin
    built = Xapian::SimpleStopper.new
    stop_words_for(key).each { |word| built.add(word) }
    built
  end
end