Class: Myaso::Ngrams

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/myaso/ngrams.rb

Overview

A simple yet handy implementation of an n-gram storage.

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Ngrams

A new n-gram storage instance is initialized with all counts at zero.


16
17
18
19
20
# File 'lib/myaso/ngrams.rb', line 16

# Builds the empty count table: a three-level hash keyed by unigram,
# then bigram, then trigram. The two outer levels create their nested
# hash on first access; the innermost level defaults every count to 0.
def initialize
  @table = Hash.new do |unigrams, unigram|
    unigrams[unigram] = Hash.new do |bigrams, bigram|
      bigrams[bigram] = Hash.new(0)
    end
  end
end

Instance Method Details

#==(other) ⇒ Object

Two storages are equal iff their tables are equal.


39
40
41
# File 'lib/myaso/ngrams.rb', line 39

# Two storages are equal iff their tables are equal.
#
# Objects that are not instances of the receiver's class (including nil)
# are simply unequal; they are never asked for a +table+, so comparing
# against an arbitrary object returns false instead of raising
# NoMethodError.
#
# @param other [Object] the object to compare with.
# @return [Boolean] true when +other+ is a storage with an equal table.
def ==(other)
  other.is_a?(self.class) && table == other.table
end

#[](unigram, bigram = nil, trigram = nil) ⇒ Object

Obtain the count of the specified unigram, bigram, or trigram.


24
25
26
27
28
# File 'lib/myaso/ngrams.rb', line 24

# Obtain the count of the specified unigram, bigram, or trigram.
#
# The lookup goes through +fetch+ rather than plain indexing so that
# asking for an unknown unigram or bigram does not trigger the table's
# default blocks and insert empty entries.
#
# @param unigram [Object] first-level key.
# @param bigram [Object, nil] second-level key; nil when omitted.
# @param trigram [Object, nil] third-level key; nil when omitted.
# @return [Object] the stored count, or 0 when the unigram or bigram
#   level is missing.
def [](unigram, bigram = nil, trigram = nil)
  bigrams = table.fetch(unigram) { return 0 }
  trigrams = bigrams.fetch(bigram) { return 0 }
  trigrams[trigram]
end

#[]=(unigram, bigram = nil, trigram = nil, count) ⇒ Object

Assign the count to the specified unigram, bigram, or trigram.


32
33
34
35
# File 'lib/myaso/ngrams.rb', line 32

# Assign the count to the specified unigram, bigram, or trigram.
#
# The count is always the last argument, so one, two, or three keys may
# precede it; omitted keys are stored under nil.
#
# @param unigram [Object] first-level key.
# @param bigram [Object, nil] second-level key; nil when omitted.
# @param trigram [Object, nil] third-level key; nil when omitted.
# @param count [Object] the value to store.
# @return [Object] the stored count.
def []=(unigram, bigram = nil, trigram = nil, count)
  # Drop the memoized unigram total; it may be stale after this write.
  @unigrams_count = nil
  trigrams = table[unigram][bigram]
  trigrams.store(trigram, count)
end

#each_trigram ⇒ Object

Trigrams enumerator. Yes, this method should return an Enumerator instance, but it is too slow.


46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/myaso/ngrams.rb', line 46

# Iterates over every stored trigram.
#
# Entries whose bigram or trigram key is nil (or false) are skipped, so
# only complete three-key entries are visited. Each one is yielded as a
# single array of the form [[unigram, bigram, trigram], count].
#
# With a block the table is walked directly. Without a block an
# Enumerator over the same sequence is returned instead of failing on
# the first yield.
#
# @yield [Array] [[unigram, bigram, trigram], count] for each trigram.
# @return [Enumerator] when no block is given; otherwise the result of
#   +table.each+.
def each_trigram
  return enum_for(:each_trigram) unless block_given?

  table.each do |unigram, bigrams|
    bigrams.each do |bigram, trigrams|
      next unless bigram

      trigrams.each do |trigram, count|
        next unless trigram

        yield [[unigram, bigram, trigram], count]
      end
    end
  end
end

#unigrams_count ⇒ Object

Unigrams count.


62
63
64
65
66
# File 'lib/myaso/ngrams.rb', line 62

# Unigrams count: the sum, over every unigram in the table, of the count
# stored under that unigram's [nil][nil] slot.
#
# The total is memoized in @unigrams_count until it is reset.
#
# @return [Object] the summed unigram counts (0 for an empty table).
def unigrams_count
  @unigrams_count ||= table.each_value.inject(0) do |total, bigrams|
    # Guard with key? so that reading the total never inserts an empty
    # nil entry for a unigram that has no unigram count of its own.
    bigrams.key?(nil) ? total + bigrams[nil][nil] : total
  end
end