Module: Normalizer

Included in:
FSelector::BaseContinuous, FSelector::CFS_c, FSelector::ReliefF_c, FSelector::Relief_c
Defined in:
lib/fselector/normalizer.rb

Overview

Normalizes continuous feature values.

Instance Method Summary (collapse)

Instance Method Details

- normalize_by_log!(base = 10)

Log transformation; requires every feature value to be positive.



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/fselector/normalizer.rb', line 10

# Replace every feature value with its logarithm, in place.
#
# Walks the samples yielded by each_sample and overwrites each stored
# value with Math.log(value, base). Values are rewritten as they are
# visited, so entries processed before a failure stay transformed.
#
# @param base [Numeric] logarithm base (defaults to 10)
# @note terminates the program via Kernel#abort as soon as a value
#   that is not greater than 0.0 is encountered
def normalize_by_log!(base=10)
  each_sample do |_key, sample|
    sample.keys.each do |feature|
      value = sample[feature]

      # a logarithm is only defined for strictly positive input
      unless value > 0.0
        abort "[#{__FILE__}@#{__LINE__}]: \n"+
              "feature values must be positive!"
      end

      sample[feature] = Math.log(value, base)
    end
  end
end

- normalize_by_min_max!(min = 0.0, max = 1.0)

Scale each feature linearly to the range [min, max]; requires max > min.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/fselector/normalizer.rb', line 30

# Linearly rescale every feature value into the range [min, max], in place.
#
# For each feature the observed minimum and maximum are collected first
# (via each_feature / get_feature_values); then every sample value v is
# replaced by
#
#   min + (v - min_v) * (max - min) / (max_v - min_v)
#
# A feature whose observed minimum equals its maximum (constant feature)
# has no spread to scale by; its values are set to +min+ rather than
# dividing by zero (which would give NaN for floats or raise
# ZeroDivisionError for integers).
#
# @param min [Numeric] lower bound of the target range (defaults to 0.0)
# @param max [Numeric] upper bound of the target range (defaults to 1.0);
#   expected to be greater than +min+
def normalize_by_min_max!(min=0.0, max=1.0)
  # first determine min and max for each feature
  f2min_max = {}

  each_feature do |f|
    fvs = get_feature_values(f)
    f2min_max[f] = [fvs.min, fvs.max]
  end

  # then normalize
  each_sample do |k, s|
    s.keys.each do |f|
      min_v, max_v = f2min_max[f]
      range_v = max_v - min_v

      if range_v.zero?
        # constant feature: map onto the lower bound instead of dividing by zero
        s[f] = min
      else
        s[f] = min + (s[f]-min_v) * (max-min) / range_v
      end
    end
  end
end

- normalize_by_zscore!

Convert each feature value to its z-score (standard score).

ref: Wikipedia



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/fselector/normalizer.rb', line 52

# Standardize every feature value to its z-score, in place.
#
# First gathers, per feature, the mean and standard deviation reported by
# the value collection from get_feature_values (its #mean and #sd methods).
# Then each sample value v becomes (v - mean) / sd. When a feature's
# standard deviation is zero, its values are set to 0.0 instead.
def normalize_by_zscore!
  # pass 1: per-feature [mean, sd]
  stats = {}

  each_feature do |feature|
    values = get_feature_values(feature)
    stats[feature] = [values.mean, values.sd]
  end

  # pass 2: rewrite each stored value
  each_sample do |_key, sample|
    sample.keys.each do |feature|
      mu, sigma = stats[feature]
      # no spread means nothing to divide by; use 0.0
      sample[feature] = sigma.zero? ? 0.0 : (sample[feature] - mu) / sigma
    end
  end
end