Class: Lorax::Signature

Inherits:
Object
  • Object
show all
Defined in:
lib/lorax/signature.rb

Constant Summary collapse

SEP =
"\0"

Instance Method Summary collapse

Constructor Details

#initialize(node = nil) ⇒ Signature

Returns a new instance of Signature.


7
8
9
10
11
12
13
14
15
# File 'lib/lorax/signature.rb', line 7

# Builds an empty signature index and, when a root +node+ is supplied,
# immediately computes its signature (which fills the lookup tables).
def initialize(node=nil)
  @node       = node
  @size       = 0
  # Per-node lookup tables.
  @weights    = {} # node => weight
  @monograms  = {} # node => monogram (signature not including children)
  @signatures = {} # node => signature
  # Reverse index.
  @nodes      = {} # signature => [node, ...]
  if node
    signature(node)
  end
end

Instance Method Details

#monogram(node = @node) ⇒ Object


85
86
87
88
89
# File 'lib/lorax/signature.rb', line 85

# Returns the monogram recorded for +node+ (defaults to the root node).
# When none is recorded yet, #signature is run first so that it can
# populate @monograms; the lookup may still yield nil afterwards.
def monogram(node=@node)
  signature(node) unless @monograms.key?(node)
  @monograms[node]
end

#nodes(sig = nil) ⇒ Object


21
22
23
# File 'lib/lorax/signature.rb', line 21

# Without +sig+ (nil or false), returns a one-element array holding the
# root node. With a +sig+, returns whatever @nodes has recorded under that
# signature, which is nil when the signature is unknown.
def nodes(sig=nil)
  return [@node] unless sig
  @nodes[sig]
end

#root ⇒ Object


17
18
19
# File 'lib/lorax/signature.rb', line 17

# Returns the node this Signature was constructed with, or nil when the
# constructor was called without one.
def root
  @node
end

#set_signature(node, value) ⇒ Object

:nodoc: for testing


91
92
93
94
# File 'lib/lorax/signature.rb', line 91

# Test hook: records +value+ as the signature of +node+ in both directions
# (signature => nodes and node => signature) without computing anything.
# Returns +value+.
def set_signature(node, value) # :nodoc: for testing
  bucket = (@nodes[value] ||= [])
  bucket << node
  @signatures.store(node, value)
end

#set_weight(node, value) ⇒ Object

:nodoc: for testing


96
97
98
# File 'lib/lorax/signature.rb', line 96

# Test hook: stores +value+ as the weight of +node+, bypassing #weight's
# calculation. Returns +value+.
def set_weight(node, value) # :nodoc: for testing
  @weights.store(node, value)
end

#signature(node = @node) ⇒ Object

Raises:

  • (ArgumentError)

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/lorax/signature.rb', line 29

# Returns the signature of +node+ (defaults to the root node), computing
# and recording it on first use.
#
# * text: hash of the stripped content; whitespace-only text yields nil
#   and is not recorded anywhere
# * CDATA / comment: hash of the raw content
# * entity reference: hash of the node's HTML serialization
# * element: hash of name, attributes (sorted) and the non-nil child
#   signatures; its monogram leaves the children out
#
# Raises ArgumentError when +node+ is not a Nokogiri::XML::Node or is a
# node type other than those listed above.
def signature(node=@node)
  return @signatures[node] if @signatures.key?(node)
  unless node.is_a?(Nokogiri::XML::Node)
    raise ArgumentError, "signature expects a Node, but received #{node.inspect}"
  end

  if node.text?
    stripped = node.content.strip
    return nil if stripped.empty?
    mono = sig = hashify(stripped)
  elsif node.cdata? || node.comment?
    mono = sig = hashify(node.content)
  elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
    mono = sig = hashify(node.to_html)
  elsif node.element?
    # Children first: recursing records every descendant before this node.
    child_sigs   = node.children.map { |child| signature(child) }.compact
    children_sig = hashify(child_sigs)
    attr_pairs   = node.attributes.sort.map { |name, attr| [name, attr.value] }.flatten
    attr_sig     = hashify(attr_pairs)
    mono         = hashify(node.name, attr_sig)
    sig          = hashify(node.name, attr_sig, children_sig)
  else
    raise ArgumentError, "signature expects an element, text, cdata or comment node, but received #{node.class}"
  end

  @size += 1
  weight(node) # make sure the node's weight is recorded alongside its signature

  (@nodes[sig] ||= []) << node
  @monograms[node]  = mono
  @signatures[node] = sig
end

#size ⇒ Object


25
26
27
# File 'lib/lorax/signature.rb', line 25

# Returns the running count kept in @size. It starts at 0 and is bumped
# once each time #signature computes and records a new signature; cache
# hits and whitespace-only text nodes do not count.
def size
  @size
end

#weight(node = @node) ⇒ Object

Raises:

  • (ArgumentError)

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/lorax/signature.rb', line 61

# Returns the weight of +node+ (defaults to the root node), memoized in
# @weights.
#
# * text: 0 when the stripped content is empty, otherwise
#   1 + ln(stripped length)
# * CDATA / comment: 1 + ln(content length); empty content weighs 1
# * entity reference: 1
# * element: 1 plus the sum of its children's weights
#
# Raises ArgumentError when +node+ is not a Nokogiri::XML::Node or is a
# node type other than those listed above.
def weight(node=@node)
  return @weights[node] if @weights.key?(node)
  unless node.is_a?(Nokogiri::XML::Node)
    raise ArgumentError, "weight expects a Node, but received #{node.inspect}"
  end

  calculated_weight =
    if node.text?
      content = node.content.strip
      content.empty? ? 0 : 1 + Math.log(content.length)
    elsif node.cdata? || node.comment?
      # Math.log(0) is -Infinity, which would also drag every ancestor
      # element's weight to -Infinity. An empty comment/CDATA node is still
      # a node, so give it the same weight as a one-character one (1 + ln 1).
      length = node.content.length
      length.zero? ? 1 : 1 + Math.log(length)
    elsif node.type == Nokogiri::XML::Node::ENTITY_REF_NODE
      1
    elsif node.element?
      node.children.inject(1) { |sum, child| sum + weight(child) }
    else
      raise ArgumentError, "weight expects an element, text, cdata or comment node, but received #{node.class}"
    end

  @weights[node] = calculated_weight
end