Module: Hpricot::Traverse
- Included in:
- Container::Trav, Leaf::Trav
- Defined in:
- lib/hpricot/traverse.rb,
lib/hpricot/modules.rb,
lib/hpricot/traverse.rb,
lib/hpricot/elements.rb
Overview
:startdoc:
Class Method Summary (collapse)
Instance Method Summary (collapse)
-
- (Object) after(html = nil, &blk)
Adds elements immediately after this element, contained in the html string.
-
- (Object) at(expr)
(also: #%)
Find the first matching node for the CSS or XPath expr string.
-
- (Object) before(html = nil, &blk)
Adds elements immediately before this element, contained in the html string.
-
- (Boolean) bogusetag?
Is this object a stranded end tag?
-
- (Object) children_of_type(tag_name)
Find children of a given tag_name.
- - (Object) clean_path(path)
-
- (Boolean) comment?
Is this object a comment?
-
- (Object) css_path
Builds a unique CSS string for this node, from the root of the document containing it.
-
- (Boolean) doc?
Is this object the enclosing HTML or XML document?
-
- (Boolean) doctype?
Is this object a doctype tag?
-
- (Boolean) elem?
Is this object an HTML or XML element?
-
- (Object) following
Find all nodes which follow the current one.
- - (Object) get_subnode(*indexes)
-
- (Object) html(inner = nil, &blk)
(also: #inner_html)
Builds an HTML string from the contents of this node.
- - (Object) index(name)
-
- (Object) inner_html=(inner)
(also: #innerHTML=)
Inserts new contents into the current node, based on the HTML contained in string inner.
-
- (Object) inner_text
(also: #innerText)
Builds a string from the text contained in this node.
-
- (Object) make(input = nil, &blk)
Parses an HTML string, making an HTML fragment based on the options used to create the container document.
-
- (Object) next
(also: #next_node)
Returns the node neighboring this node to the south: just below it.
- - (Object) node_position
-
- (Object) nodes_at(*pos)
Puts together an array of neighboring nodes based on their proximity to this node.
- - (Object) position
-
- (Object) preceding
Find all preceding nodes.
-
- (Object) previous
(also: #previous_node)
Returns the node neighboring this node to the north: just above it.
-
- (Boolean) procins?
Is this object an XML processing instruction?
-
- (Object) search(expr, &blk)
(also: #/)
Searches this node for all elements matching the CSS or XPath expr.
-
- (Object) swap(html = nil, &blk)
Replace this element and its contents with the nodes contained in the html string.
-
- (Boolean) text?
Is this object an HTML text node?
-
- (Object) to_html
(also: #to_s)
Builds an HTML string from this node and its contents.
-
- (Object) to_original_html
Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.
-
- (Object) to_plain_text
Builds a string from the text contained in this node.
-
- (Object) traverse_element(*names, &block)
traverse_element traverses elements in the tree.
-
- (Object) traverse_text(&block)
traverse_text traverses texts in the tree.
-
- (Boolean) xmldecl?
Is this object an XML declaration?
-
- (Object) xpath
Builds a unique XPath string for this node, from the root of the document containing it.
Class Method Details
+ (Object) filter(tok, &blk)
375 376 377 |
# File 'lib/hpricot/elements.rb', line 375
# Registers +blk+ as an instance method named "filter[TOK]", where TOK is
# +tok+ itself when it is a String and +tok.inspect+ otherwise.
def self.filter(tok, &blk)
  label = tok.is_a?(String) ? tok : tok.inspect
  define_method("filter[#{label}]", &blk)
end
Instance Method Details
- (Object) after(html = nil, &blk)
Adds elements immediately after this element, contained in the html string.
121 122 123 |
# File 'lib/hpricot/traverse.rb', line 121
# Builds nodes from +html+ (or from +blk+) with #make and asks the parent to
# insert them immediately after this node.
def after(html = nil, &blk)
  container = parent
  container.insert_after(make(html, &blk), self)
end
- (Object) at(expr) Also known as: %
Find the first matching node for the CSS or XPath expr string.
341 342 343 |
# File 'lib/hpricot/traverse.rb', line 341
# Runs #search with +expr+ and returns only the first match.
def at(expr)
  matches = search(expr)
  matches.first
end
- (Object) before(html = nil, &blk)
Adds elements immediately before this element, contained in the html string.
126 127 128 |
# File 'lib/hpricot/traverse.rb', line 126
# Builds nodes from +html+ (or from +blk+) with #make and asks the parent to
# insert them immediately before this node.
def before(html = nil, &blk)
  container = parent
  container.insert_before(make(html, &blk), self)
end
- (Boolean) bogusetag?
Is this object a stranded end tag?
21 |
# File 'lib/hpricot/traverse.rb', line 21
# True when this object is a BogusETag::Trav (a stranded end tag).
def bogusetag?
  is_a?(BogusETag::Trav)
end
- (Object) children_of_type(tag_name)
Find children of a given tag_name.
ele.children_of_type('p')
#=> [...array of paragraphs...]
390 391 392 393 394 395 396 |
# File 'lib/hpricot/traverse.rb', line 390
# Returns the children whose +pathname+ equals +tag_name+. Children that do
# not respond to +pathname+ are skipped. Returns nil when this object does
# not respond to +children+.
def children_of_type(tag_name)
  return unless respond_to?(:children)

  children.find_all do |child|
    child.respond_to?(:pathname) && child.pathname == tag_name
  end
end
- (Object) clean_path(path)
203 204 205 |
# File 'lib/hpricot/traverse.rb', line 203
# Returns a copy of +path+ with whitespace removed from the start and end
# of each line (the pattern uses the per-line anchors ^ and $).
def clean_path(path)
  edge_whitespace = /^\s+|\s+$/
  path.gsub(edge_whitespace, '')
end
- (Boolean) comment?
Is this object a comment?
19 |
# File 'lib/hpricot/traverse.rb', line 19
# True when this object is a Comment::Trav.
def comment?
  is_a?(Comment::Trav)
end
- (Object) css_path
Builds a unique CSS string for this node, from the root of the document containing it.
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/hpricot/traverse.rb', line 226
# Builds a CSS selector string for this node.
#
# An element with an +id+ attribute yields "#<id>". Otherwise the result is
# the parent's css_path joined to this node's pathname with " > " (or just
# the pathname when the parent's css_path is nil/false). When two or more
# siblings share this node's pathname, ":nth(k)" is appended, where k is the
# number of same-pathname siblings that come before this node.
def css_path
  return "##{get_attribute('id')}" if elem? && has_attribute?('id')

  same_tag = 0  # siblings (self included) sharing this node's pathname
  own_index = 0 # how many of those precede this node
  siblings = parent.children
  if siblings
    siblings.each do |e|
      own_index = same_tag if e == self
      same_tag += 1 if e.pathname == pathname
    end
  end

  parent_path = parent.css_path
  path = parent_path ? "#{parent_path} > #{pathname}" : pathname
  path += ":nth(#{own_index})" if same_tag >= 2
  path
end
- (Boolean) doc?
Is this object the enclosing HTML or XML document?
7 |
# File 'lib/hpricot/traverse.rb', line 7
# True when this object is a Doc::Trav (the enclosing document).
def doc?
  is_a?(Doc::Trav)
end
- (Boolean) doctype?
Is this object a doctype tag?
15 |
# File 'lib/hpricot/traverse.rb', line 15
# True when this object is a DocType::Trav.
def doctype?
  is_a?(DocType::Trav)
end
- (Boolean) elem?
Is this object an HTML or XML element?
9 |
# File 'lib/hpricot/traverse.rb', line 9
# True when this object is an Elem::Trav.
def elem?
  is_a?(Elem::Trav)
end
- (Object) following
Find all nodes which follow the current one.
114 115 116 117 118 |
# File 'lib/hpricot/traverse.rb', line 114
# Returns an Elements collection holding every sibling that comes after
# this node in the parent's children.
def following
  siblings = parent.children
  start = siblings.index(self) + 1
  Elements[*siblings.drop(start)]
end
- (Object) get_subnode(*indexes)
138 139 140 141 142 143 144 |
# File 'lib/hpricot/traverse.rb', line 138
# Walks down from this node, calling get_subnode_internal with each of
# +indexes+ in turn, and returns the node reached. With no indexes the
# receiver itself is returned.
def get_subnode(*indexes)
  indexes.inject(self) { |node, index| node.get_subnode_internal(index) }
end
- (Object) html(inner = nil, &blk) Also known as: inner_html
Builds an HTML string from the contents of this node.
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/hpricot/traverse.rb', line 168
# Reader/writer for this node's contents.
#
# With +inner+ or a block: marks the node altered, replaces +children+
# (an Array is used as-is, anything else goes through #make) and reparents
# the new children; the result of +reparent+ is returned.
# With neither: returns the concatenated output of every child, or "" when
# the node has no children.
def html(inner = nil, &blk)
  if inner || blk
    altered!
    self.children = inner.is_a?(Array) ? inner : make(inner, &blk)
    reparent self.children
  elsif respond_to?(:children) && children
    children.map { |child| child.output("") }.join
  else
    ""
  end
end
- (Object) index(name)
47 48 49 50 51 52 53 54 55 56 |
# File 'lib/hpricot/traverse.rb', line 47
# Returns the position among +children+ of the first child matching +name+:
# a child whose +name+ equals it, or a text child when +name+ is "text()".
# "*" always yields 0. Returns -1 when nothing matches or there are no
# children.
def index(name)
  return 0 if name == "*"

  (children || []).each_with_index do |child, i|
    named = child.respond_to?(:name) && name == child.name
    return i if named || (child.text? && name == "text()")
  end
  -1
end
- (Object) inner_html=(inner) Also known as: innerHTML=
Inserts new contents into the current node, based on the HTML contained in string inner.
191 192 193 |
# File 'lib/hpricot/traverse.rb', line 191
# Replaces this node's contents via #html. A nil/false +inner+ is passed on
# as an empty Array.
def inner_html=(inner)
  replacement = inner || []
  html(replacement)
end
- (Object) inner_text Also known as: innerText
Builds a string from the text contained in this node. All HTML elements are removed.
158 159 160 161 162 163 164 |
# File 'lib/hpricot/traverse.rb', line 158
# Concatenates the inner_text of every child. Returns "" when this object
# has no children.
def inner_text
  return "" unless respond_to?(:children) && children

  children.map(&:inner_text).join
end
- (Object) make(input = nil, &blk)
Parses an HTML string, making an HTML fragment based on the options used to create the container document.
25 26 27 28 29 30 31 |
# File 'lib/hpricot/traverse.rb', line 25
# Builds nodes from +input+ (or +blk+). Delegates to the parent's #make when
# there is a parent that responds to it; otherwise returns the children of
# Hpricot.make(input, &blk).
def make(input = nil, &blk)
  owner = parent
  return owner.make(input, &blk) if owner && owner.respond_to?(:make)

  Hpricot.make(input, &blk).children
end
- (Object) next Also known as: next_node
Returns the node neighboring this node to the south: just below it. This method includes text nodes and comments and such.
91 92 93 94 |
# File 'lib/hpricot/traverse.rb', line 91
# Returns the sibling that directly follows this node in the parent's
# children, or nil when this node is the last child or has no parent.
def next
  # Check for a parent before touching parent.children; the guard used to
  # run only after the dereference, so a parentless node raised instead.
  return unless parent

  sib = parent.children
  sib[sib.index(self) + 1]
end
- (Object) node_position
242 243 244 |
# File 'lib/hpricot/traverse.rb', line 242
# Returns this node's index within its parent's children.
def node_position
  siblings = parent.children
  siblings.index(self)
end
- (Object) nodes_at(*pos)
Puts together an array of neighboring nodes based on their proximity to this node. So, for example, to get the next node, you could use nodes_at(1). Or, to get the previous node, use nodes_at(-1).
This method also accepts ranges and sets of numbers.
ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
ele.nodes_at(0, 5..6) # the current node and two others
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/hpricot/traverse.rb', line 67
# Collects siblings by their offset from this node (0 is this node, negative
# offsets come before it, positive offsets after it).
#
# Each entry of +pos+ may be an Integer offset or a Range of offsets. A Range
# whose bounds are Strings is first translated into offsets by looking each
# bound up with parent.index. Returns the selected nodes, in document order,
# wrapped in Elements.
def nodes_at(*pos)
  sib = parent.children
  si = sib.index(self)

  offsets = pos.map do |r|
    if r.is_a?(Range) && r.begin.is_a?(String)
      Range.new(parent.index(r.begin) - si, parent.index(r.end) - si, r.exclude_end?)
    else
      r
    end
  end

  # === lets one test cover both Integer offsets and Range offsets, which is
  # what the former `case ... when *pos` did. The stray debugging `p pos`
  # that printed on every call has been removed.
  picked = sib.select.with_index do |_node, i|
    offsets.any? { |offset| offset === (i - si) }
  end
  Elements[*picked]
end
- (Object) position
246 247 248 |
# File 'lib/hpricot/traverse.rb', line 246
# Returns this node's index among the parent's children that share its
# pathname (as selected by parent.children_of_type).
def position
  same_tag_siblings = parent.children_of_type(self.pathname)
  same_tag_siblings.index(self)
end
- (Object) preceding
Find all preceding nodes.
107 108 109 110 111 |
# File 'lib/hpricot/traverse.rb', line 107
# Returns an Elements collection holding every sibling that comes before
# this node in the parent's children.
def preceding
  siblings = parent.children
  own_index = siblings.index(self)
  Elements[*siblings[0...own_index]]
end
- (Object) previous Also known as: previous_node
Returns the node neighboring this node to the north: just above it. This method includes text nodes and comments and such.
99 100 101 102 103 |
# File 'lib/hpricot/traverse.rb', line 99
# Returns the sibling that directly precedes this node in the parent's
# children, or nil when this node is the first child, has no parent, or the
# parent has no children list.
def previous
  # Guards come first: the nil check on the sibling list used to run only
  # after sib.index had already been called on it.
  return unless parent

  sib = parent.children
  return unless sib

  x = sib.index(self) - 1
  # A negative index would wrap around to the end of the Array.
  sib[x] if x >= 0
end
- (Boolean) procins?
Is this object an XML processing instruction?
17 |
# File 'lib/hpricot/traverse.rb', line 17
# True when this object is a ProcIns::Trav (an XML processing instruction).
def procins?
  is_a?(ProcIns::Trav)
end
- (Object) search(expr, &blk) Also known as: /
Searches this node for all elements matching the CSS or XPath expr. Returns an Elements array containing the matching nodes. If blk is given, it is used to iterate through the matching set.
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
# File 'lib/hpricot/traverse.rb', line 254 def search(expr, &blk) if Range === expr return Elements.(at(expr.begin), at(expr.end), expr.exclude_end?) end last = nil nodes = [self] done = [] expr = expr.to_s hist = [] until expr.empty? expr = clean_path(expr) expr.gsub!(%r!^//!, '') case expr when %r!^/?\.\.! last = expr = $' nodes.map! { |node| node.parent } when %r!^[>/]\s*! last = expr = $' nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact] when %r!^\+! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[siblings.index(node)+1] end nodes.compact! when %r!^~! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[(siblings.index(node)+1)..-1] end nodes.flatten! when %r!^[|,]! last = expr = " #$'" nodes.shift if nodes.first == self done += nodes nodes = [self] else m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a after = $' mt = after[%r!:[a-z0-9\\*_-]+!i, 0] oop = false if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]") after = $' m[2] += mt expr = after end if m[1] == '#' oid = get_element_by_id(m[2]) nodes = oid ? [oid] : [] expr = after else m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "." ret = [] nodes.each do |node| case m[2] when '*' node.traverse_element { |n| ret << n } else if node.respond_to? :get_elements_by_tag_name ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)] end end end nodes = ret end last = nil end hist << expr break if hist[-1] == hist[-2] nodes, expr = Elements.filter(nodes, expr) end nodes = done + nodes.flatten.uniq if blk nodes.each(&blk) self else Elements[*nodes] end end |
- (Object) swap(html = nil, &blk)
Replace this element and its contents with the nodes contained in the html string.
133 134 135 136 |
# File 'lib/hpricot/traverse.rb', line 133
# Marks the parent as altered, then has it replace this node with the nodes
# built by #make from +html+ (or +blk+).
def swap(html = nil, &blk)
  container = parent
  container.altered!
  container.replace_child(self, make(html, &blk))
end
- (Boolean) text?
Is this object an HTML text node?
11 |
# File 'lib/hpricot/traverse.rb', line 11
# True when this object is a Text::Trav.
def text?
  is_a?(Text::Trav)
end
- (Object) to_html Also known as: to_s
Builds an HTML string from this node and its contents. If you need to write to a stream, try calling output(io) as a method on this object.
36 37 38 |
# File 'lib/hpricot/traverse.rb', line 36
# Returns whatever #output produces when given a fresh empty String.
def to_html
  buffer = ""
  output(buffer)
end
- (Object) to_original_html
Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.
43 44 45 |
# File 'lib/hpricot/traverse.rb', line 43
# Like #to_html, but calls #output with the :preserve option set to true.
def to_original_html
  buffer = ""
  output(buffer, :preserve => true)
end
- (Object) to_plain_text
Builds a string from the text contained in this node. All HTML elements are removed.
148 149 150 151 152 153 154 |
# File 'lib/hpricot/traverse.rb', line 148
# Joins the to_plain_text of every child, strips the result, and collapses
# runs of two or more newlines to exactly two. Returns "" when this object
# has no children.
def to_plain_text
  return "" unless respond_to?(:children) && children

  text = children.map(&:to_plain_text).join.strip
  text.gsub(/\n{2,}/, "\n\n")
end
- (Object) traverse_element(*names, &block)
traverse_element traverses elements in the tree. It yields elements in depth first order.
If names are empty, it yields all elements. If non-empty names are given, they should be a list of universal names.
A nested element is yielded in depth first order as follows.
t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>')
t.traverse_element("a", "c") {|e| p e}
# =>
{elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
{emptyelem <a id="1">}
{emptyelem <c id="2">}
Universal names are specified as follows.
t = Hpricot(<<'End')
<html>
<meta name="robots" content="index,nofollow">
<meta name="author" content="Who am I?">
</html>
End
t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
# =>
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}
374 375 376 377 378 379 380 381 382 383 |
# File 'lib/hpricot/traverse.rb', line 374
# With no +names+, forwards the block to traverse_all_element. Otherwise
# builds a Hash mapping each name to true and forwards it, with the block,
# to traverse_some_element. Always returns nil.
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
  else
    wanted = names.each_with_object({}) { |n, set| set[n] = true }
    traverse_some_element(wanted, &block)
  end
  nil
end
- (Object) traverse_text(&block)
traverse_text traverses texts in the tree.
680 681 682 683 |
# File 'lib/hpricot/traverse.rb', line 680
# Forwards the block to traverse_text_internal and discards its result;
# always returns nil.
def traverse_text(&block) # :yields: text
  traverse_text_internal(&block)
  return nil
end
- (Boolean) xmldecl?
Is this object an XML declaration?
13 |
# File 'lib/hpricot/traverse.rb', line 13
# True when this object is an XMLDecl::Trav.
def xmldecl?
  is_a?(XMLDecl::Trav)
end
- (Object) xpath
Builds a unique XPath string for this node, from the root of the document containing it.
209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/hpricot/traverse.rb', line 209
# Builds an XPath string for this node.
#
# An element with an +id+ attribute yields "//name[@id='<id>']". Otherwise
# the result is the parent's xpath joined to this node's pathname. When two
# or more siblings share this node's pathname, a 1-based "[k]" index among
# those siblings is appended.
def xpath
  return "//#{self.name}[@id='#{get_attribute('id')}']" if elem? && has_attribute?('id')

  same_tag = 0  # siblings (self included) sharing this node's pathname
  own_index = 0 # how many of those precede this node
  siblings = parent.children
  if siblings
    siblings.each do |e|
      own_index = same_tag if e == self
      same_tag += 1 if e.pathname == pathname
    end
  end

  # File.join is kept from the original: it joins the two segments with "/".
  path = File.join(parent.xpath, pathname)
  path += "[#{own_index + 1}]" if same_tag >= 2
  path
end