Module: Pismo

Defined in:
lib/pismo.rb,
lib/pismo/reader.rb,
lib/pismo/version.rb,
lib/pismo/document.rb,
lib/pismo/utilities.rb,
lib/pismo/reader/base.rb,
lib/pismo/reader/tree.rb,
lib/pismo/reader/cluster.rb,
lib/pismo/internal_attributes.rb,
lib/pismo/external_attributes.rb

Defined Under Namespace

Modules: ExternalAttributes, InternalAttributes, Reader

Classes: Document, NFunctions, Utilities

Constant Summary

# Maps decimal Unicode code points (written as strings) to plain-ASCII
# replacements: horizontal ellipsis, en/em/thin spaces, en dash, and the
# curly single and double quotation marks. Frozen so the shared table
# cannot be altered by accident at runtime.
UNICODE_CONVERSIONS =
{
  "8230" => '...',
  "8194" => ' ',
  "8195" => ' ',
  "8201" => ' ',
  "8211" => '-',
  "8216" => '\'',
  "8217" => '\'',
  "8220" => '"',
  "8221" => '"'
}.freeze
# The same table as an array of [character, replacement] pairs, where the
# code point has been turned into the actual UTF-8 character.
TRANSLATED_CONVERSIONS =
UNICODE_CONVERSIONS.map {|k, v| [k.to_i.chr("UTF-8"), v] }.freeze
# Library version string.
VERSION =
"0.8.0".freeze

Class Method Summary (collapse)

Class Method Details

+ (Object) [](url)

Loads a URL, as in Pismo[url], and caches the resulting Pismo document (mostly useful for debugging)



35
36
37
38
# File 'lib/pismo.rb', line 35

# Returns the Pismo::Document cached for +url+, creating and storing one the
# first time that URL is requested. The cache is a hash kept in @docs.
def self.[](url)
  cache = (@docs ||= {})
  return cache[url] if cache[url]

  cache[url] = Pismo::Document.new(url)
end

+ (Object) document(handle, options = {})

Sugar method that makes creating document objects nicer



29
30
31
# File 'lib/pismo.rb', line 29

# Convenience factory: builds a Document from +handle+, passing +options+
# straight through to Document.new, and returns it.
def self.document(handle, options = {})
  doc = Document.new(handle, options)
  doc
end

+ (Object) normalize_entities(text)



40
41
42
43
# File 'lib/pismo.rb', line 40

# Decodes +text+ with a memoized HTMLEntities instance (kept in @entities)
# and passes the decoded string through normalize_unicode_characters,
# returning that method's result.
def self.normalize_entities(text)
  decoder = (@entities ||= HTMLEntities.new)
  decoded = decoder.decode(text)
  normalize_unicode_characters(decoded)
end

+ (Object) normalize_unicode_characters(html)



58
59
60
61
# File 'lib/pismo.rb', line 58

# Replaces every character listed in TRANSLATED_CONVERSIONS with its plain
# replacement and returns the converted string.
#
# An unfrozen +html+ is modified in place and returned, exactly as before.
# A frozen +html+ cannot be modified in place (gsub! would raise), so a copy
# is converted and returned instead and the original is left untouched.
def self.normalize_unicode_characters(html)
  html = html.dup if html.frozen?
  TRANSLATED_CONVERSIONS.each { |char, replacement| html.gsub!(char, replacement) }
  html
end