Class: WebRobots::RobotsTxt

Inherits:
Object
  • Object
show all
Defined in:
lib/webrobots/robotstxt.rb

Defined Under Namespace

Classes: AccessControlLine, AgentLine, AllowLine, CrawlDelayLine, DisallowLine, ExtentionLine, Line, Parser, Record

Constant Summary

# robots.txt body that disallows every path ("/") for every user agent ("*").
# RobotsTxt.unfetchable parses this text to build its result.
DISALLOW_ALL =
<<-TXT
User-Agent: *
Disallow: /
TXT

Instance Attribute Summary (collapse)

Class Method Summary (collapse)

Instance Method Summary (collapse)

Constructor Details

- (RobotsTxt) initialize(site, records, options = nil)

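Creates a RobotsTxt for site from the given parsed records. The optional options hash may supply :target, :error and :sitemaps.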



525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
# File 'lib/webrobots/robotstxt.rb', line 525

# Sets up a robots.txt representation for +site+.
#
# Non-default records are kept only when no target is configured or when
# they match the target. Default records are always kept and are placed
# after all non-default records, preserving relative order in each group.
#
# @param site the site this robots.txt belongs to; exposed via #site
# @param records [Array, nil] parsed records responding to +default?+ and
#   +match?+; nil or empty yields an empty record list
# @param options [Hash, nil] optional settings; the keys read here are
#   +:error+, +:target+ and +:sitemaps+
def initialize(site, records, options = nil)
  @timestamp = Time.now
  @site = site
  @options = options || {}
  @last_checked = nil

  @error = @options[:error]
  @target = @options[:target]
  @sitemaps = @options[:sitemaps] || []

  # Nothing to classify: keep an empty list and stop here.
  if !records || records.empty?
    @records = []
    return
  end

  fallbacks, specific = records.partition { |rec| rec.default? }
  specific = specific.select { |rec| rec.match?(@target) } if @target
  @records = specific + fallbacks
end

Instance Attribute Details

- (Object) error

Returns the value of attribute error



551
552
553
# File 'lib/webrobots/robotstxt.rb', line 551

# Returns the error associated with this robots.txt, or nil if none is set.
# The value starts out as options[:error] from the constructor; #error!
# raises it when present.
def error
  @error
end

- (Object) site (readonly)

Returns the value of attribute site



550
551
552
# File 'lib/webrobots/robotstxt.rb', line 550

# Returns the site object that was passed to the constructor.
def site
  @site
end

- (Object) sitemaps (readonly)

Returns the value of attribute sitemaps



550
551
552
# File 'lib/webrobots/robotstxt.rb', line 550

# Returns the sitemaps supplied through options[:sitemaps] at construction,
# or an empty array when none were supplied.
def sitemaps
  @sitemaps
end

- (Object) timestamp (readonly)

Returns the value of attribute timestamp



550
551
552
# File 'lib/webrobots/robotstxt.rb', line 550

# Returns the Time captured (via Time.now) when this object was constructed.
def timestamp
  @timestamp
end

Class Method Details

+ (Object) unfetchable(site, reason, target = nil)



597
598
599
600
601
# File 'lib/webrobots/robotstxt.rb', line 597

# Builds the result used when a site's robots.txt could not be fetched:
# the DISALLOW_ALL text is parsed for +site+ and +reason+ is recorded as
# the resulting object's error.
#
# @param site the site handed to the parser
# @param reason the value stored as the result's error
# @param target optional target handed to Parser.new
# @return the parsed object with its error set to +reason+
def self.unfetchable(site, reason, target = nil)
  robots_txt = Parser.new(target).parse(DISALLOW_ALL, site)
  robots_txt.error = reason
  robots_txt
end

Instance Method Details

- (Boolean) allow?(request_uri, user_agent = nil)

Returns:

  • (Boolean)


576
577
578
579
580
581
582
583
584
585
# File 'lib/webrobots/robotstxt.rb', line 576

# Tells whether +request_uri+ may be fetched by +user_agent+.
#
# When no record is found for the agent, access is allowed and nothing else
# happens. Otherwise the record decides; in addition, if a previous check
# was recorded and the record specifies a delay, this call sleeps for
# whatever part of that delay has not yet elapsed since the previous check.
# The time of this check is then recorded.
#
# @param request_uri the URI handed to the record's +allow?+
# @param user_agent optional agent used to look up the record
# @return [Boolean] true when no record applies, else the record's verdict
def allow?(request_uri, user_agent = nil)
  rule = find_record(user_agent)
  return true unless rule

  permitted = rule.allow?(request_uri)

  # The delay is only consulted once an earlier check has been recorded.
  pause = @last_checked && rule.delay
  if pause
    remaining = pause - (Time.now - @last_checked)
    sleep remaining if remaining > 0
  end

  @last_checked = Time.now
  permitted
end

- (Object) error!

Raises:

  • (@error)


553
554
555
# File 'lib/webrobots/robotstxt.rb', line 553

# Raises the stored error, if there is one; otherwise does nothing and
# returns nil.
#
# @raise whatever is stored in @error
def error!
  return unless @error

  raise @error
end

- (Object) options(user_agent = nil)



587
588
589
590
# File 'lib/webrobots/robotstxt.rb', line 587

# Returns the options of the record found for +user_agent+, or an empty
# hash when no record is found.
#
# @param user_agent optional agent used to look up the record
# @return the record's options, or {} when there is no record
def options(user_agent = nil)
  rule = find_record(user_agent)
  rule ? rule.options : {}
end