Class: PreviewPage

Inherits:
Object
  • Object
show all
Defined in:
app/models/preview_page.rb

Overview

A convenience class for parsing an Atom feed into items useful for Article-like display.

Instance Attribute Summary (collapse)

Class Method Summary (collapse)

Instance Method Summary (collapse)

Instance Attribute Details

- (Object) author

Returns the value of attribute author



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the author attribute.
# parse_atom_content assigns it from the name of the Atom entry's first author.
#
# @return [Object] the current value of @author
def author
  @author
end

- (Object) content_buckets

Returns the value of attribute content_buckets



14
15
16
# File 'app/models/preview_page.rb', line 14

# Reader for the content_buckets attribute.
# put_in_buckets assigns it the result of a ContentBucket lookup by name.
#
# @return [Object] the current value of @content_buckets
def content_buckets
  @content_buckets
end

- (Object) is_dpl

Returns the value of attribute is_dpl



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the is_dpl flag.
# parse_atom_content sets it to true when the entry's category terms include 'dpl'.
#
# @return [Object] the current value of @is_dpl
def is_dpl
  @is_dpl
end

- (Object) original_content

Returns the value of attribute original_content



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the original_content attribute.
# parse_atom_content assigns it the Atom entry's content converted with to_s.
#
# @return [Object] the current value of @original_content
def original_content
  @original_content
end

- (Object) page_source

Returns the value of attribute page_source



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the page_source attribute.
# new_from_source assigns it the record returned by PageSource.find_by_name.
#
# @return [Object] the current value of @page_source
def page_source
  @page_source
end

- (Object) published_at

Returns the value of attribute published_at



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the published_at attribute.
# parse_atom_content assigns it the entry's published value, falling back to
# updated_at when the entry has none.
#
# @return [Object] the current value of @published_at
def published_at
  @published_at
end

- (Object) source

Returns the value of attribute source



12
13
14
# File 'app/models/preview_page.rb', line 12

# Reader for the source attribute.
# new_from_source assigns it the source name the page was requested with.
#
# @return [Object] the current value of @source
def source
  @source
end

- (Object) source_id

Returns the value of attribute source_id



12
13
14
# File 'app/models/preview_page.rb', line 12

# Reader for the source_id attribute.
# new_from_source assigns it the given source_id converted with to_s.
#
# @return [Object] the current value of @source_id
def source_id
  @source_id
end

- (Object) source_url

Returns the value of attribute source_url



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the source_url attribute.
# parse_atom_content fills it from the href of the entry's first link when it
# is still blank; convert_links parses it to find the host to make relative.
#
# @return [Object] the current value of @source_url
def source_url
  @source_url
end

- (Object) tags

Returns the value of attribute tags



14
15
16
# File 'app/models/preview_page.rb', line 14

# Reader for the tags attribute.
# set_tags assigns it the result of a Tag lookup by normalized name.
#
# @return [Object] the current value of @tags
def tags
  @tags
end

- (Object) title

Returns the value of attribute title



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the title attribute.
# parse_atom_content assigns it the Atom entry's title.
#
# @return [Object] the current value of @title
def title
  @title
end

- (Object) updated_at

Returns the value of attribute updated_at



13
14
15
# File 'app/models/preview_page.rb', line 13

# Reader for the updated_at attribute.
# parse_atom_content assigns it the entry's updated value, or the current UTC
# time when the entry has none.
#
# @return [Object] the current value of @updated_at
def updated_at
  @updated_at
end

Class Method Details

+ (Object) new_from_source(source, source_id)



19
20
21
22
23
24
25
26
27
28
# File 'app/models/preview_page.rb', line 19

# Builds a PreviewPage for one entry of a named page source.
#
# Looks the source up with PageSource.find_by_name; when nothing is found the
# method returns nil. Otherwise a new page is populated with the source name,
# the stringified source_id and the PageSource record, and its Atom content is
# parsed before the page is returned.
#
# @param source [Object] name passed to PageSource.find_by_name
# @param source_id [Object] identifier of the entry; stored as a String
# @return [PreviewPage, nil] the populated page, or nil for an unknown source
def self.new_from_source(source, source_id)
  found_source = PageSource.find_by_name(source)
  return nil if found_source.blank?

  PreviewPage.new.tap do |preview|
    preview.source = source
    preview.source_id = source_id.to_s
    preview.page_source = found_source
    preview.parse_atom_content
  end
end

Instance Method Details

- (Object) content



40
41
42
43
44
45
46
47
48
# File 'app/models/preview_page.rb', line 40

# Returns the page content as HTML with its links converted for preview.
#
# Blank original content yields an empty string. Otherwise convert_links is
# run and the converted document is serialized with to_html.
#
# @return [String] converted HTML, or '' when there is no content
def content
  return '' if original_content.blank?

  convert_links
  @converted_content.to_html
end

Puts together a hash of the <a href>s in the content, keyed by href with the link text as the value.



54
55
56
57
58
59
60
61
62
63
64
# File 'app/models/preview_page.rb', line 54

# Collects the links of the unconverted content.
#
# The result maps each anchor's href to the anchor's text content; anchors
# without an href are skipped, and a later anchor with the same href
# overwrites an earlier one. The hash is built once and then reused.
#
# @return [Hash] href => link text
def content_links
  return @content_links unless @content_links.nil?

  @content_links = {}
  parsed_content.css('a').each do |link|
    target = link['href']
    @content_links[target] = link.content if target
  end
  @content_links
end

converts relative hrefs and hrefs that refer to the feed source to something relative to /preview/pages/



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'app/models/preview_page.rb', line 83

# Rewrites the anchors of the content so they point into the preview area.
#
# Relative hrefs, and http/https hrefs whose host equals the host of
# source_url, are converted to /preview/... paths. Hrefs that cannot be
# parsed are replaced by '#' and flagged with the 'bad_link' class; hrefs
# that should be converted but cannot be mapped are flagged 'warning_link'.
# In-page anchors ('#...') and links to other hosts are left untouched.
#
# A scheme-less href that names a different host (a protocol-relative link
# such as //other.example.org/page) is an external link and is left
# untouched instead of being rewritten as if it were a local path.
#
# The converted document is kept in @converted_content, so hrefs that were
# already rewritten are seen again on a later call.
#
# @return [Integer] number of anchors whose href was rewritten (0 when the
#   original content is blank)
def convert_links
  # if no content, don't bother.
  return 0 if original_content.blank?

  source_uri = URI.parse(source_url)
  host_to_make_relative = source_uri.host

  if @converted_content.nil?
    @converted_content = Nokogiri::HTML::DocumentFragment.parse(original_content)
  end

  convert_count = 0
  @converted_content.css('a').each do |anchor|
    href = anchor['href']
    next unless href
    next if href =~ /^\#/ # in-page anchor, don't change

    # make sure the URL is valid format; deliberately broad so that any
    # parse failure only flags the link instead of aborting the preview
    begin
      original_uri = URI.parse(href)
    rescue StandardError
      flag_preview_link(anchor, '#', 'bad_link', 'Bad Link, Please edit or remove it.')
      next
    end

    same_host = (original_uri.host == host_to_make_relative)
    if original_uri.scheme.nil?
      # scheme-less but pointing at another host: external, leave alone
      next unless original_uri.host.nil? || same_host
      relative_link = true
    elsif %w[http https].include?(original_uri.scheme) && same_host
      relative_link = false
    else
      next
    end

    # Category: links are only special-cased for scheme-less hrefs
    newhref = preview_href_for(original_uri, source_uri, relative_link)
    if newhref.nil?
      if relative_link
        flag_preview_link(anchor, '#', 'warning_link', 'Relative link, unable to show in preview')
      else
        flag_preview_link(anchor, original_uri.to_s, 'warning_link', 'Unable to handle in preview')
      end
      next
    end

    # attach the fragment to the end of it if there was one
    newhref += "##{original_uri.fragment}" unless original_uri.fragment.blank?
    anchor.set_attribute('href', newhref)
    convert_count += 1
  end
  convert_count
end

# Maps a link that belongs to the feed source onto a preview path.
# Returns nil when no entry id can be teased out of the link's path.
def preview_href_for(original_uri, source_uri, allow_category)
  if original_uri.path =~ /^\/wiki\/(.*)/ # does path start with '/wiki'? - then strip it out
    wiki_title = $1
    if allow_category && wiki_title =~ /Category\:(.+)/
      "/preview/showcategory/" + $1
    else
      '/preview/pages/' + wiki_title
    end
  elsif source == 'copwiki'
    '/preview/pages/' + original_uri.path
  else
    # tease an id out of the href: strip the source path (minus this page's
    # own id) from the link's path; an unchanged path means no id was found
    target_id = original_uri.path.gsub(source_uri.path.gsub(source_id, ''), '')
    target_id != original_uri.path ? "/preview/page/#{source}/#{target_id}" : nil
  end
end

# Points an anchor at +href+ and marks it with a CSS class and tooltip title.
def flag_preview_link(anchor, href, css_class, title)
  anchor.set_attribute('href', href)
  anchor.set_attribute('class', css_class)
  anchor.set_attribute('title', title)
end

private :preview_href_for, :flag_preview_link


66
67
68
69
70
71
72
73
74
75
76
77
# File 'app/models/preview_page.rb', line 66

# Collects the links of the converted content.
#
# Runs convert_links first when no converted document exists yet, then maps
# each anchor's href to the anchor's text content. The hash is built once and
# then reused.
#
# convert_links returns early without building a document when the original
# content is blank; in that case there are no links and an empty hash is
# returned instead of calling css on nil.
#
# @return [Hash] converted href => link text
def converted_links
  return @converted_links unless @converted_links.nil?

  @converted_links = {}
  convert_links if @converted_content.nil?
  # still nil here means there was no content to convert
  return @converted_links if @converted_content.nil?

  @converted_content.css('a').each do |anchor|
    href = anchor['href']
    @converted_links[href] = anchor.content if href
  end
  @converted_links
end

- (Object) parse_atom_content



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'app/models/preview_page.rb', line 198

# Populates this page from the Atom entry that page_source returns for
# source_id.
#
# Sets updated_at (current UTC time when the entry has none), published_at
# (falls back to updated_at), title, original_content, and - when the entry
# provides them - author (first author's name) and source_url (first link's
# href, only if source_url is still blank). An entry without authors or
# links leaves those attributes untouched instead of raising on nil.
#
# When the entry has categories, their terms are passed to set_tags and
# put_in_buckets, and is_dpl is set to true if one of the terms is 'dpl'.
#
# @return [Object, nil] the result of put_in_buckets, or nil when the entry
#   has no categories
def parse_atom_content()
  parsed_atom_entry = page_source.atom_page_entry(self.source_id)

  entry_updated = parsed_atom_entry.updated
  self.updated_at = entry_updated.nil? ? Time.now.utc : entry_updated

  entry_published = parsed_atom_entry.published
  self.published_at = entry_published.nil? ? self.updated_at : entry_published

  self.title = parsed_atom_entry.title

  # an entry is not guaranteed to carry links or authors of its own
  entry_links = parsed_atom_entry.links
  if self.source_url.blank? && !entry_links.blank?
    self.source_url = entry_links[0].href
  end

  entry_authors = parsed_atom_entry.authors
  self.author = entry_authors[0].name unless entry_authors.blank?

  self.original_content = parsed_atom_entry.content.to_s

  entry_categories = parsed_atom_entry.categories
  return nil if entry_categories.blank?

  terms = entry_categories.map(&:term)

  # flag as dpl
  self.is_dpl = true if terms.include?('dpl')

  self.set_tags(terms)
  self.put_in_buckets(terms)
end

- (Object) parsed_content

parses original_content with Nokogiri



33
34
35
36
37
38
# File 'app/models/preview_page.rb', line 33

# Returns original_content parsed into a Nokogiri HTML document fragment.
# The fragment is parsed on first use and the same object is returned on
# later calls.
#
# @return [Object] result of Nokogiri::HTML::DocumentFragment.parse
def parsed_content
  return @parsed_content unless @parsed_content.nil?

  @parsed_content = Nokogiri::HTML::DocumentFragment.parse(original_content)
end

- (Object) put_in_buckets(categoryarray)



188
189
190
191
192
193
194
195
196
# File 'app/models/preview_page.rb', line 188

# Assigns content_buckets from a list of category names.
#
# Each name is normalized with ContentBucket.normalizename and the buckets
# with those names are looked up. The names are passed as a bound parameter
# rather than interpolated into the SQL, so names containing quotes cannot
# break or alter the query. An empty list assigns an empty array without
# querying (an empty IN () list is not valid SQL).
#
# @param categoryarray [Array] category names, e.g. Atom category terms
# @return [Array] the buckets that were assigned
def put_in_buckets(categoryarray)
  namearray = categoryarray.map { |name| ContentBucket.normalizename(name) }

  buckets =
    if namearray.empty?
      []
    else
      ContentBucket.find(:all, :conditions => ['name IN (?)', namearray])
    end
  self.content_buckets = buckets
end

- (Object) set_tags(tagarray)



176
177
178
179
180
181
182
183
184
185
186
# File 'app/models/preview_page.rb', line 176

# Assigns tags from a list of tag names.
#
# Each name is normalized with Tag.normalizename, names found in
# Tag::BLACKLIST are dropped, and the tags with the remaining names are
# looked up. The names are passed as a bound parameter rather than
# interpolated into the SQL, so names containing quotes cannot break or
# alter the query. When nothing is left after filtering, an empty array is
# assigned without querying (an empty IN () list is not valid SQL).
#
# @param tagarray [Array] tag names, e.g. Atom category terms
# @return [Array] the tags that were assigned
def set_tags(tagarray)
  namearray = tagarray.map { |tag_name| Tag.normalizename(tag_name) }
  namearray = namearray.reject { |name| Tag::BLACKLIST.include?(name) }

  taglist =
    if namearray.empty?
      []
    else
      Tag.find(:all, :conditions => ['name IN (?)', namearray])
    end
  self.tags = taglist
end