Class: XOXO::Parser

Inherits:
Object show all
Defined in:
lib/xoxo.rb

Constant Summary

# Elements that only hold other elements; #parse ignores text that appears
# directly inside them. Frozen so the shared list cannot be altered at runtime.
CONTAINER_TAGS = %w[dl ol ul].freeze

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Constructor Details

- (Parser) initialize(xoxo)

Initialize new XOXO Parser.



165
166
167
168
169
170
171
172
# File 'lib/xoxo.rb', line 165

def initialize(xoxo)
  @parser = REXML::Parsers::PullParser.new(xoxo)

  @textstack = ['']
  @xostack = []
  @structs = []
  @tags = []
end

Instance Attribute Details

- (Array) structs (readonly)

Returns:

  • (Array)


162
163
164
# File 'lib/xoxo.rb', line 162

# Structures collected by the parser; #parse stores its end result here.
#
# @return [Array]
def structs
  @structs
end

Instance Method Details

- (Hash) normalize_attrs(attrs) (private)

Take a hash of attributes and make sure the keys are all lowercase.

Parameters:

  • attrs (Hash)

    the attributes hash

Returns:

  • (Hash)

    the normalized attributes hash



255
256
257
258
259
260
261
262
# File 'lib/xoxo.rb', line 255

# Take a hash of attributes and make sure the keys are all lowercase.
#
# The hash is modified in place. Every key that is not already lowercase is
# replaced by its lowercase form and keeps its value; when the renamed key is
# "rel" or "type" the value is lowercased as well. Entries whose key is
# already lowercase are left untouched.
#
# @param attrs [Hash] the attributes hash (String keys and values)
# @return [Hash] the same hash object, normalized
def normalize_attrs(attrs)
  # Iterate over a snapshot of the keys: the hash is rewritten inside the loop.
  attrs.keys.each do |key|
    lowered = key.downcase
    next if key == lowered

    # Hash#delete hands back the stored value, so it survives the rename.
    value = attrs.delete(key)
    value = value.downcase  if lowered == "rel" || lowered == "type"
    attrs[lowered] = value
  end
  attrs
end

- (Parser) parse

Parse XOXO document.

The end result of parsing is stored in the structs attribute.

Returns:

  • (Parser)

    the current parser object



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/xoxo.rb', line 179

# Parse the XOXO document.
#
# Drains the pull parser event by event and builds plain Ruby structures:
# <ol>/<ul> become Arrays, <dl> becomes a Hash keyed by the <dt> text, and
# <a> becomes a Hash of its normalized attributes in which 'href' is stored
# as 'url' and the link text is stored as 'text' (unless it is empty or
# merely repeats the title or the URL).
#
# The end result of parsing is stored in the structs attribute.
#
# @return [Parser] the current parser object
def parse
  while @parser.has_next?
    event = @parser.pull

    if event.start_element?
      @tags << event[0]

      case event[0]
      when "a"
        attrs = normalize_attrs event[1]
        # Expose the link target under 'url' instead of 'href'.
        attrs['url'] = attrs.delete('href')
        push attrs
        @textstack << ''

      when "dl"
        push({})

      when "ol", "ul"
        push []

      when "li", "dt", "dd"
        # Fresh buffer for the text of this element.
        @textstack << ''

      end
    elsif event.end_element?
      @tags.pop

      case event[0]
      when "a"
        val = @textstack.pop
        unless val.empty?
          # Link text that only repeats the title or URL carries no information.
          val = ''  if @xostack.last['title'] == val
          val = ''  if @xostack.last['url'] == val
          @xostack.last['text'] = val  unless val.empty?
        end
        @xostack.pop

      when "dl", "ol", "ul"
        @xostack.pop

      when "li"
        val = @textstack.pop
        # Everything above the open list on @structs was built inside this
        # item; move it into the list. The check is by identity: an empty
        # nested list is == to an empty parent but is not the same object.
        until @structs.last.equal?(@xostack.last)
          val = @structs.pop
          @xostack.last << val
        end
        # Only an item without nested structures contributes its text.
        @xostack.last << val  if val.kind_of? String

      when "dt"
        # skip: the term's text stays on @textstack until the matching <dd>

      when "dd"
        val = @textstack.pop
        key = @textstack.pop

        # A structure built inside the <dd> replaces its text (identity check,
        # for the same reason as in the "li" branch).
        val = @structs.pop  unless @structs.last.equal?(@xostack.last)
        @xostack.last[key] = val

      end
    elsif event.text?
      # Text outside any element or directly inside a container is ignored.
      unless @tags.empty? || CONTAINER_TAGS.include?(@tags.last)
        @textstack.last << event[0]
      end
    end
  end

  self
end

- (Object) push(struct) (private)



265
266
267
268
269
270
271
272
273
274
275
# File 'lib/xoxo.rb', line 265

# Make +struct+ the current open structure.
#
# Normally the structure is recorded on @structs and opened on @xostack.
# The exception is an empty Hash (a <dl>) arriving right after an <a> that
# has already been closed: the link's Hash is reopened instead, so the
# definitions that follow are added to the link itself.
#
# "Already closed" is decided by object identity, not by ==, because two
# distinct hashes with the same contents would otherwise be confused.
#
# @param struct [Hash, Array] the structure for the element being opened
# @return [Array] the stack of open structures
def push(struct)
  previous = @structs.last
  reopen_link = struct == {} && previous.kind_of?(Hash) &&
    previous.has_key?('url') && !previous.equal?(@xostack.last)

  # put back the <a>-made one for extra def's; otherwise record the new one
  @structs << struct  unless reopen_link
  @xostack << @structs.last
end