Class: PDF::Reader::PageTextReceiver

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/pdf/reader/page_text_receiver.rb

Overview

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constant Summary

SPACE =
" "

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Instance Attribute Details

- (Object) content (readonly)

Returns the value of attribute content



17
18
19
# File 'lib/pdf/reader/page_text_receiver.rb', line 17

# Read-only accessor for the @content instance variable.
#
# @return [Object] whatever is currently stored in @content
def content
  @content
end

- (Object) options (readonly)

Returns the value of attribute options



17
18
19
# File 'lib/pdf/reader/page_text_receiver.rb', line 17

# Read-only accessor for the @options instance variable.
#
# @return [Object] whatever is currently stored in @options
def options
  @options
end

- (Object) state (readonly)

Returns the value of attribute state



17
18
19
# File 'lib/pdf/reader/page_text_receiver.rb', line 17

# Read-only accessor for the @state instance variable.
#
# @return [Object] whatever is currently stored in @state
def state
  @state
end

Instance Method Details

- (Object) invoke_xobject(label)

XObjects



84
85
86
87
88
89
90
91
# File 'lib/pdf/reader/page_text_receiver.rb', line 84

# Asks @state to invoke the XObject identified by +label+ and inspects the
# object it yields. A PDF::Reader::FormXObject (or subclass) is walked with
# this receiver; any other yielded object is ignored.
#
# @param label [Object] passed through unchanged to @state.invoke_xobject
# @return [Object] whatever @state.invoke_xobject returns
def invoke_xobject(label)
  @state.invoke_xobject(label) do |candidate|
    candidate.walk(self) if candidate.is_a?(PDF::Reader::FormXObject)
  end
end

- (Object) move_to_next_line_and_show_text(str)

'



70
71
72
73
# File 'lib/pdf/reader/page_text_receiver.rb', line 70

# Handler for the ' operator: first tells @state to move to the start of the
# next line, then shows +str+ via #show_text.
#
# @param str [Object] the text operand, passed unchanged to #show_text
# @return [Object] whatever #show_text returns
def move_to_next_line_and_show_text(str) # '
  # The line move must happen before the text is recorded.
  @state.move_to_start_of_next_line
  show_text(str)
end

- (Object) page=(page)

starting a new page



41
42
43
44
45
46
# File 'lib/pdf/reader/page_text_receiver.rb', line 41

# Prepares the receiver for a new page: creates a fresh PageState for it,
# empties the collected content and characters, and stores the page's
# dereferenced MediaBox.
#
# @param page [Object] must respond to #objects (which responds to #deref)
#   and #attributes (indexable by :MediaBox)
def page=(page)
  @state = PageState.new(page)
  @content, @characters = [], []

  resolver = page.objects
  @mediabox = resolver.deref(page.attributes[:MediaBox])
end

- (Object) set_spacing_next_line_show_text(aw, ac, string)



75
76
77
78
79
# File 'lib/pdf/reader/page_text_receiver.rb', line 75

# Handler for the " operator: applies the word spacing and then the
# character spacing to @state, and finally delegates to
# #move_to_next_line_and_show_text.
#
# @param aw [Object] word spacing, passed to @state.set_word_spacing
# @param ac [Object] character spacing, passed to @state.set_character_spacing
# @param string [Object] text operand, passed to #move_to_next_line_and_show_text
# @return [Object] whatever #move_to_next_line_and_show_text returns
def set_spacing_next_line_show_text(aw, ac, string) # "
  @state.tap do |text_state|
    text_state.set_word_spacing(aw)
    text_state.set_character_spacing(ac)
  end
  move_to_next_line_and_show_text(string)
end

- (Object) show_text(string)

Text Showing Operators

record text that is drawn on the page



56
57
58
# File 'lib/pdf/reader/page_text_receiver.rb', line 56

# Handler for the Tj operator, e.g. Tj (AWAY). Hands the text straight to
# internal_show_text.
#
# @param string [Object] the text operand, passed unchanged
# @return [Object] whatever internal_show_text returns
def show_text(string) # Tj (AWAY)
  internal_show_text(string)
end

- (Object) show_text_with_positioning(params)

TJ [(A) 120 (WA) 20 (Y)]



60
61
62
63
64
65
66
67
68
# File 'lib/pdf/reader/page_text_receiver.rb', line 60

# Handler for the TJ operator, e.g. TJ [(A) 120 (WA) 20 (Y)].
#
# Walks +params+ in order. Each String element is handed to
# internal_show_text; every other element is forwarded to
# @state.process_glyph_displacement(0, element, false).
#
# @param params [#each] the operand list
# @return [Object] the result of params.each
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    case element
    when String then internal_show_text(element)
    else @state.process_glyph_displacement(0, element, false)
    end
  end
end