Class: File

Inherits:
Object show all
Defined in:
lib/epitools/core_ext/file.rb

Instance Method Summary collapse

Instance Method Details

#each_line_with_offset ⇒ Object

Iterate over each line of the file, yielding the line and the byte offset of the start of the line in the file


149
150
151
152
153
154
155
156
157
158
# File 'lib/epitools/core_ext/file.rb', line 149

# Iterate over each remaining line of the file, yielding the line together
# with the byte offset at which that line starts in the file.
#
# Iteration begins at the current file position, so the first offset that is
# reported is the current `pos` (which is 0 for a freshly opened file).
#
# @yield [line, offset] each line and the byte offset of its first byte
# @return [Enumerator] when no block is given
def each_line_with_offset
  return to_enum(:each_line_with_offset) unless block_given?

  # `each_line` reads from wherever the file currently is, so the first line
  # starts at the current position, not necessarily at byte 0.
  offset = pos

  each_line do |line|
    yield line, offset
    # after a line has been read, the position is the start of the next line
    offset = tell
  end
end

#reverse_each(&block) ⇒ Object

A streaming `reverse_each` implementation. (For large files, it's faster and uses less memory.)


13
14
15
16
17
18
# File 'lib/epitools/core_ext/file.rb', line 13

# Stream the lines of the file in reverse order: move to the end of the file,
# then hand the block to `reverse_each_from_current_pos`.
#
# @return [Enumerator] when no block is given
def reverse_each(&block)
  if block_given?
    seek_end
    reverse_each_from_current_pos(&block)
  else
    to_enum(:reverse_each)
  end
end

#reverse_each_from_current_pos {|fragment| ... } ⇒ Object

Read each line of file backwards (from the current position.)

Yields:

  • (fragment)

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/epitools/core_ext/file.rb', line 54

# Read each line of the file backwards, starting from the current position.
#
# If the current position is in the middle of a line, that whole line
# (including the part after the current position) is yielded first.
# The file is consumed in 4096-byte chunks via `reverse_read`.
#
# @yield [line] each line, last to first
# @return [Enumerator] when no block is given
def reverse_each_from_current_pos
  return to_enum(:reverse_each_from_current_pos) unless block_given?

  # read the rest of the current line, in case we started in the middle of a line
  # (`gets` returns nil at EOF, in which case there is nothing to carry over)
  start_pos = pos
  fragment  = gets || ""
  seek(start_pos)

  # true once `fragment` holds the first line of a chunk that was already read
  from_chunk = false

  while (data = reverse_read(4096))
    lines = data.each_line.to_a

    if lines.last.end_with?("\n")
      # The chunk ends exactly on a line boundary, so the line carried over
      # from the previous chunk is already complete; emit it now, otherwise
      # it would be overwritten below and lost.
      yield fragment if from_chunk
    else
      # The chunk ends mid-line: glue on the tail we are carrying.
      lines.last << fragment
    end

    # The first line of the chunk may continue into the preceding chunk,
    # so hold it back until the next iteration (or the end of the loop).
    fragment   = lines.shift
    from_chunk = true

    lines.reverse_each { |line| yield line }
  end

  # An empty fragment here means the file had nothing to read at all.
  yield fragment unless fragment.empty?
end

#reverse_read(length, block_aligned = false) ⇒ Object

Read the previous `length` bytes. After the read, `pos` will be at the beginning of the region that you just read. Returns `nil` when the beginning of the file is reached.

If the `block_aligned` argument is `true`, reads will always be aligned to file positions which are multiples of 512 bytes. (This should increase performance slightly.)


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/epitools/core_ext/file.rb', line 27

# Read the bytes that precede the current position and leave `pos` at the
# start of the region that was read.
#
# @param length [Integer] how many bytes to step back (fewer are returned
#   when the beginning of the file is closer than that)
# @param block_aligned [Boolean] when true, `length` must be a multiple of
#   512 and the read is extended so that it starts at a file position which
#   is a multiple of `length`
# @return [String, nil] the bytes read, or nil if `pos` is already 0
# @raise [RuntimeError] if `block_aligned` is set and `length` is not a
#   multiple of 512
def reverse_read(length, block_aligned=false)
  if block_aligned && length % 512 != 0
    raise "length must be a multiple of 512"
  end

  finish = pos
  return nil if finish.zero?

  # stretch the read backwards so that it begins on a `length` boundary
  length += finish % length if block_aligned

  # never step back past the beginning of the file
  start = length >= finish ? 0 : finish - length

  seek(start)
  chunk = read(finish - start)
  seek(start)

  chunk
end

#reverse_readline ⇒ Object

Read the previous line (leaving `pos` at the beginning of the string that was read.)

Raises:

  • (BOFError) — if `pos` is already at the beginning of the file


91
92
93
94
95
96
97
98
99
# File 'lib/epitools/core_ext/file.rb', line 91

# Read the line that precedes the current position and leave `pos` at the
# beginning of that line.
#
# @return [String] the line that was read
# @raise [BOFError] if `pos` is already 0
def reverse_readline
  raise BOFError, "beginning of file reached" if pos.zero?

  # An end index of -2 lets the backwards search ignore a newline sitting
  # directly before the current position.
  seek_backwards_to("\n", 512, -2)

  line_start = pos
  line       = readline
  seek(line_start)

  line
end

#seek_backwards_to(string, blocksize = 512, rindex_end = -1) ⇒ Object Also known as: reverse_seek_to

Scan backwards in the file until `string` is found, and set the IO's pos to the first character after the matched string.


126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/epitools/core_ext/file.rb', line 126

# Scan backwards through the file until `string` is found, and set the
# position to the first byte after the match.
#
# @param string [String] the text to look for (matched byte-wise)
# @param blocksize [Integer] number of bytes read per step
# @param rindex_end [Integer] index handed to `String#rindex` for the FIRST
#   block only, i.e. the block ending at the starting position (-2 skips a
#   one-byte match sitting directly before that position). Later blocks are
#   always searched in full.
# @return [Integer, nil] the new position, or nil if the beginning of the
#   file was reached without a match (in which case `pos` is left at 0)
# @raise [RuntimeError] if `blocksize` is smaller than the string's byte size
def seek_backwards_to(string, blocksize=512, rindex_end=-1)
  # `read` hands back binary data, so compare bytes with bytes
  needle = string.b
  raise "Error: blocksize must be at least as large as the string" if blocksize < needle.bytesize

  search_end = rindex_end

  loop do
    data = reverse_read(blocksize)
    # reverse_read returns nil when we are already at the beginning of the file
    return nil if data.nil?

    if (index = data.rindex(needle, search_end))
      seek(index + needle.bytesize, IO::SEEK_CUR)
      break
    elsif pos == 0
      return nil
    else
      # step forward a little so a match straddling two blocks is still seen
      seek(needle.bytesize - 1, IO::SEEK_CUR)
      # the end restriction only concerns the starting position; applying it
      # again would hide a match at the very end of an earlier block
      search_end = -1
    end
  end

  pos
end

#seek_end ⇒ Object

Seek to `EOF`


77
78
79
# File 'lib/epitools/core_ext/file.rb', line 77

# Move the file position to the end of the file.
#
# @return [Integer] the result of `seek`
def seek_end
  distance_from_end = 0
  seek(distance_from_end, File::SEEK_END)
end

#seek_start ⇒ Object

Seek to `BOF`


84
85
86
# File 'lib/epitools/core_ext/file.rb', line 84

# Move the file position to the beginning of the file.
#
# @return [Integer] the result of `seek`
def seek_start
  seek(0, IO::SEEK_SET)
end

#seek_to(string, blocksize = 512) ⇒ Object

Scan through the file until `string` is found, and set the IO's pos to the first character of the matched string.


104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/epitools/core_ext/file.rb', line 104

# Scan forward through the file until `string` is found, and set the
# position to the first byte of the match.
#
# @param string [String] the text to look for (matched byte-wise)
# @param blocksize [Integer] number of bytes read per step
# @return [Integer, nil] the position of the match, or nil if the end of the
#   file was reached without finding it
# @raise [RuntimeError] if `blocksize` is smaller than the string's byte size
def seek_to(string, blocksize=512)
  # `read(n)` returns binary data, so search with a binary copy of the string
  # and measure it in bytes; this keeps multibyte strings working.
  needle = string.b
  raise "Error: blocksize must be at least as large as the string" if blocksize < needle.bytesize

  loop do
    data = read(blocksize)
    # read returns nil when we are already at the end of the file
    return nil if data.nil?

    if (index = data.index(needle))
      # rewind from the end of the block to the start of the match
      seek(-(data.bytesize - index), IO::SEEK_CUR)
      break
    elsif eof?
      return nil
    else
      # step back a little so a match straddling two blocks is still seen
      seek(-(needle.bytesize - 1), IO::SEEK_CUR)
    end
  end

  pos
end