Class: Rosette::Core::SnapshotFactory

Inherits:
Object
  • Object
show all
Defined in:
lib/rosette/core/snapshots/snapshot_factory.rb

Overview

Takes snapshots of git repos. A snapshot is a simple key/value map (hash) of paths to commit ids. Each commit id identifies the commit in which the corresponding file last changed.

Examples:

# Configure the factory through its chainable setters, then take the snapshot.
snapshot = SnapshotFactory.new
  .set_repo_config(repo_config)
  .set_start_commit_id('73cd130a42017d794ffa86ef0d255541d518a7b3')
  .take_snapshot

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ SnapshotFactory

Creates a new factory.


36
37
38
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 36

# Creates a new factory. Delegates to +reset+, which clears the repo config
# and start commit id and sets the path list to an empty array.
def initialize
  reset
end

Instance Attribute Details

#paths ⇒ Object (readonly)

Returns the value of attribute paths


33
34
35
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 33

# Reader for the explicit paths to include in the snapshot, as last stored by
# +set_paths+ (an empty array on a fresh or reset factory).
def paths
  @paths
end

#repo_config ⇒ Repo (readonly)


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 32

# Takes snapshots of git repos. A snapshot is a hash that maps file paths to
# the id of the commit in which each file last changed, as seen from the
# configured start commit.
#
# The factory is configured through chainable setters and is reset after each
# call to +take_snapshot+ that returns normally.
class SnapshotFactory
  attr_reader :repo_config, :start_commit_id, :paths

  # Creates a new factory.
  def initialize
    reset
  end

  # Sets the Rosette repo config to use. The snapshot is taken from the
  # repository returned by its +repo+ method.
  #
  # @param [#repo] repo_config The Rosette repo config to use.
  # @return [self]
  def set_repo_config(repo_config)
    @repo_config = repo_config
    self
  end

  # Sets the starting commit id. File changes that occurred more recently
  # than this commit will not be reflected in the snapshot. In other words,
  # this is the commit id to take the snapshot of.
  #
  # @param [String] commit_id The starting commit id.
  # @return [self]
  def set_start_commit_id(commit_id)
    @start_commit_id = commit_id
    self
  end

  # Set the paths that will be included in the snapshot. +nil+ is treated as
  # "no explicit paths" and a single path is wrapped in an array, so the
  # rest of the class can always rely on +paths+ being an array.
  #
  # @param [Array<String>, String, nil] paths The paths to include in the
  #   snapshot.
  # @return [self]
  def set_paths(paths)
    @paths = Array(paths)
    self
  end

  # Takes the snapshot and then resets the factory so it can be reused. The
  # reset only happens when the snapshot was built without raising.
  #
  # @return [Hash<String, String>] The snapshot hash (path to commit id
  #   pairs).
  def take_snapshot
    build_hash.tap do
      reset
    end
  end

  private

  # Walks the history from the start commit towards older commits and
  # records, for every path reported by the diff walk, the first (i.e. most
  # recent) commit in which it shows up as changed.
  #
  # @return [Hash<String, String>] path to commit id pairs
  def build_hash
    repo = repo_config.repo
    rev_walk = RevWalk.new(repo.jgit_repo)
    tree_walk = nil

    begin
      rev_commit = repo.get_rev_commit(start_commit_id, rev_walk)

      # files that exist in the start commit and match the path selection
      tracked_files = make_path_set(rev_commit)
      tree_filter = make_diff_filter((tracked_files + paths).to_a)

      # Files still waiting for their "last changed" commit. Once this set is
      # empty, older commits can no longer change the answer for any file
      # present in the start commit, so the walk can stop early. Paths that
      # are not in the start commit's tree are only recorded if they are
      # encountered before that point.
      unresolved = tracked_files.dup
      stop_when_resolved = !tracked_files.empty?

      tree_walk = TreeWalk.new(repo.jgit_repo)
      rev_walk.markStart(rev_commit)

      {}.tap do |path_hash|
        while (cur_commit = rev_walk.next)
          cur_commit_id = cur_commit.getId.name
          prepare_diff_walk(tree_walk, cur_commit, tree_filter)

          each_file_in(tree_walk) do |walker|
            path = walker.getPathString

            # the first commit seen for a path is the most recent change
            next if path_hash[path]

            path_hash[path] = cur_commit_id
            unresolved.delete(path)
          end

          break if stop_when_resolved && unresolved.empty?
        end
      end
    ensure
      # free the walkers even when the walk raises
      rev_walk.dispose
      tree_walk.release if tree_walk
    end
  end

  # Builds the filter used while diffing each commit against its parents:
  # only entries under the given paths that differ between the trees.
  #
  # Returns nil when no paths were selected. JGit's TreeWalk#setFilter
  # substitutes TreeFilter.ALL for a nil filter, so in that case every entry
  # is visited.
  # NOTE(review): confirm that visiting everything is the intended fallback
  # for an empty path selection.
  def make_diff_filter(path_set)
    return nil if path_set.empty?

    path_filter = PathFilterGroup.createFromStrings(path_set)
    AndTreeFilter.create(path_filter, TreeFilter::ANY_DIFF)
  end

  # Points the tree walk at the given commit's parents' trees followed by
  # its own tree. A root commit is compared against an empty tree.
  def prepare_diff_walk(tree_walk, commit, tree_filter)
    tree_walk.reset
    parent_count = commit.getParentCount

    if parent_count == 0
      tree_walk.addTree(EmptyTreeIterator.new)
    else
      parent_count.times do |i|
        tree_walk.addTree(commit.getParent(i).getTree)
      end
    end

    tree_walk.addTree(commit.getTree)
    tree_walk.setFilter(tree_filter)
    tree_walk.setRecursive(true)
  end

  # Collects the paths of all files in the given commit's tree that pass the
  # path selection (see make_path_gatherer).
  #
  # @return [Set<String>]
  def make_path_set(rev_commit)
    path_gatherer = make_path_gatherer(rev_commit)

    each_file_in(path_gatherer).each_with_object(Set.new) do |walker, files|
      files << walker.getPathString
    end
  ensure
    path_gatherer.release if path_gatherer
  end

  # Creates a recursive tree walk over the given commit's tree, filtered by
  # the explicit paths or, when there are none, by the repo config.
  def make_path_gatherer(rev_commit)
    TreeWalk.new(repo_config.repo.jgit_repo).tap do |walker|
      walker.addTree(rev_commit.getTree)
      walker.setRecursive(true)

      # explicit paths take precedence over repo config ones
      filter = if !paths.empty?
        PathFilterGroup.createFromStrings(paths)
      elsif repo_config
        RepoConfigPathFilter.create(repo_config)
      end

      walker.setFilter(filter) if filter
    end
  end

  # Clears all configuration so the factory can be reused.
  def reset
    @repo_config = nil
    @start_commit_id = nil
    @paths = []
  end

  # Advances the tree walk entry by entry, yielding the walk itself for each
  # entry. Returns an enumerator when no block is given.
  def each_file_in(tree_walk)
    return to_enum(__method__, tree_walk) unless block_given?

    while tree_walk.next
      yield tree_walk
    end
  end
end

#start_commit_id ⇒ String (readonly)


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 32

# Takes snapshots of git repos. A snapshot is a hash that maps file paths to
# the id of the commit in which each file last changed, as seen from the
# configured start commit.
#
# The factory is configured through chainable setters and is reset after each
# call to +take_snapshot+ that returns normally.
class SnapshotFactory
  attr_reader :repo_config, :start_commit_id, :paths

  # Creates a new factory.
  def initialize
    reset
  end

  # Sets the Rosette repo config to use. The snapshot is taken from the
  # repository returned by its +repo+ method.
  #
  # @param [#repo] repo_config The Rosette repo config to use.
  # @return [self]
  def set_repo_config(repo_config)
    @repo_config = repo_config
    self
  end

  # Sets the starting commit id. File changes that occurred more recently
  # than this commit will not be reflected in the snapshot. In other words,
  # this is the commit id to take the snapshot of.
  #
  # @param [String] commit_id The starting commit id.
  # @return [self]
  def set_start_commit_id(commit_id)
    @start_commit_id = commit_id
    self
  end

  # Set the paths that will be included in the snapshot. +nil+ is treated as
  # "no explicit paths" and a single path is wrapped in an array, so the
  # rest of the class can always rely on +paths+ being an array.
  #
  # @param [Array<String>, String, nil] paths The paths to include in the
  #   snapshot.
  # @return [self]
  def set_paths(paths)
    @paths = Array(paths)
    self
  end

  # Takes the snapshot and then resets the factory so it can be reused. The
  # reset only happens when the snapshot was built without raising.
  #
  # @return [Hash<String, String>] The snapshot hash (path to commit id
  #   pairs).
  def take_snapshot
    build_hash.tap do
      reset
    end
  end

  private

  # Walks the history from the start commit towards older commits and
  # records, for every path reported by the diff walk, the first (i.e. most
  # recent) commit in which it shows up as changed.
  #
  # @return [Hash<String, String>] path to commit id pairs
  def build_hash
    repo = repo_config.repo
    rev_walk = RevWalk.new(repo.jgit_repo)
    tree_walk = nil

    begin
      rev_commit = repo.get_rev_commit(start_commit_id, rev_walk)

      # files that exist in the start commit and match the path selection
      tracked_files = make_path_set(rev_commit)
      tree_filter = make_diff_filter((tracked_files + paths).to_a)

      # Files still waiting for their "last changed" commit. Once this set is
      # empty, older commits can no longer change the answer for any file
      # present in the start commit, so the walk can stop early. Paths that
      # are not in the start commit's tree are only recorded if they are
      # encountered before that point.
      unresolved = tracked_files.dup
      stop_when_resolved = !tracked_files.empty?

      tree_walk = TreeWalk.new(repo.jgit_repo)
      rev_walk.markStart(rev_commit)

      {}.tap do |path_hash|
        while (cur_commit = rev_walk.next)
          cur_commit_id = cur_commit.getId.name
          prepare_diff_walk(tree_walk, cur_commit, tree_filter)

          each_file_in(tree_walk) do |walker|
            path = walker.getPathString

            # the first commit seen for a path is the most recent change
            next if path_hash[path]

            path_hash[path] = cur_commit_id
            unresolved.delete(path)
          end

          break if stop_when_resolved && unresolved.empty?
        end
      end
    ensure
      # free the walkers even when the walk raises
      rev_walk.dispose
      tree_walk.release if tree_walk
    end
  end

  # Builds the filter used while diffing each commit against its parents:
  # only entries under the given paths that differ between the trees.
  #
  # Returns nil when no paths were selected. JGit's TreeWalk#setFilter
  # substitutes TreeFilter.ALL for a nil filter, so in that case every entry
  # is visited.
  # NOTE(review): confirm that visiting everything is the intended fallback
  # for an empty path selection.
  def make_diff_filter(path_set)
    return nil if path_set.empty?

    path_filter = PathFilterGroup.createFromStrings(path_set)
    AndTreeFilter.create(path_filter, TreeFilter::ANY_DIFF)
  end

  # Points the tree walk at the given commit's parents' trees followed by
  # its own tree. A root commit is compared against an empty tree.
  def prepare_diff_walk(tree_walk, commit, tree_filter)
    tree_walk.reset
    parent_count = commit.getParentCount

    if parent_count == 0
      tree_walk.addTree(EmptyTreeIterator.new)
    else
      parent_count.times do |i|
        tree_walk.addTree(commit.getParent(i).getTree)
      end
    end

    tree_walk.addTree(commit.getTree)
    tree_walk.setFilter(tree_filter)
    tree_walk.setRecursive(true)
  end

  # Collects the paths of all files in the given commit's tree that pass the
  # path selection (see make_path_gatherer).
  #
  # @return [Set<String>]
  def make_path_set(rev_commit)
    path_gatherer = make_path_gatherer(rev_commit)

    each_file_in(path_gatherer).each_with_object(Set.new) do |walker, files|
      files << walker.getPathString
    end
  ensure
    path_gatherer.release if path_gatherer
  end

  # Creates a recursive tree walk over the given commit's tree, filtered by
  # the explicit paths or, when there are none, by the repo config.
  def make_path_gatherer(rev_commit)
    TreeWalk.new(repo_config.repo.jgit_repo).tap do |walker|
      walker.addTree(rev_commit.getTree)
      walker.setRecursive(true)

      # explicit paths take precedence over repo config ones
      filter = if !paths.empty?
        PathFilterGroup.createFromStrings(paths)
      elsif repo_config
        RepoConfigPathFilter.create(repo_config)
      end

      walker.setFilter(filter) if filter
    end
  end

  # Clears all configuration so the factory can be reused.
  def reset
    @repo_config = nil
    @start_commit_id = nil
    @paths = []
  end

  # Advances the tree walk entry by entry, yielding the walk itself for each
  # entry. Returns an enumerator when no block is given.
  def each_file_in(tree_walk)
    return to_enum(__method__, tree_walk) unless block_given?

    while tree_walk.next
      yield tree_walk
    end
  end
end

Instance Method Details

#set_paths(paths) ⇒ self

Set the paths that will be included in the snapshot.


64
65
66
67
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 64

# Set the paths that will be included in the snapshot. nil is stored as an
# empty list and a single path is wrapped in an array, so later size checks
# and set unions on the stored paths cannot fail on a non-array value.
#
# @param [Array<String>, String, nil] paths The paths to include.
# @return [self]
def set_paths(paths)
  @paths = Array(paths)
  self
end

#set_repo_config(repo_config) ⇒ self

Sets the Rosette repo object to use.


44
45
46
47
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 44

# Stores the Rosette repo config to use and returns the receiver so that
# calls can be chained.
#
# @return [self]
def set_repo_config(repo_config)
  tap { @repo_config = repo_config }
end

#set_start_commit_id(commit_id) ⇒ self

Sets the starting commit id. File changes that occurred more recently than this commit will not be reflected in the snapshot. In other words, this is the commit id to take the snapshot of.


55
56
57
58
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 55

# Stores the id of the commit to take the snapshot of and returns the
# receiver so that calls can be chained.
#
# @return [self]
def set_start_commit_id(commit_id)
  tap { @start_commit_id = commit_id }
end

#take_snapshot ⇒ Hash<String, String>

Takes the snapshot.


73
74
75
76
77
# File 'lib/rosette/core/snapshots/snapshot_factory.rb', line 73

# Builds the snapshot hash, resets the factory, and returns the hash. The
# reset is skipped if building the hash raises.
#
# @return [Hash<String, String>] path to commit id pairs
def take_snapshot
  snapshot = build_hash
  reset
  snapshot
end