Class: Rosette::Core::CommitProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/rosette/core/extractor/commit_processor.rb

Overview

Extracts phrases from a git commit. Should be thread-safe.

Examples:

processor = CommitProcessor.new(configuration)
processor.process_each_phrase('my_repo', 'master') do |phrase|
  puts phrase.key
end

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config, error_reporter = NilErrorReporter.instance) ⇒ CommitProcessor

Creates a new processor.

Parameters:

  • config (Configurator)

    The Rosette config to use.

  • error_reporter (ErrorReporter) (defaults to: NilErrorReporter.instance)

    The error reporter to report syntax errors, etc to.


29
30
31
32
# File 'lib/rosette/core/extractor/commit_processor.rb', line 29

# Builds a processor around the given configuration.
#
# @param [Configurator] config The Rosette config to use.
# @param [ErrorReporter] error_reporter The reporter that syntax errors
#   and similar problems are sent to. Falls back to
#   +NilErrorReporter.instance+ when omitted.
def initialize(config, error_reporter = NilErrorReporter.instance)
  @config, @error_reporter = config, error_reporter
end

Instance Attribute Details

#config ⇒ Configurator (readonly)

Returns the Rosette config to use.

Returns:

  • (Configurator)

    the Rosette config to use.


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rosette/core/extractor/commit_processor.rb', line 21

# Extracts translatable phrases from the files touched by a git commit.
# Instances only hold the two read-only attributes assigned in the
# constructor; all per-call state lives in local variables.
#
# @!attribute [r] config
#   @return [Configurator] the Rosette config to use.
# @!attribute [r] error_reporter
#   @return [ErrorReporter] the error reporter to report syntax errors,
#     etc to.
class CommitProcessor
  attr_reader :config, :error_reporter

  # Creates a new processor.
  #
  # @param [Configurator] config The Rosette config to use.
  # @param [ErrorReporter] error_reporter The error reporter to report
  #   syntax errors, etc to.
  def initialize(config, error_reporter = NilErrorReporter.instance)
    @config = config
    @error_reporter = error_reporter
  end

  # Extracts translatable phrases from the given ref and yields them
  # sequentially to the given block. If no block is given, this method
  # returns an +Enumerator+.
  #
  # @param [String] repo_name The name of the repository to extract
  #   translatable phrases from. Must be configured in +config+.
  # @param [String] commit_ref The git ref or commit id to extract
  #   translatable phrases from.
  # @raise [Java::OrgEclipseJgitErrors::MissingObjectException]
  # @return [void, Enumerator] either nothing if a block is given or
  #   an instance of +Enumerator+ if no block is given.
  # @yield [phrase] a single extracted phrase.
  # @yieldparam phrase [Phrase]
  def process_each_phrase(repo_name, commit_ref)
    return to_enum(__method__, repo_name, commit_ref) unless block_given?

    repo_config = config.get_repo(repo_name)
    # NOTE(review): the rev walk is never explicitly released here -
    # confirm whether the JGit version in use requires it.
    rev_walk = RevWalk.new(repo_config.repo.jgit_repo)
    diff_finder = DiffFinder.new(repo_config.repo.jgit_repo, rev_walk)
    commit = repo_config.repo.get_rev_commit(commit_ref, rev_walk)

    diff_finder.diff_with_parents(commit).each_pair do |_, diff_entries|
      diff_entries.each do |diff_entry|
        # Entries whose new path is '/dev/null' have no new content to
        # read (presumably deletions), so they are skipped.
        next if diff_entry.getNewPath == '/dev/null'

        process_diff_entry(diff_entry, repo_config, commit) do |phrase|
          yield phrase
        end
      end
    end
  end

  protected

  # Runs every extractor configured for the entry's new path over the
  # entry's contents and yields each extracted phrase, annotated with
  # the file path and commit id. When the extractor supports line
  # numbers and blame data exists for the phrase's line, the author
  # name, author email and line number are filled in as well.
  #
  # Syntax errors raised by an extractor are handed to +error_reporter+
  # wrapped in an +ExtractionSyntaxError+; the remaining extractors
  # still run.
  #
  # @param diff_entry the diff entry to process (responds to
  #   +getNewPath+ and +getNewId+).
  # @param repo_config the repo config that supplies extractor configs
  #   and the repo used for blame.
  # @param commit the commit being processed (responds to +getId+).
  # @yield [phrase] a single extracted phrase.
  # @return [void]
  def process_diff_entry(diff_entry, repo_config, commit)
    path = diff_entry.getNewPath
    commit_id = commit.getId.name
    # Blame is only computed when an extractor can actually use it, and
    # at most once per diff entry instead of once per extractor config.
    line_numbers_to_author = nil

    repo_config.get_extractor_configs(path).each do |extractor_config|
      extractor = extractor_config.extractor
      source_code = read_object_from_entry(diff_entry, repo_config, extractor_config)
      use_line_numbers = extractor.supports_line_numbers?
      line_numbers_to_author ||= repo_config.repo.blame(path, commit_id) if use_line_numbers

      begin
        extractor.extract_each_from(source_code) do |phrase, line_number|
          phrase.file = path
          phrase.commit_id = commit_id

          # Blame data is indexed from zero while extractor line numbers
          # start at one. A missing line number is tolerated rather than
          # raising NoMethodError on nil.
          author_identity =
            (line_numbers_to_author[line_number - 1] if use_line_numbers && line_number)

          if author_identity
            phrase.author_name = author_identity.getName
            phrase.author_email = author_identity.getEmailAddress
            phrase.line_number = line_number
          end

          yield phrase
        end
      rescue SyntaxError => e
        error_reporter.report_error(
          ExtractionSyntaxError.new(
            e.message, e.original_exception, e.language,
            path, commit_id
          )
        )
      end
    end
  end

  # Reads the object referenced by the entry's new id and decodes its
  # bytes into a Ruby string using the extractor config's encoding.
  #
  # NOTE(review): the object reader is never explicitly released -
  # confirm whether the JGit version in use requires it.
  #
  # @param diff_entry the diff entry whose new object should be read.
  # @param repo_config the repo config that provides the JGit repo.
  # @param extractor_config supplies the encoding used for decoding.
  # @return [String] the decoded file contents.
  def read_object_from_entry(diff_entry, repo_config, extractor_config)
    object_reader = repo_config.repo.jgit_repo.newObjectReader
    bytes = object_reader.open(diff_entry.getNewId.toObjectId).getBytes
    Java::JavaLang::String.new(bytes, extractor_config.encoding.to_s).to_s
  end
end

#error_reporter ⇒ ErrorReporter (readonly)

Returns the error reporter to report syntax errors, etc to.

Returns:

  • (ErrorReporter)

    the error reporter to report syntax errors, etc to.


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rosette/core/extractor/commit_processor.rb', line 21

# Extracts translatable phrases from the files touched by a git commit.
# Instances only hold the two read-only attributes assigned in the
# constructor; all per-call state lives in local variables.
#
# @!attribute [r] config
#   @return [Configurator] the Rosette config to use.
# @!attribute [r] error_reporter
#   @return [ErrorReporter] the error reporter to report syntax errors,
#     etc to.
class CommitProcessor
  attr_reader :config, :error_reporter

  # Creates a new processor.
  #
  # @param [Configurator] config The Rosette config to use.
  # @param [ErrorReporter] error_reporter The error reporter to report
  #   syntax errors, etc to.
  def initialize(config, error_reporter = NilErrorReporter.instance)
    @config = config
    @error_reporter = error_reporter
  end

  # Extracts translatable phrases from the given ref and yields them
  # sequentially to the given block. If no block is given, this method
  # returns an +Enumerator+.
  #
  # @param [String] repo_name The name of the repository to extract
  #   translatable phrases from. Must be configured in +config+.
  # @param [String] commit_ref The git ref or commit id to extract
  #   translatable phrases from.
  # @raise [Java::OrgEclipseJgitErrors::MissingObjectException]
  # @return [void, Enumerator] either nothing if a block is given or
  #   an instance of +Enumerator+ if no block is given.
  # @yield [phrase] a single extracted phrase.
  # @yieldparam phrase [Phrase]
  def process_each_phrase(repo_name, commit_ref)
    return to_enum(__method__, repo_name, commit_ref) unless block_given?

    repo_config = config.get_repo(repo_name)
    # NOTE(review): the rev walk is never explicitly released here -
    # confirm whether the JGit version in use requires it.
    rev_walk = RevWalk.new(repo_config.repo.jgit_repo)
    diff_finder = DiffFinder.new(repo_config.repo.jgit_repo, rev_walk)
    commit = repo_config.repo.get_rev_commit(commit_ref, rev_walk)

    diff_finder.diff_with_parents(commit).each_pair do |_, diff_entries|
      diff_entries.each do |diff_entry|
        # Entries whose new path is '/dev/null' have no new content to
        # read (presumably deletions), so they are skipped.
        next if diff_entry.getNewPath == '/dev/null'

        process_diff_entry(diff_entry, repo_config, commit) do |phrase|
          yield phrase
        end
      end
    end
  end

  protected

  # Runs every extractor configured for the entry's new path over the
  # entry's contents and yields each extracted phrase, annotated with
  # the file path and commit id. When the extractor supports line
  # numbers and blame data exists for the phrase's line, the author
  # name, author email and line number are filled in as well.
  #
  # Syntax errors raised by an extractor are handed to +error_reporter+
  # wrapped in an +ExtractionSyntaxError+; the remaining extractors
  # still run.
  #
  # @param diff_entry the diff entry to process (responds to
  #   +getNewPath+ and +getNewId+).
  # @param repo_config the repo config that supplies extractor configs
  #   and the repo used for blame.
  # @param commit the commit being processed (responds to +getId+).
  # @yield [phrase] a single extracted phrase.
  # @return [void]
  def process_diff_entry(diff_entry, repo_config, commit)
    path = diff_entry.getNewPath
    commit_id = commit.getId.name
    # Blame is only computed when an extractor can actually use it, and
    # at most once per diff entry instead of once per extractor config.
    line_numbers_to_author = nil

    repo_config.get_extractor_configs(path).each do |extractor_config|
      extractor = extractor_config.extractor
      source_code = read_object_from_entry(diff_entry, repo_config, extractor_config)
      use_line_numbers = extractor.supports_line_numbers?
      line_numbers_to_author ||= repo_config.repo.blame(path, commit_id) if use_line_numbers

      begin
        extractor.extract_each_from(source_code) do |phrase, line_number|
          phrase.file = path
          phrase.commit_id = commit_id

          # Blame data is indexed from zero while extractor line numbers
          # start at one. A missing line number is tolerated rather than
          # raising NoMethodError on nil.
          author_identity =
            (line_numbers_to_author[line_number - 1] if use_line_numbers && line_number)

          if author_identity
            phrase.author_name = author_identity.getName
            phrase.author_email = author_identity.getEmailAddress
            phrase.line_number = line_number
          end

          yield phrase
        end
      rescue SyntaxError => e
        error_reporter.report_error(
          ExtractionSyntaxError.new(
            e.message, e.original_exception, e.language,
            path, commit_id
          )
        )
      end
    end
  end

  # Reads the object referenced by the entry's new id and decodes its
  # bytes into a Ruby string using the extractor config's encoding.
  #
  # NOTE(review): the object reader is never explicitly released -
  # confirm whether the JGit version in use requires it.
  #
  # @param diff_entry the diff entry whose new object should be read.
  # @param repo_config the repo config that provides the JGit repo.
  # @param extractor_config supplies the encoding used for decoding.
  # @return [String] the decoded file contents.
  def read_object_from_entry(diff_entry, repo_config, extractor_config)
    object_reader = repo_config.repo.jgit_repo.newObjectReader
    bytes = object_reader.open(diff_entry.getNewId.toObjectId).getBytes
    Java::JavaLang::String.new(bytes, extractor_config.encoding.to_s).to_s
  end
end

Instance Method Details

#process_each_phrase(repo_name, commit_ref) {|phrase| ... } ⇒ void, Enumerator

Extracts translatable phrases from the given ref and yields them sequentially to the given block. If no block is given, this method returns an Enumerator.

Parameters:

  • repo_name (String)

    The name of the repository to extract translatable phrases from. Must be configured in config.

  • commit_ref (String)

    The git ref or commit id to extract translatable phrases from.

Yields:

  • (phrase)

    a single extracted phrase.

Yield Parameters:

  • phrase (Phrase)

Returns:

  • (void, Enumerator)

    either nothing if a block is given or an instance of Enumerator if no block is given.

Raises:

  • (Java::OrgEclipseJgitErrors::MissingObjectException)

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/rosette/core/extractor/commit_processor.rb', line 47

# Walks the diffs between the given ref and its parents and yields every
# translatable phrase found in the changed files, one at a time. Called
# without a block, it returns an +Enumerator+ over the same phrases.
#
# @param [String] repo_name The name of the repository to extract
#   translatable phrases from. Must be configured in +config+.
# @param [String] commit_ref The git ref or commit id to extract
#   translatable phrases from.
# @raise [Java::OrgEclipseJgitErrors::MissingObjectException]
# @return [void, Enumerator] either nothing if a block is given or
#   an instance of +Enumerator+ if no block is given.
# @yield [phrase] a single extracted phrase.
# @yieldparam phrase [Phrase]
def process_each_phrase(repo_name, commit_ref)
  return to_enum(__method__, repo_name, commit_ref) unless block_given?

  repo_config = config.get_repo(repo_name)
  walker = RevWalk.new(repo_config.repo.jgit_repo)
  finder = DiffFinder.new(repo_config.repo.jgit_repo, walker)
  rev_commit = repo_config.repo.get_rev_commit(commit_ref, walker)

  finder.diff_with_parents(rev_commit).each_pair do |_parent, entries|
    entries.each do |entry|
      # Entries whose new path is '/dev/null' are skipped.
      next if entry.getNewPath == '/dev/null'

      process_diff_entry(entry, repo_config, rev_commit) { |phrase| yield phrase }
    end
  end
end