Class: MiGA::Project

Inherits:
MiGA
  • Object
show all
Defined in:
lib/miga/project.rb

Overview

MiGA representation of a project.

Constant Summary collapse

@@FOLDERS =

Top-level folders inside a project.

%w[data metadata daemon]
@@DATA_FOLDERS =

Folders for results.

%w[
   01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
   05.assembly 06.cds
   07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
   07.annotation/01.function/01.essential
   07.annotation/01.function/02.ssu
   07.annotation/02.taxonomy/01.mytaxa
   07.annotation/03.qa 07.annotation/03.qa/01.checkm
   07.annotation/03.qa/02.mytaxa_scan
   08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
   09.distances 09.distances/01.haai 09.distances/02.aai
   09.distances/03.ani 09.distances/04.ssu
   10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
   10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
   10.clades/04.phylogeny/02.core 10.clades/05.metadata
]
# Maps each project-wide task (Symbol) to the folder holding its results.
# Folders are relative to the project's "data" directory: #add_result and
# #result build "<path>/data/<folder>/miga-project" from these values.
# Key order is the order in which #results reports them.
@@RESULT_DIRS =
{
  # Distances
  haai_distances: "09.distances/01.haai",
  aai_distances: "09.distances/02.aai",
  ani_distances: "09.distances/03.ani",
  # NOTE(review): the next entry is commented out, so :ssu_distances is not a
  # registered project-wide task — confirm this is intentional.
  #ssu_distances: "09.distances/04.ssu",
  # Clade identification
  clade_finding: "10.clades/01.find",
  # Clade analysis
  subclades: "10.clades/02.ani",
  ogs: "10.clades/03.ogs",
  ess_phylogeny: "10.clades/04.phylogeny/01.essential",
  core_phylogeny: "10.clades/04.phylogeny/02.core",
  clade_metadata: "10.clades/05.metadata"
}
# Supported types of projects. Each entry carries a human-readable
# :description plus two boolean flags, :single and :multi.
# NOTE(review): :single / :multi presumably state whether single-genome and
# multi-genome (metagenome/virome) datasets are accepted — confirm.
@@KNOWN_TYPES =
{
  mixed: {
    description: "Mixed collection of genomes, metagenomes, and viromes.",
    single: true, multi: true},
  genomes: {description: "Collection of genomes.",
    single: true, multi: false},
  # Closely-related genomes share a HIGH average nucleotide identity, so the
  # bound is a minimum (>=), not a maximum.
  clade: {description: "Collection of closely-related genomes (ANI >= 90%).",
    single: true, multi: false},
  metagenomes: {description: "Collection of metagenomes and/or viromes.",
    single: false, multi: true}
}
# Project-wide distance estimations, in the order #next_distances scans them.
@@DISTANCE_TASKS =
%i[haai_distances aai_distances ani_distances clade_finding]
# Project-wide tasks for :clade projects, in the order #next_inclade scans
# them.
@@INCLADE_TASKS =
%i[subclades ogs ess_phylogeny core_phylogeny clade_metadata]

Constants included from MiGA

CITATION, VERSION, VERSION_DATE, VERSION_NAME

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from MiGA

CITATION, DEBUG, DEBUG_OFF, DEBUG_ON, DEBUG_TRACE_OFF, DEBUG_TRACE_ON, FULL_VERSION, LONG_VERSION, VERSION, VERSION_DATE, initialized?, #result_files_exist?, root_path, tabulate

Constructor Details

#initialize(path, update = false) ⇒ Project

Create a new MiGA::Project at path, if it doesn't exist and update is false, or load an existing one.


108
109
110
111
112
113
# File 'lib/miga/project.rb', line 108

# Set up the project located at +path+ (stored as an absolute path).
#
# #create is run when +update+ is true or when no project exists at +path+;
# afterwards the metadata is loaded if it is still unset.
#
# NOTE(review): the prose description says the project is created only when
# it doesn't exist AND +update+ is false, which disagrees with the condition
# below (it also creates when +update+ is true) — confirm which is intended.
def initialize(path, update=false)
  @datasets = {}
  @path = File.absolute_path(path)
  self.create if update or not Project.exist? self.path
  # #create already ends with #load, so this only fires when it was skipped.
  self.load if self.metadata.nil?
end

Instance Attribute Details

#metadataObject (readonly)

Information about the project as MiGA::Metadata.


103
104
105
# File 'lib/miga/project.rb', line 103

# Information about the project (the object assigned to @metadata by #create
# and #load). Read-only accessor.
def metadata
  @metadata
end

#pathObject (readonly)

Absolute path to the project folder.


99
100
101
# File 'lib/miga/project.rb', line 99

# Absolute path to the project folder (read-only accessor for @path).
def path ; @path ; end

Class Method Details

.DISTANCE_TASKSObject

Project-wide distance estimations.


71
# File 'lib/miga/project.rb', line 71

# Project-wide distance estimations (class-level reader for
# @@DISTANCE_TASKS).
def self.DISTANCE_TASKS
  @@DISTANCE_TASKS
end

.exist?(path) ⇒ Boolean

Does the project at path exist?


83
84
85
# File 'lib/miga/project.rb', line 83

# Does the project at +path+ exist? True only when +path+ is a directory
# that contains a "miga.project.json" file.
def self.exist?(path)
  return false unless Dir.exist?(path)
  File.exist?(path + "/miga.project.json")
end

.INCLADE_TASKSObject

Project-wide tasks for :clade projects.


77
# File 'lib/miga/project.rb', line 77

# Project-wide tasks for :clade projects (class-level reader for
# @@INCLADE_TASKS).
def self.INCLADE_TASKS
  @@INCLADE_TASKS
end

.KNOWN_TYPESObject

Supported types of projects.


56
# File 'lib/miga/project.rb', line 56

# Supported types of projects (class-level reader for @@KNOWN_TYPES).
def self.KNOWN_TYPES
  @@KNOWN_TYPES
end

.load(path) ⇒ Object

Load the project at path. Returns MiGA::Project if project exists, nil otherwise.


90
91
92
93
# File 'lib/miga/project.rb', line 90

# Load the project at +path+. Returns a MiGA::Project when Project.exist?
# reports one there, nil otherwise (nothing is created in that case).
def self.load(path)
  Project.exist?(path) ? Project.new(path) : nil
end

.RESULT_DIRSObject

Directories containing the results from project-wide tasks.


37
# File 'lib/miga/project.rb', line 37

# Directories containing the results from project-wide tasks (class-level
# reader for @@RESULT_DIRS).
def self.RESULT_DIRS
  @@RESULT_DIRS
end

Instance Method Details

#add_dataset(name) ⇒ Object

Add dataset identified by name and return MiGA::Dataset.


196
197
198
199
200
201
202
203
# File 'lib/miga/project.rb', line 196

# Add the dataset identified by +name+ and return it as fetched by #dataset.
#
# When +name+ is not yet listed in the project metadata, a MiGA::Dataset is
# instantiated for it, the name is appended to metadata[:datasets], and the
# project is saved. Already-registered names are left untouched.
def add_dataset(name)
  unless metadata[:datasets].include? name
    MiGA::Dataset.new(self, name)
    metadata[:datasets] << name
    save
  end
  dataset(name)
end

#add_result(name, save = true) ⇒ Object

Add the result identified by Symbol name, and return MiGA::Result. Save the result if save.


261
262
263
264
265
266
267
268
269
# File 'lib/miga/project.rb', line 261

# Add the result identified by Symbol +name+ and return the MiGA::Result.
#
# Returns nil when +name+ has no entry in @@RESULT_DIRS. When +save+ is
# false, the stored "miga-project.json" is simply loaded. Otherwise the
# result is built by the matching add_result_<name> method and saved, but
# only if result_files_exist? finds the ".done" file; if not, nil is
# returned.
def add_result(name, save=true)
  dir = @@RESULT_DIRS[name]
  return nil if dir.nil?
  prefix = "#{path}/data/#{dir}/miga-project"
  return MiGA::Result.load("#{prefix}.json") unless save
  return nil unless result_files_exist?(prefix, ".done")
  send("add_result_#{name}", prefix).tap { |res| res.save }
end

#createObject

Create an empty project.


117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/miga/project.rb', line 117

# Create an empty project at #path.
#
# Makes the project folder, the top-level folders (@@FOLDERS) and the result
# folders under "data" (@@DATA_FOLDERS), skipping any that already exist.
# Then it builds the project metadata at "miga.project.json", copies the
# daemon configuration from $MIGA_HOME unless one is already present, and
# finishes with #load.
#
# Raises RuntimeError if MiGA has not been initialized.
def create
  unless MiGA::MiGA.initialized?
    raise "Impossible to create project in uninitialized MiGA."
  end
  # Parents are listed before their children, so plain Dir.mkdir suffices.
  dirs = [path] + @@FOLDERS.map{|d| "#{path}/#{d}" } +
    @@DATA_FOLDERS.map{ |d| "#{path}/data/#{d}"}
  dirs.each{ |d| Dir.mkdir(d) unless Dir.exist? d }
  @metadata = MiGA::Metadata.new(self.path + "/miga.project.json",
    {datasets: [], name: File.basename(path)})
  FileUtils.cp(ENV["MIGA_HOME"] + "/.miga_daemon.json",
    "#{path}/daemon/daemon.json") unless
      File.exist? "#{path}/daemon/daemon.json"
  self.load
end

#dataset(name) ⇒ Object

Returns MiGA::Dataset.


172
173
174
175
176
177
178
# File 'lib/miga/project.rb', line 172

# Returns the MiGA::Dataset called +name+ (normalized with #miga_name), or
# nil when MiGA::Dataset.exist? does not find it in this project. Dataset
# objects are cached in @datasets, so repeated calls return the same
# instance.
def dataset(name)
  key = name.miga_name
  return nil unless MiGA::Dataset.exist?(self, key)
  cache = (@datasets ||= {})
  cache[key] ||= MiGA::Dataset.new(self, key)
end

#dataset_namesObject

Returns Array of String (without evaluating dataset objects).


166
167
168
# File 'lib/miga/project.rb', line 166

# Returns the registered dataset names as stored in metadata[:datasets],
# without instantiating any dataset object.
def dataset_names
  metadata[:datasets]
end

#datasetsObject

Returns Array of MiGA::Dataset.


160
161
162
# File 'lib/miga/project.rb', line 160

# Returns an Array with the result of #dataset for every name registered in
# metadata[:datasets], in registration order.
def datasets
  metadata[:datasets].map{ |name| dataset(name) }
end

#done_preprocessing?(save = true) ⇒ Boolean

Are all the datasets in the project preprocessed? Save intermediate results if save.


306
307
308
# File 'lib/miga/project.rb', line 306

# Are all the datasets in the project preprocessed? Datasets for which
# is_ref? is false count as done. +save+ is forwarded to each dataset's
# done_preprocessing?.
def done_preprocessing?(save=true)
  # Collect first, reduce later: every reference dataset is queried even
  # after one of them has reported false.
  states = datasets.map do |ds|
    !ds.is_ref? || ds.done_preprocessing?(save)
  end
  states.all?
end

#each_dataset(&blk) ⇒ Object

Iterate through datasets, with one or two variables passed to blk. If one, the dataset MiGA::Dataset object is passed. If two, the name and the dataset object are passed.


184
185
186
187
188
189
190
191
192
# File 'lib/miga/project.rb', line 184

# Iterate through the registered datasets (metadata[:datasets]).
#
# A block taking exactly one parameter receives the dataset object returned
# by #dataset; any other block receives the name and the dataset object.
def each_dataset(&blk)
  metadata[:datasets].each do |name|
    if blk.arity == 1
      blk.call(dataset(name))
    else
      blk.call(name, dataset(name))
    end
  end
end

#each_dataset_profile_advance(&blk) ⇒ Object

Call blk passing the result of MiGA::Dataset#profile_advance for each registered dataset.


328
329
330
# File 'lib/miga/project.rb', line 328

# Call +blk+ with the value of profile_advance for each dataset yielded by
# #each_dataset.
def each_dataset_profile_advance(&blk)
  # The inner block keeps a single parameter so #each_dataset hands over the
  # dataset object alone.
  each_dataset do |ds|
    progress = ds.profile_advance
    blk.call(progress)
  end
end

#import_dataset(ds, method = :hardlink) ⇒ Object

Import the dataset ds, a MiGA::Dataset, using method, which can be any transfer method supported by File.generic_transfer.


218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# File 'lib/miga/project.rb', line 218

# Import the dataset +ds+ (a MiGA::Dataset) into this project, moving files
# with File.generic_transfer and the given +method+.
#
# For every result of +ds+ the result files and the existing
# "<name>.json", "<name>.start" and "<name>.done" files are transferred to
# the matching result folder of this project; the dataset metadata file is
# transferred next, and finally the dataset is registered with #add_dataset.
#
# Raises RuntimeError if a dataset with the same name already exists here.
def import_dataset(ds, method=:hardlink)
  if MiGA::Dataset.exist?(self, ds.name)
    raise "Impossible to import dataset, it already exists: #{ds.name}."
  end
  ds.each_result do |task, result|
    target_dir = "#{path}/data/#{MiGA::Dataset.RESULT_DIRS[task]}"
    # Result files
    result.each_file do |file|
      File.generic_transfer("#{result.dir}/#{file}",
        "#{target_dir}/#{file}", method)
    end
    # Result bookkeeping files, only those that are present
    %w(json start done).each do |suffix|
      source = "#{result.dir}/#{ds.name}.#{suffix}"
      next unless File.exist? source
      File.generic_transfer(source,
        "#{target_dir}/#{ds.name}.#{suffix}", method)
    end
  end
  # Dataset metadata
  File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
    "#{path}/metadata/#{ds.name}.json", method)
  # Register the dataset
  add_dataset(ds.name)
end

#is_clade?Boolean

Is this a clade project?


156
# File 'lib/miga/project.rb', line 156

# Is this a clade project? True when #type is :clade.
def is_clade?
  type == :clade
end

#loadObject

(Re-)load project data and metadata.


141
142
143
144
# File 'lib/miga/project.rb', line 141

# (Re-)load the project metadata from "<path>/miga.project.json" into
# @metadata.
#
# Raises RuntimeError when MiGA::Metadata.load yields nil for that file.
def load
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
end

#nameObject

Name of the project.


148
# File 'lib/miga/project.rb', line 148

# Name of the project, as stored in metadata[:name].
def name ; metadata[:name] ; end

#next_distances(save = true) ⇒ Object

Get the next distances task, saving intermediate results if save. Returns a Symbol.


274
275
276
# File 'lib/miga/project.rb', line 274

# Get the next distances task: the first entry of @@DISTANCE_TASKS for which
# #add_result returns nil (or nil if there is none). +save+ is forwarded to
# #add_result.
def next_distances(save=true)
  @@DISTANCE_TASKS.detect do |task|
    add_result(task, save).nil?
  end
end

#next_inclade(save = true) ⇒ Object

Get the next inclade task, saving intermediate results if save. Returns a Symbol.


281
282
283
284
# File 'lib/miga/project.rb', line 281

# Get the next inclade task: the first entry of @@INCLADE_TASKS for which
# #add_result returns nil. +save+ is forwarded to #add_result.
#
# Returns nil right away when the project type (metadata[:type]) is not
# :clade.
def next_inclade(save=true)
  return nil unless metadata[:type]==:clade
  @@INCLADE_TASKS.find{ |t| add_result(t, save).nil? }
end

#profile_datasets_advanceObject

Returns a two-dimensional matrix (Array of Array) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value corresponds to:

  • 0: Before execution.

  • 1: Done (or not required).

  • 2: To do.


317
318
319
320
321
322
323
# File 'lib/miga/project.rb', line 317

# Returns an Array with one entry per dataset, each being the value yielded
# by #each_dataset_profile_advance for that dataset, in iteration order.
def profile_datasets_advance
  [].tap do |matrix|
    each_dataset_profile_advance { |row| matrix << row }
  end
end

#result(name) ⇒ Object

Get result identified by Symbol name, returns MiGA::Result.


246
247
248
249
250
# File 'lib/miga/project.rb', line 246

# Get the result identified by +name+ (converted with to_sym). Returns what
# MiGA::Result.load yields for the task's "miga-project.json", or nil when
# +name+ has no entry in @@RESULT_DIRS.
def result(name)
  dir = @@RESULT_DIRS[name.to_sym]
  return nil if dir.nil?
  MiGA::Result.load "#{path}/data/#{dir}/miga-project.json"
end

#resultsObject

Get all results, an Array of MiGA::Result.


254
255
256
# File 'lib/miga/project.rb', line 254

# Get all results: #result for every key of @@RESULT_DIRS, with the nil
# entries dropped.
def results
  @@RESULT_DIRS.keys.map { |task| result(task) }.compact
end

#saveObject

Save any changes persistently.


134
135
136
137
# File 'lib/miga/project.rb', line 134

# Save any changes persistently: writes the metadata with its own #save and
# then reloads the project with #load.
def save
  metadata.save
  self.load
end

#typeObject

Type of project.


152
# File 'lib/miga/project.rb', line 152

# Type of project, as stored in metadata[:type].
def type ; metadata[:type] ; end

#unlink_dataset(name) ⇒ Object

Unlink dataset identified by name and return MiGA::Dataset.


207
208
209
210
211
212
213
# File 'lib/miga/project.rb', line 207

# Unlink the dataset identified by +name+: remove the name from
# metadata[:datasets], save the project, and return the dataset object.
# Returns nil, changing nothing, when #dataset does not find it.
def unlink_dataset(name)
  d = dataset(name)
  return nil if d.nil?
  metadata[:datasets].delete(name)
  save
  d
end

#unregistered_datasetsObject

Find all datasets that have (potential) result files but are not yet registered.


288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/miga/project.rb', line 288

# Find all datasets that have (potential) result files but are not yet
# registered in metadata[:datasets].
#
# Every folder in MiGA::Dataset.RESULT_DIRS is scanned for files with a
# known result extension (optionally gzipped). The dataset name is taken as
# the part of the file name before the first dot; "miga-project" is skipped.
# Returns an Array of unique names.
def unregistered_datasets
  found = []
  MiGA::Dataset.RESULT_DIRS.values.each do |dir|
    Dir.entries("#{path}/data/#{dir}").each do |file|
      next unless
        file =~ %r{
          \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
        }x
      m = /([^\.]+)/.match(file)
      found << m[1] unless m.nil? or m[1] == "miga-project"
    end
  end
  # Drop the names the project already knows about.
  found.uniq - metadata[:datasets]
end