• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / 4e4e59fc-9df4-4838-9fd4-6c7ea33cdb7c

07 Apr 2025 06:36PM UTC coverage: 1.283% (-94.6%) from 95.862%
4e4e59fc-9df4-4838-9fd4-6c7ea33cdb7c

Pull #1994

circleci

hectorcorrea
Switched to use the autocomplete that we already use for ROR. Integrated it with the existing logic for creators
Pull Request #1994: Started adding auto complete to contributors

0 of 46 new or added lines in 2 files covered. (0.0%)

4806 existing lines in 74 files now uncovered.

65 of 5065 relevant lines covered (1.28%)

0.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/app/services/s3_query_service.rb
1
# frozen_string_literal: true
2

UNCOV
3
require "aws-sdk-s3"
×
4

5
# A service to query an S3 bucket for information about a given data set
6
# rubocop:disable Metrics/ClassLength
UNCOV
7
class S3QueryService
  attr_reader :model

  PRECURATION = "precuration"
  POSTCURATION = "postcuration"
  PRESERVATION = "preservation"

  # @return [Object] the S3 settings exposed as `Rails.configuration.s3`
  def self.configuration
    Rails.configuration.s3
  end

  # @return [Object] the `pre_curation` section of the S3 settings
  def self.pre_curation_config
    configuration.pre_curation
  end

  # @return [Object] the `post_curation` section of the S3 settings
  def self.post_curation_config
    configuration.post_curation
  end

  # @return [Object] the `preservation` section of the S3 settings
  def self.preservation_config
    configuration.preservation
  end

  attr_reader :part_size, :last_response

  ##
  # @param [Work] model
  # @param [String] mode Valid values are "precuration", "postcuration", "preservation".
  #                      This value controls the AWS S3 bucket used to access the files.
  # @example S3QueryService.new(Work.find(1), "precuration")
  def initialize(model, mode = "precuration")
    @model = model
    @doi = model.doi
    @mode = mode
    @part_size = 5_368_709_120 # 5GB is the maximum part size for AWS
    @last_response = nil
    @s3_responses = {} # cache of list_objects_v2 pages, keyed by "<bucket> <prefix>"
  end

  ##
  # The configuration section matching the mode given to the constructor.
  # @raise [ArgumentError] when the mode is not one of the three known values
  def config
    case @mode
    when PRESERVATION then self.class.preservation_config
    when POSTCURATION then self.class.post_curation_config
    when PRECURATION then self.class.pre_curation_config
    else raise ArgumentError, "Invalid mode value: #{@mode}"
    end
  end

  # @return [Boolean] true when this service was created in "precuration" mode
  def pre_curation?
    @mode == PRECURATION
  end

  # @return [Boolean] true when this service was created in "postcuration" mode
  def post_curation?
    @mode == POSTCURATION
  end

  ##
  # The name of the bucket this class is configured to use.
  # See config/s3.yml for configuration file.
  # @return [String, nil] nil when the configuration has no :bucket entry
  def bucket_name
    config.fetch(:bucket, nil)
  end

  # @return [String, nil] the configured :region, or nil when it is not set
  def region
    config.fetch(:region, nil)
  end

  ##
  # The S3 prefix for this object, i.e., the address within the S3 bucket,
  # which is based on the DOI
  def prefix
    "#{@doi}/#{model.id}/"
  end

  ##
  # Construct an S3 address for this data set
  def s3_address
    "s3://#{bucket_name}/#{prefix}"
  end

  ##
  # Public signed URL to fetch this file from the S3 (valid for a limited time)
  # @param [String] key the full S3 key of the object
  def file_url(key)
    signer = Aws::S3::Presigner.new(client:)
    signer.presigned_url(:get_object, bucket: bucket_name, key:)
  end

  def access_key_id
    S3QueryService.configuration["access_key_id"]
  end

  def secret_access_key
    S3QueryService.configuration["secret_access_key"]
  end

  # Memoized AWS credentials built from the configured key pair.
  def credentials
    @credentials ||= Aws::Credentials.new(access_key_id, secret_access_key)
  end

  # Memoized AWS S3 client for the configured region.
  def client
    @client ||= Aws::S3::Client.new(region:, credentials:)
  end

  # required, accepts ETag, Checksum, ObjectParts, StorageClass, ObjectSize
  def self.object_attributes
    %w[ETag Checksum ObjectParts StorageClass ObjectSize]
  end

  # Requests the attributes listed in .object_attributes for a single key.
  # @param [String] key the full S3 key of the object
  # @return [Hash] the response converted with #to_h
  def get_s3_object_attributes(key:)
    response = client.get_object_attributes({
                                              bucket: bucket_name,
                                              key:,
                                              object_attributes: self.class.object_attributes
                                            })
    response.to_h
  end

  # Fetches a single object from the configured bucket.
  # @param [String] key the full S3 key of the object
  # @return [Hash, nil] the response as a Hash, or nil when that Hash is empty
  # @raise [Aws::Errors::ServiceError] logged and re-raised
  def get_s3_object(key:)
    object = client.get_object({ bucket: bucket_name, key: }).to_h
    return if object.empty?

    object
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting the AWS S3 Object #{key}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # @param [String] filename name of the file relative to this data set
  # @return [String] the S3 key: #prefix followed by the filename
  def build_s3_object_key(filename:)
    "#{prefix}#{filename}"
  end

  # Builds an S3File for one file of this data set from its object attributes.
  # @param [String] filename name of the file relative to this data set
  # @return [S3File, nil] nil when no attributes were returned
  def find_s3_file(filename:)
    s3_object_key = build_s3_object_key(filename:)

    object = get_s3_object_attributes(key: s3_object_key)
    return if object.nil?

    S3File.new(work: model, filename: s3_object_key, last_modified: object[:last_modified], size: object[:object_size], checksum: object[:etag])
  end

  # Retrieve the S3 resources uploaded to the S3 Bucket
  # NOTE: the memoized list is not keyed by bucket_name/prefix; pass reload: true
  #   when calling with different arguments than a previous call.
  # @return [Array<S3File>]
  def client_s3_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix)
    if reload # force a reload
      @client_s3_files = nil
      clear_s3_responses(bucket_name:, prefix:)
    end
    @client_s3_files ||= get_s3_objects(bucket_name:, prefix:)
  end

  # Same listing as #client_s3_files, restricted to entries answering true to #empty?.
  # NOTE: memoized without regard to bucket_name/prefix, like #client_s3_files.
  # @return [Array<S3File>]
  def client_s3_empty_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix)
    if reload # force a reload
      @client_s3_empty_files = nil
      clear_s3_responses(bucket_name:, prefix:)
    end
    @client_s3_empty_files ||= get_s3_objects(bucket_name:, prefix:).select(&:empty?)
  end

  # @return [Integer] number of entries returned by #client_s3_files
  # @raise [Aws::Errors::ServiceError] logged and re-raised
  def file_count
    client_s3_files.count
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting AWS S3 Objects from the bucket #{bucket_name} with the prefix #{prefix}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  ##
  # Query the S3 bucket for what we know about the doi
  # For docs see:
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#list_objects_v2-instance_method
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#get_object_attributes-instance_method
  # @return Hash with two properties {objects: [<S3File>], ok: Bool}
  #   objects is an Array of S3File objects
  #   ok is false if there is an error connecting to S3. Otherwise true.
  def data_profile
    { objects: client_s3_files, ok: true }
  rescue => ex
    Rails.logger.error("Error querying S3. Bucket: #{bucket_name}. DOI: #{@doi}. Exception: #{ex.message}")

    { objects: [], ok: false }
  end

  ##
  # Copies the existing files from the pre-curation bucket to the post-curation bucket.
  # Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).
  # The copies are not performed inline: one ApprovedFileMoveJob is enqueued per file.
  # @param current_user the user recorded on the snapshot and on any warning activity
  # @return [true]
  def publish_files(current_user)
    source_bucket = S3QueryService.pre_curation_config[:bucket]
    target_bucket = S3QueryService.post_curation_config[:bucket]

    # Empty files are not treated specially here; their presence is only
    #   noted in the provenance log.
    client_s3_empty_files(reload: true, bucket_name: source_bucket).each do |empty_file|
      message = "Warning: Attempted to publish empty S3 file #{empty_file.filename}."
      WorkActivity.add_work_activity(model.id, message, current_user.id, activity_type: WorkActivity::SYSTEM)
    end

    files = client_s3_files(reload: true, bucket_name: source_bucket)
    snapshot = ApprovedUploadSnapshot.new(work: model)
    snapshot.store_files(files, current_user:)
    snapshot.save
    files.each do |file|
      ApprovedFileMoveJob.perform_later(work_id: model.id, source_bucket:, source_key: file.key, target_bucket:,
                                        target_key: file.key, size: file.size, snapshot_id: snapshot.id)
    end
    true
  end

  # Copies one object, switching to a multipart copy when size exceeds #part_size.
  # @param [String] source_key copy source; "+" is escaped as "%2B" before it is sent
  # @raise [Aws::Errors::ServiceError] logged and re-raised
  def copy_file(source_key:, target_bucket:, target_key:, size:)
    Rails.logger.info("Copying #{source_key} to #{target_bucket}/#{target_key}")
    if size > part_size
      copy_multi_part(source_key:, target_bucket:, target_key:, size:)
    else
      client.copy_object(copy_source: source_key.gsub("+", "%2B"), bucket: target_bucket, key: target_key, checksum_algorithm: "SHA256")
    end
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting to copy AWS S3 Object from #{source_key} to #{target_key} in the bucket #{target_bucket}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # Copies one object in consecutive byte ranges of at most #part_size bytes.
  # @param [String] source_key copy source; "+" is escaped as "%2B" exactly as in #copy_file
  # @param [Integer] size total size of the source object in bytes
  # @raise [Aws::Errors::ServiceError] logged and re-raised
  def copy_multi_part(source_key:, target_bucket:, target_key:, size:)
    multi = client.create_multipart_upload(bucket: target_bucket, key: target_key, checksum_algorithm: "SHA256")
    copy_source = source_key.gsub("+", "%2B") # keep both copy paths consistent for keys containing "+"
    part_num = 0
    start_byte = 0
    parts = []
    while start_byte < size
      part_num += 1
      end_byte = [start_byte + part_size, size].min - 1 # ranges are inclusive
      resp = client.upload_part_copy(bucket: target_bucket, copy_source:, key: multi.key, part_number: part_num,
                                     upload_id: multi.upload_id, copy_source_range: "bytes=#{start_byte}-#{end_byte}")
      parts << { etag: resp.copy_part_result.etag, part_number: part_num, checksum_sha256: resp.copy_part_result.checksum_sha256 }
      start_byte = end_byte + 1
    end
    client.complete_multipart_upload(bucket: target_bucket, key: target_key, upload_id: multi.upload_id, multipart_upload: { parts: })
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting to multipart copy AWS S3 Object from #{source_key} to #{target_key} in the bucket #{target_bucket}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # Copies a directory placeholder object as-is (no "+" escaping, no checksum algorithm).
  # @raise [Aws::Errors::ServiceError] logged and re-raised
  def copy_directory(source_key:, target_bucket:, target_key:)
    client.copy_object(copy_source: source_key, bucket: target_bucket, key: target_key)
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting to copy the AWS S3 directory Object from #{source_key} to #{target_key} in the bucket #{target_bucket}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # Deletes one object.
  # @param [String] s3_file_key the full S3 key of the object
  # @param [String] bucket bucket to delete from; defaults to #bucket_name
  # @return [Hash] the response converted with #to_h
  # @raise [Aws::Errors::ServiceError] logged (naming the bucket actually used) and re-raised
  def delete_s3_object(s3_file_key, bucket: bucket_name)
    resp = client.delete_object({ bucket:, key: s3_file_key })
    resp.to_h
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting to delete the AWS S3 Object #{s3_file_key} in the bucket #{bucket}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # Creates a zero-length object whose key is #prefix.
  # @raise [Aws::Errors::ServiceError] logged and re-raised
  def create_directory
    client.put_object({ bucket: bucket_name, key: prefix, content_length: 0 })
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting to create the AWS S3 directory Object in the bucket #{bucket_name} with the key #{prefix}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # Uploads the content of io under #prefix. The response of the final AWS call
  # is kept in #last_response.
  # @param io readable and rewindable stream with the file content
  # @param [String] filename name of the file relative to this data set
  # @param [Integer] size size of the content in bytes
  # @param [String, nil] md5_digest base64 MD5 of the content; computed when nil
  #   (only used for the single-request upload)
  # @return [String, false] the S3 key on success; false when AWS answers SignatureDoesNotMatch
  # @raise [Aws::Errors::ServiceError] any other AWS error, logged and re-raised
  def upload_file(io:, filename:, size:, md5_digest: nil)
    key = "#{prefix}#{filename}"
    if size > part_size
      upload_multipart_file(target_bucket: bucket_name, target_key: key, size:, io:)
    else
      # upload file from io in a single request, may not exceed 5GB
      md5_digest ||= md5(io:)
      @last_response = client.put_object(bucket: bucket_name, key:, body: io, content_md5: md5_digest)
    end
    key
  rescue Aws::S3::Errors::SignatureDoesNotMatch => e
    Honeybadger.notify("Error Uploading file #{filename} for object: #{s3_address} Signature did not match! error: #{e}")
    false
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting to create the AWS S3 Object in the bucket #{bucket_name} with the key #{key}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # Issues a HEAD request for one object.
  # @raise [Aws::Errors::ServiceError] logged and re-raised
  def check_file(bucket:, key:)
    client.head_object({ bucket:, key: })
  rescue Aws::Errors::ServiceError => aws_service_error
    message = "An error was encountered when requesting to check the status of the AWS S3 Object in the bucket #{bucket} with the key #{key}: #{aws_service_error}"
    Rails.logger.error(message)
    raise
  end

  # Base64 MD5 digest of everything io yields from its current position.
  # The stream is rewound afterwards so it can be read again for the upload.
  # @return [String]
  def md5(io:)
    digest = Digest::MD5.new
    io.each(10_000) { |block| digest.update block }
    io.rewind
    digest.base64digest
  end

  # @return [Integer] the key_count of every listing page added together
  def count_objects(bucket_name: self.bucket_name, prefix: self.prefix)
    s3_responses(bucket_name:, prefix:).sum(&:key_count)
  end

  private

    # Drops the cached listing pages for one bucket/prefix pair.
    def clear_s3_responses(bucket_name:, prefix:)
      key = "#{bucket_name} #{prefix}"
      @s3_responses[key] = nil
    end

    # Every list_objects_v2 page for the bucket/prefix, following continuation
    # tokens until a page is no longer truncated. Pages are cached per pair.
    # @return [Array] the raw responses, in request order
    def s3_responses(bucket_name:, prefix:)
      key = "#{bucket_name} #{prefix}"
      @s3_responses[key] ||= begin
        resp = client.list_objects_v2({ bucket: bucket_name, max_keys: 1000, prefix: })
        pages = [resp]
        while resp.is_truncated
          resp = client.list_objects_v2({ bucket: bucket_name, max_keys: 1000, prefix:, continuation_token: resp.next_continuation_token })
          pages << resp
        end
        pages
      end
    end

    # Lists the bucket/prefix and converts every returned entry into an S3File,
    # logging how long that took.
    # @return [Array<S3File>]
    def get_s3_objects(bucket_name:, prefix:)
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      objects = s3_responses(bucket_name:, prefix:).flat_map { |resp| parse_objects(resp) }
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      Rails.logger.info("Loading S3 objects. Bucket: #{bucket_name}. Prefix: #{prefix}. Elapsed: #{elapsed} seconds")
      objects
    end

    # @param resp one listing page (anything responding to #to_h)
    # @return [Array<S3File>] one S3File per :contents entry; empty when there is none
    def parse_objects(resp)
      contents = resp.to_h[:contents] || []
      contents.map do |object|
        S3File.new(work: model, filename: object[:key], last_modified: object[:last_modified], size: object[:size], checksum: object[:etag])
      end
    end

    # Uploads io as consecutive parts of at most #part_size bytes. Each part is
    # spooled to a temporary file so its MD5 can be computed before sending.
    # @raise [Aws::Errors::ServiceError] logged and re-raised
    def upload_multipart_file(target_bucket:, target_key:, size:, io:)
      multi = client.create_multipart_upload(bucket: target_bucket, key: target_key)
      part_num = 0
      start_byte = 0
      parts = []
      while start_byte < size
        part_num += 1
        # Tempfile.create (block form) closes AND deletes the file when the block
        # exits, so spooled parts do not pile up on disk.
        Tempfile.create("mutlipart-upload") do |file|
          IO.copy_stream(io, file, part_size)
          file.rewind
          checksum = md5(io: file)
          resp = client.upload_part(body: file, bucket: target_bucket, key: multi.key, part_number: part_num, upload_id: multi.upload_id, content_md5: checksum)
          parts << { etag: resp.etag, part_number: part_num }
        end
        start_byte += part_size
      end
      @last_response = client.complete_multipart_upload(bucket: target_bucket, key: target_key, upload_id: multi.upload_id, multipart_upload: { parts: })
    rescue Aws::Errors::ServiceError => aws_service_error
      message = "An error was encountered when requesting to multipart upload to AWS S3 Object to #{target_key} in the bucket #{target_bucket}: #{aws_service_error}"
      Rails.logger.error(message)
      raise
    end
end
×
400
# rubocop:enable Metrics/ClassLength
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc