• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / 1aaf6302-d8cf-4943-bb96-5e86951c32a3

pending completion
1aaf6302-d8cf-4943-bb96-5e86951c32a3

Pull #1079

circleci

Bess Sadler
Nil safe doi gsub
Pull Request #1079: Nil safe collection title

2 of 2 new or added lines in 1 file covered. (100.0%)

1777 of 2063 relevant lines covered (86.14%)

100.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

28.06
/app/services/s3_query_service.rb
1
# frozen_string_literal: true
2

3
require "aws-sdk-s3"
1✔
4

5
# A service to query an S3 bucket for information about a given data set.
# Operates against either the pre-curation or the post-curation bucket,
# depending on how the service is constructed (see #initialize).
# rubocop:disable Metrics/ClassLength
class S3QueryService
  attr_reader :model

  # @return [Object] the S3 settings from Rails configuration (config/s3.yml)
  def self.configuration
    Rails.configuration.s3
  end

  # @return [Hash] configuration for the pre-curation bucket
  def self.pre_curation_config
    configuration.pre_curation
  end

  # @return [Hash] configuration for the post-curation bucket
  def self.post_curation_config
    configuration.post_curation
  end

  ##
  # @param [Work] model
  # @param [Boolean] pre_curation whether to operate on the pre-curation bucket
  # @example S3QueryService.new(Work.find(1), true)
  def initialize(model, pre_curation = true)
    @model = model
    @doi = model.doi
    @pre_curation = pre_curation
  end

  # Selects the bucket configuration matching this service's curation state.
  def config
    return self.class.post_curation_config if post_curation?

    self.class.pre_curation_config
  end

  def pre_curation?
    @pre_curation
  end

  def post_curation?
    !pre_curation?
  end

  ##
  # The name of the bucket this class is configured to use.
  # See config/s3.yml for configuration file.
  def bucket_name
    config.fetch(:bucket, nil)
  end

  # The AWS region for the configured bucket, or nil when not configured.
  def region
    config.fetch(:region, nil)
  end

  ##
  # The S3 prefix for this object, i.e., the address within the S3 bucket,
  # which is based on the DOI
  def prefix
    "#{@doi}/#{model.id}/"
  end

  ##
  # Construct an S3 address for this data set
  def s3_address
    "s3://#{bucket_name}/#{prefix}"
  end

  ##
  # Public signed URL to fetch this file from the S3 (valid for a limited time)
  def file_url(key)
    signer = Aws::S3::Presigner.new(client: client)
    signer.presigned_url(:get_object, bucket: bucket_name, key: key)
  end

  # There is probably a better way to fetch the current ActiveStorage configuration but we have
  # not found it.
  def active_storage_configuration
    Rails.configuration.active_storage.service_configurations[Rails.configuration.active_storage.service.to_s]
  end

  def access_key_id
    active_storage_configuration["access_key_id"]
  end

  def secret_access_key
    active_storage_configuration["secret_access_key"]
  end

  # Memoized AWS credentials built from the ActiveStorage configuration.
  def credentials
    @credentials ||= Aws::Credentials.new(access_key_id, secret_access_key)
  end

  # Memoized S3 client for the configured region and credentials.
  def client
    @client ||= Aws::S3::Client.new(region: region, credentials: credentials)
  end

  # Fetches a single object from the bucket.
  # @param key [String] the full S3 object key
  # @return [Hash, nil] the GetObject response as a Hash, or nil when the response is empty
  def get_s3_object(key:)
    response = client.get_object({
                                   bucket: bucket_name,
                                   key: key
                                 })
    object = response.to_h
    return if object.empty?

    object
  end

  # Looks up a single file stored under this work's prefix.
  # @param filename [String] the file name relative to the prefix
  # @return [S3File, nil] nil when the object cannot be retrieved
  def find_s3_file(filename:)
    s3_object_key = "#{prefix}#{filename}"

    object = get_s3_object(key: s3_object_key)
    return if object.nil?

    S3File.new(work: model, filename: s3_object_key, last_modified: object[:last_modified], size: object[:content_length], checksum: object[:etag])
  end

  # Retrieve the S3 resources uploaded to the S3 Bucket
  # @return [Array<S3File>]
  def client_s3_files(reload: false, bucket_name: self.bucket_name, prefix: self.prefix)
    @client_s3_files = nil if reload # force a reload
    @client_s3_files ||= begin
      start = Time.zone.now
      resp = client.list_objects_v2({ bucket: bucket_name, max_keys: 1000, prefix: prefix })
      resp_hash = resp.to_h
      objects = parse_objects(resp_hash)
      objects += parse_continuation(resp_hash)
      elapsed = Time.zone.now - start
      Rails.logger.info("Loading S3 objects. Bucket: #{bucket_name}. Prefix: #{prefix}. Elapsed: #{elapsed} seconds")
      objects
    end
  end

  # Number of (memoized) files listed under this work's prefix.
  def file_count
    client_s3_files.count
  end

  ##
  # Query the S3 bucket for what we know about the doi
  # For docs see:
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#list_objects_v2-instance_method
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#get_object_attributes-instance_method
  # @return Hash with two properties {objects: [<S3File>], ok: Bool}
  #   objects is an Array of S3File objects
  #   ok is false if there is an error connecting to S3. Otherwise true.
  def data_profile
    { objects: client_s3_files, ok: true }
  rescue => ex
    Rails.logger.error("Error querying S3. Bucket: #{bucket_name}. DOI: #{@doi}. Exception: #{ex.message}")

    { objects: [], ok: false }
  end

  ##
  # Copies the existing files from the pre-curation bucket to the post-curation bucket.
  # Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).
  # Returns an array with the files that failed verification in the target bucket
  # (empty when every file copied successfully).
  def publish_files
    source_bucket = S3QueryService.pre_curation_config[:bucket]
    target_bucket = S3QueryService.post_curation_config[:bucket]
    files = client_s3_files(reload: true, bucket_name: source_bucket)

    files.each do |file|
      copy_file(source_key: "/#{source_bucket}/#{file.key}", target_bucket: target_bucket,
                target_key: file.key, size: file.size)
    end

    error_files = check_files(target_bucket, files)

    # Only clean up the pre-curation copies once every file is verified in the target bucket.
    delete_files_and_directory(files) if error_files.empty?
    error_files
  end

  # Copies a single object to the target bucket via a multipart upload so that
  # objects larger than the 5GB single-request copy limit can be transferred.
  # @param source_key [String] "/<source-bucket>/<key>" as expected by copy_source
  # @param target_bucket [String] destination bucket name
  # @param target_key [String] destination object key
  # @param size [Integer] object size in bytes, used to compute part byte ranges
  # NOTE(review): a zero-byte object would skip the loop and complete the upload
  # with no parts; callers currently only pass files listed by parse_objects,
  # which skips zero-size objects — confirm if that invariant is guaranteed.
  def copy_file(source_key:, target_bucket:, target_key:, size:)
    Rails.logger.info("Copying #{source_key} to #{target_bucket}/#{target_key}")
    multi = client.create_multipart_upload(bucket: target_bucket, key: target_key)
    part_size = 5_368_709_120 # 5GB is the maximum
    part_num = 0
    start_byte = 0
    parts = []
    while start_byte < size
      part_num += 1
      end_byte = [start_byte + part_size, size].min - 1
      resp = client.upload_part_copy(bucket: target_bucket, copy_source: source_key, key: multi.key, part_number: part_num,
                                     upload_id: multi.upload_id, copy_source_range: "bytes=#{start_byte}-#{end_byte}")
      parts << { etag: resp.copy_part_result.etag, part_number: part_num }
      start_byte = end_byte + 1
    end
    client.complete_multipart_upload(bucket: target_bucket, key: target_key, upload_id: multi.upload_id, multipart_upload: { parts: parts })
  end

  # Deletes a single object from this service's bucket.
  # @return [Hash] the DeleteObject response as a Hash
  def delete_s3_object(s3_file_key)
    resp = client.delete_object({ bucket: bucket_name, key: s3_file_key })
    resp.to_h
  end

  # Creates the zero-byte "directory" placeholder object for this work's prefix
  # (S3 has no real directories).
  def create_directory
    client.put_object({ bucket: bucket_name, key: prefix, content_length: 0 })
  end

  # Uploads a file in a single PUT request (payload may not exceed 5GB).
  # @param io [IO] readable, rewindable stream with the file contents
  # @param filename [String] file name appended to the prefix to form the key
  # @return [String, false] the S3 key on success, false when the MD5 check fails
  def upload_file(io:, filename:)
    # upload file from io in a single request, may not exceed 5GB
    md5_digest = md5(io: io)
    key = "#{prefix}#{filename}"
    client.put_object(bucket: bucket_name, key: key, body: io, content_md5: md5_digest)
    key
  rescue Aws::S3::Errors::SignatureDoesNotMatch => e
    Honeybadger.notify("Error Uploading file #{filename} for object: #{s3_address} Signature did not match! error: #{e}")
    false
  end

  private

    # Pre-curation works list their uploads straight from S3; post-curation
    # works expose none here.
    def model_uploads
      if pre_curation?
        client_s3_files
      else
        []
      end
    end

    # Converts a ListObjectsV2 response hash into S3File instances.
    def parse_objects(resp)
      objects = []
      resp_hash = resp.to_h
      response_objects = resp_hash[:contents]
      response_objects&.each do |object|
        next if object[:size] == 0 # ignore directories whose size is zero
        s3_file = S3File.new(work: model, filename: object[:key], last_modified: object[:last_modified], size: object[:size], checksum: object[:etag])
        objects << s3_file
      end
      objects
    end

    # Follows ListObjectsV2 pagination until the listing is no longer truncated.
    def parse_continuation(resp_hash)
      objects = []
      while resp_hash[:is_truncated]
        token = resp_hash[:next_continuation_token]
        resp = client.list_objects_v2({ bucket: bucket_name, max_keys: 1000, prefix: prefix, continuation_token: token })
        resp_hash = resp.to_h
        objects += parse_objects(resp_hash)
      end
      objects
    end

    # Returns the subset of +files+ not found in +target_bucket+.
    # NOTE(review): Aws::S3::Client#head_object raises (e.g. NotFound) for a
    # missing key rather than returning a falsy value, so a missing file may
    # surface as an exception here instead of being collected — confirm.
    def check_files(target_bucket, files)
      error_files = []
      files.each do |file|
        error_files << file unless client.head_object({ bucket: target_bucket, key: file.key })
      end
      error_files
    end

    # Deletes the given files from the pre-curation bucket, then removes the
    # work's own directory placeholder object.
    def delete_files_and_directory(files)
      # ...and delete them from the pre-curation bucket.
      files.each do |s3_file|
        delete_s3_object(s3_file.key)
      end
      delete_s3_object(model.s3_object_key)
    end

    # Feeds +io+ through Digest::MD5 (IO#each with a 10,000-byte line-length
    # limit), rewinds the stream, and returns the Base64-encoded digest — the
    # format S3's content_md5 header expects.
    def md5(io:)
      md5 = Digest::MD5.new
      io.each(10_000) { |block| md5.update block }
      io.rewind
      md5.base64digest
    end
end
# rubocop:enable Metrics/ClassLength
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc