• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / 00dac745-55d3-4397-95bf-de4f70881e93

pending completion
00dac745-55d3-4397-95bf-de4f70881e93

Pull #995

circleci

Hector Correa
Add tests for new logic
Pull Request #995: Handles external user ids

8 of 8 new or added lines in 1 file covered. (100.0%)

936 of 1902 relevant lines covered (49.21%)

9.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

31.58
/app/services/s3_query_service.rb
1
# frozen_string_literal: true
2

3
require "aws-sdk-s3"
require "erb"
1✔
4

5
# A service to query an S3 bucket for information about a given data set
6
# rubocop:disable Metrics/ClassLength
7
class S3QueryService
  # Page size requested from S3 on every list_objects_v2 call.
  MAX_KEYS = 1000

  attr_reader :model

  ##
  # The S3 section of the Rails configuration.
  def self.configuration
    Rails.configuration.s3
  end

  ##
  # Configuration entry for the pre-curation bucket.
  def self.pre_curation_config
    configuration.pre_curation
  end

  ##
  # Configuration entry for the post-curation bucket.
  def self.post_curation_config
    configuration.post_curation
  end

  ##
  # @param [Work] model
  # @param [Boolean] pre_curation true to use the pre-curation configuration,
  #   false to use the post-curation one
  # @example S3QueryService.new(Work.find(1), true)
  def initialize(model, pre_curation = true)
    @model = model
    @doi = model.doi
    @pre_curation = pre_curation
  end

  ##
  # The configuration matching the curation stage chosen at construction time.
  def config
    post_curation? ? self.class.post_curation_config : self.class.pre_curation_config
  end

  def pre_curation?
    @pre_curation
  end

  def post_curation?
    !pre_curation?
  end

  ##
  # The name of the bucket this class is configured to use.
  # See config/s3.yml for configuration file.
  # @return [String, nil] nil when the configuration has no :bucket entry
  def bucket_name
    config.fetch(:bucket, nil)
  end

  ##
  # The region this class is configured to use.
  # @return [String, nil] nil when the configuration has no :region entry
  def region
    config.fetch(:region, nil)
  end

  ##
  # The S3 prefix for this object, i.e., the address within the S3 bucket,
  # which is based on the DOI and the id of the model.
  def prefix
    "#{@doi}/#{model.id}/"
  end

  ##
  # Construct an S3 address for this data set
  def s3_address
    "s3://#{bucket_name}/#{prefix}"
  end

  ##
  # Public signed URL to fetch this file from the S3 (valid for a limited time)
  # @param [String] key the key of the object within the bucket
  def file_url(key)
    Aws::S3::Presigner.new(client: client).presigned_url(:get_object, bucket: bucket_name, key: key)
  end

  ##
  # Deletes the object stored under +key+ and returns the raw client response.
  # See #delete_s3_object for the variant that returns a Hash.
  def delete_file(key)
    client.delete_object({ bucket: bucket_name, key: key })
  end

  # There is probably a better way to fetch the current ActiveStorage configuration but we have
  # not found it.
  def active_storage_configuration
    storage = Rails.configuration.active_storage
    storage.service_configurations[storage.service.to_s]
  end

  def access_key_id
    active_storage_configuration["access_key_id"]
  end

  def secret_access_key
    active_storage_configuration["secret_access_key"]
  end

  ##
  # Memoized AWS credentials built from the ActiveStorage configuration.
  def credentials
    @credentials ||= Aws::Credentials.new(access_key_id, secret_access_key)
  end

  ##
  # Memoized S3 client for the configured region.
  def client
    @client ||= Aws::S3::Client.new(region: region, credentials: credentials)
  end

  # Retrieve the S3 resources attached to the Work model
  # NOTE(review): the uploads come from #client_s3_files, so each one must respond to
  # key, created_at, byte_size and checksum - confirm against S3File.
  # @return [Array<S3File>]
  def model_s3_files
    return [] if model.nil?

    model_uploads.map do |attachment|
      S3File.new(work: model,
                 filename: attachment.key,
                 last_modified: attachment.created_at,
                 size: attachment.byte_size,
                 checksum: attachment.checksum)
    end
  end

  ##
  # Fetches the object stored under +key+.
  # NOTE(review): the AWS SDK presumably raises for a missing key, so the empty
  # check below may never be what reports "not found" - confirm.
  # @return [Hash, nil] the response as a Hash, or nil when the response is empty
  def get_s3_object(key:)
    object = client.get_object({ bucket: bucket_name, key: key }).to_h
    object unless object.empty?
  end

  ##
  # Looks up +filename+ under this data set's prefix.
  # @return [S3File, nil] nil when #get_s3_object returns nothing
  def find_s3_file(filename:)
    s3_object_key = "#{prefix}#{filename}"
    object = get_s3_object(key: s3_object_key)
    return if object.nil?

    S3File.new(work: model, filename: s3_object_key, last_modified: object[:last_modified], size: object[:content_length], checksum: object[:etag])
  end

  # Retrieve the S3 resources uploaded to the S3 Bucket
  # The listing is memoized; pass reload: true to query S3 again.
  # @return [Array<S3File>]
  def client_s3_files(reload: false)
    @client_s3_files = nil if reload # force a reload
    @client_s3_files ||= begin
      # A monotonic clock is used because only the duration matters here.
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      first_page = list_objects_page
      objects = parse_objects(first_page) + parse_continuation(first_page)
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      Rails.logger.info("Loading S3 objects. Bucket: #{bucket_name}. Prefix: #{prefix}. Elapsed: #{elapsed} seconds")
      objects
    end
  end

  def file_count
    client_s3_files.count
  end

  # TODO: delete this (or client_s3_files)
  def s3_files
    client_s3_files
  end

  ##
  # Query the S3 bucket for what we know about the doi
  # For docs see:
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#list_objects_v2-instance_method
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#get_object_attributes-instance_method
  # @return Hash with two properties {objects: [<S3File>], ok: Bool}
  #   objects is an Array of S3File objects
  #   ok is false if there is an error connecting to S3. Otherwise true.
  def data_profile
    { objects: s3_files, ok: true }
  rescue => ex
    Rails.logger.error("Error querying S3. Bucket: #{bucket_name}. DOI: #{@doi}. Exception: #{ex.message}")

    { objects: [], ok: false }
  end

  ##
  # Copies the existing files from the pre-curation bucket to the post-curation bucket.
  # Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).
  # Returns an array with the files that were copied.
  def publish_files
    source_bucket = S3QueryService.pre_curation_config[:bucket]
    target_bucket = S3QueryService.post_curation_config[:bucket]

    model.pre_curation_uploads.map do |file|
      params = {
        copy_source: encoded_copy_source(source_bucket, file.key),
        bucket: target_bucket,
        key: file.key
      }
      Rails.logger.info("Copying #{params[:copy_source]} to #{params[:bucket]}/#{params[:key]}")
      client.copy_object(params)
      file
    end
  end

  ##
  # Deletes the object stored under +s3_file_key+.
  # @return [Hash] the client response converted to a Hash
  def delete_s3_object(s3_file_key)
    client.delete_object({ bucket: bucket_name, key: s3_file_key }).to_h
  end

  private

    # Uploads are only reported for the pre-curation stage.
    def model_uploads
      pre_curation? ? client_s3_files : []
    end

    # Requests one page of the listing for this data set's prefix.
    # @param extra additional request parameters (e.g. continuation_token)
    # @return [Hash] the response converted to a Hash
    def list_objects_page(**extra)
      request = { bucket: bucket_name, max_keys: MAX_KEYS, prefix: prefix }
      client.list_objects_v2(request.merge(extra)).to_h
    end

    # Builds the copy_source value for copy_object. The key is percent-encoded
    # one path segment at a time so that the "/" separators are preserved.
    def encoded_copy_source(bucket, key)
      encoded_key = key.gsub(%r{[^/]+}) { |segment| ERB::Util.url_encode(segment) }
      "/#{bucket}/#{encoded_key}"
    end

    # Converts the :contents of a listing response into S3File objects.
    def parse_objects(resp)
      entries = resp.to_h[:contents] || []
      files = entries.reject { |object| object[:size] == 0 } # ignore directories whose size is zero
      files.map do |object|
        S3File.new(work: model, filename: object[:key], last_modified: object[:last_modified], size: object[:size], checksum: object[:etag])
      end
    end

    # Follows the continuation tokens for as long as the listing is truncated
    # and returns the S3File objects found on the additional pages.
    def parse_continuation(resp_hash)
      objects = []
      while resp_hash[:is_truncated]
        resp_hash = list_objects_page(continuation_token: resp_hash[:next_continuation_token])
        objects += parse_objects(resp_hash)
      end
      objects
    end
end
241
# rubocop:enable Metrics/ClassLength
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc