• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / e71dda69-4c6c-449e-b2c9-4282b2b3c755

pending completion
e71dda69-4c6c-449e-b2c9-4282b2b3c755

Pull #962

circleci

Carolyn Cole
Removing upload method change and adding stubbing of S3 method that is now being called directly
Pull Request #962: Fetch pre-curation files directly from AWS

13 of 13 new or added lines in 3 files covered. (100.0%)

1832 of 1858 relevant lines covered (98.6%)

167.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.52
/app/services/s3_query_service.rb
1
# frozen_string_literal: true
2

3
require "aws-sdk-s3"
1✔
4

5
# A service to query an S3 bucket for information about a given data set
6
# rubocop:disable Metrics/ClassLength
7
class S3QueryService
1✔
8
  attr_reader :model
1✔
9

10
  # The S3 settings object exposed by the Rails application configuration.
  def self.configuration
    Rails.configuration.s3
  end
13

14
  # The `pre_curation` section of the S3 configuration.
  def self.pre_curation_config
    configuration.pre_curation
  end
17

18
  # The `post_curation` section of the S3 configuration.
  def self.post_curation_config
    configuration.post_curation
  end
21

22
  # URL scheme used when addressing S3; always "https".
  def self.url_protocol
    "https"
  end
25

26
  # Host name of the S3 endpoint; always "s3.amazonaws.com".
  def self.s3_host
    "s3.amazonaws.com"
  end
29

30
  ##
  # Builds a query service for one work. The DOI is read from the model once,
  # at construction time, and cached for building the S3 prefix.
  # @param [Work] model the work to query for; must respond to `doi`
  # @param [Boolean] pre_curation true (the default) to use the pre-curation
  #   configuration, false to use the post-curation configuration
  # @example S3QueryService.new(Work.find(1), true)
  def initialize(model, pre_curation = true)
    @model = model
    @doi = model.doi
    @pre_curation = pre_curation
  end
39

40
  # The configuration section matching this service's mode: the post-curation
  # section when post_curation? is true, otherwise the pre-curation section.
  def config
    if post_curation?
      self.class.post_curation_config
    else
      self.class.pre_curation_config
    end
  end
45

46
  # The pre_curation flag given to the constructor, returned as-is.
  def pre_curation?
    @pre_curation
  end
49

50
  # Logical negation of pre_curation?.
  def post_curation?
    !pre_curation?
  end
53

54
  ##
  # The name of the bucket this class is configured to use, or nil when the
  # active configuration has no :bucket entry.
  # See config/s3.yml for configuration file.
  def bucket_name
    config.fetch(:bucket, nil)
  end
60

61
  # The :region entry of the active configuration, or nil when it is absent.
  def region
    config.fetch(:region, nil)
  end
64

65
  ##
  # The S3 prefix for this object, i.e., the address within the S3 bucket.
  # It is built as "<doi>/<model id>/" and always ends with a slash.
  def prefix
    "#{@doi}/#{model.id}/"
  end
71

72
  ##
  # Construct an S3 address for this data set, in the form
  # "s3://<bucket_name>/<prefix>".
  def s3_address
    "s3://#{bucket_name}/#{prefix}"
  end
77

78
  ##
  # Public signed URL to fetch this file from the S3 (valid for a limited time).
  # No expiry is passed here, so the presigner's own default applies.
  # @param [String] key full object key within the configured bucket
  def file_url(key)
    Aws::S3::Presigner
      .new(client: client)
      .presigned_url(:get_object, bucket: bucket_name, key: key)
  end
84

85
  # Deletes the object stored under +key+ in the configured bucket and
  # returns whatever the client's delete_object call returns.
  # @param [String] key full object key within the configured bucket
  def delete_file(key)
    client.delete_object({ bucket: bucket_name, key: key })
  end
88

89
  # Looks up the configuration entry of the currently selected ActiveStorage
  # service, keyed by the service name converted to a String.
  # There is probably a better way to fetch the current ActiveStorage configuration but we have
  # not found it.
  def active_storage_configuration
    storage = Rails.configuration.active_storage
    storage.service_configurations[storage.service.to_s]
  end
94

95
  # The "access_key_id" entry of the active ActiveStorage service configuration.
  def access_key_id
    active_storage_configuration["access_key_id"]
  end
98

99
  # The "secret_access_key" entry of the active ActiveStorage service configuration.
  def secret_access_key
    active_storage_configuration["secret_access_key"]
  end
102

103
  # AWS credentials built from access_key_id and secret_access_key.
  # Memoized: the same object is returned on later calls.
  def credentials
    @credentials ||= Aws::Credentials.new(access_key_id, secret_access_key)
  end
106

107
  # S3 client for the configured region, authenticated with #credentials.
  # Memoized: the same client is reused for every request made by this service.
  def client
    @client ||= Aws::S3::Client.new(region: region, credentials: credentials)
  end
110

111
  # Retrieve the S3 resources attached to the Work model.
  # Each upload returned by model_uploads is wrapped in an S3File built from
  # its key, created_at, byte_size and checksum.
  # @return [Array<S3File>] empty when the model is nil
  def model_s3_files
    return [] if model.nil?

    model_uploads.map do |attachment|
      S3File.new(query_service: self,
                 filename: attachment.key,
                 last_modified: attachment.created_at,
                 size: attachment.byte_size,
                 checksum: attachment.checksum)
    end
  end
128

129
  # Fetches the object stored under +key+ in the configured bucket.
  # @param [String] key full object key within the configured bucket
  # @return [Hash, nil] the client's response converted with to_h, or nil
  #   when that hash is empty
  def get_s3_object(key:)
    object = client.get_object({ bucket: bucket_name, key: key }).to_h
    object unless object.empty?
  end
139

140
  # Looks up a single file of this work by its name relative to #prefix.
  # @param [String] filename name appended to #prefix to form the object key
  # @return [S3File, nil] built from the object's :last_modified,
  #   :content_length and :etag; nil when get_s3_object returns nil
  def find_s3_file(filename:)
    s3_object_key = "#{prefix}#{filename}"
    object = get_s3_object(key: s3_object_key)
    return if object.nil?

    S3File.new(query_service: self,
               filename: s3_object_key,
               last_modified: object[:last_modified],
               size: object[:content_length],
               checksum: object[:etag])
  end
148

149
  # Retrieve the S3 resources uploaded to the S3 Bucket under #prefix.
  # Objects are listed up to 1000 keys per request; while a response reports
  # :is_truncated, the next page is requested with its :next_continuation_token.
  # @return [Array<S3File>]
  def client_s3_files
    Rails.logger.debug("Bucket: #{bucket_name}")
    Rails.logger.debug("Prefix: #{prefix}")

    objects = []
    continuation = {}
    loop do
      query = { bucket: bucket_name, max_keys: 1000, prefix: prefix }.merge(continuation)
      page = client.list_objects_v2(query).to_h
      objects.concat(parse_objects(page))
      break unless page[:is_truncated]

      continuation = { continuation_token: page[:next_continuation_token] }
    end
    objects
  end
168

169
  # Retrieve the S3 resources from the S3 Bucket without those attached to the Work model.
  # A bucket file is dropped when its filename equals the filename of a model file.
  # @return [Array<S3File>]
  def s3_files
    attached_keys = model_s3_files.map(&:filename)
    client_s3_files.reject { |bucket_file| attached_keys.include?(bucket_file.filename) }
  end
175

176
  ##
  # Query the S3 bucket for what we know about the doi
  # For docs see:
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#list_objects_v2-instance_method
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#get_object_attributes-instance_method
  # @return Hash with two properties {objects: [<S3File>], ok: Bool}
  #   objects is an Array of S3File objects
  #   ok is false if s3_files raised a StandardError (the error is logged and
  #   an empty objects list is returned). Otherwise true.
  def data_profile
    { objects: s3_files, ok: true }
  rescue StandardError => e
    Rails.logger.error("Error querying S3. Bucket: #{bucket_name}. DOI: #{@doi}. Exception: #{e.message}")

    { objects: [], ok: false }
  end
191

192
  ##
  # Copies the existing files from the pre-curation bucket to the post-curation bucket.
  # Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).
  # Each file keeps its key in the target bucket, and every copy is logged at info level.
  # Returns an array with the files that were copied.
  def publish_files
    source_bucket = S3QueryService.pre_curation_config[:bucket]
    target_bucket = S3QueryService.post_curation_config[:bucket]

    model.pre_curation_uploads.map do |file|
      params = {
        copy_source: "/#{source_bucket}/#{file.key}",
        bucket: target_bucket,
        key: file.key
      }
      Rails.logger.info("Copying #{params[:copy_source]} to #{params[:bucket]}/#{params[:key]}")
      client.copy_object(params)
      file
    end
  end
212

213
  private
1✔
214

215
    # Uploads to compare against the bucket listing: the model's
    # pre_curation_uploads_fast in pre-curation mode, otherwise an empty list.
    def model_uploads
      pre_curation? ? model.pre_curation_uploads_fast : []
    end
222

223
    # Converts the :contents entries of a list_objects_v2 response hash into
    # S3File objects, skipping entries whose :size is zero.
    # @param [Hash] resp_hash response hash; :contents may be absent
    # @return [Array<S3File>] empty when there are no contents
    def parse_objects(resp_hash)
      response_objects = resp_hash[:contents]
      Rails.logger.debug("Objects: #{response_objects}")

      (response_objects || [])
        .reject { |object| object[:size] == 0 } # ignore directories whose size is zero
        .map do |object|
          S3File.new(query_service: self,
                     filename: object[:key],
                     last_modified: object[:last_modified],
                     size: object[:size],
                     checksum: object[:etag])
        end
    end
234
end
235
# rubocop:enable Metrics/ClassLength
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc