• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / 3c9138a1-8e1b-4a4b-8095-3d52b81dd7e1

pending completion
3c9138a1-8e1b-4a4b-8095-3d52b81dd7e1

Pull #962

circleci

Carolyn Cole
Updating to make work_edit_spec run
Pull Request #962: Fetch pre-curation files directly from AWS

41 of 41 new or added lines in 4 files covered. (100.0%)

1828 of 1861 relevant lines covered (98.23%)

166.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.47
/app/services/s3_query_service.rb
1
# frozen_string_literal: true
2

3
require "aws-sdk-s3"
1✔
4

5
# A service to query an S3 bucket for information about a given data set
6
# rubocop:disable Metrics/ClassLength
7
class S3QueryService
  # Page size requested from S3 on every list_objects_v2 call.
  MAX_KEYS_PER_PAGE = 1000
  private_constant :MAX_KEYS_PER_PAGE

  attr_reader :model

  # @return the S3 section of the Rails configuration
  def self.configuration
    Rails.configuration.s3
  end

  # @return the configuration of the pre-curation bucket
  def self.pre_curation_config
    configuration.pre_curation
  end

  # @return the configuration of the post-curation bucket
  def self.post_curation_config
    configuration.post_curation
  end

  ##
  # @param [Work] model
  # @param [Boolean] pre_curation true to query the pre-curation bucket,
  #   false to query the post-curation bucket
  # @example S3QueryService.new(Work.find(1), true)
  def initialize(model, pre_curation = true)
    @model = model
    @doi = model.doi
    @pre_curation = pre_curation
  end

  ##
  # The bucket configuration (pre- or post-curation) this instance works against.
  def config
    return self.class.post_curation_config if post_curation?

    self.class.pre_curation_config
  end

  # @return [Boolean] the flag given to the constructor
  def pre_curation?
    @pre_curation
  end

  # @return [Boolean] the negation of #pre_curation?
  def post_curation?
    !pre_curation?
  end

  ##
  # The name of the bucket this class is configured to use.
  # See config/s3.yml for configuration file.
  # @return the configured bucket, or nil when the configuration has no :bucket entry
  def bucket_name
    config.fetch(:bucket, nil)
  end

  ##
  # The region of the bucket this class is configured to use.
  # @return the configured region, or nil when the configuration has no :region entry
  def region
    config.fetch(:region, nil)
  end

  ##
  # The S3 prefix for this object, i.e., the address within the S3 bucket,
  # which is based on the DOI followed by the id of the model.
  def prefix
    "#{@doi}/#{model.id}/"
  end

  ##
  # Construct an S3 address for this data set
  def s3_address
    "s3://#{bucket_name}/#{prefix}"
  end

  ##
  # Public signed URL to fetch this file from the S3 (valid for a limited time)
  # @param key the key of the object within the bucket
  def file_url(key)
    signer = Aws::S3::Presigner.new(client: client)
    signer.presigned_url(:get_object, bucket: bucket_name, key: key)
  end

  ##
  # Deletes the object stored under +key+ in the configured bucket.
  # @return the response returned by the S3 client
  def delete_file(key)
    client.delete_object({ bucket: bucket_name, key: key })
  end

  # There is probably a better way to fetch the current ActiveStorage configuration but we have
  # not found it.
  def active_storage_configuration
    Rails.configuration.active_storage.service_configurations[Rails.configuration.active_storage.service.to_s]
  end

  # The access key id declared for the current ActiveStorage service.
  def access_key_id
    active_storage_configuration["access_key_id"]
  end

  # The secret access key declared for the current ActiveStorage service.
  def secret_access_key
    active_storage_configuration["secret_access_key"]
  end

  # AWS credentials built from the ActiveStorage keys (memoized).
  def credentials
    @credentials ||= Aws::Credentials.new(access_key_id, secret_access_key)
  end

  # The S3 client for the configured region (memoized).
  def client
    @client ||= Aws::S3::Client.new(region: region, credentials: credentials)
  end

  # Retrieve the S3 resources attached to the Work model
  # @return [Array<S3File>] empty when there is no model or when this
  #   service targets the post-curation bucket
  def model_s3_files
    return [] if model.nil?

    model_uploads.map do |attachment|
      S3File.new(query_service: self,
                 filename: attachment.key,
                 last_modified: attachment.created_at,
                 size: attachment.byte_size,
                 checksum: attachment.checksum)
    end
  end

  ##
  # Fetches a single object from the configured bucket.
  # @param key the full key of the object
  # @return [Hash, nil] the response as a Hash, or nil when that Hash is empty
  def get_s3_object(key:)
    object = client.get_object({ bucket: bucket_name, key: key }).to_h
    return if object.empty?

    object
  end

  ##
  # Looks up one file of this data set; the key is #prefix followed by +filename+.
  # @param filename the name of the file relative to #prefix
  # @return [S3File, nil] nil when #get_s3_object returns nothing
  def find_s3_file(filename:)
    s3_object_key = "#{prefix}#{filename}"

    object = get_s3_object(key: s3_object_key)
    return if object.nil?

    S3File.new(query_service: self, filename: s3_object_key, last_modified: object[:last_modified], size: object[:content_length], checksum: object[:etag])
  end

  # Retrieve the S3 resources uploaded to the S3 Bucket
  # The listing is memoized; pass reload: true to query S3 again.
  # @param [Boolean] reload discard the memoized listing first
  # @return [Array<S3File>]
  def client_s3_files(reload: false)
    @client_s3_files = nil if reload # force a reload
    @client_s3_files ||= begin
      Rails.logger.debug("Bucket: #{bucket_name}")
      Rails.logger.debug("Prefix: #{prefix}")
      first_page = list_objects_page
      parse_objects(first_page) + parse_continuation(first_page)
    end
  end

  # @return [Integer] the number of files found by #client_s3_files
  def file_count
    client_s3_files.count
  end

  # TODO: delete this (or client_s3_files)
  def s3_files
    client_s3_files
  end

  ##
  # Query the S3 bucket for what we know about the doi
  # For docs see:
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#list_objects_v2-instance_method
  # * https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#get_object_attributes-instance_method
  # @return Hash with two properties {objects: [<S3File>], ok: Bool}
  #   objects is an Array of S3File objects
  #   ok is false if an error was raised while listing the files (the error is logged). Otherwise true.
  def data_profile
    { objects: s3_files, ok: true }
  rescue => ex
    Rails.logger.error("Error querying S3. Bucket: #{bucket_name}. DOI: #{@doi}. Exception: #{ex.message}")

    { objects: [], ok: false }
  end

  ##
  # Copies the existing files from the pre-curation bucket to the post-curation bucket.
  # Notice that the copy process happens at AWS (i.e. the files are not downloaded and re-uploaded).
  # Returns an array with the files that were copied.
  def publish_files
    files = []
    source_bucket = S3QueryService.pre_curation_config[:bucket]
    target_bucket = S3QueryService.post_curation_config[:bucket]
    # Plain #each on purpose: nothing else is required from the uploads collection.
    model.pre_curation_uploads.each do |file|
      params = {
        copy_source: "/#{source_bucket}/#{file.key}",
        bucket: target_bucket,
        key: file.key
      }
      Rails.logger.info("Copying #{params[:copy_source]} to #{params[:bucket]}/#{params[:key]}")
      client.copy_object(params)
      files << file
    end
    files
  end

  ##
  # Deletes the object stored under +s3_file_key+ in the configured bucket.
  # Same request as #delete_file, but the response is converted to a Hash.
  # @return [Hash]
  def delete_s3_object(s3_file_key)
    delete_file(s3_file_key).to_h
  end

  private

    # The listing to wrap as S3File objects in #model_s3_files: the bucket
    # listing for pre-curation, nothing for post-curation.
    def model_uploads
      if pre_curation?
        client_s3_files
      else
        []
      end
    end

    # Requests one page of the listing under #prefix.
    # @param [Hash] extra additional request parameters (e.g. the continuation token)
    # @return [Hash] the response converted to a Hash
    def list_objects_page(extra = {})
      request = { bucket: bucket_name, max_keys: MAX_KEYS_PER_PAGE, prefix: prefix }
      client.list_objects_v2(request.merge(extra)).to_h
    end

    # Converts the :contents of a list_objects_v2 response into S3File objects.
    # @param resp a response (or its Hash form)
    # @return [Array<S3File>]
    def parse_objects(resp)
      response_objects = resp.to_h[:contents]
      Rails.logger.debug("Objects: #{response_objects}")
      # ignore directories whose size is zero
      files = (response_objects || []).reject { |object| object[:size] == 0 }
      files.map do |object|
        S3File.new(query_service: self, filename: object[:key], last_modified: object[:last_modified], size: object[:size], checksum: object[:etag])
      end
    end

    # Follows the continuation tokens for as long as the listing is truncated.
    # @param [Hash] resp_hash the page that was already fetched (its own
    #   objects are NOT included in the result)
    # @return [Array<S3File>] the files found in the subsequent pages
    def parse_continuation(resp_hash)
      objects = []
      while resp_hash[:is_truncated]
        token = resp_hash[:next_continuation_token]
        resp_hash = list_objects_page({ continuation_token: token })
        objects.concat(parse_objects(resp_hash))
      end
      objects
    end
end
241
# rubocop:enable Metrics/ClassLength
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc