• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / 9cd1579d-9684-4b66-ae83-9619a7d64644

pending completion
9cd1579d-9684-4b66-ae83-9619a7d64644

Pull #1094

circleci

GitHub
Merge branch 'main' into sidekiq-prod
Pull Request #1094: Adding redis and sidekiq

1628 of 2126 relevant lines covered (76.58%)

97.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

17.86
/app/services/pul_dspace_data.rb
1
# frozen_string_literal: true
2
# Moves the files attached to a work out of DSpace.
#
# Given a work that carries an ARK, this service looks the item up through the
# DSpace REST API, downloads each bitstream to local disk, verifies it against
# the checksum DSpace reports, uploads it through the work's S3 query service,
# and finally enqueues a DspaceFileCopyJob for every object found in the DSpace
# S3 bucket under the work's DOI prefix.
class PULDspaceData
  # Prefix that marks a dc.identifier.uri metadata value as a DOI link.
  DOI_URL_PREFIX = "https://doi.org/".freeze

  # @return [Object] the work being migrated
  # @return [String, nil] the work's ARK with every "ark:/" removed, or nil when the work has no ARK
  # @return [Array] keys of the files uploaded or queued for copy so far
  attr_reader :work, :ark, :keys

  # @param work [Object] must respond to #ark; the other methods also use
  #   #id, #resource, #save and #s3_query_service
  def initialize(work)
    @work = work
    @ark = work.ark&.gsub("ark:/", "")
    @keys = []
  end

  # Runs the whole migration. Does nothing when the work has no ARK.
  #
  # NOTE: the work is flagged as migrated and saved *before* any file is
  # transferred, so a failure raised further down leaves the flag set.
  #
  # @raise [RuntimeError] when a download or an upload fails (see #migrate_dspace)
  def migrate
    return if ark.nil?
    work.resource.migrated = true
    work.save
    migrate_dspace
    aws_copy(aws_files)
  end

  # DSpace item id resolved from the ARK handle (memoized).
  #
  # NOTE: when the handle lookup fails, get_data returns nil and this raises
  # NoMethodError.
  #
  # @return [Object, nil] the "id" field of the handle response; nil without an ARK
  def id
    return nil if ark.nil?
    @id ||= get_data("handle/#{ark}")["id"]
  end

  # Bitstream records DSpace lists for the item (memoized).
  #
  # @return [Array<Hash>] empty when the work has no ARK
  def bitstreams
    return [] if ark.nil?
    @bitstreams ||= get_data("items/#{id}/bitstreams")
  end

  # Item metadata grouped by key (memoized). DSpace returns a flat list of
  # key/value entries; repeated keys are collected into one array per key.
  #
  # @return [Hash{String => Array}] empty when the work has no ARK
  def metadata
    return {} if ark.nil?
    @metadata ||= get_data("items/#{id}/metadata").each_with_object({}) do |entry, grouped|
      (grouped[entry["key"]] ||= []) << entry["value"]
    end
  end

  # Downloads every bitstream and verifies its checksum.
  #
  # @return [Array<String, nil>] one entry per bitstream, in bitstream order:
  #   the local filename, or nil when the checksum did not verify
  def download_bitstreams
    return [] if ark.nil?
    bitstreams.map do |bitstream|
      filename = download_bitstream(bitstream["retrieveLink"], bitstream["name"])
      filename if checksum_file(filename, bitstream)
    end
  end

  # Uploads local files through the work's S3 query service and records the
  # resulting keys in #keys.
  #
  # @param filenames [Array<String>] paths of the files to upload
  # @return [Array<String, nil>] one entry per file: nil on success, otherwise an error message
  def upload_to_s3(filenames)
    filenames.map do |filename|
      # Block form guarantees the handle is closed, even when the upload raises.
      key = File.open(filename) do |io|
        work.s3_query_service.upload_file(io: io, filename: File.basename(filename))
      end
      if key
        keys << key
        nil
      else
        "An error uploading #{filename}.  Please try again."
      end
    end
  end

  # DOI of the item, taken from the first dc.identifier.uri value that is a
  # doi.org link, with the link prefix removed (memoized).
  #
  # @return [String, nil] "" when the work has no ARK; nil when the metadata holds no DOI link
  def doi
    return "" if ark.nil?
    @doi ||= begin
      # The metadata may have no dc.identifier.uri entry at all, hence &.
      doi_url = metadata["dc.identifier.uri"]&.find { |value| value.start_with?(DOI_URL_PREFIX) }
      doi_url&.gsub(DOI_URL_PREFIX, "")
    end
  end

  # Objects in the DSpace S3 bucket stored under this item's DOI, with the
  # dots of the DOI replaced by dashes to form the prefix (memoized).
  #
  # @return [Array] empty when the work has no ARK or no DOI could be found
  def aws_files
    return [] if ark.nil? || doi.nil?
    @aws_files ||= work.s3_query_service.client_s3_files(reload: true, bucket_name: dspace_bucket_name, prefix: doi.tr(".", "-"))
  end

  # Enqueues one DspaceFileCopyJob per S3 object and records each key in #keys.
  #
  # @param files [Array] objects responding to #key and #size
  def aws_copy(files)
    files.each do |s3_file|
      DspaceFileCopyJob.perform_later(doi, s3_file.key, s3_file.size, work.id)
      keys << s3_file.key
    end
  end

  # @return [String] name of the DSpace bucket from the Rails S3 configuration (memoized)
  def dspace_bucket_name
    @dspace_bucket_name ||= Rails.configuration.s3.dspace[:bucket]
  end

  private

    # Downloads all bitstreams, uploads them to S3 and removes the local copies.
    #
    # @raise [RuntimeError] naming the bitstreams whose download did not verify,
    #   or listing the upload error messages
    def migrate_dspace
      filenames = download_bitstreams
      if filenames.any?(nil)
        # download_bitstreams yields one entry per bitstream in the same order,
        # so pairing the two lists tells us which bitstreams failed.
        failed = bitstreams.zip(filenames).select { |_bitstream, filename| filename.nil? }
        error_names = failed.map { |bitstream, _filename| bitstream["name"] }.join(", ")
        raise "Error downloading file(s) #{error_names}"
      end
      errors = upload_to_s3(filenames).compact
      raise "Error uploading file(s):\n #{errors.join("\n")}" unless errors.empty?
      filenames.each { |filename| File.delete(filename) }
    end

    # Fetches and parses a JSON document from the DSpace REST API.
    #
    # @param url_path [String] path appended to the configured DSpace base URL
    # @return [Object, nil] the parsed JSON, or nil (after notifying Honeybadger)
    #   when the response status is not 200
    def get_data(url_path)
      url = "#{Rails.configuration.dspace.base_url}#{url_path}"
      # request_uri keeps any query string, which URI#path would drop.
      response = request_http(url).request(Net::HTTP::Get.new(URI(url).request_uri))
      if response.code != "200"
        Honeybadger.notify("Error retreiving dspace data from #{url} #{response.code} #{response.body}")
        return nil
      end
      JSON.parse(response.body)
    end

    # Downloads one bitstream into <download_file_path>/dspace_download/<work id>/.
    #
    # @param retrieval_path [String] path appended to the configured DSpace base URL
    # @param name [String] file name to store the bitstream under
    # @return [String] path of the downloaded file
    def download_bitstream(retrieval_path, name)
      url = "#{Rails.configuration.dspace.base_url}#{retrieval_path}"
      directory = File.join(Rails.configuration.dspace.download_file_path, "dspace_download", work.id.to_s)
      filename = File.join(directory, name)
      FileUtils.mkdir_p(directory)
      download_file(url, filename)
      filename
    end

    # Streams the body of a GET request to disk chunk by chunk.
    #
    # @param url [String] absolute URL to fetch
    # @param filename [String] path of the file to write
    def download_file(url, filename)
      request = Net::HTTP::Get.new(URI(url).request_uri)
      request_http(url).request(request) do |response|
        # Binary mode writes the bytes untouched (no transcoding, no newline
        # translation); the block form closes the file even if the read fails.
        File.open(filename, "wb") do |io|
          response.read_body { |chunk| io.write(chunk) }
        end
      end
    end

    # Compares a downloaded file with the checksum DSpace reported for it.
    # Mismatches and unknown algorithms are reported to Honeybadger.
    #
    # @param filename [String] path of the downloaded file
    # @param bitstream [Hash] bitstream record holding "checkSum" => { "checkSumAlgorithm", "value" }
    # @return [Boolean] true only when the digests match
    def checksum_file(filename, bitstream)
      checksum_class = Digest.const_get(bitstream["checkSum"]["checkSumAlgorithm"])
      if checksum_class.file(filename).hexdigest != bitstream["checkSum"]["value"]
        Rails.logger.error "mismatching checksum #{filename} #{bitstream}"
        Honeybadger.notify("Mismatching checksum #{filename} #{bitstream}")
        false
      else
        Rails.logger.debug "Matching checksums for #{filename}"
        true
      end
    rescue NameError, LoadError
      # Digest raises NameError for a malformed constant name and LoadError
      # when no digest library exists for a well-formed one.
      Honeybadger.notify("Unknown checksum algorithm #{bitstream['checkSum']['checkSumAlgorithm']} #{filename} #{bitstream}")
      false
    end

    # Builds an HTTP client for the host and port of the given URL.
    # TLS is always enabled, whatever the URL scheme.
    #
    # @param url [String] absolute URL
    # @return [Net::HTTP]
    def request_http(url)
      uri = URI(url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http
    end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc