• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / 9091a1ae-29be-458c-984a-339d213919c4

12 Dec 2024 07:41PM UTC coverage: 26.434% (-69.7%) from 96.113%
9091a1ae-29be-458c-984a-339d213919c4

Pull #2000

circleci

jrgriffiniii
Removing integration with ActiveStorage
Pull Request #2000: Bump actionpack from 7.2.1.1 to 7.2.2.1

945 of 3575 relevant lines covered (26.43%)

0.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

20.93
/app/services/pul_dspace_connector.rb
1
# frozen_string_literal: true
2
# Looks up the DSpace item behind a work's ARK and exposes that item's
# metadata and bitstreams (files) so they can be listed and downloaded to
# local disk.
#
# Remote lookups go through #get_data, which returns +{}+ when the work has no
# ARK and +nil+ when DSpace answers with anything other than HTTP 200. Every
# public reader tolerates both of those outcomes and yields an empty result
# instead of raising.
class PULDspaceConnector
  attr_reader :work, :ark, :download_base

  # Number of bitstreams requested per page when listing an item's files.
  DSPACE_PAGE_SIZE = 20

  # Prefix that marks a dc.identifier.uri value as a DOI link.
  DOI_URL_PREFIX = "https://doi.org/"

  # @param work [#ark, #id, #doi] the work whose DSpace item is being read
  def initialize(work)
    @work = work
    @ark = work.ark&.gsub("ark:/", "")
    # Bitstream downloads live beside the REST endpoint, so "rest/" is removed from the base URL.
    @download_base = "#{Rails.configuration.dspace.base_url.gsub('rest/', '')}bitstream/#{ark}"
  end

  # @return [Object, nil] the "id" field of the handle lookup for the ARK;
  #   nil when there is no ARK or the lookup failed
  def id
    @id ||= get_data("handle/#{ark}")&.dig("id")
  end

  # @return [Array<Hash>] every bitstream record of the item, across all pages;
  #   empty when the item could not be resolved
  def bitstreams
    @bitstreams ||= fetch_all_bitstreams
  end

  # @return [Hash{String => Array}] metadata values grouped by their key, in
  #   the order DSpace returned them; empty when the item could not be resolved
  def metadata
    @metadata ||= fetch_metadata
  end

  # Describes every bitstream of the ORIGINAL bundle as an S3File pointing at
  # its download URL and at the local path it will be downloaded to.
  #
  # @return [Array<S3File>]
  def list_bitsteams
    @list_bitsteams ||= original_bitstreams.map { |bitstream| build_s3_file(bitstream) }
  end

  # Correctly spelled name for #list_bitsteams; the misspelled original is
  # kept because existing callers use it.
  alias list_bitstreams list_bitsteams

  # Downloads each file and verifies its MD5 checksum.
  #
  # @param bitstream_list [Array<#url, #filename, #checksum>]
  # @return [Array] one entry per input, in order: the file itself when its
  #   checksum matched, otherwise a Hash of the form +{ file:, error: }+
  def download_bitstreams(bitstream_list)
    bitstream_list.map do |file|
      filename = download_bitstream(file.url, file.filename)
      if checksum_file(filename, file.checksum)
        file
      else
        # NOTE: the misspelled text is preserved on purpose; callers may match on it.
        { file:, error: "Checsum Missmatch" }
      end
    end
  end

  # @return [String, nil] the DOI taken from the first dc.identifier.uri value
  #   that is a doi.org link; "" when the work has no ARK; nil when the item
  #   has no such value
  def doi
    return "" if ark.nil?
    @doi ||= begin
               # Array() covers items that carry no dc.identifier.uri field at all.
               doi_url = Array(metadata["dc.identifier.uri"]).find { |value| value.start_with?(DOI_URL_PREFIX) }
               doi_url&.delete_prefix(DOI_URL_PREFIX)
             end
  end

  private

    # Performs a GET against the configured DSpace base URL.
    #
    # @param url_path [String] path (and query) appended to the base URL
    # @return [Hash, Array, nil] +{}+ when the work has no ARK, nil (after
    #   notifying Honeybadger) when the response code is not "200", otherwise
    #   the parsed JSON body
    def get_data(url_path)
      return {} if ark.nil?

      url = "#{Rails.configuration.dspace.base_url}#{url_path}"
      response = request_http(url).request(Net::HTTP::Get.new(URI(url)))
      unless response.code == "200"
        Honeybadger.notify("Error retreiving dspace data from #{url} #{response.code} #{response.body}")
        return nil
      end
      JSON.parse(response.body)
    end

    # Requests bitstream pages until a short page signals the end of the list.
    # The offset of each request is the number of records collected so far.
    def fetch_all_bitstreams
      return [] if id.nil?

      collected = []
      loop do
        page = get_data("items/#{id}/bitstreams?offset=#{collected.length}&limit=#{DSPACE_PAGE_SIZE}")
        # A failed request (nil) or a non-list answer ends the listing with what we have.
        page = [] unless page.is_a?(Array)
        collected.concat(page)
        break if page.size < DSPACE_PAGE_SIZE
      end
      collected
    end

    # Fetches the item's metadata entries and groups their values by key.
    def fetch_metadata
      return {} if id.nil?

      entries = get_data("items/#{id}/metadata")
      return {} unless entries.is_a?(Array)

      entries.each_with_object({}) do |entry, grouped|
        (grouped[entry["key"]] ||= []) << entry["value"]
      end
    end

    # Builds the S3File for one bitstream record. A checksum algorithm other
    # than MD5 is reported to Honeybadger but does not stop processing.
    def build_s3_file(bitstream)
      name = bitstream["name"]
      checksum = bitstream["checkSum"]
      filename = File.join(download_directory, name)
      if checksum["checkSumAlgorithm"] != "MD5"
        Honeybadger.notify("Unknown checksum algorithm #{checksum['checkSumAlgorithm']} #{filename} #{bitstream}")
      end

      # size is always passed as -1 here -- presumably a placeholder for "unknown"; confirm with S3File.
      S3File.new(filename_display: name, checksum: base64digest(checksum["value"]), last_modified: DateTime.now,
                 size: -1, work:, url: "#{download_base}/#{bitstream['sequenceId']}", filename:)
    end

    # Local directory that receives this work's downloaded bitstreams.
    def download_directory
      File.join(Rails.configuration.dspace.download_file_path, "dspace_download", work.id.to_s)
    end

    # Ensures the download directory exists, downloads one file, and returns
    # the local filename it was written to.
    def download_bitstream(retrieval_url, filename)
      FileUtils.mkdir_p(download_directory)
      download_file(retrieval_url, filename)
      filename
    end

    # Runs wget for one file. The command is passed as an argument list so no
    # shell is involved: URLs and remote-supplied file names containing quotes
    # or other shell metacharacters are handed to wget verbatim.
    # Failures are reported to Honeybadger and never raised.
    def download_file(url, filename)
      output, status = Open3.capture2e("wget", "-c", url, "-O", filename)
      return if status.success?

      notify_download_failure(url, filename, output)
    rescue SystemCallError => e
      # Without a shell, a missing or unrunnable wget surfaces as an exception rather than an exit status.
      notify_download_failure(url, filename, e.message)
    end

    def notify_download_failure(url, filename, details)
      Honeybadger.notify("Error dowloading file #{url} for work id #{work.id} to #{filename}! Error: #{details}")
    end

    # Compares the Base64 MD5 digest of a local file with the expected value.
    # A file that does not exist (for example after a failed download) counts
    # as a mismatch instead of raising.
    #
    # @return [Boolean] true when the digests match; false (after logging and
    #   notifying Honeybadger) otherwise
    def checksum_file(filename, original_checksum)
      if File.file?(filename) && Digest::MD5.file(filename).base64digest == original_checksum
        Rails.logger.debug { "Matching checksums for #{filename}" }
        return true
      end

      msg = "Mismatching checksum #{filename} #{original_checksum} for work: #{work.id} doi: #{work.doi} ark: #{work.ark}"
      Rails.logger.error msg
      Honeybadger.notify(msg)
      false
    end

    # Converts a hexadecimal digest into its Base64 form (no line breaks), the
    # same representation Digest#base64digest produces.
    def base64digest(hexdigest)
      [[hexdigest].pack("H*")].pack("m0")
    end

    # @return [Net::HTTP] an unopened connection to the URL's host and port
    #   with TLS enabled
    def request_http(url)
      uri = URI(url)
      http = Net::HTTP.new(uri.host, uri.port)
      http.use_ssl = true
      http
    end

    # Bitstreams whose bundleName is "ORIGINAL".
    def original_bitstreams
      bitstreams.select { |bitstream| bitstream["bundleName"] == "ORIGINAL" }
    end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc