• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / cace366a-ffad-45f1-9b60-678e607fa527

14 May 2024 02:21PM UTC coverage: 60.862% (-35.0%) from 95.908%
cace366a-ffad-45f1-9b60-678e607fa527

push

circleci

jrgriffiniii
wip

1 of 3 new or added lines in 2 files covered. (33.33%)

1194 existing lines in 57 files now uncovered.

2076 of 3411 relevant lines covered (60.86%)

22.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

20.93
/app/services/pul_dspace_connector.rb
1
# frozen_string_literal: true
2
class PULDspaceConnector
1✔
3
  attr_reader :work, :ark, :download_base
1✔
4

5
  DSPACE_PAGE_SIZE = 20
1✔
6

7
  # Builds a connector for the DSpace item associated with the given work.
  #
  # @param work [Object] must respond to #ark; the ARK is stored without its "ark:/" prefix
  def initialize(work)
    @work = work
    # ark stays nil when the work has no ARK (safe navigation)
    @ark = work.ark&.gsub("ark:/", "")
    # The download URL is built from the configured base URL with "rest/" removed
    site_root = Rails.configuration.dspace.base_url.gsub("rest/", "")
    @download_base = "#{site_root}bitstream/#{ark}"
  end
12

13
  # Looks up the "id" field of the DSpace record registered under this ARK handle.
  # The value is memoized once a truthy id has been found.
  #
  # @return [Object, nil] the "id" value from the handle lookup, or nil when the
  #   lookup returned nothing usable (no ARK, or a failed request)
  def id
    # get_data returns nil on a non-200 response; fall back to an empty hash
    # so a failed lookup yields nil instead of raising NoMethodError.
    @id ||= (get_data("handle/#{ark}") || {})["id"]
  end
19

20
  # Fetches every bitstream record for the item, requesting DSPACE_PAGE_SIZE
  # records at a time. The result is memoized.
  #
  # @return [Array] all bitstream records returned by the paged requests; empty
  #   when nothing could be retrieved
  def bitstreams
    @bitstreams ||= begin
                      data = []
                      # Handle pages if needed.
                      # This is an inelegant way to get all the files, but no total count
                      # has been found in the responses, so keep asking until a short page arrives.
                      loop do
                        page = get_data("items/#{id}/bitstreams?offset=#{data.length}&limit=#{DSPACE_PAGE_SIZE}")
                        # get_data returns nil on a non-200 response; treat that as an empty final page
                        page ||= []
                        # get_data returns {} when there is no ARK; concat would reject a Hash, so skip empties
                        data.concat(page) unless page.empty?
                        break if page.count < DSPACE_PAGE_SIZE
                      end
                      data
                    end
  end
33

34
  # Fetches the item's metadata entries and groups their values by key.
  # The result is memoized.
  #
  # @return [Hash{Object => Array}] each entry's "key" mapped to the list of its
  #   "value"s in response order; empty when nothing could be retrieved
  def metadata
    @metadata ||= begin
                    # get_data returns nil on a non-200 response; treat that as no entries
                    entries = get_data("items/#{id}/metadata") || []
                    entries.each_with_object({}) do |entry, grouped|
                      (grouped[entry["key"]] ||= []) << entry["value"]
                    end
                  end
  end
46

47
  # Describes each ORIGINAL-bundle bitstream as an S3File pointing at its
  # download URL and at the local path it would be downloaded to.
  # Nothing is downloaded here. The result is memoized.
  #
  # @return [Array<S3File>] one entry per bitstream from original_bitstreams
  def list_bitsteams
    @list_bitsteams ||= original_bitstreams.map do |bitstream|
      download_dir = File.join(Rails.configuration.dspace.download_file_path, "dspace_download", work.id.to_s)
      filename = File.join(download_dir, bitstream["name"])

      algorithm = bitstream["checkSum"]["checkSumAlgorithm"]
      # Only MD5 is expected; anything else is reported but the entry is still built
      unless algorithm == "MD5"
        Honeybadger.notify("Unknown checksum algorithm #{algorithm} #{filename} #{bitstream}")
      end

      S3File.new(
        filename_display: bitstream["name"],
        # The checksum value is converted from hex to base64
        checksum: base64digest(bitstream["checkSum"]["value"]),
        last_modified: DateTime.now,
        size: -1,
        work:,
        url: "#{download_base}/#{bitstream['sequenceId']}",
        filename:
      )
    end
  end
60

61
  # Downloads every file in the list and verifies each against its checksum.
  #
  # @param bitstream_list [Array] objects responding to #url, #filename and #checksum
  # @return [Array] for each input, the file itself when its checksum matched,
  #   otherwise a Hash with :file and :error keys
  def download_bitstreams(bitstream_list)
    bitstream_list.map do |file|
      local_path = download_bitstream(file.url, file.filename)
      next file if checksum_file(local_path, file.checksum)

      { file:, error: "Checsum Missmatch" }
    end
  end
71

72
  # Extracts the DOI from the item's "dc.identifier.uri" metadata values.
  #
  # @return [String, nil] "" when the work has no ARK; otherwise the first
  #   "https://doi.org/" URI with that prefix removed, or nil when none is present
  def doi
    return "" if ark.nil?
    @doi ||= begin
               # The key is absent when the metadata could not be fetched or has no URIs;
               # Array() turns that nil into an empty list instead of raising.
               uris = Array(metadata["dc.identifier.uri"])
               doi_url = uris.find { |value| value.start_with?("https://doi.org/") }
               doi_url&.gsub("https://doi.org/", "")
             end
  end
79

80
  private
1✔
81

82
    # Performs a GET against the configured DSpace base URL and parses the JSON body.
    #
    # @param url_path [String] path (and query) appended to the configured base URL
    # @return [Hash, Array, nil] {} when the work has no ARK, nil (after notifying
    #   Honeybadger) when the response code is not "200", otherwise the parsed JSON
    def get_data(url_path)
      return {} if ark.nil?

      url = "#{Rails.configuration.dspace.base_url}#{url_path}"
      response = request_http(url).request(Net::HTTP::Get.new(URI(url)))
      return JSON.parse(response.body) if response.code == "200"

      Honeybadger.notify("Error retreiving dspace data from #{url} #{response.code} #{response.body}")
      nil
    end
96

97
    # Ensures the work's download directory exists, then downloads one file.
    #
    # @param retrieval_url [String] URL to download from
    # @param filename [String] local path to write to
    # @return [String] the filename that was passed in
    def download_bitstream(retrieval_url, filename)
      work_dir = File.join(Rails.configuration.dspace.download_file_path, "dspace_download", work.id.to_s)
      FileUtils.mkdir_p(work_dir)
      download_file(retrieval_url, filename)
      filename
    end
103

104
    # Downloads a URL to a local file with wget (-c continues a partial download).
    # A failure is reported to Honeybadger rather than raised.
    #
    # @param url [String] URL to fetch
    # @param filename [String] local path to write to
    # @return [Object, nil] nil on success; otherwise whatever Honeybadger.notify returns
    def download_file(url, filename)
      # Pass the command as separate arguments so no shell is involved: the filename
      # is built from a remote bitstream name, and a quote or shell metacharacter in
      # it must not be able to break or extend the command.
      stdout_and_stderr_str, status = Open3.capture2e("wget", "-c", url, "-O", filename)
      return if status.success?

      Honeybadger.notify("Error dowloading file #{url} for work id #{work.id} to #{filename}! Error: #{stdout_and_stderr_str}")
    end
110

111
    # Compares the base64-encoded MD5 digest of a local file with an expected value.
    # A mismatch is logged as an error and reported to Honeybadger.
    #
    # @param filename [String] path of the file to digest
    # @param original_checksum [String] expected base64 MD5 digest
    # @return [Boolean] true when the digests are equal, false otherwise
    def checksum_file(filename, original_checksum)
      if Digest::MD5.file(filename).base64digest == original_checksum
        Rails.logger.debug { "Matching checksums for #{filename}" }
        return true
      end

      msg = "Mismatching checksum #{filename} #{original_checksum} for work: #{work.id} doi: #{work.doi} ark: #{work.ark}"
      Rails.logger.error msg
      Honeybadger.notify(msg)
      false
    end
124

125
    # Converts a hex-encoded digest into its base64 encoding.
    #
    # @param hexdigest [String] digest as hexadecimal characters
    # @return [String] the same bytes, base64-encoded without line breaks
    def base64digest(hexdigest)
      raw_bytes = [hexdigest].pack("H*") # hex string -> binary string
      [raw_bytes].pack("m0")             # binary -> base64 with no newlines
    end
128

129
    # Builds an unopened Net::HTTP client for the host and port of the given URL.
    #
    # @param url [String] absolute URL whose host, port and scheme configure the client
    # @return [Net::HTTP] client with TLS enabled only when the URL scheme is https
    def request_http(url)
      uri = URI(url)
      Net::HTTP.new(uri.host, uri.port).tap do |http|
        # Follow the URL's scheme instead of always forcing TLS, so a plain-http
        # base URL does not fail during the handshake.
        http.use_ssl = uri.scheme == "https"
      end
    end
135

136
    # Narrows the full bitstream list to the entries whose "bundleName" is "ORIGINAL".
    #
    # @return [Array] the matching bitstream records, in their original order
    def original_bitstreams
      bitstreams.filter do |entry|
        entry["bundleName"] == "ORIGINAL"
      end
    end
139
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc