• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_describe / 4e4e59fc-9df4-4838-9fd4-6c7ea33cdb7c

07 Apr 2025 06:36PM UTC coverage: 1.283% (-94.6%) from 95.862%
4e4e59fc-9df4-4838-9fd4-6c7ea33cdb7c

Pull #1994

circleci

hectorcorrea
Switched to use the autocomplete that we already use for ROR. Integrated it with the existing logic for creators
Pull Request #1994: Started adding auto complete to contributors

0 of 46 new or added lines in 2 files covered. (0.0%)

4806 existing lines in 74 files now uncovered.

65 of 5065 relevant lines covered (1.28%)

0.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/app/services/pul_dspace_connector.rb
1
# frozen_string_literal: true
UNCOV
2
# Looks up the DSpace record that corresponds to a work (via the work's ARK)
# and exposes its item id, metadata, bitstream listing and file downloads.
#
# All remote reads go through the private #get_data, which issues a GET against
# Rails.configuration.dspace.base_url and parses the JSON response.
class PULDspaceConnector
  attr_reader :work, :ark, :download_base

  # Number of bitstream records requested per page in #bitstreams.
  DSPACE_PAGE_SIZE = 20

  # Prefix that marks a dc.identifier.uri value as a DOI link (see #doi).
  DOI_URL_PREFIX = "https://doi.org/"

  # @param work [#ark, #id, #doi] the work whose DSpace record is wanted.
  #   `work.ark` may be nil, in which case no remote request is ever made.
  def initialize(work)
    @work = work
    @ark = work.ark&.gsub("ark:/", "")
    @download_base = "#{Rails.configuration.dspace.base_url.gsub('rest/', '')}bitstream/#{ark}"
  end

  # The "id" field of the JSON returned for this ARK's handle (memoized once
  # it is truthy). Returns nil when the work has no ARK.
  def id
    @id ||= get_data("handle/#{ark}")["id"]
  end

  # Every bitstream record for the item, fetched one page at a time (memoized).
  #
  # @return [Array<Hash>] empty when the work has no ARK.
  def bitstreams
    @bitstreams ||= begin
                      collected = []
                      # No total count has been found in the responses, so keep asking for
                      # the next page until one comes back shorter than a full page.
                      loop do
                        page = get_data("items/#{id}/bitstreams?offset=#{collected.length}&limit=#{DSPACE_PAGE_SIZE}")
                        collected.concat(page) unless page.empty?
                        break if page.count < DSPACE_PAGE_SIZE
                      end
                      collected
                    end
  end

  # Item metadata grouped by key (memoized).
  #
  # @return [Hash{String => Array}] each key maps to every value reported for
  #   it, in response order; empty when the work has no ARK.
  def metadata
    @metadata ||= get_data("items/#{id}/metadata").each_with_object({}) do |entry, grouped|
      (grouped[entry["key"]] ||= []) << entry["value"]
    end
  end

  # One S3File per bitstream in the "ORIGINAL" bundle (memoized). Nothing is
  # downloaded here; each entry only records the source URL, the local target
  # path and the expected checksum.
  #
  # @return [Array<S3File>]
  def list_bitsteams
    @list_bitsteams ||= original_bitstreams.map { |bitstream| build_s3_file(bitstream) }
  end

  # Downloads each listed file and verifies its checksum.
  #
  # @param bitstream_list [Array<S3File>] entries as produced by #list_bitsteams
  # @return [Array] for each entry, the entry itself when its checksum matches,
  #   otherwise a Hash with :file and :error keys.
  def download_bitstreams(bitstream_list)
    bitstream_list.map do |file|
      local_path = download_bitstream(file.url, file.filename)
      next file if checksum_file(local_path, file.checksum)

      { file:, error: "Checsum Missmatch" }
    end
  end

  # The DOI found among the item's dc.identifier.uri values, without the
  # "https://doi.org/" prefix.
  #
  # @return [String, nil] "" when the work has no ARK; nil when the metadata has
  #   no dc.identifier.uri field or none of its values is a DOI link.
  def doi
    return "" if ark.nil?

    @doi ||= begin
               # Array() tolerates records with no dc.identifier.uri field at all.
               doi_url = Array(metadata["dc.identifier.uri"]).find { |uri| uri.to_s.start_with?(DOI_URL_PREFIX) }
               doi_url&.delete_prefix(DOI_URL_PREFIX)
             end
  end

  private

    # GETs `url_path` relative to the configured DSpace base URL and parses the body as JSON.
    #
    # @return [Object, nil] {} without any request when the work has no ARK;
    #   nil (after notifying Honeybadger) when the response code is not "200".
    def get_data(url_path)
      return {} if ark.nil?

      url = "#{Rails.configuration.dspace.base_url}#{url_path}"
      response = request_http(url).request(Net::HTTP::Get.new(URI(url)))
      return JSON.parse(response.body) if response.code == "200"

      Honeybadger.notify("Error retreiving dspace data from #{url} #{response.code} #{response.body}")
      nil
    end

    # Ensures the work's download directory exists, then fetches one file into it.
    #
    # @return [String] `filename`, unchanged.
    def download_bitstream(retrieval_url, filename)
      FileUtils.mkdir_p download_directory
      download_file(retrieval_url, filename)
      filename
    end

    # Runs wget to (resume) download `url` into `filename`; failures are reported
    # to Honeybadger rather than raised.
    def download_file(url, filename)
      # Argument-vector form: no shell is involved, so quotes or other shell
      # metacharacters in the URL or file name cannot alter the command.
      stdout_and_stderr_str, status = Open3.capture2e("wget", "-c", url, "-O", filename)
      return if status.success?

      Honeybadger.notify("Error dowloading file #{url} for work id #{work.id} to #{filename}! Error: #{stdout_and_stderr_str}")
    end

    # Compares the base64 MD5 of the file on disk with the expected value.
    # A mismatch is logged and reported to Honeybadger.
    #
    # @return [Boolean] true when the checksums match.
    def checksum_file(filename, original_checksum)
      if Digest::MD5.file(filename).base64digest == original_checksum
        Rails.logger.debug { "Matching checksums for #{filename}" }
        return true
      end

      msg = "Mismatching checksum #{filename} #{original_checksum} for work: #{work.id} doi: #{work.doi} ark: #{work.ark}"
      Rails.logger.error msg
      Honeybadger.notify(msg)
      false
    end

    # Re-encodes a hex digest string as strict (newline-free) base64 of the same bytes.
    def base64digest(hexdigest)
      raw_bytes = [hexdigest].pack("H*")
      [raw_bytes].pack("m0")
    end

    # Builds an (unstarted) SSL-enabled Net::HTTP client for the host and port of `url`.
    def request_http(url)
      target = URI(url)
      Net::HTTP.new(target.host, target.port).tap { |client| client.use_ssl = true }
    end

    # Bitstreams whose bundleName is "ORIGINAL".
    def original_bitstreams
      bitstreams.select { |bitstream| bitstream["bundleName"] == "ORIGINAL" }
    end

    # Local directory that holds this work's downloaded DSpace files.
    def download_directory
      File.join(Rails.configuration.dspace.download_file_path, "dspace_download", work.id.to_s)
    end

    # Describes one bitstream record as an S3File. An algorithm other than MD5
    # is reported to Honeybadger, but the entry is still built.
    def build_s3_file(bitstream)
      filename = File.join(download_directory, bitstream["name"])
      if bitstream["checkSum"]["checkSumAlgorithm"] != "MD5"
        Honeybadger.notify("Unknown checksum algorithm #{bitstream['checkSum']['checkSumAlgorithm']} #{filename} #{bitstream}")
      end

      # size is -1 because the real size is not read from the bitstream record here.
      S3File.new(filename_display: bitstream["name"], checksum: base64digest(bitstream["checkSum"]["value"]), last_modified: DateTime.now,
                 size: -1, work:, url: "#{download_base}/#{bitstream['sequenceId']}", filename:)
    end
end
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc