• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 27557f24-159e-4724-9dfe-c0871846dba0

09 May 2025 10:32PM UTC coverage: 92.368% (+0.1%) from 92.248%
27557f24-159e-4724-9dfe-c0871846dba0

push

circleci

sandbergja
FAILING, need to move some ruby tests to rust

1 of 1 new or added line in 1 file covered. (100.0%)

2 existing lines in 1 file now uncovered.

3643 of 3944 relevant lines covered (92.37%)

374.13 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.2
/lib/bibdata_rs/theses/fetcher.rb
1
# frozen_string_literal: true
2

3
require 'faraday'
1✔
4
require 'json'
1✔
5
require 'tmpdir'
1✔
6
require 'openssl'
1✔
7
require 'retriable'
1✔
8
require 'logger'
1✔
9

10
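# Do not fail if SSL negotiation with DSpace isn't working.
# NOTE: reassigning this constant makes every reference to OpenSSL::SSL::VERIFY_PEER
# in the process resolve to VERIFY_NONE, not only the requests sent to DSpace.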
11
OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE
1✔
12

13
module BibdataRs::Theses
1✔
14
  class Fetcher
1✔
15
    attr_writer :logger
1✔
16

17
    # leave in Ruby, since the Rails thing is so convenient
18
    # Loads config/dspace.yml through Rails, using the environment reported by
    # BibdataRs::Theses.rails_env.
    def self.env_config
      dspace_config_path = Rails.root.join('config/dspace.yml')
      Rails.application.config_for(dspace_config_path, env: BibdataRs::Theses.rails_env)
    end
21

22
    # leave in Ruby (if needed), since the config file is tricky in rust since it contains a variety of data types
23
    # @return the 'server' entry of the DSpace configuration
    def self.default_server = env_config['server']
26

27
    # leave in Ruby (if needed), since the config file is tricky in rust since it contains a variety of data types
28
    # @return the 'community' entry of the DSpace configuration
    def self.default_community = env_config['community']
31

32
    # leave in Ruby (if needed), since the config file is tricky in rust since it contains a variety of data types
33
    # @return the 'rest_limit' entry of the DSpace configuration
    def self.default_rest_limit = env_config['rest_limit']
36

37
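    # @param server [String, nil] base URL of the DSpace REST API (e.g. 'https://dataspace.princeton.edu/rest/'); the configured default is used when omitted
38
    # @param community [String, nil] handle of the DSpace community (e.g. '88435/dsp019c67wm88m'); the configured default is used when omitted
39
    # @param rest_limit [Integer, nil] amount added to the offset after each fetched page; the configured default is used when omitted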
40
    # leave in ruby for now
41
    def initialize(server: nil, community: nil, rest_limit: nil)
      # Every option that is not supplied (nil or false) falls back to the
      # matching class-level default_* value.
      defaults = self.class
      @server = server || defaults.default_server
      @community = community || defaults.default_community
      @rest_limit = rest_limit || defaults.default_rest_limit
    end
47

48
    # leave in ruby for now
49
    # USED
50
    # Returns the logger assigned through the writer, or lazily builds (and
    # memoizes) a DEBUG-level logger that writes to $stdout.
    def logger
      return @logger if @logger

      @logger = Logger.new($stdout).tap { |log| log.level = Logger::DEBUG }
    end
57

58
    ##
59
    # Write to the log anytime an API call fails and we have to retry.
60
    # See https://github.com/kamui/retriable#callbacks for more information.
61
    # leave in ruby right now, since I'm not sure how to return a proc in Magnus
62
    # USED
63
    def log_retries
      # Kept as a proc (not a lambda) so the callback tolerates being called
      # with a different number of arguments.
      proc do |error, attempt, elapsed, wait|
        summary = "#{error.class}: '#{error.message}' - #{attempt} tries in #{elapsed} seconds and #{wait} seconds until the next try."
        logger.debug summary
      end
    end
68

69
    ##
70
    # @param id [String] thesis collection id
71
    # @return [Array<Hash>] metadata hash for each record
72
    # Rewrite in Rust, but rewrite flatten_json first?  Or does it make sense to do them separately???
73
    # USED
74
    def fetch_collection(id)
      pages = []
      offset = 0
      finished = false

      until finished
        url = build_collection_url(id:, offset:)
        logger.debug("Querying for the DSpace Collection at #{url}...")
        # A response body that is not valid JSON raises JSON::ParserError,
        # which makes Retriable repeat the request for the same page.
        Retriable.retriable(on: JSON::ParserError, tries: Orangetheses::RETRY_LIMIT, on_retry: log_retries) do
          items = JSON.parse(api_client.get(url).body)
          if items.empty?
            # An empty page ends the pagination loop.
            finished = true
            next
          end

          pages << flatten_json(items)
          offset += @rest_limit
        end
      end

      pages.flatten
    end
95

96
    ##
97
    # Cache all collections
98
    # USED
99
    # Builds the Solr documents for every collection id returned by
    # #collections.
    #
    # @param indexer the object handed to #cache_collection to build documents
    # @return [Array] the documents of all collections as one flat array
    def cache_all_collections(indexer)
      collections.map { |collection_id| cache_collection(indexer, collection_id) }.flatten
    end
109

110
    ##
111
    # Cache a single collection
112
    # USED
113
    # Fetches one collection and builds a Solr document for each of its
    # records.
    #
    # @param indexer an object responding to #build_solr_document, which is
    #   called with each record's attributes as keyword arguments
    # @param collection_id the id passed on to #fetch_collection
    # @return [Array] one document per fetched record, in fetch order
    def cache_collection(indexer, collection_id)
      fetch_collection(collection_id).map do |attrs|
        indexer.build_solr_document(**attrs)
      end
    end
124

125
    ##
126
    # Get a json representation of all thesis collections and write it as JSON to
127
    # a cache file.
128
    # USED
129
    # Fetches every thesis collection, converts the documents with #to_solr
    # and writes them as pretty-printed JSON to
    # BibdataRs::Theses.theses_cache_path.
    #
    # The JSON is built completely before the cache file is opened for
    # writing, so a failure while fetching or serializing leaves an existing
    # cache file untouched instead of truncating it.
    def self.write_all_collections_to_cache
      indexer = Indexer.new
      fetcher = Fetcher.new
      documents = fetcher.cache_all_collections(indexer)
      json_cache = JSON.pretty_generate(documents.map(&:to_solr))

      File.open(BibdataRs::Theses.theses_cache_path, 'w') do |f|
        f.puts(json_cache)
      end
    end
139

140
    ##
141
    # The DSpace id of the community we're fetching content for.
142
    # E.g., for handle '88435/dsp019c67wm88m', the DSpace id is 267
143
    # USED
144
    def api_community_id
      return @api_community_id if @api_community_id

      # NOTE(review): #api_community returns nil when no community is
      # available, in which case this lookup raises NoMethodError — confirm
      # that is the intended failure mode.
      @api_community_id = api_community['id'].to_s
    end
147

148
    private
1✔
149

150
      # USED
151
      # Delegates URL construction to BibdataRs::Theses.collection_url, passing
      # the id, rest limit and offset as strings.
      def build_collection_url(id:, offset:)
        url_parts = [id, @rest_limit, offset].map(&:to_s)
        BibdataRs::Theses.collection_url(@server, *url_parts)
      end
154

155
      # USED
156
      # Converts DSpace items into flat hashes: 'id' is the part of the item's
      # handle after the last '/', and every metadata key maps to the array of
      # its values in input order.
      #
      # 'pu.department' values go through #map_department and 'pu.certificate'
      # values through #map_program; entries whose resulting value is nil are
      # dropped. The input items are not modified.
      #
      # @param items [Array<Hash>] items with 'handle' and 'metadata' entries
      # @return [Array<Hash>] one flattened hash per item
      def flatten_json(items)
        items.map do |item|
          flattened = { 'id' => item['handle'][%r{[^/]*$}] }

          item['metadata'].each_with_object(flattened) do |field, acc|
            key = field['key']
            value = case key
                    when 'pu.department' then map_department(field['value'])
                    when 'pu.certificate' then map_program(field['value'])
                    else field['value']
                    end
            next if value.nil?

            (acc[key] ||= []) << value
          end
        end
      end
174

175
      # USED
176
      # The HTTP client used for DSpace requests: the Faraday constant itself.
      def api_client = Faraday
179

180
      # USED
181
      # Fetches the raw JSON listing of communities from
      # "#{@server}/communities/" and memoizes the response body.
      #
      # Any StandardError raised while fetching is logged as a warning through
      # this fetcher's #logger and replaced by an empty JSON array ('[]').
      #
      # @return [String] the response body, or '[]' when the request failed
      def api_communities
        @api_communities ||= begin
          # NOTE(review): the return value of this call is discarded;
          # presumably a placeholder while the method is ported — confirm.
          BibdataRs::Theses.api_communities_json(@server)
          response = api_client.get("#{@server}/communities/")
          response.body
        rescue StandardError => e
          # Log through the instance logger, like every other method here,
          # so the fallback does not depend on a Faraday-level logger.
          logger.warn(e)
          '[]'
        end
      end
191

192
      # USED
193
      # Parses the body returned by #api_communities as JSON and memoizes the
      # result.
      def json_api_communities
        return @json_api_communities if @json_api_communities

        @json_api_communities = JSON.parse(api_communities)
      end
196

197
      ##
198
      # Parse the JSON feed containing all of the communities, and return only the
199
      # community that matches the handle.
200
      # @return [JSON] a json representation of the DSpace community
201
      # USED
202
      def api_community
        communities = json_api_communities
        # Returns nil when there are no communities to search.
        return if communities.empty?

        @api_community ||= communities.find { |candidate| candidate['handle'] == @community }
      end
207

208
      ##
209
      # Get all of the collections for a given community
210
      # USED
211
      def api_collections
        return @api_collections if @api_collections

        endpoint = "#{@server}/communities/#{api_community_id}/collections"
        logger.info("Querying #{endpoint} for the collections...")
        # The raw response body is memoized; parsing happens elsewhere.
        @api_collections = api_client.get(endpoint).body
      end
219

220
      ##
221
      # All of the collections for a given community, parsed as JSON
222
      # USED
223
      def api_collections_json
        return @api_collections_json if @api_collections_json

        @api_collections_json = JSON.parse(api_collections)
      end
226

227
      # example to debug using a specific collection id.
228
      # @collections ||= api_collections_json.map { |i| i['id'] = '2666' }
229
      # https://dataspace-dev.princeton.edu/rest/collections/2666/items
230
      # USED
231
      # Memoized list of the 'id' values of the community's collections.
      def collections
        return @collections if @collections

        @collections = api_collections_json.map { |collection| collection['id'] }
      end
234

235
      # USED
236
      # Delegates to BibdataRs::Theses.map_department.
      def map_department(dept) = BibdataRs::Theses.map_department(dept)
239

240
      # USED
241
      # Delegates to BibdataRs::Theses.map_program.
      def map_program(program) = BibdataRs::Theses.map_program(program)
244
  end
245
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc