• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 4f02f51c-97db-4b81-9973-dee0f54d13ea

10 May 2025 10:51PM UTC coverage: 92.3% (-0.07%) from 92.368%
4f02f51c-97db-4b81-9973-dee0f54d13ea

push

circleci

sandbergja
cleanup

3584 of 3883 relevant lines covered (92.3%)

379.33 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.79
/lib/bibdata_rs/theses/fetcher.rb
1
# frozen_string_literal: true
2

3
require 'faraday'
1✔
4
require 'json'
1✔
5
require 'tmpdir'
1✔
6
require 'openssl'
1✔
7
require 'retriable'
1✔
8
require 'logger'
1✔
9

10
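# Do not fail if SSL negotiation with DSpace isn't working.
# NOTE: this reassigns the VERIFY_PEER constant, so any code that reads
# OpenSSL::SSL::VERIFY_PEER after this file is loaded gets VERIFY_NONE instead,
# i.e. certificate verification is skipped for those connections as well.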
11
OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE
1✔
12

13
module BibdataRs::Theses
1✔
14
  class Fetcher
1✔
15
    attr_writer :logger
1✔
16

17
    # Loads the DSpace settings from config/dspace.yml for the environment named
    # by BibdataRs::Theses.rails_env. Nothing is memoized here, so every call
    # goes back through Rails' config_for.
    # Migration note: leave in Ruby, since the Rails config loader is so convenient.
    # @return the object produced by config_for; the default_* readers index it
    #   with string keys
    def self.env_config
      dspace_config = Rails.root.join('config/dspace.yml')
      Rails.application.config_for(dspace_config, env: BibdataRs::Theses.rails_env)
    end
21

22
    # Default DSpace server, read from the 'server' entry of the configuration.
    # Migration note: leave in Ruby (if needed), since the config file is tricky
    # in Rust because it contains a variety of data types.
    # @return the configured 'server' value
    def self.default_server
      settings = env_config
      settings['server']
    end
26

27
    # Default community, read from the 'community' entry of the configuration.
    # Migration note: leave in Ruby (if needed), since the config file is tricky
    # in Rust because it contains a variety of data types.
    # @return the configured 'community' value
    def self.default_community
      settings = env_config
      settings['community']
    end
31

32
    # Default page size, read from the 'rest_limit' entry of the configuration.
    # Migration note: leave in Ruby (if needed), since the config file is tricky
    # in Rust because it contains a variety of data types.
    # @return the configured 'rest_limit' value
    def self.default_rest_limit
      settings = env_config
      settings['rest_limit']
    end
36

37
    # Builds a fetcher. Any argument left nil (or false) falls back to the
    # matching class-level default, which is read from the DSpace configuration;
    # the defaults are only looked up when they are actually needed.
    # @param server [String, nil] base URL of the DSpace REST API
    #   (e.g. 'https://dataspace.princeton.edu/rest/')
    # @param community [String, nil] handle of the community to harvest
    #   (e.g. '88435/dsp019c67wm88m')
    # @param rest_limit [Integer, nil] number of items requested per page and the
    #   step added to the offset in #fetch_collection -- presumably an Integer
    # Migration note: leave in Ruby for now.
    def initialize(server: nil, community: nil, rest_limit: nil)
      defaults = self.class

      @server = server || defaults.default_server
      @community = community || defaults.default_community
      @rest_limit = rest_limit || defaults.default_rest_limit
    end
47

48
    # Logger used by this fetcher. Unless one was injected through the
    # logger= writer, a Logger writing to $stdout at DEBUG level is built on
    # first use and memoized.
    # Migration note: leave in Ruby for now. USED.
    # @return [Logger] the injected or lazily built logger
    def logger
      @logger ||= Logger.new($stdout).tap { |built| built.level = Logger::DEBUG }
    end
57

58
    ##
    # Callback handed to Retriable as on_retry: writes a debug line every time an
    # API call fails and has to be retried.
    # See https://github.com/kamui/retriable#callbacks for more information.
    # Migration note: leave in Ruby right now, since it is unclear how to return
    # a proc in Magnus. USED.
    # @return [Proc] proc taking (exception, try, elapsed_time, next_interval)
    def log_retries
      proc do |error, attempt, elapsed, wait|
        message = "#{error.class}: '#{error.message}' - #{attempt} tries in #{elapsed} seconds and #{wait} seconds until the next try."
        logger.debug(message)
      end
    end
68

69
    ##
    # Pages through the items of one DSpace collection. Each request asks for
    # @rest_limit items starting at the current offset; the offset advances by
    # @rest_limit after every non-empty page and paging stops at the first empty
    # one. A request whose body fails to parse as JSON is attempted at most
    # Orangetheses::RETRY_LIMIT times, with each retry reported via #log_retries.
    # @param id [String] thesis collection id
    # @return [Array<Hash>] metadata hash for each record
    # Migration note: rewrite in Rust, but rewrite flatten_json first? Or does it
    # make sense to do them separately? USED.
    def fetch_collection(id)
      pages = []
      offset = 0
      exhausted = false

      until exhausted
        url = BibdataRs::Theses.collection_url(@server, id.to_s, @rest_limit.to_s, offset.to_s)
        logger.debug("Querying for the DSpace Collection at #{url}...")

        # Retriable returns the value of its block, so the parsed page lands here.
        page = Retriable.retriable(on: JSON::ParserError, tries: Orangetheses::RETRY_LIMIT, on_retry: log_retries) do
          JSON.parse(api_client.get(url).body)
        end

        exhausted = page.empty?
        next if exhausted

        pages << page
        offset += @rest_limit
      end

      pages.flatten
    end
96

97
    ##
    # Builds the Solr documents for every collection of the community by running
    # #cache_collection on each collection id and joining the results.
    # USED
    # @return [Array] flattened list of Solr documents across all collections
    def cache_all_collections
      collections.flat_map { |collection_id| cache_collection(collection_id) }.flatten
    end
110

111
    ##
    # Builds the Solr documents for a single collection: every record returned by
    # #fetch_collection is serialised to JSON, converted by
    # BibdataRs::Theses.ruby_json_to_solr_json, and parsed back into Ruby.
    # USED
    # @param collection_id the id passed through to #fetch_collection
    # @return [Array] one parsed Solr document per fetched record
    def cache_collection(collection_id)
      fetch_collection(collection_id).map do |attrs|
        solr_json = BibdataRs::Theses.ruby_json_to_solr_json(attrs.to_json)
        JSON.parse(solr_json)
      end
    end
125

126
    ##
    # Get a json representation of all thesis collections and write it as
    # pretty-printed JSON to the cache file at BibdataRs::Theses.theses_cache_path.
    #
    # The documents are fetched and serialised BEFORE the cache file is opened:
    # opening with 'w' truncates the file, so doing it first would wipe the
    # previous cache whenever the fetch raised part-way through.
    # USED
    # @return [nil]
    # @raise whatever Fetcher#cache_all_collections raises; the existing cache
    #   file is left untouched in that case
    def self.write_all_collections_to_cache
      solr_documents = Fetcher.new.cache_all_collections
      json_cache = JSON.pretty_generate(solr_documents)

      File.open(BibdataRs::Theses.theses_cache_path, 'w') do |f|
        f.puts(json_cache)
      end
    end
138

139
    ##
    # The DSpace id of the community we're fetching content for, as a String.
    # E.g., for handle '88435/dsp019c67wm88m', the DSpace id is 267.
    # The value is memoized after the first lookup.
    # USED
    # @return [String] the 'id' of the matching community
    # @raise [NoMethodError] when #api_community returns nil (empty listing or no
    #   community with the configured handle)
    def api_community_id
      return @api_community_id if @api_community_id

      community = api_community
      @api_community_id = community['id'].to_s
    end
146

147
    private
1✔
148

149

150
      # HTTP client used for the DSpace requests made by this class; callers
      # invoke #get on it directly.
      # USED
      # @return [Module] the Faraday module itself
      def api_client
        Faraday
      end
154

155
      # Raw JSON listing of every community on the DSpace server. The response
      # body is memoized. If anything in the lookup raises a StandardError the
      # failure is logged as a warning and the string '[]' is memoized instead,
      # so a failed lookup reads as an empty community list.
      # USED
      # @return [String] body of GET <server>/communities/, or '[]' on failure
      def api_communities
        @api_communities ||= begin
          # NOTE(review): the value returned by the Rust helper is discarded and
          # the listing is requested again right below; presumably one of the two
          # calls is a leftover of the migration -- confirm before removing either.
          BibdataRs::Theses.api_communities_json(@server)
          response = api_client.get("#{@server}/communities/")
          response.body
        rescue StandardError => e
          # Report through this object's own logger, which also honours a logger
          # injected via logger=. Faraday has no documented module-level logger,
          # so the former Faraday.logger call raised from inside this rescue and
          # the '[]' fallback was never reached.
          logger.warn(e)
          '[]'
        end
      end
166

167
      # The community listing from #api_communities parsed as JSON; memoized.
      # USED
      # @return the parsed listing (callers treat it as a collection of hashes)
      def json_api_communities
        return @json_api_communities if @json_api_communities

        @json_api_communities = JSON.parse(api_communities)
      end
171

172
      ##
      # Parse the JSON feed containing all of the communities, and return only
      # the community whose 'handle' equals the configured @community.
      # A match is memoized; a miss is not, so it is searched for again on the
      # next call.
      # USED
      # @return [Hash, nil] the matching community entry, or nil when the listing
      #   is empty or no handle matches
      def api_community
        communities = json_api_communities
        return if communities.empty?

        @api_community ||= communities.find { |community| community['handle'] == @community }
      end
182

183
      ##
      # Raw response body listing all of the collections for the configured
      # community, requested from <server>/communities/<id>/collections.
      # The request is announced at info level and the body is memoized.
      # USED
      # @return the response body of the collections request
      def api_collections
        return @api_collections if @api_collections

        collections_url = "#{@server}/communities/#{api_community_id}/collections"
        logger.info("Querying #{collections_url} for the collections...")
        @api_collections = api_client.get(collections_url).body
      end
194

195
      ##
      # All of the collections for the configured community, parsed as JSON from
      # the body returned by #api_collections; memoized.
      # USED
      # @return the parsed collections listing
      def api_collections_json
        @api_collections_json ||= begin
          raw_listing = api_collections
          JSON.parse(raw_listing)
        end
      end
201

202
      # The 'id' of every entry in the parsed collections listing; memoized.
      # Debugging tip -- to work against one specific collection id, use e.g.
      #   @collections ||= api_collections_json.map { |i| i['id'] = '2666' }
      #   https://dataspace-dev.princeton.edu/rest/collections/2666/items
      # USED
      # @return [Array] collection ids
      def collections
        @collections ||= api_collections_json.map { |collection| collection['id'] }
      end
209

210
      # Thin wrapper that forwards to BibdataRs::Theses.map_department.
      # USED
      # @param dept the department value to map
      # @return whatever BibdataRs::Theses.map_department returns for dept
      def map_department(dept)
        BibdataRs::Theses.map_department(dept)
      end
214

215
      # Thin wrapper that forwards to BibdataRs::Theses.map_program.
      # USED
      # @param program the program value to map
      # @return whatever BibdataRs::Theses.map_program returns for program
      def map_program(program)
        BibdataRs::Theses.map_program(program)
      end
219
  end
220
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc