• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / orangetheses / a3ee308b-19e9-4517-bc97-af78629fea7a

09 Oct 2024 03:04PM UTC coverage: 83.356% (-4.0%) from 87.326%
a3ee308b-19e9-4517-bc97-af78629fea7a

Pull #86

circleci

jrgriffiniii
wip
Pull Request #86: Restructuring the generation of access restriction text during indexing and implementing a Rake Task for indexing OAI Items using a given Set ID

4 of 23 new or added lines in 3 files covered. (17.39%)

19 existing lines in 1 file now uncovered.

611 of 733 relevant lines covered (83.36%)

16.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

80.99
/lib/orangetheses/fetcher.rb
1
# frozen_string_literal: true
2

3
require 'faraday'
1✔
4
require 'json'
1✔
5
require 'tmpdir'
1✔
6
require 'openssl'
1✔
7
require 'retriable'
1✔
8
require 'logger'
1✔
9
require 'yaml'
1✔
10
require 'erb'
1✔
11

12
# Do not fail if SSL negotiation with DSpace isn't working
13
# NOTE(review): this reassigns the VERIFY_PEER constant to the value of
# VERIFY_NONE for the whole process, so every piece of code that passes
# OpenSSL::SSL::VERIFY_PEER afterwards requests no certificate verification.
# Ruby also prints an "already initialized constant" warning for the
# reassignment. Consider relaxing verification only on the DSpace connection.
OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE
1✔
14

15
module Orangetheses
  # Retrieves thesis metadata from a DSpace REST API, flattens each item into a
  # Hash of metadata values, and either hands the records to an indexer or
  # caches the resulting Solr documents as JSON on disk.
  class Fetcher
    attr_writer :logger

    # @return [String] path to config/dspace.yml, resolved relative to this file
    def self.config_file_path
      File.join(File.dirname(__FILE__), '..', '..', 'config', 'dspace.yml')
    end

    # @return [String] raw (un-rendered) contents of the configuration file
    def self.config_file
      File.read(config_file_path)
    end

    # Renders the configuration file through ERB.
    #
    # The file is read outside of the rescue so that a missing or unreadable
    # file surfaces as its own error rather than as an ERB failure.
    #
    # @return [String] the configuration with ERB tags evaluated
    # @raise [RuntimeError] when the template cannot be evaluated; the message
    #   names the file path (not the file contents) and the underlying error
    def self.config_erb
      template = config_file
      begin
        ERB.new(template).result(binding)
      rescue StandardError, SyntaxError => e
        raise "#{config_file_path} was found, but could not be parsed with ERB. \n#{e.inspect}"
      end
    end

    # @return [Hash] the parsed YAML configuration, keyed by environment name
    def self.config_yaml
      YAML.safe_load(config_erb, aliases: true)
    end

    # @return [String] value of ORANGETHESES_ENV, or 'development' when unset
    def self.env
      ENV.fetch('ORANGETHESES_ENV', 'development')
    end

    # @return [Hash] the configuration section for the current environment
    def self.env_config
      config_yaml[env]
    end

    # @return [String] the 'server' entry of the environment configuration
    def self.default_server
      env_config['server']
    end

    # @return [String] the 'community' entry of the environment configuration
    def self.default_community
      env_config['community']
    end

    # @return [Integer] the 'rest_limit' entry of the environment configuration
    def self.default_rest_limit
      env_config['rest_limit']
    end

    # Each argument falls back to the matching entry of the environment
    # configuration when it is omitted (or nil).
    #
    # @param server [String, nil] base URL of the DSpace REST API
    # @param community [String, nil] handle of the community to fetch
    # @param rest_limit [Integer, nil] page size used when listing collection items
    def initialize(server: nil, community: nil, rest_limit: nil)
      @server = server || self.class.default_server
      @community = community || self.class.default_community
      @rest_limit = rest_limit || self.class.default_rest_limit
    end

    # @return [Logger] the injected logger, or a DEBUG-level logger on $stdout
    def logger
      @logger ||= Logger.new($stdout, level: Logger::DEBUG)
    end

    ##
    # Where files get cached for later indexing
    # @return [String] the FILEPATH environment variable, or '/tmp/theses.json'
    def json_file_path
      @json_file_path ||= ENV['FILEPATH'] || '/tmp/theses.json'
    end

    ##
    # Write to the log anytime an API call fails and we have to retry.
    # See https://github.com/kamui/retriable#callbacks for more information.
    # @return [Proc] callback suitable for Retriable's +on_retry+ option
    def log_retries
      proc do |exception, try, elapsed_time, next_interval|
        logger.debug "#{exception.class}: '#{exception.message}' - #{try} tries in #{elapsed_time} seconds and #{next_interval} seconds until the next try."
      end
    end

    ##
    # Pages through the items of a collection, @rest_limit items at a time,
    # until the API returns an empty page. A page whose body is not valid JSON
    # is retried through Retriable.
    # @param id [String] thesis collection id
    # @return [Array<Hash>] metadata hash for each record
    def fetch_collection(id)
      theses = []
      offset = 0
      completed = false

      until completed
        url = build_collection_url(id:, offset:)
        logger.debug("Querying for the DSpace Collection at #{url}...")
        Retriable.retriable(on: JSON::ParserError, tries: Orangetheses::RETRY_LIMIT, on_retry: log_retries) do
          response = api_client.get(url)
          items = JSON.parse(response.body)
          if items.empty?
            completed = true
          else
            theses.concat(flatten_json(items))
            offset += @rest_limit
          end
        end
      end
      theses
    end

    # Fetches one collection and passes every record to +indexer.index_hash+.
    # @param indexer [#index_hash]
    # @param id [String] thesis collection id
    def index_collection(indexer, id)
      fetch_collection(id).each { |record| indexer.index_hash(record) }
    end

    # Indexes every collection of the configured community.
    # @param indexer [#index_hash]
    def index_all_collections(indexer)
      collections.each { |collection_id| index_collection(indexer, collection_id) }
    end

    ##
    # Cache all collections
    # @param indexer [#build_solr_document]
    # @return [Array] the Solr documents built for every collection
    def cache_all_collections(indexer)
      collections.flat_map { |collection_id| cache_collection(indexer, collection_id) }
    end

    ##
    # Cache a single collection
    # @param indexer [#build_solr_document]
    # @param collection_id [String] thesis collection id
    # @return [Array] one Solr document per fetched record
    def cache_collection(indexer, collection_id)
      fetch_collection(collection_id).map { |attrs| indexer.build_solr_document(**attrs) }
    end

    ##
    # Get a json representation of a single collection and write it as JSON to
    # a cache file.
    #
    # The collection is fetched before the cache file is opened, so a failed
    # fetch leaves any previously written cache untouched.
    # @param collection_id [String] thesis collection id
    def self.write_collection_to_cache(collection_id)
      indexer = Indexer.new
      fetcher = Fetcher.new
      documents = fetcher.cache_collection(indexer, collection_id)
      write_cache_file(fetcher.json_file_path, documents)
    end

    ##
    # Get a json representation of all thesis collections and write it as JSON to
    # a cache file.
    #
    # The collections are fetched before the cache file is opened, so a failed
    # fetch leaves any previously written cache untouched.
    def self.write_all_collections_to_cache
      indexer = Indexer.new
      fetcher = Fetcher.new
      documents = fetcher.cache_all_collections(indexer)
      write_cache_file(fetcher.json_file_path, documents)
    end

    # Serializes the documents (via +to_solr+) as pretty-printed JSON and
    # writes them to +path+, replacing the file's previous contents.
    def self.write_cache_file(path, documents)
      json_cache = JSON.pretty_generate(documents.map(&:to_solr))
      File.open(path, 'w') { |f| f.puts(json_cache) }
    end
    private_class_method :write_cache_file

    ##
    # The DSpace id of the community we're fetching content for.
    # E.g., for handle '88435/dsp019c67wm88m', the DSpace id is 267
    # @return [String]
    # @raise [NoMethodError] when no community matches the configured handle
    #   (api_community is nil in that case)
    def api_community_id
      @api_community_id ||= api_community['id'].to_s
    end

    private

    # @return [String] URL of one page of collection items with metadata expanded
    def build_collection_url(id:, offset:)
      "#{@server}/collections/#{id}/items?limit=#{@rest_limit}&offset=#{offset}&expand=metadata"
    end

    # Converts API items into flat hashes: 'id' holds the last path segment of
    # the item's handle, and every metadata key maps to the list of its values.
    # Department and certificate values are replaced by their authorized forms;
    # entries whose (mapped) value is nil are skipped. The input is not modified.
    # @param items [Array<Hash>] parsed items, each with 'handle' and 'metadata'
    # @return [Array<Hash>]
    def flatten_json(items)
      items.map do |item|
        record = { 'id' => item['handle'][%r{[^/]*$}] }
        item['metadata'].each do |field|
          key = field['key']
          value = authorized_value(key, field['value'])
          next if value.nil?

          (record[key] ||= []) << value
        end
        record
      end
    end

    # Applies the department/program mapping that belongs to a metadata key;
    # values of any other key are returned unchanged.
    def authorized_value(key, value)
      case key
      when 'pu.department' then map_department(value)
      when 'pu.certificate' then map_program(value)
      else value
      end
    end

    # @return [Module] the HTTP client used for all API requests
    def api_client
      Faraday
    end

    # Raw body of the communities listing. When the request raises a
    # StandardError the error is logged through this fetcher's logger and an
    # empty JSON array is used instead (and memoized).
    # @return [String]
    def api_communities
      @api_communities ||= begin
        response = api_client.get("#{@server}/communities/")
        response.body
      rescue StandardError => e
        logger.warn(e)
        '[]'
      end
    end

    # @return [Array<Hash>] the communities listing, parsed as JSON
    def json_api_communities
      @json_api_communities ||= JSON.parse(api_communities)
    end

    ##
    # Parse the JSON feed containing all of the communities, and return only the
    # community that matches the handle.
    # @return [Hash, nil] the parsed DSpace community, or nil when the listing
    #   is empty or no community has the configured handle
    def api_community
      return if json_api_communities.empty?

      @api_community ||= json_api_communities.find { |c| c['handle'] == @community }
    end

    ##
    # Get all of the collections for a given community
    # @return [String] raw response body
    def api_collections
      @api_collections ||= api_client.get("#{@server}/communities/#{api_community_id}/collections").body
    end

    ##
    # All of the collections for a given community, parsed as JSON
    def api_collections_json
      @api_collections_json ||= JSON.parse(api_collections)
    end

    # @return [Array] the 'id' of every collection in the community
    def collections
      @collections ||= api_collections_json.map { |i| i['id'] }
    end

    # @return [String, nil] authorized department name, nil when unknown
    def map_department(dept)
      lc_authorized_departments[dept]
    end

    # @return [String, nil] authorized program name, nil when unknown
    def map_program(program)
      lc_authorized_programs[program]
    end

    # Department names as they appear in the metadata, mapped to the
    # authorized form used when flattening records.
    LC_AUTHORIZED_DEPARTMENTS = {
      'African American Studies' => 'Princeton University. Department of African American Studies',
      'Art and Archaeology' => 'Princeton University. Department of Art and Archaeology',
      'Aeronautical Engineering' => 'Princeton University. Department of Aeronautical Engineering',
      'Anthropology' => 'Princeton University. Department of Anthropology',
      'Architecture School' => 'Princeton University. School of Architecture',
      'Astrophysical Sciences' => 'Princeton University. Department of Astrophysical Sciences',
      'Biochemical Sciences' => 'Princeton University. Department of Biochemical Sciences',
      'Biology' => 'Princeton University. Department of Biology',
      'Civil and Environmental Engineering' => 'Princeton University. Department of Civil and Environmental Engineering',
      'Civil Engineering and Operations Research' => 'Princeton University. Department of Civil Engineering and Operations Research',
      'Chemical and Biological Engineering' => 'Princeton University. Department of Chemical and Biological Engineering',
      'Chemistry' => 'Princeton University. Department of Chemistry',
      'Classics' => 'Princeton University. Department of Classics',
      'Comparative Literature' => 'Princeton University. Department of Comparative Literature',
      'Computer Science' => 'Princeton University. Department of Computer Science',
      'East Asian Studies' => 'Princeton University. Department of East Asian Studies',
      'Economics' => 'Princeton University. Department of Economics',
      'Ecology and Evolutionary Biology' => 'Princeton University. Department of Ecology and Evolutionary Biology',
      'Electrical Engineering' => 'Princeton University. Department of Electrical Engineering',
      'Engineering and Applied Science' => 'Princeton University. School of Engineering and Applied Science',
      'English' => 'Princeton University. Department of English',
      'French and Italian' => 'Princeton University. Department of French and Italian',
      'Geosciences' => 'Princeton University. Department of Geosciences',
      'German' => 'Princeton University. Department of Germanic Languages and Literatures',
      'History' => 'Princeton University. Department of History',
      'Special Program in Humanities' => 'Princeton University. Special Program in the Humanities',
      'Independent Concentration' => 'Princeton University Independent Concentration Program',
      'Mathematics' => 'Princeton University. Department of Mathematics',
      'Molecular Biology' => 'Princeton University. Department of Molecular Biology',
      'Mechanical and Aerospace Engineering' => 'Princeton University. Department of Mechanical and Aerospace Engineering',
      'Medieval Studies' => 'Princeton University. Program in Medieval Studies',
      'Modern Languages' => 'Princeton University. Department of Modern Languages.',
      'Music' => 'Princeton University. Department of Music',
      'Near Eastern Studies' => 'Princeton University. Department of Near Eastern Studies',
      'Neuroscience' => 'Princeton Neuroscience Institute',
      'Operations Research and Financial Engineering' => 'Princeton University. Department of Operations Research and Financial Engineering',
      'Oriental Studies' => 'Princeton University. Department of Oriental Studies',
      'Philosophy' => 'Princeton University. Department of Philosophy',
      'Physics' => 'Princeton University. Department of Physics',
      'Politics' => 'Princeton University. Department of Politics',
      'Psychology' => 'Princeton University. Department of Psychology',
      'Religion' => 'Princeton University. Department of Religion',
      'Romance Languages and Literatures' => 'Princeton University. Department of Romance Languages and Literatures',
      'Slavic Languages and Literature' => 'Princeton University. Department of Slavic Languages and Literatures',
      'Sociology' => 'Princeton University. Department of Sociology',
      'Spanish and Portuguese' => 'Princeton University. Department of Spanish and Portuguese Languages and Cultures',
      'Spanish and Portuguese Languages and Cultures' => 'Princeton University. Department of Spanish and Portuguese Languages and Cultures',
      'Statistics' => 'Princeton University. Department of Statistics',
      'School of Public and International Affairs' => 'School of Public and International Affairs'
    }.freeze
    private_constant :LC_AUTHORIZED_DEPARTMENTS

    # Certificate program names as they appear in the metadata, mapped to the
    # authorized form used when flattening records.
    LC_AUTHORIZED_PROGRAMS = {
      'African American Studies Program' => 'Princeton University. Program in African-American Studies',
      'African Studies Program' => 'Princeton University. Program in African Studies',
      'American Studies Program' => 'Princeton University. Program in American Studies',
      'Applications of Computing Program' => 'Princeton University. Program in Applications of Computing',
      'Architecture and Engineering Program' => 'Princeton University. Program in Architecture and Engineering',
      'Center for Statistics and Machine Learning' => 'Princeton University. Center for Statistics and Machine Learning',
      'Creative Writing Program' => 'Princeton University. Creative Writing Program',
      'East Asian Studies Program' => 'Princeton University. Program in East Asian Studies',
      'Engineering Biology Program' => 'Princeton University. Program in Engineering Biology',
      'Engineering and Management Systems Program' => 'Princeton University. Program in Engineering and Management Systems',
      'Environmental Studies Program' => 'Princeton University. Program in Environmental Studies',
      'Ethnographic Studies Program' => 'Princeton University. Program in Ethnographic Studies',
      'European Cultural Studies Program' => 'Princeton University. Program in European Cultural Studies',
      'Finance Program' => 'Princeton University. Program in Finance',
      'Geological Engineering Program' => 'Princeton University. Program in Geological Engineering',
      'Global Health and Health Policy Program' => 'Princeton University. Program in Global Health and Health Policy',
      'Hellenic Studies Program' => 'Princeton University. Program in Hellenic Studies',
      'Humanities Council and Humanistic Studies Program' => 'Princeton University. Program in Humanistic Studies',
      'Judaic Studies Program' => 'Princeton University. Program in Judaic Studies',
      'Latin American Studies Program' => 'Princeton University. Program in Latin American Studies',
      'Latino Studies Program' => 'Princeton University. Program in Latino Studies',
      'Linguistics Program' => 'Princeton University. Program in Linguistics',
      'Materials Science and Engineering Program' => 'Princeton University. Program in Materials Science and Engineering',
      'Medieval Studies Program' => 'Princeton University. Program in Medieval Studies',
      'Near Eastern Studies Program' => 'Princeton University. Program in Near Eastern Studies',
      'Neuroscience Program' => 'Princeton University. Program in Neuroscience',
      'Program in Cognitive Science' => 'Princeton University. Program in Cognitive Science',
      'Program in Entrepreneurship' => 'Princeton University. Program in Entrepreneurship',
      'Program in Gender and Sexuality Studies' => 'Princeton University. Program in Gender and Sexuality Studies',
      'Program in Music Theater' => 'Princeton University. Program in Music Theater',
      'Program in Technology & Society, Technology Track' => 'Princeton University. Program in Technology and Society',
      'Program in Values and Public Life' => 'Princeton University. Program in Values and Public Life',
      'Quantitative and Computational Biology Program' => 'Princeton University. Program in Quantitative and Computational Biology',
      'Robotics & Intelligent Systems Program' => 'Princeton University. Program in Robotics and Intelligent Systems',
      'Russian & Eurasian Studies Program' => 'Princeton University. Program in Russian, East European and Eurasian Studies',
      'South Asian Studies Program' => 'Princeton University. Program in South Asian Studies',
      'Theater' => 'Princeton University. Program in Theater',
      'Theater Program' => 'Princeton University. Program in Theater',
      'Sustainable Energy Program' => 'Princeton University. Program in Sustainable Energy',
      'Urban Studies Program' => 'Princeton University. Program in Urban Studies'
    }.freeze
    private_constant :LC_AUTHORIZED_PROGRAMS

    # @return [Hash{String => String}] department name => authorized form
    def lc_authorized_departments
      LC_AUTHORIZED_DEPARTMENTS
    end

    # @return [Hash{String => String}] program name => authorized form
    def lc_authorized_programs
      LC_AUTHORIZED_PROGRAMS
    end
  end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc