• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 97cbe92d-c54f-45de-a4c8-e506299fcc4d

21 Dec 2023 09:42PM UTC coverage: 90.834% (-0.5%) from 91.287%
97cbe92d-c54f-45de-a4c8-e506299fcc4d

Pull #2284

circleci

sandbergja
Upgrade old gem dependencies

Removes the rerun gem, since it is no longer used. Also removes some leftover files from the spring gem.
Pull Request #2284: Upgrade old gem dependencies

3409 of 3753 relevant lines covered (90.83%)

334.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.36
/marc_to_solr/lib/cache_map.rb
1
require 'faraday'
1✔
2
require 'active_support/core_ext/string'
1✔
3

4
# Cached mapping of ARKs to Bib IDs
5
# Retrieves and stores paginated Solr responses containing the ARKs and BibIDs
6
class CacheMap
  # Build the cache key under which the entry for an ARK is stored
  # @param ark [String] the ARK (e.g. "ark:/88435/abc")
  # @return [String] the ARK with every ':' and '/' replaced by '_'
  def self.cache_key_for(ark:)
    ark.tr(':/', '_')
  end

  # Constructor
  # @param cache [ActiveSupport::Cache::Store, CacheAdapter] Low-level cache
  # @param host [String] the host for the Blacklight endpoint
  # @param path [String] the path for the Blacklight endpoint
  # @param rows [Integer] the number of rows for each Solr response
  # @param logger [Logger, IO] a logger, or a raw stream (such as STDOUT) which
  #   is wrapped in a Logger so that #info/#warn/#debug/#error can be called on it
  def initialize(cache:, host:, path: '/catalog.json', rows: 1_000_000, logger: STDOUT)
    @cache = cache
    @host = host
    @path = path
    @rows = rows
    @logger = coerce_logger(logger)
  end

  # Seed the cache
  # Pages through the endpoint starting at +page+, caching every page until the
  # response reports that the last page has been reached.
  # @param page [Integer] the page number at which to start the caching
  def seed!(page: 1)
    @logger.info "Seeding the cache for #{@host} using Solr..."
    # Determine if the values from the Solr response have been cached
    @cached_values = @cache.fetch(cache_key)
    return if page == 1 && !@cached_values.nil?

    loop do
      response = query(page:)
      if response.empty?
        # Leave the completion marker unwritten so that a later run retries
        @logger.warn "No response could be retrieved from Solr for #{@host}"
        return
      end

      pages = response.fetch('pages')
      cache_page(response)

      # Continue only while the endpoint explicitly reports more pages
      break unless pages.fetch('last_page?') == false

      page += 1
    end

    # Mark within the cache that all of the ARK/BibID pairs have been populated
    @cache.write(cache_key, cache_key)
  end

  # Fetch the cached entry for an ARK
  # @param ark [String] the ARK to look up
  # @return [Object, nil] the value stored for the ARK (or nil if it has not been mapped)
  def fetch(ark)
    # Attempt to retrieve this from the cache
    value = @cache.fetch(self.class.cache_key_for(ark:))

    if value.nil?
      @logger.warn "Failed to resolve #{ark}" if URI::ARK.princeton_ark?(url: ark)
    else
      @logger.debug "Resolved #{ark} for #{value}"
    end
    value
  end

  private

    # Wrap raw output streams in a Logger; pass real loggers through untouched
    # @param device [Logger, IO] the object handed to the constructor
    # @return [Object] +device+ itself, or a Logger writing to it
    def coerce_logger(device)
      raw_stream = device.respond_to?(:write) && !device.respond_to?(:info)
      return device unless raw_stream

      # Loaded lazily: only needed when a bare stream was supplied
      require 'logger'
      Logger.new(device)
    end

    # Cache a page
    # Documents without an ARK are skipped, and an ARK that is already cached is
    # never overwritten (the first value wins).
    # @param page [Hash] Solr response page
    def cache_page(page)
      page.fetch('docs').each do |doc|
        id = doc.fetch('id')
        ark = Array(doc['identifier_ssim']).first
        if ark.nil?
          # Without an ARK there is no key to cache this document under
          @logger.debug "Skipping #{id}: no ARK identifier present"
          next
        end

        bib_id = Array(doc['source_metadata_identifier_ssim']).first
        # Grab the human readable type (nil when neither field is present)
        resource_type = Array(doc['internal_resource_ssim'] || doc['has_model_ssim']).first

        key_for_ark = self.class.cache_key_for(ark:)
        # Handle collisions by refusing to overwrite the first value
        next if @cache.exist?(key_for_ark)

        @cache.write(key_for_ark, id:, source_metadata_identifier: bib_id, internal_resource: resource_type)
        @logger.debug "Cached the mapping for #{ark} to #{bib_id}"
      end
    end

    # Query the service using the endpoint
    # @param page [Integer] the page parameter for the query
    # @return [Hash] the "response" member of the JSON body, or an empty Hash
    #   when the request or the parsing fails
    def query(page: 1)
      url = URI::HTTPS.build(host: @host, path: @path, query: "q=&rows=#{@rows}&page=#{page}&f[identifier_tesim][]=ark")
      http_response = Faraday.get(url)
      values = JSON.parse(http_response.body)
      # A JSON null "response" is treated the same as no response at all
      values.fetch('response') || {}
    rescue StandardError => err
      @logger.error "Failed to seed the ARK cached from Solr: #{err}"
      {}
    end

    # Generate the unique key for the cache from the hostname and path for Solr
    # @return [String] the cache key
    def cache_key
      [@host, @path].map { |part| part.tr('./', '_') }.join('_')
    end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc