• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_discovery / 79cc0f1f-db9d-4052-80b2-03da9188224f

08 Nov 2024 05:55PM UTC coverage: 96.43% (+0.2%) from 96.253%
79cc0f1f-db9d-4052-80b2-03da9188224f

Pull #714

circleci

jrgriffiniii
Addressing the failing tests for DSpace and DescribeIndexer
Pull Request #714: Improving test consistency for DSpace and DescribeIndexer

53 of 59 new or added lines in 6 files covered. (89.83%)

5 existing lines in 2 files now uncovered.

3593 of 3726 relevant lines covered (96.43%)

289.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.44
/app/lib/describe_indexer.rb
1
# frozen_string_literal: true
2

3
require 'faraday_middleware'
1✔
4
require 'traject'
1✔
5
require 'open-uri'
1✔
6

7
##
# Fetch an RSS feed of approved works from PDC Describe. For each work, index a PDC Describe JSON resource to solr.
class DescribeIndexer
  ##
  # See config/pdc_discovery.yml for configuration of the RSS feed that
  # this indexer uses to harvest data from PDC Describe.
  # @param [String] rss_url
  def initialize(rss_url: Rails.configuration.pdc_discovery.pdc_describe_rss)
    @rss_url = rss_url
  end

  ##
  # Load the traject indexing config for PDC Describe JSON resources.
  # Every call builds a new indexer and loads the config file into it.
  # @return [Traject::Indexer::NokogiriIndexer]
  def traject_indexer
    Traject::Indexer::NokogiriIndexer.new.tap do |indexer|
      indexer.load_config_file(datacite_indexing_config_path)
    end
  end

  ##
  # Absolute path of the traject config file used by #traject_indexer.
  # @return [String]
  def datacite_indexing_config_path
    ::Rails.root.join('config', 'traject', 'pdc_describe_indexing_config.rb').to_s
  end

  ##
  # Only index if Rails.configuration.pdc_discovery.index_pdc_describe == true
  # See config/pdc_discovery.yml to change this setting for a given environment.
  def index
    # The comparison against `true` is deliberate: any other value leaves indexing switched off.
    if Rails.configuration.pdc_discovery.index_pdc_describe == true
      perform_indexing
    else
      Rails.logger.warn "PDC Describe indexing is not turned on for this environment. See config/pdc_discovery.yml"
    end
  end

  ##
  # Given a json document, return an XML string that contains
  # the JSON blob as a CDATA element. The CDATA is wrapped in a
  # <pdc_describe_json> element inserted right after the document's
  # first <group> element.
  # @param [String] json
  # @return [String]
  # @raise [JSON::ParserError] when json is not valid JSON
  # @raise [ArgumentError] when the converted document has no <group> element
  def prep_for_indexing(json)
    # NOTE(review): `to_xml` is not core Ruby; presumably ActiveSupport's Hash#to_xml - confirm.
    doc = Nokogiri::XML(JSON.parse(json).to_xml)
    collection_node = doc.at('group')
    if collection_node.nil?
      raise ArgumentError, 'PDC Describe JSON has no "group" element to attach the embedded JSON to'
    end

    collection_node.add_next_sibling("<pdc_describe_json></pdc_describe_json>")
    doc.at('pdc_describe_json').add_child(Nokogiri::XML::CDATA.new(doc, json))
    doc.to_s
  end

  ##
  # Index a single PDC Describe JSON resource.
  # @param [String] json
  def index_one(json)
    resource_xml = prep_for_indexing(json)
    traject_indexer.process(resource_xml)
    # NOTE(review): #traject_indexer builds a new indexer on each call, so `complete`
    # runs on a different instance than the one that processed the record - confirm intended.
    traject_indexer.complete
  end

  ##
  # Memoized connection to the default Blacklight index.
  def client
    @client ||= Blacklight.default_index.connection
  end

  ##
  # Delete every document matching the query, then commit and optimize the index.
  # Example query: 'price:1.00'
  # @param [String] query
  # @return the index connection (see #client)
  def delete!(query:)
    client.delete_by_query(query)
    client.commit
    client.optimize
    client
  end

  private

  ##
  # Open the RSS feed. The caller is responsible for closing the returned IO.
  def rss_http_response
    URI.open(@rss_url)
  end

  ##
  # Fetch the RSS feed and parse it as XML.
  def rss_xml_doc
    response = rss_http_response
    Nokogiri::XML(response)
  ensure
    # The feed is fully parsed by now, so release the handle instead of leaking it.
    response.close if response.respond_to?(:close)
  end

  ##
  # Text nodes holding the JSON resource url of each item in the feed.
  def rss_url_nodes
    rss_xml_doc.xpath("//item/url/text()")
  end

  ##
  # @return [Array<String>] the JSON resource url of each item in the feed
  def rss_url_list
    rss_url_nodes.map(&:to_s)
  end

  ##
  # Parse the rss_url, get a JSON resource url for each item, convert it to XML, and pass it to traject
  def perform_indexing
    urls_to_retry = []
    rss_url_list.each do |url|
      process_url(url)
    rescue
      # The first failure is not reported; the url simply gets a second attempt below.
      urls_to_retry << url
    end

    # Retry each errored url once, and report an error only if the second attempt fails too.
    urls_to_retry.each do |url|
      process_url(url)
    rescue => ex
      report_import_failure(url, ex)
    end
  end

  ##
  # Send the same failure message to the Rails log and to Honeybadger.
  # @param [String] url
  # @param [StandardError] error
  def report_import_failure(url, error)
    message = "Error importing record from #{url}. Exception: #{error.message}"
    Rails.logger.warn message
    Honeybadger.notify message
  end

  ##
  # Download one JSON resource, convert it to XML and hand it to a new traject indexer.
  # @param [String] url
  def process_url(url)
    # The block form closes the underlying IO as soon as the body has been read.
    resource_json = URI.open(url, open_timeout: 30, read_timeout: 30, &:read)
    resource_xml = prep_for_indexing(resource_json)
    traject_indexer.process(resource_xml)
    Rails.logger.info "Successfully imported record from #{url}."
  end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc