• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pdc_discovery / 5c0f5da8-b4e1-4e69-9a5c-83b9a0f043ba

01 Mar 2024 01:12PM UTC coverage: 96.969% (+0.03%) from 96.943%
5c0f5da8-b4e1-4e69-9a5c-83b9a0f043ba

Pull #574

circleci

carolyncole
Allowing sort by year and first author
The sort field for year was not defined by the PDC indexer, which caused the sort to be random
The author sort was sorting by the alphabetically first of any of the authors, which made the sort seem random
fixes #572
Pull Request #574: Allowing sort by year and first author

34 of 34 new or added lines in 4 files covered. (100.0%)

1 existing line in 1 file now uncovered.

2943 of 3035 relevant lines covered (96.97%)

284.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.57
/spec/lib/describe_indexer_spec.rb
1
# frozen_string_literal: true
2

3
# rubocop:disable Metrics/BlockLength
4
RSpec.describe DescribeIndexer do
1✔
5
  describe 'indexing a single record' do
1✔
6
    let(:single_item) { file_fixture("bitklavier_binaural.json").read }
21✔
7
    let(:indexer) do
1✔
8
      described_class.new(rss_url: "file://whatever.rss")
20✔
9
    end
10
    let(:indexed_record) do
1✔
11
      Blacklight.default_index.connection.delete_by_query("*:*")
20✔
12
      Blacklight.default_index.connection.commit
20✔
13
      indexer.index_one(single_item)
20✔
14
      response = Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
20✔
15
      response["response"]["docs"].first
20✔
16
    end
17

18
    context "basic fields" do
1✔
19
      ##
20
      # The id is based on the DOI
21
      # A doi of 10.34770/r75s-9j74 will become doi-10-34770-r75s-9j74
22
      it "id" do
1✔
23
        expect(indexed_record["id"]).to eq "doi-10-34770-r75s-9j74"
1✔
24
      end
25

26
      it "stores a copy of the full JSON in CDATA" do
1✔
27
        stored_json = indexed_record["pdc_describe_json_ss"]
1✔
28
        parsed_json = JSON.parse(stored_json)
1✔
29
        expect(parsed_json["resource"]["titles"][0]["title"]).to eq "bitKlavier Grand Sample Library—Binaural Mic Image"
1✔
30
      end
31

32
      it "author" do
1✔
33
        expect(indexed_record["author_tesim"]).to eq ['Trueman, Daniel']
1✔
34
      end
35

36
      it "description" do
1✔
37
        description = "The bitKlavier Grand consists"
1✔
38
        expect(indexed_record["description_tsim"].first.include?(description)).to be true
1✔
39
      end
40

41
      it "contributors" do
1✔
42
        expect(indexed_record["contributor_tsim"].include?("Villalta, Andrés")).to eq true
1✔
43
        expect(indexed_record["contributor_tsim"].include?("Chou, Katie")).to eq true
1✔
44
        expect(indexed_record["contributor_tsim"].include?("Ayres, Christien")).to eq true
1✔
45
        expect(indexed_record["contributor_tsim"].include?("Wang, Matthew")).to eq true
1✔
46
      end
47

48
      it "title" do
1✔
49
        # title includes all titles
50
        main_title = "bitKlavier Grand Sample Library—Binaural Mic Image"
1✔
51
        alt_title = "alter title for bitKlavier"
1✔
52
        expect(indexed_record["title_tesim"].include?(main_title)).to eq true
1✔
53
        expect(indexed_record["title_tesim"].include?(alt_title)).to eq true
1✔
54
        # alt title does not include the main title
55
        expect(indexed_record["alternative_title_tesim"].include?(main_title)).to eq false
1✔
56
        expect(indexed_record["alternative_title_tesim"].include?(alt_title)).to eq true
1✔
57
      end
58

59
      it "rights" do
1✔
60
        expect(indexed_record["rights_name_ssi"]).to eq "GNU General Public License"
1✔
61
        expect(indexed_record["rights_uri_ssi"]).to eq "https://www.gnu.org/licenses/gpl-3.0.en.html"
1✔
62
      end
63

64
      it "keywords" do
1✔
65
        expect(indexed_record["subject_all_ssim"].include?("keyword1")).to eq true
1✔
66
        expect(indexed_record["subject_all_ssim"].include?("keyword2")).to eq true
1✔
67
        expect(indexed_record["subject_all_ssim"].include?("keyword3")).to eq true
1✔
68
      end
69

70
      it "collection tag" do
1✔
71
        expect(indexed_record["collection_tag_ssim"].include?("Humanities")).to eq true
1✔
72
        expect(indexed_record["collection_tag_ssim"].include?("Something else")).to eq true
1✔
73
      end
74

75
      it "community" do
1✔
76
        expect(indexed_record["community_name_ssi"]).to eq "Research Data"
1✔
77
      end
78

79
      it "genre_ssim" do
1✔
80
        expect(indexed_record["genre_ssim"].first).to eq "Dataset"
1✔
81
      end
82

83
      it "issue_date_ssim" do
1✔
84
        expect(indexed_record["issue_date_ssim"].first).to eq "2021"
1✔
85
      end
86

87
      # The pdc_created_at_dtsi field is used to describe when a record was created in PDC
88
      # This is used to sort the Recently Published page
89
      it "pdc_created_at_dtsi" do
1✔
90
        expect(indexed_record["pdc_created_at_dtsi"]).to eq "2021-12-31T19:00:00Z"
1✔
91
        # example timestamp 1997-12-31T23:59:59Z
92
      end
93

94
      it "pdc_updated_at_dtsi" do
1✔
95
        expect(indexed_record["pdc_updated_at_dtsi"]).to eq "2021-12-31T20:00:00Z"
1✔
96
      end
97

98
      it "publisher_ssim" do
1✔
99
        expect(indexed_record["publisher_ssim"].first).to eq "Princeton University"
1✔
100
      end
101

102
      it "migrated_bsi" do
1✔
103
        expect(indexed_record["migrated_bsi"]).to eq true
1✔
104
      end
105

106
      xit 'referenced_by' do
1✔
UNCOV
107
        expect(indexed_record['referenced_by_ssim'].first).to eq 'https://arxiv.org/abs/1903.06605'
×
108
      end
109
    end
110

111
    context "uris" do
1✔
112
      it "stores full URL for ARK and DOI" do
1✔
113
        expect(indexed_record["uri_ssim"].include?("http://arks.princeton.edu/ark:/88435/dsp015999n653h")).to eq true
1✔
114
        expect(indexed_record["uri_ssim"].include?("https://doi.org/10.34770/r75s-9j74")).to eq true
1✔
115
      end
116
    end
117

118
    context "files" do
1✔
119
      it "stores file detailed information" do
1✔
120
        files = JSON.parse(indexed_record['files_ss'])
1✔
121
        file1 = files.find { |file| file["name"] == "file1.jpg" }
2✔
122
        file2 = files.find { |file| file["name"] == "file2.txt" }
3✔
123
        file3 = files.find { |file| file["name"] == "file3.txt" }
4✔
124
        expect(file1["size"]).to eq "316781"
1✔
125
        expect(file1["url"]).to eq "https://g-5beea4.90d4e.bd7c.data.globus.org/pdc-describe-staging-postcuration/10.80021/3m1k-6036/122/file1.jpg"
1✔
126
        expect(file2["size"]).to eq "396003"
1✔
127
        expect(file3["name"]).to eq "file3.txt"
1✔
128
        expect(file3["full_name"]).to eq "10.80021/3m1k-6036/122/folder-a/file3.txt"
1✔
129
      end
130
      it "excludes PDC preservation files" do
1✔
131
        files = JSON.parse(indexed_record['files_ss'])
1✔
132
        datacite_xml = files.find { |file| file["full_name"].include? "/princeton_data_commons/datacite.xml" }
4✔
133
        expect(datacite_xml).to be nil
1✔
134
      end
135
    end
136

137
    context "all text catch all field" do
1✔
138
      it "indexes the file name in the all text catch all field" do
1✔
139
        files = JSON.parse(indexed_record['files_ss'])
1✔
140
        file_name = File.basename(files.first["name"])
1✔
141
        response = Blacklight.default_index.connection.get 'select', params: { q: file_name }
1✔
142
        expect(response["response"]["numFound"]).to eq 1
1✔
143

144
        response = Blacklight.default_index.connection.get 'select', params: { q: "non-existing-value" }
1✔
145
        expect(response["response"]["numFound"]).to eq 0
1✔
146
      end
147
    end
148
  end
149

150
  describe 'indexing an RSS feed from PDC Describe' do
1✔
151
    let(:rss_feed) { file_fixture("works.rss").read }
4✔
152
    let(:resource1) { file_fixture("bitklavier_binaural.json").read }
9✔
153
    let(:resource2) { file_fixture("sowing_the_seeds.json").read }
9✔
154
    let(:rss_url_string) { "https://pdc-describe-prod.princeton.edu/describe/works.rss" }
5✔
155
    let(:indexer) { described_class.new(rss_url: rss_url_string) }
5✔
156

157
    it "has a traject indexer" do
1✔
158
      expect(indexer.traject_indexer).to be_instance_of Traject::Indexer::NokogiriIndexer
1✔
159
    end
160

161
    context 'indexing to solr' do
1✔
162
      before do
1✔
163
        Blacklight.default_index.connection.delete_by_query("*:*")
8✔
164
        Blacklight.default_index.connection.commit
8✔
165
        stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works.rss")
8✔
166
          .to_return(status: 200, body: rss_feed, headers: {})
167
        stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/6.json")
8✔
168
          .to_return(status: 200, body: resource1, headers: {})
169
        stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/20.json")
8✔
170
          .to_return(status: 200, body: resource2, headers: {})
171
      end
172

173
      it "sends items to solr" do
1✔
174
        response = Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
1✔
175
        expect(response["response"]["numFound"]).to eq 0
1✔
176

177
        # If index_pdc_describe == false, do not index PDC Describe.
178
        # This is a safety measure so we don't index in production until we're ready
179
        # See config/pdc_discovery.yml to change this setting for real
180
        Rails.configuration.pdc_discovery.index_pdc_describe = false
1✔
181
        indexer.index
1✔
182
        response = Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
1✔
183
        expect(response["response"]["numFound"]).to eq 0
1✔
184

185
        # If index_pdc_describe == true, DO index PDC Describe.
186
        Rails.configuration.pdc_discovery.index_pdc_describe = true
1✔
187
        indexer.index
1✔
188
        response = Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
1✔
189
        expect(response["response"]["numFound"]).to eq 2
1✔
190
      end
191

192
      it "can sort by year_available_itsi" do
1✔
193
        Rails.configuration.pdc_discovery.index_pdc_describe = true
1✔
194
        indexer.index
1✔
195
        response = Blacklight.default_index.connection.get 'select', params: { q: '*:*', sort: 'year_available_itsi desc' }
1✔
196
        expect(response["response"]["numFound"]).to eq 2
1✔
197
        expect(response["response"]['docs'].first['pdc_created_at_dtsi']).to eq("2023-07-11T11:06:10Z")
1✔
198
        expect(response["response"]['docs'].last['pdc_created_at_dtsi']).to eq("2021-12-31T19:00:00Z")
1✔
199
      end
200

201
      context "works with multiple creators" do
1✔
202
        let(:pppl1) { file_fixture("pppl1.json").read }
2✔
203
        let(:pppl2) { file_fixture("pppl2.json").read }
2✔
204
        before do
1✔
205
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/6.json")
1✔
206
            .to_return(status: 200, body: pppl1, headers: {})
207
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/20.json")
1✔
208
            .to_return(status: 200, body: pppl2, headers: {})
209
        end
210

211
        it "can sort by the first author" do
1✔
212
          Rails.configuration.pdc_discovery.index_pdc_describe = true
1✔
213
          indexer.index
1✔
214
          response = Blacklight.default_index.connection.get 'select', params: { q: '*:*', sort: 'author_si desc' }
1✔
215
          expect(response["response"]["numFound"]).to eq 2
1✔
216
          expect(response["response"]['docs'].first['author_tesim'].first).to eq("Wang, Yin")
1✔
217
          expect(response["response"]['docs'].last['author_tesim'].first).to eq("Schwartz, Jacob A.")
1✔
218
        end
219
      end
220

221
      context "when there are items which are under active embargo" do
1✔
222
        let(:item_file_fixture) { file_fixture("pdc_describe_active_embargo.json") }
3✔
223
        let(:embargo_resource) { item_file_fixture.read }
3✔
224
        # This redundancy is required for consistent testing
225
        let(:rss_feed) { file_fixture("works.rss").read }
3✔
226
        let(:rss_url) { "https://pdc-describe-prod.princeton.edu/describe/works.rss" }
3✔
227
        let(:indexer) do
1✔
228
          described_class.new(rss_url: rss_url)
2✔
229
        end
230
        let(:solr_response) do
1✔
231
          Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
2✔
232
        end
233
        let(:response) { solr_response["response"] }
3✔
234
        let(:num_found) { response["numFound"] }
3✔
235
        let(:docs) { response["docs"] }
3✔
236
        let(:doc) { docs.first }
3✔
237
        let(:files) do
1✔
238
          values = doc["files_ss"]
1✔
239
          JSON.parse(values)
1✔
240
        end
241

242
        before do
1✔
243
          stub_request(:get, rss_url)
2✔
244
            .to_return(status: 200, body: rss_feed, headers: {})
245
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/6.json")
2✔
246
            .to_return(status: 200, body: embargo_resource, headers: {})
247
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/20.json")
2✔
248
            .to_return(status: 200, body: embargo_resource, headers: {})
249

250
          Rails.configuration.pdc_discovery.index_pdc_describe = true
2✔
251
          indexer.index
2✔
252
        end
253

254
        it "indexes the embargo date" do
1✔
255
          expect(solr_response).to include("response")
1✔
256
          expect(response).to include("numFound")
1✔
257
          expect(num_found).to eq 1
1✔
258
          expect(docs).not_to be_empty
1✔
259
          expect(doc).to include("embargo_date_dtsi")
1✔
260
        end
261

262
        it "does not index the files" do
1✔
263
          expect(solr_response).to include("response")
1✔
264
          expect(response).to include("numFound")
1✔
265
          expect(num_found).to eq 1
1✔
266
          expect(docs).not_to be_empty
1✔
267
          expect(doc).to include("files_ss")
1✔
268
          expect(files).to be_empty
1✔
269
        end
270
      end
271

272
      context "when there are items which are under active embargo" do
1✔
273
        let(:item_file_fixture) { file_fixture("pdc_describe_expired_embargo.json") }
2✔
274
        let(:embargo_resource) { item_file_fixture.read }
2✔
275
        # This redundancy is required for consistent testing
276
        let(:rss_feed) { file_fixture("works.rss").read }
2✔
277
        let(:rss_url) { "https://pdc-describe-prod.princeton.edu/describe/works.rss" }
2✔
278
        let(:indexer) do
1✔
279
          described_class.new(rss_url: rss_url)
1✔
280
        end
281
        let(:solr_response) do
1✔
282
          Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
1✔
283
        end
284
        let(:response) { solr_response["response"] }
2✔
285
        let(:num_found) { response["numFound"] }
2✔
286
        let(:docs) { response["docs"] }
2✔
287
        let(:doc) { docs.first }
2✔
288
        # Parse the stored JSON so the emptiness check applies to the file list itself;
        # the raw string "[]" is a non-empty String and would pass `not_to be_empty` vacuously.
        let(:files) { JSON.parse(doc["files_ss"]) }
2✔
289

290
        before do
1✔
291
          stub_request(:get, rss_url)
1✔
292
            .to_return(status: 200, body: rss_feed, headers: {})
293
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/6.json")
1✔
294
            .to_return(status: 200, body: embargo_resource, headers: {})
295
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/20.json")
1✔
296
            .to_return(status: 200, body: embargo_resource, headers: {})
297

298
          Rails.configuration.pdc_discovery.index_pdc_describe = true
1✔
299
          indexer.index
1✔
300
        end
301

302
        it "does indexes the files" do
1✔
303
          expect(solr_response).to include("response")
1✔
304
          expect(response).to include("numFound")
1✔
305
          expect(num_found).to eq 1
1✔
306
          expect(docs).not_to be_empty
1✔
307
          expect(doc).to include("files_ss")
1✔
308
          expect(files).not_to be_empty
1✔
309
        end
310
      end
311

312
      context "when there work is not under embargo" do
1✔
313
        let(:item_file_fixture) { file_fixture("pdc_describe_no_embargo.json") }
3✔
314
        let(:embargo_resource) { item_file_fixture.read }
3✔
315
        # This redundancy is required for consistent testing
316
        let(:rss_feed) { file_fixture("works.rss").read }
3✔
317
        let(:rss_url) { "https://pdc-describe-prod.princeton.edu/describe/works.rss" }
3✔
318
        let(:indexer) do
1✔
319
          described_class.new(rss_url: rss_url)
2✔
320
        end
321
        let(:solr_response) do
1✔
322
          Blacklight.default_index.connection.get 'select', params: { q: '*:*' }
2✔
323
        end
324
        let(:response) { solr_response["response"] }
3✔
325
        let(:num_found) { response["numFound"] }
3✔
326
        let(:docs) { response["docs"] }
3✔
327
        let(:doc) { docs.first }
3✔
328
        let(:files) do
1✔
329
          values = doc["files_ss"]
1✔
330
          JSON.parse(values)
1✔
331
        end
332

333
        before do
1✔
334
          stub_request(:get, rss_url)
2✔
335
            .to_return(status: 200, body: rss_feed, headers: {})
336
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/6.json")
2✔
337
            .to_return(status: 200, body: embargo_resource, headers: {})
338
          stub_request(:get, "https://pdc-describe-prod.princeton.edu/describe/works/20.json")
2✔
339
            .to_return(status: 200, body: embargo_resource, headers: {})
340

341
          Rails.configuration.pdc_discovery.index_pdc_describe = true
2✔
342
          indexer.index
2✔
343
        end
344

345
        it "does not index an embargo date" do
1✔
346
          expect(solr_response).to include("response")
1✔
347
          expect(response).to include("numFound")
1✔
348
          expect(num_found).to eq 1
1✔
349
          expect(docs).not_to be_empty
1✔
350
          expect(doc).not_to include("embargo_date_dtsi")
1✔
351
        end
352

353
        it "does not index the files" do
1✔
354
          expect(solr_response).to include("response")
1✔
355
          expect(response).to include("numFound")
1✔
356
          expect(num_found).to eq 1
1✔
357
          expect(docs).not_to be_empty
1✔
358
          expect(doc).to include("files_ss")
1✔
359
          expect(files).to be_empty
1✔
360
        end
361
      end
362
    end
363
  end
364
end
365
# rubocop:enable Metrics/BlockLength
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc