• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 97cbe92d-c54f-45de-a4c8-e506299fcc4d

21 Dec 2023 09:42PM UTC coverage: 90.834% (-0.5%) from 91.287%
97cbe92d-c54f-45de-a4c8-e506299fcc4d

Pull #2284

circleci

sandbergja
Upgrade old gem dependencies

Removes the rerun gem, since it is no longer used.  Also, remove some leftover files from the spring gem.
Pull Request #2284: Upgrade old gem dependencies

3409 of 3753 relevant lines covered (90.83%)

334.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.59
/marc_to_solr/lib/princeton_marc.rb
1
# encoding: UTF-8
2
require 'library_stdnums'
1✔
3
require 'lightly'
1✔
4
require 'uri'
1✔
5
require_relative 'cache_adapter'
1✔
6
require_relative 'cache_manager'
1✔
7
require_relative 'cache_map'
1✔
8
require_relative 'composite_cache_map'
1✔
9
require_relative 'electronic_access_link'
1✔
10
require_relative 'electronic_access_link_factory'
1✔
11
require_relative 'hierarchical_heading'
1✔
12
require_relative 'iiif_manifest_url_builder'
1✔
13
require_relative 'orangelight_url_builder'
1✔
14
require_relative 'process_holdings_helpers'
1✔
15

16
module MARC
  # Date helpers added to MARC::Record.
  class Record
    # Taken from pul-store marc.rb lib extension
    # Shamelessly lifted from SolrMARC, with a few changes; no doubt there will
    # be more.
    #
    # The patterns below are tried against 260$c, in this order, by #best_date.
    @@THREE_OR_FOUR_DIGITS = /^(20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1)(\d{2})\.?$/
    @@FOUR_DIGIT_PATTERN_BRACES = /^\[([12]\d{3})\??\]\.?$/
    @@FOUR_DIGIT_PATTERN_ONE_BRACE = /^\[(20|19|18|17|16|15|14|13|12|11|10)(\d{2})/
    @@FOUR_DIGIT_PATTERN_OTHER_1 = /^l(\d{3})/
    @@FOUR_DIGIT_PATTERN_OTHER_2 = /^\[(20|19|18|17|16|15|14|13|12|11|10)\](\d{2})/
    @@FOUR_DIGIT_PATTERN_OTHER_3 = /^\[?(20|19|18|17|16|15|14|13|12|11|10)(\d)[^\d]\]?/
    @@FOUR_DIGIT_PATTERN_OTHER_4 = /i\.e\.\,? (20|19|18|17|16|15|14|13|12|11|10)(\d{2})/
    @@FOUR_DIGIT_PATTERN_OTHER_5 = /^\[?(\d{2})\-\-\??\]?/
    @@BC_DATE_PATTERN = /[0-9]+ [Bb]\.?[Cc]\.?/

    # Derives a year string from 260$c, falling back to the 008 start date
    # when 260$c is absent, unrecognised, or a B.C. date.
    # @return [String, nil]
    def best_date
      field_260c = self['260'] && self['260']['c']
      date =
        case field_260c
        when @@THREE_OR_FOUR_DIGITS
          "#{Regexp.last_match(1)}#{Regexp.last_match(2)}"
        when @@FOUR_DIGIT_PATTERN_BRACES
          Regexp.last_match(1)
        when @@FOUR_DIGIT_PATTERN_ONE_BRACE
          # The pattern captures century and year separately; both are needed
          # to form the four-digit year (group 1 alone is only e.g. "19").
          "#{Regexp.last_match(1)}#{Regexp.last_match(2)}"
        when @@FOUR_DIGIT_PATTERN_OTHER_1
          # A leading lowercase "l" is read as the digit 1.
          "1#{Regexp.last_match(1)}"
        when @@FOUR_DIGIT_PATTERN_OTHER_2
          "#{Regexp.last_match(1)}#{Regexp.last_match(2)}"
        when @@FOUR_DIGIT_PATTERN_OTHER_3
          # Three known digits: pad the unknown last digit with 0.
          "#{Regexp.last_match(1)}#{Regexp.last_match(2)}0"
        when @@FOUR_DIGIT_PATTERN_OTHER_4
          "#{Regexp.last_match(1)}#{Regexp.last_match(2)}"
        when @@FOUR_DIGIT_PATTERN_OTHER_5
          # Only the century is known: pad with 00.
          "#{Regexp.last_match(1)}00"
        end
      # @@BC_DATE_PATTERN (and anything unmatched) yields nil and so defers to the 008.
      date || date_from_008
    end

    # Four-character date read from 008 offset 7, with unknown ('u') or blank
    # positions replaced by '0'.
    # @return [String, nil] nil when there is no 008 or the result is not four digits
    def date_from_008
      year_from_008(7, '0')
    end

    # Four-character date read from 008 offset 11, with unknown ('u') or blank
    # positions replaced by '9'.
    # @return [String, nil] nil when there is no 008 or the result is not four digits
    def end_date_from_008
      year_from_008(11, '9')
    end

    # The raw 260$c when present, otherwise the 008 start date.
    # @return [String, nil]
    def date_display
      (self['260'] && self['260']['c']) || date_from_008
    end

    private

      # Shared reader for the two 008 date slots.
      # @param offset [Integer] start position within the 008 value
      # @param filler [String] digit substituted for 'u' and ' '
      # @return [String, nil]
      def year_from_008(offset, filler)
        field = self['008']
        return unless field
        d = field.value[offset, 4]
        # A truncated 008 has no characters at this offset; treat as no date
        # instead of raising NoMethodError on nil.
        return if d.nil?
        # Fully unknown / fully blank dates are left alone so they fail the
        # four-digit check below.
        d = d.gsub('u', filler) unless d == 'uuuu'
        d = d.gsub(' ', filler) unless d == '    '
        d if /^[0-9]{4}$/.match?(d)
      end
  end
end
87

88
# Label used when the kind of standard number in an 024 cannot be determined.
FALLBACK_STANDARD_NO = 'Other standard number'.freeze

# Maps an 024 first-indicator value to a display label.
# @param i [String] the indicator value
# @return [String] the label; '$2' is a placeholder meaning "take the label
#   from subfield $2" (resolved by the caller); any unlisted value gets
#   FALLBACK_STANDARD_NO
def map_024_indicators_to_labels(i)
  {
    '0' => 'International Standard Recording Code',
    '1' => 'Universal Product Code',
    '2' => 'International Standard Music Number',
    '3' => 'International Article Number',
    '4' => 'Serial Item and Contribution Identifier',
    '7' => '$2'
  }.fetch(i, FALLBACK_STANDARD_NO)
end
100

101
# Maps a 246 indicator value to the kind-of-title label.
# @param i [String] the indicator value
# @return [String, nil] the label, or nil for any value outside '0'..'8'
def indicator_label_246(i)
  labels = {
    '0' => 'Portion of title',
    '1' => 'Parallel title',
    '2' => 'Distinctive title',
    '3' => 'Other title',
    '4' => 'Cover title',
    '5' => 'Added title page title',
    '6' => 'Caption title',
    '7' => 'Running title',
    '8' => 'Spine title'
  }
  labels[i]
end
114

115
# Turns a subfield value into a hash key: capitalized, with one trailing
# punctuation character removed.
# @param subfield_value [String]
# @param fallback [Object] returned when nothing is left of the value
# @return [String, Object]
def subfield_specified_hash_key(subfield_value, fallback)
  label = subfield_value.capitalize.gsub(/[[:punct:]]?$/, '')
  return fallback if label.empty?
  label
end
119

120
# Groups the $a values of a record's 024 fields by a label derived from the
# first indicator (or from $2 when the indicator label is the '$2' placeholder).
# @param record [MARC::Record]
# @return [Hash{String => Array<String>}] label => standard numbers
def standard_no_hash(record)
  numbers_by_label = {}
  Traject::MarcExtractor.cached('024').collect_matching_lines(record) do |field, _spec, _extractor|
    label = map_024_indicators_to_labels(field.indicator1)
    number = nil
    field.subfields.each do |subfield|
      case subfield.code
      when 'a'
        number = subfield.value
      when '2'
        # Only consulted while the label is still the unresolved placeholder.
        label = subfield_specified_hash_key(subfield.value, FALLBACK_STANDARD_NO) if label == '$2'
      end
    end
    # Placeholder never resolved (no $2 present).
    label = FALLBACK_STANDARD_NO if label == '$2'
    (numbers_by_label[label] ||= []) << number unless number.nil?
  end
  numbers_by_label
end
134

135
# Handles ISBNs, ISSNs, and OCLCs
# ISBN: 020a, 020z, 776z
# ISSN: 022a, 022l, 022y, 022z, 776x
# OCLC: 035a, 776w, 787w
# BIB: 776w, 787w (adds BIB prefix so Blacklight can detect whether to search id field)
# @param record [MARC::Record]
# @return [Array<String>] normalized identifiers, without nils or duplicates
def other_versions(record)
  linked_nums = []
  Traject::MarcExtractor.cached('020az:022alyz:035a:776wxz:787w').collect_matching_lines(record) do |field, _spec, _extractor|
    tag = field.tag
    field.subfields.each do |subfield|
      code = subfield.code
      value = subfield.value
      linked_nums << StdNum::ISBN.normalize(value) if tag == '020' || (tag == '776' && code == 'z')
      linked_nums << StdNum::ISSN.normalize(value) if tag == '022' || (tag == '776' && code == 'x')
      linked_nums << oclc_normalize(value, prefix: true) if tag == '035' && oclc_number?(value)
      next unless code == 'w' && %w[776 787].include?(tag)

      linked_nums << oclc_normalize(value, prefix: true) if oclc_number?(value)
      # A $w without any parenthesised prefix is treated as a local bib id.
      linked_nums << "BIB#{strip_non_numeric(value)}" unless value.include?('(')
      # A parenthesis that is not at the start is logged as malformed.
      logger.error "#{record['001']} - linked field formatting: #{value}" if value.include?('(') && !value.start_with?('(')
    end
  end
  linked_nums.compact.uniq
end
156

157
# Collects name strings from the 1xx/7xx name fields.
# Only includes values before $t: the text of any extracted subfields that
# follow the first $t is chopped off the end of the extracted name.
# @param record [MARC::Record]
# @return [Array<String>] unique names with punctuation trimmed
def process_names(record)
  names = []
  name_spec = '100aqbcdk:110abcdfgkln:111abcdfgklnpq:700aqbcdk:710abcdfgkln:711abcdfgklnpq'
  Traject::MarcExtractor.cached(name_spec).collect_matching_lines(record) do |field, spec, extractor|
    name = extractor.collect_subfields(field, spec).first
    next if name.nil?

    t_index = field.subfields.index { |subfield| subfield.code == 't' }
    following_t = t_index ? field.subfields[(t_index + 1)..] : []
    title_suffix = following_t.select { |subfield| spec.includes_subfield_code?(subfield.code) }
                              .map { |subfield| " #{subfield.value}" }
                              .join
    names << Traject::Macros::Marc21.trim_punctuation(name.chomp(title_suffix))
  end
  names.uniq
end
175

176
# Same extraction as the name fields (1xx/7xx spec), but keeps only matches
# whose tag is 880.
# Only includes values before $t: the text of any extracted subfields that
# follow the first $t is chopped off the end of the extracted name.
# @param record [MARC::Record]
# @return [Array<String>] unique names with punctuation trimmed
def process_alt_script_names(record)
  names = []
  name_spec = '100aqbcdk:110abcdfgkln:111abcdfgklnpq:700aqbcdk:710abcdfgkln:711abcdfgklnpq'
  Traject::MarcExtractor.cached(name_spec).collect_matching_lines(record) do |field, spec, extractor|
    next if field.tag != '880'

    name = extractor.collect_subfields(field, spec).first
    next if name.nil?

    t_index = field.subfields.index { |subfield| subfield.code == 't' }
    following_t = t_index ? field.subfields[(t_index + 1)..] : []
    title_suffix = following_t.select { |subfield| spec.includes_subfield_code?(subfield.code) }
                              .map { |subfield| " #{subfield.value}" }
                              .join
    names << Traject::Macros::Marc21.trim_punctuation(name.chomp(title_suffix))
  end
  names.uniq
end
195

196
##
# Get hash of authors grouped by role
# @param [MARC::Record]
# @return [Hash] always has 'secondary_authors', 'translators', 'editors' and
#   'compilers' (arrays); 'primary_author' (string) is set when a matched
#   field's tag matches /1../
def process_author_roles(record)
  role_for_relator = {
    'TRL' => 'translators',
    'EDT' => 'editors',
    'COM' => 'compilers',
    'TRANSLATOR' => 'translators',
    'EDITOR' => 'editors',
    'COMPILER' => 'compilers'
  }
  names = {
    'secondary_authors' => [],
    'translators' => [],
    'editors' => [],
    'compilers' => []
  }

  Traject::MarcExtractor.cached('100a:110a:111a:700a:710a:711a').collect_matching_lines(record) do |field, spec, extractor|
    raw_name = extractor.collect_subfields(field, spec).first
    next if raw_name.nil?

    name = Traject::Macros::Marc21.trim_punctuation(raw_name)

    # If name is from 1xx field, it is the primary author.
    if /1../.match?(field.tag)
      names['primary_author'] = name
      next
    end

    # The last relator code ($4) or relator term ($e) in the field wins.
    relator_subfield = field.subfields.reverse.find { |subfield| %w[4 e].include?(subfield.code) }
    relator = relator_subfield ? relator_subfield.value.upcase.gsub(/[[:punct:]]?$/, '') : ''

    # Unrecognised (or missing) relators count as secondary authors.
    names[role_for_relator.fetch(relator, 'secondary_authors')] << name
  end
  names
end
244

245
##
# Process publication information for citations.
# For each 260/264 field, joins the last $a and last $b (each trimmed) as
# "a: b"; a field with only one of them contributes just that value.
# @param [MARC::Record]
# @return [Array] pub info strings from fields 260 and 264.
def set_pub_citation(record)
  pub_citation = []
  Traject::MarcExtractor.cached('260:264').collect_matching_lines(record) do |field, _spec, _extractor|
    parts = { 'a' => nil, 'b' => nil }
    field.subfields.each do |subfield|
      next unless parts.key?(subfield.code)
      parts[subfield.code] = Traject::Macros::Marc21.trim_punctuation(subfield.value).strip
    end

    pub_info = parts.values_at('a', 'b').compact.join(': ')
    pub_citation << pub_info unless pub_info.empty?
  end
  pub_citation
end
268

269
# Em dash used to split hierarchical heading strings into their parts.
SEPARATOR = '—'.freeze
1✔
270

271
# for the hierarchical subject/genre display
# split with em dash along t,v,x,y,z
# optionally pass a block to only allow fields that match certain criteria
# For example, if you only want subject headings from the Bilindex vocabulary,
# you could use `process_hierarchy(record, '650|*7|abcvxyz') { |field| field['2'] == 'bidex' }`
# @param record [MARC::Record]
# @param fields [String] extractor spec of the fields to read
# @return [Array<String>] one heading string per accepted field
def process_hierarchy(record, fields)
  split_on_subfield = %w[t v x y z]
  headings = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, extractor|
    # The caller's block, when given, acts as a filter.
    next if block_given? && !yield(field)
    # Skip fields for which the spec extracts nothing.
    next unless extractor.collect_subfields(field, spec).first

    headings << HierarchicalHeading.new(field:, spec:, split_on_subfield:).to_s
  end
  headings.compact
end
286

287
# for the split subject facet
# split with em dash along x,z
# Second-indicator-0 6xx headings are always used; second-indicator-7 650
# headings are used only when they are siku, local, or 'homoit' headings.
# @param record [MARC::Record]
# @return [Array<String>] the individual heading parts, in field order
def process_subject_topic_facet(record)
  subjects = []
  lc_spec = '600|*0|abcdfklmnopqrtxz:610|*0|abfklmnoprstxz:611|*0|abcdefgklnpqstxz:630|*0|adfgklmnoprstxz:650|*0|abcxz:651|*0|axz'
  Traject::MarcExtractor.cached(lc_spec).collect_matching_lines(record) do |field, spec, extractor|
    next if extractor.collect_subfields(field, spec).first.nil?

    heading = HierarchicalHeading.new(field:, spec:, split_on_subfield: %w[x z]).to_s
    subjects.concat(heading.split(SEPARATOR))
  end
  Traject::MarcExtractor.cached('650|*7|abcxz').collect_matching_lines(record) do |field, spec, extractor|
    subject = extractor.collect_subfields(field, spec).first
    accepted = siku_heading?(field) || local_heading?(field) || any_thesaurus_match?(field, %w[homoit])
    next unless accepted && !subject.nil?

    heading = HierarchicalHeading.new(field:, spec:, split_on_subfield: %w[x z]).to_s
    subjects.concat(heading.split(SEPARATOR))
  end
  subjects
end
308

309
# Keeps only the ASCII digits of a string and renders them as an integer,
# which drops leading zeros and turns a digit-less string into "0".
# @param num_str [String]
# @return [String]
def strip_non_numeric(num_str)
  digits = num_str.delete('^0-9')
  digits.to_i.to_s
end
312

313
# Tests whether a value looks like an OCLC control number.
# @param oclc [String]
# @return [Boolean]
def oclc_number?(oclc)
  # Spaces and dashes are ignored when matching.
  compacted = oclc.gsub(/[\-\s]/, '')
  # Ensure it follows the OCLC standard
  # (see https://help.oclc.org/Metadata_Services/WorldShare_Collection_Manager/Data_sync_collections/Prepare_your_data/30035_field_and_OCLC_control_numbers)
  compacted.match?(/\(OCoLC\)(ocn|ocm|on)*\d+/)
end
320

321
# Reduces an OCLC number to its digits, optionally adding the length-based
# prefix: "ocm" plus zero-padding to 8 digits for up to 8 digits, "ocn" for
# exactly 9 digits, "on" for anything longer.
# @param oclc [String]
# @param opts [Hash] the prefix is added only when opts[:prefix] is exactly true
# @return [String]
def oclc_normalize(oclc, opts = { prefix: false })
  oclc_num = strip_non_numeric(oclc)
  return oclc_num unless opts[:prefix] == true

  digit_count = oclc_num.length
  if digit_count.between?(1, 8)
    "ocm#{oclc_num.rjust(8, '0')}"
  elsif digit_count == 9
    "ocn#{oclc_num}"
  else
    "on#{oclc_num}"
  end
end
336

337
# Construct (or retrieve) the cache manager service.
# The manager is built on the first call and kept in @cache_manager; later
# calls return that instance and ignore figgy_dir_path.
# @param figgy_dir_path [String] directory handed to Lightly for the figgy cache
# @return [CacheManager] the cache manager service
def build_cache_manager(figgy_dir_path:)
  if @cache_manager.nil?
    lightly = Lightly.new(dir: figgy_dir_path, life: 0, hash: false)
    adapter = CacheAdapter.new(service: lightly)
    CacheManager.initialize(figgy_cache: adapter, logger:)
    @cache_manager = CacheManager.current
  end
  @cache_manager
end
349

350
# returns hash of links ($u) (key),
# anchor text ($y, $3, hostname), and additional labels ($z) (array value)
#
# Besides the url => labels entries, the hash may carry two extra keys:
# 'holding_record_856s' (links that have a holding id, keyed by that id) and
# 'iiif_manifest_paths' (link url key => Figgy IIIF manifest URL).
# @param [MARC::Record] the MARC record being parsed
# @param figgy_dir_path [String] cache directory passed to build_cache_manager
# @return [Hash] the values used to construct the links
def electronic_access_links(record, figgy_dir_path)
  solr_field_values = {}
  holding_856s = {}
  iiif_manifest_paths = {}
  output = []
  fragment_index = 0

  Traject::MarcExtractor.cached('856').collect_matching_lines(record) do |field, _spec, _extractor|
    electronic_access_link = ElectronicAccessLinkFactory.build bib_id: record['001'], marc_field: field

    # If the electronic access link is an ARK...
    if electronic_access_link.ark
      # ...attempt to build an Orangelight URL from the (cached) mappings exposed by the repositories
      cache_manager = build_cache_manager(figgy_dir_path:)

      # Orangelight links
      catalog_url_builder = OrangelightUrlBuilder.new(ark_cache: cache_manager.ark_cache, fragment: fragment_value(fragment_index))
      orangelight_url = catalog_url_builder.build(url: electronic_access_link.ark)

      if orangelight_url
        # Index this by the domain for Orangelight; a link whose anchor text
        # is just its hostname gets a generic label instead.
        anchor_text = electronic_access_link.anchor_text
        anchor_text = 'Digital content' if electronic_access_link.url&.host == electronic_access_link.anchor_text
        output << electronic_access_link.clone(url_key: orangelight_url.to_s, anchor_text: anchor_text)
      else
        # Otherwise, always add the link to the resource
        output << electronic_access_link
      end

      # Figgy URL's
      figgy_url_builder = IIIFManifestUrlBuilder.new(ark_cache: cache_manager.figgy_ark_cache, service_host: 'figgy.princeton.edu')
      figgy_iiif_manifest = figgy_url_builder.build(url: electronic_access_link.ark)
      if figgy_iiif_manifest
        figgy_iiif_manifest_link = electronic_access_link.clone url_key: figgy_iiif_manifest.to_s
        iiif_manifest_paths[electronic_access_link.url_key] = figgy_iiif_manifest_link.url.to_s
      end
    else
      # Always add links to the resource if it isn't an ARK
      output << electronic_access_link
    end

    fragment_index += 1
  end

  # Index every collected link once. Later links overwrite earlier ones that
  # share a key, which is the same end state as re-indexing the whole list
  # after each 856.
  output.each do |link|
    if link.holding_id
      holding_856s[link.holding_id] = { link.url_key => link.url_labels }
    elsif link.url_key && link.url_labels
      solr_field_values[link.url_key] = link.url_labels
    end
  end

  solr_field_values['holding_record_856s'] = holding_856s unless holding_856s == {}
  solr_field_values['iiif_manifest_paths'] = iiif_manifest_paths unless iiif_manifest_paths.empty?
  solr_field_values
end
420

421
# Builds the URL fragment for the link at the given position: 'view' for
# index 0, "view_<index>" for every other index.
# @param fragment_index [Integer]
# @return [String]
def fragment_value(fragment_index)
  fragment_index == 0 ? 'view' : "view_#{fragment_index}"
end
428

429
# Removes a parenthesised prefix, such as "(OCoLC)", from the start of a
# standard number.
# @param standard_no [String]
# @return [String]
def remove_parens_035(standard_no)
  leading_parenthetical = /^\(.*?\)/
  standard_no.gsub(leading_parenthetical, '')
end
432

433
# $x subdivision values that process_genre_facet accepts as genres on an exact match.
GENRES = [
  'Bibliography',
  'Biography',
  'Catalogs',
  'Catalogues raisonnes',
  'Commentaries',
  'Congresses',
  'Diaries',
  'Dictionaries',
  'Drama',
  'Encyclopedias',
  'Exhibitions',
  'Fiction',
  'Guidebooks',
  'In art',
  'Indexes',
  'Librettos',
  'Manuscripts',
  'Newspapers',
  'Periodicals',
  'Pictorial works',
  'Poetry',
  'Portraits',
  'Scores',
  'Songs and music',
  'Sources',
  'Statistics',
  'Texts',
  'Translations'
].freeze

# Strings that process_genre_facet looks for inside a $x subdivision value.
GENRE_STARTS_WITH = [
  'Census',
  'Maps',
  'Methods',
  'Parts',
  'Personal narratives',
  'Scores and parts',
  'Study and teaching',
  'Translations into '
].freeze

# $2 vocabulary codes whose second-indicator-7 headings count as genres.
SUBJECT_GENRE_VOCABULARIES = ['sk', 'aat', 'lcgft', 'rbbin', 'rbgenr', 'rbmscv',
                              'rbpap', 'rbpri', 'rbprov', 'rbpub', 'rbtyp', 'homoit'].freeze
477

478
# Builds the genre facet values in three passes:
# 1. $x of second-indicator-0 6xx fields, kept only when the value is in
#    GENRES or contains one of the GENRE_STARTS_WITH strings
#    (NOTE(review): the check is a substring test, not a prefix test)
# 2. 650 $v / 655 $a, $v with second indicator 7, kept only when the field's
#    last $2 is one of SUBJECT_GENRE_VOCABULARIES
# 3. $v of second-indicator-0 6xx fields and 655 $a, $v, kept unconditionally
# Whitespace-only values in passes 2 and 3 are logged and skipped.
# @param record [MARC::Record]
# @return [Array<String>] unique genre strings
def process_genre_facet(record)
  genres = []

  Traject::MarcExtractor.cached('600|*0|x:610|*0|x:611|*0|x:630|*0|x:650|*0|x:651|*0|x:655|*0|x').collect_matching_lines(record) do |field, spec, extractor|
    raw_genre = extractor.collect_subfields(field, spec).first
    next if raw_genre.nil?

    genre = Traject::Macros::Marc21.trim_punctuation(raw_genre)
    genres << genre if GENRES.include?(genre) || GENRE_STARTS_WITH.any? { |fragment| genre.include?(fragment) }
  end

  Traject::MarcExtractor.cached('650|*7|v:655|*7|a:655|*7|v').collect_matching_lines(record) do |field, spec, extractor|
    # only include heading if it is part of the vocabulary (the last $2 decides)
    vocabulary = field.subfields.reverse.find { |subfield| subfield.code == '2' }
    should_include = vocabulary ? SUBJECT_GENRE_VOCABULARIES.include?(vocabulary.value) : false

    raw_genre = extractor.collect_subfields(field, spec).first
    next if raw_genre.nil?

    genre = Traject::Macros::Marc21.trim_punctuation(raw_genre)
    if genre.match?(/^\s+$/)
      logger.error "#{record['001']} - Blank genre field"
    elsif should_include
      genres << genre
    end
  end

  Traject::MarcExtractor.cached('600|*0|v:610|*0|v:611|*0|v:630|*0|v:650|*0|v:651|*0|v:655|*0|a:655|*0|v').collect_matching_lines(record) do |field, spec, extractor|
    raw_genre = extractor.collect_subfields(field, spec).first
    next if raw_genre.nil?

    genre = Traject::Macros::Marc21.trim_punctuation(raw_genre)
    if genre.match?(/^\s+$/)
      logger.error "#{record['001']} - Blank genre field"
    else
      genres << genre
    end
  end

  genres.uniq
end
518

519
# For each matching field, joins the values of the first $t and every
# subfield after it (each exactly once) with spaces and trims punctuation.
# Fields without a $t contribute nothing.
# @param record [MARC::Record]
# @param fields [String] extractor spec of the fields to read
# @return [Array<String>]
def everything_after_t(record, fields)
  values = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, _spec, _extractor|
    # drop_while keeps the first $t and all later subfields; a repeated $t is
    # therefore included once rather than twice.
    title = field.subfields.drop_while { |s_field| s_field.code != 't' }.map(&:value)
    values << Traject::Macros::Marc21.trim_punctuation(title.join(' ')) unless title.empty?
  end
  values
end
535

536
# Like everything_after_t, but keeps only matches whose tag is 880: for each
# such field, joins the values of the first $t and every subfield after it
# (each exactly once) with spaces and trims punctuation.
# @param record [MARC::Record]
# @param fields [String] extractor spec of the fields to read
# @return [Array<String>]
def everything_after_t_alt_script(record, fields)
  values = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, _spec, _extractor|
    next unless field.tag == '880'
    # drop_while keeps the first $t and all later subfields; a repeated $t is
    # therefore included once rather than twice.
    title = field.subfields.drop_while { |s_field| s_field.code != 't' }.map(&:value)
    values << Traject::Macros::Marc21.trim_punctuation(title.join(' ')) unless title.empty?
  end
  values
end
553

554
# For each matching field that has a $t, joins the values of every subfield
# up to and including the first $t with spaces and trims punctuation.
# Fields without a $t contribute nothing.
# @param record [MARC::Record]
# @param fields [String] extractor spec of the fields to read
# @return [Array<String>]
def everything_through_t(record, fields)
  values = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, _spec, _extractor|
    t_position = field.subfields.index { |subfield| subfield.code == 't' }
    next if t_position.nil?

    through_t = field.subfields[0..t_position].map(&:value)
    values << Traject::Macros::Marc21.trim_punctuation(through_t.join(' '))
  end
  values
end
570

571
##
# Splits each matching field into a name part (subfields before the first $t,
# joined with spaces) followed by the $t value and each later subfield value.
# Only subfields named by the spec are considered (all of them when the spec
# names none).
# @param record [MARC::Record]
# @param fields [String] MARC fields of interest
# @return [Array] of name-titles each in an [Array], each element [String] split by hierarchy,
# both name ($a) and title ($t) are required
def prep_name_title(record, fields)
  values = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, _extractor|
    relevant = field.subfields.select do |subfield|
      spec.subfields.nil? || spec.subfields.include?(subfield.code)
    end
    t_position = relevant.index { |subfield| subfield.code == 't' }
    next unless t_position && relevant.any? { |subfield| subfield.code == 'a' }

    author = relevant[0...t_position].map(&:value).join(' ')
    title_parts = relevant[t_position..].map(&:value)
    values << [author, *title_parts]
  end
  values
end
600

601
# Expands each list of hierarchy portions into its cumulative prefixes:
# element n is portions 0..n joined with spaces, with punctuation trimmed.
# @param fields [Array] with portions of hierarchy
# @return [Array] portions of hierarchy including previous elements
def expand_sublists_for_hierarchy(fields)
  fields.map do |portions|
    portions.each_index.map do |last|
      Traject::Macros::Marc21.trim_punctuation(portions[0..last].join(' '))
    end
  end
end
610

611
# Expands hierarchy portions into cumulative strings, dropping the first
# (name-only) element of each list unless include_first_element is set.
# @param fields [Array] with portions of hierarchy from name-titles or title-only fields
# @param include_first_element [Boolean] the first element is dropped only
#   when this is exactly false
# @return [Array] portions of hierarchy including previous elements
def join_hierarchy(fields, include_first_element: false)
  expanded = expand_sublists_for_hierarchy(fields)
  # Include full hierarchy
  return expanded unless include_first_element == false

  # Exclude the name-only portion of hierarchy
  expanded.map { |levels| levels[1..-1] }
end
622

623
# Removes empty call_number fields from holdings_1display
# Deletes "call_number" and "call_number_browse" in place when their value is
# empty; a missing key is left alone.
# @param holding [Hash]
# @return [Hash] the same hash that was passed in
def remove_empty_call_number_fields(holding)
  %w[call_number call_number_browse].each do |key|
    holding.delete(key) if holding.fetch(key, []).empty?
  end
  holding
end
627

628
# Collects only non empty khi
# @param field [#subfields] a field whose subfields respond to code and value
# @return [Array] the $k, $h and $i subfields with a non-empty value, in field order
def call_number_khi(field)
  field.subfields.select do |subfield|
    !subfield.value.empty? && %w[k h i].include?(subfield.code)
  end
end
632

633
# Alma Princeton item
# @param code [#to_s]
# @return [Boolean] true when the code starts with "22" and ends with "06421"
def alma_code_start_22?(code)
  identifier = code.to_s
  identifier.start_with?("22") && identifier.end_with?("06421")
end
637

638
# @param code [#to_s]
# @return [Boolean] true when the code starts with "53" and ends with "06421"
def alma_code_start_53?(code)
  identifier = code.to_s
  identifier.start_with?("53") && identifier.end_with?("06421")
end
641

642
# @param code [#to_s]
# @return [Boolean] true when the code starts with "scsb"
def scsb_code_start?(code)
  location = code.to_s
  location.start_with?("scsb")
end
645

646
# @param record [MARC::Record]
# @return [Array] the 852 fields whose $8 passes alma_code_start_22?
def alma_852(record)
  record.fields('852').select do |field_852|
    alma_code_start_22?(field_852['8'])
  end
end
649

650
# @param record [MARC::Record]
# @return [Array] the 852 fields whose $b passes scsb_code_start?
def scsb_852(record)
  record.fields('852').select do |field_852|
    scsb_code_start?(field_852['b'])
  end
end
653

654
# Builds browsable call-number strings from a record's 852 fields (the SCSB
# 852s when the 001 is a SCSB id, the Alma 852s otherwise).
# Fields with no non-empty $k/$h/$i are skipped.
# @param record [MARC::Record]
# @param khi_key_order [Array<String>] the order in which the first three
#   listed subfield codes are joined
# @return [Array<String>] one space-joined string per usable field
def browse_fields(record, khi_key_order: ['k', 'h', 'i'])
  source_fields = scsb_doc?(record['001']&.value) ? scsb_852(record) : alma_852(record)

  source_fields.each_with_object([]) do |field, result|
    next if call_number_khi(field).empty?

    values = khi_key_order.values_at(0, 1, 2).map { |code| field[code] }.compact.reject(&:empty?)
    result << values.join(" ") if values.present?
  end
end
669

670
# @param record [MARC::Record]
# @return [Array] the 876 fields whose $0 passes alma_code_start_22?
def alma_876(record)
  record.fields('876').select do |field_876|
    alma_code_start_22?(field_876['0'])
  end
end
673

674
# @param record [MARC::Record]
# @return [Array] the 951 fields whose $8 passes alma_code_start_53? and whose
#   $a is exactly "Available"
def alma_951_active(record)
  record.fields('951').select do |field_951|
    alma_code_start_53?(field_951['8']) && field_951['a'] == "Available"
  end
end
678

679
# @param record [MARC::Record]
# @return [Array] the 953 fields whose $a passes alma_code_start_53?
def alma_953(record)
  record.fields('953').select do |field_953|
    alma_code_start_53?(field_953['a'])
  end
end
682

683
# @param record [MARC::Record]
# @return [Array] the 954 fields whose $a passes alma_code_start_53?
def alma_954(record)
  record.fields('954').select do |field_954|
    alma_code_start_53?(field_954['a'])
  end
end
686

687
# Reads $b from the first 950 field whose $a is "true" or "false".
# @param record [MARC::Record]
# @return [String, nil] nil when no 950 field qualifies
def alma_950(record)
  flagged = record.fields('950').find { |field_950| %w[true false].include?(field_950['a']) }
  flagged['b'] if flagged
end
691

692
# SCSB item
# Keep this check with the alma_code? check
# until we make sure that the records in alma are updated
# @param record_id [String, nil]
# @return [Boolean] true when a line of the id starts with "SCSB-" followed by a digit
def scsb_doc?(record_id)
  scsb_id_pattern = /^SCSB-\d+/
  scsb_id_pattern.match?(record_id)
end
698

699
# Builds the holdings hash for a record from its 852 fields, via
# ProcessHoldingsHelpers.
# @param record [MARC::Record]
# @return [Hash] holdings keyed by holding id (or by location code for items
#   added through add_temporary_items_to_holdings)
def process_holdings(record)
  all_holdings = {}
  helpers = ProcessHoldingsHelpers.new(record:)
  helpers.fields_852_alma_or_scsb.each do |field_852|
    holding_id = helpers.holding_id(field_852)
    # Calculate the permanent holding
    permanent_holding = helpers.build_holding(field_852, permanent: true)
    items = helpers.items_by_holding_id(holding_id)
    grouped_866_867_868 = helpers.group_866_867_868_on_holding_perm_id(holding_id, field_852)

    if items.present?
      # if there are items (876 fields)
      add_permanent_items_to_holdings(items, field_852, helpers, all_holdings, permanent_holding)
      add_temporary_items_to_holdings(items, field_852, helpers, all_holdings)
    elsif !(holding_id.nil? || invalid_location?(permanent_holding['location_code']))
      # if there are no items (876 fields), create the holding by using the 852 field
      all_holdings[holding_id] = remove_empty_call_number_fields(permanent_holding)
    end

    # 866/867/868 data is only merged in when this holding made it into the hash.
    next unless all_holdings.present? && all_holdings[holding_id]

    all_holdings = helpers.process_866_867_868_fields(fields: grouped_866_867_868, all_holdings: all_holdings, holding_id: holding_id)
  end
  all_holdings
end
720

721
# Adds every 876 item that select_permanent_location_876 returns for this 852
# to the holding keyed by the 852's holding id.
# @param items_by_holding_id [Array] 876 fields for this holding
# @param field_852 [MARC::DataField]
# @param holdings_helpers [ProcessHoldingsHelpers]
# @param all_holdings [Hash] accumulated holdings, updated in place
# @param holding [Hash] the permanent holding built from field_852
def add_permanent_items_to_holdings(items_by_holding_id, field_852, holdings_helpers, all_holdings, holding)
  permanent_items = holdings_helpers.select_permanent_location_876(items_by_holding_id, field_852)
  permanent_items.each do |field_876|
    add_item_to_holding(field_852, field_876, holdings_helpers.holding_id(field_852), holdings_helpers, all_holdings, holding)
  end
end
729

730
# Adds every 876 item that select_temporary_location_876 returns for this 852.
# Items whose current location is 'RES_SHARE$IN_RS_REQ' stay under the 852's
# holding id with a permanent holding; all others are keyed by their current
# location code with a non-permanent holding. Either way the holding records
# the current location as 'temp_location_code'.
# @param items_by_holding_id [Array] 876 fields for this holding
# @param field_852 [MARC::DataField]
# @param holdings_helpers [ProcessHoldingsHelpers]
# @param all_holdings [Hash] accumulated holdings, updated in place
def add_temporary_items_to_holdings(items_by_holding_id, field_852, holdings_helpers, all_holdings)
  temporary_items = holdings_helpers.select_temporary_location_876(items_by_holding_id, field_852)
  temporary_items.each do |field_876|
    # presumably current_location_code is a pure lookup on the 876, so it is read once — verify
    current_location = holdings_helpers.current_location_code(field_876)
    if current_location == 'RES_SHARE$IN_RS_REQ'
      holding = holdings_helpers.build_holding(field_852, permanent: true)
      holding_key = holdings_helpers.holding_id(field_852)
    else
      holding = holdings_helpers.build_holding(field_852, field_876, permanent: false)
      holding_key = current_location
    end
    holding['temp_location_code'] = current_location
    add_item_to_holding(field_852, field_876, holding_key, holdings_helpers, all_holdings, holding)
  end
end
745

746
# rubocop:disable Metrics/ParameterLists
747
# Builds the item for an 852/876 pair and attaches it to the holding stored
# under holding_key, first storing the holding itself when that key is still
# unset and either the key is present or the holding's location code is valid.
# @param field_852 [MARC::DataField]
# @param field_876 [MARC::DataField]
# @param holding_key [String, nil] key into all_holdings
# @param holdings_helpers [ProcessHoldingsHelpers]
# @param all_holdings [Hash] accumulated holdings
# @param holding [Hash] holding stored when the key is still unset
# @return the result of holdings_helpers.holding_items
def add_item_to_holding(field_852, field_876, holding_key, holdings_helpers, all_holdings, holding)
  item = holdings_helpers.build_item(field_852: field_852, field_876: field_876)
  storable = holding_key.present? || !invalid_location?(holding['location_code'])
  all_holdings[holding_key] = remove_empty_call_number_fields(holding) if storable && all_holdings[holding_key].nil?
  holdings_helpers.holding_items(value: holding_key, all_holdings: all_holdings, item: item)
end
754
# rubocop:enable Metrics/ParameterLists
755

756
# @param code [String, nil] a location code
# @return [Boolean] true when the "locations" translation map has no value for the code
def invalid_location?(code)
  locations = Traject::TranslationMap.new("locations")
  locations[code].nil?
end
759

760
# Builds "<partner> - <collection group>" notes for SCSB records, one per 876
# field that has a $0.
# The partner letter comes from the last $b seen on the record's 852 fields
# that have a $0; the collection-group letter comes from the 876's last $x.
# Records whose 001 is not a SCSB id produce no notes.
# @param record [MARC::Record]
# @return [Array<String>]
def process_recap_notes(record)
  partner_letters = { 'scsbnypl' => 'N', 'scsbcul' => 'C', 'scsbhl' => 'H' }
  collection_letters = { 'Shared' => 'S', 'Private' => 'P', 'Committed' => 'C', 'Uncommittable' => 'U' }

  item_notes = []
  partner_lib = nil
  Traject::MarcExtractor.cached('852').collect_matching_lines(record) do |field, _spec, _extractor|
    next unless scsb_doc?(record['001'].value) && field['0']

    last_b = field.subfields.reverse.find { |subfield| subfield.code == 'b' }
    partner_lib = last_b.value if last_b
  end

  Traject::MarcExtractor.cached('87603ahjptxz').collect_matching_lines(record) do |field, _spec, _extractor|
    next unless scsb_doc?(record['001'].value) && field['0']

    last_x = field.subfields.reverse.find { |subfield| subfield.code == 'x' }
    # No $x leaves the group blank; an unrecognised $x value maps to 'O'.
    col_group = last_x ? collection_letters.fetch(last_x.value, 'O') : ''
    # An unrecognised partner leaves the partner part of the note blank.
    partner_display_string = partner_letters[partner_lib]
    item_notes << "#{partner_display_string} - #{col_group}"
  end
  item_notes
end
802

803
# @param field [Enumerable] a field whose subfields respond to code and value
# @return [Boolean] true when the field has both $2 "local" and $5 "NjP"
def local_heading?(field)
  has_subfield = lambda do |code, value|
    field.any? { |subfield| subfield.code == code && subfield.value == value }
  end
  has_subfield.call('2', 'local') && has_subfield.call('5', 'NjP')
end
807

808
# @param field [Enumerable] a field whose subfields respond to code and value
# @return [Boolean] true when the field's $2 is "sk" or "skbb"
def siku_heading?(field)
  siku_thesauri = %w[sk skbb]
  any_thesaurus_match?(field, siku_thesauri)
end
811

812
# @param field [Enumerable] a field whose subfields respond to code and value
# @param thesauri [#include?] accepted $2 values
# @return [Boolean] true when any $2 of the field has one of the accepted values
def any_thesaurus_match?(field, thesauri)
  field.select { |subfield| subfield.code == '2' }
       .any? { |source| thesauri.include?(source.value) }
end
815

816
# Appends to the accumulator the $w of each field with the given tag whose $w
# starts with "99" and ends with "06421".
# The regular expression /99[0-9]+6421/ ensures that an mms id is present in a $w
# @param record [MARC::Record]
# @param field_tag [String]
# @param accumulator [Array] receives the accepted $w values
# @return [Array] the fields whose $w matched the regular expression
def valid_linked_fields(record, field_tag, accumulator)
  mms_id_pattern = /99[0-9]+6421/
  record.fields(field_tag).select { |field| field["w"] =~ mms_id_pattern }.each do |field|
    linked_id = field["w"]
    accumulator << linked_id if linked_id.start_with?("99") && linked_id.end_with?("06421")
  end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc