• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 373ad6ff-fad2-405a-ab26-3d30fb5ceecf

24 Dec 2024 08:24PM UTC coverage: 91.938% (+0.08%) from 91.859%
373ad6ff-fad2-405a-ab26-3d30fb5ceecf

Pull #2563

circleci

maxkadel
Put attaching xml files in their own batch
Pull Request #2563: I2321 Shift SCSB full index tasks into separate background jobs

152 of 156 new or added lines in 10 files covered. (97.44%)

65 existing lines in 17 files now uncovered.

3478 of 3783 relevant lines covered (91.94%)

366.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.79
/marc_to_solr/lib/princeton_marc.rb
1
# encoding: UTF-8
2
require 'active_support'
1✔
3
require 'library_stdnums'
1✔
4
require 'lightly'
1✔
5
require 'uri'
1✔
6
require_relative 'cache_adapter'
1✔
7
require_relative 'cache_manager'
1✔
8
require_relative 'cache_map'
1✔
9
require_relative 'composite_cache_map'
1✔
10
require_relative 'electronic_access_link'
1✔
11
require_relative 'electronic_access_link_factory'
1✔
12
require_relative 'hierarchical_heading'
1✔
13
require_relative 'iiif_manifest_url_builder'
1✔
14
require_relative 'linked_fields_extractor'
1✔
15
require_relative 'orangelight_url_builder'
1✔
16
require_relative 'process_holdings_helpers'
1✔
17

18
module MARC
  # Extensions to MARC::Record for deriving publication dates from the
  # 260$c and 008 fields.
  class Record
    # Taken from pul-store marc.rb lib extension
    # Shamelessly lifted from SolrMARC, with a few changes; no doubt there will
    # be more.
    @@THREE_OR_FOUR_DIGITS = /^(20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1)(\d{2})\.?$/
    @@FOUR_DIGIT_PATTERN_BRACES = /^\[([12]\d{3})\??\]\.?$/
    @@FOUR_DIGIT_PATTERN_ONE_BRACE = /^\[(20|19|18|17|16|15|14|13|12|11|10)(\d{2})/
    @@FOUR_DIGIT_PATTERN_OTHER_1 = /^l(\d{3})/
    @@FOUR_DIGIT_PATTERN_OTHER_2 = /^\[(20|19|18|17|16|15|14|13|12|11|10)\](\d{2})/
    @@FOUR_DIGIT_PATTERN_OTHER_3 = /^\[?(20|19|18|17|16|15|14|13|12|11|10)(\d)[^\d]\]?/
    @@FOUR_DIGIT_PATTERN_OTHER_4 = /i\.e\.\,? (20|19|18|17|16|15|14|13|12|11|10)(\d{2})/
    @@FOUR_DIGIT_PATTERN_OTHER_5 = /^\[?(\d{2})\-\-\??\]?/
    @@BC_DATE_PATTERN = /[0-9]+ [Bb]\.?[Cc]\.?/

    # Derive a year from 260$c, falling back to the 008 start date when
    # 260$c is absent or matches none of the known patterns.
    # @return [String, nil] the year digits, or nil when nothing usable is found
    def best_date
      date = nil
      field_260c = self['260'] && self['260']['c']
      if field_260c
        # The patterns are tried in order; the first match wins.
        case field_260c
        when @@THREE_OR_FOUR_DIGITS
          date = "#{$1}#{$2}"
        when @@FOUR_DIGIT_PATTERN_BRACES
          date = $1
        when @@FOUR_DIGIT_PATTERN_ONE_BRACE
          # The year is split across both capture groups; $1 alone is only
          # the leading two digits (e.g. "19" for "[1999-2000]").
          date = "#{$1}#{$2}"
        when @@FOUR_DIGIT_PATTERN_OTHER_1
          # A lowercase "l" stands in for the leading digit "1".
          date = "1#{$1}"
        when @@FOUR_DIGIT_PATTERN_OTHER_2
          date = "#{$1}#{$2}"
        when @@FOUR_DIGIT_PATTERN_OTHER_3
          # Only three digits are known; pad the final digit with 0.
          date = "#{$1}#{$2}0"
        when @@FOUR_DIGIT_PATTERN_OTHER_4
          date = "#{$1}#{$2}"
        when @@FOUR_DIGIT_PATTERN_OTHER_5
          # Only the first two digits are known; pad with 00.
          date = "#{$1}00"
        when @@BC_DATE_PATTERN
          # B.C. dates are not taken from 260$c; fall through to the 008.
          date = nil
        end
      end
      date || date_from_008
    end

    # Start date from 008 positions 7-10, with unknown digits ('u' or
    # blank) replaced by '0'.
    # @return [String, nil] four digits, or nil when the 008 is missing, too
    #   short, entirely unknown, or not numeric after substitution
    def date_from_008
      date_segment_from_008(7, '0')
    end

    # End date from 008 positions 11-14, with unknown digits ('u' or
    # blank) replaced by '9'.
    # @return [String, nil] four digits, or nil when the 008 is missing, too
    #   short, entirely unknown, or not numeric after substitution
    def end_date_from_008
      date_segment_from_008(11, '9')
    end

    # Date string for display: the raw 260$c when present, otherwise the
    # 008 start date.
    # @return [String, nil]
    def date_display
      date = self['260'] && self['260']['c']
      date || date_from_008
    end

    private

    # Extract four characters of the 008 starting at +offset+ and replace
    # unknown digits with +filler+.
    # @param offset [Integer] zero-based start position within the 008 value
    # @param filler [String] replacement for 'u' and blank characters
    # @return [String, nil]
    def date_segment_from_008(offset, filler)
      # to_s guards a missing field or nil value; the slice is nil when the
      # 008 is shorter than the offset.
      d = self['008']&.value.to_s[offset, 4]
      return if d.nil?

      # A wholly unknown segment is left alone so it fails the digit check below.
      d = d.gsub('u', filler) unless d == 'uuuu'
      d = d.gsub(' ', filler) unless d == '    '
      d if /\A[0-9]{4}\z/.match?(d)
    end
  end
end
89

90
# Label applied to 024 standard numbers whose scheme cannot be determined
# from the first indicator or from subfield $2.
FALLBACK_STANDARD_NO = 'Other standard number'
1✔
91
# Translate the first indicator of an 024 field into a display label.
# @param i [String] the indicator value
# @return [String] the scheme label; the placeholder '$2' for indicator '7'
#   (the caller resolves it from subfield $2); FALLBACK_STANDARD_NO otherwise
def map_024_indicators_to_labels(i)
  case i
  when '0' then 'International Standard Recording Code'
  when '1' then 'Universal Product Code'
  when '2' then 'International Standard Music Number'
  when '3' then 'International Article Number'
  when '4' then 'Serial Item and Contribution Identifier'
  when '7' then '$2'
  else FALLBACK_STANDARD_NO
  end
end
102

103
# Translate a 246 indicator value into a title-type label.
# @param i [String] the indicator value
# @return [String, nil] the label, or nil for any value other than '0'..'8'
def indicator_label_246(i)
  case i
  when '0' then 'Portion of title'
  when '1' then 'Parallel title'
  when '2' then 'Distinctive title'
  when '3' then 'Other title'
  when '4' then 'Cover title'
  when '5' then 'Added title page title'
  when '6' then 'Caption title'
  when '7' then 'Running title'
  when '8' then 'Spine title'
  end
end
116

117
# Build a hash key from a subfield value: capitalized, with one trailing
# punctuation character removed.
# @param subfield_value [String, nil] the raw subfield value
# @param fallback [String] key to use when the cleaned value is empty or
#   the subfield value is nil
# @return [String]
def subfield_specified_hash_key(subfield_value, fallback)
  return fallback if subfield_value.nil?

  key = subfield_value.capitalize.gsub(/[[:punct:]]?$/, '')
  key.empty? ? fallback : key
end
121

122
# Group the $a values of a record's 024 fields by standard-number label.
# @param record [MARC::Record]
# @return [Hash{String => Array<String>}] label => numbers; 024 fields
#   without a $a contribute nothing
def standard_no_hash(record)
  numbers_by_label = {}
  Traject::MarcExtractor.cached('024').collect_matching_lines(record) do |field, _spec, _extractor|
    label = map_024_indicators_to_labels(field.indicator1)
    number = nil
    field.subfields.each do |sub|
      case sub.code
      when 'a'
        number = sub.value
      when '2'
        # Indicator 7 defers the label to subfield $2.
        label = subfield_specified_hash_key(sub.value, FALLBACK_STANDARD_NO) if label == '$2'
      end
    end
    # Indicator 7 without a $2 subfield: the placeholder was never resolved.
    label = FALLBACK_STANDARD_NO if label == '$2'
    next if number.nil?

    (numbers_by_label[label] ||= []) << number
  end
  numbers_by_label
end
136

137
# Handles ISBNs, ISSNs, and OCLCs
138
# ISBN: 020a, 020z, 776z
139
# ISSN: 022a, 022l, 022y, 022z, 776x
140
# OCLC: 035a, 776w, 787w
141
# BIB: 776w, 787w (adds BIB prefix so Blacklight can detect whether to search id field)
142
# Collect normalized identifiers that link this record to other versions.
# ISBNs come from 020 and 776$z, ISSNs from 022 and 776$x, OCLC numbers from
# 035 and 776$w/787$w, and "BIB"-prefixed ids from 776$w/787$w values that
# contain no parenthesis.
# @param record [MARC::Record]
# @return [Array<String>] unique identifiers with nils removed
def other_versions(record)
  linked_nums = []
  Traject::MarcExtractor.cached('020az:022alyz:035a:776wxz:787w').collect_matching_lines(record) do |field, _spec, _extractor|
    tag = field.tag
    field.subfields.each do |sub|
      code = sub.code
      value = sub.value
      linked_nums << StdNum::ISBN.normalize(value) if tag == "020" || (tag == "776" && code == 'z')
      linked_nums << StdNum::ISSN.normalize(value) if tag == "022" || (tag == "776" && code == 'x')
      linked_nums << oclc_normalize(value, prefix: true) if tag == "035" && oclc_number?(value)
      next unless code == 'w' && (tag == "776" || tag == "787")

      linked_nums << oclc_normalize(value, prefix: true) if oclc_number?(value)
      has_paren = value.include?('(')
      linked_nums << "BIB" + strip_non_numeric(value) unless has_paren
      # A parenthesis anywhere but the start means the prefix is malformed.
      logger.error "#{record['001']} - linked field formatting: #{value}" if has_paren && !value.start_with?('(')
    end
  end
  linked_nums.compact.uniq
end
158

159
# only includes values before $t
160
# Extract name headings from the 1xx/7xx name fields, keeping only the
# portion that precedes subfield $t.
# @param record [MARC::Record]
# @return [Array<String>] unique names with trailing punctuation trimmed
def process_names(record)
  name_spec = '100aqbcdk:110abcdfgkln:111abcdfgklnpq:700aqbcdk:710abcdfgkln:711abcdfgklnpq'
  Traject::MarcExtractor.cached(name_spec).collect_matching_lines(record) do |field, spec, extractor|
    name = extractor.collect_subfields(field, spec).first
    next if name.nil?

    # Rebuild the text contributed by spec'd subfields that follow $t so it
    # can be chopped off the end of the extracted name.
    trailing = +''
    seen_t = false
    field.subfields.each do |sub|
      trailing << " #{sub.value}" if seen_t && spec.includes_subfield_code?(sub.code)
      seen_t = true if sub.code == 't'
    end
    Traject::Macros::Marc21.trim_punctuation(name.chomp(trailing))
  end.compact.uniq
end
175

176
# only includes values before $t
177
# Extract name headings from the 880 fields matched by the 1xx/7xx name
# spec, keeping only the portion that precedes subfield $t.
# @param record [MARC::Record]
# @return [Array<String>] unique names with trailing punctuation trimmed
def process_alt_script_names(record)
  collected = []
  name_spec = '100aqbcdk:110abcdfgkln:111abcdfgklnpq:700aqbcdk:710abcdfgkln:711abcdfgklnpq'
  Traject::MarcExtractor.cached(name_spec).collect_matching_lines(record) do |field, spec, extractor|
    next unless field.tag == '880'

    name = extractor.collect_subfields(field, spec).first
    next if name.nil?

    # Rebuild the text contributed by spec'd subfields that follow $t so it
    # can be chopped off the end of the extracted name.
    trailing = +''
    seen_t = false
    field.subfields.each do |sub|
      trailing << " #{sub.value}" if seen_t && spec.includes_subfield_code?(sub.code)
      seen_t = true if sub.code == 't'
    end
    collected << Traject::Macros::Marc21.trim_punctuation(name.chomp(trailing))
  end
  collected.uniq
end
195

196
##
197
# Get hash of authors grouped by role
198
# @param [MARC::Record]
199
# @return [Hash]
200
# Group a record's authors by role.
# The $a of a 1xx field is stored under 'primary_author'. The $a of a 7xx
# field is filed under 'translators', 'editors' or 'compilers' when its last
# $4/$e relator matches one of the known codes/terms, and under
# 'secondary_authors' otherwise.
# @param record [MARC::Record]
# @return [Hash] role name => Array of names ('primary_author' => String)
def process_author_roles(record)
  role_for_relator = {
    'TRL' => 'translators',
    'EDT' => 'editors',
    'COM' => 'compilers',
    'TRANSLATOR' => 'translators',
    'EDITOR' => 'editors',
    'COMPILER' => 'compilers'
  }
  names = {
    'secondary_authors' => [],
    'translators' => [],
    'editors' => [],
    'compilers' => []
  }

  Traject::MarcExtractor.cached('100a:110a:111a:700a:710a:711a').collect_matching_lines(record) do |field, spec, extractor|
    name = extractor.collect_subfields(field, spec).first
    next if name.nil?

    name = Traject::Macros::Marc21.trim_punctuation(name)

    # If name is from 1xx field, it is the primary author.
    if /1../.match?(field.tag)
      names['primary_author'] = name
      next
    end

    # Relator code ($4) or relator term ($e); the last one in the field wins.
    relator = ""
    field.subfields.each do |sub|
      next unless sub.code == '4' || sub.code == 'e'

      relator = sub.value.upcase.gsub(/[[:punct:]]?$/, '')
    end

    names[role_for_relator.fetch(relator, 'secondary_authors')] << name
  end
  names
end
244

245
##
246
# Process publication information for citations.
247
# @param [MARC::Record]
248
# @return [Array] pub info strings from fields 260 and 264.
249
# Build publication strings for citations from the 260 and 264 fields.
# Each field yields "<$a>: <$b>", or just the one that is present; the last
# occurrence of each subfield is used.
# @param record [MARC::Record]
# @return [Array<String>] one entry per field that produced a non-empty string
def set_pub_citation(record)
  Traject::MarcExtractor.cached('260:264').collect_matching_lines(record) do |field, _spec, _extractor|
    place = nil
    publisher = nil
    field.subfields.each do |sub|
      case sub.code
      when 'a' then place = Traject::Macros::Marc21.trim_punctuation(sub.value).strip
      when 'b' then publisher = Traject::Macros::Marc21.trim_punctuation(sub.value).strip
      end
    end

    citation = [place, publisher].compact.join(': ')
    citation unless citation.empty?
  end.compact
end
266

267
# Em dash on which process_subject_topic_facet splits hierarchical heading strings.
SEPARATOR = '—'
1✔
268

269
# for the hierarchical subject/genre display
270
# split with em dash along t,v,x,y,z
271
# optionally pass a block to only allow fields that match certain criteria
272
# For example, if you only want subject headings from the Bilindex vocabulary,
273
# you could use `process_hierarchy(record, '650|*7|abcvxyz') { |field| field['2'] == 'bidex' }`
274
# Build hierarchical heading strings for the subject/genre display, split
# along subfields t, v, x, y and z.
# @param record [MARC::Record]
# @param fields [String] Traject extraction spec
# @yield [field] optional filter; a falsy result skips the field
# @return [Array<String>] one heading per accepted field with extractable content
def process_hierarchy(record, fields)
  split_on_subfield = %w[t v x y z]
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, extractor|
    next if block_given? && !yield(field)
    next unless extractor.collect_subfields(field, spec).first

    HierarchicalHeading.new(field:, spec:, split_on_subfield:).to_s
  end.compact
end
282

283
# for the split subject facet
284
# split with em dash along x,z
285
# Build the split subject facet: each heading is divided along subfields
# x and z and broken into its parts on SEPARATOR.
# Covers 6xx fields with second indicator 0, plus 650 second-indicator-7
# headings that are siku, local, or from the 'homoit' thesaurus.
# @param record [MARC::Record]
# @return [Array<String>] heading parts, LCSH-derived first
def process_subject_topic_facet(record)
  lcsh_spec = '600|*0|abcdfklmnopqrtxz:610|*0|abfklmnoprstxz:611|*0|abcdefgklnpqstxz:630|*0|adfgklmnoprstxz:650|*0|abcxz:651|*0|axz'
  split_heading = lambda do |field, spec|
    HierarchicalHeading.new(field:, spec:, split_on_subfield: %w[x z]).to_s.split(SEPARATOR)
  end

  lcsh_subjects = Traject::MarcExtractor.cached(lcsh_spec).collect_matching_lines(record) do |field, spec, extractor|
    subject = extractor.collect_subfields(field, spec).first
    split_heading.call(field, spec) unless subject.nil?
  end.compact

  other_thesaurus_subjects = Traject::MarcExtractor.cached('650|*7|abcxz').collect_matching_lines(record) do |field, spec, extractor|
    subject = extractor.collect_subfields(field, spec).first
    wanted = siku_heading?(field) || local_heading?(field) || any_thesaurus_match?(field, %w[homoit])
    split_heading.call(field, spec) if wanted && !subject.nil?
  end.flatten.compact

  lcsh_subjects + other_thesaurus_subjects
end
303

304
# Reduce a string to its digits and drop leading zeros.
# @param num_str [String]
# @return [String] the digits as a canonical integer string; "0" when the
#   input contains no digits
def strip_non_numeric(num_str)
  num_str.gsub(/\D/, '').to_i.to_s
end
307

308
# Whether a value looks like an OCLC control number: "(OCoLC)", optional
# ocn/ocm/on prefixes, then digits. Spaces and dashes are ignored.
# (see https://help.oclc.org/Metadata_Services/WorldShare_Collection_Manager/Data_sync_collections/Prepare_your_data/30035_field_and_OCLC_control_numbers)
# @param oclc [String]
# @return [Boolean]
def oclc_number?(oclc)
  # Strip spaces and dashes
  clean_oclc = oclc.gsub(/[\-\s]/, '')
  # Ensure it follows the OCLC standard
  clean_oclc.match?(/\(OCoLC\)(ocn|ocm|on)*\d+/)
end
315

316
# Normalize an OCLC number to its digits, optionally with the prefix
# determined by the number of digits.
# @param oclc [String] raw OCLC value
# @param opts [Hash] :prefix => true adds "ocm" (1-8 digits, zero-padded
#   to 8), "ocn" (9 digits) or "on" (10 or more digits)
# @return [String]
def oclc_normalize(oclc, opts = { prefix: false })
  oclc_num = strip_non_numeric(oclc)
  # Only a literal true enables the prefix, as before.
  return oclc_num unless opts[:prefix] == true

  case oclc_num.length
  when 1..8 then "ocm#{oclc_num.rjust(8, '0')}"
  when 9 then "ocn#{oclc_num}"
  else "on#{oclc_num}"
  end
end
331

332
# Construct (or retrieve) the cache manager service
333
# @return [CacheManager] the cache manager service
334
# Construct (or retrieve) the cache manager service.
# On first use a Lightly cache over +figgy_dir_path+ is wrapped in a
# CacheAdapter and handed to CacheManager.initialize; later calls return the
# memoized manager.
# @param figgy_dir_path [String] directory passed to Lightly as `dir`
# @return [CacheManager] the cache manager service
def build_cache_manager(figgy_dir_path:)
  if @cache_manager.nil?
    figgy_cache = CacheAdapter.new(service: Lightly.new(dir: figgy_dir_path, life: 0, hash: false))
    CacheManager.initialize(figgy_cache:, logger:)
    @cache_manager = CacheManager.current
  end
  @cache_manager
end
344

345
# returns hash of links ($u) (key),
346
# anchor text ($y, $3, hostname), and additional labels ($z) (array value)
347
# @param [MARC::Record] the MARC record being parsed
348
# @return [Hash] the values used to construct the links
349
# Build the electronic-access data for a record from its 856 fields.
# Returns a hash of links ($u) (key) to anchor text ($y, $3, hostname) and
# additional labels ($z) (array value), plus two optional entries:
# 'holding_record_856s' (links that carry a holding id) and
# 'iiif_manifest_paths' (original url key => Figgy IIIF manifest URL).
# @param record [MARC::Record] the MARC record being parsed
# @param figgy_dir_path [String] cache directory handed to build_cache_manager
# @return [Hash] the values used to construct the links
def electronic_access_links(record, figgy_dir_path)
  solr_field_values = {}
  holding_856s = {}
  iiif_manifest_paths = {}

  output = []
  fragment_index = 0

  Traject::MarcExtractor.cached('856').collect_matching_lines(record) do |field, _spec, _extractor|
    bib_id = record['001']

    electronic_access_link = ElectronicAccessLinkFactory.build bib_id: bib_id, marc_field: field

    # If the electronic access link is an ARK...
    if electronic_access_link.ark
      # ...attempt to build an Orangelight URL from the (cached) mappings exposed by the repositories
      cache_manager = build_cache_manager(figgy_dir_path:)

      # Orangelight links
      catalog_url_builder = OrangelightUrlBuilder.new(ark_cache: cache_manager.ark_cache, fragment: fragment_value(fragment_index))
      orangelight_url = catalog_url_builder.build(url: electronic_access_link.ark)

      if orangelight_url
        # Replace a bare hostname anchor with a friendlier label.
        anchor_text = electronic_access_link.anchor_text
        anchor_text = 'Digital content' if electronic_access_link.url&.host == electronic_access_link.anchor_text
        # Link to the Orangelight URL instead of the raw ARK.
        output << electronic_access_link.clone(url_key: orangelight_url.to_s, anchor_text: anchor_text)
      else
        # Otherwise, always add the link to the resource
        output << electronic_access_link
      end

      # Figgy URL's
      figgy_url_builder = IIIFManifestUrlBuilder.new(ark_cache: cache_manager.figgy_ark_cache, service_host: 'figgy.princeton.edu')
      figgy_iiif_manifest = figgy_url_builder.build(url: electronic_access_link.ark)
      if figgy_iiif_manifest
        figgy_iiif_manifest_link = electronic_access_link.clone url_key: figgy_iiif_manifest.to_s
        iiif_manifest_paths[electronic_access_link.url_key] = figgy_iiif_manifest_link.url.to_s
      end
    else
      # Always add links to the resource if it isn't an ARK
      output << electronic_access_link
    end

    # NOTE(review): this revisits every link collected so far on each 856,
    # not just the new one; the writes look idempotent, so it appears the
    # loop could run once after all fields are processed — confirm.
    output.each do |link|
      if link.holding_id
        holding_856s[link.holding_id] = { link.url_key => link.url_labels }
      elsif link.url_key && link.url_labels
        solr_field_values[link.url_key] = link.url_labels
      end
    end
    fragment_index += 1
  end

  solr_field_values['holding_record_856s'] = holding_856s unless holding_856s == {}
  solr_field_values['iiif_manifest_paths'] = iiif_manifest_paths unless iiif_manifest_paths.empty?
  solr_field_values
end
415

416
# URL fragment for the n-th 856 link of a record.
# @param fragment_index [Integer] zero-based position of the 856 field
# @return [String] 'view' for the first link, "view_<n>" for later ones
def fragment_value(fragment_index)
  fragment_index == 0 ? 'view' : "view_#{fragment_index}"
end
423

424
# Strip a leading parenthesized prefix, e.g. "(OCoLC)", from a value.
# @param standard_no [String]
# @return [String] the value without its leading "(...)" group
def remove_parens_035(standard_no)
  standard_no.gsub(/^\(.*?\)/, '')
end
427

428
# For each matching field, join the values of subfield $t and every
# subfield after it.
# @param record [MARC::Record]
# @param fields [String] Traject extraction spec
# @return [Array<String>] one punctuation-trimmed string per field that has a $t
def everything_after_t(record, fields)
  titles = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, _spec, _extractor|
    seen_t = false
    parts = []
    field.subfields.each do |sub|
      parts << sub.value if seen_t
      next unless sub.code == 't'

      # NOTE(review): a second $t is appended by both pushes, so its value
      # appears twice — confirm whether that is intended.
      parts << sub.value
      seen_t = true
    end
    titles << Traject::Macros::Marc21.trim_punctuation(parts.join(' ')) unless parts.empty?
  end
  titles
end
444

445
# For each matching 880 field, join the values of subfield $t and every
# subfield after it.
# @param record [MARC::Record]
# @param fields [String] Traject extraction spec
# @return [Array<String>] one punctuation-trimmed string per 880 field that has a $t
def everything_after_t_alt_script(record, fields)
  titles = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, _spec, _extractor|
    next unless field.tag == '880'

    seen_t = false
    parts = []
    field.subfields.each do |sub|
      parts << sub.value if seen_t
      next unless sub.code == 't'

      # NOTE(review): a second $t is appended by both pushes, so its value
      # appears twice — confirm whether that is intended.
      parts << sub.value
      seen_t = true
    end
    titles << Traject::Macros::Marc21.trim_punctuation(parts.join(' ')) unless parts.empty?
  end
  titles
end
462

463
# For each matching field that has a $t, join the values of every subfield
# up to and including the first $t.
# @param record [MARC::Record]
# @param fields [String] Traject extraction spec
# @return [Array<String>] one punctuation-trimmed string per field with a $t
def everything_through_t(record, fields)
  results = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, _spec, _extractor|
    subfields = field.subfields
    t_position = subfields.index { |sub| sub.code == 't' }
    # Fields without a $t contribute nothing.
    next if t_position.nil?

    leading_values = subfields[0..t_position].map(&:value)
    results << Traject::Macros::Marc21.trim_punctuation(leading_values.join(' '))
  end
  results
end
479

480
##
481
# @param record [MARC::Record]
482
# @param fields [String] MARC fields of interest
483
# @return [Array] of name-titles each in an [Array], each element [String] split by hierarchy,
484
# both name ($a) and title ($t) are required
485
# Split name-title fields into hierarchy parts.
# Only subfields named in the spec are considered. Values before the first
# $t are joined into the name; $t and each later value become separate
# elements. Both name ($a) and title ($t) are required.
# @param record [MARC::Record]
# @param fields [String] MARC fields of interest
# @return [Array<Array<String>>] one [name, title, ...] array per qualifying field
def prep_name_title(record, fields)
  name_titles = []
  Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, _extractor|
    wanted = field.subfields.select { |sub| spec.subfields.nil? || spec.subfields.include?(sub.code) }
    t_position = wanted.index { |sub| sub.code == 't' }
    next if t_position.nil? || wanted.none? { |sub| sub.code == 'a' }

    name = wanted[0...t_position].map(&:value).join(' ')
    title_parts = wanted[t_position..-1].map(&:value)
    name_titles << [name, *title_parts]
  end
  name_titles
end
509

510
# @param fields [Array] with portions of hierarchy
511
# @return [Array] portions of hierarchy including previous elements
512
# Expand each hierarchy into its cumulative prefixes, e.g. [a, b, c]
# becomes ["a", "a b", "a b c"], each trimmed of trailing punctuation.
# @param fields [Array<Array<String>>] portions of hierarchy
# @return [Array<Array<String>>] portions of hierarchy including previous elements
def expand_sublists_for_hierarchy(fields)
  fields.map do |levels|
    levels.each_index.map do |last|
      Traject::Macros::Marc21.trim_punctuation(levels[0..last].join(' '))
    end
  end
end
519

520
# @param fields [Array] with portions of hierarchy from name-titles or title-only fields
521
# @return [Array] portions of hierarchy including previous elements
522
# Expand hierarchies into cumulative prefixes, optionally dropping the
# first (name-only) element of each.
# @param fields [Array] portions of hierarchy from name-titles or title-only fields
# @param include_first_element [Boolean] the first element is dropped only
#   when this is exactly false
# @return [Array] portions of hierarchy including previous elements
def join_hierarchy(fields, include_first_element: false)
  expanded = expand_sublists_for_hierarchy(fields)
  # Include full hierarchy
  return expanded unless include_first_element == false

  # Exclude the name-only portion of hierarchy
  expanded.map { |levels| levels[1..-1] }
end
531

532
# Removes empty call_number fields from holdings_1display
533
# Removes empty call_number fields from holdings_1display.
# Deletes 'call_number' and 'call_number_browse' when they are absent-or-empty.
# @param holding [Hash] holding hash; modified in place
# @return [Hash] the same holding object
def remove_empty_call_number_fields(holding)
  %w[call_number call_number_browse].each do |key|
    holding.delete(key) if holding.fetch(key, []).empty?
  end
  holding
end
536

537
# Collects only non empty khi
538
# Collects only the non-empty $k, $h and $i subfields of a field.
# @param field [#subfields] a field whose subfields respond to #code and #value
# @return [Array] the matching subfield objects, in field order
def call_number_khi(field)
  # The emptiness check comes first so it still runs for every subfield.
  field.subfields.select { |s| !s.value.empty? && %w[k h i].include?(s.code) }
end
541

542
# Alma Princeton item
543
# Alma Princeton item: whether an id starts with "22" and ends with "06421".
# @param code [#to_s] the id to test; nil is treated as an empty string
# @return [Boolean]
def alma_code_start_22?(code)
  id = code.to_s
  id.start_with?("22") && id.end_with?("06421")
end
546

547
# Whether an Alma id starts with "53" and ends with "06421".
# @param code [#to_s] the id to test; nil is treated as an empty string
# @return [Boolean]
def alma_code_start_53?(code)
  id = code.to_s
  id.start_with?("53") && id.end_with?("06421")
end
550

551
# Whether a code starts with "scsb".
# @param code [#to_s] the code to test; nil is treated as an empty string
# @return [Boolean]
def scsb_code_start?(code)
  code.to_s.start_with?("scsb")
end
554

555
# The record's 852 fields whose $8 passes alma_code_start_22?.
# @param record [MARC::Record]
# @return [Array] matching 852 fields
def alma_852(record)
  holdings_852 = record.fields('852')
  holdings_852.select { |field| alma_code_start_22?(field['8']) }
end
558

559
# The record's 852 fields whose $b passes scsb_code_start?.
# @param record [MARC::Record]
# @return [Array] matching 852 fields
def scsb_852(record)
  holdings_852 = record.fields('852')
  holdings_852.select { |field| scsb_code_start?(field['b']) }
end
562

563
# Build call-number browse strings from the record's 852 fields.
# SCSB records (by 001) use the SCSB 852s; all others use the Alma 852s.
# @param record [MARC::Record]
# @param khi_key_order [Array<String>] the three subfield codes, in output order
# @return [Array<String>] space-joined call numbers, one per field that has
#   at least one non-empty $k/$h/$i
def browse_fields(record, khi_key_order: ['k', 'h', 'i'])
  fields = scsb_doc?(record['001']&.value) ? scsb_852(record) : alma_852(record)
  fields.each_with_object([]) do |field, result|
    next if call_number_khi(field).empty?

    values = khi_key_order.values_at(0, 1, 2).map { |code| field[code] }.compact.reject(&:empty?)
    result << values.join(" ") if values.present?
  end
end
578

579
# The record's 876 fields whose $0 passes alma_code_start_22?.
# @param record [MARC::Record]
# @return [Array] matching 876 fields
def alma_876(record)
  items_876 = record.fields('876')
  items_876.select { |field| alma_code_start_22?(field['0']) }
end
582

583
# The record's 951 fields whose $8 passes alma_code_start_53? and whose
# $a is "Available".
# @param record [MARC::Record]
# @return [Array] matching 951 fields
def alma_951_active(record)
  record.fields('951').select do |field|
    alma_code_start_53?(field['8']) && field['a'] == "Available"
  end
end
587

588
# The record's 953 fields whose $a passes alma_code_start_53?.
# @param record [MARC::Record]
# @return [Array] matching 953 fields
def alma_953(record)
  fields_953 = record.fields('953')
  fields_953.select { |field| alma_code_start_53?(field['a']) }
end
591

592
# The record's 954 fields whose $a passes alma_code_start_53?.
# @param record [MARC::Record]
# @return [Array] matching 954 fields
def alma_954(record)
  fields_954 = record.fields('954')
  fields_954.select { |field| alma_code_start_53?(field['a']) }
end
595

596
# The $b of the first 950 field whose $a is "true" or "false".
# @param record [#fields] the record to inspect
# @return [Object, nil] that field's $b, or nil when no 950 qualifies
def alma_950(record)
  flagged = record.fields('950').find { |f| %w[true false].include?(f['a']) }
  flagged && flagged['b']
end
600

601
# SCSB item
602
# Keep this check with the alma_code? check
603
# until we make sure that the records in alma are updated
604
# SCSB item: whether a record id starts with "SCSB-" followed by digits.
# Keep this check with the alma_code? check
# until we make sure that the records in alma are updated
# @param record_id [String, nil]
# @return [Boolean] false for nil
def scsb_doc?(record_id)
  # \A anchors to the start of the string; ^ would also match after a newline.
  /\ASCSB-\d+/.match?(record_id)
end
607

608
# Build the holdings hash for a record from its 852 fields, their items, and
# their 866/867/868 fields.
# @param record [MARC::Record]
# @return [Hash] holding key => holding hash
def process_holdings(record)
  all_holdings = {}
  helpers = ProcessHoldingsHelpers.new(record:)
  helpers.fields_852_alma_or_scsb.each do |field_852|
    next if helpers.includes_only_private_scsb_items?(field_852)

    holding_id = helpers.holding_id(field_852)
    # Calculate the permanent holding
    holding = helpers.build_holding(field_852, permanent: true)
    items_by_holding = helpers.items_by_852(field_852)
    group_866_867_868_fields = helpers.group_866_867_868_on_holding_perm_id(holding_id, field_852)

    if items_by_holding.present?
      # There are items (876 fields): file them under permanent and temporary holdings.
      add_permanent_items_to_holdings(items_by_holding, field_852, helpers, all_holdings, holding)
      add_temporary_items_to_holdings(items_by_holding, field_852, helpers, all_holdings)
    elsif !holding_id.nil? && !invalid_location?(holding['location_code'])
      # No items (876 fields): create the holding by using the 852 field.
      all_holdings[holding_id] = remove_empty_call_number_fields(holding)
    end

    next unless all_holdings.present? && all_holdings[holding_id]

    all_holdings = helpers.process_866_867_868_fields(fields: group_866_867_868_fields, all_holdings:, holding_id:)
  end
  all_holdings
end
630

631
# Add each item the helper selects as permanently located to the holding
# keyed by the 852's holding id.
# @param items_by_holding [Object] items belonging to the 852, as returned by the helper
# @param field_852 [MARC::DataField]
# @param holdings_helpers [ProcessHoldingsHelpers]
# @param all_holdings [Hash] accumulator, updated via add_item_to_holding
# @param holding [Hash] the permanent holding built from the 852
def add_permanent_items_to_holdings(items_by_holding, field_852, holdings_helpers, all_holdings, holding)
  permanent_876s = holdings_helpers.select_permanent_location_876(items_by_holding, field_852)
  permanent_876s.each do |field_876|
    add_item_to_holding(field_852, field_876, holdings_helpers.holding_id(field_852), holdings_helpers, all_holdings, holding)
  end
end
639

640
# Add each item the helper selects as temporarily located to a holding.
# Items at 'RES_SHARE$IN_RS_REQ' stay under the permanent holding id; all
# others are keyed by their current location code. Either way the holding
# records the current location as 'temp_location_code'.
# @param items_by_holding [Object] items belonging to the 852, as returned by the helper
# @param field_852 [MARC::DataField]
# @param holdings_helpers [ProcessHoldingsHelpers]
# @param all_holdings [Hash] accumulator, updated via add_item_to_holding
def add_temporary_items_to_holdings(items_by_holding, field_852, holdings_helpers, all_holdings)
  temporary_876s = holdings_helpers.select_temporary_location_876(items_by_holding, field_852)
  temporary_876s.each do |field_876|
    next if holdings_helpers.includes_only_private_scsb_items?(field_852)

    in_resource_sharing = holdings_helpers.current_location_code(field_876) == 'RES_SHARE$IN_RS_REQ'
    if in_resource_sharing
      holding = holdings_helpers.build_holding(field_852, permanent: true)
      holding_key = holdings_helpers.holding_id(field_852)
    else
      holding = holdings_helpers.build_holding(field_852, field_876, permanent: false)
      holding_key = holdings_helpers.current_location_code(field_876)
    end
    holding['temp_location_code'] = holdings_helpers.current_location_code(field_876)
    add_item_to_holding(field_852, field_876, holding_key, holdings_helpers, all_holdings, holding)
  end
end
657

658
# rubocop:disable Metrics/ParameterLists
659
# Build an item from the 852/876 pair and attach it to the holding stored
# under +holding_key+. The holding is created first when the key is not yet
# present and either the key is non-blank or the holding's location is valid.
# @param field_852 [MARC::DataField]
# @param field_876 [MARC::DataField]
# @param holding_key [String, nil]
# @param holdings_helpers [ProcessHoldingsHelpers]
# @param all_holdings [Hash] accumulator, updated in place
# @param holding [Hash] holding to store when the key is new
# @return [Object] whatever holdings_helpers.holding_items returns
def add_item_to_holding(field_852, field_876, holding_key, holdings_helpers, all_holdings, holding)
  item = holdings_helpers.build_item(field_852:, field_876:)
  storable = holding_key.present? || !invalid_location?(holding['location_code'])
  all_holdings[holding_key] = remove_empty_call_number_fields(holding) if storable && all_holdings[holding_key].nil?
  holdings_helpers.holding_items(value: holding_key, all_holdings:, item:)
end
666
# rubocop:enable Metrics/ParameterLists
667

668
# Whether a location code has no entry in the "locations" translation map.
# @param code [String, nil]
# @return [Boolean]
def invalid_location?(code)
  locations = Traject::TranslationMap.new("locations")
  locations[code].nil?
end
671

672
# Build "<partner> - <collection group>" notes for the items of a SCSB record.
# The partner letter comes from the last $b seen on the record's SCSB 852
# fields (N, C or H; blank for anything else). The collection-group letter
# comes from each 876's $x (S, P, C, U, or O for any other value; blank
# when there is no $x).
# @param record [MARC::Record]
# @return [Array<String>] one note per 876 with a $0; empty for non-SCSB records
def process_recap_notes(record)
  partner_letters = { 'scsbnypl' => 'N', 'scsbcul' => 'C', "scsbhl" => "H" }
  group_letters = { 'Shared' => 'S', 'Private' => 'P', 'Committed' => 'C', 'Uncommittable' => 'U' }
  item_notes = []
  partner_lib = nil

  Traject::MarcExtractor.cached('852').collect_matching_lines(record) do |field, _spec, _extractor|
    next unless scsb_doc?(record['001'].value) && field['0']

    field.subfields.each do |sub|
      partner_lib = sub.value if sub.code == 'b'
    end
  end

  Traject::MarcExtractor.cached('87603ahjptxz').collect_matching_lines(record) do |field, _spec, _extractor|
    next unless scsb_doc?(record['001'].value) && field['0']

    col_group = ''
    field.subfields.each do |sub|
      # The last $x in the field decides the group.
      col_group = group_letters.fetch(sub.value, 'O') if sub.code == 'x'
    end
    # An unrecognized partner yields nil, which interpolates as an empty string.
    partner_display_string = partner_letters[partner_lib]
    item_notes << "#{partner_display_string} - #{col_group}"
  end
  item_notes
end
714

715
# Whether a field has both a $2 of 'local' and a $5 of 'NjP'.
# @param field [Enumerable] a field that enumerates subfields responding to
#   #code and #value
# @return [Boolean]
def local_heading?(field)
  local_source = field.any? { |subfield| subfield.code == '2' && subfield.value == 'local' }
  local_source && field.any? { |subfield| subfield.code == '5' && subfield.value == 'NjP' }
end
719

720
# Whether a field's $2 names the 'sk' or 'skbb' thesaurus.
# @param field [Enumerable] a field that enumerates subfields
# @return [Boolean]
def siku_heading?(field)
  siku_thesauri = %w[sk skbb]
  any_thesaurus_match?(field, siku_thesauri)
end
723

724
# Whether any $2 subfield of a field names one of the given thesauri.
# @param field [Enumerable] a field that enumerates subfields responding to
#   #code and #value
# @param thesauri [Array<String>] accepted $2 values
# @return [Boolean]
def any_thesaurus_match?(field, thesauri)
  field.any? { |subfield| subfield.code == '2' && thesauri.include?(subfield.value) }
end
727

728
# Append the MMS ids that LinkedFieldsExtractor finds for a field tag.
# @param record [MARC::Record]
# @param field_tag [String]
# @param accumulator [Array] receives the ids; modified in place
# @return [Array] the accumulator
def valid_linked_fields(record, field_tag, accumulator)
  mms_ids = LinkedFieldsExtractor.new(record, field_tag).mms_ids
  accumulator.concat(mms_ids)
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc