• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / marc_cleanup / ae93ed24-4bfb-4d6c-9bc2-33443bb2a0cb

04 Dec 2023 09:56PM UTC coverage: 18.733% (+3.9%) from 14.788%
ae93ed24-4bfb-4d6c-9bc2-33443bb2a0cb

Pull #78

circleci

mzelesky
write tests for yaml generator
Pull Request #78: Variable field validator

103 of 106 new or added lines in 4 files covered. (97.17%)

1 existing line in 1 file now uncovered.

408 of 2178 relevant lines covered (18.73%)

0.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

17.74
/lib/marc_cleanup/record_level.rb
1
module MarcCleanup
1✔
2
  # Tags that this module treats as non-repeatable.
  #
  # @return [Array<String>] a freshly built list of three-character tags,
  #   in ascending order
  def non_repeatable_fields
    [
      %w[001 003 005 008],
      %w[010 018 036 038 040 042 043 044 045 066],
      %w[100 110 111 130],
      %w[240 243 245 254 256 263],
      %w[306 310 357 384],
      %w[507 514],
      %w[841 842 844 882]
    ].flatten
  end
40

41
  # Reports whether a tag that the schema declares non-repeatable occurs
  # more than once in the record.
  #
  # @param record [#fields] record whose fields respond to #tag
  # @param schema [Hash] tag => definition; a definition whose 'repeat'
  #   entry is exactly false marks the tag as non-repeatable. Tags missing
  #   from the schema are ignored.
  # @return [Boolean]
  def non_repeatable_field_errors?(record:, schema: RECORD_SCHEMA)
    record.fields.group_by(&:tag).any? do |tag, occurrences|
      definition = schema[tag]
      occurrences.size > 1 && definition && definition['repeat'] == false
    end
  end
50

51
  # True when at least one tag in the record contains a character other
  # than an ASCII digit.
  #
  # @param record [#tags]
  # @return [Boolean]
  def invalid_tag?(record)
    non_digit = /[^0-9]/
    record.tags.any? { |tag| tag =~ non_digit }
  end
54

55
  # True when the record's string form changes under String#scrub, i.e.
  # when it contains at least one invalid byte sequence.
  #
  # @param record [#to_s]
  # @return [Boolean]
  def bad_utf8?(record)
    scrubbed = record.to_s.scrub
    scrubbed != record.to_s
  end
58

59
  # Flags invalid UTF-8 in place: every invalid byte sequence in a subfield
  # value (data fields) or in the field value (all other fields) is replaced
  # by its lowercase hex digits wrapped in U+2591 markers, and the value is
  # then tagged as UTF-8. Nil values are left alone.
  #
  # @param record [#fields] record to annotate; mutated in place
  # @return the same record
  def bad_utf8_identify(record)
    # Written as an escape so the marker cannot be damaged by re-encoding.
    marker = "\u2591"
    record.fields.each do |field|
      # Subfields and non-data fields both expose #value, so treat them alike.
      holders = field.is_a?(MARC::DataField) ? field.subfields : [field]
      holders.each do |holder|
        next if holder.value.nil?

        holder.value.scrub! { |bytes| marker + bytes.unpack('H*')[0] + marker }
        holder.value.force_encoding('UTF-8')
      end
    end
    record
  end
73

74
  # True when any subfield value (data fields) or field value (all other
  # fields) contains a tab, line feed or carriage return.
  #
  # @param record [#fields]
  # @return [Boolean]
  def tab_newline_char?(record)
    whitespace = /[\x09\n\r]/
    record.fields.any? do |field|
      if field.instance_of?(MARC::DataField)
        field.subfields.any? { |subfield| subfield.value =~ whitespace }
      else
        field.value =~ whitespace
      end
    end
  end
87

88
  # Flags characters matched by the pattern below (described elsewhere in
  # this file as invalid XML 1.0 characters) by wrapping each one in U+2591
  # markers, in place. Only fields whose string form contains such a
  # character are touched; nil values are skipped.
  #
  # @param record [#fields] record to annotate; mutated in place
  # @return the same record
  def invalid_xml_identify(record)
    pattern = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
    # \0 re-inserts the whole match. The pattern has no capture group, so
    # a \1 back-reference would expand to nothing and drop the character.
    flagged = "\u2591\\0\u2591"
    record.fields.each do |field|
      next unless field.to_s =~ pattern

      holders = field.is_a?(MARC::DataField) ? field.subfields : [field]
      holders.each do |holder|
        next if holder.value.nil?

        holder.value.gsub!(pattern, flagged)
      end
    end
    record
  end
105

106
  # True when the record's string form contains a character from the
  # pattern below.
  #
  # @param record [#to_s]
  # @return [Boolean]
  def invalid_xml_chars?(record)
    disallowed = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
    !(record.to_s =~ disallowed).nil?
  end
110

111
  # Flags, in place, every run of combining marks (\p{M}) that follows a
  # character which is neither a letter nor a mark: the preceding character
  # plus the marks are wrapped in U+2591 markers. Nil values are skipped.
  #
  # @param record [#fields] record to annotate; mutated in place
  # @return the same record
  def combining_chars_identify(record)
    pattern = /([^\p{L}\p{M}]\p{M}+)/
    # Written with an escape so the marker cannot be damaged by re-encoding.
    flagged = "\u2591\\1\u2591"
    record.fields.each do |field|
      holders = field.is_a?(MARC::DataField) ? field.subfields : [field]
      holders.each do |holder|
        next if holder.value.nil?

        holder.value.gsub!(pattern, flagged)
      end
    end
    record
  end
124

125
  # True when the record's string form contains a combining mark (\p{M})
  # directly after a character that is neither a letter nor a mark.
  #
  # @param record [#to_s]
  # @return [Boolean]
  def combining_char_errors?(record)
    stray_mark = /[^\p{L}\p{M}]\p{M}+/
    !(record.to_s =~ stray_mark).nil?
  end
129

130
  # True when any subfield value (data fields) or field value (all other
  # fields) contains a character whose codepoint is not in CHARSET.
  # Nil values are ignored, matching invalid_chars_identify.
  #
  # @param record [#fields]
  # @return [Boolean]
  def invalid_chars?(record)
    allowed = CHARSET
    record.fields.any? do |field|
      values = field.is_a?(MARC::DataField) ? field.subfields.map(&:value) : [field.value]
      values.compact.any? do |value|
        value.each_char.any? { |char| !allowed.include?(char.ord) }
      end
    end
  end
149

150
  # Flags every character whose codepoint is not in CHARSET by wrapping it
  # in U+2591 markers. Each non-nil subfield value (data fields) or field
  # value (all other fields) is replaced with the annotated copy.
  #
  # @param record [#fields] record to annotate; mutated in place
  # @return the same record
  def invalid_chars_identify(record)
    allowed = CHARSET
    # Written with an escape so the marker cannot be damaged by re-encoding.
    marker = "\u2591"
    record.fields.each do |field|
      holders = field.is_a?(MARC::DataField) ? field.subfields : [field]
      holders.each do |holder|
        next unless holder.value

        # gsub builds a new string, so no mutable '' literal is needed
        # (appending to one raises under frozen_string_literal).
        holder.value = holder.value.gsub(/./m) do |char|
          allowed.include?(char.ord) ? char : "#{marker}#{char}#{marker}"
        end
      end
    end
    record
  end
173

174
  # True when any subfield value (data fields) or field value (all other
  # fields) fails the check in composed_chars_in_value?.
  #
  # @param record [#fields]
  # @return [Boolean]
  def composed_chars_errors?(record)
    record.fields.any? do |field|
      if field.is_a?(MARC::DataField)
        field.subfields.any? { |subfield| composed_chars_in_value?(subfield.value) }
      else
        composed_chars_in_value?(field.value)
      end
    end
  end

  # Checks a single value. It is flagged when either
  # * a codepoint below 1570 or within 7680..10_792 is not in NFD form, or
  # * the value contains U+0653, U+0654 or U+0655 and is not in NFC form.
  # Nil values are never flagged.
  #
  # @param value [String, nil]
  # @return [Boolean]
  def composed_chars_in_value?(value)
    return false if value.nil?

    decomposable = value.each_codepoint.any? do |codepoint|
      in_checked_range = codepoint < 1570 || (7680..10_792).cover?(codepoint)
      in_checked_range && !codepoint.chr(Encoding::UTF_8).unicode_normalized?(:nfd)
    end
    return true if decomposable

    !(value =~ /^.*[\u0653\u0654\u0655].*$/).nil? && !value.unicode_normalized?(:nfc)
  end
200

201
  ### Count fields in a record; set :subfields to a truthy value to drill
  ### down to subfields
  #
  # Without :subfields the keys are tags. With it, tags starting with "00"
  # are still counted by tag, and every other field contributes one key per
  # subfield (tag followed by subfield code). Tags and codes are scrubbed
  # of invalid bytes before being used as keys.
  #
  # @param record [#fields]
  # @param opts [Hash] only :subfields is read
  # @return [Hash{String=>Integer}] counts keyed in order of first occurrence
  def field_count(record, opts = {})
    keys = record.fields.flat_map do |field|
      tag = field.tag.scrub('')
      if opts[:subfields] && tag !~ /^00/
        field.subfields.map { |subfield| tag + subfield.code.to_s.scrub('') }
      else
        [tag]
      end
    end
    keys.each_with_object({}) { |key, counts| counts[key] = counts.fetch(key, 0) + 1 }
  end
228

229
  def extra_space_gsub(string)
1✔
230
    string.gsub!(/([[:blank:]]){2,}/, '\1')
×
231
    string.gsub!(/^(.*)[[:blank:]]+$/, '\1')
×
232
    string.gsub(/^[[:blank:]]+(.*)$/, '\1')
×
233
  end
234

235
  ### Remove extra spaces from all fields that are not positionally defined
  #
  # Only data fields other than 010 are considered. Which subfields are
  # cleaned is decided per tag by extra_space_applies?; nil values are
  # skipped. Each cleaned value is the result of extra_space_gsub.
  #
  # @param record [#fields] record to clean; mutated in place
  # @return the same record
  def extra_space_fix(record)
    record.fields.each do |field|
      next unless field.is_a?(MARC::DataField) && field.tag != '010'

      field.subfields.each do |subfield|
        next if subfield.value.nil?
        next unless extra_space_applies?(field.tag, subfield.code)

        subfield.value = extra_space_gsub(subfield.value)
      end
    end
    record
  end

  # Decides whether a subfield of the given tag should have its spacing
  # normalised. Tags that match none of the branches are never touched.
  #
  # @param tag [String] field tag
  # @param code [String] subfield code
  # @return [Boolean]
  def extra_space_applies?(tag, code)
    case tag
    when /^[1-469]..|0[2-9].|01[1-9]|7[0-5].|5[0-24-9].|53[0-24-9]/
      true
    when '533'
      code != '7'
    when /^7[6-8]./
      # Skip any code containing a character outside a-v and 3-8.
      (code =~ /[^a-v3-8]/).nil?
    when /^8../
      !%w[w 7].include?(code)
    else
      false
    end
  end
275

276
  ### Scrub invalid UTF-8 byte sequences within field values,
  #     replacing with nothing; indicators, subfield codes, and tags must be
  #     handled separately
  #
  # Each non-nil subfield value (data fields) or field value (all other
  # fields) is scrubbed in place and then tagged as UTF-8.
  #
  # @param record [#fields] record to clean; mutated in place
  # @return the same record
  def bad_utf8_fix(record)
    record.fields.each do |field|
      # Work on the objects being iterated rather than looking them up
      # again by index, which is quadratic and resolves by equality.
      holders = field.is_a?(MARC::DataField) ? field.subfields : [field]
      holders.each do |holder|
        next if holder.value.nil?

        holder.value.scrub!('')
        holder.value.force_encoding('UTF-8')
      end
    end
    record
  end
293

294
  ### Replace invalid XML 1.0 characters with a space
  #
  # Applies to the leader, to every non-nil subfield value of data fields
  # and to the non-nil value of every other field; all are edited in place.
  #
  # @param record [#leader, #fields] record to clean; mutated in place
  # @return the same record
  def invalid_xml_fix(record)
    bad_xml_range = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
    record.leader.gsub!(bad_xml_range, ' ')
    record.fields.each do |field|
      holders = field.is_a?(MARC::DataField) ? field.subfields : [field]
      holders.each do |holder|
        next if holder.value.nil?

        holder.value.gsub!(bad_xml_range, ' ')
      end
    end
    record
  end
313

314
  ### Replace tab and newline characters with a space
  #
  # Applies to the leader, to both indicators (when present) and every
  # non-nil subfield value of data fields, and to the non-nil value of
  # every other field; all are edited in place.
  #
  # @param record [#leader, #fields] record to clean; mutated in place
  # @return the same record
  def tab_newline_fix(record)
    regex = /[\x09\n\r]/
    record.leader.gsub!(regex, ' ')
    record.fields.each do |field|
      if field.is_a?(MARC::DataField)
        [field.indicator1, field.indicator2].each do |indicator|
          indicator.gsub!(regex, ' ') if indicator
        end
        field.subfields.each do |subfield|
          subfield.value.gsub!(regex, ' ') unless subfield.value.nil?
        end
      else
        field.value.gsub!(regex, ' ') unless field.value.nil?
      end
    end
    record
  end
335

336
  ## Can delete fields based on tags alone, or with
  ## optional indicator values provided in arrays
  #
  # @param tags [Array<String>] tags of the fields to delete
  # @param record [#fields] record to edit; its field list is mutated
  # @param indicators [Hash] optional :ind_1 and/or :ind_2, each a
  #   collection of accepted indicator values; a field is deleted only if
  #   it satisfies every filter that is given
  # @return the same record
  # @raise [ArgumentError] if indicators is non-empty but supplies neither
  #   :ind_1 nor :ind_2 (this used to fail with NoMethodError on nil)
  def field_delete(tags, record, indicators = {})
    ind_1 = indicators[:ind_1]
    ind_2 = indicators[:ind_2]
    unless indicators.empty? || ind_1 || ind_2
      raise ArgumentError, 'indicators must include :ind_1 and/or :ind_2'
    end

    record.fields.delete_if do |field|
      tags.include?(field.tag) &&
        (!ind_1 || ind_1.include?(field.indicator1)) &&
        (!ind_2 || ind_2.include?(field.indicator2))
    end
    record
  end
354

355
  # Runs a fixed sequence of clean-up steps, feeding the result of each
  # step into the next: bad_utf8_fix, deletion of 959 and then 856 fields,
  # leaderfix, extra_space_fix, invalid_xml_fix, composed_chars_normalize,
  # tab_newline_fix and finally empty_subfield_fix.
  #
  # @param record the record to clean
  # @return whatever empty_subfield_fix returns
  def recap_fixes(record)
    cleaned = bad_utf8_fix(record)
    %w[959 856].each { |tag| cleaned = field_delete([tag], cleaned) }
    remaining_steps = %i[
      leaderfix
      extra_space_fix
      invalid_xml_fix
      composed_chars_normalize
      tab_newline_fix
      empty_subfield_fix
    ]
    remaining_steps.reduce(cleaned) { |current, step| send(step, current) }
  end
366

367
  ### Remove field from a record that matches a string version of the field
  ###   generated by ruby-marc (ex.: '655  0 $a Electronic books')
  #
  # Both the supplied string and each candidate field's string form are
  # scrubbed of invalid bytes and stripped before comparison. Candidates
  # are the fields whose tag equals the first three characters of the
  # cleaned string.
  #
  # @param field_string [String] string form of the field to remove
  # @param record [#fields] record to edit; its field list is mutated
  # @return the same record
  def remove_field(field_string, record)
    wanted = field_string.scrub('').strip
    candidates = record.fields(wanted[0..2])
    return record if candidates.empty?

    candidates.each do |candidate|
      next if candidate.to_s.scrub('').strip != wanted

      position = record.fields.index(candidate)
      record.fields.delete_at(position)
    end
    record
  end
388

389
  ### Replace field from a record that matches a string version of the field
  ###   with the supplied field object, which can be either a ControlField
  ###   or a DataField
  #
  # Both the supplied string and each candidate field's string form are
  # scrubbed of invalid bytes and stripped before comparison. Candidates
  # are the fields whose tag equals the first three characters of the
  # cleaned string.
  #
  # @param field_string [String] string form of the field to replace
  # @param replacement_field the field object to put in its place
  # @param record [#fields] record to edit; its field list is mutated
  # @return the same record
  def replace_field(field_string:, replacement_field:, record:)
    wanted = field_string.scrub('').strip
    candidates = record.fields(wanted[0..2])
    return record if candidates.empty?

    candidates.each do |candidate|
      next if candidate.to_s.scrub('').strip != wanted

      position = record.fields.index(candidate)
      record.fields[position] = replacement_field
    end
    record
  end
411

412
  ### Perform multiple field replacements on a record;
  ###   input is a hash with field strings as keys
  ###   and replacement fields as values
  #
  # Each pair is handed to replace_field in hash order, and the record it
  # returns is passed on to the next replacement.
  #
  # @param field_hash [Hash] field string => replacement field
  # @param record the record to edit
  # @return the record returned by the last replace_field call, or the
  #   original record when field_hash is empty
  def replace_fields(field_hash:, record:)
    field_hash.reduce(record) do |current, (field_string, replacement_field)|
      replace_field(field_string: field_string,
                    replacement_field: replacement_field,
                    record: current)
    end
  end
423

424
  ### Default field sort: sort fixed fields numerically, then sort the rest
  ###   in groups, leaving the order of fields within the group alone
  #
  # Builds a new MARC::Record carrying the same leader. Fields 001-009 are
  # appended sorted by tag; then the fields of 010-099, 100-199, ... up to
  # 900-999 are appended group by group in their existing order.
  #
  # @param record [#leader, #fields] source record
  # @return [MARC::Record] the newly built record
  def field_sort(record)
    sorted = MARC::Record.new
    sorted.leader = record.leader
    record.fields('001'..'009').sort_by(&:tag).each { |field| sorted.append(field) }

    tag_groups = [('010'..'099')] + (1..9).map { |hundreds| "#{hundreds}00".."#{hundreds}99" }
    tag_groups.each do |group|
      record.fields(group).each { |field| sorted.append(field) }
    end
    sorted
  end
464

465
  # Removes fields whose string form duplicates that of another field.
  # When several fields share a string form, the one latest in the record
  # is kept and the earlier ones are removed.
  #
  # The duplicates are determined before anything is deleted, so the field
  # list is never modified while it is being searched.
  #
  # @param record [#fields] record to edit; its field list is mutated
  # @return the same record
  def remove_duplicate_fields(record)
    rendered = record.fields.map(&:to_s)
    last_position = {}
    rendered.each_with_index { |string, position| last_position[string] = position }

    position = -1
    record.fields.delete_if do
      position += 1
      last_position[rendered[position]] != position
    end
    record
  end
478

479
  ### Duplicate record to preserve original when making modifications
  #
  # The record is written with MARC::Writer into an in-memory string and
  # read back with MARC::Reader using external_encoding 'UTF-8',
  # invalid: :replace and replace: ''.
  #
  # @param record the record to copy
  # @return the first record yielded by the reader
  def duplicate_record(record)
    buffer = ''
    MARC::Writer.new(StringIO.new(buffer, 'w')).tap do |writer|
      writer.write(record)
      writer.close
    end
    reader_options = { external_encoding: 'UTF-8', invalid: :replace, replace: '' }
    MARC::Reader.new(StringIO.new(buffer, 'r'), **reader_options).first
  end
491

492
  # True when the record has a 773 field.
  #
  # @param record [#[]] record indexable by tag
  # @return [Boolean]
  def blvl_ab_valid?(record)
    !!record['773']
  end
495

496
  # A record passes when BOTH of the following hold:
  # * one of the fields 020, 024, 027, 088, 100, 110, 111, 300, 533, 700,
  #   710, 711, 800, 810, 811 or 830 has a subfield a, and
  # * a 260 has subfield a or b, or a 264 has subfield b, or a 533 has
  #   subfield c.
  #
  # The first condition is now enforced; previously it was computed and
  # then ignored, unlike in the sibling checks that share this structure.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_ac_cdm_valid?(record)
    candidate_fields = record.fields(
      %w[020 024 027 088 100 110 111 300 533 700 710 711 800 810 811 830]
    )
    imprint_fields = record.fields(%w[260 264 533])
    return false unless candidate_fields.any? { |field| field['a'] }

    imprint_fields.any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
537

538
  # A record passes when a 260 has subfield a or b, or a 264 has
  # subfield b, or a 533 has subfield c.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_ac_is_valid?(record)
    record.fields(%w[260 264 533]).any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
554

555
  # A record passes when any of the fetched fields carries one of the
  # subfields required for its tag: a or f for 300, e for 533, and a for
  # every other fetched tag.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_dt_cdm_valid?(record)
    tags = %w[020 024 027 028 088 100 110 111 300 533 700 710 711 800 810 811 830]
    codes_by_tag = { '300' => %w[a f], '533' => %w[e] }
    record.fields(tags).any? do |field|
      codes_by_tag.fetch(field.tag, %w[a]).any? { |code| field[code] }
    end
  end
591

592
  # A record passes when BOTH of the following hold:
  # * a 007 whose first character is a, d or r, or a 300 with subfield a,
  #   or a 338 with subfield a or b, is present, and
  # * a 260 has subfield a or b, or a 264 has subfield b, or a 533 has
  #   subfield c.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_e_cdims_valid?(record)
    physical_fields = record.fields(%w[007 300 338])
    imprint_fields = record.fields(%w[260 264 533])
    physical_ok = physical_fields.any? do |field|
      case field.tag
      when '007' then %w[a d r].include?(field.value[0])
      when '300' then field['a']
      when '338' then field['a'] || field['b']
      # The tag is a String, so this must be '533', not the Integer 533.
      # NOTE(review): 533 is not among the tags fetched above, so this
      # branch only takes effect if it is added there; confirm the intent.
      when '533' then field['e']
      end
    end
    return false unless physical_ok

    imprint_fields.any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
625

626
  # A record passes when any of these is present: a 007 whose first
  # character is a, d or r; a 300 with subfield a or f; a 338 with
  # subfield a or b; a 533 with subfield e.
  #
  # The 533 branch compares against the String '533'; the Integer 533
  # used before could never equal a tag, so 533 $e was silently ignored.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_f_cdm_valid?(record)
    record.fields(%w[007 300 338 533]).any? do |field|
      case field.tag
      when '007' then %w[a d r].include?(field.value[0])
      when '300' then field['a'] || field['f']
      when '338' then field['a'] || field['b']
      when '533' then field['e']
      end
    end
  end
651

652
  # A record passes when any of these is present:
  # * a 007 whose first character is g, m or v,
  # * a 008 whose character 33 is one of f m p s t v while leader
  #   position 6 is one of g k o r,
  # * a 300 or 538 with subfield a,
  # * a 338 with subfield a or b,
  # * any 345 or 346.
  #
  # Two corrections bring this in line with ftype_g_is_valid?: the 007
  # check uses g/m/v instead of a/d/r, and 338, which was fetched but
  # never tested, is now checked.
  #
  # @param record [#fields, #leader]
  # @return [Boolean]
  def ftype_g_cdm_valid?(record)
    record.fields(%w[007 008 300 338 345 346 538]).any? do |field|
      case field.tag
      when '007' then %w[g m v].include?(field.value[0])
      when '008'
        %w[g k o r].include?(record.leader[6]) && %w[f m p s t v].include?(field.value[33])
      when '300', '538' then field['a']
      when '338' then field['a'] || field['b']
      when '345', '346' then true
      end
    end
  end
684

685
  # A record passes when BOTH of the following hold:
  # * one of these is present: a 007 whose first character is g, m or v;
  #   a 008 whose character 33 is one of f m p s t v while leader
  #   position 6 is one of g k o r; a 300 or 538 with subfield a; a 338
  #   with subfield a or b; any 345 or 346, and
  # * a 260 has subfield a or b, or a 264 has subfield b, or a 533 has
  #   subfield c.
  #
  # The 538 branch compares against the String '538'; the Integer 538
  # used before could never equal a tag, so 538 $a was silently ignored.
  #
  # @param record [#fields, #leader]
  # @return [Boolean]
  def ftype_g_is_valid?(record)
    physical_fields = record.fields(%w[007 008 300 338 345 346 538])
    imprint_fields = record.fields(%w[260 264 533])
    physical_ok = physical_fields.any? do |field|
      case field.tag
      when '007' then %w[g m v].include?(field.value[0])
      when '008'
        %w[g k o r].include?(record.leader[6]) && %w[f m p s t v].include?(field.value[33])
      when '300', '538' then field['a']
      when '338' then field['a'] || field['b']
      when '345', '346' then true
      end
    end
    return false unless physical_ok

    imprint_fields.any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
734

735
  # A record passes when any of these is present: a 007 whose first
  # character is s; a 300 or 538 with subfield a; a 338 with subfield a
  # or b; any 344.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_ij_cdm_valid?(record)
    record.fields(%w[007 300 338 344 538]).any? do |field|
      case field.tag
      when '007' then field.value[0] == 's'
      when '300', '538' then field['a']
      when '338' then field['a'] || field['b']
      when '344' then true
      end
    end
  end
763

764
  # A record passes when BOTH of the following hold:
  # * one of these is present: a 007 whose first character is s; a 300 or
  #   538 with subfield a; a 338 with subfield a or b; any 344, and
  # * a 260 has subfield a or b, or a 264 has subfield b, or a 533 has
  #   subfield c.
  #
  # The 538 branch compares against the String '538'; the Integer 538
  # used before could never equal a tag, so 538 $a was silently ignored.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_ij_is_valid?(record)
    physical_fields = record.fields(%w[007 300 338 344 538])
    imprint_fields = record.fields(%w[260 264 533])
    physical_ok = physical_fields.any? do |field|
      case field.tag
      when '007' then field.value[0] == 's'
      when '300', '538' then field['a']
      when '338' then field['a'] || field['b']
      when '344' then true
      end
    end
    return false unless physical_ok

    imprint_fields.any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
807

808
  # A record passes when any of these is present: a 007 whose first
  # character is k; a 008 whose character 33 is one of a c k l n o p while
  # leader position 6 is one of g k o r; a 300 with subfield a; a 338 with
  # subfield a or b.
  #
  # @param record [#fields, #leader]
  # @return [Boolean]
  def ftype_k_cdm_valid?(record)
    record.fields(%w[007 008 300 338]).any? do |field|
      case field.tag
      when '007' then field.value[0] == 'k'
      when '008'
        %w[g k o r].include?(record.leader[6]) && %w[a c k l n o p].include?(field.value[33])
      when '300' then field['a']
      when '338' then field['a'] || field['b']
      end
    end
  end
833

834
  # A record passes when BOTH of the following hold:
  # * one of these is present: a 007 whose first character is k; a 008
  #   whose character 33 is one of a c k l n o p while leader position 6
  #   is one of g k o r; a 300 with subfield a; a 338 with subfield a or
  #   b, and
  # * a 260 has subfield a or b, or a 264 has subfield b, or a 533 has
  #   subfield c.
  #
  # A matching 008 now only satisfies the first condition. It used to
  # return true on its own and bypass the second condition, unlike the
  # 008 handling in ftype_g_is_valid?.
  #
  # @param record [#fields, #leader]
  # @return [Boolean]
  def ftype_k_is_valid?(record)
    physical_fields = record.fields(%w[007 008 300 338])
    imprint_fields = record.fields(%w[260 264 533])
    physical_ok = physical_fields.any? do |field|
      case field.tag
      when '007' then field.value[0] == 'k'
      when '008'
        %w[g k o r].include?(record.leader[6]) && %w[a c k l n o p].include?(field.value[33])
      when '300' then field['a']
      when '338' then field['a'] || field['b']
      end
    end
    return false unless physical_ok

    imprint_fields.any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
874

875
  # A record passes when any of these is present: a 007 whose first
  # character is c; a 300 or 538 with subfield a; a 338 with subfield a
  # or b; any 347.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_m_cdm_valid?(record)
    record.fields(%w[007 300 338 347 538]).any? do |field|
      case field.tag
      when '007' then field.value[0] == 'c'
      when '300', '538' then field['a']
      when '338' then field['a'] || field['b']
      when '347' then true
      end
    end
  end
903

904
  # A record passes when BOTH of the following hold:
  # * one of these is present: a 007 whose first character is c; a 300 or
  #   538 with subfield a; a 338 with subfield a or b; any 347, and
  # * a 260 has subfield a or b, or a 264 has subfield b, or a 533 has
  #   subfield c.
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_m_is_valid?(record)
    physical_fields = record.fields(%w[007 300 338 347 538])
    imprint_fields = record.fields(%w[260 264 533])
    return false if physical_fields.empty? || imprint_fields.empty?

    # Every field is inspected, as before, rather than stopping at the first hit.
    physical_hits = physical_fields.count do |field|
      case field.tag
      when '007' then field.value[0] == 'c'
      when '300', '538' then field['a']
      when '338' then field['a'] || field['b']
      when '347' then true
      end
    end
    return false if physical_hits.zero?

    imprint_fields.any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
947

948
  # A record passes when any of these is present: a 008 whose character
  # 33 is one of a b c d g q r w while leader position 6 is one of
  # g k o r; a 300 with subfield a; a 338 with subfield a or b.
  #
  # @param record [#fields, #leader]
  # @return [Boolean]
  def ftype_or_cdm_valid?(record)
    record.fields(%w[008 300 338]).any? do |field|
      case field.tag
      when '008'
        %w[g k o r].include?(record.leader[6]) && %w[a b c d g q r w].include?(field.value[33])
      when '300' then field['a']
      when '338' then field['a'] || field['b']
      end
    end
  end
970

971
  # A record passes when BOTH of the following hold:
  # * one of these is present: a 008 whose character 33 is one of
  #   a b c d g q r w while leader position 6 is one of g k o r; a 300
  #   with subfield a; a 338 with subfield a or b, and
  # * a 260 has subfield a or b, or a 264 has subfield b, or a 533 has
  #   subfield c.
  #
  # A matching 008 now only satisfies the first condition. It used to
  # return true on its own and bypass the second condition, unlike the
  # 008 handling in ftype_g_is_valid?.
  #
  # @param record [#fields, #leader]
  # @return [Boolean]
  def ftype_or_is_valid?(record)
    physical_fields = record.fields(%w[008 300 338])
    imprint_fields = record.fields(%w[260 264 533])
    physical_ok = physical_fields.any? do |field|
      case field.tag
      when '008'
        %w[g k o r].include?(record.leader[6]) && %w[a b c d g q r w].include?(field.value[33])
      when '300' then field['a']
      when '338' then field['a'] || field['b']
      end
    end
    return false unless physical_ok

    imprint_fields.any? do |field|
      case field.tag
      when '260' then field['a'] || field['b']
      when '264' then field['b']
      when '533' then field['c']
      end
    end
  end
1008

1009
  # A record passes when any of the fetched fields carries one of the
  # subfields required for its tag: a or f for 300, a or b for 338, and a
  # for every other fetched tag (100, 110, 111, 700, 710, 711).
  #
  # @param record [#fields]
  # @return [Boolean]
  def ftype_p_cd_valid?(record)
    codes_by_tag = { '300' => %w[a f], '338' => %w[a b] }
    record.fields(%w[100 110 111 300 338 700 710 711]).any? do |field|
      codes_by_tag.fetch(field.tag, %w[a]).any? { |code| field[code] }
    end
  end
1036

1037
  ### Returns true when `record` fails any of these checks, false otherwise:
  ###   * bib_form(record) is a blank or one of a/b/c/d/f/o/q/r/s
  ###   * a 245 field exists and has subfield a or k
  ###   * an 008 field exists
  ###   * the validator selected by leader byte 6 (type) and leader byte 7
  ###     (level) returns a truthy value
  ### Level a or b always selects blvl_ab_valid?, whatever the type. A
  ### type/level pair with no validator is accepted as it stands.
  def sparse_record?(record)
    rec_type = record.leader[6]
    rec_level = record.leader[7]
    accepted_forms = [' '] + %w[a b c d f o q r s]
    return true unless accepted_forms.include?(bib_form(record))

    title = record['245']
    return true if !title || (!title['a'] && !title['k'])
    return true unless record['008']

    cdm = %w[c d m]
    is = %w[i s]
    # Ordered table: the first row whose type and level lists both match wins.
    dispatch = [
      [%w[a c], cdm, :ftype_ac_cdm_valid?],
      [%w[a c], is, :ftype_ac_is_valid?],
      [%w[d t], cdm, :ftype_dt_cdm_valid?],
      [%w[e], %w[c d i m s], :ftype_e_cdims_valid?],
      [%w[f], cdm, :ftype_f_cdm_valid?],
      [%w[g], cdm, :ftype_g_cdm_valid?],
      [%w[g], is, :ftype_g_is_valid?],
      [%w[i j], cdm, :ftype_ij_cdm_valid?],
      [%w[i j], is, :ftype_ij_is_valid?],
      [%w[k], cdm, :ftype_k_cdm_valid?],
      [%w[k], is, :ftype_k_is_valid?],
      [%w[m], cdm, :ftype_m_cdm_valid?],
      [%w[m], is, :ftype_m_is_valid?],
      [%w[o r], cdm, :ftype_or_cdm_valid?],
      [%w[o r], is, :ftype_or_is_valid?],
      [%w[p], %w[c d], :ftype_p_cd_valid?]
    ]

    validator =
      if %w[a b].include?(rec_level)
        :blvl_ab_valid?
      else
        row = dispatch.find { |types, levels, _name| types.include?(rec_type) && levels.include?(rec_level) }
        row && row.last
      end
    # No validator for this type/level combination: nothing more to check.
    return false unless validator

    !send(validator, record)
  end
1087

1088
  ### Runs the record-level checks and validates every 010-899 field of
  ### `record` against `schema`.
  ### `schema` is a YAML file loaded as a hash;
  ### schema = YAML.load_file("#{ROOT_DIR}/lib/marc_cleanup/variable_field_schema.yml")
  ### As read here, each schema entry is keyed by tag and provides 'ind1' and
  ### 'ind2' (allowed indicator values, queried with include?) and
  ### 'subfields' (code => { 'repeat' => true/false }).
  ###
  ### Returns a hash with the keys:
  ###   :multiple_1xx, :has_130_240, :multiple_no_245,
  ###   :non_repeatable_field_errors
  ###                   - return values of the like-named predicate methods
  ###   :invalid_tags   - tags of data fields outside 9xx with no schema entry
  ###   :invalid_fields - { tag => [error message, ...] }. An 880 with a
  ###                     usable $6 is validated with the rules of its linked
  ###                     tag, but its errors are still filed under '880'.
  ### "instance N" in a message is the 1-based position of the field among
  ### the fields sharing its tag.
  def validate_marc(record:, schema: RECORD_SCHEMA)
    hash = {}
    hash[:multiple_1xx] = multiple_1xx?(record)
    hash[:has_130_240] = has_130_240?(record)
    hash[:multiple_no_245] = multiple_no_245?(record)
    hash[:non_repeatable_field_errors] = non_repeatable_field_errors?(record: record, schema: schema)
    unknown_fields = record.fields.select do |field|
      field.is_a?(MARC::DataField) && field.tag[0] != '9' && !schema.key?(field.tag)
    end
    hash[:invalid_tags] = unknown_fields.map(&:tag)

    errors = hash[:invalid_fields] = {}
    report = ->(tag, message) { (errors[tag] ||= []) << message }
    # Running per-tag counter. The previous `index { |f| field }` lookup
    # ignored its block argument and therefore always yielded instance 1.
    seen = Hash.new(0)

    record.fields('010'..'899').each do |field|
      next unless schema[field.tag]

      field_num = (seen[field.tag] += 1)
      tag = field.tag
      if field.tag == '880'
        links = field.subfields.select { |s| s.code == '6' }
        linked_tag = field['6'].to_s[/\A([0-9]{3})-[0-9]+/, 1]
        if links.empty?
          report.call(field.tag, "No field linkage in instance #{field_num} of 880")
        elsif links.size > 1
          report.call(field.tag, "Multiple field links in instance #{field_num} of 880")
        elsif linked_tag.nil? || schema[linked_tag].nil?
          # Malformed $6, or a link to a tag the schema does not define.
          # Either way there are no linked-tag rules, so keep the 880 rules.
          report.call(field.tag, "Invalid field linkage in instance #{field_num} of 880")
        else
          tag = linked_tag
        end
      end

      rules = schema[tag]
      unless rules['ind1'].include?(field.indicator1.to_s)
        report.call(field.tag, "Invalid indicator1 value #{field.indicator1} in instance #{field_num}")
      end
      unless rules['ind2'].include?(field.indicator2.to_s)
        report.call(field.tag, "Invalid indicator2 value #{field.indicator2} in instance #{field_num}")
      end

      code_counts = field.subfields.each_with_object(Hash.new(0)) { |subfield, counts| counts[subfield.code] += 1 }
      code_counts.each do |code, count|
        definition = rules['subfields'][code]
        if definition.nil?
          report.call(field.tag, "Invalid subfield code #{code} in instance #{field_num}")
        elsif definition['repeat'] == false && count > 1
          report.call(field.tag, "Non-repeatable subfield code #{code} repeated in instance #{field_num}")
        end
      end
    end
    hash
  end
1153
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc