• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / marc_cleanup / 35e02db8-a93b-4afa-949d-66881f236507

31 Mar 2025 06:07PM UTC coverage: 98.898% (+0.7%) from 98.229%
35e02db8-a93b-4afa-949d-66881f236507

Pull #179

circleci

mzelesky
test and refactor composed_chars_errors?
Pull Request #179: [WIP] test and refactor composed_chars_errors?

36 of 36 new or added lines in 1 file covered. (100.0%)

14 existing lines in 1 file now uncovered.

2064 of 2087 relevant lines covered (98.9%)

4.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.94
/lib/marc_cleanup/record_level.rb
1
# frozen_string_literal: true
2

3
module MarcCleanup
1✔
4
  def non_repeatable_field_errors?(record:, schema: RECORD_SCHEMA)
1✔
5
    field_count = record.fields.group_by(&:tag).map { |key, value| { tag: key, count: value.size } }
53✔
6
    nr_fields = field_count.select do |field|
25✔
7
      field[:count] > 1 &&
28✔
8
      schema[field[:tag]] &&
9
      schema[field[:tag]]['repeat'] == false
10
    end
11
    !nr_fields.empty?
25✔
12
  end
13

14
  def bad_utf8?(record)
1✔
15
    record.to_s.scrub != record.to_s
2✔
16
  end
17

18
  def bad_utf8_scrub_datafield(field)
1✔
19
    new_field = MARC::DataField.new(field.tag,
1✔
20
                                    field.indicator1,
21
                                    field.indicator2)
22
    field.subfields.each do |subfield|
1✔
23
      new_value = bad_utf8_scrub_value(subfield.value)
1✔
24
      new_subfield = MARC::Subfield.new(subfield.code, new_value)
1✔
25
      new_field.append(new_subfield)
1✔
26
    end
27
    new_field
1✔
28
  end
29

30
  def bad_utf8_scrub_value(string)
1✔
31
    string.scrub('').force_encoding('UTF-8')
2✔
32
  end
33

34
  ### Scrub invalid UTF-8 byte sequences within field values,
35
  #     replacing with nothing; indicators, subfield codes, and tags must be
36
  #     handled separately
37
  def bad_utf8_scrub(record)
1✔
38
    record.fields.each_with_index do |field, field_index|
2✔
39
      if field.instance_of?(MARC::DataField)
2✔
40
        record.fields[field_index] = bad_utf8_scrub_datafield(field)
1✔
41
      else
42
        record.fields[field_index].value = bad_utf8_scrub_value(field.value)
1✔
43
      end
44
    end
45
    record
2✔
46
  end
47

48
  ### Flag invalid UTF-8 byte sequences in a string: each invalid sequence is
  ###   replaced by its hex representation wrapped in the marker character ░;
  ###   returns a new string tagged as UTF-8
  def bad_utf8_identify_value(string)
    string.scrub { |bytes| "░#{bytes.unpack1('H*')}░" }
          .force_encoding('UTF-8')
  end
52

53
  def bad_utf8_identify_controlfield(field)
1✔
54
    new_value = bad_utf8_identify_value(field.value)
1✔
55
    MARC::ControlField.new(field.tag, new_value)
1✔
56
  end
57

58
  def bad_utf8_identify_datafield(field)
1✔
59
    new_field = MARC::DataField.new(field.tag)
1✔
60
    new_field.indicator1 = field.indicator1
1✔
61
    new_field.indicator2 = field.indicator2
1✔
62
    field.subfields.each do |subfield|
1✔
63
      new_value = bad_utf8_identify_value(subfield.value)
1✔
64
      new_field.append(MARC::Subfield.new(subfield.code, new_value))
1✔
65
    end
66
    new_field
1✔
67
  end
68

69
  def bad_utf8_identify(record)
1✔
70
    record.fields.each_with_index do |field, field_index|
2✔
71
      record.fields[field_index] = if field.instance_of?(MARC::DataField)
2✔
72
                                     bad_utf8_identify_datafield(field)
1✔
73
                                   else
74
                                     bad_utf8_identify_controlfield(field)
1✔
75
                                   end
76
    end
77
    record
2✔
78
  end
79

80
  def tab_newline_char?(record)
1✔
81
    pattern = /[\x09\n\r]/
5✔
82
    return true if record.leader =~ pattern
5✔
83

84
    record.fields.any? do |field|
4✔
85
      field.to_s =~ pattern
4✔
86
    end
87
  end
88

89
  ### Return a copy of the string in which every character matched by the
  ###   invalid-XML pattern is wrapped in the marker character ░;
  ###   all other characters are copied through unchanged
  def invalid_xml_identify_value(string)
    regex = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
    # Characters are tested one at a time and appended to a mutable buffer
    string.each_char.with_object(''.dup) do |char, flagged|
      flagged << (char =~ regex ? "░#{char}░" : char)
    end
  end
101

102
  ### Build a new data field with the same tag in which invalid XML characters
  ###   are flagged: in the indicators each one is replaced by the marker
  ###   character ░, and subfield values are passed through
  ###   invalid_xml_identify_value
  def invalid_xml_identify_datafield(field)
    regex = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
    new_field = MARC::DataField.new(field.tag)
    # One-for-one replacement keeps each indicator the same length
    new_field.indicator1 = field.indicator1.gsub(regex, '░')
    new_field.indicator2 = field.indicator2.gsub(regex, '░')
    field.subfields.each do |subfield|
      new_value = invalid_xml_identify_value(subfield.value)
      new_field.append(MARC::Subfield.new(subfield.code, new_value))
    end
    new_field
  end
113

114
  def invalid_xml_identify_controlfield(field)
1✔
115
    new_value = invalid_xml_identify_value(field.value)
2✔
116
    MARC::ControlField.new(field.tag, new_value)
2✔
117
  end
118

119
  ### Replaces the invalid XML in the Leader and indicators with the special
120
  ###   character, so as not to invalidate the MARC format
121
  def invalid_xml_identify(record)
    regex = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
    # Swap (rather than wrap) each invalid leader character with the marker ░
    # so the leader keeps the same number of characters
    record.leader = record.leader.gsub(regex, '░')
    # Every field is replaced in place by a flagged copy; the record itself
    # is modified and returned
    record.fields.each_with_index do |field, field_index|
      record.fields[field_index] = if field.instance_of?(MARC::DataField)
                                     invalid_xml_identify_datafield(field)
                                   else
                                     invalid_xml_identify_controlfield(field)
                                   end
    end
    record
  end
133

134
  ### Finds characters that are discouraged in the XML 1.1 standard
135
  def invalid_xml_chars?(record)
1✔
136
    regex = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
5✔
137
    record.to_s =~ regex ? true : false
5✔
138
  end
139

140
  ### Flag combining marks that follow something other than a letter or
  ###   another mark: each such run (the preceding character plus the marks)
  ###   is wrapped in the marker character ░. Field and subfield values are
  ###   modified in place and the same record is returned
  def combining_chars_identify(record)
    pattern = /([^\p{L}\p{M}]\p{M}+)/
    record.fields.each do |field|
      if field.instance_of?(MARC::DataField)
        field.subfields.each { |subfield| subfield.value.gsub!(pattern, '░\1░') }
      else
        field.value.gsub!(pattern, '░\1░')
      end
    end
    record
  end
153

154
  def combining_char_errors?(record)
1✔
155
    pattern = /[^\p{L}\p{M}]\p{M}+/
×
156
    record.to_s =~ pattern ? true : false
×
157
  end
158

159
  ### Report whether a string contains characters in the wrong composition
  ###   form. Two checks are made:
  ###   - the string contains U+0653, U+0654 or U+0655 and is not NFC-normalized
  ###   - any codepoint below 1570 or within 7680..10792 is not NFD-normalized
  ###     when taken on its own
  ###   Returns true or false
  def composed_chars_string_error?(string)
    return true if string =~ /[\u{0653}\u{0654}\u{0655}]/ && !string.unicode_normalized?(:nfc)

    string.codepoints.any? do |codepoint|
      # The range test must be grouped: && binds tighter than ||, so without
      # the parentheses every codepoint below 1570 (all of ASCII included)
      # would count as an error regardless of its normalization
      (codepoint < 1570 || (7680..10_792).cover?(codepoint)) &&
        !codepoint.chr(Encoding::UTF_8).unicode_normalized?(:nfd)
    end
  end
169

170
  def composed_chars_errors?(record)
1✔
171
    record.fields.each do |field|
5✔
172
      if field.instance_of?(MARC::DataField)
5✔
173
        return true if field.subfields.any? do |subfield|
3✔
174
          composed_chars_string_error?(subfield.value)
3✔
175
        end
176
      elsif composed_chars_string_error?(field.value)
2✔
177
        return true
2✔
178
      end
179
    end
180
    false
1✔
181
  end
182

183
  ### Count fields in a record; set :subfields to True to drill down to subfields
184
  def field_count(record, opts = {})
1✔
185
    results = {}
2✔
186
    if opts[:subfields]
2✔
187
      record.fields.each do |field|
1✔
188
        tag = field.tag.scrub('')
3✔
189
        case tag
3✔
190
        when /^00/
191
          results[tag] = 0 unless results[tag]
1✔
192
          results[tag] += 1
1✔
193
        else
194
          field.subfields.each do |subfield|
2✔
195
            key = tag + subfield.code.to_s.scrub('')
4✔
196
            results[key] = 0 unless results[key]
4✔
197
            results[key] += 1
4✔
198
          end
199
        end
200
      end
201
    else
202
      record.fields.each do |field|
1✔
203
        tag = field.tag.scrub('')
3✔
204
        results[tag] = 0 unless results[tag]
3✔
205
        results[tag] += 1
3✔
206
      end
207
    end
208
    results
2✔
209
  end
210

211
  def invalid_xml_fix_datafield(field)
1✔
212
    regex = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
3✔
213
    new_field = MARC::DataField.new(field.tag)
3✔
214
    new_field.indicator1 = field.indicator1.gsub(regex, ' ')
3✔
215
    new_field.indicator2 = field.indicator2.gsub(regex, ' ')
3✔
216
    field.subfields.each do |subfield|
3✔
217
      new_value = subfield.value.gsub(regex, ' ')
3✔
218
      new_field.append(MARC::Subfield.new(subfield.code, new_value))
3✔
219
    end
220
    new_field
3✔
221
  end
222

223
  def invalid_xml_fix_controlfield(field)
1✔
224
    regex = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
2✔
225
    MARC::ControlField.new(field.tag, field.value.gsub(regex, ' '))
2✔
226
  end
227

228
  ### Replace invalid XML 1.0 characters with a space
229
  def invalid_xml_fix(record)
1✔
230
    regex = /[\u0000-\u0008\u000B\u000C\u000E-\u001C\u007F-\u0084\u0086-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF]/
5✔
231
    record.leader = record.leader.gsub(regex, ' ')
5✔
232
    record.fields.each_with_index do |field, field_index|
5✔
233
      record.fields[field_index] = if field.instance_of?(MARC::DataField)
5✔
234
                                     invalid_xml_fix_datafield(field)
3✔
235
                                   else
236
                                      invalid_xml_fix_controlfield(field)
2✔
237
                                   end
238
    end
239
    record
5✔
240
  end
241

242
  def tab_newline_fix_datafield(field)
1✔
243
    regex = /[\u0009\n\r]/
3✔
244
    new_field = MARC::DataField.new(field.tag)
3✔
245
    new_field.indicator1 = field.indicator1.gsub(regex, ' ')
3✔
246
    new_field.indicator2 = field.indicator2.gsub(regex, ' ')
3✔
247
    field.subfields.each do |subfield|
3✔
248
      new_value = subfield.value.gsub(regex, ' ')
3✔
249
      new_field.append(MARC::Subfield.new(subfield.code, new_value))
3✔
250
    end
251
    new_field
3✔
252
  end
253

254
  def tab_newline_fix_controlfield(field)
1✔
255
    regex = /[\u0009\n\r]/
2✔
256
    MARC::ControlField.new(field.tag, field.value.gsub(regex, ' '))
2✔
257
  end
258

259
  ### Replace tab and newline characters with a space
260
  def tab_newline_fix(record)
1✔
261
    record.leader = record.leader.gsub(/[\u0009\n\r]/, ' ')
5✔
262
    record.fields.each_with_index do |field, field_index|
5✔
263
      record.fields[field_index] = if field.instance_of?(MARC::DataField)
5✔
264
                                     tab_newline_fix_datafield(field)
3✔
265
                                   else
266
                                     tab_newline_fix_controlfield(field)
2✔
267
                                   end
268
    end
269
    record
5✔
270
  end
271

272
  ## Can delete fields based on tags alone, or with
273
  ## optional indicator values provided in arrays
274
  def field_delete_by_tags(record:, tags:, indicators: {})
    full_indicator_array = [' '] + %w[0 1 2 3 4 5 6 7 8 9]
    # Read the filters into locals instead of writing the defaults back into
    # the hash the caller passed in
    ind1_values = indicators[:ind1] || full_indicator_array
    ind2_values = indicators[:ind2] || full_indicator_array
    record.fields.delete_if do |field|
      next false unless tags.include?(field.tag)
      # A field without indicator accessors (presumably a control field)
      # cannot be filtered by indicator, so the tag match alone decides
      next true unless field.respond_to?(:indicator1)

      ind1_values.include?(field.indicator1) && ind2_values.include?(field.indicator2)
    end
    record
  end
286

287
  def recap_fixes(record)
1✔
UNCOV
288
    record = bad_utf8_scrub(record)
×
UNCOV
289
    record = field_delete_by_tags(record: record, tags: %w[959 856])
×
UNCOV
290
    record = leaderfix(record)
×
UNCOV
291
    record = extra_space_fix(record)
×
UNCOV
292
    record = invalid_xml_fix(record)
×
UNCOV
293
    record = composed_chars_normalize(record)
×
UNCOV
294
    record = tab_newline_fix(record)
×
UNCOV
295
    empty_subfield_fix(record)
×
296
  end
297

298
  ### Perform multiple field removals on a record;
299
  ###   input is an array of hashes with the following attributes:
300
  ###   - source_field: ruby-marc field (DataField or ControlField)
301
  ###   - ignore_indicators: optional Boolean to specify whether to ignore
302
  ###     indicators for this removal
303
  ###   - case_sensitive: optional Boolean to specify whether matching
304
  ###     should be case-sensitive
305
  def remove_fields(field_array:, record:)
    field_array.each do |field|
      # fetch applies the defaults (indicators respected, case-sensitive)
      # without writing them into the caller's hashes
      record = remove_field(source_field: field[:source_field],
                            record: record,
                            ignore_indicators: field.fetch(:ignore_indicators, false),
                            case_sensitive: field.fetch(:case_sensitive, true))
    end
    record
  end
316

317
  ### Remove field from a record that matches a supplied source field
318
  ###   which can be either a ControlField or a DataField
319
  def remove_field(record:, source_field:, ignore_indicators: false, case_sensitive: true)
1✔
320
    start_pos = field_content_start(source_field: source_field,
11✔
321
                                    ignore_indicators: ignore_indicators)
322
    target_fields = replace_field_targets(record: record,
11✔
323
                                          source_field: source_field,
324
                                          start_pos: start_pos,
325
                                          case_sensitive: case_sensitive)
326
    target_fields.each do |field|
11✔
327
      record.fields.delete(field)
6✔
328
    end
329
    record
11✔
330
  end
331

332
  def replace_field_targets(record:, start_pos:, case_sensitive:, source_field:)
1✔
333
    source_field_content = source_field.to_s[start_pos..]
24✔
334
    if case_sensitive
24✔
335
      record.fields(source_field.tag).select do |field|
20✔
336
        field.to_s[start_pos..] == source_field_content
20✔
337
      end
338
    else
339
      record.fields(source_field.tag).select do |field|
4✔
340
        field.to_s[start_pos..].casecmp?(source_field_content)
4✔
341
      end
342
    end
343
  end
344

345
  ### Replace field from a record that matches a supplied source field
346
  ###   with the supplied replacement field, which can be either a ControlField
347
  ###   or a DataField
348
  def replace_field(source_field:, replacement_field:, record:, ignore_indicators: false, case_sensitive: true)
1✔
349
    start_pos = field_content_start(source_field: source_field,
13✔
350
                                    ignore_indicators: ignore_indicators)
351
    target_fields = replace_field_targets(record: record,
13✔
352
                                          source_field: source_field,
353
                                          start_pos: start_pos,
354
                                          case_sensitive: case_sensitive)
355
    target_fields.each do |field|
13✔
356
      record.fields[record.fields.index(field)] = replacement_field
7✔
357
    end
358
    record
13✔
359
  end
360

361
  def field_content_start(source_field:, ignore_indicators:)
1✔
362
    if ignore_indicators && source_field.instance_of?(MARC::DataField)
24✔
363
      7
4✔
364
    else
365
      4
20✔
366
    end
367
  end
368

369
  ### Perform multiple field replacements on a record;
370
  ###   input is an array of hashes with the following attributes:
371
  ###   - source_field: ruby-marc field (DataField or ControlField)
372
  ###   - replacement_field: ruby-marc field (DataField or ControlField)
373
  ###   - ignore_indicators: optional Boolean to specify whether to ignore
374
  ###     indicators for this replacement
375
  ###   - case_sensitive: optional Boolean to specify whether matching
376
  ###     should be case-sensitive
377
  def replace_fields(field_array:, record:)
    field_array.each do |replacement|
      # fetch applies the defaults (indicators respected, case-sensitive)
      # without writing them into the caller's hashes
      record = replace_field(source_field: replacement[:source_field],
                             replacement_field: replacement[:replacement_field],
                             record: record,
                             ignore_indicators: replacement.fetch(:ignore_indicators, false),
                             case_sensitive: replacement.fetch(:case_sensitive, true))
    end
    record
  end
389

390
  def sort_0xx_fields(source:, new_record:)
1✔
391
    source.fields('001'..'009').sort_by(&:tag).each do |field|
1✔
392
      new_record.append(field)
2✔
393
    end
394
    source.fields('010'..'099').each do |field|
1✔
395
      new_record.append(field)
2✔
396
    end
397
    new_record
1✔
398
  end
399

400
  ### Default field sort: sort fixed fields numerically, then sort the rest
401
  ###   in groups, leaving the order of fields within the group alone
402
  def field_sort(record)
1✔
403
    new_rec = MARC::Record.new
1✔
404
    new_rec.leader = record.leader
1✔
405
    new_rec = sort_0xx_fields(source: record, new_record: new_rec)
1✔
406
    1.upto(9).each do |tag_start|
1✔
407
      record.fields("#{tag_start}00".."#{tag_start}99").each do |field|
9✔
408
        new_rec.append(field)
19✔
409
      end
410
    end
411
    new_rec
1✔
412
  end
413

414
  def remove_duplicate_fields(record)
1✔
415
    field_array = []
1✔
416
    record.fields.reverse_each do |field|
1✔
417
      field_index = record.fields.index(field)
3✔
418
      string = field.to_s
3✔
419
      if field_array.include?(string)
3✔
420
        record.fields.delete_at(field_index)
1✔
421
      else
422
        field_array << string
2✔
423
      end
424
    end
425
    record
1✔
426
  end
427

428
  ### Duplicate record to preserve original when making modifications
429
  def duplicate_record(record)
    # This file enables frozen_string_literal, so a bare '' is frozen and
    # StringIO refuses to open a frozen string for writing; .dup supplies a
    # writable buffer
    raw_marc = ''.dup
    writer = MARC::Writer.new(StringIO.new(raw_marc, 'w'))
    writer.write(record)
    writer.close
    # Read the serialized bytes back to obtain an independent record
    reader = MARC::Reader.new(StringIO.new(raw_marc, 'r'),
                              external_encoding: 'UTF-8',
                              invalid: :replace,
                              replace: '')
    reader.first
  end
440

441
  def blvl_ab_valid?(record)
1✔
442
    record['773'] ? true : false
2✔
443
  end
444

445
  ### Validity check built from two groups of fields, both of which must be
  ###   satisfied:
  ###   - at least one of the fields in the first list has a subfield a
  ###   - a 260 has subfield a or b, a 264 has subfield b, or a 533 has
  ###     subfield c
  ###   Returns true or false
  def ftype_ac_cdm_valid?(record)
    present_fields1 = record.fields(
      %w[
        020
        024
        027
        088
        100
        110
        111
        300
        533
        700
        710
        711
        800
        810
        811
        830
      ]
    )
    present_fields2 = record.fields(%w[260 264 533])
    return false if present_fields1.empty?
    return false if present_fields2.empty?
    # The first-group result was previously computed but never consulted;
    # enforce it the way the sibling validators do
    return false unless present_fields1.any? { |field| field['a'] }

    present_fields2.each do |field|
      case field.tag
      when '260'
        return true if field['a'] || field['b']
      when '264'
        return true if field['b']
      when '533'
        return true if field['c']
      end
    end
    false
  end
486

487
  def ftype_ac_is_valid?(record)
1✔
488
    present_fields = record.fields(%w[260 264 533])
5✔
489
    return false if present_fields.empty?
5✔
490

491
    present_fields.each do |field|
4✔
492
      case field.tag
4✔
493
      when '260'
494
        return true if field['a'] || field['b']
1✔
495
      when '264'
496
        return true if field['b']
2✔
497
      when '533'
498
        return true if field['c']
1✔
499
      end
500
    end
501
    false
1✔
502
  end
503

504
  def ftype_dt_cdm_valid?(record)
1✔
505
    present_fields = record.fields(
7✔
506
      %w[
507
        020
508
        024
509
        027
510
        028
511
        088
512
        100
513
        110
514
        111
515
        300
516
        533
517
        700
518
        710
519
        711
520
        800
521
        810
522
        811
523
        830
524
      ]
525
    )
526
    return false if present_fields.empty?
7✔
527

528
    present_fields.each do |field|
6✔
529
      case field.tag
6✔
530
      when '300'
531
        return true if field['a'] || field['f']
2✔
532
      when '533'
533
        return true if field['e']
2✔
534
      else
535
        return true if field['a']
2✔
536
      end
537
    end
538
    false
3✔
539
  end
540

541
  def ftype_e_cdims_valid?(record)
1✔
542
    present_fields1 = record.fields(%w[007 300 338 533])
6✔
543
    present_fields2 = record.fields(%w[260 264 533])
6✔
544
    return false if present_fields1.empty?
6✔
545
    return false if present_fields2.empty?
5✔
546

547
    f1_criteria = false
4✔
548
    present_fields1.each do |field|
4✔
549
      case field.tag
5✔
550
      when '007'
551
        f1_criteria = true if %w[a d r].include? field.value[0]
1✔
552
      when '300'
553
        f1_criteria = true if field['a']
1✔
554
      when '338'
555
        f1_criteria = true if field['a'] || field['b']
1✔
556
      when '533'
557
        f1_criteria = true if field['e']
2✔
558
      end
559
    end
560
    return false unless f1_criteria
4✔
561

562
    present_fields2.each do |field|
3✔
563
      case field.tag
4✔
564
      when '260'
565
        return true if field['a'] || field['b']
1✔
566
      when '264'
567
        return true if field['b']
1✔
568
      when '533'
569
        return true if field['c']
2✔
570
      end
571
    end
572
    false
3✔
573
  end
574

575
  def ftype_f_cdm_valid?(record)
1✔
576
    present_fields = record.fields(
5✔
577
      %w[
578
        007
579
        300
580
        338
581
        533
582
      ]
583
    )
584
    return false if present_fields.empty?
5✔
585

586
    present_fields.each do |field|
4✔
587
      case field.tag
4✔
588
      when '007'
589
        return true if %w[a d r].include? field.value[0]
1✔
590
      when '300'
591
        return true if field['a'] || field['f']
1✔
592
      when '338'
593
        return true if field['a'] || field['b']
1✔
594
      when '533'
595
        return true if field['e']
1✔
596
      end
597
    end
598
    false
2✔
599
  end
600

601
  def ftype_g_cdm_valid?(record)
1✔
602
    present_fields = record.fields(
6✔
603
      %w[
604
        007
605
        008
606
        300
607
        338
608
        345
609
        346
610
        538
611
      ]
612
    )
613
    present_fields.each do |field|
6✔
614
      case field.tag
11✔
615
      when '007'
616
        return true if %w[a d r].include? field.value[0]
1✔
617
      when '008'
618
        return true if %w[g k o r].include?(record.leader[6]) && %w[f m p s t v].include?(field.value[33])
6✔
619
      when '300'
620
        return true if field['a']
1✔
621
      when '345'
622
        return true
1✔
623
      when '346'
624
        return true
1✔
625
      when '538'
626
        return true if field['a']
1✔
627
      end
628
    end
629
    false
3✔
630
  end
631

632
  def ftype_g_is_valid?(record)
1✔
633
    present_fields1 = record.fields(
8✔
634
      %w[
635
        007
636
        008
637
        300
638
        338
639
        345
640
        346
641
        538
642
      ]
643
    )
644
    present_fields2 = record.fields(%w[260 264 533])
8✔
645
    return false if present_fields2.empty?
8✔
646

647
    f1_criteria = false
7✔
648
    present_fields1.each do |field|
7✔
649
      case field.tag
13✔
650
      when '007'
651
        f1_criteria = true if %w[g m v].include? field.value[0]
1✔
652
      when '008'
653
        f1_criteria = true if %w[f m p s t v].include?(field.value[33])
7✔
654
      when '300'
655
        f1_criteria = true if field['a']
1✔
656
      when '338'
657
        f1_criteria = true if field['a'] || field['b']
1✔
658
      when '345'
659
        f1_criteria = true
1✔
660
      when '346'
661
        f1_criteria = true
1✔
662
      when '538'
663
        f1_criteria = true if field['a']
1✔
664
      end
665
    end
666
    return false unless f1_criteria
7✔
667

668
    present_fields2.each do |field|
6✔
669
      case field.tag
6✔
670
      when '260'
671
        return true if field['a'] || field['b']
1✔
672
      when '264'
673
        return true if field['b']
4✔
674
      when '533'
675
        return true if field['c']
1✔
676
      end
677
    end
678
    false
2✔
679
  end
680

681
  def ftype_ij_cdm_valid?(record)
1✔
682
    present_fields = record.fields(
6✔
683
      %w[
684
        007
685
        300
686
        338
687
        344
688
        538
689
      ]
690
    )
691
    return false if present_fields.empty?
6✔
692

693
    present_fields.each do |field|
5✔
694
      case field.tag
5✔
695
      when '007'
696
        return true if field.value[0] == 's'
1✔
697
      when '300'
698
        return true if field['a']
1✔
699
      when '338'
700
        return true if field['a'] || field['b']
1✔
701
      when '344'
702
        return true
1✔
703
      when '538'
704
        return true if field['a']
1✔
705
      end
706
    end
707
    false
2✔
708
  end
709

710
  def ftype_ij_is_valid?(record)
1✔
711
    present_fields1 = record.fields(
6✔
712
      %w[
713
        007
714
        300
715
        338
716
        344
717
        538
718
      ]
719
    )
720
    present_fields2 = record.fields(%w[260 264 533])
6✔
721
    return false if present_fields1.empty?
6✔
722
    return false if present_fields2.empty?
6✔
723

724
    f1_criteria = false
5✔
725
    present_fields1.each do |field|
5✔
726
      case field.tag
5✔
727
      when '007'
728
        f1_criteria = true if field.value[0] == 's'
1✔
729
      when '300'
730
        f1_criteria = true if field['a']
1✔
731
      when '338'
732
        f1_criteria = true if field['a'] || field['b']
1✔
733
      when '344'
734
        f1_criteria = true
1✔
735
      when '538'
736
        f1_criteria = true if field['a']
1✔
737
      end
738
    end
739
    return false unless f1_criteria
5✔
740

741
    present_fields2.each do |field|
5✔
742
      case field.tag
5✔
743
      when '260'
744
        return true if field['a'] || field['b']
1✔
745
      when '264'
746
        return true if field['b']
1✔
747
      when '533'
748
        return true if field['c']
3✔
749
      end
750
    end
751
    false
3✔
752
  end
753

754
  def ftype_k_cdm_valid?(record)
1✔
755
    present_fields = record.fields(
4✔
756
      %w[
757
        007
758
        008
759
        300
760
        338
761
      ]
762
    )
763
    present_fields.each do |field|
4✔
764
      case field.tag
7✔
765
      when '007'
766
        return true if field.value[0] == 'k'
1✔
767
      when '008'
768
        return true if %w[a c k l n o p].include?(field.value[33])
4✔
769
      when '300'
770
        return true if field['a']
1✔
771
      when '338'
772
        return true if field['a'] || field['b']
1✔
773
      end
774
    end
775
    false
1✔
776
  end
777

778
  ### Validity check built from two groups of fields, both of which must be
  ###   satisfied:
  ###   - a 007 starting with k, an 008 whose position 33 is one of
  ###     a c k l n o p, a 300 with subfield a, or a 338 with subfield a or b
  ###   - a 260 with subfield a or b, a 264 with subfield b, or a 533 with
  ###     subfield c
  ###   Returns true or false
  def ftype_k_is_valid?(record)
    present_fields1 = record.fields(
      %w[
        007
        008
        300
        338
      ]
    )
    present_fields2 = record.fields(%w[260 264 533])
    return false if present_fields2.empty?

    f1_criteria = false
    present_fields1.each do |field|
      case field.tag
      when '007'
        f1_criteria = true if field.value[0] == 'k'
      when '008'
        # Record the match like the other branches do; returning true here
        # would skip the second-group check below
        f1_criteria = true if %w[a c k l n o p].include?(field.value[33])
      when '300'
        f1_criteria = true if field['a']
      when '338'
        f1_criteria = true if field['a'] || field['b']
      end
    end
    return false unless f1_criteria

    present_fields2.each do |field|
      case field.tag
      when '260'
        return true if field['a'] || field['b']
      when '264'
        return true if field['b']
      when '533'
        return true if field['c']
      end
    end
    false
  end
817

818
  def ftype_m_cdm_valid?(record)
1✔
819
    present_fields = record.fields(
6✔
820
      %w[
821
        007
822
        300
823
        338
824
        347
825
        538
826
      ]
827
    )
828
    return false if present_fields.empty?
6✔
829

830
    present_fields.each do |field|
5✔
831
      case field.tag
5✔
832
      when '007'
833
        return true if field.value[0] == 'c'
1✔
834
      when '300'
835
        return true if field['a']
1✔
836
      when '338'
837
        return true if field['a'] || field['b']
1✔
838
      when '347'
839
        return true
1✔
840
      when '538'
841
        return true if field['a']
1✔
842
      end
843
    end
844
    false
2✔
845
  end
846

847
  def ftype_m_is_valid?(record)
1✔
848
    present_fields1 = record.fields(
5✔
849
      %w[
850
        007
851
        300
852
        338
853
        347
854
        538
855
      ]
856
    )
857
    present_fields2 = record.fields(%w[260 264 533])
5✔
858
    return false if present_fields1.empty?
5✔
859
    return false if present_fields2.empty?
5✔
860

861
    f1_criteria = false
5✔
862
    present_fields1.each do |field|
5✔
863
      case field.tag
5✔
864
      when '007'
865
        f1_criteria = true if field.value[0] == 'c'
1✔
866
      when '300'
867
        f1_criteria = true if field['a']
1✔
868
      when '338'
869
        f1_criteria = true if field['a'] || field['b']
1✔
870
      when '347'
871
        f1_criteria = true
1✔
872
      when '538'
873
        f1_criteria = true if field['a']
1✔
874
      end
875
    end
876
    return false unless f1_criteria
5✔
877

878
    present_fields2.each do |field|
5✔
879
      case field.tag
5✔
880
      when '260'
881
        return true if field['a'] || field['b']
3✔
882
      when '264'
883
        return true if field['b']
1✔
884
      when '533'
885
        return true if field['c']
1✔
886
      end
887
    end
888
    false
2✔
889
  end
890

891
  def ftype_or_cdm_valid?(record)
1✔
892
    present_fields = record.fields(
3✔
893
      %w[
894
        008
895
        300
896
        338
897
      ]
898
    )
899
    present_fields.each do |field|
3✔
900
      case field.tag
5✔
901
      when '008'
902
        return true if %w[a b c d g q r w].include?(field.value[33])
3✔
903
      when '300'
904
        return true if field['a']
1✔
905
      when '338'
906
        return true if field['a'] || field['b']
1✔
907
      end
908
    end
909
    false
1✔
910
  end
911

912
  ### Validity check built from two groups of fields, both of which must be
  ###   satisfied:
  ###   - an 008 whose position 33 is one of a b c d g q r w, a 300 with
  ###     subfield a, or a 338 with subfield a or b
  ###   - a 260 with subfield a or b, a 264 with subfield b, or a 533 with
  ###     subfield c
  ###   Returns true or false
  def ftype_or_is_valid?(record)
    present_fields1 = record.fields(
      %w[
        008
        300
        338
      ]
    )
    present_fields2 = record.fields(%w[260 264 533])
    return false if present_fields2.empty?

    f1_criteria = false
    present_fields1.each do |field|
      case field.tag
      when '008'
        # Record the match like the other branches do; returning true here
        # would skip the second-group check below
        f1_criteria = true if %w[a b c d g q r w].include?(field.value[33])
      when '300'
        f1_criteria = true if field['a']
      when '338'
        f1_criteria = true if field['a'] || field['b']
      end
    end
    return false unless f1_criteria

    present_fields2.each do |field|
      case field.tag
      when '260'
        return true if field['a'] || field['b']
      when '264'
        return true if field['b']
      when '533'
        return true if field['c']
      end
    end
    false
  end
948

949
  ### Field check that sparse_record? applies when leader/06 is p and
  ### leader/07 is c or d.
  ### Returns true when any of the 100, 110, 111, 300, 338, 700, 710 or 711
  ### fields carries subfield a; a 300 also qualifies through subfield f and
  ### a 338 through subfield b. Returns false otherwise.
  def ftype_p_cd_valid?(record)
    tags = %w[100 110 111 300 338 700 710 711]
    record.fields(tags).any? do |candidate|
      # Only 300 and 338 accept a second subfield code besides $a.
      alternate =
        case candidate.tag
        when '300' then 'f'
        when '338' then 'b'
        end
      candidate['a'] || (alternate && candidate[alternate])
    end
  end
976

977
  ### Returns one character of the record's 008 value: index 23 when
  ### leader/06 is one of a c d i j m p t, index 29 for any other type.
  ### Expects the record to have an 008 field (record['008'] must not be nil).
  def bib_form(record)
    offset = %w[a c d i j m p t].include?(record.leader[6]) ? 23 : 29
    record['008'].value[offset]
  end
980

981
  ### Returns true when the record fails the minimum-content checks below,
  ### false when it passes them.
  ### A record is reported as sparse when:
  ###   - it does not have exactly one 008 field;
  ###   - the value returned by bib_form is not one of the accepted codes
  ###     (a single space, a b c d f o q r s);
  ###   - it has no 245 field with subfield a or k;
  ###   - the validator selected by leader/06 (type) and leader/07 (blvl)
  ###     returns a falsy value.
  ### Type/level combinations with no validator are never reported as sparse.
  def sparse_record?(record)
    return true if record.fields('008').size != 1

    type = record.leader[6]
    blvl = record.leader[7]
    return true unless %w[\  a b c d f o q r s].include?(bib_form(record))

    title = record['245']
    return true unless title && (title['a'] || title['k'])

    # Ordered rules: [leader/06 values, leader/07 values, validator name].
    # The first row matching both positions wins.
    rules = [
      [%w[a c], %w[c d m], :ftype_ac_cdm_valid?],
      [%w[a c], %w[i s], :ftype_ac_is_valid?],
      [%w[d t], %w[c d m], :ftype_dt_cdm_valid?],
      [%w[e], %w[c d i m s], :ftype_e_cdims_valid?],
      [%w[f], %w[c d m], :ftype_f_cdm_valid?],
      [%w[g], %w[c d m], :ftype_g_cdm_valid?],
      [%w[g], %w[i s], :ftype_g_is_valid?],
      [%w[i j], %w[c d m], :ftype_ij_cdm_valid?],
      [%w[i j], %w[i s], :ftype_ij_is_valid?],
      [%w[k], %w[c d m], :ftype_k_cdm_valid?],
      [%w[k], %w[i s], :ftype_k_is_valid?],
      [%w[m], %w[c d m], :ftype_m_cdm_valid?],
      [%w[m], %w[i s], :ftype_m_is_valid?],
      [%w[o r], %w[c d m], :ftype_or_cdm_valid?],
      [%w[o r], %w[i s], :ftype_or_is_valid?],
      [%w[p], %w[c d], :ftype_p_cd_valid?]
    ]

    validator =
      if %w[a b].include?(blvl)
        # Levels a and b are handled the same way for every record type.
        :blvl_ab_valid?
      else
        matched = rules.find { |types, levels, _name| types.include?(type) && levels.include?(blvl) }
        matched && matched.last
      end
    return false unless validator

    !send(validator, record)
  end
1032

1033
  ### `schema` is a YAML file loaded as a hash;
1034
  ### schema = YAML.load_file("#{ROOT_DIR}/lib/marc_cleanup/variable_field_schema.yml")
1035
  ### Validates a record against `schema` and returns a hash with these keys:
  ###   :multiple_1xx, :has_130_240, :multiple_no_245,
  ###   :non_repeatable_field_errors - results of the like-named checks
  ###   :invalid_tags   - tags of data fields not starting with 9 that are
  ###                     missing from the schema
  ###   :invalid_fields - hash of tag => array of error messages covering
  ###                     880 linkage, indicator values, unknown subfield
  ###                     codes and repeated non-repeatable subfields
  ### Only fields tagged 010 through 899 that appear in the schema are checked
  ### field by field. An 880 field is validated against the schema entry of
  ### the tag named in its $6, but its errors are filed under 880.
  ### "instance N" in a message is the 1-based position of the field among
  ### the fields of the record that share its tag.
  def validate_marc(record:, schema: RECORD_SCHEMA)
    hash = {}
    hash[:multiple_1xx] = multiple_1xx?(record)
    hash[:has_130_240] = has_130_240?(record)
    hash[:multiple_no_245] = multiple_no_245?(record)
    hash[:non_repeatable_field_errors] = non_repeatable_field_errors?(record: record, schema: schema)
    unknown_fields = record.fields.select do |field|
      field.instance_of?(MARC::DataField) && field.tag[0] != '9' && !schema.key?(field.tag)
    end
    hash[:invalid_tags] = unknown_fields.map(&:tag)
    hash[:invalid_fields] = {}
    add_error = ->(field_tag, message) { (hash[:invalid_fields][field_tag] ||= []) << message }

    # Running count per tag. The previous lookup used a block that ignored its
    # argument, so every message reported instance 1.
    seen = Hash.new(0)
    record.fields('010'..'899').each do |field|
      field_num = (seen[field.tag] += 1)
      next unless schema[field.tag]

      tag = field.tag
      if field.tag == '880'
        links = field.subfields.select { |s| s.code == '6' }
        if links.empty?
          add_error.call(field.tag, "No field linkage in instance #{field_num} of 880")
        elsif links.size > 1
          add_error.call(field.tag, "Multiple field links in instance #{field_num} of 880")
        elsif field['6'] !~ /^[0-9]{3}-[0-9]+/
          add_error.call(field.tag, "Invalid field linkage in instance #{field_num} of 880")
        else
          # Validate the 880 against the rules of the field it is linked to.
          tag = field['6'].gsub(/^([0-9]{3})-.*$/, '\1')
          unless schema[tag]
            add_error.call(field.tag, "Invalid linked field tag #{tag} in instance #{field_num} of 880")
          end
        end
      end
      next unless schema[tag]

      rules = schema[tag]
      unless rules['ind1'].include?(field.indicator1.to_s)
        add_error.call(field.tag, "Invalid indicator1 value #{field.indicator1} in instance #{field_num}")
      end
      unless rules['ind2'].include?(field.indicator2.to_s)
        add_error.call(field.tag, "Invalid indicator2 value #{field.indicator2} in instance #{field_num}")
      end

      code_counts = field.subfields.each_with_object(Hash.new(0)) { |subfield, counts| counts[subfield.code] += 1 }
      code_counts.each do |code, count|
        subfield_rule = rules['subfields'][code]
        if subfield_rule.nil?
          add_error.call(field.tag, "Invalid subfield code #{code} in instance #{field_num}")
        elsif subfield_rule['repeat'] == false && count > 1
          add_error.call(field.tag, "Non-repeatable subfield code #{code} repeated in instance #{field_num}")
        end
      end
    end
    hash
  end
1105

1106
  ### When the 040$e says rda, position 18 of the leader must be c or i.
1107
  ### Returns true when some 040 field has 'rda' as its $e value while
  ### leader position 18 is neither c nor i; returns false otherwise.
  def rda_convention_mismatch(record)
    declares_rda = record.fields('040').any? { |f040| f040['e'] == 'rda' }
    return false unless declares_rda

    !%w[c i].include?(record.leader[18])
  end
1111

1112
  ### Sets leader position 18 to i, in place, when rda_convention_mismatch
  ### reports a mismatch; always returns the same record object.
  def rda_convention_correction(record)
    record.leader[18] = 'i' if rda_convention_mismatch(record)
    record
  end
1118

1119
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc