• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 373ad6ff-fad2-405a-ab26-3d30fb5ceecf

24 Dec 2024 08:24PM UTC coverage: 91.938% (+0.08%) from 91.859%
373ad6ff-fad2-405a-ab26-3d30fb5ceecf

Pull #2563

circleci

maxkadel
Put attaching xml files in their own batch
Pull Request #2563: I2321 Shift SCSB full index tasks into separate background jobs

152 of 156 new or added lines in 10 files covered. (97.44%)

65 existing lines in 17 files now uncovered.

3478 of 3783 relevant lines covered (91.94%)

366.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.2
/marc_to_solr/lib/genre.rb
1
# This class is responsible for listing the
2
# genres present in a given MARC record
3
class Genre
  # Values of subfield $2 that mark a 650/655 heading (second indicator 7)
  # as belonging to a genre vocabulary.
  SUBJECT_GENRE_VOCABULARIES = ['sk', 'aat', 'lcgft', 'rbbin', 'rbgenr', 'rbmscv',
                                'rbpap', 'rbpri', 'rbprov', 'rbpub', 'rbtyp', 'homoit'].freeze

  # $x values that count as a genre only on an exact match.
  GENRE_TERMS = [
    'Bibliography',
    'Biography',
    'Catalogs',
    'Catalogues raisonnes',
    'Commentaries',
    'Congresses',
    'Diaries',
    'Dictionaries',
    'Drama',
    'Encyclopedias',
    'Exhibitions',
    'Fiction',
    'Guidebooks',
    'In art',
    'Indexes',
    'Librettos',
    'Manuscripts',
    'Newspapers',
    'Periodicals',
    'Pictorial works',
    'Poetry',
    'Portraits',
    'Scores',
    'Songs and music',
    'Sources',
    'Statistics',
    'Texts',
    'Translations'
  ].freeze

  # $x values that count as a genre when they merely start with one of these.
  GENRE_STARTING_TERMS = [
    'Census',
    'Maps',
    'Methods',
    'Parts',
    'Personal narratives',
    'Scores and parts',
    'Study and teaching',
    'Translations into '
  ].freeze

  # Lower-cased terms whose presence in a heading suggests a primary source.
  PRIMARY_SOURCE_GENRES = [
    'atlases',
    'charters',
    'correspondence',
    'diaries',
    'documents',
    'interview',
    'interviews',
    'letters',
    'manuscripts',
    'maps',
    'notebooks, sketchbooks, etc',
    'oral history',
    'pamphlets',
    'personal narratives',
    'photographs',
    'pictorial works',
    'sources',
    'speeches',
    'statistics'
  ].freeze

  # One precompiled alternation over PRIMARY_SOURCE_GENRES. Each term must be
  # delimited by a line boundary or a non-word character on both sides, which
  # is what the per-term interpolated regex used to check on every call.
  PRIMARY_SOURCE_PATTERN = /(^|\W)(?:#{Regexp.union(PRIMARY_SOURCE_GENRES).source})($|\W)/.freeze

  # 008 position 33 codes treated as marking a literary work.
  LITERARY_FORM_CODES = %w[1 d e f j p].freeze

  # Leader positions 6-7 treated as describing a book.
  BOOK_LEADER_PATTERN = /a[acdim]/.freeze

  # Matches the empty string as well as whitespace-only strings. The earlier
  # /^\s+$/ check required at least one whitespace character, so an empty
  # genre slipped through, and its line anchors could also match a single
  # whitespace-only line inside a longer multi-line value.
  BLANK_GENRE_PATTERN = /\A\s*\z/.freeze

  private_constant :GENRE_TERMS, :GENRE_STARTING_TERMS, :PRIMARY_SOURCE_GENRES,
                   :PRIMARY_SOURCE_PATTERN, :LITERARY_FORM_CODES,
                   :BOOK_LEADER_PATTERN, :BLANK_GENRE_PATTERN

  # @param record the MARC record to inspect; it must respond to #fields,
  #   #leader and #[] as used below
  def initialize(record)
    @record = record
  end

  # 600/610/650/651 $v, $x filtered
  # 655 $a, $v, $x filtered
  #
  # @return [Array<String>] the distinct genres found, memoized after the
  #   first call
  def to_a
    @as_array ||= [
      genres_from_subfield_x,
      genres_from_subject_vocabularies,
      genres_from_subfield_v,
      genres_from_primary_source_mapping,
      genres_from_autobiography
    ].flatten(1).compact.uniq
  end

  private

    attr_reader :record

    # $x subdivisions (second indicator 0) that look like genre terms.
    # May contain nils; #to_a compacts them away.
    def genres_from_subfield_x
      Traject::MarcExtractor.cached('600|*0|x:610|*0|x:611|*0|x:630|*0|x:650|*0|x:651|*0|x:655|*0|x').collect_matching_lines(record) do |field, spec, extractor|
        genre = trimmed_first_subfield(extractor, field, spec)
        genre if genre && likely_genre_term(genre)
      end
    end

    # 650 $v and 655 $a/$v headings (second indicator 7) whose $2 names one
    # of SUBJECT_GENRE_VOCABULARIES. Blank values are logged and skipped.
    def genres_from_subject_vocabularies
      Traject::MarcExtractor.cached('650|*7|v:655|*7|a:655|*7|v').collect_matching_lines(record) do |field, spec, extractor|
        genre = trimmed_first_subfield(extractor, field, spec)
        next if genre.nil?
        if blank_genre?(genre)
          log_blank_genre
          next
        end
        # only include heading if it is part of the vocabulary
        genre if from_genre_vocabulary?(field)
      end
    end

    # $v subdivisions and 655 $a headings (second indicator 0).
    # Blank values are logged and skipped.
    def genres_from_subfield_v
      Traject::MarcExtractor.cached('600|*0|v:610|*0|v:611|*0|v:630|*0|v:650|*0|v:651|*0|v:655|*0|a:655|*0|v').collect_matching_lines(record) do |field, spec, extractor|
        genre = trimmed_first_subfield(extractor, field, spec)
        next if genre.nil?
        if blank_genre?(genre)
          log_blank_genre
          next
        end
        genre
      end
    end

    # ['Primary sources'] when any heading contains a primary-source term and
    # the record is not a literary work; otherwise [].
    def genres_from_primary_source_mapping
      potential_genres = Traject::MarcExtractor.cached('600|*0|vx:610|*0|vx:611|*0|vx:630|*0|vx:650|*0|vx:651|*0|vx:655|*0|a:655|*0|vx').collect_matching_lines(record) do |field, spec, extractor|
        extractor.collect_subfields(field, spec)
      end
      return [] unless potential_genres.any? { |genre| genre_term_indicates_primary_source?(genre) }

      primary_sources_unless_literary
    end

    # ['Primary sources'] for a biography whose author is also its subject,
    # unless the record is a literary work; otherwise [].
    def genres_from_autobiography
      return [] unless biography? && author_matches_subject?

      primary_sources_unless_literary
    end

    # Shared tail of the two primary-source rules above.
    def primary_sources_unless_literary
      literary_work? ? [] : ['Primary sources']
    end

    # @param genre [String] a raw heading value
    # @return [Boolean] whether the lower-cased, stripped value (minus one
    #   trailing period) contains a delimited primary-source term
    def genre_term_indicates_primary_source?(genre)
      normalized_genre = genre.downcase.strip.delete_suffix('.')
      PRIMARY_SOURCE_PATTERN.match?(normalized_genre)
    end

    # Whether any extracted subject value is exactly 'Biography'.
    def biography?
      potential_genres = Traject::MarcExtractor.cached('600|*0|vx:610|*0|vx:611|*0|vx:630|*0|vx:650|*0|avx:651|*0|vx:655|*0|avx').collect_matching_lines(record) do |field, spec, extractor|
        extractor.collect_subfields(field, spec)
      end
      potential_genres.include?('Biography')
    end

    # Whether a normalized 100 name equals a normalized 600 name.
    def author_matches_subject?
      (normalized_names('100abcdjq') & normalized_names('600abcdjq')).any?
    end

    # Distinct names for the given extractor spec, lower-cased, stripped and
    # with punctuation trimmed so author and subject forms can be compared.
    def normalized_names(spec)
      Traject::MarcExtractor.cached(spec).extract(record).uniq.map do |name|
        Traject::Macros::Marc21.trim_punctuation(name.downcase.strip)
      end
    end

    # @param term [String] a trimmed $x value
    # @return [Boolean] true on an exact GENRE_TERMS match or when the term
    #   starts with one of GENRE_STARTING_TERMS
    def likely_genre_term(term)
      GENRE_TERMS.include?(term) || term.start_with?(*GENRE_STARTING_TERMS)
    end

    # The three readers below are kept so existing callers keep working; they
    # now return shared frozen lists instead of building a new array per call.
    def genre_terms
      GENRE_TERMS
    end

    def genre_starting_terms
      GENRE_STARTING_TERMS
    end

    def primary_source_genres
      PRIMARY_SOURCE_GENRES
    end

    # Whether the record is a book whose 008 position 33 holds a literary
    # form code.
    def literary_work?
      book? && record.fields('008').any? { |litf| LITERARY_FORM_CODES.include?(litf.value[33]) }
    end

    # Truthy when leader positions 6-7 match BOOK_LEADER_PATTERN; nil when the
    # record has no leader or the leader is too short.
    def book?
      leader = record.leader
      leader && leader[6..7]&.match?(BOOK_LEADER_PATTERN)
    end

    # First subfield value selected by spec with punctuation trimmed, or nil
    # when the field has no matching subfield.
    def trimmed_first_subfield(extractor, field, spec)
      value = extractor.collect_subfields(field, spec).first
      Traject::Macros::Marc21.trim_punctuation(value) unless value.nil?
    end

    # Whether the field carries a $2 naming a recognised genre vocabulary.
    def from_genre_vocabulary?(field)
      field.subfields.any? do |s_field|
        s_field.code == '2' && SUBJECT_GENRE_VOCABULARIES.include?(s_field.value)
      end
    end

    def blank_genre?(genre)
      BLANK_GENRE_PATTERN.match?(genre)
    end

    # Reports a blank genre without aborting indexing. This class defines no
    # `logger` of its own, so an unconditional `logger.error` raises NameError
    # unless one is supplied from outside; use it when it is in scope and
    # fall back to Kernel#warn otherwise.
    def log_blank_genre
      message = "#{record['001']} - Blank genre field"
      if respond_to?(:logger, true)
        logger.error message
      else
        warn message
      end
    end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc