• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 2ee2c4fc-5ef0-4806-b86e-01bf70aa67a0

24 Dec 2024 04:55PM UTC coverage: 91.859% (-0.04%) from 91.902%
2ee2c4fc-5ef0-4806-b86e-01bf70aa67a0

Pull #2569

circleci

christinach
Generate new .rubocop_todo.yml
rubocop fix
Pull Request #2569: Rubocop gems

335 of 378 new or added lines in 57 files covered. (88.62%)

2 existing lines in 2 files now uncovered.

3385 of 3685 relevant lines covered (91.86%)

377.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.34
/marc_to_solr/lib/augment_the_subject.rb
1
# frozen_string_literal: true
2

3
require 'set'
1✔
4

5
##
# The creation and management of metadata are not neutral activities.
#
# Decides whether a list of subject headings should receive an additional
# "Indigenous Studies" heading. A heading qualifies when any one of these holds:
# 1. its first subfield is in the standalone subfield a list,
# 2. any of its subfields is in the standalone subfield x list, or
# 3. its first subfield, combined with its remaining subfields, satisfies one
#    of the required-subfield combinations.
#
# Subfields within a heading are delimited by SEPARATOR, which is not defined
# in this class. NOTE(review): presumably supplied by the indexing
# configuration that loads this file -- confirm.
class AugmentTheSubject
  LCSH_TERMS_CSV_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'indigenous_studies.csv')
  # Can be re-created using `bundle exec rake augment:recreate_fixtures`
  LCSH_STANDALONE_A_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'standalone_subfield_a.json')
  # Must be created by hand from file provided by metadata librarians
  LCSH_STANDALONE_X_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'standalone_subfield_x.json')
  # Can be re-created using `bundle exec rake augment:recreate_fixtures`
  LCSH_REQUIRED_SUBFIELDS = File.join(File.dirname(__FILE__), 'augment_the_subject', 'indigenous_studies_required.json')

  # Heading appended to a list of terms when any of them matches.
  INDIGENOUS_STUDIES_TERM = 'Indigenous Studies'
  # CSV column whose value is 'y' when a term only counts together with further subfields.
  REQUIRES_SUBFIELDS_COLUMN = 'With subdivisions ǂx etc.'
  # CSV column holding the term, with subfields introduced by ǂ and a one-character code.
  TERM_COLUMN = 'Term in MARC'
  private_constant :INDIGENOUS_STUDIES_TERM, :REQUIRES_SUBFIELDS_COLUMN, :TERM_COLUMN

  # In order to re-write the fixture file based on a new CSV, run the rake task
  # `bundle exec rake augment:recreate_fixtures`
  #
  # Collects the first subfield of every CSV row that is not flagged as
  # requiring further subdivisions. Rows without a usable term are skipped.
  # @return [Hash] `{ standalone_subfield_a: [<String>] }`, sorted and de-duplicated
  def self.parse_standalone_a
    standalone_terms = Set.new
    CSV.foreach(LCSH_TERMS_CSV_FILE, headers: true) do |row|
      next if row[REQUIRES_SUBFIELDS_COLUMN] == 'y'

      # to_s guards against rows with an empty "Term in MARC" cell
      subfield_a = row[TERM_COLUMN].to_s.chomp.split('ǂ').first.to_s.strip
      standalone_terms << subfield_a unless subfield_a.empty?
    end
    { standalone_subfield_a: standalone_terms.sort }
  end

  # In order to re-write the fixture file based on a new CSV, run the rake task
  # `bundle exec rake augment:recreate_fixtures`
  #
  # Groups the CSV rows flagged as requiring subdivisions by their first
  # subfield; each row contributes one list of the subfields that follow it.
  # Rows without a usable term are skipped.
  # @return [String] JSON object mapping a first subfield to a list of subfield lists
  def self.parse_required_subfields
    required = {}
    CSV.foreach(LCSH_TERMS_CSV_FILE, headers: true) do |row|
      next unless row[REQUIRES_SUBFIELDS_COLUMN] == 'y'

      subfield_a, *other_subfields = row[TERM_COLUMN].to_s.chomp.split(/ ǂ. /)
      next if subfield_a.nil?

      (required[subfield_a] ||= []) << other_subfields
    end
    required.to_json
  end

  ##
  # Ensure the needed config files exist
  # @raise [RuntimeError] naming the first configuration file that is missing
  def initialize
    verify_file_exists('csv', LCSH_TERMS_CSV_FILE)
    verify_file_exists('standalone subfield a', LCSH_STANDALONE_A_FILE)
    verify_file_exists('standalone subfield x', LCSH_STANDALONE_X_FILE)
    verify_file_exists('required subfields', LCSH_REQUIRED_SUBFIELDS)
  end

  ##
  # Normalized terms that match on the first subfield alone.
  # Read from LCSH_STANDALONE_A_FILE on first use, then memoized.
  # @return [<String>]
  def standalone_subfield_a_terms
    @standalone_subfield_a_terms ||= begin
      parsed_json = JSON.parse(File.read(LCSH_STANDALONE_A_FILE), symbolize_names: true)
      parsed_json[:standalone_subfield_a].map { |term| normalize(term) }
    end
  end

  ##
  # Normalized terms that match when they appear as any subfield.
  # Read from LCSH_STANDALONE_X_FILE on first use, then memoized.
  # @return [<String>]
  def standalone_subfield_x_terms
    @standalone_subfield_x_terms ||= begin
      parsed_json = JSON.parse(File.read(LCSH_STANDALONE_X_FILE), symbolize_names: true)
      parsed_json[:standalone_subfield_x].map { |term| normalize(term) }
    end
  end

  ##
  # Required subfield combinations, keyed by normalized and symbolized first
  # subfield. Each value is a list of Sets; a heading matches when all terms
  # of at least one Set appear among its remaining subfields.
  # Read from LCSH_REQUIRED_SUBFIELDS on first use, then memoized.
  # @return [Hash{Symbol => <Set<String>>}]
  def indigenous_studies_required
    @indigenous_studies_required ||= begin
      parsed_json = JSON.parse(File.read(LCSH_REQUIRED_SUBFIELDS), symbolize_names: false)
      parsed_json.each_with_object({}) do |(subfield_a, combinations), required|
        key = normalize(subfield_a).to_sym
        # Sets allow a subset comparison at match time
        term_sets = combinations.map { |terms| terms.map { |term| normalize(term) }.to_set }
        # Keys that normalize to the same value are merged rather than overwritten
        (required[key] ||= []).concat(term_sets)
      end
    end
  end

  ##
  # Normalize lcsh terms so they can match at index time.
  # 1. remove a trailing line ending
  # 2. downcase
  # 3. replace ǂ terms with SEPARATOR
  # @param [String] lcsh_term
  # @return [String]
  def normalize(lcsh_term)
    lcsh_term.chomp.downcase.gsub(/ ǂ. /, SEPARATOR)
  end

  ##
  # Given an array of terms, add "Indigenous Studies" if any of the terms match.
  # The array is modified in place; the heading is not added a second time when
  # it is already present.
  # @param [<String>] terms
  # @return [<String>] the same array that was passed in
  def add_indigenous_studies(terms)
    terms << INDIGENOUS_STUDIES_TERM if indigenous_studies?(terms) && !terms.include?(INDIGENOUS_STUDIES_TERM)
    terms
  end

  ##
  # Given an array of terms, check whether this set of terms should have an
  # additional subject heading of "Indigenous Studies" added.
  # Blank terms are ignored.
  # @param [<String>] terms
  # @return [Boolean]
  def indigenous_studies?(terms)
    terms.any? do |term|
      next false if term.blank?

      subfield_a_match?(term) || subfield_x_match?(term) || subfield_a_with_required_subfields_match?(term)
    end
  end

  ##
  # For some subject terms, only the first part needs to match.
  # E.g., "Quinnipiac Indians-History", "Quinnipiac Indians-Culture" should both
  # be assigned an Indigenous Studies term even though that entire term doesn't
  # appear in our terms list.
  # A period at the end of the first part is ignored.
  # @param [String] term
  # @return [Boolean]
  def subfield_a_match?(term)
    first_subfield = term.split(SEPARATOR).first
    # A term made up only of separators has no subfields at all
    return false if first_subfield.nil?

    standalone_subfield_a_lookup.include?(normalize(first_subfield).gsub(/\.$/, ''))
  end

  ##
  # For some subfield terms, only a single subfield needs to match.
  # E.g., any subject term that includes "Indian authors" should be assigned Indigenous Studies
  # @param [String] term
  # @return [Boolean]
  def subfield_x_match?(term)
    term.split(SEPARATOR).any? do |subfield|
      standalone_subfield_x_terms.include?(normalize(subfield))
    end
  end

  ##
  # Some subject terms require a combination of terms in order to be assigned Indigenous Studies.
  # For example, "Alaska-Antiquities" should be a match, but "Alaska" by itself should not,
  # nor should "Antiquities" by itself.
  # @param [String] term
  # @return [Boolean]
  def subfield_a_with_required_subfields_match?(term)
    subfield_a, *other_subfields = term.split(SEPARATOR).map { |subfield| normalize(subfield) }
    # A term made up only of separators has no subfields at all
    return false if subfield_a.nil?

    required_combinations = indigenous_studies_required[subfield_a.to_sym]
    return false unless required_combinations

    # Built once, then compared against every required combination
    present_subfields = other_subfields.to_set
    required_combinations.any? { |combination| combination.subset?(present_subfields) }
  end

  private

  # Raises unless the configuration file at +path+ exists.
  def verify_file_exists(description, path)
    raise "Cannot find lcsh #{description} file at #{path}" unless File.exist?(path)
  end

  # Set view of the standalone subfield a terms, for constant-time membership tests.
  def standalone_subfield_a_lookup
    @standalone_subfield_a_lookup ||= standalone_subfield_a_terms.to_set
  end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc