• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / f04bc944-f9b4-4a42-8b26-dcacd0e3e688

11 Mar 2025 10:27PM UTC coverage: 34.017% (-58.1%) from 92.162%
f04bc944-f9b4-4a42-8b26-dcacd0e3e688

Pull #2653

circleci

christinach
Add new lc_subject_facet field.
Helps with the vocabulary work https://github.com/pulibrary/orangelight/pull/3386
In this new field we index only the LC subject heading and its subdivisions,
so that when the user searches from the Details section, they can query Solr for
all the subject headings and their subdivisions.

This is needed for the Subject browse Vocabulary work.
example: "lc_subject_facet": [
             "Booksellers and bookselling—Italy—Directories",
             "Booksellers and bookselling—Italy",
             "Booksellers and bookselling"
         ]
Pull Request #2653: Add new lc_subject_facet field.

1 of 3 new or added lines in 1 file covered. (33.33%)

2215 existing lines in 93 files now uncovered.

1294 of 3804 relevant lines covered (34.02%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

26.83
/marc_to_solr/lib/augment_the_subject.rb
1
# frozen_string_literal: true
2

3
require 'set'
1✔
4

5
##
6
# The creation and management of metadata are not neutral activities.
7
class AugmentTheSubject
  LCSH_TERMS_CSV_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'indigenous_studies.csv')
  # Can be re-created using `bundle exec rake augment:recreate_fixtures`
  LCSH_STANDALONE_A_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'standalone_subfield_a.json')
  # Must be created by hand from file provided by metadata librarians
  LCSH_STANDALONE_X_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'standalone_subfield_x.json')
  # Can be re-created using `bundle exec rake augment:recreate_fixtures`
  LCSH_REQUIRED_SUBFIELDS = File.join(File.dirname(__FILE__), 'augment_the_subject', 'indigenous_studies_required.json')

  ##
  # Ensure the needed config files exist
  # @raise [RuntimeError] naming the first config file that is missing
  def initialize
    {
      'lcsh csv' => LCSH_TERMS_CSV_FILE,
      'lcsh standalone subfield a' => LCSH_STANDALONE_A_FILE,
      'lcsh standalone subfield x' => LCSH_STANDALONE_X_FILE,
      'lcsh required subfields' => LCSH_REQUIRED_SUBFIELDS
    }.each do |description, path|
      raise "Cannot find #{description} file at #{path}" unless File.exist?(path)
    end
  end

  ##
  # Normalized terms listed under the `standalone_subfield_a` key of
  # LCSH_STANDALONE_A_FILE. The file is read once and the result memoized.
  # @return [<String>]
  def standalone_subfield_a_terms
    @standalone_subfield_a_terms ||= begin
      parsed_json = JSON.parse(File.read(LCSH_STANDALONE_A_FILE), symbolize_names: true)
      parsed_json[:standalone_subfield_a].map { |term| normalize(term) }
    end
  end

  ##
  # Normalized terms listed under the `standalone_subfield_x` key of
  # LCSH_STANDALONE_X_FILE. The file is read once and the result memoized.
  # @return [<String>]
  def standalone_subfield_x_terms
    @standalone_subfield_x_terms ||= begin
      parsed_json = JSON.parse(File.read(LCSH_STANDALONE_X_FILE), symbolize_names: true)
      parsed_json[:standalone_subfield_x].map { |term| normalize(term) }
    end
  end

  ##
  # Required-subfield combinations read from LCSH_REQUIRED_SUBFIELDS, memoized.
  # Keys are normalized and symbolized for fast and consistent retrieval; each
  # value is a list of sets so that candidates can be tested with set comparison.
  # @return [Hash{Symbol => <Set<String>>}]
  def indigenous_studies_required
    @indigenous_studies_required ||= begin
      parsed_json = JSON.parse(File.read(LCSH_REQUIRED_SUBFIELDS), symbolize_names: false)
      parsed_json.each_with_object({}) do |(subfield_a, alternatives), required|
        required[normalize(subfield_a).to_sym] = alternatives.map do |req_terms|
          req_terms.map { |term| normalize(term) }.to_set
        end
      end
    end
  end

  ##
  # Normalize lcsh terms so they can match at index time.
  # 1. downcase
  # 2. replace ǂ terms with SEPARATOR
  # NOTE(review): SEPARATOR is not defined in this class; it is assumed to be
  # a constant defined elsewhere in the application.
  # @param [String] lcsh_term
  # @return [String]
  def normalize(lcsh_term)
    lcsh_term.chomp.downcase.gsub(/ ǂ. /, SEPARATOR)
  end

  ##
  # Given an array of terms, add "Indigenous Studies" if any of the terms match.
  # The array passed in is modified in place and also returned.
  # @param [<String>] terms
  # @return [<String>]
  def add_indigenous_studies(terms)
    terms << 'Indigenous Studies' if indigenous_studies?(terms)
    terms
  end

  ##
  # Given an array of terms, check whether this set of terms should have an
  # additional subject heading of "Indigenous Studies" added
  # @param [<String>] terms
  # @return [Boolean]
  def indigenous_studies?(terms)
    terms.any? do |term|
      next false if term.blank?

      subfield_a_match?(term) ||
        subfield_x_match?(term) ||
        subfield_a_with_required_subfields_match?(term)
    end
  end

  ##
  # For some subject terms, only the first part needs to match.
  # E.g., "Quinnipiac Indians-History", "Quinnipiac Indians-Culture" should both
  # be assigned an Indigenous Studies term even though that entire term doesn't
  # appear in our terms list.
  # @param [String] term
  # @return [Boolean]
  def subfield_a_match?(term)
    first_subfield = term.split(SEPARATOR).first
    # A term made up only of separators splits into nothing; it cannot match.
    return false if first_subfield.nil?

    # A single trailing period is ignored when comparing against the list.
    subfield_a = normalize(first_subfield).delete_suffix('.')
    standalone_subfield_a_terms.include?(subfield_a)
  end

  ##
  # For some subfield terms, only a single subfield needs to match.
  # E.g., any subject term that includes "Indian authors" should be assigned Indigenous Studies
  # @param [String] term
  # @return [Boolean]
  def subfield_x_match?(term)
    subfields = term.split(SEPARATOR).map { |subfield| normalize(subfield) }
    (standalone_subfield_x_terms & subfields).any?
  end

  ##
  # Some subject terms require a combination of terms in order to be assigned Indigenous Studies.
  # For example, "Alaska-Antiquities" should be a match, but "Alaska" by itself should not,
  # nor should "Antiquities" by itself.
  # @param [String] term
  # @return [Boolean]
  def subfield_a_with_required_subfields_match?(term)
    subfield_a, *subdivisions = term.split(SEPARATOR).map { |subfield| normalize(subfield) }
    # A term made up only of separators splits into nothing; it cannot match.
    return false if subfield_a.nil?

    required_subfields = indigenous_studies_required[subfield_a.to_sym]
    return false unless required_subfields

    # Built once; every required combination is tested against the same set.
    present_subdivisions = subdivisions.to_set
    required_subfields.any? { |req_terms| req_terms.subset?(present_subdivisions) }
  end

  # In order to re-write the fixture file based on a new CSV, run the rake task
  # `bundle exec rake augment:recreate_fixtures`
  #
  # Collects the first subfield of every CSV row that is not flagged 'y' in the
  # 'With subdivisions ǂx etc.' column.
  # @return [Hash{Symbol => <String>}] sorted, de-duplicated terms under :standalone_subfield_a
  def self.parse_standalone_a
    subfield_a_aggregator = Set.new
    CSV.foreach(LCSH_TERMS_CSV_FILE, headers: true) do |row|
      next if row['With subdivisions ǂx etc.'] == 'y'

      subfield_a_aggregator << row['Term in MARC'].chomp.split('ǂ').first.strip
    end
    { standalone_subfield_a: subfield_a_aggregator.sort }
  end

  # In order to re-write the fixture file based on a new CSV, run the rake task
  # `bundle exec rake augment:recreate_fixtures`
  #
  # For every CSV row flagged 'y' in the 'With subdivisions ǂx etc.' column,
  # groups the remaining subfields under the row's first subfield.
  # @return [String] JSON object mapping each first subfield to a list of subfield lists
  def self.parse_required_subfields
    output = {}
    CSV.foreach(LCSH_TERMS_CSV_FILE, headers: true) do |row|
      next unless row['With subdivisions ǂx etc.'] == 'y'

      term_list = row['Term in MARC'].chomp.split(/ ǂ. /)
      subfield_a = term_list.shift
      (output[subfield_a] ||= []) << term_list
    end
    output.to_json
  end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc