• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 1dcebae2-3318-4e77-bc53-82276e293354

02 May 2025 04:45PM UTC coverage: 28.256% (-63.9%) from 92.189%
1dcebae2-3318-4e77-bc53-82276e293354

push

circleci

sandbergja
Add basic infrastructure for compiling rust code

* Add a rake compile task to compile
* Run the rake task in CI
* Run the rake task before rspec tests with the rust tag, to provide quick feedback on rust changes in TDD cycles

2 of 7 new or added lines in 2 files covered. (28.57%)

2467 existing lines in 97 files now uncovered.

1089 of 3854 relevant lines covered (28.26%)

0.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

26.83
/marc_to_solr/lib/augment_the_subject.rb
1
# frozen_string_literal: true
2

3
require 'set'
1✔
4

5
##
# The creation and management of metadata are not neutral activities.
#
# Decides whether a set of subject headings should additionally receive the
# "Indigenous Studies" heading, based on term lists stored alongside this file.
#
# A subject term is treated as a list of subfields joined by SEPARATOR.
# NOTE(review): SEPARATOR is not defined in this file; it is expected to be
# provided by the surrounding application before these methods are called.
class AugmentTheSubject
  LCSH_TERMS_CSV_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'indigenous_studies.csv')
  # Can be re-created using `bundle exec rake augment:recreate_fixtures`
  LCSH_STANDALONE_A_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'standalone_subfield_a.json')
  # Must be created by hand from file provided by metadata librarians
  LCSH_STANDALONE_X_FILE = File.join(File.dirname(__FILE__), 'augment_the_subject', 'standalone_subfield_x.json')
  # Can be re-created using `bundle exec rake augment:recreate_fixtures`
  LCSH_REQUIRED_SUBFIELDS = File.join(File.dirname(__FILE__), 'augment_the_subject', 'indigenous_studies_required.json')

  # CSV column holding 'y' when a term only counts together with subdivisions
  REQUIRES_SUBDIVISIONS_COLUMN = 'With subdivisions ǂx etc.'
  # CSV column holding the full term, with subfields delimited by " ǂ<code> "
  MARC_TERM_COLUMN = 'Term in MARC'

  ##
  # Ensure the needed config files exist
  # @raise [RuntimeError] naming the first config file that is missing
  def initialize
    {
      'lcsh csv file' => LCSH_TERMS_CSV_FILE,
      'lcsh standalone subfield a file' => LCSH_STANDALONE_A_FILE,
      'lcsh standalone subfield x file' => LCSH_STANDALONE_X_FILE,
      'lcsh required subfields file' => LCSH_REQUIRED_SUBFIELDS
    }.each do |description, path|
      raise "Cannot find #{description} at #{path}" unless File.exist?(path)
    end
  end

  ##
  # Normalized terms for which a match on the first subfield alone is enough.
  # Read once from LCSH_STANDALONE_A_FILE and memoized.
  # @return [<String>]
  def standalone_subfield_a_terms
    @standalone_subfield_a_terms ||= begin
      parsed_json = JSON.parse(File.read(LCSH_STANDALONE_A_FILE), symbolize_names: true)
      parsed_json[:standalone_subfield_a].map { |term| normalize(term) }
    end
  end

  ##
  # Normalized terms that qualify a heading when they appear as any subfield.
  # Read once from LCSH_STANDALONE_X_FILE and memoized.
  # @return [<String>]
  def standalone_subfield_x_terms
    @standalone_subfield_x_terms ||= begin
      parsed_json = JSON.parse(File.read(LCSH_STANDALONE_X_FILE), symbolize_names: true)
      parsed_json[:standalone_subfield_x].map { |term| normalize(term) }
    end
  end

  ##
  # Lookup of first subfields that only qualify in combination with others.
  # Read once from LCSH_REQUIRED_SUBFIELDS and memoized.
  #
  # Keys are normalized and symbolized for fast and consistent retrieval; each
  # value is a list of Sets so candidates can be checked by set comparison.
  # Keys that normalize to the same symbol have their alternatives merged
  # rather than one entry silently replacing the other.
  # @return [{Symbol => <Set<String>>}]
  def indigenous_studies_required
    @indigenous_studies_required ||=
      JSON.parse(File.read(LCSH_REQUIRED_SUBFIELDS)).each_with_object({}) do |(subfield_a, alternatives), lookup|
        required_sets = alternatives.map { |terms| terms.map { |term| normalize(term) }.to_set }
        (lookup[normalize(subfield_a).to_sym] ||= []).concat(required_sets)
      end
  end

  ##
  # Normalize lcsh terms so they can match at index time.
  # 1. remove a trailing newline
  # 2. downcase
  # 3. replace each " ǂ<code> " subfield delimiter with SEPARATOR
  # @param [String] lcsh_term
  # @return [String]
  def normalize(lcsh_term)
    lcsh_term.chomp.downcase.gsub(/ ǂ. /, SEPARATOR)
  end

  ##
  # Given an array of terms, add "Indigenous Studies" if any of the terms match.
  # The array passed in is modified in place and also returned.
  # @param [<String>] terms
  # @return [<String>]
  def add_indigenous_studies(terms)
    terms << 'Indigenous Studies' if indigenous_studies?(terms)
    terms
  end

  ##
  # Given an array of terms, check whether this set of terms should have an
  # additional subject heading of "Indigenous Studies" added
  # @param [<String>] terms
  # @return [Boolean]
  def indigenous_studies?(terms)
    terms.any? do |term|
      next false if term.blank?

      subfield_a_match?(term) || subfield_x_match?(term) || subfield_a_with_required_subfields_match?(term)
    end
  end

  ##
  # For some subject terms, only the first part needs to match.
  # E.g., "Quinnipiac Indians-History", "Quinnipiac Indians-Culture" should both
  # be assigned an Indigenous Studies term even though that entire term doesn't
  # appear in our terms list.
  # @param [String] term
  # @return [Boolean]
  def subfield_a_match?(term)
    first_subfield = term.split(SEPARATOR).first
    # A term made up only of separators splits to an empty list
    return false unless first_subfield

    # A single trailing period is ignored when comparing
    standalone_subfield_a_terms.include?(normalize(first_subfield).delete_suffix('.'))
  end

  ##
  # For some subfield terms, only a single subfield needs to match.
  # E.g., any subject term that includes "Indian authors" should be assigned Indigenous Studies
  # @param [String] term
  # @return [Boolean]
  def subfield_x_match?(term)
    (standalone_subfield_x_terms & normalized_subfields(term)).any?
  end

  ##
  # Some subject terms require a combination of terms in order to be assigned Indigenous Studies.
  # For example, "Alaska-Antiquities" should be a match, but "Alaska" by itself should not,
  # nor should "Antiquities" by itself.
  # @param [String] term
  # @return [Boolean]
  def subfield_a_with_required_subfields_match?(term)
    subfield_a, *subdivisions = normalized_subfields(term)
    # A term made up only of separators has no first subfield to look up
    return false unless subfield_a

    required_subfields = indigenous_studies_required[subfield_a.to_sym]
    return false unless required_subfields

    # Any one required combination being fully present is enough
    present = subdivisions.to_set
    required_subfields.any? { |req_terms| req_terms.subset?(present) }
  end

  # In order to re-write the fixture file based on a new CSV, run the rake task
  # `bundle exec rake augment:recreate_fixtures`
  #
  # Collects the first subfield of every CSV term that is not flagged as
  # needing subdivisions.
  # @return [{Symbol => <String>}] sorted, de-duplicated terms under :standalone_subfield_a
  def self.parse_standalone_a
    subfield_a_aggregator = Set.new
    CSV.foreach(LCSH_TERMS_CSV_FILE, headers: true) do |row|
      next if row[REQUIRES_SUBDIVISIONS_COLUMN] == 'y'

      subfield_a_aggregator << row[MARC_TERM_COLUMN].chomp.split('ǂ').first.strip
    end
    { standalone_subfield_a: subfield_a_aggregator.sort }
  end

  # In order to re-write the fixture file based on a new CSV, run the rake task
  # `bundle exec rake augment:recreate_fixtures`
  #
  # Groups the CSV terms flagged as needing subdivisions by their first
  # subfield; each group lists the remaining subfields of every such term.
  # @return [String] JSON object mapping first subfield to a list of subfield lists
  def self.parse_required_subfields
    output = {}
    CSV.foreach(LCSH_TERMS_CSV_FILE, headers: true) do |row|
      next unless row[REQUIRES_SUBDIVISIONS_COLUMN] == 'y'

      subfield_a, *required = row[MARC_TERM_COLUMN].chomp.split(/ ǂ. /)
      (output[subfield_a] ||= []) << required
    end
    output.to_json
  end

  private

  # Splits a term on SEPARATOR and normalizes each resulting subfield.
  def normalized_subfields(term)
    term.split(SEPARATOR).map { |subfield| normalize(subfield) }
  end
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc