• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 5cb1e831-5540-4e5a-a704-17755a4eb060

06 Jun 2024 06:08PM UTC coverage: 91.214% (-0.05%) from 91.26%
5cb1e831-5540-4e5a-a704-17755a4eb060

Pull #2388

circleci

maxkadel
Checkpoint - green

- All but one "full only" method moved into Full class
Pull Request #2388: Do not process full dumps with private records

127 of 139 new or added lines in 4 files covered. (91.37%)

9 existing lines in 1 file now uncovered.

3509 of 3847 relevant lines covered (91.21%)

342.2 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.18
/app/models/scsb/partner_updates.rb
1
require 'json'
1✔
2

3
module Scsb
1✔
4
  class PartnerUpdates
1✔
5
    # Full only
6
    # Runs the full-dump workflow for +dump+.
    #
    # Builds a Scsb::PartnerUpdates::Full for the :recap_records_full file
    # type and hands control to its #process_full_files.
    #
    # @param dump the dump object passed straight through to Full.new
    # @return whatever Full#process_full_files returns
    def self.full(dump:)
      Scsb::PartnerUpdates::Full.new(dump:, dump_file_type: :recap_records_full).process_full_files
    end
12

13
    # Incremental only
14
    # Runs the incremental workflow for +dump+.
    #
    # @param dump the dump object handed to the new instance
    # @param timestamp converted with #to_time before being handed to .new
    # @return whatever #process_incremental_files returns
    def self.incremental(dump:, timestamp:)
      updater = new(dump:, timestamp: timestamp.to_time, dump_file_type: :recap_records)
      updater.process_incremental_files
    end
18

19
    # Both
20
    # Stores the collaborators and working state used by the processing steps.
    #
    # @param dump the dump object that files and delete ids are attached to
    # @param timestamp kept as @last_dump; later passed as the download
    #   timestamp filter and used to name the fixes log file
    # @param s3_bucket client used to list and download partner files
    # @param dump_file_type default type for dump files attached by this object
    def initialize(dump:, timestamp:, s3_bucket: Scsb::S3Bucket.partner_transfer_client, dump_file_type:)
      # Values supplied by the caller.
      @dump = dump
      @last_dump = timestamp
      @s3_bucket = s3_bucket
      @dump_file_type = dump_file_type

      # Working directories come from the environment; only the update
      # directory has a fallback.
      @update_directory = ENV['SCSB_PARTNER_UPDATE_DIRECTORY'] || '/tmp/updates'
      @scsb_file_dir = ENV['SCSB_FILE_DIR']

      # One independent list per kind of record fix; process_record appends
      # the 001 field of each record it repairs.
      @inv_xml, @tab_newline, @leader, @composed_chars, @bad_utf8 = Array.new(5) { [] }
    end
33

34
    # Both
35
    # Sets @dump.generated_date to the earliest date among the dump files.
    #
    # Every string from #date_strings is parsed with DateTime.parse and the
    # minimum is assigned; with no dump files the value assigned is nil.
    #
    # @return the value assigned to @dump.generated_date
    def set_generated_date
      @dump.generated_date = date_strings.map { |str| DateTime.parse(str) }.min
    end
38

39
    # Both
40
    # Pulls the date segment out of each dump file's name.
    #
    # The basename of each path is split on "_"; the date is taken from the
    # fourth segment for "recap_records_full_metadata" files and from the
    # third segment for every other type.
    #
    # @return [Array] one segment per dump file, nil where the name has too
    #   few segments
    def date_strings
      @dump.dump_files.map do |df|
        segment = df.dump_file_type == "recap_records_full_metadata" ? 3 : 2
        File.basename(df.path).split("_")[segment]
      end
    end
49

50
    # Incremental only
51
    # Runs the incremental pipeline. The order matters:
    #   1. make sure the working directory exists
    #   2. download and process partner update files
    #   3. set the generated date on the dump
    #   4. write and attach the record-fix log
    #   5. download and process partner delete files
    #
    # @return whatever #process_partner_deletes returns
    def process_incremental_files
      prepare_directory
      process_partner_updates(files: download_partner_updates)
      set_generated_date
      log_record_fixes
      process_partner_deletes(files: download_partner_deletes)
    end
60

61
    private
1✔
62

63
      # Both
64
      # Appends +message+ to the error text on the dump's event and saves it.
      #
      # Any existing error is kept; entries are joined with "; ".
      #
      # @param message the text to append
      # @return the result of saving the event
      def add_error(message:)
        event = @dump.event
        event.error = Array.wrap(event.error).push(message).join("; ")
        event.save
      end
70

71
      # Incremental only
72
      # Downloads partner update files into @update_directory.
      #
      # The bucket is listed under the prefix from SCSB_S3_PARTNER_UPDATES
      # (or the built-in default) and the listing is handed to
      # download_files with @last_dump as the timestamp filter.
      #
      # @return whatever @s3_bucket.download_files returns
      def download_partner_updates
        prefix = ENV['SCSB_S3_PARTNER_UPDATES'] || 'data-exports/PUL/MARCXml/Incremental'
        @s3_bucket.download_files(
          files: @s3_bucket.list_files(prefix:),
          timestamp_filter: @last_dump,
          output_directory: @update_directory
        )
      end
76

77
      # Incremental only
78
      # Downloads partner delete files into @update_directory.
      #
      # The bucket is listed under the prefix from SCSB_S3_PARTNER_DELETES
      # (or the built-in default) and the listing is handed to
      # download_files with @last_dump as the timestamp filter.
      #
      # @return whatever @s3_bucket.download_files returns
      def download_partner_deletes
        prefix = ENV['SCSB_S3_PARTNER_DELETES'] || 'data-exports/PUL/Json'
        @s3_bucket.download_files(
          files: @s3_bucket.list_files(prefix:),
          timestamp_filter: @last_dump,
          output_directory: @update_directory
        )
      end
82

83
      # Both
84
      # Unpacks downloaded update archives, cleans every MARC record, and
      # attaches the resulting XML files to the dump.
      #
      # Phase 1: each zip in +files+ is opened, every entry is extracted to
      # "<update_directory>/<stamp>_<n>.xml", and the zip is deleted. <stamp>
      # is the pair of numeric segments that follow the first "_"-separated
      # segment of the archive name; names that do not match the pattern are
      # used unchanged.
      #
      # Phase 2: each extracted file is read record by record, every record
      # is passed through #process_record and written to
      # "<scsb_file_dir>/<file_prefix><name>"; the extracted file is then
      # deleted and the output is attached via #attach_dump_file.
      #
      # @param files paths of the downloaded zip archives
      # @param file_prefix prefix for the output file names
      # @return [Array<String>] paths of the extracted (now deleted) XML files
      def process_partner_updates(files:, file_prefix: 'scsb_update_')
        xml_files = []
        files.each do |file|
          filename = File.basename(file, '.zip').gsub(/^[^_]+_([0-9]+)_([0-9]+).*$/, '\1_\2')
          file_increment = 1
          Zip::File.open(file) do |zip_file|
            zip_file.each do |entry|
              target = "#{@update_directory}/#{filename}_#{file_increment}.xml"
              xml_files << target
              # NOTE(review): passing a full destination path is the pre-3.0
              # rubyzip calling convention - confirm against the pinned version.
              entry.extract(target)
              file_increment += 1
            end
          end
          File.unlink(file)
        end
        xml_files.each do |file|
          filename = File.basename(file)
          reader = MARC::XMLReader.new(file.to_s, external_encoding: 'UTF-8')
          filepath = "#{@scsb_file_dir}/#{file_prefix}#{filename}"
          writer = MARC::XMLWriter.new(filepath)
          begin
            reader.each { |record| writer.write(process_record(record)) }
          ensure
            # Close even when a record fails, so the output handle is not
            # leaked; the error itself still propagates.
            writer.close
          end
          File.unlink(file)
          attach_dump_file(filepath)
        end
      end
111

112
      # Incremental only
113
      # Unpacks downloaded delete archives and records the ids they contain
      # on the dump.
      #
      # Every entry of every zip in +files+ is extracted to
      # "<update_directory>/scsbdelete<archive name>_<n>.json" and the zip is
      # deleted. Each extracted file is then passed to #scsb_ids and deleted.
      # The collected ids are stored in @dump.delete_ids and the dump is saved.
      #
      # @param files paths of the downloaded zip archives
      # @return the result of @dump.save
      def process_partner_deletes(files:)
        extracted = []
        files.each do |archive|
          stem = File.basename(archive, '.zip')
          counter = 0
          Zip::File.open(archive) do |zip_file|
            zip_file.each do |entry|
              counter += 1
              destination = "#{@update_directory}/scsbdelete#{stem}_#{counter}.json"
              extracted << destination
              entry.extract(destination)
            end
          end
          File.unlink(archive)
        end

        ids = []
        extracted.each do |json_path|
          scsb_ids(json_path, ids)
          File.unlink(json_path)
        end

        @dump.delete_ids = ids
        @dump.save
      end
136

137
      # Incremental only (deletes)
138
      # Reads a JSON delete file and appends one "SCSB-<bibId>" string to
      # +ids+ for every record in it.
      #
      # @param filename path of the JSON file to read
      # @param ids [Array] accumulator, mutated in place
      # @return [Array] the same +ids+ array
      def scsb_ids(filename, ids)
        JSON.parse(File.read(filename)).each_with_object(ids) do |record, collected|
          collected << "SCSB-#{record['bib']['bibId']}"
        end
      end
146

147
      # Both
148
      # Cleans a single MARC record and returns the cleaned record.
      #
      # Steps, in order:
      #   * drop the 856 and 959 fields
      #   * change leader position 5 from 'd' to 'c'
      #   * run each detector below; when it fires, log the record's 001
      #     field in the matching list and apply the paired repair
      #   * apply the extra-space fix, then the empty-subfield fix
      #
      # @param record the MARC record to clean
      # @return the result of #empty_subfield_fix
      def process_record(record)
        record = field_delete(['856', '959'], record)
        record.leader[5] = 'c' if record.leader[5].eql?('d')

        # [detector, list logging affected 001 fields, repair]; each detector
        # sees the record as left by the repairs before it.
        repairs = [
          [:bad_utf8?, @bad_utf8, :bad_utf8_fix],
          [:invalid_xml_chars?, @inv_xml, :invalid_xml_fix],
          [:tab_newline_char?, @tab_newline, :tab_newline_fix],
          [:leader_errors?, @leader, :leaderfix],
          [:composed_chars_errors?, @composed_chars, :composed_chars_normalize]
        ]
        repairs.each do |detector, affected, repair|
          next unless send(detector, record)

          affected << record['001']
          record = send(repair, record)
        end

        empty_subfield_fix(extra_space_fix(record))
      end
174

175
      # Both
176
      # Creates a DumpFile for +filepath+, zips and saves it, then appends it
      # to the dump's files and saves the dump.
      #
      # @param filepath path of the file to attach
      # @param dump_file_type type for the new DumpFile; falls back to
      #   @dump_file_type when nil
      # @return the result of @dump.save
      def attach_dump_file(filepath, dump_file_type: nil)
        type = dump_file_type || @dump_file_type
        dump_file = DumpFile.create(dump_file_type: type, path: filepath).tap do |created|
          created.zip
          created.save
        end
        @dump.dump_files << dump_file
        @dump.save
      end
184

185
      # Both
186
      # Writes the per-category lists of fixed records as JSON to the path
      # from #log_file_name and attaches that file to the dump as a :log_file.
      #
      # @return the result of #attach_dump_file
      def log_record_fixes
        fixes = {
          inv_xml: @inv_xml,
          tab_newline: @tab_newline,
          leader: @leader,
          composed_chars: @composed_chars,
          bad_utf8: @bad_utf8
        }
        path = log_file_name
        File.write(path, fixes.to_json)
        attach_dump_file(path, dump_file_type: :log_file)
      end
198

199
      # Both
200
      # Path of the fixes log: "<scsb_file_dir>/fixes_<YYYY_MM_DD>.json",
      # dated from @last_dump.
      #
      # @return [String]
      def log_file_name
        date_stamp = @last_dump.strftime('%Y_%m_%d')
        "#{@scsb_file_dir}/fixes_#{date_stamp}.json"
      end
203

204
      # Both
205
      # Makes sure the working directory for downloaded partner files exists.
      # FileUtils.mkdir_p also creates missing parent directories and does
      # nothing when @update_directory is already there.
      def prepare_directory
        FileUtils.mkdir_p(@update_directory)
      end
208
  end
209
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc