381ebe8c-8717-4a37-93a5-7a9a805ad9de

Committed 20 Dec 2024 07:43PM UTC coverage: 91.859% (-0.05%) from 91.904%

Build # 381ebe8c-8717-4a37-93a5-7a9a805ad9de

Build Type

Pull #2563

circleci

Committed by

maxkadel

Commit Message

Test for idempotency

Pull Request #2563: I2321 Shift SCSB full index tasks into separate background jobs

Run Details

91 of 92 new or added lines in 6 files covered. (98.91%)

5 existing lines in 1 file now uncovered.

3419 of 3722 relevant lines covered (91.86%)

375.43 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.04

/app/models/scsb/partner_updates/full.rb

require 'json/add/regexp'

module Scsb
  class PartnerUpdates
    class Full < Update
      # Stores the dump and the list of partner institutions (each paired with
      # the filename prefix used for its full dump), then passes the original
      # keyword arguments on to the parent initializer via bare `super`.
      def initialize(dump:, dump_file_type:, timestamp: DateTime.now.to_time)
        @dump = dump
        @institutions = %w[NYPL CUL HL].map do |code|
          { inst: code, prefix: "scsbfull_#{code.downcase}_" }
        end
        super
      end

      # Starts a full partner update: prepares the working directory, enqueues
      # one DownloadAndProcessFullJob per configured institution, then sets the
      # generated date and logs record fixes.
      #
      # Returns whatever log_record_fixes returns.
      def process_full_files
        prepare_directory
        current_dump_id = @dump.id
        @institutions.each do |entry|
          inst, prefix = entry.values_at(:inst, :prefix)
          DownloadAndProcessFullJob.perform_later(inst:, prefix:, dump_id: current_dump_id)
        end
        set_generated_date
        log_record_fixes
      end

      # Downloads the most recent full dump file matching file_filter. When a
      # file comes back, a ProcessPartnerUpdatesJob is enqueued for it; when
      # nothing matches, an error message is recorded through add_error.
      #
      # file_filter is handed unchanged to download_full_file, which rebuilds a
      # Regexp from it with Regexp.json_create.
      def self.download_partner_files(file_filter:, dump_id:, file_prefix:)
        downloaded = Scsb::PartnerUpdates::Full.download_full_file(file_filter)
        unless downloaded
          return Scsb::PartnerUpdates::Full.add_error(message: "No full dump files found matching #{file_filter}", dump_id:)
        end

        ProcessPartnerUpdatesJob.perform_later(dump_id:, files: [downloaded.to_s], file_prefix:)
      end

      # Ensures that CSV is present and that it does not include any private records
      #
      # Downloads the most recent metadata CSV whose name matches the
      # institution code, reads the first value of the
      # "Collection Group Id(s)" column and compares it with the expected
      # '1*2*5*6'. The CSV is then moved into SCSB_FILE_DIR, attached to the
      # dump as :recap_records_full_metadata and removed again. The file is
      # attached even when the group ids do not match.
      #
      # inst    - institution code used to find the metadata file; it is matched
      #           literally (regex metacharacters are escaped).
      # dump_id - id of the Dump that receives the attachment and any error.
      #
      # Returns true only when a metadata file was found and its group ids equal
      # '1*2*5*6'; otherwise an error is recorded via add_error and false is
      # returned.
      def self.validate_csv(inst:, dump_id:)
        matcher = /#{Regexp.escape(inst.to_s)}.*\.csv/.as_json
        file = Scsb::PartnerUpdates::Full.download_full_file(matcher)
        unless file
          add_error(message: "No metadata files found matching #{inst}", dump_id:)
          return false
        end

        group_ids = CSV.read(file, headers: true)["Collection Group Id(s)"].first
        matches_expected_collections = group_ids == '1*2*5*6'
        unless matches_expected_collections
          add_error(message: "Metadata file indicates that dump for #{inst} does not include the correct Group IDs, not processing. Group ids: #{group_ids}", dump_id:)
        end

        # Local variable rather than a class-level instance variable, so no
        # per-call state is left behind on the class.
        # NOTE(review): an unset SCSB_FILE_DIR yields a path under "/" - confirm
        # the variable is always configured where this runs.
        destination_filepath = "#{ENV['SCSB_FILE_DIR']}/#{File.basename(file)}"
        FileUtils.move(file, destination_filepath)
        begin
          Dump.attach_dump_file(dump_id:, filepath: destination_filepath, dump_file_type: :recap_records_full_metadata)
        ensure
          # Remove the relocated copy even when attaching raises.
          File.unlink(destination_filepath) if File.exist?(destination_filepath)
        end
        matches_expected_collections
      end

      # Appends message to the error recorded on the event of the dump with the
      # given id. Any existing error value is kept and the entries are joined
      # with "; " before the event is saved.
      #
      # Returns the result of the event's save call.
      def self.add_error(message:, dump_id:)
        Dump.find(dump_id).event.then do |event|
          event.error = [*Array.wrap(event.error), message].join("; ")
          event.save
        end
      end

      # Asks the partner transfer S3 client to download the most recent file
      # matching file_filter.
      #
      # file_filter is the JSON-style representation of a Regexp (as produced by
      # Regexp#as_json from json/add/regexp) and is turned back into a Regexp
      # here. The S3 prefix and the local output directory come from
      # SCSB_S3_PARTNER_FULLS and SCSB_PARTNER_UPDATE_DIRECTORY, with built-in
      # defaults when those variables are unset.
      #
      # Returns whatever the client's download_recent call returns.
      def self.download_full_file(file_filter)
        bucket = Scsb::S3Bucket.partner_transfer_client
        pattern = Regexp.json_create(file_filter)
        bucket.download_recent(
          prefix: ENV.fetch('SCSB_S3_PARTNER_FULLS', 'data-exports/PUL/MARCXml/Full'),
          output_directory: ENV.fetch('SCSB_PARTNER_UPDATE_DIRECTORY', '/tmp/updates'),
          file_filter: pattern
        )
      end
    end
  end
end

1	require 'json/add/regexp'	1✔
2
3	module Scsb	1✔
4	class PartnerUpdates	1✔
5	class Full < Update	1✔
6	def initialize(dump:, dump_file_type:, timestamp: DateTime.now.to_time)	1✔
7	@dump = dump	8✔
8	@institutions = [
9	{ inst: "NYPL", prefix: 'scsbfull_nypl_' },	8✔
10	{ inst: "CUL", prefix: 'scsbfull_cul_' },
11	{ inst: "HL", prefix: 'scsbfull_hl_' }
12	]
13	super	8✔
14	end
15
16	def process_full_files	1✔
17	prepare_directory	7✔
18	dump_id = @dump.id	7✔
19	@institutions.each do \|institution\|	7✔
20	DownloadAndProcessFullJob.perform_later(inst: institution[:inst], prefix: institution[:prefix], dump_id:)	21✔
21	end
22	set_generated_date	7✔
23	log_record_fixes	7✔
24	end
25
26	def self.download_partner_files(file_filter:, dump_id:, file_prefix:)	1✔
27	file = Scsb::PartnerUpdates::Full.download_full_file(file_filter)	8✔
28	if file	8✔
29	ProcessPartnerUpdatesJob.perform_later(	8✔
30	dump_id:,
31	files: [file.to_s],
32	file_prefix:
33	)
34	else
NEW 35	Scsb::PartnerUpdates::Full.add_error(message: "No full dump files found matching #{file_filter}", dump_id:)	×
36	end
37	end
38
39	# Ensures that CSV is present and that it does not include any private records
40	def self.validate_csv(inst:, dump_id:)	1✔
41	@update_directory = ENV['SCSB_PARTNER_UPDATE_DIRECTORY'] \|\| '/tmp/updates'	15✔
42	@scsb_file_dir = ENV['SCSB_FILE_DIR']	15✔
43	matcher = /#{inst}.*\.csv/.as_json	15✔
44	file = Scsb::PartnerUpdates::Full.download_full_file(matcher)	15✔
45	matches_expected_collections = false	15✔
46	if file	15✔
47	csv = CSV.read(file, headers: true)	10✔
48	group_ids = csv["Collection Group Id(s)"].first	10✔
49	matches_expected_collections = group_ids == '1*2*5*6'	10✔
50	unless matches_expected_collections	10✔
51	add_error(message: "Metadata file indicates that dump for #{inst} does not include the correct Group IDs, not processing. Group ids: #{group_ids}", dump_id:)	3✔
52	end
53	filename = File.basename(file)	10✔
54	destination_filepath = "#{@scsb_file_dir}/#{filename}"	10✔
55	FileUtils.move(file, destination_filepath)	10✔
56	Dump.attach_dump_file(dump_id:, filepath: destination_filepath, dump_file_type: :recap_records_full_metadata)	10✔
57	File.unlink(destination_filepath) if File.exist?(destination_filepath)	10✔
58	else
59	add_error(message: "No metadata files found matching #{inst}", dump_id:)	5✔
60	end
61	matches_expected_collections	15✔
62	end
63
64	def self.add_error(message:, dump_id:)	1✔
65	dump_event = Dump.find(dump_id).event	8✔
66	error = Array.wrap(dump_event.error)	8✔
67	error << message	8✔
68	dump_event.error = error.join("; ")	8✔
69	dump_event.save	8✔
70	end
71
72	def self.download_full_file(file_filter)	1✔
73	update_directory = ENV['SCSB_PARTNER_UPDATE_DIRECTORY'] \|\| '/tmp/updates'	23✔
74	prefix = ENV['SCSB_S3_PARTNER_FULLS'] \|\| 'data-exports/PUL/MARCXml/Full'	23✔
75	s3_bucket = Scsb::S3Bucket.partner_transfer_client	23✔
76	file_filter = Regexp.json_create(file_filter)	23✔
77	s3_bucket.download_recent(prefix:, output_directory: update_directory, file_filter:)	23✔
78	end
79	end
80	end
81	end

pulibrary / bibdata / 381ebe8c-8717-4a37-93a5-7a9a805ad9de

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous