• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / lib_jobs / c9db9174-d6c5-4d36-908b-2333f1b04a24

08 Oct 2025 04:49PM UTC coverage: 96.043% (-0.3%) from 96.319%
c9db9174-d6c5-4d36-908b-2333f1b04a24

push

circleci

christinach
Connect to ArchivesSpace and set up git_lab_eac as a git repo.
Process all agent types and save each CPF.xml to the eacs dir
in the GitLab repo git_lab_eacs/eacs/.
Use the GitLab class to connect to GitLab.
Add new methods to commit and push to the GitLab eacs directory.

related to [#968]

32 of 42 new or added lines in 2 files covered. (76.19%)

1 existing line in 1 file now uncovered.

3374 of 3513 relevant lines covered (96.04%)

1209.06 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.81
/app/models/aspace_version_control/get_agents_job.rb
1
# frozen_string_literal: true
2
require 'archivesspace/client'
1✔
3
require 'nokogiri'
1✔
4
require 'fileutils'
1✔
5

6
module AspaceVersionControl
1✔
7
  # rubocop:disable Metrics/ClassLength
8
  class GetAgentsJob < LibJob
1✔
9
    attr_reader :repo_eacs
1✔
10

11
    # Sets up the job under the "Agents_export" category with an empty error list.
    #
    # @param local_git_lab_eacs_dir [String] directory used as the GitLab repo
    #   path by #handle; defaults to the application's aspace configuration
    def initialize(local_git_lab_eacs_dir: Rails.application.config.aspace.local_git_lab_eacs_dir)
      super(category: "Agents_export")
      @local_git_lab_eacs_dir = local_git_lab_eacs_dir
      @repo_eacs = Rails.application.config.aspace.repo_eacs
      # Error messages collected during a run; #report joins them.
      @errors = []
    end
17

18
    # Builds the ArchivesSpace configuration and logs in with it.
    #
    # @return whatever #aspace_client returns for that configuration
    def aspace_login
      config = aspace_config
      aspace_client(config)
    end
21

22
    # Returns the memoized ArchivesSpace configuration built from the
    # ASPACE_URL, ASPACE_USER and ASPACE_PASSWORD environment variables.
    # The presence check runs on every call, even when the value is memoized.
    #
    # @return [ArchivesSpace::Configuration]
    # @raise [RuntimeError] when any of the three variables is unset
    def aspace_config
      required = %w[ASPACE_URL ASPACE_USER ASPACE_PASSWORD]
      unless required.all? { |name| ENV[name] }
        raise "Missing required environment variables: ASPACE_URL, ASPACE_USER, or ASPACE_PASSWORD"
      end

      # NOTE(review): verify_ssl is switched off here -- confirm this is intentional.
      @config ||= ArchivesSpace::Configuration.new(
        {
          base_uri: ENV['ASPACE_URL'],
          username: ENV['ASPACE_USER'],
          password: ENV['ASPACE_PASSWORD'],
          throttle: 0,
          verify_ssl: false
        }
      )
    end
33

34
    # Returns the memoized, logged-in ArchivesSpace client.
    #
    # On failure the error is logged, recorded in @errors and re-raised.
    #
    # @param config the configuration passed to ArchivesSpace::Client.new
    # @return the result of calling #login on the new client (memoized)
    def aspace_client(config)
      @client ||= ArchivesSpace::Client.new(config).login
    rescue StandardError => e
      Rails.logger.error("Failed to connect to ArchivesSpace: #{e.message}")
      @errors << "ArchivesSpace connection failed: #{e.message}"
      raise
    end
41

42
    # Runs the export: logs in to ArchivesSpace, updates the local GitLab repo,
    # exports and commits the agents for every entry in repo_eacs, then stores
    # the report text and the report time on the data set.
    #
    # @param data_set an object exposing #data= and #report_time=
    # @return the same data_set, updated
    def handle(data_set:)
      aspace_login
      Rails.logger.info("Opening Repo at #{@local_git_lab_eacs_dir}")
      GitLab.new(repo_path: @local_git_lab_eacs_dir).update(path: @local_git_lab_eacs_dir)

      repo_eacs.each { |repo, path| prepare_and_commit_to_git_lab(repo, path) }

      data_set.tap do |result|
        result.data = report
        result.report_time = Time.zone.now
      end
    end
55

56
    # Summarises the run.
    #
    # @return [String] a success message when no errors were recorded,
    #   otherwise the recorded errors joined with ", "
    def report
      return "Agents successfully exported." if @errors.empty?

      @errors.join(', ')
    end
63

64
    # Requests "agents/families" with all_ids: true and memoizes the parsed
    # response (presumably the list of family agent ids -- confirm against the API).
    # Requires @client to be set already (see #aspace_client).
    def list_family_agents
      @family_agents ||= begin
        response = @client.get("agents/families", { query: { all_ids: true } })
        response.parsed
      end
    end
69

70
    # Requests "agents/corporate_entities" with all_ids: true and memoizes the
    # parsed response (presumably the list of corporate-entity agent ids --
    # confirm against the API). Requires @client to be set already.
    def list_corporate_entities_agents
      @corporate_entities_agents ||= begin
        response = @client.get('agents/corporate_entities', { query: { all_ids: true } })
        response.parsed
      end
    end
75

76
    # Requests "agents/people" with all_ids: true and memoizes the parsed
    # response (presumably the list of person agent ids -- confirm against the API).
    # Requires @client to be set already.
    def list_person_agents
      @person_agents ||= begin
        response = @client.get('agents/people', { query: { all_ids: true } })
        response.parsed
      end
    end
81

82
    # Downloads the archival-context XML for one agent and derives the name of
    # the file it should be stored under.
    #
    # @param id the agent id
    # @param agent_type [String] path segment such as 'people', 'families'
    #   or 'corporate_entities'
    # @return [Hash] :xml_content (the re-serialised XML) and :filename
    #   ("<generated base name>.CPF.xml")
    def get_archival_context_xml(id, agent_type)
      # repository 1 is the global repository in aspace that has all the agents
      response = @client.get("/repositories/1/archival_contexts/#{agent_type}/#{id}.xml")
      doc = Nokogiri::XML(response.body)
      basename = generate_cpf_filename(doc, id, agent_type)

      { xml_content: doc.to_xml, filename: "#{basename}.CPF.xml" }
    end
94

95
    # Methods to use separately if we want to schedule them individually
96
    # or run them ad hoc
97

98
    # Fetches the archival-context XML for a single person agent.
    #
    # @param id the person agent id
    # @return [Hash] see #get_archival_context_xml
    def get_person_archival_context_xml(id)
      agent_type = 'people'
      get_archival_context_xml(id, agent_type)
    end
101

102
    # Fetches the archival-context XML for a single family agent.
    #
    # @param id the family agent id
    # @return [Hash] see #get_archival_context_xml
    def get_family_archival_context_xml(id)
      agent_type = 'families'
      get_archival_context_xml(id, agent_type)
    end
105

106
    # Fetches the archival-context XML for a single corporate-entity agent.
    #
    # @param id the corporate-entity agent id
    # @return [Hash] see #get_archival_context_xml
    def get_corporate_entity_archival_context_xml(id)
      agent_type = 'corporate_entities'
      get_archival_context_xml(id, agent_type)
    end
109

110
    # Fetches one agent's CPF XML and writes it into +dir+.
    #
    # Any StandardError is rescued here: the message is logged and appended to
    # @errors, and the exception does not propagate to the caller.
    #
    # @param dir [String] destination directory
    # @param id the agent id
    # @param agent_type [String] e.g. 'people'
    # @return [String] the written file's name on success; on failure the
    #   return value of #log_stderr (not a filename)
    def write_cpf_to_file(dir, id, agent_type)
      Rails.logger.info("Processing CPF for #{agent_type}/#{id}")

      cpf = get_archival_context_xml(id, agent_type)
      target = "#{dir}/#{cpf[:filename]}"
      File.write(target, cpf[:xml_content])

      Rails.logger.info("Wrote CPF XML to #{target}")
      cpf[:filename]
    rescue StandardError => e
      message = "Unable to process CPF for #{agent_type}/#{id}: #{e.message}"
      log_stdout(message)
      log_stderr(message)
    end
127

128
    # rubocop:disable Metrics/MethodLength

    # Exports the CPF file of every agent of +agent_type+ into +output_dir+.
    #
    # A checkpoint file in +output_dir+ lets an interrupted run resume; it is
    # removed once the batch finishes.
    #
    # @param agent_type [String] 'people', 'families' or 'corporate_entities'
    # @param output_dir [String] directory to write into (created if missing)
    # @param chunk_size [Integer] number of agents between checkpoints
    # @param start_from [Integer] index of the first agent to process; when 0,
    #   an existing checkpoint takes precedence
    # @return [Integer] the count reported by #process_agent_batch
    # @raise [RuntimeError] for an unknown agent type
    def process_all_cpf_files(agent_type, output_dir, chunk_size: 500, start_from: 0)
      agent_ids = get_agent_ids_by_type(agent_type)
      total_count = agent_ids.count

      Rails.logger.info("Processing #{total_count} #{agent_type} agents for CPF files (starting from #{start_from})")
      FileUtils.mkdir_p(output_dir)

      checkpoint_file = "#{output_dir}/#{agent_type}_checkpoint.txt"
      start_from = resolve_start_position(checkpoint_file, start_from)

      config = {
        output_dir: output_dir,
        start_from: start_from,
        total_count: total_count,
        chunk_size: chunk_size,
        checkpoint_file: checkpoint_file
      }
      processed_count = process_agent_batch(agent_ids, agent_type, config)

      cleanup_checkpoint(checkpoint_file)
      # A start position past the end of the list means nothing was attempted;
      # never report a negative number.
      attempted = [total_count - start_from, 0].max
      Rails.logger.info("Completed processing: #{processed_count} #{agent_type} agents successful out of #{attempted} attempted")
      processed_count
    end
    # rubocop:enable Metrics/MethodLength
153

154
    # Methods for each agent type in case we want to run them separately
155
    # Exports the CPF files of all person agents.
    #
    # @param output_dir [String] directory to write into
    # @param chunk_size [Integer] number of agents between checkpoints
    # @param start_from [Integer] index of the first agent to process
    # @return the result of #process_all_cpf_files
    def process_all_person_cpf_files(output_dir, chunk_size: 500, start_from: 0)
      options = { chunk_size: chunk_size, start_from: start_from }
      process_all_cpf_files('people', output_dir, **options)
    end
158

159
    # Exports the CPF files of all family agents.
    #
    # @param output_dir [String] directory to write into
    # @param chunk_size [Integer] number of agents between checkpoints
    # @param start_from [Integer] index of the first agent to process
    # @return the result of #process_all_cpf_files
    def process_all_family_cpf_files(output_dir, chunk_size: 500, start_from: 0)
      options = { chunk_size: chunk_size, start_from: start_from }
      process_all_cpf_files('families', output_dir, **options)
    end
162

163
    # Exports the CPF files of all corporate-entity agents.
    #
    # @param output_dir [String] directory to write into
    # @param chunk_size [Integer] number of agents between checkpoints
    # @param start_from [Integer] index of the first agent to process
    # @return the result of #process_all_cpf_files
    def process_all_corporate_entity_cpf_files(output_dir, chunk_size: 500, start_from: 0)
      options = { chunk_size: chunk_size, start_from: start_from }
      process_all_cpf_files('corporate_entities', output_dir, **options)
    end
166

167
    private
1✔
168

169
    # Exports every agent type into the repo sub-directory for +path+ and
    # commits the result through GitLab.
    #
    # A Git::Error is logged and recorded in @errors so that #report reflects
    # the failed commit instead of claiming success; it is not re-raised.
    #
    # @param repo identifier of the repository (used in the error message only)
    # @param path [String] sub-directory inside the local GitLab checkout
    def prepare_and_commit_to_git_lab(repo, path)
      git_lab_repo_path = repo_path(@local_git_lab_eacs_dir, path)
      Rails.logger.info("Preparing commit to GitLab for #{git_lab_repo_path}")

      make_directories(git_lab_repo_path)
      process_all_agent_types_to_directory(git_lab_repo_path)
      GitLab.new(repo_path: @local_git_lab_eacs_dir).commit_eacs_to_git(path: path)
    rescue Git::Error => error
      Rails.logger.error("Error updating EACs using GitLab for repo #{repo} at path #{path}.\nError: #{error}")
      @errors << "Error updating EACs using GitLab for repo #{repo} at path #{path}: #{error.message}"
    end
179

180
    # Exports people, families and corporate entities, in that order, into
    # +output_dir+, using a chunk size of 500 and starting from index 0.
    #
    # @param output_dir [String] directory receiving the CPF files
    def process_all_agent_types_to_directory(output_dir)
      Rails.logger.info("Processing all agent types to #{output_dir}")

      %w[people families corporate_entities].each do |agent_type|
        Rails.logger.info("Processing #{agent_type} agents")
        process_all_cpf_files(agent_type, output_dir, chunk_size: 500, start_from: 0)
      end
    end
189

190
    # Joins the local GitLab checkout directory and a sub-path.
    #
    # @param local_git_lab_dir [String] base directory
    # @param path [String] sub-directory below the base
    # @return [String] the combined path
    def repo_path(local_git_lab_dir, path)
      segments = [local_git_lab_dir, path]
      File.join(*segments)
    end
193

194
    # Creates +git_lab_repo_path+ and any missing parent directories.
    #
    # @param git_lab_repo_path [String] directory to create
    def make_directories(git_lab_repo_path)
      target = git_lab_repo_path
      FileUtils.mkdir_p(target)
    end
197

198
    # Returns the agent listing for the given agent type by delegating to the
    # matching list_* method.
    #
    # @param agent_type [String] 'people', 'families' or 'corporate_entities'
    # @raise [RuntimeError] for any other value
    def get_agent_ids_by_type(agent_type)
      listers = {
        'people' => :list_person_agents,
        'families' => :list_family_agents,
        'corporate_entities' => :list_corporate_entities_agents
      }
      lister = listers.fetch(agent_type) { raise "Unknown agent type: #{agent_type}" }
      send(lister)
    end
210

211
    # Decides where processing starts. An explicit non-zero +start_from+
    # always wins; otherwise an existing checkpoint file supplies the position.
    #
    # @param checkpoint_file [String] path of the checkpoint file
    # @param start_from [Integer] requested start index
    # @return [Integer] the index to start from
    def resolve_start_position(checkpoint_file, start_from)
      return start_from unless File.exist?(checkpoint_file) && start_from.zero?

      saved_position = File.read(checkpoint_file).to_i
      Rails.logger.info("Resuming from checkpoint: #{saved_position}")
      saved_position
    end
220

221
    # Processes every agent id from config[:start_from] onwards, reporting
    # progress and checkpoints after each one.
    #
    # @param agent_ids [Array] all ids for the agent type
    # @param agent_type [String] e.g. 'people'
    # @param config [Hash] :output_dir, :start_from, :total_count, :chunk_size
    #   and :checkpoint_file
    # @return [Integer] how many agents #process_single_agent reported as successful
    def process_agent_batch(agent_ids, agent_type, config)
      output_dir, start_from, total_count, chunk_size, checkpoint_file =
        config.values_at(:output_dir, :start_from, :total_count, :chunk_size, :checkpoint_file)

      successes = 0
      # with_index(start_from) yields the index into the full list, not into the remainder.
      agent_ids.drop(start_from).each.with_index(start_from) do |id, absolute_index|
        successes += 1 if process_single_agent(id, agent_type, output_dir, absolute_index)

        handle_progress_and_checkpoints(absolute_index, total_count, agent_type, chunk_size, checkpoint_file)
      end

      successes
    end
239

240
    # Writes the CPF file for one agent and reports whether it succeeded.
    #
    # #write_cpf_to_file rescues its own errors and returns the filename
    # (a String) only on success, so success is judged by the return value;
    # previously every agent was counted as successful. The rescue remains as
    # a safety net for errors that do propagate.
    #
    # @param id the agent id
    # @param agent_type [String] e.g. 'people'
    # @param output_dir [String] directory to write into
    # @param absolute_index [Integer] position in the full id list (for logging)
    # @return [Boolean] true when the file was written
    def process_single_agent(id, agent_type, output_dir, absolute_index)
      write_cpf_to_file(output_dir, id, agent_type).is_a?(String)
    rescue => error
      Rails.logger.error("Failed to process #{agent_type} agent #{id} at index #{absolute_index}: #{error.message}")
      @errors << "#{agent_type} agent #{id}: #{error.message}"
      false
    end
248

249
    # Logs progress every 100 agents, writes a checkpoint (then pauses 2s)
    # every +chunk_size+ agents, and otherwise pauses briefly every 20 agents.
    #
    # @param absolute_index [Integer] zero-based index of the agent just processed
    # @param total_count [Integer] total number of agents of this type
    # @param agent_type [String] e.g. 'people'
    # @param chunk_size [Integer] number of agents between checkpoints
    # @param checkpoint_file [String] path of the checkpoint file
    def handle_progress_and_checkpoints(absolute_index, total_count, agent_type, chunk_size, checkpoint_file)
      position = absolute_index + 1
      Rails.logger.info("Processed #{position}/#{total_count} #{agent_type} agents") if (position % 100).zero?

      # Save checkpoint every chunk_size records
      if (position % chunk_size).zero?
        File.write(checkpoint_file, position)
        Rails.logger.info("Checkpoint saved at #{position}")
        sleep(2)
      elsif (position % 20).zero?
        sleep(0.05) # don't overwhelm the API with requests
      end
    end
261

262
    # Removes the checkpoint file if one exists.
    #
    # @param checkpoint_file [String] path of the checkpoint file
    def cleanup_checkpoint(checkpoint_file)
      return unless File.exist?(checkpoint_file)

      File.delete(checkpoint_file)
    end
265

266
    # Builds the base filename (without extension) for an agent's CPF file:
    # "<SURNAME>_<FORENAME>_<agent_type>_<id>", omitting the name parts that
    # the document does not contain.
    #
    # Whitespace and "/" are removed from the name parts; a path separator in
    # a name would otherwise redirect the write to a different directory.
    #
    # @param doc an XML document responding to #at_xpath(xpath, namespaces)
    # @param id the agent id
    # @param agent_type [String] e.g. 'people'
    # @return [String] the filename base
    def generate_cpf_filename(doc, id, agent_type)
      namespace = { 'eac' => 'urn:isbn:1-931666-33-4' }

      surname = doc.at_xpath('//eac:nameEntry/eac:part[@localType="surname"]', namespace)&.text
      forename = doc.at_xpath('//eac:nameEntry/eac:part[@localType="forename"]', namespace)&.text

      # Build name parts if they exist
      name_parts = [surname, forename].compact.map { |part| part.gsub(%r{[\s/]+}, '').upcase }

      # Concatenate name parts, agent_type, and agent_id
      filename_parts = []
      filename_parts << name_parts.join('_') if name_parts.any?
      filename_parts << agent_type
      filename_parts << id.to_s

      filename_parts.join('_')
    end
285

286
    # Records a non-empty error message in @errors.
    #
    # @param stderr_str [String] the message to record
    # @return [Array, nil] @errors after appending, or nil for an empty message
    def log_stderr(stderr_str)
      return if stderr_str.empty?

      @errors << stderr_str
    end
289

290
    # Writes a non-empty message to the Rails log at info level.
    #
    # @param stdout_str [String] the message to log
    def log_stdout(stdout_str)
      return if stdout_str.empty?

      Rails.logger.info(stdout_str)
    end
293
  end
294
  # rubocop:enable Metrics/ClassLength
295
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc