• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / lib_jobs / 9db49f56-82a0-4d13-aaab-cda9263ad377

08 Oct 2025 01:06PM UTC coverage: 96.282% (-0.07%) from 96.356%
9db49f56-82a0-4d13-aaab-cda9263ad377

Pull #973

circleci

christinach
Add a job to get family, person and corporate entities agents

related to [#964]
Pull Request #973: Add a job to get family, person and corporate entities agents

115 of 122 new or added lines in 1 file covered. (94.26%)

3341 of 3470 relevant lines covered (96.28%)

1223.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.26
/app/models/aspace_version_control/get_agents_job.rb
1
# frozen_string_literal: true
2
require 'archivesspace/client'
1✔
3
require 'nokogiri'
1✔
4

5
module AspaceVersionControl
1✔
6
  class GetAgentsJob < LibJob
1✔
7
    # Sets up the job under the "Agents_export" category (passed to the
    # LibJob superclass) and starts with an empty list of error messages.
    def initialize
      super(category: 'Agents_export')
      # Error messages accumulated while the job runs; read back by #report.
      @errors = []
    end
11

12
    # Builds the ArchivesSpace configuration from the environment and hands it
    # to #aspace_client.
    #
    # @return whatever #aspace_client returns (the memoized, logged-in client)
    # @raise [RuntimeError] when #aspace_config finds required variables missing
    def aspace_login
      config = aspace_config
      aspace_client(config)
    end
15

16
    # Builds (and memoizes in @config) the ArchivesSpace configuration from the
    # ASPACE_URL, ASPACE_USER and ASPACE_PASSWORD environment variables.
    #
    # The presence check runs on every call, including when a configuration
    # has already been memoized.
    #
    # @return [ArchivesSpace::Configuration]
    # @raise [RuntimeError] when any of the three variables is unset or blank
    def aspace_config
      required = %w[ASPACE_URL ASPACE_USER ASPACE_PASSWORD]
      # An empty or whitespace-only value is as unusable as an unset one, so
      # treat both as missing instead of building a broken configuration.
      if required.any? { |name| ENV[name].to_s.strip.empty? }
        raise "Missing required environment variables: ASPACE_URL, ASPACE_USER, or ASPACE_PASSWORD"
      end

      @config ||= ArchivesSpace::Configuration.new(
        {
          base_uri: ENV['ASPACE_URL'],
          username: ENV['ASPACE_USER'],
          password: ENV['ASPACE_PASSWORD'],
          throttle: 0,
          # NOTE(review): presumably this disables TLS certificate
          # verification for every request -- confirm this is intended
          # outside of development environments.
          verify_ssl: false
        }
      )
    end
27

28
    # Returns the memoized ArchivesSpace client, creating it and logging in
    # with +config+ on first use. Once @client is set, +config+ is ignored.
    #
    # On any StandardError the failure is logged, recorded in @errors, and the
    # same exception is re-raised to the caller.
    #
    # @param config the configuration object passed to ArchivesSpace::Client.new
    # @return the value of ArchivesSpace::Client#login
    def aspace_client(config)
      @client ||= begin
        session = ArchivesSpace::Client.new(config)
        session.login
      end
    rescue => error
      reason = error.message
      Rails.logger.error("Failed to connect to ArchivesSpace: #{reason}")
      @errors << "ArchivesSpace connection failed: #{reason}"
      raise
    end
35

36
    # Job entry point: logs in to ArchivesSpace, then stamps +data_set+ with
    # the textual report and the current time.
    #
    # @param data_set an object exposing +data=+ and +report_time=+
    # @return the same +data_set+ object that was passed in
    def handle(data_set:)
      aspace_login
      # TODO:  agent logic
      data_set.tap do |result|
        result.data = report
        result.report_time = Time.zone.now
      end
    end
43

44
    # Summarises the run as a single string.
    #
    # @return [String] a fixed success message when no errors were recorded,
    #   otherwise every recorded error joined with ", "
    def report
      return "Agents successfully exported." if @errors.empty?

      @errors.join(', ')
    end
51

52
    # Requests "agents/families" with +all_ids: true+ and memoizes the parsed
    # response in @family_agents.
    #
    # Uses the existing @client when there is one; otherwise logs in first via
    # #aspace_login, so the method no longer fails with NoMethodError on nil
    # when called before a login.
    #
    # @return the parsed response (presumably an Array of agent ids, since
    #   all_ids is requested -- confirm against the ArchivesSpace API)
    def list_family_agents
      @family_agents ||= begin
        client = @client || aspace_login
        client.get('agents/families', { query: { all_ids: true } }).parsed
      end
    end
57

58
    # Requests "agents/corporate_entities" with +all_ids: true+ and memoizes
    # the parsed response in @corporate_entities_agents.
    #
    # Uses the existing @client when there is one; otherwise logs in first via
    # #aspace_login, so the method no longer fails with NoMethodError on nil
    # when called before a login.
    #
    # @return the parsed response (presumably an Array of agent ids, since
    #   all_ids is requested -- confirm against the ArchivesSpace API)
    def list_corporate_entities_agents
      @corporate_entities_agents ||= begin
        client = @client || aspace_login
        client.get('agents/corporate_entities', { query: { all_ids: true } }).parsed
      end
    end
63

64
    # Requests "agents/people" with +all_ids: true+ and memoizes the parsed
    # response in @person_agents.
    #
    # Uses the existing @client when there is one; otherwise logs in first via
    # #aspace_login, so the method no longer fails with NoMethodError on nil
    # when called before a login.
    #
    # @return the parsed response (presumably an Array of agent ids, since
    #   all_ids is requested -- confirm against the ArchivesSpace API)
    def list_person_agents
      @person_agents ||= begin
        client = @client || aspace_login
        client.get('agents/people', { query: { all_ids: true } }).parsed
      end
    end
69

70
    # Downloads the archival-context XML for one agent and pairs it with the
    # file name it should be stored under.
    #
    # Uses the existing @client when there is one; otherwise logs in first via
    # #aspace_login.
    #
    # @param id the agent id, interpolated into the request path
    # @param agent_type [String] path segment for the agent type, e.g.
    #   'people', 'families' or 'corporate_entities'
    # @param repository_id the repository segment of the request path;
    #   defaults to 1, the value that was previously hard-coded
    # @return [Hash] +:xml_content+ (the document re-serialised by Nokogiri)
    #   and +:filename+ (the #generate_cpf_filename result plus ".CPF.xml")
    def get_archival_context_xml(id, agent_type, repository_id: 1)
      client = @client || aspace_login
      path = "/repositories/#{repository_id}/archival_contexts/#{agent_type}/#{id}.xml"
      doc = Nokogiri::XML(client.get(path).body)

      filename_base = generate_cpf_filename(doc, id)

      {
        xml_content: doc.to_xml,
        filename: "#{filename_base}.CPF.xml"
      }
    end
81

82
    # Convenience wrappers that fetch the archival context XML for a single agent type
83
    # Fetches the archival-context XML for one person agent by delegating to
    # #get_archival_context_xml with the 'people' agent type.
    #
    # @param id the agent id
    # @return the value returned by #get_archival_context_xml
    def get_person_archival_context_xml(id)
      agent_type = 'people'
      get_archival_context_xml(id, agent_type)
    end
86

87
    # Fetches the archival-context XML for one family agent by delegating to
    # #get_archival_context_xml with the 'families' agent type.
    #
    # @param id the agent id
    # @return the value returned by #get_archival_context_xml
    def get_family_archival_context_xml(id)
      agent_type = 'families'
      get_archival_context_xml(id, agent_type)
    end
90

91
    # Fetches the archival-context XML for one corporate-entity agent by
    # delegating to #get_archival_context_xml with the 'corporate_entities'
    # agent type.
    #
    # @param id the agent id
    # @return the value returned by #get_archival_context_xml
    def get_corporate_entity_archival_context_xml(id)
      agent_type = 'corporate_entities'
      get_archival_context_xml(id, agent_type)
    end
94

95
    # Fetches the CPF XML for one agent and writes it into +dir+.
    #
    # Failures are deliberately not propagated: any StandardError is logged,
    # recorded through #log_stderr, and reported to the caller as +nil+.
    # (Previously the failure path returned the result of #log_stderr, a
    # truthy value that could not be told apart from success.)
    #
    # @param dir [String] directory the file is written into
    # @param id the agent id
    # @param agent_type [String] e.g. 'people', 'families', 'corporate_entities'
    # @return [String, nil] the written file's base name, or nil on failure
    def write_cpf_to_file(dir, id, agent_type)
      Rails.logger.info("Processing CPF for #{agent_type}/#{id}")

      cpf_data = get_archival_context_xml(id, agent_type)
      filename = File.join(dir, cpf_data[:filename])
      File.write(filename, cpf_data[:xml_content])

      Rails.logger.info("Wrote CPF XML to #{filename}")
      cpf_data[:filename]
    rescue => error
      err = "Unable to process CPF for #{agent_type}/#{id}: #{error.message}"
      log_stdout(err)
      log_stderr(err)
      nil
    end
112

113
    # Writes a CPF file for every agent of +agent_type+ into +output_dir+,
    # resuming from a checkpoint file when one exists.
    #
    # Steps: look up the ids, create +output_dir+, resolve the start position
    # (see #resolve_start_position), process the ids (see
    # #process_agent_batch), then remove the checkpoint file.
    #
    # @param agent_type [String] 'people', 'families' or 'corporate_entities'
    # @param output_dir [String] directory for the XML files and the checkpoint
    # @param chunk_size [Integer] forwarded to #process_agent_batch
    # @param start_from [Integer] index of the first id to process
    # @return [Integer] the count returned by #process_agent_batch
    # @raise [RuntimeError] from #get_agent_ids_by_type for an unknown type
    def process_all_cpf_files(agent_type, output_dir, chunk_size: 500, start_from: 0)
      agent_ids = get_agent_ids_by_type(agent_type)
      total_count = agent_ids.count

      Rails.logger.info("Processing #{total_count} #{agent_type} agents for CPF files (starting from #{start_from})")
      FileUtils.mkdir_p(output_dir)

      checkpoint_file = "#{output_dir}/#{agent_type}_checkpoint.txt"
      start_from = resolve_start_position(checkpoint_file, start_from)

      processed_count = process_agent_batch(agent_ids, agent_type, output_dir, start_from, total_count, chunk_size, checkpoint_file)

      cleanup_checkpoint(checkpoint_file)
      # A start position beyond the end of the list means nothing was
      # attempted; clamp so the summary never reports a negative number.
      attempted = [total_count - start_from, 0].max
      Rails.logger.info("Completed processing: #{processed_count} #{agent_type} agents successful out of #{attempted} attempted")
      processed_count
    end
129

130
    # Per-agent-type entry points, for running the export of a single agent type on its own
131
    # Runs #process_all_cpf_files for the 'people' agent type.
    #
    # @param output_dir [String] directory that receives the CPF files
    # @param chunk_size [Integer] forwarded unchanged
    # @param start_from [Integer] forwarded unchanged
    # @return the value returned by #process_all_cpf_files
    def process_all_person_cpf_files(output_dir, chunk_size: 500, start_from: 0)
      options = { chunk_size: chunk_size, start_from: start_from }
      process_all_cpf_files('people', output_dir, **options)
    end
134

135
    # Runs #process_all_cpf_files for the 'families' agent type.
    #
    # @param output_dir [String] directory that receives the CPF files
    # @param chunk_size [Integer] forwarded unchanged
    # @param start_from [Integer] forwarded unchanged
    # @return the value returned by #process_all_cpf_files
    def process_all_family_cpf_files(output_dir, chunk_size: 500, start_from: 0)
      options = { chunk_size: chunk_size, start_from: start_from }
      process_all_cpf_files('families', output_dir, **options)
    end
138

139
    # Runs #process_all_cpf_files for the 'corporate_entities' agent type.
    #
    # @param output_dir [String] directory that receives the CPF files
    # @param chunk_size [Integer] forwarded unchanged
    # @param start_from [Integer] forwarded unchanged
    # @return the value returned by #process_all_cpf_files
    def process_all_corporate_entity_cpf_files(output_dir, chunk_size: 500, start_from: 0)
      options = { chunk_size: chunk_size, start_from: start_from }
      process_all_cpf_files('corporate_entities', output_dir, **options)
    end
142

143
    private
1✔
144

145
    # Maps an agent type name to the matching list_* lookup.
    #
    # @param agent_type [String] 'people', 'families' or 'corporate_entities'
    # @return the value of the matching list_* method
    # @raise [RuntimeError] for any other agent type
    def get_agent_ids_by_type(agent_type)
      return list_person_agents if agent_type == 'people'
      return list_family_agents if agent_type == 'families'
      return list_corporate_entities_agents if agent_type == 'corporate_entities'

      raise "Unknown agent type: #{agent_type}"
    end
157

158
    # Decides where processing should begin.
    #
    # An explicit non-zero +start_from+ always wins. Otherwise, if the
    # checkpoint file exists, its contents (converted with +to_i+) are used and
    # the resume position is logged.
    #
    # @param checkpoint_file [String] path of the checkpoint file
    # @param start_from [Integer] position requested by the caller
    # @return [Integer] the position to start from
    def resolve_start_position(checkpoint_file, start_from)
      return start_from if start_from != 0 || !File.exist?(checkpoint_file)

      File.read(checkpoint_file).to_i.tap do |position|
        Rails.logger.info("Resuming from checkpoint: #{position}")
      end
    end
167

168
    # Processes every id from position +start_from+ onwards, running the
    # progress/checkpoint hook after each one.
    #
    # @param agent_ids [Array] all ids for the agent type
    # @param start_from [Integer] number of leading ids to skip; also the
    #   index assigned to the first processed id
    # @return [Integer] how many ids #process_single_agent reported as
    #   successful
    def process_agent_batch(agent_ids, agent_type, output_dir, start_from, total_count, chunk_size, checkpoint_file)
      remaining = agent_ids.drop(start_from)

      # with_index(start_from) numbers the remaining ids by their position in
      # the full list; count tallies the truthy block results.
      remaining.each.with_index(start_from).count do |id, absolute_index|
        succeeded = process_single_agent(id, agent_type, output_dir, absolute_index)
        handle_progress_and_checkpoints(absolute_index, total_count, agent_type, chunk_size, checkpoint_file)
        succeeded
      end
    end
181

182
    # Writes the CPF file for one agent and reports whether it succeeded.
    #
    # #write_cpf_to_file rescues its own errors, so a failure normally shows up
    # in its return value instead of as an exception. Only a String (the
    # written file name) counts as success; previously every call that did
    # not raise was counted as successful, inflating the processed total.
    #
    # @param id the agent id
    # @param agent_type [String] e.g. 'people'
    # @param output_dir [String] directory the file is written into
    # @param absolute_index [Integer] position in the full id list (logging only)
    # @return [Boolean] true when a file was written, false otherwise
    def process_single_agent(id, agent_type, output_dir, absolute_index)
      written = write_cpf_to_file(output_dir, id, agent_type)
      # A non-String result means write_cpf_to_file already logged and
      # recorded the failure, so nothing is added to @errors here.
      written.is_a?(String)
    rescue => error
      Rails.logger.error("Failed to process #{agent_type} agent #{id} at index #{absolute_index}: #{error.message}")
      @errors << "#{agent_type} agent #{id}: #{error.message}"
      false
    end
190

191
    # Per-record housekeeping: progress logging, checkpoint writes and
    # pauses between requests.
    #
    # @param absolute_index [Integer] zero-based position of the record just
    #   processed
    # @param total_count [Integer] total number of records (logging only)
    # @param chunk_size [Integer] a checkpoint is written whenever the number
    #   of processed records is a multiple of this
    # @param checkpoint_file [String] path the checkpoint is written to
    def handle_progress_and_checkpoints(absolute_index, total_count, agent_type, chunk_size, checkpoint_file)
      processed = absolute_index + 1

      # Progress logging every 100 records
      if (processed % 100).zero?
        Rails.logger.info("Processed #{processed}/#{total_count} #{agent_type} agents")
      end

      if (processed % chunk_size).zero?
        # Save checkpoint every chunk_size records
        File.write(checkpoint_file, processed)
        Rails.logger.info("Checkpoint saved at #{processed}")
        sleep(2) # Longer pause every chunk to prevent API overload
      elsif (processed % 20).zero?
        # Short pause every 20 records that are not on a chunk boundary
        sleep(0.05)
      end
    end
204

205
    # Removes the checkpoint file; a file that is already gone is not an error.
    #
    # Deleting directly and rescuing Errno::ENOENT avoids the gap between an
    # existence check and the delete, during which the file could disappear.
    #
    # @param checkpoint_file [String] path of the checkpoint file
    # @return [Integer, nil] the File.delete result, or nil when the file did
    #   not exist
    def cleanup_checkpoint(checkpoint_file)
      File.delete(checkpoint_file)
    rescue Errno::ENOENT
      nil
    end
208

209
    # Derives the base file name (without extension) for an agent's CPF file.
    #
    # Preference order:
    #   1. surname + forename (or surname alone), whitespace removed, upcased
    #   2. the document's entityId
    #   3. the document's recordId
    #   4. "agent_<id>"
    #
    # Blank values are treated as missing, and path separators and control
    # characters are replaced with "_" so the result is always a single path
    # component (an entityId holding a URI would otherwise produce a path with
    # directories in it).
    #
    # NOTE(review): two agents with the same surname and forename still map to
    # the same name, so the later file overwrites the earlier one -- confirm
    # whether that is acceptable.
    #
    # @param doc an XML document responding to +at_xpath(xpath, namespaces)+
    # @param id the agent id, used only for the last-resort name
    # @return [String] the base file name
    def generate_cpf_filename(doc, id)
      namespace = { 'eac' => 'urn:isbn:1-931666-33-4' }
      # Text of the first node matching +xpath+, or nil when absent or blank.
      text_at = lambda do |xpath|
        value = doc.at_xpath(xpath, namespace)&.text.to_s.strip
        value unless value.empty?
      end

      surname = text_at.call('//eac:nameEntry/eac:part[@localType="surname"]')
      forename = text_at.call('//eac:nameEntry/eac:part[@localType="forename"]')

      base =
        if surname
          # A missing forename interpolates as an empty string.
          "#{surname}#{forename}".gsub(/\s+/, '').upcase
        else
          # Fallback to entityId or recordId if name parts not found
          text_at.call('//eac:entityId') || text_at.call('//eac:recordId') || "agent_#{id}"
        end

      base.gsub(%r{[/\\[:cntrl:]]}, '_')
    end
226

227
    # Records a non-empty error message in @errors.
    #
    # nil is treated like an empty string (previously it raised NoMethodError
    # on +empty?+).
    #
    # @param stderr_str [String, nil] the message to record
    # @return [Array, nil] @errors when something was recorded, else nil
    def log_stderr(stderr_str)
      message = stderr_str.to_s
      @errors << message unless message.empty?
    end
230

231
    # Writes a non-empty message to the Rails log at info level.
    #
    # nil is treated like an empty string (previously it raised NoMethodError
    # on +empty?+).
    #
    # @param stdout_str [String, nil] the message to log
    def log_stdout(stdout_str)
      message = stdout_str.to_s
      Rails.logger.info(message) unless message.empty?
    end
234
  end
235
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc