• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / tigerdata-app / fa5427d1-c616-4db4-8869-6c67d0eea350

23 Jun 2025 06:40PM UTC coverage: 71.937% (-0.2%) from 72.107%
fa5427d1-c616-4db4-8869-6c67d0eea350

Pull #1533

circleci

jrgriffiniii
Introducing some additional refactoring steps for the ProjectXmlPresenter; Implements a route for retrieving the Mediaflux XML document for project metadata
Pull Request #1533: Implements a route for retrieving the Mediaflux XML document for project metadata

4 of 18 branches covered (22.22%)

4 of 15 new or added lines in 3 files covered. (26.67%)

18 existing lines in 2 files now uncovered.

2994 of 4162 relevant lines covered (71.94%)

485.82 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.43
/app/models/project.rb
1
# frozen_string_literal: true
2
class Project < ApplicationRecord
1✔
3
  validates_with ProjectValidator
1✔
4
  has_many :provenance_events, dependent: :destroy
1✔
5
  before_save do |project|
1✔
6
    # Ensure that the metadata JSONB postgres field is persisted properly
7
    project.metadata = project.metadata_model
536✔
8
  end
9

10
  # Valid project status described in ADR 7
11
  # See `architecture-decisions/0007-valid-project-statuses.md`
12
  PENDING_STATUS = "pending"
1✔
13
  APPROVED_STATUS = "approved"
1✔
14
  ACTIVE_STATUS = "active"
1✔
15

16
  delegate :to_json, to: :metadata_json # field in the database
1✔
17

18
  # Assigns the given metadata to this project and persists it when the
  # project is valid.
  #
  # When the metadata's project_id equals ProjectMetadata::DOI_NOT_MINTED a
  # draft DOI is requested before saving and the submission provenance events
  # are generated after saving. Otherwise the project is only saved.
  #
  # @param initial_metadata the metadata model to store on the project
  # @param user the user handed to the DOI and provenance steps
  # @return the project_id (DOI) held by the metadata model, or nil when the
  #   project is not valid
  def create!(initial_metadata:, user:)
    self.metadata_model = initial_metadata
    return nil unless valid?

    # Decide before drafting: draft_doi overwrites project_id on the model.
    doi_missing = initial_metadata.project_id == ProjectMetadata::DOI_NOT_MINTED
    draft_doi(user: user) if doi_missing
    save!
    ProvenanceEvent.generate_submission_events(project: self, user: user) if doi_missing

    metadata_model.project_id
  end
34

35
  # Stores the Mediaflux id, sets the status to APPROVED_STATUS, saves the
  # project and then generates the approval provenance events.
  #
  # @param mediaflux_id the Mediaflux id to record on the project
  # @param current_user the user recorded on the provenance events
  def approve!(mediaflux_id:, current_user:)
    self.mediaflux_id = mediaflux_id
    metadata_model.status = APPROVED_STATUS
    save!

    # Two provenance events are created: one for approving the project and
    # another for changing the status of the project.
    ProvenanceEvent.generate_approval_events(project: self, user: current_user)
  end
45

46
  # Reloads the record and rebuilds the memoized metadata model from the
  # freshly loaded metadata hash, so stale in-memory metadata is discarded.
  #
  # @param options [Hash, nil] forwarded unchanged to the inherited reload
  #   (for example `lock: true`), keeping this override compatible with
  #   callers that pass reload options
  # @return [Project] self
  def reload(options = nil)
    # Bare `super` forwards `options` to the parent implementation.
    super
    @metadata_model = ProjectMetadata.new_from_hash(metadata)
    self
  end
51

52
  # Activates the project when the Mediaflux asset identified by
  # collection_id is a collection whose project id matches this project's.
  #
  # On success the status becomes ACTIVE_STATUS, the project directory is
  # copied from the Mediaflux metadata, the project is saved and the
  # activation provenance events are generated. Otherwise nothing changes and
  # nil is returned.
  #
  # @param collection_id the Mediaflux asset id to look up
  # @param current_user the user whose Mediaflux session is used
  def activate!(collection_id:, current_user:)
    metadata_request = Mediaflux::AssetMetadataRequest.new(session_token: current_user.mediaflux_session, id: collection_id)
    collection_metadata = metadata_request.metadata

    # Bail out unless Mediaflux reports the asset as a collection.
    return unless collection_metadata[:collection] == true

    # Bail out when the DOI stored in Rails differs from the one in Mediaflux.
    return if collection_metadata[:project_id] != metadata_model.project_id

    metadata_model.status = ACTIVE_STATUS
    # Also read in the actual project directory.
    metadata_model.project_directory = collection_metadata[:project_directory]
    save!

    ProvenanceEvent.generate_active_events(project: self, user: current_user)
  end
70

71
  # Requests a draft DOI from PULDatacite and stores it as the project_id of
  # the metadata model.
  #
  # @param user accepted for interface compatibility; not used by this method
  # @return the drafted DOI
  def draft_doi(user: nil)
    metadata_model.project_id = PULDatacite.new.draft_doi
  end
75

76
  # Returns the stored metadata_json (an empty hash when it is nil) as a hash
  # with indifferent access.
  #
  # Ideally this would return a ProjectMetadata object like `metadata_model`
  # does; both are kept during the refactoring because `metadata` is used
  # everywhere.
  def metadata
    stored = metadata_json || {}
    @metadata_hash = stored.with_indifferent_access
  end
82

83
  # Memoized ProjectMetadata built from the `metadata` hash on first access.
  def metadata_model
    @metadata_model ||= ProjectMetadata.new_from_hash(metadata)
  end
86

87
  # Writer that replaces the memoized metadata model (@metadata_model).
  attr_writer :metadata_model
90

91
  # Serializes the given metadata model to JSON and parses it back into a
  # plain hash so that it can be saved on the metadata_json JSONB field.
  def metadata=(metadata_model)
    self.metadata_json = JSON.parse(metadata_model.to_json)
  end
96

97
  # Title taken from the metadata model.
  def title = metadata_model.title
100

101
  # Sorted departments from the metadata model; an empty list when the model
  # has none.
  def departments
    (metadata_model.departments || []).sort
  end
105

106
  # Full project directory path.
  #
  # Returns nil when the metadata has no project directory. When the parent
  # of the stored directory is a relative path, the directory is sanitized
  # with safe_name and placed under the Mediaflux root namespace; otherwise
  # the stored path is returned as a string.
  def project_directory
    configured = metadata_model.project_directory
    return nil if configured.nil?

    pathname = project_directory_pathname
    return pathname.to_s unless pathname.dirname.relative?

    "#{Mediaflux::Connection.root_namespace}/#{safe_name(configured)}"
  end
115

116
  # Parent path of the project directory. Falls back to the Mediaflux root
  # namespace when no directory is set or when its parent is relative.
  def project_directory_parent_path
    return Mediaflux::Connection.root_namespace if metadata_model.project_directory.nil?

    parent = project_directory_pathname.dirname
    parent.relative? ? Mediaflux::Connection.root_namespace : parent.to_s
  end
125

126
  # Last path component of the project directory, or nil when the metadata
  # has no project directory.
  def project_directory_short
    return if metadata_model.project_directory.nil?

    project_directory_pathname.basename.to_s
  end
130

131
  # Status taken from the metadata model.
  def status = metadata_model.status
134

135
  # True when the project's status equals PENDING_STATUS.
  def pending? = status == PENDING_STATUS
138

139
  # True when a Mediaflux id has been recorded on the project.
  def in_mediaflux? = mediaflux_id.present?
142

143
  # Finds the projects associated with a user: those listing the user's uid
  # as a read-only or read-write data user and, when the user is an eligible
  # sponsor / manager, those naming the uid as data_sponsor / data_manager.
  #
  # See https://scalegrid.io/blog/using-jsonb-in-postgresql-how-to-effectively-store-index-json-data-in-postgresql/
  # for information on the @> operator.
  #
  # @param user an object responding to uid, eligible_sponsor? and eligible_manager?
  # @return the result of Project.where for the combined conditions
  def self.users_projects(user)
    uid = user.uid

    # Build the containment documents with to_json so that quotes or
    # backslashes inside a uid are escaped rather than corrupting the JSON.
    clauses = ["(metadata_json @> ? :: jsonb)", "(metadata_json @> ? :: jsonb)"]
    binds = [{ data_user_read_only: [uid] }.to_json, { data_user_read_write: [uid] }.to_json]

    if user.eligible_sponsor?
      clauses << "(metadata_json->>'data_sponsor' = ?)"
      binds << uid
    end
    if user.eligible_manager?
      clauses << "(metadata_json->>'data_manager' = ?)"
      binds << uid
    end

    Project.where(clauses.join(" OR "), *binds)
  end
161

162
  # Projects whose metadata_json names the given value as data_sponsor.
  def self.sponsored_projects(sponsor) = Project.where("metadata_json->>'data_sponsor' = ?", sponsor)
165

166
  # Projects whose metadata_json names the given value as data_manager.
  def self.managed_projects(manager) = Project.where("metadata_json->>'data_manager' = ?", manager)
169

170
  # Projects that have no mediaflux_id recorded.
  def self.pending_projects = Project.where("mediaflux_id IS NULL")
173

174
  # Projects that have a mediaflux_id recorded.
  def self.approved_projects = Project.where("mediaflux_id IS NOT NULL")
177

178
  # Projects that list the given value as a read-only or read-write data user.
  #
  # See https://scalegrid.io/blog/using-jsonb-in-postgresql-how-to-effectively-store-index-json-data-in-postgresql/
  # for information on the @> operator.
  #
  # @param user the identifier matched against the data user lists
  # @return the result of Project.where for the containment conditions
  def self.data_user_projects(user)
    # to_json escapes quotes and backslashes that plain string concatenation
    # would embed verbatim, which produced invalid JSON for such values.
    read_only_doc = { data_user_read_only: [user] }.to_json
    read_write_doc = { data_user_read_write: [user] }.to_json
    Project.where("(metadata_json @> ? :: jsonb) OR (metadata_json @> ? :: jsonb)", read_only_doc, read_write_doc)
  end
185

186
  # Whether the user may access this project: true for eligible sysadmins,
  # otherwise true when the user's uid is the data sponsor, the data manager,
  # or appears in the read-only or read-write data user lists.
  def user_has_access?(user:)
    return true if user.eligible_sysadmin?

    model = metadata_model
    model.data_sponsor == user.uid ||
      model.data_manager == user.uid ||
      model.data_user_read_only.include?(user.uid) ||
      model.data_user_read_write.include?(user.uid)
  end
191

192
  # Delegates to ProjectMediaflux.save for this project and the given user.
  def save_in_mediaflux(user:) = ProjectMediaflux.save(project: self, user: user)
195

196
  # Looks up the User whose uid equals the metadata model's created_by value.
  def created_by_user = User.find_by(uid: metadata_model.created_by)
199

200
  # Delegates to ProjectMediaflux.xml_payload for this project.
  def to_xml = ProjectMediaflux.xml_payload(project: self)
203

204
  # Delegates to ProjectMediaflux.document for this project.
  # @return [Nokogiri::XML::Document] the Mediaflux XML document for this project
  def mediaflux_document = ProjectMediaflux.document(project: self)
208

209
  # Extracts the <meta> element from a namespace-stripped copy of the
  # Mediaflux XML document.
  #
  # @return [Nokogiri::XML::Element] the <meta> element from the Mediaflux XML document
  # @raise [RuntimeError] when the document has no /request/service/args/meta element
  def mediaflux_meta_element
    # Work on a copy; namespaces are removed to simplify the XPath query.
    document = mediaflux_document.dup
    document.remove_namespaces!

    meta = document.at_xpath("/request/service/args/meta")
    raise("Failed to extract the <meta> element found in the Mediaflux XML document for project #{id}") if meta.nil?

    meta
  end
219

220
  # @return [String] XML representation of the <meta> element
  def mediaflux_meta_xml = mediaflux_meta_element.to_xml
224

225
  # Metadata for this project's Mediaflux asset, memoized on the instance.
  #
  # Once a truthy value is cached it is returned for every later call, even
  # when a different session_id is supplied.
  #
  # @param session_id the Mediaflux session token used for the request
  def mediaflux_metadata(session_id:)
    return @mediaflux_metadata if @mediaflux_metadata

    request = Mediaflux::AssetMetadataRequest.new(session_token: session_id, id: mediaflux_id)
    @mediaflux_metadata = request.metadata
  end
232

233
  # The :total_file_count entry of the Mediaflux metadata, or 0 when absent.
  def asset_count(session_id:)
    mediaflux_metadata(session_id:).fetch(:total_file_count, 0)
  end
237

238
  # Unit used when building the default storage usage string.
  def self.default_storage_unit = "KB"
241

242
  # Default usage string: zero followed by the default storage unit.
  def self.default_storage_usage = format("0 %s", default_storage_unit)
245

246
  # The :quota_used entry of the Mediaflux metadata; the class default usage
  # string is returned when the entry is absent.
  def storage_usage(session_id:)
    mediaflux_metadata(session_id:).fetch(:quota_used, self.class.default_storage_usage)
  end
250

251
  # The :quota_used_raw entry of the Mediaflux metadata, or 0 when absent.
  def storage_usage_raw(session_id:)
    mediaflux_metadata(session_id:).fetch(:quota_used_raw, 0)
  end
255

256
  # Capacity string reported when no quota allocation is available.
  def self.default_storage_capacity = "0 GB"
259

260
  # The :quota_allocation entry of the Mediaflux metadata; the class default
  # capacity is returned when the entry is absent or blank.
  def storage_capacity(session_id:)
    allocation = mediaflux_metadata(session_id:).fetch(:quota_allocation, '')
    allocation.presence || self.class.default_storage_capacity
  end
269

270
  # The :quota_allocation_raw entry of the Mediaflux metadata, or 0 when absent.
  def storage_capacity_raw(session_id:)
    mediaflux_metadata(session_id:).fetch(:quota_allocation_raw, 0)
  end
275

276
  # Fetches the first `size` files of the project's Mediaflux collection.
  #
  # @param session_id the Mediaflux session token
  # @param size the number of results requested from the iterator
  # @return the iterator result, or `{ files: [] }` when the project has no
  #   Mediaflux id
  def file_list(session_id:, size: 10)
    return { files: [] } if mediaflux_id.nil?

    iterator_id = Mediaflux::QueryRequest.new(session_token: session_id, collection: mediaflux_id, deep_search: true, aql_query: "type!='application/arc-asset-collection'").result
    first_page = Mediaflux::IteratorRequest.new(session_token: session_id, iterator: iterator_id, size: size).result

    # Destroy the iterator _after_ fetching the first page. This is required
    # because we may have read fewer assets than the collection holds, yet we
    # are done with the iterator.
    Mediaflux::IteratorDestroyRequest.new(session_token: session_id, iterator: iterator_id).resolve

    first_page
  end
293

294
  # Writes the entire file list of the project's Mediaflux collection to a
  # file: a header row followed by one row per asset, fetched from the
  # iterator in pages of 1000 and logged with timing information.
  #
  # @param session_id the Mediaflux session token
  # @param filename path of the file to (over)write
  # @return `{ files: [] }` when the project has no Mediaflux id, otherwise
  #   the result of resolving the iterator destroy request
  def file_list_to_file(session_id:, filename:)
    return { files: [] } if mediaflux_id.nil?

    query = Mediaflux::QueryRequest.new(session_token: session_id, collection: mediaflux_id, deep_search: true, aql_query: "type!='application/arc-asset-collection'")
    iterator_id = query.result

    started_at = Time.zone.now
    log_prefix = "file_list_to_file #{session_id[0..7]} #{metadata_model.project_id}"
    log_elapsed(started_at, log_prefix, "STARTED")

    File.open(filename, "w") do |out|
      # file header
      out.write("ID, PATH, NAME, COLLECTION?, LAST_MODIFIED, SIZE\r\n")

      page = 0
      loop do
        page_started_at = Time.zone.now
        page += 1
        page_request = Mediaflux::IteratorRequest.new(session_token: session_id, iterator: iterator_id, size: 1000)
        page_response = page_request.result
        log_elapsed(page_started_at, log_prefix, "FETCHED page #{page} from iterator")

        rows = files_from_iterator(page_response)
        out.write(rows.join("\r\n") + "\r\n")
        # Stop once the iterator is exhausted or the request reported an error.
        break if page_response[:complete] || page_request.error?
      end
      log_elapsed(started_at, log_prefix, "ENDED")
    end

    # Destroy the iterator _after_ fetching the results. This is technically
    # unnecessary because Mediaflux deletes an iterator once it has been run
    # through, which has happened by now, but it does no harm.
    Mediaflux::IteratorDestroyRequest.new(session_token: session_id, iterator: iterator_id).resolve
  end
327

328
  # Ensure that the project directory is a valid path: surrounding whitespace
  # is stripped and every character other than A-Z, a-z and 0-9 becomes "-".
  # @example
  #   Project.safe_name("My Project") # => "My-Project"
  def self.safe_name(name)
    name.strip.tr("^A-Za-z0-9", "-")
  end
335

336
  private
1✔
337

338
    # Builds one comma-separated row (id, path, name, collection flag,
    # last modified, size) for each asset in iterator_resp[:files].
    def files_from_iterator(iterator_resp)
      iterator_resp[:files].map do |asset|
        "#{asset.id}, #{asset.path_only}, #{asset.name}, #{asset.collection}, #{asset.last_modified}, #{asset.size}"
      end
    end
345

346
    # Memoized Pathname for the metadata model's project directory. The cache
    # is discarded when the directory recorded at caching time is present and
    # no longer matches the metadata, so metadata changes are picked up.
    def project_directory_pathname
      directory_changed = @original_directory.present? && @original_directory != metadata_model.project_directory
      @project_directory_pathname = nil if directory_changed
      return @project_directory_pathname if @project_directory_pathname

      @original_directory = metadata_model.project_directory
      @project_directory_pathname = Pathname.new(@original_directory)
    end
355

356
    # Instance-level shortcut for Project.safe_name.
    def safe_name(name) = Project.safe_name(name)
360

361
    # Logs "<prefix>: <message>, <seconds> s" at info level, where seconds is
    # the time elapsed since start_time formatted with two decimals.
    def log_elapsed(start_time, prefix, message)
      seconds = Time.zone.now - start_time
      Rails.logger.info(format("%s: %s, %.2f s", prefix, message, seconds))
    end
366
end
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc