• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 3165b919-56f4-4faa-baf4-b5a08621230c

18 Sep 2025 03:08PM UTC coverage: 90.866% (+0.2%) from 90.675%
3165b919-56f4-4faa-baf4-b5a08621230c

Pull #2927

circleci

Ryan Laddusaw
Add QA DSpace config
Pull Request #2927: Index theses from dspace 7+

1411 of 1519 new or added lines in 16 files covered. (92.89%)

41 existing lines in 7 files now uncovered.

8874 of 9766 relevant lines covered (90.87%)

338.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.34
/lib/bibdata_rs/src/theses/legacy_dataspace/collection.rs
1
// This module is responsible for interacting with Dataspace collections
2
// using the Dataspace JSON API
3

4
use std::{
5
    fs::File,
6
    io::{BufWriter, Write},
7
};
8

9
use crate::solr::SolrDocument;
10
use crate::theses::{
11
    config,
12
    legacy_dataspace::{community, document::DataspaceDocument},
13
    temp_legacy_theses_cache_path,
14
};
15
use anyhow::{anyhow, Result};
16
use log::debug;
17
use magnus::exception;
18
use rayon::prelude::*;
19

20
/// Builds the URL of one page of items in a Dataspace collection.
///
/// `server` is used verbatim as the URL prefix, `id` identifies the collection,
/// and `rest_limit` / `offset` become the `limit` and `offset` query parameters.
/// The query always ends with `expand=metadata`.
pub fn collection_url(server: &str, id: &str, rest_limit: &str, offset: &str) -> String {
    [
        server,
        "/collections/",
        id,
        "/items?limit=",
        rest_limit,
        "&offset=",
        offset,
        "&expand=metadata",
    ]
    .concat()
}
7✔
26

NEW
27
fn magnus_err_from_serde_err(value: &serde_json::Error) -> magnus::Error {
×
NEW
28
    magnus::Error::new(exception::runtime_error(), value.to_string())
×
NEW
29
}
×
30

31
fn magnus_err_from_anyhow_err(value: &anyhow::Error) -> magnus::Error {
1✔
32
    magnus::Error::new(exception::runtime_error(), value.to_string())
1✔
33
}
1✔
34

35
// The main function for thesis caching, to be called from Ruby
36
pub fn legacy_collections_as_solr(
1✔
37
    server: String,
1✔
38
    community_handle: String,
1✔
39
    rest_limit: u32,
1✔
40
) -> Result<(), magnus::Error> {
1✔
41
    // env_logger::init();
NEW
42
    let documents: Vec<DataspaceDocument> =
×
43
        get_document_list(&server, &community_handle, rest_limit, |server, handle| {
1✔
44
            community::get_collection_list(server, handle, community::get_community_id)
1✔
45
        })
1✔
46
        .map_err(|e| magnus_err_from_anyhow_err(&e))?;
1✔
NEW
47
    let file = File::create(temp_legacy_theses_cache_path())
×
NEW
48
        .map_err(|value| magnus_err_from_anyhow_err(&anyhow!(value)))?;
×
NEW
49
    let mut writer = BufWriter::new(file);
×
NEW
50
    serde_json::to_writer_pretty(
×
NEW
51
        &mut writer,
×
NEW
52
        &documents
×
NEW
53
            .iter()
×
NEW
54
            .map(SolrDocument::from)
×
NEW
55
            .collect::<Vec<SolrDocument>>(),
×
56
    )
NEW
57
    .map_err(|e| magnus_err_from_serde_err(&e))?;
×
NEW
58
    writer
×
NEW
59
        .flush()
×
NEW
60
        .map_err(|value| magnus_err_from_anyhow_err(&anyhow!(value)))?;
×
NEW
61
    Ok(())
×
62
}
1✔
63

64
type CollectionIdsSelector = fn(&str, &str) -> Result<Vec<u32>>;
65
pub fn get_document_list(
3✔
66
    server: &str,
3✔
67
    community_handle: &str,
3✔
68
    rest_limit: u32,
3✔
69
    ids_selector: CollectionIdsSelector, // a closure that returns a Vec of dspace collection ids
3✔
70
) -> Result<Vec<DataspaceDocument>> {
3✔
71
    let collection_ids = ids_selector(server, community_handle)?;
3✔
72
    let documents = collection_ids
2✔
73
        .par_iter()
2✔
74
        .try_fold(Vec::new, |mut accumulator, collection_id| {
2✔
75
            get_documents_in_collection(
2✔
76
                &mut accumulator,
2✔
77
                server,
2✔
78
                *collection_id,
2✔
79
                rest_limit,
2✔
80
                0,
81
                0,
82
            )?;
1✔
83
            Ok::<Vec<DataspaceDocument>, anyhow::Error>(accumulator)
1✔
84
        })
2✔
85
        .try_reduce(Vec::new, |mut a, b| {
2✔
86
            a.extend(b);
1✔
87
            Ok(a)
1✔
88
        })?;
1✔
89

90
    Ok(documents)
1✔
91
}
3✔
92

93
// This function recursively fetches paginated API results and retries on error
94
fn get_documents_in_collection(
6✔
95
    documents: &mut Vec<DataspaceDocument>,
6✔
96
    server: &str,
6✔
97
    collection_id: u32,
6✔
98
    rest_limit: u32,
6✔
99
    offset: u32,
6✔
100
    attempt: u8,
6✔
101
) -> Result<Vec<DataspaceDocument>> {
6✔
102
    let url = collection_url(
6✔
103
        server,
6✔
104
        &collection_id.to_string(),
6✔
105
        &rest_limit.to_string(),
6✔
106
        &offset.to_string(),
6✔
107
    );
108
    if attempt == 0 {
6✔
109
        debug!("Querying for the DSpace Collection at {}", url)
3✔
110
    } else {
111
        debug!(
3✔
NEW
112
            "Retrying query {}, attempt {} of {}",
×
113
            url,
114
            attempt,
115
            config::THESES_RETRY_ATTEMPTS
116
        );
117
    }
118
    let new_documents = match get_url_as_json(&url) {
6✔
119
        Ok(docs) => Ok(docs),
2✔
120
        Err(e) => {
4✔
121
            // If there was an error, increment the count of attempts and recurse
122
            if attempt < config::THESES_RETRY_ATTEMPTS {
4✔
123
                get_documents_in_collection(
3✔
124
                    documents,
3✔
125
                    server,
3✔
126
                    collection_id,
3✔
127
                    rest_limit,
3✔
128
                    offset,
3✔
129
                    attempt + 1,
3✔
130
                )
131
            } else {
132
                Err(e)
1✔
133
            }
134
        }
135
    }?;
4✔
136
    // If we didn't get an empty JSON, there are still more pages of data to fetch, so
137
    // recurse with a higher offset (i.e. fetch the next page)
138
    if !new_documents.is_empty() {
2✔
139
        documents.extend(new_documents);
1✔
140
        get_documents_in_collection(
1✔
141
            documents,
1✔
142
            server,
1✔
143
            collection_id,
1✔
144
            rest_limit,
1✔
145
            offset + rest_limit,
1✔
146
            0,
NEW
147
        )?;
×
148
    }
1✔
149
    Ok(vec![])
2✔
150
}
6✔
151

152
fn get_url_as_json(url: &str) -> Result<Vec<DataspaceDocument>> {
6✔
153
    reqwest::blocking::get(url)?
6✔
154
        .json()
6✔
155
        .map_err(|e| anyhow!("Could not parse json at {url}: {e:?}"))
6✔
156
}
6✔
157

158
#[cfg(test)]
mod tests {
    use rb_sys_test_helpers::ruby_test;

    use super::*;

    // Community handle passed to the functions under test.
    const COMMUNITY_HANDLE: &str = "88435/dsp019c67wm88m";
    // Request paths for the first two pages of collection 361 at 100 items per page.
    const FIRST_PAGE_PATH: &str = "/collections/361/items?limit=100&offset=0&expand=metadata";
    const SECOND_PAGE_PATH: &str = "/collections/361/items?limit=100&offset=100&expand=metadata";

    // Stub selector: always reports collection 361, without making any request.
    fn only_collection_361(_server: &str, _handle: &str) -> Result<Vec<u32>> {
        Ok(vec![361u32])
    }

    #[test]
    fn it_creates_a_collection_url() {
        let url = collection_url(
            "https://dataspace-dev.princeton.edu/rest",
            "402",
            "100",
            "1000",
        );
        assert_eq!(
            url,
            "https://dataspace-dev.princeton.edu/rest/collections/402/items?limit=100&offset=1000&expand=metadata"
        )
    }

    #[test]
    fn it_fetches_the_documents_from_the_community() {
        let mut server = mockito::Server::new();
        // The first page carries one document; the empty second page ends pagination.
        let mock_page1 = server
            .mock("GET", FIRST_PAGE_PATH)
            .with_status(200)
            .with_body_from_file("../../spec/fixtures/files/theses/api_client_get_legacy.json")
            .create();
        let mock_page2 = server
            .mock("GET", SECOND_PAGE_PATH)
            .with_status(200)
            .with_body("[]")
            .create();

        let docs =
            get_document_list(&server.url(), COMMUNITY_HANDLE, 100, only_collection_361).unwrap();
        assert_eq!(docs.len(), 1);

        let expected_title =
            vec!["Calibration of the Princeton University Subsonic Instructional Wind Tunnel"
                .to_owned()];
        assert_eq!(docs[0].title.clone().unwrap(), expected_title);

        mock_page1.assert();
        mock_page2.assert();
    }

    #[test]
    fn it_retries_requests_when_500_errors() {
        let mut server = mockito::Server::new();
        let mock_page1 = server
            .mock("GET", FIRST_PAGE_PATH)
            .with_status(500)
            .expect(4) // The initial request + 3 retries
            .create();

        let outcome = get_document_list(&server.url(), COMMUNITY_HANDLE, 100, only_collection_361);
        assert!(outcome.is_err());

        mock_page1.assert();
    }

    #[ruby_test]
    fn it_notifies_ruby_of_errors() {
        let mut server = mockito::Server::new();
        let mock_bad_response = server
            .mock("GET", "/communities/")
            .with_status(500)
            .create();

        let outcome = legacy_collections_as_solr(server.url(), COMMUNITY_HANDLE.to_owned(), 100);
        assert!(outcome.is_err());
        mock_bad_response.assert();
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc