• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 3165b919-56f4-4faa-baf4-b5a08621230c

18 Sep 2025 03:08PM UTC coverage: 90.866% (+0.2%) from 90.675%
3165b919-56f4-4faa-baf4-b5a08621230c

Pull #2927

circleci

Ryan Laddusaw
Add QA DSpace config
Pull Request #2927: Index theses from dspace 7+

1411 of 1519 new or added lines in 16 files covered. (92.89%)

41 existing lines in 7 files now uncovered.

8874 of 9766 relevant lines covered (90.87%)

338.18 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.64
/lib/bibdata_rs/src/theses/legacy_dataspace/document/normalize.rs
1
// This module is responsible for normalizing data within a DataspaceDocument
2

3
use crate::{
4
    solr::{AccessFacet, ElectronicAccess, LibraryFacet},
5
    theses::{
6
        legacy_dataspace::document::DataspaceDocument,
7
        department,
8
        embargo::{self, Embargo},
9
        holdings::{self, ThesisAvailability},
10
        language, program,
11
    },
12
};
13
use itertools::Itertools;
14
use regex::{Captures, Regex};
15
use std::sync::LazyLock;
16

17
impl DataspaceDocument {
18
    pub fn access_facet(&self) -> Option<AccessFacet> {
27✔
19
        match (self.embargo(), self.on_site_only()) {
27✔
20
            (embargo::Embargo::Current(_), _) => None,
1✔
21
            (_, ThesisAvailability::AvailableOffSite) => Some(AccessFacet::Online),
21✔
22
            (_, ThesisAvailability::OnSiteOnly) => Some(AccessFacet::InTheLibrary),
5✔
23
        }
24
    }
27✔
25

26
    pub fn advanced_location(&self) -> Option<Vec<String>> {
27✔
27
        match self.on_site_only() {
27✔
28
            ThesisAvailability::OnSiteOnly => Some(vec![
6✔
29
                "mudd$stacks".to_owned(),
6✔
30
                "Mudd Manuscript Library".to_owned(),
6✔
31
            ]),
6✔
32
            _ => None,
21✔
33
        }
34
    }
27✔
35

36
    pub fn all_authors(&self) -> Vec<String> {
30✔
37
        let mut authors = match &self.contributor_author {
30✔
38
            Some(authors) => authors.clone(),
4✔
39
            None => Vec::new(),
26✔
40
        };
41
        authors.extend(self.contributor_advisor.clone().unwrap_or_default());
30✔
42
        authors.extend(self.contributor.clone().unwrap_or_default());
30✔
43
        authors.extend(
30✔
44
            self.department
30✔
45
                .clone()
30✔
46
                .unwrap_or_default()
30✔
47
                .iter()
30✔
48
                .filter_map(|dept| department::map_department(dept)),
30✔
49
        );
50
        authors.extend(
30✔
51
            self.certificate
30✔
52
                .clone()
30✔
53
                .unwrap_or_default()
30✔
54
                .iter()
30✔
55
                .filter_map(|program| program::map_program(program)),
30✔
56
        );
57
        authors
30✔
58
    }
30✔
59

60
    pub fn ark_hash(&self) -> Option<ElectronicAccess> {
30✔
61
        let empty_vec = vec![];
30✔
62
        holdings::dataspace_url_with_metadata(
30✔
63
            self.identifier_uri.as_ref(),
30✔
64
            self.location.is_some(),
30✔
65
            self.rights_access_rights.is_some(),
30✔
66
            self.walkin_is_yes(),
30✔
67
            match &self.date_classyear {
30✔
68
                Some(class_year) => class_year.clone(),
14✔
69
                None => empty_vec,
16✔
70
            },
71
            self.embargo(),
30✔
72
        )
73
    }
30✔
74

75
    pub fn authorized_ceritificates(&self) -> Option<Vec<String>> {
27✔
76
        self.certificate.as_ref().map(|certificates| {
27✔
77
            certificates
3✔
78
                .iter()
3✔
79
                .filter_map(|certificate| program::map_program(certificate))
4✔
80
                .collect()
3✔
81
        })
3✔
82
    }
27✔
83

84
    pub fn authorized_departments(&self) -> Option<Vec<String>> {
27✔
85
        self.department.as_ref().map(|departments| {
27✔
86
            departments
6✔
87
                .iter()
6✔
88
                .filter_map(|department| department::map_department(department))
10✔
89
                .collect()
6✔
90
        })
6✔
91
    }
27✔
92

93
    pub fn call_number(&self) -> String {
54✔
94
        holdings::call_number(self.identifier_other.as_ref())
54✔
95
    }
54✔
96

97
    pub fn class_year(&self) -> Option<i16> {
81✔
98
        let years = self.date_classyear.clone().unwrap_or_default();
81✔
99
        let year = years.first()?;
81✔
100
        year.parse::<i16>().ok()
33✔
101
    }
81✔
102

103
    pub fn languages(&self) -> Vec<String> {
54✔
104
        language::codes_to_english_names(self.language_iso.clone())
54✔
105
    }
54✔
106

107
    pub fn location(&self) -> Option<LibraryFacet> {
27✔
108
        match self.on_site_only() {
27✔
109
            ThesisAvailability::OnSiteOnly => Some(LibraryFacet::Mudd),
6✔
110
            _ => None,
21✔
111
        }
112
    }
27✔
113

114
    pub fn location_display(&self) -> Option<String> {
27✔
115
        match self.on_site_only() {
27✔
116
            ThesisAvailability::OnSiteOnly => Some("Mudd Manuscript Library".to_owned()),
6✔
117
            _ => None,
21✔
118
        }
119
    }
27✔
120

121
    pub fn location_code(&self) -> Option<String> {
27✔
122
        match self.on_site_only() {
27✔
123
            ThesisAvailability::OnSiteOnly => Some("mudd$stacks".to_owned()),
6✔
124
            _ => None,
21✔
125
        }
126
    }
27✔
127

128
    pub fn on_site_only(&self) -> ThesisAvailability {
198✔
129
        holdings::on_site_only(
198✔
130
            self.location.is_some(),
198✔
131
            self.rights_access_rights.is_some(),
198✔
132
            self.walkin_is_yes(),
198✔
133
            match &self.date_classyear {
198✔
134
                Some(class_year) => class_year.clone(),
79✔
135
                None => vec![],
119✔
136
            },
137
            self.embargo(),
198✔
138
        )
139
    }
198✔
140

141
    pub fn online_portfolio_statements(&self) -> Option<String> {
27✔
142
        if self.on_site_only() == ThesisAvailability::OnSiteOnly
27✔
143
            || matches!(self.embargo(), Embargo::Current(_))
21✔
144
        {
145
            None
6✔
146
        } else {
147
            holdings::online_holding_string(self.identifier_other.as_ref())
21✔
148
        }
149
    }
27✔
150

151
    pub fn physical_holding_string(&self) -> Option<String> {
27✔
152
        match self.on_site_only() {
27✔
153
            ThesisAvailability::AvailableOffSite => None,
21✔
154
            ThesisAvailability::OnSiteOnly => {
155
                holdings::physical_holding_string(self.identifier_other.as_ref())
6✔
156
            }
157
        }
158
    }
27✔
159

160
    pub fn restrictions_note_display(&self) -> Option<Vec<String>> {
27✔
161
        match &self.rights_access_rights {
27✔
162
            Some(rights) => rights.first().map(|s| vec![s.clone()]),
7✔
163
            None => {
164
                if self.walkin_is_yes() {
20✔
165
                    Some(vec!["Walk-in Access. This thesis can only be viewed on computer terminals at the '<a href=\"http://mudd.princeton.edu\">Mudd Manuscript Library</a>.".to_owned()])
1✔
166
                } else {
167
                    match self.embargo() {
19✔
NEW
168
                    Embargo::Current(text) => Some(vec![text]),
×
169
                    Embargo::None => None,
17✔
NEW
170
                    Embargo::Expired => None,
×
171
                    Embargo::Invalid => Some(vec![
2✔
172
                        format!("This content is currently under embargo. For more information contact the <a href=\"mailto:dspadmin@princeton.edu?subject=Regarding embargoed DataSpace Item 88435/{}\"> Mudd Manuscript Library</a>.", self.id.clone().unwrap_or_default())
2✔
173
                    ]),
2✔
174
                }
175
                }
176
            }
177
        }
178
    }
27✔
179

180
    /// Take first title, strip out latex expressions when present to include along
181
    /// with non-normalized version (allowing users to get matches both when LaTex
182
    /// is pasted directly into the search box and when sub/superscripts are placed
183
    /// adjacent to regular characters
184
    pub fn title_search_versions(&self) -> Option<Vec<String>> {
27✔
185
        match &self.title {
27✔
186
            Some(titles) => titles.first().map(|title| {
3✔
187
                vec![title.to_string(), normalize_latex(title)]
3✔
188
                    .into_iter()
3✔
189
                    .unique()
3✔
190
                    .collect()
3✔
191
            }),
3✔
192
            None => None,
24✔
193
        }
194
    }
27✔
195

196
    /// Computes the embargo state from `embargo_lift`, `embargo_terms` and the
    /// document id (an empty string when the id is absent).
    fn embargo(&self) -> embargo::Embargo {
        let id: &str = match &self.id {
            Some(id) => id,
            None => "",
        };
        let lift = self.embargo_lift.as_ref();
        let terms = self.embargo_terms.as_ref();
        embargo::Embargo::from_dates(lift, terms, id)
    }
295✔
203

204
    fn walkin_is_yes(&self) -> bool {
248✔
205
        matches!(&self.mudd_walkin, Some(vec) if vec.first().is_some_and(|walkin| walkin == "yes"))
12✔
206
    }
248✔
207
}
208

209
fn normalize_latex(original: &str) -> String {
4✔
210
    static LATEX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\\\(.*?\\\)").unwrap());
1✔
211
    LATEX_REGEX
4✔
212
        .replace_all(original, |captures: &Captures| {
4✔
213
            captures[0]
2✔
214
                .chars()
2✔
215
                .filter(|c| c.is_alphanumeric())
17✔
216
                .collect::<String>()
2✔
217
        })
2✔
218
        .to_string()
4✔
219
}
4✔
220

221
#[cfg(test)]
222
mod tests {
223
    use super::*;
224
    // LaTeX superscript spans collapse to their digits; the rest is unchanged.
    #[test]
    fn it_normalizes_latex() {
        let title = "2D \\(^{1}\\)H-\\(^{14}\\)N HSQC inverse-detection experiments";
        let expected = "2D 1H-14N HSQC inverse-detection experiments";
        assert_eq!(normalize_latex(title), expected);
    }
1✔
231

232
    // NOTE(review): this fixture is identical to the one used by
    // `ark_hash_gets_the_ark_with_fulltext_link_display_when_no_restrictions`
    // and sets no access-rights, walk-in or embargo field, so the
    // "restrictions" scenario in the name does not appear to be exercised —
    // confirm the intended fixture.
    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_restrictions() {
        let expected = ElectronicAccess {
            url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
            link_text: "DataSpace".to_owned(),
            link_description: Some("Full text".to_owned()),
            iiif_manifest_paths: None,
            digital_content: None,
        };

        let metadata = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract("Summary")
            .with_contributor("Wolff, Tamsen".to_string())
            .with_contributor_advisor("Sandberg, Robert".to_string())
            .with_contributor_author("Clark, Hillary".to_string())
            .with_date_classyear("2014")
            .with_department("Princeton University. Department of English")
            .with_department("Princeton University. Program in Theater")
            .with_identifier_uri("http://arks.princeton.edu/ark:/88435/dsp01b2773v788")
            .with_format_extent("102 pages")
            .with_language_iso("en_US")
            .with_title("Dysfunction: A Play in One Act")
            .build();

        assert_eq!(metadata.ark_hash().unwrap(), expected);
    }
1✔
260

261
    // A document with an identifier URI and no restriction fields yields a
    // "Full text" DataSpace link.
    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_no_restrictions() {
        let expected = ElectronicAccess {
            url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
            link_text: "DataSpace".to_owned(),
            link_description: Some("Full text".to_owned()),
            iiif_manifest_paths: None,
            digital_content: None,
        };

        let metadata = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract("Summary")
            .with_contributor("Wolff, Tamsen".to_string())
            .with_contributor_advisor("Sandberg, Robert".to_string())
            .with_contributor_author("Clark, Hillary".to_string())
            .with_date_classyear("2014")
            .with_department("Princeton University. Department of English")
            .with_department("Princeton University. Program in Theater")
            .with_identifier_uri("http://arks.princeton.edu/ark:/88435/dsp01b2773v788")
            .with_format_extent("102 pages")
            .with_language_iso("en_US")
            .with_title("Dysfunction: A Play in One Act")
            .build();

        assert_eq!(metadata.ark_hash().unwrap(), expected);
    }
1✔
289

290
    // Without an identifier URI there is no link to build.
    #[test]
    fn ark_hash_returns_none_when_no_url() {
        let without_uri = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract("Summary")
            .with_contributor("Wolff, Tamsen".to_string())
            .with_contributor_advisor("Sandberg, Robert".to_string())
            .with_contributor_author("Clark, Hillary".to_string())
            .with_date_classyear("2014")
            .with_department("Princeton University. Department of English")
            .with_department("Princeton University. Program in Theater")
            .with_format_extent("102 pages".to_string())
            .with_language_iso("en_US")
            .with_title("Dysfunction: A Play in One Act")
            .build();

        let link = without_uri.ark_hash();
        assert_eq!(link, None);
    }
1✔
308

309
    // Exercises `on_site_only` across embargo, walk-in, class-year and location
    // combinations. The first group expects OnSiteOnly, the second group
    // expects AvailableOffSite.
    #[test]
    fn on_site_only() {
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_terms("2100-01-01")
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "doc with embargo terms field should return OnSiteOnly"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift("2100-01-01")
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "doc with embargo lift field should return OnSiteOnly"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift("2000-01-01")
                .with_mudd_walkin("yes")
                .with_date_classyear("2012-01-01T00:00:00Z")
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "with a specified accession date prior to 2013, it should return OnSiteOnly"
        );

        assert_eq!(
            DataspaceDocument::builder()
                .with_location("physical location")
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with location field should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift("2000-01-01")
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with expired embargo lift field should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift("2000-01-01")
                .with_mudd_walkin("yes")
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "without a specified accession date, it should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift("2000-01-01")
                .with_mudd_walkin("yes")
                .with_date_classyear("2013-01-01T00:00:00Z")
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "with a specified accession date in 2013, it should return AvailableOffSite"
        );
        // The empty-document case is asserted once; an identical second
        // assertion without a message was redundant and has been removed.
        assert_eq!(
            DataspaceDocument::builder().build().on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with no access-related fields should return AvailableOffSite"
        );
    }
1✔
383

384
    // Tests for `DataspaceDocument::all_authors`.
    mod all_authors {
        use super::*;

        #[test]
        fn it_includes_author() {
            let expected = vec!["Turing, Alan".to_owned()];
            let document = DataspaceDocument::builder()
                .with_contributor_author("Turing, Alan")
                .build();
            assert_eq!(document.all_authors(), expected);
        }

        #[test]
        fn it_includes_normalized_department() {
            let expected =
                vec!["Princeton University. Department of Astrophysical Sciences".to_owned()];
            let document = DataspaceDocument::builder()
                .with_department("Astrophysical Sciences")
                .build();
            assert_eq!(document.all_authors(), expected);
        }

        #[test]
        fn it_includes_normalized_certificate() {
            let expected = vec!["Princeton University. Program in Hellenic Studies".to_owned()];
            let document = DataspaceDocument::builder()
                .with_certificate("Hellenic Studies Program")
                .build();
            assert_eq!(document.all_authors(), expected);
        }
    }
417
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc