• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 234b9333-676b-4d43-b1c6-d1512d649e59

17 Sep 2025 04:04PM UTC coverage: 90.885% (+0.2%) from 90.675%
234b9333-676b-4d43-b1c6-d1512d649e59

Pull #2927

circleci

Ryan Laddusaw
Merge generated files and fix some bugs
Pull Request #2927: Index theses from dspace 7+

1411 of 1518 new or added lines in 16 files covered. (92.95%)

40 existing lines in 6 files now uncovered.

8874 of 9764 relevant lines covered (90.88%)

346.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.79
/lib/bibdata_rs/src/theses/dataspace/document/normalize.rs
1
// This module is responsible for normalizing data within a DataspaceDocument
2

3
use crate::{
4
    solr::{AccessFacet, ElectronicAccess, LibraryFacet},
5
    theses::{
6
        dataspace::document::DataspaceDocument,
7
        department,
8
        embargo::{self, Embargo},
9
        holdings::{self, ThesisAvailability},
10
        language, program,
11
    },
12
};
13
use itertools::Itertools;
14
use regex::{Captures, Regex};
15
use std::sync::LazyLock;
16

17
impl DataspaceDocument {
18
    pub fn access_facet(&self) -> Option<AccessFacet> {
25✔
19
        match (self.embargo(), self.on_site_only()) {
25✔
20
            (embargo::Embargo::Current(_), _) => None,
1✔
21
            (_, ThesisAvailability::AvailableOffSite) => Some(AccessFacet::Online),
19✔
22
            (_, ThesisAvailability::OnSiteOnly) => Some(AccessFacet::InTheLibrary),
5✔
23
        }
24
    }
25✔
25

26
    pub fn advanced_location(&self) -> Option<Vec<String>> {
25✔
27
        match self.on_site_only() {
25✔
28
            ThesisAvailability::OnSiteOnly => Some(vec![
6✔
29
                "mudd$stacks".to_owned(),
6✔
30
                "Mudd Manuscript Library".to_owned(),
6✔
31
            ]),
6✔
32
            _ => None,
19✔
33
        }
34
    }
25✔
35

36
    pub fn all_authors(&self) -> Vec<Option<String>> {
28✔
37
        let mut authors = match &self.contributor_author {
28✔
38
            Some(authors) => authors.clone(),
2✔
39
            None => Vec::new(),
26✔
40
        };
41
        authors.extend(self.contributor_advisor.clone().unwrap_or_default());
28✔
42
        authors.extend(self.contributor.clone().unwrap_or_default());
28✔
43
        authors.extend(
28✔
44
            self.department
28✔
45
                .clone()
28✔
46
                .unwrap_or_default()
28✔
47
                .iter()
28✔
48
                .map(|dept| { dept.clone().unwrap_or_default() })
28✔
49
                .filter_map(|dept| department::map_department(&dept))
28✔
50
                .map(|dept| { Some(dept) }),
28✔
51
        );
52
        authors.extend(
28✔
53
            self.certificate
28✔
54
                .clone()
28✔
55
                .unwrap_or_default()
28✔
56
                .iter()
28✔
57
                .map(|program| { program.clone().unwrap_or_default() })
28✔
58
                .filter_map(|program| program::map_program(&program))
28✔
59
                .map(|program| { Some(program) }),
28✔
60
        );
61
        authors
28✔
62
    }
28✔
63

64
    /// Flattens an optional list of optional strings into a list of strings.
    ///
    /// A missing list becomes an empty list and a missing entry becomes an
    /// empty string, so the result is always `Some`.
    pub fn unwrap_vec_values(vec: Option<Vec<Option<String>>>) -> Option<Vec<String>> {
        let values = vec
            .into_iter()
            .flatten()
            .map(Option::unwrap_or_default)
            .collect();
        Some(values)
    }
783✔
70

71
    pub fn ark_hash(&self) -> Option<ElectronicAccess> {
28✔
72
        let empty_vec = vec![];
28✔
73
        holdings::dataspace_url_with_metadata(
28✔
74
            Self::unwrap_vec_values(self.identifier_uri.clone()).as_ref(),
28✔
75
            self.location.is_some(),
28✔
76
            self.rights_access_rights.is_some(),
28✔
77
            self.walkin_is_yes(),
28✔
78
            match &self.date_classyear {
28✔
79
                Some(class_year) => Self::unwrap_vec_values(Some(class_year.clone()))?,
12✔
80
                None => empty_vec,
16✔
81
            },
82
            self.embargo(),
28✔
83
        )
84
    }
28✔
85

86
    pub fn authorized_ceritificates(&self) -> Option<Vec<String>> {
25✔
87
        self.certificate.as_ref().map(|certificates| {
25✔
88
            certificates
3✔
89
                .iter()
3✔
90
                .filter_map(|certificate| program::map_program(certificate.clone().unwrap_or_default().as_ref()))
4✔
91
                .collect()
3✔
92
        })
3✔
93
    }
25✔
94

95
    pub fn authorized_departments(&self) -> Option<Vec<String>> {
25✔
96
        self.department.as_ref().map(|departments| {
25✔
97
            departments
4✔
98
                .iter()
4✔
99
                .filter_map(|department| department::map_department(department.clone().unwrap_or_default().as_ref()))
6✔
100
                .collect()
4✔
101
        })
4✔
102
    }
25✔
103

104
    pub fn call_number(&self) -> String {
50✔
105
        holdings::call_number(Self::unwrap_vec_values(self.identifier_other.clone()).as_ref())
50✔
106
    }
50✔
107

108
    pub fn class_year(&self) -> Option<i16> {
75✔
109
        let years = self.date_classyear.clone().unwrap_or_default();
75✔
110
        let year = years.first()?;
75✔
111
        year.clone().unwrap_or_default().parse::<i16>().ok()
27✔
112
    }
75✔
113

114
    pub fn languages(&self) -> Vec<String> {
50✔
115
        language::codes_to_english_names(Self::unwrap_vec_values(self.language_iso.clone()))
50✔
116
    }
50✔
117

118
    pub fn location(&self) -> Option<LibraryFacet> {
25✔
119
        match self.on_site_only() {
25✔
120
            ThesisAvailability::OnSiteOnly => Some(LibraryFacet::Mudd),
6✔
121
            _ => None,
19✔
122
        }
123
    }
25✔
124

125
    pub fn location_display(&self) -> Option<String> {
25✔
126
        match self.on_site_only() {
25✔
127
            ThesisAvailability::OnSiteOnly => Some("Mudd Manuscript Library".to_owned()),
6✔
128
            _ => None,
19✔
129
        }
130
    }
25✔
131

132
    pub fn location_code(&self) -> Option<String> {
25✔
133
        match self.on_site_only() {
25✔
134
            ThesisAvailability::OnSiteOnly => Some("mudd$stacks".to_owned()),
6✔
135
            _ => None,
19✔
136
        }
137
    }
25✔
138

139
    pub fn on_site_only(&self) -> ThesisAvailability {
184✔
140
        let empty_vec = vec![];
184✔
141
        holdings::on_site_only(
184✔
142
            self.location.is_some(),
184✔
143
            self.rights_access_rights.is_some(),
184✔
144
            self.walkin_is_yes(),
184✔
145
            match &self.date_classyear {
184✔
146
                Some(class_year) => Self::unwrap_vec_values(Some(class_year.clone())).unwrap_or_default(),
65✔
147
                None => empty_vec,
119✔
148
            },
149
            self.embargo(),
184✔
150
        )
151
    }
184✔
152

153
    pub fn online_portfolio_statements(&self) -> Option<String> {
25✔
154
        if self.on_site_only() == ThesisAvailability::OnSiteOnly
25✔
155
            || matches!(self.embargo(), Embargo::Current(_))
19✔
156
        {
157
            None
6✔
158
        } else {
159
            holdings::online_holding_string(Self::unwrap_vec_values(self.identifier_other.clone()).as_ref())
19✔
160
        }
161
    }
25✔
162

163
    pub fn physical_holding_string(&self) -> Option<String> {
25✔
164
        match self.on_site_only() {
25✔
165
            ThesisAvailability::AvailableOffSite => None,
19✔
166
            ThesisAvailability::OnSiteOnly => {
167
                holdings::physical_holding_string(Self::unwrap_vec_values(self.identifier_other.clone()).as_ref())
6✔
168
            }
169
        }
170
    }
25✔
171

172
    pub fn restrictions_note_display(&self) -> Option<Vec<String>> {
25✔
173
        match &self.rights_access_rights {
25✔
174
            Some(rights) => Self::unwrap_vec_values(Some(rights.clone())).unwrap_or_default().first().map(|s| vec![s.clone()]),
7✔
175
            None => {
176
                if self.walkin_is_yes() {
18✔
177
                    Some(vec!["Walk-in Access. This thesis can only be viewed on computer terminals at the '<a href=\"http://mudd.princeton.edu\">Mudd Manuscript Library</a>.".to_owned()])
1✔
178
                } else {
179
                    match self.embargo() {
17✔
180
                    Embargo::Current(text) => Some(vec![text]),
×
181
                    Embargo::None => None,
15✔
182
                    Embargo::Expired => None,
×
183
                    Embargo::Invalid => Some(vec![
2✔
184
                        format!("This content is currently under embargo. For more information contact the <a href=\"mailto:dspadmin@princeton.edu?subject=Regarding embargoed DataSpace Item 88435/{}\"> Mudd Manuscript Library</a>.", self.id.clone().unwrap_or_default())
2✔
185
                    ]),
2✔
186
                }
187
                }
188
            }
189
        }
190
    }
25✔
191

192
    /// Take first title, strip out latex expressions when present to include along
193
    /// with non-normalized version (allowing users to get matches both when LaTex
194
    /// is pasted directly into the search box and when sub/superscripts are placed
195
    /// adjacent to regular characters
196
    pub fn title_search_versions(&self) -> Option<Vec<String>> {
25✔
197
        match &self.title {
25✔
198
            Some(titles) => titles.first().map(|title| {
1✔
199
                vec![title.clone().unwrap_or_default().to_string(), normalize_latex(title.clone().unwrap_or_default().as_ref())]
1✔
200
                    .into_iter()
1✔
201
                    .unique()
1✔
202
                    .collect()
1✔
203
            }),
1✔
204
            None => None,
24✔
205
        }
206
    }
25✔
207

208
    fn embargo(&self) -> embargo::Embargo {
273✔
209
        embargo::Embargo::from_dates(
273✔
210
            Self::unwrap_vec_values(self.embargo_lift.clone()).as_ref(),
273✔
211
            Self::unwrap_vec_values(self.embargo_terms.clone()).as_ref(),
273✔
212
            self.id.as_ref().map_or("", |v| v),
273✔
213
        )
214
    }
273✔
215

216
    fn walkin_is_yes(&self) -> bool {
230✔
217
        matches!(&self.mudd_walkin, Some(vec) if vec.first().is_some_and(|walkin| walkin == &Some("yes".to_string())))
12✔
218
    }
230✔
219
}
220

221
fn normalize_latex(original: &str) -> String {
2✔
222
    static LATEX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\\\(.*?\\\)").unwrap());
1✔
223
    LATEX_REGEX
2✔
224
        .replace_all(original, |captures: &Captures| {
2✔
225
            captures[0]
2✔
226
                .chars()
2✔
227
                .filter(|c| c.is_alphanumeric())
17✔
228
                .collect::<String>()
2✔
229
        })
2✔
230
        .to_string()
2✔
231
}
2✔
232

233
#[cfg(test)]
mod tests {
    use super::*;
    use crate::theses::dataspace::document::Metadatum;

    /// Wraps one string in the single-element `Metadatum` vector that the
    /// builder setters used below accept.
    fn metadatum_vec_from_string(value: &str) -> Vec<Metadatum> {
        vec![Metadatum { value: Some(value.to_string()) }]
    }

    #[test]
    fn it_normalizes_latex() {
        assert_eq!(
            normalize_latex("2D \\(^{1}\\)H-\\(^{14}\\)N HSQC inverse-detection experiments"),
            "2D 1H-14N HSQC inverse-detection experiments"
        );
    }

    // NOTE(review): this test and the `..._when_no_restrictions` test below build
    // identical documents; neither sets an access-restriction field. Confirm
    // which field the "restrictions" case was meant to set.
    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_restrictions() {
        let metadata = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract(metadatum_vec_from_string("Summary"))
            .with_contributor(metadatum_vec_from_string("Wolff, Tamsen"))
            .with_contributor_advisor(metadatum_vec_from_string("Sandberg, Robert"))
            .with_contributor_author(metadatum_vec_from_string("Clark, Hillary"))
            .with_date_classyear(metadatum_vec_from_string("2014"))
            .with_department(metadatum_vec_from_string("Princeton University. Department of English"))
            .with_department(metadatum_vec_from_string("Princeton University. Program in Theater"))
            .with_identifier_uri(metadatum_vec_from_string("http://arks.princeton.edu/ark:/88435/dsp01b2773v788"))
            .with_format_extent(metadatum_vec_from_string("102 pages"))
            .with_language_iso(metadatum_vec_from_string("en_US"))
            .with_title(metadatum_vec_from_string("Dysfunction: A Play in One Act"))
            .build();

        assert_eq!(
            metadata.ark_hash().unwrap(),
            ElectronicAccess {
                url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
                link_text: "DataSpace".to_owned(),
                link_description: Some("Full text".to_owned()),
                iiif_manifest_paths: None,
                digital_content: None
            }
        );
    }

    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_no_restrictions() {
        let metadata = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract(metadatum_vec_from_string("Summary"))
            .with_contributor(metadatum_vec_from_string("Wolff, Tamsen"))
            .with_contributor_advisor(metadatum_vec_from_string("Sandberg, Robert"))
            .with_contributor_author(metadatum_vec_from_string("Clark, Hillary"))
            .with_date_classyear(metadatum_vec_from_string("2014"))
            .with_department(metadatum_vec_from_string("Princeton University. Department of English"))
            .with_department(metadatum_vec_from_string("Princeton University. Program in Theater"))
            .with_identifier_uri(metadatum_vec_from_string("http://arks.princeton.edu/ark:/88435/dsp01b2773v788"))
            .with_format_extent(metadatum_vec_from_string("102 pages"))
            .with_language_iso(metadatum_vec_from_string("en_US"))
            .with_title(metadatum_vec_from_string("Dysfunction: A Play in One Act"))
            .build();

        assert_eq!(
            metadata.ark_hash().unwrap(),
            ElectronicAccess {
                url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
                link_text: "DataSpace".to_owned(),
                link_description: Some("Full text".to_owned()),
                iiif_manifest_paths: None,
                digital_content: None
            }
        );
    }

    // Same document as above but without an identifier URI.
    #[test]
    fn ark_hash_returns_none_when_no_url() {
        let metadata = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract(metadatum_vec_from_string("Summary"))
            .with_contributor(metadatum_vec_from_string("Wolff, Tamsen"))
            .with_contributor_advisor(metadatum_vec_from_string("Sandberg, Robert"))
            .with_contributor_author(metadatum_vec_from_string("Clark, Hillary"))
            .with_date_classyear(metadatum_vec_from_string("2014"))
            .with_department(metadatum_vec_from_string("Princeton University. Department of English"))
            .with_department(metadatum_vec_from_string("Princeton University. Program in Theater"))
            .with_format_extent(metadatum_vec_from_string("102 pages"))
            .with_language_iso(metadatum_vec_from_string("en_US"))
            .with_title(metadatum_vec_from_string("Dysfunction: A Play in One Act"))
            .build();

        assert_eq!(metadata.ark_hash(), None);
    }

    #[test]
    fn on_site_only() {
        // Cases expected to be restricted to on-site access.
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_terms(metadatum_vec_from_string("2100-01-01"))
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "doc with embargo terms field should return OnSiteOnly"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2100-01-01"))
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "doc with embargo lift field should return OnSiteOnly"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .with_mudd_walkin(metadatum_vec_from_string("yes"))
                .with_date_classyear(metadatum_vec_from_string("2012-01-01T00:00:00Z"))
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "with a specified accession date prior to 2013, it should return OnSiteOnly"
        );

        // Cases expected to be available off site.
        assert_eq!(
            DataspaceDocument::builder()
                .with_location(metadatum_vec_from_string("physical location"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with location field should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with expired embargo lift field should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .with_mudd_walkin(metadatum_vec_from_string("yes"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "without a specified accession date, it should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .with_mudd_walkin(metadatum_vec_from_string("yes"))
                .with_date_classyear(metadatum_vec_from_string("2013-01-01T00:00:00Z"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "with a specified accession date in 2013, it should return AvailableOffSite"
        );
        // The empty-document case is asserted once; an identical second
        // assertion without a message added nothing and was dropped.
        assert_eq!(
            DataspaceDocument::builder().build().on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with no access-related fields should return AvailableOffSite"
        );
    }

    mod all_authors {
        use super::*;

        #[test]
        fn it_includes_author() {
            let document = DataspaceDocument::builder()
                .with_contributor_author(metadatum_vec_from_string("Turing, Alan"))
                .build();
            assert_eq!(document.all_authors(), vec![Some("Turing, Alan".to_owned())]);
        }

        #[test]
        fn it_includes_normalized_department() {
            let document = DataspaceDocument::builder()
                .with_department(metadatum_vec_from_string("Astrophysical Sciences"))
                .build();
            assert_eq!(
                document.all_authors(),
                vec![Some("Princeton University. Department of Astrophysical Sciences".to_owned())]
            );
        }

        #[test]
        fn it_includes_normalized_certificate() {
            let document = DataspaceDocument::builder()
                .with_certificate(metadatum_vec_from_string("Hellenic Studies Program"))
                .build();
            assert_eq!(
                document.all_authors(),
                vec![Some("Princeton University. Program in Hellenic Studies".to_owned())]
            );
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc