• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 6af59736-1eb7-499a-b28b-31234d28f25b

23 Sep 2025 08:17PM UTC coverage: 90.818% (+0.1%) from 90.675%
6af59736-1eb7-499a-b28b-31234d28f25b

Pull #2927

circleci

Ryan Laddusaw
Update Department mapping with feedback
Pull Request #2927: Index theses from dspace 7+

1340 of 1448 new or added lines in 16 files covered. (92.54%)

41 existing lines in 7 files now uncovered.

8823 of 9715 relevant lines covered (90.82%)

354.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.67
/lib/bibdata_rs/src/theses/dataspace/document/normalize.rs
1
// This module is responsible for normalizing data within a DataspaceDocument
2

3
use crate::{
4
    solr::{AccessFacet, ElectronicAccess, LibraryFacet},
5
    theses::{
6
        dataspace::document::DataspaceDocument,
7
        department,
8
        embargo::{self, Embargo},
9
        holdings::{self, ThesisAvailability},
10
        language, program,
11
    },
12
};
13
use itertools::Itertools;
14
use regex::{Captures, Regex};
15
use std::sync::LazyLock;
16

17
impl DataspaceDocument {
18
    pub fn access_facet(&self) -> Option<AccessFacet> {
26✔
19
        match (self.embargo(), self.on_site_only()) {
26✔
20
            (embargo::Embargo::Current(_), _) => None,
1✔
21
            (_, ThesisAvailability::AvailableOffSite) => Some(AccessFacet::Online),
20✔
22
            (_, ThesisAvailability::OnSiteOnly) => Some(AccessFacet::InTheLibrary),
5✔
23
        }
24
    }
26✔
25

26
    pub fn advanced_location(&self) -> Option<Vec<String>> {
26✔
27
        match self.on_site_only() {
26✔
28
            ThesisAvailability::OnSiteOnly => Some(vec![
6✔
29
                "mudd$stacks".to_owned(),
6✔
30
                "Mudd Manuscript Library".to_owned(),
6✔
31
            ]),
6✔
32
            _ => None,
20✔
33
        }
34
    }
26✔
35

36
    pub fn all_authors(&self) -> Vec<String> {
29✔
37
        let mut authors = match &self.contributor_author {
29✔
38
            Some(authors) => authors.clone(),
3✔
39
            None => Vec::new(),
26✔
40
        };
41
        authors.extend(self.contributor_advisor.clone().unwrap_or_default());
29✔
42
        authors.extend(self.contributor.clone().unwrap_or_default());
29✔
43
        authors.extend(
29✔
44
            self.department
29✔
45
                .clone()
29✔
46
                .unwrap_or_default()
29✔
47
                .iter()
29✔
48
                .filter_map(|dept| department::map_department(&dept)),
29✔
49
        );
50
        authors.extend(
29✔
51
            self.certificate
29✔
52
                .clone()
29✔
53
                .unwrap_or_default()
29✔
54
                .iter()
29✔
55
                .filter_map(|program| program::map_program(&program)),
29✔
56
        );
57
        authors
29✔
58
    }
29✔
59

60
    pub fn ark_hash(&self) -> Option<ElectronicAccess> {
29✔
61
        holdings::dataspace_url_with_metadata(
29✔
62
            self.identifier_uri.clone().as_ref(),
29✔
63
            self.location.is_some(),
29✔
64
            self.rights_access_rights.is_some(),
29✔
65
            self.walkin_is_yes(),
29✔
66
            match &self.date_classyear {
29✔
67
                Some(class_year) => Some(class_year)?,
13✔
68
                None => &[],
16✔
69
            },
70
            self.embargo(),
29✔
71
        )
72
    }
29✔
73

74
    pub fn authorized_ceritificates(&self) -> Option<Vec<String>> {
26✔
75
        self.certificate.as_ref().map(|certificates| {
26✔
76
            certificates
3✔
77
                .iter()
3✔
78
                .filter_map(|certificate| program::map_program(certificate))
4✔
79
                .collect()
3✔
80
        })
3✔
81
    }
26✔
82

83
    pub fn authorized_departments(&self) -> Option<Vec<String>> {
26✔
84
        self.department.as_ref().map(|departments| {
26✔
85
            departments
5✔
86
                .iter()
5✔
87
                .filter_map(|department| department::map_department(department))
8✔
88
                .collect()
5✔
89
        })
5✔
90
    }
26✔
91

92
    pub fn call_number(&self) -> String {
52✔
93
        holdings::call_number(self.identifier_other.clone().as_ref())
52✔
94
    }
52✔
95

96
    pub fn class_year(&self) -> Option<i16> {
78✔
97
        let years = self.date_classyear.clone().unwrap_or_default();
78✔
98
        let year = years.first()?;
78✔
99
        year.parse::<i16>().ok()
30✔
100
    }
78✔
101

102
    pub fn languages(&self) -> Vec<String> {
52✔
103
        language::codes_to_english_names(self.language_iso.clone())
52✔
104
    }
52✔
105

106
    pub fn location(&self) -> Option<LibraryFacet> {
26✔
107
        match self.on_site_only() {
26✔
108
            ThesisAvailability::OnSiteOnly => Some(LibraryFacet::Mudd),
6✔
109
            _ => None,
20✔
110
        }
111
    }
26✔
112

113
    pub fn location_display(&self) -> Option<String> {
26✔
114
        match self.on_site_only() {
26✔
115
            ThesisAvailability::OnSiteOnly => Some("Mudd Manuscript Library".to_owned()),
6✔
116
            _ => None,
20✔
117
        }
118
    }
26✔
119

120
    pub fn location_code(&self) -> Option<String> {
26✔
121
        match self.on_site_only() {
26✔
122
            ThesisAvailability::OnSiteOnly => Some("mudd$stacks".to_owned()),
6✔
123
            _ => None,
20✔
124
        }
125
    }
26✔
126

127
    pub fn on_site_only(&self) -> ThesisAvailability {
191✔
128
        holdings::on_site_only(
191✔
129
            self.location.is_some(),
191✔
130
            self.rights_access_rights.is_some(),
191✔
131
            self.walkin_is_yes(),
191✔
132
            match &self.date_classyear {
191✔
133
                Some(class_year) => class_year,
72✔
134
                None => &[],
119✔
135
            },
136
            self.embargo(),
191✔
137
        )
138
    }
191✔
139

140
    pub fn online_portfolio_statements(&self) -> Option<String> {
26✔
141
        if self.on_site_only() == ThesisAvailability::OnSiteOnly
26✔
142
            || matches!(self.embargo(), Embargo::Current(_))
20✔
143
        {
144
            None
6✔
145
        } else {
146
            holdings::online_holding_string(self.identifier_other.as_ref())
20✔
147
        }
148
    }
26✔
149

150
    pub fn physical_holding_string(&self) -> Option<String> {
26✔
151
        match self.on_site_only() {
26✔
152
            ThesisAvailability::AvailableOffSite => None,
20✔
153
            ThesisAvailability::OnSiteOnly => {
154
                holdings::physical_holding_string(self.identifier_other.as_ref())
6✔
155
            }
156
        }
157
    }
26✔
158

159
    pub fn restrictions_note_display(&self) -> Option<Vec<String>> {
26✔
160
        match &self.rights_access_rights {
26✔
161
            Some(rights) => rights.first().map(|s| vec![s.clone()]),
7✔
162
            None => {
163
                if self.walkin_is_yes() {
19✔
164
                    Some(vec!["Walk-in Access. This thesis can only be viewed on computer terminals at the '<a href=\"http://mudd.princeton.edu\">Mudd Manuscript Library</a>.".to_owned()])
1✔
165
                } else {
166
                    match self.embargo() {
18✔
167
                    Embargo::Current(text) => Some(vec![text]),
×
168
                    Embargo::None => None,
16✔
169
                    Embargo::Expired => None,
×
170
                    Embargo::Invalid => Some(vec![
2✔
171
                        format!("This content is currently under embargo. For more information contact the <a href=\"mailto:dspadmin@princeton.edu?subject=Regarding embargoed DataSpace Item 88435/{}\"> Mudd Manuscript Library</a>.", self.id.clone().unwrap_or_default())
2✔
172
                    ]),
2✔
173
                }
174
                }
175
            }
176
        }
177
    }
26✔
178

179
    /// Take first title, strip out latex expressions when present to include along
180
    /// with non-normalized version (allowing users to get matches both when LaTex
181
    /// is pasted directly into the search box and when sub/superscripts are placed
182
    /// adjacent to regular characters
183
    pub fn title_search_versions(&self) -> Option<Vec<String>> {
26✔
184
        match &self.title {
26✔
185
            Some(titles) => titles.first().map(|title| {
2✔
186
                vec![title.to_string(), normalize_latex(title)]
2✔
187
                    .into_iter()
2✔
188
                    .unique()
2✔
189
                    .collect()
2✔
190
            }),
2✔
191
            None => None,
24✔
192
        }
193
    }
26✔
194

195
    fn embargo(&self) -> embargo::Embargo {
284✔
196
        embargo::Embargo::from_dates(
284✔
197
            self.embargo_lift.as_ref(),
284✔
198
            self.embargo_terms.as_ref(),
284✔
199
            self.id.as_ref().map_or("", |v| v),
284✔
200
        )
201
    }
284✔
202

203
    fn walkin_is_yes(&self) -> bool {
239✔
204
        matches!(&self.mudd_walkin, Some(vec) if vec.first().is_some_and(|walkin| walkin == "yes"))
12✔
205
    }
239✔
206
}
207

208
fn normalize_latex(original: &str) -> String {
3✔
209
    static LATEX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\\\(.*?\\\)").unwrap());
1✔
210
    LATEX_REGEX
3✔
211
        .replace_all(original, |captures: &Captures| {
3✔
212
            captures[0]
2✔
213
                .chars()
2✔
214
                .filter(|c| c.is_alphanumeric())
17✔
215
                .collect::<String>()
2✔
216
        })
2✔
217
        .to_string()
3✔
218
}
3✔
219

220
#[cfg(test)]
221
mod tests {
222
    use super::*;
223
    use crate::theses::dataspace::document::Metadatum;
224

225
    fn metadatum_vec_from_string(value: &str) -> Vec<Metadatum> {
47✔
226
        vec![Metadatum { value: Some(value.to_string()) }]
47✔
227
    }
47✔
228

229
    #[test]
    fn it_normalizes_latex() {
        // Superscripts written as LaTeX collapse to plain digits.
        let title = "2D \\(^{1}\\)H-\\(^{14}\\)N HSQC inverse-detection experiments";
        let expected = "2D 1H-14N HSQC inverse-detection experiments";

        assert_eq!(normalize_latex(title), expected);
    }
1✔
236

237
    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_restrictions() {
        // NOTE(review): this fixture sets no location, access-rights, walk-in
        // or embargo field, exactly like the `..._when_no_restrictions` test.
        // Confirm whether a restriction was meant to be set here.
        let document = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract(metadatum_vec_from_string("Summary"))
            .with_contributor(metadatum_vec_from_string("Wolff, Tamsen"))
            .with_contributor_advisor(metadatum_vec_from_string("Sandberg, Robert"))
            .with_contributor_author(metadatum_vec_from_string("Clark, Hillary"))
            .with_date_classyear(metadatum_vec_from_string("2014"))
            .with_department(metadatum_vec_from_string("Princeton University. Department of English"))
            .with_department(metadatum_vec_from_string("Princeton University. Program in Theater"))
            .with_identifier_uri(metadatum_vec_from_string("http://arks.princeton.edu/ark:/88435/dsp01b2773v788"))
            .with_format_extent(metadatum_vec_from_string("102 pages"))
            .with_language_iso(metadatum_vec_from_string("en_US"))
            .with_title(metadatum_vec_from_string("Dysfunction: A Play in One Act"))
            .build();

        let expected = ElectronicAccess {
            url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
            link_text: "Theses Central".to_owned(),
            link_description: Some("Full text".to_owned()),
            iiif_manifest_paths: None,
            digital_content: None,
        };

        assert_eq!(document.ark_hash(), Some(expected));
    }
1✔
265

266
    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_no_restrictions() {
        // No location, access-rights, walk-in or embargo field is set.
        let document = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract(metadatum_vec_from_string("Summary"))
            .with_contributor(metadatum_vec_from_string("Wolff, Tamsen"))
            .with_contributor_advisor(metadatum_vec_from_string("Sandberg, Robert"))
            .with_contributor_author(metadatum_vec_from_string("Clark, Hillary"))
            .with_date_classyear(metadatum_vec_from_string("2014"))
            .with_department(metadatum_vec_from_string("Princeton University. Department of English"))
            .with_department(metadatum_vec_from_string("Princeton University. Program in Theater"))
            .with_identifier_uri(metadatum_vec_from_string("http://arks.princeton.edu/ark:/88435/dsp01b2773v788"))
            .with_format_extent(metadatum_vec_from_string("102 pages"))
            .with_language_iso(metadatum_vec_from_string("en_US"))
            .with_title(metadatum_vec_from_string("Dysfunction: A Play in One Act"))
            .build();

        let expected = ElectronicAccess {
            url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
            link_text: "Theses Central".to_owned(),
            link_description: Some("Full text".to_owned()),
            iiif_manifest_paths: None,
            digital_content: None,
        };

        assert_eq!(document.ark_hash(), Some(expected));
    }
1✔
294

295
    #[test]
    fn ark_hash_returns_none_when_no_url() {
        // Same fixture as the other ark_hash tests, minus the identifier URI.
        let document = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract(metadatum_vec_from_string("Summary"))
            .with_contributor(metadatum_vec_from_string("Wolff, Tamsen"))
            .with_contributor_advisor(metadatum_vec_from_string("Sandberg, Robert"))
            .with_contributor_author(metadatum_vec_from_string("Clark, Hillary"))
            .with_date_classyear(metadatum_vec_from_string("2014"))
            .with_department(metadatum_vec_from_string("Princeton University. Department of English"))
            .with_department(metadatum_vec_from_string("Princeton University. Program in Theater"))
            .with_format_extent(metadatum_vec_from_string("102 pages"))
            .with_language_iso(metadatum_vec_from_string("en_US"))
            .with_title(metadatum_vec_from_string("Dysfunction: A Play in One Act"))
            .build();

        let ark = document.ark_hash();

        assert_eq!(ark, None);
    }
1✔
313

314
    #[test]
    fn on_site_only() {
        // Cases expected to be on-site only.
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_terms(metadatum_vec_from_string("2100-01-01"))
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "doc with embargo terms field should return OnSiteOnly"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2100-01-01"))
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "doc with embargo lift field should return OnSiteOnly"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .with_mudd_walkin(metadatum_vec_from_string("yes"))
                .with_date_classyear(metadatum_vec_from_string("2012-01-01T00:00:00Z"))
                .build()
                .on_site_only(),
            ThesisAvailability::OnSiteOnly,
            "with a specified accession date prior to 2013, it should return OnSiteOnly"
        );

        // Cases expected to be available off site.
        assert_eq!(
            DataspaceDocument::builder()
                .with_location(metadatum_vec_from_string("physical location"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with location field should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with expired embargo lift field should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .with_mudd_walkin(metadatum_vec_from_string("yes"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "without a specified accession date, it should return AvailableOffSite"
        );
        assert_eq!(
            DataspaceDocument::builder()
                .with_embargo_lift(metadatum_vec_from_string("2000-01-01"))
                .with_mudd_walkin(metadatum_vec_from_string("yes"))
                .with_date_classyear(metadatum_vec_from_string("2013-01-01T00:00:00Z"))
                .build()
                .on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "with a specified accession date in 2013, it should return AvailableOffSite"
        );
        // The empty-document case is asserted once; the former second copy of
        // this assertion (without a message) checked nothing new.
        assert_eq!(
            DataspaceDocument::builder().build().on_site_only(),
            ThesisAvailability::AvailableOffSite,
            "doc with no access-related fields should return AvailableOffSite"
        );
    }
1✔
388

389
    /// Tests for `DataspaceDocument::all_authors`.
    mod all_authors {
        use super::*;

        #[test]
        fn it_includes_author() {
            let author = metadatum_vec_from_string("Turing, Alan");
            let document = DataspaceDocument::builder()
                .with_contributor_author(author)
                .build();

            let expected = vec!["Turing, Alan".to_owned()];
            assert_eq!(document.all_authors(), expected);
        }

        #[test]
        fn it_includes_normalized_department() {
            let department = metadatum_vec_from_string("Astrophysical Sciences");
            let document = DataspaceDocument::builder()
                .with_department(department)
                .build();

            let expected =
                vec!["Princeton University. Department of Astrophysical Sciences".to_owned()];
            assert_eq!(document.all_authors(), expected);
        }

        #[test]
        fn it_includes_normalized_certificate() {
            let certificate = metadatum_vec_from_string("Hellenic Studies Program");
            let document = DataspaceDocument::builder()
                .with_certificate(certificate)
                .build();

            let expected = vec!["Princeton University. Program in Hellenic Studies".to_owned()];
            assert_eq!(document.all_authors(), expected);
        }
    }
422
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc