• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 6af59736-1eb7-499a-b28b-31234d28f25b

23 Sep 2025 08:17PM UTC coverage: 90.818% (+0.1%) from 90.675%
6af59736-1eb7-499a-b28b-31234d28f25b

Pull #2927

circleci

Ryan Laddusaw
Update Department mapping with feedback
Pull Request #2927: Index theses from dspace 7+

1340 of 1448 new or added lines in 16 files covered. (92.54%)

41 existing lines in 7 files now uncovered.

8823 of 9715 relevant lines covered (90.82%)

354.65 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.63
/lib/bibdata_rs/src/theses/legacy_dataspace/document/normalize.rs
1
// This module is responsible for normalizing data within a DataspaceDocument
2

3
use crate::{
4
    solr::{AccessFacet, ElectronicAccess, LibraryFacet},
5
    theses::{
6
        legacy_dataspace::document::DataspaceDocument,
7
        department,
8
        embargo::{self, Embargo},
9
        holdings::{self, ThesisAvailability},
10
        language, program,
11
    },
12
};
13
use itertools::Itertools;
14
use regex::{Captures, Regex};
15
use std::sync::LazyLock;
16

17
impl DataspaceDocument {
18
    pub fn access_facet(&self) -> Option<AccessFacet> {
26✔
19
        match (self.embargo(), self.on_site_only()) {
26✔
20
            (embargo::Embargo::Current(_), _) => None,
1✔
21
            (_, ThesisAvailability::AvailableOffSite) => Some(AccessFacet::Online),
20✔
22
            (_, ThesisAvailability::OnSiteOnly) => Some(AccessFacet::InTheLibrary),
5✔
23
        }
24
    }
26✔
25

26
    pub fn advanced_location(&self) -> Option<Vec<String>> {
26✔
27
        match self.on_site_only() {
26✔
28
            ThesisAvailability::OnSiteOnly => Some(vec![
6✔
29
                "mudd$stacks".to_owned(),
6✔
30
                "Mudd Manuscript Library".to_owned(),
6✔
31
            ]),
6✔
32
            _ => None,
20✔
33
        }
34
    }
26✔
35

36
    pub fn all_authors(&self) -> Vec<String> {
29✔
37
        let mut authors = match &self.contributor_author {
29✔
38
            Some(authors) => authors.clone(),
3✔
39
            None => Vec::new(),
26✔
40
        };
41
        authors.extend(self.contributor_advisor.clone().unwrap_or_default());
29✔
42
        authors.extend(self.contributor.clone().unwrap_or_default());
29✔
43
        authors.extend(
29✔
44
            self.department
29✔
45
                .clone()
29✔
46
                .unwrap_or_default()
29✔
47
                .iter()
29✔
48
                .filter_map(|dept| department::map_department(dept)),
29✔
49
        );
50
        authors.extend(
29✔
51
            self.certificate
29✔
52
                .clone()
29✔
53
                .unwrap_or_default()
29✔
54
                .iter()
29✔
55
                .filter_map(|program| program::map_program(program)),
29✔
56
        );
57
        authors
29✔
58
    }
29✔
59

60
    pub fn ark_hash(&self) -> Option<ElectronicAccess> {
29✔
61
        holdings::dataspace_url_with_metadata(
29✔
62
            self.identifier_uri.as_ref(),
29✔
63
            self.location.is_some(),
29✔
64
            self.rights_access_rights.is_some(),
29✔
65
            self.walkin_is_yes(),
29✔
66
            match &self.date_classyear {
29✔
67
                Some(class_year) => &class_year,
13✔
68
                None => &[],
16✔
69
            },
70
            self.embargo(),
29✔
71
        )
72
    }
29✔
73

74
    pub fn authorized_ceritificates(&self) -> Option<Vec<String>> {
26✔
75
        self.certificate.as_ref().map(|certificates| {
26✔
76
            certificates
3✔
77
                .iter()
3✔
78
                .filter_map(|certificate| program::map_program(certificate))
4✔
79
                .collect()
3✔
80
        })
3✔
81
    }
26✔
82

83
    pub fn authorized_departments(&self) -> Option<Vec<String>> {
26✔
84
        self.department.as_ref().map(|departments| {
26✔
85
            departments
5✔
86
                .iter()
5✔
87
                .filter_map(|department| department::map_department(department))
8✔
88
                .collect()
5✔
89
        })
5✔
90
    }
26✔
91

92
    pub fn call_number(&self) -> String {
52✔
93
        holdings::call_number(self.identifier_other.as_ref())
52✔
94
    }
52✔
95

96
    pub fn class_year(&self) -> Option<i16> {
78✔
97
        let years = self.date_classyear.clone().unwrap_or_default();
78✔
98
        let year = years.first()?;
78✔
99
        year.parse::<i16>().ok()
30✔
100
    }
78✔
101

102
    pub fn languages(&self) -> Vec<String> {
52✔
103
        language::codes_to_english_names(self.language_iso.clone())
52✔
104
    }
52✔
105

106
    pub fn location(&self) -> Option<LibraryFacet> {
26✔
107
        match self.on_site_only() {
26✔
108
            ThesisAvailability::OnSiteOnly => Some(LibraryFacet::Mudd),
6✔
109
            _ => None,
20✔
110
        }
111
    }
26✔
112

113
    pub fn location_display(&self) -> Option<String> {
26✔
114
        match self.on_site_only() {
26✔
115
            ThesisAvailability::OnSiteOnly => Some("Mudd Manuscript Library".to_owned()),
6✔
116
            _ => None,
20✔
117
        }
118
    }
26✔
119

120
    pub fn location_code(&self) -> Option<String> {
26✔
121
        match self.on_site_only() {
26✔
122
            ThesisAvailability::OnSiteOnly => Some("mudd$stacks".to_owned()),
6✔
123
            _ => None,
20✔
124
        }
125
    }
26✔
126

127
    pub fn on_site_only(&self) -> ThesisAvailability {
191✔
128
        holdings::on_site_only(
191✔
129
            self.location.is_some(),
191✔
130
            self.rights_access_rights.is_some(),
191✔
131
            self.walkin_is_yes(),
191✔
132
            match &self.date_classyear {
191✔
133
                Some(class_year) => &class_year,
72✔
134
                None => &[],
119✔
135
            },
136
            self.embargo(),
191✔
137
        )
138
    }
191✔
139

140
    pub fn online_portfolio_statements(&self) -> Option<String> {
26✔
141
        if self.on_site_only() == ThesisAvailability::OnSiteOnly
26✔
142
            || matches!(self.embargo(), Embargo::Current(_))
20✔
143
        {
144
            None
6✔
145
        } else {
146
            holdings::online_holding_string(self.identifier_other.as_ref())
20✔
147
        }
148
    }
26✔
149

150
    pub fn physical_holding_string(&self) -> Option<String> {
26✔
151
        match self.on_site_only() {
26✔
152
            ThesisAvailability::AvailableOffSite => None,
20✔
153
            ThesisAvailability::OnSiteOnly => {
154
                holdings::physical_holding_string(self.identifier_other.as_ref())
6✔
155
            }
156
        }
157
    }
26✔
158

159
    pub fn restrictions_note_display(&self) -> Option<Vec<String>> {
26✔
160
        match &self.rights_access_rights {
26✔
161
            Some(rights) => rights.first().map(|s| vec![s.clone()]),
7✔
162
            None => {
163
                if self.walkin_is_yes() {
19✔
164
                    Some(vec!["Walk-in Access. This thesis can only be viewed on computer terminals at the '<a href=\"http://mudd.princeton.edu\">Mudd Manuscript Library</a>.".to_owned()])
1✔
165
                } else {
166
                    match self.embargo() {
18✔
NEW
167
                    Embargo::Current(text) => Some(vec![text]),
×
168
                    Embargo::None => None,
16✔
NEW
169
                    Embargo::Expired => None,
×
170
                    Embargo::Invalid => Some(vec![
2✔
171
                        format!("This content is currently under embargo. For more information contact the <a href=\"mailto:dspadmin@princeton.edu?subject=Regarding embargoed DataSpace Item 88435/{}\"> Mudd Manuscript Library</a>.", self.id.clone().unwrap_or_default())
2✔
172
                    ]),
2✔
173
                }
174
                }
175
            }
176
        }
177
    }
26✔
178

179
    /// Take first title, strip out latex expressions when present to include along
180
    /// with non-normalized version (allowing users to get matches both when LaTex
181
    /// is pasted directly into the search box and when sub/superscripts are placed
182
    /// adjacent to regular characters
183
    pub fn title_search_versions(&self) -> Option<Vec<String>> {
26✔
184
        match &self.title {
26✔
185
            Some(titles) => titles.first().map(|title| {
2✔
186
                vec![title.to_string(), normalize_latex(title)]
2✔
187
                    .into_iter()
2✔
188
                    .unique()
2✔
189
                    .collect()
2✔
190
            }),
2✔
191
            None => None,
24✔
192
        }
193
    }
26✔
194

195
    fn embargo(&self) -> embargo::Embargo {
284✔
196
        embargo::Embargo::from_dates(
284✔
197
            self.embargo_lift.as_ref(),
284✔
198
            self.embargo_terms.as_ref(),
284✔
199
            self.id.as_ref().map_or("", |v| v),
284✔
200
        )
201
    }
284✔
202

203
    fn walkin_is_yes(&self) -> bool {
239✔
204
        matches!(&self.mudd_walkin, Some(vec) if vec.first().is_some_and(|walkin| walkin == "yes"))
12✔
205
    }
239✔
206
}
207

208
fn normalize_latex(original: &str) -> String {
3✔
209
    static LATEX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\\\(.*?\\\)").unwrap());
1✔
210
    LATEX_REGEX
3✔
211
        .replace_all(original, |captures: &Captures| {
3✔
212
            captures[0]
2✔
213
                .chars()
2✔
214
                .filter(|c| c.is_alphanumeric())
17✔
215
                .collect::<String>()
2✔
216
        })
2✔
217
        .to_string()
3✔
218
}
3✔
219

220
#[cfg(test)]
mod tests {
    use super::*;

    /// LaTeX expressions are reduced to their alphanumeric characters.
    #[test]
    fn it_normalizes_latex() {
        let normalized =
            normalize_latex("2D \\(^{1}\\)H-\\(^{14}\\)N HSQC inverse-detection experiments");
        assert_eq!(normalized, "2D 1H-14N HSQC inverse-detection experiments");
    }

    // NOTE(review): this fixture is identical to the one in
    // `ark_hash_gets_the_ark_with_fulltext_link_display_when_no_restrictions`;
    // no restriction-related field is set here. Confirm whether a restricted
    // document was intended.
    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_restrictions() {
        let document = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract("Summary")
            .with_contributor("Wolff, Tamsen".to_string())
            .with_contributor_advisor("Sandberg, Robert".to_string())
            .with_contributor_author("Clark, Hillary".to_string())
            .with_date_classyear("2014")
            .with_department("Princeton University. Department of English")
            .with_department("Princeton University. Program in Theater")
            .with_identifier_uri("http://arks.princeton.edu/ark:/88435/dsp01b2773v788")
            .with_format_extent("102 pages")
            .with_language_iso("en_US")
            .with_title("Dysfunction: A Play in One Act")
            .build();

        let expected = ElectronicAccess {
            url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
            link_text: "Theses Central".to_owned(),
            link_description: Some("Full text".to_owned()),
            iiif_manifest_paths: None,
            digital_content: None,
        };
        assert_eq!(document.ark_hash().unwrap(), expected);
    }

    /// A document with an identifier URI and no restrictions gets a full-text link.
    #[test]
    fn ark_hash_gets_the_ark_with_fulltext_link_display_when_no_restrictions() {
        let document = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract("Summary")
            .with_contributor("Wolff, Tamsen".to_string())
            .with_contributor_advisor("Sandberg, Robert".to_string())
            .with_contributor_author("Clark, Hillary".to_string())
            .with_date_classyear("2014")
            .with_department("Princeton University. Department of English")
            .with_department("Princeton University. Program in Theater")
            .with_identifier_uri("http://arks.princeton.edu/ark:/88435/dsp01b2773v788")
            .with_format_extent("102 pages")
            .with_language_iso("en_US")
            .with_title("Dysfunction: A Play in One Act")
            .build();

        let expected = ElectronicAccess {
            url: "http://arks.princeton.edu/ark:/88435/dsp01b2773v788".to_owned(),
            link_text: "Theses Central".to_owned(),
            link_description: Some("Full text".to_owned()),
            iiif_manifest_paths: None,
            digital_content: None,
        };
        assert_eq!(document.ark_hash().unwrap(), expected);
    }

    /// Without an identifier URI there is no electronic access entry.
    #[test]
    fn ark_hash_returns_none_when_no_url() {
        let document = DataspaceDocument::builder()
            .with_id("dsp01b2773v788")
            .with_description_abstract("Summary")
            .with_contributor("Wolff, Tamsen".to_string())
            .with_contributor_advisor("Sandberg, Robert".to_string())
            .with_contributor_author("Clark, Hillary".to_string())
            .with_date_classyear("2014")
            .with_department("Princeton University. Department of English")
            .with_department("Princeton University. Program in Theater")
            .with_format_extent("102 pages".to_string())
            .with_language_iso("en_US")
            .with_title("Dysfunction: A Play in One Act")
            .build();

        assert_eq!(document.ark_hash(), None);
    }

    /// Table of documents with the availability each one is expected to report.
    #[test]
    fn on_site_only() {
        let cases = [
            (
                DataspaceDocument::builder()
                    .with_embargo_terms("2100-01-01")
                    .build(),
                ThesisAvailability::OnSiteOnly,
                "doc with embargo terms field should return OnSiteOnly",
            ),
            (
                DataspaceDocument::builder()
                    .with_embargo_lift("2100-01-01")
                    .build(),
                ThesisAvailability::OnSiteOnly,
                "doc with embargo lift field should return OnSiteOnly",
            ),
            (
                DataspaceDocument::builder()
                    .with_embargo_lift("2000-01-01")
                    .with_mudd_walkin("yes")
                    .with_date_classyear("2012-01-01T00:00:00Z")
                    .build(),
                ThesisAvailability::OnSiteOnly,
                "with a specified accession date prior to 2013, it should return OnSiteOnly",
            ),
            (
                DataspaceDocument::builder()
                    .with_location("physical location")
                    .build(),
                ThesisAvailability::AvailableOffSite,
                "doc with location field should return AvailableOffSite",
            ),
            (
                DataspaceDocument::builder()
                    .with_embargo_lift("2000-01-01")
                    .build(),
                ThesisAvailability::AvailableOffSite,
                "doc with expired embargo lift field should return AvailableOffSite",
            ),
            (
                DataspaceDocument::builder()
                    .with_embargo_lift("2000-01-01")
                    .with_mudd_walkin("yes")
                    .build(),
                ThesisAvailability::AvailableOffSite,
                "without a specified accession date, it should return AvailableOffSite",
            ),
            (
                DataspaceDocument::builder()
                    .with_embargo_lift("2000-01-01")
                    .with_mudd_walkin("yes")
                    .with_date_classyear("2013-01-01T00:00:00Z")
                    .build(),
                ThesisAvailability::AvailableOffSite,
                "with a specified accession date in 2013, it should return AvailableOffSite",
            ),
            // The original ended with a second, message-less copy of this
            // assertion; the duplicate has been dropped.
            (
                DataspaceDocument::builder().build(),
                ThesisAvailability::AvailableOffSite,
                "doc with no access-related fields should return AvailableOffSite",
            ),
        ];

        for (document, expected, reason) in cases {
            assert_eq!(document.on_site_only(), expected, "{}", reason);
        }
    }

    mod all_authors {
        use super::*;

        /// The thesis author is part of the author list.
        #[test]
        fn it_includes_author() {
            let document = DataspaceDocument::builder()
                .with_contributor_author("Turing, Alan")
                .build();
            assert_eq!(document.all_authors(), vec!["Turing, Alan".to_owned()]);
        }

        /// Departments appear under their mapped name.
        #[test]
        fn it_includes_normalized_department() {
            let document = DataspaceDocument::builder()
                .with_department("Astrophysical Sciences")
                .build();
            assert_eq!(
                document.all_authors(),
                vec!["Princeton University. Department of Astrophysical Sciences".to_owned()]
            );
        }

        /// Certificate programs appear under their mapped name.
        #[test]
        fn it_includes_normalized_certificate() {
            let document = DataspaceDocument::builder()
                .with_certificate("Hellenic Studies Program")
                .build();
            assert_eq!(
                document.all_authors(),
                vec!["Princeton University. Program in Hellenic Studies".to_owned()]
            );
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc