• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / bibdata / 49fb6d5d-e981-4ec8-b09b-2a5f44a3120c

23 Sep 2025 08:49PM UTC coverage: 89.659% (-0.03%) from 89.687%
49fb6d5d-e981-4ec8-b09b-2a5f44a3120c

Pull #2938

circleci

sandbergja
Move comment to the correct file
Pull Request #2938: New implementation of cjk_notes field in rust

37 of 119 new or added lines in 3 files covered. (31.09%)

2 existing lines in 1 file now uncovered.

7838 of 8742 relevant lines covered (89.66%)

376.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

32.74
/lib/bibdata_rs/src/marc/cjk.rs
1
// This module handles the indexing logic for CJK (Chinese, Japanese, and
2
// Korean) fields.  These fields share specialized CJK analysis logic in
3
// our solr configuration, so we consider them as a group here.
4

5
use itertools::Itertools;
6
use marctk::Record;
7
use unicode_blocks::is_cjk;
8

9
pub fn notes_cjk(record: &Record) -> impl Iterator<Item = String> + use<'_> {
2✔
10
    // These notes are supposedly in Latin script, but still may contain some
11
    // CJK characters
12
    let latin_script_note_fields = record.extract_fields(500..=599);
2✔
13
    let parallel_script_fields =
2✔
14
        record
2✔
15
            .get_fields("880")
2✔
16
            .into_iter()
2✔
17
            .filter(|field| matches!(field.first_subfield("6"), Some(subfield) if subfield.content().starts_with("5")));
2✔
18
    latin_script_note_fields
2✔
19
        .chain(parallel_script_fields)
2✔
20
        .map(|field| {
3✔
21
            field
3✔
22
                .subfields()
3✔
23
                .iter()
3✔
24
                .filter(|subfield| subfield.code() != "6")
5✔
25
                .map(|subfield| subfield.content())
3✔
26
                .join(" ")
3✔
27
        })
3✔
28
        .filter(|note| has_cjk_chars(note))
3✔
29
}
2✔
30

NEW
31
pub fn subjects_cjk(record: &Record) -> impl Iterator<Item = String> + use<'_> {
×
NEW
32
    extract_parallel_values(
×
NEW
33
        record,
×
NEW
34
        "600",
×
NEW
35
        "*",
×
NEW
36
        "0",
×
NEW
37
        &[
×
NEW
38
            "a", "b", "c", "d", "f", "k", "l", "m", "n", "o", "p", "q", "r", "t", "v", "x", "y",
×
NEW
39
            "z",
×
NEW
40
        ],
×
41
    )
NEW
42
    .chain(extract_parallel_values(
×
NEW
43
        record,
×
NEW
44
        "610",
×
NEW
45
        "*",
×
NEW
46
        "0",
×
NEW
47
        &[
×
NEW
48
            "a", "b", "f", "k", "l", "m", "n", "o", "p", "r", "s", "t", "v", "x", "y", "z",
×
NEW
49
        ],
×
50
    ))
NEW
51
    .chain(extract_parallel_values(
×
NEW
52
        record,
×
NEW
53
        "611",
×
NEW
54
        "*",
×
NEW
55
        "0",
×
NEW
56
        &[
×
NEW
57
            "a", "b", "c", "d", "e", "f", "g", "k", "l", "n", "p", "q", "s", "t", "v", "x", "y",
×
NEW
58
            "z",
×
NEW
59
        ],
×
60
    ))
NEW
61
    .chain(extract_parallel_values(
×
NEW
62
        record,
×
NEW
63
        "630",
×
NEW
64
        "*",
×
NEW
65
        "0",
×
NEW
66
        &[
×
NEW
67
            "a", "d", "f", "g", "k", "l", "m", "n", "o", "p", "r", "s", "t", "v", "x", "y", "z",
×
NEW
68
        ],
×
69
    ))
NEW
70
    .chain(extract_parallel_values(
×
NEW
71
        record,
×
NEW
72
        "650",
×
NEW
73
        "*",
×
NEW
74
        "0",
×
NEW
75
        &["a", "b", "c", "v", "x", "y", "z"],
×
76
    ))
NEW
77
    .chain(extract_parallel_values(
×
NEW
78
        record,
×
NEW
79
        "650",
×
NEW
80
        "*",
×
NEW
81
        "7",
×
NEW
82
        &["a", "b", "c", "v", "x", "y", "z"],
×
83
    ))
NEW
84
    .chain(extract_parallel_values(
×
NEW
85
        record,
×
NEW
86
        "651",
×
NEW
87
        "*",
×
NEW
88
        "0",
×
NEW
89
        &["a", "v", "x", "y", "z"],
×
90
    ))
NEW
91
    .filter(|subject| has_cjk_chars(subject))
×
NEW
92
}
×
93

94
fn has_cjk_chars(value: &str) -> bool {
3✔
95
    value.chars().any(is_cjk)
3✔
96
}
3✔
97

NEW
98
fn extract_parallel_values<'record>(
×
NEW
99
    record: &'record Record,
×
NEW
100
    tag: &str,
×
NEW
101
    ind1: &'record str,
×
NEW
102
    ind2: &'record str,
×
NEW
103
    subfields: &'record [&str],
×
NEW
104
) -> impl Iterator<Item = String> + 'record {
×
NEW
105
    record
×
NEW
106
        .get_parallel_fields(tag)
×
NEW
107
        .into_iter()
×
NEW
108
        .filter(move |field| ind1 == "*" || ind1 == field.ind1())
×
NEW
109
        .filter(move |field| ind2 == "*" || ind2 == field.ind2())
×
NEW
110
        .map(|field| {
×
NEW
111
            field
×
NEW
112
                .subfields()
×
NEW
113
                .iter()
×
NEW
114
                .filter(|subfield| subfields.contains(&subfield.code()))
×
NEW
115
                .map(|subfield| subfield.content())
×
NEW
116
                .join(" ")
×
NEW
117
        })
×
NEW
118
}
×
119

120
#[cfg(test)]
mod tests {
    use super::*;

    // A CJK note directly in a 500 field is returned, and nothing else.
    #[test]
    fn it_can_extract_cjk_notes_from_500() {
        let record = Record::from_breaker("=500 \\ $a石室合選").unwrap();
        let cjk_notes: Vec<String> = notes_cjk(&record).collect();
        assert_eq!(cjk_notes, ["石室合選"]);
    }

    // The 500 field here has no CJK characters; only the linked 880 value
    // is returned, without its $6 subfield.
    #[test]
    fn it_can_extract_cjk_notes_from_880() {
        let breaker = r#"=500 \\$6880-01$aThạch thất hợp tuyển
=880 \\$6500-01$a石室合選"#;
        let record = Record::from_breaker(breaker).unwrap();
        let cjk_notes: Vec<String> = notes_cjk(&record).collect();
        assert_eq!(cjk_notes, ["石室合選"]);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc