• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pymarc_dedupe / 62df4d4f-cdae-45a9-a6bd-0ac11c6e1770

22 May 2025 07:05PM UTC coverage: 99.158% (-0.8%) from 100.0%
62df4d4f-cdae-45a9-a6bd-0ac11c6e1770

Pull #24

circleci

maxkadel
Add output of comparison experiment - uses data set from Mark Z
Pull Request #24: Green locally - connect to Postgres DB

264 of 271 new or added lines in 10 files covered. (97.42%)

4 existing lines in 1 file now uncovered.

824 of 831 relevant lines covered (99.16%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.24
/src/streaming_xml_handler.py
1
import unicodedata
1✔
2
from xml.sax import make_parser
1✔
3
from xml.sax.handler import feature_namespaces
1✔
4
from pymarc import Leader, XmlHandler, exceptions
1✔
5

6
# Namespace URI that closing elements are checked against when the handler
# runs in strict mode; elements from any other namespace are skipped.
MARC_XML_NS = "http://www.loc.gov/MARC21/slim"
1✔
7

8

9
class StreamingXmlHandler(XmlHandler):
    """Closing-element handling that hands each finished record to ``process_record``.

    Fields and subfields are attached to the record being built as their
    elements close; when a ``record`` element closes, the record is passed to
    ``self.process_record`` and the reference to it is dropped.
    """

    def endElementNS(self, name, qname):
        """Handle the end of a namespaced element.

        ``name`` is indexed as ``(namespace, local_name)``. ``qname`` is not
        used. In strict mode, elements outside ``MARC_XML_NS`` return early
        without touching any state (the text buffer is not reset either).
        """
        if self._strict and name[0] != MARC_XML_NS:
            return

        tag = name[1]

        # Collect the buffered character data, normalizing it only when a
        # normalization form has been configured.
        content = "".join(self._text)
        if self.normalize_form is not None:
            content = unicodedata.normalize(self.normalize_form, content)

        # The tag comparisons are mutually exclusive, so at most one branch runs.
        # RecordLeaderInvalid raised anywhere in the chosen branch (including
        # inside the ``process_record`` callback) is deliberately suppressed.
        try:
            if tag == "subfield":
                self._field.add_subfield(self._subfield_code, content)
                self._subfield_code = None
            elif tag == "datafield":
                self._record.add_field(self._field)
                self._field = None
            elif tag == "controlfield":
                self._field.data = content
                self._record.add_field(self._field)
                self._field = None
            elif tag == "leader":
                self._record.leader = Leader(content)
            elif tag == "record":
                self.process_record(self._record)
                self._record = None
        except exceptions.RecordLeaderInvalid:
            pass

        # Start the next element with an empty text buffer.
        self._text = []
1✔
40

41

42
def map_xml(function, *files):
    """Call ``function`` once for every record parsed out of ``files``.

    A single ``StreamingXmlHandler`` is created, ``function`` is installed as
    its ``process_record`` callback, and each file is parsed in turn with that
    same handler.

    .. code-block:: python

        def do_it(r):
            print(r)

        map_xml(do_it, 'marc.xml')
    """
    record_handler = StreamingXmlHandler()
    record_handler.process_record = function
    for source in files:
        parse_xml(source, record_handler)
1✔
59

60

61
def parse_xml(xml_file, handler):
    """Run a namespace-aware SAX parse of ``xml_file``, sending events to ``handler``.

    ``handler`` is installed as the parser's content handler; ``xml_file`` is
    passed straight through to the parser's ``parse`` method.
    """
    sax_reader = make_parser()
    # Namespace processing is switched on so the handler receives the
    # ``*ElementNS`` callbacks.
    sax_reader.setFeature(feature_namespaces, 1)
    sax_reader.setContentHandler(handler)
    sax_reader.parse(xml_file)
1✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc