• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pymarc_dedupe / 62df4d4f-cdae-45a9-a6bd-0ac11c6e1770

22 May 2025 07:05PM UTC coverage: 99.158% (-0.8%) from 100.0%
62df4d4f-cdae-45a9-a6bd-0ac11c6e1770

Pull #24

circleci

maxkadel
Add output of comparison experiment - uses data set from Mark Z
Pull Request #24: Green locally - connect to Postgres DB

264 of 271 new or added lines in 10 files covered. (97.42%)

4 existing lines in 1 file now uncovered.

824 of 831 relevant lines covered (99.16%)

0.99 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.3
/src/marc_to_db.py
1
import os.path
1✔
2
import time
1✔
3
import psycopg2
1✔
4
from pymarc import exceptions
1✔
5
from config import settings
1✔
6
from src.marc_record import MarcRecord
1✔
7
from src.streaming_json_handler import map_json
1✔
8
from src.streaming_xml_handler import map_xml
1✔
9

10
CREATE_TABLE_SQL = """CREATE TABLE IF NOT EXISTS records (
1✔
11
id TEXT,
12
title TEXT,
13
author TEXT,
14
publication_year TEXT,
15
pagination TEXT,
16
edition TEXT,
17
publisher_name TEXT,
18
type_of VARCHAR,
19
is_electronic_resource BOOL,
20
source_file TEXT,
21
UNIQUE (id)
22
);
23
"""
24

25
# Parameterised INSERT for a single row of `records`.
# The target columns are named explicitly so the ten placeholders bind by
# column name rather than by the table's physical column order; the table is
# created with IF NOT EXISTS, so a pre-existing table may have a different
# layout. The order here must match the tuple built by MarcToDb.record_data.
CREATE_RECORD_SQL = """INSERT INTO records (
id, title, author, publication_year, pagination, edition,
publisher_name, type_of, is_electronic_resource, source_file
) VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
28

29

30
class MarcToDb:
    """Write the MARC records of one XML or JSON file into the `records` table.

    The database connection is a class attribute: it is opened once, when the
    class body runs, and shared by every instance.
    """

    # Shared connection, configured from `settings`.
    conn = psycopg2.connect(
        database=settings.db_name,
        user=settings.db_user,
        host=settings.db_host,
        port=settings.db_port,
    )
    # Every statement is committed on its own, so an INSERT that fails in
    # add_record does not block the INSERTs that follow it.
    conn.autocommit = True

    @classmethod
    def find_or_create_table(cls):
        """Create the `records` table if it does not already exist."""
        with cls.conn.cursor() as cur:
            cur.execute(CREATE_TABLE_SQL)

    def __init__(self, input_file_path):
        """Prepare to load the file at *input_file_path*.

        Sets `source_file` (base name without extension) and `file_extension`
        (including the leading dot) from the path, and opens a cursor on the
        shared class-level connection.
        """
        self.input_file_path = input_file_path
        self.conn = MarcToDb.conn
        self.source_file, self.file_extension = os.path.splitext(
            os.path.basename(self.input_file_path)
        )
        self.cursor = self.conn.cursor()

    def to_db(self):
        """Stream every record of the input file through `add_record`.

        The handler is chosen from the file extension, compared
        case-insensitively so that e.g. ``records.XML`` is accepted.

        Raises:
            ValueError: if the extension is neither ``.xml`` nor ``.json``.
        """
        print(
            f"""time: {time.asctime(time.localtime())} -
                writing records in {self.input_file_path} to database
            """
        )
        # Lower-case only for dispatch; self.file_extension keeps its
        # original spelling for any caller that reads it.
        extension = self.file_extension.lower()
        if extension == ".xml":
            map_xml(self.add_record, self.input_file_path)
        elif extension == ".json":
            map_json(self.add_record, self.input_file_path)
        else:
            raise ValueError("Files must be either xml or json")

    def add_record(self, record):
        """Insert one record, skipping it (with a logged warning) on failure.

        Loading is best-effort: a database error or a record with missing
        linked fields must not abort the rest of the file, so the error is
        reported and the record is dropped.
        """
        try:
            self.cursor.execute(CREATE_RECORD_SQL, self.record_data(record))
        except (psycopg2.DatabaseError, exceptions.MissingLinkedFields) as err:
            # Imported here so the module's top-level imports stay untouched.
            import logging

            logging.getLogger(__name__).warning(
                "Skipping record from %s: %s",
                self.input_file_path,
                str(err).strip(),
            )

    def record_data(self, record):
        """Return the tuple of column values for *record*.

        The order matches the columns of the `records` table. Falsy text
        fields (e.g. empty strings) are replaced by None; `id` and
        `is_electronic_resource` are passed through unchanged.
        """
        mr = MarcRecord(record)
        return (
            mr.id(),
            mr.title() or None,
            mr.author() or None,
            mr.publication_year() or None,
            mr.pagination() or None,
            mr.edition() or None,
            mr.publisher_name() or None,
            mr.type_of() or None,
            mr.is_electronic_resource(),
            self.source_file,
        )
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc