• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pulibrary / pymarc_dedupe / 624f93b5-875e-42fc-9ad5-73405b9976b2

13 May 2025 04:49PM UTC coverage: 99.838% (-0.2%) from 100.0%
624f93b5-875e-42fc-9ad5-73405b9976b2

Pull #24

circleci

maxkadel
Ensure that the same records are not re-created
Pull Request #24: Green locally - connect to Postgres DB

36 of 37 new or added lines in 3 files covered. (97.3%)

615 of 616 relevant lines covered (99.84%)

1.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.88
/src/marc_to_db.py
1
import os.path
1✔
2
from xml.sax import SAXParseException
1✔
3
import psycopg2
1✔
4
from pymarc import parse_xml_to_array
1✔
5
from pymarc import parse_json_to_array
1✔
6
from src.marc_record import MarcRecord
1✔
7
from src.gold_rush import GoldRush
1✔
8

9
# Schema of the table that holds one row per de-duplication candidate.
# IF NOT EXISTS makes the statement safe to run on every load.
CREATE_TABLE_SQL = """CREATE TABLE IF NOT EXISTS records (
id TEXT,
title TEXT,
transliterated_title TEXT,
publication_year INT,
pagination TEXT,
edition TEXT,
publisher_name TEXT,
type_of VARCHAR,
title_part TEXT,
title_number TEXT,
author TEXT,
title_inclusive_dates TEXT,
gov_doc_number TEXT,
is_electronic_resource BOOL,
gold_rush TEXT,
record_source TEXT
);
"""

# Parameterized insert of one row. The columns are named explicitly (in the
# same order as CREATE_TABLE_SQL) so the statement does not depend on the
# physical column order of a table that already existed before this code ran.
CREATE_RECORD_SQL = """INSERT INTO records (
id, title, transliterated_title, publication_year, pagination, edition,
publisher_name, type_of, title_part, title_number, author,
title_inclusive_dates, gov_doc_number, is_electronic_resource, gold_rush,
record_source
) VALUES
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

# Parameterized lookup of existing rows by record id.
FIND_RECORD_SQL = """SELECT * FROM records WHERE id = (%s);
"""
35

36

37
class MarcToDb:
    """Load the MARC records of one input file into the ``records`` table."""

    def __init__(self, input_file_path, db_config):
        """Store the input path and open the Postgres connection.

        Args:
            input_file_path: Path of the MARC file to load. It is parsed as
                MARCXML first, falling back to MARC JSON.
            db_config: Mapping providing the "dbname", "user", "host" and
                "port" entries passed to ``psycopg2.connect``.
        """
        self.input_file_path = input_file_path
        self.conn = psycopg2.connect(
            database=db_config["dbname"],
            user=db_config["user"],
            host=db_config["host"],
            port=db_config["port"],
        )

    def to_db(self):
        """Create the table if needed and insert every not-yet-stored record.

        A record whose id already has a row in ``records`` is skipped, so
        loading the same file twice does not create duplicate rows.
        """
        # Each statement is committed immediately; no explicit commit follows.
        self.conn.autocommit = True
        with self.conn.cursor() as cur:
            cur.execute(CREATE_TABLE_SQL)

            # The source label (file name without directory or extension) is
            # the same for every record, so compute it once, outside the loop.
            record_source, _file_extension = os.path.splitext(
                os.path.basename(self.input_file_path)
            )

            for record in self.pymarc_records_from_file():
                mr = MarcRecord(record)
                record_id = mr.id()

                # One fetched row is enough to know the id is already stored.
                cur.execute(FIND_RECORD_SQL, (record_id,))
                if cur.fetchone() is not None:
                    continue

                # Value order must match the column order of the table.
                data = (
                    record_id,
                    mr.title(),
                    mr.transliterated_title(),
                    mr.publication_year(),
                    mr.pagination(),
                    mr.edition(),
                    mr.publisher_name(),
                    mr.type_of(),
                    mr.title_part(),
                    mr.title_number(),
                    mr.author(),
                    mr.title_inclusive_dates(),
                    mr.gov_doc_number(),
                    mr.is_electronic_resource(),
                    GoldRush(mr).as_gold_rush(),
                    record_source,
                )
                cur.execute(CREATE_RECORD_SQL, data)

    def pymarc_records_from_file(self):
        """Return the pymarc records parsed from ``self.input_file_path``.

        The file is first parsed as MARCXML; if the XML parser raises
        ``SAXParseException`` the file is parsed as MARC JSON instead.
        """
        try:
            return parse_xml_to_array(self.input_file_path)
        except SAXParseException:
            return parse_json_to_array(self.input_file_path)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc