
zopefoundation / ZODB / build 11457046461

22 Oct 2024 09:13AM UTC coverage: 83.911% (+0.2%) from 83.745%

Pull Request #403: Repozo incremental recover (Sebatyne, via github)
Commit: "fixup! repozo: factorize code doing the actual recover (write), in preparation to the implementation of the incremental recover"

2445 of 3554 branches covered (68.8%)
214 of 215 new or added lines in 2 files covered (99.53%)
1 existing line in 1 file now uncovered
13466 of 16048 relevant lines covered (83.91%)
0.84 hits per line

Source file: /src/ZODB/scripts/repozo.py (91.7% covered)

#!/usr/bin/env python

# repozo.py -- incremental and full backups of a Data.fs file.
#
# Originally written by Anthony Baxter
# Significantly modified by Barry Warsaw

"""repozo.py -- incremental and full backups of a Data.fs file and index.

Usage: %(program)s [options]
Where:

    Exactly one of -B, -R, or -V must be specified:

    -B / --backup
        Backup current ZODB file.

    -R / --recover
        Restore a ZODB file from a backup.

    -V / --verify
        Verify backup integrity.

    -v / --verbose
        Verbose mode.

    -h / --help
        Print this text and exit.

    -r dir
    --repository=dir
        Repository directory containing the backup files.  This argument
        is required.  The directory must already exist.  You should not
        edit the files in this directory, or add your own files to it.

Options for -B/--backup:
    -f file
    --file=file
        Source Data.fs file.  This argument is required.

    -F / --full
        Force a full backup.  By default, an incremental backup is made
        if possible; however, if a pack has occurred since the last
        incremental backup, a full backup is necessary.

    -Q / --quick
        Verify via md5 checksum only the last incremental written.  This
        significantly reduces the disk i/o at the (theoretical) cost of
        inconsistency.  This is a probabilistic way of determining whether
        a full backup is necessary.

    -z / --gzip
        Compress the backup files with gzip.  Uses the default zlib
        compression level.  By default, gzip compression is not used.

    -k / --kill-old-on-full
        If a full backup is created, remove any prior full or incremental
        backup files (and associated metadata files) from the repository
        directory.

Options for -R/--recover:
    -D str
    --date=str
        Recover state as of this date.  Specify UTC (not local) time.
            yyyy-mm-dd[-hh[-mm[-ss]]]
        By default, the current time is used.

    -o filename
    --output=filename
        Write the recovered ZODB to the given file.  By default, the file
        is written to stdout.

        Note:  for the stdout case, the index file will **not** be restored
        automatically.

    -F / --full
        Force a full recover.  By default, an incremental recover is made
        if possible, by only copying the latest backup delta to the
        recovered ZODB file.  A full recover will always be done if a pack
        has occurred since the last incremental backup.

    -w
    --with-verify
        Verify the backup files on the fly while recovering.  This option
        runs the same checks as when repozo is run in -V/--verify mode, and
        allows verifying and recovering a backup in a single step.  If a
        sanity check fails, the partially recovered ZODB will be left in
        place.

Options for -V/--verify:
    -Q / --quick
        Verify file sizes only (skip md5 checksums).
"""
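
# Representative invocations (illustrative paths, not part of the module):
#
#   repozo.py -B -r /var/backups/zodb -f /var/db/Data.fs
#       back up Data.fs into the repository (incremental when possible)
#   repozo.py -R -r /var/backups/zodb -o /var/db/Data.fs -D 2024-10-22
#       recover the state as of 2024-10-22 (UTC) into Data.fs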

import errno
import getopt
import gzip
import os
import re
import shutil
import sys
import time
from hashlib import md5

from ZODB.FileStorage import FileStorage


program = sys.argv[0]

BACKUP = 1
RECOVER = 2
VERIFY = 3

COMMASPACE = ', '
READCHUNK = 16 * 1024
VERBOSE = False


class RepozoError(Exception):
    pass


class WouldOverwriteFiles(RepozoError):
    pass


class NoFiles(RepozoError):
    pass


class VerificationFail(RepozoError):
    pass


class _GzipCloser:

    def __init__(self, fqn, mode):
        self._opened = gzip.open(fqn, mode)

    def __enter__(self):
        return self._opened

    def __exit__(self, exc_type, exc_value, traceback):
        self._opened.close()


def usage(code, msg=''):
    outfp = sys.stderr
    if code == 0:
        outfp = sys.stdout

    print(__doc__ % globals(), file=outfp)
    if msg:
        print(msg, file=outfp)

    sys.exit(code)


def log(msg, *args):
    if VERBOSE:
        # Use stderr here so that -v flag works with -R and no -o
        print(msg % args, file=sys.stderr)


def error(msg, *args):
    print(msg % args, file=sys.stderr)


def parseargs(argv):
    global VERBOSE
    try:
        opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w',
                                   ['backup',
                                    'recover',
                                    'verify',
                                    'verbose',
                                    'help',
                                    'repository=',
                                    'file=',
                                    'full',
                                    'quick',
                                    'gzip',
                                    'kill-old-on-full',
                                    'date=',
                                    'output=',
                                    'with-verify',
                                    ])
    except getopt.error as msg:
        usage(1, msg)

    class Options:
        mode = None         # BACKUP, RECOVER or VERIFY
        file = None         # name of input Data.fs file
        repository = None   # name of directory holding backups
        full = False        # True forces full backup or full recovery
        date = None         # -D argument, if any
        output = None       # where to write recovered data; None = stdout
        quick = False       # -Q flag state
        gzip = False        # -z flag state
        killold = False     # -k flag state
        withverify = False  # -w flag state

    options = Options()

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--verbose'):
            VERBOSE = True
        elif opt in ('-R', '--recover'):
            if options.mode is not None:
                usage(1, '-B, -R, and -V are mutually exclusive')
            options.mode = RECOVER
        elif opt in ('-B', '--backup'):
            if options.mode is not None:
                usage(1, '-B, -R, and -V are mutually exclusive')
            options.mode = BACKUP
        elif opt in ('-V', '--verify'):
            if options.mode is not None:
                usage(1, '-B, -R, and -V are mutually exclusive')
            options.mode = VERIFY
        elif opt in ('-Q', '--quick'):
            options.quick = True
        elif opt in ('-f', '--file'):
            options.file = arg
        elif opt in ('-r', '--repository'):
            options.repository = arg
        elif opt in ('-F', '--full'):
            options.full = True
        elif opt in ('-D', '--date'):
            options.date = arg
        elif opt in ('-o', '--output'):
            options.output = arg
        elif opt in ('-z', '--gzip'):
            options.gzip = True
        elif opt in ('-k', '--kill-old-on-full'):
            options.killold = True
        elif opt in ('-w', '--with-verify'):
            options.withverify = True
        else:
            assert False, (opt, arg)

    # Any other arguments are invalid
    if args:
        usage(1, 'Invalid arguments: ' + COMMASPACE.join(args))

    # Sanity checks
    if options.mode is None:
        usage(1, 'Either --backup, --recover or --verify is required')
    if options.repository is None:
        usage(1, '--repository is required')
    if options.mode == BACKUP:
        if options.date is not None:
            log('--date option is ignored in backup mode')
            options.date = None
        if options.output is not None:
            log('--output option is ignored in backup mode')
            options.output = None
        if options.withverify:
            log('--with-verify option is ignored in backup mode')
            options.withverify = False
        if not options.file:
            usage(1, '--file is required in backup mode')
    elif options.mode == RECOVER:
        if options.file is not None:
            log('--file option is ignored in recover mode')
            options.file = None
        if options.killold:
            log('--kill-old-on-full option is ignored in recover mode')
            options.killold = False
    else:
        assert options.mode == VERIFY
        if options.date is not None:
            log("--date option is ignored in verify mode")
            options.date = None
        if options.output is not None:
            log('--output option is ignored in verify mode')
            options.output = None
        if options.full:
            log('--full option is ignored in verify mode')
            options.full = False
        if options.gzip:
            log('--gzip option is ignored in verify mode')
            options.gzip = False
        if options.file is not None:
            log('--file option is ignored in verify mode')
            options.file = None
        if options.killold:
            log('--kill-old-on-full option is ignored in verify mode')
            options.killold = False
        if options.withverify:
            log('--with-verify option is ignored in verify mode')
            options.withverify = False
    return options
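
# Illustrative example (paths invented): a typical backup invocation
# parses into mode=BACKUP with repository and file set:
#
#   options = parseargs(['-B', '-r', '/var/backups/zodb',
#                        '-f', '/var/db/Data.fs'])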


# afile is a Python file object, or created by gzip.open().  The latter
# doesn't have a fileno() method, so to fsync it we need to reach into
# its underlying file object.
def fsync(afile):
    afile.flush()
    fileobject = getattr(afile, 'fileobj', afile)
    os.fsync(fileobject.fileno())

# Read bytes (no more than n, or to EOF if n is None) in chunks from the
# current position in file fp.  Pass each chunk as an argument to func().
# Return the total number of bytes read == the total number of bytes
# passed in all to func().  Leaves the file position just after the
# last byte read.


def dofile(func, fp, n=None):
    bytesread = 0
    while n is None or n > 0:
        if n is None:
            todo = READCHUNK
        else:
            todo = min(READCHUNK, n)
        data = fp.read(todo)
        if not data:
            break
        func(data)
        nread = len(data)
        bytesread += nread
        if n is not None:
            n -= nread
    return bytesread


def checksum(fp, n):
    # Checksum the first n bytes of the specified file
    sum = md5()

    def func(data):
        sum.update(data)
    dofile(func, fp, n)
    return sum.hexdigest()


def file_size(fp):
    # Compute the number of bytes that can be read from fp
    def func(data):
        pass
    return dofile(func, fp, None)


def checksum_and_size(fp):
    # Return the md5 checksum of the file along with its size
    sum = md5()

    def func(data):
        sum.update(data)
    size = dofile(func, fp, None)
    return sum.hexdigest(), size


def copyfile(options, dst, start, n):
    # Copy n bytes from the source file (options.file) to file dst,
    # starting at offset start.  For robustness, we first write, flush and
    # fsync to a temp file, then rename the temp file at the end.
    sum = md5()
    ifp = open(options.file, 'rb')
    ifp.seek(start)
    tempname = os.path.join(os.path.dirname(dst), 'tmp.tmp')
    if options.gzip:
        ofp = gzip.open(tempname, 'wb')
    else:
        ofp = open(tempname, 'wb')

    def func(data):
        sum.update(data)
        ofp.write(data)

    ndone = dofile(func, ifp, n)
    assert ndone == n

    ifp.close()
    fsync(ofp)
    ofp.close()
    os.rename(tempname, dst)
    return sum.hexdigest()


def concat(files, ofp=None):
    # Concatenate a bunch of files from the repository, output to 'ofp' if
    # given.  Return the number of bytes written and the md5 checksum of the
    # bytes.
    sum = md5()

    def func(data):
        sum.update(data)
        if ofp:
            ofp.write(data)
    bytesread = 0
    for f in files:
        # Auto uncompress
        if f.endswith('fsz'):
            ifp = gzip.open(f, 'rb')
        else:
            ifp = open(f, 'rb')
        bytesread += dofile(func, ifp)
        ifp.close()
    return bytesread, sum.hexdigest()


def recover_repofiles(options, repofiles, datfile, outfp):
    if options.withverify:
        with open(datfile) as fp:
            truth_dict = {}
            for line in fp:
                fn, startpos, endpos, sum = line.split()
                startpos = int(startpos)
                endpos = int(endpos)
                filename = os.path.join(options.repository,
                                        os.path.basename(fn))
                truth_dict[filename] = {
                    'size': endpos - startpos,
                    'sum': sum,
                }
        totalsz = 0
        for repofile in repofiles:
            reposz, reposum = concat([repofile], outfp)
            expected_truth = truth_dict[repofile]
            if reposz != expected_truth['size']:
                raise VerificationFail(
                    "%s is %d bytes, should be %d bytes" % (
                        repofile, reposz, expected_truth['size']))
            if reposum != expected_truth['sum']:
                raise VerificationFail(
                    "{} has checksum {} instead of {}".format(
                        repofile, reposum, expected_truth['sum']))
            totalsz += reposz
            log("Recovered chunk %s : %s bytes, md5: %s",
                repofile, reposz, reposum)
        log("Recovered a total of %s bytes", totalsz)
    else:
        reposz, reposum = concat(repofiles, outfp)
        log('Recovered %s bytes, md5: %s', reposz, reposum)


def gen_filedate(options):
    return getattr(options, 'test_now', time.gmtime()[:6])


def gen_filename(options, ext=None, now=None):
    if ext is None:
        if options.full:
            ext = '.fs'
        else:
            ext = '.deltafs'
        if options.gzip:
            ext += 'z'
    # Hook for testing
    if now is None:
        now = gen_filedate(options)
    t = now + (ext,)
    return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t
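
# For example (illustrative): with options.full and options.gzip set and
# now == (2024, 10, 22, 9, 13, 0), gen_filename() returns
# '2024-10-22-09-13-00.fsz'.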

# Return a list of files needed to reproduce state at time options.date.
# This is a list, in chronological order, of the .fs[z] and .deltafs[z]
# files, from the time of the most recent full backup preceding
# options.date, up to options.date.


is_data_file = re.compile(r'\d{4}(?:-\d\d){5}\.(?:delta)?fsz?$').match
del re
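
# (Illustrative) the pattern accepts names produced by gen_filename(),
# such as '2024-10-22-09-13-00.fs' or '2024-10-22-10-00-00.deltafsz',
# and rejects the companion '.dat' and '.index' metadata files.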


def find_files(options):
    when = options.date
    if not when:
        when = gen_filename(options, ext='')
    log('looking for files between last full backup and %s...', when)
    # newest file first
    all = sorted(
        filter(is_data_file, os.listdir(options.repository)), reverse=True)
    # Find the last full backup before date, then include all the
    # incrementals between that full backup and "when".
    needed = []
    for fname in all:
        root, ext = os.path.splitext(fname)
        if root <= when:
            needed.append(fname)
            if ext in ('.fs', '.fsz'):
                break
    # Turn the file names into full paths inside the repository directory
    needed = [os.path.join(options.repository, f) for f in needed]
    # Restore back to chronological order
    needed.reverse()
    if needed:
        log('files needed to recover state as of %s:', when)
        for f in needed:
            log('\t%s', f)
    else:
        log('no files found')
    return needed

# Scan the .dat file corresponding to the last full backup performed.
# Return
#
#     filename, startpos, endpos, checksum
#
# of the last incremental.  If there is no .dat file, or the .dat file
# is empty, return
#
#     None, None, None, None
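#
# Each line of the .dat file holds "filename startpos endpos checksum",
# for example (values invented for illustration):
#
#     /backups/2024-10-22-09-13-00.fs 0 12345 0123456789abcdef0123456789abcdef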


def scandat(repofiles):
    fullfile = repofiles[0]
    datfile = os.path.splitext(fullfile)[0] + '.dat'
    fn = startpos = endpos = sum = None  # assume .dat file missing or empty
    try:
        fp = open(datfile)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
    else:
        # We only care about the last one.
        lines = fp.readlines()
        fp.close()
        if lines:
            fn, startpos, endpos, sum = lines[-1].split()
            startpos = int(startpos)
            endpos = int(endpos)

    return fn, startpos, endpos, sum


def delete_old_backups(options):
    # Delete all full backup files except for the most recent full backup file
    all = sorted(filter(is_data_file, os.listdir(options.repository)))

    deletable = []
    full = []
    for fname in all:
        root, ext = os.path.splitext(fname)
        if ext in ('.fs', '.fsz'):
            full.append(fname)
        if ext in ('.fs', '.fsz', '.deltafs', '.deltafsz'):
            deletable.append(fname)

    # keep most recent full
    if not full:
        return

    recentfull = full.pop(-1)
    deletable.remove(recentfull)
    root, ext = os.path.splitext(recentfull)
    dat = root + '.dat'
    if dat in deletable:
        deletable.remove(dat)
    index = root + '.index'
    if index in deletable:
        deletable.remove(index)

    for fname in deletable:
        log('removing old backup file %s (and .dat / .index)', fname)
        root, ext = os.path.splitext(fname)
        try:
            os.unlink(os.path.join(options.repository, root + '.dat'))
        except OSError:
            pass
        try:
            os.unlink(os.path.join(options.repository, root + '.index'))
        except OSError:
            pass
        os.unlink(os.path.join(options.repository, fname))


def do_full_backup(options):
    options.full = True
    tnow = gen_filedate(options)
    dest = os.path.join(options.repository, gen_filename(options, now=tnow))
    if os.path.exists(dest):
        raise WouldOverwriteFiles('Cannot overwrite existing file: %s' % dest)
    # Find the file position of the last completed transaction.
    fs = FileStorage(options.file, read_only=True)
    # Note that the FileStorage ctor calls read_index() which scans the file
    # and returns "the position just after the last valid transaction record".
    # getSize() then returns this position, which is exactly what we want,
    # because we only want to copy stuff from the beginning of the file to the
    # last valid transaction record.
    pos = fs.getSize()
    # Save the storage index into the repository
    index_file = os.path.join(options.repository,
                              gen_filename(options, '.index', tnow))
    log('writing index')
    fs._index.save(pos, index_file)
    fs.close()
    log('writing full backup: %s bytes to %s', pos, dest)
    sum = copyfile(options, dest, 0, pos)
    # Write the data file for this full backup
    datfile = os.path.splitext(dest)[0] + '.dat'
    fp = open(datfile, 'w')
    print(dest, 0, pos, sum, file=fp)
    fp.flush()
    os.fsync(fp.fileno())
    fp.close()
    if options.killold:
        delete_old_backups(options)


def do_incremental_backup(options, reposz, repofiles):
    options.full = False
    tnow = gen_filedate(options)
    dest = os.path.join(options.repository, gen_filename(options, now=tnow))
    if os.path.exists(dest):
        raise WouldOverwriteFiles('Cannot overwrite existing file: %s' % dest)
    # Find the file position of the last completed transaction.
    fs = FileStorage(options.file, read_only=True)
    # Note that the FileStorage ctor calls read_index() which scans the file
    # and returns "the position just after the last valid transaction record".
    # getSize() then returns this position, which is exactly what we want,
    # because we only want to copy stuff from the beginning of the file to the
    # last valid transaction record.
    pos = fs.getSize()
    log('writing index')
    index_file = os.path.join(options.repository,
                              gen_filename(options, '.index', tnow))
    fs._index.save(pos, index_file)
    fs.close()
    log('writing incremental: %s bytes to %s', pos - reposz, dest)
    sum = copyfile(options, dest, reposz, pos - reposz)
    # The first file in repofiles points to the last full backup.  Use this
    # to get the .dat file and append the information for this incremental
    # to that file.
    fullfile = repofiles[0]
    datfile = os.path.splitext(fullfile)[0] + '.dat'
    # This .dat file better exist.  Let the exception percolate if not.
    fp = open(datfile, 'a')
    print(dest, reposz, pos, sum, file=fp)
    fp.flush()
    os.fsync(fp.fileno())
    fp.close()


def do_backup(options):
    repofiles = find_files(options)
    # See if we need to do a full backup
    if options.full or not repofiles:
        log('doing a full backup')
        do_full_backup(options)
        return
    srcsz = os.path.getsize(options.file)
    if options.quick:
        fn, startpos, endpos, sum = scandat(repofiles)
        # If the .dat file was missing, or was empty, do a full backup
        if (fn, startpos, endpos, sum) == (None, None, None, None):
            log('missing or empty .dat file (full backup)')
            do_full_backup(options)
            return
        # Has the file shrunk, possibly because of a pack?
        if srcsz < endpos:
            log('file shrunk, possibly because of a pack (full backup)')
            do_full_backup(options)
            return
        # Now check the md5 sum of the source file, from the last
        # incremental's start and stop positions.
        srcfp = open(options.file, 'rb')
        srcfp.seek(startpos)
        srcsum = checksum(srcfp, endpos - startpos)
        srcfp.close()
        log('last incremental file: %s', fn)
        log('last incremental checksum: %s', sum)
        log('source checksum range: [%s..%s], sum: %s',
            startpos, endpos, srcsum)
        if sum == srcsum:
            if srcsz == endpos:
                log('No changes, nothing to do')
                return
            log('doing incremental, starting at: %s', endpos)
            do_incremental_backup(options, endpos, repofiles)
            return
    else:
        # This way is much slower, and more disk i/o intensive, but it's
        # also more accurate since it checks the actual existing files
        # instead of the information in the .dat file.
        #
        # See if we can do an incremental, based on the files that already
        # exist.  This call of concat() will not write an output file.
        reposz, reposum = concat(repofiles)
        log('repository state: %s bytes, md5: %s', reposz, reposum)
        # Get the md5 checksum of the source file, up to two file positions:
        # the entire size of the file, and up to the file position of the
        # last incremental backup.
        srcfp = open(options.file, 'rb')
        srcsum = checksum(srcfp, srcsz)
        srcfp.seek(0)
        srcsum_backedup = checksum(srcfp, reposz)
        srcfp.close()
        log('current state   : %s bytes, md5: %s', srcsz, srcsum)
        log('backed up state : %s bytes, md5: %s', reposz, srcsum_backedup)
        # Has nothing changed?
        if srcsz == reposz and srcsum == reposum:
            log('No changes, nothing to do')
            return
        # Has the file shrunk, probably because of a pack?
        if srcsz < reposz:
            log('file shrunk, possibly because of a pack (full backup)')
            do_full_backup(options)
            return
        # The source file is larger than the repository.  If the md5
        # checksums match, then we know we can do an incremental backup.
        # If they don't, then perhaps the file was packed at some point
        # (or a non-transactional undo was performed, but this is
        # deprecated).  Only do a full backup if forced to.
        if reposum == srcsum_backedup:
            log('doing incremental, starting at: %s', reposz)
            do_incremental_backup(options, reposz, repofiles)
            return
    # The checksums don't match, meaning the front of the source file has
    # changed.  We'll need to do a full backup in that case.
    log('file changed, possibly because of a pack (full backup)')
    do_full_backup(options)


def do_full_recover(options, repofiles):
    files_to_close = ()
    if options.output is None:
        log('Recovering file to stdout')
        outfp = sys.stdout
    else:
        # Delete the old ZODB before recovering the backup, as the size of
        # the old ZODB plus the partial file may exceed the free disk space
        if os.path.exists(options.output):
            log('Deleting old %s', options.output)
            os.unlink(options.output)
        log('Recovering file to %s', options.output)
        temporary_output_file = options.output + '.part'
        outfp = open(temporary_output_file, 'wb')
        files_to_close += (outfp,)

    try:
        datfile = os.path.splitext(repofiles[0])[0] + '.dat'
        recover_repofiles(options, repofiles, datfile, outfp)
    finally:
        for f in files_to_close:
            f.close()

    if options.output is not None:
        try:
            os.rename(temporary_output_file, options.output)
        except OSError:
            log("ZODB has been fully recovered as %s, but it cannot be"
                " renamed to %s", temporary_output_file, options.output)
            raise

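
# Strategy of the incremental recover below: find the last chunk recorded
# in the .dat file that is wholly contained in the existing output file,
# verify that chunk's md5 checksum against the bytes already on disk, then
# rewrite the output from the end of that chunk using only the remaining
# backup files.  If no such chunk exists, or the checksum does not match,
# fall back to a full recover.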

def do_incremental_recover(options, repofiles):
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    log('Recovering (incrementally) file to %s', options.output)
    with open(options.output, 'r+b') as outfp:
        outfp.seek(0, 2)
        initial_length = outfp.tell()
    # Find the last chunk recorded in the .dat file that is entirely
    # contained in the existing output file.
    with open(datfile) as fp:
        previous_chunk = None
        for line in fp:
            fn, startpos, endpos, _ = chunk = line.split()
            startpos = int(startpos)
            endpos = int(endpos)
            if endpos > initial_length:
                break
            previous_chunk = chunk

    if previous_chunk == chunk:
        if endpos == initial_length:
            log('Target file is same size as latest backup, '
                'doing nothing.')
            return
        else:
            log('Target file is larger than latest backup, '
                'falling back to a full recover.')
            return do_full_recover(options, repofiles)
    if previous_chunk is None:
        log('Target file smaller than full backup, '
            'falling back to a full recover.')
        return do_full_recover(options, repofiles)
    # Verify the last fully-contained chunk against the backup's recorded
    # checksum before reusing the bytes already on disk.
    check_startpos = int(previous_chunk[1])
    check_endpos = int(previous_chunk[2])
    with open(options.output, 'r+b') as outfp:
        outfp.seek(check_startpos)
        check_sum = checksum(outfp, check_endpos - check_startpos)
        assert outfp.tell() == startpos, (outfp.tell(), startpos)
    if previous_chunk[3] != check_sum:
        log('Last whole common chunk checksum did not match with backup, '
            'falling back to a full recover.')
        return do_full_recover(options, repofiles)

    if startpos < initial_length:
        log('Truncating target file %i bytes before its end',
            initial_length - startpos)
    filename = os.path.join(options.repository,
                            os.path.basename(fn))
    first_file_to_restore = repofiles.index(filename)
    assert first_file_to_restore > 0, (
        first_file_to_restore, options.repository, fn, filename, repofiles)

    temporary_output_file = options.output + '.part'
    os.rename(options.output, temporary_output_file)
    with open(temporary_output_file, 'r+b') as outfp:
        outfp.seek(startpos)
        recover_repofiles(options,
                          repofiles[first_file_to_restore:],
                          datfile,
                          outfp)
    os.rename(temporary_output_file, options.output)


def do_recover(options):
    # Find the first full backup at or before the specified date
    repofiles = find_files(options)
    if not repofiles:
        if options.date:
            raise NoFiles(f'No files in repository before {options.date}')
        else:
            raise NoFiles('No files in repository')

    # An incremental recover is only possible when writing to an existing
    # output file; recovery to stdout is always full.
    if (options.full or options.output is None
            or not os.path.exists(options.output)):
        do_full_recover(options, repofiles)
    else:
        do_incremental_recover(options, repofiles)

    if options.output is not None:
        last_base = os.path.splitext(repofiles[-1])[0]
        source_index = '%s.index' % last_base
        target_index = '%s.index' % options.output
        if os.path.exists(source_index):
            log('Restoring index file %s to %s',
                source_index, target_index)
            shutil.copyfile(source_index, target_index)
        else:
            log('No index file to restore: %s', source_index)


def do_verify(options):
    # Verify the sizes and checksums of all files mentioned in the .dat file
    repofiles = find_files(options)
    if not repofiles:
        raise NoFiles('No files in repository')
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    with open(datfile) as fp:
        for line in fp:
            fn, startpos, endpos, sum = line.split()
            startpos = int(startpos)
            endpos = int(endpos)
            filename = os.path.join(options.repository,
                                    os.path.basename(fn))
            expected_size = endpos - startpos
            log("Verifying %s", filename)
            try:
                if filename.endswith('fsz'):
                    actual_sum, size = get_checksum_and_size_of_gzipped_file(
                        filename, options.quick)
                    when_uncompressed = ' (when uncompressed)'
                else:
                    actual_sum, size = get_checksum_and_size_of_file(
                        filename, options.quick)
                    when_uncompressed = ''
            except OSError:
                raise VerificationFail("%s is missing" % filename)
            if size != expected_size:
                raise VerificationFail(
                    "%s is %d bytes%s, should be %d bytes" % (
                        filename, size, when_uncompressed, expected_size))
            elif not options.quick:
                if actual_sum != sum:
                    raise VerificationFail(
                        "%s has checksum %s%s instead of %s" % (
                            filename, actual_sum, when_uncompressed, sum))


def get_checksum_and_size_of_gzipped_file(filename, quick):
    with _GzipCloser(filename, 'rb') as fp:
        if quick:
            return None, file_size(fp)
        else:
            return checksum_and_size(fp)


def get_checksum_and_size_of_file(filename, quick):
    with open(filename, 'rb') as fp:
        fp.seek(0, 2)
        actual_size = fp.tell()
        if quick:
            actual_sum = None
        else:
            fp.seek(0)
            actual_sum = checksum(fp, actual_size)
    return actual_sum, actual_size


def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    options = parseargs(argv)
    try:
        if options.mode == BACKUP:
            do_backup(options)
        elif options.mode == RECOVER:
            do_recover(options)
        else:
            assert options.mode == VERIFY
            do_verify(options)
    except (RepozoError, OSError) as e:
        sys.exit(str(e))


if __name__ == '__main__':
    main()