• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zopefoundation / ZODB / 11588847965

30 Oct 2024 07:35AM UTC coverage: 83.932% (+0.2%) from 83.766%
11588847965

Pull #403

github

web-flow
Merge branch 'master' into repozo-incremental-recover
Pull Request #403: Repozo incremental recover

2451 of 3556 branches covered (68.93%)

219 of 222 new or added lines in 2 files covered. (98.65%)

37 existing lines in 2 files now uncovered.

13482 of 16063 relevant lines covered (83.93%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

91.73
/src/ZODB/scripts/repozo.py
1
#!/usr/bin/env python
2

3
# repozo.py -- incremental and full backups of a Data.fs file.
4
#
5
# Originally written by Anthony Baxter
6
# Significantly modified by Barry Warsaw
7

8
"""repozo.py -- incremental and full backups of a Data.fs file and index.
9

10
Usage: %(program)s [options]
11
Where:
12

13
    Exactly one of -B, -R, or -V must be specified:
14

15
    -B / --backup
16
        Backup current ZODB file.
17

18
    -R / --recover
19
        Restore a ZODB file from a backup.
20

21
    -V / --verify
22
        Verify backup integrity.
23

24
    -v / --verbose
25
        Verbose mode.
26

27
    -h / --help
28
        Print this text and exit.
29

30
    -r dir
31
    --repository=dir
32
        Repository directory containing the backup files.  This argument
33
        is required.  The directory must already exist.  You should not
34
        edit the files in this directory, or add your own files to it.
35

36
Options for -B/--backup:
37
    -f file
38
    --file=file
39
        Source Data.fs file.  This argument is required.
40

41
    -F / --full
42
        Force a full backup.  By default, an incremental backup is made
43
        if possible (e.g., if a pack has occurred since the last
44
        incremental backup, a full backup is necessary).
45

46
    -Q / --quick
47
        Verify via md5 checksum only the last incremental written.  This
48
        significantly reduces the disk i/o at the (theoretical) cost of
49
        inconsistency.  This is a probabilistic way of determining whether
50
        a full backup is necessary.
51

52
    -z / --gzip
53
        Compress with gzip the backup files.  Uses the default zlib
54
        compression level.  By default, gzip compression is not used.
55

56
    -k / --kill-old-on-full
57
        If a full backup is created, remove any prior full or incremental
58
        backup files (and associated metadata files) from the repository
59
        directory.
60

61
Options for -R/--recover:
62
    -D str
63
    --date=str
64
        Recover state as of this date.  Specify UTC (not local) time.
65
            yyyy-mm-dd[-hh[-mm[-ss]]]
66
        By default, current time is used.
67

68
    -o filename
69
    --output=filename
70
        Write recovered ZODB to given file.  By default, the file is
71
        written to stdout.
72

73
        Note:  for the stdout case, the index file will **not** be restored
74
        automatically.
75

76
    -F / --full
77
        Force a full recover. By default, an incremental recover is made
78
        if possible, by only copying the latest backup delta to the recovered
79
        ZODB file. A full recover will always be done if a pack has occurred
80
        since the last incremental backup.
81

82
    -w
83
    --with-verify
84
        Verify on the fly the backup files on recovering. This option runs
85
        the same checks as when repozo is run in -V/--verify mode, and
86
        allows to verify and recover a backup in one single step. If a sanity
87
        check fails, the partially recovered ZODB will be left in place.
88

89
Options for -V/--verify:
90
    -Q / --quick
91
        Verify file sizes only (skip md5 checksums).
92
"""
93

94
import errno
1✔
95
import getopt
1✔
96
import gzip
1✔
97
import os
1✔
98
import re
1✔
99
import shutil
1✔
100
import sys
1✔
101
import time
1✔
102
from hashlib import md5
1✔
103

104
from ZODB.FileStorage import FileStorage
1✔
105

106

107
program = sys.argv[0]
1✔
108

109
BACKUP = 1
1✔
110
RECOVER = 2
1✔
111
VERIFY = 3
1✔
112

113
COMMASPACE = ', '
1✔
114
READCHUNK = 16 * 1024
1✔
115
VERBOSE = False
1✔
116

117

118
class RepozoError(Exception):
    """Base class for all errors raised by repozo."""
120

121

122
class WouldOverwriteFiles(RepozoError):
    """Refusing to clobber an existing backup or output file."""
124

125

126
class NoFiles(RepozoError):
    """The repository holds no (matching) backup files."""
128

129

130
class VerificationFail(RepozoError):
    """A backup chunk's size or md5 checksum did not match its record."""
132

133

134
class _GzipCloser:
1✔
135

136
    def __init__(self, fqn, mode):
1✔
137
        self._opened = gzip.open(fqn, mode)
1✔
138

139
    def __enter__(self):
1✔
140
        return self._opened
1✔
141

142
    def __exit__(self, exc_type, exc_value, traceback):
1✔
143
        self._opened.close()
1✔
144

145

146
def usage(code, msg=''):
    """Print the module usage text (and optional message), then exit.

    Exit code 0 sends the text to stdout; any other code sends it to
    stderr.
    """
    stream = sys.stdout if code == 0 else sys.stderr
    print(__doc__ % globals(), file=stream)
    if msg:
        print(msg, file=stream)
    sys.exit(code)
156

157

158
def log(msg, *args):
    """Print a %-formatted message when verbose mode is enabled."""
    if not VERBOSE:
        return
    # Use stderr here so that -v flag works with -R and no -o
    print(msg % args, file=sys.stderr)
162

163

164
def error(msg, *args):
    """Unconditionally print a %-formatted message to stderr."""
    text = msg % args
    print(text, file=sys.stderr)
166

167

168
def parseargs(argv):
    """Parse command-line arguments into an Options instance.

    Exits via usage() on any parse or sanity error.  Options that do
    not apply to the selected mode are logged and reset rather than
    rejected.
    """
    global VERBOSE
    try:
        opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w',
                                   ['backup',
                                    'recover',
                                    'verify',
                                    'verbose',
                                    'help',
                                    'repository=',
                                    'file=',
                                    'full',
                                    'quick',
                                    'gzip',
                                    'kill-old-on-full',
                                    'date=',
                                    'output=',
                                    'with-verify',
                                    ])
    except getopt.error as msg:
        usage(1, msg)

    class Options:
        mode = None         # BACKUP, RECOVER or VERIFY
        file = None         # name of input Data.fs file
        repository = None   # name of directory holding backups
        full = False        # True forces full backup or full recovery
        date = None         # -D argument, if any
        output = None       # where to write recovered data; None = stdout
        quick = False       # -Q flag state
        gzip = False        # -z flag state
        killold = False     # -k flag state
        withverify = False  # -w flag state

    options = Options()

    def set_mode(mode):
        # -B, -R and -V all compete for the single mode slot.
        if options.mode is not None:
            usage(1, '-B, -R, and -V are mutually exclusive')
        options.mode = mode

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--verbose'):
            VERBOSE = True
        elif opt in ('-R', '--recover'):
            set_mode(RECOVER)
        elif opt in ('-B', '--backup'):
            set_mode(BACKUP)
        elif opt in ('-V', '--verify'):
            set_mode(VERIFY)
        elif opt in ('-Q', '--quick'):
            options.quick = True
        elif opt in ('-f', '--file'):
            options.file = arg
        elif opt in ('-r', '--repository'):
            options.repository = arg
        elif opt in ('-F', '--full'):
            options.full = True
        elif opt in ('-D', '--date'):
            options.date = arg
        elif opt in ('-o', '--output'):
            options.output = arg
        elif opt in ('-z', '--gzip'):
            options.gzip = True
        elif opt in ('-k', '--kill-old-on-full'):
            options.killold = True
        elif opt in ('-w', '--with-verify'):
            options.withverify = True
        else:
            assert False, (opt, arg)

    # Any other arguments are invalid
    if args:
        usage(1, 'Invalid arguments: ' + COMMASPACE.join(args))

    # Sanity checks
    if options.mode is None:
        usage(1, 'Either --backup, --recover or --verify is required')
    if options.repository is None:
        usage(1, '--repository is required')

    def reset(attr, default, message):
        # Warn about, and neutralize, an option foreign to the mode.
        log(message)
        setattr(options, attr, default)

    if options.mode == BACKUP:
        if options.date is not None:
            reset('date', None, '--date option is ignored in backup mode')
        if options.output is not None:
            reset('output', None, '--output option is ignored in backup mode')
        if options.withverify:
            reset('withverify', False,
                  '--with-verify option is ignored in backup mode')
        if not options.file:
            usage(1, '--file is required in backup mode')
    elif options.mode == RECOVER:
        if options.file is not None:
            reset('file', None, '--file option is ignored in recover mode')
        if options.killold:
            reset('killold', False,
                  '--kill-old-on-full option is ignored in recover mode')
    else:
        assert options.mode == VERIFY
        if options.date is not None:
            reset('date', None, "--date option is ignored in verify mode")
        if options.output is not None:
            reset('output', None, '--output option is ignored in verify mode')
        if options.full:
            reset('full', False, '--full option is ignored in verify mode')
        if options.gzip:
            reset('gzip', False, '--gzip option is ignored in verify mode')
        if options.file is not None:
            reset('file', None, '--file option is ignored in verify mode')
        if options.killold:
            reset('killold', False,
                  '--kill-old-on-full option is ignored in verify mode')
        if options.withverify:
            reset('withverify', False,
                  '--with-verify option is ignored in verify mode')
    return options
294

295

296
# afile is a Python file object, or created by gzip.open().  The latter
297
# doesn't have a fileno() method, so to fsync it we need to reach into
298
# its underlying file object.
299
def fsync(afile):
    """Flush afile and force its contents to disk.

    Objects created by gzip.open() keep the real file in their
    ``fileobj`` attribute (per the original comment they lack a usable
    fileno() themselves), so fsync on that when it is present.
    """
    afile.flush()
    raw = getattr(afile, 'fileobj', afile)
    os.fsync(raw.fileno())
303

304
# Read bytes (no more than n, or to EOF if n is None) in chunks from the
305
# current position in file fp.  Pass each chunk as an argument to func().
306
# Return the total number of bytes read == the total number of bytes
307
# passed in all to func().  Leaves the file position just after the
308
# last byte read.
309

310

311
def dofile(func, fp, n=None):
    """Feed up to *n* bytes (or all of *fp* if n is None) to func().

    Reads in READCHUNK-sized pieces from fp's current position, calls
    func() once per piece, and returns the total number of bytes read.
    The file position is left just after the last byte read.
    """
    total = 0
    while True:
        if n is not None and n <= 0:
            break
        want = READCHUNK if n is None else min(READCHUNK, n)
        piece = fp.read(want)
        if not piece:
            break  # EOF
        func(piece)
        got = len(piece)
        total += got
        if n is not None:
            n -= got
    return total
327

328

329
def checksum(fp, n):
    """Return the md5 hex digest of the first n bytes of fp."""
    digest = md5()
    # md5.update is the per-chunk consumer; dofile drives the reads.
    dofile(digest.update, fp, n)
    return digest.hexdigest()
337

338

339
def file_size(fp):
    """Return the number of bytes readable from fp's current position."""
    # A no-op consumer; dofile's return value is the byte count.
    return dofile(lambda _piece: None, fp, None)
344

345

346
def checksum_and_size(fp):
    """Return (md5 hexdigest, byte count) for all data readable from fp."""
    digest = md5()
    nbytes = dofile(digest.update, fp, None)
    return digest.hexdigest(), nbytes
354

355

356
def copyfile(options, dst, start, n):
    """Copy n bytes from options.file, starting at offset start, to dst.

    For robustness, the data is first written, flushed and fsynced to a
    temp file in dst's directory, which is then renamed to dst.  The
    output is gzip-compressed when options.gzip is set.  Returns the md5
    hex digest of the copied bytes.
    """
    sum = md5()
    tempname = os.path.join(os.path.dirname(dst), 'tmp.tmp')
    opener = gzip.open if options.gzip else open
    # Context managers close both files even if the copy fails part way
    # (the original leaked both handles on any exception).
    with open(options.file, 'rb') as ifp, opener(tempname, 'wb') as ofp:
        ifp.seek(start)

        def func(data):
            sum.update(data)
            ofp.write(data)

        ndone = dofile(func, ifp, n)
        assert ndone == n
        fsync(ofp)
    os.rename(tempname, dst)
    return sum.hexdigest()
381

382

383
def concat(files, ofp=None):
    """Concatenate repository files, writing to 'ofp' if given.

    Files whose names end in 'fsz' are transparently gunzipped.  Returns
    (total number of bytes, md5 hexdigest of those bytes).
    """
    sum = md5()

    def func(data):
        sum.update(data)
        if ofp:
            ofp.write(data)

    bytesread = 0
    for f in files:
        # Auto uncompress
        opener = gzip.open if f.endswith('fsz') else open
        # 'with' guarantees the input file is closed even if dofile raises
        # (the original leaked the handle on error).
        with opener(f, 'rb') as ifp:
            bytesread += dofile(func, ifp)
    return bytesread, sum.hexdigest()
403

404

405
def recover_repofiles(options, repofiles, datfile, outfp):
    """Concatenate repofiles into outfp, optionally verifying each chunk.

    When options.withverify is set, every chunk's size and md5 checksum
    are compared against the records in datfile; VerificationFail is
    raised on the first mismatch (any partially written output is left
    in place for the caller to deal with).
    """
    if not options.withverify:
        reposz, reposum = concat(repofiles, outfp)
        log('Recovered %s bytes, md5: %s', reposz, reposum)
        return

    # Build the expected size/checksum table from the .dat file.
    truth_dict = {}
    with open(datfile) as fp:
        for line in fp:
            fn, startpos, endpos, sum = line.split()
            startpos = int(startpos)
            endpos = int(endpos)
            filename = os.path.join(options.repository,
                                    os.path.basename(fn))
            truth_dict[filename] = {
                'size': endpos - startpos,
                'sum': sum,
            }
    totalsz = 0
    for repofile in repofiles:
        reposz, reposum = concat([repofile], outfp)
        expected_truth = truth_dict[repofile]
        if reposz != expected_truth['size']:
            raise VerificationFail(
                "%s is %d bytes, should be %d bytes" % (
                    repofile, reposz, expected_truth['size']))
        if reposum != expected_truth['sum']:
            raise VerificationFail(
                "{} has checksum {} instead of {}".format(
                    repofile, reposum, expected_truth['sum']))
        totalsz += reposz
        log("Recovered chunk %s : %s bytes, md5: %s",
            repofile, reposz, reposum)
    log("Recovered a total of %s bytes", totalsz)
438

439

440
def gen_filedate(options):
    """Return the (y, m, d, h, min, s) tuple used to stamp backup names.

    Tests may pin the clock by setting ``options.test_now``; otherwise
    the current UTC time is used.
    """
    try:
        return options.test_now
    except AttributeError:
        return time.gmtime()[:6]
442

443

444
def gen_filename(options, ext=None, now=None):
    """Build a timestamped backup file name, e.g. 2024-01-02-03-04-05.fs.

    When ext is None it is derived from the options: '.fs' for a full
    backup, '.deltafs' for an incremental, with a trailing 'z' if gzip
    compression is on.  ``now`` (a 6-tuple) is a hook for testing.
    """
    if ext is None:
        ext = '.fs' if options.full else '.deltafs'
        if options.gzip:
            ext += 'z'
    if now is None:
        now = gen_filedate(options)
    return '%04d-%02d-%02d-%02d-%02d-%02d%s' % (now + (ext,))
457

458
# Return a list of files needed to reproduce state at time options.date.
459
# This is a list, in chronological order, of the .fs[z] and .deltafs[z]
460
# files, from the time of the most recent full backup preceding
461
# options.date, up to options.date.
462

463

464
# Matches backup data file names of the form yyyy-mm-dd-hh-mm-ss with a
# .fs/.fsz (full) or .deltafs/.deltafsz (incremental) extension.
is_data_file = re.compile(r'\d{4}(?:-\d\d){5}\.(?:delta)?fsz?$').match
del re  # only needed to build the matcher above
466

467

468
def find_files(options):
    """Return, oldest first, the repository files needed to reproduce
    the state as of options.date (or of now, when no date was given):
    the most recent full backup at or before that moment, followed by
    every later incremental up to it.
    """
    when = options.date
    if not when:
        when = gen_filename(options, ext='')
    log('looking for files between last full backup and %s...', when)
    # newest file first
    candidates = sorted(
        filter(is_data_file, os.listdir(options.repository)), reverse=True)
    # Collect incrementals back to (and including) the newest full
    # backup not after `when`.
    needed = []
    for fname in candidates:
        root, ext = os.path.splitext(fname)
        if root > when:
            continue
        needed.append(fname)
        if ext in ('.fs', '.fsz'):
            break  # found the governing full backup
    # Qualify with the repository directory and restore chronological order.
    needed = [os.path.join(options.repository, f) for f in reversed(needed)]
    if needed:
        log('files needed to recover state as of %s:', when)
        for f in needed:
            log('\t%s', f)
    else:
        log('no files found')
    return needed
496

497
# Scan the .dat file corresponding to the last full backup performed.
498
# Return
499
#
500
#     filename, startpos, endpos, checksum
501
#
502
# of the last incremental.  If there is no .dat file, or the .dat file
503
# is empty, return
504
#
505
#     None, None, None, None
506

507

508
def scandat(repofiles):
    """Scan the .dat file of the last full backup (repofiles[0]).

    Returns (filename, startpos, endpos, checksum) of the last recorded
    incremental, or (None, None, None, None) when the .dat file is
    missing or empty.
    """
    fullfile = repofiles[0]
    datfile = os.path.splitext(fullfile)[0] + '.dat'
    fn = startpos = endpos = sum = None  # assume .dat file missing or empty
    try:
        # 'with' closes the file even if readlines() fails; the original
        # leaked the handle in that case.
        with open(datfile) as fp:
            lines = fp.readlines()
    except FileNotFoundError:
        # Equivalent to the old errno.ENOENT check (PEP 3151); any other
        # OSError still propagates.
        pass
    else:
        # We only care about the last one.
        if lines:
            fn, startpos, endpos, sum = lines[-1].split()
            startpos = int(startpos)
            endpos = int(endpos)

    return fn, startpos, endpos, sum
527

528

529
def delete_old_backups(options):
    """Delete every backup data file except the most recent full backup.

    The surviving full backup keeps its .dat and .index companions;
    every deleted data file also takes its own .dat/.index along, when
    those exist.
    """
    everything = sorted(filter(is_data_file, os.listdir(options.repository)))

    deletable = []
    full = []
    for fname in everything:
        root, ext = os.path.splitext(fname)
        if ext in ('.fs', '.fsz'):
            full.append(fname)
        if ext in ('.fs', '.fsz', '.deltafs', '.deltafsz'):
            deletable.append(fname)

    # Without any full backup there is nothing to protect, so delete nothing.
    if not full:
        return

    # keep most recent full
    recentfull = full.pop(-1)
    deletable.remove(recentfull)
    root, ext = os.path.splitext(recentfull)
    for companion in (root + '.dat', root + '.index'):
        if companion in deletable:
            deletable.remove(companion)

    for fname in deletable:
        log('removing old backup file %s (and .dat / .index)', fname)
        root, ext = os.path.splitext(fname)
        for suffix in ('.dat', '.index'):
            try:
                os.unlink(os.path.join(options.repository, root + suffix))
            except OSError:
                pass  # the companion file may simply not exist
        os.unlink(os.path.join(options.repository, fname))
568

569

570
def do_full_backup(options):
    """Write a complete copy of the Data.fs plus its .index and .dat files.

    Raises WouldOverwriteFiles if a backup with the same timestamp
    already exists.  Honors options.killold by pruning older backups
    afterwards.
    """
    options.full = True
    tnow = gen_filedate(options)
    dest = os.path.join(options.repository, gen_filename(options, now=tnow))
    if os.path.exists(dest):
        raise WouldOverwriteFiles('Cannot overwrite existing file: %s' % dest)
    # Find the file position of the last completed transaction.
    fs = FileStorage(options.file, read_only=True)
    # Note that the FileStorage ctor calls read_index() which scans the file
    # and returns "the position just after the last valid transaction record".
    # getSize() then returns this position, which is exactly what we want,
    # because we only want to copy stuff from the beginning of the file to the
    # last valid transaction record.
    pos = fs.getSize()
    # Save the storage index into the repository
    index_file = os.path.join(options.repository,
                              gen_filename(options, '.index', tnow))
    log('writing index')
    fs._index.save(pos, index_file)
    fs.close()
    log('writing full backup: %s bytes to %s', pos, dest)
    sum = copyfile(options, dest, 0, pos)
    # Write the data file for this full backup.  The context manager closes
    # the handle even if print/fsync raises (the original leaked it).
    datfile = os.path.splitext(dest)[0] + '.dat'
    with open(datfile, 'w') as fp:
        print(dest, 0, pos, sum, file=fp)
        fp.flush()
        os.fsync(fp.fileno())
    if options.killold:
        delete_old_backups(options)
601

602

603
def do_incremental_backup(options, reposz, repofiles):
    """Back up only the bytes appended since the last backup (at reposz).

    Writes a .deltafs[z] file and appends its record to the .dat file of
    the governing full backup (repofiles[0]).  Raises WouldOverwriteFiles
    if a backup with the same timestamp already exists.
    """
    options.full = False
    tnow = gen_filedate(options)
    dest = os.path.join(options.repository, gen_filename(options, now=tnow))
    if os.path.exists(dest):
        raise WouldOverwriteFiles('Cannot overwrite existing file: %s' % dest)
    # Find the file position of the last completed transaction.
    fs = FileStorage(options.file, read_only=True)
    # Note that the FileStorage ctor calls read_index() which scans the file
    # and returns "the position just after the last valid transaction record".
    # getSize() then returns this position, which is exactly what we want,
    # because we only want to copy stuff from the beginning of the file to the
    # last valid transaction record.
    pos = fs.getSize()
    log('writing index')
    index_file = os.path.join(options.repository,
                              gen_filename(options, '.index', tnow))
    fs._index.save(pos, index_file)
    fs.close()
    log('writing incremental: %s bytes to %s',  pos-reposz, dest)
    sum = copyfile(options, dest, reposz, pos - reposz)
    # The first file in repofiles points to the last full backup.  Use this to
    # get the .dat file and append the information for this incremental to
    # that file.
    fullfile = repofiles[0]
    datfile = os.path.splitext(fullfile)[0] + '.dat'
    # This .dat file better exist.  Let the exception percolate if not.
    # The context manager closes the handle even on error (the original
    # leaked it if print/fsync raised).
    with open(datfile, 'a') as fp:
        print(dest, reposz, pos, sum, file=fp)
        fp.flush()
        os.fsync(fp.fileno())
635

636

637
def do_backup(options):
    """Perform a backup, choosing between full and incremental.

    A full backup happens when forced (-F), when the repository is
    empty, or when the source file looks inconsistent with what is
    already backed up (it shrank, or its backed-up prefix changed --
    both typical after a pack).  Otherwise only the bytes appended
    since the last backup are copied.  -Q selects the cheap check via
    the .dat file; the default re-checksums the actual backup files.
    """
    repofiles = find_files(options)
    # See if we need to do a full backup
    if options.full or not repofiles:
        log('doing a full backup')
        do_full_backup(options)
        return
    srcsz = os.path.getsize(options.file)
    if options.quick:
        fn, startpos, endpos, sum = scandat(repofiles)
        # If the .dat file was missing, or was empty, do a full backup
        if (fn, startpos, endpos, sum) == (None, None, None, None):
            log('missing or empty .dat file (full backup)')
            do_full_backup(options)
            return
        # Has the file shrunk, possibly because of a pack?
        if srcsz < endpos:
            log('file shrunk, possibly because of a pack (full backup)')
            do_full_backup(options)
            return
        # Now check the md5 sum of the source file, from the last
        # incremental's start and stop positions.
        srcfp = open(options.file, 'rb')
        srcfp.seek(startpos)
        srcsum = checksum(srcfp, endpos-startpos)
        srcfp.close()
        log('last incremental file: %s', fn)
        log('last incremental checksum: %s', sum)
        log('source checksum range: [%s..%s], sum: %s',
            startpos, endpos, srcsum)
        if sum == srcsum:
            if srcsz == endpos:
                log('No changes, nothing to do')
                return
            log('doing incremental, starting at: %s', endpos)
            do_incremental_backup(options, endpos, repofiles)
            return
    else:
        # This way is much slower, and more disk i/o intensive, but it's also
        # more accurate since it checks the actual existing files instead of
        # the information in the .dat file.
        #
        # See if we can do an incremental, based on the files that already
        # exist.  This call of concat() will not write an output file.
        reposz, reposum = concat(repofiles)
        log('repository state: %s bytes, md5: %s', reposz, reposum)
        # Get the md5 checksum of the source file, up to two file positions:
        # the entire size of the file, and up to the file position of the last
        # incremental backup.
        srcfp = open(options.file, 'rb')
        srcsum = checksum(srcfp, srcsz)
        srcfp.seek(0)
        srcsum_backedup = checksum(srcfp, reposz)
        srcfp.close()
        log('current state   : %s bytes, md5: %s', srcsz, srcsum)
        log('backed up state : %s bytes, md5: %s', reposz, srcsum_backedup)
        # Has nothing changed?
        if srcsz == reposz and srcsum == reposum:
            log('No changes, nothing to do')
            return
        # Has the file shrunk, probably because of a pack?
        if srcsz < reposz:
            log('file shrunk, possibly because of a pack (full backup)')
            do_full_backup(options)
            return
        # The source file is larger than the repository.  If the md5 checksums
        # match, then we know we can do an incremental backup.  If they don't,
        # then perhaps the file was packed at some point (or a
        # non-transactional undo was performed, but this is deprecated).  Only
        # do a full backup if forced to.
        if reposum == srcsum_backedup:
            log('doing incremental, starting at: %s', reposz)
            do_incremental_backup(options, reposz, repofiles)
            return
    # The checksums don't match, meaning the front of the source file has
    # changed.  We'll need to do a full backup in that case.
    log('file changed, possibly because of a pack (full backup)')
    do_full_backup(options)
715

716

717
def do_full_recover(options, repofiles):
    """Rebuild a ZODB from scratch out of repofiles.

    Without --output the data goes to stdout.  Otherwise any stale
    output file is deleted first, the data is recovered into
    <output>.part, and the .part file is renamed into place at the end.
    """
    if options.output is None:
        log('Recovering file to stdout')
        outfp = sys.stdout
        files_to_close = ()
    else:
        # Delete old ZODB before recovering backup as size of
        # old ZODB + full partial file may be superior to free disk space
        if os.path.exists(options.output):
            log('Deleting old %s', options.output)
            os.unlink(options.output)
        log('Recovering file to %s', options.output)
        temporary_output_file = options.output + '.part'
        outfp = open(temporary_output_file, 'wb')
        files_to_close = (outfp,)

    try:
        datfile = os.path.splitext(repofiles[0])[0] + '.dat'
        recover_repofiles(options, repofiles, datfile, outfp)
    finally:
        # Never close stdout; only the .part file we opened ourselves.
        for f in files_to_close:
            f.close()

    if options.output is not None:
        try:
            os.rename(temporary_output_file, options.output)
        except OSError:
            log("ZODB has been fully recovered as %s, but it cannot be renamed"
                " into : %s", temporary_output_file, options.output)
            raise
747

748

749
def do_incremental_recover(options, repofiles):
    """Bring an already-recovered options.output up to date incrementally.

    Compares the target's current length and checksums against the
    chunk table recorded in the full backup's .dat file and replays
    only the missing backup files.  Falls back to do_full_recover()
    whenever the target cannot be proven consistent with the backups.
    """
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    log('Recovering (incrementally) file to %s', options.output)
    # Current length of the already-recovered output file.
    with open(options.output, 'r+b') as outfp:
        outfp.seek(0, 2)
        initial_length = outfp.tell()
    # Walk the .dat records; after the loop, previous_chunk is the last
    # chunk wholly contained in the target (or None), and fn/startpos/
    # endpos/chunk describe the last line examined.
    with open(datfile) as fp:
        previous_chunk = None
        for line in fp:
            fn, startpos, endpos, _ = chunk = line.split()
            startpos = int(startpos)
            endpos = int(endpos)
            if endpos > initial_length:
                break
            previous_chunk = chunk

    if previous_chunk is None:
        log('Target file smaller than full backup, '
            'falling back to a full recover.')
        return do_full_recover(options, repofiles)
    # endpos here belongs to the final .dat record; if even that ends
    # before the target's length, the target has trailing unknown bytes.
    if endpos < initial_length:
        log('Target file is larger than latest backup, '
            'falling back to a full recover.')
        return do_full_recover(options, repofiles)
    # Checksum the span of the target covered by the last whole chunk.
    check_startpos = int(previous_chunk[1])
    check_endpos = int(previous_chunk[2])
    with open(options.output, 'r+b') as outfp:
        outfp.seek(check_startpos)
        check_sum = checksum(outfp, check_endpos - check_startpos)
    # NOTE(review): this branch compares chunk[3] (last record's
    # checksum) while the next compares previous_chunk[3]; when the
    # sizes match the loop ran to completion and both name the same
    # record -- confirm against the repozo tests.
    if endpos == initial_length and chunk[3] == check_sum:
        log('Target file is same size as latest backup, '
            'doing nothing.')
        return
    elif previous_chunk[3] != check_sum:
        if endpos == initial_length:
            log('Target file is not consistent with latest backup, '
                'falling back to a full recover.')
            return do_full_recover(options, repofiles)
        else:
            log('Last whole common chunk checksum did not match with backup, '
                'falling back to a full recover.')
            return do_full_recover(options, repofiles)

    # Replay from the start of the first chunk not wholly present; any
    # partial tail beyond that point gets overwritten.
    if startpos < initial_length:
        log('Truncating target file %i bytes before its end',
            initial_length - startpos)
    filename = os.path.join(options.repository,
                            os.path.basename(fn))
    first_file_to_restore = repofiles.index(filename)
    assert first_file_to_restore > 0, (
        first_file_to_restore, options.repository, fn, filename, repofiles)

    # Work on a .part file so an interrupted recover is detectable and
    # the original name never holds a half-written database.
    temporary_output_file = options.output + '.part'
    os.rename(options.output, temporary_output_file)
    with open(temporary_output_file, 'r+b') as outfp:
        outfp.seek(startpos)
        recover_repofiles(options,
                          repofiles[first_file_to_restore:],
                          datfile,
                          outfp)
    os.rename(temporary_output_file, options.output)
810

811

812
def do_recover(options):
    """Restore a Data.fs (and its .index file) from the backup repository.

    The newest backup set at or before ``options.date`` is used.  A full
    recover is forced when -F/--full was given, when no output file was
    specified, or when the output file does not exist yet; otherwise an
    incremental recover is attempted.

    Raises NoFiles when the repository holds no usable backup files.
    """
    repofiles = find_files(options)
    if not repofiles:
        message = (f'No files in repository before {options.date}'
                   if options.date else 'No files in repository')
        raise NoFiles(message)

    needs_full = (options.full
                  or options.output is None
                  or not os.path.exists(options.output))
    if needs_full:
        do_full_recover(options, repofiles)
    else:
        do_incremental_recover(options, repofiles)

    if options.output is None:
        return

    # If the newest backup file has a saved .index alongside it, copy it
    # next to the recovered output so ZODB can skip rebuilding the index.
    newest_base = os.path.splitext(repofiles[-1])[0]
    source_index = '%s.index' % newest_base
    target_index = '%s.index' % options.output
    if os.path.exists(source_index):
        log('Restoring index file %s to %s',
            source_index, target_index)
        shutil.copyfile(source_index, target_index)
    else:
        log('No index file to restore: %s', source_index)
1✔
837

838

839
def do_verify(options):
    """Verify sizes and checksums of all files mentioned in the .dat file.

    Each line of the .dat file records a backup file name, its start and
    end offsets, and the md5 checksum of that byte range.  Every named
    file must be present in the repository and match the recorded size;
    unless -Q/--quick was given, its checksum must match as well.

    Raises NoFiles when the repository is empty, and VerificationFail on
    the first missing, truncated, oversized or corrupted backup file.
    """
    repofiles = find_files(options)
    if not repofiles:
        raise NoFiles('No files in repository')
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    with open(datfile) as fp:
        for line in fp:
            # Renamed from `sum` to avoid shadowing the builtin.
            fn, startpos, endpos, expected_sum = line.split()
            startpos = int(startpos)
            endpos = int(endpos)
            # fn may carry a path from the machine that wrote the backup;
            # only its basename is meaningful inside the repository.
            filename = os.path.join(options.repository,
                                    os.path.basename(fn))
            expected_size = endpos - startpos
            log("Verifying %s", filename)
            try:
                if filename.endswith('fsz'):
                    # Compressed backups are sized and checksummed on the
                    # uncompressed data, which is what the .dat records.
                    actual_sum, size = get_checksum_and_size_of_gzipped_file(
                        filename, options.quick)
                    when_uncompressed = ' (when uncompressed)'
                else:
                    actual_sum, size = get_checksum_and_size_of_file(
                        filename, options.quick)
                    when_uncompressed = ''
            except OSError:
                raise VerificationFail("%s is missing" % filename)
            if size != expected_size:
                raise VerificationFail(
                    "%s is %d bytes%s, should be %d bytes" % (
                        filename, size, when_uncompressed, expected_size))
            elif not options.quick:
                if actual_sum != expected_sum:
                    raise VerificationFail(
                        "%s has checksum %s%s instead of %s" % (
                            filename, actual_sum, when_uncompressed,
                            expected_sum))
874

875

876
def get_checksum_and_size_of_gzipped_file(filename, quick):
    """Return (checksum, size) of the uncompressed contents of *filename*.

    With *quick* true, only the uncompressed size is determined and the
    checksum slot is None.
    """
    with _GzipCloser(filename, 'rb') as fp:
        if not quick:
            return checksum_and_size(fp)
        return None, file_size(fp)
1✔
882

883

884
def get_checksum_and_size_of_file(filename, quick):
    """Return (checksum, size) of *filename*.

    With *quick* true, only the size is measured and the checksum slot
    is None; otherwise the file is read again from the start to compute
    its checksum.
    """
    with open(filename, 'rb') as fp:
        # Seek to the end to learn the file's size without reading it.
        fp.seek(0, os.SEEK_END)
        size = fp.tell()
        digest = None
        if not quick:
            fp.seek(0)
            digest = checksum(fp, size)
    return digest, size
1✔
894

895

896
def main(argv=None):
    """Command-line entry point.

    Parses *argv* (defaulting to sys.argv[1:]) and dispatches to the
    backup, recover or verify routine.  Expected failures (RepozoError,
    OSError) terminate the process with the error text as exit message.
    """
    if argv is None:
        argv = sys.argv[1:]
    options = parseargs(argv)
    # parseargs guarantees mode is one of the three known constants.
    assert options.mode in (BACKUP, RECOVER, VERIFY)
    handler = {
        BACKUP: do_backup,
        RECOVER: do_recover,
        VERIFY: do_verify,
    }[options.mode]
    try:
        handler(options)
    except (RepozoError, OSError) as error:
        sys.exit(str(error))
×
910

911

912
# Allow this module to be run directly as a script.
if __name__ == '__main__':
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc