Source code for backshift_file_mod

#!/usr/bin/env python

"""
An abstract representation of a file of various kinds, including directories.

Used as a bridge between stat-data and the repo, or the repo and tar output.
"""

# pylint: disable=simplifiable-if-statement

import os
import sys
import stat
import time

import backshift_os_mod
import constants_mod
import helpers
import metadata_mod


def make_used(var):
    """
    Persuade linters that var is 'used'.

    The argument is never evaluated or inspected; the function always returns None.
    """
    # Dropping the local reference counts as a "use" for linters, and has no other effect.
    del var
def perms_string(bits, weird_x=None):
    """
    Convert a 3 bit quantity to a more human-readable description - EG, 'rwx'.

    bits is tested against 4 (read), 2 (write) and 1 (execute).
    weird_x, when not None, replaces the 'x' character - but only if the execute bit is set.
    """
    read_char = 'r' if bits & 4 else '-'
    write_char = 'w' if bits & 2 else '-'

    if bits & 1:
        execute_char = 'x' if weird_x is None else weird_x
    else:
        # Without the execute bit, weird_x is ignored entirely
        execute_char = '-'

    return ''.join((read_char, write_char, execute_char))
class Skipped(Exception):
    """Raised when we are passed a file of an invalid (unknown) type, including unix domain sockets."""
class Benign_skipped(Skipped):
    """Raised when we skip something unimportant, like a unix domain socket."""
class Unknown_skipped(Skipped):
    """Raised when we skip something that could conceivably be important - IOW, an unknown file type."""
class Problematic_skipped(Skipped):
    """Raised when we skip something we know is important."""
def weird_bit(mode, bit, character):
    """
    Deal with sticky, setgid and setuid bits.

    Return character if bit is set in mode, otherwise None.
    """
    return character if mode & bit else None
def get_can_do_device_files():
    """
    Return True iff it looks like the Python interpreter has the necessary support to backup device files.

    That means os.major and os.minor both exist, and the result of os.stat('/') has an st_rdev attribute.
    """
    has_major_and_minor = hasattr(os, 'major') and hasattr(os, 'minor')
    if not has_major_and_minor:
        # Bail out before touching the filesystem
        return False

    return hasattr(os.stat('/'), 'st_rdev')
class Backshift_file(object):
    # pylint: disable=R0903
    # R0903: We don't need a lot of public methods
    """
    An abstract representation of a file of various kinds, including directories.

    Used as a bridge between stat-data and the repo, or the repo and tar output.

    Instances behave a little like a read-only mapping of metadata attribute -> value: len(), indexing
    and "in" are all forwarded to the underlying attribute dictionary, self.dict_.
    """

    # Widest "length" column seen so far by to_tar_tvf.  This is deliberately shared by all instances,
    # so that successive tvf lines get padded to a common width.
    max_digits = 1

    # Maps a file type to the single character used in the first column of a "tar tvf"-like listing
    kind_dict = {}
    kind_dict[constants_mod.Constants.b_block_device] = 'b'
    kind_dict[constants_mod.Constants.b_character_device] = 'c'
    kind_dict[constants_mod.Constants.b_directory] = 'd'
    kind_dict[constants_mod.Constants.b_fifo] = 'p'
    kind_dict[constants_mod.Constants.b_hardlink] = 'h'
    kind_dict[constants_mod.Constants.b_regular_file] = '-'
    kind_dict[constants_mod.Constants.b_symlink] = 'l'

    def __init__(self, repo, file_, filename, verbose=False):
        """
        Initialize our dictionary to something indicating that all attributes missing, then fill it in.

        repo: the repository object; its speeds.add_speed (and, via process_from_lstat, save_chunks) are used
            when file_ is a stat result.
        file_: either an os.stat_result, or the serialized metadata accepted by init_from_string.
        filename: the name of the file being described.
        verbose: if true, and file_ is a stat result, write the "speed" description to stdout.

        Raises a Skipped subclass (see init_from_lstat_result) for file types we do not store.
        """
        self.dict_ = {}
        for key in metadata_mod.File_attributes.dict_:
            self.dict_[key] = None
        self.repo = repo
        self.filename = filename
        self.type_ = None

        # now fill in the attributes we actually have
        if hasattr(os, 'stat_result') and isinstance(file_, os.stat_result):
            # We're receiving a file from stat - probably during a backup
            speed = self.init_from_lstat_result(lstat_result=file_)
            self.repo.speeds.add_speed(speed)
            if verbose:
                os.write(sys.stdout.fileno(), helpers.string_to_binary('%s ' % speed))
        else:
            # otherwise we assume this is a file(-like object), and fill in from that
            self.init_from_string(file_)

    # Note that the can_do_device_files default is computed once, when the class body is executed
    def init_from_lstat_result(self, lstat_result, can_do_device_files=get_can_do_device_files()):
        """
        Save a file in the repository.

        Classifies lstat_result.st_mode into self.type_, then hands off to process_from_lstat.

        Returns the "speed" string produced by process_from_lstat.

        Raises:
            Problematic_skipped: for a character or block device when can_do_device_files is false.
            Benign_skipped: for a unix domain socket.
            Unknown_skipped: for any file type not recognized here.
        """
        if stat.S_ISREG(lstat_result.st_mode):
            self.type_ = constants_mod.Constants.b_regular_file
        elif stat.S_ISLNK(lstat_result.st_mode):
            self.type_ = constants_mod.Constants.b_symlink
        elif stat.S_ISDIR(lstat_result.st_mode):
            self.type_ = constants_mod.Constants.b_directory
        elif stat.S_ISCHR(lstat_result.st_mode):
            if can_do_device_files:
                self.type_ = constants_mod.Constants.b_character_device
            else:
                raise Problematic_skipped('%s: skipping character device %s because this python does not support it' % (sys.argv[0], self.filename))
        elif stat.S_ISBLK(lstat_result.st_mode):
            if can_do_device_files:
                self.type_ = constants_mod.Constants.b_block_device
            else:
                tuple_ = (sys.argv[0], self.filename)
                raise Problematic_skipped('%s: skipping block device %s because this python does not support it' % tuple_)
        elif stat.S_ISFIFO(lstat_result.st_mode):
            self.type_ = constants_mod.Constants.b_fifo
        elif stat.S_ISSOCK(lstat_result.st_mode):
            # we can safely ignore unix domain sockets - these are created by processes, and will be recreated by them as needed.
            raise Benign_skipped('%s: skipping unix domain socket %s' % (sys.argv[0], self.filename))
        else:
            raise Unknown_skipped('%s: %s is of an unrecognized file type' % (sys.argv[0], self.filename))

        speed = self.process_from_lstat(lstat_result)

        return speed

    def process_from_lstat(self, lstat_result):
        """
        Deal with the metadata fields.

        For a known self.type_, each metadatum registered for that type fills in its field of self.dict_
        from lstat_result.  If the regular_file attribute ends up truthy, the content is handed to
        self.repo.save_chunks, and its return value replaces the default 'stat' speed.

        Returns the speed description, right-justified to constants_mod.Constants.file_type_width.
        """
        speed = 'stat'
        if self.type_ in metadata_mod.File_types.dict_:
            for metadatum in metadata_mod.File_types.dict_[self.type_]:
                self.dict_[metadatum.field] = metadatum.get_from_stat(self.filename, lstat_result, metadatum.field)
            assert constants_mod.Constants.b_hash in self.dict_
            if self.dict_.get(constants_mod.Constants.b_regular_file):
                speed = self.repo.save_chunks(lstat_result, self.filename, self.dict_)
        else:
            # An unknown type is only reported; we carry on with the default speed
            sys.stderr.write('%s: %s not in %s\n' % (sys.argv[0], self.type_, metadata_mod.File_types.dict_.keys()))

        return '%*s' % (constants_mod.Constants.file_type_width, speed)

    def init_from_string(self, file_):
        """
        Construct (initialize, really) a Backshift file from the file like object we get from reading a file's metadata.

        file_ is split on constants_mod.Constants.b_newline; each nonblank line must consist of exactly two
        whitespace-separated fields: a key known to metadata_mod.File_attributes, and a value.

        Raises AssertionError on a line with too few or too many fields, on an unknown key, or on a
        repeated key other than the hash key.
        """
        for line in file_.split(constants_mod.Constants.b_newline):
            fields = line.split()
            if not fields:
                # this was a blank line - don't stress about it
                continue
            if not fields[1:]:
                raise AssertionError('%s: Too few fields in %s of %s' % (sys.argv[0], fields, self.filename))
            if fields[2:]:
                raise AssertionError('%s: Too many fields in %s of %s' % (sys.argv[0], fields, self.filename))
            key = fields[0]
            if key not in metadata_mod.File_attributes.dict_:
                raise AssertionError('%s: Invalid key %s of %s' % (sys.argv[0], key, self.filename))
            if key == constants_mod.Constants.b_hash:
                # hashes can legitimately be repeated
                cooked_value = metadata_mod.File_attributes.dict_[constants_mod.Constants.b_hash].get_from_fields(fields)
                if key in self.dict_ and self.dict_[key] is not None:
                    self.dict_[key].append(cooked_value)
                else:
                    self.dict_[key] = [cooked_value]
            else:
                # all others must be unique for a given dictionary key
                if key in self.dict_ and self.dict_[key] is not None:
                    raise AssertionError('%s: Field %s occurs more than once in %s' % (sys.argv[0], key, self.filename))
                else:
                    self.dict_[key] = metadata_mod.File_attributes.dict_[key].get_from_fields(fields)

        self.init_string_set_type()

    def init_string_set_type(self):
        """
        Just set the type - for when constructing from init_from_string.

        self.type_ becomes the first of the known type names whose attribute is present and truthy in
        self.dict_.  If there is no such attribute, a complaint is written to stderr and self.type_ is
        left alone.
        """
        for what_type in ['regular_file', 'symlink', 'directory', 'character_device', 'block_device', 'fifo']:
            binary_what_type = helpers.string_to_binary(what_type)
            if binary_what_type in self.dict_ and self.dict_[binary_what_type]:
                self.type_ = binary_what_type
                break
        else:
            # We only get here if the loop above found no type at all
            sys.stderr.write('%s: %s is of an unrecognized file type\n' % (sys.argv[0], self.filename))

        return

    def __len__(self):
        """Return length - that is, the number of entries in the attribute dictionary."""
        return len(self.dict_)

    # we intentionally aren't providing __setitem__ - there's no need

    def __getitem__(self, key):
        """Look up key in dict.  Raises KeyError if key is not a known attribute."""
        return self.dict_[key]

    def __contains__(self, key):
        """
        Return True if key in dict.

        Note that __init__ gives every known attribute a None placeholder, so this reports whether key
        is a known attribute name - not whether it has a value.
        """
        # Test the dictionary itself.  Testing self.dict_[key] instead would raise KeyError for an
        # unknown key, and TypeError for an attribute that is still None.
        return key in self.dict_

    def as_string(self):
        """
        Format this file's metadata as a string for storage in some sort of database.

        One "key value" line is produced for each attribute that is not None, in sorted key order.  The hash
        attribute is a list, and gives one line per entry.  The lines are joined with
        constants_mod.Constants.b_newline.

        Raises ValueError if a hash entry is not a str, bytes or tuple.
        """
        keys = [helpers.string_to_binary(key) for key in self.dict_]
        keys.sort()
        list_ = []
        blank = constants_mod.Constants.b_blank
        minus = constants_mod.Constants.b_minus
        for key in keys:
            if self.dict_[key] is not None:
                if key == constants_mod.Constants.b_hash:
                    # The hashes are in a list.  The others are all just scalars.
                    for hash_ in self.dict_[key]:
                        if isinstance(hash_, (bytes, str)):
                            string = key + blank + helpers.string_to_binary(hash_.rstrip().replace(' ', '-'))
                        elif isinstance(hash_, tuple):
                            # Element 0 and element 1, separated by a minus
                            string = \
                                key + \
                                blank + \
                                helpers.string_to_binary(hash_[0]) + \
                                minus + \
                                helpers.string_to_binary(str(hash_[1]))
                        else:
                            raise ValueError('hash_ is not a bytes_type or tuple: %s' % type(hash_))
                        list_.append(string)
                else:
                    value = self.dict_[key]
                    if isinstance(value, bytes):
                        binary = value
                    else:
                        binary = helpers.string_to_binary(str(value))
                    string = key + blank + binary
                    list_.append(string)

        return constants_mod.Constants.b_newline.join(list_)

    def get_username(self):
        """If the user exists, return the username - otherwise return uid (formatted as a decimal string)."""
        try:
            pwent = backshift_os_mod.my_getpwnam(helpers.binary_to_string(self.dict_[constants_mod.Constants.b_owner]))
        except KeyError:
            return '%d' % self.dict_[constants_mod.Constants.b_st_uid]
        else:
            # We only care that the lookup succeeded, not about what it returned
            make_used(pwent)
            return helpers.binary_to_string(self.dict_[constants_mod.Constants.b_owner])

    def get_groupname(self):
        """If the group exists, return the group - otherwise return gid (formatted as a decimal string)."""
        try:
            grent = backshift_os_mod.my_getgrnam(helpers.binary_to_string(self.dict_[constants_mod.Constants.b_group]))
        except KeyError:
            return '%d' % self.dict_[constants_mod.Constants.b_st_gid]
        else:
            # We only care that the lookup succeeded, not about what it returned
            make_used(grent)
            return helpers.binary_to_string(self.dict_[constants_mod.Constants.b_group])

    def get_real_length(self):
        """
        Compute the real length of the file (as opposed to what's in st_size), by adding up the hash keys' lengths.

        Returns 0 if there are no hash entries.

        Raises AssertionError if a hash entry does not have exactly two elements.
        """
        total = 0
        binary_hash = constants_mod.Constants.b_hash
        if binary_hash in self.dict_ and self.dict_[binary_hash] is not None:
            for hash_entry in self.dict_[binary_hash]:
                # Exactly two elements: there is an element 1, and there is no element 2
                if hash_entry[1:] and not hash_entry[2:]:
                    current = int(hash_entry[1])
                    total += current
                else:
                    raise AssertionError('Bad number of entries in hash_entry 2-tuple: %s' % hash_entry)

        return total

    def to_tar_tf(self):
        """Generate a "tar tf"-like description of this file - That is, just list the filename."""
        # ./tests/40-smoke/Makefile
        # ./tests/66-rcm-perf/
        return helpers.binary_to_string(self.filename)

    def to_tar_tvf(self, hardlink_data):
        """
        Generate a "tar tvf"-like description of this file.

        hardlink_data: an object whose prior_file_for_hardlink(deviceno, inodeno, filename) returns the
            earlier filename for this device+inode pair, or something false if there isn't one.

        Returns the line as a string, with backslashes doubled.

        Raises ValueError if the (possibly hardlink-adjusted) file type has no entry in kind_dict.

        As a side effect, Backshift_file.max_digits grows to the widest length column seen so far.
        """
        # -rw-r--r-- dstromberg/dstromberg 750 2012-02-08 14:28 ./tests/40-smoke/Makefile
        # drwxr-xr-x dstromberg/dstromberg   0 2012-04-03 10:37 ./tests/66-rcm-perf/
        list_ = []

        derived_type = self.type_
        prior_filename = None
        if derived_type == constants_mod.Constants.b_regular_file:
            deviceno = self.dict_[constants_mod.Constants.b_st_dev]
            inodeno = self.dict_[constants_mod.Constants.b_st_ino]
            filename = self.filename
            prior_filename = hardlink_data.prior_file_for_hardlink(deviceno, inodeno, filename)
            if prior_filename:
                # this is a hardlink
                derived_type = helpers.string_to_binary('hardlink')

        if derived_type in Backshift_file.kind_dict:
            list_.append(Backshift_file.kind_dict[derived_type])
        else:
            raise ValueError('Unrecognized filetype: %s' % derived_type)

        # Owner, group and other permissions - with setuid, setgid and sticky replacing the respective x
        mode = self.dict_[constants_mod.Constants.b_st_mode]
        weird_x = weird_bit(mode, stat.S_ISUID, 's')
        list_.append(perms_string((mode & (7 * 64)) // (8 * 8), weird_x=weird_x))
        weird_x = weird_bit(mode, stat.S_ISGID, 's')
        list_.append(perms_string((mode & (7 * 8)) // 8, weird_x=weird_x))
        weird_x = weird_bit(mode, stat.S_ISVTX, 't')
        list_.append(perms_string(mode & 7, weird_x=weird_x))

        list_.append(' ')
        list_.append('%s/%s' % (self.get_username(), self.get_groupname()))
        list_.append(' ')

        # Device files show major,minor in the length column; links show 0
        if derived_type == constants_mod.Constants.b_character_device:
            length = '%s,%s' % (
                self.dict_[constants_mod.Constants.b_character_major],
                self.dict_[constants_mod.Constants.b_character_minor],
            )
        elif derived_type == constants_mod.Constants.b_block_device:
            length = '%s,%s' % (
                self.dict_[constants_mod.Constants.b_block_major],
                self.dict_[constants_mod.Constants.b_block_minor],
            )
        elif derived_type in [constants_mod.Constants.b_symlink, constants_mod.Constants.b_hardlink]:
            length = '0'
        else:
            length = '%d' % self.get_real_length()

        # Remember the widest length seen so far, and right-justify to that width
        num_digits = len(length)
        if num_digits > Backshift_file.max_digits:
            Backshift_file.max_digits = num_digits
        format_string = '%%%ds' % Backshift_file.max_digits
        list_.append(format_string % length)

        list_.append(' ')
        list_.append(time.strftime('%Y-%m-%d %H:%M', time.localtime(self.dict_[constants_mod.Constants.b_st_mtime])))
        list_.append(' ')
        list_.append(helpers.binary_to_string(self.filename))

        self.tail_special(list_, derived_type, prior_filename)

        stuff = ''.join(list_)

        # GNU tar doubles backslashes, so we do also
        return stuff.replace('\\', '\\\\')

    def tail_special(self, list_, derived_type, prior_filename):
        """
        Deal with some special cases near the end of a tvf line.

        Appends to list_ in place: a trailing '/' for a directory, ' -> target' for a symlink, and
        ' link to prior_filename' for a hardlink.  Other types leave list_ untouched.
        """
        if derived_type == constants_mod.Constants.b_directory:
            list_.append('/')

        if derived_type == constants_mod.Constants.b_symlink:
            list_.append(' -> ')
            target = self.dict_[constants_mod.Constants.b_link_target]
            bts_target = helpers.binary_to_string(target)
            list_.append(bts_target)

        if derived_type == constants_mod.Constants.b_hardlink:
            list_.append(' link to ')
            assert prior_filename is not None
            target = prior_filename
            bts_target = helpers.binary_to_string(target)
            list_.append(bts_target)