# Source code for dohdbm

#!/usr/bin/env python

"""
Implements a dictionary-like database with a degree of persistence.

On open, we read the database into a dictionary.
We mutate the dictionary in memory.
Then on close, we write the dictionary back to disk.

Has the advantage of being in Pure Python, so it works on about any
Python interpreter one cares to consider.
"""

import os
import sys
import errno

import base255
import bufsock

# Keep a handle on the builtin open(), because this module defines its own open() further down.
REAL_OPEN = open
# Default backend used to open database files; it is the default for the backend_open parameters below.
MAIN_OPEN = bufsock.rawio
# A single zero byte; it is written after every base255-encoded number in the on-disk format.
NULL_BYTE = bytes([0])


def string_to_binary(string):
    """
    Return `string` as a binary string.

    Text (str) input is encoded as latin-1; any other object is handed back untouched.
    """
    return string.encode('latin-1') if isinstance(string, str) else string
# Binary suffix appended to the database filename to name the temporary file that sync() writes.
DOT_TEMP = string_to_binary('.temp')
def safe_rename(backup_filename, regular_filename):
    """
    Move backup_filename onto regular_filename, replacing regular_filename if needed.

    A plain rename is tried first.  If it fails with EEXIST, regular_filename is unlinked and the rename is
    retried.  Any other OSError/IOError is reported on stderr and re-raised.
    """
    # This is copied verbatim from backshift

    # CIFS may give EEXIST when renaming onto a file that already exists, hence the fallback below.
    try:
        os.rename(backup_filename, regular_filename)
    except (OSError, IOError) as extra:
        if extra.errno != errno.EEXIST:
            sys.stderr.write('Got errno %s while trying to rename %s to %s\n' % (extra.errno, backup_filename, regular_filename))
            raise
    else:
        # The straightforward rename worked; nothing more to do.
        return

    # Only reached on EEXIST.  This runs outside the except clause, so a failure here is not chained to the
    # original error.
    os.unlink(regular_filename)
    os.rename(backup_filename, regular_filename)
def touch(filename):
    """
    Update the timestamp on an existing file, or create a file that doesn't yet exist.

    Existing content is never modified.  A newly created file is empty and is requested with mode 0o666.

    Raises OSError if the file cannot be opened/created or its times cannot be set.
    """
    # Opening for append with O_CREAT creates a missing file and leaves an existing one's content alone.
    descriptor = os.open(filename, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o666)
    os.close(descriptor)
    # Merely opening and closing a file does not change its modification time, so set the access and
    # modification times to "now" explicitly.
    os.utime(filename, None)
class error(Exception):
    """The exception this module raises when a database operation cannot be carried out."""
def open(filename, flag='rb', mode=6 * 64 + 6 * 8 + 6, backend_open=MAIN_OPEN):
    """
    Open the dohdbm database stored in `filename` and return it as a Dohdbm instance.

    Every argument is forwarded, unchanged and in order, to the Dohdbm constructor.
    """
    # pylint: disable=W0622
    # W0622: Shadowing the builtin open is deliberate, for consistency with other python nosql databases
    database = Dohdbm(filename, flag, mode, backend_open)
    return database
def _generate_key_value_pairs(file_):
    """
    Yield each (key, value) pair stored in an open database file.

    The stream begins with the number of pairs.  Each pair is then stored as the key length, the key bytes, the
    value length and the value bytes.  Every number is base255-encoded and read up to a null byte.
    """
    reader = bufsock.bufsock(file_, chunk_len=2**23)

    def read_number():
        """Read one null-terminated, base255-encoded number from the stream."""
        encoded = reader.readto(NULL_BYTE)
        return base255.base255_to_number(encoded.rstrip(NULL_BYTE))

    def read_field():
        """Read a length prefix and then that many bytes from the stream."""
        length = read_number()
        return reader.read(length)

    total_pairs = read_number()

    # Count by hand instead of relying on a range generator.
    pairs_yielded = 0
    while pairs_yielded < total_pairs:
        key = read_field()
        value = read_field()
        yield (key, value)
        pairs_yielded += 1
class Dohdbm(object):
    # pylint: disable=R0902,R0912
    # R0902: We want a bunch of instance attributes
    # R0912: We need some branches I guess :( - otherwise we end up setting instance attributes from something other
    # than __init__

    """
    A class implementing a simple database.

    The whole database is read into an in-memory dictionary when the object is constructed.  Lookups and
    mutations operate on that dictionary.  sync() (called by close() when there are unsaved changes) writes the
    dictionary to a temporary file and renames it over the database file.

    Keys and values must be bytes.
    """

    # NOTE(review): the default mode here works out to 0o665, while the module-level open() defaults to 0o666 -
    # confirm which one is intended.
    def __init__(self, filename, flag='r', mode=6 * 64 + 6 * 8 + 5, backend_open=MAIN_OPEN):
        """
        Open (and, depending on flag, create) the database and load it into memory.

        filename: path of the database file.
        flag: one or two characters.  The first is 'r' (read-only), 'w' (read/write), 'c' (read/write, create
            an empty database if the file cannot be opened) or 'n' (always create a new, empty database).  The
            optional second character may only be 'b'.
        mode: permission bits passed to backend_open when files are created.
        backend_open: callable used to open files; called as backend_open(filename, mode_string[, mode]).

        Raises AssertionError for an invalid flag.
        """
        self.dirty = False
        self.filename = filename

        # Raise explicitly rather than using assert statements, so the checks survive python -O.
        if len(flag) not in (1, 2):
            raise AssertionError("Invalid flag length: %r" % (flag, ))
        self.flag0 = flag[0]
        if self.flag0 not in ('r', 'w', 'c', 'n'):
            raise AssertionError("Invalid flag: %s" % self.flag0)
        if flag[1:]:
            self.flag1 = flag[1]
            if self.flag1 not in ('f', 's', 'u', 'b'):
                raise AssertionError("Invalid second flag character: %s" % self.flag1)
        else:
            self.flag1 = ''
        # actually, we always treat our keys and values as binary, irrespective of the "b" flag
        if self.flag1 not in ('', 'b'):
            raise AssertionError("Unsupported second flag character: %s" % self.flag1)

        self.mode = mode
        self.backend_open = backend_open

        should_read_file = True
        self.file_ = None
        if self.flag0 == 'r':
            self.file_ = self.backend_open(filename, 'rb')
            should_read_file = True
        elif self.flag0 == 'w':
            try:
                self.file_ = self.backend_open(filename, 'rb')
            except (OSError, IOError):
                # Nothing to load; just make sure the file exists.
                should_read_file = False
                touch(filename)
        elif self.flag0 == 'c':
            try:
                self.file_ = self.backend_open(filename, 'rb')
            except (OSError, IOError):
                self._create_empty(filename, mode)
                self.file_ = self.backend_open(filename, 'rb')
        elif self.flag0 == 'n':
            should_read_file = False
            self._create_empty(filename, mode)
            self.file_ = self.backend_open(filename, 'rb', mode)
        else:
            raise AssertionError("Invalid flag: %s" % self.flag0)

        self.dict_ = {}
        try:
            if should_read_file:
                self.dict_.update(_generate_key_value_pairs(self.file_))
        finally:
            # Close the file even when loading fails, so a corrupt database does not leak the handle.
            if self.file_ is not None:
                self.file_.close()

        self.is_open = True

    def _create_empty(self, filename, mode):
        """Create an empty "database": a file holding only a pair count of zero."""
        tempfile = self.backend_open(filename, 'wb', mode)
        tempfile.write(base255.number_to_base255(0) + NULL_BYTE)
        tempfile.close()

    def _error_if_not_open(self):
        """Raise an error if the database isn't currently open."""
        if not self.is_open:
            raise error("Database is not open")

    def _error_if_not_readwrite(self):
        """Raise an error if the database wasn't opened with a read/write flag ('w', 'c' or 'n')."""
        if self.flag0 not in ('w', 'c', 'n'):
            raise error("Database is not read/write")

    def __len__(self):
        """Return the number of items in the database."""
        self._error_if_not_open()
        return len(self.dict_)

    def __getitem__(self, key):
        """Return the value in the database associated with key.  Raises KeyError if key is absent."""
        self._ensure_bytes(key)
        self._error_if_not_open()
        return self.dict_[key]

    def __setitem__(self, key, value):
        """Associate value with key in the database.  Raises error if the database is read-only or closed."""
        if self.flag0 == 'r':
            raise error('Attempt to modify a readonly database')
        self._ensure_bytes(key)
        self._ensure_bytes(value)
        self._error_if_not_open()
        self._error_if_not_readwrite()
        self.dict_[key] = value
        # Only mark the database dirty once the change has actually been made.
        self.dirty = True

    def __delitem__(self, key):
        """
        Delete the key-value pair indexed by key in the database.

        Raises error if the database is read-only or closed, and KeyError if key is absent.
        """
        if self.flag0 == 'r':
            raise error('Attempt to modify a readonly database')
        self._ensure_bytes(key)
        self._error_if_not_open()
        self._error_if_not_readwrite()
        del self.dict_[key]
        # Only mark the database dirty once the deletion has succeeded (a missing key raises KeyError above).
        self.dirty = True

    def __iter__(self):
        """Generate all keys in the database."""
        self._error_if_not_open()
        for key in self.dict_:
            yield key

    def __contains__(self, key):
        """Return True iff key is in the database."""
        self._ensure_bytes(key)
        self._error_if_not_open()
        return key in self.dict_

    def sync(self):
        """
        Flush the database to disk.

        Does nothing for a read-only database.  Otherwise the whole dictionary is written to a temporary file
        (the database filename plus DOT_TEMP), which is then renamed over the database file.
        """
        if self.flag0 == 'r':
            # Nothing to do - this is a readonly database.
            return

        self._error_if_not_open()
        self._error_if_not_readwrite()

        temp_filename = string_to_binary(self.filename) + DOT_TEMP
        file_ = self.backend_open(temp_filename, 'wb', self.mode)
        try:
            # Layout: pair count, then for each pair the key length, key, value length and value.  Each number
            # is base255-encoded and followed by a null byte.
            file_.write(base255.number_to_base255(len(self.dict_)) + NULL_BYTE)
            for key, value in self.dict_.items():
                file_.write(base255.number_to_base255(len(key)) + NULL_BYTE)
                file_.write(key)
                file_.write(base255.number_to_base255(len(value)) + NULL_BYTE)
                file_.write(value)
        finally:
            # Close the temporary file even if a write fails, so the handle is not leaked.
            file_.close()

        # Only reached when every write succeeded, so a failed sync never replaces the real database.
        safe_rename(temp_filename, self.filename)
        self.dirty = False

    def close(self):
        """Close a database: sync it to disk if it has unsaved changes, and mark it closed."""
        # We're supposed to be idempotent, apparently
        if self.is_open:
            if self.flag0 == 'r':
                # Internal invariant: the mutators refuse to touch a read-only database.
                assert not self.dirty
            if self.dirty:
                self.sync()
            self.is_open = False

    def keys(self):
        """Return the keys in the database.  Does not check whether the database is open."""
        return self.dict_.keys()

    def items(self):
        """Return the items, AKA the key-value pairs, in the database.  Does not check whether it is open."""
        return self.dict_.items()

    @classmethod
    def _ensure_bytes(cls, string):
        """Make sure a key or value is of the bytes type; raise AssertionError otherwise."""
        if not isinstance(string, bytes):
            raise AssertionError('dohdbm called with non-bytes key and/or value: {}'.format(type(string)))
# def __enter__(self, *dummy): # return self # # def __exit__(self, type_, value, traceback_): # if value is None: # self.close() # return True # else: # return False