# Source code for cacher_mod
#!/usr/bin/env python
# pylint: disable=simplifiable-if-statement
# simplifiable-if-statement
'''Provides a Cacher class, for caching database tables from gdbm files'''
# import sys
import time
# import pprint
# import traceback
import treap
import db_mod
# import decimal
class Database_cacher_error(Exception):
    '''A generic exception for cacher errors'''
    # pylint: disable=W0232
    # W0232: a plain exception type needs no __init__ method
class Database_cacher_duplicate_error(Database_cacher_error):
    '''An exception for when we have a duplicate filename'''
    # pylint: disable=W0232
    # W0232: a plain exception type needs no __init__ method
class Database_cacher_time_error(Database_cacher_error):
    '''An exception for when we have a timing problem'''
    # pylint: disable=W0232
    # W0232: a plain exception type needs no __init__ method
class Database_cacher(object):
    '''
    A Database Cache class, for caching database tables from key-value stores.
    The cache is LRU: We evict the Least Recently Used table when we need more room.
    '''
    # We store one entry in this for each directory backed up

    def __init__(self, max_entries):
        '''
        Set up an empty cache.

        max_entries: the maximum number of open databases to hold before evicting
        the least recently used one.
        '''
        # filename -> open database object (the cached values)
        self.filename_to_database_dict = {}
        # (wall-clock time, touchno) key -> list of filenames touched at that key
        self.time_to_filename_treap_list = treap.treap()
        # filename -> its current (wall-clock time, touchno) key in the treap
        self.filename_to_time_dict = {}
        self.max_entries = max_entries
        # Strictly increasing counter, used as a tiebreaker in treap keys so that
        # two touches are never ambiguous even when the clock is too granular to
        # tell them apart.
        self.touchno = 0
        # Wall-clock time of the last _possible_syncs pass
        self.time_of_last_syncs = time.time()

    def _touch(self, filename):
        '''
        Update (or create anew) the timestamps on this filename in the cache, so the filename
        isn't evicted from the cache as soon.

        Raises Database_cacher_time_error if the cache's internal collections have
        gotten out of sync with each other.
        '''
        # We update two collections:
        # 1) time_to_filename_treap_list
        # 2) filename_to_time_dict
        # We needn't do anything with filename_to_database_dict.
        #
        # The treap key is a (wall-clock time, touchno) tuple: the wall-clock part is what
        # _possible_syncs compares against real elapsed time, and the strictly increasing
        # touchno part keeps keys distinct (and comparisons exact) even when time.time()
        # is too granular to disambiguate two rapid touches.
        current_time = (time.time(), self.touchno)
        self.touchno += 1
        # 1a) Remove any traces of previous entries, if they exist
        if filename in self.filename_to_time_dict:
            prior_time = self.filename_to_time_dict[filename]
            if prior_time in self.time_to_filename_treap_list:
                # first, remove the filename from its previous list, deleting the old list
                # if it has become empty
                filenames_at_prior_time = self.time_to_filename_treap_list[prior_time]
                if filename in filenames_at_prior_time:
                    filenames_at_prior_time.remove(filename)
                    if not filenames_at_prior_time:
                        del self.time_to_filename_treap_list[prior_time]
                else:
                    raise Database_cacher_time_error("a")
            else:
                raise Database_cacher_time_error("b")
        # 1b) Add new entries
        if current_time in self.time_to_filename_treap_list:
            self.time_to_filename_treap_list[current_time].append(filename)
        else:
            # BTW, this will mostly be lists of a single element. We want to allow duplicate
            # times in case things are moving along rapidly, but we don't want to require
            # that all times be distinct.
            self.time_to_filename_treap_list[current_time] = [filename]
        # 2) Update filename_to_time_dict
        self.filename_to_time_dict[filename] = current_time

    def __len__(self):
        '''Return the number of databases currently cached'''
        return len(self.filename_to_database_dict)

    def __contains__(self, filename):
        '''Return True if filename's database is currently cached'''
        return filename in self.filename_to_database_dict

    def keys(self):
        '''Return a list of keys (filenames) in the cache'''
        return self.filename_to_database_dict.keys()

    def _possible_syncs(self):
        '''
        At most once every three hours: call the database sync method on every cached
        database that hasn't been "touched" (read or written via a Database_cacher
        method) for three hours or more.
        '''
        # three hours, used both as the pass frequency and as the staleness threshold
        interval = 60 * 60 * 3
        current_time = time.time()
        if current_time > self.time_of_last_syncs + interval:
            self.time_of_last_syncs = current_time
            threshold_time = current_time - interval
            # The treap keys are (wall-clock time, touchno) tuples, and reverse_iterator
            # yields them in reverse chronological order (most recently touched first).
            # A key is stale once its wall-clock component drops below the threshold, and
            # because we iterate newest-first, every key after the first stale one is
            # stale too.
            for touch_key in self.time_to_filename_treap_list.reverse_iterator():
                if touch_key[0] >= threshold_time:
                    # Touched recently - no need to sync; keep looking for older entries
                    continue
                for filename in self.time_to_filename_treap_list[touch_key]:
                    database = self.filename_to_database_dict[filename]
                    try:
                        # This will raise a database-module error if the database has been
                        # closed - this module doesn't see the closes. So we catch and
                        # ignore those errors on the sync().
                        database.sync()
                    except db_mod.error:
                        pass

    def __getitem__(self, filename):
        '''
        Return the cached database for filename, refreshing its LRU position.

        Raises KeyError if filename is not in the cache.
        '''
        if filename in self.filename_to_database_dict:
            self._touch(filename)
            self._possible_syncs()
            return self.filename_to_database_dict[filename]
        else:
            raise KeyError("Filename %s not in cacher instance" % filename)

    def __setitem__(self, filename, value):
        '''
        Add a database (value) to the cache under filename, then evict LRU entries
        if the cache has grown past max_entries.

        Raises Database_cacher_duplicate_error if filename is already cached.
        '''
        if filename in self.filename_to_database_dict:
            raise Database_cacher_duplicate_error
        self.filename_to_database_dict[filename] = value
        self._touch(filename)
        self._possible_syncs()
        self.expire_down_to()

    def expire_down_to(self, down_to=None):
        '''
        Expire least-recently-used entries from the cache until we have no more than
        down_to entries, closing each evicted database. down_to defaults to
        self.max_entries.
        '''
        if down_to is None:
            down_to = self.max_entries
        while len(self.filename_to_database_dict) > down_to:
            # First we remove from time_to_filename_treap_list
            least_time = self.time_to_filename_treap_list.find_min()
            least_time_filename_list = self.time_to_filename_treap_list[least_time]
            if len(least_time_filename_list) > 1:
                # there's more than one filename at this time; remove one.
                # We hold a reference to the list that's still inside the treap, so
                # popping it updates the treap too.
                least_filename = least_time_filename_list.pop()
            else:
                least_filename = least_time_filename_list[0]
                del self.time_to_filename_treap_list[least_time]
            # next we remove from filename_to_time_dict
            del self.filename_to_time_dict[least_filename]
            # and finally we remove from filename_to_database_dict, closing the
            # database as we go
            self.filename_to_database_dict[least_filename].close()
            del self.filename_to_database_dict[least_filename]

    def expire_all(self):
        '''Expire all entries from the cache, closing every database'''
        # There's an O(n) way of doing this, but for now, we do it in O(n*log(n)) time
        # for accuracy and speed of coding
        self.expire_down_to(down_to=0)

    # Closing the cache is the same as expiring everything from it
    close = expire_all