'''Provides a very simple rolling checksum for file data'''

import os
import sys

ifdef(`pyx',cdef )class Tri_level_chunker:

	# pylint: disable=R0902,R0903
	# R0902: We want a few instance attributes
	# R0903: We don't need a lot of public methods

	'''Chunk up a file into blocks in a pretty repeatable way, even in the face of small changes to a file'''

	ifdef(`pyx',cdef object _rolling_checksum)
	ifdef(`pyx',cdef int _file_handle)
	ifdef(`pyx',cdef int _threshold)
	ifdef(`pyx',cdef int _consecutive_boundaries)
	ifdef(`pyx',cdef object _block)
	ifdef(`pyx',cdef int _at_eof)
	ifdef(`pyx',cdef int _done)

	def __init__(self, file_handle):
		'''Divide a file up into portions'''
		self._rolling_checksum = Rolling_checksum()
		self._file_handle = file_handle
		# this threshold should give us about 2**20 byte chunks on average
		self._threshold = self._rolling_checksum.get_modulus() * 0.01
		#sys.stdout.write('self._threshold is %d\n' % self._threshold)
		self._consecutive_boundaries = 0
		self._block = os.read(self._file_handle, 2**22)
		self._at_eof = False
		self._done = False

	def __iter__(self):
		return self

	def __next__(self):
		# pylint: disable=R0912
		# R0912: Too many branches: Tell me about it.  If we had a yield, this'd be much simpler, but cython 0.14 doesn't do that

		'''Yield (without using the yield keyword) the next block in the sequence'''

		for byteno, character in enumerate(self._block):
			if isinstance(character, str):
				byte = ord(character)
			else:
				byte = character
			self._rolling_checksum.add(byte)
			checksum = self._rolling_checksum.get_checksum() 
			#sys.stdout.write('checksum is %d\n' % checksum)
			if checksum < self._threshold:
				self._consecutive_boundaries += 1
				#sys.stdout.write('self._consecutive_boundaries is %d\n' % self._consecutive_boundaries)
				if self._consecutive_boundaries == 3:
					result = self._block[:byteno]
					if self._at_eof:
						replenishment = ''
					else:
						replenishment = os.read(self._file_handle, byteno)
					if not replenishment:
						self._at_eof = True
					if replenishment:
						self._block = self._block[byteno:] + replenishment
					else:
						self._block = self._block[byteno:]
					self._consecutive_boundaries = 0
					return result
				elif not (0 < self._consecutive_boundaries < 3):
					sys.stderr.write('%s: Internal error: not 0 < _consecutive_boundaries < 3\n' % sys.argv[0])
					sys.exit(1)
				else:
					continue
			else:
				self._consecutive_boundaries = 0

		if self._done:
			raise StopIteration

		# we're done processing chunks - return what we had, and raise StopIteration next time
		self._done = True
		return self._block

	next = __next__ 

ifdef(`pyx',cdef )class Rolling_checksum:
	# pylint: disable=R0903,R0902
	# R0903: We don't need a lot of public methods
	# R0902: We want lots of instance attributes

	ifdef(`pyx',cdef object _total)
	ifdef(`pyx',cdef int _width)
	ifdef(`pyx',cdef long _multiplicand)
	ifdef(`pyx',cdef long _modulus)
	ifdef(`pyx',cdef long _addend)
	ifdef(`pyx',cdef long _offset)
	ifdef(`pyx',cdef list _window)
	ifdef(`pyx',cdef long _magic_checksum)

	'''Compute a very simple, fast, rolling checksum'''
	#def __init__(self, width=2039, multiplicand=1021, modulus=32713):
	#def __init__(self, width = 3606721, multiplicand = 583621, modulus = 1048573, addend = 11515687):
	def __init__(self, width = 47, multiplicand = 5059, modulus = 389, addend = 83093):
		self._total = addend * width
		self._width = width
		self._multiplicand = multiplicand
		self._modulus = modulus
		self._addend = addend
		self._offset = 0
		self._window = [ addend ] * self._width
		#self._multiplicand = list(primes.primes(self._width))
		#self._magic_checksum = self._width * 64
		self._magic_checksum = 0

	ifdef(`pyx',cp)def add(self, byte):
		'''Add a byte into our rolling checksum function'''
		# Inspired by linear congruential random number generation.  It's not quite the same thing though.
		if isinstance(byte, str):
			byte = ord(byte)
		#self._window[self._offset] = byte * self._multiplicand[self._offset] + self._offset + 1
		adjusted = byte * self._multiplicand + self._addend
		self._window[self._offset] = adjusted
		#self._total += self._window[self._offset]
		self._total += adjusted
		new_offset = (self._offset + 1) % self._width
		self._offset = new_offset
		self._total -= self._window[new_offset]
		return self

	ifdef(`pyx',cp)def get_checksum(self):
		'''Return the current checksum'''
		return self._total % self._modulus
		
	ifdef(`pyx',cp)def demarcation(self):
		'''Is this the dividing line between two chunks?'''
		#print('self._total is %d, checksum is %d' % (self._total, self._total % self._modulus))
		return self.get_checksum() == self._magic_checksum

	ifdef(`pyx',cp)def get_modulus(self):
		'''Return the modulus'''
		return self._modulus