'''Provides a very simple rolling checksum for file data'''

import os
import sys

#mport comma_mod

ifdef(`pyx',from libc.stdint cimport uint64_t)

def min_max_chunker(file_handle):
	'''Make sure chunk sizes are above and below a pair of thresholds'''
	
	empty = True
	minimum_length = 2**19 # 0.5 mebibytes
	maximum_length = 2**22 # 4.0 mebibytes

	for chunk in n_level_chunker(file_handle, levels=3):
		if empty:
			chunk_so_far = chunk
		else:
			chunk_so_far += chunk
		empty = False

		len_chunk_so_far = `len'(chunk_so_far)

		if len_chunk_so_far < minimum_length:
			# go back up for another piece
			continue
		
		if len_chunk_so_far < maximum_length:
			# good - we're in the sweet spot
			yield chunk_so_far
			empty = True
			continue

		if len_chunk_so_far >= maximum_length:
			# we have a long chunk - split it and go around for more
			yield chunk_so_far[:maximum_length]
			chunk_so_far = chunk_so_far[maximum_length:]
			continue

		raise AssertionError("Should never reach this point")

	if not empty and chunk_so_far:
		yield chunk_so_far

def n_level_chunker(file_handle, levels=1):

	# pylint: disable=R0912
	# R0912: I looked into reducing the branch count here, but it seemed to make things more complex, not less

	'''Divide a file up into portions when we get 3 in-range checksums in a row'''
	rolling_checksum = Rolling_checksum()

	# this threshold should give us about 2**20 byte chunks on average
	modulus = rolling_checksum.get_modulus() 
	threshold = modulus * 0.007

	consecutive_boundaries = 0

	preferred_block_len = 2**22

	block = os.read(file_handle, preferred_block_len)

	while True:
		if not block[preferred_block_len-1:]:
			block = block + os.read(file_handle, preferred_block_len - `len'(block))

		if not block:
			break

		for byteno, character in enumerate(block):
			# This if deals with python 2 returing single-character str's, and python 3 returning a integer between 0 and 255
			# Also, jython returns unicode
			if not isinstance(character, int):
				byte = ord(character)
			else:
				byte = character

			rolling_checksum.add(byte)

			checksum = rolling_checksum.get_checksum() 

			if checksum < threshold:
				consecutive_boundaries += 1
			else:
				consecutive_boundaries = 0
				continue

			if consecutive_boundaries == levels:
				consecutive_boundaries = 0
				result = block[:byteno]
				block = block[byteno:]

				if result:
					# result is not empty, so yield it
					yield result
				# we must break out of the loop to restart byteno at 0
				break
		else:
			# We made it all the way through the enumeration without yielding anything, so yield all we have and empty block
			if block:
				yield block
				# in other words, make block an empty string
				block = block[0:0]

	# we're done processing chunks - return what's left, if it's not empty
	if block:
		# result is not empty, so yield it
		sys.stderr.write('yielding block (3) of length %d\n' % `len'(result))
		yield block
		block = os.read(file_handle, 2**22)


# This one actually seems to be better as an iterator than a generator
ifdef(`pyx',cdef )class Rolling_checksum:
	ifdef(`pyx',cdef uint64_t _total)
	ifdef(`pyx',cdef uint64_t _width)
	ifdef(`pyx',cdef uint64_t _multiplicand)
	ifdef(`pyx',cdef uint64_t _modulus)
	ifdef(`pyx',cdef uint64_t _addend)
	ifdef(`pyx',cdef uint64_t _offset)
	ifdef(`pyx',cdef list _window)
	ifdef(`pyx',cdef uint64_t _magic_checksum)

	# pylint: disable=R0902
	# R0902: We need a few instance attributes; we're a nontrivial iterator - but I suspect this'll beat polynomials bigtime

	'''
	Compute a very simple, fast, rolling checksum.  We don't maintain a list of bytes or anything - we just produce checksums
	on demand.
	'''

	def __init__(self, width=607, multiplicand=7, modulus=3677, addend=66041):
		# pylint: disable=R0913
		# R0913: We need a few arguments
		self._total = addend * width
		self._width = width
		self._multiplicand = multiplicand
		self._modulus = modulus
		self._addend = addend
		self._offset = 0
		self._window = [ addend ] * self._width

	ifdef(`pyx',cp)def add(self, byte):
		'''Add a byte into our rolling checksum function'''
		# Inspired by linear congruential random number generation.  It's not quite the same thing though.
		if not isinstance(byte, int):
			byte = ord(byte)
		#self._window[self._offset] = byte * self._multiplicand[self._offset] + self._offset + 1
		adjusted = byte * self._multiplicand + self._addend
		self._window[self._offset] = adjusted
		#self._total += self._window[self._offset]
		self._total += adjusted
		new_offset = (self._offset + 1) % self._width
		self._offset = new_offset
		self._total -= self._window[new_offset]
		return self

	ifdef(`pyx',cp)def get_checksum(self):
		'''Return the current checksum'''
		return self._total % self._modulus
		
	ifdef(`pyx',cp)def get_modulus(self):
		'''Return the modulus'''
		return self._modulus