# cython: profile=True

import sys
import exceptions

sys.path.insert(0, 'drs_buffer')

import cython_drs_buffer_mod as drs_buffer_mod

# C-level type aliases used by the rabinpoly.h declarations and the Window wrapper below.
# u_int64_t is the type of the fingerprint/polynomial values; u_char_t is the type of a
# single byte fed into the window.
ctypedef unsigned long long u_int64_t
ctypedef unsigned char u_char_t

# Declarations for the external "window" type provided by rabinpoly.h.
# The quoted strings are Cython C-name overrides: the Cython-side names (c_Window,
# new_Window, del_Window) are emitted in the generated code as "window", "new window"
# and "delete" respectively.
# NOTE(review): "new window"/"delete" presumably require the module to be built as C++ - confirm
# against the build configuration.
cdef extern from "rabinpoly.h":
	ctypedef struct c_Window "window":
		# Fields and member functions exposed by the header; only slide8() and reset()
		# are used by the Window wrapper in this file.
		int size
		u_int64_t fingerprint
		int bufpos
		u_int64_t U
		u_char_t *buf
		# Feeds one byte into the window and returns a 64-bit value (used as the fingerprint).
		u_int64_t slide8(u_char_t byte_to_add)
		void reset()
	# Allocates a window via "new window(polynomial, window_size)".
	c_Window *new_Window "new window"(u_int64_t polynomial, unsigned int window_size)
	# Releases a window via "delete(window)".
	void del_Window "delete"(c_Window *window)

# Python-visible wrapper around the external c_Window object declared above.
# Owns exactly one c_Window, created in __cinit__ and released in __dealloc__.
cdef class Window:
	# Pointer to the wrapped window object; allocated in __cinit__.
	cdef c_Window *thisptr

	# Allocate the underlying window.
	# window_size is passed straight through to the external constructor (default 16).
	def __cinit__(self, unsigned int window_size=16):
		# Fixed 64-bit polynomial constant handed to the external constructor.
		fingerprint_pt = 0xbfe6b8a5bf378d83
		self.thisptr = new_Window(fingerprint_pt, window_size)

	# Release the underlying window when the Python object is destroyed.
	def __dealloc__(self):
		del_Window(self.thisptr)

	# Forward to the underlying window's reset().
	cpdef reset(self):
		self.thisptr.reset()

	# Feed one byte value into the window and return the value produced by the
	# underlying slide8() call. `character` is coerced to u_char_t by Cython.
	cpdef slide8(self, character):
		return self.thisptr.slide8(character)

cdef class Chunker(object):
	"""Iterator that splits a file-like object into variable-sized chunks.

	Bytes are read from ``file_`` in 1 MiB blocks and fed one at a time through a
	rolling ``Window``.  A chunk ends when the low ``size_exponent`` bits of the
	fingerprint equal the demarcation value, or when the chunk reaches the maximum
	size (twice the average size).  Any bytes left over when the file is exhausted
	are yielded as a final, shorter chunk, so that the concatenation of all chunks
	equals the data read from the file.

	Parameters:
		file_: object with a ``read(n)`` method that returns a false value at EOF.
		size_exponent: the average chunk size is ``2 ** size_exponent`` bytes.
	"""
	cdef Window _window
	cdef object _file
	# Bit mask selecting the low bits of the fingerprint (average size - 1).
	cdef int _mask
	cdef int _size_exponent
	cdef int _average_size
	# Hard upper bound on the chunk length (2 * average size).
	cdef int _maximum_size
	# Bytes read from the file that have not yet been returned as a chunk.
	cdef object _read_buffer
	# Masked fingerprint value that marks a chunk boundary.
	cdef int _demarcation
	# Index of the next unprocessed byte in _read_buffer; also the current chunk length.
	cdef int _read_buffer_position
	# Cached len(_read_buffer).
	cdef int _read_buffer_len

	def __init__(self, file_, int size_exponent=20):
		"""Set up the rolling window, the read buffer and the size parameters."""
		self._window = Window()
		self._file = file_
		self._read_buffer = drs_buffer_mod.DRS_buffer()
		self._read_buffer_position = 0
		self._read_buffer_len = 0
		self._size_exponent = size_exponent
		self._average_size = 2** self._size_exponent
		# The boundary marker is an arbitrary masked fingerprint value; half the
		# average size is used instead of 0 because a marker of 0 tends to produce
		# too many boundaries in files that start with runs of null bytes.
		self._demarcation = self._average_size // 2
		self._mask = self._average_size - 1
		self._maximum_size = self._average_size * 2

	def __iter__(self):
		"""Return self; the chunker is its own iterator."""
		return self

	def _do_chunk(self):
		"""Cut the processed prefix off the read buffer.

		Returns a ``(True, chunk)`` tuple where ``chunk`` is the first
		``_read_buffer_position`` bytes of the buffer; those bytes are removed
		from the buffer and the position is reset to 0.
		"""
		temp = self._read_buffer[:self._read_buffer_position]
		del self._read_buffer[:self._read_buffer_position]
		self._read_buffer_len = len(self._read_buffer)
		self._read_buffer_position = 0
		return True, temp

	def _get_fingerprint(self, int byte):
		"""Slide ``byte`` into the rolling window and return the new fingerprint."""
		return self._window.slide8(byte)

	def _process_byte_prep(self):
		"""Consume the next buffered byte and return the resulting fingerprint."""
		cdef int byte
		cdef long long fingerprint
		byte = self._read_buffer[self._read_buffer_position]
		fingerprint = self._get_fingerprint(byte)
		self._read_buffer_position += 1
		return fingerprint

	def _process_byte(self):
		"""Consume one byte and cut a chunk if a boundary has been reached.

		Returns ``(True, chunk)`` when the masked fingerprint matches the
		demarcation value or the chunk has grown to the maximum size, and
		``(False, '')`` otherwise.
		"""
		fingerprint = self._process_byte_prep()
		if fingerprint & self._mask == self._demarcation or self._read_buffer_position == self._maximum_size:
			return self._do_chunk()
		return False, ''

	def _feed_read_buffer(self):
		"""Append the next 1 MiB block of the file to the read buffer.

		Raises:
			StopIteration: if the file returns no more data.
		"""
		block = self._file.read(2**20)
		if not block:
			raise StopIteration
		self._read_buffer.extend(block)
		self._read_buffer_len = len(self._read_buffer)

	def __next__(self):
		"""Return the next chunk.

		Raises:
			StopIteration: once the file is exhausted and every buffered byte
				has been returned.
		"""
		while True:
			if self._read_buffer_position < self._read_buffer_len:
				done, buf = self._process_byte()
				if done:
					return buf
			elif self._read_buffer_position == self._read_buffer_len:
				try:
					self._feed_read_buffer()
				except StopIteration:
					if self._read_buffer_position == 0:
						# Nothing is pending: iteration is really over.
						raise
					# The file ended in the middle of a chunk.  Emit the bytes
					# processed so far as a final short chunk instead of silently
					# dropping them; the next call finds an empty buffer and stops.
					return self._do_chunk()[1]
			else:
				raise AssertionError("This should never happen")