#!/usr/bin/python # We want profile data for this module, even when we're built with cython - during testing. For RL, we want to turn this off. # cython: profile=False '''This module provides a funnel sort routine - IOW, a k-ary (or exponential) merge sort with an insertion sort at the bottom''' #mport sys import math #mport time import heapq #mport pprint import exceptions #mport collections # In CPython, insertion sort seems to outperform binarysort almost always. # In Cython, insertion sort beats binarysort a little less often, but it still wins most of the time. # So we use insertion sort, not binary sort # However, binarysort is left here as a form of documentation about what I tried. ifdef(`m4purepy',`@profile') ifdef(`m4pyx',cp)def binarysort(list_, ifdef(`m4pyx',int )initial_left, ifdef(`m4pyx',int )initial_right): ifdef(`m4pyx', cdef int left) ifdef(`m4pyx', cdef int left_for_binary_search) ifdef(`m4pyx', cdef int right_for_binary_search) ''' Sort a (small) list using binary sort. It's much like insertion sort, except we use a binary search on the already-sorted portion to find where things should be placed ''' #print 'initial:', list_ sorted_left = initial_left sorted_right = initial_left + 1 #print 'foo' #sys.stdout.flush() while sorted_right < initial_right + 1: #print 'top of outer loop:', list_[sorted_left:sorted_right], list_[sorted_right+1:initial_right+1] #sys.stdout.flush() #time.sleep(0.25) left_for_binary_search = sorted_left right_for_binary_search = sorted_right pivot = list_[initial_right] #print 'put %s in its proper place' % pivot assert left_for_binary_search < right_for_binary_search while True: midpoint = (left_for_binary_search + right_for_binary_search) / 2 #print 'binsearch', 'left_for_binary_search:', left_for_binary_search, 'midpoint:', midpoint, 'right_for_binary_search:', right_for_binary_search if pivot < list_[midpoint]: #print 'moving right to midpoint' right_for_binary_search = midpoint else: #print 'moving left to midpoint' left_for_binary_search = midpoint + 1 if left_for_binary_search >= right_for_binary_search: final_point = left_for_binary_search break #print '%s belongs at index %d' % (pivot, right_for_binary_search) assert left_for_binary_search == right_for_binary_search #print 'before shifts', list_ for shift_index in xrange(initial_right - 1, final_point - 1, -1): #print 'shifting index', shift_index, 'to index', shift_index+1 list_[shift_index + 1] = list_[shift_index] #print 'after that shift:', list_ #print 'just before final placement', list_ #print 'putting %s at index %s' % (pivot, final_point) list_[final_point] = pivot sorted_right += 1 #print 'final:', list_ ifdef(`m4purepy',`@profile') ifdef(`m4pyx',cp)def insertionsort(list_, ifdef(`m4pyx',int )left, ifdef(`m4pyx',int) right): '''Perform an insertion sort on a subregion of a list''' ifdef(`m4pyx',cdef int i) ifdef(`m4pyx',cdef int j) for i in xrange(left+1, right+1): j = i temp = list_[j] #while j > left and list_[j-1] > temp: # list_[j] = list_[j-1] # j -= 1 while True: list_j_minus_1 = list_[j - 1] if j > left and list_j_minus_1 > temp: list_[j] = list_j_minus_1 j -= 1 else: break list_[j] = temp def funnelsort(list_): '''Perform a funnel sort on an entire list''' _funnelsort(list_, 0, `len'(list_) - 1) ifdef(`m4pyx',cdef )class Semi_queue: ''' Class provides a queue datastructure. Initially it doles out portions of a passed-in list, but later it duplicates the relevant subregion to facilitate in-place merging - after the size of the queue has already decreased, often significantly. ''' ifdef(`m4pyx',cdef object list) ifdef(`m4pyx',cdef public int subleft) ifdef(`m4pyx',cdef int subright) ifdef(`m4pyx',cdef int empty) # ifdef(`m4pyx',cdef int duplicated) ifdef(`m4pyx',cdef int has_some) def __init__(self, list_, ifdef(`m4pyx',int )subleft, ifdef(`m4pyx',int )subright): '''Constructor''' self.list = list_ #self.original_length = `len'(list_) self.subleft = subleft self.subright = subright if self.subleft > self.subright: self.has_some = False else: self.has_some = True # self.duplicated = False def __str__(self): return 'list: %s, has_some: %s' % (self.list[self.subleft:self.subright+1], self.has_some) __repr__ = __str__ ifdef(`m4pyx',cp)def popleft(self): '''Remove an element from the queue''' value = self.list[self.subleft] self.subleft += 1 if self.subleft > self.subright: self.has_some = False return value def __nonzero__(self): '''Is the queue nonempty?''' return self.has_some ifdef(`m4pyx',cp)def duplicate(self): '''Duplicate the still-relevant subregion, to facilitate mostly in-place merging''' # if self.duplicated: # print 'Eh? Already duplicated' # sys.exit(1) self.list = self.list[self.subleft:self.subright+1] self.subright -= self.subleft self.subleft = 0 # self.duplicated = True # This seems like a good idea, but in reality, it's very slow ifdef(`m4pyx',cp)def int_cube_root_ceil(input_value): '''Binary search for the cubed root of input_value, rounded up''' if input_value in (0, 1): return input_value # the cube root will always be >= 0, because we're only defined for nonnegative whole numbers low_guess = 0 # the cube root will always be less than input_value. Actually, we know it'll be significantly lower than that, but starting # any lower than input_value gives us rounding problems sometimes. high_guess = input_value # real_value = math.pow(input_value, 1/3.0) while True: # if low_guess > real_value: # print 'low_guess got too high:', low_guess, real_value # sys.exit(1) # if high_guess < real_value: # #print 'high_guess got too high:', high_guess, real_value # sys.exit(1) midpoint = (low_guess + high_guess) // 2 #print low_guess, midpoint, high_guess trial_value = midpoint * midpoint * midpoint if trial_value == input_value: # This is exact - return the midpoint #print 'exact:', midpoint return midpoint if low_guess + 1 == high_guess: # This one's not exact, and we know it's somewhere between low_guess and high_guess, and we know we aren't # going to get any more precision. And, we need to round up. Because the result isn't precise, we know we # need high_guess, not low_guess. #print 'inexact, round up:', high_guess return high_guess if trial_value > input_value: #print 'decreasing high_guess from', high_guess, 'to', midpoint, 'because %d > %f' % (trial_value, input_value) high_guess = midpoint elif trial_value < input_value: #print 'increasing low_guess from', low_guess, 'to', midpoint, 'because %d < %f' % (trial_value, input_value) low_guess = midpoint # It appears that Newton's method of finding a (cubed) root doesn't work for integer values. # However, we can still compute the real root of x**3 - input_value == 0 to a low # degree of precision, and then round up. # However, even this is still too slow. def newtons_cube_root(input_value): '''Compute a cubed root using newton's method''' #print 'starting', input_value float_input_value = float(input_value) trial_value = float_input_value / 3.0 epsilon = 0.499 one_minus_epsilon = 1 - epsilon while True: trial_value_squared = trial_value * trial_value trial_value_cubed = trial_value_squared * trial_value if trial_value_cubed == float_input_value: #print 'ending exact' return int(trial_value) if abs(trial_value_cubed - float_input_value) < epsilon: #print 'ending close enough' return int(trial_value + one_minus_epsilon) new_trial_value = trial_value - (trial_value_cubed - float_input_value) / (3 * trial_value_squared) #print 'got to', new_trial_value, 'from', trial_value trial_value = new_trial_value #ifdef(`m4purepy',`@profile') ifdef(`m4pyx',c)def divide_and_subsort( \ ifdef(m4pyx,int )width, \ list_, \ ifdef(`m4pyx',int )left, \ ifdef(`m4pyx',int )right, \ ): # pylint: disable-msg=R0913 # R0913: We want 6 arguments '''Give each queue an initial values''' ifdef(`m4pyx',cdef int subwidth) ifdef(`m4pyx',cdef int portions) ifdef(`m4pyx',cdef int portionno) ifdef(`m4pyx',cdef int subleft) ifdef(`m4pyx',cdef int subright) # This seems to be faster even than newton's method with a low precision. I wonder if it's getting done in hardware? # It's also faster than math.pow. subwidth = int(math.ceil(width ** 0.33333333333)) queues = [] # get k (portions) sorted sublists #portions = int(math.ceil(float(width) / subwidth)) portions = int((width + subwidth - 1) // subwidth) for portionno in xrange(portions): subleft = left + portionno * subwidth subright = subleft + subwidth - 1 if subright > right: subright = right if subleft > subright: # nothing to do continue _funnelsort(list_, subleft, subright) queue = Semi_queue(list_, subleft, subright) queues.append(queue) return queues #ifdef(`m4purepy',`@profile') ifdef(`m4pyx',c)def seed_heap(queues): '''Fill the heap initially with one value from each queue''' # BTW, I tried using a treap instead of a heapq - it's slower than using heapq heap = [] # pull 1 value from each queue to seed the heap (0 values for an already-empty queue) for queueno, queue in enumerate(queues): if queue: value = queue.popleft() heap.append((value, queueno)) heapq.heapify(heap) return heap #ifdef(`m4purepy',`@profile') def merge(list_, queues, heap, ifdef(`m4pyx',int )add_at): '''Merge the queues into list_''' ifdef(`m4pyx',cdef int duplicated_queues) duplicated_queues = 0 while heap: # get the minimum value minimum_value, queueno = heapq.heappop(heap) # expose space in list_ that we can fill into, as necessary while True: try: possible_duplicate_queue = queues[duplicated_queues] except exceptions.IndexError: # all queues have been duplicated - albeit odds are most were duplicated only after having shrunken quite a lot break else: if possible_duplicate_queue.subleft == add_at: possible_duplicate_queue.duplicate() duplicated_queues += 1 # and go back up for another iteration, in case we need to duplicate something else too else: # nothing to duplicate break list_[add_at] = minimum_value add_at += 1 possible_pull_from_queue = queues[queueno] if possible_pull_from_queue: value = possible_pull_from_queue.popleft() heapq.heappush(heap, (value, queueno)) #ifdef(`m4purepy',`@profile') ifdef(`m4pyx',c)def _funnelsort(list_, ifdef(`m4pyx',int )left, ifdef(`m4pyx',int )right): '''funnel sort a subregion of a list''' ifdef(`m4pyx',cdef int width) ifdef(`m4pyx',cdef int queueno) width = right - left + 1 # 72 seemed to perform the best on DRS' machine, and while that will probably be different on other machines, the performance # difference isn't extreme for other values, and optimizing this parameter dynamically is complex. if width <= 72: # we have a list of small width, it's faster to use insertion sort insertionsort(list_, left, right) else: # divide the list into sublists, recursively, and sort each list as we come back queues = divide_and_subsort(width, list_, left, right) # Set up a heap with one value from each queue heap = seed_heap(queues) # Now merge those regions, in place, using the heap to keep track of what the least element is each step of the way. # We use a heap rather than a straightforward list, because we will sometimes have a large number of lists to merge. merge(list_, queues, heap, left)