#!/usr/bin/python # cython: language_level=3 # We want profile data for this module, even when we're built with cython - during testing. For RL, we want to turn this off. # cython: profile=False """This module provides a funnel sort routine - IOW, a k-ary (or exponential) merge sort with an insertion sort at the bottom.""" #mport sys import math #mport time import heapq #mport pprint #mport exceptions #mport collections # In CPython, insertion sort seems to outperform binarysort almost always. # In Cython, insertion sort beats binarysort a little less often, but it still wins most of the time. # So we use insertion sort, not binary sort # However, binarysort is left here as a form of documentation about what I tried. import insertionsort def funnelsort(ifdef(`m4pyx',list )list_): """Perform a funnel sort on an entire list.""" _funnelsort(list_, 0, `len'(list_) - 1) ifdef(`m4pyx',cdef )class Semi_queue(object): """ Class provides a queue datastructure. Initially it doles out portions of a passed-in list, but later it duplicates the relevant subregion to facilitate in-place merging - after the size of the queue has already decreased, often significantly. """ ifdef(`m4pyx',` cdef list list') ifdef(`m4pyx',` cdef public unsigned int subleft') # we use adjust to make the sort stable - to give a relative position within our first (and sometimes second) buffer(s) ifdef(`m4pyx',` cdef int adjust') ifdef(`m4pyx',` cdef unsigned int subright') ifdef(`m4pyx',` cdef unsigned int empty') ifdef(`m4pyx',` cdef unsigned int has_some') def __init__(self, ifdef(`m4pyx',list )list_, ifdef(`m4pyx',unsigned int )subleft, ifdef(`m4pyx',unsigned int )subright): """Constructor.""" self.list = list_ #self.original_length = `len'(list_) self.adjust = -subleft self.subleft = subleft self.subright = subright if self.subleft > self.subright: self.has_some = False else: self.has_some = True # self.duplicated = False def __str__(self): return 'list: %s, has_some: %s' % (self.list[self.subleft:self.subright+1], self.has_some) __repr__ = __str__ ifdef(`m4pyx',cp)def popleft(self): """Remove an element from the queue.""" value = self.list[self.subleft] self.subleft += 1 if self.subleft > self.subright: self.has_some = False return self.subleft + self.adjust, value def __nonzero__(self): """Is the queue nonempty?""" return self.has_some __bool__ = __nonzero__ ifdef(`m4pyx',cp)def duplicate(self): """Duplicate the still-relevant subregion, to facilitate mostly in-place merging.""" # if self.duplicated: # print 'Eh? Already duplicated' # sys.exit(1) self.list = self.list[self.subleft:self.subright+1] self.adjust = self.subleft + self.adjust self.subright -= self.subleft self.subleft = 0 # self.duplicated = True # This seems like a good idea, but in reality, it's very slow ifdef(`m4pyx',cp)def int_cube_root_ceil(input_value): """Binary search for the cubed root of input_value, rounded up.""" if input_value in (0, 1): return input_value # the cube root will always be >= 0, because we're only defined for nonnegative whole numbers low_guess = 0 # the cube root will always be less than input_value. Actually, we know it'll be significantly lower than that, but starting # any lower than input_value gives us rounding problems sometimes. high_guess = input_value while True: # if low_guess > real_value: # print 'low_guess got too high:', low_guess, real_value # sys.exit(1) # if high_guess < real_value: # #print 'high_guess got too high:', high_guess, real_value # sys.exit(1) midpoint = (low_guess + high_guess) // 2 #print low_guess, midpoint, high_guess trial_value = midpoint * midpoint * midpoint if trial_value == input_value: # This is exact - return the midpoint #print 'exact:', midpoint return midpoint if low_guess + 1 == high_guess: # This one's not exact, and we know it's somewhere between low_guess and high_guess, and we know we aren't # going to get any more precision. And, we need to round up. Because the result isn't precise, we know we # need high_guess, not low_guess. #print 'inexact, round up:', high_guess return high_guess if trial_value > input_value: #print 'decreasing high_guess from', high_guess, 'to', midpoint, 'because %d > %f' % (trial_value, input_value) high_guess = midpoint elif trial_value < input_value: #print 'increasing low_guess from', low_guess, 'to', midpoint, 'because %d < %f' % (trial_value, input_value) low_guess = midpoint # It appears that Newton's method of finding a (cubed) root doesn't work for integer values. # However, we can still compute the real root of x**3 - input_value == 0 to a low # degree of precision, and then round up. # However, even this is still too slow. def newtons_cube_root(input_value): """Compute a cubed root using newton's method.""" #print 'starting', input_value float_input_value = float(input_value) trial_value = float_input_value / 3.0 epsilon = 0.499 one_minus_epsilon = 1 - epsilon while True: trial_value_squared = trial_value * trial_value trial_value_cubed = trial_value_squared * trial_value if trial_value_cubed == float_input_value: #print 'ending exact' return int(trial_value) if abs(trial_value_cubed - float_input_value) < epsilon: #print 'ending close enough' return int(trial_value + one_minus_epsilon) new_trial_value = trial_value - (trial_value_cubed - float_input_value) / (3 * trial_value_squared) #print 'got to', new_trial_value, 'from', trial_value trial_value = new_trial_value #ifdef(`m4purepy',`@profile') ifdef(`m4pyx',c)def divide_and_subsort( \ ifdef(`m4pyx',unsigned int )width, \ ifdef(`m4pyx',list )list_, \ ifdef(`m4pyx',unsigned int )left, \ ifdef(`m4pyx',unsigned int )right, \ ): """Give each queue an initial values.""" ifdef(`m4pyx',` cdef unsigned int subwidth') ifdef(`m4pyx',` cdef unsigned int portions') ifdef(`m4pyx',` cdef unsigned int portionno') ifdef(`m4pyx',` cdef unsigned int subleft') ifdef(`m4pyx',` cdef unsigned int subright') # This seems to be faster even than newton's method with a low precision. I wonder if it's getting done in hardware? # It's also faster than math.pow. subwidth = int(math.ceil(width ** 0.33333333333)) queues = [] # get k (portions) sorted sublists portions = int((width + subwidth - 1) // subwidth) for portionno in range(portions): subleft = left + portionno * subwidth subright = subleft + subwidth - 1 if subright > right: subright = right if subleft > subright: # nothing to do continue _funnelsort(list_, subleft, subright) queue = Semi_queue(list_, subleft, subright) queues.append(queue) return queues #ifdef(`m4purepy',`@profile') ifdef(`m4pyx',c)def seed_heap(queues): """Fill the heap initially with one value from each queue.""" # BTW, I tried using a treap instead of a heapq - it's slower than using heapq heap = [] # pull 1 value from each queue to seed the heap (0 values for an already-empty queue) for queueno, queue in enumerate(queues): if queue: offset, value = queue.popleft() heap.append((value, queueno, offset)) heapq.heapify(heap) return heap #ifdef(`m4purepy',`@profile') def merge(ifdef(`m4pyx',list )list_, queues, ifdef(`m4pyx',list )heap, ifdef(`m4pyx',unsigned int )add_at): """Merge the queues into list_.""" ifdef(`m4pyx',` cdef unsigned int duplicated_queues') duplicated_queues = 0 while heap: # get the minimum value minimum_value, queueno, offset = heapq.heappop(heap) # expose space in list_ that we can fill into, as necessary while True: try: possible_duplicate_queue = queues[duplicated_queues] except IndexError: # all queues have been duplicated - albeit odds are most were duplicated only after having shrunken quite a lot break else: if possible_duplicate_queue.subleft == add_at: possible_duplicate_queue.duplicate() duplicated_queues += 1 # and go back up for another iteration, in case we need to duplicate something else too else: # nothing to duplicate break list_[add_at] = minimum_value add_at += 1 possible_pull_from_queue = queues[queueno] if possible_pull_from_queue: offset, value = possible_pull_from_queue.popleft() heapq.heappush(heap, (value, queueno, offset)) #ifdef(`m4purepy',`@profile') ifdef(`m4pyx',c)def _funnelsort(ifdef(`m4pyx',list )list_, ifdef(`m4pyx',unsigned int )left, ifdef(`m4pyx',unsigned int )right): """funnel sort a subregion of a list.""" ifdef(`m4pyx',` cdef unsigned int width') ifdef(`m4pyx',` cdef unsigned int queueno') width = right - left + 1 # 72 seemed to perform the best on DRS' machine, and while that will probably be different on other machines, the performance # difference isn't extreme for other values, and optimizing this parameter dynamically is complex. if width <= 72: # we have a list of small width, it's faster to use insertion sort insertionsort.insertionsort(list_, left, right) else: # divide the list into sublists, recursively, and sort each list as we come back queues = divide_and_subsort(width, list_, left, right) # Set up a heap with one value from each queue heap = seed_heap(queues) # Now merge those regions, in place, using the heap to keep track of what the least element is each step of the way. # We use a heap rather than a straightforward list, because we will sometimes have a large number of lists to merge. merge(list_, queues, heap, left)