#!/usr/bin/python3 '''Module provides a mergesort. Requires 2n memory''' import os import sys sys.path.insert(0, '/usr/local/lib') sys.path.insert(0, os.path.expanduser('~/lib')) import readline0 def insertionsort(list_, left, right): '''Perform an insertion sort on a subregion of a list''' for i in xrange(left+1, right+1): j = i temp = list_[j] #while j > left and list_[j-1] > temp: # list_[j] = list_[j-1] # j -= 1 while True: list_j_minus_1 = list_[j - 1] if j > left and list_j_minus_1 > temp: list_[j] = list_j_minus_1 j -= 1 else: break list_[j] = temp def mergesort(list_): '''Convenient wrapper for our worker merge sort routine''' length = len(list_) # Just give us a preallocated list of the correct length. We don't really care about the values. help_list = range(length) mergesort_c(list_, help_list, length) def mergesort_c(list_, help_list, size): '''merge sort algorithm''' source_list = list_ dest_list = help_list need_final_copy = False # use insertion sort to get a bunch of sublists sorted initially sublist_size = 128 for sublist_start in xrange(0, size, sublist_size): insertionsort(list_, sublist_start, min(sublist_start + sublist_size - 1, size - 1)) size_times_2 = size * 2 # now start merging from (initially) a sublist size of 256... while sublist_size < size_times_2: sublist_size *= 2 dest_index = 0 for sublist_start in xrange(0, size, sublist_size): sublist1_index = sublist_start sublist1_end = sublist_start + sublist_size // 2 if sublist1_end > size: sublist1_end = size sublist2_index = sublist1_end sublist2_end = sublist_start+sublist_size if(sublist2_end > size): sublist2_end = size # merge the two lists until we hit the end of one of them while sublist1_index < sublist1_end and sublist2_index < sublist2_end: if source_list[sublist1_index] < source_list[sublist2_index]: dest_list[dest_index] = source_list[sublist1_index] dest_index += 1 sublist1_index += 1 else: dest_list[dest_index] = source_list[sublist2_index] dest_index += 1 sublist2_index += 1 # merge list 1 - if it's not already empty while sublist1_index < sublist1_end: dest_list[dest_index] = source_list[sublist1_index] dest_index += 1 sublist1_index += 1 # merge list 2 - if it's not already empty while sublist2_index < sublist2_end: dest_list[dest_index] = source_list[sublist2_index] dest_index += 1 sublist2_index += 1 # swap the source list and destination list, so we can merge back into the other on the next iteration source_list, dest_list = dest_list, source_list # and invert the sense of our "need final copy" flag need_final_copy = not need_final_copy if need_final_copy: for source_and_dest_index in xrange(size): list_[source_and_dest_index] = help_list[source_and_dest_index] class File: def __init__(self, filename): self.filename = filename def __cmp__(self, other): # Python 2.x with open(self.filename, 'rb') as self_file, open(other.filename, 'rb') as other_file: while True: self_char = self_file.read(1024) other_char = other_file.read(1024) if self_char < other_char: comparison_result = -1 elif self_char > other_char: comparison_result = 1 else: comparison_result = 0 if comparison_result == 0: if not self_char and not other_char: # we're at EOF on both return 0 else: continue return comparison_result def __lt__(self, other): # Python 3.x cmp_value = self.__cmp__(other) if cmp_value < 0: return True else: return False def __str__(self): return str(self.filename) def is_prefix_of(self, other): '''Return true if self is a (not necessarily proper) prefix of other''' with open(self.filename, 'rb') as self_file, open(other.filename, 'rb') as other_file: while True: self_char = self_file.read(1) other_char = other_file.read(1) if self_char and other_char: if self_char == other_char: continue else: return False if not self_char and other_char: # self ended before other: proper prefix return True if not self_char and not other_char: # Actually, the files are identical return True def main(): list_ = [] for filename in readline0.readline0(): list_.append(File(filename)) # for element in list_: # print(element.filename) # print(element) print('read %d filenames' % len(list_)) print('sorting...') list_.sort() print('looking for prefixes...') len_list = len(list_) for this_index in range(len_list - 1): that_index = this_index + 1 this = list_[this_index] that = list_[that_index] if this.is_prefix_of(that): print("%s is a prefix of %s" % (this, that)) main()