#!/usr/bin/env python

'''Rearrange a series of lines on stdin into a random order'''

import os
import sys
import random
import collections

import python2x3


def usage(retval):
    '''Write a usage message to stderr and exit with status retval'''
    sys.stderr.write(
        'Usage: %s [-0] [--preserve-directories] [--skip-size] [-v] [--no-warnings] [-h|--help]\n' % sys.argv[0]
    )
    sys.stderr.write('-0 gives null termination instead of newline termination\n')
    sys.stderr.write('--preserve-directories says to rearrange directories, and the files within them,\n')
    sys.stderr.write(' but keep things in the same directory next to each other\n')
    sys.stderr.write(
        '--skip-size says the file size will be at the beginning of the line, '
        'separated from the filename by a blank\n'
    )
    sys.stderr.write('-v gives progress messages on stderr\n')
    sys.stderr.write('--no-warnings suppresses warning messages\n')
    sys.exit(retval)


def newlines():
    '''Generate the lines of stdin, each minus its trailing newline (if it has one)'''
    for line in sys.stdin:
        # The final line of the input may lack a newline; pass it through untouched.
        yield line[:-1] if line.endswith('\n') else line


def my_range(maximum):
    '''Generate the values from 0..maximum-1, for consistent semantics between python 2.x and python 3.x'''
    value = 0
    while value < maximum:
        yield value
        value += 1


def shuffle(list_):
    '''
    Return a new list holding the elements of list_ in random order.

    list_ may be any iterable (including a dictionary key view); it is copied
    first and is never modified.
    '''
    # list() rather than list_[:] so that non-sliceable iterables are accepted too.
    temp_list = list(list_)
    # random.shuffle is used instead of swapping each slot with a position drawn
    # from the whole list, because that naive scheme yields a biased permutation.
    random.shuffle(temp_list)
    return temp_list


def directory_shuffle(list_, skip_size=False):
    '''
    Return the elements of list_ in random order, keeping elements of a single directory adjacent.

    Elements are grouped by os.path.dirname(); the order of the groups is
    shuffled, and so is the order of the elements within each group.

    If skip_size is true, each element must look like "<size> <path>": the text
    up to the first blank is ignored when working out the directory, but the
    whole element is what appears in the result.

    Raises ValueError if skip_size is true and an element contains no blank.
    '''
    groups = collections.defaultdict(list)
    for element in list_:
        if skip_size:
            _size, separator, path = element.partition(' ')
            if not separator:
                # An explicit exception, since an assert would vanish under python -O.
                raise ValueError('--skip-size given, but line has no blank: %r' % (element, ))
        else:
            path = element
        groups[os.path.dirname(path)].append(element)

    result = []
    # shuffle() returns a new list instead of working in place, so its return
    # value is what must be iterated over - both for the directories and the files.
    for dirname in shuffle(list(groups)):
        result.extend(shuffle(groups[dirname]))
    return result


def get_input(generator, verbose, every_n):
    '''
    Return a list of everything generator() yields.

    If verbose is true, a progress message is written to stderr each time
    another every_n elements have been read.
    '''
    list_ = []
    for element_no, line in enumerate(generator()):
        if verbose and element_no % every_n == 0 and element_no != 0:
            sys.stderr.write('read %d lines\n' % element_no)
        list_.append(line)
    return list_


def main():
    '''Parse the command line, read the input, shuffle it, and write the result to stdout'''
    use_readline0 = False
    verbose = False
    every_n = 1000
    preserve_directories = False
    skip_size = False
    warnings = True

    # Options are consumed from the front of sys.argv one at a time.
    while sys.argv[1:]:
        if sys.argv[1] == '-0':
            use_readline0 = True
        elif sys.argv[1] == '--preserve-directories':
            preserve_directories = True
        elif sys.argv[1] == '--skip-size':
            skip_size = True
        elif sys.argv[1] == '-v':
            verbose = True
        elif sys.argv[1] == '--no-warnings':
            warnings = False
        elif sys.argv[1] in ['-h', '--help']:
            usage(0)
        else:
            sys.stderr.write('%s: Illegal option: %s\n' % (sys.argv[0], sys.argv[1]))
            usage(1)
        del sys.argv[1]

    if use_readline0:
        # Imported only on demand, so the module is not needed unless -0 is given.
        import readline0
        generator = readline0.readline0
        terminator = python2x3.string_to_binary('\0')
    else:
        generator = newlines
        terminator = python2x3.string_to_binary('\n')

    list_ = get_input(generator, verbose, every_n)

    if warnings and preserve_directories and not skip_size:
        # Every line having a blank hints that the input carries a size column.
        # Empty input is excluded so the warning does not fire vacuously.
        if list_ and all(' ' in element for element in list_):
            sys.stderr.write(
                'Warning: All lines have a blank in them, but --skip-size not given. '
                '--no-warnings to suppress this message\n'
            )

    if verbose:
        sys.stderr.write('Read a total of %d lines, about to start shuffling\n' % len(list_))

    if preserve_directories:
        shuffled_list = directory_shuffle(list_, skip_size)
    else:
        shuffled_list = shuffle(list_)

    if verbose:
        sys.stderr.write('Done shuffling\n')

    for element in shuffled_list:
        os.write(1, python2x3.string_to_binary(element) + terminator)


if __name__ == '__main__':
    main()