#!/usr/bin/python3

"""Rearrange a series of lines on stdin into a random order."""

import os
import sys
import errno
import random
import collections

# readline0 (imported lazily in main() for -0) is not a stdlib module; this
# presumably is where it is installed -- verify against the deployment.
sys.path.append("/usr/local/lib")


def usage(retval):
    """Write a usage message and exit with status retval.

    The message goes to stdout when retval is 0 (help was requested) and to
    stderr otherwise (the command line was wrong).
    """
    write = sys.stdout.write if retval == 0 else sys.stderr.write

    write(
        "Usage: %s [-0] [--preserve-directories] [--skip-size] [--seed s] "
        "[-v] [--no-warnings] [-h|--help]\n" % sys.argv[0]
    )
    write("\n")
    write("Read n lines of data, and output them in a random order.\n")
    write("\n")
    write("-0 gives null termination instead of newline termination\n")
    write("--preserve-directories says to rearrange directories, and the files within them,\n")
    write("   but keep things in the same directory next to each other\n")
    write("--skip-size says the file size will be at the beginning of the line,\n")
    write("   separated from the filename by a blank\n")
    write("--seed s says seed the random number generator with s\n")
    write("-v says to report progress on stderr\n")
    write("--no-warnings says to suppress warnings about suspicious input\n")

    sys.exit(retval)


def newlines():
    """Generate lines of text from stdin, minus their newline terminator."""
    for line in sys.stdin:
        # A final line without a trailing newline is passed through as-is.
        yield line[:-1] if line.endswith("\n") else line


def shuffle(seed, list_):
    """Return a new list holding the elements of list_ in random order.

    seed:  value used to seed the generator; None means "seed from the system".
    list_: any iterable; it is copied, never modified.

    Uses random.Random.shuffle (Fisher-Yates), so every permutation is equally
    likely.  Swapping each slot with an index drawn from the whole range, by
    contrast, favours some permutations over others.
    """
    randobj = random.Random(seed)
    temp_list = list(list_)
    randobj.shuffle(temp_list)
    return temp_list


def _blank_for(element):
    """Return a single blank of the same type (bytes or str) as element."""
    return b" " if isinstance(element, bytes) else " "


def directory_shuffle(seed, list_, skip_size=False):
    """Rearrange elements of list_ into random order, but keep elements of a single directory adjacent.

    seed:      passed to shuffle() for the directory order and for each
               directory's contents.
    list_:     the lines to rearrange; str or bytes paths.
    skip_size: when true, each line is "<size><blank><path>" and only the part
               after the first blank is used to find the directory.

    Returns a new list.  Raises ValueError if skip_size is true and a line
    contains no blank.
    """
    by_directory = collections.defaultdict(list)

    for element in list_:
        if skip_size:
            _size, separator, path = element.partition(_blank_for(element))
            if not separator:
                raise ValueError("--skip-size given, but line has no blank: %r" % (element,))
        else:
            path = element
        by_directory[os.path.dirname(path)].append(element)

    # Keys come out of the dictionary in a consistent order, which might be
    # too consistent for some purposes - so we shuffle them.  shuffle()
    # returns a new list rather than working in place, so its result must be
    # the thing we iterate over.
    result = []
    for dirname in shuffle(seed, by_directory):
        result.extend(shuffle(seed, by_directory[dirname]))

    return result


def get_input(generator, verbose, every_n):
    """Collect everything generator() yields into a list.

    When verbose is true, a progress line is written to stderr after every
    every_n elements.
    """
    list_ = []
    for element_no, line in enumerate(generator()):
        if verbose and element_no % every_n == 0 and element_no != 0:
            sys.stderr.write("read %d lines\n" % element_no)
        list_.append(line)
    return list_


def _parse_args(args):
    """Turn the command-line arguments (program name excluded) into an options dict.

    Exits via usage() on -h/--help, on an unknown option, or on a missing or
    non-integer --seed value.
    """
    options = {
        "use_readline0": False,
        "verbose": False,
        "preserve_directories": False,
        "skip_size": False,
        "warnings": True,
        "seed": None,
    }
    flags = {
        "-0": ("use_readline0", True),
        "--preserve-directories": ("preserve_directories", True),
        "--skip-size": ("skip_size", True),
        "-v": ("verbose", True),
        "--no-warnings": ("warnings", False),
    }

    args = list(args)
    index = 0
    while index < len(args):
        arg = args[index]
        if arg in flags:
            key, value = flags[arg]
            options[key] = value
        elif arg == "--seed":
            index += 1
            if index >= len(args):
                sys.stderr.write("%s: --seed requires an argument\n" % sys.argv[0])
                usage(1)
            try:
                options["seed"] = int(args[index])
            except ValueError:
                sys.stderr.write("%s: --seed requires an integer: %s\n" % (sys.argv[0], args[index]))
                usage(1)
        elif arg in ("-h", "--help"):
            usage(0)
        else:
            sys.stderr.write("%s: Illegal option: %s\n" % (sys.argv[0], arg))
            usage(1)
        index += 1

    return options


def _all_have_blank(list_):
    """Return True if list_ is non-empty and every element contains a blank."""
    return bool(list_) and all(_blank_for(element) in element for element in list_)


def _write_output(elements, terminator):
    """Write each element followed by terminator to standard output.

    bytes go straight to file descriptor 1; str goes through sys.stdout.
    A broken pipe (the reader went away) ends the program quietly with
    status 0; any other OSError propagates.
    """
    try:
        for element in elements:
            string = element + terminator
            if isinstance(string, bytes):
                os.write(1, string)
            else:
                sys.stdout.write(string)
        # Flush here so a broken pipe surfaces inside this try block rather
        # than during interpreter shutdown.
        sys.stdout.flush()
    except OSError as exc:
        if exc.errno != errno.EPIPE:
            raise
        # Point stdout at devnull so the flush Python performs at exit cannot
        # raise a second BrokenPipeError.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(0)


def main():
    """Start randomizing."""
    every_n = 1000
    options = _parse_args(sys.argv[1:])
    verbose = options["verbose"]

    if options["use_readline0"]:
        # Imported only on demand: it is not part of the standard library.
        import readline0

        generator = readline0.readline0
        terminator = b"\0"
    else:
        generator = newlines
        terminator = "\n"

    list_ = get_input(generator, verbose, every_n)

    if options["warnings"] and options["preserve_directories"] and not options["skip_size"]:
        # Every line having a blank suggests the input carries a leading size
        # column that --skip-size was meant to strip.
        if _all_have_blank(list_):
            sys.stderr.write("Warning: All lines have a blank in them, but --skip-size not given.\n")
            sys.stderr.write("Use --no-warnings to suppress this message\n")

    if verbose:
        sys.stderr.write("Read a total of %d lines, about to start shuffling\n" % len(list_))

    if options["preserve_directories"]:
        shuffled_list = directory_shuffle(options["seed"], list_, options["skip_size"])
    else:
        shuffled_list = shuffle(options["seed"], list_)

    if verbose:
        sys.stderr.write("Done shuffling\n")

    _write_output(shuffled_list, terminator)


if __name__ == "__main__":
    main()