#!/usr/bin/python3

'''Read data ranges from stdin - slow, but effective with a compressed disk image'''

# port os
import sys
# port string
import functools


def my_range(least, highest, step):
    '''Yield least, least+step, ... for as long as the value is below highest.'''
    value = least
    while value < highest:
        yield value
        value += step


def uniq(input):
    '''Return the distinct elements of input as a sorted list.'''
    return sorted(set(input))


@functools.total_ordering
class region_class:
    '''A byte range [offset - left, offset + right] of the input stream.

    Bytes that fall inside the range are collected in self.bytes and written
    to the file "result-<offset>" (in the current directory) once the stream
    moves past the range or close() is called.  Regions order by their right
    edge.
    '''

    def __init__(self, offset, left, right):
        '''offset is the center of the range as a decimal string; left and
        right are the integer widths on either side of it.'''
        self.str_offset = offset
        self.int_offset = int(offset)
        self.left = self.int_offset - left
        self.right = self.int_offset + right
        self.file = None
        self.file_open = 0
        self.found_one = 0
        # bytearray so that the integers obtained by indexing a bytes object
        # can be appended directly and written out without conversion
        self.bytes = bytearray()

    def _flush(self):
        '''Write the collected bytes to the output file and close it.'''
        if self.file_open:
            self.file.write(bytes(self.bytes))
            self.file.close()
            self.file_open = 0
            # drop the buffer so a later close() cannot write it twice
            self.bytes = bytearray()

    def within(self, offset):
        '''Return 1 if offset lies inside this region (both edges inclusive), else 0.

        Side effects: the first offset inside the region opens the output
        file; the first offset outside it after that writes the collected
        bytes and closes the file.
        '''
        if self.left <= offset <= self.right:
            if not self.file_open:
                # binary mode: the data is raw bytes from the input stream
                self.file = open("result-%s" % self.str_offset, 'wb')
                self.file_open = 1
            if verbose >= 2:
                if not self.found_one:
                    self.found_one = 1
                    sys.stderr.write('First offset %d is between %d and %d\n' % (offset, self.left, self.right))
            if verbose >= 3:
                sys.stderr.write('%d is between %d and %d\n' % (offset, self.left, self.right))
            return 1
        else:
            if verbose >= 4:
                sys.stderr.write('%d is not between %d and %d\n' % (offset, self.left, self.right))
            self._flush()
            return 0

    def __cmp__(self, other):
        '''Three-way comparison on the right edge: -1, 0 or 1.'''
        if self.right < other.right:
            return -1
        elif self.right > other.right:
            return 1
        else:
            return 0

    def __lt__(self, other):
        return self.__cmp__(other) == -1

    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def write(self, character):
        '''Write character (a bytes object) straight to the open output file.'''
        self.file.write(character)

    def close(self):
        '''Write any bytes still buffered and close the output file.

        Safe to call when the region was never reached (no file was opened)
        and when the file has already been written and closed.
        '''
        self._flush()


left_width = 10000
right_width = left_width
offsets = []
verbose = 0
regions = []
num_regions = 0
highest_right = 0
blocksize = 2**20


def usage(retval):
    '''Print the option summary to stderr and exit with status retval.'''
    sys.stderr.write('-l left\t\tdecrease offsets by left\n')
    sys.stderr.write('-r right\tincrease offsets by right\n')
    sys.stderr.write('-h\t\tgive this help message\n')
    sys.stderr.write('-v\t\tincrease verbosity. May be repeated\n')
    sys.stderr.write('-o offset\tSpecify an offset at the center of ranges to write from stdin. May be repeated\n')
    sys.stderr.write('-O offset1 offset2 ... offsetn\tSpecify a list of offsets at the center of ranges to write from stdin. May be the last option\n')
    sys.exit(retval)


def _parse_arguments(argv):
    '''Set left_width, right_width, verbose and offsets from argv (argv[0] is skipped).'''
    global left_width, right_width, verbose
    args = list(argv[1:])
    index = 0
    while index < len(args):
        option = args[index]
        has_value = index + 1 < len(args)
        if option == '-l' and has_value:
            left_width = int(args[index + 1])
            index += 1
        elif option == '-r' and has_value:
            right_width = int(args[index + 1])
            index += 1
        elif option == '-h':
            usage(0)
        elif option == '-v':
            verbose += 1
        elif option == '-o' and has_value:
            offsets.append(args[index + 1])
            index += 1
        elif option == '-O' and has_value:
            # -O swallows everything that follows it
            offsets.extend(args[index + 1:])
            index = len(args) - 1
        else:
            sys.stderr.write('Illegal argument %s\n' % option)
            sys.stderr.write('\n')
            usage(1)
        index += 1


def _build_regions():
    '''Create the sorted module-level region list from the collected offsets.'''
    global offsets, regions, num_regions, highest_right
    if len(offsets) == 0:
        sys.stderr.write('Sorry, at least one -o is required\n')
        usage(1)
    # we don't want to be writing to the same file via multiple opens, so eliminate duplicates
    offsets2 = uniq(offsets)
    if len(offsets) != len(offsets2):
        sys.stderr.write('Warning: elminated %d offsets due to duplication\n' % (len(offsets) - len(offsets2)))
    offsets = offsets2
    regions = [region_class(offset, left_width, right_width) for offset in offsets]
    regions.sort()
    num_regions = len(regions)
    highest_right = regions[num_regions - 1].right


def main(stream=None, region_list=None, block_size=None):
    '''Copy the bytes of every region from stream into that region's file.

    stream      binary file object to read; defaults to sys.stdin.buffer
    region_list region_class instances to fill; defaults to the module-level
                regions list
    block_size  number of bytes to request per read; defaults to blocksize

    Reading stops once the highest right edge has been passed or the stream
    is exhausted; every region is then closed, which writes whatever it
    collected.
    '''
    if stream is None:
        stream = sys.stdin.buffer
    if region_list is None:
        region_list = regions
    if block_size is None:
        block_size = blocksize
    if not region_list:
        return
    last_wanted = max(region.right for region in region_list)
    sys.stderr.write('Will search through stdin from 0 to %d\n' % last_wanted)
    # Track the position by the number of bytes actually read so offsets stay
    # correct even if a read returns fewer bytes than requested.
    position = 0
    # "<=" because the right edge of a region is inclusive
    while position <= last_wanted:
        if verbose >= 1:
            sys.stderr.write('quantized_offset is %d\n' % position)
        block = stream.read(block_size)
        if not block:
            break
        for adjustment, byte in enumerate(block):
            not_quantized_offset = position + adjustment
            if verbose >= 5:
                sys.stderr.write('not_quantized_offset is %d\n' % not_quantized_offset)
            for region in region_list:
                if region.within(not_quantized_offset):
                    region.bytes.append(byte)
        position += len(block)
    for region in region_list:
        region.close()


if __name__ == '__main__':
    _parse_arguments(sys.argv)
    _build_regions()
    main()