#!/usr/bin/python3

"""Binary grepper.

Search files (or standard input) for a regular expression over raw bytes and
print the 1-based byte offset of every match, optionally with context bytes.
The process exits with status 0 when at least one match was reported and 1
otherwise.
"""

import os
import re
import sys
import getopt


def usage(retval):
    """Output a usage message and exit with status ``retval``.

    The message is written to stderr when ``retval`` is non-zero (the caller
    is reporting an error) and to stdout otherwise.
    """
    write = sys.stderr.write if retval else sys.stdout.write
    write('usage: bgrep [-e] [-b] [-A after_bytes] [-B before_bytes] [-o] pattern filelist\n')
    write('\n')
    write('\t-h\t\toutput this help message\n')
    write('\t-e\t\techo pattern to stderr - for troubleshooting shell quoting issues\n')
    write('\t-i\t\tignore case\n')
    write('\t-f\t\tflush frequently\n')
    write('\t-b buffer_len\tdefines the buffer length\n')
    write('\t-o overlap_size\tdefines the amount of overlap between two buffers\n')
    write('\t-l\t\tlist filenames along with the offsets\n')
    write('\t-A num_bytes\tnumber of bytes to display after the match. Must include the match itself\n')
    write('\t-B num_bytes\tnumber of bytes to display before the match\n')
    write('\t-x\t\tTreat pattern as a hexadecimal string. Note that it is still compiled as a regex\n')
    # The pattern is compiled by Python's `re` module, so describe it as such.
    write('\tpattern\t\tis a Python regular expression (or hex with -x)\n')
    write('\tfilelist\tis a list of filenames\n')
    write('\n')
    sys.exit(retval)


# When true, every reported hit is prefixed with "<filename>:".
LIST_FILENAMES = False

# 1-based offset of the most recently reported hit, or -1 when nothing has
# been reported yet; main() derives the exit status from it.
LAST_HIT = -1

# 1-based offsets already reported for the stream currently being scanned.
# Consecutive buffers overlap, so the same match can be found twice.
_REPORTED_HITS = set()

# Byte values that show() emits verbatim; everything else is octal-escaped.
_PRINTABLE = frozenset(
    b'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    b'/.,<>?";:\'|\\][{}=+-_`~!@#$%^&*()\n \t'
)


def to_bytes(string):
    """Convert string to bytes."""
    return string.encode(encoding='ASCII')


def escape_unprintable(data):
    """Return ``data`` with every unprintable byte replaced by ``\\ooo``.

    ``data`` is a bytes-like object.  Bytes listed in ``_PRINTABLE`` are
    copied through unchanged; any other byte becomes a backslash followed by
    exactly three octal digits.
    """
    pieces = []
    for byte in data:
        if byte in _PRINTABLE:
            pieces.append(bytes((byte,)))
        else:
            # oct() would yield a '0o' prefix in Python 3; format explicitly.
            pieces.append(b'\\%03o' % byte)
    return b''.join(pieces)


def show(string):
    """Output a string, with octal escapes for unprintable characters.

    ``string`` is a bytes-like object.  The escaped form plus a trailing
    newline is written directly to file descriptor 1.
    """
    # Text already queued on sys.stdout (e.g. a "filename:" prefix) must reach
    # the descriptor before the raw bytes written below.
    sys.stdout.flush()
    remaining = memoryview(escape_unprintable(string) + b'\n')
    while remaining:
        # os.write may accept fewer bytes than offered; keep going.
        written = os.write(1, remaining)
        remaining = remaining[written:]


def show_occurences(flush_frequently, stuff, reg, offset, before, after, filename):
    # pylint: disable=too-many-arguments,global-statement
    """Show occurences of pattern 'reg' within 'filename'.

    Parameters:
        flush_frequently: flush sys.stdout after every reported hit.
        stuff: the bytes buffer to search.
        reg: compiled bytes regular expression.
        offset: position of ``stuff[0]`` within the whole stream.  A value of
            0 marks the first buffer of a stream and resets de-duplication.
        before: number of context bytes to show ahead of the match start.
        after: number of bytes to show from the match start onwards.  When it
            is 0 only the 1-based offset of the hit is printed.
        filename: name printed as a prefix when LIST_FILENAMES is set.

    Every match start is a candidate (searching resumes one byte after the
    previous match start, so overlapping matches are found).  A hit whose
    absolute offset was already reported for this stream is skipped.  Context
    is cut from ``stuff`` only, so it is truncated at the buffer edges.
    """
    global LAST_HIT
    if offset == 0:
        # New stream: offsets from a previous file must not suppress hits here.
        _REPORTED_HITS.clear()
    else:
        # Offsets at or before this buffer's start can never recur; drop them
        # so the set stays bounded by the hits of the overlapping region.
        _REPORTED_HITS.difference_update(
            [hit for hit in _REPORTED_HITS if hit <= offset]
        )

    start = 0
    while True:
        result = reg.search(stuff, start)
        if result is None:
            break
        pos = result.start()
        start = pos + 1
        hit = pos + 1 + offset  # reported offsets are 1-based
        if hit in _REPORTED_HITS:
            continue
        _REPORTED_HITS.add(hit)
        LAST_HIT = hit
        if LIST_FILENAMES:
            sys.stdout.write(filename + ':')
        if after:
            # Clamp at 0: a negative start would slice from the buffer's end.
            show(stuff[max(0, pos - before):pos + after])
        else:
            print(hit)
        if flush_frequently:
            sys.stdout.flush()


def _parse_int(text):
    """Return ``text`` as an int; print usage and exit 1 if it is not one."""
    try:
        return int(text)
    except ValueError:
        usage(1)
    return None  # not reached: usage() exits


def _read_chunk(file_, size, filename):
    """Read up to ``size`` bytes; on an I/O error report it and return b''."""
    try:
        return file_.read(size) or b''
    except OSError:
        sys.stderr.write('Got I/O Error reading file %s\n' % filename)
        return b''


def _scan_stream(file_, filename, reg, sizes, context, flush_frequently):
    """Search one open binary stream buffer by buffer.

    ``sizes`` is ``(buffer_size, overlap_size)`` and ``context`` is
    ``(before, after)``.  Each buffer keeps the last ``overlap_size`` bytes of
    the previous one so that matches straddling a boundary are still found.
    """
    buffer_size, overlap_size = sizes
    before, after = context
    stuff = _read_chunk(file_, buffer_size + overlap_size, filename)
    offset = 0
    if stuff:
        show_occurences(flush_frequently, stuff, reg, offset, before, after, filename)
    while True:
        new = _read_chunk(file_, buffer_size, filename)
        if not new:
            break
        stuff = stuff[buffer_size:] + new
        offset += buffer_size
        show_occurences(flush_frequently, stuff, reg, offset, before, after, filename)


def main():
    # pylint: disable=too-many-branches,global-statement
    """Search in binary data.

    Parses ``sys.argv``, compiles the pattern as a bytes regular expression
    with Python's ``re`` module (MULTILINE, plus IGNORECASE with -i) and scans
    every named file; '-' or an empty file list means standard input.  Exits
    with status 0 if any hit was reported, otherwise 1.
    """
    global LIST_FILENAMES

    buffer_size = 256 * 1024
    overlap_size = 4 * 1024
    after = 0
    before = 0
    ignore_case = False
    echo = False
    flush_frequently = False
    treat_pattern_as_hex = False

    try:
        # 'h' and 'i' are documented in usage() and must be accepted here.
        optlist, args = getopt.getopt(sys.argv[1:], 'hifb:o:lA:B:ex')
    except getopt.GetoptError:
        usage(1)

    for flag, value in optlist:
        if flag == '-h':
            usage(0)
        elif flag == '-b':
            buffer_size = _parse_int(value)
        elif flag == '-o':
            overlap_size = _parse_int(value)
        elif flag == '-B':
            before = _parse_int(value)
        elif flag == '-A':
            after = _parse_int(value)
        elif flag == '-f':
            flush_frequently = True
        elif flag == '-l':
            LIST_FILENAMES = True
        elif flag == '-e':
            echo = True
        elif flag == '-i':
            ignore_case = True
        elif flag == '-x':
            treat_pattern_as_hex = True

    # A non-positive buffer would never advance; negative sizes would turn
    # into from-the-end slices.
    if buffer_size <= 0 or overlap_size < 0 or before < 0 or after < 0:
        usage(1)
    if not args:
        usage(1)

    raw_pattern = args[0]
    filenames = args[1:] or ['-']

    flags = re.MULTILINE
    if ignore_case:
        flags |= re.IGNORECASE
    try:
        if treat_pattern_as_hex:
            pattern = bytes.fromhex(raw_pattern)
        else:
            pattern = os.fsencode(raw_pattern)
        if echo:
            # Echo before compiling so a pattern that fails is still shown.
            sys.stderr.write('pattern is %s\n' % pattern)
        reg = re.compile(pattern, flags)
    except (ValueError, re.error) as exc:
        sys.stderr.write('Invalid pattern: {}\n'.format(exc))
        sys.exit(1)

    sizes = (buffer_size, overlap_size)
    context = (before, after)
    for filename in filenames:
        if filename == '-':
            # Standard input is not ours to close.
            _scan_stream(sys.stdin.buffer, filename, reg, sizes, context, flush_frequently)
            continue
        try:
            file_ = open(filename, 'rb')
        except OSError as exc:
            sys.stderr.write("Could not open {}: {}\n".format(filename, exc))
            continue
        with file_:
            _scan_stream(file_, filename, reg, sizes, context, flush_frequently)

    sys.exit(1 if LAST_HIT == -1 else 0)


if __name__ == '__main__':
    main()