#!/usr/bin/python3

# pylint: disable=import-error

'''drscut - cut with consistent semantics and no escaping'''

import os
import sys

sys.path.insert(0, os.path.expanduser('/usr/local/lib'))
sys.path.insert(0, os.path.expanduser('~/lib'))

try:
    import readline0
except ImportError:
    # readline0 is only needed for -0, so a missing module is reported when
    # (and only when) that option is requested, not at startup.
    readline0 = None


def usage(retval):
    '''
    Output a usage message on stderr and exit.

    retval is handed to sys.exit() as the process exit status.
    '''
    sys.stderr.write(('Usage: %s [-i inputFieldDelimiter] [-o outputFieldDelimiter] ' % (sys.argv[0], )) + \
        '[-r outputRecordDelimiter] [-f fieldno] [-0] [-l] [-L] [-n] [-D] [-h]\n')
    sys.stderr.write('The default "inputFieldDelimiter" is 1 or more occurences of any whitespace\n')
    sys.stderr.write('-f may be repeated as many times as you like, in any order you like\n')
    sys.stderr.write('-n says to ignore lines that do not have enough fields\n')
    sys.stderr.write('-0 says to read lines that are null terminated, not newline terminated\n')
    sys.stderr.write('-l says to output any missing fields as empty fields\n')
    sys.stderr.write('-L says to leave out any line with one or more missing fields\n')
    sys.stderr.write('-D says to run in debug mode\n')
    sys.exit(retval)


class Options(object):  # pylint: disable=too-many-instance-attributes,too-few-public-methods
    '''Just a container for command line option-related variables'''

    def __init__(self):
        # True once -i has been given; otherwise lines are split on runs of whitespace
        self.input_delimiter_specified = False
        self.input_delimiter = ''
        self.output_field_delimiter = ' '
        self.output_record_delimiter = '\n'
        # Zero-based field indexes, in the order requested with -f
        self.field_numbers = []
        self.use_readline0 = False
        self.leave_out_short_lines = False
        self.leave_missing_fields_empty = False
        self.debug = False

    def handle_argv(self, argv):
        '''
        Parse up sys.argv

        argv is consumed in place: every recognised option (and its value) is
        deleted from the list, leaving only argv[0] on success.  An
        unrecognised option, an option missing its value, a non-integer -f
        value or an empty -i delimiter prints the usage message and exits
        with status 1; -h prints it and exits with status 0.
        '''
        while argv[1:]:
            flag = argv[1]
            has_value = bool(argv[2:])
            if flag == '-i' and has_value:
                if not argv[2]:
                    # str.split() raises ValueError on an empty separator
                    sys.stderr.write('-i requires a nonempty delimiter\n')
                    usage(1)
                self.input_delimiter = argv[2]
                self.input_delimiter_specified = True
                del argv[1]
            elif flag == '-o' and has_value:
                self.output_field_delimiter = argv[2]
                del argv[1]
            elif flag == '-r' and has_value:
                self.output_record_delimiter = argv[2]
                del argv[1]
            elif flag == '-f' and has_value:
                try:
                    self.field_numbers.append(int(argv[2]))
                except ValueError:
                    sys.stderr.write('-f requires an integer field number, got %r\n' % (argv[2], ))
                    usage(1)
                del argv[1]
            elif flag == '-D':
                self.debug = True
            elif flag == '-l':
                self.leave_missing_fields_empty = True
            elif flag in ('-L', '-n'):
                # -n is the spelling the usage message has always documented
                # for the behaviour implemented by -L
                self.leave_out_short_lines = True
            elif flag == '-h':
                usage(0)
            elif flag == '-0':
                self.use_readline0 = True
            else:
                usage(1)
            del argv[1]


def _has_field(fields, fieldno):
    '''Return True if fieldno (negative values count from the end) indexes into fields'''
    return -len(fields) <= fieldno < len(fields)


def _select_fields(fields, options, line_number, line):
    '''
    Pick the requested fields out of one split line.

    Returns the list of output fields, or None if the line is to be skipped
    because a field is missing and -L was given.  -L takes precedence over
    -l.  With neither, a missing field is reported and the program exits via
    usage(1).
    '''
    selected = []
    for fieldno in options.field_numbers:
        if _has_field(fields, fieldno):
            selected.append(fields[fieldno])
        elif options.leave_out_short_lines:
            return None
        elif options.leave_missing_fields_empty:
            selected.append('')
        else:
            sys.stderr.write('Sorry, line number %d (prefix %s) does not have all ' \
                'requested fields - consider using -l or -L\n' % (line_number, line[:20]))
            usage(1)
    return selected


def main():
    '''
    Main function

    Parses sys.argv, then copies the requested fields of each record read
    from sys.stdin to sys.stdout, joined by the output field delimiter and
    followed by the output record delimiter.
    '''
    options = Options()
    options.handle_argv(sys.argv)

    if options.use_readline0 and readline0 is None:
        sys.stderr.write('%s: -0 requires the readline0 module, which could not be imported\n' % (sys.argv[0], ))
        sys.exit(1)

    def my_readline(file_):
        '''Readline that either delimits using nulls or newlines'''
        if options.use_readline0:
            for line in readline0.readline0(file_):
                yield line
        else:
            for line in file_:
                yield line.rstrip('\n')

    # line_number is zero-based, and is only used in the missing-field message
    for line_number, line in enumerate(my_readline(sys.stdin)):
        if options.debug:
            sys.stderr.write('Got line %s\n' % (line, ))
        if options.input_delimiter_specified:
            fields = line.split(options.input_delimiter)
        else:
            fields = line.split()
        output_list = _select_fields(fields, options, line_number, line)
        if output_list is not None:
            sys.stdout.write(options.output_field_delimiter.join(output_list) + options.output_record_delimiter)


if __name__ == '__main__':
    main()