#!/usr/bin/python3

"""Compute some simple statistics related to a collection of numbers on stdin."""

import collections
import math
import sys
import typing

# Note that this program extensively uses mypy's ability to treat int's and float's as compatible:
# values annotated as float are frequently int's at runtime.


def compute_arithmetic_mean(list_: typing.List[float]) -> float:
    """
    Compute the arithmetic mean of list_.

    Raises ZeroDivisionError if list_ is empty.
    """
    return sum(list_) / float(len(list_))


def compute_geometric_mean(list_: typing.List[float]) -> float:
    """
    Compute the geometric mean of list_ as the n-th root of the product of its elements.

    This follows from the usual definition of the geometric mean, but it has problems with large lists,
    because the running product can grow very large or very small.  Unlike
    compute_geometric_mean_robustly, it tolerates zeros.  Note that a negative product raised to a
    fractional power may yield a complex result in Python 3.
    """
    product = 1.0
    for element in list_:
        product *= element
    exponent = 1.0 / len(list_)
    return product ** exponent


def compute_geometric_mean_robustly(list_: typing.List[float]) -> float:
    """
    Compute the geometric mean of list_ as exp() of the arithmetic mean of the logarithms.

    Follows from https://en.wikipedia.org/wiki/Geometric_mean#Relationship_with_arithmetic_mean_of_logarithms

    Raises ValueError (from math.log) if any element is zero or negative.
    """
    total = 0.0
    for element in list_:
        total += math.log(element)
    return math.exp(total / len(list_))


def compute_median(list_: typing.List[float]) -> float:
    """
    Compute the median of list_.  We do this a slow-but-functional way.

    Note: modifies list_ - it is sorted in place.

    This is not as fast as it could be.  In particular, sorting is a slow way of finding a median.
    However, this is simple, and Python's sort method is pretty good.

    Raises IndexError if list_ is empty.
    """
    list_.sort()
    len_list = len(list_)
    middle = len_list // 2
    if len_list % 2 == 0:
        # The length is even, so we average the middle two values
        return compute_arithmetic_mean([list_[middle - 1], list_[middle]])
    # The length is odd, so we return the middle element
    return list_[middle]


def compute_standard_deviation(list_: typing.List[float], arithmetic_mean: typing.Optional[float] = None) -> float:
    """
    Compute the (population) standard deviation of list_.

    If arithmetic_mean is provided, use that, otherwise compute an arithmetic_mean ourselves.
    """
    if arithmetic_mean is None:
        arithmetic_mean = compute_arithmetic_mean(list_)
    sum_of_squares = 0.0
    for num in list_:
        sum_of_squares += (arithmetic_mean - num) ** 2
    # Some sources use len(list_) - 1 .  See https://en.wikipedia.org/wiki/Standard_deviation
    return math.sqrt(sum_of_squares / len(list_))


def compute_med_abs_dev(list_: typing.List[float], median: typing.Optional[float] = None) -> float:
    """
    Compute median absolute deviation of list_.

    If median is provided, use that, otherwise compute a median ourselves.
    Note: when the median is computed here, list_ is sorted in place by compute_median.
    """
    if median is None:
        median = compute_median(list_)
    differences = [abs(median - element) for element in list_]
    return compute_median(differences)


def compute_mean_abs_dev(list_: typing.List[float], mean: typing.Optional[float] = None) -> float:
    """
    Compute mean absolute deviation of list_.

    If mean is provided, use that, otherwise compute a mean ourselves.
    """
    if mean is None:
        mean = compute_arithmetic_mean(list_)
    differences = [abs(mean - element) for element in list_]
    return compute_arithmetic_mean(differences)


def compute_mode(list_: typing.List[float]) -> typing.List[float]:
    """
    Compute the mode of list_.

    Note that the return value is a list, because sometimes there is a tie for "most common value".
    Tied values are returned in the order in which they first appear in list_.

    See https://stackoverflow.com/questions/10797819/finding-the-mode-of-a-list

    Raises ValueError when there is no meaningful mode: list_ is empty, has a single element, or every
    distinct value occurs the same number of times (which includes "all elements equal" and "no element
    occurs more than once").
    """
    if not list_:
        raise ValueError('Empty list')
    if len(list_) == 1:
        raise ValueError('Single-element list')

    value_to_count = collections.Counter(list_)

    count_to_values: typing.DefaultDict[int, typing.List[float]] = collections.defaultdict(list)
    for value, count in value_to_count.items():
        count_to_values[count].append(value)

    if len(count_to_values) == 1:
        # Only one distinct occurrence count: every value is equally common, so nothing stands out
        raise ValueError('All elements in list are the same')

    # With at least two distinct occurrence counts, the largest is necessarily >= 2 and strictly greater
    # than the smallest, so no further sanity checks are needed.
    return count_to_values[max(count_to_values)]


def usage(retval: int) -> None:
    """Output a usage message to stderr and exit with status retval."""
    sys.stderr.write('Usage: {} --commas\n'.format(sys.argv[0]))
    sys.stderr.write('Accepts a list of numbers on stdin to summarize\n')
    sys.exit(retval)


def display(description: str, format_string: typing.Optional[str], value: typing.Optional[float]) -> None:
    """Output one value to stdout as "description formatted-value"."""
    # The parameters are Optional only because the Stats attributes passed in start out as None
    assert value is not None
    assert format_string is not None
    sys.stdout.write('{} {}\n'.format(description, format_string.format(value)))


class Stats(object):
    # pylint: disable=too-many-instance-attributes
    # too-many-instance-attributes: We need some attributes to be meaningful

    """Deal with command line options and statistics generation and output."""

    def __init__(self) -> None:
        """Initialize: parse (and consume) the options in sys.argv, then set up empty results."""
        self.commas = False
        self.floats = True

        while sys.argv[1:]:
            if sys.argv[1] == '--commas':
                self.commas = True
            elif sys.argv[1] in ['-h', '--help']:
                usage(0)
            else:
                sys.stderr.write('{}: Unrecognized option: {}\n'.format(sys.argv[0], sys.argv[1]))
                usage(1)
            del sys.argv[1]

        self.length = 0
        self.total: typing.Optional[float] = None
        self.minimum: typing.Optional[float] = None
        self.maximum: typing.Optional[float] = None
        self.range_: typing.Optional[float] = None
        self.arithmetic_mean: typing.Optional[float] = None
        self.geometric_mean: typing.Optional[float] = None
        self.median: typing.Optional[float] = None
        self.have_mode: typing.Optional[bool] = None
        self.mode: typing.List[float] = []
        self.standard_deviation: typing.Optional[float] = None
        self.median_absolute_deviation: typing.Optional[float] = None
        self.mean_absolute_deviation: typing.Optional[float] = None
        self.list_: typing.List[float] = []
        self.all_ints = True

        # We overwrite these elsewhere - they are here just to keep pylint happy
        self.possible_float_format_string: typing.Optional[str] = None
        self.always_int_format_string: typing.Optional[str] = None

    def set_up_format_strings(self) -> None:
        """Set up the format strings, based on self.all_ints and self.commas."""
        char = 'd' if self.all_ints else 'f'
        comma = ',' if self.commas else ''
        self.possible_float_format_string = '{{:{}{}}}'.format(comma, char)
        self.always_int_format_string = '{{:{}d}}'.format(comma)

    def slurp(self) -> None:
        """
        Read in the numbers from stdin, one per line.

        Lines that parse as neither int nor float are reported on stderr and skipped.
        """
        for line in sys.stdin:
            value: float
            try:
                value = int(line)
            except ValueError:
                try:
                    value = float(line)
                except ValueError:
                    sys.stderr.write('Failure to convert to int or float: "{}"\n'.format(line.rstrip()))
                    continue
                # At least one value needed float parsing, so int-only formatting is no longer possible
                self.all_ints = False
            self.list_.append(value)

        self.length = len(self.list_)
        self.set_up_format_strings()

    def compute(self) -> None:
        """
        Compute the statistics for self.list_.

        self.list_ must be non-empty.  As a side effect, self.list_ ends up sorted (via compute_median).
        """
        self.length = len(self.list_)
        self.minimum = min(self.list_)
        self.maximum = max(self.list_)
        self.range_ = self.maximum - self.minimum
        self.arithmetic_mean = compute_arithmetic_mean(self.list_)
        try:
            # This has problems with zeros
            self.geometric_mean = compute_geometric_mean_robustly(self.list_)
        except ValueError:
            # This has problems with big lists
            self.geometric_mean = compute_geometric_mean(self.list_)
        self.standard_deviation = compute_standard_deviation(self.list_, arithmetic_mean=self.arithmetic_mean)
        self.median = compute_median(self.list_)
        self.median_absolute_deviation = compute_med_abs_dev(self.list_, median=self.median)
        self.mean_absolute_deviation = compute_mean_abs_dev(self.list_, mean=self.arithmetic_mean)
        self.total = sum(self.list_)
        try:
            self.mode = compute_mode(self.list_)
        except ValueError:
            self.have_mode = False
        else:
            self.have_mode = True

    def _mode_line(self) -> str:
        """Return the complete "mode ..." output line, newline included."""
        if not self.have_mode:
            return 'mode none\n'
        assert self.possible_float_format_string is not None
        formatted = '_&_'.join(self.possible_float_format_string.format(element) for element in self.mode)
        return 'mode {}\n'.format(formatted)

    def output_without_commas(self) -> None:
        """Output relevant statistics without commas."""
        display('minimum', self.possible_float_format_string, self.minimum)
        display('maximum', self.possible_float_format_string, self.maximum)
        display('range', self.possible_float_format_string, self.range_)
        # The count is always an integer, even when the data contains floats
        display('count', self.always_int_format_string, self.length)
        display('sum', self.possible_float_format_string, self.total)
        sys.stdout.write('arithmetic_mean {}\n'.format(self.arithmetic_mean))
        sys.stdout.write('geometric_mean {}\n'.format(self.geometric_mean))
        sys.stdout.write('median {}\n'.format(self.median))
        sys.stdout.write(self._mode_line())
        sys.stdout.write('standard_deviation {}\n'.format(self.standard_deviation))
        sys.stdout.write('median_absolute_deviation {}\n'.format(self.median_absolute_deviation))
        sys.stdout.write('mean_absolute_deviation {}\n'.format(self.mean_absolute_deviation))

    def output_with_commas(self) -> None:
        """Output relevant statistics with commas as thousands separators."""
        display('minimum', self.possible_float_format_string, self.minimum)
        display('maximum', self.possible_float_format_string, self.maximum)
        display('range', self.possible_float_format_string, self.range_)
        display('count', self.always_int_format_string, self.length)
        display('sum', self.possible_float_format_string, self.total)
        assert self.arithmetic_mean is not None
        sys.stdout.write('arithmetic_mean {:,f}\n'.format(self.arithmetic_mean))
        assert self.geometric_mean is not None
        sys.stdout.write('geometric_mean {:,f}\n'.format(self.geometric_mean))
        assert self.median is not None
        sys.stdout.write('median {:,f}\n'.format(self.median))
        sys.stdout.write(self._mode_line())
        assert self.standard_deviation is not None
        sys.stdout.write('standard_deviation {:,f}\n'.format(self.standard_deviation))
        assert self.median_absolute_deviation is not None
        sys.stdout.write('median_absolute_deviation {:,f}\n'.format(self.median_absolute_deviation))
        assert self.mean_absolute_deviation is not None
        sys.stdout.write('mean_absolute_deviation {:,f}\n'.format(self.mean_absolute_deviation))


def main() -> None:
    """Get things started."""
    stats = Stats()
    stats.slurp()
    if stats.length >= 1:
        stats.compute()
        if stats.commas:
            stats.output_with_commas()
        else:
            stats.output_without_commas()
    else:
        sys.stdout.write('empty_list_of_numbers\n')


# Only run when executed as a script, so that importing this module has no side effects
if __name__ == '__main__':
    main()