#!/usr/bin/env python3

"""Report on gaps found in syslog-formatted files.

Each file named on the command line (optionally gzip-compressed, detected by
a ".gz" suffix) is read line by line.  Every line must start with either a
classic syslog timestamp ("Jan 12 06:06:59 ...") or a
``journalctl -o short-full`` timestamp ("Mon 2020-01-13 06:06:59 PST ...").
All lines are sorted by timestamp and every adjacent pair that is further
apart than the requested gap duration is reported.
"""

import gzip
import sys
import time
import typing

# Weekday abbreviations that open a ``journalctl -o short-full`` line.
_WEEKDAYS = frozenset({"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"})

# A year-less syslog timestamp may land this many seconds past "now" (clock
# skew allowance) before it is assumed to belong to the previous year.
_FUTURE_SLACK_SECS = 24 * 60 * 60


class BadTimeError(Exception):
    """An exception to raise on a timestamp with an unexpected or invalid format."""


def usage(retval: int) -> typing.NoReturn:
    """Output a usage message and exit with status *retval*.

    The message goes to stdout when *retval* is 0 and to stderr otherwise.
    """
    write = sys.stdout.write if retval == 0 else sys.stderr.write
    write(f"Usage: {sys.argv[0]} --gap-duration 300 --both -- file1 file2.gz ... file3\n")
    write(" The units on --gap-duration are seconds.\n")
    write(' --both to output the "before gap" and "after gap" lines, separated by a blank line\n')
    write("\n")
    write("For systemd, try:\n")
    write(" journalctl -o short-full | ./log-gap /dev/stdin\n")
    sys.exit(retval)


def _open_log(filename: str) -> typing.IO[bytes]:
    """Open *filename* for binary reading, transparently decompressing ".gz" files."""
    if filename.endswith(".gz"):
        return typing.cast(typing.IO[bytes], gzip.open(filename, "rb"))
    return open(filename, "rb")


def get_lines() -> typing.Iterator[typing.Tuple[float, str]]:
    """Read and generate (timestamp, line) pairs from the files in sys.argv[1:].

    Lines are decoded as ISO-8859-1, so any byte sequence decodes.  Lines
    whose timestamp cannot be parsed are reported on stderr and skipped.
    """
    # One reference clock for the whole run, so that every year-less syslog
    # timestamp is resolved against the same "now".
    now = time.time()
    for filename in sys.argv[1:]:
        with _open_log(filename) as log_file:
            for lineno, raw_line in enumerate(log_file, 1):
                string = raw_line.decode("iso-8859-1")
                try:
                    stamp = get_time(string, now=now)
                except BadTimeError:
                    print(
                        f"{sys.argv[0]}: {filename} has an unexpected timestamp at line {lineno}; ignoring",
                        file=sys.stderr,
                    )
                    continue
                yield (stamp, string)


def _syslog_time(stamp: str, now: float) -> float:
    """Convert a year-less "Mon DD HH:MM:SS" stamp to seconds since the epoch.

    Classic syslog timestamps carry no year.  The year of *now* is assumed;
    if that would place the stamp more than a day in the future, the previous
    year is used instead.  Supplying an explicit year lets Feb 29 parse in
    leap years and keeps a December -> January rollover in order.  It also
    avoids the year-less day-of-month parsing deprecated in Python 3.13.

    Raises BadTimeError when the stamp fits neither candidate year.
    """
    this_year = time.localtime(now).tm_year
    for year in (this_year, this_year - 1):
        try:
            tm_struct = time.strptime(f"{year} {stamp}", "%Y %b %d %H:%M:%S")
            seconds = time.mktime(tm_struct)
        except (ValueError, OverflowError):
            # Malformed stamp, or a date such as Feb 29 that does not exist
            # in this candidate year; try the next candidate.
            continue
        if seconds <= now + _FUTURE_SLACK_SECS:
            return seconds
    raise BadTimeError(f"unrecognised syslog timestamp: {stamp!r}")


def get_time(line: str, now: typing.Optional[float] = None) -> float:
    """Extract the time, in seconds since the epoch, from a single log line.

    Two layouts are recognised:

    * journalctl short-full: "Mon 2020-01-13 06:06:59 PST host prog: ..."
    * classic syslog:        "Jan 12 06:06:59 host prog[1010]: ..."

    Timestamps are interpreted in the local timezone of the machine running
    this script (time.mktime); the timezone field of journalctl lines is
    ignored.

    *now* is the reference time (seconds since the epoch) used to guess the
    year of classic syslog stamps; it defaults to the current time.

    Raises BadTimeError if the line does not start with a valid timestamp.
    """
    fields = line.split()
    if fields and fields[0] in _WEEKDAYS:
        # This is likely systemd / journalctl data: weekday, date, time, zone.
        just_time = " ".join(fields[1:3])
        try:
            tm_struct = time.strptime(just_time, "%Y-%m-%d %H:%M:%S")
            return time.mktime(tm_struct)
        except (ValueError, OverflowError) as err:
            # mktime may raise either exception for unrepresentable times.
            raise BadTimeError(f"unrecognised journalctl timestamp: {just_time!r}") from err
    reference = time.time() if now is None else now
    return _syslog_time(" ".join(fields[:3]), reference)


def gap_starts(duration: float) -> typing.Iterator[typing.Tuple[str, str]]:
    """Yield (before, after) line pairs separated by more than *duration* seconds.

    All lines from get_lines() are loaded into memory and sorted by
    (timestamp, text), so input files may be given in any order.  Each
    adjacent pair whose timestamps differ by more than *duration* is yielded
    as the text of the line before the gap and the text of the line after it.
    """
    lines = sorted(get_lines())
    for (first_time, first_text), (second_time, second_text) in zip(lines, lines[1:]):
        if second_time - first_time > duration:
            yield (first_text, second_text)


def main() -> None:
    """Parse the command line, then print the lines adjacent to each gap.

    Options are consumed from sys.argv in place, because get_lines() reads
    the remaining file names from sys.argv[1:].
    """
    gap_duration = 300.0
    both = False

    if not sys.argv[1:]:
        usage(0)

    while sys.argv[1:]:
        option = sys.argv[1]
        if option == "--gap-duration":
            if len(sys.argv) < 3:
                print(f"{sys.argv[0]}: --gap-duration requires a value", file=sys.stderr)
                usage(1)
            try:
                gap_duration = float(sys.argv[2])
            except ValueError:
                print(
                    f"{sys.argv[0]}: --gap-duration must be a number, got {sys.argv[2]!r}",
                    file=sys.stderr,
                )
                usage(1)
            # Drop the option here; its value is dropped at the loop's end.
            del sys.argv[1]
        elif option == "--both":
            both = True
        elif option in ("-h", "--help"):
            usage(0)
        elif option == "--":
            del sys.argv[1]
            break
        elif not option.startswith("--"):
            # First file name: stop option processing and leave it in place.
            break
        else:
            print(f"{sys.argv[0]}: unexpected option: {option}", file=sys.stderr)
            usage(1)
        del sys.argv[1]

    for gap_start, gap_end in gap_starts(gap_duration):
        sys.stdout.write(gap_start)
        if both:
            sys.stdout.write(gap_end)
            sys.stdout.write("\n")


if __name__ == "__main__":
    main()