#!/usr/bin/env python3 """ Get end hashes. Remove device number and inode number. Each record in our input corresponds to one file. These records in our input have 3 columsn: the device number, the inode number, and the pathname. To form our output, we remove the first two columns, then prepend the "end hash", which is the blake2b hash of the last Kilobyte of the file. There's not a lot of opportunity for reuse here, so we forgo most of the usual parameterization. """ import hashlib import os import sys import readline0 def get_end_hash(filename: bytes) -> bytes: """Get a cryptographic digest of the last 1K of `filename`.""" with open(filename, "rb") as file_: try: # This raises an OSError if the file is not long enough to seek to -1024. file_.seek(-1024, 2) except OSError: pass kilobyte = file_.read(1024) end_hash = hashlib.blake2b(digest_size=32) # Can produce digests between 1 and 64 bytes end_hash.update(kilobyte) return end_hash.hexdigest().encode("UTF-8") def usage(retval: int) -> None: """Output a usage message.""" if retval == 0: file_ = sys.stdout else: file_ = sys.stderr print(f"Usage: {sys.argv[0]} --write-file-to /path/name.txt --help", file=file_) sys.exit(retval) def main() -> None: """Start the ball rolling.""" write_count_to = "" while sys.argv[1:]: match sys.argv[1]: case "--write-count-to": write_count_to = sys.argv[2] del sys.argv[1] case "--help" | "-h": usage(0) case _: print(f"{sys.argv[0]}: unrecognized option: {sys.argv[1]}", file=sys.stderr) usage(1) del sys.argv[1] for index, record in enumerate(readline0.readline0(file_=0, separator=b"\0")): input_fields = record.split(b",", 3) end_hash = get_end_hash(input_fields[2]) output_fields = [end_hash, input_fields[2]] os.write(1, b",".join(output_fields) + b"\0") if write_count_to: with open(write_count_to, "w") as file_: file_.write(str(index) + "\n") main()