#!/usr/local/cpython-3.9/bin/python3

"""
This is an attempt to fix the re problem seen in music-pipeline, as an SSCCE.

In this one, we only have one compiled regex at any given time.  It is still slow.
"""

import random
import re
import string


def one_alphanum_random_string() -> str:
    """Return one random string of 20 ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    return ''.join(random.choices(alphabet, k=20))


def one_regex() -> str:
    """
    Return one semi-random, not-compiled regex string.

    A single random component is generated and repeated three times, giving a
    pattern of the form ``^\\./X/X/X\\.``.
    """
    component = one_alphanum_random_string()
    return rf'^\./{component}/{component}/{component}\.'


def one_filename() -> str:
    """
    Return one semi-random filename.

    A single random component is generated and repeated three times, giving a
    path of the form ``./X/X/X.mp3``.
    """
    component = one_alphanum_random_string()
    return f'./{component}/{component}/{component}.mp3'


def main(regex_count: int = 2_046, filename_count: int = 14_000) -> None:
    """
    Start the ball rolling.

    Build ``regex_count`` random regex strings and ``filename_count`` random
    filenames, then, once per regex string, match every filename against the
    compiled form of the *first* regex and print the total number of matches.

    Args:
        regex_count: How many regex strings to generate; this is also the
            number of passes made over the filenames.
        filename_count: How many filenames to generate and test per pass.
    """
    regex_strings = [one_regex() for _repno in range(regex_count)]
    filenames = [one_filename() for _repno in range(filename_count)]

    matches = 0
    for _unused_regex in regex_strings:
        # Note that we _loop_ for each regex, but we only _use_ the 0th.
        # Compiling inside the loop (and deleting afterwards) is deliberate:
        # it is the behaviour this reproduction exists to exercise, so do not
        # hoist it out of the loop.
        compiled = re.compile(regex_strings[0])
        for filename in filenames:
            if compiled.match(filename):
                matches += 1
        del compiled

    print(matches)


# Only run the (slow) workload when executed as a script, not on import.
if __name__ == '__main__':
    main()