#!/usr/bin/env python3

# pylint: disable=superfluous-parens
# superfluous-parens: Parentheses are good for portability and clarity

"""
Pick a good blocksize by guessing lots of random sizes and reusing the good ones.

We start out guessing a lot, and then slowly decrease the guesses and instead
use blocksizes that were working well.
"""

import random
import sys
import typing

use_treap = True

try:
    # This'll prefer pyx_treap, but is able to fall back on py_treap
    import treap
except ImportError:
    use_treap = False

try:
    import dupdict_mod
except ImportError:
    use_treap = False


class BlocksizeClass:
    """A class for keeping track of blocksizes."""

    def __init__(self, size: int) -> None:
        """Initialize."""
        self.size = size
        self.total_size = 0
        self.total_time = 0.0
        self.average = 0.0

    def add_duration(self, duration: float, length: int) -> None:
        """Add a duration into the average."""
        self.total_time += duration
        self.total_size += length
        self.average = float(self.total_time) / self.total_size

    def get_average(self) -> float:
        """Return the average."""
        return self.average

    def __repr__(self) -> str:
        """Convert to string."""
        string = "%d, %f, %f" % (self.size, self.total_time, self.average)
        return string


# Initially we get a little and put a lot
# After we get established, we get all the time, and put a little


class Common:
    """
    Base class for blocksize_optimizer.

    We formerly had a dict or treap option, but now we only use the dict version, so this class is less important than it was.
    """

    def __init__(self) -> None:
        """Initialize."""
        self.probability_of_guess: typing.Optional[float] = None
        self.probability_of_guess_decrement: typing.Optional[float] = None
        self.lowest: typing.Optional[int] = None
        self.highest: typing.Optional[int] = None

    def generate_random_blocksize(self) -> int:
        """Generate a random blocksize."""
        if self.lowest is None or self.highest is None:
            return 1024
        assert isinstance(self.lowest, int)
        assert isinstance(self.highest, int)
        guess = random.randint(self.lowest, self.highest)
        self.decrement()
        return guess

    def decrement(self) -> None:
        """Decrement the probability of a new blocksize guess."""
        if self.probability_of_guess is None or self.probability_of_guess_decrement is None:
            return
        assert isinstance(self.probability_of_guess, float)
        assert isinstance(self.probability_of_guess_decrement, float)
        self.probability_of_guess = max(self.probability_of_guess - self.probability_of_guess_decrement, 0.01)


class TreapBlocksizeOptimizer(Common):
    """Use a dupdict layered over a treap to get O(logn) put and get for blocksize optimization."""

    def __init__(self, lowest: int, highest: int, probability_of_guess_decrement: float) -> None:
        """
        Initialize.

        This implementation is:
            O(logn) on put
            O(logn) on get

        Because after getting established we put And get on almost every operation, this is a performance win
        """
        self.lowest = lowest
        self.highest = highest
        # initially the probability of a guess is 100%, but need not be
        self.probability_of_guess = 1.0
        self.probability_of_guess_decrement = probability_of_guess_decrement
        self.blocksizes_by_size: dict[int, BlocksizeClass] = {}
        self.blocksizes_by_throughput = dupdict_mod.Dupdict(treap.treap())
        self.blocksizes_in_limbo: dict[int, BlocksizeClass] = {}
        self.num_blocksizes = 0
        # nice for debugging
        # random.seed(0)

    def put(self, length: int, duration: float) -> None:
        """Save a blocksize for (possible) later use."""
        if length in self.blocksizes_by_size:
            # We already have this block size in blocksizes_by_size - that's strange
            sys.stderr.write("%s: Internal error: blocksize already in blocksizes_by_size\n" % sys.argv[0])
            sys.exit(1)
        elif length in self.blocksizes_in_limbo:
            # We've seen this blocksize before.  In fact, we passed it back to the caller, expecting to see it again later.
            # Update the blocksize status with the new data.
            self.blocksizes_in_limbo[length].add_duration(duration, length)
            # Put it back into blocksizes_by_size and blocksizes_by_throughput.
            self.blocksizes_by_size[length] = self.blocksizes_in_limbo[length]
            self.blocksizes_by_throughput[self.blocksizes_in_limbo[length].get_average()] = self.blocksizes_in_limbo[length]
            # Delete the blocksize from limbo
            del self.blocksizes_in_limbo[length]
        else:
            # We do not yet have this blocksize anywhere.
            # Update it with the new data.
            # Add it to the dictionary.
            # Add it to the treap.
            # Do not add it to limbo (yet).
            bs = BlocksizeClass(length)
            bs.add_duration(duration, length)
            self.blocksizes_by_size[length] = bs
            self.blocksizes_by_throughput[bs.get_average()] = bs
        self.num_blocksizes += 1
        if self.num_blocksizes >= 200:
            # we have too many blocksizes to worry about them all - delete the one that was the slowest
            rate, best_blocksize = self.blocksizes_by_throughput.remove_min()
            del self.blocksizes_by_size[best_blocksize.size]
            self.num_blocksizes -= 1

    def get(self) -> int:
        """Get a suitable blocksize to use."""
        if self.probability_of_guess is not None and random.random() >= self.probability_of_guess:
            # Reuse the best blocksize.
            # 1) Extract the blocksize with the best rate
            # 2) Delete it from blocksizes_by_size
            # 3) Stash it in "limbo" for later use - it'll probably be coming back again
            try:
                rate, best_blocksize = self.blocksizes_by_throughput.remove_max()
            except KeyError:
                # fall through and use a random blocksize
                pass
            else:
                del self.blocksizes_by_size[best_blocksize.size]
                self.blocksizes_in_limbo[best_blocksize.size] = best_blocksize
                self.num_blocksizes -= 1
                assert isinstance(best_blocksize.size, int)
                return best_blocksize.size

        # Get a new blocksize.
        # Loop until we generate a new blocksize.
        while True:
            new_blocksize = self.generate_random_blocksize()
            if new_blocksize not in self.blocksizes_by_size and new_blocksize not in self.blocksizes_in_limbo:
                break
        return new_blocksize


class DictBlocksizeOptimizer(Common):
    """
    A blocksize optimizer, ala University of Cincinnati.

    UC didn't originate it, that's where I learned it.

    This implementation is:
        O(nlogn) on get (but n is small)
        O(1) (assuming good hashing) on put
        No longer grows without bound, slowly or otherwise.
    """

    def __init__(self, lowest: int, highest: int, probability_of_guess_decrement: float) -> None:
        """Initialize."""
        super().__init__()
        self.max_considered = 200
        self.lowest = lowest
        self.highest = highest
        # initially the probability of a guess is 100%, but need not be
        self.probability_of_guess = 1.0
        self.probability_of_guess_decrement = probability_of_guess_decrement
        self.blocksize_average: dict[int, BlocksizeClass] = {}
        # nice for debugging
        # random.seed(0)
        self.putno = 0
        self.max_considered_over_10 = self.max_considered // 10
        self.max_considered_over_5 = self.max_considered // 5
        self.max_considered_over_2 = self.max_considered // 2

    def put(self, length: int, duration: float) -> None:
        """Save a blocksize for (possible) later use."""
        if length not in self.blocksize_average:
            self.blocksize_average[length] = BlocksizeClass(length)
        self.blocksize_average[length].add_duration(duration, length)

        self.putno += 1
        if self.putno >= self.max_considered_over_10:
            self.putno = 0
            under_consideration = [(self.blocksize_average[key].average, key) for key in self.blocksize_average]
            under_consideration.sort()
            if len(under_consideration) > self.max_considered_over_5:
                for tuple_ in under_consideration[self.max_considered_over_2 :]:
                    size = tuple_[1]
                    del self.blocksize_average[size]

    def get(self) -> int:
        """Get a suitable blocksize to use."""
        if self.probability_of_guess is not None and random.random() < self.probability_of_guess:
            return self.generate_random_blocksize()
        else:
            list_ = [(value.get_average(), key) for key, value in self.blocksize_average.items()]
            if list_:
                minimum = min(list_)
                self.decrement()
                best_key = minimum[1]
                if len(list_) > self.max_considered:
                    maximum = max(list_)
                    worst_key = maximum[1]
                    del self.blocksize_average[worst_key]
                return best_key
            else:
                return self.generate_random_blocksize()


# The treap code (in combination with dupdict) is slower than the dict code. So we always use dict instead of treap.
use_treap = False

# sys.stderr.write(f'Using treap?  {use_treap}\n')

if use_treap:
    blocksize_optimizer: typing.Type[typing.Union[DictBlocksizeOptimizer, TreapBlocksizeOptimizer]] = TreapBlocksizeOptimizer
else:
    blocksize_optimizer = DictBlocksizeOptimizer