#!/usr/bin/env python3

# pylint: disable=superfluous-parens
# superfluous-parens: Parentheses are good for portability and clarity

"""
Pick a good blocksize by guessing lots of random sizes and reusing the good ones.

We start out guessing a lot, and then slowly decrease the guesses and instead
use blocksizes that were working well.
"""

import random
import sys
import typing

use_treap = True

try:
    # This'll prefer pyx_treap, but is able to fall back on py_treap
    import treap
except ImportError:
    use_treap = False

try:
    import dupdict_mod
except ImportError:
    use_treap = False


class Blocksize_class:
    """A class for keeping track of blocksizes."""

    def __init__(self, size):
        """Initialize."""
        self.size = size
        self.total_size = 0
        self.total_time = 0
        self.average = None

    def add_duration(self, duration, length):
        """Add a duration into the average."""
        self.total_time += duration
        self.total_size += length
        self.average = float(self.total_time) / self.total_size

    def get_average(self):
        """Return the average."""
        return self.average

    def __str__(self):
        """Convert to string."""
        string = '%d, %f, %f' % (self.size, self.total_time, self.average)
        return string

# Initially we get a little and put a lot
# After we get established, we get all the time, and put a little


class Common:
    """
    Base class for blocksize_optimizer.

    We formerly had a dict or treap option, but now we only use the dict version, so this class is less important than it was.
    """

    def __init__(self):
        """Initialize."""
        self.probability_of_guess = None
        self.probability_of_guess_decrement = None
        self.lowest = None
        self.highest = None

    def generate_random_blocksize(self):
        """Generate a random blocksize."""
        guess = random.randint(self.lowest, self.highest)
        self.decrement()
        return guess

    def decrement(self):
        """Decrement the probability of a new blocksize guess."""
        self.probability_of_guess = max(self.probability_of_guess - self.probability_of_guess_decrement, 0.01)


# This implementation is:
#    O(logn) on put
#    O(logn) on get
#
# Because after getting established we put And get on almost every operation, this is a performance win
class treap_blocksize_optimizer(Common):
    """Use a dupdict layered over a treap to get O(logn) put and get for blocksize optimization."""

    def __init__(self, lowest, highest, probability_of_guess_decrement):
        """Initialize."""
        self.lowest = lowest
        self.highest = highest
        # initially the probability of a guess is 100%, but need not be
        self.probability_of_guess = 1.0
        self.probability_of_guess_decrement = probability_of_guess_decrement
        self.blocksizes_by_size = {}
        self.blocksizes_by_throughput = dupdict_mod.Dupdict(treap.treap())
        self.blocksizes_in_limbo = {}
        self.num_blocksizes = 0
        # nice for debugging
        # random.seed(0)

    def put(self, length, duration):
        """Save a blocksize for (possible) later use."""
        if length in self.blocksizes_by_size:
            # We already have this block size in blocksizes_by_size - that's strange
            sys.stderr.write('%s: Internal error: blocksize already in blocksizes_by_size\n' % sys.argv[0])
            sys.exit(1)
        elif length in self.blocksizes_in_limbo:
            # We've seen this blocksize before.  In fact, we passed it back to the caller, expecting to see it again later.
            # Update the blocksize status with the new data.
            self.blocksizes_in_limbo[length].add_duration(duration, length)
            # Put it back into blocksizes_by_size and blocksizes_by_throughput.
            self.blocksizes_by_size[length] = self.blocksizes_in_limbo[length]
            self.blocksizes_by_throughput[self.blocksizes_in_limbo[length].get_average()] = self.blocksizes_in_limbo[length]
            # Delete the blocksize from limbo
            del self.blocksizes_in_limbo[length]
        else:
            # We do not yet have this blocksize anywhere.
            # Update it with the new data.
            # Add it to the dictionary.
            # Add it to the treap.
            # Do not add it to limbo (yet).
            bs = Blocksize_class(length)
            bs.add_duration(duration, length)
            self.blocksizes_by_size[length] = bs
            self.blocksizes_by_throughput[bs.get_average()] = bs
        self.num_blocksizes += 1
        if self.num_blocksizes >= 200:
            # we have too many blocksizes to worry about them all - delete the one that was the slowest
            rate, best_blocksize = self.blocksizes_by_throughput.remove_min()
            del self.blocksizes_by_size[best_blocksize.size]
            self.num_blocksizes -= 1

    def get(self):
        """Get a suitable blocksize to use."""
        if random.random() < self.probability_of_guess:
            pick_random = True
        else:
            pick_random = False
            # Reuse the best blocksize.
            # 1) Extract the blocksize with the best rate
            # 2) Delete it from blocksizes_by_size
            # 3) Stash it in "limbo" for later use - it'll probably be coming back again
            try:
                rate, best_blocksize = self.blocksizes_by_throughput.remove_max()
            except KeyError:
                pick_random = True
            else:
                del self.blocksizes_by_size[best_blocksize.size]
                self.blocksizes_in_limbo[best_blocksize.size] = best_blocksize
                self.num_blocksizes -= 1
                return best_blocksize.size
        if pick_random:
            # Get a new blocksize.
            # Loop until we generate a new blocksize.
            while True:
                new_blocksize = self.generate_random_blocksize()
                if new_blocksize not in self.blocksizes_by_size and new_blocksize not in self.blocksizes_in_limbo:
                    break
            return new_blocksize


# This implementation is:
#    O(nlogn) on get (but n is small)
#    O(1) (assuming good hashing) on put
#    No longer grows without bound, slowly or otherwise.
class dict_blocksize_optimizer(Common):
    """
    A blocksize optimizer, ala University of Cincinnati.

    UC didn't originate it, that's where I learned it.
    """

    def __init__(self, lowest, highest, probability_of_guess_decrement):
        """Initialize."""
        super().__init__()
        self.max_considered = 200
        self.lowest = lowest
        self.highest = highest
        # initially the probability of a guess is 100%, but need not be
        self.probability_of_guess = 1.0
        self.probability_of_guess_decrement = probability_of_guess_decrement
        self.blocksize_average = {}
        # nice for debugging
        # random.seed(0)
        self.putno = 0
        self.max_considered_over_10 = self.max_considered // 10
        self.max_considered_over_5 = self.max_considered // 5
        self.max_considered_over_2 = self.max_considered // 2

    def put(self, length, duration):
        """Save a blocksize for (possible) later use."""
        if length not in self.blocksize_average:
            self.blocksize_average[length] = Blocksize_class(length)
        self.blocksize_average[length].add_duration(duration, length)

        self.putno += 1
        if self.putno >= self.max_considered_over_10:
            self.putno = 0
            under_consideration = [(self.blocksize_average[key].average, key) for key in self.blocksize_average]
            under_consideration.sort()
            if len(under_consideration) > self.max_considered_over_5:
                for tuple_ in under_consideration[self.max_considered_over_2:]:
                    size = tuple_[1]
                    del self.blocksize_average[size]

    def get(self):
        """Get a suitable blocksize to use."""
        if random.random() < self.probability_of_guess:
            return self.generate_random_blocksize()
        else:
            list_ = [(value.get_average(), key) for key, value in self.blocksize_average.items()]
            if list_:
                minimum = min(list_)
                self.decrement()
                best_key = minimum[1]
                if len(list_) > self.max_considered:
                    maximum = max(list_)
                    worst_key = maximum[1]
                    del self.blocksize_average[worst_key]
                return best_key
            else:
                return self.generate_random_blocksize()


# The treap code (in combination with dupdict) is slower than the dict code. So we always use dict instead of treap.
use_treap = False

# sys.stderr.write(f'Using treap?  {use_treap}\n')

if use_treap:
    blocksize_optimizer: typing.Type[typing.Union[dict_blocksize_optimizer, treap_blocksize_optimizer]] = treap_blocksize_optimizer
else:
    blocksize_optimizer = dict_blocksize_optimizer
