#!/usr/local/cpython-3.5/bin/python3

"""
Create some test data and solve.

https://realpython.com/numpy-tensorflow-performance/
    "Although it is possible to use this deterministic approach to estimate the coefficients of the linear model, it is not
    possible for some other models, such as neural networks. In these cases, iterative algorithms are used to estimate a
    solution for the parameters of the model."
"""

import json
import typing

import numpy as np


def create_data() -> typing.Tuple[np.ndarray, np.ndarray]:
    """
    Create data.

    This program creates a set of 10,000 inputs evenly_spaced_values_x linearly distributed over the interval from 0 to 2. It then
    creates a set of desired outputs desired_outputs_d = 3 + 2 * x + noise, where noise is taken from a Gaussian (normal)
    distribution with zero mean and standard deviation sigma = 0.1.

    By creating evenly_spaced_values_x and desired_outputs_d in this way, you're effectively stipulating that the optimal solution
    for w_0 and w_1 is 3 and 2 respectively.
    """
    np.random.seed(444)

    num_rows_cap_n = 10000
    # For debugging.
    # num_rows_cap_n = 10
    start = 0
    stop = 2
    sigma = 0.1

    noise = sigma * np.random.randn(num_rows_cap_n)
    evenly_spaced_values_x = np.linspace(start=start, stop=stop, num=num_rows_cap_n)
    desired_outputs_d = 3 + 2 * evenly_spaced_values_x + noise
    desired_outputs_d.shape = (num_rows_cap_n, 1)

    # We need to prepend a column vector of 1s to `evenly_spaced_values_x`.
    ones = np.ones(shape=num_rows_cap_n, dtype=evenly_spaced_values_x.dtype)
    columns = (ones, evenly_spaced_values_x)
    ones_and_esvx_cap_x = np.column_stack(tup=columns)
    assert ones_and_esvx_cap_x.shape == (num_rows_cap_n, stop - start)
    if num_rows_cap_n <= 10:
        print('noise is:')
        print(noise)
        print()

    print('mean of noise is (should be close to zero):')
    print(np.mean(noise))
    print()
    print('stddev of noise is (should be close to {}):'.format(sigma))
    print(np.std(noise))
    print()

    if num_rows_cap_n <= 10:
        print('ones_and_esvx_cap_x is:')
        print(ones_and_esvx_cap_x)
        print()
        print('desired_output_d is:')
        print(desired_outputs_d)

    tuple_ = (ones_and_esvx_cap_x, desired_outputs_d)

    # We write x and d as json for the benefit of pydescent, which is pure python.  pydescent wasn't really pure python in
    # the source document, even though it billed as such, because it creates the data using numpy and converts to native
    # python datatypes afterward.
    data = {
        'evenly_spaced_values_x': evenly_spaced_values_x.tolist(),
        'desired_outputs_d': desired_outputs_d.squeeze().tolist(),
    }
    with open('x-d.json', 'w') as outfile:
        json.dump(data, outfile)

    # print([type(x) for x in tuple_])
    return tuple_


def main() -> None:
    """Solve a simple optimization problem, based on https://realpython.com/numpy-tensorflow-performance/ ."""
    tuple_ = create_data()
    # print(tuple_)

    (ones_and_esvx_cap_x, desired_outputs_d) = tuple_

    # https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.linalg.pinv.html
    cap_x_plus = np.linalg.pinv(ones_and_esvx_cap_x)
    w_opt = cap_x_plus @ desired_outputs_d
    print('This should be 3 and 2:')
    print(w_opt)


main()