import math
import random


# squash any real number into the range (0, 1)
def sigmoid(x):
    return 1 / (1 + math.exp(-x))


# derivative of the sigmoid function
def sigmoid_deriv(x):
    y = sigmoid(x)
    return y * (1 - y)


# neuron class
class Neuron:
    """
    z : linear combination of inputs and weights plus bias (pre-activation)
    y : output of the activation function (sigmoid(z))
    w : list of weights, one for each input
    """

    def __init__(self, isize):
        # number of inputs to this neuron
        self.isize = isize
        # weight given to each input
        self.weight = [random.uniform(-1, 1) for _ in range(self.isize)]
        # bias of the neuron
        self.bias = random.uniform(-1, 1)
        # last z (linear combination) value
        self.z = 0
        # last output sigmoid(z)
        self.last_output = 0
        # whether the last forward pass applied the sigmoid activation
        self.activated = True

    def forward(self, x, activate=True):
        """
        x : list of input values to the neuron
        """
        # compute the weighted sum of the inputs and add the bias
        self.z = sum(w * xi for w, xi in zip(self.weight, x)) + self.bias
        self.activated = activate
        # squash the output between 0 and 1, unless activation is disabled
        if activate:
            self.last_output = sigmoid(self.z)
        else:
            self.last_output = self.z
        return self.last_output

    # adjust the weights and bias of the neuron
    def backward(self, x, dcost_dy, learning_rate):
        """
        x : list of input values to the neuron
        dcost_dy : derivative of the cost function with respect to this neuron's output `(2 * (output - target))`
        learning_rate : learning factor (controls how fast weights and biases change during training)

        weight -= learning_rate * dC/dy * dy/dz * dz/dw
        bias   -= learning_rate * dC/dy * dy/dz * dz/db
        """
        # dy/dz: derivative of the sigmoid activation (1 if the neuron was not activated)
        dy_dz = sigmoid_deriv(self.z) if self.activated else 1
        # dz/dw = x
        dz_dw = x
        assert len(dz_dw) >= self.isize, "not enough input values for this neuron's input size"
        # dz/db = 1
        dz_db = 1
        # gradient with respect to the inputs, using the weights from the forward pass
        # (computed before the weights are updated below)
        input_gradient = [dcost_dy * dy_dz * w for w in self.weight]
        for i in range(self.isize):
            # update each weight: weight -= learning_rate * dC/dy * dy/dz * x_i
            self.weight[i] -= learning_rate * dcost_dy * dy_dz * dz_dw[i]
        # update the bias: bias -= learning_rate * dC/dy * dy/dz * dz/db
        self.bias -= learning_rate * dcost_dy * dy_dz * dz_db
        # return the gradient vector (one entry per input)
        return input_gradient


class Layer:
    def __init__(self, input_size, output_size):
        """
        input_size : number of inputs to each neuron
        output_size : number of neurons in the layer
        """
        self.size = output_size
        # list of neurons
        self.neurons = [Neuron(input_size) for _ in range(output_size)]

    def forward(self, inputs, activate=True):
        self.inputs = inputs
        # give the same inputs to each neuron in the layer
        return [neuron.forward(inputs, activate) for neuron in self.neurons]

    # adjust the weights and biases of the layer (all neurons)
    def backward(self, dcost_dy_list, learning_rate=0.1):
        # initialize the layer gradient vector (one entry per input)
        input_gradients = [0.0] * len(self.inputs)
        for i, neuron in enumerate(self.neurons):
            dcost_dy = dcost_dy_list[i]
            grad_to_input = neuron.backward(self.inputs, dcost_dy, learning_rate)
            # accumulate the input gradients from all neurons
            for j in range(len(grad_to_input)):
                input_gradients[j] += grad_to_input[j]
        # return the layer gradient
        return input_gradients


class NeuralNetwork:
    def __init__(self, layer_size):
        self.layers = [Layer(layer_size[i], layer_size[i + 1])
                       for i in range(len(layer_size) - 1)]

    def forward(self, inputs):
        output = inputs
        for i, layer in enumerate(self.layers):
            # disable the sigmoid on the last layer
            activate = (i != len(self.layers) - 1)
            output = layer.forward(output, activate=activate)
        return output

    def backward(self, inputs, targets, learning_rate=0.1):
        """
        targets must be a list with the same length as the output of the final layer
        """
        output = self.forward(inputs)
        # compute the initial gradient of the cost function for each output neuron
        # using the derivative of the Mean Squared Error: dC/dy = 2 * (output - target)
        dcost_dy_list = [2 * (o - t) for o, t in zip(output, targets)]
        grad = dcost_dy_list
        for layer in reversed(self.layers):
            # backpropagate the gradient through the layer to update weights and biases
            grad = layer.backward(grad, learning_rate)
        # return the final gradient
        return grad


if __name__ == "__main__":
    print("you might want to run main.py instead of network.py")
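
# A minimal usage sketch (assumptions: this file is importable as `network`, and the
# XOR data and hyperparameters below are illustrative, not taken from main.py):
#
#   from network import NeuralNetwork
#
#   net = NeuralNetwork([2, 3, 1])  # 2 inputs, one hidden layer of 3 neurons, 1 output
#   data = [([0, 0], [0]), ([0, 1], [1]), ([1, 0], [1]), ([1, 1], [0])]
#   for _ in range(5000):           # repeated passes over the tiny XOR dataset
#       for inputs, targets in data:
#           net.backward(inputs, targets, learning_rate=0.1)
#   print([round(net.forward(x)[0], 2) for x, _ in data])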