# Libraries.
import numpy as np
from numpy import random as rnd
import matplotlib.pyplot as plt

# Shuffle samples and labels.
def shuffle(samples, labels):
    """ This function applies one random column permutation to both the
    samples and the labels, so every sample keeps its own label.

    Parameters:
        + samples (Numpy array): 2xM array with the samples (2D points),
        being M the number of samples.
        + labels (Numpy array): 1xM array with the class labels for all
        samples.
    Returns:
        + samples (Numpy array): Column-permuted samples with the same size
        as the input samples.
        + labels (Numpy array): Labels permuted with the same ordering and
        the same size as the input labels.
    """

    # Both arrays must hold the same number of columns (samples).
    total = samples.shape[1]
    assert total == labels.shape[1]

    # Draw a single random ordering of the column indexes; the shuffle is
    # done in place and uses the global NumPy random state.
    order = np.arange(total)
    np.random.shuffle(order)

    # Index both arrays with that same ordering.
    return samples[:, order], labels[:, order]

# Samples: Scenario 1.
def scenario_1(num_samples, noise):
    """ This function generates the samples for scenario 1: two classes, each
    one drawn from a single 2D Gaussian distribution.

    Parameters:
        + num_samples (int list): List with the number of samples per class
        (entries 0 and 1 are used).
        + noise (float): Noise factor. It multiplies both covariance
        matrices.

    Returns:
        + samples (Numpy array): 2xM array with the samples (2D points),
        being M the number of samples.
        + labels (Numpy array): 1xM array with the class labels (1 or 2) for
        all samples.
        + num_classes (int): Number of classes for this scenario (2).
    """

    num_classes = 2  # Number of classes.

    # (mean, covariance before noise scaling) per class, in label order.
    gaussians = [
        ((0.6, 0.6), np.array([[0.015, -0.005], [-0.005, 0.009]])),
        ((0.3, 0.3), np.array([[0.007, 0], [0, 0.007]])),
    ]

    # Draw every class in label order and tag its samples with the label.
    clouds = []
    tags = []
    for class_id, (mean, base_cov) in enumerate(gaussians, start=1):
        count = num_samples[class_id - 1]
        # The draw has one row per sample; transpose to split the features.
        f0, f1 = rnd.multivariate_normal(mean, noise*base_cov, count).T
        clouds.append(np.vstack((f0, f1)))
        tags.append(np.full((1, count), class_id, dtype=int))

    # Put all classes side by side.
    samples = np.hstack(clouds)
    labels = np.hstack(tags)

    # Mix the classes with one common random ordering.
    samples, labels = shuffle(samples, labels)

    return samples, labels, num_classes

# Samples: Scenario 2.
def scenario_2(num_samples, noise):
    """ This function generates the samples for scenario 2: two classes laid
    out as two interleaved spiral arms.

    Parameters:
        + num_samples (int list): List with the number of samples per class
        (entries 0 and 1 are used).
        + noise (float): Noise factor. Each coordinate receives a uniform
        offset in [0, 2*noise) before the final scaling.

    Returns:
        + samples (Numpy array): 2xM array with the samples (2D points),
        being M the number of samples.
        + labels (Numpy array): 1xM array with the class labels (1 or 2) for
        all samples.
        + num_classes (int): Number of classes for this scenario (2).
    """

    num_classes = 2  # Number of classes.

    # Spiral geometry, in degrees, converted to radians where needed.
    deg2rad = float(2*np.pi)/360.0
    start_rad = 50 * deg2rad  # Angle at which both spirals start.
    span_deg = 480  # Angular length of the spirals.

    arms = []
    tags = []
    # Class 1 uses direction +1; class 2 is the same curve with both
    # coordinates negated (direction -1).
    for class_index, direction in enumerate((1.0, -1.0)):
        count = num_samples[class_index]

        # Random angle along the arm, then the uniform jitter for f0 and f1
        # (random draws kept in this order).
        theta = start_rad + np.sqrt(rnd.random((1, count))) * span_deg*deg2rad
        jitter_0 = rnd.random((1, count))*noise*2.0
        jitter_1 = rnd.random((1, count))*noise*2.0

        # The radius grows with the angle, which produces the spiral.
        f0 = direction*(-np.sin(theta)*theta) + jitter_0
        f1 = direction*(np.cos(theta)*theta) + jitter_1

        arms.append(np.vstack((f0, f1)))
        tags.append(np.full((1, count), class_index + 1, dtype=int))

    # Join both arms, then scale and center the distributions.
    samples = 0.04*np.hstack(arms)
    samples += 0.5

    labels = np.hstack(tags)

    # Mix the classes with one common random ordering.
    samples, labels = shuffle(samples, labels)

    return samples, labels, num_classes

# Samples: Scenario 3.
def scenario_3(num_samples, noise):
    """ This function generates the samples for scenario 3: a two-class
    problem built around a circle inside the unit square. Class 1 is drawn
    uniformly inside the circle and class 2 uniformly outside of it. The
    noise factor controls how many samples break that rule.

    Parameters:
        + num_samples (int list): List with the number of samples per class
        (entries 0 and 1 are used).
        + noise (float): Noise factor. A class 1 sample is drawn from the
        whole square (instead of the circle) when a uniform draw is not
        greater than 0.1*noise. Class 2 samples are accepted inside the
        circle while fewer than 0.02*num_samples[1]*noise were accepted.

    Returns:
        + samples (Numpy array): 2xM array with the samples (2D points),
        being M the number of samples.
        + labels (Numpy array): 1xM array with the class labels (1 or 2) for
        all samples.
        + num_classes (int): Number of classes for this scenario (2).
    """

    num_classes = 2  # Number of classes.
    center = np.array([0.5, 0.5])  # Circle center.
    radius = 0.2  # Circle radius.

    # One column per sample, filled in below.
    samples_1 = np.zeros((2, num_samples[0]))
    samples_2 = np.zeros((2, num_samples[1]))

    # Class 1: mostly inside the circle.
    for col in range(num_samples[0]):
        if np.random.uniform() > 0.1*noise:
            # Rejection sampling: retry until the point is in the circle.
            while True:
                candidate = np.random.uniform(size=2)
                offset = candidate-center
                if np.sqrt(np.sum(offset*offset)) < radius:
                    break
        else:
            # Noisy sample: anywhere in the unit square.
            candidate = np.random.uniform(size=2)
        samples_1[:, col] = candidate

    # Class 2: outside the circle, except for a limited number of noisy
    # samples that are allowed to fall inside.
    inside_count = 0
    for col in range(num_samples[1]):
        while True:
            candidate = np.random.uniform(size=2)
            offset = candidate-center
            distance = np.sqrt(np.sum(offset*offset))
            # Inside the circle and the noisy quota is not used up yet.
            if distance < radius and inside_count < 0.02*num_samples[1]*noise:
                inside_count += 1
                break
            # Strictly outside the circle: regular class 2 sample.
            if distance > radius:
                break
        samples_2[:, col] = candidate

    # Put both classes side by side.
    samples = np.hstack((samples_1, samples_2))
    labels = np.hstack((
        np.full((1, num_samples[0]), 1, dtype=int),
        np.full((1, num_samples[1]), 2, dtype=int),
    ))

    # Mix the classes with one common random ordering.
    samples, labels = shuffle(samples, labels)

    return samples, labels, num_classes

# Samples: Scenario 4.
def scenario_4(num_samples, noise):
    """ This function generates the samples for scenario 4: four classes,
    each one drawn from its own 2D Gaussian cluster.

    Parameters:
        + num_samples (int list): List with the number of samples per class
        (entries 0 to 3 are used).
        + noise (float): Noise factor. It multiplies every covariance
        matrix.

    Returns:
        + samples (Numpy array): 2xM array with the samples (2D points),
        being M the number of samples.
        + labels (Numpy array): 1xM array with the class labels (1 to 4) for
        all samples.
        + num_classes (int): Number of classes for this scenario (4).
    """

    num_classes = 4  # Number of classes.

    # (mean, covariance before noise scaling) per cluster, in label order.
    clusters = [
        ((0.25, 0.3), np.array([[0.0045, -0.000], [-0.000, 0.0035]])),
        ((0.75, 0.75), np.array([[0.0045, -0.000], [-0.000, 0.0045]])),
        ((0.4, 0.58), np.array([[0.012, -0.005], [-0.005, 0.003]])),
        ((0.7, 0.25), np.array([[0.004, -0.000], [-0.000, 0.004]])),
    ]

    # Draw every cluster in label order and tag its samples with the label.
    clouds = []
    tags = []
    for class_id, (mean, base_cov) in enumerate(clusters, start=1):
        count = num_samples[class_id - 1]
        # The draw has one row per sample; transpose to split the features.
        f0, f1 = rnd.multivariate_normal(mean, noise*base_cov, count).T
        clouds.append(np.vstack((f0, f1)))
        tags.append(np.full((1, count), class_id, dtype=int))

    # Put all clusters side by side.
    samples = np.hstack(clouds)
    labels = np.hstack(tags)

    # Mix the classes with one common random ordering.
    samples, labels = shuffle(samples, labels)

    return samples, labels, num_classes

# Load dataset.
def load_dataset(scenario=1, num_samples=[100,100,100,100], noise=1.0,
                 data='train'):
    """ This function builds one of the 2D synthetic datasets.

    The global NumPy random seed is fixed before sampling (1 for train data,
    3 for test data), so the same arguments always give the same dataset.

    Parameters:
    + scenario (int): Synthetic scenario index [1-4]
        (default: scenario 1).
    + num_samples (int list): List with the number of
        samples per class (default: 100 samples per class).
    + noise (float): Noise factor (default: 1.0 factor).
    + data (string): Selects the training ('train') or the
        test ('test') data (default: train data).

    Returns:
    + dataset (dict): Dictionary containing the dataset with the
        following data:
        * samples (Numpy array): 2xM array with the samples
            (2D points), being M the number of samples.
        * labels (Numpy array): 1xM array with the class labels
            for all samples. Labels are numbers in the range
            [1, N], being N the number of classes.
        * num_classes (int): The number of classes for the
            selected scenario.
    """

    # Only four scenarios and two data splits exist.
    assert scenario in [1, 2, 3, 4]
    assert data in ['train', 'test']

    # Each split has its own fixed seed so train and test data differ.
    if data == 'train':
        np.random.seed(1)
    elif data == 'test':
        np.random.seed(3)

    # Generate the samples with the selected scenario.
    if scenario == 1:
        samples, labels, num_classes = scenario_1(num_samples, noise)
    elif scenario == 2:
        samples, labels, num_classes = scenario_2(num_samples, noise)
    elif scenario == 3:
        samples, labels, num_classes = scenario_3(num_samples, noise)
    elif scenario == 4:
        samples, labels, num_classes = scenario_4(num_samples, noise)

    # Pack everything into the dataset dictionary.
    return {
        'samples': samples,
        'labels': labels,
        'num_classes': num_classes,
    }

# Show dataset samples.
def show_samples(dataset):
    """ This function plots the dataset samples in the 2D feature space,
    using one color per class.

    Parameters:
    + dataset (dict): Dictionary containing the dataset with the
        following data:
        * samples (Numpy array): 2xM array with the samples
            (2D points), being M the number of samples.
        * labels (Numpy array): 1xM array with the class labels
            for all samples. Labels are numbers in the range
            [1, N], being N the number of classes.
        * num_classes (int): The number of classes for the
            selected scenario.

    Returns:
        None
    """

    # Unpack the dataset.
    points = dataset['samples']
    class_ids = dataset['labels']
    num_classes = dataset['num_classes']

    # One label per sample is required.
    assert points.shape[1]==class_ids.shape[1]

    # A title is only defined for the two-class and four-class scenarios.
    titles = {
        2: '2D Samples: Class 1 (Red), Class 2 (Blue)',
        4: '2D Samples: Class 1 (Red), Class 2 (Blue), Class 3 (Green), Class 4 (Purple)',
    }

    # Draw the samples class by class (labels start at 1, colors at 0).
    plt.figure(figsize=(5, 5))
    for class_id in range(1, num_classes + 1):
        mask = class_ids[0, :] == class_id
        plt.plot(points[0, mask], points[1, mask], marker='o',
                 linestyle='None', color=colors(class_id - 1))

    # Axes, title and grid.
    plt.axis('equal')
    plt.xlabel('f0', fontsize=18)
    plt.ylabel('f1', fontsize=18)
    if num_classes in titles:
        plt.title(titles[num_classes], fontsize=18)
    plt.grid()
    plt.show()

# Show samples sets.
def show_samples_sets(train_dataset, test_dataset):

    """ This function plots the train and test samples side by side in the
    2D feature space, using one color per class.

    Parameters:
    + train_dataset (dict): Dictionary containing the train dataset with the
        following data:
        * samples (Numpy array): 2xM array with the samples
            (2D points), being M the number of training samples.
        * labels (Numpy array): 1xM array with the class labels
            for all samples. Labels are numbers in the range
            [1, N], being N the number of classes.
        * num_classes (int): The number of classes for the
            selected scenario. This value is used for both plots.
    + test_dataset (dict): Dictionary containing the test dataset with the
        following data:
        * samples (Numpy array): 2xM array with the samples
            (2D points), being M the number of test samples.
        * labels (Numpy array): 1xM array with the class labels
            for all samples. Labels are numbers in the range
            [1, N], being N the number of classes.

    Returns:
        None
    """

    # Unpack both datasets; the class count is read from the train set only.
    train_samples = train_dataset['samples']
    train_labels = train_dataset['labels']
    num_classes = train_dataset['num_classes']
    test_samples = test_dataset['samples']
    test_labels = test_dataset['labels']

    # One label per sample is required in both sets.
    assert train_samples.shape[1]==train_labels.shape[1]
    assert test_samples.shape[1]==test_labels.shape[1]

    # Left panel: train set. Right panel: test set.
    panels = (
        ('Train Set', train_samples, train_labels),
        ('Test Set', test_samples, test_labels),
    )

    plt.figure(figsize=(10,5))
    for position, (title, points, class_ids) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)

        # Draw the samples class by class (labels start at 1, colors at 0).
        for class_id in range(1, num_classes + 1):
            mask = class_ids[0, :] == class_id
            plt.plot(points[0, mask], points[1, mask], marker='o',
                     linestyle='None', color=colors(class_id - 1))

        plt.axis('equal')
        plt.xlabel('f0', fontsize=18)
        plt.ylabel('f1', fontsize=18)
        plt.title(title, fontsize=18)
        plt.grid()

    plt.show()

# Colors.
def colors(index=0):
    """ This function looks up a predefined color by its index.

    Parameters:
        + index (int): Color index in the range [0-7] (default: 0 index).
        Only the upper bound is checked; a negative index follows the usual
        sequence indexing rules.

    Returns:
        + color (tuple): RGB color values in the range [0, 1]
    """

    # Fixed palette: red, blue, green and purple come first.
    palette = (
        (1, 0, 0),
        (0, 0, 1),
        (0.0, 0.5, 0),
        (0.5, 0, 0.5),
        (0.7, 0.5, 0),
        (0, 0.5, 0.7),
        (0.7, 0.4, 0.3),
        (0.3, 0.2, 0.8),
    )

    # Reject indexes beyond the last palette entry.
    assert index < len(palette)

    return palette[index]
