# Libraries.
import numpy as np
from numpy import random as rnd
import matplotlib.pyplot as plt

# Shuffle samples and labels.
def shuffle(samples, labels):
    """ Apply one common random permutation to the samples and their labels,
    so that every sample stays paired with its own label.

    The permutation is drawn from the NumPy global random generator
    (np.random), hence it is reproducible after np.random.seed. The input
    arrays are not modified: new arrays are returned.

    Parameters:
        + samples (Numpy array): 2xM array with the samples (2D points),
        being M the number of samples (one sample per column).
        + labels (Numpy array): Array of length M with the class label of
        every sample.
    Returns:
        + samples (Numpy array): Samples with their columns permuted.
        + labels (Numpy array): Labels permuted in the same order.
    """

    # One sample per column: samples and labels must agree on the count.
    total = samples.shape[1]
    assert total==len(labels)

    # Random ordering of the column indices.
    order = np.arange(total)
    np.random.shuffle(order)

    # Index both arrays with the very same ordering to keep them aligned.
    shuffled_labels = labels[order]
    shuffled_samples = samples[:, order]

    return shuffled_samples, shuffled_labels

# Samples: Scenario 1.
def scenario_1(num_samples, noise):
    """ Generate the samples of scenario 1: two classes, each one drawn from
    a single -unimodal- 2D Gaussian distribution.

    Parameters:
        + num_samples (int list): Number of samples to draw per class (first
        entry for class 1, second entry for class 2).
        + noise (float): Noise factor. It scales both covariance matrices.

    Returns:
        + samples (Numpy array): 2xM array with the shuffled samples (2D
        points), being M the total number of samples.
        + labels (Numpy array): Array of length M with the class label (1 or
        2) of every sample, shuffled together with the samples.
        + num_classes (int): Number of classes for this scenario (2).
    """

    num_classes = 2  # Number of classes.
    scale = 3*noise  # Common factor applied to every covariance.

    # One (mean, covariance) pair per class, in label order.
    gaussians = (
        ((0.6, 0.6), scale*np.array([[0.015, -0.005], [-0.005, 0.009]])),
        ((0.3, 0.3), scale*np.array([[0.007, 0], [0, 0.007]])),
    )

    # Draw class by class (class 1 first, so the random stream is consumed
    # in a fixed order) and build the matching labels.
    sample_blocks = []
    label_blocks = []
    for index, (mean, cov) in enumerate(gaussians):
        count = num_samples[index]
        points = rnd.multivariate_normal(mean, cov, count)
        # Transpose to 2xcount and shift both features by -0.5.
        sample_blocks.append(points.T - 0.5)
        label_blocks.append(np.full(count, index + 1, dtype=int))

    samples = np.hstack(sample_blocks)
    labels = np.hstack(label_blocks)

    # Mix the classes: samples and labels are shuffled jointly.
    samples, labels = shuffle(samples, labels)

    return samples, labels, num_classes

# Samples: Scenario 2.
def scenario_2(num_samples, noise):
    """ This function computes samples for scenario 2. This scenario
    corresponds to two classes with five clusters. The first class has a
    cluster in the center of the 2D feature space while the second class has
    four clusters around it. This is a non-linear classification problem.

    The samples of the second class are split evenly among its four
    clusters. When the requested number is not a multiple of four, the
    remaining samples are assigned (one each) to the first clusters, so the
    second class always gets exactly the requested number of samples.

    Parameters:
        + num_samples (int list): List with the number of samples per class.
        + noise (float): Noise factor. It scales every covariance matrix.

    Returns:
        + samples (Numpy array): 2xM array with the shuffled samples (2D
        points), being M the total number of samples.
        + labels (Numpy array): Array of length M with the class label (1 or
        2) of every sample, shuffled together with the samples.
        + num_classes (int): Number of classes for this scenario (2).
    """

    num_classes = 2  # Number of classes.
    num_clusters_2 = 4  # Number of clusters of class 2.
    scale = 3*noise  # Common factor applied to every covariance.

    # Samples per cluster. Class 1 has a single cluster; class 2 spreads its
    # samples over four clusters, the remainder going to the first ones.
    num_class_1 = num_samples[0]
    base, extra = divmod(int(num_samples[1]), num_clusters_2)
    counts_2 = [base + (1 if k < extra else 0) for k in range(num_clusters_2)]

    # Clusters as (label, mean, covariance, number of samples). The order is
    # fixed so the random stream is always consumed in the same way.
    clusters = (
        # Class 1: central cluster.
        (1, (0.5, 0.5),
         scale*np.array([[0.011, -0.000], [-0.000, 0.009]]), num_class_1),
        # Class 2: four surrounding clusters.
        (2, (0.85, 0.85),
         scale*np.array([[0.007, 0.001], [0.001, 0.007]]), counts_2[0]),
        (2, (0.1, 0.1),
         scale*np.array([[0.008, -0.002], [-0.002, 0.01]]), counts_2[1]),
        (2, (0.8, 0.15),
         scale*np.array([[0.008, 0.003], [0.003, 0.007]]), counts_2[2]),
        (2, (0.1, 0.90),
         scale*np.array([[0.006, -0.003], [-0.003, 0.009]]), counts_2[3]),
    )

    # Draw every cluster and build the matching labels.
    sample_blocks = []
    label_blocks = []
    for label, mean, cov, count in clusters:
        points = rnd.multivariate_normal(mean, cov, count)
        # Transpose to 2xcount and shift both features by -0.5.
        sample_blocks.append(points.T - 0.5)
        label_blocks.append(np.full(count, label, dtype=int))

    samples = np.hstack(sample_blocks)
    labels = np.hstack(label_blocks)

    # Shuffle samples and labels.
    samples, labels = shuffle(samples, labels)

    return samples, labels, num_classes

# Load dataset.
def load_dataset(scenario=1, num_samples=[100,100], noise=0.2, data='train'):
    """ This function builds a reproducible 2D synthetic dataset.

    The NumPy global random generator is re-seeded on every call (seed 1 for
    training data, seed 3 for test data), so the same arguments always give
    the same dataset. Note that this resets the global np.random state.

    Parameters:
    + scenario (int): Synthetic scenario index, 1 or 2
        (default: scenario 1).
    + num_samples (int list): List with the number of
        samples per class (default: 100 samples per class).
    + noise (float): Noise factor (default: 0.2 factor).
    + data (string): String flag for loading the training
        (train) or test (test) data (default: train data).

    Returns:
    + dataset (dict): Dictionary containing the dataset with the
        following data:
        * samples (Numpy array): Mx2 array with the samples
            (2D points, one per row), being M the number of samples.
        * labels (Numpy array): Array of length M with the class
            labels for all samples. Labels are numbers in the range
            [1, N], being N the number of classes.
        * num_classes (int): The number of classes for the
            selected scenario.
    """

    # Only scenarios 1 and 2 exist, and only train/test splits.
    assert scenario in [1, 2]
    assert data in ['train', 'test']

    # Each split has its own fixed seed to make the data reproducible.
    seed = {'train': 1, 'test': 3}.get(data)
    if seed is not None:
        np.random.seed(seed)

    # Pick the generator of the requested scenario and run it.
    if scenario == 1:
        generate = scenario_1
    elif scenario == 2:
        generate = scenario_2
    samples, labels, num_classes = generate(num_samples, noise)

    # The generators return 2xM samples: transpose to one sample per row.
    return {
        'samples': samples.T,
        'labels': labels.T,
        'num_classes': num_classes,
    }
