//------------------------------------------------------------------------------
// Tasting families of features for image classification.
// 
// Copyright (c) 2011 Idiap Research Institute, http://www.idiap.ch/
// Written by Charles Dubout <charles.dubout@idiap.ch>
// 
// This file is part of Tasting.
// 
// Tasting is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.
// 
// Tasting is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with Tasting. If not, see <http://www.gnu.org/licenses/>.
//------------------------------------------------------------------------------

#include "UCBBoostMH.h"
#include "Utils.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <limits>

using namespace ML;
using namespace std;

// Constructs the trainer. The arguments are only stored here:
//   nbRounds   - number of boosting rounds done by train()
//   nbFeatures - number of features sampled at every round
//   scale      - factor applied to the reward of every round but the first
//   testSet    - optional input set (may be null) used by train() to report
//                the test error after every round
UCBBoostMH::UCBBoostMH(unsigned int nbRounds,
					   unsigned int nbFeatures,
					   double scale,
					   InputSet* testSet)
: nbRounds_(nbRounds),
  nbFeatures_(nbFeatures),
  scale_(scale),
  testSet_(testSet) {
	// Zero rounds or zero sampled features are not allowed
	assert(nbRounds > 0);
	assert(nbFeatures > 0);
}

// Returns a copy of this classifier allocated with new (copy-constructed from
// *this). Nothing in this file deletes it, so presumably the caller is
// responsible for doing so.
Classifier* UCBBoostMH::clone() const {
	Classifier* const copy = new UCBBoostMH(*this);

	return copy;
}

// Trains the classifier with AdaBoost.MH, selecting at every round the
// heuristic (family of features) to sample from with a UCB bandit strategy.
//
// The previously trained stumps and alphas are discarded. Then, for each of the
// nbRounds_ rounds:
//   1. the edge of every label is computed from the current sample weights;
//   2. (first round only) every heuristic is tried once to get an initial
//      reward estimate;
//   3. the heuristic maximizing average reward + exploration bonus is selected;
//   4. nbFeatures_ of its features are drawn with rand() (with replacement),
//      pushed on inputSet, and the best stump on them is trained;
//   5. the sample weights and hypotheses are updated, the progress is printed
//      on cout (with the test error if a test set was given), and the average
//      reward of the played heuristic is updated.
//
// Every pushFeatures() done here is matched by a popFeatures() before the round
// ends. The formatting state of cout (precision, fixed notation) is modified
// and not restored.
void UCBBoostMH::train(InputSet& inputSet) {
	// Clear the previous classifier
	stumps_.clear();
	alphas_.clear();

	// Get the number of samples, features, labels, and heuristics
	const unsigned int nbSamples = inputSet.nbSamples();
	const unsigned int nbFeatures = inputSet.nbFeatures();
	const unsigned int nbLabels = inputSet.nbLabels();
	const unsigned int nbHeuristics = inputSet.nbHeuristics();

	// Get the labels associated to every sample
	const unsigned int* labels = inputSet.labels();

	// Ordering of the samples on every sampled feature
	vector<vector<unsigned int> > indices(nbFeatures_);

	for(unsigned int f = 0; f < nbFeatures_; ++f)
		indices[f].resize(nbSamples);

	// Set the distribution of weights uniformly (the product is done in double
	// so that it cannot wrap around in unsigned arithmetic)
	const double uniformWeight = 1.0 / (double(nbSamples) * nbLabels);

	vector<vector<double> > weights(nbLabels);
	vector<vector<double> > hypotheses(nbLabels);
	vector<vector<double> > testHypotheses(nbLabels);

	for(unsigned int l = 0; l < nbLabels; ++l) {
		weights[l].resize(nbSamples, uniformWeight);
		hypotheses[l].resize(nbSamples, 0.0);

		if(testSet_)
			testHypotheses[l].resize(testSet_->nbSamples(), 0.0);
	}

	// Separate the features by heuristic
	vector<vector<unsigned int> > heuristics(nbHeuristics);

	for(unsigned int f = 0; f < nbFeatures; ++f)
		heuristics[inputSet.heuristic(f)].push_back(f);

	// Features are drawn with rand() % size, and every heuristic is sampled
	// during the first round, so none of them may be empty
	for(unsigned int h = 0; h < nbHeuristics; ++h)
		assert(!heuristics[h].empty());

	// AdaBoost.MH (average) loss
	double logLoss = 0.0;

	// The weights (average rewards) of the heuristics
	vector<double> omegas(nbHeuristics);

	// The number of times every heuristic was played
	vector<unsigned int> nbPlayed(nbHeuristics, 0);

	// Do nbRounds rounds of boosting
	for(unsigned int r = 0; r < nbRounds_; ++r) {
		cout.precision(4);
		cout.setf(ios::fixed, ios::floatfield);
		cout << "Omegas:";

		for(unsigned int h = 0; h < nbHeuristics; ++h)
			cout << setw(7) << omegas[h];

		cout << '.' << endl;

		// Compute the edges for every label
		vector<double> edges(nbLabels, 0.0);

		for(unsigned int l = 0; l < nbLabels; ++l)
			for(unsigned int s = 0; s < nbSamples; ++s)
				edges[l] += (labels[s] == l) ? weights[l][s] : -weights[l][s];

		// Do the initialization during the first round: try every heuristic
		// once. nbPlayed is left untouched, so this estimate is replaced by the
		// actual reward the first time the heuristic is really played.
		if(!r) {
			// Sample nbFeatures_ from every heuristic to compute the average
			vector<unsigned int> features(nbFeatures_);

			for(unsigned int h = 0; h < nbHeuristics; ++h) {
				for(unsigned int f = 0; f < nbFeatures_; ++f)
					features[f] = heuristics[h][rand() % heuristics[h].size()];

				// Push those features on the input set
				inputSet.pushFeatures(features);

				// Sort the features so as to train stumps on them
				for(unsigned int f = 0; f < nbFeatures_; ++f) {
					for(unsigned int s = 0; s < nbSamples; ++s)
						indices[f][s] = s;

					Utils::sort(&indices[f][0], inputSet.samples(f), nbSamples);
				}

				// Find the stump with the maximum edge
				Stump stump;
				double edge = train(inputSet, indices, weights, edges, stump);
				double drop = sqrt(1.0 - edge * edge);
				double reward = -log(drop);
				omegas[h] = reward;

				// Pop those features from the input set
				inputSet.popFeatures();
			}
		}

		// Select the heuristic with the highest upper confidence bound
		unsigned int heuristic = 0;
		double bestBound = 0.0;

		for(unsigned int h = 0; h < nbHeuristics; ++h) {
			// A heuristic never played gets an infinite exploration bonus. This
			// is written explicitly rather than obtained from a division by
			// zero, which would give NaN when the logarithm is zero.
			const double bonus = nbPlayed[h] ?
				sqrt(2.0 * log(double(r + nbHeuristics)) / nbPlayed[h]) :
				numeric_limits<double>::infinity();

			const double bound = omegas[h] + bonus;

			if(bound > bestBound) {
				heuristic = h;
				bestBound = bound;
			}
		}

		// Pick nbFeatures_ from the selected heuristic
		vector<unsigned int> features(nbFeatures_);

		for(unsigned int f = 0; f < nbFeatures_; ++f)
			features[f] = heuristics[heuristic][rand() % heuristics[heuristic].size()];

		// Push those features on the input set
		inputSet.pushFeatures(features);

		// Sort the features so as to train stumps on them
		for(unsigned int f = 0; f < nbFeatures_; ++f) {
			for(unsigned int s = 0; s < nbSamples; ++s)
				indices[f][s] = s;

			Utils::sort(&indices[f][0], inputSet.samples(f), nbSamples);
		}

		// Find the stump with the maximum edge
		Stump stump;
		double edge = train(inputSet, indices, weights, edges, stump);

		// Select the samples of the selected feature (stump.feature_ is still
		// relative to the pushed features at this point)
		const scalar_t* samples = inputSet.samples(stump.feature_);

		// Index the feature in the pushed indices
		stump.feature_ = features[stump.feature_];

		// Update the loss and compute the reward (capped at 1, and scaled by
		// scale_ for every round but the first)
		double drop = sqrt(1.0 - edge * edge);
		logLoss += log10(drop);

		double reward = min(-log(drop) * (r ? scale_ : 1.0), 1.0);

		// Compute the weight to give to the weak learner and update the weights
		// of the samples
		// NOTE(review): an edge of exactly 1 makes expAlpha and alpha infinite
		// and corrupts the weights; left as is, confirm whether it can happen.
		double expAlpha = sqrt((1.0 + edge) / (1.0 - edge));
		double invExpAlpha = 1.0 / expAlpha;
		double alpha = log(expAlpha);
		double norm = 0.0;
		unsigned nbErrors = 0;

		for(unsigned int s = 0; s < nbSamples; ++s) {
			// The label with the maximum hypothesis (initialized so that it is
			// never read uninitialized if no hypothesis compares greater)
			double maxHypothesis = -numeric_limits<double>::infinity();
			unsigned int label = 0;

			bool phi = samples[s] >= stump.split_;

			for(unsigned int l = 0; l < nbLabels; ++l) {
				bool sign = phi ^ stump.signs_[l];

				weights[l][s] *= (sign ^ (l == labels[s])) ? invExpAlpha :
															 expAlpha;

				norm += weights[l][s];

				hypotheses[l][s] += sign ? -alpha : alpha;

				if(hypotheses[l][s] > maxHypothesis) {
					maxHypothesis = hypotheses[l][s];
					label = l;
				}
			}

			if(label != labels[s])
				++nbErrors;
		}

		cout << "[UCBBoostMH::train] round: " << setw(4) << r
			 << ", log10(loss): " << setw(7) << logLoss
			 << ", edge: " << setw(6) << edge
			 << ", heuristic: " << setw(2) << heuristic
			 << ", feature: " << setw(6) << stump.feature_
			 << ", reward: " << setw(6) << reward
			 << ", training error: " << setw(6) << (float(nbErrors) / nbSamples);

		// Evaluate the classifier trained so far on the test set, if any
		if(testSet_) {
			unsigned int nbTestSamples = testSet_->nbSamples();
			vector<unsigned int> index(1, stump.feature_);
			testSet_->pushFeatures(index);
			const scalar_t* testSamples = testSet_->samples(0);
			unsigned int nbTestErrors = 0;

			for(unsigned int s = 0; s < nbTestSamples; ++s) {
				// The label with the maximum hypothesis
				double maxHypothesis = -numeric_limits<double>::infinity();
				unsigned int label = 0;

				bool phi = testSamples[s] >= stump.split_;

				for(unsigned int l = 0; l < nbLabels; ++l) {
					bool sign = phi ^ stump.signs_[l];

					testHypotheses[l][s] += sign ? -alpha : alpha;

					if(testHypotheses[l][s] > maxHypothesis) {
						maxHypothesis = testHypotheses[l][s];
						label = l;
					}
				}

				if(label != testSet_->label(s))
					++nbTestErrors;
			}

			testSet_->popFeatures();

			cout << ", test error: " << setw(6) << (float(nbTestErrors) / nbTestSamples);
		}

		cout << '.' << endl;

		// Normalize the weights of the samples (plain loop: std::bind2nd was
		// removed from the standard library in C++17)
		for(unsigned int l = 0; l < nbLabels; ++l)
			for(unsigned int s = 0; s < nbSamples; ++s)
				weights[l][s] /= norm;

		// Update the average reward of the played heuristic
		omegas[heuristic] = (omegas[heuristic] * nbPlayed[heuristic] + reward) /
							(nbPlayed[heuristic] + 1);

		++nbPlayed[heuristic];

		// Add the weak learner and its weight
		stumps_.push_back(stump);
		alphas_.push_back(alpha);

		// Pop the pushed feature
		inputSet.popFeatures();
	}
}

// Computes the score of every label for one sample of inputSet.
//
// distr must point to at least inputSet.nbLabels() elements. It is first zeroed,
// then every trained stump adds +alpha or -alpha to the labels it has a sign
// for, depending on which side of its split the sample falls. The feature of
// every stump is temporarily pushed on inputSet (and popped afterwards) in
// order to read its value for the sample.
void UCBBoostMH::distribution(InputSet& inputSet,
							   unsigned int sample,
							   scalar_t* distr) const {
	const unsigned int nbLabels = inputSet.nbLabels();

	// Start from a score of zero for every label
	for(unsigned int l = 0; l < nbLabels; ++l)
		distr[l] = 0;

	// Accumulate the vote of each weak learner in turn
	unsigned int w = 0;

	while(w < stumps_.size()) {
		assert(stumps_[w].signs_.size() <= nbLabels);

		// Make the feature of this weak learner the only pushed feature
		const vector<unsigned int> pushed(1, stumps_[w].feature_);
		inputSet.pushFeatures(pushed);

		// Value of that single feature for the sample, and its side of the split
		const scalar_t value = *inputSet.features(sample);
		const bool phi = value >= stumps_[w].split_;

		const unsigned int nbSigns = stumps_[w].signs_.size();

		for(unsigned int l = 0; l < nbSigns; ++l) {
			if(phi ^ stumps_[w].signs_[l])
				distr[l] += -alphas_[w];
			else
				distr[l] += alphas_[w];
		}

		// Restore the input set before moving to the next weak learner
		inputSet.popFeatures();

		++w;
	}
}

// Appends to features the feature_ index of every trained stump, in the order
// in which the stumps were added. Existing elements of features are kept.
void UCBBoostMH::report(vector<unsigned int>& features) const {
	unsigned int w = 0;

	while(w < stumps_.size()) {
		features.push_back(stumps_[w].feature_);
		++w;
	}
}

// Finds, among the features currently pushed on inputSet, the decision stump
// maximizing the sum over the labels of the absolute value of the edges.
//
//   inputSet - provides the samples (read feature after feature, nbSamples
//              values per feature) and their labels
//   indices  - for every feature, the indices of the samples in the order in
//              which they are scanned (the callers sort them with Utils::sort)
//   weights  - weights[l][s] is the weight of sample s for label l
//   edges    - the total edge of every label under those weights
//              (NOTE(review): passed by value, which copies the vector on
//              every call; the signature is kept as it must match the header)
//   stump    - output: the feature (index relative to the pushed features),
//              split and per-label signs of the best stump
//
// The search starts from the stump whose split is the lowest finite scalar_t
// value and whose signs are the signs of the edges; a split is then only
// considered between two consecutive samples with different feature values.
//
// Returns the sum of absolute edges of the selected stump.
double UCBBoostMH::train(InputSet& inputSet,
						  const vector<vector<unsigned int> >& indices,
						  const vector<vector<double> >& weights,
						  const vector<double> edges,
						  Stump& stump) {
	// Get the number of samples, features, and labels
	unsigned int nbSamples = inputSet.nbSamples();
	unsigned int nbFeatures = inputSet.nbFeatures();
	unsigned int nbLabels = inputSet.nbLabels();

	// Make sure that the stump has the correct number of signs
	stump.signs_.resize(nbLabels);

	// Make sure the number of indices, weights, and edges are correct
	assert(indices.size() == nbFeatures);
	assert(weights.size() == nbLabels);
	assert(edges.size() == nbLabels);

	// Get the samples' features and labels
	const scalar_t* samples = inputSet.samples();
	const unsigned int* labels = inputSet.labels();

	// The best sum of absolute values of edges so far: start with the stump
	// that does not split the samples at all
	double sumEdges = 0.0;
	stump.feature_ = 0;

	for(unsigned int l = 0; l < nbLabels; ++l) {
		stump.signs_[l] = edges[l] >= 0.0;
		sumEdges += fabs(edges[l]);
	}

	stump.split_ = -numeric_limits<scalar_t>::max();

	// The edges of the samples on the left of the candidate split (the right
	// edges are simply edges - leftEdges); allocated once, reset per feature
	vector<double> leftEdges(nbLabels, 0.0);

	for(unsigned int f = 0; f < nbFeatures; ++f) {
		// Make sure the number of indices is correct
		assert(indices[f].size() == nbSamples);

		leftEdges.assign(nbLabels, 0.0);

		// Try to split in between every sample. The bound is written
		// s + 1 < nbSamples so that an empty input set does not make the
		// unsigned expression nbSamples - 1 wrap around.
		for(unsigned int s = 0; s + 1 < nbSamples; ++s) {
			unsigned int index = indices[f][s];
			unsigned int nextIndex = indices[f][s + 1];
			scalar_t feature = samples[index];
			scalar_t nextFeature = samples[nextIndex];
			unsigned int label = labels[index];

			// Include the current sample in the left edge
			double sum = 0.0;

			for(unsigned int l = 0; l < nbLabels; ++l) {
				assert(weights[l].size() == nbSamples);
				leftEdges[l] += (label == l) ? weights[l][index] :
											  -weights[l][index];

				// |right edge - left edge| = |edges - 2 * leftEdges|
				sum += fabs(edges[l] - 2.0 * leftEdges[l]);
			}

			// If a stump can be put in between and with a better sum of edges
			if((feature < nextFeature) && (sum > sumEdges)) {
				stump.feature_ = f;

				for(unsigned int l = 0; l < nbLabels; ++l) {
					stump.signs_[l] = edges[l] >= 2.0 * leftEdges[l];
				}

				stump.split_ = (feature + nextFeature) / 2;
				sumEdges = sum;
			}
		}

		// Move on to the values of the next feature
		samples += nbSamples;
	}

	return sumEdges;
}
