// Copyright (c) 2007 David Grangier
// Copyright (c) 2007 Samy Bengio
// 
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without 
// modification, are permitted provided that the following conditions are 
// met: Redistributions of source code must retain the above copyright 
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the 
// documentation and/or other materials provided with the distribution.
// The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
// THE POSSIBILITY OF SUCH DAMAGE.


#include "Allocator.h"
#include "Kernel.h"
#include "SequenceKernel.h"
#include "ExpectedLikelihoodGaussian.h"
#include "PASequenceDataset.h"
#include "PASequenceExample.h"
#include "DiskXFile.h"
#include "CmdLine.h"

using namespace Torch;

int main(int argc, char **argv)
{
	// ======= Variables =======
	real std;
	bool elk;
  bool norm;
	char *data1_f;
	char *data2_f;
	char *subset_f;
	char *out;	

	// allocator
  Allocator *allocator = new Allocator;

	// === Read command line ====
  CmdLine cmd;

	// arguments
	//cmd.addRCmdArg("std", &std, "gaussian kernel std");
	cmd.addSCmdArg("data1", &data1_f, "first dataset");
	cmd.addSCmdArg("output", &out, "output file");

 	cmd.addText("Options:");
	cmd.addSCmdOption("-data2", &data2_f, "","second dataset (if not given data1 is used)");
	cmd.addBCmdOption("-norm", &norm, false, "normalization in the feature space");
	cmd.addRCmdOption("-std", &std, -1, "standard deviation of gaussian kernel");
	cmd.addBCmdOption("-elk", &elk, false, "expected likelihood kernel over GMM");
  cmd.addSCmdOption("-subset", &subset_f, "", "subset of pairs for which the kernel should be evaluated");
 
 // read the command line
  cmd.read(argc, argv);

	// === The kernel ===
	Kernel *sequence_kernel = NULL;

	if (!elk)
	{ // max kernel
		Kernel *local_kernel = NULL;	
		if (std < 0)
		{
			message("using Linear Kernel");
			local_kernel = new(allocator) DotKernel();
		}
		else
		{
			message("using RBF Kernel");
    	local_kernel = new(allocator) GaussianKernel(1/(2*std*std));
		}

    Kernel *sequence_kernel_nonorm = new(allocator) MaxKernel(local_kernel);
		sequence_kernel = sequence_kernel_nonorm;
  	if (norm)
    	sequence_kernel = new(allocator) FeatureNormKernel(sequence_kernel_nonorm);
	}
	else // expected likelihood kernel
	{
		message("using Expected Likelihood Kernel");	
		sequence_kernel = new(allocator) ExpectedLikelihoodGaussian(std * std, norm);
	}

	// === Loading data ===
  PASequenceDataset *data1 = new(allocator) PASequenceDataset(data1_f);
	PASequenceDataset *data2 = NULL;
	if (strcmp(data2_f, ""))
   data2 = new(allocator) PASequenceDataset(data2_f);

	// === Loading subset ===
	DataSet *subset = NULL;
	if (strcmp(subset_f, ""))
		subset = new (allocator) MatDataSet(subset_f, 2, 0);

	// === Compute kernel values and write output ===
	XFile *output = new (allocator) DiskXFile(out, "w");
	// write first line
	int n_values = 0;	
	// 1. full matrix, no symetry 	
	if ((data2)&&(!subset))  n_values = data2->n_examples * data1->n_examples;	
	// 2. full matrix, symetry
	if ((!data2)&&(!subset)) n_values = (data1->n_examples - 1) * data1->n_examples / 2;	
	// 3. subset
	if (subset) n_values = subset->n_examples;
	output->printf("%d %d\n", n_values, 3);

	// write needed kernel values
	// 1. full matrix, no symetry
	if ((data2)&&(!subset))
		for (int i = 0; i < data1->n_examples; i++)
			for (int j = 0; j < data2->n_examples; j++)
			{
				Sequence *s1 = ((PASequenceExample*)data1->getExample(i))->sequence;
				Sequence *s2 = ((PASequenceExample*)data2->getExample(j))->sequence;
				real k = sequence_kernel->eval(s1, s2);
				output->printf("%d %d %f\n", i, j, k);
			}
  // 2. full matrix, symetry
  if ((!data2)&&(!subset))	
		for (int i = 0; i < data1->n_examples; i++)
			for (int j = 0; j <= i; j++)
			{
				Sequence *s1 = ((PASequenceExample*)data1->getExample(i))->sequence;
				data1->pushExample();
				Sequence *s2 = ((PASequenceExample*)data1->getExample(j))->sequence;
				real k = sequence_kernel->eval(s1, s2);
				data1->popExample();	
        output->printf("%d %d %f\n", i, j, k);
			}
  // 3. subset
	if (!data2) data2 = data1;
  if (subset)
		for (int index = 0; index < subset->n_examples; index++)
		{
			subset->setExample(index);
			int i = (int) subset->inputs->frames[0][0];
			int j = (int) subset->inputs->frames[0][1];

      Sequence *s1 = ((PASequenceExample*)data1->getExample(i))->sequence;
      data1->pushExample();
      Sequence *s2 = ((PASequenceExample*)data2->getExample(j))->sequence;
      real k = sequence_kernel->eval(s1, s2);
      data1->popExample();
      output->printf("%d %d %f\n", i, j, k);
		}
	delete allocator;
}


