// Copyright (c) 2007 David Grangier
// Copyright (c) 2007 Samy Bengio
// 
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without 
// modification, are permitted provided that the following conditions are 
// met: Redistributions of source code must retain the above copyright 
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the 
// documentation and/or other materials provided with the distribution.
// The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
// THE POSSIBILITY OF SUCH DAMAGE.


#include "DocumentFinder.h"
#include "Random.h"

namespace Torch
{

// Builds a finder on top of two sparse matrices.
//
//  doc_  : matrix whose lines are handed to the SVectorCalculator in
//          dotprod() (indexed there by document id).
//  tdoc_ : matrix whose lines are indexed by the term ids of a query in
//          binScoring(); its column count (nc) is taken as the number of
//          documents and its line count (nl) sizes the SVectorCalculator.
//
// Both pointers are stored as-is (no copy is made) and tdoc_ is
// dereferenced immediately, so it must not be null.
DocumentFinder::DocumentFinder(SparseMatrix *doc_, SparseMatrix *tdoc_)
{
  // Remember the two matrices.
  doc = doc_;
  tdoc = tdoc_;

  // One counter per column of tdoc.
  n_doc = tdoc->nc;
  bin_scores = (int*) allocator->alloc(sizeof(int) * n_doc);

  // Helper used by dotprod().
  calc = new(allocator) SVectorCalculator(tdoc->nl);

  // Start with an empty list: zero size and a null frame pointer, which
  // is what getList() later passes to realloc on its first call.
  memset(&cur_list, 0, sizeof(svector));
}

// Fills bin_scores so that bin_scores[d] is the number of (query entry,
// tdoc line entry) pairs that reference document d. For every entry of
// `query`, the tdoc line selected by that entry's index is walked and the
// counter of each document listed there is incremented.
//
// No range check is done on the query indices or on the document indices
// read from tdoc.
void DocumentFinder::binScoring(svector *query)
{
  // Every document starts with zero matches.
  memset(bin_scores, 0, sizeof(int) * n_doc);

  int q = 0;
  while (q < query->size)
  {
    const int term = query->frame[q].index;
    const int n_entries = tdoc->lines[term].size;
    for (int e = 0; e < n_entries; ++e)
    {
      const int doc_id = tdoc->lines[term].frame[e].index;
      ++bin_scores[doc_id];
    }
    ++q;
  }
}

// Returns how many of the n_doc entries of bin_scores are exactly equal
// to `bin_score`. Reads whatever the last binScoring() call left behind.
int DocumentFinder::countDoc(int bin_score)
{
  int matches = 0;
  int d = 0;
  while (d < n_doc)
  {
    matches += (bin_scores[d] == bin_score) ? 1 : 0;
    ++d;
  }
  return matches;
}

// Returns the documents whose bin score equals `bin_score` as a sparse
// vector: one entry per matching document, with the document id as index
// and 1.0 as value.
//
// The returned pointer refers to the internal cur_list member, whose
// frame is re-allocated on every call; the result is therefore only
// meaningful until the next getList() call.
//
// Entries are written from the last slot towards the first while the
// documents are scanned in increasing order, so the indices in the
// returned frame are in DECREASING document order.
svector *DocumentFinder::getList(int bin_score)
{
  // Number of matching documents == number of entries to store.
  const int total = countDoc(bin_score);

  // Resize the list storage to hold exactly that many entries.
  cur_list.size = total;
  cur_list.frame = (sreal*) allocator->realloc(cur_list.frame, sizeof(sreal) * total);

  // Scan documents in increasing order and fill the frame back to front;
  // the scan ends as soon as every slot has been written.
  int slot = total;
  for (int d = 0; slot > 0; d++)
  {
    if (bin_scores[d] != bin_score)
      continue;
    slot--;
    cur_list.frame[slot].index = d;
    cur_list.frame[slot].value = 1.0;
  }
  return &cur_list;
}

int DocumentFinder::uniformSampling(int bin_score)
{
  // first count number of doc such that bin_scores[d] = bin_score
  int n_satisfied = countDoc(bin_score);
  // pick n uniformly among 0...(n_satisfied - 1)
  int n = Random::random() % n_satisfied;
  // pick the n-th doc verifying bin_scores[d] = bin_score
  int res = -1;
  int d = 0;
  while (res == -1)
  {
    if (bin_scores[d] == bin_score)
    {
      if (n == 0) res = d;
      n--;
    }
    d++;
  }
  return res;
}

// Draws one positive and one negative document for `query`.
//
// The bin scores are recomputed for the query first. The positive is
// then sampled among documents whose score equals query->size, and the
// negative among documents whose score is 0.
//
// NOTE(review): a score of query->size means "contains all query terms"
// only if the query has no repeated term index and each tdoc line lists a
// document at most once. Confirm with the data producers.
//
//  pos, neg : output parameters receiving the sampled document ids.
void DocumentFinder::samplePair(svector *query, int *pos, int *neg)
{
  binScoring(query);

  // Positive: score equal to the number of query entries.
  const int full_match_score = query->size;
  *pos = uniformSampling(full_match_score);

  // Negative: score of zero, i.e. no query entry hit the document.
  const int no_match_score = 0;
  *neg = uniformSampling(no_match_score);
}

// Returns calc->inner() applied to `query` and the d-th line of the doc
// matrix. No bounds check is performed on d.
real DocumentFinder::dotprod(svector *query, int d)
{
  return calc->inner(query, &doc->lines[d]);
}

// Destructor: releases nothing explicitly. bin_scores, calc and
// cur_list.frame are all obtained through `allocator` (see the constructor
// and getList()).
// NOTE(review): presumably the allocator reclaims them when the object is
// destroyed; confirm against the allocator's owner. The doc and tdoc
// matrices are only referenced here and are not freed.
DocumentFinder::~DocumentFinder()
{}

}
