/**
 * @file gensvm_train_dataset.h
 * @author Gertjan van den Burg
 * @date August, 2013
 * @brief Structs and functions necessary for the grid search
 *
 * @details
 * The grid search for the optimal parameters is done through a queue.
 * This file contains struct definitions for this queue and a single
 * task in a queue, as well as a structure for the complete training
 * scheme. Function declarations are also included.
 *
 */

#ifndef GENSVM_TRAIN_DATASET_H
#define GENSVM_TRAIN_DATASET_H

#include "globals.h"
#include "types.h"

/**
 * @brief A structure for a single task in the queue.
 *
 * @param folds 	number of folds in cross validation
 * @param ID 		numeric id of the task in the queue
 * @param weight_idx 	parameter for the GenModel
 * @param p 		parameter for the GenModel
 * @param kappa 	parameter for the GenModel
 * @param lambda 	parameter for the GenModel
 * @param epsilon 	parameter for the GenModel
 * @param kerneltype 	parameter for the GenModel
 * @param *kernelparam parameters for the GenModel
 * @param *train_data 	pointer to the training data
 * @param *test_data 	pointer to the test data (if any)
 * @param performance 	performance after cross validation
 */
struct Task {
	KernelType kerneltype;
	int weight_idx;
	long folds;
	long ID;
	double p;
	double kappa;
	double lambda;
	double epsilon;
	double *kernelparam;
	struct GenData *train_data;
	struct GenData *test_data;
	double performance;
};

/**
 * @brief Simple task queue.
 *
 * This struct is basically just an array of pointers to Task instances,
 * with a length and an index of the current task.
 *
 * @param **tasks 	array of pointers to Task structs
 * @param N 		size of task array
 * @param i 		index used for keeping track of the queue
 */
struct Queue {
	struct Task **tasks;
	long N;
	long i;
};

/**
 * @brief Structure for describing the entire grid search
 *
 * @param traintype 		type of training to use
 * @param kerneltype 		type of kernel to use throughout training
 * @param repeats 		number of repeats to be done after the grid 
 * 				search to find the parameter set with the 
 * 				most consistent high performance
 * @param folds 		number of folds in cross validation
 * @param Np 			size of the array of p values
 * @param Nl 			size of the array of lambda values
 * @param Nk 			size of the array of kappa values
 * @param Ne 			size of the array of epsilon values
 * @param Nw 			size of the array of weight_idx values
 * @param Ng 			size of the array of gamma values
 * @param Nc 			size of the array of coef values
 * @param Nd 			size of the array of degree values
 * @param *weight_idxs 		array of weight_idxs
 * @param *ps 			array of p values 
 * @param *lambdas 		array of lambda values
 * @param *kappas 		array of kappa values
 * @param *epsilons 		array of epsilon values
 * @param *gammas 		array of gamma values
 * @param *coefs 		array of coef values
 * @param *degrees 		array of degree values
 * @param *train_data_file 	filename of train data file
 * @param *test_data_file 	filename of test data file
 *
 */
struct Training {
	TrainType traintype;
	KernelType kerneltype;
	long repeats;
	long folds;
	long Np;
	long Nl;
	long Nk;
	long Ne;
	long Nw;
	long Ng;
	long Nc;
	long Nd;
	int *weight_idxs;
	double *ps;
	double *lambdas;
	double *kappas;
	double *epsilons;
	double *gammas;
	double *coefs;
	double *degrees;
	char *train_data_file;
	char *test_data_file;
};

void make_queue(struct Training *training, struct Queue *queue,
		struct GenData *train_data, struct GenData *test_data);

struct Task *get_next_task(struct Queue *q);
void start_training_tt(struct Queue *q);
void start_training_cv(struct Queue *q);
void free_queue(struct Queue *q);

void consistency_repeats(struct Queue *q, long repeats, TrainType traintype);

double cross_validation(struct GenModel *model, struct GenData *data,
	       	long folds);

void make_model_from_task(struct Task *task, struct GenModel *model);
void copy_model(struct GenModel *from, struct GenModel *to);

void print_progress_string(struct Task *task, long N);
#endif