/**
* @file msvmmaj_train_dataset.h
* @author Gertjan van den Burg
* @date August, 2013
* @brief Structs and functions necessary for the grid search
*
* @details
* The grid search for the optimal parameters is done through a queue.
* This file contains the struct definitions for this queue and for a
* single task in the queue, as well as a structure describing the
* complete training scheme. Function declarations are also included.
*
*/
#ifndef MSVMMAJ_TRAIN_DATASET_H
#define MSVMMAJ_TRAIN_DATASET_H
#include "globals.h"
#include "types.h"
/**
* @brief A structure for a single task in the queue.
*
* A task bundles one combination of MajModel parameters with pointers to
* the data it refers to and the performance obtained for it. The fields
* are listed below in declaration order.
*
* NOTE(review): this header does not show who allocates or frees
* kernelparam, train_data and test_data -- confirm ownership against the
* implementation before freeing them through a Task.
*
* @param kerneltype 	parameter for the MajModel
* @param weight_idx 	parameter for the MajModel
* @param folds 		number of folds in cross validation
* @param ID 		numeric id of the task in the queue
* @param p 		parameter for the MajModel
* @param kappa 	parameter for the MajModel
* @param lambda 	parameter for the MajModel
* @param epsilon 	parameter for the MajModel
* @param *kernelparam 	parameters for the MajModel
* @param *train_data 	pointer to the training data
* @param *test_data 	pointer to the test data (if any)
* @param performance 	performance after cross validation
*/
struct Task {
KernelType kerneltype; /* kernel type handed to the MajModel */
int weight_idx; /* weight_idx parameter for the MajModel */
long folds; /* number of cross validation folds */
long ID; /* numeric id of this task in the queue */
double p; /* p parameter for the MajModel */
double kappa; /* kappa parameter for the MajModel */
double lambda; /* lambda parameter for the MajModel */
double epsilon; /* epsilon parameter for the MajModel */
double *kernelparam; /* kernel parameters for the MajModel */
struct MajData *train_data; /* training data */
struct MajData *test_data; /* test data, if any */
double performance; /* performance after cross validation */
};
/**
* @brief Simple task queue.
*
* This struct is basically just an array of pointers to Task instances,
* with a length and an index of the current task.
*
* @param **tasks 	array of pointers to Task structs
* @param N 		size of task array
* @param i 		index used for keeping track of the queue
*/
struct Queue {
struct Task **tasks; /* array of pointers to the Task instances */
long N; /* number of entries in tasks */
long i; /* index of the current task */
};
/**
* @brief Structure for describing the entire grid search
*
* Each hyperparameter has an array of candidate values (ps, lambdas, ...)
* and a matching size field (Np, Nl, ...) holding the length of that array.
*
* @param traintype 		type of training to use
* @param kerneltype 		type of kernel to use throughout training
* @param repeats 		number of repeats to be done after the grid
* 				search to find the parameter set with the
* 				most consistent high performance
* @param folds 		number of folds in cross validation
* @param Np 			size of the array of p values
* @param Nl 			size of the array of lambda values
* @param Nk 			size of the array of kappa values
* @param Ne 			size of the array of epsilon values
* @param Nw 			size of the array of weight_idx values
* @param Ng 			size of the array of gamma values
* @param Nc 			size of the array of coef values
* @param Nd 			size of the array of degree values
* @param *weight_idxs 		array of weight_idxs
* @param *ps 			array of p values
* @param *lambdas 		array of lambda values
* @param *kappas 		array of kappa values
* @param *epsilons 		array of epsilon values
* @param *gammas 		array of gamma values
* @param *coefs 		array of coef values
* @param *degrees 		array of degree values
* @param *train_data_file 	filename of train data file
* @param *test_data_file 	filename of test data file
*
*/
struct Training {
/* General settings of the grid search. */
TrainType traintype;
KernelType kerneltype;
long repeats;
long folds;
/* Sizes of the value arrays below, in the order p, lambda, kappa,
 * epsilon, weight_idx, gamma, coef, degree. */
long Np;
long Nl;
long Nk;
long Ne;
long Nw;
long Ng;
long Nc;
long Nd;
/* Candidate values, one array per parameter. */
int *weight_idxs;
double *ps;
double *lambdas;
double *kappas;
double *epsilons;
double *gammas;
double *coefs;
double *degrees;
/* Filenames of the train and test data files. */
char *train_data_file;
char *test_data_file;
};
/*
 * Forward declarations of the struct tags used in the prototypes below.
 * Only pointers to these types appear here, so incomplete types suffice.
 * Declaring the tags at file scope makes sure that a tag which first shows
 * up inside a parameter list (struct MajModel) is not scoped to that single
 * prototype when no previously included header has declared it. Repeating
 * a forward declaration is harmless if the tag is already known.
 */
struct MajData;
struct MajModel;

/*
 * Function declarations for the grid search. The definitions are not part
 * of this header.
 * NOTE(review): the group headings below are inferred from names and
 * signatures only -- confirm against the implementation.
 */

/* Queue handling: fill a Queue from a Training description and the data
 * sets, step through its tasks, and release it (presumably). */
void make_queue(struct Training *training, struct Queue *queue,
		struct MajData *train_data, struct MajData *test_data);
struct Task *get_next_task(struct Queue *q);

/* Entry points that process a Queue; the _tt / _cv suffixes presumably
 * stand for train/test and cross validation -- TODO confirm. */
void start_training_tt(struct Queue *q);
void start_training_cv(struct Queue *q);
void free_queue(struct Queue *q);
void consistency_repeats(struct Queue *q, long repeats, TrainType traintype);

/* Takes a model, a data set and a number of folds; returns a double. */
double cross_validation(struct MajModel *model, struct MajData *data,
		long folds);

/* Helpers operating on a MajModel and on Task instances. */
void make_model_from_task(struct Task *task, struct MajModel *model);
void copy_model(struct MajModel *from, struct MajModel *to);
void msvmmaj_reallocate_model(struct MajModel *model, long n, long m);
void print_progress_string(struct Task *task, long N);
#endif