Diffstat (limited to 'src/msvmmaj_train_dataset.c')
-rw-r--r-- src/msvmmaj_train_dataset.c | 88
1 file changed, 75 insertions, 13 deletions
diff --git a/src/msvmmaj_train_dataset.c b/src/msvmmaj_train_dataset.c
index 0221a89..f0c09e5 100644
--- a/src/msvmmaj_train_dataset.c
+++ b/src/msvmmaj_train_dataset.c
@@ -16,6 +16,7 @@
#include "libMSVMMaj.h"
#include "msvmmaj.h"
#include "msvmmaj_init.h"
+#include "msvmmaj_kernel.h"
#include "msvmmaj_matrix.h"
#include "msvmmaj_train.h"
#include "msvmmaj_train_dataset.h"
@@ -73,7 +74,7 @@ void make_queue(struct Training *training, struct Queue *queue,
task->test_data = test_data;
task->folds = training->folds;
task->kerneltype = training->kerneltype;
- task->kernel_param = Calloc(double, training->Ng +
+ task->kernelparam = Calloc(double, training->Ng +
training->Nc + training->Nd);
queue->tasks[i] = task;
}
@@ -135,7 +136,7 @@ void make_queue(struct Training *training, struct Queue *queue,
while (i < N && training->Ng > 0)
for (j=0; j<training->Ng; j++)
for (k=0; k<cnt; k++) {
- queue->tasks[i]->kernel_param[0] =
+ queue->tasks[i]->kernelparam[0] =
training->gammas[j];
i++;
}
@@ -145,7 +146,7 @@ void make_queue(struct Training *training, struct Queue *queue,
while (i < N && training->Nc > 0)
for (j=0; j<training->Nc; j++)
for (k=0; k<cnt; k++) {
- queue->tasks[i]->kernel_param[1] =
+ queue->tasks[i]->kernelparam[1] =
training->coefs[j];
i++;
}
@@ -155,7 +156,7 @@ void make_queue(struct Training *training, struct Queue *queue,
while (i < N && training->Nd > 0)
for (j=0; j<training->Nd; j++)
for (k=0; k<cnt; k++) {
- queue->tasks[i]->kernel_param[2] =
+ queue->tasks[i]->kernelparam[2] =
training->degrees[j];
i++;
}
@@ -448,6 +449,8 @@ double cross_validation(struct MajModel *model, struct MajData *data,
for (f=0; f<folds; f++) {
msvmmaj_get_tt_split(data, train_data, test_data, cv_idx, f);
+
+ msvmmaj_make_kernel(model, train_data);
// reallocate the model if necessary for the new train split
msvmmaj_reallocate_model(model, train_data->n);
@@ -512,11 +515,7 @@ void start_training_cv(struct Queue *q)
main_s = clock();
while (task) {
- note("(%03li/%03li)\tw = %li\te = %f\tp = %f\tk = %f\t "
- "l = %f\t",
- task->ID+1, q->N, task->weight_idx,
- task->epsilon,
- task->p, task->kappa, task->lambda);
+ print_progress_string(task, q->N);
make_model_from_task(task, model);
loop_s = clock();
@@ -590,7 +589,9 @@ void msvmmaj_reallocate_model(struct MajModel *model, long n)
* It would probably be better to train the model on the training set using
* cross validation and only use the test set when comparing with other
* methods. The way it is now, you're finding out which parameters predict
- * _this_ test set best, which is not what you want.
+ * _this_ test set best, which is not what you want. This function should
+ * therefore not be used; it is considered deprecated and will be removed
+ * in the future.
*
* @param[in] q Queue with Task structs to run
*
@@ -638,9 +639,11 @@ void start_training_tt(struct Queue *q)
MSVMMAJ_OUTPUT_FILE = fid;
predy = Calloc(long, task->test_data->n);
- msvmmaj_predict_labels(task->test_data, model, predy);
+ msvmmaj_predict_labels(task->test_data, task->train_data,
+ model, predy);
if (task->test_data->y != NULL)
- total_perf = msvmmaj_prediction_perf(task->test_data, predy);
+ total_perf = msvmmaj_prediction_perf(task->test_data,
+ predy);
msvmmaj_seed_model_V(model, seed_model);
msvmmaj_free_model(model);
@@ -675,7 +678,7 @@ void free_queue(struct Queue *q)
{
long i;
for (i=0; i<q->N; i++) {
- free(q->tasks[i]->kernel_param);
+ free(q->tasks[i]->kernelparam);
free(q->tasks[i]);
}
free(q->tasks);
@@ -694,11 +697,16 @@ void free_queue(struct Queue *q)
*/
void make_model_from_task(struct Task *task, struct MajModel *model)
{
+ // copy basic model parameters
model->weight_idx = task->weight_idx;
model->epsilon = task->epsilon;
model->p = task->p;
model->kappa = task->kappa;
model->lambda = task->lambda;
+
+ // copy kernel parameters
+ model->kerneltype = task->kerneltype;
+ model->kernelparam = task->kernelparam;
}
/**
@@ -718,4 +726,58 @@ void copy_model(struct MajModel *from, struct MajModel *to)
to->p = from->p;
to->kappa = from->kappa;
to->lambda = from->lambda;
+
+ to->kerneltype = from->kerneltype;
+ switch (to->kerneltype) {
+ case K_LINEAR:
+ break;
+ case K_POLY:
+ to->kernelparam = Malloc(double, 3);
+ to->kernelparam[0] = from->kernelparam[0];
+ to->kernelparam[1] = from->kernelparam[1];
+ to->kernelparam[2] = from->kernelparam[2];
+ break;
+ case K_RBF:
+ to->kernelparam = Malloc(double, 1);
+ to->kernelparam[0] = from->kernelparam[0];
+ break;
+ case K_SIGMOID:
+ to->kernelparam = Malloc(double, 2);
+ to->kernelparam[0] = from->kernelparam[0];
+ to->kernelparam[1] = from->kernelparam[1];
+ break;
+ }
+}
+
+/**
+ * @brief Print the description of the current task on screen
+ *
+ * @details
+ * To track the progress of the grid search, the parameters of the current
+ * task are written to the output specified in MSVMMAJ_OUTPUT_FILE. Since the
+ * parameters differ per kernel, this function builds the parameter string
+ * according to the kernel that is used.
+ *
+ * @param[in] task the Task to describe
+ * @param[in] N total number of tasks
+ *
+ */
+void print_progress_string(struct Task *task, long N)
+{
+ char buffer[MAX_LINE_LENGTH];
+ sprintf(buffer, "(%03li/%03li)\t", task->ID+1, N);
+ if (task->kerneltype == K_POLY)
+ sprintf(buffer + strlen(buffer), "d = %2.2f\t",
+ task->kernelparam[2]);
+ if (task->kerneltype == K_POLY || task->kerneltype == K_SIGMOID)
+ sprintf(buffer + strlen(buffer), "c = %2.2f\t",
+ task->kernelparam[1]);
+ if (task->kerneltype == K_POLY || task->kerneltype == K_SIGMOID ||
+ task->kerneltype == K_RBF)
+ sprintf(buffer + strlen(buffer), "g = %2.2f\t",
+ task->kernelparam[0]);
+ sprintf(buffer + strlen(buffer), "eps = %g\tw = %i\tk = %2.2f\t"
+ "l = %f\tp = %2.2f\t", task->epsilon,
+ task->weight_idx, task->kappa, task->lambda, task->p);
+ note("%s", buffer);
}
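
For reference, make_queue() and copy_model() above agree on a fixed layout for the kernelparam array: kernelparam[0] holds gamma, kernelparam[1] holds coef, and kernelparam[2] holds degree. The sketch below condenses the per-kernel switch in copy_model() into two small helpers; kernelparam_count() and kernelparam_copy() are hypothetical names written for illustration only, and the KernelType enum is assumed to match the one used in the diff.

#include <stdlib.h>
#include <string.h>

/* Assumed to match the KernelType enum used in the diff above. */
enum KernelType { K_LINEAR, K_POLY, K_RBF, K_SIGMOID };

/* Hypothetical helper: number of doubles read from kernelparam for
 * each kernel type, mirroring the switch in copy_model(). */
static size_t kernelparam_count(enum KernelType type)
{
	switch (type) {
	case K_RBF:
		return 1; /* gamma */
	case K_SIGMOID:
		return 2; /* gamma, coef */
	case K_POLY:
		return 3; /* gamma, coef, degree */
	case K_LINEAR:
	default:
		return 0; /* no kernel parameters */
	}
}

/* Hypothetical helper: deep copy of a kernelparam array, equivalent to
 * the per-case Malloc/assign sequence in copy_model(). Returns NULL for
 * K_LINEAR or on allocation failure. */
static double *kernelparam_copy(enum KernelType type, const double *from)
{
	size_t n = kernelparam_count(type);
	double *to;

	if (n == 0)
		return NULL;
	to = malloc(n * sizeof(double));
	if (to != NULL)
		memcpy(to, from, n * sizeof(double));
	return to;
}

Note the ownership difference the diff introduces: make_model_from_task() aliases the Task's kernelparam buffer, which free_queue() frees later, while copy_model() allocates a fresh buffer per kernel type. A model filled by copy_model() therefore owns its kernelparam, whereas one filled by make_model_from_task() must not free it.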