diff options
Diffstat (limited to 'src/msvmmaj_train_dataset.c')
| -rw-r--r-- | src/msvmmaj_train_dataset.c | 88 |
1 file changed, 75 insertions, 13 deletions
diff --git a/src/msvmmaj_train_dataset.c b/src/msvmmaj_train_dataset.c index 0221a89..f0c09e5 100644 --- a/src/msvmmaj_train_dataset.c +++ b/src/msvmmaj_train_dataset.c @@ -16,6 +16,7 @@ #include "libMSVMMaj.h" #include "msvmmaj.h" #include "msvmmaj_init.h" +#include "msvmmaj_kernel.h" #include "msvmmaj_matrix.h" #include "msvmmaj_train.h" #include "msvmmaj_train_dataset.h" @@ -73,7 +74,7 @@ void make_queue(struct Training *training, struct Queue *queue, task->test_data = test_data; task->folds = training->folds; task->kerneltype = training->kerneltype; - task->kernel_param = Calloc(double, training->Ng + + task->kernelparam = Calloc(double, training->Ng + training->Nc + training->Nd); queue->tasks[i] = task; } @@ -135,7 +136,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N && training->Ng > 0) for (j=0; j<training->Ng; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->kernel_param[0] = + queue->tasks[i]->kernelparam[0] = training->gammas[j]; i++; } @@ -145,7 +146,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N && training->Nc > 0) for (j=0; j<training->Nc; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->kernel_param[1] = + queue->tasks[i]->kernelparam[1] = training->coefs[j]; i++; } @@ -155,7 +156,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N && training->Nd > 0) for (j=0; j<training->Nd; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->kernel_param[2] = + queue->tasks[i]->kernelparam[2] = training->degrees[j]; i++; } @@ -448,6 +449,8 @@ double cross_validation(struct MajModel *model, struct MajData *data, for (f=0; f<folds; f++) { msvmmaj_get_tt_split(data, train_data, test_data, cv_idx, f); + + msvmmaj_make_kernel(model, train_data); // reallocate the model if necessary for the new train split msvmmaj_reallocate_model(model, train_data->n); @@ -512,11 +515,7 @@ void start_training_cv(struct Queue *q) main_s = clock(); while (task) { - note("(%03li/%03li)\tw = 
%li\te = %f\tp = %f\tk = %f\t " - "l = %f\t", - task->ID+1, q->N, task->weight_idx, - task->epsilon, - task->p, task->kappa, task->lambda); + print_progress_string(task, q->N); make_model_from_task(task, model); loop_s = clock(); @@ -590,7 +589,9 @@ void msvmmaj_reallocate_model(struct MajModel *model, long n) * It would probably be better to train the model on the training set using * cross validation and only use the test set when comparing with other * methods. The way it is now, you're finding out which parameters predict - * _this_ test set best, which is not what you want. + * _this_ test set best, which is not what you want. This function should + * therefore not be used and is considered deprecated, to be removed in the + * future . * * @param[in] q Queue with Task structs to run * @@ -638,9 +639,11 @@ void start_training_tt(struct Queue *q) MSVMMAJ_OUTPUT_FILE = fid; predy = Calloc(long, task->test_data->n); - msvmmaj_predict_labels(task->test_data, model, predy); + msvmmaj_predict_labels(task->test_data, task->train_data, + model, predy); if (task->test_data->y != NULL) - total_perf = msvmmaj_prediction_perf(task->test_data, predy); + total_perf = msvmmaj_prediction_perf(task->test_data, + predy); msvmmaj_seed_model_V(model, seed_model); msvmmaj_free_model(model); @@ -675,7 +678,7 @@ void free_queue(struct Queue *q) { long i; for (i=0; i<q->N; i++) { - free(q->tasks[i]->kernel_param); + free(q->tasks[i]->kernelparam); free(q->tasks[i]); } free(q->tasks); @@ -694,11 +697,16 @@ void free_queue(struct Queue *q) */ void make_model_from_task(struct Task *task, struct MajModel *model) { + // copy basic model parameters model->weight_idx = task->weight_idx; model->epsilon = task->epsilon; model->p = task->p; model->kappa = task->kappa; model->lambda = task->lambda; + + // copy kernel parameters + model->kerneltype = task->kerneltype; + model->kernelparam = task->kernelparam; } /** @@ -718,4 +726,58 @@ void copy_model(struct MajModel *from, struct MajModel *to) 
to->p = from->p; to->kappa = from->kappa; to->lambda = from->lambda; + + to->kerneltype = from->kerneltype; + switch (to->kerneltype) { + case K_LINEAR: + break; + case K_POLY: + to->kernelparam = Malloc(double, 3); + to->kernelparam[0] = from->kernelparam[0]; + to->kernelparam[1] = from->kernelparam[1]; + to->kernelparam[2] = from->kernelparam[2]; + break; + case K_RBF: + to->kernelparam = Malloc(double, 1); + to->kernelparam[0] = from->kernelparam[0]; + break; + case K_SIGMOID: + to->kernelparam = Malloc(double, 2); + to->kernelparam[0] = from->kernelparam[0]; + to->kernelparam[1] = from->kernelparam[1]; + break; + } +} + +/** + * @brief Print the description of the current task on screen + * + * @details + * To track the progress of the grid search the parameters of the current task + * are written to the output specified in MSVMMAJ_OUTPUT_FILE. Since the + * parameters differ with the specified kernel, this function writes a + * parameter string depending on which kernel is used. + * + * @param[in] task the Task specified + * @param[in] N total number of tasks + * + */ +void print_progress_string(struct Task *task, long N) +{ + char buffer[MAX_LINE_LENGTH]; + sprintf(buffer, "(%03li/%03li)\t", task->ID+1, N); + if (task->kerneltype == K_POLY) + sprintf(buffer + strlen(buffer), "d = %2.2f\t", + task->kernelparam[2]); + if (task->kerneltype == K_POLY || task->kerneltype == K_SIGMOID) + sprintf(buffer + strlen(buffer), "c = %2.2f\t", + task->kernelparam[1]); + if (task->kerneltype == K_POLY || task->kerneltype == K_SIGMOID || + task->kerneltype == K_RBF) + sprintf(buffer + strlen(buffer), "g = %2.2f\t", + task->kernelparam[0]); + sprintf(buffer + strlen(buffer), "eps = %g\tw = %i\tk = %2.2f\t" + "l = %f\tp = %2.2f\t", task->epsilon, + task->weight_idx, task->kappa, task->lambda, task->p); + note(buffer); } |
