Diffstat (limited to 'src')
-rw-r--r--   src/msvmmaj_matrix.c          4
-rw-r--r--   src/msvmmaj_sv.c             45
-rw-r--r--   src/msvmmaj_train.c           5
-rw-r--r--   src/msvmmaj_train_dataset.c 151

4 files changed, 119 insertions, 86 deletions
diff --git a/src/msvmmaj_matrix.c b/src/msvmmaj_matrix.c
index 3f5bf4a..6ecc403 100644
--- a/src/msvmmaj_matrix.c
+++ b/src/msvmmaj_matrix.c
@@ -27,7 +27,7 @@
  * @param[in]  cols  number of columns of M
  * @param[in]  i     row index of element to write to
  * @param[in]  j     column index of element to write to
- * @param[out] val   value to write to specified element of M
+ * @param[in]  val   value to write to specified element of M
  */
 void matrix_set(double *M, long cols, long i, long j, double val)
 {
@@ -44,7 +44,7 @@ void matrix_set(double *M, long cols, long i, long j, double val)
  * @param[in]  cols  number of columns of M
  * @param[in]  i     row index (starting from 0)
  * @param[in]  j     column index (starting from 0)
- * @returns matrix element at (i, j)
+ * @return matrix element at (i, j)
  */
 double matrix_get(double *M, long cols, long i, long j)
 {
diff --git a/src/msvmmaj_sv.c b/src/msvmmaj_sv.c
new file mode 100644
index 0000000..1358d4e
--- /dev/null
+++ b/src/msvmmaj_sv.c
@@ -0,0 +1,45 @@
+/**
+ * @file msvmmaj_sv.c
+ * @author Gertjan van den Burg
+ * @date May, 2014
+ * @brief Calculate the number of support vectors
+ *
+ * @details
+ * The function in this file can be used to calculate the number of support
+ * vectors that are left in a model.
+ *
+ */
+
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
+
+/**
+ * @brief Calculate the number of support vectors in a model
+ *
+ * @details
+ * If an object is correctly classified, the number of classes for which the
+ * error q is larger than 1 is K-1 (i.e., there is no error w.r.t. any of the
+ * other classes). All objects for which this is not the case are thus support
+ * vectors.
+ *
+ * @param[in]  model  MajModel with solution
+ * @param[in]  data   MajData to be used
+ * @return number of support vectors with this solution
+ *
+ */
+long msvmmaj_num_sv(struct MajModel *model, struct MajData *data)
+{
+        long i, j, num_correct, num_sv = 0;
+        double value;
+
+        for (i=0; i<data->n; i++) {
+                num_correct = 0;
+                for (j=0; j<data->K; j++) {
+                        value = matrix_get(model->Q, data->K, i, j);
+                        num_correct += (value > 1);
+                }
+                num_sv += (num_correct < data->K - 1);
+        }
+
+        return num_sv;
+}
diff --git a/src/msvmmaj_train.c b/src/msvmmaj_train.c
index ff4d23d..0f42ff6 100644
--- a/src/msvmmaj_train.c
+++ b/src/msvmmaj_train.c
@@ -17,6 +17,7 @@
 #include "msvmmaj.h"
 #include "msvmmaj_lapack.h"
 #include "msvmmaj_matrix.h"
+#include "msvmmaj_sv.h"
 #include "msvmmaj_train.h"
 #include "util.h"
 
@@ -93,8 +94,10 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data)
                 it++;
         }
 
-        note("optimization finished, iter = %li, error = %8.8f\n", it-1,
+        note("optimization finished, iter = %li, error = %15.16f\n", it-1,
                         (Lbar - L)/L);
+        note("number of support vectors: %li\n", msvmmaj_num_sv(model, data));
+
         model->training_error = (Lbar - L)/L;
 
         for (i=0; i<K-1; i++)
diff --git a/src/msvmmaj_train_dataset.c b/src/msvmmaj_train_dataset.c
index 5f44d07..336be69 100644
--- a/src/msvmmaj_train_dataset.c
+++ b/src/msvmmaj_train_dataset.c
@@ -339,8 +339,8 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
         for (r=0; r<repeats; r++) {
                 if (traintype == CV) {
                         loop_s = clock();
-                        p = cross_validation(model, NULL,
-                                        task->train_data, task->folds);
+                        p = cross_validation(model, task->train_data,
+                                        task->folds);
                         loop_e = clock();
                         time[i] += elapsed_time(loop_s, loop_e);
                         matrix_set(perf, repeats, i, r, p);
@@ -350,6 +350,9 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
                         exit(1);
                 }
                 note("%3.3f\t", p);
+                // reseed V, because if we reuse the V it's not a
+                // consistency check
+                msvmmaj_seed_model_V(NULL, model);
         }
         for (r=0; r<repeats; r++) {
                 std[i] += pow(matrix_get(
@@ -427,105 +430,59 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
  * @returns performance (hitrate) of the configuration on
  *          cross validation
  */
-double cross_validation(struct MajModel *model, struct MajModel *seed_model,
-                struct MajData *data, long folds)
+double cross_validation(struct MajModel *model, struct MajData *data,
+                long folds)
 {
         FILE *fid;
-        bool fs = false;
         long f, *predy;
-        double total_perf = 0;
-        struct MajModel *fold_model;
+        double performance, total_perf = 0;
         struct MajData *train_data, *test_data;
-        long *cv_idx = Calloc(long, model->n);
-        double *performance = Calloc(double, folds);
-
-        if (seed_model == NULL) {
-                seed_model = msvmmaj_init_model();
-                seed_model->n = 0; // we never use anything other than V
-                seed_model->m = model->m;
-                seed_model->K = model->K;
-                seed_model->V = Calloc(double, (model->m+1)*(model->K-1));
-                if (seed_model->V == NULL) {
-                        fprintf(stderr, "Failed to allocate seed_model:V.\n");
-                        exit(1);
-                }
-                msvmmaj_seed_model_V(NULL, seed_model);
-                fs = true;
-        }
+        long *cv_idx = Calloc(long, data->n);
 
         train_data = msvmmaj_init_data();
         test_data = msvmmaj_init_data();
-        // create splits
-        msvmmaj_make_cv_split(model->n, folds, cv_idx);
+
+        // create splits
+        msvmmaj_make_cv_split(data->n, folds, cv_idx);
 
         for (f=0; f<folds; f++) {
                 //printf("Fold: %li\n", f);
                 msvmmaj_get_tt_split(data, train_data, test_data, cv_idx, f);
-                // generate kernel
-                /*
-                printf("Training data (n = %li, m = %li)\n", train_data->n,
-                                train_data->m);
-                print_matrix(train_data->Z, train_data->n, train_data->m+1);
-                printf("Testing data (n = %li, m = %li)\n", test_data->n,
-                                test_data->m);
-                print_matrix(test_data->Z, test_data->n, test_data->m+1);
-                */
                 msvmmaj_make_kernel(model, train_data);
 
-                // initialize a model for this fold and copy the model
-                // parameters
-                fold_model = msvmmaj_init_model();
-                copy_model(model, fold_model);
-                fold_model->n = train_data->n;
-                fold_model->m = train_data->m;
-                fold_model->K = train_data->K;
-
-                // allocate, initialize and seed the fold model
-                msvmmaj_allocate_model(fold_model);
-                msvmmaj_initialize_weights(train_data, fold_model);
-                msvmmaj_seed_model_V(seed_model, fold_model);
+                // reallocate the model if necessary for the new train split
+                msvmmaj_reallocate_model(model, train_data->n);
+
+                msvmmaj_initialize_weights(train_data, model);
 
-                // train the model (without output)
+                // train the model (without output)
                 fid = MSVMMAJ_OUTPUT_FILE;
                 MSVMMAJ_OUTPUT_FILE = NULL;
-                msvmmaj_optimize(fold_model, train_data);
+                msvmmaj_optimize(model, train_data);
                 MSVMMAJ_OUTPUT_FILE = fid;
 
-                // calculate predictive performance on test set
+                // calculate prediction performance on test set
                 predy = Calloc(long, test_data->n);
-                msvmmaj_predict_labels(test_data, train_data, fold_model,
-                                predy);
-                performance[f] = msvmmaj_prediction_perf(test_data, predy);
-                //printf("Performance fold %li = %f\n", f, performance[f]);
-                total_perf += performance[f]/((double) folds);
-
-                // seed the seed model with the fold model
-                msvmmaj_seed_model_V(fold_model, seed_model);
-
+                msvmmaj_predict_labels(test_data, model, predy);
+                performance = msvmmaj_prediction_perf(test_data, predy);
+                total_perf += performance * test_data->n;
+
                 free(predy);
                 free(train_data->y);
                 free(train_data->Z);
                 free(test_data->y);
                 free(test_data->Z);
-
-                msvmmaj_free_model(fold_model);
         }
 
-        // if a seed model was allocated before, free it.
-        if (fs) {
-                free(seed_model->V);
-                free(seed_model);
-        }
         free(train_data);
         free(test_data);
-        free(performance);
-
         free(cv_idx);
 
-        return total_perf;
+        total_perf /= ((double) data->n);
+        return total_perf;
 }
 
 /**
@@ -548,32 +505,22 @@
 void start_training_cv(struct Queue *q)
 {
         double perf, current_max = 0;
         struct Task *task = get_next_task(q);
-        struct MajModel *seed_model = msvmmaj_init_model();
         struct MajModel *model = msvmmaj_init_model();
         clock_t main_s, main_e, loop_s, loop_e;
 
-        model->n = task->train_data->n;
+        model->n = 0;
         model->m = task->train_data->m;
         model->K = task->train_data->K;
         msvmmaj_allocate_model(model);
+        msvmmaj_seed_model_V(NULL, model);
 
-        if (model->kerneltype == K_LINEAR) {
-                seed_model->n = 0;
-                seed_model->m = task->train_data->m;
-                seed_model->K = task->train_data->K;
-                msvmmaj_allocate_model(seed_model);
-                msvmmaj_seed_model_V(NULL, seed_model);
-        } else
-                seed_model = NULL;
-
         main_s = clock();
         while (task) {
                 print_progress_string(task, q->N);
                 make_model_from_task(task, model);
 
                 loop_s = clock();
-                perf = cross_validation(model, seed_model, task->train_data,
-                                task->folds);
+                perf = cross_validation(model, task->train_data, task->folds);
                 loop_e = clock();
 
                 current_max = maximum(current_max, perf);
@@ -585,14 +532,52 @@ void start_training_cv(struct Queue *q)
                 task = get_next_task(q);
         }
         main_e = clock();
-
+
         note("\nTotal elapsed time: %8.8f seconds\n",
                         elapsed_time(main_s, main_e));
 
         free(task);
-        msvmmaj_free_model(seed_model);
+        msvmmaj_free_model(model);
 }
 
+void msvmmaj_reallocate_model(struct MajModel *model, long n)
+{
+        long K = model->K;
+
+        model->UU = (double *) realloc(model->UU, n*K*(K-1)*sizeof(double));
+        if (model->UU == NULL) {
+                fprintf(stderr, "Failed to reallocate UU\n");
+                exit(1);
+        }
+
+        model->Q = (double *) realloc(model->Q, n*K*sizeof(double));
+        if (model->Q == NULL) {
+                fprintf(stderr, "Failed to reallocate Q\n");
+                exit(1);
+        }
+
+        model->H = (double *) realloc(model->H, n*K*sizeof(double));
+        if (model->H == NULL) {
+                fprintf(stderr, "Failed to reallocate H\n");
+                exit(1);
+        }
+
+        model->R = (double *) realloc(model->R, n*K*sizeof(double));
+        if (model->R == NULL) {
+                fprintf(stderr, "Failed to reallocate R\n");
+                exit(1);
+        }
+
+        model->rho = (double *) realloc(model->rho, n*sizeof(double));
+        if (model->rho == NULL) {
+                fprintf(stderr, "Failed to reallocate rho\n");
+                exit(1);
+        }
+
+        model->n = n;
+}
+
+
 /**
  * @brief Run the grid search for a train/test dataset
  *
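
A quick check of the rule in the new msvmmaj_num_sv(): an object only drops
out of the support vector count when its error q exceeds 1 for all K-1 other
classes. Below is a minimal standalone sketch of that rule (matrix_get is
re-implemented here so the example compiles on its own, and the Q values are
made up for illustration):

#include <stdio.h>

/* row-major element access, as in msvmmaj_matrix.c */
static double matrix_get(double *M, long cols, long i, long j)
{
        return M[i*cols+j];
}

int main(void)
{
        long i, j, num_correct, num_sv = 0;
        long n = 3, K = 3;
        /* hypothetical Q: one row per object, one column per class;
         * the column of the object's own class holds q = 0 */
        double Q[] = { 1.5, 1.2, 0.0,   /* q > 1 twice (= K-1): no SV */
                       1.5, 0.8, 0.0,   /* q > 1 once: SV */
                       0.3, 0.9, 0.0 }; /* q > 1 never: SV */

        for (i=0; i<n; i++) {
                num_correct = 0;
                for (j=0; j<K; j++)
                        num_correct += (matrix_get(Q, K, i, j) > 1);
                num_sv += (num_correct < K - 1);
        }
        printf("number of support vectors: %li\n", num_sv); /* prints 2 */
        return 0;
}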

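The rewritten cross_validation() also changes how fold results are combined:
each fold's hitrate is weighted by the size of its test split (total_perf +=
performance * test_data->n, divided by data->n at the end), so the result
equals the hitrate over all n held-out objects even when folds differ in
size. A small sketch with made-up fold sizes and hitrates:

#include <stdio.h>

int main(void)
{
        long f, folds = 3, n = 0;
        long test_n[] = { 40, 40, 20 };          /* hypothetical fold sizes */
        double hitrate[] = { 0.90, 0.80, 0.50 }; /* hypothetical fold perf. */
        double total_perf = 0;

        /* accumulate test-size weighted performance, as in the new code */
        for (f=0; f<folds; f++) {
                total_perf += hitrate[f] * test_n[f];
                n += test_n[f];
        }
        total_perf /= ((double) n);

        /* weighted result: 0.78; an unweighted mean would give 0.7333 */
        printf("cross validation performance: %f\n", total_perf);
        return 0;
}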