aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/msvmmaj_matrix.c4
-rw-r--r--src/msvmmaj_sv.c45
-rw-r--r--src/msvmmaj_train.c5
-rw-r--r--src/msvmmaj_train_dataset.c151
4 files changed, 119 insertions, 86 deletions
diff --git a/src/msvmmaj_matrix.c b/src/msvmmaj_matrix.c
index 3f5bf4a..6ecc403 100644
--- a/src/msvmmaj_matrix.c
+++ b/src/msvmmaj_matrix.c
@@ -27,7 +27,7 @@
* @param[in] cols number of columns of M
* @param[in] i row index of element to write to
* @param[in] j column index of element to write to
- * @param[out] val value to write to specified element of M
+ * @param[in] val value to write to specified element of M
*/
void matrix_set(double *M, long cols, long i, long j, double val)
{
@@ -44,7 +44,7 @@ void matrix_set(double *M, long cols, long i, long j, double val)
* @param[in] cols number of columns of M
* @param[in] i row index (starting from 0)
* @param[in] j column index (starting from 0)
- * @returns matrix element at (i, j)
+ * @return matrix element at (i, j)
*/
double matrix_get(double *M, long cols, long i, long j)
{
diff --git a/src/msvmmaj_sv.c b/src/msvmmaj_sv.c
new file mode 100644
index 0000000..1358d4e
--- /dev/null
+++ b/src/msvmmaj_sv.c
@@ -0,0 +1,45 @@
+/**
+ * @file msvmmaj_sv.c
+ * @author Gertjan van den Burg
+ * @date May, 2014
+ * @brief Calculate the number of support vectors
+ *
+ * @details
+ * The function in this file can be used to calculate the number of support
+ * vectors left in a model.
+ *
+ */
+
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
+
+/**
+ * @brief Calculate the number of support vectors in a model
+ *
+ * @details
+ * If an object is correctly classified, the number of classes for which the
+ * error q is larger than 1 is K-1 (i.e., there is no error w.r.t. any of the
+ * other classes). All objects for which this is not the case are thus support
+ * vectors.
+ *
+ * @param[in] model MajModel with solution
+ * @param[in] data MajData to be used
+ * @return number of support vectors with this solution
+ *
+ */
+long msvmmaj_num_sv(struct MajModel *model, struct MajData *data)
+{
+ long i, j, num_correct, num_sv = 0;
+ double value;
+
+ for (i=0; i<data->n; i++) {
+ num_correct = 0;
+ for (j=0; j<data->K; j++) {
+ value = matrix_get(model->Q, data->K, i, j);
+ num_correct += (value > 1);
+ }
+ num_sv += (num_correct < data->K - 1);
+ }
+
+ return num_sv;
+}
diff --git a/src/msvmmaj_train.c b/src/msvmmaj_train.c
index ff4d23d..0f42ff6 100644
--- a/src/msvmmaj_train.c
+++ b/src/msvmmaj_train.c
@@ -17,6 +17,7 @@
#include "msvmmaj.h"
#include "msvmmaj_lapack.h"
#include "msvmmaj_matrix.h"
+#include "msvmmaj_sv.h"
#include "msvmmaj_train.h"
#include "util.h"
@@ -93,8 +94,10 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data)
it++;
}
- note("optimization finished, iter = %li, error = %8.8f\n", it-1,
+ note("optimization finished, iter = %li, error = %15.16f\n", it-1,
(Lbar - L)/L);
+ note("number of support vectors: %li\n", msvmmaj_num_sv(model, data));
+
model->training_error = (Lbar - L)/L;
for (i=0; i<K-1; i++)
diff --git a/src/msvmmaj_train_dataset.c b/src/msvmmaj_train_dataset.c
index 5f44d07..336be69 100644
--- a/src/msvmmaj_train_dataset.c
+++ b/src/msvmmaj_train_dataset.c
@@ -339,8 +339,8 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
for (r=0; r<repeats; r++) {
if (traintype == CV) {
loop_s = clock();
- p = cross_validation(model, NULL,
- task->train_data, task->folds);
+ p = cross_validation(model, task->train_data,
+ task->folds);
loop_e = clock();
time[i] += elapsed_time(loop_s, loop_e);
matrix_set(perf, repeats, i, r, p);
@@ -350,6 +350,9 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
exit(1);
}
note("%3.3f\t", p);
+ // this is done because if we reuse the V it's not a
+ // consistency check
+ msvmmaj_seed_model_V(NULL, model);
}
for (r=0; r<repeats; r++) {
std[i] += pow(matrix_get(
@@ -427,105 +430,59 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
* @returns performance (hitrate) of the configuration on
* cross validation
*/
-double cross_validation(struct MajModel *model, struct MajModel *seed_model,
- struct MajData *data, long folds)
+double cross_validation(struct MajModel *model, struct MajData *data,
+ long folds)
{
FILE *fid;
- bool fs = false;
long f, *predy;
- double total_perf = 0;
- struct MajModel *fold_model;
+ double performance, total_perf = 0;
struct MajData *train_data, *test_data;
- long *cv_idx = Calloc(long, model->n);
- double *performance = Calloc(double, folds);
-
- if (seed_model == NULL) {
- seed_model = msvmmaj_init_model();
- seed_model->n = 0; // we never use anything other than V
- seed_model->m = model->m;
- seed_model->K = model->K;
- seed_model->V = Calloc(double, (model->m+1)*(model->K-1));
- if (seed_model->V == NULL) {
- fprintf(stderr, "Failed to allocate seed_model:V.\n");
- exit(1);
- }
- msvmmaj_seed_model_V(NULL, seed_model);
- fs = true;
- }
+ long *cv_idx = Calloc(long, data->n);
train_data = msvmmaj_init_data();
test_data = msvmmaj_init_data();
- // create splits
- msvmmaj_make_cv_split(model->n, folds, cv_idx);
+
+ // create splits
+ msvmmaj_make_cv_split(data->n, folds, cv_idx);
for (f=0; f<folds; f++) {
//printf("Fold: %li\n", f);
msvmmaj_get_tt_split(data, train_data, test_data, cv_idx, f);
- // generate kernel
- /*
- printf("Training data (n = %li, m = %li)\n", train_data->n,
- train_data->m);
- print_matrix(train_data->Z, train_data->n, train_data->m+1);
- printf("Testing data (n = %li, m = %li)\n", test_data->n,
- test_data->m);
- print_matrix(test_data->Z, test_data->n, test_data->m+1);
- */
msvmmaj_make_kernel(model, train_data);
- // initialize a model for this fold and copy the model
- // parameters
- fold_model = msvmmaj_init_model();
- copy_model(model, fold_model);
- fold_model->n = train_data->n;
- fold_model->m = train_data->m;
- fold_model->K = train_data->K;
-
- // allocate, initialize and seed the fold model
- msvmmaj_allocate_model(fold_model);
- msvmmaj_initialize_weights(train_data, fold_model);
- msvmmaj_seed_model_V(seed_model, fold_model);
+ // reallocate the model if necessary for the new train split
+ msvmmaj_reallocate_model(model, train_data->n);
+
+ msvmmaj_initialize_weights(train_data, model);
- // train the model (without output)
+ // train the model (without output)
fid = MSVMMAJ_OUTPUT_FILE;
MSVMMAJ_OUTPUT_FILE = NULL;
- msvmmaj_optimize(fold_model, train_data);
+ msvmmaj_optimize(model, train_data);
MSVMMAJ_OUTPUT_FILE = fid;
- // calculate predictive performance on test set
+ // calculate prediction performance on test set
predy = Calloc(long, test_data->n);
- msvmmaj_predict_labels(test_data, train_data, fold_model,
- predy);
- performance[f] = msvmmaj_prediction_perf(test_data, predy);
- //printf("Performance fold %li = %f\n", f, performance[f]);
- total_perf += performance[f]/((double) folds);
-
- // seed the seed model with the fold model
- msvmmaj_seed_model_V(fold_model, seed_model);
-
+ msvmmaj_predict_labels(test_data, model, predy);
+ performance = msvmmaj_prediction_perf(test_data, predy);
+ total_perf += performance * test_data->n;
+
free(predy);
free(train_data->y);
free(train_data->Z);
free(test_data->y);
free(test_data->Z);
-
- msvmmaj_free_model(fold_model);
}
- // if a seed model was allocated before, free it.
- if (fs) {
- free(seed_model->V);
- free(seed_model);
- }
free(train_data);
free(test_data);
- free(performance);
- free(cv_idx);
- return total_perf;
+ total_perf /= ((double) data->n);
+ return total_perf;
}
/**
@@ -548,32 +505,22 @@ void start_training_cv(struct Queue *q)
{
double perf, current_max = 0;
struct Task *task = get_next_task(q);
- struct MajModel *seed_model = msvmmaj_init_model();
struct MajModel *model = msvmmaj_init_model();
clock_t main_s, main_e, loop_s, loop_e;
- model->n = task->train_data->n;
+ model->n = 0;
model->m = task->train_data->m;
model->K = task->train_data->K;
msvmmaj_allocate_model(model);
+ msvmmaj_seed_model_V(NULL, model);
- if (model->kerneltype == K_LINEAR) {
- seed_model->n = 0;
- seed_model->m = task->train_data->m;
- seed_model->K = task->train_data->K;
- msvmmaj_allocate_model(seed_model);
- msvmmaj_seed_model_V(NULL, seed_model);
- } else
- seed_model = NULL;
-
main_s = clock();
while (task) {
print_progress_string(task, q->N);
make_model_from_task(task, model);
loop_s = clock();
- perf = cross_validation(model, seed_model, task->train_data,
- task->folds);
+ perf = cross_validation(model, task->train_data, task->folds);
loop_e = clock();
current_max = maximum(current_max, perf);
@@ -585,14 +532,52 @@ void start_training_cv(struct Queue *q)
task = get_next_task(q);
}
main_e = clock();
-
+
note("\nTotal elapsed time: %8.8f seconds\n",
elapsed_time(main_s, main_e));
free(task);
- msvmmaj_free_model(seed_model);
+ msvmmaj_free_model(model);
}
+void msvmmaj_reallocate_model(struct MajModel *model, long n)
+{
+ long K = model->K;
+
+ model->UU = (double *) realloc(model->UU, n*K*(K-1)*sizeof(double));
+ if (model->UU == NULL) {
+ fprintf(stderr, "Failed to reallocate UU\n");
+ exit(1);
+ }
+
+ model->Q = (double *) realloc(model->Q, n*K*sizeof(double));
+ if (model->Q == NULL) {
+ fprintf(stderr, "Failed to reallocate Q\n");
+ exit(1);
+ }
+
+ model->H = (double *) realloc(model->H, n*K*sizeof(double));
+ if (model->H == NULL) {
+ fprintf(stderr, "Failed to reallocate H\n");
+ exit(1);
+ }
+
+ model->R = (double *) realloc(model->R, n*K*sizeof(double));
+ if (model->R == NULL) {
+ fprintf(stderr, "Failed to reallocate R\n");
+ exit(1);
+ }
+
+ model->rho = (double *) realloc(model->rho, n*sizeof(double));
+ if (model->rho == NULL) {
+ fprintf(stderr, "Failed to reallocte rho\n");
+ exit(1);
+ }
+
+ model->n = n;
+}
+
+
/**
* @brief Run the grid search for a train/test dataset
*