diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2018-03-27 19:34:30 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2018-03-27 19:34:30 +0100 |
| commit | 93ac979f5f7a663557d7669babfb5370f4531f96 (patch) | |
| tree | 8d979db9391bc76bb432c54ec9249d404ea30589 | |
| parent | Add command line flag to set the maximum number of iterations (diff) | |
| download | gensvm-93ac979f5f7a663557d7669babfb5370f4531f96.tar.gz gensvm-93ac979f5f7a663557d7669babfb5370f4531f96.zip | |
Allow seeding for nonlinear GenSVM
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/gensvm_init.c | 44 |
| -rw-r--r-- | src/gensvm_train.c | 6 |
| -rw-r--r-- | tests/src/test_gensvm_train.c | 18 |
3 files changed, 37 insertions, 31 deletions
diff --git a/src/gensvm_init.c b/src/gensvm_init.c index d65e307..444f488 100644 --- a/src/gensvm_init.c +++ b/src/gensvm_init.c @@ -62,22 +62,21 @@ void gensvm_init_V(struct GenModel *from_model, double *col_min = NULL, *col_max = NULL; - long n = data->n; - long m = data->m; - long K = data->K; - - if (from_model == NULL) { - col_min = Calloc(double, m+1); - col_max = Calloc(double, m+1); - for (j=0; j<m+1; j++) { + // if no model is supplied, or the dimensions of the supplied model + // don't match, then we use random initialization. + if (from_model == NULL || from_model->m != to_model->m || + from_model->K != to_model->K) { + col_min = Calloc(double, to_model->m+1); + col_max = Calloc(double, to_model->m+1); + for (j=0; j<to_model->m+1; j++) { col_min[j] = 1.0e100; col_max[j] = -1.0e100; } if (data->Z == NULL) { // sparse matrix - long *visit_count = Calloc(long, m+1); - for (i=0; i<n; i++) { + long *visit_count = Calloc(long, to_model->m+1); + for (i=0; i<data->spZ->n_row; i++) { jj_start = data->spZ->ia[i]; jj_end = data->spZ->ia[i+1]; for (jj=jj_start; jj<jj_end; jj++) { @@ -90,8 +89,8 @@ void gensvm_init_V(struct GenModel *from_model, } } // correction in case the minimum or maximum is 0 - for (j=0; j<m+1; j++) { - if (visit_count[j] < n) { + for (j=0; j<to_model->m+1; j++) { + if (visit_count[j] < data->spZ->n_row) { col_min[j] = minimum(col_min[j], 0.0); col_max[j] = maximum(col_max[j], 0.0); } @@ -99,30 +98,31 @@ void gensvm_init_V(struct GenModel *from_model, free(visit_count); } else { // dense matrix - for (i=0; i<n; i++) { - for (j=0; j<m+1; j++) { - value = matrix_get(data->Z, m+1, i, j); + for (i=0; i<to_model->n; i++) { + for (j=0; j<to_model->m+1; j++) { + value = matrix_get(data->Z, + to_model->m+1, i, j); col_min[j] = minimum(col_min[j], value); col_max[j] = maximum(col_max[j], value); } } } - for (j=0; j<m+1; j++) { + for (j=0; j<to_model->m+1; j++) { cmin = (fabs(col_min[j]) < 1e-10) ? -1 : col_min[j]; cmax = (fabs(col_max[j]) < 1e-10) ? 
1 : col_max[j]; - for (k=0; k<K-1; k++) { + for (k=0; k<to_model->K-1; k++) { rnd = ((double) rand()) / ((double) RAND_MAX); value = 1.0/cmin + (1.0/cmax - 1.0/cmin)*rnd; - matrix_set(to_model->V, K-1, j, k, value); + matrix_set(to_model->V, to_model->K-1, j, k, value); } } free(col_min); free(col_max); } else { - for (i=0; i<m+1; i++) { - for (j=0; j<K-1; j++) { - value = matrix_get(from_model->V, K-1, i, j); - matrix_set(to_model->V, K-1, i, j, value); + for (i=0; i<to_model->m+1; i++) { + for (j=0; j<to_model->K-1; j++) { + value = matrix_get(from_model->V, from_model->K-1, i, j); + matrix_set(to_model->V, to_model->K-1, i, j, value); } } } diff --git a/src/gensvm_train.c b/src/gensvm_train.c index 5c668e0..e313738 100644 --- a/src/gensvm_train.c +++ b/src/gensvm_train.c @@ -58,15 +58,15 @@ void gensvm_train(struct GenModel *model, struct GenData *data, real_seed = (model->seed == -1) ? time(NULL) : model->seed; srand(real_seed); - // initialize the V matrix (potentially with a seed model) - gensvm_init_V(seed_model, model, data); - // preprocess kernel gensvm_kernel_preprocess(model, data); // reallocate model for kernels gensvm_reallocate_model(model, data->n, data->r); + // initialize the V matrix (potentially with a seed model) + gensvm_init_V(seed_model, model, data); + // initialize weights gensvm_initialize_weights(data, model); diff --git a/tests/src/test_gensvm_train.c b/tests/src/test_gensvm_train.c index f7033f0..f17cfef 100644 --- a/tests/src/test_gensvm_train.c +++ b/tests/src/test_gensvm_train.c @@ -48,6 +48,10 @@ char *test_gensvm_train_seed_linear() data->Z = data->RAW; data->y = Calloc(long, data->n); + seed->n = data->n; + seed->m = data->m; + seed->K = data->K; + matrix_set(data->Z, data->m+1, 0, 0, 1.0000000000000000); matrix_set(data->Z, data->m+1, 0, 1, 0.8056271362589000); matrix_set(data->Z, data->m+1, 0, 2, 0.4874175854113872); @@ -261,14 +265,15 @@ char *test_gensvm_train_seed_kernel() matrix_set(data->y, 1, 0, 8, 3); 
matrix_set(data->y, 1, 0, 9, 4); + + struct GenModel *seed = gensvm_init_model(); + seed->V = Calloc(double, 7*3); + seed->m = 6; + seed->K = 4; + // start test code // - // because the kernel eigendecomposition isn't known in advance, - // there's no way to seed the model when using kernels. We therefore - // use seed == NULL here. Note that due to the Memset in - // gensvm_reallocate_model(), V will be a matrix of zeros after - // reallocation, so we compare with the V = 0 result from Octave. - gensvm_train(model, data, NULL); + gensvm_train(model, data, seed); mu_assert(model->n == data->n, "Incorrect model n"); mu_assert(model->m == data->r, "Incorrect model m"); @@ -368,6 +373,7 @@ char *test_gensvm_train_seed_kernel() // end test code // gensvm_free_model(model); + gensvm_free_model(seed); gensvm_free_data(data); return NULL; |
