aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGertjan van den Burg <burg@ese.eur.nl>2015-01-30 16:22:52 +0100
committerGertjan van den Burg <burg@ese.eur.nl>2015-01-30 16:22:52 +0100
commitdf9c3ca0b62f1a20071bee3a55d24d673c5d11e0 (patch)
treed3a2d6be5dfe6e2a4e248ad04dfdbb40852c8f7a /src
parentupdate documentation gensvm structs (diff)
downloadgensvm-df9c3ca0b62f1a20071bee3a55d24d673c5d11e0.tar.gz
gensvm-df9c3ca0b62f1a20071bee3a55d24d673c5d11e0.zip
first working version of new kernel GenSVM
Diffstat (limited to 'src')
-rw-r--r--src/GenSVMgrid.c6
-rw-r--r--src/GenSVMtrain.c2
-rw-r--r--src/gensvm_init.c14
-rw-r--r--src/gensvm_io.c1
-rw-r--r--src/gensvm_kernel.c294
-rw-r--r--src/gensvm_pred.c149
-rw-r--r--src/gensvm_train.c12
-rw-r--r--src/gensvm_train_dataset.c155
8 files changed, 383 insertions, 250 deletions
diff --git a/src/GenSVMgrid.c b/src/GenSVMgrid.c
index eb1f477..94d3f0b 100644
--- a/src/GenSVMgrid.c
+++ b/src/GenSVMgrid.c
@@ -75,8 +75,8 @@ int main(int argc, char **argv)
char input_filename[MAX_LINE_LENGTH];
struct Training *training = Malloc(struct Training, 1);
- struct GenData *train_data = Malloc(struct GenData, 1);
- struct GenData *test_data = Malloc(struct GenData, 1);
+ struct GenData *train_data = gensvm_init_data();
+ struct GenData *test_data = gensvm_init_data();
if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help")
|| gensvm_check_argv_eq(argc, argv, "-h") )
@@ -104,7 +104,7 @@ int main(int argc, char **argv)
if (training->traintype == TT)
start_training_tt(q);
else
- start_training_cv(q);
+ start_training(q);
note("Training finished\n");
if (training->repeats > 0) {
diff --git a/src/GenSVMtrain.c b/src/GenSVMtrain.c
index 0c1c6bc..f0b931f 100644
--- a/src/GenSVMtrain.c
+++ b/src/GenSVMtrain.c
@@ -100,7 +100,7 @@ int main(int argc, char **argv)
gensvm_allocate_model(model);
// initialize kernel (if necessary)
- gensvm_make_kernel(model, data);
+ //gensvm_make_kernel(model, data);
// reallocate model and initialize weights
gensvm_reallocate_model(model, data->n, data->m);
diff --git a/src/gensvm_init.c b/src/gensvm_init.c
index b3f214e..8722464 100644
--- a/src/gensvm_init.c
+++ b/src/gensvm_init.c
@@ -68,7 +68,7 @@ struct GenModel *gensvm_init_model()
struct GenData *gensvm_init_data()
{
struct GenData *data = Malloc(struct GenData, 1);
- data->J = NULL;
+ data->Sigma = NULL;
data->y = NULL;
data->Z = NULL;
data->RAW = NULL;
@@ -275,8 +275,16 @@ void gensvm_free_model(struct GenModel *model)
*/
void gensvm_free_data(struct GenData *data)
{
- free(data->Z);
+ if (data == NULL)
+ return;
+
+ if (data->Z == data->RAW) {
+ free(data->Z);
+ }else {
+ free(data->Z);
+ free(data->RAW);
+ }
free(data->y);
- free(data->J);
+ free(data->Sigma);
free(data);
}
diff --git a/src/gensvm_io.c b/src/gensvm_io.c
index 546ecd5..3de0794 100644
--- a/src/gensvm_io.c
+++ b/src/gensvm_io.c
@@ -116,6 +116,7 @@ void gensvm_read_data(struct GenData *dataset, char *data_file)
dataset->n = n;
dataset->m = m;
+ dataset->r = m;
dataset->K = K;
dataset->Z = dataset->RAW;
}
diff --git a/src/gensvm_kernel.c b/src/gensvm_kernel.c
index 55cfa03..f85cb38 100644
--- a/src/gensvm_kernel.c
+++ b/src/gensvm_kernel.c
@@ -11,6 +11,7 @@
*
*/
+#include <cblas.h>
#include <math.h>
#include "gensvm.h"
@@ -20,88 +21,110 @@
#include "util.h"
/**
- * @brief Create the kernel matrix
+ * @brief Do the preprocessing steps needed to perform kernel GenSVM
*
- * Create a kernel matrix based on the specified kerneltype. Kernel parameters
- * are assumed to be specified in the model.
- *
- * @param[in] model GenModel specifying the parameters
- * @param[in] data GenData specifying the data.
+ * @details
+ * tbd
*
*/
-void gensvm_make_kernel(struct GenModel *model, struct GenData *data)
+void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data)
{
- long i, j;
- // Determine if a kernel needs to be computed. This is not the case if
- // a LINEAR kernel is requested in the model, or if the requested
- // kernel is already in the data.
-
if (model->kerneltype == K_LINEAR) {
- data->J = Calloc(double, data->m+1);
- for (i=1; i<data->m+1; i++) {
- matrix_set(data->J, 1, i, 0, 1.0);
- }
+ data->r = data->m;
return;
}
- /*
+ int i;
+ long r,
+ n = data->n;
+ double *P = NULL,
+ *Sigma = NULL,
+ *K = NULL;
+
+ // build the kernel matrix
+ K = Calloc(double, n*n);
+ if (K == NULL) {
+ fprintf(stderr, "Failed to allocate memory for K in "
+ "gensvm_kernel_preprocess.\n");
+ exit(1);
+ }
+ gensvm_make_kernel(model, data, K);
+
+ // generate the eigen decomposition
+ r = gensvm_make_eigen(K, n, &P, &Sigma);
+ note("[DEBUG]: n = %li\tr = %li\n", n, r);
+
+ // build M and set to data (leave RAW intact)
+ gensvm_make_trainfactor(data, P, Sigma, r);
+
+ // Set Sigma to data->Sigma (need it again for prediction)
+ if (data->Sigma != NULL)
+ free(data->Sigma);
+ data->Sigma = Sigma;
+
+ // write kernel params to data
+ data->kerneltype = model->kerneltype;
+ free(data->kernelparam);
switch (model->kerneltype) {
case K_LINEAR:
- // if data has another kernel, free that matrix and
- // assign Z to RAW
- if (data->kerneltype != K_LINEAR) {
- free(data->Z);
- data->Z = data->RAW;
- }
- data->J = Calloc(double, data->m+1);
- for (i=1; i<model->m+1; i++) {
- matrix_set(data->J, 1, i, 0, 1.0);
- }
- return;
+ break;
case K_POLY:
- // if data has another kernel, we need to recalculate
- if (data->kerneltype != K_POLY) {
- break;
- }
- // if it is poly, we only recalculate if the kernel
- // parameters differ
- if (data->kernelparam[0] == model->kernelparam[0] &&
- data->kernelparam[1] == model->kernelparam[1] &&
- data->kernelparam[2] == model->kernelparam[2])
- // < do something with J ?
- return;
+ data->kernelparam = Calloc(double, 3);
+ for (i=0; i<3; i++)
+ data->kernelparam[i] = model->kernelparam[i];
+ break;
case K_RBF:
- if (data->kerneltype != K_RBF)
- break;
- if (data->kernelparam[0] == model->kernelparam[0])
- // < do something with J ?
- return;
+ data->kernelparam = Calloc(double, 1);
+ data->kernelparam[0] = model->kernelparam[0];
+ break;
case K_SIGMOID:
- if (data->kerneltype != K_SIGMOID)
- break;
- if (data->kernelparam[0] == model->kernelparam[0] &&
- data->kernelparam[1] == model->kernelparam[1])
- // < do something with J ?
- return;
+ data->kernelparam = Calloc(double, 2);
+ data->kernelparam[0] = model->kernelparam[0];
+ data->kernelparam[1] = model->kernelparam[1];
+ }
+
+ free(K);
+ free(P);
+}
+
+void gensvm_kernel_postprocess(struct GenModel *model,
+ struct GenData *traindata, struct GenData *testdata)
+{
+ if (model->kerneltype == K_LINEAR) {
+ testdata->r = testdata->m;
+ return;
}
- */
+
+ // build the cross kernel matrix between train and test
+ double *K2 = NULL;
+ gensvm_make_crosskernel(model, traindata, testdata, &K2);
+
+ // generate the data matrix N = K2 * M * Sigma^{-2}
+ gensvm_make_testfactor(testdata, traindata, K2);
+
+ free(K2);
+}
+
+void gensvm_make_kernel(struct GenModel *model, struct GenData *data,
+ double *K)
+{
+ long i, j;
long n = data->n;
double value;
double *x1, *x2;
- double *K = Calloc(double, n*n);
for (i=0; i<n; i++) {
for (j=i; j<n; j++) {
x1 = &data->RAW[i*(data->m+1)+1];
x2 = &data->RAW[j*(data->m+1)+1];
if (model->kerneltype == K_POLY)
- value = gensvm_compute_poly(x1, x2,
+ value = gensvm_dot_poly(x1, x2,
model->kernelparam, data->m);
else if (model->kerneltype == K_RBF)
- value = gensvm_compute_rbf(x1, x2,
+ value = gensvm_dot_rbf(x1, x2,
model->kernelparam, data->m);
else if (model->kerneltype == K_SIGMOID)
- value = gensvm_compute_sigmoid(x1, x2,
+ value = gensvm_dot_sigmoid(x1, x2,
model->kernelparam, data->m);
else {
fprintf(stderr, "Unknown kernel type in "
@@ -112,56 +135,6 @@ void gensvm_make_kernel(struct GenModel *model, struct GenData *data)
matrix_set(K, n, j, i, value);
}
}
-
- double *P = NULL;
- double *Sigma = NULL;
- long num_eigen = gensvm_make_eigen(K, n, &P, &Sigma);
- //printf("num eigen: %li\n", num_eigen);
- data->m = num_eigen;
-
- // copy eigendecomp to data
- data->Z = Calloc(double, n*(num_eigen+1));
- for (i=0; i<n; i++) {
- for (j=0; j<num_eigen; j++) {
- value = matrix_get(P, num_eigen, i, j);
- matrix_set(data->Z, num_eigen+1, i, j, value);
- }
- matrix_set(data->Z, num_eigen+1, i, 0, 1.0);
- }
-
- // Set the regularization matrix (change if not full rank used)
- if (data->J != NULL)
- free(data->J);
- data->J = Calloc(double, data->m+1);
- for (i=1; i<data->m+1; i++) {
- value = 1.0/matrix_get(Sigma, 1, i-1, 0);
- matrix_set(data->J, 1, i, 0, value);
- }
-
- // let data know what it's made of
- data->kerneltype = model->kerneltype;
- free(data->kernelparam);
- switch (model->kerneltype) {
- case K_LINEAR:
- break;
- case K_POLY:
- data->kernelparam = Calloc(double, 3);
- data->kernelparam[0] = model->kernelparam[0];
- data->kernelparam[1] = model->kernelparam[1];
- data->kernelparam[2] = model->kernelparam[2];
- break;
- case K_RBF:
- data->kernelparam = Calloc(double, 1);
- data->kernelparam[0] = model->kernelparam[0];
- break;
- case K_SIGMOID:
- data->kernelparam = Calloc(double, 2);
- data->kernelparam[0] = model->kernelparam[0];
- data->kernelparam[1] = model->kernelparam[1];
- }
- free(K);
- free(Sigma);
- free(P);
}
/**
@@ -241,7 +214,6 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma)
// Select the desired number of eigenvalues, depending on their size.
// dsyevx sorts eigenvalues in ascending order.
- //
max_eigen = tempSigma[n-1];
cutoff_idx = 0;
@@ -261,7 +233,6 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma)
// revert P to row-major order and copy only the the columns
// corresponding to the selected eigenvalues
- //
*P = Calloc(double, n*num_eigen);
for (j=n-1; j>n-1-num_eigen; j--) {
for (i=0; i<n; i++) {
@@ -291,26 +262,20 @@ void gensvm_make_crosskernel(struct GenModel *model,
*K2 = Calloc(double, n_test*n_train);
- //printf("Training RAW\n");
- //print_matrix(data_train->RAW, n_train, m+1);
-
- //printf("Testing RAW\n");
- //print_matrix(data_test->RAW, n_test, m+1);
-
for (i=0; i<n_test; i++) {
for (j=0; j<n_train; j++) {
x1 = &data_test->RAW[i*(m+1)+1];
x2 = &data_train->RAW[j*(m+1)+1];
if (model->kerneltype == K_POLY)
- value = gensvm_compute_poly(x1, x2,
+ value = gensvm_dot_poly(x1, x2,
model->kernelparam,
m);
else if (model->kerneltype == K_RBF)
- value = gensvm_compute_rbf(x1, x2,
+ value = gensvm_dot_rbf(x1, x2,
model->kernelparam,
m);
else if (model->kerneltype == K_SIGMOID)
- value = gensvm_compute_sigmoid(x1, x2,
+ value = gensvm_dot_sigmoid(x1, x2,
model->kernelparam,
m);
else {
@@ -321,10 +286,97 @@ void gensvm_make_crosskernel(struct GenModel *model,
matrix_set((*K2), n_train, i, j, value);
}
}
+}
+
+void gensvm_make_trainfactor(struct GenData *data, double *P, double *Sigma,
+ long r)
+{
+ long i, j, n = data->n;
+ double value;
+
+ // allocate Z
+ data->Z = Calloc(double, n*(r+1));
+ if (data->Z == NULL) {
+ fprintf(stderr, "Failed to allocate memory for data->Z in "
+ "gensvm_make_trainfactor.\n");
+ exit(1);
+ }
+
+ // Write data->Z = [1 M] = [1 P*Sigma]
+ for (i=0; i<n; i++) {
+ for (j=0; j<r; j++) {
+ value = matrix_get(P, r, i, j);
+ value *= matrix_get(Sigma, 1, j, 0);
+ matrix_set(data->Z, r+1, i, j+1, value);
+ }
+ matrix_set(data->Z, r+1, i, 0, 1.0);
+ }
+
+ // Set data->r to r so data knows the width of Z
+ data->r = r;
+}
+
+void gensvm_make_testfactor(struct GenData *testdata,
+ struct GenData *traindata, double *K2)
+{
+ long n1, n2, r, i, j;
+ double value,
+ *N = NULL,
+ *M = NULL;
+
+ n1 = traindata->n;
+ n2 = testdata->n;
+ r = traindata->r;
+
+ N = Calloc(double, n2*(r+1));
+ if (N == NULL) {
+ fprintf(stderr, "Failed to allocate memory for N in "
+ "gensvm_make_testfactor.\n");
+ exit(1);
+ }
+ M = Calloc(double, n1*r);
+ if (M == NULL) {
+ fprintf(stderr, "Failed to allocate memory for M in "
+ "gensvm_make_testfactor.\n");
+ exit(1);
+ }
+
+ // copy M from traindata->Z because we need it in dgemm without column
+ // of 1's.
+ for (i=0; i<n1; i++)
+ for (j=0; j<r; j++)
+ matrix_set(M, r, i, j,
+ matrix_get(traindata->Z, r+1, i, j+1));
+
+ // Multiply K2 with M and store in N
+ cblas_dgemm(
+ CblasRowMajor,
+ CblasNoTrans,
+ CblasNoTrans,
+ n2,
+ r,
+ n1,
+ 1.0,
+ K2,
+ n1,
+ M,
+ r,
+ 0.0,
+ N,
+ r);
+
+ // Multiply N with Sigma^{-2}
+ for (j=0; j<r; j++) {
+ value = pow(matrix_get(traindata->Sigma, 1, j, 0), -2.0);
+ for (i=0; i<n2; i++)
+ matrix_mul(N, r, i, j, value);
+ }
- //printf("cross K2:\n");
- //print_matrix((*K2), n_test, n_train);
+ // Set N and r to testdata
+ testdata->Z = N;
+ testdata->r = r;
+ free(M);
}
/**
@@ -344,7 +396,7 @@ void gensvm_make_crosskernel(struct GenModel *model,
* @param[in] n length of the vectors x1 and x2
* @returns kernel evaluation
*/
-double gensvm_compute_rbf(double *x1, double *x2, double *kernelparam, long n)
+double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n)
{
long i;
double value = 0.0;
@@ -372,7 +424,7 @@ double gensvm_compute_rbf(double *x1, double *x2, double *kernelparam, long n)
* @param[in] n length of the vectors x1 and x2
* @returns kernel evaluation
*/
-double gensvm_compute_poly(double *x1, double *x2, double *kernelparam, long n)
+double gensvm_dot_poly(double *x1, double *x2, double *kernelparam, long n)
{
long i;
double value = 0.0;
@@ -400,7 +452,7 @@ double gensvm_compute_poly(double *x1, double *x2, double *kernelparam, long n)
* @param[in] n length of the vectors x1 and x2
* @returns kernel evaluation
*/
-double gensvm_compute_sigmoid(double *x1, double *x2, double *kernelparam, long n)
+double gensvm_dot_sigmoid(double *x1, double *x2, double *kernelparam, long n)
{
long i;
double value = 0.0;
diff --git a/src/gensvm_pred.c b/src/gensvm_pred.c
index 88678d7..7baae07 100644
--- a/src/gensvm_pred.c
+++ b/src/gensvm_pred.c
@@ -12,6 +12,7 @@
*/
#include <cblas.h>
+#include <math.h>
#include "libGenSVM.h"
#include "gensvm.h"
@@ -19,19 +20,6 @@
#include "gensvm_matrix.h"
#include "gensvm_pred.h"
-#include "util.h" // testing
-
-void gensvm_predict_labels(struct GenData *data_test,
- struct GenData *data_train, struct GenModel *model,
- long *predy)
-{
- if (model->kerneltype == K_LINEAR)
- gensvm_predict_labels_linear(data_test, model, predy);
- else
- gensvm_predict_labels_kernel(data_test, data_train, model,
- predy);
-}
-
/**
* @brief Predict class labels of data given and output in predy
*
@@ -46,24 +34,40 @@ void gensvm_predict_labels(struct GenData *data_test,
* @param[in] model GenModel with optimized V
* @param[out] predy pre-allocated vector to record predictions in
*/
-void gensvm_predict_labels_linear(struct GenData *data,
- struct GenModel *model, long *predy)
+void gensvm_predict_labels(struct GenData *testdata, struct GenModel *model,
+ long *predy)
{
- long i, j, k, label;
- double norm, min_dist;
-
- long n = data->n; // note that model->n is the size of the training sample.
- long m = data->m;
- long K = model->K; //data->K does not necessarily equal the original K.
-
- double *S = Calloc(double, K-1);
- double *ZV = Calloc(double, n*(K-1));
- double *U = Calloc(double, K*(K-1));
+ long i, j, k, n, m, K, label;
+ double norm, min_dist, *S, *ZV, *U;
+
+ n = testdata->n;
+ m = testdata->r;
+ K = model->K;
+
+ // allocate necessary memory
+ S = Calloc(double, K-1);
+ if (S == NULL) {
+ fprintf(stderr, "Failed to allocate memory for S in "
+ "gensvm_predict_labels.\n");
+ exit(1);
+ }
+ ZV = Calloc(double, n*(K-1));
+ if (ZV == NULL) {
+ fprintf(stderr, "Failed to allocate memory for ZV in "
+ "gensvm_predict_labels.\n");
+ exit(1);
+ }
+ U = Calloc(double, K*(K-1));
+ if (U == NULL) {
+ fprintf(stderr, "Failed to allocate memory for U in "
+ "gensvm_predict_labels.\n");
+ exit(1);
+ }
- // Get the simplex matrix
+ // Generate the simplex matrix
gensvm_simplex_gen(K, U);
- // Generate the simplex-space vectors
+ // Generate the simplex space vectors
cblas_dgemm(
CblasRowMajor,
CblasNoTrans,
@@ -72,7 +76,7 @@ void gensvm_predict_labels_linear(struct GenData *data,
K-1,
m+1,
1.0,
- data->Z,
+ testdata->Z,
m+1,
model->V,
K-1,
@@ -81,96 +85,16 @@ void gensvm_predict_labels_linear(struct GenData *data,
K-1);
// Calculate the distance to each of the vertices of the simplex.
- // The closest vertex defines the class label.
+ // The closest vertex defines the class label
for (i=0; i<n; i++) {
label = 0;
- min_dist = 1000000000.0;
- for (j=0; j<K; j++) {
- for (k=0; k<K-1; k++) {
- S[k] = matrix_get(ZV, K-1, i, k) -
- matrix_get(U, K-1, j, k);
- }
- norm = cblas_dnrm2(K-1, S, 1);
- if (norm < min_dist) {
- label = j+1; // labels start counting from 1
- min_dist = norm;
- }
- }
- predy[i] = label;
- }
-
- free(ZV);
- free(U);
- free(S);
-}
-
-void gensvm_predict_labels_kernel(struct GenData *data_test,
- struct GenData *data_train, struct GenModel *model,
- long *predy)
-{
- long i, j, k, label;
- double norm, min_dist;
-
- long n_train = data_train->n;
- long n_test = data_test->n;
- long r = model->m;
- long K = model->K;
-
- double *K2 = NULL;
- gensvm_make_crosskernel(model, data_train, data_test, &K2);
-
- double *S = Calloc(double, K-1);
- double *ZV = Calloc(double, n_test*(r+1));
- double *KPS = Calloc(double, n_test*(r+1));
- double *U = Calloc(double, K*(K-1));
-
- gensvm_simplex_gen(K, U);
-
- // were doing the computations explicitly since P is included in
- // data_train->Z. Might want to look at this some more if it turns out
- // to be slow.
-
- double value, rowvalue;
- for (i=0; i<n_test; i++) {
- for (j=1; j<r+1; j++) {
- value = 0.0;
- for (k=0; k<n_train; k++) {
- rowvalue = matrix_get(K2, n_train, i, k);
- rowvalue *= matrix_get(data_train->Z, r+1, k,
- j);
- value += rowvalue;
- }
- value *= matrix_get(data_train->J, 1, j, 0);
- matrix_set(KPS, r+1, i, j, value);
- }
- matrix_set(KPS, r+1, i, 0, 1.0);
- }
-
- cblas_dgemm(
- CblasRowMajor,
- CblasNoTrans,
- CblasNoTrans,
- n_test,
- K-1,
- r+1,
- 1.0,
- KPS,
- r+1,
- model->V,
- K-1,
- 0.0,
- ZV,
- K-1);
-
- for (i=0; i<n_test; i++) {
- label = 0;
- min_dist = 1e10;
+ min_dist = INFINITY;
for (j=0; j<K; j++) {
for (k=0; k<K-1; k++) {
S[k] = matrix_get(ZV, K-1, i, k) -
matrix_get(U, K-1, j, k);
}
- norm = cblas_dnrm2(K, S, 1);
+ norm = cblas_dnrm2(K-1, S, 1);
if (norm < min_dist) {
label = j+1;
min_dist = norm;
@@ -182,9 +106,6 @@ void gensvm_predict_labels_kernel(struct GenData *data_test,
free(ZV);
free(U);
free(S);
- free(KPS);
- free(K2);
-
}
/**
diff --git a/src/gensvm_train.c b/src/gensvm_train.c
index 09f2560..c264ffa 100644
--- a/src/gensvm_train.c
+++ b/src/gensvm_train.c
@@ -94,10 +94,10 @@ void gensvm_optimize(struct GenModel *model, struct GenData *data)
it++;
}
if (L > Lbar)
- fprintf(stderr, "GenSVM warning: Negative step occurred in "
+ fprintf(stderr, "[WARNING]: Negative step occurred in "
"majorization.\n");
if (it >= MAX_ITER)
- fprintf(stderr, "GenSVM warning: maximum number of iterations "
+ fprintf(stderr, "[WARNING]: maximum number of iterations "
"reached.\n");
note("Optimization finished, iter = %li, loss = %15.16f, "
@@ -166,12 +166,10 @@ double gensvm_get_loss(struct GenModel *model, struct GenData *data,
loss /= ((double) n);
value = 0;
- for (i=0; i<m+1; i++) {
- rowvalue = 0;
+ for (i=1; i<m+1; i++) {
for (j=0; j<K-1; j++) {
- rowvalue += pow(matrix_get(model->V, K-1, i, j), 2.0);
+ value += pow(matrix_get(model->V, K-1, i, j), 2.0);
}
- value += data->J[i] * rowvalue;
}
loss += model->lambda * value;
@@ -445,7 +443,7 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B,
i = 0;
for (j=0; j<m; j++) {
i += (m+1) + 1;
- ZAZ[i] += model->lambda * data->J[j+1];
+ ZAZ[i] += model->lambda;
}
// For the LAPACK call we need to switch to Column-
diff --git a/src/gensvm_train_dataset.c b/src/gensvm_train_dataset.c
index 3034bb4..eee4bf9 100644
--- a/src/gensvm_train_dataset.c
+++ b/src/gensvm_train_dataset.c
@@ -435,6 +435,12 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
* cross validation
*/
double cross_validation(struct GenModel *model, struct GenData *data,
+ long folds)
+{
+ return 0.0;
+}
+/*
+double cross_validation(struct GenModel *model, struct GenData *data,
long folds)
{
FILE *fid;
@@ -487,7 +493,7 @@ double cross_validation(struct GenModel *model, struct GenData *data,
return total_perf;
}
-
+*/
/**
* @brief Run the grid search for a cross validation dataset
*
@@ -542,6 +548,147 @@ void start_training_cv(struct Queue *q)
gensvm_free_model(model);
}
+
+bool kernel_changed(struct Task *newtask, struct Task *oldtask)
+{
+ if (oldtask == NULL)
+ return true;
+ int i;
+ if (newtask->kerneltype != oldtask->kerneltype) {
+ return true;
+ } else if (newtask->kerneltype == K_POLY) {
+ for (i=0; i<3; i++)
+ if (newtask->kernelparam[i] != oldtask->kernelparam[i])
+ return true;
+ return false;
+ } else if (newtask->kerneltype == K_RBF) {
+ if (newtask->kernelparam[0] != oldtask->kernelparam[0])
+ return true;
+ return false;
+ } else if (newtask->kerneltype == K_SIGMOID) {
+ for (i=0; i<2; i++)
+ if (newtask->kernelparam[i] != oldtask->kernelparam[i])
+ return true;
+ return false;
+ }
+ return false;
+}
+
+
+void start_training(struct Queue *q)
+{
+ int f, folds;
+ double perf, current_max = 0;
+ struct Task *task = get_next_task(q);
+ struct Task *prevtask = NULL;
+ struct GenModel *model = gensvm_init_model();
+ clock_t main_s, main_e, loop_s, loop_e;
+
+ // in principle this can change between tasks, but this shouldn't be
+ // the case TODO
+ folds = task->folds;
+
+ model->n = 0;
+ model->m = task->train_data->m;
+ model->K = task->train_data->K;
+ gensvm_allocate_model(model);
+ gensvm_seed_model_V(NULL, model, task->train_data);
+
+ long *cv_idx = Calloc(long, task->train_data->n);
+ gensvm_make_cv_split(task->train_data->n, task->folds, cv_idx);
+
+ struct GenData **train_folds = Malloc(struct GenData *, task->folds);
+ struct GenData **test_folds = Malloc(struct GenData *, task->folds);
+ for (f=0; f<folds; f++) {
+ train_folds[f] = gensvm_init_data();
+ test_folds[f] = gensvm_init_data();
+ gensvm_get_tt_split(task->train_data, train_folds[f],
+ test_folds[f], cv_idx, f);
+ }
+
+ main_s = clock();
+ while (task) {
+ print_progress_string(task, q->N);
+ make_model_from_task(task, model);
+
+ if (kernel_changed(task, prevtask)) {
+ note("*");
+ for (f=0; f<folds; f++) {
+ gensvm_kernel_preprocess(model,
+ train_folds[f]);
+ gensvm_kernel_postprocess(model,
+ train_folds[f], test_folds[f]);
+ }
+ note("*");
+ }
+
+ loop_s = clock();
+ perf = gensvm_cross_validation(model, train_folds, test_folds,
+ folds, task->train_data->n);
+ loop_e = clock();
+ current_max = maximum(current_max, perf);
+
+ note("\t%3.3f%% (%3.3fs)\t(best = %3.3f%%)\n", perf,
+ elapsed_time(loop_s, loop_e), current_max);
+
+ q->tasks[task->ID]->performance = perf;
+ prevtask = task;
+ task = get_next_task(q);
+ }
+ main_e = clock();
+
+ note("\nTotal elapsed training time: %8.8f seconds\n",
+ elapsed_time(main_s, main_e));
+
+ gensvm_free_model(model);
+ for (f=0; f<folds; f++) {
+ gensvm_free_data(train_folds[f]);
+ gensvm_free_data(test_folds[f]);
+ }
+ free(train_folds);
+ free(test_folds);
+}
+
+
+double gensvm_cross_validation(struct GenModel *model,
+ struct GenData **train_folds, struct GenData **test_folds,
+ int folds, long n_total)
+{
+ FILE *fid;
+
+ int f;
+ long *predy;
+ double performance, total_perf = 0;
+
+ for (f=0; f<folds; f++) {
+ // reallocate model in case dimensions differ with data
+ gensvm_reallocate_model(model, train_folds[f]->n,
+ train_folds[f]->r);
+
+ // initialize object weights
+ gensvm_initialize_weights(train_folds[f], model);
+
+ // train the model (suppressing output)
+ fid = GENSVM_OUTPUT_FILE;
+ GENSVM_OUTPUT_FILE = NULL;
+ gensvm_optimize(model, train_folds[f]);
+ GENSVM_OUTPUT_FILE = fid;
+
+ // calculate prediction performance on test set
+ predy = Calloc(long, test_folds[f]->n);
+ gensvm_predict_labels(test_folds[f], model, predy);
+ performance = gensvm_prediction_perf(test_folds[f], predy);
+ total_perf += performance * test_folds[f]->n;
+
+ free(predy);
+ }
+
+ total_perf /= ((double) n_total);
+
+ return total_perf;
+}
+
+
/**
* @brief Run the grid search for a train/test dataset
*
@@ -563,6 +710,11 @@ void start_training_cv(struct Queue *q)
*/
void start_training_tt(struct Queue *q)
{
+ return;
+}
+/*
+void start_training_tt(struct Queue *q)
+{
FILE *fid;
long c = 0;
@@ -628,6 +780,7 @@ void start_training_tt(struct Queue *q)
free(task);
gensvm_free_model(seed_model);
}
+*/
/**
* @brief Free the Queue struct