diff options
| -rw-r--r-- | include/gensvm.h | 6 | ||||
| -rw-r--r-- | include/gensvm_kernel.h | 24 | ||||
| -rw-r--r-- | include/gensvm_pred.h | 8 | ||||
| -rw-r--r-- | include/gensvm_train_dataset.h | 8 | ||||
| -rw-r--r-- | src/GenSVMgrid.c | 6 | ||||
| -rw-r--r-- | src/GenSVMtrain.c | 2 | ||||
| -rw-r--r-- | src/gensvm_init.c | 14 | ||||
| -rw-r--r-- | src/gensvm_io.c | 1 | ||||
| -rw-r--r-- | src/gensvm_kernel.c | 294 | ||||
| -rw-r--r-- | src/gensvm_pred.c | 149 | ||||
| -rw-r--r-- | src/gensvm_train.c | 12 | ||||
| -rw-r--r-- | src/gensvm_train_dataset.c | 155 |
12 files changed, 410 insertions, 269 deletions
diff --git a/include/gensvm.h b/include/gensvm.h index ddae3ae..5101b41 100644 --- a/include/gensvm.h +++ b/include/gensvm.h @@ -88,7 +88,9 @@ struct GenData { long n; ///< number of instances long m; - ///< number of predictors + ///< number of predictors (width of RAW) + long r; + ///< number of eigenvalues (width of Z) long *y; ///< array of class labels, 1..K double *Z; @@ -96,7 +98,7 @@ struct GenData { ///< of the kernel matrix) double *RAW; ///< augmented raw data matrix - double *J; + double *Sigma; KernelType kerneltype; double *kernelparam; }; diff --git a/include/gensvm_kernel.h b/include/gensvm_kernel.h index bf46bbc..d5c5e8d 100644 --- a/include/gensvm_kernel.h +++ b/include/gensvm_kernel.h @@ -21,18 +21,22 @@ struct GenData; struct GenModel; // function declarations -void gensvm_make_kernel(struct GenModel *model, struct GenData *data); - -long gensvm_make_eigen(double *K, long n, double **P, double **Lambda); +void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data); +void gensvm_kernel_postprocess(struct GenModel *model, + struct GenData *traindata, struct GenData *testdata); +void gensvm_make_kernel(struct GenModel *model, struct GenData *data, + double *K); +long gensvm_make_eigen(double *K, long n, double **P, double **Sigma); void gensvm_make_crosskernel(struct GenModel *model, - struct GenData *data_train, struct GenData *data_test, + struct GenData *data_train, struct GenData *data_test, double **K2); +void gensvm_make_trainfactor(struct GenData *data, double *P, double *Sigma, + long r); +void gensvm_make_testfactor(struct GenData *testdata, + struct GenData *traindata, double *K2); +double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n); +double gensvm_dot_poly(double *x1, double *x2, double *kernelparam, long n); +double gensvm_dot_sigmoid(double *x1, double *x2, double *kernelparam, long n); -double gensvm_compute_rbf(double *x1, double *x2, double *kernelparam, - long n); -double gensvm_compute_poly(double *x1, double *x2, double *kernelparam, - long n); -double gensvm_compute_sigmoid(double *x1, double *x2, double *kernelparam, - long n); #endif diff --git a/include/gensvm_pred.h b/include/gensvm_pred.h index 0cce20b..76b3ad3 100644 --- a/include/gensvm_pred.h +++ b/include/gensvm_pred.h @@ -19,14 +19,8 @@ struct GenData; struct GenModel; // function declarations -void gensvm_predict_labels(struct GenData *data_test, - struct GenData *data_train, struct GenModel *model, - long *predy); -void gensvm_predict_labels_linear(struct GenData *data, +void gensvm_predict_labels(struct GenData *testdata, struct GenModel *model, long *predy); -void gensvm_predict_labels_kernel(struct GenData *data_test, - struct GenData *data_train, struct GenModel *model, - long *predy); double gensvm_prediction_perf(struct GenData *data, long *perdy); #endif diff --git a/include/gensvm_train_dataset.h b/include/gensvm_train_dataset.h index 299bc52..0dc4319 100644 --- a/include/gensvm_train_dataset.h +++ b/include/gensvm_train_dataset.h @@ -136,4 +136,12 @@ void make_model_from_task(struct Task *task, struct GenModel *model); void copy_model(struct GenModel *from, struct GenModel *to); void print_progress_string(struct Task *task, long N); + +// new +void start_training(struct Queue *q); +double gensvm_cross_validation(struct GenModel *model, + struct GenData **train_folds, struct GenData **test_folds, + int folds, long n_total); + + #endif diff --git a/src/GenSVMgrid.c b/src/GenSVMgrid.c index eb1f477..94d3f0b 100644 --- a/src/GenSVMgrid.c +++ b/src/GenSVMgrid.c @@ -75,8 +75,8 @@ int main(int argc, char **argv) char input_filename[MAX_LINE_LENGTH]; struct Training *training = Malloc(struct Training, 1); - struct GenData *train_data = Malloc(struct GenData, 1); - struct GenData *test_data = Malloc(struct GenData, 1); + struct GenData *train_data = gensvm_init_data(); + struct GenData *test_data = gensvm_init_data(); if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") || gensvm_check_argv_eq(argc, argv, "-h") ) @@ -104,7 +104,7 @@ int main(int argc, char **argv) if (training->traintype == TT) start_training_tt(q); else - start_training_cv(q); + start_training(q); note("Training finished\n"); if (training->repeats > 0) { diff --git a/src/GenSVMtrain.c b/src/GenSVMtrain.c index 0c1c6bc..f0b931f 100644 --- a/src/GenSVMtrain.c +++ b/src/GenSVMtrain.c @@ -100,7 +100,7 @@ int main(int argc, char **argv) gensvm_allocate_model(model); // initialize kernel (if necessary) - gensvm_make_kernel(model, data); + //gensvm_make_kernel(model, data); // reallocate model and initialize weights gensvm_reallocate_model(model, data->n, data->m); diff --git a/src/gensvm_init.c b/src/gensvm_init.c index b3f214e..8722464 100644 --- a/src/gensvm_init.c +++ b/src/gensvm_init.c @@ -68,7 +68,7 @@ struct GenModel *gensvm_init_model() struct GenData *gensvm_init_data() { struct GenData *data = Malloc(struct GenData, 1); - data->J = NULL; + data->Sigma = NULL; data->y = NULL; data->Z = NULL; data->RAW = NULL; @@ -275,8 +275,16 @@ void gensvm_free_model(struct GenModel *model) */ void gensvm_free_data(struct GenData *data) { - free(data->Z); + if (data == NULL) + return; + + if (data->Z == data->RAW) { + free(data->Z); + }else { + free(data->Z); + free(data->RAW); + } free(data->y); - free(data->J); + free(data->Sigma); free(data); } diff --git a/src/gensvm_io.c b/src/gensvm_io.c index 546ecd5..3de0794 100644 --- a/src/gensvm_io.c +++ b/src/gensvm_io.c @@ -116,6 +116,7 @@ void gensvm_read_data(struct GenData *dataset, char *data_file) dataset->n = n; dataset->m = m; + dataset->r = m; dataset->K = K; dataset->Z = dataset->RAW; } diff --git a/src/gensvm_kernel.c b/src/gensvm_kernel.c index 55cfa03..f85cb38 100644 --- a/src/gensvm_kernel.c +++ b/src/gensvm_kernel.c @@ -11,6 +11,7 @@ * */ +#include <cblas.h> #include <math.h> #include "gensvm.h" @@ -20,88 +21,110 @@ #include "util.h" /** - * @brief Create the kernel matrix + * @brief Do the preprocessing steps needed to perform kernel GenSVM * - * Create a kernel matrix based on the specified kerneltype. Kernel parameters - * are assumed to be specified in the model. - * - * @param[in] model GenModel specifying the parameters - * @param[in] data GenData specifying the data. + * @details + * tdb * */ -void gensvm_make_kernel(struct GenModel *model, struct GenData *data) +void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data) { - long i, j; - // Determine if a kernel needs to be computed. This is not the case if - // a LINEAR kernel is requested in the model, or if the requested - // kernel is already in the data. - if (model->kerneltype == K_LINEAR) { - data->J = Calloc(double, data->m+1); - for (i=1; i<data->m+1; i++) { - matrix_set(data->J, 1, i, 0, 1.0); - } + data->r = data->m; return; } - /* + int i; + long r, + n = data->n; + double *P = NULL, + *Sigma = NULL, + *K = NULL; + + // build the kernel matrix + K = Calloc(double, n*n); + if (K == NULL) { + fprintf(stderr, "Failed to allocate memory for K in " + "gensvm_kernel_preprocess.\n"); + exit(1); + } + gensvm_make_kernel(model, data, K); + + // generate the eigen decomposition + r = gensvm_make_eigen(K, n, &P, &Sigma); + note("[DEBUG]: n = %li\tr = %li\n", n, r); + + // build M and set to data (leave RAW intact) + gensvm_make_trainfactor(data, P, Sigma, r); + + // Set Sigma to data->Sigma (need it again for prediction) + if (data->Sigma != NULL) + free(data->Sigma); + data->Sigma = Sigma; + + // write kernel params to data + data->kerneltype = model->kerneltype; + free(data->kernelparam); switch (model->kerneltype) { case K_LINEAR: - // if data has another kernel, free that matrix and - // assign Z to RAW - if (data->kerneltype != K_LINEAR) { - free(data->Z); - data->Z = data->RAW; - } - data->J = Calloc(double, data->m+1); - for (i=1; i<model->m+1; i++) { - matrix_set(data->J, 1, i, 0, 1.0); - } - return; + break; case K_POLY: - // if data has another kernel, we need to recalculate - if (data->kerneltype != K_POLY) { - break; - } - // if it is poly, we only recalculate if the kernel - // parameters differ - if (data->kernelparam[0] == model->kernelparam[0] && - data->kernelparam[1] == model->kernelparam[1] && - data->kernelparam[2] == model->kernelparam[2]) - // < do something with J ? - return; + data->kernelparam = Calloc(double, 3); + for (i=0; i<3; i++) + data->kernelparam[i] = model->kernelparam[i]; + break; case K_RBF: - if (data->kerneltype != K_RBF) - break; - if (data->kernelparam[0] == model->kernelparam[0]) - // < do something with J ? - return; + data->kernelparam = Calloc(double, 1); + data->kernelparam[0] = model->kernelparam[0]; + break; case K_SIGMOID: - if (data->kerneltype != K_SIGMOID) - break; - if (data->kernelparam[0] == model->kernelparam[0] && - data->kernelparam[1] == model->kernelparam[1]) - // < do something with J ? - return; + data->kernelparam = Calloc(double, 2); + data->kernelparam[0] = model->kernelparam[0]; + data->kernelparam[1] = model->kernelparam[1]; + } + + free(K); + free(P); +} + +void gensvm_kernel_postprocess(struct GenModel *model, + struct GenData *traindata, struct GenData *testdata) +{ + if (model->kerneltype == K_LINEAR) { + testdata->r = testdata->m; + return; } - */ + + // build the cross kernel matrix between train and test + double *K2 = NULL; + gensvm_make_crosskernel(model, traindata, testdata, &K2); + + // generate the data matrix N = K2 * M * Sigma^{-2} + gensvm_make_testfactor(testdata, traindata, K2); + + free(K2); +} + +void gensvm_make_kernel(struct GenModel *model, struct GenData *data, + double *K) +{ + long i, j; long n = data->n; double value; double *x1, *x2; - double *K = Calloc(double, n*n); for (i=0; i<n; i++) { for (j=i; j<n; j++) { x1 = &data->RAW[i*(data->m+1)+1]; x2 = &data->RAW[j*(data->m+1)+1]; if (model->kerneltype == K_POLY) - value = gensvm_compute_poly(x1, x2, + value = gensvm_dot_poly(x1, x2, model->kernelparam, data->m); else if (model->kerneltype == K_RBF) - value = gensvm_compute_rbf(x1, x2, + value = gensvm_dot_rbf(x1, x2, model->kernelparam, data->m); else if (model->kerneltype == K_SIGMOID) - value = gensvm_compute_sigmoid(x1, x2, + value = gensvm_dot_sigmoid(x1, x2, model->kernelparam, data->m); else { fprintf(stderr, "Unknown kernel type in " @@ -112,56 +135,6 @@ void gensvm_make_kernel(struct GenModel *model, struct GenData *data) matrix_set(K, n, j, i, value); } } - - double *P = NULL; - double *Sigma = NULL; - long num_eigen = gensvm_make_eigen(K, n, &P, &Sigma); - //printf("num eigen: %li\n", num_eigen); - data->m = num_eigen; - - // copy eigendecomp to data - data->Z = Calloc(double, n*(num_eigen+1)); - for (i=0; i<n; i++) { - for (j=0; j<num_eigen; j++) { - value = matrix_get(P, num_eigen, i, j); - matrix_set(data->Z, num_eigen+1, i, j, value); - } - matrix_set(data->Z, num_eigen+1, i, 0, 1.0); - } - - // Set the regularization matrix (change if not full rank used) - if (data->J != NULL) - free(data->J); - data->J = Calloc(double, data->m+1); - for (i=1; i<data->m+1; i++) { - value = 1.0/matrix_get(Sigma, 1, i-1, 0); - matrix_set(data->J, 1, i, 0, value); - } - - // let data know what it's made of - data->kerneltype = model->kerneltype; - free(data->kernelparam); - switch (model->kerneltype) { - case K_LINEAR: - break; - case K_POLY: - data->kernelparam = Calloc(double, 3); - data->kernelparam[0] = model->kernelparam[0]; - data->kernelparam[1] = model->kernelparam[1]; - data->kernelparam[2] = model->kernelparam[2]; - break; - case K_RBF: - data->kernelparam = Calloc(double, 1); - data->kernelparam[0] = model->kernelparam[0]; - break; - case K_SIGMOID: - data->kernelparam = Calloc(double, 2); - data->kernelparam[0] = model->kernelparam[0]; - data->kernelparam[1] = model->kernelparam[1]; - } - free(K); - free(Sigma); - free(P); } /** @@ -241,7 +214,6 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) // Select the desired number of eigenvalues, depending on their size. // dsyevx sorts eigenvalues in ascending order. - // max_eigen = tempSigma[n-1]; cutoff_idx = 0; @@ -261,7 +233,6 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) // revert P to row-major order and copy only the the columns // corresponding to the selected eigenvalues - // *P = Calloc(double, n*num_eigen); for (j=n-1; j>n-1-num_eigen; j--) { for (i=0; i<n; i++) { @@ -291,26 +262,20 @@ void gensvm_make_crosskernel(struct GenModel *model, *K2 = Calloc(double, n_test*n_train); - //printf("Training RAW\n"); - //print_matrix(data_train->RAW, n_train, m+1); - - //printf("Testing RAW\n"); - //print_matrix(data_test->RAW, n_test, m+1); - for (i=0; i<n_test; i++) { for (j=0; j<n_train; j++) { x1 = &data_test->RAW[i*(m+1)+1]; x2 = &data_train->RAW[j*(m+1)+1]; if (model->kerneltype == K_POLY) - value = gensvm_compute_poly(x1, x2, + value = gensvm_dot_poly(x1, x2, model->kernelparam, m); else if (model->kerneltype == K_RBF) - value = gensvm_compute_rbf(x1, x2, + value = gensvm_dot_rbf(x1, x2, model->kernelparam, m); else if (model->kerneltype == K_SIGMOID) - value = gensvm_compute_sigmoid(x1, x2, + value = gensvm_dot_sigmoid(x1, x2, model->kernelparam, m); else { @@ -321,10 +286,97 @@ void gensvm_make_crosskernel(struct GenModel *model, matrix_set((*K2), n_train, i, j, value); } } +} + +void gensvm_make_trainfactor(struct GenData *data, double *P, double *Sigma, + long r) +{ + long i, j, n = data->n; + double value; + + // allocate Z + data->Z = Calloc(double, n*(r+1)); + if (data->Z == NULL) { + fprintf(stderr, "Failed to allocate memory for data->Z in " + "gensvm_make_trainfactor.\n"); + exit(1); + } + + // Write data->Z = [1 M] = [1 P*Sigma] + for (i=0; i<n; i++) { + for (j=0; j<r; j++) { + value = matrix_get(P, r, i, j); + value *= matrix_get(Sigma, 1, j, 0); + matrix_set(data->Z, r+1, i, j+1, value); + } + matrix_set(data->Z, r+1, i, 0, 1.0); + } + + // Set data->r to r so data knows the width of Z + data->r = r; +} + +void gensvm_make_testfactor(struct GenData *testdata, + struct GenData *traindata, double *K2) +{ + long n1, n2, r, i, j; + double value, + *N = NULL, + *M = NULL; + + n1 = traindata->n; + n2 = testdata->n; + r = traindata->r; + + N = Calloc(double, n2*(r+1)); + if (N == NULL) { + fprintf(stderr, "Failed to allocate memory for N in " + "gensvm_make_testfactor.\n"); + exit(1); + } + M = Calloc(double, n1*r); + if (M == NULL) { + fprintf(stderr, "Failed to allocate memory for M in " + "gensvm_make_testfactor.\n"); + exit(1); + } + + // copy M from traindata->Z because we need it in dgemm without column + // of 1's. + for (i=0; i<n1; i++) + for (j=0; j<r; j++) + matrix_set(M, r, i, j, + matrix_get(traindata->Z, r+1, i, j+1)); + + // Multiply K2 with M and store in N + cblas_dgemm( + CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + n2, + r, + n1, + 1.0, + K2, + n1, + M, + r, + 0.0, + N, + r); + + // Multiply N with Sigma^{-2} + for (j=0; j<r; j++) { + value = pow(matrix_get(traindata->Sigma, 1, j, 0), -2.0); + for (i=0; i<n2; i++) + matrix_mul(N, r, i, j, value); + } - //printf("cross K2:\n"); - //print_matrix((*K2), n_test, n_train); + // Set N and r to testdata + testdata->Z = N; + testdata->r = r; + free(M); } /** @@ -344,7 +396,7 @@ void gensvm_make_crosskernel(struct GenModel *model, * @param[in] n length of the vectors x1 and x2 * @returns kernel evaluation */ -double gensvm_compute_rbf(double *x1, double *x2, double *kernelparam, long n) +double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n) { long i; double value = 0.0; @@ -372,7 +424,7 @@ double gensvm_compute_rbf(double *x1, double *x2, double *kernelparam, long n) * @param[in] n length of the vectors x1 and x2 * @returns kernel evaluation */ -double gensvm_compute_poly(double *x1, double *x2, double *kernelparam, long n) +double gensvm_dot_poly(double *x1, double *x2, double *kernelparam, long n) { long i; double value = 0.0; @@ -400,7 +452,7 @@ double gensvm_compute_poly(double *x1, double *x2, double *kernelparam, long n) * @param[in] n length of the vectors x1 and x2 * @returns kernel evaluation */ -double gensvm_compute_sigmoid(double *x1, double *x2, double *kernelparam, long n) +double gensvm_dot_sigmoid(double *x1, double *x2, double *kernelparam, long n) { long i; double value = 0.0; diff --git a/src/gensvm_pred.c b/src/gensvm_pred.c index 88678d7..7baae07 100644 --- a/src/gensvm_pred.c +++ b/src/gensvm_pred.c @@ -12,6 +12,7 @@ */ #include <cblas.h> +#include <math.h> #include "libGenSVM.h" #include "gensvm.h" @@ -19,19 +20,6 @@ #include "gensvm_matrix.h" #include "gensvm_pred.h" -#include "util.h" // testing - -void gensvm_predict_labels(struct GenData *data_test, - struct GenData *data_train, struct GenModel *model, - long *predy) -{ - if (model->kerneltype == K_LINEAR) - gensvm_predict_labels_linear(data_test, model, predy); - else - gensvm_predict_labels_kernel(data_test, data_train, model, - predy); -} - /** * @brief Predict class labels of data given and output in predy * @@ -46,24 +34,40 @@ void gensvm_predict_labels(struct GenData *data_test, * @param[in] model GenModel with optimized V * @param[out] predy pre-allocated vector to record predictions in */ -void gensvm_predict_labels_linear(struct GenData *data, - struct GenModel *model, long *predy) +void gensvm_predict_labels(struct GenData *testdata, struct GenModel *model, + long *predy) { - long i, j, k, label; - double norm, min_dist; - - long n = data->n; // note that model->n is the size of the training sample. - long m = data->m; - long K = model->K; //data->K does not necessarily equal the original K. - - double *S = Calloc(double, K-1); - double *ZV = Calloc(double, n*(K-1)); - double *U = Calloc(double, K*(K-1)); + long i, j, k, n, m, K, label; + double norm, min_dist, *S, *ZV, *U; + + n = testdata->n; + m = testdata->r; + K = model->K; + + // allocate necessary memory + S = Calloc(double, K-1); + if (S == NULL) { + fprintf(stderr, "Failed to allocate memory for S in " + "gensvm_predict_labels.\n"); + exit(1); + } + ZV = Calloc(double, n*(K-1)); + if (ZV == NULL) { + fprintf(stderr, "Failed to allocate memory for ZV in " + "gensvm_predict_labels.\n"); + exit(1); + } + U = Calloc(double, K*(K-1)); + if (U == NULL) { + fprintf(stderr, "Failed to allocate memory for U in " + "gensvm_predict_labels.\n"); + exit(1); + } - // Get the simplex matrix + // Generate the simplex matrix gensvm_simplex_gen(K, U); - // Generate the simplex-space vectors + // Generate the simplex space vectors cblas_dgemm( CblasRowMajor, CblasNoTrans, @@ -72,7 +76,7 @@ void gensvm_predict_labels_linear(struct GenData *data, K-1, m+1, 1.0, - data->Z, + testdata->Z, m+1, model->V, K-1, @@ -81,96 +85,16 @@ void gensvm_predict_labels_linear(struct GenData *data, K-1); // Calculate the distance to each of the vertices of the simplex. - // The closest vertex defines the class label. + // The closest vertex defines the class label for (i=0; i<n; i++) { label = 0; - min_dist = 1000000000.0; - for (j=0; j<K; j++) { - for (k=0; k<K-1; k++) { - S[k] = matrix_get(ZV, K-1, i, k) - - matrix_get(U, K-1, j, k); - } - norm = cblas_dnrm2(K-1, S, 1); - if (norm < min_dist) { - label = j+1; // labels start counting from 1 - min_dist = norm; - } - } - predy[i] = label; - } - - free(ZV); - free(U); - free(S); -} - -void gensvm_predict_labels_kernel(struct GenData *data_test, - struct GenData *data_train, struct GenModel *model, - long *predy) -{ - long i, j, k, label; - double norm, min_dist; - - long n_train = data_train->n; - long n_test = data_test->n; - long r = model->m; - long K = model->K; - - double *K2 = NULL; - gensvm_make_crosskernel(model, data_train, data_test, &K2); - - double *S = Calloc(double, K-1); - double *ZV = Calloc(double, n_test*(r+1)); - double *KPS = Calloc(double, n_test*(r+1)); - double *U = Calloc(double, K*(K-1)); - - gensvm_simplex_gen(K, U); - - // were doing the computations explicitly since P is included in - // data_train->Z. Might want to look at this some more if it turns out - // to be slow. - - double value, rowvalue; - for (i=0; i<n_test; i++) { - for (j=1; j<r+1; j++) { - value = 0.0; - for (k=0; k<n_train; k++) { - rowvalue = matrix_get(K2, n_train, i, k); - rowvalue *= matrix_get(data_train->Z, r+1, k, - j); - value += rowvalue; - } - value *= matrix_get(data_train->J, 1, j, 0); - matrix_set(KPS, r+1, i, j, value); - } - matrix_set(KPS, r+1, i, 0, 1.0); - } - - cblas_dgemm( - CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - n_test, - K-1, - r+1, - 1.0, - KPS, - r+1, - model->V, - K-1, - 0.0, - ZV, - K-1); - - for (i=0; i<n_test; i++) { - label = 0; - min_dist = 1e10; + min_dist = INFINITY; for (j=0; j<K; j++) { for (k=0; k<K-1; k++) { S[k] = matrix_get(ZV, K-1, i, k) - matrix_get(U, K-1, j, k); } - norm = cblas_dnrm2(K, S, 1); + norm = cblas_dnrm2(K-1, S, 1); if (norm < min_dist) { label = j+1; min_dist = norm; @@ -182,9 +106,6 @@ void gensvm_predict_labels_kernel(struct GenData *data_test, free(ZV); free(U); free(S); - free(KPS); - free(K2); - } /** diff --git a/src/gensvm_train.c b/src/gensvm_train.c index 09f2560..c264ffa 100644 --- a/src/gensvm_train.c +++ b/src/gensvm_train.c @@ -94,10 +94,10 @@ void gensvm_optimize(struct GenModel *model, struct GenData *data) it++; } if (L > Lbar) - fprintf(stderr, "GenSVM warning: Negative step occurred in " + fprintf(stderr, "[WARNING]: Negative step occurred in " "majorization.\n"); if (it >= MAX_ITER) - fprintf(stderr, "GenSVM warning: maximum number of iterations " + fprintf(stderr, "[WARNING]: maximum number of iterations " "reached.\n"); note("Optimization finished, iter = %li, loss = %15.16f, " @@ -166,12 +166,10 @@ double gensvm_get_loss(struct GenModel *model, struct GenData *data, loss /= ((double) n); value = 0; - for (i=0; i<m+1; i++) { - rowvalue = 0; + for (i=1; i<m+1; i++) { for (j=0; j<K-1; j++) { - rowvalue += pow(matrix_get(model->V, K-1, i, j), 2.0); + value += pow(matrix_get(model->V, K-1, i, j), 2.0); } - value += data->J[i] * rowvalue; } loss += model->lambda * value; @@ -445,7 +443,7 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, i = 0; for (j=0; j<m; j++) { i += (m+1) + 1; - ZAZ[i] += model->lambda * data->J[j+1]; + ZAZ[i] += model->lambda; } // For the LAPACK call we need to switch to Column- diff --git a/src/gensvm_train_dataset.c b/src/gensvm_train_dataset.c index 3034bb4..eee4bf9 100644 --- a/src/gensvm_train_dataset.c +++ b/src/gensvm_train_dataset.c @@ -435,6 +435,12 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) * cross validation */ double cross_validation(struct GenModel *model, struct GenData *data, + long folds) +{ + return 0.0; +} +/* +double cross_validation(struct GenModel *model, struct GenData *data, long folds) { FILE *fid; @@ -487,7 +493,7 @@ double cross_validation(struct GenModel *model, struct GenData *data, return total_perf; } - +*/ /** * @brief Run the grid search for a cross validation dataset * @@ -542,6 +548,147 @@ void start_training_cv(struct Queue *q) gensvm_free_model(model); } + +bool kernel_changed(struct Task *newtask, struct Task *oldtask) +{ + if (oldtask == NULL) + return true; + int i; + if (newtask->kerneltype != oldtask->kerneltype) { + return true; + } else if (newtask->kerneltype == K_POLY) { + for (i=0; i<3; i++) + if (newtask->kernelparam[i] != oldtask->kernelparam[i]) + return true; + return false; + } else if (newtask->kerneltype == K_RBF) { + if (newtask->kernelparam[0] != oldtask->kernelparam[0]) + return true; + return false; + } else if (newtask->kerneltype == K_SIGMOID) { + for (i=0; i<2; i++) + if (newtask->kernelparam[i] != oldtask->kernelparam[i]) + return true; + return false; + } + return false; +} + + +void start_training(struct Queue *q) +{ + int f, folds; + double perf, current_max = 0; + struct Task *task = get_next_task(q); + struct Task *prevtask = NULL; + struct GenModel *model = gensvm_init_model(); + clock_t main_s, main_e, loop_s, loop_e; + + // in principle this can change between tasks, but this shouldn't be + // the case TODO + folds = task->folds; + + model->n = 0; + model->m = task->train_data->m; + model->K = task->train_data->K; + gensvm_allocate_model(model); + gensvm_seed_model_V(NULL, model, task->train_data); + + long *cv_idx = Calloc(long, task->train_data->n); + gensvm_make_cv_split(task->train_data->n, task->folds, cv_idx); + + struct GenData **train_folds = Malloc(struct GenData *, task->folds); + struct GenData **test_folds = Malloc(struct GenData *, task->folds); + for (f=0; f<folds; f++) { + train_folds[f] = gensvm_init_data(); + test_folds[f] = gensvm_init_data(); + gensvm_get_tt_split(task->train_data, train_folds[f], + test_folds[f], cv_idx, f); + } + + main_s = clock(); + while (task) { + print_progress_string(task, q->N); + make_model_from_task(task, model); + + if (kernel_changed(task, prevtask)) { + note("*"); + for (f=0; f<folds; f++) { + gensvm_kernel_preprocess(model, + train_folds[f]); + gensvm_kernel_postprocess(model, + train_folds[f], test_folds[f]); + } + note("*"); + } + + loop_s = clock(); + perf = gensvm_cross_validation(model, train_folds, test_folds, + folds, task->train_data->n); + loop_e = clock(); + current_max = maximum(current_max, perf); + + note("\t%3.3f%% (%3.3fs)\t(best = %3.3f%%)\n", perf, + elapsed_time(loop_s, loop_e), current_max); + + q->tasks[task->ID]->performance = perf; + prevtask = task; + task = get_next_task(q); + } + main_e = clock(); + + note("\nTotal elapsed training time: %8.8f seconds\n", + elapsed_time(main_s, main_e)); + + gensvm_free_model(model); + for (f=0; f<folds; f++) { + gensvm_free_data(train_folds[f]); + gensvm_free_data(test_folds[f]); + } + free(train_folds); + free(test_folds); +} + + +double gensvm_cross_validation(struct GenModel *model, + struct GenData **train_folds, struct GenData **test_folds, + int folds, long n_total) +{ + FILE *fid; + + int f; + long *predy; + double performance, total_perf = 0; + + for (f=0; f<folds; f++) { + // reallocate model in case dimensions differ with data + gensvm_reallocate_model(model, train_folds[f]->n, + train_folds[f]->r); + + // initialize object weights + gensvm_initialize_weights(train_folds[f], model); + + // train the model (surpressing output) + fid = GENSVM_OUTPUT_FILE; + GENSVM_OUTPUT_FILE = NULL; + gensvm_optimize(model, train_folds[f]); + GENSVM_OUTPUT_FILE = fid; + + // calculate prediction performance on test set + predy = Calloc(long, test_folds[f]->n); + gensvm_predict_labels(test_folds[f], model, predy); + performance = gensvm_prediction_perf(test_folds[f], predy); + total_perf += performance * test_folds[f]->n; + + free(predy); + } + + total_perf /= ((double) n_total); + + return total_perf; +} + + /** * @brief Run the grid search for a train/test dataset * @@ -563,6 +710,11 @@ void start_training_cv(struct Queue *q) */ void start_training_tt(struct Queue *q) { + return; +} +/* +void start_training_tt(struct Queue *q) +{ FILE *fid; long c = 0; @@ -628,6 +780,7 @@ void start_training_tt(struct Queue *q) free(task); gensvm_free_model(seed_model); } +*/ /** * @brief Free the Queue struct |
