From 238110891d8cd0b95aae0b273c8b627580a9274a Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 5 Dec 2016 11:19:24 +0100 Subject: Tests and documentation kernel module --- include/gensvm_kernel.h | 23 ++-- src/gensvm_kernel.c | 248 +++++++++++++++++++++++++++-------------- tests/Makefile | 2 +- tests/src/test_gensvm_kernel.c | 153 +++++++++++++++++++++++++ 4 files changed, 334 insertions(+), 92 deletions(-) create mode 100644 tests/src/test_gensvm_kernel.c diff --git a/include/gensvm_kernel.h b/include/gensvm_kernel.h index 44b3555..c24426e 100644 --- a/include/gensvm_kernel.h +++ b/include/gensvm_kernel.h @@ -39,19 +39,22 @@ void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data); void gensvm_kernel_postprocess(struct GenModel *model, struct GenData *traindata, struct GenData *testdata); -void gensvm_make_kernel(struct GenModel *model, struct GenData *data, +void gensvm_kernel_compute(struct GenModel *model, struct GenData *data, double *K); -long gensvm_make_eigen(double *K, long n, double **P, double **Sigma); -void gensvm_make_crosskernel(struct GenModel *model, - struct GenData *data_train, struct GenData *data_test, - double **K2); -void gensvm_make_trainfactor(struct GenData *data, double *P, double *Sigma, +long gensvm_kernel_eigendecomp(double *K, long n, double **P_ret, + double **Sigma_ret); +double *gensvm_kernel_cross(struct GenModel *model, struct GenData *data_train, + struct GenData *data_test); +void gensvm_kernel_trainfactor(struct GenData *data, double *P, double *Sigma, long r); -void gensvm_make_testfactor(struct GenData *testdata, +void gensvm_kernel_testfactor(struct GenData *testdata, struct GenData *traindata, double *K2); -double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n); -double gensvm_dot_poly(double *x1, double *x2, double *kernelparam, long n); -double gensvm_dot_sigmoid(double *x1, double *x2, double *kernelparam, long n); +double gensvm_kernel_dot_rbf(double *x1, double *x2, 
double *kernelparam, + long n); +double gensvm_kernel_dot_poly(double *x1, double *x2, double *kernelparam, + long n); +double gensvm_kernel_dot_sigmoid(double *x1, double *x2, double *kernelparam, + long n); int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA, double VL, double VU, int IL, int IU, double ABSTOL, int *M, double *W, double *Z, int LDZ, double *WORK, int LWORK, diff --git a/src/gensvm_kernel.c b/src/gensvm_kernel.c index 7dcfb4e..d236553 100644 --- a/src/gensvm_kernel.c +++ b/src/gensvm_kernel.c @@ -27,16 +27,37 @@ You should have received a copy of the GNU General Public License along with GenSVM. If not, see <http://www.gnu.org/licenses/>. - */ +*/ #include "gensvm_kernel.h" #include "gensvm_print.h" +#ifndef GENSVM_EIGEN_CUTOFF + /** + * Minimum ratio between an eigenvalue and the largest eigenvalue for it to + * be included in the reduced eigendecomposition + */ + #define GENSVM_EIGEN_CUTOFF 1e-8 +#endif + /** * @brief Do the preprocessing steps needed to perform kernel GenSVM * * @details - * tdb + * To achieve nonlinearity through kernels in GenSVM, a preprocessing step is + * needed. This preprocessing step computes the full kernel matrix, and an + * eigendecomposition of this matrix. Next, it computes a matrix @f$\textbf{M} + * = \textbf{P}\boldsymbol{\Sigma}@f$ which takes the role of data matrix in + * the optimization algorithm. + * + * @sa + * gensvm_kernel_compute(), gensvm_kernel_eigendecomp(), + * gensvm_kernel_trainfactor(), gensvm_kernel_postprocess() + * + * @param[in] model input GenSVM model + * @param[in,out] data input structure with the data. 
On exit, + * contains the training factor in GenData::Z, + * and the original data in GenData::RAW * */ void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data) @@ -47,21 +68,20 @@ void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data) } int i; - long r, - n = data->n; + long r, n = data->n; double *P = NULL, *Sigma = NULL, *K = NULL; // build the kernel matrix K = Calloc(double, n*n); - gensvm_make_kernel(model, data, K); + gensvm_kernel_compute(model, data, K); // generate the eigen decomposition - r = gensvm_make_eigen(K, n, &P, &Sigma); + r = gensvm_kernel_eigendecomp(K, n, &P, &Sigma); // build M and set to data (leave RAW intact) - gensvm_make_trainfactor(data, P, Sigma, r); + gensvm_kernel_trainfactor(data, P, Sigma, r); // Set Sigma to data->Sigma (need it again for prediction) if (data->Sigma != NULL) { @@ -98,6 +118,19 @@ void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data) free(P); } +/** + * @brief Compute the kernel postprocessing factor + * + * @details + * This function computes the postprocessing factor needed to do predictions + * with kernels in GenSVM. This is a wrapper around gensvm_kernel_cross() and + * gensvm_kernel_testfactor(). + * + * @param[in] model a GenSVM model + * @param[in] traindata the training dataset + * @param[in,out] testdata the test dataset. 
On exit, GenData::Z + contains the testfactor + */ void gensvm_kernel_postprocess(struct GenModel *model, struct GenData *traindata, struct GenData *testdata) { @@ -107,11 +140,10 @@ void gensvm_kernel_postprocess(struct GenModel *model, } // build the cross kernel matrix between train and test - double *K2 = NULL; - gensvm_make_crosskernel(model, traindata, testdata, &K2); + double *K2 = gensvm_kernel_cross(model, traindata, testdata); // generate the data matrix N = K2 * M * Sigma^{-2} - gensvm_make_testfactor(testdata, traindata, K2); + gensvm_kernel_testfactor(testdata, traindata, K2); free(K2); } @@ -120,10 +152,10 @@ void gensvm_kernel_postprocess(struct GenModel *model, * @brief Compute the kernel matrix * * @details - * This function computes the kernel matrix of a data matrix based on the - * requested kernel type and the kernel parameters. The potential types of - * kernel functions are document in KernelType. This function uses a naive - * multiplication and computes the entire upper triangle of the kernel matrix, + * This function computes the kernel matrix of a data matrix based on the + * requested kernel type and the kernel parameters. The potential types of + * kernel functions are documented in KernelType. This function uses a naive + * multiplication and computes the entire upper triangle of the kernel matrix, then copies this over to the lower triangle. 
* * @param[in] model a GenModel structure with the model @@ -131,8 +163,8 @@ void gensvm_kernel_postprocess(struct GenModel *model, * @param[out] K an nxn preallocated kernel matrix * */ -void gensvm_make_kernel(struct GenModel *model, struct GenData *data, - double *K) +void gensvm_kernel_compute(struct GenModel *model, struct GenData *data, + double *K) { long i, j; long n = data->n; @@ -145,13 +177,13 @@ void gensvm_make_kernel(struct GenModel *model, struct GenData *data, x1 = &data->RAW[i*(data->m+1)+1]; x2 = &data->RAW[j*(data->m+1)+1]; if (model->kerneltype == K_POLY) - value = gensvm_dot_poly(x1, x2, + value = gensvm_kernel_dot_poly(x1, x2, model->kernelparam, data->m); else if (model->kerneltype == K_RBF) - value = gensvm_dot_rbf(x1, x2, + value = gensvm_kernel_dot_rbf(x1, x2, model->kernelparam, data->m); else if (model->kerneltype == K_SIGMOID) - value = gensvm_dot_sigmoid(x1, x2, + value = gensvm_kernel_dot_sigmoid(x1, x2, model->kernelparam, data->m); else { err("[GenSVM Error]: Unknown kernel type in " @@ -167,23 +199,30 @@ void gensvm_make_kernel(struct GenModel *model, struct GenData *data, /** * @brief Find the (reduced) eigendecomposition of a kernel matrix * - * @todo - * Document this function + * @details + * Compute the reduced eigendecomposition of the kernel matrix. This uses the + * LAPACK function dsyevx to do the computation. Only those + * eigenvalues/eigenvectors are kept for which the ratio between the + * eigenvalue and the largest eigenvalue is larger than GENSVM_EIGEN_CUTOFF. + * This function uses the highest precision eigenvalues, twice the underflow + * threshold (see dsyevx documentation). 
* - * @param[in] K - * @param[in] n - * @param[out] P - * @param[out] Sigma + * @param[in] K the kernel matrix + * @param[in] n the dimension of the kernel matrix + * @param[out] P on exit contains the eigenvectors + * @param[out] Sigma on exit contains the eigenvalues * - * @return + * @return the number of eigenvalues kept */ -long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) +long gensvm_kernel_eigendecomp(double *K, long n, double **P_ret, + double **Sigma_ret) { - int M, status, LWORK, - *IWORK = NULL, + int M, status, LWORK, *IWORK = NULL, *IFAIL = NULL; long i, j, num_eigen, cutoff_idx; - double max_eigen, abstol, *WORK = NULL; + double max_eigen, abstol, *WORK = NULL, + *Sigma = NULL, + *P = NULL; double *tempSigma = Malloc(double, n); double *tempP = Malloc(double, n*n); @@ -216,26 +255,26 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) max_eigen = tempSigma[n-1]; cutoff_idx = 0; - for (i=0; i 1e-8 ) { + for (i=0; i GENSVM_EIGEN_CUTOFF) { cutoff_idx = i; break; } + } num_eigen = n - cutoff_idx; - *Sigma = Calloc(double, num_eigen); - + Sigma = Calloc(double, num_eigen); for (i=0; in-1-num_eigen; j--) { for (i=0; in; long n_test = data_test->n; long m = data_test->m; - double value; - double *x1 = NULL, - *x2 = NULL; - - *K2 = Calloc(double, n_test*n_train); + double value, *x1 = NULL, + *x2 = NULL, + *K2 = Calloc(double, n_test * n_train); for (i=0; iRAW[i*(m+1)+1]; x2 = &data_train->RAW[j*(m+1)+1]; if (model->kerneltype == K_POLY) - value = gensvm_dot_poly(x1, x2, - model->kernelparam, - m); + value = gensvm_kernel_dot_poly(x1, x2, + model->kernelparam, m); else if (model->kerneltype == K_RBF) - value = gensvm_dot_rbf(x1, x2, - model->kernelparam, - m); + value = gensvm_kernel_dot_rbf(x1, x2, + model->kernelparam, m); else if (model->kerneltype == K_SIGMOID) - value = gensvm_dot_sigmoid(x1, x2, - model->kernelparam, - m); + value = gensvm_kernel_dot_sigmoid(x1, x2, + model->kernelparam, m); else { err("[GenSVM 
Error]: Unknown kernel type in " "gensvm_make_crosskernel\n"); exit(EXIT_FAILURE); } - matrix_set((*K2), n_train, i, j, value); + matrix_set(K2, n_train, i, j, value); } } + return K2; } -void gensvm_make_trainfactor(struct GenData *data, double *P, double *Sigma, - long r) +/** + * @brief Compute the training factor as part of kernel preprocessing + * + * @details + * This function computes the matrix product @f$\textbf{M} = + * \textbf{P}\boldsymbol{\Sigma}@f$ and stores the result in GenData::Z, + * preceded by a column of ones. It also sets GenData::r to the number of + * eigenvectors that were included in P and Sigma. Note that P and Sigma + * correspond to the reduced eigendecomposition of the kernel matrix. + * + * @param[in,out] data a GenData structure. On exit, GenData::Z and + * GenData::r are updated as described above. + * @param[in] P the eigenvectors + * @param[in] Sigma the eigenvalues + * @param[in] r the number of eigenvalues and eigenvectors + */ +void gensvm_kernel_trainfactor(struct GenData *data, double *P, double *Sigma, + long r) { long i, j, n = data->n; double value; @@ -311,12 +382,29 @@ void gensvm_make_trainfactor(struct GenData *data, double *P, double *Sigma, data->r = r; } -void gensvm_make_testfactor(struct GenData *testdata, - struct GenData *traindata, double *K2) +/** + * @brief Calculate the matrix product for the testfactor + * + * @details + * To predict class labels when kernels are used, a transformation of the + * testdata has to be performed to get the simplex space vectors. This + * transformation is based on the matrix @f$\textbf{K}_2@f$ (as calculated by + * gensvm_kernel_cross()) and the matrices @f$\textbf{M} = + * \textbf{P}*\boldsymbol{\Sigma}@f$ and @f$\boldsymbol{\Sigma}@f$. The + * testfactor is equal to @f$\textbf{K}_2 \textbf{M} + * \boldsymbol{\Sigma}^{-2}@f$. + * + * @param[out] testdata a GenData struct with the testdata, contains + * the testfactor in GenData::Z on exit preceded + * by a column of ones. 
+ * @param[in] traindata a GenData struct with the training data + * @param[in] K2 crosskernel between the train and test data + */ +void gensvm_kernel_testfactor(struct GenData *testdata, + struct GenData *traindata, double *K2) { long n1, n2, r, i, j; - double value, - *N = NULL, + double value, *N = NULL, *M = NULL; n1 = traindata->n; @@ -328,10 +416,12 @@ void gensvm_make_testfactor(struct GenData *testdata, // copy M from traindata->Z because we need it in dgemm without column // of 1's. - for (i=0; iZ, r+1, i, j+1)); + for (i=0; iZ, r+1, i, j+1); + matrix_set(M, r, i, j, value); + } + } // Multiply K2 with M and store in N cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n2, r, n1, 1.0, @@ -377,7 +467,8 @@ void gensvm_make_testfactor(struct GenData *testdata, * @param[in] n length of the vectors x1 and x2 * @returns kernel evaluation */ -double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n) +double gensvm_kernel_dot_rbf(double *x1, double *x2, double *kernelparam, + long n) { long i; double value = 0.0; @@ -405,15 +496,13 @@ double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n) * @param[in] n length of the vectors x1 and x2 * @returns kernel evaluation */ -double gensvm_dot_poly(double *x1, double *x2, double *kernelparam, long n) +double gensvm_kernel_dot_poly(double *x1, double *x2, double *kernelparam, + long n) { - long i; - double value = 0.0; - for (i=0; i. 
+ + */ + +#include "minunit.h" +#include "gensvm_kernel.h" + +char *test_dot_rbf() +{ + double dot; + double *a = Malloc(double, 5); + double *b = Malloc(double, 5); + double *kernelparam = Malloc(double, 1); + + a[0] = 0.5203363837176203; + a[1] = 0.3860628599460129; + a[2] = 0.3592536954640216; + a[3] = 0.6824659760765744; + a[4] = 0.5390520090020700; + + b[0] = 0.1782643262351465; + b[1] = 0.0314270210724957; + b[2] = 0.5887219369641497; + b[3] = 0.7710042954911620; + b[4] = 0.8805451245738238; + + // start test code // + kernelparam[0] = 1.0; + dot = gensvm_kernel_dot_rbf(a, b, kernelparam, 5); + mu_assert(fabs(dot - 0.657117701533133) < 1e-14, "Incorrect dot (1)"); + + kernelparam[0] = 5.0; + dot = gensvm_kernel_dot_rbf(a, b, kernelparam, 5); + mu_assert(fabs(dot - 0.122522495044048) < 1e-14, "Incorrect dot (2)"); + + // end test code // + free(a); + free(b); + free(kernelparam); + + return NULL; +} + +char *test_dot_poly() +{ + double dot; + double *a = Malloc(double, 5); + double *b = Malloc(double, 5); + double *kernelparam = Malloc(double, 3); + + a[0] = 0.5203363837176203; + a[1] = 0.3860628599460129; + a[2] = 0.3592536954640216; + a[3] = 0.6824659760765744; + a[4] = 0.5390520090020700; + + b[0] = 0.1782643262351465; + b[1] = 0.0314270210724957; + b[2] = 0.5887219369641497; + b[3] = 0.7710042954911620; + b[4] = 0.8805451245738238; + + // start test code // + kernelparam[0] = 1.0; + kernelparam[1] = 1.0; + kernelparam[2] = 1.0; + dot = gensvm_kernel_dot_poly(a, b, kernelparam, 5); + mu_assert(fabs(dot - 2.31723456944910) < 1e-14, "Incorrect dot (1)"); + + kernelparam[0] = 1.5; + kernelparam[1] = 2.5; + kernelparam[2] = 3.5; + dot = gensvm_kernel_dot_poly(a, b, kernelparam, 5); + mu_assert(fabs(dot - 189.6989652572890179) < 1e-14, "Incorrect dot (2)"); + + // end test code // + free(a); + free(b); + free(kernelparam); + return NULL; +} + +char *test_dot_sigmoid() +{ + double dot; + double *a = Malloc(double, 5); + double *b = Malloc(double, 5); + double 
*kernelparam = Malloc(double, 2); + + a[0] = 0.5203363837176203; + a[1] = 0.3860628599460129; + a[2] = 0.3592536954640216; + a[3] = 0.6824659760765744; + a[4] = 0.5390520090020700; + + b[0] = 0.1782643262351465; + b[1] = 0.0314270210724957; + b[2] = 0.5887219369641497; + b[3] = 0.7710042954911620; + b[4] = 0.8805451245738238; + + // start test code // + kernelparam[0] = 1.0; + kernelparam[1] = 1.0; + dot = gensvm_kernel_dot_sigmoid(a, b, kernelparam, 5); + mu_assert(fabs(dot - 0.9807642810850747) < 1e-14, "Incorrect dot (1)"); + + kernelparam[0] = 1.5; + kernelparam[1] = 2.5; + dot = gensvm_kernel_dot_sigmoid(a, b, kernelparam, 5); + mu_assert(fabs(dot - 0.9997410009167159) < 1e-14, "Incorrect dot (2)"); + + // end test code // + free(a); + free(b); + free(kernelparam); + + return NULL; +} + +char *all_tests() +{ + mu_suite_start(); + mu_run_test(test_dot_rbf); + mu_run_test(test_dot_poly); + mu_run_test(test_dot_sigmoid); + + return NULL; +} + +RUN_TESTS(all_tests); -- cgit v1.2.3