diff options
| -rw-r--r-- | include/gensvm_base.h | 5 | ||||
| -rw-r--r-- | include/gensvm_io.h | 1 | ||||
| -rw-r--r-- | include/gensvm_optimize.h | 25 | ||||
| -rw-r--r-- | include/gensvm_sparse.h | 80 | ||||
| -rw-r--r-- | include/gensvm_update.h | 55 | ||||
| -rw-r--r-- | src/GenSVMtraintest.c | 5 | ||||
| -rw-r--r-- | src/gensvm_base.c | 4 | ||||
| -rw-r--r-- | src/gensvm_init.c | 63 | ||||
| -rw-r--r-- | src/gensvm_io.c | 10 | ||||
| -rw-r--r-- | src/gensvm_optimize.c | 521 | ||||
| -rw-r--r-- | src/gensvm_sparse.c | 181 | ||||
| -rw-r--r-- | src/gensvm_update.c | 592 | ||||
| -rw-r--r-- | tests/data/test_file_read_data_sparse.txt | 12 | ||||
| -rw-r--r-- | tests/src/test_gensvm_init.c | 66 | ||||
| -rw-r--r-- | tests/src/test_gensvm_io.c | 97 | ||||
| -rw-r--r-- | tests/src/test_gensvm_optimize.c | 754 | ||||
| -rw-r--r-- | tests/src/test_gensvm_sparse.c | 128 | ||||
| -rw-r--r-- | tests/src/test_gensvm_update.c | 841 |
18 files changed, 2164 insertions, 1276 deletions
diff --git a/include/gensvm_base.h b/include/gensvm_base.h index 03b7ffa..986e2a5 100644 --- a/include/gensvm_base.h +++ b/include/gensvm_base.h @@ -14,7 +14,7 @@ #define GENSVM_BASE_H // includes -#include "gensvm_globals.h" +#include "gensvm_sparse.h" // type declarations @@ -26,6 +26,7 @@ * @param m number of predictors * @param *y pointer to vector of class labels * @param *Z pointer to augmented data matrix + * @param *spZ pointer to the sparse augmented data matrix * @param *RAW pointer to augmented raw data matrix * @param *J pointer to regularization vector * @param kerneltype kerneltype used in GenData::Z @@ -46,6 +47,8 @@ struct GenData { double *Z; ///< augmented data matrix (either equal to RAW or to the eigenvectors ///< of the kernel matrix) + struct GenSparse *spZ; + ///< sparse representation of the augmented data matrix double *RAW; ///< augmented raw data matrix double *Sigma; diff --git a/include/gensvm_io.h b/include/gensvm_io.h index 9b0d973..3f34c1a 100644 --- a/include/gensvm_io.h +++ b/include/gensvm_io.h @@ -14,6 +14,7 @@ // includes #include "gensvm_base.h" +#include "gensvm_print.h" #include "gensvm_strutil.h" // function declarations diff --git a/include/gensvm_optimize.h b/include/gensvm_optimize.h index b342d6b..93f6676 100644 --- a/include/gensvm_optimize.h +++ b/include/gensvm_optimize.h @@ -2,7 +2,7 @@ * @file gensvm_optimize.h * @author Gertjan van den Burg * @date August, 2013 - * @brief Header file for gensvm_train.c + * @brief Header file for gensvm_optimize.c * * @details * Contains function declarations for functions used to train a single @@ -14,33 +14,20 @@ #define GENSVM_OPTIMIZE_H #include "gensvm_sv.h" -#include "gensvm_print.h" #include "gensvm_simplex.h" +#include "gensvm_update.h" // function declarations void gensvm_optimize(struct GenModel *model, struct GenData *data); double gensvm_get_loss(struct GenModel *model, struct GenData *data, struct GenWork *work); - -double gensvm_calculate_omega(struct GenModel *model, struct GenData *data, - long i); -bool gensvm_majorize_is_simple(struct GenModel *model, struct GenData *data, - long i); -void gensvm_calculate_ab_non_simple(struct GenModel *model, long i, long j, - double *a, double *b_aq); -void gensvm_calculate_ab_simple(struct GenModel *model, long i, long j, - double *a, double *b_aq); -double gensvm_get_alpha_beta(struct GenModel *model, struct GenData *data, - long i, double *beta); - -void gensvm_get_update(struct GenModel *model, struct GenData *data, - struct GenWork *work); void gensvm_calculate_errors(struct GenModel *model, struct GenData *data, double *ZV); +void gensvm_calculate_ZV_dense(struct GenModel *model, struct GenData *data, + double *ZV); +void gensvm_calculate_ZV_sparse(struct GenModel *model, struct GenData *data, + double *ZV); void gensvm_calculate_huber(struct GenModel *model); void gensvm_step_doubling(struct GenModel *model); -int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, int LDB); -int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, double *B, - int LDB, double *WORK, int LWORK); #endif diff --git a/include/gensvm_sparse.h b/include/gensvm_sparse.h new file mode 100644 index 0000000..bac576a --- /dev/null +++ b/include/gensvm_sparse.h @@ -0,0 +1,80 @@ +/** + * @file gensvm_sparse.h + * @author Gertjan van den Burg + * @date 2016-10-11 + * @brief Header file for gensvm_sparse.c + * + * @details + * Contains declarations of the GenSparse structure and related functions. + * + * @copyright + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + */ + +#ifndef GENSVM_SPARSE_H +#define GENSVM_SPARSE_H + +// includes +#include "gensvm_globals.h" + +// type declarations + +/** + * @brief A structure to represent a sparse matrix in CSR format + * + * @details + * This structure holds a sparse matrix in the classic CSR format. Refer to + * <a href="https://en.wikipedia.org/wiki/Sparse_matrix">Wikipedia</a> for + * more details. The total storage requirement for this format is + * 2*nnz+n_row+1, so it only makes sense to use this format if the number of + * nonzeros is smaller than @f$(n(m - 1) - 1)/2@f$. + * + * @note + * We use @f$n@f$ for rows, and @f$m@f$ for columns, to conform to GenSVM + * notation. This is in exact contrast with the Wikipedia page. + * + * @param nnz number of nonzero elements + * @param n_row rows of the matrix + * @param n_col columns of the matrix + * @param values nonzero values (length nnz) + * @param ia row indices (length n+1) + * @param ja column indices (length nnz) + */ +struct GenSparse { + long nnz; + ///< number of nonzero elements + long n_row; + ///< number of rows of the original matrix + long n_col; + ///< number of columns of the original matrix + + double *values; + ///< actual nonzero values, should be of length nnz + int *ia; + ///< cumulative row lengths, should be of length n+1 + int *ja; + ///< column indices, should be of length nnz +}; + +struct GenSparse *gensvm_init_sparse(); +void gensvm_free_sparse(struct GenSparse *sp); +long gensvm_count_nnz(double *A, long rows, long cols); +bool gensvm_could_sparse(double *A, long rows, long cols); +struct GenSparse *gensvm_dense_to_sparse(double *A, long rows, long cols); +double *gensvm_sparse_to_dense(struct GenSparse *A); + +#endif diff --git a/include/gensvm_update.h b/include/gensvm_update.h new file mode 100644 index 0000000..9242d86 --- /dev/null +++ b/include/gensvm_update.h @@ -0,0 +1,55 @@ +/** + * @file gensvm_update.h + * @author Gertjan van den Burg + * @date 2016-10-14 + * @brief Header file for gensvm_update.c + + * Copyright (C) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + */ + +#ifndef GENSVM_UPDATE_H +#define GENSVM_UPDATE_H + +#include "gensvm_base.h" +#include "gensvm_print.h" + +// function declarations +double gensvm_calculate_omega(struct GenModel *model, struct GenData *data, + long i); +bool gensvm_majorize_is_simple(struct GenModel *model, struct GenData *data, + long i); +void gensvm_calculate_ab_non_simple(struct GenModel *model, long i, long j, + double *a, double *b_aq); +void gensvm_calculate_ab_simple(struct GenModel *model, long i, long j, + double *a, double *b_aq); +double gensvm_get_alpha_beta(struct GenModel *model, struct GenData *data, + long i, double *beta); +void gensvm_get_update(struct GenModel *model, struct GenData *data, + struct GenWork *work); +void gensvm_get_ZAZ_ZB_dense(struct GenModel *model, struct GenData *data, + struct GenWork *work); +void gensvm_get_ZAZ_ZB_sparse(struct GenModel *model, struct GenData *data, + struct GenWork *work); +void gensvm_get_ZAZ_ZB(struct GenModel *model, struct GenData *data, + struct GenWork *work); +int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, + int LDB); +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, + double *B, int LDB, double *WORK, int LWORK); + +#endif diff --git a/src/GenSVMtraintest.c b/src/GenSVMtraintest.c index f70fa97..cc0284e 100644 --- a/src/GenSVMtraintest.c +++ b/src/GenSVMtraintest.c @@ -83,7 +83,10 @@ int main(int argc, char **argv) strcpy(model->data_file, training_inputfile); // seed the random number generator - srand(time(NULL)); + //srand(time(NULL)); + // + // XXX temporary + srand(2135); // load a seed model from file if it is specified if (gensvm_check_argv_eq(argc, argv, "-s")) { diff --git a/src/gensvm_base.c b/src/gensvm_base.c index cef0a3c..e4fc20a 100644 --- a/src/gensvm_base.c +++ b/src/gensvm_base.c @@ -30,6 +30,7 @@ struct GenData *gensvm_init_data() data->Sigma = NULL; data->y = NULL; data->Z = NULL; + data->spZ = NULL; data->RAW = NULL; // set default values @@ -54,6 +55,9 @@ void gensvm_free_data(struct GenData *data) if (data == NULL) return; + if (data->spZ != NULL) + gensvm_free_sparse(data->spZ); + if (data->Z == data->RAW) { free(data->Z); } else { diff --git a/src/gensvm_init.c b/src/gensvm_init.c index aa96ca6..e0f44c1 100644 --- a/src/gensvm_init.c +++ b/src/gensvm_init.c @@ -33,30 +33,67 @@ void gensvm_init_V(struct GenModel *from_model, struct GenModel *to_model, struct GenData *data) { - long i, j, k; + long i, j, k, jj_start, jj_end, jj; double cmin, cmax, value, rnd; + double *col_min = NULL, + *col_max = NULL; long n = data->n; long m = data->m; long K = data->K; if (from_model == NULL) { - for (i=0; i<m+1; i++) { - cmin = 1e100; - cmax = -1e100; - for (k=0; k<n; k++) { - value = matrix_get(data->Z, m+1, k, i); - cmin = minimum(cmin, value); - cmax = maximum(cmax, value); + col_min = Calloc(double, m+1); + col_max = Calloc(double, m+1); + for (j=0; j<m+1; j++) { + col_min[j] = 1.0e100; + col_max[j] = -1.0e100; + } + + if (data->Z == NULL) { + // sparse matrix + int *visit_count = Calloc(int, m+1); + for (i=0; i<n; i++) { + jj_start = data->spZ->ia[i]; + jj_end = data->spZ->ia[i+1]; + for (jj=jj_start; jj<jj_end; jj++) { + j = data->spZ->ja[jj]; + value = data->spZ->values[jj]; + + col_min[j] = minimum(col_min[j], value); + col_max[j] = maximum(col_max[j], value); + visit_count[j]++; + } } - for (j=0; j<K-1; j++) { - cmin = (abs(cmin) < 1e-10) ? -1 : cmin; - cmax = (abs(cmax) < 1e-10) ? 1 : cmax; - rnd = ((double) rand()) / RAND_MAX; + // correction in case the minimum or maximum is 0 + for (j=0; j<m+1; j++) { + if (visit_count[j] < n) { + col_min[j] = minimum(col_min[j], 0.0); + col_max[j] = maximum(col_max[j], 0.0); + } + } + free(visit_count); + } else { + // dense matrix + for (i=0; i<n; i++) { + for (j=0; j<m+1; j++) { + value = matrix_get(data->Z, m+1, i, j); + col_min[j] = minimum(col_min[j], value); + col_max[j] = maximum(col_max[j], value); + } + } + } + for (j=0; j<m+1; j++) { + cmin = (fabs(col_min[j]) < 1e-10) ? -1 : col_min[j]; + cmax = (fabs(col_max[j]) < 1e-10) ? 1 : col_max[j]; + for (k=0; k<K-1; k++) { + rnd = ((double) rand()) / ((double) RAND_MAX); value = 1.0/cmin + (1.0/cmax - 1.0/cmin)*rnd; - matrix_set(to_model->V, K-1, i, j, value); + matrix_set(to_model->V, K-1, j, k, value); } } + free(col_min); + free(col_max); } else { for (i=0; i<m+1; i++) for (j=0; j<K-1; j++) { diff --git a/src/gensvm_io.c b/src/gensvm_io.c index a574654..a9ab734 100644 --- a/src/gensvm_io.c +++ b/src/gensvm_io.c @@ -12,7 +12,6 @@ */ #include <limits.h> #include "gensvm_io.h" -#include "gensvm_print.h" /** * @brief Read data from file @@ -139,6 +138,15 @@ void gensvm_read_data(struct GenData *dataset, char *data_file) dataset->K = K; dataset->Z = dataset->RAW; + if (gensvm_could_sparse(dataset->Z, n, m+1)) { + note("Converting to sparse ... "); + dataset->spZ = gensvm_dense_to_sparse(dataset->Z, n, m+1); + note("done.\n"); + free(dataset->RAW); + dataset->RAW = NULL; + dataset->Z = NULL; + } + free(uniq_y); } diff --git a/src/gensvm_optimize.c b/src/gensvm_optimize.c index e517332..e83fa47 100644 --- a/src/gensvm_optimize.c +++ b/src/gensvm_optimize.c @@ -167,395 +167,6 @@ double gensvm_get_loss(struct GenModel *model, struct GenData *data, } /** - * @brief Calculate the value of omega for a single instance - * - * @details - * This function calculates the value of the @f$ \omega_i @f$ variable for a - * single instance, where - * @f[ - * \omega_i = \frac{1}{p} \left( \sum_{j \neq y_i} h^p\left( - * \overline{q}_i^{(y_i j)} \right) \right)^{1/p-1} - * @f] - * Note that his function uses the precalculated values from GenModel::H and - * GenModel::R to speed up the computation. - * - * @param[in] model GenModel structure with the current model - * @param[in] i index of the instance for which to calculate omega - * @returns the value of omega for instance i - * - */ -double gensvm_calculate_omega(struct GenModel *model, struct GenData *data, - long i) -{ - long j; - double h, omega = 0.0, - p = model->p; - - for (j=0; j<model->K; j++) { - if (j == (data->y[i]-1)) - continue; - h = matrix_get(model->H, model->K, i, j); - omega += pow(h, p); - } - omega = (1.0/p)*pow(omega, 1.0/p - 1.0); - - return omega; -} - -/** - * @brief Check if we can do simple majorization for a given instance - * - * @details - * A simple majorization is possible if at most one of the Huberized hinge - * errors is nonzero for an instance. This is checked here. For this we - * compute the product of the Huberized error for all @f$j \neq y_i@f$ and - * check if strictly less than 2 are nonzero. See also the @ref update_math. - * - * @param[in] model GenModel structure with the current model - * @param[in] i index of the instance for which to check - * @returns whether or not we can do simple majorization - * - */ -bool gensvm_majorize_is_simple(struct GenModel *model, struct GenData *data, - long i) -{ - long j; - double h, value = 0; - for (j=0; j<model->K; j++) { - if (j == (data->y[i]-1)) - continue; - h = matrix_get(model->H, model->K, i, j); - value += (h > 0) ? 1 : 0; - if (value > 1) - return false; - } - return true; -} - -/** - * @brief Compute majorization coefficients for non-simple instance - * - * @details - * In this function we compute the majorization coefficients needed for an - * instance with a non-simple majorization (@f$\varepsilon_i = 0@f$). In this - * function, we distinguish a number of cases depending on the value of - * GenModel::p and the respective value of @f$\overline{q}_i^{(y_ij)}@f$. Note - * that the linear coefficient is of the form @f$b - a\overline{q}@f$, but - * often the second term is included in the definition of @f$b@f$, so it can - * be optimized out. The output argument \p b_aq contains this difference - * therefore in one go. More details on this function can be found in the @ref - * update_math. See also gensvm_calculate_ab_simple(). - * - * @param[in] model GenModel structure with the current model - * @param[in] i index for the instance - * @param[in] j index for the class - * @param[out] *a output argument for the quadratic coefficient - * @param[out] *b_aq output argument for the linear coefficient. - * - */ -void gensvm_calculate_ab_non_simple(struct GenModel *model, long i, long j, - double *a, double *b_aq) -{ - double q = matrix_get(model->Q, model->K, i, j); - double p = model->p; - double kappa = model->kappa; - const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0); - - if (2.0 - model->p < 1e-2) { - if (q <= - kappa) { - *b_aq = 0.5 - kappa/2.0 - q; - } else if ( q <= 1.0) { - *b_aq = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0)); - } else { - *b_aq = 0; - } - *a = 1.5; - } else { - if (q <= (p + kappa - 1.0)/(p - 2.0)) { - *a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0); - } else if (q <= 1.0) { - *a = a2g2; - } else { - *a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 - - kappa/2.0 - q), p - 2.0); - *b_aq = (*a)*(2.0*q + kappa - 1.0)/(p - 2.0) + - 0.5*p*pow(p/(p - 2.0)*(0.5 - kappa/2.0 - q), - p - 1.0); - } - if (q <= -kappa) { - *b_aq = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0); - } else if ( q <= 1.0) { - *b_aq = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p); - } - } -} - -/** - * @brief Compute majorization coefficients for simple instances - * - * @details - * In this function we compute the majorization coefficients needed for an - * instance with a simple majorization. This corresponds to the non-simple - * majorization for the case where GenModel::p equals 1. Due to this condition - * the majorization coefficients are quite simple to compute. Note that the - * linear coefficient of the majorization is of the form @f$b - - * a\overline{q}@f$, but often the second term is included in the definition - * of @f$b@f$, so it can be optimized out. For more details see the @ref - * update_math, and gensvm_calculate_ab_non_simple(). - * - * @param[in] model GenModel structure with the current model - * @param[in] i index for the instance - * @param[in] j index for the class - * @param[out] *a output argument for the quadratic coefficient - * @param[out] *b_aq output argument for the linear coefficient - * - */ -void gensvm_calculate_ab_simple(struct GenModel *model, long i, long j, - double *a, double *b_aq) -{ - double q = matrix_get(model->Q, model->K, i, j); - - if (q <= - model->kappa) { - *a = 0.25/(0.5 - model->kappa/2.0 - q); - *b_aq = 0.5; - } else if (q <= 1.0) { - *a = 1.0/(2.0*model->kappa + 2.0); - *b_aq = (1.0 - q)*(*a); - } else { - *a = -0.25/(0.5 - model->kappa/2.0 - q); - *b_aq = 0; - } -} - -/** - * @brief Compute the alpha_i and beta_i for an instance - * - * @details - * This computes the @f$\alpha_i@f$ value for an instance, and simultaneously - * updating the row of the B matrix corresponding to that - * instance (the @f$\boldsymbol{\beta}_i'@f$). The advantage of doing this at - * the same time is that we can compute the a and b values simultaneously in - * the gensvm_calculate_ab_simple() and gensvm_calculate_ab_non_simple() - * functions. - * - * The computation is done by first checking whether simple majorization is - * possible for this instance. If so, the @f$\omega_i@f$ value is set to 1.0, - * otherwise this value is computed. If simple majorization is possible, the - * coefficients a and b_aq are computed by gensvm_calculate_ab_simple(), - * otherwise they're computed by gensvm_calculate_ab_non_simple(). Next, the - * beta_i updated through the efficient BLAS daxpy function, and part of the - * value of @f$\alpha_i@f$ is computed. The final value of @f$\alpha_i@f$ is - * returned. - * - * @param[in] model GenModel structure with the current model - * @param[in] i index of the instance to update - * @param[out] beta beta vector of linear coefficients (assumed to - * be allocated elsewhere, initialized here) - * @returns the @f$\alpha_i@f$ value of this instance - * - */ -double gensvm_get_alpha_beta(struct GenModel *model, struct GenData *data, - long i, double *beta) -{ - bool simple; - long j, - K = model->K; - double omega, a, b_aq = 0.0, - alpha = 0.0; - double *uu_row = NULL; - const double in = 1.0/((double) model->n); - - simple = gensvm_majorize_is_simple(model, data, i); - omega = simple ? 1.0 : gensvm_calculate_omega(model, data, i); - - Memset(beta, double, K-1); - for (j=0; j<K; j++) { - // skip the class y_i = k - if (j == (data->y[i]-1)) - continue; - - // calculate the a_ijk and (b_ijk - a_ijk q_i^(kj)) values - if (simple) { - gensvm_calculate_ab_simple(model, i, j, &a, &b_aq); - } else { - gensvm_calculate_ab_non_simple(model, i, j, &a, &b_aq); - } - - // daxpy on beta and UU - // daxpy does: y = a*x + y - // so y = beta, UU_row = x, a = factor - b_aq *= model->rho[i] * omega * in; - uu_row = &model->UU[((data->y[i]-1)*K+j)*(K-1)]; - cblas_daxpy(K-1, b_aq, uu_row, 1, beta, 1); - - // increment Avalue - alpha += a; - } - alpha *= omega * model->rho[i] * in; - return alpha; -} - -/** - * @brief Perform a single step of the majorization algorithm to update V - * - * @details - * This function contains the main update calculations of the algorithm. These - * calculations are necessary to find a new update V. The calculations exist of - * recalculating the majorization coefficients for all instances and all - * classes, and solving a linear system to find V. - * - * Because the function gensvm_get_update() is always called after a call to - * gensvm_get_loss() with the same GenModel::V, it is unnecessary to calculate - * the updated errors GenModel::Q and GenModel::H here too. This saves on - * computation time. - * - * In calculating the majorization coefficients we calculate the elements of a - * diagonal matrix A with elements - * @f[ - * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ - * \varepsilon_i a_{ijk}^{(1)} + (1 - \varepsilon_i) \omega_i - * a_{ijk}^{(p)} \right], - * @f] - * where @f$ k = y_i @f$. - * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it - * is efficient to update a matrix ZAZ through consecutive rank 1 updates with - * a single element of A and the corresponding row of Z. The BLAS function - * dsyr is used for this. - * - * The B matrix is has rows - * @f[ - * \boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ - * \varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)} - * \overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i) - * \omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)} - * \overline{q}_i^{(kj)} \right) \right] - * \boldsymbol{\delta}_{kj}' - * @f] - * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$, - * and one for when it is 0. The 3D simplex difference matrix is used here, in - * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$. - * - * Finally, the following system is solved - * @f[ - * (\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} = - * (\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}' - * \textbf{B}) - * @f] - * solving this system is done through dposv(). - * - * @todo - * Consider using CblasColMajor everywhere - * - * @param[in,out] model model to be updated - * @param[in] data data used in model - * @param[in] work allocated workspace to use - */ -void gensvm_get_update(struct GenModel *model, struct GenData *data, - struct GenWork *work) -{ - int status; - long i, j; - double alpha, sqalpha; - - long n = model->n; - long m = model->m; - long K = model->K; - - gensvm_reset_work(work); - - // generate Z'*A*Z and Z'*B by rank 1 operations - for (i=0; i<n; i++) { - alpha = gensvm_get_alpha_beta(model, data, i, work->beta); - - // calculate row of matrix LZ, which is a scalar - // multiplication of sqrt(alpha_i) and row z_i' of Z - // Note that we use the fact that the first column of Z is - // always 1, by only computing the product for m values and - // copying the first element over. - sqalpha = sqrt(alpha); - work->LZ[i*(m+1)] = sqalpha; - cblas_daxpy(m, sqalpha, &data->Z[i*(m+1)+1], 1, - &work->LZ[i*(m+1)+1], 1); - - // rank 1 update of matrix Z'*B - // Note: LDA is the second dimension of ZB because of - // Row-Major order - cblas_dger(CblasRowMajor, m+1, K-1, 1, &data->Z[i*(m+1)], 1, - work->beta, 1, work->ZB, K-1); - } - - // calculate Z'*A*Z by symmetric multiplication of LZ with itself - // (ZAZ = (LZ)' * (LZ) - cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, m+1, n, 1.0, - work->LZ, m+1, 0.0, work->ZAZ, m+1); - - // Calculate right-hand side of system we want to solve - // dsymm performs ZB := 1.0 * (ZAZ) * Vbar + 1.0 * ZB - // the right-hand side is thus stored in ZB after this call - // Note: LDB and LDC are second dimensions of the matrices due to - // Row-Major order - cblas_dsymm(CblasRowMajor, CblasLeft, CblasUpper, m+1, K-1, 1, - work->ZAZ, m+1, model->V, K-1, 1.0, work->ZB, K-1); - - // Calculate left-hand side of system we want to solve - // Add lambda to all diagonal elements except the first one. Recall - // that ZAZ is of size m+1 and is symmetric. - for (i=m+2; i<=m*(m+2); i+=m+2) - work->ZAZ[i] += model->lambda; - - // Lapack uses column-major order, so we transform the ZB matrix to - // correspond to this. - for (i=0; i<m+1; i++) - for (j=0; j<K-1; j++) - work->ZBc[j*(m+1)+i] = work->ZB[i*(K-1)+j]; - - // Solve the system using dposv. Note that above the upper triangular - // part has always been used in row-major order for ZAZ. This - // corresponds to the lower triangular part in column-major order. - status = dposv('L', m+1, K-1, work->ZAZ, m+1, work->ZBc, m+1); - - // Use dsysv as fallback, for when the ZAZ matrix is not positive - // semi-definite for some reason (perhaps due to rounding errors). - // This step shouldn't be necessary but is included for safety. - if (status != 0) { - err("[GenSVM Warning]: Received nonzero status from " - "dposv: %i\n", status); - int *IPIV = Malloc(int, m+1); - double *WORK = Malloc(double, 1); - status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc, - m+1, WORK, -1); - - int LWORK = WORK[0]; - WORK = Realloc(WORK, double, LWORK); - status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc, - m+1, WORK, LWORK); - if (status != 0) - err("[GenSVM Warning]: Received nonzero " - "status from dsysv: %i\n", status); - free(WORK); - WORK = NULL; - free(IPIV); - IPIV = NULL; - } - - // the solution is now stored in ZBc, in column-major order. Here we - // convert this back to row-major order - for (i=0; i<m+1; i++) - for (j=0; j<K-1; j++) - work->ZB[i*(K-1)+j] = work->ZBc[j*(m+1)+i]; - - // copy the old V to Vbar and the new solution to V - for (i=0; i<m+1; i++) { - for (j=0; j<K-1; j++) { - matrix_set(model->Vbar, K-1, i, j, - matrix_get(model->V, K-1, i, j)); - matrix_set(model->V, K-1, i, j, - matrix_get(work->ZB, K-1, i, j)); - } - } -} - -/** * @brief Use step doubling * * @details @@ -644,24 +255,12 @@ void gensvm_calculate_errors(struct GenModel *model, struct GenData *data, double q, *uu_row = NULL; long n = model->n; - long m = model->m; long K = model->K; - cblas_dgemm( - CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - n, - K-1, - m+1, - 1.0, - data->Z, - m+1, - model->V, - K-1, - 0, - ZV, - K-1); + if (data->spZ == NULL) + gensvm_calculate_ZV_dense(model, data, ZV); + else + gensvm_calculate_ZV_sparse(model, data, ZV); for (i=0; i<n; i++) { for (j=0; j<K; j++) { @@ -674,87 +273,41 @@ void gensvm_calculate_errors(struct GenModel *model, struct GenData *data, } } -/** - * @brief Solve AX = B where A is symmetric positive definite. - * - * @details - * Solve a linear system of equations AX = B where A is symmetric positive - * definite. This function is a wrapper for the external LAPACK routine - * dposv. - * - * @param[in] UPLO which triangle of A is stored - * @param[in] N order of A - * @param[in] NRHS number of columns of B - * @param[in,out] A double precision array of size (LDA, N). On - * exit contains the upper or lower factor of the - * Cholesky factorization of A. - * @param[in] LDA leading dimension of A - * @param[in,out] B double precision array of size (LDB, NRHS). On - * exit contains the N-by-NRHS solution matrix X. - * @param[in] LDB the leading dimension of B - * @returns info parameter which contains the status of the - * computation: - * - =0: success - * - <0: if -i, the i-th argument had - * an illegal value - * - >0: if i, the leading minor of A - * was not positive definite - * - * See the LAPACK documentation at: - * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html - */ -int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, - int LDB) +void gensvm_calculate_ZV_sparse(struct GenModel *model, + struct GenData *data, double *ZV) { - extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A, - int *LDAp, double *B, int *LDBp, int *INFOp); - int INFO; - dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO); - return INFO; + long i, j, jj, jj_start, jj_end, K, + n_row = data->spZ->n_row; + double z_ij; + + K = model->K; + + int *Zia = data->spZ->ia; + int *Zja = data->spZ->ja; + double *vals = data->spZ->values; + + for (i=0; i<n_row; i++) { + jj_start = Zia[i]; + jj_end = Zia[i+1]; + + for (jj=jj_start; jj<jj_end; jj++) { + j = Zja[jj]; + z_ij = vals[jj]; + + cblas_daxpy(K-1, z_ij, &model->V[j*(K-1)], 1, + &ZV[i*(K-1)], 1); + } + } } -/** - * @brief Solve a system of equations AX = B where A is symmetric. - * - * @details - * Solve a linear system of equations AX = B where A is symmetric. This - * function is a wrapper for the external LAPACK routine dsysv. - * - * @param[in] UPLO which triangle of A is stored - * @param[in] N order of A - * @param[in] NRHS number of columns of B - * @param[in,out] A double precision array of size (LDA, N). On - * exit contains the block diagonal matrix D and - * the multipliers used to obtain the factor U or - * L from the factorization A = U*D*U**T or - * A = L*D*L**T. - * @param[in] LDA leading dimension of A - * @param[in] IPIV integer array containing the details of D - * @param[in,out] B double precision array of size (LDB, NRHS). On - * exit contains the N-by-NRHS matrix X - * @param[in] LDB leading dimension of B - * @param[out] WORK double precision array of size max(1,LWORK). On - * exit, WORK(1) contains the optimal LWORK - * @param[in] LWORK the length of WORK, can be used for determining - * the optimal blocksize for dsystrf. - * @returns info parameter which contains the status of the - * computation: - * - =0: success - * - <0: if -i, the i-th argument had an - * illegal value - * - >0: if i, D(i, i) is exactly zero, - * no solution can be computed. - * - * See the LAPACK documentation at: - * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html - */ -int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, - double *B, int LDB, double *WORK, int LWORK) +void gensvm_calculate_ZV_dense(struct GenModel *model, + struct GenData *data, double *ZV) { - extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A, - int *LDAp, int *IPIV, double *B, int *LDBp, - double *WORK, int *LWORK, int *INFOp); - int INFO; - dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO); - return INFO; + long n = model->n; + long m = model->m; + long K = model->K; + + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, K-1, m+1, + 1.0, data->Z, m+1, model->V, K-1, 0, ZV, K-1); } + diff --git a/src/gensvm_sparse.c b/src/gensvm_sparse.c new file mode 100644 index 0000000..ebc2c9d --- /dev/null +++ b/src/gensvm_sparse.c @@ -0,0 +1,181 @@ +/** + * @file gensvm_sparse.c + * @author Gertjan van den Burg + * @date October 11, 2016 + * @brief Functions for dealing with sparse data matrices + * + */ + +#include "gensvm_sparse.h" + +/** + * @brief Initialize a GenSparse structure + * + * @details + * A GenSparse structure is used to hold a sparse data matrix. We work with + * Compressed Row Storage (CSR) storage, also known as old Yale format. + * + * @return initialized GenSparse instance + */ +struct GenSparse *gensvm_init_sparse() +{ + struct GenSparse *sp = Malloc(struct GenSparse, 1); + sp->nnz = 0; + sp->n_row = 0; + sp->n_col = 0; + + sp->values = NULL; + sp->ia = NULL; + sp->ja = NULL; + + return sp; +} + +/** + * @brief Free an allocated GenSparse structure + * + * @details + * Simply free a previously allocated GenSparse structure by freeing all of + * its components. Finally, the structure itself is freed, and the pointer is + * set to NULL for safety. + * + * @param[in] sp GenSparse structure to free + */ +void gensvm_free_sparse(struct GenSparse *sp) +{ + free(sp->values); + free(sp->ia); + free(sp->ja); + free(sp); + sp = NULL; +} + +/** + * @brief Count the number of nonzeros in a matrix + * + * @details + * This is a utility function to count the number of nonzeros in a dense + * matrix. This is simply done by comparing with 0.0. + * + * @param[in] A a dense matrix (RowMajor order) + * @param[in] rows the number of rows of A + * @param[in] cols the number of columns of A + * + * @return the number of nonzeros in A + */ +long gensvm_count_nnz(double *A, long rows, long cols) +{ + long i, nnz = 0; + + for (i=0; i<rows*cols; i++) + nnz += (A[i] != 0.0) ? 1 : 0; + return nnz; +} + +/** + * @brief Check if it is worthwile to convert to a sparse matrix + * + * @details + * It is only worth to convert to a sparse matrix if the amount of sparsity is + * sufficient. For this to be the case, the number of nonzeros must be + * smaller than @f$(n(m - 1) - 1)/2@f$. This is tested here. If the amount of + * nonzero entries is small enough, the function returns the number of + * nonzeros. If it is too big, it returns -1. + * + * @param[in] A matrix in dense format (RowMajor order) + * @param[in] rows number of rows of A + * @param[in] cols number of columns of A + * + * @return + */ +bool gensvm_could_sparse(double *A, long rows, long cols) +{ + long nnz = gensvm_count_nnz(A, rows, cols); + + if (nnz < (rows*(cols-1.0)-1.0)/2.0) { + return true; + } + return false; +} + + +/** + * @brief Convert a dense matrix to a GenSparse structure if advantageous + * + * @details + * This utility function can be used to convert a dense matrix to a sparse + * matrix in the form of a GenSparse struture. Note that the allocated memory + * must be freed by the caller. The user should first check whether using a + * sparse matrix is worth it by calling gensvm_could_sparse(). + * + * @param[in] A a dense matrix in RowMajor order + * @param[in] rows number of rows of the matrix A + * @param[in] cols number of columns of the matrix A + * + * @return a GenSparse struct + */ +struct GenSparse *gensvm_dense_to_sparse(double *A, long rows, long cols) +{ + double value; + int row_cnt; + long i, j, cnt, nnz = 0; + struct GenSparse *spA = NULL; + + nnz = gensvm_count_nnz(A, rows, cols); + + spA = gensvm_init_sparse(); + + spA->nnz = nnz; + spA->n_row = rows; + spA->n_col = cols; + spA->values = Calloc(double, nnz); + spA->ia = Calloc(int, rows+1); + spA->ja = Calloc(int, nnz); + + cnt = 0; + spA->ia[0] = 0; + for (i=0; i<rows; i++) { + row_cnt = 0; + for (j=0; j<cols; j++) { + value = matrix_get(A, cols, i, j); + if (value != 0) { + row_cnt++; + spA->values[cnt] = value; + spA->ja[cnt] = j; + cnt++; + } + } + spA->ia[i+1] = spA->ia[i] + row_cnt; + } + + return spA; +} + +/** + * @brief Convert a GenSparse structure to a dense matrix + * + * @details + * This function converts a GenSparse structure back to a normal dense matrix + * in RowMajor order. Note that the allocated memory must be freed by the + * caller. + * + * @param[in] A a GenSparse structure + * + * @return a dense matrix + */ +double *gensvm_sparse_to_dense(struct GenSparse *A) +{ + double value; + long i, j, jj; + double *B = Calloc(double, (A->n_row)*(A->n_col)); + for (i=0; i<A->n_row; i++) { + for (jj=A->ia[i]; jj<A->ia[i+1]; jj++) { + j = A->ja[jj]; + value = A->values[jj]; + matrix_set(B, A->n_col, i, j, value); + } + } + + return B; +} + diff --git a/src/gensvm_update.c b/src/gensvm_update.c new file mode 100644 index 0000000..289f50c --- /dev/null +++ b/src/gensvm_update.c @@ -0,0 +1,592 @@ +/** + * @file gensvm_update.c + * @author Gertjan van den Burg + * @date 2016-10-14 + * @brief Functions for getting an update of the majorization algorithm + + * Copyright (C) + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + */ + +#include "gensvm_update.h" + +/** + * @brief Calculate the value of omega for a single instance + * + * @details + * This function calculates the value of the @f$ \omega_i @f$ variable for a + * single instance, where + * @f[ + * \omega_i = \frac{1}{p} \left( \sum_{j \neq y_i} h^p\left( + * \overline{q}_i^{(y_i j)} \right) \right)^{1/p-1} + * @f] + * Note that his function uses the precalculated values from GenModel::H and + * GenModel::R to speed up the computation. + * + * @param[in] model GenModel structure with the current model + * @param[in] i index of the instance for which to calculate omega + * @returns the value of omega for instance i + * + */ +double gensvm_calculate_omega(struct GenModel *model, struct GenData *data, + long i) +{ + long j; + double h, omega = 0.0, + p = model->p; + + for (j=0; j<model->K; j++) { + if (j == (data->y[i]-1)) + continue; + h = matrix_get(model->H, model->K, i, j); + omega += pow(h, p); + } + omega = (1.0/p)*pow(omega, 1.0/p - 1.0); + + return omega; +} + +/** + * @brief Check if we can do simple majorization for a given instance + * + * @details + * A simple majorization is possible if at most one of the Huberized hinge + * errors is nonzero for an instance. This is checked here. For this we + * compute the product of the Huberized error for all @f$j \neq y_i@f$ and + * check if strictly less than 2 are nonzero. See also the @ref update_math. + * + * @param[in] model GenModel structure with the current model + * @param[in] i index of the instance for which to check + * @returns whether or not we can do simple majorization + * + */ +bool gensvm_majorize_is_simple(struct GenModel *model, struct GenData *data, + long i) +{ + long j; + double h, value = 0; + for (j=0; j<model->K; j++) { + if (j == (data->y[i]-1)) + continue; + h = matrix_get(model->H, model->K, i, j); + value += (h > 0) ? 1 : 0; + if (value > 1) + return false; + } + return true; +} + +/** + * @brief Compute majorization coefficients for non-simple instance + * + * @details + * In this function we compute the majorization coefficients needed for an + * instance with a non-simple majorization (@f$\varepsilon_i = 0@f$). In this + * function, we distinguish a number of cases depending on the value of + * GenModel::p and the respective value of @f$\overline{q}_i^{(y_ij)}@f$. Note + * that the linear coefficient is of the form @f$b - a\overline{q}@f$, but + * often the second term is included in the definition of @f$b@f$, so it can + * be optimized out. The output argument \p b_aq contains this difference + * therefore in one go. More details on this function can be found in the @ref + * update_math. See also gensvm_calculate_ab_simple(). + * + * @param[in] model GenModel structure with the current model + * @param[in] i index for the instance + * @param[in] j index for the class + * @param[out] *a output argument for the quadratic coefficient + * @param[out] *b_aq output argument for the linear coefficient. + * + */ +void gensvm_calculate_ab_non_simple(struct GenModel *model, long i, long j, + double *a, double *b_aq) +{ + double q = matrix_get(model->Q, model->K, i, j); + double p = model->p; + double kappa = model->kappa; + const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0); + + if (2.0 - model->p < 1e-2) { + if (q <= - kappa) { + *b_aq = 0.5 - kappa/2.0 - q; + } else if ( q <= 1.0) { + *b_aq = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0)); + } else { + *b_aq = 0; + } + *a = 1.5; + } else { + if (q <= (p + kappa - 1.0)/(p - 2.0)) { + *a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0); + } else if (q <= 1.0) { + *a = a2g2; + } else { + *a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 - + kappa/2.0 - q), p - 2.0); + *b_aq = (*a)*(2.0*q + kappa - 1.0)/(p - 2.0) + + 0.5*p*pow(p/(p - 2.0)*(0.5 - kappa/2.0 - q), + p - 1.0); + } + if (q <= -kappa) { + *b_aq = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0); + } else if ( q <= 1.0) { + *b_aq = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p); + } + } +} + +/** + * @brief Compute majorization coefficients for simple instances + * + * @details + * In this function we compute the majorization coefficients needed for an + * instance with a simple majorization. This corresponds to the non-simple + * majorization for the case where GenModel::p equals 1. Due to this condition + * the majorization coefficients are quite simple to compute. Note that the + * linear coefficient of the majorization is of the form @f$b - + * a\overline{q}@f$, but often the second term is included in the definition + * of @f$b@f$, so it can be optimized out. For more details see the @ref + * update_math, and gensvm_calculate_ab_non_simple(). + * + * @param[in] model GenModel structure with the current model + * @param[in] i index for the instance + * @param[in] j index for the class + * @param[out] *a output argument for the quadratic coefficient + * @param[out] *b_aq output argument for the linear coefficient + * + */ +void gensvm_calculate_ab_simple(struct GenModel *model, long i, long j, + double *a, double *b_aq) +{ + double q = matrix_get(model->Q, model->K, i, j); + + if (q <= - model->kappa) { + *a = 0.25/(0.5 - model->kappa/2.0 - q); + *b_aq = 0.5; + } else if (q <= 1.0) { + *a = 1.0/(2.0*model->kappa + 2.0); + *b_aq = (1.0 - q)*(*a); + } else { + *a = -0.25/(0.5 - model->kappa/2.0 - q); + *b_aq = 0; + } +} + +/** + * @brief Compute the alpha_i and beta_i for an instance + * + * @details + * This computes the @f$\alpha_i@f$ value for an instance, and simultaneously + * updating the row of the B matrix corresponding to that + * instance (the @f$\boldsymbol{\beta}_i'@f$). The advantage of doing this at + * the same time is that we can compute the a and b values simultaneously in + * the gensvm_calculate_ab_simple() and gensvm_calculate_ab_non_simple() + * functions. + * + * The computation is done by first checking whether simple majorization is + * possible for this instance. If so, the @f$\omega_i@f$ value is set to 1.0, + * otherwise this value is computed. If simple majorization is possible, the + * coefficients a and b_aq are computed by gensvm_calculate_ab_simple(), + * otherwise they're computed by gensvm_calculate_ab_non_simple(). Next, the + * beta_i updated through the efficient BLAS daxpy function, and part of the + * value of @f$\alpha_i@f$ is computed. The final value of @f$\alpha_i@f$ is + * returned. + * + * @param[in] model GenModel structure with the current model + * @param[in] i index of the instance to update + * @param[out] beta beta vector of linear coefficients (assumed to + * be allocated elsewhere, initialized here) + * @returns the @f$\alpha_i@f$ value of this instance + * + */ +double gensvm_get_alpha_beta(struct GenModel *model, struct GenData *data, + long i, double *beta) +{ + bool simple; + long j, + K = model->K; + double omega, a, b_aq = 0.0, + alpha = 0.0; + double *uu_row = NULL; + const double in = 1.0/((double) model->n); + + simple = gensvm_majorize_is_simple(model, data, i); + omega = simple ? 1.0 : gensvm_calculate_omega(model, data, i); + + Memset(beta, double, K-1); + for (j=0; j<K; j++) { + // skip the class y_i = k + if (j == (data->y[i]-1)) + continue; + + // calculate the a_ijk and (b_ijk - a_ijk q_i^(kj)) values + if (simple) { + gensvm_calculate_ab_simple(model, i, j, &a, &b_aq); + } else { + gensvm_calculate_ab_non_simple(model, i, j, &a, &b_aq); + } + + // daxpy on beta and UU + // daxpy does: y = a*x + y + // so y = beta, UU_row = x, a = factor + b_aq *= model->rho[i] * omega * in; + uu_row = &model->UU[((data->y[i]-1)*K+j)*(K-1)]; + cblas_daxpy(K-1, b_aq, uu_row, 1, beta, 1); + + // increment Avalue + alpha += a; + } + alpha *= omega * model->rho[i] * in; + return alpha; +} + +/** + * @brief Perform a single step of the majorization algorithm to update V + * + * @details + * This function contains the main update calculations of the algorithm. These + * calculations are necessary to find a new update V. The calculations exist of + * recalculating the majorization coefficients for all instances and all + * classes, and solving a linear system to find V. + * + * Because the function gensvm_get_update() is always called after a call to + * gensvm_get_loss() with the same GenModel::V, it is unnecessary to calculate + * the updated errors GenModel::Q and GenModel::H here too. This saves on + * computation time. + * + * In calculating the majorization coefficients we calculate the elements of a + * diagonal matrix A with elements + * @f[ + * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ + * \varepsilon_i a_{ijk}^{(1)} + (1 - \varepsilon_i) \omega_i + * a_{ijk}^{(p)} \right], + * @f] + * where @f$ k = y_i @f$. + * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it + * is efficient to update a matrix ZAZ through consecutive rank 1 updates with + * a single element of A and the corresponding row of Z. The BLAS function + * dsyr is used for this. + * + * The B matrix is has rows + * @f[ + * \boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ + * \varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)} + * \overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i) + * \omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)} + * \overline{q}_i^{(kj)} \right) \right] + * \boldsymbol{\delta}_{kj}' + * @f] + * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$, + * and one for when it is 0. The 3D simplex difference matrix is used here, in + * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$. + * + * Finally, the following system is solved + * @f[ + * (\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} = + * (\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}' + * \textbf{B}) + * @f] + * solving this system is done through dposv(). + * + * @todo + * Consider using CblasColMajor everywhere + * + * @param[in,out] model model to be updated + * @param[in] data data used in model + * @param[in] work allocated workspace to use + */ +void gensvm_get_update(struct GenModel *model, struct GenData *data, + struct GenWork *work) +{ + int status; + long i, j; + + long m = model->m; + long K = model->K; + + gensvm_get_ZAZ_ZB(model, data, work); + + // Calculate right-hand side of system we want to solve + // dsymm performs ZB := 1.0 * (ZAZ) * Vbar + 1.0 * ZB + // the right-hand side is thus stored in ZB after this call + // Note: LDB and LDC are second dimensions of the matrices due to + // Row-Major order + cblas_dsymm(CblasRowMajor, CblasLeft, CblasUpper, m+1, K-1, 1, + work->ZAZ, m+1, model->V, K-1, 1.0, work->ZB, K-1); + + // Calculate left-hand side of system we want to solve + // Add lambda to all diagonal elements except the first one. Recall + // that ZAZ is of size m+1 and is symmetric. + for (i=m+2; i<=m*(m+2); i+=m+2) + work->ZAZ[i] += model->lambda; + + // Lapack uses column-major order, so we transform the ZB matrix to + // correspond to this. + for (i=0; i<m+1; i++) + for (j=0; j<K-1; j++) + work->ZBc[j*(m+1)+i] = work->ZB[i*(K-1)+j]; + + // Solve the system using dposv. Note that above the upper triangular + // part has always been used in row-major order for ZAZ. This + // corresponds to the lower triangular part in column-major order. + status = dposv('L', m+1, K-1, work->ZAZ, m+1, work->ZBc, m+1); + + // Use dsysv as fallback, for when the ZAZ matrix is not positive + // semi-definite for some reason (perhaps due to rounding errors). + // This step shouldn't be necessary but is included for safety. + if (status != 0) { + err("[GenSVM Warning]: Received nonzero status from " + "dposv: %i\n", status); + int *IPIV = Malloc(int, m+1); + double *WORK = Malloc(double, 1); + status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc, + m+1, WORK, -1); + + int LWORK = WORK[0]; + WORK = Realloc(WORK, double, LWORK); + status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc, + m+1, WORK, LWORK); + if (status != 0) + err("[GenSVM Warning]: Received nonzero " + "status from dsysv: %i\n", status); + free(WORK); + WORK = NULL; + free(IPIV); + IPIV = NULL; + } + + // the solution is now stored in ZBc, in column-major order. Here we + // convert this back to row-major order + for (i=0; i<m+1; i++) + for (j=0; j<K-1; j++) + work->ZB[i*(K-1)+j] = work->ZBc[j*(m+1)+i]; + + // copy the old V to Vbar and the new solution to V + for (i=0; i<m+1; i++) { + for (j=0; j<K-1; j++) { + matrix_set(model->Vbar, K-1, i, j, + matrix_get(model->V, K-1, i, j)); + matrix_set(model->V, K-1, i, j, + matrix_get(work->ZB, K-1, i, j)); + } + } +} + +/** + * @brief Calculate Z'*A*Z and Z'*B for dense matrices + * + * @details + * This function calculates the matrices Z'*A*Z and Z'*B for the case where Z + * is stored as a dense matrix. It calculates the Z'*A*Z product by + * constructing a matrix LZ = (A^(1/2) * Z), and calculating (LZ)'*(LZ) with + * the BLAS dsyrk function. The matrix Z'*B is calculated with successive + * rank-1 updates using the BLAS dger function. These functions came out as + * the most efficient way to do these computations in several simulation + * studies. + * + * @param[in] model a GenModel holding the current model + * @param[in] data a GenData with the data + * @param[in,out] work an allocated GenWork structure, contains + * updated ZAZ and ZB matrices on exit. + */ +void gensvm_get_ZAZ_ZB_dense(struct GenModel *model, struct GenData *data, + struct GenWork *work) +{ + long i; + double alpha, sqalpha; + + long n = model->n; + long m = model->m; + long K = model->K; + + // generate Z'*A*Z and Z'*B by rank 1 operations + for (i=0; i<n; i++) { + alpha = gensvm_get_alpha_beta(model, data, i, work->beta); + + // calculate row of matrix LZ, which is a scalar + // multiplication of sqrt(alpha_i) and row z_i' of Z + // Note that we use the fact that the first column of Z is + // always 1, by only computing the product for m values and + // copying the first element over. + sqalpha = sqrt(alpha); + work->LZ[i*(m+1)] = sqalpha; + cblas_daxpy(m, sqalpha, &data->Z[i*(m+1)+1], 1, + &work->LZ[i*(m+1)+1], 1); + + // rank 1 update of matrix Z'*B + // Note: LDA is the second dimension of ZB because of + // Row-Major order + cblas_dger(CblasRowMajor, m+1, K-1, 1, &data->Z[i*(m+1)], 1, + work->beta, 1, work->ZB, K-1); + } + + // calculate Z'*A*Z by symmetric multiplication of LZ with itself + // (ZAZ = (LZ)' * (LZ) + cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, m+1, n, 1.0, + work->LZ, m+1, 0.0, work->ZAZ, m+1); +} + +/** + * @brief Calculate Z'*A*Z and Z'*B for sparse matrices + * + * @details + * This function calculates the matrices Z'*A*Z and Z'*B for the case where Z + * is stored as a CSR sparse matrix (GenSparse structure). It computes only + * the products of the Z'*A*Z matrix that need to be computed, and updates the + * Z'*B matrix row-wise for each non-zero element of a row of Z, using a BLAS + * daxpy call. + * + * @sa + * gensvm_get_ZAZ_ZB() + * gensvm_get_ZAZ_ZB_dense() + * + * @param[in] model a GenModel holding the current model + * @param[in] data a GenData with the data + * @param[in,out] work an allocated GenWork structure, contains + * updated ZAZ and ZB matrices on exit. + */ +void gensvm_get_ZAZ_ZB_sparse(struct GenModel *model, struct GenData *data, + struct GenWork *work) +{ + long i, j, k, jj, kk, jj_start, jj_end, K, + n_row = data->spZ->n_row, + n_col = data->spZ->n_col; + double alpha, z_ij; + + K = model->K; + + int *Zia = data->spZ->ia; + int *Zja = data->spZ->ja; + double *vals = data->spZ->values; + + for (i=0; i<n_row; i++) { + alpha = gensvm_get_alpha_beta(model, data, i, work->beta); + + jj_start = Zia[i]; + jj_end = Zia[i+1]; + + for (jj=jj_start; jj<jj_end; jj++) { + j = Zja[jj]; + z_ij = vals[jj]; + cblas_daxpy(K-1, z_ij, work->beta, 1, + &work->ZB[j*(K-1)], 1); + z_ij *= alpha; + for (kk=jj; kk<jj_end; kk++) { + k = Zja[kk]; + matrix_add(work->ZAZ, n_col, j, k, + z_ij*vals[kk]); + } + } + } +} + + +void gensvm_get_ZAZ_ZB(struct GenModel *model, struct GenData *data, + struct GenWork *work) +{ + gensvm_reset_work(work); + + if (data->spZ == NULL) { + gensvm_get_ZAZ_ZB_dense(model, data, work); + } else { + gensvm_get_ZAZ_ZB_sparse(model, data, work); + } +} + +/** + * @brief Solve AX = B where A is symmetric positive definite. + * + * @details + * Solve a linear system of equations AX = B where A is symmetric positive + * definite. This function is a wrapper for the external LAPACK routine + * dposv. + * + * @param[in] UPLO which triangle of A is stored + * @param[in] N order of A + * @param[in] NRHS number of columns of B + * @param[in,out] A double precision array of size (LDA, N). On + * exit contains the upper or lower factor of the + * Cholesky factorization of A. + * @param[in] LDA leading dimension of A + * @param[in,out] B double precision array of size (LDB, NRHS). On + * exit contains the N-by-NRHS solution matrix X. + * @param[in] LDB the leading dimension of B + * @returns info parameter which contains the status of the + * computation: + * - =0: success + * - <0: if -i, the i-th argument had + * an illegal value + * - >0: if i, the leading minor of A + * was not positive definite + * + * See the LAPACK documentation at: + * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html + */ +int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, + int LDB) +{ + extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A, + int *LDAp, double *B, int *LDBp, int *INFOp); + int INFO; + dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO); + return INFO; +} + +/** + * @brief Solve a system of equations AX = B where A is symmetric. + * + * @details + * Solve a linear system of equations AX = B where A is symmetric. This + * function is a wrapper for the external LAPACK routine dsysv. + * + * @param[in] UPLO which triangle of A is stored + * @param[in] N order of A + * @param[in] NRHS number of columns of B + * @param[in,out] A double precision array of size (LDA, N). On + * exit contains the block diagonal matrix D and + * the multipliers used to obtain the factor U or + * L from the factorization A = U*D*U**T or + * A = L*D*L**T. + * @param[in] LDA leading dimension of A + * @param[in] IPIV integer array containing the details of D + * @param[in,out] B double precision array of size (LDB, NRHS). On + * exit contains the N-by-NRHS matrix X + * @param[in] LDB leading dimension of B + * @param[out] WORK double precision array of size max(1,LWORK). On + * exit, WORK(1) contains the optimal LWORK + * @param[in] LWORK the length of WORK, can be used for determining + * the optimal blocksize for dsystrf. + * @returns info parameter which contains the status of the + * computation: + * - =0: success + * - <0: if -i, the i-th argument had an + * illegal value + * - >0: if i, D(i, i) is exactly zero, + * no solution can be computed. + * + * See the LAPACK documentation at: + * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html + */ +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, + double *B, int LDB, double *WORK, int LWORK) +{ + extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A, + int *LDAp, int *IPIV, double *B, int *LDBp, + double *WORK, int *LWORK, int *INFOp); + int INFO; + dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO); + return INFO; +} diff --git a/tests/data/test_file_read_data_sparse.txt b/tests/data/test_file_read_data_sparse.txt new file mode 100644 index 0000000..98b9ccf --- /dev/null +++ b/tests/data/test_file_read_data_sparse.txt @@ -0,0 +1,12 @@ +10 +3 +0.0 0.7016517970438980 0.0 2 +0.0 0.0 0.0370930278245423 1 +0.0 0.0 0.0 3 +0.0 0.4853242744610165 0.0 4 +0.7630904372339489 0.0 0.0 3 +0.0 0.0 0.0 1 +0.0 0.0 0.0 2 +0.0 0.0 0.0 3 +0.0 0.0 0.0 4 +0.0 0.0 0.0 1 diff --git a/tests/src/test_gensvm_init.c b/tests/src/test_gensvm_init.c index 8cacd9b..16f5dfd 100644 --- a/tests/src/test_gensvm_init.c +++ b/tests/src/test_gensvm_init.c @@ -8,7 +8,7 @@ #include "minunit.h" #include "gensvm_init.h" -char *test_init_null() +char *test_init_null_dense() { int i; long n = 5, @@ -63,9 +63,68 @@ char *test_init_null() return NULL; } -char *test_init_seed() +char *test_init_null_sparse() { + int i; + long n = 5, + m = 2, + K = 3; + double value; + struct GenModel *model = gensvm_init_model(); + struct GenData *data = gensvm_init_data(); + + // start test code + model->n = n; + model->m = m; + model->K = K; + gensvm_allocate_model(model); + + data->n = n; + data->m = m; + data->K = K; + data->RAW = Calloc(double, n*(m+1)); + for (i=0; i<n; i++) { + matrix_set(data->RAW, m+1, i, 0, 1.0); + matrix_set(data->RAW, m+1, i, 1, i); + matrix_set(data->RAW, m+1, i, 2, -i); + } + + data->Z = data->RAW; + data->spZ = gensvm_dense_to_sparse(data->Z, n, m+1); + free(data->RAW); + data->RAW = NULL; + data->Z = NULL; + + gensvm_init_V(NULL, model, data); + // first row all ones + value = matrix_get(model->V, K-1, 0, 0); + mu_assert(value == 1.0, "Incorrect value at 0, 0"); + value = matrix_get(model->V, K-1, 0, 1); + mu_assert(value == 1.0, "Incorrect value at 0, 1"); + + // second row between -1 and 0.25 + value = matrix_get(model->V, K-1, 1, 0); + mu_assert(value >= -1.0 && value <= 0.25, "Incorrect value at 1, 0"); + value = matrix_get(model->V, K-1, 1, 1); + mu_assert(value >= -1.0 && value <= 0.25, "Incorrect value at 1, 1"); + + // third row between -0.25 and 1 + value = matrix_get(model->V, K-1, 2, 0); + mu_assert(value >= -0.25 && value <= 1.0, "Incorrect value at 2, 0"); + value = matrix_get(model->V, K-1, 2, 1); + mu_assert(value >= -0.25 && value <= 1.0, "Incorrect value at 2, 1"); + + // end test code + + gensvm_free_model(model); + gensvm_free_data(data); + + return NULL; +} + +char *test_init_seed() +{ long n = 7, m = 5, K = 3; @@ -203,7 +262,8 @@ char *test_init_weights_wrong() char *all_tests() { mu_suite_start(); - mu_run_test(test_init_null); + mu_run_test(test_init_null_dense); + mu_run_test(test_init_null_sparse); mu_run_test(test_init_seed); mu_run_test(test_init_weights_1); mu_run_test(test_init_weights_2); diff --git a/tests/src/test_gensvm_io.c b/tests/src/test_gensvm_io.c index c97e2d8..208b894 100644 --- a/tests/src/test_gensvm_io.c +++ b/tests/src/test_gensvm_io.c @@ -81,6 +81,102 @@ char *test_gensvm_read_data() return NULL; } +char *test_gensvm_read_data_sparse() +{ + char *filename = "./data/test_file_read_data_sparse.txt"; + struct GenData *data = gensvm_init_data(); + + // start test code // + gensvm_read_data(data, filename); + + // check if dimensions are correctly read + mu_assert(data->n == 10, "Incorrect value for n"); + mu_assert(data->m == 3, "Incorrect value for m"); + mu_assert(data->r == 3, "Incorrect value for r"); + mu_assert(data->K == 4, "Incorrect value for K"); + + // check if dense data pointers are NULL + mu_assert(data->Z == NULL, "Z pointer isn't NULL"); + mu_assert(data->RAW == NULL, "RAW pointer isn't NULL"); + + // check sparse data structure + mu_assert(data->spZ != NULL, "spZ is NULL"); + mu_assert(data->spZ->nnz == 14, "Incorrect nnz"); + mu_assert(data->spZ->n_row == 10, "Incorrect n_row"); + mu_assert(data->spZ->n_col == 4, "Incorrect n_col"); + + // check sparse values + mu_assert(data->spZ->values[0] == 1.0, + "Incorrect nonzero value at 0"); + mu_assert(data->spZ->values[1] == 0.7016517970438980, + "Incorrect nonzero value at 1"); + mu_assert(data->spZ->values[2] == 1.0, + "Incorrect nonzero value at 2"); + mu_assert(data->spZ->values[3] == 0.0370930278245423, + "Incorrect nonzero value at 3"); + mu_assert(data->spZ->values[4] == 1.0, + "Incorrect nonzero value at 4"); + mu_assert(data->spZ->values[5] == 1.0, + "Incorrect nonzero value at 5"); + mu_assert(data->spZ->values[6] == 0.4853242744610165, + "Incorrect nonzero value at 6"); + mu_assert(data->spZ->values[7] == 1.0, + "Incorrect nonzero value at 7"); + mu_assert(data->spZ->values[8] == 0.7630904372339489, + "Incorrect nonzero value at 8"); + mu_assert(data->spZ->values[9] == 1.0, + "Incorrect nonzero value at 9"); + mu_assert(data->spZ->values[10] == 1.0, + "Incorrect nonzero value at 10"); + mu_assert(data->spZ->values[11] == 1.0, + "Incorrect nonzero value at 11"); + mu_assert(data->spZ->values[12] == 1.0, + "Incorrect nonzero value at 12"); + mu_assert(data->spZ->values[13] == 1.0, + "Incorrect nonzero value at 13"); + + // check sparse row lengths + mu_assert(data->spZ->ia[0] == 0, "Incorrect ia value at 0"); + mu_assert(data->spZ->ia[1] == 2, "Incorrect ia value at 1"); + mu_assert(data->spZ->ia[2] == 4, "Incorrect ia value at 2"); + mu_assert(data->spZ->ia[3] == 5, "Incorrect ia value at 3"); + mu_assert(data->spZ->ia[4] == 7, "Incorrect ia value at 4"); + mu_assert(data->spZ->ia[5] == 9, "Incorrect ia value at 5"); + mu_assert(data->spZ->ia[6] == 10, "Incorrect ia value at 5"); + mu_assert(data->spZ->ia[7] == 11, "Incorrect ia value at 5"); + mu_assert(data->spZ->ia[8] == 12, "Incorrect ia value at 5"); + mu_assert(data->spZ->ia[9] == 13, "Incorrect ia value at 5"); + mu_assert(data->spZ->ia[10] == 14, "Incorrect ia value at 5"); + + // check sparse column indices + mu_assert(data->spZ->ja[0] == 0, "Incorrect ja value at 0"); + mu_assert(data->spZ->ja[1] == 2, "Incorrect ja value at 1"); + mu_assert(data->spZ->ja[2] == 0, "Incorrect ja value at 2"); + mu_assert(data->spZ->ja[3] == 3, "Incorrect ja value at 3"); + mu_assert(data->spZ->ja[4] == 0, "Incorrect ja value at 4"); + mu_assert(data->spZ->ja[5] == 0, "Incorrect ja value at 5"); + mu_assert(data->spZ->ja[6] == 2, "Incorrect ja value at 6"); + mu_assert(data->spZ->ja[7] == 0, "Incorrect ja value at 7"); + mu_assert(data->spZ->ja[8] == 1, "Incorrect ja value at 8"); + mu_assert(data->spZ->ja[9] == 0, "Incorrect ja value at 7"); + mu_assert(data->spZ->ja[10] == 0, "Incorrect ja value at 7"); + mu_assert(data->spZ->ja[11] == 0, "Incorrect ja value at 7"); + mu_assert(data->spZ->ja[12] == 0, "Incorrect ja value at 7"); + mu_assert(data->spZ->ja[13] == 0, "Incorrect ja value at 7"); + + // check if labels read correctly + mu_assert(data->y[0] == 2, "Incorrect label read at 0"); + mu_assert(data->y[1] == 1, "Incorrect label read at 1"); + mu_assert(data->y[2] == 3, "Incorrect label read at 2"); + mu_assert(data->y[3] == 4, "Incorrect label read at 3"); + mu_assert(data->y[4] == 3, "Incorrect label read at 4"); + + // end test code // + + gensvm_free_data(data); + return NULL; +} + char *test_gensvm_read_data_no_label() { char *filename = "./data/test_file_read_data_no_label.txt"; @@ -419,6 +515,7 @@ char *all_tests() { mu_suite_start(); mu_run_test(test_gensvm_read_data); + mu_run_test(test_gensvm_read_data_sparse); mu_run_test(test_gensvm_read_data_no_label); mu_run_test(test_gensvm_read_model); mu_run_test(test_gensvm_write_model); diff --git a/tests/src/test_gensvm_optimize.c b/tests/src/test_gensvm_optimize.c index d7326f7..c233ec3 100644 --- a/tests/src/test_gensvm_optimize.c +++ b/tests/src/test_gensvm_optimize.c @@ -324,453 +324,6 @@ char *test_gensvm_get_loss_2() return NULL; } -char *test_gensvm_calculate_omega() -{ - struct GenModel *model = gensvm_init_model(); - struct GenData *data = gensvm_init_data(); - - int n = 5, - m = 3, - K = 3; - - data->n = n; - data->m = m; - data->K = K; - - data->y = Calloc(long, n); - data->y[0] = 2; - data->y[1] = 1; - data->y[2] = 3; - data->y[3] = 2; - data->y[4] = 3; - - model->n = n; - model->m = m; - model->K = K; - model->p = 1.213; - gensvm_allocate_model(model); - - matrix_set(model->H, model->K, 0, 0, 0.8465725800087526); - matrix_set(model->H, model->K, 0, 1, 1.2876921677680249); - matrix_set(model->H, model->K, 0, 2, 1.0338561593991831); - matrix_set(model->H, model->K, 1, 0, 1.1891038526621391); - matrix_set(model->H, model->K, 1, 1, 0.4034192031226095); - matrix_set(model->H, model->K, 1, 2, 1.5298894170910078); - matrix_set(model->H, model->K, 2, 0, 1.3505111116922732); - matrix_set(model->H, model->K, 2, 1, 1.4336863304586636); - matrix_set(model->H, model->K, 2, 2, 1.7847533480330757); - matrix_set(model->H, model->K, 3, 0, 1.7712504341475415); - matrix_set(model->H, model->K, 3, 1, 1.6905146737773038); - matrix_set(model->H, model->K, 3, 2, 0.8189336598535132); - matrix_set(model->H, model->K, 4, 0, 0.6164203008844277); - matrix_set(model->H, model->K, 4, 1, 0.2456444285093894); - matrix_set(model->H, model->K, 4, 2, 0.8184193969741095); - - // start test code // - mu_assert(fabs(gensvm_calculate_omega(model, data, 0) - - 0.7394076262220608) < 1e-14, - "Incorrect omega at 0"); - mu_assert(fabs(gensvm_calculate_omega(model, data, 1) - - 0.7294526264247443) < 1e-14, - "Incorrect omega at 1"); - mu_assert(fabs(gensvm_calculate_omega(model, data, 2) - - 0.6802499471888741) < 1e-14, - "Incorrect omega at 2"); - mu_assert(fabs(gensvm_calculate_omega(model, data, 3) - - 0.6886792032441273) < 1e-14, - "Incorrect omega at 3"); - mu_assert(fabs(gensvm_calculate_omega(model, data, 4) - - 0.8695329737474283) < 1e-14, - "Incorrect omega at 4"); - - // end test code // - - gensvm_free_model(model); - gensvm_free_data(data); - - return NULL; -} - -char *test_gensvm_majorize_is_simple() -{ - struct GenModel *model = gensvm_init_model(); - struct GenData *data = gensvm_init_data(); - - int n = 5, - m = 3, - K = 3; - - data->n = n; - data->m = m; - data->K = K; - - data->y = Calloc(long, n); - data->y[0] = 2; - data->y[1] = 1; - data->y[2] = 3; - data->y[3] = 2; - data->y[4] = 3; - - model->n = n; - model->m = m; - model->K = K; - model->p = 1.213; - gensvm_allocate_model(model); - - matrix_set(model->H, model->K, 0, 0, 0.8465725800087526); - matrix_set(model->H, model->K, 0, 1, 1.2876921677680249); - matrix_set(model->H, model->K, 0, 2, 1.0338561593991831); - matrix_set(model->H, model->K, 1, 0, 1.1891038526621391); - matrix_set(model->H, model->K, 1, 1, 0.4034192031226095); - matrix_set(model->H, model->K, 1, 2, 0.0); - matrix_set(model->H, model->K, 2, 0, 0.5); - matrix_set(model->H, model->K, 2, 1, 0.0); - matrix_set(model->H, model->K, 2, 2, 1.1); - matrix_set(model->H, model->K, 3, 0, 0.0); - matrix_set(model->H, model->K, 3, 1, 0.0); - matrix_set(model->H, model->K, 3, 2, 0.8189336598535132); - matrix_set(model->H, model->K, 4, 0, 0.6164203008844277); - matrix_set(model->H, model->K, 4, 1, 0.2456444285093894); - matrix_set(model->H, model->K, 4, 2, 0.8184193969741095); - - // start test code // - mu_assert(gensvm_majorize_is_simple(model, data, 0) == false, - "Incorrect simple at 0"); - mu_assert(gensvm_majorize_is_simple(model, data, 1) == true, - "Incorrect simple at 1"); - mu_assert(gensvm_majorize_is_simple(model, data, 2) == true, - "Incorrect simple at 2"); - mu_assert(gensvm_majorize_is_simple(model, data, 3) == true, - "Incorrect simple at 3"); - mu_assert(gensvm_majorize_is_simple(model, data, 4) == false, - "Incorrect simple at 4"); - - // end test code // - - gensvm_free_model(model); - gensvm_free_data(data); - return NULL; -} - -char *test_gensvm_calculate_ab_non_simple() -{ - struct GenModel *model = gensvm_init_model(); - int n = 1, - m = 1, - K = 1; - - model->n = n; - model->m = m; - model->K = K; - model->kappa = 0.5; - - gensvm_allocate_model(model); - - // start test code // - double a, b_aq; - - // tests with p = 2 - model->p = 2.0; - matrix_set(model->Q, K, 0, 0, -1.0); - gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 1.5) < 1e-14, "Incorrect value for a (1)"); - mu_assert(fabs(b_aq - 1.25) < 1e-14, "Incorrect value for b (1)"); - - matrix_set(model->Q, K, 0, 0, 0.5); - gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 1.5) < 1e-14, "Incorrect value for a (2)"); - mu_assert(fabs(b_aq - 0.0277777777777778) < 1e-14, - "Incorrect value for b (2)"); - - matrix_set(model->Q, K, 0, 0, 2.0); - gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 1.5) < 1e-14, "Incorrect value for a (3)"); - mu_assert(fabs(b_aq - 0.0) < 1e-14, "Incorrect value for b (3)"); - - // tests with p != 2 (namely, 1.5) - // Note that here (p + kappa - 1)/(p - 2) = -2 - // - // We distinguish: q <= (p + kappa - 1)/(p - 2) - // q in (p + kappa - 1)/(p - 2), -kappa] - // q in (-kappa, 1] - // q > 1 - model->p = 1.5; - matrix_set(model->Q, K, 0, 0, -3.0); - gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 0.312018860376691) < 1e-14, - "Incorrect value for a (4)"); - mu_assert(fabs(b_aq - 1.35208172829900) < 1e-14, - "Incorrect value for b (4)"); - - matrix_set(model->Q, K, 0, 0, -1.0); - gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 0.866025403784439) < 1e-14, - "Incorrect value for a (5)"); - mu_assert(fabs(b_aq - 0.838525491562421) < 1e-14, - "Incorrect value for b (5)"); - - matrix_set(model->Q, K, 0, 0, 0.5); - gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 0.866025403784439) < 1e-14, - "Incorrect value for a (6)"); - mu_assert(fabs(b_aq - 0.0721687836487032) < 1e-14, - "Incorrect value for b (6)"); - - matrix_set(model->Q, K, 0, 0, 2.0); - gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 0.245495126515491) < 1e-14, - "Incorrect value for a (7)"); - mu_assert(fabs(b_aq - 0.0) < 1e-14, - "Incorrect value for b (7)"); - // end test code // - - gensvm_free_model(model); - - return NULL; -} - -char *test_gensvm_calculate_ab_simple() -{ - struct GenModel *model = gensvm_init_model(); - int n = 1, - m = 1, - K = 1; - - model->n = n; - model->m = m; - model->K = K; - model->kappa = 0.5; - - gensvm_allocate_model(model); - - // start test code // - double a, b_aq; - - matrix_set(model->Q, K, 0, 0, -1.5); - gensvm_calculate_ab_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 0.142857142857143) < 1e-14, - "Incorrect value for a (1)"); - mu_assert(fabs(b_aq - 0.5) < 1e-14, - "Incorrect value for b (1)"); - - matrix_set(model->Q, K, 0, 0, 0.75); - gensvm_calculate_ab_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 0.333333333333333) < 1e-14, - "Incorrect value for a (2)"); - mu_assert(fabs(b_aq - 0.0833333333333333) < 1e-14, - "Incorrect value for b (2)"); - - matrix_set(model->Q, K, 0, 0, 2.0); - gensvm_calculate_ab_simple(model, 0, 0, &a, &b_aq); - mu_assert(fabs(a - 0.142857142857143) < 1e-14, - "Incorrect value for a (3)"); - mu_assert(fabs(b_aq - 0.0) < 1e-14, - "Incorrect value for b (3)"); - - // end test code // - gensvm_free_model(model); - - return NULL; -} - -/* -char *test_gensvm_update_B() -{ - struct GenData *data = gensvm_init_data(); - struct GenModel *model = gensvm_init_model(); - int n = 3, - m = 2, - K = 3; - - data->n = n; - data->m = m; - data->K = K; - data->y = Calloc(long, K); - data->y[0] = 1; - data->y[1] = 2; - data->y[2] = 3; - - model->n = n; - model->m = m; - model->K = K; - model->kappa = 0.5; - - gensvm_allocate_model(model); - gensvm_simplex(model); - gensvm_simplex_diff(model); - gensvm_initialize_weights(data, model); - - // start test code // - double *B = Calloc(double, n*(K-1)); - double omega = 1.0; - - gensvm_update_B(model, 0, 0, 0.75, omega, B); - gensvm_update_B(model, 0, 1, 0.1, omega, B); - gensvm_update_B(model, 0, 2, 0.25, omega, B); - mu_assert(fabs(matrix_get(B, K-1, 0, 0) - -0.0750000000000000) < 1e-14, - "Incorrect value of B at 0, 0 (1)"); - mu_assert(fabs(matrix_get(B, K-1, 0, 1) - -0.0721687836487032) < 1e-14, - "Incorrect value of B at 0, 1 (1)"); - - omega = 0.213; - gensvm_update_B(model, 1, 0, 0.75, omega, B); - gensvm_update_B(model, 1, 1, 0.1, omega, B); - gensvm_update_B(model, 1, 2, 0.25, omega, B); - mu_assert(fabs(matrix_get(B, K-1, 1, 0) - 0.0621250000000000) < 1e-14, - "Incorrect value of B at 0, 0 (2)"); - mu_assert(fabs(matrix_get(B, K-1, 1, 1) - -0.0153719509171738) < 1e-14, - "Incorrect value of B at 0, 1 (2)"); - - model->rho[2] = 1.213; - gensvm_update_B(model, 2, 0, 0.75, omega, B); - gensvm_update_B(model, 2, 1, 0.1, omega, B); - gensvm_update_B(model, 2, 2, 0.25, omega, B); - mu_assert(fabs(matrix_get(B, K-1, 2, 0) - 0.0279899750000000) < 1e-14, - "Incorrect value of B at 0, 0 (3)"); - mu_assert(fabs(matrix_get(B, K-1, 2, 1) - 0.0633969999726082) < 1e-14, - "Incorrect value of B at 0, 1 (3)"); - // end test code // - - gensvm_free_data(data); - gensvm_free_model(model); - - return NULL; -} - -char *test_gensvm_get_Avalue_update_B() -{ - mu_test_missing(); - return NULL; -} -*/ - -char *test_gensvm_get_update() -{ - struct GenModel *model = gensvm_init_model(); - struct GenData *data = gensvm_init_data(); - int n = 8, - m = 3, - K = 3; - - model->n = n; - model->m = m; - model->K = K; - struct GenWork *work = gensvm_init_work(model); - - // initialize data - data->n = n; - data->m = m; - data->K = K; - - data->y = Calloc(long, n); - data->y[0] = 2; - data->y[1] = 1; - data->y[2] = 3; - data->y[3] = 2; - data->y[4] = 3; - data->y[5] = 3; - data->y[6] = 1; - data->y[7] = 2; - - data->Z = Calloc(double, n*(m+1)); - matrix_set(data->Z, data->m+1, 0, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 0, 1, 0.6437306339619082); - matrix_set(data->Z, data->m+1, 0, 2, -0.3276778319121999); - matrix_set(data->Z, data->m+1, 0, 3, 0.1564053473463392); - matrix_set(data->Z, data->m+1, 1, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 1, 1, -0.8683091763200105); - matrix_set(data->Z, data->m+1, 1, 2, -0.6910830836015162); - matrix_set(data->Z, data->m+1, 1, 3, -0.9675430665130734); - matrix_set(data->Z, data->m+1, 2, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 2, 1, -0.5024888699077029); - matrix_set(data->Z, data->m+1, 2, 2, -0.9649738292750712); - matrix_set(data->Z, data->m+1, 2, 3, 0.0776560791351473); - matrix_set(data->Z, data->m+1, 3, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 3, 1, 0.8206429991392579); - matrix_set(data->Z, data->m+1, 3, 2, -0.7255681388968501); - matrix_set(data->Z, data->m+1, 3, 3, -0.9475952272877165); - matrix_set(data->Z, data->m+1, 4, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 4, 1, 0.3426050950418613); - matrix_set(data->Z, data->m+1, 4, 2, -0.5340602451864306); - matrix_set(data->Z, data->m+1, 4, 3, -0.7159704241662815); - matrix_set(data->Z, data->m+1, 5, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 5, 1, -0.3077314049206620); - matrix_set(data->Z, data->m+1, 5, 2, 0.1141288036288195); - matrix_set(data->Z, data->m+1, 5, 3, -0.7060114827535847); - matrix_set(data->Z, data->m+1, 6, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 6, 1, 0.6301294373610109); - matrix_set(data->Z, data->m+1, 6, 2, -0.9983027363627769); - matrix_set(data->Z, data->m+1, 6, 3, -0.9365684178444004); - matrix_set(data->Z, data->m+1, 7, 0, 1.0000000000000000); - matrix_set(data->Z, data->m+1, 7, 1, -0.0665379368401439); - matrix_set(data->Z, data->m+1, 7, 2, -0.1781385556871763); - matrix_set(data->Z, data->m+1, 7, 3, -0.7292593770500276); - - // initialize model - model->p = 1.1; - model->lambda = 0.123; - model->weight_idx = 1; - model->kappa = 0.5; - - // initialize matrices - gensvm_allocate_model(model); - gensvm_initialize_weights(data, model); - gensvm_simplex(model); - gensvm_simplex_diff(model); - - // initialize V - matrix_set(model->V, model->K-1, 0, 0, -0.7593642121025029); - matrix_set(model->V, model->K-1, 0, 1, -0.5497320698504756); - matrix_set(model->V, model->K-1, 1, 0, 0.2982680646268177); - matrix_set(model->V, model->K-1, 1, 1, -0.2491408622891925); - matrix_set(model->V, model->K-1, 2, 0, -0.3118572761092807); - matrix_set(model->V, model->K-1, 2, 1, 0.5461219445756100); - matrix_set(model->V, model->K-1, 3, 0, -0.3198994238626641); - matrix_set(model->V, model->K-1, 3, 1, 0.7134997072555367); - - // start test code // - - // these need to be prepared for the update call - gensvm_calculate_errors(model, data, work->ZV); - gensvm_calculate_huber(model); - - // run the actual update call - gensvm_get_update(model, data, work); - - // test values - mu_assert(fabs(matrix_get(model->V, model->K-1, 0, 0) - - -0.1323791019594062) < 1e-14, - "Incorrect value of model->V at 0, 0"); - mu_assert(fabs(matrix_get(model->V, model->K-1, 0, 1) - - -0.3598407983154332) < 1e-14, - "Incorrect value of model->V at 0, 1"); - mu_assert(fabs(matrix_get(model->V, model->K-1, 1, 0) - - 0.3532993103400935) < 1e-14, - "Incorrect value of model->V at 1, 0"); - mu_assert(fabs(matrix_get(model->V, model->K-1, 1, 1) - - -0.4094572388475382) < 1e-14, - "Incorrect value of model->V at 1, 1"); - mu_assert(fabs(matrix_get(model->V, model->K-1, 2, 0) - - 0.1313169839871234) < 1e-14, - "Incorrect value of model->V at 2, 0"); - mu_assert(fabs(matrix_get(model->V, model->K-1, 2, 1) - - 0.2423439972728328) < 1e-14, - "Incorrect value of model->V at 2, 1"); - mu_assert(fabs(matrix_get(model->V, model->K-1, 3, 0) - - 0.0458431025455224) < 1e-14, - "Incorrect value of model->V at 3, 0"); - mu_assert(fabs(matrix_get(model->V, model->K-1, 3, 1) - - 0.4390030236354089) < 1e-14, - "Incorrect value of model->V at 3, 1"); - // end test code // - - gensvm_free_model(model); - gensvm_free_data(data); - gensvm_free_work(work); - - return NULL; -} - char *test_gensvm_calculate_errors() { struct GenModel *model = gensvm_init_model(); @@ -1128,323 +681,16 @@ char *test_gensvm_step_doubling() return NULL; } -char *test_dposv() -{ - int n = 6, - m = 5; - - // start test code // - double *A = Calloc(double, n*n); - double *B = Calloc(double, n*m); - - // We're only storing the upper triangular part of the symmetric - // matrix A. - matrix_set(A, n, 0, 0, 6.2522023496540386); - matrix_set(A, n, 0, 1, 1.2760969977888754); - matrix_set(A, n, 0, 2, 1.1267774552193974); - matrix_set(A, n, 0, 3, 0.8384267227932789); - matrix_set(A, n, 0, 4, 0.9588857509656767); - matrix_set(A, n, 0, 5, 0.7965747978871199); - matrix_set(A, n, 1, 1, 6.7539376310748924); - matrix_set(A, n, 1, 2, 0.5908599276261999); - matrix_set(A, n, 1, 3, 0.9987368128192129); - matrix_set(A, n, 1, 4, 1.1142949385131484); - matrix_set(A, n, 1, 5, 1.4150107613377123); - matrix_set(A, n, 2, 2, 7.3361678639533139); - matrix_set(A, n, 2, 3, 1.5596679563906113); - matrix_set(A, n, 2, 4, 0.8162441257417704); - matrix_set(A, n, 2, 5, 0.8701893160678078); - matrix_set(A, n, 3, 3, 6.8330423955320834); - matrix_set(A, n, 3, 4, 1.6081779105091201); - matrix_set(A, n, 3, 5, 1.0498769837396527); - matrix_set(A, n, 4, 4, 7.6810607313742949); - matrix_set(A, n, 4, 5, 1.1352511350739003); - matrix_set(A, n, 5, 5, 7.0573435553104567); - - // this is the matrix B (n x m), stored in COLUMN-MAJOR ORDER - B[0] = 0.5759809004700531; - B[1] = 0.4643751885289473; - B[2] = 0.1914807543974765; - B[3] = 0.2875503245961965; - B[4] = 0.0493123646253395; - B[5] = 0.4333053066976881; - B[6] = 0.4738306027724854; - B[7] = 0.2460182087225041; - B[8] = 0.1620492662433550; - B[9] = 0.9596380576403235; - B[10] = 0.7244837218691289; - B[11] = 0.9437116578537014; - B[12] = 0.3320986772155025; - B[13] = 0.4717424581951766; - B[14] = 0.9206089360217588; - B[15] = 0.7059004575000609; - B[16] = 0.1696670763906902; - B[17] = 0.4896586269167711; - B[18] = 0.9539497766794410; - B[19] = 0.2269749103273779; - B[20] = 0.8832156948007016; - B[21] = 0.4682217970327739; - B[22] = 0.5293439096127632; - B[23] = 0.8699136677253214; - B[24] = 0.1622687366790325; - B[25] = 0.4511598310105013; - B[26] = 0.5587302139109592; - B[27] = 0.7456952498557438; - B[28] = 0.5923112589693547; - B[29] = 0.2243481938151050; - - // note the 'L' here denotes the lower triangular part of A. Above we - // stored the upper triangular part of A in row-major order, so that's - // the lower triangular part in column-major order, which Lapack uses. - int status = dposv('L', n, m, A, n, B, n); - mu_assert(status == 0, "dposv didn't return status success"); - - // Since B now contains the solution in Column-Major order, we have to - // check it in that order. - - mu_assert(fabs(B[0] - 0.0770250502756885) < 1e-14, - "Incorrect value of B at 0"); - mu_assert(fabs(B[1] - 0.0449013611583528) < 1e-14, - "Incorrect value of B at 1"); - mu_assert(fabs(B[2] - 0.0028421256926631) < 1e-14, - "Incorrect value of B at 2"); - mu_assert(fabs(B[3] - 0.0238082780914757) < 1e-14, - "Incorrect value of B at 3"); - mu_assert(fabs(B[4] - -0.0213884392480803) < 1e-14, - "Incorrect value of B at 4"); - mu_assert(fabs(B[5] - 0.0432493445363141) < 1e-14, - "Incorrect value of B at 5"); - mu_assert(fabs(B[6] - 0.0469188408250497) < 1e-14, - "Incorrect value of B at 6"); - mu_assert(fabs(B[7] - -0.0188676544565197) < 1e-14, - "Incorrect value of B at 7"); - mu_assert(fabs(B[8] - -0.0268693544126544) < 1e-14, - "Incorrect value of B at 8"); - mu_assert(fabs(B[9] - 0.1139942447258460) < 1e-14, - "Incorrect value of B at 9"); - mu_assert(fabs(B[10] - 0.0539483576192093) < 1e-14, - "Incorrect value of B at 10"); - mu_assert(fabs(B[11] - 0.1098843745987866) < 1e-14, - "Incorrect value of B at 11"); - mu_assert(fabs(B[12] - 0.0142822905211067) < 1e-14, - "Incorrect value of B at 12"); - mu_assert(fabs(B[13] - 0.0425078586146396) < 1e-14, - "Incorrect value of B at 13"); - mu_assert(fabs(B[14] - 0.1022650353097420) < 1e-14, - "Incorrect value of B at 14"); - mu_assert(fabs(B[15] - 0.0700476338859921) < 1e-14, - "Incorrect value of B at 15"); - mu_assert(fabs(B[16] - -0.0171546096353451) < 1e-14, - "Incorrect value of B at 16"); - mu_assert(fabs(B[17] - 0.0389772844468578) < 1e-14, - "Incorrect value of B at 17"); - mu_assert(fabs(B[18] - 0.1231757430810565) < 1e-14, - "Incorrect value of B at 18"); - mu_assert(fabs(B[19] - -0.0246954324681607) < 1e-14, - "Incorrect value of B at 19"); - mu_assert(fabs(B[20] - 0.0853098528328778) < 1e-14, - "Incorrect value of B at 20"); - mu_assert(fabs(B[21] - 0.0155226252622933) < 1e-14, - "Incorrect value of B at 21"); - mu_assert(fabs(B[22] - 0.0305321945431931) < 1e-14, - "Incorrect value of B at 22"); - mu_assert(fabs(B[23] - 0.0965724559730953) < 1e-14, - "Incorrect value of B at 23"); - mu_assert(fabs(B[24] - -0.0106596940426243) < 1e-14, - "Incorrect value of B at 24"); - mu_assert(fabs(B[25] - 0.0446093337039209) < 1e-14, - "Incorrect value of B at 25"); - mu_assert(fabs(B[26] - 0.0517721408799503) < 1e-14, - "Incorrect value of B at 26"); - mu_assert(fabs(B[27] - 0.0807167333268751) < 1e-14, - "Incorrect value of B at 27"); - mu_assert(fabs(B[28] - 0.0499219869343351) < 1e-14, - "Incorrect value of B at 28"); - mu_assert(fabs(B[29] - -0.0023736192508975) < 1e-14, - "Incorrect value of B at 29"); - - // end test code // - - free(A); - free(B); - - return NULL; -} - -char *test_dsysv() -{ - int n = 6, - m = 5; - - // start test code // - double *A = Calloc(double, n*n); - double *B = Calloc(double, n*m); - - // only store the upper triangular part of A - matrix_set(A, n, 0, 0, 0.4543421836368821); - matrix_set(A, n, 0, 1, 0.8708338836669620); - matrix_set(A, n, 0, 2, 1.3638340495356920); - matrix_set(A, n, 0, 3, 0.8361050302144852); - matrix_set(A, n, 0, 4, 1.3203463886997013); - matrix_set(A, n, 0, 5, 0.3915472119381547); - matrix_set(A, n, 1, 1, 1.4781728513484600); - matrix_set(A, n, 1, 2, 1.7275151336935415); - matrix_set(A, n, 1, 3, 1.1817139356024176); - matrix_set(A, n, 1, 4, 0.7436086782250922); - matrix_set(A, n, 1, 5, 0.1101758222549450); - matrix_set(A, n, 2, 2, 1.9363682709237851); - matrix_set(A, n, 2, 3, 1.1255164391384127); - matrix_set(A, n, 2, 4, 1.0935575148560115); - matrix_set(A, n, 2, 5, 1.4678279983625921); - matrix_set(A, n, 3, 3, 1.7500757162326757); - matrix_set(A, n, 3, 4, 1.5490921663229316); - matrix_set(A, n, 3, 5, 1.0305675837706338); - matrix_set(A, n, 4, 4, 0.4015851628106807); - matrix_set(A, n, 4, 5, 1.2487496402900566); - matrix_set(A, n, 5, 5, 0.7245473723012897); - - // Store B in column-major order! - B[0] = 0.6037912122210694; - B[1] = 0.5464186020522516; - B[2] = 0.1810847918541411; - B[3] = 0.1418268895582175; - B[4] = 0.5459836535934901; - B[5] = 0.5890609930309275; - B[6] = 0.1128454279279324; - B[7] = 0.8930541056550655; - B[8] = 0.6946437745982983; - B[9] = 0.0955380494302154; - B[10] = 0.5750037200376288; - B[11] = 0.0326245221201559; - B[12] = 0.3336701777312929; - B[13] = 0.7648765739095678; - B[14] = 0.2662986413718805; - B[15] = 0.7850810368985298; - B[16] = 0.5432388739552745; - B[17] = 0.4387739258059151; - B[18] = 0.4257906469646436; - B[19] = 0.1272470768775465; - B[20] = 0.4276616397814972; - B[21] = 0.8137579718316245; - B[22] = 0.6849003723960281; - B[23] = 0.1768571691078990; - B[24] = 0.4183278358395650; - B[25] = 0.6517633972400351; - B[26] = 0.1154775550239331; - B[27] = 0.4217248849174023; - B[28] = 0.9179697263236190; - B[29] = 0.6532254399609347; - - // run dsysv, note that Lapack expects matrices to be in column-major - // order, so we can use the 'L' notation for the triangle of A, since - // we've stored the upper triangle in Row-Major order. - - int *IPIV = Calloc(int, n); - double *WORK = Calloc(double, 1); - int status; - - // first we determine the necessary size of the WORK array - status = dsysv('L', n, m, A, n, IPIV, B, n, WORK, -1); - mu_assert(status == 0, "dsysv workspace query failed"); - - int LWORK = WORK[0]; - WORK = Realloc(WORK, double, LWORK); - status = dsysv('L', n, m, A, n, IPIV, B, n, WORK, LWORK); - mu_assert(status == 0, "dsysv didn't return status success"); - - // Since B now contains the solution in Column-Major order, we have to - // check it in that order - - mu_assert(fabs(B[0] - 3.0915448286548806) < 1e-14, - "Incorrect value of B at 0"); - mu_assert(fabs(B[1] - -1.2114333666218096) < 1e-14, - "Incorrect value of B at 1"); - mu_assert(fabs(B[2] - -0.1734297056577389) < 1e-14, - "Incorrect value of B at 2"); - mu_assert(fabs(B[3] - -0.6989941801726605) < 1e-14, - "Incorrect value of B at 3"); - mu_assert(fabs(B[4] - 1.2577948324106381) < 1e-14, - "Incorrect value of B at 4"); - mu_assert(fabs(B[5] - -1.4956913279293909) < 1e-14, - "Incorrect value of B at 5"); - mu_assert(fabs(B[6] - -0.2923881304345451) < 1e-14, - "Incorrect value of B at 6"); - mu_assert(fabs(B[7] - 0.3467010144627596) < 1e-14, - "Incorrect value of B at 7"); - mu_assert(fabs(B[8] - 0.4892730831569431) < 1e-14, - "Incorrect value of B at 8"); - mu_assert(fabs(B[9] - 0.2811039364176572) < 1e-14, - "Incorrect value of B at 9"); - mu_assert(fabs(B[10] - -0.7323586733947237) < 1e-14, - "Incorrect value of B at 10"); - mu_assert(fabs(B[11] - 0.0214996365534143) < 1e-14, - "Incorrect value of B at 11"); - mu_assert(fabs(B[12] - -0.9355264353773129) < 1e-14, - "Incorrect value of B at 12"); - mu_assert(fabs(B[13] - 0.2709743256273919) < 1e-14, - "Incorrect value of B at 13"); - mu_assert(fabs(B[14] - 0.2328234557913496) < 1e-14, - "Incorrect value of B at 14"); - mu_assert(fabs(B[15] - 0.9367092697976086) < 1e-14, - "Incorrect value of B at 15"); - mu_assert(fabs(B[16] - -0.4501075692310449) < 1e-14, - "Incorrect value of B at 16"); - mu_assert(fabs(B[17] - 0.0416902255163366) < 1e-14, - "Incorrect value of B at 17"); - mu_assert(fabs(B[18] - 2.2216982312706905) < 1e-14, - "Incorrect value of B at 18"); - mu_assert(fabs(B[19] - -0.4820931673893176) < 1e-14, - "Incorrect value of B at 19"); - mu_assert(fabs(B[20] - -0.8456462251088332) < 1e-14, - "Incorrect value of B at 20"); - mu_assert(fabs(B[21] - -0.3761761825939751) < 1e-14, - "Incorrect value of B at 21"); - mu_assert(fabs(B[22] - 1.1921920764994696) < 1e-14, - "Incorrect value of B at 22"); - mu_assert(fabs(B[23] - -0.6897255145640184) < 1e-14, - "Incorrect value of B at 23"); - mu_assert(fabs(B[24] - 2.0325624816957180) < 1e-14, - "Incorrect value of B at 24"); - mu_assert(fabs(B[25] - -0.9900930297944344) < 1e-14, - "Incorrect value of B at 25"); - mu_assert(fabs(B[26] - -0.0462533459389938) < 1e-14, - "Incorrect value of B at 26"); - mu_assert(fabs(B[27] - 0.0960916931433909) < 1e-14, - "Incorrect value of B at 27"); - mu_assert(fabs(B[28] - 0.5805302287627144) < 1e-14, - "Incorrect value of B at 28"); - mu_assert(fabs(B[29] - -1.0897953557455400) < 1e-14, - "Incorrect value of B at 29"); - - free(WORK); - free(IPIV); - - // end test code // - - free(A); - free(B); - - return NULL; -} - char *all_tests() { mu_suite_start(); mu_run_test(test_gensvm_calculate_errors); - mu_run_test(test_gensvm_calculate_omega); - mu_run_test(test_gensvm_majorize_is_simple); - mu_run_test(test_gensvm_calculate_ab_non_simple); - mu_run_test(test_gensvm_calculate_ab_simple); mu_run_test(test_gensvm_get_loss_1); mu_run_test(test_gensvm_get_loss_2); mu_run_test(test_gensvm_calculate_huber); mu_run_test(test_gensvm_step_doubling); - mu_run_test(test_dposv); - mu_run_test(test_dsysv); - - mu_run_test(test_gensvm_get_update); mu_run_test(test_gensvm_optimize); return NULL; diff --git a/tests/src/test_gensvm_sparse.c b/tests/src/test_gensvm_sparse.c new file mode 100644 index 0000000..27f3e20 --- /dev/null +++ b/tests/src/test_gensvm_sparse.c @@ -0,0 +1,128 @@ +/** + * @file test_gensvm_sparse.c + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Unit tests for gensvm_sparse.c functions + */ + +#include "minunit.h" +#include "gensvm_sparse.h" + +char *test_init_free_sparse() +{ + struct GenSparse *sp = gensvm_init_sparse(); + + mu_assert(sp->nnz == 0, "nnz not initialized correctly"); + mu_assert(sp->n_row == 0, "n not initialized correctly"); + mu_assert(sp->n_col == 0, "m not initialized correctly"); + mu_assert(sp->values == NULL, "values not initialized correctly"); + mu_assert(sp->ia == NULL, "ia not initialized correctly"); + mu_assert(sp->ja == NULL, "ja not initialized correctly"); + + gensvm_free_sparse(sp); + + return NULL; +} + +char *test_count_nnz() +{ + double *A = Calloc(double, 3*2); + A[0] = 1.0; + A[3] = 1.0; + mu_assert(gensvm_count_nnz(A, 3, 2) == 2, "Incorrect nnz (1)"); + + A[1] = 3.0; + A[4] = 1e-20; + mu_assert(gensvm_count_nnz(A, 3, 2) == 4, "Incorrect nnz (2)"); + + free(A); + + return NULL; +} + +char *test_gensvm_could_sparse() +{ + double *A = Calloc(double, 5*2); + A[0] = 1.0; + + mu_assert(gensvm_could_sparse(A, 5, 2) == true, + "Incorrect could sparse (1)"); + A[1] = -1.0; + mu_assert(gensvm_could_sparse(A, 5, 2) == false, + "Incorrect could sparse (2)"); + + free(A); + return NULL; +} + +char *test_dense_to_sparse() +{ + double *A = Calloc(double, 4*4); + A[4] = 5; + A[5] = 8; + A[10] = 3; + A[13] = 6; + + struct GenSparse *sp = gensvm_dense_to_sparse(A, 4, 4); + mu_assert(sp->nnz == 4, "Incorrect nnz"); + mu_assert(sp->n_row == 4, "Incorrect n_row"); + mu_assert(sp->n_col == 4, "Incorrect n_col"); + + mu_assert(sp->values[0] == 5.0, "Incorrect value at 0"); + mu_assert(sp->values[1] == 8.0, "Incorrect value at 1"); + mu_assert(sp->values[2] == 3.0, "Incorrect value at 2"); + mu_assert(sp->values[3] == 6.0, "Incorrect value at 3"); + + mu_assert(sp->ia[0] == 0, "Incorrect ia at 0"); + mu_assert(sp->ia[1] == 0, "Incorrect ia at 1"); + mu_assert(sp->ia[2] == 2, "Incorrect ia at 2"); + mu_assert(sp->ia[3] == 3, "Incorrect ia at 3"); + mu_assert(sp->ia[4] == 4, "Incorrect ia at 4"); + + mu_assert(sp->ja[0] == 0, "Incorrect ja at 0"); + mu_assert(sp->ja[1] == 1, "Incorrect ja at 1"); + mu_assert(sp->ja[2] == 2, "Incorrect ja at 2"); + mu_assert(sp->ja[3] == 1, "Incorrect ja at 3"); + + gensvm_free_sparse(sp); + free(A); + + return NULL; +} + +char *test_sparse_to_dense() +{ + double *A = Calloc(double, 4*4); + A[4] = 5; + A[5] = 8; + A[10] = 3; + A[13] = 6; + + struct GenSparse *sp = gensvm_dense_to_sparse(A, 4, 4); + + double *B = gensvm_sparse_to_dense(sp); + int i; + for (i=0; i<4*4; i++) + mu_assert(B[i] == A[i], "Incorrect element of B"); + + gensvm_free_sparse(sp); + free(A); + free(B); + + return NULL; +} + +char *all_tests() +{ + mu_suite_start(); + + mu_run_test(test_init_free_sparse); + mu_run_test(test_count_nnz); + mu_run_test(test_gensvm_could_sparse); + mu_run_test(test_dense_to_sparse); + mu_run_test(test_sparse_to_dense); + + return NULL; +} + +RUN_TESTS(all_tests); diff --git a/tests/src/test_gensvm_update.c b/tests/src/test_gensvm_update.c new file mode 100644 index 0000000..81dcb37 --- /dev/null +++ b/tests/src/test_gensvm_update.c @@ -0,0 +1,841 @@ +/** + * @file test_gensvm_optimize.c + * @author Gertjan van den Burg + * @date September, 2016 + * @brief Unit tests for gensvm_optimize.c functions + */ + +#include "minunit.h" +#include "gensvm_optimize.h" +#include "gensvm_init.h" +#include "gensvm_simplex.h" + +char *test_gensvm_calculate_omega() +{ + struct GenModel *model = gensvm_init_model(); + struct GenData *data = gensvm_init_data(); + + int n = 5, + m = 3, + K = 3; + + data->n = n; + data->m = m; + data->K = K; + + data->y = Calloc(long, n); + data->y[0] = 2; + data->y[1] = 1; + data->y[2] = 3; + data->y[3] = 2; + data->y[4] = 3; + + model->n = n; + model->m = m; + model->K = K; + model->p = 1.213; + gensvm_allocate_model(model); + + matrix_set(model->H, model->K, 0, 0, 0.8465725800087526); + matrix_set(model->H, model->K, 0, 1, 1.2876921677680249); + matrix_set(model->H, model->K, 0, 2, 1.0338561593991831); + matrix_set(model->H, model->K, 1, 0, 1.1891038526621391); + matrix_set(model->H, model->K, 1, 1, 0.4034192031226095); + matrix_set(model->H, model->K, 1, 2, 1.5298894170910078); + matrix_set(model->H, model->K, 2, 0, 1.3505111116922732); + matrix_set(model->H, model->K, 2, 1, 1.4336863304586636); + matrix_set(model->H, model->K, 2, 2, 1.7847533480330757); + matrix_set(model->H, model->K, 3, 0, 1.7712504341475415); + matrix_set(model->H, model->K, 3, 1, 1.6905146737773038); + matrix_set(model->H, model->K, 3, 2, 0.8189336598535132); + matrix_set(model->H, model->K, 4, 0, 0.6164203008844277); + matrix_set(model->H, model->K, 4, 1, 0.2456444285093894); + matrix_set(model->H, model->K, 4, 2, 0.8184193969741095); + + // start test code // + mu_assert(fabs(gensvm_calculate_omega(model, data, 0) - + 0.7394076262220608) < 1e-14, + "Incorrect omega at 0"); + mu_assert(fabs(gensvm_calculate_omega(model, data, 1) - + 0.7294526264247443) < 1e-14, + "Incorrect omega at 1"); + mu_assert(fabs(gensvm_calculate_omega(model, data, 2) - + 0.6802499471888741) < 1e-14, + "Incorrect omega at 2"); + mu_assert(fabs(gensvm_calculate_omega(model, data, 3) - + 0.6886792032441273) < 1e-14, + "Incorrect omega at 3"); + mu_assert(fabs(gensvm_calculate_omega(model, data, 4) - + 0.8695329737474283) < 1e-14, + "Incorrect omega at 4"); + + // end test code // + + gensvm_free_model(model); + gensvm_free_data(data); + + return NULL; +} + +char *test_gensvm_majorize_is_simple() +{ + struct GenModel *model = gensvm_init_model(); + struct GenData *data = gensvm_init_data(); + + int n = 5, + m = 3, + K = 3; + + data->n = n; + data->m = m; + data->K = K; + + data->y = Calloc(long, n); + data->y[0] = 2; + data->y[1] = 1; + data->y[2] = 3; + data->y[3] = 2; + data->y[4] = 3; + + model->n = n; + model->m = m; + model->K = K; + model->p = 1.213; + gensvm_allocate_model(model); + + matrix_set(model->H, model->K, 0, 0, 0.8465725800087526); + matrix_set(model->H, model->K, 0, 1, 1.2876921677680249); + matrix_set(model->H, model->K, 0, 2, 1.0338561593991831); + matrix_set(model->H, model->K, 1, 0, 1.1891038526621391); + matrix_set(model->H, model->K, 1, 1, 0.4034192031226095); + matrix_set(model->H, model->K, 1, 2, 0.0); + matrix_set(model->H, model->K, 2, 0, 0.5); + matrix_set(model->H, model->K, 2, 1, 0.0); + matrix_set(model->H, model->K, 2, 2, 1.1); + matrix_set(model->H, model->K, 3, 0, 0.0); + matrix_set(model->H, model->K, 3, 1, 0.0); + matrix_set(model->H, model->K, 3, 2, 0.8189336598535132); + matrix_set(model->H, model->K, 4, 0, 0.6164203008844277); + matrix_set(model->H, model->K, 4, 1, 0.2456444285093894); + matrix_set(model->H, model->K, 4, 2, 0.8184193969741095); + + // start test code // + mu_assert(gensvm_majorize_is_simple(model, data, 0) == false, + "Incorrect simple at 0"); + mu_assert(gensvm_majorize_is_simple(model, data, 1) == true, + "Incorrect simple at 1"); + mu_assert(gensvm_majorize_is_simple(model, data, 2) == true, + "Incorrect simple at 2"); + mu_assert(gensvm_majorize_is_simple(model, data, 3) == true, + "Incorrect simple at 3"); + mu_assert(gensvm_majorize_is_simple(model, data, 4) == false, + "Incorrect simple at 4"); + + // end test code // + + gensvm_free_model(model); + gensvm_free_data(data); + return NULL; +} + +char *test_gensvm_calculate_ab_non_simple() +{ + struct GenModel *model = gensvm_init_model(); + int n = 1, + m = 1, + K = 1; + + model->n = n; + model->m = m; + model->K = K; + model->kappa = 0.5; + + gensvm_allocate_model(model); + + // start test code // + double a, b_aq; + + // tests with p = 2 + model->p = 2.0; + matrix_set(model->Q, K, 0, 0, -1.0); + gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 1.5) < 1e-14, "Incorrect value for a (1)"); + mu_assert(fabs(b_aq - 1.25) < 1e-14, "Incorrect value for b (1)"); + + matrix_set(model->Q, K, 0, 0, 0.5); + gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 1.5) < 1e-14, "Incorrect value for a (2)"); + mu_assert(fabs(b_aq - 0.0277777777777778) < 1e-14, + "Incorrect value for b (2)"); + + matrix_set(model->Q, K, 0, 0, 2.0); + gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 1.5) < 1e-14, "Incorrect value for a (3)"); + mu_assert(fabs(b_aq - 0.0) < 1e-14, "Incorrect value for b (3)"); + + // tests with p != 2 (namely, 1.5) + // Note that here (p + kappa - 1)/(p - 2) = -2 + // + // We distinguish: q <= (p + kappa - 1)/(p - 2) + // q in (p + kappa - 1)/(p - 2), -kappa] + // q in (-kappa, 1] + // q > 1 + model->p = 1.5; + matrix_set(model->Q, K, 0, 0, -3.0); + gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 0.312018860376691) < 1e-14, + "Incorrect value for a (4)"); + mu_assert(fabs(b_aq - 1.35208172829900) < 1e-14, + "Incorrect value for b (4)"); + + matrix_set(model->Q, K, 0, 0, -1.0); + gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 0.866025403784439) < 1e-14, + "Incorrect value for a (5)"); + mu_assert(fabs(b_aq - 0.838525491562421) < 1e-14, + "Incorrect value for b (5)"); + + matrix_set(model->Q, K, 0, 0, 0.5); + gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 0.866025403784439) < 1e-14, + "Incorrect value for a (6)"); + mu_assert(fabs(b_aq - 0.0721687836487032) < 1e-14, + "Incorrect value for b (6)"); + + matrix_set(model->Q, K, 0, 0, 2.0); + gensvm_calculate_ab_non_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 0.245495126515491) < 1e-14, + "Incorrect value for a (7)"); + mu_assert(fabs(b_aq - 0.0) < 1e-14, + "Incorrect value for b (7)"); + // end test code // + + gensvm_free_model(model); + + return NULL; +} + +char *test_gensvm_calculate_ab_simple() +{ + struct GenModel *model = gensvm_init_model(); + int n = 1, + m = 1, + K = 1; + + model->n = n; + model->m = m; + model->K = K; + model->kappa = 0.5; + + gensvm_allocate_model(model); + + // start test code // + double a, b_aq; + + matrix_set(model->Q, K, 0, 0, -1.5); + gensvm_calculate_ab_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 0.142857142857143) < 1e-14, + "Incorrect value for a (1)"); + mu_assert(fabs(b_aq - 0.5) < 1e-14, + "Incorrect value for b (1)"); + + matrix_set(model->Q, K, 0, 0, 0.75); + gensvm_calculate_ab_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 0.333333333333333) < 1e-14, + "Incorrect value for a (2)"); + mu_assert(fabs(b_aq - 0.0833333333333333) < 1e-14, + "Incorrect value for b (2)"); + + matrix_set(model->Q, K, 0, 0, 2.0); + gensvm_calculate_ab_simple(model, 0, 0, &a, &b_aq); + mu_assert(fabs(a - 0.142857142857143) < 1e-14, + "Incorrect value for a (3)"); + mu_assert(fabs(b_aq - 0.0) < 1e-14, + "Incorrect value for b (3)"); + + // end test code // + gensvm_free_model(model); + + return NULL; +} + +char *test_gensvm_get_update() +{ + struct GenModel *model = gensvm_init_model(); + struct GenData *data = gensvm_init_data(); + int n = 8, + m = 3, + K = 3; + + model->n = n; + model->m = m; + model->K = K; + struct GenWork *work = gensvm_init_work(model); + + // initialize data + data->n = n; + data->m = m; + data->K = K; + + data->y = Calloc(long, n); + data->y[0] = 2; + data->y[1] = 1; + data->y[2] = 3; + data->y[3] = 2; + data->y[4] = 3; + data->y[5] = 3; + data->y[6] = 1; + data->y[7] = 2; + + data->Z = Calloc(double, n*(m+1)); + matrix_set(data->Z, data->m+1, 0, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 0, 1, 0.6437306339619082); + matrix_set(data->Z, data->m+1, 0, 2, -0.3276778319121999); + matrix_set(data->Z, data->m+1, 0, 3, 0.1564053473463392); + matrix_set(data->Z, data->m+1, 1, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 1, 1, -0.8683091763200105); + matrix_set(data->Z, data->m+1, 1, 2, -0.6910830836015162); + matrix_set(data->Z, data->m+1, 1, 3, -0.9675430665130734); + matrix_set(data->Z, data->m+1, 2, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 2, 1, -0.5024888699077029); + matrix_set(data->Z, data->m+1, 2, 2, -0.9649738292750712); + matrix_set(data->Z, data->m+1, 2, 3, 0.0776560791351473); + matrix_set(data->Z, data->m+1, 3, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 3, 1, 0.8206429991392579); + matrix_set(data->Z, data->m+1, 3, 2, -0.7255681388968501); + matrix_set(data->Z, data->m+1, 3, 3, -0.9475952272877165); + matrix_set(data->Z, data->m+1, 4, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 4, 1, 0.3426050950418613); + matrix_set(data->Z, data->m+1, 4, 2, -0.5340602451864306); + matrix_set(data->Z, data->m+1, 4, 3, -0.7159704241662815); + matrix_set(data->Z, data->m+1, 5, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 5, 1, -0.3077314049206620); + matrix_set(data->Z, data->m+1, 5, 2, 0.1141288036288195); + matrix_set(data->Z, data->m+1, 5, 3, -0.7060114827535847); + matrix_set(data->Z, data->m+1, 6, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 6, 1, 0.6301294373610109); + matrix_set(data->Z, data->m+1, 6, 2, -0.9983027363627769); + matrix_set(data->Z, data->m+1, 6, 3, -0.9365684178444004); + matrix_set(data->Z, data->m+1, 7, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 7, 1, -0.0665379368401439); + matrix_set(data->Z, data->m+1, 7, 2, -0.1781385556871763); + matrix_set(data->Z, data->m+1, 7, 3, -0.7292593770500276); + + // initialize model + model->p = 1.1; + model->lambda = 0.123; + model->weight_idx = 1; + model->kappa = 0.5; + + // initialize matrices + gensvm_allocate_model(model); + gensvm_initialize_weights(data, model); + gensvm_simplex(model); + gensvm_simplex_diff(model); + + // initialize V + matrix_set(model->V, model->K-1, 0, 0, -0.7593642121025029); + matrix_set(model->V, model->K-1, 0, 1, -0.5497320698504756); + matrix_set(model->V, model->K-1, 1, 0, 0.2982680646268177); + matrix_set(model->V, model->K-1, 1, 1, -0.2491408622891925); + matrix_set(model->V, model->K-1, 2, 0, -0.3118572761092807); + matrix_set(model->V, model->K-1, 2, 1, 0.5461219445756100); + matrix_set(model->V, model->K-1, 3, 0, -0.3198994238626641); + matrix_set(model->V, model->K-1, 3, 1, 0.7134997072555367); + + // start test code // + + // these need to be prepared for the update call + gensvm_calculate_errors(model, data, work->ZV); + gensvm_calculate_huber(model); + + // run the actual update call + gensvm_get_update(model, data, work); + + // test values + mu_assert(fabs(matrix_get(model->V, model->K-1, 0, 0) - + -0.1323791019594062) < 1e-14, + "Incorrect value of model->V at 0, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 0, 1) - + -0.3598407983154332) < 1e-14, + "Incorrect value of model->V at 0, 1"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 1, 0) - + 0.3532993103400935) < 1e-14, + "Incorrect value of model->V at 1, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 1, 1) - + -0.4094572388475382) < 1e-14, + "Incorrect value of model->V at 1, 1"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 2, 0) - + 0.1313169839871234) < 1e-14, + "Incorrect value of model->V at 2, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 2, 1) - + 0.2423439972728328) < 1e-14, + "Incorrect value of model->V at 2, 1"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 3, 0) - + 0.0458431025455224) < 1e-14, + "Incorrect value of model->V at 3, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 3, 1) - + 0.4390030236354089) < 1e-14, + "Incorrect value of model->V at 3, 1"); + // end test code // + + gensvm_free_model(model); + gensvm_free_data(data); + gensvm_free_work(work); + + return NULL; +} + +char *test_gensvm_get_update_sparse() +{ + struct GenModel *model = gensvm_init_model(); + struct GenData *data = gensvm_init_data(); + int n = 8, + m = 3, + K = 3; + + model->n = n; + model->m = m; + model->K = K; + struct GenWork *work = gensvm_init_work(model); + + // initialize data + data->n = n; + data->m = m; + data->K = K; + + data->y = Calloc(long, n); + data->y[0] = 2; + data->y[1] = 1; + data->y[2] = 3; + data->y[3] = 2; + data->y[4] = 3; + data->y[5] = 3; + data->y[6] = 1; + data->y[7] = 2; + + data->Z = Calloc(double, n*(m+1)); + matrix_set(data->Z, data->m+1, 0, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 0, 1, 0.6437306339619082); + matrix_set(data->Z, data->m+1, 0, 2, -0.3276778319121999); + matrix_set(data->Z, data->m+1, 0, 3, 0.1564053473463392); + matrix_set(data->Z, data->m+1, 1, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 1, 1, -0.8683091763200105); + matrix_set(data->Z, data->m+1, 1, 2, -0.6910830836015162); + matrix_set(data->Z, data->m+1, 1, 3, -0.9675430665130734); + matrix_set(data->Z, data->m+1, 2, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 2, 1, -0.5024888699077029); + matrix_set(data->Z, data->m+1, 2, 2, -0.9649738292750712); + matrix_set(data->Z, data->m+1, 2, 3, 0.0776560791351473); + matrix_set(data->Z, data->m+1, 3, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 3, 1, 0.8206429991392579); + matrix_set(data->Z, data->m+1, 3, 2, -0.7255681388968501); + matrix_set(data->Z, data->m+1, 3, 3, -0.9475952272877165); + matrix_set(data->Z, data->m+1, 4, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 4, 1, 0.3426050950418613); + matrix_set(data->Z, data->m+1, 4, 2, -0.5340602451864306); + matrix_set(data->Z, data->m+1, 4, 3, -0.7159704241662815); + matrix_set(data->Z, data->m+1, 5, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 5, 1, -0.3077314049206620); + matrix_set(data->Z, data->m+1, 5, 2, 0.1141288036288195); + matrix_set(data->Z, data->m+1, 5, 3, -0.7060114827535847); + matrix_set(data->Z, data->m+1, 6, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 6, 1, 0.6301294373610109); + matrix_set(data->Z, data->m+1, 6, 2, -0.9983027363627769); + matrix_set(data->Z, data->m+1, 6, 3, -0.9365684178444004); + matrix_set(data->Z, data->m+1, 7, 0, 1.0000000000000000); + matrix_set(data->Z, data->m+1, 7, 1, -0.0665379368401439); + matrix_set(data->Z, data->m+1, 7, 2, -0.1781385556871763); + matrix_set(data->Z, data->m+1, 7, 3, -0.7292593770500276); + + // convert Z to a sparse matrix to test the sparse functions + data->spZ = gensvm_dense_to_sparse(data->Z, data->n, data->m+1); + free(data->RAW); + data->RAW = NULL; + free(data->Z); + data->Z = NULL; + + // initialize model + model->p = 1.1; + model->lambda = 0.123; + model->weight_idx = 1; + model->kappa = 0.5; + + // initialize matrices + gensvm_allocate_model(model); + gensvm_initialize_weights(data, model); + gensvm_simplex(model); + gensvm_simplex_diff(model); + + // initialize V + matrix_set(model->V, model->K-1, 0, 0, -0.7593642121025029); + matrix_set(model->V, model->K-1, 0, 1, -0.5497320698504756); + matrix_set(model->V, model->K-1, 1, 0, 0.2982680646268177); + matrix_set(model->V, model->K-1, 1, 1, -0.2491408622891925); + matrix_set(model->V, model->K-1, 2, 0, -0.3118572761092807); + matrix_set(model->V, model->K-1, 2, 1, 0.5461219445756100); + matrix_set(model->V, model->K-1, 3, 0, -0.3198994238626641); + matrix_set(model->V, model->K-1, 3, 1, 0.7134997072555367); + + // start test code // + + // these need to be prepared for the update call + gensvm_calculate_errors(model, data, work->ZV); + gensvm_calculate_huber(model); + + // run the actual update call + gensvm_get_update(model, data, work); + + // test values + mu_assert(fabs(matrix_get(model->V, model->K-1, 0, 0) - + -0.1323791019594062) < 1e-14, + "Incorrect value of model->V at 0, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 0, 1) - + -0.3598407983154332) < 1e-14, + "Incorrect value of model->V at 0, 1"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 1, 0) - + 0.3532993103400935) < 1e-14, + "Incorrect value of model->V at 1, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 1, 1) - + -0.4094572388475382) < 1e-14, + "Incorrect value of model->V at 1, 1"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 2, 0) - + 0.1313169839871234) < 1e-14, + "Incorrect value of model->V at 2, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 2, 1) - + 0.2423439972728328) < 1e-14, + "Incorrect value of model->V at 2, 1"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 3, 0) - + 0.0458431025455224) < 1e-14, + "Incorrect value of model->V at 3, 0"); + mu_assert(fabs(matrix_get(model->V, model->K-1, 3, 1) - + 0.4390030236354089) < 1e-14, + "Incorrect value of model->V at 3, 1"); + // end test code // + + gensvm_free_model(model); + gensvm_free_data(data); + gensvm_free_work(work); + + return NULL; +} + + +char *test_dposv() +{ + int n = 6, + m = 5; + + // start test code // + double *A = Calloc(double, n*n); + double *B = Calloc(double, n*m); + + // We're only storing the upper triangular part of the symmetric + // matrix A. + matrix_set(A, n, 0, 0, 6.2522023496540386); + matrix_set(A, n, 0, 1, 1.2760969977888754); + matrix_set(A, n, 0, 2, 1.1267774552193974); + matrix_set(A, n, 0, 3, 0.8384267227932789); + matrix_set(A, n, 0, 4, 0.9588857509656767); + matrix_set(A, n, 0, 5, 0.7965747978871199); + matrix_set(A, n, 1, 1, 6.7539376310748924); + matrix_set(A, n, 1, 2, 0.5908599276261999); + matrix_set(A, n, 1, 3, 0.9987368128192129); + matrix_set(A, n, 1, 4, 1.1142949385131484); + matrix_set(A, n, 1, 5, 1.4150107613377123); + matrix_set(A, n, 2, 2, 7.3361678639533139); + matrix_set(A, n, 2, 3, 1.5596679563906113); + matrix_set(A, n, 2, 4, 0.8162441257417704); + matrix_set(A, n, 2, 5, 0.8701893160678078); + matrix_set(A, n, 3, 3, 6.8330423955320834); + matrix_set(A, n, 3, 4, 1.6081779105091201); + matrix_set(A, n, 3, 5, 1.0498769837396527); + matrix_set(A, n, 4, 4, 7.6810607313742949); + matrix_set(A, n, 4, 5, 1.1352511350739003); + matrix_set(A, n, 5, 5, 7.0573435553104567); + + // this is the matrix B (n x m), stored in COLUMN-MAJOR ORDER + B[0] = 0.5759809004700531; + B[1] = 0.4643751885289473; + B[2] = 0.1914807543974765; + B[3] = 0.2875503245961965; + B[4] = 0.0493123646253395; + B[5] = 0.4333053066976881; + B[6] = 0.4738306027724854; + B[7] = 0.2460182087225041; + B[8] = 0.1620492662433550; + B[9] = 0.9596380576403235; + B[10] = 0.7244837218691289; + B[11] = 0.9437116578537014; + B[12] = 0.3320986772155025; + B[13] = 0.4717424581951766; + B[14] = 0.9206089360217588; + B[15] = 0.7059004575000609; + B[16] = 0.1696670763906902; + B[17] = 0.4896586269167711; + B[18] = 0.9539497766794410; + B[19] = 0.2269749103273779; + B[20] = 0.8832156948007016; + B[21] = 0.4682217970327739; + B[22] = 0.5293439096127632; + B[23] = 0.8699136677253214; + B[24] = 0.1622687366790325; + B[25] = 0.4511598310105013; + B[26] = 0.5587302139109592; + B[27] = 0.7456952498557438; + B[28] = 0.5923112589693547; + B[29] = 0.2243481938151050; + + // note the 'L' here denotes the lower triangular part of A. Above we + // stored the upper triangular part of A in row-major order, so that's + // the lower triangular part in column-major order, which Lapack uses. + int status = dposv('L', n, m, A, n, B, n); + mu_assert(status == 0, "dposv didn't return status success"); + + // Since B now contains the solution in Column-Major order, we have to + // check it in that order. + + mu_assert(fabs(B[0] - 0.0770250502756885) < 1e-14, + "Incorrect value of B at 0"); + mu_assert(fabs(B[1] - 0.0449013611583528) < 1e-14, + "Incorrect value of B at 1"); + mu_assert(fabs(B[2] - 0.0028421256926631) < 1e-14, + "Incorrect value of B at 2"); + mu_assert(fabs(B[3] - 0.0238082780914757) < 1e-14, + "Incorrect value of B at 3"); + mu_assert(fabs(B[4] - -0.0213884392480803) < 1e-14, + "Incorrect value of B at 4"); + mu_assert(fabs(B[5] - 0.0432493445363141) < 1e-14, + "Incorrect value of B at 5"); + mu_assert(fabs(B[6] - 0.0469188408250497) < 1e-14, + "Incorrect value of B at 6"); + mu_assert(fabs(B[7] - -0.0188676544565197) < 1e-14, + "Incorrect value of B at 7"); + mu_assert(fabs(B[8] - -0.0268693544126544) < 1e-14, + "Incorrect value of B at 8"); + mu_assert(fabs(B[9] - 0.1139942447258460) < 1e-14, + "Incorrect value of B at 9"); + mu_assert(fabs(B[10] - 0.0539483576192093) < 1e-14, + "Incorrect value of B at 10"); + mu_assert(fabs(B[11] - 0.1098843745987866) < 1e-14, + "Incorrect value of B at 11"); + mu_assert(fabs(B[12] - 0.0142822905211067) < 1e-14, + "Incorrect value of B at 12"); + mu_assert(fabs(B[13] - 0.0425078586146396) < 1e-14, + "Incorrect value of B at 13"); + mu_assert(fabs(B[14] - 0.1022650353097420) < 1e-14, + "Incorrect value of B at 14"); + mu_assert(fabs(B[15] - 0.0700476338859921) < 1e-14, + "Incorrect value of B at 15"); + mu_assert(fabs(B[16] - -0.0171546096353451) < 1e-14, + "Incorrect value of B at 16"); + mu_assert(fabs(B[17] - 0.0389772844468578) < 1e-14, + "Incorrect value of B at 17"); + mu_assert(fabs(B[18] - 0.1231757430810565) < 1e-14, + "Incorrect value of B at 18"); + mu_assert(fabs(B[19] - -0.0246954324681607) < 1e-14, + "Incorrect value of B at 19"); + mu_assert(fabs(B[20] - 0.0853098528328778) < 1e-14, + "Incorrect value of B at 20"); + mu_assert(fabs(B[21] - 0.0155226252622933) < 1e-14, + "Incorrect value of B at 21"); + mu_assert(fabs(B[22] - 0.0305321945431931) < 1e-14, + "Incorrect value of B at 22"); + mu_assert(fabs(B[23] - 0.0965724559730953) < 1e-14, + "Incorrect value of B at 23"); + mu_assert(fabs(B[24] - -0.0106596940426243) < 1e-14, + "Incorrect value of B at 24"); + mu_assert(fabs(B[25] - 0.0446093337039209) < 1e-14, + "Incorrect value of B at 25"); + mu_assert(fabs(B[26] - 0.0517721408799503) < 1e-14, + "Incorrect value of B at 26"); + mu_assert(fabs(B[27] - 0.0807167333268751) < 1e-14, + "Incorrect value of B at 27"); + mu_assert(fabs(B[28] - 0.0499219869343351) < 1e-14, + "Incorrect value of B at 28"); + mu_assert(fabs(B[29] - -0.0023736192508975) < 1e-14, + "Incorrect value of B at 29"); + + // end test code // + + free(A); + free(B); + + return NULL; +} + +char *test_dsysv() +{ + int n = 6, + m = 5; + + // start test code // + double *A = Calloc(double, n*n); + double *B = Calloc(double, n*m); + + // only store the upper triangular part of A + matrix_set(A, n, 0, 0, 0.4543421836368821); + matrix_set(A, n, 0, 1, 0.8708338836669620); + matrix_set(A, n, 0, 2, 1.3638340495356920); + matrix_set(A, n, 0, 3, 0.8361050302144852); + matrix_set(A, n, 0, 4, 1.3203463886997013); + matrix_set(A, n, 0, 5, 0.3915472119381547); + matrix_set(A, n, 1, 1, 1.4781728513484600); + matrix_set(A, n, 1, 2, 1.7275151336935415); + matrix_set(A, n, 1, 3, 1.1817139356024176); + matrix_set(A, n, 1, 4, 0.7436086782250922); + matrix_set(A, n, 1, 5, 0.1101758222549450); + matrix_set(A, n, 2, 2, 1.9363682709237851); + matrix_set(A, n, 2, 3, 1.1255164391384127); + matrix_set(A, n, 2, 4, 1.0935575148560115); + matrix_set(A, n, 2, 5, 1.4678279983625921); + matrix_set(A, n, 3, 3, 1.7500757162326757); + matrix_set(A, n, 3, 4, 1.5490921663229316); + matrix_set(A, n, 3, 5, 1.0305675837706338); + matrix_set(A, n, 4, 4, 0.4015851628106807); + matrix_set(A, n, 4, 5, 1.2487496402900566); + matrix_set(A, n, 5, 5, 0.7245473723012897); + + // Store B in column-major order! + B[0] = 0.6037912122210694; + B[1] = 0.5464186020522516; + B[2] = 0.1810847918541411; + B[3] = 0.1418268895582175; + B[4] = 0.5459836535934901; + B[5] = 0.5890609930309275; + B[6] = 0.1128454279279324; + B[7] = 0.8930541056550655; + B[8] = 0.6946437745982983; + B[9] = 0.0955380494302154; + B[10] = 0.5750037200376288; + B[11] = 0.0326245221201559; + B[12] = 0.3336701777312929; + B[13] = 0.7648765739095678; + B[14] = 0.2662986413718805; + B[15] = 0.7850810368985298; + B[16] = 0.5432388739552745; + B[17] = 0.4387739258059151; + B[18] = 0.4257906469646436; + B[19] = 0.1272470768775465; + B[20] = 0.4276616397814972; + B[21] = 0.8137579718316245; + B[22] = 0.6849003723960281; + B[23] = 0.1768571691078990; + B[24] = 0.4183278358395650; + B[25] = 0.6517633972400351; + B[26] = 0.1154775550239331; + B[27] = 0.4217248849174023; + B[28] = 0.9179697263236190; + B[29] = 0.6532254399609347; + + // run dsysv, note that Lapack expects matrices to be in column-major + // order, so we can use the 'L' notation for the triangle of A, since + // we've stored the upper triangle in Row-Major order. + + int *IPIV = Calloc(int, n); + double *WORK = Calloc(double, 1); + int status; + + // first we determine the necessary size of the WORK array + status = dsysv('L', n, m, A, n, IPIV, B, n, WORK, -1); + mu_assert(status == 0, "dsysv workspace query failed"); + + int LWORK = WORK[0]; + WORK = Realloc(WORK, double, LWORK); + status = dsysv('L', n, m, A, n, IPIV, B, n, WORK, LWORK); + mu_assert(status == 0, "dsysv didn't return status success"); + + // Since B now contains the solution in Column-Major order, we have to + // check it in that order + + mu_assert(fabs(B[0] - 3.0915448286548806) < 1e-14, + "Incorrect value of B at 0"); + mu_assert(fabs(B[1] - -1.2114333666218096) < 1e-14, + "Incorrect value of B at 1"); + mu_assert(fabs(B[2] - -0.1734297056577389) < 1e-14, + "Incorrect value of B at 2"); + mu_assert(fabs(B[3] - -0.6989941801726605) < 1e-14, + "Incorrect value of B at 3"); + mu_assert(fabs(B[4] - 1.2577948324106381) < 1e-14, + "Incorrect value of B at 4"); + mu_assert(fabs(B[5] - -1.4956913279293909) < 1e-14, + "Incorrect value of B at 5"); + mu_assert(fabs(B[6] - -0.2923881304345451) < 1e-14, + "Incorrect value of B at 6"); + mu_assert(fabs(B[7] - 0.3467010144627596) < 1e-14, + "Incorrect value of B at 7"); + mu_assert(fabs(B[8] - 0.4892730831569431) < 1e-14, + "Incorrect value of B at 8"); + mu_assert(fabs(B[9] - 0.2811039364176572) < 1e-14, + "Incorrect value of B at 9"); + mu_assert(fabs(B[10] - -0.7323586733947237) < 1e-14, + "Incorrect value of B at 10"); + mu_assert(fabs(B[11] - 0.0214996365534143) < 1e-14, + "Incorrect value of B at 11"); + mu_assert(fabs(B[12] - -0.9355264353773129) < 1e-14, + "Incorrect value of B at 12"); + mu_assert(fabs(B[13] - 0.2709743256273919) < 1e-14, + "Incorrect value of B at 13"); + mu_assert(fabs(B[14] - 0.2328234557913496) < 1e-14, + "Incorrect value of B at 14"); + mu_assert(fabs(B[15] - 0.9367092697976086) < 1e-14, + "Incorrect value of B at 15"); + mu_assert(fabs(B[16] - -0.4501075692310449) < 1e-14, + "Incorrect value of B at 16"); + mu_assert(fabs(B[17] - 0.0416902255163366) < 1e-14, + "Incorrect value of B at 17"); + mu_assert(fabs(B[18] - 2.2216982312706905) < 1e-14, + "Incorrect value of B at 18"); + mu_assert(fabs(B[19] - -0.4820931673893176) < 1e-14, + "Incorrect value of B at 19"); + mu_assert(fabs(B[20] - -0.8456462251088332) < 1e-14, + "Incorrect value of B at 20"); + mu_assert(fabs(B[21] - -0.3761761825939751) < 1e-14, + "Incorrect value of B at 21"); + mu_assert(fabs(B[22] - 1.1921920764994696) < 1e-14, + "Incorrect value of B at 22"); + mu_assert(fabs(B[23] - -0.6897255145640184) < 1e-14, + "Incorrect value of B at 23"); + mu_assert(fabs(B[24] - 2.0325624816957180) < 1e-14, + "Incorrect value of B at 24"); + mu_assert(fabs(B[25] - -0.9900930297944344) < 1e-14, + "Incorrect value of B at 25"); + mu_assert(fabs(B[26] - -0.0462533459389938) < 1e-14, + "Incorrect value of B at 26"); + mu_assert(fabs(B[27] - 0.0960916931433909) < 1e-14, + "Incorrect value of B at 27"); + mu_assert(fabs(B[28] - 0.5805302287627144) < 1e-14, + "Incorrect value of B at 28"); + mu_assert(fabs(B[29] - -1.0897953557455400) < 1e-14, + "Incorrect value of B at 29"); + + free(WORK); + free(IPIV); + + // end test code // + + free(A); + free(B); + + return NULL; +} + +char *all_tests() +{ + mu_suite_start(); + + mu_run_test(test_gensvm_calculate_omega); + mu_run_test(test_gensvm_majorize_is_simple); + mu_run_test(test_gensvm_calculate_ab_non_simple); + mu_run_test(test_gensvm_calculate_ab_simple); + + mu_run_test(test_dposv); + mu_run_test(test_dsysv); + + mu_run_test(test_gensvm_get_update); + mu_run_test(test_gensvm_get_update_sparse); + + return NULL; +} + +RUN_TESTS(all_tests); |
