/** * @file gensvm_train.c * @author Gertjan van den Burg * @date August 9, 2013 * @brief Main functions for training the GenSVM solution. * * @details * Contains update and loss functions used to actually find * the optimal V. * */ #include #include #include "libGenSVM.h" #include "gensvm.h" #include "gensvm_lapack.h" #include "gensvm_matrix.h" #include "gensvm_sv.h" #include "gensvm_train.h" #include "util.h" /** * Maximum number of iterations of the algorithm. */ #define MAX_ITER 1000000000 /** * @brief The main training loop for GenSVM * * @details * This function is the main training function. This function * handles the optimization of the model with the given model parameters, with * the data given. On return the matrix GenModel::V contains the optimal * weight matrix. * * In this function, step doubling is used in the majorization algorithm after * a burn-in of 50 iterations. If the training is finished, GenModel::t and * GenModel::W are extracted from GenModel::V. * * @param[in,out] model the GenModel to be trained. Contains optimal * V on exit. * @param[in] data the GenData to train the model with. */ void gensvm_optimize(struct GenModel *model, struct GenData *data) { long i, j, it = 0; double L, Lbar, value; long n = model->n; long m = model->m; long K = model->K; double *B = Calloc(double, n*(K-1)); double *ZV = Calloc(double, n*(K-1)); double *ZAZ = Calloc(double, (m+1)*(m+1)); double *ZAZV = Calloc(double, (m+1)*(K-1)); double *ZAZVT = Calloc(double, (m+1)*(K-1)); note("Starting main loop.\n"); note("Dataset:\n"); note("\tn = %i\n", n); note("\tm = %i\n", m); note("\tK = %i\n", K); note("Parameters:\n"); note("\tkappa = %f\n", model->kappa); note("\tp = %f\n", model->p); note("\tlambda = %15.16f\n", model->lambda); note("\tepsilon = %g\n", model->epsilon); note("\n"); gensvm_simplex_gen(model->K, model->U); gensvm_simplex_diff(model, data); gensvm_category_matrix(model, data); L = gensvm_get_loss(model, data, ZV); Lbar = L + 2.0*model->epsilon*L; while ((it < MAX_ITER) && (Lbar - L)/L > model->epsilon) { // ensure V contains newest V and Vbar contains V from // previous gensvm_get_update(model, data, B, ZAZ, ZAZV, ZAZVT); if (it > 50) gensvm_step_doubling(model); Lbar = L; L = gensvm_get_loss(model, data, ZV); if (it%100 == 0) note("iter = %li, L = %15.16f, Lbar = %15.16f, " "reldiff = %15.16f\n", it, L, Lbar, (Lbar - L)/L); it++; } if (L > Lbar) fprintf(stderr, "[WARNING]: Negative step occurred in " "majorization.\n"); if (it >= MAX_ITER) fprintf(stderr, "[WARNING]: maximum number of iterations " "reached.\n"); note("Optimization finished, iter = %li, loss = %15.16f, " "rel. diff. = %15.16f\n", it-1, L, (Lbar - L)/L); note("Number of support vectors: %li\n", gensvm_num_sv(model, data)); model->training_error = (Lbar - L)/L; for (i=0; it[i] = matrix_get(model->V, K-1, 0, i); for (i=1; iV, K-1, i, j); matrix_set(model->W, K-1, i-1, j, value); } } free(B); free(ZV); free(ZAZ); free(ZAZV); free(ZAZVT); } /** * @brief Calculate the current value of the loss function * * @details * The current loss function value is calculated based on the matrix V in the * given model. Note that the matrix ZV is passed explicitly to avoid having * to reallocate memory at every step. * * @param[in] model GenModel structure which holds the current * estimate V * @param[in] data GenData structure * @param[in,out] ZV pre-allocated matrix ZV which is updated on * output * @returns the current value of the loss function */ double gensvm_get_loss(struct GenModel *model, struct GenData *data, double *ZV) { long i, j; long n = model->n; long K = model->K; long m = model->m; double value, rowvalue, loss = 0.0; gensvm_calculate_errors(model, data, ZV); gensvm_calculate_huber(model); for (i=0; iH, K, i, j); value = pow(value, model->p); value *= matrix_get(model->R, K, i, j); rowvalue += value; } rowvalue = pow(rowvalue, 1.0/(model->p)); rowvalue *= model->rho[i]; loss += rowvalue; } loss /= ((double) n); value = 0; for (i=1; iV, K-1, i, j), 2.0); } } loss += model->lambda * value; return loss; } /** * @brief Perform a single step of the majorization algorithm to update V * * @details * This function contains the main update calculations of the algorithm. These * calculations are necessary to find a new update V. The calculations exist of * recalculating the majorization coefficients for all instances and all * classes, and solving a linear system to find V. * * Because the function gensvm_get_update() is always called after a call to * gensvm_get_loss() with the same GenModel::V, it is unnecessary to calculate * the updated errors GenModel::Q and GenModel::H here too. This saves on * computation time. * * In calculating the majorization coefficients we calculate the elements of a * diagonal matrix A with elements * @f[ * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ * \varepsilon_i a_{ijk}^{(p)} + (1 - \varepsilon_i) \omega_i * a_{ijk}^{(p)} \right], * @f] * where @f$ k = y_i @f$. * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it is * efficient to update a matrix ZAZ through consecutive rank 1 updates with * a single element of A and the corresponding row of Z. The BLAS function * dsyr is used for this. * * The B matrix is has rows * @f[ * \boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ * \varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)} * \overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i) * \omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)} * \overline{q}_i^{(kj)} \right) \right] * \boldsymbol{\delta}_{kj}' * @f] * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$, * and one for when it is 0. The 3D simplex difference matrix is used here, in * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$. * * Finally, the following system is solved * @f[ * (\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} = * (\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}' * \textbf{B}) * @f] * solving this system is done through dposv(). * * @todo * Consider allocating IPIV and WORK at a higher level, they probably don't * change much during the iterations. * * @param [in,out] model model to be updated * @param [in] data data used in model * @param [in] B pre-allocated matrix used for linear coefficients * @param [in] ZAZ pre-allocated matrix used in system * @param [in] ZAZV pre-allocated matrix used in system solving * @param [in] ZAZVT pre-allocated matrix used in system solving */ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, double *ZAZ, double *ZAZV, double *ZAZVT) { int status, class; long i, j, k; double Avalue, Bvalue; double omega, value, a, b, q, h, r; long n = model->n; long m = model->m; long K = model->K; double kappa = model->kappa; double p = model->p; double *rho = model->rho; // constants which are used often throughout const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0); const double in = 1.0/((double) n); // clear matrices Memset(B, double, n*(K-1)); Memset(ZAZ, double, (m+1)*(m+1)); b = 0; for (i=0; iH, K, i, j); r = matrix_get(model->R, K, i, j); value += (h*r > 0) ? 1 : 0; omega += pow(h, p)*r; } class = (value <= 1.0) ? 1 : 0; omega = (1.0/p)*pow(omega, 1.0/p - 1.0); Avalue = 0; if (class == 1) { for (j=0; jQ, K, i, j); if (q <= -kappa) { a = 0.25/(0.5 - kappa/2.0 - q); b = 0.5; } else if (q <= 1.0) { a = 1.0/(2.0*kappa + 2.0); b = (1.0 - q)*a; } else { a = -0.25/(0.5 - kappa/2.0 - q); b = 0; } for (k=0; kUU, K-1, K, i, k, j); matrix_add(B, K-1, i, k, Bvalue); } Avalue += a*matrix_get(model->R, K, i, j); } } else { if (2.0 - p < 0.0001) { for (j=0; jQ, K, i, j); if (q <= -kappa) { b = 0.5 - kappa/2.0 - q; } else if ( q <= 1.0) { b = pow(1.0 - q, 3.0)/( 2.0*pow(kappa + 1.0, 2.0)); } else { b = 0; } for (k=0; kUU, K-1, K, i, k, j); matrix_add( B, K-1, i, k, Bvalue); } } Avalue = 1.5*(K - 1.0); } else { for (j=0; jQ, K, i, j); if (q <= (p + kappa - 1.0)/(p - 2.0)) { a = 0.25*pow(p, 2.0)*pow( 0.5 - kappa/2.0 - q, p - 2.0); } else if (q <= 1.0) { a = a2g2; } else { a = 0.25*pow(p, 2.0)*pow( (p/(p - 2.0))* (0.5 - kappa/2.0 - q), p - 2.0); b = a*(2.0*q + kappa - 1.0)/ (p - 2.0) + 0.5*p*pow( p/(p - 2.0)* (0.5 - kappa/ 2.0 - q), p - 1.0); } if (q <= -kappa) { b = 0.5*p*pow( 0.5 - kappa/2.0 - q, p - 1.0); } else if ( q <= 1.0) { b = p*pow(1.0 - q, 2.0*p - 1.0)/ pow(2*kappa+2.0, p); } for (k=0; kUU, K-1, K, i, k, j); matrix_add( B, K-1, i, k, Bvalue); } Avalue += a*matrix_get(model->R, K, i, j); } } Avalue *= omega; } Avalue *= in * rho[i]; // Now we calculate the matrix ZAZ. Since this is // guaranteed to be symmetric, we only calculate the // upper part of the matrix, and then copy this over // to the lower part after all calculations are done. // Note that the use of dsym is faster than dspr, even // though dspr uses less memory. cblas_dsyr( CblasRowMajor, CblasUpper, m+1, Avalue, &data->Z[i*(m+1)], 1, ZAZ, m+1); } // Copy upper to lower (necessary because we need to switch // to Col-Major order for LAPACK). /* for (i=0; iV, K-1, 0.0, ZAZV, K-1); cblas_dgemm( CblasRowMajor, CblasTrans, CblasNoTrans, m+1, K-1, n, 1.0, data->Z, m+1, B, K-1, 1.0, ZAZV, K-1); /* * Add lambda to all diagonal elements except the first one. Recall * that ZAZ is of size m+1 and is symmetric. */ i = 0; for (j=0; jlambda; } // For the LAPACK call we need to switch to Column- // Major order. This is unnecessary for the matrix // ZAZ because it is symmetric. The matrix ZAZV // must be converted however. for (i=0; iVbar; model->Vbar = model->V; model->V = ZAZVT; ZAZVT = ptr; */ for (i=0; iV, K-1, i, j); matrix_set(model->Vbar, K-1, i, j, value); value = matrix_get(ZAZV, K-1, i, j); matrix_set(model->V, K-1, i, j, value); } } }