diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2014-01-15 00:35:21 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2014-01-15 00:35:21 +0100 |
| commit | ddbd423f54e2fd92659a0d277ee844659eee8ba1 (patch) | |
| tree | 316a82d463009364a6cdf07892bc3e28330698db /src/msvmmaj_train.c | |
| parent | remove note in read_data (diff) | |
| download | gensvm-ddbd423f54e2fd92659a0d277ee844659eee8ba1.tar.gz gensvm-ddbd423f54e2fd92659a0d277ee844659eee8ba1.zip | |
added documentation, restart git usage, start implementing kernels
Diffstat (limited to 'src/msvmmaj_train.c')
| -rw-r--r-- | src/msvmmaj_train.c | 202 |
1 files changed, 156 insertions, 46 deletions
diff --git a/src/msvmmaj_train.c b/src/msvmmaj_train.c index 272d86a..97ee6a1 100644 --- a/src/msvmmaj_train.c +++ b/src/msvmmaj_train.c @@ -1,6 +1,6 @@ /** * @file msvmmaj_train.c - * @author Gertjan van den Burg (burg@ese.eur.nl) + * @author Gertjan van den Burg * @date August 9, 2013 * @brief Main functions for training the MSVMMaj solution. * @@ -13,25 +13,34 @@ #include <math.h> #include <cblas.h> -#include "msvmmaj_train.h" -#include "MSVMMaj.h" #include "libMSVMMaj.h" -#include "mylapack.h" -#include "matrix.h" +#include "msvmmaj.h" +#include "msvmmaj_lapack.h" +#include "msvmmaj_matrix.h" +#include "msvmmaj_train.h" #include "util.h" +/** + * Maximum number of iterations of the algorithm. + */ #define MAX_ITER 1000000 /** - * @name msvmmaj_optimize * @brief The main training loop for MSVMMaj * - * The msvmmaj_optimize() function is the main training function. This function + * @details + * This function is the main training function. This function * handles the optimization of the model with the given model parameters, with - * the data given. On return the matrix model->V contains the optimal weight matrix. + * the data given. On return the matrix MajModel::V contains the optimal + * weight matrix. + * + * In this function, step doubling is used in the majorization algorithm after + * a burn-in of 50 iterations. If the training is finished, MajModel::t and + * MajModel::W are extracted from MajModel::V. * - * @param [in,out] model the model to be trained. Contains optimal V on exit. - * @param [in] data the data to train the model with. + * @param[in,out] model the MajModel to be trained. Contains optimal + * V on exit. + * @param[in] data the MajData to train the model with. */ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) { @@ -49,7 +58,7 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) double *ZAZVT = Calloc(double, (m+1)*(K-1)); note("Starting main loop.\n"); - note("MajDataset:\n"); + note("Dataset:\n"); note("\tn = %i\n", n); note("\tm = %i\n", m); note("\tK = %i\n", K); @@ -78,8 +87,8 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) L = msvmmaj_get_loss(model, data, ZV); if (it%50 == 0) - note("iter = %li, L = %15.16f, Lbar = %15.16f, reldiff = %15.16f\n", - it, L, Lbar, (Lbar - L)/L); + note("iter = %li, L = %15.16f, Lbar = %15.16f, " + "reldiff = %15.16f\n", it, L, Lbar, (Lbar - L)/L); it++; } @@ -91,7 +100,8 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) model->t[i] = matrix_get(model->V, K-1, 0, i); for (i=1; i<m+1; i++) for (j=0; j<K-1; j++) - matrix_set(model->W, K-1, i-1, j, matrix_get(model->V, K-1, i, j)); + matrix_set(model->W, K-1, i-1, j, + matrix_get(model->V, K-1, i, j)); free(B); free(ZV); free(ZAZ); @@ -100,19 +110,22 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) } /** - * @name msvmmaj_get_loss - * @brief calculate the current value of the loss function + * @brief Calculate the current value of the loss function * - * The current loss value is calculated based on the matrix V in the given - * model. + * @details + * The current loss function value is calculated based on the matrix V in the + * given model. Note that the matrix ZV is passed explicitly to avoid having + * to reallocate memory at every step. * - * @param [in] model model structure which holds the current estimate V - * @param [in] data data structure - * @param [in,out] ZV pre-allocated matrix ZV which is updated on output - * - * @return the current value of the loss function + * @param[in] model MajModel structure which holds the current + * estimate V + * @param[in] data MajData structure + * @param[in,out] ZV pre-allocated matrix ZV which is updated on + * output + * @returns the current value of the loss function */ -double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV) +double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, + double *ZV) { long i, j; long n = data->n; @@ -151,10 +164,52 @@ double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV } /** - * @name msvmmaj_get_update - * @brief perform a single step of the majorization algorithm to update V + * @brief Perform a single step of the majorization algorithm to update V + * + * @details + * This function contains the main update calculations of the algorithm. These + * calculations are necessary to find a new update V. The calculations exist of + * recalculating the majorization coefficients for all instances and all + * classes, and solving a linear system to find V. + * + * Because the function msvmmaj_get_update() is always called after a call to + * msvmmaj_get_loss() with the same MajModel::V, it is unnecessary to calculate + * the updated errors MajModel::Q and MajModel::H here too. This saves on + * computation time. * - * details + * In calculating the majorization coefficients we calculate the elements of a + * diagonal matrix A with elements + * @f[ + * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ + * \varepsilon_i a_{ijk}^{(p)} + (1 - \varepsilon_i) \omega_i + * a_{ijk}^{(p)} \right], + * @f] + * where @f$ k = y_i @f$. + * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it is + * efficient to update a matrix ZAZ through consecutive rank 1 updates with + * a single element of A and the corresponding row of Z. The BLAS function + * dsyr is used for this. + * + * The B matrix is has rows + * @f[ + * \boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ + * \varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)} + * \overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i) + * \omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)} + * \overline{q}_i^{(kj)} \right) \right] + * \boldsymbol{\delta}_{kj}' + * @f] + * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$, + * and one for when it is 0. The 3D simplex difference matrix is used here, in + * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$. + * + * Finally, the following system is solved + * @f[ + * (\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} = + * (\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}' + * \textbf{B}) + * @f] + * solving this system is done through dposv(). * * @param [in,out] model model to be updated * @param [in] data data used in model @@ -166,9 +221,6 @@ double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, double *ZAZ, double *ZAZV, double *ZAZVT) { - // Because msvmmaj_update is always called after a call to - // msvmmaj_get_loss() with the latest V, it is unnecessary to recalculate - // the matrix ZV, the errors Q, or the Huber errors H. Awesome! int status, class; long i, j, k; double Avalue, Bvalue; @@ -182,11 +234,14 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, double p = model->p; double *rho = model->rho; + // constants which are used often throughout const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0); const double in = 1.0/((double) n); + // clear matrices Memset(B, double, n*(K-1)); Memset(ZAZ, double, (m+1)*(m+1)); + b = 0; for (i=0; i<n; i++) { value = 0; @@ -215,7 +270,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, b = 0; } for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*b*matrix3_get(model->UU, K-1, K, i, k, j); + Bvalue = in*rho[i]*b*matrix3_get( + model->UU, K-1, K, i, k, j); matrix_add(B, K-1, i, k, Bvalue); } Avalue += a*matrix_get(model->R, K, i, j); @@ -227,13 +283,27 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, if (q <= -kappa) { b = 0.5 - kappa/2.0 - q; } else if ( q <= 1.0) { - b = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0)); + b = pow(1.0 - q, 3.0)/( + 2.0*pow(kappa + 1.0, + 2.0)); } else { b = 0; } for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); - matrix_add(B, K-1, i, k, Bvalue); + Bvalue = in*rho[i]*omega*b* + matrix3_get( + model->UU, + K-1, + K, + i, + k, + j); + matrix_add( + B, + K-1, + i, + k, + Bvalue); } } Avalue = 1.5*(K - 1.0); @@ -241,23 +311,51 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, for (j=0; j<K; j++) { q = matrix_get(model->Q, K, i, j); if (q <= (p + kappa - 1.0)/(p - 2.0)) { - a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0); + a = 0.25*pow(p, 2.0)*pow( + 0.5 - kappa/2.0 - q, + p - 2.0); } else if (q <= 1.0) { a = a2g2; } else { - a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 2.0); - b = a*(2.0*q + kappa - 1.0)/(p - 2.0) + 0.5*p*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 1.0); + a = 0.25*pow(p, 2.0)*pow( + (p/(p - 2.0))* + (0.5 - kappa/2.0 - q), + p - 2.0); + b = a*(2.0*q + kappa - 1.0)/ + (p - 2.0) + + 0.5*p*pow( + p/(p - 2.0)* + (0.5 - kappa/ + 2.0 - q), + p - 1.0); } if (q <= -kappa) { - b = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0); + b = 0.5*p*pow( + 0.5 - kappa/2.0 - q, + p - 1.0); } else if ( q <= 1.0) { - b = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p); + b = p*pow(1.0 - q, + 2.0*p - 1.0)/ + pow(2*kappa+2.0, p); } for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); - matrix_add(B, K-1, i, k, Bvalue); + Bvalue = in*rho[i]*omega*b* + matrix3_get( + model->UU, + K-1, + K, + i, + k, + j); + matrix_add( + B, + K-1, + i, + k, + Bvalue); } - Avalue += a*matrix_get(model->R, K, i, j); + Avalue += a*matrix_get(model->R, + K, i, j); } } Avalue *= omega; @@ -352,7 +450,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, // This step should not be necessary, as the matrix // ZAZ is positive semi-definite by definition. It // is included for safety. - fprintf(stderr, "Received nonzero status from dposv: %i\n", status); + fprintf(stderr, "Received nonzero status from dposv: %i\n", + status); int *IPIV = malloc((m+1)*sizeof(int)); double *WORK = malloc(1*sizeof(double)); status = dsysv( @@ -379,7 +478,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, WORK, sizeof(WORK)/sizeof(double)); if (status != 0) - fprintf(stderr, "Received nonzero status from dsysv: %i\n", status); + fprintf(stderr, "Received nonzero status from " + "dsysv: %i\n", status); } // Return to Row-major order. The matrix ZAZVT contains the @@ -403,8 +503,18 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, for (i=0; i<m+1; i++) { for (j=0; j<K-1; j++) { - matrix_set(model->Vbar, K-1, i, j, matrix_get(model->V, K-1, i, j)); - matrix_set(model->V, K-1, i, j, matrix_get(ZAZV, K-1, i, j)); + matrix_set( + model->Vbar, + K-1, + i, + j, + matrix_get(model->V, K-1, i, j)); + matrix_set( + model->V, + K-1, + i, + j, + matrix_get(ZAZV, K-1, i, j)); } } } |
