Diffstat (limited to 'src')
-rw-r--r--   src/crossval.c                 80
-rw-r--r--   src/libMSVMMaj.c              506
-rw-r--r--   src/matrix.c                    7
-rw-r--r--   src/msvmmaj_pred.c            111
-rw-r--r--   src/msvmmaj_train.c           410
-rw-r--r--   src/msvmmaj_train_dataset.c   402
-rw-r--r--   src/mylapack.c                 49
-rw-r--r--   src/predMSVMMaj.c              45
-rw-r--r--   src/strutil.c                  91
-rw-r--r--   src/timer.c                     8
-rw-r--r--   src/trainMSVMMaj.c             62
-rw-r--r--   src/trainMSVMMajdataset.c     186
-rw-r--r--   src/util.c                    138
13 files changed, 1472 insertions, 623 deletions
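The first diff below adds a small cross-validation API: msvmmaj_make_cv_split() assigns each of N observations to one of `folds` folds, and msvmmaj_get_tt_split() materializes the train/test pair for a given fold. A minimal usage sketch follows (editor's illustration, not part of the commit; the helper name example_cv is hypothetical, and the declarations are assumed to come from crossval.h and MSVMMaj.h as in the diff):

    #include <stdlib.h>
    #include "crossval.h"
    #include "MSVMMaj.h"

    void example_cv(struct MajData *full)
    {
        long f, folds = 10;
        /* cv_idx must start zero-initialized: the split routine
         * treats 0 as "not yet assigned" */
        long *cv_idx = calloc(full->n, sizeof(long));
        struct MajData train, test;

        msvmmaj_make_cv_split(full->n, folds, cv_idx);
        for (f=0; f<folds; f++) {
            /* allocates train.y/train.Z and test.y/test.Z internally */
            msvmmaj_get_tt_split(full, &train, &test, cv_idx, f);
            /* ... train on `train`, evaluate on `test` ... */
            free(train.y); free(train.Z);
            free(test.y);  free(test.Z);
        }
        free(cv_idx);
    }

This mirrors how cross_validation() in msvmmaj_train_dataset.c drives the same two functions, where cv_idx is likewise allocated with Calloc and so arrives zeroed.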
diff --git a/src/crossval.c b/src/crossval.c new file mode 100644 index 0000000..9a3c1cc --- /dev/null +++ b/src/crossval.c @@ -0,0 +1,80 @@ +#include "crossval.h" +#include "matrix.h" +#include "MSVMMaj.h" + +void msvmmaj_make_cv_split(long N, long folds, long *cv_idx) +{ + long i, j, idx; + + long big_folds = N%folds; + long small_fold_size = N/folds; + + j = 0; + for (i=0; i<small_fold_size*folds; i++) + while (1) { + idx = rand()%N; + if (cv_idx[idx] == 0) { + cv_idx[idx] = j; + j++; + j%=folds; + break; + } + } + j = 0; + i = 0; + while (i < big_folds) { + if (cv_idx[j] == 0) { + cv_idx[j] = i++; + } + j++; + } +} + +void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data, + struct MajData *test_data, long *cv_idx, long fold_idx) +{ + long i, j, k, l, test_n, train_n; + + long n = full_data->n; + long m = full_data->m; + long K = full_data->K; + + test_n = 0; + for (i=0; i<n; i++) + if (cv_idx[i] == fold_idx) + test_n++; + train_n = n - test_n; + + test_data->n = test_n; + train_data->n = train_n; + + train_data->K = K; + test_data->K = K; + + train_data->m = m; + test_data->m = m; + + train_data->y = Calloc(long, train_n); + test_data->y = Calloc(long, test_n); + + train_data->Z = Calloc(double, train_n*(m+1)); + test_data->Z = Calloc(double, test_n*(m+1)); + + k = 0; + l = 0; + for (i=0; i<n; i++) { + if (cv_idx[i] == fold_idx) { + test_data->y[k] = full_data->y[i]; + for (j=0; j<m+1; j++) + matrix_set(test_data->Z, m+1, k, j, + matrix_get(full_data->Z, m+1, i, j)); + k++; + } else { + train_data->y[l] = full_data->y[i]; + for (j=0; j<m+1; j++) + matrix_set(train_data->Z, m+1, l, j, + matrix_get(full_data->Z, m+1, i, j)); + l++; + } + } +} diff --git a/src/libMSVMMaj.c b/src/libMSVMMaj.c index 202e228..9544830 100644 --- a/src/libMSVMMaj.c +++ b/src/libMSVMMaj.c @@ -12,10 +12,17 @@ * */ +#include <cblas.h> +#include <math.h> + #include "libMSVMMaj.h" +#include "MSVMMaj.h" +#include "matrix.h" + +inline double rnd() { return (double) rand()/0x7FFFFFFF; } /** - * @name simplex_gen + * @name msvmmaj_simplex_gen * @brief Generate matrix of simplex vertex coordinates * @ingroup libMSVMMaj * @@ -28,7 +35,7 @@ * @param [in] K number of classes * @param [in,out] U simplex matrix of size K * (K-1) */ -void simplex_gen(long K, double *U) +void msvmmaj_simplex_gen(long K, double *U) { long i, j; for (i=0; i<K; i++) { @@ -48,7 +55,7 @@ void simplex_gen(long K, double *U) Generate the category matrix R. The category matrix has 1's everywhere except at the column corresponding to the label of instance i. */ -void category_matrix(struct Model *model, struct Data *dataset) +void msvmmaj_category_matrix(struct MajModel *model, struct MajData *dataset) { long i, j; long n = model->n; @@ -66,7 +73,7 @@ void category_matrix(struct Model *model, struct Data *dataset) /*! * Simplex diff */ -void simplex_diff(struct Model *model, struct Data *data) +void msvmmaj_simplex_diff(struct MajModel *model, struct MajData *data) { long i, j, k; double value; @@ -92,7 +99,7 @@ void simplex_diff(struct Model *model, struct Data *data) pre-allocated block of memory, since it would be inefficient to keep reassigning this block at every iteration. */ -void calculate_errors(struct Model *model, struct Data *data, double *ZV) +void msvmmaj_calculate_errors(struct MajModel *model, struct MajData *data, double *ZV) { long i, j, k; double a, value; @@ -132,7 +139,7 @@ void calculate_errors(struct Model *model, struct Data *data, double *ZV) /*! 
Calculate the Huber hinge errors for each error in the matrix Q. */ -void calculate_huber(struct Model *model) +void msvmmaj_calculate_huber(struct MajModel *model) { long i, j; double q, value; @@ -151,51 +158,8 @@ void calculate_huber(struct Model *model) } } -/*! - Calculate the value of the loss function based on the current estimate - of V. -*/ -double get_msvmmaj_loss(struct Model *model, struct Data *data, double *ZV) -{ - long i, j; - long n = data->n; - long K = data->K; - long m = data->m; - - double value, rowvalue, loss = 0.0; - - calculate_errors(model, data, ZV); - calculate_huber(model); - - for (i=0; i<n; i++) { - rowvalue = 0; - value = 0; - for (j=0; j<K; j++) { - value = matrix_get(model->H, K, i, j); - value = pow(value, model->p); - value *= matrix_get(model->R, K, i, j); - rowvalue += value; - } - rowvalue = pow(rowvalue, 1.0/(model->p)); - rowvalue *= model->rho[i]; - loss += rowvalue; - } - loss /= ((double) n); - - value = 0; - for (i=1; i<m+1; i++) { - for (j=0; j<K-1; j++) { - value += pow(matrix_get(model->V, K-1, i, j), 2.0); - } - } - loss += model->lambda * value; - - return loss; -} - - /** - * @name seed_model_V + * @name msvmmaj_seed_model_V * @brief seed the matrix V from an existing model or using rand * @ingroup libMSVMMaj * @@ -209,7 +173,7 @@ double get_msvmmaj_loss(struct Model *model, struct Data *data, double *ZV) * @param [in] from_model model from which to copy V * @param [in,out] to_model model to which V will be copied */ -void seed_model_V(struct Model *from_model, struct Model *to_model) +void msvmmaj_seed_model_V(struct MajModel *from_model, struct MajModel *to_model) { long i, j; long m = to_model->m; @@ -230,82 +194,10 @@ void seed_model_V(struct Model *from_model, struct Model *to_model) } /*! - Training loop is defined here. -*/ - -void main_loop(struct Model *model, struct Data *data) -{ - long i, j, it = 0; - double L, Lbar; - - long n = model->n; - long m = model->m; - long K = model->K; - - srand(time(NULL)); - - double *B = Calloc(double, n*(K-1)); - double *ZV = Calloc(double, n*(K-1)); - double *ZAZ = Calloc(double, (m+1)*(m+1)); - double *ZAZV = Calloc(double, (m+1)*(K-1)); - double *ZAZVT = Calloc(double, (m+1)*(K-1)); - - info("Starting main loop.\n"); - info("Dataset:\n"); - info("\tn = %i\n", n); - info("\tm = %i\n", m); - info("\tK = %i\n", K); - info("Parameters:\n"); - info("\tkappa = %f\n", model->kappa); - info("\tp = %f\n", model->p); - info("\tlambda = %15.16f\n", model->lambda); - info("\tepsilon = %g\n", model->epsilon); - info("\n"); - - simplex_gen(model->K, model->U); - simplex_diff(model, data); - category_matrix(model, data); - - L = get_msvmmaj_loss(model, data, ZV); - Lbar = L + 2.0*model->epsilon*L; - - while ((it < MAX_ITER) && (Lbar - L)/L > model->epsilon) - { - // ensure V contains newest V and Vbar contains V from previous - msvmmaj_update(model, data, B, ZAZ, - ZAZV, ZAZVT); - if (it > 50) - step_doubling(model); - - Lbar = L; - L = get_msvmmaj_loss(model, data, ZV); - - if (it%100 == 0) - info("iter = %li, L = %15.16f, Lbar = %15.16f, reldiff = %15.16f\n", - it, L, Lbar, (Lbar - L)/L); - it++; - } - - info("optimization finished, iter = %li\n", it-1); - - for (i=0; i<K-1; i++) - model->t[i] = matrix_get(model->V, K-1, 0, i); - for (i=1; i<m+1; i++) - for (j=0; j<K-1; j++) - matrix_set(model->W, K-1, i-1, j, matrix_get(model->V, K-1, i, j)); - - free(B); - free(ZV); - free(ZAZ); - free(ZAZV); - free(ZAZVT); -} - -/*! 
* Step doubling */ -void step_doubling(struct Model *model) +void msvmmaj_step_doubling(struct MajModel *model) { long i, j; @@ -321,289 +213,10 @@ void step_doubling(struct Model *model) } /*! - * dposv - */ - -int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, - int LDB) -{ - extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A, - int *LDAp, double *B, int *LDBp, int *INFOp); - int INFO; - dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO); - return INFO; -} - -/*! - * dsysv - */ - -int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, - double *B, int LDB, double *WORK, int LWORK) -{ - extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A, - int *LDAp, int *IPIV, double *B, int *LDBp, - double *WORK, int *LWORK, int *INFOp); - int INFO; - dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO); - return INFO; -} - -/*! - * msvmmaj_update - */ - -void msvmmaj_update(struct Model *model, struct Data *data, - double *B, double *ZAZ, double *ZAZV, double *ZAZVT) -{ - // Because msvmmaj_update is always called after a call to - // get_msvmmaj_loss with the latest V, it is unnecessary to recalculate - // the matrix ZV, the errors Q, or the Huber errors H. Awesome! - int status, class; - long i, j, k; - double Avalue, Bvalue; - double omega, value, a, b, q, h, r; - - long n = model->n; - long m = model->m; - long K = model->K; - - double kappa = model->kappa; - double p = model->p; - double *rho = model->rho; - - const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0); - const double in = 1.0/((double) n); - - Memset(B, double, n*(K-1)); - Memset(ZAZ, double, (m+1)*(m+1)); - b = 0; - for (i=0; i<n; i++) { - value = 0; - omega = 0; - for (j=0; j<K; j++) { - h = matrix_get(model->H, K, i, j); - r = matrix_get(model->R, K, i, j); - value += (h*r > 0) ? 1 : 0; - omega += pow(h, p)*r; - } - class = (value <= 1.0) ? 
1 : 0; - omega = (1.0/p)*pow(omega, 1.0/p - 1.0); - - Avalue = 0; - if (class == 1) { - for (j=0; j<K; j++) { - q = matrix_get(model->Q, K, i, j); - if (q <= -kappa) { - a = 0.25/(0.5 - kappa/2.0 - q); - b = 0.5; - } else if (q <= 1.0) { - a = 1.0/(2.0*kappa + 2.0); - b = (1.0 - q)*a; - } else { - a = -0.25/(0.5 - kappa/2.0 - q); - b = 0; - } - for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*b*matrix3_get(model->UU, K-1, K, i, k, j); - matrix_add(B, K-1, i, k, Bvalue); - } - Avalue += a*matrix_get(model->R, K, i, j); - } - } else { - if (2.0 - p < 0.0001) { - for (j=0; j<K; j++) { - q = matrix_get(model->Q, K, i, j); - if (q <= -kappa) { - b = 0.5 - kappa/2.0 - q; - } else if ( q <= 1.0) { - b = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0)); - } else { - b = 0; - } - for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); - matrix_add(B, K-1, i, k, Bvalue); - } - } - Avalue = 1.5*(K - 1.0); - } else { - for (j=0; j<K; j++) { - q = matrix_get(model->Q, K, i, j); - if (q <= (p + kappa - 1.0)/(p - 2.0)) { - a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0); - } else if (q <= 1.0) { - a = a2g2; - } else { - a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 2.0); - b = a*(2.0*q + kappa - 1.0)/(p - 2.0) + 0.5*p*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 1.0); - } - if (q <= -kappa) { - b = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0); - } else if ( q <= 1.0) { - b = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p); - } - for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); - matrix_add(B, K-1, i, k, Bvalue); - } - Avalue += a*matrix_get(model->R, K, i, j); - } - } - Avalue *= omega; - } - Avalue *= in * rho[i]; - - // Now we calculate the matrix ZAZ. Since this is - // guaranteed to be symmetric, we only calculate the - // upper part of the matrix, and then copy this over - // to the lower part after all calculations are done. - // Note that the use of dsym is faster than dspr, even - // though dspr uses less memory. - cblas_dsyr( - CblasRowMajor, - CblasUpper, - m+1, - Avalue, - &data->Z[i*(m+1)], - 1, - ZAZ, - m+1); - } - // Copy upper to lower (necessary because we need to switch - // to Col-Major order for LAPACK). - /* - for (i=0; i<m+1; i++) - for (j=0; j<m+1; j++) - matrix_set(ZAZ, m+1, j, i, matrix_get(ZAZ, m+1, i, j)); - */ - - // Calculate the right hand side of the system we - // want to solve. - cblas_dsymm( - CblasRowMajor, - CblasLeft, - CblasUpper, - m+1, - K-1, - 1.0, - ZAZ, - m+1, - model->V, - K-1, - 0.0, - ZAZV, - K-1); - cblas_dgemm( - CblasRowMajor, - CblasTrans, - CblasNoTrans, - m+1, - K-1, - n, - 1.0, - data->Z, - m+1, - B, - K-1, - 1.0, - ZAZV, - K-1); - /* - * Add lambda to all diagonal elements except the - * first one. - */ - i = 0; - for (j=0; j<m; j++) - ZAZ[i+=m+1 + 1] += model->lambda; - - // For the LAPACK call we need to switch to Column- - // Major order. This is unnecessary for the matrix - // ZAZ because it is symmetric. The matrix ZAZV - // must be converted however. - for (i=0; i<m+1; i++) - for (j=0; j<K-1; j++) - ZAZVT[j*(m+1)+i] = ZAZV[i*(K-1)+j]; - - // We use the lower ('L') part of the matrix ZAZ, - // because we have used the upper part in the BLAS - // calls above in Row-major order, and Lapack uses - // column major order. - status = dposv( - 'L', - m+1, - K-1, - ZAZ, - m+1, - ZAZVT, - m+1); - - if (status != 0) { - // This step should not be necessary, as the matrix - // ZAZ is positive semi-definite by definition. It - // is included for safety. 
- fprintf(stderr, "Received nonzero status from dposv: %i\n", status); - int *IPIV = malloc((m+1)*sizeof(int)); - double *WORK = malloc(1*sizeof(double)); - status = dsysv( - 'L', - m+1, - K-1, - ZAZ, - m+1, - IPIV, - ZAZVT, - m+1, - WORK, - -1); - WORK = (double *)realloc(WORK, WORK[0]*sizeof(double)); - status = dsysv( - 'L', - m+1, - K-1, - ZAZ, - m+1, - IPIV, - ZAZVT, - m+1, - WORK, - sizeof(WORK)/sizeof(double)); - if (status != 0) - fprintf(stderr, "Received nonzero status from dsysv: %i\n", status); - } - - // Return to Row-major order. The matrix ZAZVT contains the - // solution after the dposv/dsysv call. - for (i=0; i<m+1; i++) - for (j=0; j<K-1; j++) - ZAZV[i*(K-1)+j] = ZAZVT[j*(m+1)+i]; - - // Store the previous V in Vbar, assign the new V - // (which is stored in ZAZVT) to the model, and give ZAZVT the - // address of Vbar. This should ensure that we keep - // re-using assigned memory instead of reallocating at every - // update. - /* See this answer: http://stackoverflow.com/q/13246615/ - * For now we'll just do it by value until the rest is figured out. - ptr = model->Vbar; - model->Vbar = model->V; - model->V = ZAZVT; - ZAZVT = ptr; - */ - - for (i=0; i<m+1; i++) { - for (j=0; j<K-1; j++) { - matrix_set(model->Vbar, K-1, i, j, matrix_get(model->V, K-1, i, j)); - matrix_set(model->V, K-1, i, j, matrix_get(ZAZV, K-1, i, j)); - } - } -} - -/*! * initialize_weights */ -void initialize_weights(struct Data *data, struct Model *model) +void msvmmaj_initialize_weights(struct MajData *data, struct MajModel *model) { long *groups; long i; @@ -617,92 +230,13 @@ void initialize_weights(struct Data *data, struct Model *model) } else if (model->weight_idx == 2) { groups = Calloc(long, K); - for (i=0; i<n; i++) { + for (i=0; i<n; i++) groups[data->y[i]-1]++; - } - for (i=0; i<n; i++) { - model->rho[i] = 1.0/((double) groups[data->y[i]-1]); - } + for (i=0; i<n; i++) + model->rho[i] = ((double) n)/((double) (groups[data->y[i]-1]*K)); } else { fprintf(stderr, "Unknown weight specification.\n"); exit(1); } } -/*! - * predict_labels - */ - -void predict_labels(struct Data *data, struct Model *model, long *predy) -{ - long i, j, k, label; - double norm, min_dist; - - long n = data->n; // note that model->n is the size of the training sample. - long m = data->m; - long K = model->K; //data->K does not necessarily equal the original K. - - double *S = Calloc(double, K-1); - double *ZV = Calloc(double, n*(K-1)); - double *U = Calloc(double, K*(K-1)); - - // Get the simplex matrix - simplex_gen(K, U); - - // Generate the simplex-space vectors - cblas_dgemm( - CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - n, - K-1, - m+1, - 1.0, - data->Z, - m+1, - model->V, - K-1, - 0.0, - ZV, - K-1); - - // Calculate the distance to each of the vertices of the simplex. - // The closest vertex defines the class label. - for (i=0; i<n; i++) { - label = 0; - min_dist = 1000000000.0; - for (j=0; j<K; j++) { - for (k=0; k<K-1; k++) { - S[k] = matrix_get(ZV, K-1, i, k) - matrix_get(U, K-1, j, k); - } - norm = cblas_dnrm2(K, S, 1); - if (norm < min_dist) { - label = j+1; - min_dist = norm; - } - } - predy[i] = label; - } - - free(ZV); - free(U); - free(S); -} - -/*! 
- * prediction_perf - */ - -double prediction_perf(struct Data *data, long *predy) -{ - long i, correct = 0; - double performance; - - for (i=0; i<data->n; i++) - if (data->y[i] == predy[i]) - correct++; - - performance = ((double) correct)/((double) data->n) * 100.0; - - return performance; -} diff --git a/src/matrix.c b/src/matrix.c index fc29ecf..8803e8b 100644 --- a/src/matrix.c +++ b/src/matrix.c @@ -13,6 +13,7 @@ */ #include "matrix.h" +#include "util.h" /** * @name matrix_set @@ -68,9 +69,9 @@ void print_matrix(double *M, long rows, long cols) for (i=0; i<rows; i++) { for (j=0; j<cols; j++) - info("%8.8f ", matrix_get(M, cols, i, j)); - info("\n"); + note("%8.8f ", matrix_get(M, cols, i, j)); + note("\n"); } - info("\n"); + note("\n"); } diff --git a/src/msvmmaj_pred.c b/src/msvmmaj_pred.c new file mode 100644 index 0000000..5f1b1ae --- /dev/null +++ b/src/msvmmaj_pred.c @@ -0,0 +1,111 @@ +/** + * @file msvmmaj_pred.c + * @author Gertjan van den Burg (burg@ese.eur.nl) + * @date August 9, 2013 + * @brief Main functions for predicting class labels.. + * + */ + +#include <cblas.h> + +#include "libMSVMMaj.h" +#include "MSVMMaj.h" +#include "matrix.h" +#include "msvmmaj_pred.h" + +/** + * @name predict_labels + * @brief Predict class labels of data given and output in predy + * + * The labels are predicted by mapping each instance in data to the + * simplex space using the matrix V in the given model. Next, for each + * instance the nearest simplex vertex is determined using an Euclidean + * norm. The nearest simplex vertex determines the predicted class label, + * which is recorded in predy + * + * @param [in] data data to predict labels for + * @param [in] model model with optimized V + * @param [out] predy pre-allocated vector to record predictions in + */ +void msvmmaj_predict_labels(struct MajData *data, struct MajModel *model, long *predy) +{ + long i, j, k, label; + double norm, min_dist; + + long n = data->n; // note that model->n is the size of the training sample. + long m = data->m; + long K = model->K; //data->K does not necessarily equal the original K. + + double *S = Calloc(double, K-1); + double *ZV = Calloc(double, n*(K-1)); + double *U = Calloc(double, K*(K-1)); + + // Get the simplex matrix + msvmmaj_simplex_gen(K, U); + + // Generate the simplex-space vectors + cblas_dgemm( + CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + n, + K-1, + m+1, + 1.0, + data->Z, + m+1, + model->V, + K-1, + 0.0, + ZV, + K-1); + + // Calculate the distance to each of the vertices of the simplex. + // The closest vertex defines the class label. + for (i=0; i<n; i++) { + label = 0; + min_dist = 1000000000.0; + for (j=0; j<K; j++) { + for (k=0; k<K-1; k++) { + S[k] = matrix_get(ZV, K-1, i, k) - matrix_get(U, K-1, j, k); + } + norm = cblas_dnrm2(K, S, 1); + if (norm < min_dist) { + label = j+1; // labels start counting from 1 + min_dist = norm; + } + } + predy[i] = label; + } + + free(ZV); + free(U); + free(S); +} + +/** + * @name msvmmaj_prediction_perf + * @brief Calculate the predictive performance (percentage correct) + * + * The predictive performance is calculated by simply counting the number + * of correctly classified samples and dividing by the total number of + * samples, multiplying by 100. + * + * @param [in] data the dataset with known labels + * @param [in] predy the predicted class labels + * + * @returns percentage correctly classified. 
+ */ +double msvmmaj_prediction_perf(struct MajData *data, long *predy) +{ + long i, correct = 0; + double performance; + + for (i=0; i<data->n; i++) + if (data->y[i] == predy[i]) + correct++; + + performance = ((double) correct)/((double) data->n)* 100.0; + + return performance; +} diff --git a/src/msvmmaj_train.c b/src/msvmmaj_train.c new file mode 100644 index 0000000..272d86a --- /dev/null +++ b/src/msvmmaj_train.c @@ -0,0 +1,410 @@ +/** + * @file msvmmaj_train.c + * @author Gertjan van den Burg (burg@ese.eur.nl) + * @date August 9, 2013 + * @brief Main functions for training the MSVMMaj solution. + * + * @details + * Contains update and loss functions used to actually find + * the optimal V. + * + */ + +#include <math.h> +#include <cblas.h> + +#include "msvmmaj_train.h" +#include "MSVMMaj.h" +#include "libMSVMMaj.h" +#include "mylapack.h" +#include "matrix.h" +#include "util.h" + +#define MAX_ITER 1000000 + +/** + * @name msvmmaj_optimize + * @brief The main training loop for MSVMMaj + * + * The msvmmaj_optimize() function is the main training function. This function + * handles the optimization of the model with the given model parameters, with + * the data given. On return the matrix model->V contains the optimal weight matrix. + * + * @param [in,out] model the model to be trained. Contains optimal V on exit. + * @param [in] data the data to train the model with. + */ +void msvmmaj_optimize(struct MajModel *model, struct MajData *data) +{ + long i, j, it = 0; + double L, Lbar; + + long n = model->n; + long m = model->m; + long K = model->K; + + double *B = Calloc(double, n*(K-1)); + double *ZV = Calloc(double, n*(K-1)); + double *ZAZ = Calloc(double, (m+1)*(m+1)); + double *ZAZV = Calloc(double, (m+1)*(K-1)); + double *ZAZVT = Calloc(double, (m+1)*(K-1)); + + note("Starting main loop.\n"); + note("MajDataset:\n"); + note("\tn = %i\n", n); + note("\tm = %i\n", m); + note("\tK = %i\n", K); + note("Parameters:\n"); + note("\tkappa = %f\n", model->kappa); + note("\tp = %f\n", model->p); + note("\tlambda = %15.16f\n", model->lambda); + note("\tepsilon = %g\n", model->epsilon); + note("\n"); + + msvmmaj_simplex_gen(model->K, model->U); + msvmmaj_simplex_diff(model, data); + msvmmaj_category_matrix(model, data); + + L = msvmmaj_get_loss(model, data, ZV); + Lbar = L + 2.0*model->epsilon*L; + + while ((it < MAX_ITER) && (Lbar - L)/L > model->epsilon) + { + // ensure V contains newest V and Vbar contains V from previous + msvmmaj_get_update(model, data, B, ZAZ, ZAZV, ZAZVT); + if (it > 50) + msvmmaj_step_doubling(model); + + Lbar = L; + L = msvmmaj_get_loss(model, data, ZV); + + if (it%50 == 0) + note("iter = %li, L = %15.16f, Lbar = %15.16f, reldiff = %15.16f\n", + it, L, Lbar, (Lbar - L)/L); + it++; + } + + note("optimization finished, iter = %li, error = %8.8f\n", it-1, + (Lbar - L)/L); + model->training_error = (Lbar - L)/L; + + for (i=0; i<K-1; i++) + model->t[i] = matrix_get(model->V, K-1, 0, i); + for (i=1; i<m+1; i++) + for (j=0; j<K-1; j++) + matrix_set(model->W, K-1, i-1, j, matrix_get(model->V, K-1, i, j)); + free(B); + free(ZV); + free(ZAZ); + free(ZAZV); + free(ZAZVT); +} + +/** + * @name msvmmaj_get_loss + * @brief calculate the current value of the loss function + * + * The current loss value is calculated based on the matrix V in the given + * model. 
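 * Editorial note (derived from the function body, not part of the
 * original comment): the quantity computed is
 *
 *   L(V) = (1/n) * sum_i rho_i * ( sum_{j != y_i} h(q_{ij})^p )^(1/p)
 *          + lambda * sum_{t=1}^{m} sum_{k=1}^{K-1} v_{tk}^2
 *
 * where h(.) is the Huber hinge stored in H, the mask R zeroes out the
 * column of the true class y_i, and row 0 of V (the translation t) is
 * excluded from the penalty term.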
+ * + * @param [in] model model structure which holds the current estimate V + * @param [in] data data structure + * @param [in,out] ZV pre-allocated matrix ZV which is updated on output + * + * @return the current value of the loss function + */ +double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV) +{ + long i, j; + long n = data->n; + long K = data->K; + long m = data->m; + + double value, rowvalue, loss = 0.0; + + msvmmaj_calculate_errors(model, data, ZV); + msvmmaj_calculate_huber(model); + + for (i=0; i<n; i++) { + rowvalue = 0; + value = 0; + for (j=0; j<K; j++) { + value = matrix_get(model->H, K, i, j); + value = pow(value, model->p); + value *= matrix_get(model->R, K, i, j); + rowvalue += value; + } + rowvalue = pow(rowvalue, 1.0/(model->p)); + rowvalue *= model->rho[i]; + loss += rowvalue; + } + loss /= ((double) n); + + value = 0; + for (i=1; i<m+1; i++) { + for (j=0; j<K-1; j++) { + value += pow(matrix_get(model->V, K-1, i, j), 2.0); + } + } + loss += model->lambda * value; + + return loss; +} + +/** + * @name msvmmaj_get_update + * @brief perform a single step of the majorization algorithm to update V + * + * details + * + * @param [in,out] model model to be updated + * @param [in] data data used in model + * @param [in] B pre-allocated matrix used for linear coefficients + * @param [in] ZAZ pre-allocated matrix used in system + * @param [in] ZAZV pre-allocated matrix used in system solving + * @param [in] ZAZVT pre-allocated matrix used in system solving + */ +void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, + double *ZAZ, double *ZAZV, double *ZAZVT) +{ + // Because msvmmaj_update is always called after a call to + // msvmmaj_get_loss() with the latest V, it is unnecessary to recalculate + // the matrix ZV, the errors Q, or the Huber errors H. Awesome! + int status, class; + long i, j, k; + double Avalue, Bvalue; + double omega, value, a, b, q, h, r; + + long n = model->n; + long m = model->m; + long K = model->K; + + double kappa = model->kappa; + double p = model->p; + double *rho = model->rho; + + const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0); + const double in = 1.0/((double) n); + + Memset(B, double, n*(K-1)); + Memset(ZAZ, double, (m+1)*(m+1)); + b = 0; + for (i=0; i<n; i++) { + value = 0; + omega = 0; + for (j=0; j<K; j++) { + h = matrix_get(model->H, K, i, j); + r = matrix_get(model->R, K, i, j); + value += (h*r > 0) ? 1 : 0; + omega += pow(h, p)*r; + } + class = (value <= 1.0) ? 
1 : 0; + omega = (1.0/p)*pow(omega, 1.0/p - 1.0); + + Avalue = 0; + if (class == 1) { + for (j=0; j<K; j++) { + q = matrix_get(model->Q, K, i, j); + if (q <= -kappa) { + a = 0.25/(0.5 - kappa/2.0 - q); + b = 0.5; + } else if (q <= 1.0) { + a = 1.0/(2.0*kappa + 2.0); + b = (1.0 - q)*a; + } else { + a = -0.25/(0.5 - kappa/2.0 - q); + b = 0; + } + for (k=0; k<K-1; k++) { + Bvalue = in*rho[i]*b*matrix3_get(model->UU, K-1, K, i, k, j); + matrix_add(B, K-1, i, k, Bvalue); + } + Avalue += a*matrix_get(model->R, K, i, j); + } + } else { + if (2.0 - p < 0.0001) { + for (j=0; j<K; j++) { + q = matrix_get(model->Q, K, i, j); + if (q <= -kappa) { + b = 0.5 - kappa/2.0 - q; + } else if ( q <= 1.0) { + b = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0)); + } else { + b = 0; + } + for (k=0; k<K-1; k++) { + Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); + matrix_add(B, K-1, i, k, Bvalue); + } + } + Avalue = 1.5*(K - 1.0); + } else { + for (j=0; j<K; j++) { + q = matrix_get(model->Q, K, i, j); + if (q <= (p + kappa - 1.0)/(p - 2.0)) { + a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0); + } else if (q <= 1.0) { + a = a2g2; + } else { + a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 2.0); + b = a*(2.0*q + kappa - 1.0)/(p - 2.0) + 0.5*p*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 1.0); + } + if (q <= -kappa) { + b = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0); + } else if ( q <= 1.0) { + b = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p); + } + for (k=0; k<K-1; k++) { + Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); + matrix_add(B, K-1, i, k, Bvalue); + } + Avalue += a*matrix_get(model->R, K, i, j); + } + } + Avalue *= omega; + } + Avalue *= in * rho[i]; + + // Now we calculate the matrix ZAZ. Since this is + // guaranteed to be symmetric, we only calculate the + // upper part of the matrix, and then copy this over + // to the lower part after all calculations are done. + // Note that the use of dsym is faster than dspr, even + // though dspr uses less memory. + cblas_dsyr( + CblasRowMajor, + CblasUpper, + m+1, + Avalue, + &data->Z[i*(m+1)], + 1, + ZAZ, + m+1); + } + // Copy upper to lower (necessary because we need to switch + // to Col-Major order for LAPACK). + /* + for (i=0; i<m+1; i++) + for (j=0; j<m+1; j++) + matrix_set(ZAZ, m+1, j, i, matrix_get(ZAZ, m+1, i, j)); + */ + + // Calculate the right hand side of the system we + // want to solve. + cblas_dsymm( + CblasRowMajor, + CblasLeft, + CblasUpper, + m+1, + K-1, + 1.0, + ZAZ, + m+1, + model->V, + K-1, + 0.0, + ZAZV, + K-1); + cblas_dgemm( + CblasRowMajor, + CblasTrans, + CblasNoTrans, + m+1, + K-1, + n, + 1.0, + data->Z, + m+1, + B, + K-1, + 1.0, + ZAZV, + K-1); + /* + * Add lambda to all diagonal elements except the + * first one. + */ + i = 0; + for (j=0; j<m; j++) + ZAZ[i+=m+1 + 1] += model->lambda; + + // For the LAPACK call we need to switch to Column- + // Major order. This is unnecessary for the matrix + // ZAZ because it is symmetric. The matrix ZAZV + // must be converted however. + for (i=0; i<m+1; i++) + for (j=0; j<K-1; j++) + ZAZVT[j*(m+1)+i] = ZAZV[i*(K-1)+j]; + + // We use the lower ('L') part of the matrix ZAZ, + // because we have used the upper part in the BLAS + // calls above in Row-major order, and Lapack uses + // column major order. + status = dposv( + 'L', + m+1, + K-1, + ZAZ, + m+1, + ZAZVT, + m+1); + + if (status != 0) { + // This step should not be necessary, as the matrix + // ZAZ is positive semi-definite by definition. It + // is included for safety. 
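/*
 * Editorial note (not part of the commit): the dsysv fallback below uses
 * LAPACK's workspace-query convention -- a call with LWORK = -1 writes the
 * optimal workspace size to WORK[0], which the realloc picks up. In the
 * second call, however, `sizeof(WORK)/sizeof(double)` is the size of a
 * pointer divided by sizeof(double) (typically 1), not the allocated
 * length. A sketch of the usual idiom, using the dsysv wrapper declared
 * in mylapack.h:
 *
 *     double wkopt;
 *     status = dsysv('L', m+1, K-1, ZAZ, m+1, IPIV, ZAZVT, m+1,
 *                    &wkopt, -1);             // workspace query
 *     int lwork = (int) wkopt;
 *     double *work = malloc(lwork*sizeof(double));
 *     status = dsysv('L', m+1, K-1, ZAZ, m+1, IPIV, ZAZVT, m+1,
 *                    work, lwork);            // actual solve
 */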
+ fprintf(stderr, "Received nonzero status from dposv: %i\n", status); + int *IPIV = malloc((m+1)*sizeof(int)); + double *WORK = malloc(1*sizeof(double)); + status = dsysv( + 'L', + m+1, + K-1, + ZAZ, + m+1, + IPIV, + ZAZVT, + m+1, + WORK, + -1); + WORK = (double *)realloc(WORK, WORK[0]*sizeof(double)); + status = dsysv( + 'L', + m+1, + K-1, + ZAZ, + m+1, + IPIV, + ZAZVT, + m+1, + WORK, + sizeof(WORK)/sizeof(double)); + if (status != 0) + fprintf(stderr, "Received nonzero status from dsysv: %i\n", status); + } + + // Return to Row-major order. The matrix ZAZVT contains the + // solution after the dposv/dsysv call. + for (i=0; i<m+1; i++) + for (j=0; j<K-1; j++) + ZAZV[i*(K-1)+j] = ZAZVT[j*(m+1)+i]; + + // Store the previous V in Vbar, assign the new V + // (which is stored in ZAZVT) to the model, and give ZAZVT the + // address of Vbar. This should ensure that we keep + // re-using assigned memory instead of reallocating at every + // update. + /* See this answer: http://stackoverflow.com/q/13246615/ + * For now we'll just do it by value until the rest is figured out. + ptr = model->Vbar; + model->Vbar = model->V; + model->V = ZAZVT; + ZAZVT = ptr; + */ + + for (i=0; i<m+1; i++) { + for (j=0; j<K-1; j++) { + matrix_set(model->Vbar, K-1, i, j, matrix_get(model->V, K-1, i, j)); + matrix_set(model->V, K-1, i, j, matrix_get(ZAZV, K-1, i, j)); + } + } +} diff --git a/src/msvmmaj_train_dataset.c b/src/msvmmaj_train_dataset.c new file mode 100644 index 0000000..2da8bee --- /dev/null +++ b/src/msvmmaj_train_dataset.c @@ -0,0 +1,402 @@ +#include <math.h> +#include <time.h> + +#include "crossval.h" +#include "libMSVMMaj.h" +#include "matrix.h" +#include "msvmmaj_train.h" +#include "msvmmaj_train_dataset.h" +#include "msvmmaj_pred.h" +#include "MSVMMaj.h" +#include "util.h" +#include "timer.h" + +extern FILE *MSVMMAJ_OUTPUT_FILE; + +void make_queue(struct Training *training, struct Queue *queue, + struct MajData *train_data, struct MajData *test_data) +{ + long i, j, k, l, m; + long N, cnt = 0; + struct Task *task; + queue->i = 0; + + N = training->Np; + N *= training->Nl; + N *= training->Nk; + N *= training->Ne; + N *= training->Nw; + + queue->tasks = Malloc(struct Task *, N); + queue->N = N; + + for (i=0; i<training->Ne; i++) + for (j=0; j<training->Nw; j++) + for (k=0; k<training->Nk; k++) + for (l=0; l<training->Nl; l++) + for (m=0; m<training->Np; m++) { + task = Malloc(struct Task, 1); + task->epsilon = training->epsilons[i]; + task->weight_idx = training->weight_idxs[j]; + task->kappa = training->kappas[k]; + task->lambda = training->lambdas[l]; + task->p = training->ps[m]; + task->train_data = train_data; + task->test_data = test_data; + task->folds = training->folds; + task->ID = cnt; + queue->tasks[cnt] = task; + cnt++; + } +} + +struct Task *get_next_task(struct Queue *q) +{ + long i = q->i; + if (i < q->N) { + q->i++; + return q->tasks[i]; + } + return NULL; +} + +int tasksort(const void *elem1, const void *elem2) +{ + const struct Task *t1 = (*(struct Task **) elem1); + const struct Task *t2 = (*(struct Task **) elem2); + return (t1->performance > t2->performance); +} + +int doublesort(const void *elem1, const void *elem2) +{ + const double t1 = (*(double *) elem1); + const double t2 = (*(double *) elem2); + return t1 > t2; +} + + +double prctile(double *values, long N, double p) +{ + long i; + double pi, pr, boundary; + double *local = Malloc(double, N); + for (i=0; i<N; i++) + local[i] = values[i]; + + qsort(local, N, sizeof(double), doublesort); + p = p*N + 0.5; + pi = 
maximum(minimum(floor(p), N-1), 1); + pr = maximum(minimum(p - pi, 1), 0); + boundary = (1 - pr)*local[((long) pi)-1] + pr*local[((long) pi)]; + + free(local); + + return boundary; +} + +void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) +{ + long i, r, N; + double p, pi, pr, boundary, time, *std, *mean, *perf; + struct Queue *nq = Malloc(struct Queue, 1); + struct MajModel *model = Malloc(struct MajModel, 1); + struct Task *task = Malloc(struct Task, 1); + clock_t loop_s, loop_e; + + // calculate the percentile (Matlab style) + qsort(q->tasks, q->N, sizeof(struct Task *), tasksort); + p = 0.95*q->N + 0.5; + pi = maximum(minimum(floor(p), q->N-1), 1); + pr = maximum(minimum(p - pi, 1), 0); + boundary = (1 - pr)*q->tasks[((long) pi)-1]->performance; + boundary += pr*q->tasks[((long) pi)]->performance; + note("boundary determined at: %f\n", boundary); + + N = 0; + for (i=0; i<q->N; i++) + if (q->tasks[i]->performance >= boundary) + N++; + note("Number of items: %li\n", N); + std = Calloc(double, N); + mean = Calloc(double, N); + perf = Calloc(double, N*repeats); + + nq->tasks = Malloc(struct Task *, N); + for (i=q->N-1; i>q->N-N-1; i--) + nq->tasks[q->N-i-1] = q->tasks[i]; + nq->N = N; + nq->i = 0; + + for (i=0; i<N; i++) { + task = get_next_task(nq); + make_model_from_task(task, model); + + model->n = task->train_data->n; + model->m = task->train_data->m; + model->K = task->train_data->K; + + time = 0; + note("(%02li/%02li:%03li)\t", i+1, N, task->ID); + for (r=0; r<repeats; r++) { + if (traintype == CV) { + loop_s = clock(); + p = cross_validation(model, NULL, task->train_data, task->folds); + loop_e = clock(); + time += elapsed_time(loop_s, loop_e); + matrix_set(perf, repeats, i, r, p); + mean[i] += p/((double) repeats); + } else { + note("Only cv is implemented\n"); + exit(1); + } + note("%3.3f\t", p); + } + for (r=0; r<repeats; r++) { + std[i] += pow(matrix_get(perf, repeats, i, r) - mean[i], 2); + } + std[i] /= ((double) repeats) - 1.0; + std[i] = sqrt(std[i]); + note("(m = %3.3f, s = %3.3f, t = %3.3f)\n", mean[i], std[i], time); + } + + note("\nBest overall configuration(s):\n"); + note("ID\tweights\tepsilon\t\tp\t\tkappa\t\tlambda\t\tmean_perf\tstd_perf\n"); + p = 0.0; + bool breakout = false; + while (breakout == false) { + pi = prctile(mean, N, (100.0-p)/100.0); + pr = prctile(std, N, p/100.0); + for (i=0; i<N; i++) + if ((pi - mean[i] < 0.0001) && (std[i] - pr < 0.0001)) { + note("(%li)\tw = %li\te = %f\tp = %f\tk = %f\tl = %f\t" + "mean: %3.3f\tstd: %3.3f\n", + nq->tasks[i]->ID, + nq->tasks[i]->weight_idx, + nq->tasks[i]->epsilon, nq->tasks[i]->p, + nq->tasks[i]->kappa, nq->tasks[i]->lambda, + mean[i], std[i]); + breakout = true; + } + p += 1.0; + } + + free(task); + free(model); + free(perf); + free(std); + free(mean); +} + +double cross_validation(struct MajModel *model, struct MajModel *seed_model, + struct MajData *data, long folds) +{ + FILE *fid; + + bool fs = false; + long f, *predy; + double total_perf = 0; + struct MajModel *fold_model; + struct MajData *train_data, *test_data; + + long *cv_idx = Calloc(long, model->n); + double *performance = Calloc(double, folds); + + if (seed_model == NULL) { + seed_model = Malloc(struct MajModel, 1); + seed_model->n = 0; // we never use anything other than V + seed_model->m = model->m; + seed_model->K = model->K; + msvmmaj_allocate_model(seed_model); + msvmmaj_seed_model_V(NULL, seed_model); + fs = true; + } + + train_data = Malloc(struct MajData, 1); + test_data = Malloc(struct MajData, 1); + + 
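/*
 * Editorial note: the fold loop below is warm-started. Each fold_model
 * receives V from seed_model before training, and after the fold is
 * trained and evaluated its solution is copied back via
 * msvmmaj_seed_model_V(fold_model, seed_model), so every fold starts
 * from the previous fold's estimate.
 */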
msvmmaj_make_cv_split(model->n, folds, cv_idx); + for (f=0; f<folds; f++) { + msvmmaj_get_tt_split(data, train_data, test_data, cv_idx, f); + + fold_model = Malloc(struct MajModel, 1); + copy_model(model, fold_model); + + fold_model->n = train_data->n; + fold_model->m = train_data->m; + fold_model->K = train_data->K; + + msvmmaj_allocate_model(fold_model); + msvmmaj_initialize_weights(train_data, fold_model); + msvmmaj_seed_model_V(seed_model, fold_model); + + fid = MSVMMAJ_OUTPUT_FILE; + MSVMMAJ_OUTPUT_FILE = NULL; + msvmmaj_optimize(fold_model, train_data); + MSVMMAJ_OUTPUT_FILE = fid; + + predy = Calloc(long, test_data->n); + msvmmaj_predict_labels(test_data, fold_model, predy); + performance[f] = msvmmaj_prediction_perf(test_data, predy); + total_perf += performance[f]/((double) folds); + + msvmmaj_seed_model_V(fold_model, seed_model); + + free(predy); + free(train_data->y); + free(train_data->Z); + free(test_data->y); + free(test_data->Z); + + msvmmaj_free_model(fold_model); + } + + if (fs) + msvmmaj_free_model(seed_model); + free(train_data); + free(test_data); + free(performance); + free(cv_idx); + + return total_perf; + +} + +void start_training_cv(struct Queue *q) +{ + double perf, current_max = 0; + struct Task *task = get_next_task(q); + struct MajModel *seed_model = Malloc(struct MajModel, 1); + struct MajModel *model = Malloc(struct MajModel, 1); + clock_t main_s, main_e, loop_s, loop_e; + + model->n = task->train_data->n; + model->m = task->train_data->m; + model->K = task->train_data->K; + msvmmaj_allocate_model(model); + + seed_model->n = 0; + seed_model->m = task->train_data->m; + seed_model->K = task->train_data->K; + msvmmaj_allocate_model(seed_model); + msvmmaj_seed_model_V(NULL, seed_model); + + main_s = clock(); + while (task) { + note("(%03li/%03li)\tw = %li\te = %f\tp = %f\tk = %f\t l = %f\t", + task->ID+1, q->N, task->weight_idx, task->epsilon, + task->p, task->kappa, task->lambda); + make_model_from_task(task, model); + + loop_s = clock(); + perf = cross_validation(model, seed_model, task->train_data, task->folds); + loop_e = clock(); + current_max = maximum(current_max, perf); + + note("\t%3.3f%% (%3.3fs)\t(best = %3.3f%%)\n", perf, + elapsed_time(loop_s, loop_e), + current_max); + + q->tasks[task->ID]->performance = perf; + task = get_next_task(q); + } + main_e = clock(); + + note("\nTotal elapsed time: %8.8f seconds\n", + elapsed_time(main_s, main_e)); + + free(task); + msvmmaj_free_model(seed_model); +} + +void start_training_tt(struct Queue *q) +{ + FILE *fid; + + long c = 0; + long *predy; + double total_perf, current_max = 0; + + struct Task *task = get_next_task(q); + struct MajModel *seed_model = Malloc(struct MajModel, 1); + + clock_t main_s, main_e; + clock_t loop_s, loop_e; + + seed_model->m = task->train_data->m; + seed_model->K = task->train_data->K; + msvmmaj_allocate_model(seed_model); + msvmmaj_seed_model_V(NULL, seed_model); + + main_s = clock(); + while (task) { + total_perf = 0; + note("(%li/%li)\tw = %li\te = %f\tp = %f\tk = %f\tl = %f\t", + c+1, q->N, task->weight_idx, task->epsilon, + task->p, task->kappa, task->lambda); + loop_s = clock(); + struct MajModel *model = Malloc(struct MajModel, 1); + make_model_from_task(task, model); + + model->n = task->train_data->n; + model->m = task->train_data->m; + model->K = task->train_data->K; + + msvmmaj_allocate_model(model); + msvmmaj_initialize_weights(task->train_data, model); + msvmmaj_seed_model_V(seed_model, model); + + fid = MSVMMAJ_OUTPUT_FILE; + MSVMMAJ_OUTPUT_FILE = NULL; + 
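/*
 * Editorial note: fid holds the saved note() output stream; with
 * MSVMMAJ_OUTPUT_FILE set to NULL the optimizer below runs silently,
 * and the stream is restored as soon as it returns.
 */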
msvmmaj_optimize(model, task->train_data); + MSVMMAJ_OUTPUT_FILE = fid; + + predy = Calloc(long, task->test_data->n); + msvmmaj_predict_labels(task->test_data, model, predy); + if (task->test_data->y != NULL) + total_perf = msvmmaj_prediction_perf(task->test_data, predy); + msvmmaj_seed_model_V(model, seed_model); + + msvmmaj_free_model(model); + free(predy); + note("."); + loop_e = clock(); + current_max = maximum(current_max, total_perf); + note("\t%3.3f%% (%3.3fs)\t(best = %3.3f%%)\n", total_perf, + elapsed_time(loop_s, loop_e), current_max); + q->tasks[task->ID]->performance = total_perf; + task = get_next_task(q); + } + main_e = clock(); + + note("\nTotal elapsed time: %8.8f seconds\n", + elapsed_time(main_s, main_e)); + free(task); + msvmmaj_free_model(seed_model); +} + +void free_queue(struct Queue *q) +{ + long i; + for (i=0; i<q->N; i++) + free(q->tasks[i]); + free(q->tasks); + free(q); +} + +void make_model_from_task(struct Task *task, struct MajModel *model) +{ + model->weight_idx = task->weight_idx; + model->epsilon = task->epsilon; + model->p = task->p; + model->kappa = task->kappa; + model->lambda = task->lambda; +} + +void copy_model(struct MajModel *from, struct MajModel *to) +{ + to->weight_idx = from->weight_idx; + to->epsilon = from->epsilon; + to->p = from->p; + to->kappa = from->kappa; + to->lambda = from->lambda; +} diff --git a/src/mylapack.c b/src/mylapack.c new file mode 100644 index 0000000..4a9cf81 --- /dev/null +++ b/src/mylapack.c @@ -0,0 +1,49 @@ +/** + * @file mylapack.c + * @author Gertjan van den Burg (burg@ese.eur.nl) + * @date August 9, 2013 + * @brief Utility functions for interacting with LAPACK + * + * @details + * Functions in this file are auxiliary functions which make it easier + * to use LAPACK functions from liblapack. + */ + +#include "mylapack.h" + +/** + * @name dposv + * @brief Solve a system of equations AX = B where A is symmetric positive definite. + * @ingroup libMSVMMaj + * + * See the LAPACK documentation at: + * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html + */ +int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, + int LDB) +{ + extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A, + int *LDAp, double *B, int *LDBp, int *INFOp); + int INFO; + dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO); + return INFO; +} + +/** + * @name dsysv + * @brief Solve a system of equations AX = B where A is symmetric. 
+ * @ingroup libMSVMMaj + * + * See the LAPACK documentation at: + * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html + */ +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, + double *B, int LDB, double *WORK, int LWORK) +{ + extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A, + int *LDAp, int *IPIV, double *B, int *LDBp, + double *WORK, int *LWORK, int *INFOp); + int INFO; + dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO); + return INFO; +} diff --git a/src/predMSVMMaj.c b/src/predMSVMMaj.c index 5d26dc3..966c7c0 100644 --- a/src/predMSVMMaj.c +++ b/src/predMSVMMaj.c @@ -1,10 +1,14 @@ -#include "libMSVMMaj.h" +#include "msvmmaj_pred.h" +#include "MSVMMaj.h" +#include "util.h" #define MINARGS 3 +extern FILE *MSVMMAJ_OUTPUT_FILE; + void print_null(const char *s) {} void exit_with_help(); -void parse_command_line(int argc, char **argv, struct Model *model, +void parse_command_line(int argc, char **argv, struct MajModel *model, char *input_filename, char *output_filename, char *model_filename); @@ -27,17 +31,18 @@ int main(int argc, char **argv) char model_filename[MAX_LINE_LENGTH]; char output_filename[MAX_LINE_LENGTH];; - struct Model *model = Malloc(struct Model, 1); - struct Data *data = Malloc(struct Data, 1); + struct MajModel *model = Malloc(struct MajModel, 1); + struct MajData *data = Malloc(struct MajData, 1); - if (argc < MINARGS || check_argv(argc, argv, "-help") || check_argv_eq(argc, argv, "-h") ) + if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help") + || msvmmaj_check_argv_eq(argc, argv, "-h") ) exit_with_help(); parse_command_line(argc, argv, model, input_filename, output_filename, model_filename); // TODO: make sure that read_data allows for files without labels - read_data(data, input_filename); - read_model(model, model_filename); + msvmmaj_read_data(data, input_filename); + msvmmaj_read_model(model, model_filename); // check if the number of attributes in data equals that in model if (data->m != model->m) { @@ -48,29 +53,30 @@ int main(int argc, char **argv) } predy = Calloc(long, data->n); - predict_labels(data, model, predy); + msvmmaj_predict_labels(data, model, predy); if (data->y != NULL) { - performance = prediction_perf(data, predy); - info("Predictive performance: %3.2f%%\n", performance); + performance = msvmmaj_prediction_perf(data, predy); + note("Predictive performance: %3.2f%%\n", performance); } - if (check_argv_eq(argc, argv, "-o")) { - write_predictions(data, predy, output_filename); - info("Predictions written to: %s\n", output_filename); + if (msvmmaj_check_argv_eq(argc, argv, "-o")) { + msvmmaj_write_predictions(data, predy, output_filename); + note("Predictions written to: %s\n", output_filename); } - free_model(model); - free_data(data); + msvmmaj_free_model(model); + msvmmaj_free_data(data); free(predy); return 0; } -void parse_command_line(int argc, char **argv, struct Model *model, +void parse_command_line(int argc, char **argv, struct MajModel *model, char *input_filename, char *output_filename, char *model_filename) { int i; - void (*print_func)(const char*) = NULL; + + MSVMMAJ_OUTPUT_FILE = stdout; for (i=1; i<argc; i++) { if (argv[i][0] != '-') break; @@ -81,16 +87,15 @@ void parse_command_line(int argc, char **argv, struct Model *model, strcpy(output_filename, argv[i]); break; case 'q': - print_func = &print_null; + MSVMMAJ_OUTPUT_FILE = NULL; i--; + break; default: fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); exit_with_help(); } } - 
set_print_string_function(print_func); - if (i >= argc) exit_with_help(); diff --git a/src/strutil.c b/src/strutil.c new file mode 100644 index 0000000..ae96239 --- /dev/null +++ b/src/strutil.c @@ -0,0 +1,91 @@ +#include "strutil.h" + +bool str_startswith(const char *str, const char *pre) +{ + size_t lenpre = strlen(pre), + lenstr = strlen(str); + return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0; +} + +bool str_endswith(const char *str, const char *suf) +{ + size_t lensuf = strlen(suf), + lenstr = strlen(str); + return lenstr < lensuf ? false : strncmp(str + lenstr - lensuf, suf, lensuf) == 0; +} + +void next_line(FILE *fid, char *filename) +{ + char buffer[MAX_LINE_LENGTH]; + get_line(fid, filename, buffer); +} + +void get_line(FILE *fid, char *filename, char *buffer) +{ + if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) { + fprintf(stderr, "Error reading file %s\n", filename); + exit(1); + } +} + +double get_fmt_double(FILE *fid, char *filename, const char *fmt) +{ + char buffer[MAX_LINE_LENGTH]; + double value; + + get_line(fid, filename, buffer); + sscanf(buffer, fmt, &value); + return value; +} + +long get_fmt_long(FILE *fid, char *filename, const char *fmt) +{ + char buffer[MAX_LINE_LENGTH]; + long value; + + get_line(fid, filename, buffer); + sscanf(buffer, fmt, &value); + return value; +} + +long all_doubles_str(char *buffer, long offset, double *all_doubles) +{ + double value; + long i = 0; + char *start, *end; + + start = buffer + offset; + while (true) { + value = strtod(start, &end); + if (start != end) { + all_doubles[i] = value; + i++; + } else + break; + start = end; + end = NULL; + } + + return i; +} + +long all_longs_str(char *buffer, long offset, long *all_longs) +{ + long value; + long i = 0; + char *start, *end; + + start = buffer + offset; + while (true) { + value = strtol(start, &end, 10); + if (start != end) { + all_longs[i] = value; + i++; + } else + break; + start = end; + end = NULL; + } + + return i; +} diff --git a/src/timer.c b/src/timer.c new file mode 100644 index 0000000..2187fb2 --- /dev/null +++ b/src/timer.c @@ -0,0 +1,8 @@ +#include <time.h> + +#include "timer.h" + +double elapsed_time(clock_t s_time, clock_t e_time) +{ + return ((double) (e_time - s_time))/((double) CLOCKS_PER_SEC); +} diff --git a/src/trainMSVMMaj.c b/src/trainMSVMMaj.c index 5a403be..ebcf36c 100644 --- a/src/trainMSVMMaj.c +++ b/src/trainMSVMMaj.c @@ -1,10 +1,18 @@ +#include <time.h> +#include <math.h> + #include "libMSVMMaj.h" +#include "msvmmaj_train.h" +#include "util.h" +#include "MSVMMaj.h" #define MINARGS 2 +extern FILE *MSVMMAJ_OUTPUT_FILE; + void print_null(const char *s) {} void exit_with_help(); -void parse_command_line(int argc, char **argv, struct Model *model, +void parse_command_line(int argc, char **argv, struct MajModel *model, char *input_filename, char *output_filename, char *model_filename); void exit_with_help() @@ -12,7 +20,6 @@ void exit_with_help() printf("This is MSVMMaj, version %1.1f\n\n", VERSION); printf("Usage: trainMSVMMaj [options] training_data_file\n"); printf("Options:\n"); - printf("-c folds : perform cross validation with given number of folds\n"); printf("-e epsilon : set the value of the stopping criterion\n"); printf("-h | -help : print this help.\n"); printf("-k kappa : set the value of kappa used in the Huber hinge\n"); @@ -35,15 +42,16 @@ int main(int argc, char **argv) char model_filename[MAX_LINE_LENGTH]; char output_filename[MAX_LINE_LENGTH]; - struct Model *model = Malloc(struct Model, 1); - struct Data *data = 
Malloc(struct Data, 1); + struct MajModel *model = Malloc(struct MajModel, 1); + struct MajData *data = Malloc(struct MajData, 1); - if (argc < MINARGS || check_argv(argc, argv, "-help") || check_argv_eq(argc, argv, "-h") ) + if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help") + || msvmmaj_check_argv_eq(argc, argv, "-h") ) exit_with_help(); parse_command_line(argc, argv, model, input_filename, output_filename, model_filename); // read data file - read_data(data, input_filename); + msvmmaj_read_data(data, input_filename); // copy dataset parameters to model model->n = data->n; @@ -52,39 +60,40 @@ int main(int argc, char **argv) model->data_file = input_filename; // allocate model and initialize weights - allocate_model(model); - initialize_weights(data, model); - - if (check_argv_eq(argc, argv, "-m")) { - struct Model *seed_model = Malloc(struct Model, 1); - read_model(seed_model, model_filename); - seed_model_V(seed_model, model); - free_model(seed_model); + msvmmaj_allocate_model(model); + msvmmaj_initialize_weights(data, model); + + srand(time(NULL)); + + if (msvmmaj_check_argv_eq(argc, argv, "-m")) { + struct MajModel *seed_model = Malloc(struct MajModel, 1); + msvmmaj_read_model(seed_model, model_filename); + msvmmaj_seed_model_V(seed_model, model); + msvmmaj_free_model(seed_model); } else { - seed_model_V(NULL, model); + msvmmaj_seed_model_V(NULL, model); } // start training - main_loop(model, data); + msvmmaj_optimize(model, data); // write_model to file - if (check_argv_eq(argc, argv, "-o")) { - write_model(model, output_filename); - info("Output written to %s\n", output_filename); + if (msvmmaj_check_argv_eq(argc, argv, "-o")) { + msvmmaj_write_model(model, output_filename); + note("Output written to %s\n", output_filename); } // free model and data - free_model(model); - free_data(data); + msvmmaj_free_model(model); + msvmmaj_free_data(data); return 0; } -void parse_command_line(int argc, char **argv, struct Model *model, +void parse_command_line(int argc, char **argv, struct MajModel *model, char *input_filename, char *output_filename, char *model_filename) { int i; - void (*print_func)(const char*) = NULL; // default values model->p = 1.0; @@ -92,6 +101,8 @@ void parse_command_line(int argc, char **argv, struct Model *model, model->epsilon = 1e-6; model->kappa = 0.0; model->weight_idx = 1; + + MSVMMAJ_OUTPUT_FILE = stdout; // parse options for (i=1; i<argc; i++) { @@ -122,7 +133,7 @@ void parse_command_line(int argc, char **argv, struct Model *model, model->weight_idx = atoi(argv[i]); break; case 'q': - print_func = &print_null; + MSVMMAJ_OUTPUT_FILE = NULL; i--; break; default: @@ -131,9 +142,6 @@ void parse_command_line(int argc, char **argv, struct Model *model, } } - // set print function - set_print_string_function(print_func); - // read input filename if (i >= argc) exit_with_help(); diff --git a/src/trainMSVMMajdataset.c b/src/trainMSVMMajdataset.c new file mode 100644 index 0000000..7c3385c --- /dev/null +++ b/src/trainMSVMMajdataset.c @@ -0,0 +1,186 @@ +#include <time.h> + +#include "crossval.h" +#include "MSVMMaj.h" +#include "msvmmaj_pred.h" +#include "msvmmaj_train.h" +#include "msvmmaj_train_dataset.h" +#include "strutil.h" +#include "util.h" + +#define MINARGS 2 + +extern FILE *MSVMMAJ_OUTPUT_FILE; + +void print_null(const char *s) {} +void exit_with_help(); +void parse_command_line(int argc, char **argv, char *input_filename); +void read_training_from_file(char *input_filename, struct Training *training); + +void exit_with_help() +{ + printf("This is 
MSVMMaj, version %1.1f\n\n", VERSION); + printf("Usage: trainMSVMMajdataset [options] training_file\n"); + printf("Options:\n"); + printf("-h | -help : print this help.\n"); + printf("-q : quiet mode (no output)\n"); + + exit(0); +} + +int main(int argc, char **argv) +{ + char input_filename[MAX_LINE_LENGTH]; + + struct Training *training = Malloc(struct Training, 1); + struct MajData *train_data = Malloc(struct MajData, 1); + struct MajData *test_data = Malloc(struct MajData, 1); + + if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help") + || msvmmaj_check_argv_eq(argc, argv, "-h") ) + exit_with_help(); + parse_command_line(argc, argv, input_filename); + + training->repeats = 0; + note("Reading training file\n"); + read_training_from_file(input_filename, training); + + note("Reading data from %s\n", training->train_data_file); + msvmmaj_read_data(train_data, training->train_data_file); + if (training->traintype == TT) { + note("Reading data from %s\n", training->test_data_file); + msvmmaj_read_data(test_data, training->test_data_file); + } + + note("Creating queue\n"); + struct Queue *q = Malloc(struct Queue, 1); + make_queue(training, q, train_data, test_data); + + srand(time(NULL)); + + note("Starting training\n"); + if (training->traintype == TT) + start_training_tt(q); + else + start_training_cv(q); + note("Training finished\n"); + + if (training->repeats > 0) { + consistency_repeats(q, training->repeats, training->traintype); + } + + free_queue(q); + free(training); + msvmmaj_free_data(train_data); + msvmmaj_free_data(test_data); + + note("Done.\n"); + return 0; +} + +void parse_command_line(int argc, char **argv, char *input_filename) +{ + int i; + + MSVMMAJ_OUTPUT_FILE = stdout; + + for (i=1; i<argc; i++) { + if (argv[i][0] != '-') break; + if (++i>=argc) + exit_with_help(); + switch (argv[i-1][1]) { + case 'q': + MSVMMAJ_OUTPUT_FILE = NULL; + i--; + break; + default: + fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); + exit_with_help(); + } + } + + if (i >= argc) + exit_with_help(); + + strcpy(input_filename, argv[i]); +} + +void read_training_from_file(char *input_filename, struct Training *training) +{ + long i, nr = 0; + FILE *fid; + char buffer[MAX_LINE_LENGTH]; + char train_filename[MAX_LINE_LENGTH]; + char test_filename[MAX_LINE_LENGTH]; + double *params = Calloc(double, MAX_LINE_LENGTH); + long *lparams = Calloc(long, MAX_LINE_LENGTH); + + fid = fopen(input_filename, "r"); + if (fid == NULL) { + fprintf(stderr, "Error opening training file %s\n", input_filename); + exit(1); + } + training->traintype = CV; + while ( fgets(buffer, MAX_LINE_LENGTH, fid) != NULL ) { + Memset(params, double, MAX_LINE_LENGTH); + Memset(lparams, long, MAX_LINE_LENGTH); + if (str_startswith(buffer, "train:")) { + sscanf(buffer, "train: %s\n", train_filename); + training->train_data_file = Calloc(char, MAX_LINE_LENGTH); + strcpy(training->train_data_file, train_filename); + } else if (str_startswith(buffer, "test:")) { + sscanf(buffer, "test: %s\n", test_filename); + training->test_data_file = Calloc(char, MAX_LINE_LENGTH); + strcpy(training->test_data_file, test_filename); + training->traintype = TT; + } else if (str_startswith(buffer, "p:")) { + nr = all_doubles_str(buffer, 2, params); + training->ps = Calloc(double, nr); + for (i=0; i<nr; i++) + training->ps[i] = params[i]; + training->Np = nr; + } else if (str_startswith(buffer, "lambda:")) { + nr = all_doubles_str(buffer, 7, params); + training->lambdas = Calloc(double, nr); + for (i=0; i<nr; i++) + training->lambdas[i] = 
params[i]; + training->Nl = nr; + } else if (str_startswith(buffer, "kappa:")) { + nr = all_doubles_str(buffer, 6, params); + training->kappas = Calloc(double, nr); + for (i=0; i<nr; i++) + training->kappas[i] = params[i]; + training->Nk = nr; + } else if (str_startswith(buffer, "epsilon:")) { + nr = all_doubles_str(buffer, 8, params); + training->epsilons = Calloc(double, nr); + for (i=0; i<nr; i++) + training->epsilons[i] = params[i]; + training->Ne = nr; + } else if (str_startswith(buffer, "weight:")) { + nr = all_longs_str(buffer, 7, lparams); + training->weight_idxs = Calloc(int, nr); + for (i=0; i<nr; i++) + training->weight_idxs[i] = lparams[i]; + training->Nw = nr; + } else if (str_startswith(buffer, "folds:")) { + nr = all_longs_str(buffer, 6, lparams); + training->folds = lparams[0]; + if (nr > 1) + fprintf(stderr, "Field \"folds\" only takes one value. " + "Additional fields are ignored.\n"); + } else if (str_startswith(buffer, "repeats:")) { + nr = all_longs_str(buffer, 8, lparams); + training->repeats = lparams[0]; + if (nr > 1) + fprintf(stderr, "Field \"repeats\" only takes one value. " + "Additional fields are ignored.\n"); + } else { + fprintf(stderr, "Cannot find any parameters on line: %s\n", buffer); + } + } + + free(params); + free(lparams); + fclose(fid); +} @@ -1,10 +1,20 @@ -#include "util.h" +#include <math.h> +#include <stdarg.h> +#include <time.h> + #include "matrix.h" +#include "MSVMMaj.h" +#include "strutil.h" + +#include "util.h" + +FILE *MSVMMAJ_OUTPUT_FILE; + /* Read the data from the data_file. The data matrix X is augmented with a column of ones, to get the matrix Z. */ -void read_data(struct Data *dataset, char *data_file) +void msvmmaj_read_data(struct MajData *dataset, char *data_file) { FILE *fid; long i, j; @@ -17,7 +27,7 @@ void read_data(struct Data *dataset, char *data_file) char buf[MAX_LINE_LENGTH]; if ((fid = fopen(data_file, "r")) == NULL) { - printf("\nERROR: datafile %s could not be opened.\n", + fprintf(stderr, "\nERROR: datafile %s could not be opened.\n", data_file); exit(0); } @@ -37,7 +47,7 @@ void read_data(struct Data *dataset, char *data_file) // Check if there is a label at the end of the line if (fgets(buf, MAX_LINE_LENGTH, fid) == NULL) { - printf("ERROR: No label found on first line.\n"); + fprintf(stderr, "ERROR: No label found on first line.\n"); exit(1); } if (sscanf(buf, "%lf", &value) > 0) { @@ -67,14 +77,15 @@ void read_data(struct Data *dataset, char *data_file) if (min_y == 0) { for (i=0; i<n; i++) dataset->y[i]++; + K++; } else if (min_y < 0 ) { - printf("ERROR: wrong class labels in %s, minimum value is: %ld\n", + fprintf(stderr, "ERROR: wrong class labels in %s, minimum value is: %ld\n", data_file, min_y); exit(0); } if (nr < n * m) { - printf("ERROR: not enough data found in %s\n", data_file); + fprintf(stderr, "ERROR: not enough data found in %s\n", data_file); exit(0); } @@ -85,49 +96,10 @@ void read_data(struct Data *dataset, char *data_file) dataset->n = n; dataset->m = m; dataset->K = K; - - info("Succesfully read data file: %s\n", data_file); -} - -void next_line(FILE *fid, char *filename) -{ - char buffer[MAX_LINE_LENGTH]; - if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) { - fprintf(stderr, "Error reading file %s\n", filename); - exit(1); - } + note("Succesfully read data file: %s\n", data_file); } -double get_fmt_double(FILE *fid, char *filename, const char *fmt) -{ - char buffer[MAX_LINE_LENGTH]; - double value; - - if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) { - fprintf(stderr, "Error reading line from 
file %s\n", filename); - exit(1); - } - sscanf(buffer, fmt, &value); - - return value; -} - -long get_fmt_long(FILE *fid, char *filename, const char *fmt) -{ - char buffer[MAX_LINE_LENGTH]; - long value; - - if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) { - fprintf(stderr, "Error reading line from file %s\n", filename); - exit(1); - } - sscanf(buffer, fmt, &value); - - return value; -} - - -void read_model(struct Model *model, char *model_filename) +void msvmmaj_read_model(struct MajModel *model, char *model_filename) { long i, j, nr = 0; FILE *fid; @@ -188,10 +160,11 @@ void read_model(struct Model *model, char *model_filename) } -void write_model(struct Model *model, char *output_filename) +void msvmmaj_write_model(struct MajModel *model, char *output_filename) { FILE *fid; - int i, j, diff, hours, minutes; + long i, j; + int diff, hours, minutes; char timestr[1000]; time_t current_time, lt, gt; struct tm *lclt; @@ -255,11 +228,27 @@ void write_model(struct Model *model, char *output_filename) } -void write_predictions(struct Data *data, long *predy, char *output_filename) +void msvmmaj_write_predictions(struct MajData *data, long *predy, char *output_filename) { + long i, j; + FILE *fid; + + fid = fopen(output_filename, "w"); + if (fid == NULL) { + fprintf(stderr, "Error opening output file %s", output_filename); + exit(1); + } + + for (i=0; i<data->n; i++) { + for (j=0; j<data->m; j++) + fprintf(fid, "%f ", matrix_get(data->Z, data->m+1, i, j+1)); + fprintf(fid, "%li\n", predy[i]); + } + + fclose(fid); } -int check_argv(int argc, char **argv, char *str) +int msvmmaj_check_argv(int argc, char **argv, char *str) { int i; int arg_str = 0; @@ -272,7 +261,7 @@ int check_argv(int argc, char **argv, char *str) return arg_str; } -int check_argv_eq(int argc, char **argv, char *str) +int msvmmaj_check_argv_eq(int argc, char **argv, char *str) { int i; int arg_str = 0; @@ -285,38 +274,26 @@ int check_argv_eq(int argc, char **argv, char *str) return arg_str; } -static void print_string_stdout(const char *s) -{ - fputs(s, stdout); - fflush(stdout); -} - -static void (*print_string) (const char *) = &print_string_stdout; -void set_print_string_function(void (*print_func)(const char *)) +static void msvmmaj_print_string(const char *s) { - if (print_func == NULL) - print_string = &print_string_stdout; - else - print_string = print_func; + if (MSVMMAJ_OUTPUT_FILE != NULL) { + fputs(s, MSVMMAJ_OUTPUT_FILE); + fflush(MSVMMAJ_OUTPUT_FILE); + } } -void info(const char *fmt,...) +void note(const char *fmt,...) { char buf[BUFSIZ]; va_list ap; va_start(ap,fmt); vsprintf(buf,fmt,ap); va_end(ap); - (*print_string)(buf); -} - -double rnd() -{ - return (double) rand()/0x7FFFFFFF; + (*msvmmaj_print_string)(buf); } -void allocate_model(struct Model *model) +void msvmmaj_allocate_model(struct MajModel *model) { long n = model->n; long m = model->m; @@ -384,7 +361,7 @@ void allocate_model(struct Model *model) } -void free_model(struct Model *model) +void msvmmaj_free_model(struct MajModel *model) { free(model->W); free(model->t); @@ -400,23 +377,10 @@ void free_model(struct Model *model) free(model); } -void free_data(struct Data *data) +void msvmmaj_free_data(struct MajData *data) { free(data->Z); free(data->y); free(data); } -/* -void print_matrix(double *M, long rows, long cols) -{ - long i, j; - for (i=0; i<rows; i++) { - for (j=0; j<cols; j++) { - info("%8.8f ", matrix_get(M, cols, i, j)); - } - info("\n"); - } - info("\n"); -} -*/ |
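For reference, read_training_from_file() above parses a plain-text grid-search specification, one field per line. A hypothetical input file (editor's illustration; the field names are taken directly from the parser, the values are made up):

    train: data/example_train.txt
    test: data/example_test.txt
    p: 1.0 1.5 2.0
    lambda: 0.0001 0.001 0.01
    kappa: 0.0 1.0 5.0
    epsilon: 1e-6
    weight: 1 2
    folds: 10
    repeats: 5

Omitting the test: line leaves traintype at its default CV, so the task queue is evaluated by cross-validation over `folds` folds; repeats: controls the consistency_repeats() pass over the best-performing configurations.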
