diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2016-05-09 20:55:24 +0200 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2016-05-09 20:55:24 +0200 |
| commit | 7c8a5e4b2a7cff7573b1a308daf19d2dbd558a9c (patch) | |
| tree | 2c4774a63ec91d60931d822a7607a5011711c946 | |
| parent | add doc and test to phony (diff) | |
| download | gensvm-7c8a5e4b2a7cff7573b1a308daf19d2dbd558a9c.tar.gz gensvm-7c8a5e4b2a7cff7573b1a308daf19d2dbd558a9c.zip | |
strip whitespaces
| -rw-r--r-- | include/gensvm.h | 16 | ||||
| -rw-r--r-- | include/gensvm_io.h | 2 | ||||
| -rw-r--r-- | include/gensvm_kernel.h | 2 | ||||
| -rw-r--r-- | include/gensvm_lapack.h | 4 | ||||
| -rw-r--r-- | include/gensvm_train_dataset.h | 6 | ||||
| -rw-r--r-- | include/gensvm_util.h | 2 | ||||
| -rw-r--r-- | include/globals.h | 6 | ||||
| -rw-r--r-- | include/libGenSVM.h | 6 | ||||
| -rw-r--r-- | include/types.h | 2 | ||||
| -rw-r--r-- | src/GenSVMgrid.c | 14 | ||||
| -rw-r--r-- | src/GenSVMpred.c | 10 | ||||
| -rw-r--r-- | src/GenSVMtrain.c | 30 | ||||
| -rw-r--r-- | src/GenSVMtraintest.c | 8 | ||||
| -rw-r--r-- | src/gensvm_crossval.c | 24 | ||||
| -rw-r--r-- | src/gensvm_init.c | 10 | ||||
| -rw-r--r-- | src/gensvm_io.c | 42 | ||||
| -rw-r--r-- | src/gensvm_kernel.c | 54 | ||||
| -rw-r--r-- | src/gensvm_lapack.c | 12 | ||||
| -rw-r--r-- | src/gensvm_matrix.c | 2 | ||||
| -rw-r--r-- | src/gensvm_pred.c | 6 | ||||
| -rw-r--r-- | src/gensvm_strutil.c | 8 | ||||
| -rw-r--r-- | src/gensvm_sv.c | 10 | ||||
| -rw-r--r-- | src/gensvm_timer.c | 2 | ||||
| -rw-r--r-- | src/gensvm_train.c | 78 | ||||
| -rw-r--r-- | src/gensvm_train_dataset.c | 86 | ||||
| -rw-r--r-- | src/gensvm_util.c | 16 | ||||
| -rw-r--r-- | src/libGenSVM.c | 60 |
27 files changed, 259 insertions, 259 deletions
diff --git a/include/gensvm.h b/include/gensvm.h index 5101b41..ef85157 100644 --- a/include/gensvm.h +++ b/include/gensvm.h @@ -31,7 +31,7 @@ struct GenModel { double epsilon; ///< stopping criterion for the IM algorithm. double p; - ///< parameter for the L-p norm in the loss function + ///< parameter for the L-p norm in the loss function double kappa; ///< parameter for the Huber hinge function double lambda; @@ -51,16 +51,16 @@ struct GenModel { ///< 3D simplex difference matrix double *Q; ///< error matrix - double *H; + double *H; ///< Huber weighted error matrix double *R; - ///< 0-1 auixiliary matrix, this matrix is n x K, with for row i a 0 on + ///< 0-1 auixiliary matrix, this matrix is n x K, with for row i a 0 on ///< column y[i]-1, and 1 everywhere else. - double *rho; + double *rho; ///< vector of instance weights double training_error; ///< loss function value after training has finished - char *data_file; + char *data_file; ///< filename of the data KernelType kerneltype; ///< type of kernel used in the model @@ -94,13 +94,13 @@ struct GenData { long *y; ///< array of class labels, 1..K double *Z; - ///< augmented data matrix (either equal to RAW or to the eigenvectors + ///< augmented data matrix (either equal to RAW or to the eigenvectors ///< of the kernel matrix) double *RAW; ///< augmented raw data matrix double *Sigma; - KernelType kerneltype; - double *kernelparam; + KernelType kerneltype; + double *kernelparam; }; #endif diff --git a/include/gensvm_io.h b/include/gensvm_io.h index 35b6a5a..73c24bd 100644 --- a/include/gensvm_io.h +++ b/include/gensvm_io.h @@ -24,7 +24,7 @@ void gensvm_read_data(struct GenData *dataset, char *data_file); void gensvm_read_model(struct GenModel *model, char *model_filename); void gensvm_write_model(struct GenModel *model, char *output_filename); -void gensvm_write_predictions(struct GenData *data, long *predy, +void gensvm_write_predictions(struct GenData *data, long *predy, char *output_filename); #endif diff --git a/include/gensvm_kernel.h b/include/gensvm_kernel.h index d5c5e8d..d01eb88 100644 --- a/include/gensvm_kernel.h +++ b/include/gensvm_kernel.h @@ -6,7 +6,7 @@ * * @details * Contains function declarations for computing the kernel matrix - * in nonlinear MSVMGen. Additional kernel functions should be + * in nonlinear MSVMGen. Additional kernel functions should be * included here and in gensvm_kernel.c * */ diff --git a/include/gensvm_lapack.h b/include/gensvm_lapack.h index 7ac4fc9..a664cba 100644 --- a/include/gensvm_lapack.h +++ b/include/gensvm_lapack.h @@ -3,9 +3,9 @@ * @author Gertjan van den Burg * @date August, 2013 * @brief Header file for gensvm_lapack.c - * + * * @details - * Function declarations for external LAPACK functions + * Function declarations for external LAPACK functions * */ diff --git a/include/gensvm_train_dataset.h b/include/gensvm_train_dataset.h index 16f8e07..c743bd7 100644 --- a/include/gensvm_train_dataset.h +++ b/include/gensvm_train_dataset.h @@ -74,8 +74,8 @@ struct Queue { * * @param traintype type of training to use * @param kerneltype type of kernel to use throughout training - * @param repeats number of repeats to be done after the grid - * search to find the parameter set with the + * @param repeats number of repeats to be done after the grid + * search to find the parameter set with the * most consistent high performance * @param folds number of folds in cross validation * @param Np size of the array of p values @@ -87,7 +87,7 @@ struct Queue { * @param Nc size of the array of coef values * @param Nd size of the array of degree values * @param *weight_idxs array of weight_idxs - * @param *ps array of p values + * @param *ps array of p values * @param *lambdas array of lambda values * @param *kappas array of kappa values * @param *epsilons array of epsilon values diff --git a/include/gensvm_util.h b/include/gensvm_util.h index fe8d2a3..18aa553 100644 --- a/include/gensvm_util.h +++ b/include/gensvm_util.h @@ -3,7 +3,7 @@ * @author Gertjan van den Burg * @date August, 2013 * @brief Header file for util.c - * + * * @details * Function declarations for utility functions of the program. * diff --git a/include/globals.h b/include/globals.h index 46cc3d2..dfd65ad 100644 --- a/include/globals.h +++ b/include/globals.h @@ -8,10 +8,10 @@ * This header file contains defines and includes which are used in many * parts of the program. Most notable are the Calloc, Malloc and Memset * defines, which are commonly used to allocate memory. These functions - * are shorthands for their lowercase counterparts. + * are shorthands for their lowercase counterparts. * - * Furthermore, a maximum and minimum function are defined here. These - * functions have their own include guards, to ensure potential linked + * Furthermore, a maximum and minimum function are defined here. These + * functions have their own include guards, to ensure potential linked * libraries don't conflict with these definitions. * */ diff --git a/include/libGenSVM.h b/include/libGenSVM.h index cfa2815..dbf0903 100644 --- a/include/libGenSVM.h +++ b/include/libGenSVM.h @@ -11,16 +11,16 @@ */ /** - * @todo + * @todo * rename this file and libGenSVM.c to correspond with the lowercase convention. * Also change the name of the include guard. - */ + */ #ifndef LIBGENSVM_H #define LIBGENSVM_H #include "globals.h" -// forward declarations +// forward declarations struct GenData; struct GenModel; diff --git a/include/types.h b/include/types.h index 1cbcba0..329401a 100644 --- a/include/types.h +++ b/include/types.h @@ -4,7 +4,7 @@ * @date August, 2013 * @brief Definitions of common types * - * @details + * @details * Here common types used throughout the program are defined. * */ diff --git a/src/GenSVMgrid.c b/src/GenSVMgrid.c index 0b6d33e..3cf1346 100644 --- a/src/GenSVMgrid.c +++ b/src/GenSVMgrid.c @@ -8,7 +8,7 @@ * This is a command line interface to the parameter grid search functionality * of the algorithm. The grid search is specified in a separate file, thereby * reducing the number of command line arguments. See - * read_training_from_file() for documentation on the training file. + * read_training_from_file() for documentation on the training file. * * The program runs a grid search as specified in the training file. If * desired the grid search can incorporate consistency checks to find the @@ -82,7 +82,7 @@ int main(int argc, char **argv) struct GenData *test_data = gensvm_init_data(); struct Queue *q = gensvm_init_queue(); - if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") + if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") || gensvm_check_argv_eq(argc, argv, "-h") ) exit_with_help(); parse_command_line(argc, argv, input_filename); @@ -150,7 +150,7 @@ void parse_command_line(int argc, char **argv, char *input_filename) i--; break; default: - fprintf(stderr, "Unknown option: -%c\n", + fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); exit_with_help(); } @@ -184,10 +184,10 @@ KernelType parse_kernel_str(char *kernel_line) * * @details * Read the Training struct from a file. The training file follows a specific - * format specified in @ref spec_training_file. + * format specified in @ref spec_training_file. * * Commonly used string functions in this function are all_doubles_str() and - * all_longs_str(). + * all_longs_str(). * * @param[in] input_filename filename of the training file * @param[in] training Training structure to place the parsed @@ -206,7 +206,7 @@ void read_training_from_file(char *input_filename, struct Training *training) fid = fopen(input_filename, "r"); if (fid == NULL) { - fprintf(stderr, "Error opening training file %s\n", + fprintf(stderr, "Error opening training file %s\n", input_filename); exit(1); } @@ -216,7 +216,7 @@ void read_training_from_file(char *input_filename, struct Training *training) Memset(lparams, long, MAX_LINE_LENGTH); if (str_startswith(buffer, "train:")) { sscanf(buffer, "train: %s\n", train_filename); - training->train_data_file = Calloc(char, + training->train_data_file = Calloc(char, MAX_LINE_LENGTH); strcpy(training->train_data_file, train_filename); } else if (str_startswith(buffer, "test:")) { diff --git a/src/GenSVMpred.c b/src/GenSVMpred.c index ef2c97c..57680b1 100644 --- a/src/GenSVMpred.c +++ b/src/GenSVMpred.c @@ -36,8 +36,8 @@ extern FILE *GENSVM_OUTPUT_FILE; // function declarations void exit_with_help(); -void parse_command_line(int argc, char **argv, - char *input_filename, char *output_filename, +void parse_command_line(int argc, char **argv, + char *input_filename, char *output_filename, char *model_filename); /** @@ -80,7 +80,7 @@ int main(int argc, char **argv) char model_filename[MAX_LINE_LENGTH]; char output_filename[MAX_LINE_LENGTH];; - if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") + if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") || gensvm_check_argv_eq(argc, argv, "-h") ) exit_with_help(); parse_command_line(argc, argv, input_filename, output_filename, @@ -137,7 +137,7 @@ int main(int argc, char **argv) * * @param[in] argc number of command line arguments * @param[in] argv array of command line arguments - * @param[in] input_filename pre-allocated array for the input + * @param[in] input_filename pre-allocated array for the input * filename * @param[in] output_filename pre-allocated array for the output * filename @@ -165,7 +165,7 @@ void parse_command_line(int argc, char **argv, char *input_filename, i--; break; default: - fprintf(stderr, "Unknown option: -%c\n", + fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); exit_with_help(); } diff --git a/src/GenSVMtrain.c b/src/GenSVMtrain.c index 63e2512..b9dc250 100644 --- a/src/GenSVMtrain.c +++ b/src/GenSVMtrain.c @@ -6,8 +6,8 @@ * * @details * This is a command line program for training a single model on a given - * dataset. To run a grid search over a number of parameter configurations, - * see trainGenSVMdataset.c. + * dataset. To run a grid search over a number of parameter configurations, + * see trainGenSVMdataset.c. * */ @@ -28,7 +28,7 @@ extern FILE *GENSVM_OUTPUT_FILE; // function declarations void exit_with_help(); -void parse_command_line(int argc, char **argv, struct GenModel *model, +void parse_command_line(int argc, char **argv, struct GenModel *model, char *input_filename, char *output_filename, char *model_filename); /** @@ -81,24 +81,24 @@ int main(int argc, char **argv) struct GenModel *model = gensvm_init_model(); struct GenData *data = gensvm_init_data(); - if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") - || gensvm_check_argv_eq(argc, argv, "-h") ) + if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") + || gensvm_check_argv_eq(argc, argv, "-h") ) exit_with_help(); - parse_command_line(argc, argv, model, input_filename, + parse_command_line(argc, argv, model, input_filename, output_filename, model_filename); // read data file gensvm_read_data(data, input_filename); - // copy dataset parameters to model + // copy dataset parameters to model model->n = data->n; model->m = data->m; model->K = data->K; model->data_file = input_filename; - + // allocate model gensvm_allocate_model(model); - + // initialize kernel (if necessary) //gensvm_make_kernel(model, data); @@ -131,7 +131,7 @@ int main(int argc, char **argv) // free model and data gensvm_free_model(model); gensvm_free_data(data); - + return 0; } @@ -155,12 +155,12 @@ int main(int argc, char **argv) * filename * */ -void parse_command_line(int argc, char **argv, struct GenModel *model, +void parse_command_line(int argc, char **argv, struct GenModel *model, char *input_filename, char *output_filename, char *model_filename) { int i; - double gamma = 1.0, - degree = 2.0, + double gamma = 1.0, + degree = 2.0, coef = 0.0; GENSVM_OUTPUT_FILE = stdout; @@ -210,12 +210,12 @@ void parse_command_line(int argc, char **argv, struct GenModel *model, i--; break; default: - fprintf(stderr, "Unknown option: -%c\n", + fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); exit_with_help(); } } - + // read input filename if (i >= argc) exit_with_help(); diff --git a/src/GenSVMtraintest.c b/src/GenSVMtraintest.c index c5f09f8..955f4fa 100644 --- a/src/GenSVMtraintest.c +++ b/src/GenSVMtraintest.c @@ -5,7 +5,7 @@ * @brief Command line interface for training and testing with a GenSVM model * * @details - * This is a command line program for training and testing on a single model + * This is a command line program for training and testing on a single model * with specified model parameters. * */ @@ -126,13 +126,13 @@ int main(int argc, char **argv) // start training gensvm_optimize(model, traindata); - // if we also have a test set, predict labels and write to predictions + // if we also have a test set, predict labels and write to predictions // to an output file if specified if (with_test) { // predict labels predy = Calloc(long, testdata->n); gensvm_predict_labels(testdata, model, predy); - + if (testdata->y != NULL) { performance = gensvm_prediction_perf(testdata, predy); note("Predictive performance: %3.2f%%\n", performance); @@ -181,7 +181,7 @@ void parse_command_line(int argc, char **argv, struct GenModel *model, double gamma = 1.0, degree = 2.0, coef = 0.0; - + GENSVM_OUTPUT_FILE = stdout; // parse options diff --git a/src/gensvm_crossval.c b/src/gensvm_crossval.c index 864e692..6930166 100644 --- a/src/gensvm_crossval.c +++ b/src/gensvm_crossval.c @@ -1,13 +1,13 @@ /** * @file crossval.c - * @author Gertjan van den Burg + * @author Gertjan van den Burg * @date January 7, 2014 * @brief Functions for cross validation * * @details * This file contains functions for performing cross validation. The funtion * gensvm_make_cv_split() creates a cross validation vector for non-stratified - * cross validation. The function gensvm_get_tt_split() creates a train and + * cross validation. The function gensvm_get_tt_split() creates a train and * test dataset from a given dataset and a pre-determined CV partition vector. * See individual function documentation for details. * @@ -22,17 +22,17 @@ * * @details * A pre-allocated vector of length N is created which can be used to define - * cross validation splits. The folds are contain between - * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$ - * instances. An instance is mapped to a partition randomly until all folds - * contain @f$ N \% folds @f$ instances. The zero fold then contains + * cross validation splits. The folds are contain between + * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$ + * instances. An instance is mapped to a partition randomly until all folds + * contain @f$ N \% folds @f$ instances. The zero fold then contains * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$ - * instances are then distributed over the first @f$ N \% folds @f$ folds. + * instances are then distributed over the first @f$ N \% folds @f$ folds. * * @param[in] N number of instances * @param[in] folds number of folds * @param[in,out] cv_idx array of size N which contains the fold index - * for each observation on exit + * for each observation on exit * */ void gensvm_make_cv_split(long N, long folds, long *cv_idx) @@ -44,7 +44,7 @@ void gensvm_make_cv_split(long N, long folds, long *cv_idx) long big_folds = N%folds; long small_fold_size = N/folds; - + j = 0; for (i=0; i<small_fold_size*folds; i++) while (1) { @@ -73,9 +73,9 @@ void gensvm_make_cv_split(long N, long folds, long *cv_idx) * @details * Given a GenData structure for the full dataset, a previously created * cross validation split vector and a fold index, a training and test dataset - * are created. + * are created. * - * @param[in] full_data a GenData structure for the entire + * @param[in] full_data a GenData structure for the entire * dataset * @param[in,out] train_data an initialized GenData structure which * on exit contains the training dataset @@ -83,7 +83,7 @@ void gensvm_make_cv_split(long N, long folds, long *cv_idx) * on exit contains the test dataset * @param[in] cv_idx a vector of cv partitions created by * gensvm_make_cv_split() - * @param[in] fold_idx index of the fold which becomes the + * @param[in] fold_idx index of the fold which becomes the * test dataset */ void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data, diff --git a/src/gensvm_init.c b/src/gensvm_init.c index 6be2706..228e9fe 100644 --- a/src/gensvm_init.c +++ b/src/gensvm_init.c @@ -4,9 +4,9 @@ * @date January 7, 2014 * @brief Functions for initializing model and data structures * - * @details + * @details * This file contains functions for initializing a GenModel instance - * and a GenData instance. In addition, default values for these + * and a GenData instance. In addition, default values for these * structures are defined here (and only here). Functions for allocating * memory for the model structure and freeing of the model and data structures * are also included. @@ -59,7 +59,7 @@ struct GenModel *gensvm_init_model() * @brief Initialize a GenData structure * * @details - * A GenData structure is initialized and default values are set. + * A GenData structure is initialized and default values are set. * A pointer to the initialized data is returned. * * @returns initialized GenData @@ -161,8 +161,8 @@ void gensvm_allocate_model(struct GenModel *model) * @brief Reallocate memory for GenModel * * @details - * This function can be used to reallocate existing memory for a GenModel, - * upon a change in the model dimensions. This is used in combination with + * This function can be used to reallocate existing memory for a GenModel, + * upon a change in the model dimensions. This is used in combination with * kernels. * * @param[in] model GenModel to reallocate diff --git a/src/gensvm_io.c b/src/gensvm_io.c index 01f1db5..d81f19b 100644 --- a/src/gensvm_io.c +++ b/src/gensvm_io.c @@ -6,7 +6,7 @@ * * @details * This file contains functions for reading and writing model files, and data - * files. + * files. * */ @@ -18,12 +18,12 @@ /** * @brief Read data from file - * + * * @details * Read the data from the data_file. The data matrix X is augmented * with a column of ones, to get the matrix Z. The data is expected * to follow a specific format, which is specified in the @ref spec_data_file. - * The class labels are corrected internally to correspond to the interval + * The class labels are corrected internally to correspond to the interval * [1 .. K], where K is the total number of classes. * * @todo @@ -105,11 +105,11 @@ void gensvm_read_data(struct GenData *dataset, char *data_file) } if (nr < n * m) { - fprintf(stderr, "ERROR: not enough data found in %s\n", + fprintf(stderr, "ERROR: not enough data found in %s\n", data_file); exit(0); } - + // Set the column of ones for (i=0; i<n; i++) matrix_set(dataset->RAW, m+1, i, 0, 1.0); @@ -145,7 +145,7 @@ void gensvm_read_model(struct GenModel *model, char *model_filename) fid = fopen(model_filename, "r"); if (fid == NULL) { - fprintf(stderr, "Error opening model file %s\n", + fprintf(stderr, "Error opening model file %s\n", model_filename); exit(1); } @@ -158,7 +158,7 @@ void gensvm_read_model(struct GenModel *model, char *model_filename) model->lambda = get_fmt_double(fid, model_filename, "lambda = %lf"); model->kappa = get_fmt_double(fid, model_filename, "kappa = %lf"); model->epsilon = get_fmt_double(fid, model_filename, "epsilon = %lf"); - model->weight_idx = (int) get_fmt_long(fid, model_filename, + model->weight_idx = (int) get_fmt_long(fid, model_filename, "weight_idx = %li"); // skip to data section @@ -167,7 +167,7 @@ void gensvm_read_model(struct GenModel *model, char *model_filename) // read filename of data file if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) { - fprintf(stderr, "Error reading model file %s\n", + fprintf(stderr, "Error reading model file %s\n", model_filename); exit(1); } @@ -193,7 +193,7 @@ void gensvm_read_model(struct GenModel *model, char *model_filename) } if (nr != (model->m+1)*(model->K-1)) { fprintf(stderr, "Error reading model file %s. " - "Not enough elements of V found.\n", + "Not enough elements of V found.\n", model_filename); exit(1); } @@ -207,7 +207,7 @@ void gensvm_read_model(struct GenModel *model, char *model_filename) * UTC + offset. The model file further corresponds to the @ref * spec_model_file. * - * @param[in] model GenModel which contains an estimate for + * @param[in] model GenModel which contains an estimate for * GenModel::V * @param[in] output_filename the output file to write the model to * @@ -221,7 +221,7 @@ void gensvm_write_model(struct GenModel *model, char *output_filename) // open output file fid = fopen(output_filename, "w"); if (fid == NULL) { - fprintf(stderr, "Error opening output file %s", + fprintf(stderr, "Error opening output file %s", output_filename); exit(1); } @@ -246,8 +246,8 @@ void gensvm_write_model(struct GenModel *model, char *output_filename) fprintf(fid, "Output:\n"); for (i=0; i<model->m+1; i++) { for (j=0; j<model->K-1; j++) { - fprintf(fid, "%+15.16f ", - matrix_get(model->V, + fprintf(fid, "%+15.16f ", + matrix_get(model->V, model->K-1, i, j)); } fprintf(fid, "\n"); @@ -261,17 +261,17 @@ void gensvm_write_model(struct GenModel *model, char *output_filename) * * @details * Write the given predictions to an output file, such that the resulting file - * corresponds to the @ref spec_data_file. + * corresponds to the @ref spec_data_file. * * @param[in] data GenData with the original instances - * @param[in] predy predictions of the class labels of the + * @param[in] predy predictions of the class labels of the * instances in the given GenData. Note that the * order of the instances is assumed to be the * same. * @param[in] output_filename the file to which the predictions are written * */ -void gensvm_write_predictions(struct GenData *data, long *predy, +void gensvm_write_predictions(struct GenData *data, long *predy, char *output_filename) { long i, j; @@ -279,18 +279,18 @@ void gensvm_write_predictions(struct GenData *data, long *predy, fid = fopen(output_filename, "w"); if (fid == NULL) { - fprintf(stderr, "Error opening output file %s", + fprintf(stderr, "Error opening output file %s", output_filename); exit(1); } - + fprintf(fid, "%li\n", data->n); fprintf(fid, "%li\n", data->m); for (i=0; i<data->n; i++) { - for (j=0; j<data->m; j++) - fprintf(fid, "%f ", - matrix_get(data->Z, + for (j=0; j<data->m; j++) + fprintf(fid, "%f ", + matrix_get(data->Z, data->m+1, i, j+1)); fprintf(fid, "%li\n", predy[i]); } diff --git a/src/gensvm_kernel.c b/src/gensvm_kernel.c index a6bc9fc..f53bcce 100644 --- a/src/gensvm_kernel.c +++ b/src/gensvm_kernel.c @@ -5,7 +5,7 @@ * @brief Defines main functions for use of kernels in GenSVM. * * @details - * Functions for constructing different kernels using user-supplied + * Functions for constructing different kernels using user-supplied * parameters. Also contains the functions for decomposing the * kernel matrix using several decomposition methods. * @@ -34,7 +34,7 @@ void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data) return; } - int i; + int i; long r, n = data->n; double *P = NULL, @@ -55,7 +55,7 @@ void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data) // build M and set to data (leave RAW intact) gensvm_make_trainfactor(data, P, Sigma, r); - + // Set Sigma to data->Sigma (need it again for prediction) if (data->Sigma != NULL) free(data->Sigma); @@ -81,7 +81,7 @@ void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data) data->kernelparam[0] = model->kernelparam[0]; data->kernelparam[1] = model->kernelparam[1]; } - + free(K); free(P); } @@ -117,13 +117,13 @@ void gensvm_make_kernel(struct GenModel *model, struct GenData *data, x1 = &data->RAW[i*(data->m+1)+1]; x2 = &data->RAW[j*(data->m+1)+1]; if (model->kerneltype == K_POLY) - value = gensvm_dot_poly(x1, x2, + value = gensvm_dot_poly(x1, x2, model->kernelparam, data->m); else if (model->kerneltype == K_RBF) - value = gensvm_dot_rbf(x1, x2, + value = gensvm_dot_rbf(x1, x2, model->kernelparam, data->m); else if (model->kerneltype == K_SIGMOID) - value = gensvm_dot_sigmoid(x1, x2, + value = gensvm_dot_sigmoid(x1, x2, model->kernelparam, data->m); else { fprintf(stderr, "Unknown kernel type in " @@ -154,11 +154,11 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) IWORK = Malloc(int, 5*n); IFAIL = Malloc(int, n); - - // highest precision eigenvalues, may reduce for speed + + // highest precision eigenvalues, may reduce for speed abstol = 2.0*dlamch('S'); - // first perform a workspace query to determine optimal size of the + // first perform a workspace query to determine optimal size of the // WORK array. WORK = Malloc(double, 1); status = dsyevx( @@ -183,7 +183,7 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) IFAIL); LWORK = WORK[0]; - // allocate the requested memory for the eigendecomposition + // allocate the requested memory for the eigendecomposition WORK = (double *)realloc(WORK, LWORK*sizeof(double)); status = dsyevx( 'V', @@ -211,7 +211,7 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) exit(1); } - // Select the desired number of eigenvalues, depending on their size. + // Select the desired number of eigenvalues, depending on their size. // dsyevx sorts eigenvalues in ascending order. max_eigen = tempSigma[n-1]; cutoff_idx = 0; @@ -223,23 +223,23 @@ long gensvm_make_eigen(double *K, long n, double **P, double **Sigma) } num_eigen = n - cutoff_idx; - + *Sigma = Calloc(double, num_eigen); - + for (i=0; i<num_eigen; i++) { (*Sigma)[i] = tempSigma[n-1 - i]; } - // revert P to row-major order and copy only the the columns + // revert P to row-major order and copy only the the columns // corresponding to the selected eigenvalues - *P = Calloc(double, n*num_eigen); + *P = Calloc(double, n*num_eigen); for (j=n-1; j>n-1-num_eigen; j--) { for (i=0; i<n; i++) { (*P)[i*num_eigen + (n-1)-j] = tempP[i + j*n]; } } - free(tempSigma); + free(tempSigma); free(tempP); free(IWORK); free(IFAIL); @@ -322,7 +322,7 @@ void gensvm_make_testfactor(struct GenData *testdata, double value, *N = NULL, *M = NULL; - + n1 = traindata->n; n2 = testdata->n; r = traindata->r; @@ -340,7 +340,7 @@ void gensvm_make_testfactor(struct GenData *testdata, exit(1); } - // copy M from traindata->Z because we need it in dgemm without column + // copy M from traindata->Z because we need it in dgemm without column // of 1's. for (i=0; i<n1; i++) for (j=0; j<r; j++) @@ -370,7 +370,7 @@ void gensvm_make_testfactor(struct GenData *testdata, for (i=0; i<n2; i++) matrix_mul(N, r, i, j, value); } - + // write N to Z with a column of ones testdata->Z = Calloc(double, n2*(r+1)); if (testdata->Z == NULL) { @@ -380,7 +380,7 @@ void gensvm_make_testfactor(struct GenData *testdata, } for (i=0; i<n2; i++) { for (j=0; j<r; j++) { - matrix_set(testdata->Z, r+1, i, j+1, + matrix_set(testdata->Z, r+1, i, j+1, matrix_get(N, r, i, j)); } matrix_set(testdata->Z, r+1, i, 0, 1.0); @@ -394,7 +394,7 @@ void gensvm_make_testfactor(struct GenData *testdata, /** * @brief Compute the RBF kernel between two vectors - * + * * @details * The RBF kernel is computed between two vectors. This kernel is defined as * @f[ @@ -404,7 +404,7 @@ void gensvm_make_testfactor(struct GenData *testdata, * * @param[in] x1 first vector * @param[in] x2 second vector - * @param[in] kernelparam array of kernel parameters (gamma is first + * @param[in] kernelparam array of kernel parameters (gamma is first * element) * @param[in] n length of the vectors x1 and x2 * @returns kernel evaluation @@ -413,8 +413,8 @@ double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n) { long i; double value = 0.0; - - for (i=0; i<n; i++) + + for (i=0; i<n; i++) value += (x1[i] - x2[i]) * (x1[i] - x2[i]); value *= -kernelparam[0]; return exp(value); @@ -424,7 +424,7 @@ double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n) * @brief Compute the polynomial kernel between two vectors * * @details - * The polynomial kernel is computed between two vectors. This kernel is + * The polynomial kernel is computed between two vectors. This kernel is * defined as * @f[ * k(x_1, x_2) = ( \gamma \langle x_1, x_2 \rangle + c)^d @@ -450,7 +450,7 @@ double gensvm_dot_poly(double *x1, double *x2, double *kernelparam, long n) /** * @brief Compute the sigmoid kernel between two vectors - * + * * @details * The sigmoid kernel is computed between two vectors. This kernel is defined * as diff --git a/src/gensvm_lapack.c b/src/gensvm_lapack.c index 6f50340..56dfc20 100644 --- a/src/gensvm_lapack.c +++ b/src/gensvm_lapack.c @@ -15,13 +15,13 @@ * @brief Solve AX = B where A is symmetric positive definite. * * @details - * Solve a linear system of equations AX = B where A is symmetric positive + * Solve a linear system of equations AX = B where A is symmetric positive * definite. This function uses the externel LAPACK routine dposv. * * @param[in] UPLO which triangle of A is stored * @param[in] N order of A * @param[in] NRHS number of columns of B - * @param[in,out] A double precision array of size (LDA, N). On + * @param[in,out] A double precision array of size (LDA, N). On * exit contains the upper or lower factor of the * Cholesky factorization of A. * @param[in] LDA leading dimension of A @@ -36,7 +36,7 @@ * - >0: if i, the leading minor of A * was not positive definite * - * See the LAPACK documentation at: + * See the LAPACK documentation at: * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html */ int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, @@ -53,13 +53,13 @@ int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, * @brief Solve a system of equations AX = B where A is symmetric. * * @details - * Solve a linear system of equations AX = B where A is symmetric. This + * Solve a linear system of equations AX = B where A is symmetric. This * function uses the external LAPACK routine dsysv. * * @param[in] UPLO which triangle of A is stored * @param[in] N order of A * @param[in] NRHS number of columns of B - * @param[in,out] A double precision array of size (LDA, N). On + * @param[in,out] A double precision array of size (LDA, N). On * exit contains the block diagonal matrix D and * the multipliers used to obtain the factor U or * L from the factorization A = U*D*U**T or @@ -96,7 +96,7 @@ int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, } /** - * @brief Compute the eigenvalues and optionally the eigenvectors of a + * @brief Compute the eigenvalues and optionally the eigenvectors of a * symmetric matrix. * * @details diff --git a/src/gensvm_matrix.c b/src/gensvm_matrix.c index 66e3947..c2e5986 100644 --- a/src/gensvm_matrix.c +++ b/src/gensvm_matrix.c @@ -17,7 +17,7 @@ /** * @brief print a matrix - * + * * @details * Debug function to print a matrix * diff --git a/src/gensvm_pred.c b/src/gensvm_pred.c index 7baae07..7835bf6 100644 --- a/src/gensvm_pred.c +++ b/src/gensvm_pred.c @@ -6,7 +6,7 @@ * * @details * This file contains functions for predicting the class labels of instances - * and a function for calculating the predictive performance (hitrate) of + * and a function for calculating the predictive performance (hitrate) of * a prediction given true class labels. * */ @@ -24,7 +24,7 @@ * @brief Predict class labels of data given and output in predy * * @details - * The labels are predicted by mapping each instance in data to the + * The labels are predicted by mapping each instance in data to the * simplex space using the matrix V in the given model. Next, for each * instance the nearest simplex vertex is determined using an Euclidean * norm. The nearest simplex vertex determines the predicted class label, @@ -113,7 +113,7 @@ void gensvm_predict_labels(struct GenData *testdata, struct GenModel *model, * * @details * The predictive performance is calculated by simply counting the number - * of correctly classified samples and dividing by the total number of + * of correctly classified samples and dividing by the total number of * samples, multiplying by 100. * * @param[in] data the GenData dataset with known labels diff --git a/src/gensvm_strutil.c b/src/gensvm_strutil.c index aab441c..57e9399 100644 --- a/src/gensvm_strutil.c +++ b/src/gensvm_strutil.c @@ -38,7 +38,7 @@ bool str_endswith(const char *str, const char *suf) { size_t lensuf = strlen(suf), lenstr = strlen(str); - return lenstr < lensuf ? false : strncmp(str + lenstr - lensuf, suf, + return lenstr < lensuf ? false : strncmp(str + lenstr - lensuf, suf, lensuf) == 0; } @@ -110,7 +110,7 @@ long get_fmt_long(FILE *fid, char *filename, const char *fmt) * * @details * This function is used to read a line of doubles from a buffer. All the - * doubles found are stored in a pre-allocated array. + * doubles found are stored in a pre-allocated array. * * @param[in] buffer a string buffer * @param[in] offset an offset of the string to start looking for @@ -131,7 +131,7 @@ long all_doubles_str(char *buffer, long offset, double *all_doubles) if (start != end) { all_doubles[i] = value; i++; - } else + } else break; start = end; end = NULL; @@ -145,7 +145,7 @@ long all_doubles_str(char *buffer, long offset, double *all_doubles) * * @details * This function is used to read a line of longs from a buffer. All the - * longs found are stored in a pre-allocated array. + * longs found are stored in a pre-allocated array. * * @param[in] buffer a string buffer * @param[in] offset an offset of the string to start looking for diff --git a/src/gensvm_sv.c b/src/gensvm_sv.c index 787b869..c61347a 100644 --- a/src/gensvm_sv.c +++ b/src/gensvm_sv.c @@ -5,7 +5,7 @@ * @brief Calculate the number of support vectors * * @details - * The function in this file can be used to calculate the number of support + * The function in this file can be used to calculate the number of support * vectors are left in a model. * */ @@ -17,9 +17,9 @@ * @brief Calculate the number of support vectors in a model * * @details - * If an object is correctly classified, the number of classes for which the - * error q is larger than 1, is K-1 (i.e., there is no error w.r.t. any of the - * other classes). All objects for which this is not the case are thus support + * If an object is correctly classified, the number of classes for which the + * error q is larger than 1, is K-1 (i.e., there is no error w.r.t. any of the + * other classes). All objects for which this is not the case are thus support * vectors. * * @param[in] model GenModel with solution @@ -40,6 +40,6 @@ long gensvm_num_sv(struct GenModel *model, struct GenData *data) } num_sv += (num_correct < data->K - 1); } - + return num_sv; } diff --git a/src/gensvm_timer.c b/src/gensvm_timer.c index a871887..93cd77e 100644 --- a/src/gensvm_timer.c +++ b/src/gensvm_timer.c @@ -31,7 +31,7 @@ double elapsed_time(clock_t s_time, clock_t e_time) * * @details * Create a string for the current system time. Include an offset of UTC for - * consistency. The format of the generated string is "DDD MMM D HH:MM:SS + * consistency. The format of the generated string is "DDD MMM D HH:MM:SS * YYYY (UTC +HH:MM)", e.g. "Fri Aug 9, 12:34:56 2013 (UTC +02:00)". * * @param[in,out] buffer allocated string buffer, on exit contains diff --git a/src/gensvm_train.c b/src/gensvm_train.c index 680d0dd..8c32809 100644 --- a/src/gensvm_train.c +++ b/src/gensvm_train.c @@ -3,7 +3,7 @@ * @author Gertjan van den Burg * @date August 9, 2013 * @brief Main functions for training the GenSVM solution. - * + * * @details * Contains update and loss functions used to actually find * the optimal V. @@ -32,14 +32,14 @@ * @details * This function is the main training function. This function * handles the optimization of the model with the given model parameters, with - * the data given. On return the matrix GenModel::V contains the optimal + * the data given. On return the matrix GenModel::V contains the optimal * weight matrix. * * In this function, step doubling is used in the majorization algorithm after - * a burn-in of 50 iterations. If the training is finished, GenModel::t and + * a burn-in of 50 iterations. If the training is finished, GenModel::t and * GenModel::W are extracted from GenModel::V. * - * @param[in,out] model the GenModel to be trained. Contains optimal + * @param[in,out] model the GenModel to be trained. Contains optimal * V on exit. * @param[in] data the GenData to train the model with. */ @@ -54,7 +54,7 @@ void gensvm_optimize(struct GenModel *model, struct GenData *data) double *B = Calloc(double, n*(K-1)); double *ZV = Calloc(double, n*(K-1)); - double *ZAZ = Calloc(double, (m+1)*(m+1)); + double *ZAZ = Calloc(double, (m+1)*(m+1)); double *ZAZV = Calloc(double, (m+1)*(K-1)); double *ZAZVT = Calloc(double, (m+1)*(K-1)); @@ -79,16 +79,16 @@ void gensvm_optimize(struct GenModel *model, struct GenData *data) while ((it < MAX_ITER) && (Lbar - L)/L > model->epsilon) { - // ensure V contains newest V and Vbar contains V from + // ensure V contains newest V and Vbar contains V from // previous gensvm_get_update(model, data, B, ZAZ, ZAZV, ZAZVT); if (it > 50) gensvm_step_doubling(model); - + Lbar = L; L = gensvm_get_loss(model, data, ZV); - if (it%100 == 0) + if (it%100 == 0) note("iter = %li, L = %15.16f, Lbar = %15.16f, " "reldiff = %15.16f\n", it, L, Lbar, (Lbar - L)/L); it++; @@ -124,20 +124,20 @@ void gensvm_optimize(struct GenModel *model, struct GenData *data) /** * @brief Calculate the current value of the loss function - * + * * @details - * The current loss function value is calculated based on the matrix V in the + * The current loss function value is calculated based on the matrix V in the * given model. Note that the matrix ZV is passed explicitly to avoid having * to reallocate memory at every step. * - * @param[in] model GenModel structure which holds the current + * @param[in] model GenModel structure which holds the current * estimate V * @param[in] data GenData structure - * @param[in,out] ZV pre-allocated matrix ZV which is updated on + * @param[in,out] ZV pre-allocated matrix ZV which is updated on * output * @returns the current value of the loss function */ -double gensvm_get_loss(struct GenModel *model, struct GenData *data, +double gensvm_get_loss(struct GenModel *model, struct GenData *data, double *ZV) { long i, j; @@ -187,19 +187,19 @@ double gensvm_get_loss(struct GenModel *model, struct GenData *data, * * Because the function gensvm_get_update() is always called after a call to * gensvm_get_loss() with the same GenModel::V, it is unnecessary to calculate - * the updated errors GenModel::Q and GenModel::H here too. This saves on + * the updated errors GenModel::Q and GenModel::H here too. This saves on * computation time. * - * In calculating the majorization coefficients we calculate the elements of a + * In calculating the majorization coefficients we calculate the elements of a * diagonal matrix A with elements * @f[ - * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ + * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ * \varepsilon_i a_{ijk}^{(p)} + (1 - \varepsilon_i) \omega_i * a_{ijk}^{(p)} \right], * @f] * where @f$ k = y_i @f$. * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it is - * efficient to update a matrix ZAZ through consecutive rank 1 updates with + * efficient to update a matrix ZAZ through consecutive rank 1 updates with * a single element of A and the corresponding row of Z. The BLAS function * dsyr is used for this. * @@ -225,7 +225,7 @@ double gensvm_get_loss(struct GenModel *model, struct GenData *data, * solving this system is done through dposv(). * * @todo - * Consider allocating IPIV and WORK at a higher level, they probably don't + * Consider allocating IPIV and WORK at a higher level, they probably don't * change much during the iterations. * * @param [in,out] model model to be updated @@ -301,7 +301,7 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, b = 0.5 - kappa/2.0 - q; } else if ( q <= 1.0) { b = pow(1.0 - q, 3.0)/( - 2.0*pow(kappa + 1.0, + 2.0*pow(kappa + 1.0, 2.0)); } else { b = 0; @@ -336,22 +336,22 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, } else { a = 0.25*pow(p, 2.0)*pow( (p/(p - 2.0))* - (0.5 - kappa/2.0 - q), + (0.5 - kappa/2.0 - q), p - 2.0); b = a*(2.0*q + kappa - 1.0)/ - (p - 2.0) + + (p - 2.0) + 0.5*p*pow( p/(p - 2.0)* (0.5 - kappa/ - 2.0 - q), + 2.0 - q), p - 1.0); } if (q <= -kappa) { b = 0.5*p*pow( - 0.5 - kappa/2.0 - q, + 0.5 - kappa/2.0 - q, p - 1.0); } else if ( q <= 1.0) { - b = p*pow(1.0 - q, + b = p*pow(1.0 - q, 2.0*p - 1.0)/ pow(2*kappa+2.0, p); } @@ -379,8 +379,8 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, } Avalue *= in * rho[i]; - // Now we calculate the matrix ZAZ. Since this is - // guaranteed to be symmetric, we only calculate the + // Now we calculate the matrix ZAZ. Since this is + // guaranteed to be symmetric, we only calculate the // upper part of the matrix, and then copy this over // to the lower part after all calculations are done. // Note that the use of dsym is faster than dspr, even @@ -402,8 +402,8 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, for (j=0; j<m+1; j++) matrix_set(ZAZ, m+1, j, i, matrix_get(ZAZ, m+1, i, j)); */ - - // Calculate the right hand side of the system we + + // Calculate the right hand side of the system we // want to solve. cblas_dsymm( CblasRowMajor, @@ -417,7 +417,7 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, model->V, K-1, 0.0, - ZAZV, + ZAZV, K-1); cblas_dgemm( @@ -436,8 +436,8 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, ZAZV, K-1); - /* - * Add lambda to all diagonal elements except the first one. Recall + /* + * Add lambda to all diagonal elements except the first one. Recall * that ZAZ is of size m+1 and is symmetric. */ i = 0; @@ -445,19 +445,19 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, i += (m+1) + 1; ZAZ[i] += model->lambda; } - + // For the LAPACK call we need to switch to Column- // Major order. This is unnecessary for the matrix - // ZAZ because it is symmetric. The matrix ZAZV + // ZAZ because it is symmetric. The matrix ZAZV // must be converted however. for (i=0; i<m+1; i++) for (j=0; j<K-1; j++) ZAZVT[j*(m+1)+i] = ZAZV[i*(K-1)+j]; - - // We use the lower ('L') part of the matrix ZAZ, + + // We use the lower ('L') part of the matrix ZAZ, // because we have used the upper part in the BLAS // calls above in Row-major order, and Lapack uses - // column major order. + // column major order. status = dposv( 'L', @@ -470,10 +470,10 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, if (status != 0) { // This step should not be necessary, as the matrix - // ZAZ is positive semi-definite by definition. It + // ZAZ is positive semi-definite by definition. It // is included for safety. fprintf(stderr, "GenSVM warning: Received nonzero status from " - "dposv: %i\n", + "dposv: %i\n", status); int *IPIV = malloc((m+1)*sizeof(int)); double *WORK = malloc(1*sizeof(double)); @@ -507,7 +507,7 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, free(IPIV); } - // Return to Row-major order. The matrix ZAZVT contains the solution + // Return to Row-major order. The matrix ZAZVT contains the solution // after the dposv/dsysv call. for (i=0; i<m+1; i++) for (j=0; j<K-1; j++) diff --git a/src/gensvm_train_dataset.c b/src/gensvm_train_dataset.c index 4f42040..a70b457 100644 --- a/src/gensvm_train_dataset.c +++ b/src/gensvm_train_dataset.c @@ -6,7 +6,7 @@ * * @details * The GenSVM algorithm takes a number of parameters. The functions in - * this file are used to find the optimal parameters. + * this file are used to find the optimal parameters. */ #include <math.h> @@ -31,7 +31,7 @@ extern FILE *GENSVM_OUTPUT_FILE; * * @details * A Training instance describes the grid to search over. This funtion - * creates all tasks that need to be performed and adds these to + * creates all tasks that need to be performed and adds these to * a Queue. Each task contains a pointer to the train and test datasets * which are supplied. Note that the tasks are created in a specific order of * the parameters, to ensure that the GenModel::V of a previous parameter @@ -39,13 +39,13 @@ extern FILE *GENSVM_OUTPUT_FILE; * parameter set. * * @param[in] training Training struct describing the grid search - * @param[in] queue pointer to a Queue that will be used to + * @param[in] queue pointer to a Queue that will be used to * add the tasks to * @param[in] train_data GenData of the training set * @param[in] test_data GenData of the test set * */ -void make_queue(struct Training *training, struct Queue *queue, +void make_queue(struct Training *training, struct Queue *queue, struct GenData *train_data, struct GenData *test_data) { long i, j, k; @@ -74,7 +74,7 @@ void make_queue(struct Training *training, struct Queue *queue, task->test_data = test_data; task->folds = training->folds; task->kerneltype = training->kerneltype; - task->kernelparam = Calloc(double, training->Ng + + task->kernelparam = Calloc(double, training->Ng + training->Nc + training->Nd); queue->tasks[i] = task; } @@ -86,7 +86,7 @@ void make_queue(struct Training *training, struct Queue *queue, cnt = 1; i = 0; while (i < N ) - for (j=0; j<training->Np; j++) + for (j=0; j<training->Np; j++) for (k=0; k<cnt; k++) { queue->tasks[i]->p = training->ps[j]; i++; @@ -95,9 +95,9 @@ void make_queue(struct Training *training, struct Queue *queue, cnt *= training->Np; i = 0; while (i < N ) - for (j=0; j<training->Nl; j++) + for (j=0; j<training->Nl; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->lambda = + queue->tasks[i]->lambda = training->lambdas[j]; i++; } @@ -116,7 +116,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N ) for (j=0; j<training->Nw; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->weight_idx = + queue->tasks[i]->weight_idx = training->weight_idxs[j]; i++; } @@ -126,7 +126,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N ) for (j=0; j<training->Ne; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->epsilon = + queue->tasks[i]->epsilon = training->epsilons[j]; i++; } @@ -136,7 +136,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N && training->Ng > 0) for (j=0; j<training->Ng; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->kernelparam[0] = + queue->tasks[i]->kernelparam[0] = training->gammas[j]; i++; } @@ -146,7 +146,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N && training->Nc > 0) for (j=0; j<training->Nc; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->kernelparam[1] = + queue->tasks[i]->kernelparam[1] = training->coefs[j]; i++; } @@ -156,7 +156,7 @@ void make_queue(struct Training *training, struct Queue *queue, while (i < N && training->Nd > 0) for (j=0; j<training->Nd; j++) for (k=0; k<cnt; k++) { - queue->tasks[i]->kernelparam[2] = + queue->tasks[i]->kernelparam[2] = training->degrees[j]; i++; } @@ -285,7 +285,7 @@ struct Queue *create_top_queue(struct Queue *q) } nq->N = N; nq->i = 0; - + return nq; } @@ -298,12 +298,12 @@ struct Queue *create_top_queue(struct Queue *q) * The best performing tasks in the supplied Queue are found by taking those * Task structs that have a performance greater or equal to the 95% percentile * of the performance of all tasks. These tasks are then gathered in a new - * Queue. For each of the tasks in this new Queue the cross validation run is - * repeated a number of times. + * Queue. For each of the tasks in this new Queue the cross validation run is + * repeated a number of times. * * For each of the Task configurations that are repeated the mean performance, * standard deviation of the performance and the mean computation time are - * reported. + * reported. * * Finally, the overall best tasks are written to the specified output. These * tasks are selected to have both the highest mean performance, as well as the @@ -317,7 +317,7 @@ struct Queue *create_top_queue(struct Queue *q) * an interval is found which contains tasks. If one or more tasks are found, * this loop stops. * - * @param[in] q Queue of Task structs which have already been + * @param[in] q Queue of Task structs which have already been * run and have a Task::performance value * @param[in] repeats Number of times to repeat the best * configurations for consistency @@ -385,7 +385,7 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) matrix_set(perf, repeats, i, r, p); mean[i] += p/((double) repeats); note("%3.3f\t", p); - // this is done because if we reuse the V it's not a + // this is done because if we reuse the V it's not a // consistency check gensvm_seed_model_V(NULL, model, task->train_data); for (f=0; f<task->folds; f++) { @@ -420,21 +420,21 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) pi = prctile(mean, N, (100.0-p)); pr = prctile(std, N, p); pt = prctile(time, N, p); - for (i=0; i<N; i++) + for (i=0; i<N; i++) if ((pi - mean[i] < 0.0001) && - (std[i] - pr < 0.0001) && + (std[i] - pr < 0.0001) && (time[i] - pt < 0.0001)) { note("(%li)\tw = %li\te = %f\tp = %f\t" "k = %f\tl = %f\t" "mean: %3.3f\tstd: %3.3f\t" "time: %3.3f\n", - nq->tasks[i]->ID, + nq->tasks[i]->ID, nq->tasks[i]->weight_idx, - nq->tasks[i]->epsilon, + nq->tasks[i]->epsilon, nq->tasks[i]->p, - nq->tasks[i]->kappa, + nq->tasks[i]->kappa, nq->tasks[i]->lambda, - mean[i], + mean[i], std[i], time[i]); breakout = true; @@ -458,15 +458,15 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) * @brief Check if the kernel parameters change between tasks * * @details - * In the current strategy for training the kernel matrix is decomposed once, - * and tasks with the same kernel settings are performed sequentially. When a - * task needs to be done with different kernel parameters, the kernel matrix - * needs to be recalculated. This function is used to check whether this is + * In the current strategy for training the kernel matrix is decomposed once, + * and tasks with the same kernel settings are performed sequentially. When a + * task needs to be done with different kernel parameters, the kernel matrix + * needs to be recalculated. This function is used to check whether this is * the case. * * @param[in] newtask the next task * @param[in] oldtask the old task - * @return whether the kernel needs to be reevaluated + * @return whether the kernel needs to be reevaluated */ bool kernel_changed(struct Task *newtask, struct Task *oldtask) { @@ -502,10 +502,10 @@ bool kernel_changed(struct Task *newtask, struct Task *oldtask) * cross_validation(), the optimal weights of one parameter set are used as * initial estimates for GenModel::V in the next parameter set. Note that to * optimally exploit this feature of the optimization algorithm, the order in - * which tasks are considered is important. This is considered in + * which tasks are considered is important. This is considered in * make_queue(). - * - * The performance found by cross validation is stored in the Task struct. + * + * The performance found by cross validation is stored in the Task struct. * * @param[in,out] q Queue with Task instances to run */ @@ -519,7 +519,7 @@ void start_training(struct Queue *q) struct GenModel *model = gensvm_init_model(); clock_t main_s, main_e, loop_s, loop_e; - // in principle this can change between tasks, but this shouldn't be + // in principle this can change between tasks, but this shouldn't be // the case TODO folds = task->folds; @@ -594,17 +594,17 @@ void start_training(struct Queue *q) * @brief Run cross validation with a given set of train/test folds * * @details - * This cross validation function uses predefined train/test splits. Also, the - * the optimal parameters GenModel::V of a previous fold as initial conditions - * for GenModel::V of the next fold. + * This cross validation function uses predefined train/test splits. Also, the + * the optimal parameters GenModel::V of a previous fold as initial conditions + * for GenModel::V of the next fold. * * @param[in] model GenModel with the configuration to train * @param[in] train_folds array of training datasets * @param[in] test_folds array of test datasets * @param[in] folds number of folds - * @param[in] n_total number of objects in the union of the train + * @param[in] n_total number of objects in the union of the train * datasets - * @return performance (hitrate) of the configuration on + * @return performance (hitrate) of the configuration on * cross validation */ double gensvm_cross_validation(struct GenModel *model, @@ -643,7 +643,7 @@ double gensvm_cross_validation(struct GenModel *model, total_perf /= ((double) n_total); return total_perf; -} +} /** @@ -735,9 +735,9 @@ void copy_model(struct GenModel *from, struct GenModel *to) * @brief Print the description of the current task on screen * * @details - * To track the progress of the grid search the parameters of the current task - * are written to the output specified in GENSVM_OUTPUT_FILE. Since the - * parameters differ with the specified kernel, this function writes a + * To track the progress of the grid search the parameters of the current task + * are written to the output specified in GENSVM_OUTPUT_FILE. Since the + * parameters differ with the specified kernel, this function writes a * parameter string depending on which kernel is used. * * @param[in] task the Task specified diff --git a/src/gensvm_util.c b/src/gensvm_util.c index aa4e5d9..db99e21 100644 --- a/src/gensvm_util.c +++ b/src/gensvm_util.c @@ -13,23 +13,23 @@ #include "gensvm_util.h" -FILE *GENSVM_OUTPUT_FILE; ///< The #GENSVM_OUTPUT_FILE specifies the - ///< output stream to which all output is +FILE *GENSVM_OUTPUT_FILE; ///< The #GENSVM_OUTPUT_FILE specifies the + ///< output stream to which all output is ///< written. This is done through the ///< internal (!) ///< function gensvm_print_string(). The - ///< advantage of using a global output + ///< advantage of using a global output ///< stream variable is that the output can ///< temporarily be suppressed by importing - ///< this variable through @c extern and + ///< this variable through @c extern and ///< (temporarily) setting it to NULL. /** * @brief Check if any command line arguments contain string * * @details - * Check if any of a given array of command line arguments contains a given - * string. If the string is found, the index of the string in argv is + * Check if any of a given array of command line arguments contains a given + * string. If the string is found, the index of the string in argv is * returned. If the string is not found, 0 is returned. * * This function is copied from MSVMpack/libMSVM.c. @@ -69,7 +69,7 @@ int gensvm_check_argv(int argc, char **argv, char *str) * @returns index of the command line argument that corresponds to * the string, 0 if none matches. */ -int gensvm_check_argv_eq(int argc, char **argv, char *str) +int gensvm_check_argv_eq(int argc, char **argv, char *str) { int i; int arg_str = 0; @@ -88,7 +88,7 @@ int gensvm_check_argv_eq(int argc, char **argv, char *str) * * @details * This function is used to print a given string to the output stream - * specified by #GENSVM_OUTPUT_FILE. The stream is flushed after the string + * specified by #GENSVM_OUTPUT_FILE. The stream is flushed after the string * is written to the stream. If #GENSVM_OUTPUT_FILE is NULL, nothing is * written. Note that this function is only used by note(), it should never be * used directly. diff --git a/src/libGenSVM.c b/src/libGenSVM.c index b692bdb..c9b0b3c 100644 --- a/src/libGenSVM.c +++ b/src/libGenSVM.c @@ -6,8 +6,8 @@ * * @details * The functions in this file are all functions needed - * to calculate the optimal separation boundaries for - * a multiclass classification problem, using the + * to calculate the optimal separation boundaries for + * a multiclass classification problem, using the * GenSVM algorithm. * */ @@ -23,10 +23,10 @@ inline double rnd() { return (double) rand()/0x7FFFFFFF; } /** * @brief Generate matrix of simplex vertex coordinates - * + * * @details - * Generate the simplex matrix. Each row of the created - * matrix contains the coordinate vector of a single + * Generate the simplex matrix. Each row of the created + * matrix contains the coordinate vector of a single * vertex of the K-simplex in K-1 dimensions. The simplex * generated is a special simplex with edges of length 1. * The simplex matrix U must already have been allocated. @@ -83,9 +83,9 @@ void gensvm_category_matrix(struct GenModel *model, struct GenData *dataset) * * @details * The simplex difference matrix is a 3D matrix which is constructed - * as follows. For each instance i, the difference vectors between the row of + * as follows. For each instance i, the difference vectors between the row of * the simplex matrix corresponding to the class label of instance i and the - * other rows of the simplex matrix are calculated. These difference vectors + * other rows of the simplex matrix are calculated. These difference vectors * are stored in a matrix, which is one horizontal slice of the 3D matrix. * * @param[in,out] model the corresponding GenModel @@ -113,11 +113,11 @@ void gensvm_simplex_diff(struct GenModel *model, struct GenData *data) /** * @brief Calculate the scalar errors - * + * * @details * Calculate the scalar errors q based on the current estimate of V, and - * store these in Q. It is assumed that the memory for Q has already been - * allocated. In addition, the matrix ZV is calculated here. It is assigned + * store these in Q. It is assumed that the memory for Q has already been + * allocated. In addition, the matrix ZV is calculated here. It is assigned * to a pre-allocated block of memory, which is passed to this function. * * @param[in,out] model the corresponding GenModel @@ -164,16 +164,16 @@ void gensvm_calculate_errors(struct GenModel *model, struct GenData *data, } } } -} +} /** * @brief Calculate the Huber hinge errors * * @details - * For each of the scalar errors in Q the Huber hinge errors are - * calculated. The Huber hinge is here defined as + * For each of the scalar errors in Q the Huber hinge errors are + * calculated. The Huber hinge is here defined as * @f[ - * h(q) = + * h(q) = * \begin{dcases} * 1 - q - \frac{\kappa + 1}{2} & \text{if } q \leq -\kappa \\ * \frac{1}{2(\kappa + 1)} ( 1 - q)^2 & \text{if } q \in (-\kappa, 1] \\ @@ -183,7 +183,7 @@ void gensvm_calculate_errors(struct GenModel *model, struct GenData *data, * * @param[in,out] model the corresponding GenModel */ -void gensvm_calculate_huber(struct GenModel *model) +void gensvm_calculate_huber(struct GenModel *model) { long i, j; double q, value; @@ -206,10 +206,10 @@ void gensvm_calculate_huber(struct GenModel *model) * @brief seed the matrix V from an existing model or using rand * * @details - * The matrix V must be seeded before the main_loop() can start. - * This can be done by either seeding it with random numbers or + * The matrix V must be seeded before the main_loop() can start. + * This can be done by either seeding it with random numbers or * using the solution from a previous model on the same dataset - * as initial seed. The latter option usually allows for a + * as initial seed. The latter option usually allows for a * significant improvement in the number of iterations necessary * because the seeded model V is closer to the optimal V. * @@ -221,11 +221,11 @@ void gensvm_seed_model_V(struct GenModel *from_model, { long i, j, k; double cmin, cmax, value; - + long n = data->n; long m = data->m; long K = data->K; - + if (from_model == NULL) { for (i=0; i<m+1; i++) { cmin = 1e100; @@ -243,7 +243,7 @@ void gensvm_seed_model_V(struct GenModel *from_model, } } } else { - for (i=0; i<m+1; i++) + for (i=0; i<m+1; i++) for (j=0; j<K-1; j++) { value = matrix_get(from_model->V, K-1, i, j); matrix_set(to_model->V, K-1, i, j, value); @@ -255,7 +255,7 @@ void gensvm_seed_model_V(struct GenModel *from_model, * @brief Use step doubling * * @details - * Step doubling can be used to speed up the maorization algorithm. Instead of + * Step doubling can be used to speed up the maorization algorithm. Instead of * using the value at the minimimum of the majorization function, the value * ``opposite'' the majorization point is used. This can essentially cut the * number of iterations necessary to reach the minimum in half. @@ -283,10 +283,10 @@ void gensvm_step_doubling(struct GenModel *model) * @brief Initialize instance weights * * @details - * Instance weights can for instance be used to add additional weights to + * Instance weights can for instance be used to add additional weights to * instances of certain classes. Two default weight possibilities are - * implemented here. The first is unit weights, where each instance gets - * weight 1. + * implemented here. The first is unit weights, where each instance gets + * weight 1. * * The second are group size correction weights, which are calculated as * @f[ @@ -296,8 +296,8 @@ void gensvm_step_doubling(struct GenModel *model) * @f$ y_i = k @f$. * * @param[in] data GenData with the dataset - * @param[in,out] model GenModel with the weight specification. On - * exit GenModel::rho contains the instance + * @param[in,out] model GenModel with the weight specification. On + * exit GenModel::rho contains the instance * weights. */ void gensvm_initialize_weights(struct GenData *data, struct GenModel *model) @@ -311,12 +311,12 @@ void gensvm_initialize_weights(struct GenData *data, struct GenModel *model) if (model->weight_idx == 1) { for (i=0; i<n; i++) model->rho[i] = 1.0; - } + } else if (model->weight_idx == 2) { groups = Calloc(long, K); - for (i=0; i<n; i++) + for (i=0; i<n; i++) groups[data->y[i]-1]++; - for (i=0; i<n; i++) + for (i=0; i<n; i++) model->rho[i] = ((double) n)/((double) (groups[data->y[i]-1]*K)); } else { fprintf(stderr, "Unknown weight specification.\n"); |
