diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2016-05-16 18:47:09 +0200 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2016-05-16 18:47:09 +0200 |
| commit | 044dc5a93c33d7aa4c9c98a626890c16446a56fc (patch) | |
| tree | 23cc17a595d36a35ad9cb50e3ab18c2956b5f65c /include | |
| parent | Move includes to header (diff) | |
| download | gensvm-044dc5a93c33d7aa4c9c98a626890c16446a56fc.tar.gz gensvm-044dc5a93c33d7aa4c9c98a626890c16446a56fc.zip | |
major refactor of the code
Diffstat (limited to 'include')
| -rw-r--r-- | include/gensvm_base.h | 116 | ||||
| -rw-r--r-- | include/gensvm_cmdarg.h | 21 | ||||
| -rw-r--r-- | include/gensvm_copy.h | 18 | ||||
| -rw-r--r-- | include/gensvm_cv_util.h (renamed from include/gensvm_crossval.h) | 11 | ||||
| -rw-r--r-- | include/gensvm_debug.h | 20 | ||||
| -rw-r--r-- | include/gensvm_grid.h | 78 | ||||
| -rw-r--r-- | include/gensvm_gridsearch.h | 37 | ||||
| -rw-r--r-- | include/gensvm_init.h | 21 | ||||
| -rw-r--r-- | include/gensvm_io.h | 7 | ||||
| -rw-r--r-- | include/gensvm_kernel.h | 12 | ||||
| -rw-r--r-- | include/gensvm_lapack.h | 9 | ||||
| -rw-r--r-- | include/gensvm_matrix.h | 23 | ||||
| -rw-r--r-- | include/gensvm_memory.h | 2 | ||||
| -rw-r--r-- | include/gensvm_optimize.h | 36 | ||||
| -rw-r--r-- | include/gensvm_pred.h | 6 | ||||
| -rw-r--r-- | include/gensvm_print.h | 22 | ||||
| -rw-r--r-- | include/gensvm_queue.h | 40 | ||||
| -rw-r--r-- | include/gensvm_simplex.h | 18 | ||||
| -rw-r--r-- | include/gensvm_strutil.h | 6 | ||||
| -rw-r--r-- | include/gensvm_sv.h | 4 | ||||
| -rw-r--r-- | include/gensvm_task.h | 53 | ||||
| -rw-r--r-- | include/gensvm_timer.h | 6 | ||||
| -rw-r--r-- | include/gensvm_train.h | 29 | ||||
| -rw-r--r-- | include/gensvm_train_dataset.h | 143 | ||||
| -rw-r--r-- | include/gensvm_util.h | 26 | ||||
| -rw-r--r-- | include/globals.h | 27 | ||||
| -rw-r--r-- | include/libGenSVM.h | 4 |
27 files changed, 525 insertions, 270 deletions
diff --git a/include/gensvm_base.h b/include/gensvm_base.h new file mode 100644 index 0000000..b1f4a6b --- /dev/null +++ b/include/gensvm_base.h @@ -0,0 +1,116 @@ +/** + * @file gensvm_base.h + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Definitions for GenData and GenModel structures + * + * @details + * Contains documentation and declarations of GenModel and GenData. + * + */ + +#ifndef GENSVM_BASE_H +#define GENSVM_BASE_H + +// includes +#include "globals.h" + +// type declarations + +/** + * @brief A structure to represent the data. + * + * @param K number of classes + * @param n number of instances + * @param m number of predictors + * @param *y pointer to vector of class labels + * @param *Z pointer to augmented data matrix + * @param *RAW pointer to augmented raw data matrix + * @param *J pointer to regularization vector + * @param kerneltype kerneltype used in GenData::Z + * @param *kernelparam kernel parameters used in GenData::Z + * + */ +struct GenData { + long K; + ///< number of classes + long n; + ///< number of instances + long m; + ///< number of predictors (width of RAW) + long r; + ///< number of eigenvalues (width of Z) + long *y; + ///< array of class labels, 1..K + double *Z; + ///< augmented data matrix (either equal to RAW or to the eigenvectors + ///< of the kernel matrix) + double *RAW; + ///< augmented raw data matrix + double *Sigma; + KernelType kerneltype; + double *kernelparam; +}; + +/** + * @brief A structure to represent a single GenSVM model. + * + */ +struct GenModel { + int weight_idx; + ///< which weights to use (1 = unit, 2 = group) + long K; + ///< number of classes in the dataset + long n; + ///< number of instances in the dataset + long m; + ///< number of predictor variables in the dataset + double epsilon; + ///< stopping criterion for the IM algorithm. 
+ double p; + ///< parameter for the L-p norm in the loss function + double kappa; + ///< parameter for the Huber hinge function + double lambda; + ///< regularization parameter in the loss function + double *W; + ///< weight matrix + double *t; + ///< translation vector + double *V; + ///< augmented weight matrix + double *Vbar; + ///< augmented weight matrix from the previous iteration of the IM + ///< algorithm + double *U; + ///< simplex matrix + double *UU; + ///< 3D simplex difference matrix + double *Q; + ///< error matrix + double *H; + ///< Huber weighted error matrix + double *R; + ///< 0-1 auxiliary matrix, this matrix is n x K, with for row i a 0 on + ///< column y[i]-1, and 1 everywhere else. + double *rho; + ///< vector of instance weights + double training_error; + ///< loss function value after training has finished + char *data_file; + ///< filename of the data + KernelType kerneltype; + ///< type of kernel used in the model + double *kernelparam; + ///< array of kernel parameters, size depends on kernel type +}; + +// function declarations +struct GenModel *gensvm_init_model(); +void gensvm_allocate_model(struct GenModel *model); +void gensvm_reallocate_model(struct GenModel *model, long n, long m); +void gensvm_free_model(struct GenModel *model); +struct GenData *gensvm_init_data(); +void gensvm_free_data(struct GenData *data); + +#endif diff --git a/include/gensvm_cmdarg.h b/include/gensvm_cmdarg.h new file mode 100644 index 0000000..ac33be8 --- /dev/null +++ b/include/gensvm_cmdarg.h @@ -0,0 +1,21 @@ +/** + * @file gensvm_cmdarg.h + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Header file for gensvm_cmdarg.c + * + * @details + * Function declarations for dealing with command line arguments. 
+ * + */ + +#ifndef GENSVM_CMDARG_H +#define GENSVM_CMDARG_H + +#include "globals.h" + +// function declarations +int gensvm_check_argv(int argc, char **argv, char *str); +int gensvm_check_argv_eq(int argc, char **argv, char *str); + +#endif diff --git a/include/gensvm_copy.h b/include/gensvm_copy.h new file mode 100644 index 0000000..bdf6eec --- /dev/null +++ b/include/gensvm_copy.h @@ -0,0 +1,18 @@ +/** + * @file gensvm_copy.h + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Header file for gensvm_copy.c + * + */ + +#ifndef GENSVM_COPY_H +#define GENSVM_COPY_H + +// includes +#include "gensvm_base.h" + +// function declarations +void gensvm_copy_model(struct GenModel *from, struct GenModel *to); + +#endif diff --git a/include/gensvm_crossval.h b/include/gensvm_cv_util.h index 3ac5fa9..ada727d 100644 --- a/include/gensvm_crossval.h +++ b/include/gensvm_cv_util.h @@ -1,8 +1,8 @@ /** - * @file crossval.h + * @file gensvm_cv_util.h * @author Gertjan van den Burg * @date January, 2014 - * @brief Header file for crossval.c + * @brief Header file for gensvm_cv_util.c * * @details * Contains function declarations for functions needed for performing cross @@ -10,11 +10,10 @@ * */ -#ifndef GENSVM_CROSSVAL_H -#define GENSVM_CROSSVAL_H +#ifndef GENSVM_CV_UTIL_H +#define GENSVM_CV_UTIL_H -// forward delaration -struct GenData; +#include "gensvm_base.h" void gensvm_make_cv_split(long N, long folds, long *cv_idx); void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data, diff --git a/include/gensvm_debug.h b/include/gensvm_debug.h new file mode 100644 index 0000000..1cab4ca --- /dev/null +++ b/include/gensvm_debug.h @@ -0,0 +1,20 @@ + +/** + * @file gensvm_debug.h + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Header for useful debug functions + * + * @details + * Contains defines useful for debugging. 
+ * + */ + +#ifndef GENSVM_DEBUG_H +#define GENSVM_DEBUG_H + +#include "gensvm_print.h" + +void print_matrix(double *M, long rows, long cols); + +#endif diff --git a/include/gensvm_grid.h b/include/gensvm_grid.h new file mode 100644 index 0000000..d335d7c --- /dev/null +++ b/include/gensvm_grid.h @@ -0,0 +1,78 @@ +/** + * @file gensvm_grid.h + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Structs necessary for the grid search + * + * @details + * The grid search for the optimal parameters is done through a queue. + * This file contains struct definitions for this queue and a single + * task in a queue, as well as a structure for the complete training + * scheme. Function declarations are also included. + * + */ + +#ifndef GENSVM_GRID_H +#define GENSVM_GRID_H + +#include "globals.h" + +/** + * @brief Structure for describing the entire grid search + * + * @param traintype type of training to use + * @param kerneltype type of kernel to use throughout training + * @param repeats number of repeats to be done after the grid + * search to find the parameter set with the + * most consistent high performance + * @param folds number of folds in cross validation + * @param Np size of the array of p values + * @param Nl size of the array of lambda values + * @param Nk size of the array of kappa values + * @param Ne size of the array of epsilon values + * @param Nw size of the array of weight_idx values + * @param Ng size of the array of gamma values + * @param Nc size of the array of coef values + * @param Nd size of the array of degree values + * @param *weight_idxs array of weight_idxs + * @param *ps array of p values + * @param *lambdas array of lambda values + * @param *kappas array of kappa values + * @param *epsilons array of epsilon values + * @param *gammas array of gamma values + * @param *coefs array of coef values + * @param *degrees array of degree values + * @param *train_data_file filename of train data file + * @param *test_data_file filename of 
test data file + * + */ +struct GenGrid { + TrainType traintype; + KernelType kerneltype; + long repeats; + long folds; + long Np; + long Nl; + long Nk; + long Ne; + long Nw; + long Ng; + long Nc; + long Nd; + int *weight_idxs; + double *ps; + double *lambdas; + double *kappas; + double *epsilons; + double *gammas; + double *coefs; + double *degrees; + char *train_data_file; + char *test_data_file; +}; + +// function declarations +struct GenGrid *gensvm_init_grid(); +void gensvm_free_grid(struct GenGrid *grid); + +#endif diff --git a/include/gensvm_gridsearch.h b/include/gensvm_gridsearch.h new file mode 100644 index 0000000..dcd9b93 --- /dev/null +++ b/include/gensvm_gridsearch.h @@ -0,0 +1,37 @@ +/** + * @file gensvm_gridsearch.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for gensvm_gridsearch.c + * + * @details + * The grid search for the optimal parameters is done through a queue. + * This file contains struct definitions for this queue and a single + * task in a queue, as well as a structure for the complete training + * scheme. Function declarations are also included. 
+ * + */ + +#ifndef GENSVM_GRIDSEARCH_H +#define GENSVM_GRIDSEARCH_H + +// includes +#include "gensvm_cv_util.h" +#include "gensvm_init.h" +#include "gensvm_grid.h" +#include "gensvm_optimize.h" +#include "gensvm_pred.h" +#include "gensvm_queue.h" +#include "gensvm_timer.h" + +// function declarations +void gensvm_fill_queue(struct GenGrid *grid, struct GenQueue *queue, + struct GenData *train_data, struct GenData *test_data); +void consistency_repeats(struct GenQueue *q, long repeats, TrainType traintype); +void make_model_from_task(struct GenTask *task, struct GenModel *model); +void print_progress_string(struct GenTask *task, long N); +void start_training(struct GenQueue *q); +double gensvm_cross_validation(struct GenModel *model, + struct GenData **train_folds, struct GenData **test_folds, + int folds, long n_total); +#endif diff --git a/include/gensvm_init.h b/include/gensvm_init.h index 980366b..3f4a1cb 100644 --- a/include/gensvm_init.h +++ b/include/gensvm_init.h @@ -1,28 +1,21 @@ /** * @file gensvm_init.h * @author Gertjan van den Burg - * @date January, 2014 + * @date May, 2016 * @brief Header file for gensvm_init.c * * @details - * Contains function declarations for the initialization functions for - * GenModel and GenData structures. + * Contains function declarations for the initialization functions for the + * model weights and model V matrix. 
*/ #ifndef GENSVM_INIT_H #define GENSVM_INIT_H -// include -#include "globals.h" -#include "gensvm.h" +#include "gensvm_base.h" -struct GenModel *gensvm_init_model(); - -struct GenData *gensvm_init_data(); - -void gensvm_allocate_model(struct GenModel *model); -void gensvm_reallocate_model(struct GenModel *model, long n, long m); -void gensvm_free_model(struct GenModel *model); -void gensvm_free_data(struct GenData *data); +void gensvm_init_V(struct GenModel *from_model, struct GenModel *to_model, + struct GenData *data); +void gensvm_initialize_weights(struct GenData *data, struct GenModel *model); #endif diff --git a/include/gensvm_io.h b/include/gensvm_io.h index 4581c5f..9b0d973 100644 --- a/include/gensvm_io.h +++ b/include/gensvm_io.h @@ -12,9 +12,9 @@ #ifndef GENSVM_IO_H #define GENSVM_IO_H -// forward declarations -struct GenData; -struct GenModel; +// includes +#include "gensvm_base.h" +#include "gensvm_strutil.h" // function declarations void gensvm_read_data(struct GenData *dataset, char *data_file); @@ -24,5 +24,6 @@ void gensvm_write_model(struct GenModel *model, char *output_filename); void gensvm_write_predictions(struct GenData *data, long *predy, char *output_filename); +void gensvm_time_string(char *buffer); #endif diff --git a/include/gensvm_kernel.h b/include/gensvm_kernel.h index 45b7e62..a1fac20 100644 --- a/include/gensvm_kernel.h +++ b/include/gensvm_kernel.h @@ -14,12 +14,10 @@ #ifndef GENSVM_KERNEL_H #define GENSVM_KERNEL_H -// forward declarations -struct GenData; -struct GenModel; +// includes +#include "gensvm_base.h" // function declarations - void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data); void gensvm_kernel_postprocess(struct GenModel *model, struct GenData *traindata, struct GenData *testdata); @@ -36,5 +34,9 @@ void gensvm_make_testfactor(struct GenData *testdata, double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n); double gensvm_dot_poly(double *x1, double *x2, double 
*kernelparam, long n); double gensvm_dot_sigmoid(double *x1, double *x2, double *kernelparam, long n); - +int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA, + double VL, double VU, int IL, int IU, double ABSTOL, + int *M, double *W, double *Z, int LDZ, double *WORK, int LWORK, + int *IWORK, int *IFAIL); +double dlamch(char CMACH); #endif diff --git a/include/gensvm_lapack.h b/include/gensvm_lapack.h index c4e58e8..843169b 100644 --- a/include/gensvm_lapack.h +++ b/include/gensvm_lapack.h @@ -12,13 +12,4 @@ #ifndef GENSVM_LAPACK_H #define GENSVM_LAPACK_H -int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, - int LDB); -int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, - double *B, int LDB, double *WORK, int LWORK); -int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA, - double VL, double VU, int IL, int IU, double ABSTOL, - int *M, double *W, double *Z, int LDZ, double *WORK, int LWORK, - int *IWORK, int *IFAIL); -double dlamch(char CMACH); #endif diff --git a/include/gensvm_matrix.h b/include/gensvm_matrix.h index 5c88f0b..9982b78 100644 --- a/include/gensvm_matrix.h +++ b/include/gensvm_matrix.h @@ -2,34 +2,15 @@ * @file gensvm_matrix.h * @author Gertjan van den Burg * @date August, 2013 - * @brief Header file for gensvm_matrix.c + * @brief Header with defines for matrix access * * @details - * Contains function declarations for functions useful for dealing with matrices. + * Contains defines useful for dealing with matrices. 
* */ #ifndef GENSVM_MATRIX_H #define GENSVM_MATRIX_H -// Set a matrix element (RowMajor) -#define matrix_set(M, cols, i, j, val) M[(i)*(cols)+j] = val - -// Get a matrix element (RowMajor) -#define matrix_get(M, cols, i, j) M[(i)*(cols)+j] - -// Add to a matrix element (RowMajor) -#define matrix_add(M, cols, i, j, val) M[(i)*(cols)+j] += val - -// Multiply a matrix element (RowMajor) -#define matrix_mul(M, cols, i, j, val) M[(i)*(cols)+j] *= val - -// Set a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor) -#define matrix3_set(M, N2, N3, i, j, k, val) M[k+(N3)*(j+(N2)*(i))] = val - -// Get a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor) -#define matrix3_get(M, N2, N3, i, j, k) M[k+(N3)*(j+(N2)*(i))] - -void print_matrix(double *M, long rows, long cols); #endif diff --git a/include/gensvm_memory.h b/include/gensvm_memory.h index bc4aae9..08d6f2d 100644 --- a/include/gensvm_memory.h +++ b/include/gensvm_memory.h @@ -9,6 +9,8 @@ #ifndef GENSVM_MEMORY_H #define GENSVM_MEMORY_H +#include <stddef.h> + #define Calloc(type, size) \ mycalloc(__FILE__, __LINE__, size, sizeof(type)) #define Malloc(type, size) \ diff --git a/include/gensvm_optimize.h b/include/gensvm_optimize.h new file mode 100644 index 0000000..7a23bdb --- /dev/null +++ b/include/gensvm_optimize.h @@ -0,0 +1,36 @@ +/** + * @file gensvm_optimize.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for gensvm_train.c + * + * @details + * Contains function declarations for functions used to train a single + * GenModel. 
+ * + */ + +#ifndef GENSVM_TRAIN_H +#define GENSVM_TRAIN_H + +#include "gensvm_sv.h" +#include "gensvm_print.h" +#include "gensvm_simplex.h" + +// function declarations +void gensvm_optimize(struct GenModel *model, struct GenData *data); +double gensvm_get_loss(struct GenModel *model, struct GenData *data, + double *ZV); +void gensvm_get_update(struct GenModel *model, struct GenData *data, + double *B, double *ZAZ, double *ZAZV, double *ZAZVT); +void gensvm_category_matrix(struct GenModel *model, struct GenData *data); +void gensvm_simplex_diff(struct GenModel *model, struct GenData *dataset); +void gensvm_calculate_errors(struct GenModel *model, struct GenData *data, + double *ZV); +void gensvm_calculate_huber(struct GenModel *model); +void gensvm_step_doubling(struct GenModel *model); +int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, int LDB); +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, double *B, + int LDB, double *WORK, int LWORK); + +#endif diff --git a/include/gensvm_pred.h b/include/gensvm_pred.h index 97af01f..56e16e8 100644 --- a/include/gensvm_pred.h +++ b/include/gensvm_pred.h @@ -12,9 +12,9 @@ #ifndef GENSVM_PRED_H #define GENSVM_PRED_H -// forward declarations -struct GenData; -struct GenModel; +// includes +#include "gensvm_kernel.h" +#include "gensvm_simplex.h" // function declarations void gensvm_predict_labels(struct GenData *testdata, diff --git a/include/gensvm_print.h b/include/gensvm_print.h new file mode 100644 index 0000000..fff7af5 --- /dev/null +++ b/include/gensvm_print.h @@ -0,0 +1,22 @@ +/** + * @file gensvm_print.h + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Header file for gensvm_print.c + * + * @details + * Function declarations for printing to stdout and stderr. 
* + */ + +#ifndef GENSVM_PRINT_H +#define GENSVM_PRINT_H + +// includes +#include "globals.h" + +// function declarations +void note(const char *fmt,...); +void err(const char *fmt,...); + +#endif diff --git a/include/gensvm_queue.h b/include/gensvm_queue.h new file mode 100644 index 0000000..e8d26d6 --- /dev/null +++ b/include/gensvm_queue.h @@ -0,0 +1,40 @@ +/** + * @file gensvm_queue.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for gensvm_queue.c + * + * @details + * The grid search for the optimal parameters is done through a queue. + * This file contains struct definitions for this queue. Function declarations + * for initializing and freeing the queue are also included. + * + */ + +#ifndef GENSVM_QUEUE_H +#define GENSVM_QUEUE_H + +#include "gensvm_task.h" + +/** + * @brief Simple task queue. + * + * This struct is basically just an array of pointers to GenTask instances, + * with a length and an index of the current task. + * + * @param **tasks array of pointers to GenTask structs + * @param N size of task array + * @param i index used for keeping track of the queue + */ +struct GenQueue { + struct GenTask **tasks; + long N; + long i; +}; + +// function declarations +struct GenQueue *gensvm_init_queue(); +void gensvm_free_queue(struct GenQueue *q); +struct GenTask *get_next_task(struct GenQueue *q); + +#endif diff --git a/include/gensvm_simplex.h b/include/gensvm_simplex.h new file mode 100644 index 0000000..9bb40b1 --- /dev/null +++ b/include/gensvm_simplex.h @@ -0,0 +1,18 @@ +/** + * @file gensvm_simplex.h + * @author Gertjan van den Burg + * @date May, 2016 + * @brief Header file for gensvm_simplex.c + * + */ + +#ifndef GENSVM_SIMPLEX_H +#define GENSVM_SIMPLEX_H + +// includes +#include "globals.h" + +// function declarations +void gensvm_simplex(long K, double *U); + +#endif diff --git a/include/gensvm_strutil.h b/include/gensvm_strutil.h index c51422f..efaa5ec 100644 --- a/include/gensvm_strutil.h +++ 
b/include/gensvm_strutil.h @@ -1,8 +1,8 @@ /** - * @file strutil.h + * @file gensvm_strutil.h * @author Gertjan van den Burg * @date August, 2013 - * @brief Header file for strutil.c + * @brief Header file for gensvm_strutil.c * * @details * Function declarations for useful string functions used in parsing @@ -13,7 +13,7 @@ #ifndef GENSVM_STRUTIL_H #define GENSVM_STRUTIL_H -#include "types.h" +#include "globals.h" bool str_startswith(const char *str, const char *pre); bool str_endswith(const char *str, const char *suf); diff --git a/include/gensvm_sv.h b/include/gensvm_sv.h index 2c7cf57..8347b95 100644 --- a/include/gensvm_sv.h +++ b/include/gensvm_sv.h @@ -12,6 +12,10 @@ #ifndef GENSVM_SV_H #define GENSVM_SV_H +// includes +#include "gensvm_base.h" + +// function declarations long gensvm_num_sv(struct GenModel *model, struct GenData *data); #endif diff --git a/include/gensvm_task.h b/include/gensvm_task.h new file mode 100644 index 0000000..98c8f26 --- /dev/null +++ b/include/gensvm_task.h @@ -0,0 +1,53 @@ +/** + * @file gensvm_task.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Struct for a single task in the queue + * + * @details + * The grid search for the optimal parameters is done through a queue. + * This file contains struct definitions for the tasks in the queue. + * Initialization and free functions are also included. + * + */ + +#ifndef GENSVM_TASK_H +#define GENSVM_TASK_H + +#include "gensvm_base.h" + +/** + * @brief A structure for a single task in the queue. 
+ * + * @param folds number of folds in cross validation + * @param ID numeric id of the task in the queue + * @param weight_idx parameter for the GenModel + * @param p parameter for the GenModel + * @param kappa parameter for the GenModel + * @param lambda parameter for the GenModel + * @param epsilon parameter for the GenModel + * @param kerneltype parameter for the GenModel + * @param *kernelparam parameters for the GenModel + * @param *train_data pointer to the training data + * @param *test_data pointer to the test data (if any) + * @param performance performance after cross validation + */ +struct GenTask { + KernelType kerneltype; + int weight_idx; + long folds; + long ID; + double p; + double kappa; + double lambda; + double epsilon; + double *kernelparam; + struct GenData *train_data; + struct GenData *test_data; + double performance; +}; + +struct GenTask *gensvm_init_task(); +void gensvm_free_task(struct GenTask *task); + +#endif diff --git a/include/gensvm_timer.h b/include/gensvm_timer.h index 29c45cd..11e61e5 100644 --- a/include/gensvm_timer.h +++ b/include/gensvm_timer.h @@ -12,8 +12,10 @@ #ifndef GENSVM_TIMER_H #define GENSVM_TIMER_H -double elapsed_time(clock_t s_time, clock_t e_time); +// includes +#include "globals.h" -void get_time_string(char *buffer); +// function declarations +double gensvm_elapsed_time(clock_t s_time, clock_t e_time); #endif diff --git a/include/gensvm_train.h b/include/gensvm_train.h deleted file mode 100644 index 466b8e2..0000000 --- a/include/gensvm_train.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * @file gensvm_train.h - * @author Gertjan van den Burg - * @date August, 2013 - * @brief Header file for gensvm_train.c - * - * @details - * Contains function declarations for functions used to train a single - * GenModel. 
- * - */ - -#ifndef GENSVM_TRAIN_H -#define GENSVM_TRAIN_H - -//forward declarations -struct GenData; -struct GenModel; - -// function declarations -void gensvm_optimize(struct GenModel *model, struct GenData *data); - -double gensvm_get_loss(struct GenModel *model, struct GenData *data, - double *ZV); - -void gensvm_get_update(struct GenModel *model, struct GenData *data, - double *B, double *ZAZ, double *ZAZV, double *ZAZVT); - -#endif diff --git a/include/gensvm_train_dataset.h b/include/gensvm_train_dataset.h deleted file mode 100644 index 9a3fe86..0000000 --- a/include/gensvm_train_dataset.h +++ /dev/null @@ -1,143 +0,0 @@ -/** - * @file gensvm_train_dataset.h - * @author Gertjan van den Burg - * @date August, 2013 - * @brief Structs and functions necessary for the grid search - * - * @details - * The grid search for the optimal parameters is done through a queue. - * This file contains struct definitions for this queue and a single - * task in a queue, as well as a structure for the complete training - * scheme. Function declarations are also included. - * - */ - -#ifndef GENSVM_TRAIN_DATASET_H -#define GENSVM_TRAIN_DATASET_H - -#include "types.h" - -// forward declarations -struct GenData; -struct GenModel; - -/** - * @brief A structure for a single task in the queue. 
- * - * @param folds number of folds in cross validation - * @param ID numeric id of the task in the queue - * @param weight_idx parameter for the GenModel - * @param p parameter for the GenModel - * @param kappa parameter for the GenModel - * @param lambda parameter for the GenModel - * @param epsilon parameter for the GenModel - * @param kerneltype parameter for the GenModel - * @param *kernelparam parameters for the GenModel - * @param *train_data pointer to the training data - * @param *test_data pointer to the test data (if any) - * @param performance performance after cross validation - */ -struct Task { - KernelType kerneltype; - int weight_idx; - long folds; - long ID; - double p; - double kappa; - double lambda; - double epsilon; - double *kernelparam; - struct GenData *train_data; - struct GenData *test_data; - double performance; -}; - -/** - * @brief Simple task queue. - * - * This struct is basically just an array of pointers to Task instances, - * with a length and an index of the current task. 
- * - * @param **tasks array of pointers to Task structs - * @param N size of task array - * @param i index used for keeping track of the queue - */ -struct Queue { - struct Task **tasks; - long N; - long i; -}; - -/** - * @brief Structure for describing the entire grid search - * - * @param traintype type of training to use - * @param kerneltype type of kernel to use throughout training - * @param repeats number of repeats to be done after the grid - * search to find the parameter set with the - * most consistent high performance - * @param folds number of folds in cross validation - * @param Np size of the array of p values - * @param Nl size of the array of lambda values - * @param Nk size of the array of kappa values - * @param Ne size of the array of epsilon values - * @param Nw size of the array of weight_idx values - * @param Ng size of the array of gamma values - * @param Nc size of the array of coef values - * @param Nd size of the array of degree values - * @param *weight_idxs array of weight_idxs - * @param *ps array of p values - * @param *lambdas array of lambda values - * @param *kappas array of kappa values - * @param *epsilons array of epsilon values - * @param *gammas array of gamma values - * @param *coefs array of coef values - * @param *degrees array of degree values - * @param *train_data_file filename of train data file - * @param *test_data_file filename of test data file - * - */ -struct Training { - TrainType traintype; - KernelType kerneltype; - long repeats; - long folds; - long Np; - long Nl; - long Nk; - long Ne; - long Nw; - long Ng; - long Nc; - long Nd; - int *weight_idxs; - double *ps; - double *lambdas; - double *kappas; - double *epsilons; - double *gammas; - double *coefs; - double *degrees; - char *train_data_file; - char *test_data_file; -}; - -void make_queue(struct Training *training, struct Queue *queue, - struct GenData *train_data, struct GenData *test_data); - -struct Task *get_next_task(struct Queue *q); -void 
free_queue(struct Queue *q); - -void consistency_repeats(struct Queue *q, long repeats, TrainType traintype); - -void make_model_from_task(struct Task *task, struct GenModel *model); -void copy_model(struct GenModel *from, struct GenModel *to); - -void print_progress_string(struct Task *task, long N); - -// new -void start_training(struct Queue *q); -double gensvm_cross_validation(struct GenModel *model, - struct GenData **train_folds, struct GenData **test_folds, - int folds, long n_total); -#endif diff --git a/include/gensvm_util.h b/include/gensvm_util.h deleted file mode 100644 index 5ea2198..0000000 --- a/include/gensvm_util.h +++ /dev/null @@ -1,26 +0,0 @@ -/** - * @file util.h - * @author Gertjan van den Burg - * @date August, 2013 - * @brief Header file for util.c - * - * @details - * Function declarations for utility functions of the program. - * - */ - -#ifndef GENSVM_UTIL_H -#define GENSVM_UTIL_H - -// forward declarations -struct GenData; -struct GenModel; - -// function declarations -int gensvm_check_argv(int argc, char **argv, char *str); -int gensvm_check_argv_eq(int argc, char **argv, char *str); - -void note(const char *fmt,...); -void err(const char *fmt,...); - -#endif diff --git a/include/globals.h b/include/globals.h index becde35..7fad7b1 100644 --- a/include/globals.h +++ b/include/globals.h @@ -18,12 +18,17 @@ #ifndef GENSVM_GLOBALS_H #define GENSVM_GLOBALS_H +#include "gensvm_memory.h" +#include "types.h" + +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <stdbool.h> #include <string.h> - -#include "gensvm_memory.h" +#include <math.h> +#include <time.h> +#include <cblas.h> #define MAX_LINE_LENGTH 1024 @@ -33,4 +38,22 @@ #define minimum(a, b) (a) < (b) ? 
(a) : (b) #endif +// Set a matrix element (RowMajor) +#define matrix_set(M, cols, i, j, val) M[(i)*(cols)+j] = val + +// Get a matrix element (RowMajor) +#define matrix_get(M, cols, i, j) M[(i)*(cols)+j] + +// Add to a matrix element (RowMajor) +#define matrix_add(M, cols, i, j, val) M[(i)*(cols)+j] += val + +// Multiply a matrix element (RowMajor) +#define matrix_mul(M, cols, i, j, val) M[(i)*(cols)+j] *= val + +// Set a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor) +#define matrix3_set(M, N2, N3, i, j, k, val) M[k+(N3)*(j+(N2)*(i))] = val + +// Get a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor) +#define matrix3_get(M, N2, N3, i, j, k) M[k+(N3)*(j+(N2)*(i))] + #endif diff --git a/include/libGenSVM.h b/include/libGenSVM.h index 9e2d4c2..146fc67 100644 --- a/include/libGenSVM.h +++ b/include/libGenSVM.h @@ -23,7 +23,6 @@ struct GenData; struct GenModel; // function declarations -void gensvm_simplex_gen(long K, double *U); void gensvm_category_matrix(struct GenModel *model, struct GenData *data); void gensvm_simplex_diff(struct GenModel *model, struct GenData *dataset); @@ -33,8 +32,5 @@ void gensvm_calculate_huber(struct GenModel *model); void gensvm_step_doubling(struct GenModel *model); -void gensvm_seed_model_V(struct GenModel *from_model, - struct GenModel *to_model, struct GenData *data); -void gensvm_initialize_weights(struct GenData *data, struct GenModel *model); #endif |
