aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2016-05-16 18:47:09 +0200
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2016-05-16 18:47:09 +0200
commit044dc5a93c33d7aa4c9c98a626890c16446a56fc (patch)
tree23cc17a595d36a35ad9cb50e3ab18c2956b5f65c
parentMove includes to header (diff)
downloadgensvm-044dc5a93c33d7aa4c9c98a626890c16446a56fc.tar.gz
gensvm-044dc5a93c33d7aa4c9c98a626890c16446a56fc.zip
major refactor of the code
-rw-r--r--.gitignore6
-rw-r--r--Makefile65
-rw-r--r--include/gensvm_base.h116
-rw-r--r--include/gensvm_cmdarg.h21
-rw-r--r--include/gensvm_copy.h18
-rw-r--r--include/gensvm_cv_util.h (renamed from include/gensvm_crossval.h)11
-rw-r--r--include/gensvm_debug.h20
-rw-r--r--include/gensvm_grid.h78
-rw-r--r--include/gensvm_gridsearch.h37
-rw-r--r--include/gensvm_init.h21
-rw-r--r--include/gensvm_io.h7
-rw-r--r--include/gensvm_kernel.h12
-rw-r--r--include/gensvm_lapack.h9
-rw-r--r--include/gensvm_matrix.h23
-rw-r--r--include/gensvm_memory.h2
-rw-r--r--include/gensvm_optimize.h36
-rw-r--r--include/gensvm_pred.h6
-rw-r--r--include/gensvm_print.h22
-rw-r--r--include/gensvm_queue.h40
-rw-r--r--include/gensvm_simplex.h18
-rw-r--r--include/gensvm_strutil.h6
-rw-r--r--include/gensvm_sv.h4
-rw-r--r--include/gensvm_task.h53
-rw-r--r--include/gensvm_timer.h6
-rw-r--r--include/gensvm_train.h29
-rw-r--r--include/gensvm_train_dataset.h143
-rw-r--r--include/gensvm_util.h26
-rw-r--r--include/globals.h27
-rw-r--r--include/libGenSVM.h4
-rw-r--r--src/GenSVMgrid.c215
-rw-r--r--src/GenSVMpred.c180
-rw-r--r--src/GenSVMtrain.c245
-rw-r--r--src/GenSVMtraintest.c15
-rw-r--r--src/gensvm_base.c211
-rw-r--r--src/gensvm_cmdarg.c71
-rw-r--r--src/gensvm_copy.c49
-rw-r--r--src/gensvm_cv_util.c (renamed from src/gensvm_crossval.c)7
-rw-r--r--src/gensvm_debug.c (renamed from src/gensvm_matrix.c)14
-rw-r--r--src/gensvm_grid.c62
-rw-r--r--src/gensvm_gridsearch.c (renamed from src/gensvm_train_dataset.c)284
-rw-r--r--src/gensvm_init.c251
-rw-r--r--src/gensvm_io.c58
-rw-r--r--src/gensvm_kernel.c51
-rw-r--r--src/gensvm_lapack.c122
-rw-r--r--src/gensvm_memory.c7
-rw-r--r--src/gensvm_optimize.c (renamed from src/gensvm_train.c)280
-rw-r--r--src/gensvm_pred.c10
-rw-r--r--src/gensvm_print.c (renamed from src/gensvm_util.c)63
-rw-r--r--src/gensvm_queue.c71
-rw-r--r--src/gensvm_simplex.c45
-rw-r--r--src/gensvm_strutil.c3
-rw-r--r--src/gensvm_sv.c3
-rw-r--r--src/gensvm_task.c55
-rw-r--r--src/gensvm_timer.c55
-rw-r--r--src/libGenSVM.c304
55 files changed, 1740 insertions, 1857 deletions
diff --git a/.gitignore b/.gitignore
index 98ab832..aed01b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,9 @@
*.swp
GenSVM_train
GenSVM_grid
+doc/*
+training/*
+codegrep*
+log*
+gensvm
+gensvm_grid
diff --git a/Makefile b/Makefile
index 91cb916..d281bb4 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ DOXY=doxygen
DOCDIR=doc
DOXYFILE=$(DOCDIR)/Doxyfile
-EXECS=GenSVM_train GenSVM_grid gensvm
+EXECS=gensvm gensvm_grid
.PHONY: all clean doc test
@@ -29,58 +29,55 @@ test: lib/libgensvm.a
$(MAKE) -C tests all
lib/libgensvm.a: \
- src/libGenSVM.o \
- src/gensvm_crossval.o \
+ src/gensvm_base.o \
+ src/gensvm_cmdarg.o \
+ src/gensvm_copy.o \
+ src/gensvm_cv_util.o \
+ src/gensvm_grid.o \
+ src/gensvm_gridsearch.o \
src/gensvm_init.o \
src/gensvm_io.o \
src/gensvm_kernel.o \
- src/gensvm_lapack.o \
- src/gensvm_matrix.o \
src/gensvm_memory.o \
+ src/gensvm_optimize.o \
src/gensvm_pred.o \
+ src/gensvm_print.o \
+ src/gensvm_queue.o \
+ src/gensvm_simplex.o \
src/gensvm_strutil.o \
src/gensvm_sv.o \
- src/gensvm_train.o \
- src/gensvm_train_dataset.o \
- src/gensvm_timer.o \
- src/gensvm_util.o
+ src/gensvm_task.o \
+ src/gensvm_timer.o
@ar rcs lib/libgensvm.a \
- src/libGenSVM.o \
- src/gensvm_crossval.o \
+ src/gensvm_base.o \
+ src/gensvm_cmdarg.o \
+ src/gensvm_copy.o \
+ src/gensvm_cv_util.o \
+ src/gensvm_grid.o \
+ src/gensvm_gridsearch.o \
src/gensvm_init.o \
src/gensvm_io.o \
- src/gensvm_matrix.o \
- src/gensvm_memory.o \
src/gensvm_kernel.o \
- src/gensvm_lapack.o \
+ src/gensvm_memory.o \
+ src/gensvm_optimize.o \
src/gensvm_pred.o \
+ src/gensvm_print.o \
+ src/gensvm_queue.o \
+ src/gensvm_simplex.o \
src/gensvm_strutil.o \
src/gensvm_sv.o \
- src/gensvm_train.o \
- src/gensvm_train_dataset.o \
- src/gensvm_timer.o \
- src/gensvm_util.o
+ src/gensvm_task.o \
+ src/gensvm_timer.o
@echo libgensvm.a...
gensvm: src/GenSVMtraintest.c lib/libgensvm.a
@$(CC) -o $@ $< $(CFLAGS) $(INCLUDE) $(LIB) -lgensvm $(LDFLAGS)
- @echo gensvm...
-
-GenSVM_train: src/GenSVMtrain.c lib/libgensvm.a
- @$(CC) -o GenSVM_train src/GenSVMtrain.c $(CFLAGS) $(INCLUDE) $(LIB)\
- -lgensvm $(LDFLAGS)
- @echo GenSVM_train...
+ @echo gensvm ...
-GenSVM_grid: src/GenSVMgrid.c lib/libgensvm.a
- @$(CC) -o GenSVM_grid src/GenSVMgrid.c $(CFLAGS) $(INCLUDE) $(LIB) \
- -lgensvm $(LDFLAGS)
- @echo GenSVM_grid...
-
-GenSVM_pred: src/GenSVMpred.c lib/libgensvm.a
- @$(CC) -o GenSVM_pred src/GenSVMpred.c $(CFLAGS) $(INCLUDE) $(LIB) \
- -lgensvm $(LDFLAGS)
- @echo GenSVM_pred...
+gensvm_grid: src/GenSVMgrid.c lib/libgensvm.a
+ @$(CC) -o $@ $< $(CFLAGS) $(INCLUDE) $(LIB) -lgensvm $(LDFLAGS)
+ @echo gensvm_grid ...
src/%.o: src/%.c
@$(CC) $(CFLAGS) $(INCLUDE) $(LDFLAGS) -c $< -o $@
- @echo $<...
+ @echo $< ...
diff --git a/include/gensvm_base.h b/include/gensvm_base.h
new file mode 100644
index 0000000..b1f4a6b
--- /dev/null
+++ b/include/gensvm_base.h
@@ -0,0 +1,116 @@
+/**
+ * @file gensvm_base.h
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Definitions for GenData and GenModel structures
+ *
+ * @details
+ * Contains documentation and declarations of GenModel and GenData.
+ *
+ */
+
+#ifndef GENSVM_BASE_H
+#define GENSVM_BASE_H
+
+// includes
+#include "globals.h"
+
+// type declarations
+
+/**
+ * @brief A structure to represent the data.
+ *
+ * @param K number of classes
+ * @param n number of instances
+ * @param m number of predictors
+ * @param *y pointer to vector of class labels
+ * @param *Z pointer to augmented data matrix
+ * @param *RAW pointer to augmented raw data matrix
+ * @param *J pointer to regularization vector
+ * @param kerneltype kerneltype used in GenData::Z
+ * @param *kernelparam kernel parameters used in GenData::Z
+ *
+ */
+struct GenData {
+ long K;
+ ///< number of classes
+ long n;
+ ///< number of instances
+ long m;
+ ///< number of predictors (width of RAW)
+ long r;
+ ///< number of eigenvalues (width of Z)
+ long *y;
+ ///< array of class labels, 1..K
+ double *Z;
+ ///< augmented data matrix (either equal to RAW or to the eigenvectors
+ ///< of the kernel matrix)
+ double *RAW;
+ ///< augmented raw data matrix
+ double *Sigma;
+ KernelType kerneltype;
+ double *kernelparam;
+};
+
+/**
+ * @brief A structure to represent a single GenSVM model.
+ *
+ */
+struct GenModel {
+ int weight_idx;
+ ///< which weights to use (1 = unit, 2 = group)
+ long K;
+ ///< number of classes in the dataset
+ long n;
+ ///< number of instances in the dataset
+ long m;
+ ///< number of predictor variables in the dataset
+ double epsilon;
+ ///< stopping criterion for the IM algorithm.
+ double p;
+ ///< parameter for the L-p norm in the loss function
+ double kappa;
+ ///< parameter for the Huber hinge function
+ double lambda;
+ ///< regularization parameter in the loss function
+ double *W;
+ ///< weight matrix
+ double *t;
+ ///< translation vector
+ double *V;
+ ///< augmented weight matrix
+ double *Vbar;
+ ///< augmented weight matrix from the previous iteration of the IM
+ ///< algorithm
+ double *U;
+ ///< simplex matrix
+ double *UU;
+ ///< 3D simplex difference matrix
+ double *Q;
+ ///< error matrix
+ double *H;
+ ///< Huber weighted error matrix
+ double *R;
+ ///< 0-1 auixiliary matrix, this matrix is n x K, with for row i a 0 on
+ ///< column y[i]-1, and 1 everywhere else.
+ double *rho;
+ ///< vector of instance weights
+ double training_error;
+ ///< loss function value after training has finished
+ char *data_file;
+ ///< filename of the data
+ KernelType kerneltype;
+ ///< type of kernel used in the model
+ double *kernelparam;
+ ///< array of kernel parameters, size depends on kernel type
+};
+
+// function declarations
+struct GenModel *gensvm_init_model();
+void gensvm_allocate_model(struct GenModel *model);
+void gensvm_reallocate_model(struct GenModel *model, long n, long m);
+void gensvm_free_model(struct GenModel *model);
+struct GenData *gensvm_init_data();
+void gensvm_free_data(struct GenData *data);
+
+#endif
diff --git a/include/gensvm_cmdarg.h b/include/gensvm_cmdarg.h
new file mode 100644
index 0000000..ac33be8
--- /dev/null
+++ b/include/gensvm_cmdarg.h
@@ -0,0 +1,21 @@
+/**
+ * @file gensvm_cmdarg.h
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Header file for gensvm_cmdarg.c
+ *
+ * @details
+ * Function declarations for dealing with command line arguments.
+ *
+ */
+
+#ifndef GENSVM_CMDARG_H
+#define GENSVM_CMDARG_H
+
+#include "globals.h"
+
+// function declarations
+int gensvm_check_argv(int argc, char **argv, char *str);
+int gensvm_check_argv_eq(int argc, char **argv, char *str);
+
+#endif
diff --git a/include/gensvm_copy.h b/include/gensvm_copy.h
new file mode 100644
index 0000000..bdf6eec
--- /dev/null
+++ b/include/gensvm_copy.h
@@ -0,0 +1,18 @@
+/**
+ * @file gensvm_copy.h
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Header file for gensvm_copy.c
+ *
+ */
+
+#ifndef GENSVM_COPY_H
+#define GENSVM_COPY_H
+
+// includes
+#include "gensvm_base.h"
+
+// function declarations
+void gensvm_copy_model(struct GenModel *from, struct GenModel *to);
+
+#endif
diff --git a/include/gensvm_crossval.h b/include/gensvm_cv_util.h
index 3ac5fa9..ada727d 100644
--- a/include/gensvm_crossval.h
+++ b/include/gensvm_cv_util.h
@@ -1,8 +1,8 @@
/**
- * @file crossval.h
+ * @file gensvm_cv_util.h
* @author Gertjan van den Burg
* @date January, 2014
- * @brief Header file for crossval.c
+ * @brief Header file for gensvm_cv_util.c
*
* @details
* Contains function declarations for functions needed for performing cross
@@ -10,11 +10,10 @@
*
*/
-#ifndef GENSVM_CROSSVAL_H
-#define GENSVM_CROSSVAL_H
+#ifndef GENSVM_CV_UTIL_H
+#define GENSVM_CV_UTIL_H
-// forward delaration
-struct GenData;
+#include "gensvm_base.h"
void gensvm_make_cv_split(long N, long folds, long *cv_idx);
void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data,
diff --git a/include/gensvm_debug.h b/include/gensvm_debug.h
new file mode 100644
index 0000000..1cab4ca
--- /dev/null
+++ b/include/gensvm_debug.h
@@ -0,0 +1,20 @@
+
+/**
+ * @file gensvm_debug.h
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Header for useful debug functions
+ *
+ * @details
+ * Contains defines useful for debugging.
+ *
+ */
+
+#ifndef GENSVM_DEBUG_H
+#define GENSVM_DEBUG_H
+
+#include "gensvm_print.h"
+
+void print_matrix(double *M, long rows, long cols);
+
+#endif
diff --git a/include/gensvm_grid.h b/include/gensvm_grid.h
new file mode 100644
index 0000000..d335d7c
--- /dev/null
+++ b/include/gensvm_grid.h
@@ -0,0 +1,78 @@
+/**
+ * @file gensvm_grid.h
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Structs necessary for the grid search
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for this queue and a single
+ * task in a queue, as well as a structure for the complete training
+ * scheme. Function declarations are also included.
+ *
+ */
+
+#ifndef GENSVM_GRID_H
+#define GENSVM_GRID_H
+
+#include "globals.h"
+
+/**
+ * @brief Structure for describing the entire grid search
+ *
+ * @param traintype type of training to use
+ * @param kerneltype type of kernel to use throughout training
+ * @param repeats number of repeats to be done after the grid
+ * search to find the parameter set with the
+ * most consistent high performance
+ * @param folds number of folds in cross validation
+ * @param Np size of the array of p values
+ * @param Nl size of the array of lambda values
+ * @param Nk size of the array of kappa values
+ * @param Ne size of the array of epsilon values
+ * @param Nw size of the array of weight_idx values
+ * @param Ng size of the array of gamma values
+ * @param Nc size of the array of coef values
+ * @param Nd size of the array of degree values
+ * @param *weight_idxs array of weight_idxs
+ * @param *ps array of p values
+ * @param *lambdas array of lambda values
+ * @param *kappas array of kappa values
+ * @param *epsilons array of epsilon values
+ * @param *gammas array of gamma values
+ * @param *coefs array of coef values
+ * @param *degrees array of degree values
+ * @param *train_data_file filename of train data file
+ * @param *test_data_file filename of test data file
+ *
+ */
+struct GenGrid {
+ TrainType traintype;
+ KernelType kerneltype;
+ long repeats;
+ long folds;
+ long Np;
+ long Nl;
+ long Nk;
+ long Ne;
+ long Nw;
+ long Ng;
+ long Nc;
+ long Nd;
+ int *weight_idxs;
+ double *ps;
+ double *lambdas;
+ double *kappas;
+ double *epsilons;
+ double *gammas;
+ double *coefs;
+ double *degrees;
+ char *train_data_file;
+ char *test_data_file;
+};
+
+// function declarations
+struct GenGrid *gensvm_init_grid();
+void gensvm_free_grid(struct GenGrid *grid);
+
+#endif
diff --git a/include/gensvm_gridsearch.h b/include/gensvm_gridsearch.h
new file mode 100644
index 0000000..dcd9b93
--- /dev/null
+++ b/include/gensvm_gridsearch.h
@@ -0,0 +1,37 @@
+/**
+ * @file gensvm_gridsearch.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for gensvm_gridsearch.c
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for this queue and a single
+ * task in a queue, as well as a structure for the complete training
+ * scheme. Function declarations are also included.
+ *
+ */
+
+#ifndef GENSVM_GRIDSEARCH_H
+#define GENSVM_GRIDSEARCH_H
+
+// includes
+#include "gensvm_cv_util.h"
+#include "gensvm_init.h"
+#include "gensvm_grid.h"
+#include "gensvm_optimize.h"
+#include "gensvm_pred.h"
+#include "gensvm_queue.h"
+#include "gensvm_timer.h"
+
+// function declarations
+void gensvm_fill_queue(struct GenGrid *grid, struct GenQueue *queue,
+ struct GenData *train_data, struct GenData *test_data);
+void consistency_repeats(struct GenQueue *q, long repeats, TrainType traintype);
+void make_model_from_task(struct GenTask *task, struct GenModel *model);
+void print_progress_string(struct GenTask *task, long N);
+void start_training(struct GenQueue *q);
+double gensvm_cross_validation(struct GenModel *model,
+ struct GenData **train_folds, struct GenData **test_folds,
+ int folds, long n_total);
+#endif
diff --git a/include/gensvm_init.h b/include/gensvm_init.h
index 980366b..3f4a1cb 100644
--- a/include/gensvm_init.h
+++ b/include/gensvm_init.h
@@ -1,28 +1,21 @@
/**
* @file gensvm_init.h
* @author Gertjan van den Burg
- * @date January, 2014
+ * @date May, 2016
* @brief Header file for gensvm_init.c
*
* @details
- * Contains function declarations for the initialization functions for
- * GenModel and GenData structures.
+ * Contains function declarations for the initialization functions for the
+ * model weights and model V matrix.
*/
#ifndef GENSVM_INIT_H
#define GENSVM_INIT_H
-// include
-#include "globals.h"
-#include "gensvm.h"
+#include "gensvm_base.h"
-struct GenModel *gensvm_init_model();
-
-struct GenData *gensvm_init_data();
-
-void gensvm_allocate_model(struct GenModel *model);
-void gensvm_reallocate_model(struct GenModel *model, long n, long m);
-void gensvm_free_model(struct GenModel *model);
-void gensvm_free_data(struct GenData *data);
+void gensvm_init_V(struct GenModel *from_model, struct GenModel *to_model,
+ struct GenData *data);
+void gensvm_initialize_weights(struct GenData *data, struct GenModel *model);
#endif
diff --git a/include/gensvm_io.h b/include/gensvm_io.h
index 4581c5f..9b0d973 100644
--- a/include/gensvm_io.h
+++ b/include/gensvm_io.h
@@ -12,9 +12,9 @@
#ifndef GENSVM_IO_H
#define GENSVM_IO_H
-// forward declarations
-struct GenData;
-struct GenModel;
+// includes
+#include "gensvm_base.h"
+#include "gensvm_strutil.h"
// function declarations
void gensvm_read_data(struct GenData *dataset, char *data_file);
@@ -24,5 +24,6 @@ void gensvm_write_model(struct GenModel *model, char *output_filename);
void gensvm_write_predictions(struct GenData *data, long *predy,
char *output_filename);
+void gensvm_time_string(char *buffer);
#endif
diff --git a/include/gensvm_kernel.h b/include/gensvm_kernel.h
index 45b7e62..a1fac20 100644
--- a/include/gensvm_kernel.h
+++ b/include/gensvm_kernel.h
@@ -14,12 +14,10 @@
#ifndef GENSVM_KERNEL_H
#define GENSVM_KERNEL_H
-// forward declarations
-struct GenData;
-struct GenModel;
+// includes
+#include "gensvm_base.h"
// function declarations
-
void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data);
void gensvm_kernel_postprocess(struct GenModel *model,
struct GenData *traindata, struct GenData *testdata);
@@ -36,5 +34,9 @@ void gensvm_make_testfactor(struct GenData *testdata,
double gensvm_dot_rbf(double *x1, double *x2, double *kernelparam, long n);
double gensvm_dot_poly(double *x1, double *x2, double *kernelparam, long n);
double gensvm_dot_sigmoid(double *x1, double *x2, double *kernelparam, long n);
-
+int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA,
+ double VL, double VU, int IL, int IU, double ABSTOL,
+ int *M, double *W, double *Z, int LDZ, double *WORK, int LWORK,
+ int *IWORK, int *IFAIL);
+double dlamch(char CMACH);
#endif
diff --git a/include/gensvm_lapack.h b/include/gensvm_lapack.h
index c4e58e8..843169b 100644
--- a/include/gensvm_lapack.h
+++ b/include/gensvm_lapack.h
@@ -12,13 +12,4 @@
#ifndef GENSVM_LAPACK_H
#define GENSVM_LAPACK_H
-int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
- int LDB);
-int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
- double *B, int LDB, double *WORK, int LWORK);
-int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA,
- double VL, double VU, int IL, int IU, double ABSTOL,
- int *M, double *W, double *Z, int LDZ, double *WORK, int LWORK,
- int *IWORK, int *IFAIL);
-double dlamch(char CMACH);
#endif
diff --git a/include/gensvm_matrix.h b/include/gensvm_matrix.h
index 5c88f0b..9982b78 100644
--- a/include/gensvm_matrix.h
+++ b/include/gensvm_matrix.h
@@ -2,34 +2,15 @@
* @file gensvm_matrix.h
* @author Gertjan van den Burg
* @date August, 2013
- * @brief Header file for gensvm_matrix.c
+ * @brief Header with defines for matrix access
*
* @details
- * Contains function declarations for functions useful for dealing with matrices.
+ * Contains defines useful for dealing with matrices.
*
*/
#ifndef GENSVM_MATRIX_H
#define GENSVM_MATRIX_H
-// Set a matrix element (RowMajor)
-#define matrix_set(M, cols, i, j, val) M[(i)*(cols)+j] = val
-
-// Get a matrix element (RowMajor)
-#define matrix_get(M, cols, i, j) M[(i)*(cols)+j]
-
-// Add to a matrix element (RowMajor)
-#define matrix_add(M, cols, i, j, val) M[(i)*(cols)+j] += val
-
-// Multiply a matrix element (RowMajor)
-#define matrix_mul(M, cols, i, j, val) M[(i)*(cols)+j] *= val
-
-// Set a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor)
-#define matrix3_set(M, N2, N3, i, j, k, val) M[k+(N3)*(j+(N2)*(i))] = val
-
-// Get a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor)
-#define matrix3_get(M, N2, N3, i, j, k) M[k+(N3)*(j+(N2)*(i))]
-
-void print_matrix(double *M, long rows, long cols);
#endif
diff --git a/include/gensvm_memory.h b/include/gensvm_memory.h
index bc4aae9..08d6f2d 100644
--- a/include/gensvm_memory.h
+++ b/include/gensvm_memory.h
@@ -9,6 +9,8 @@
#ifndef GENSVM_MEMORY_H
#define GENSVM_MEMORY_H
+#include <stddef.h>
+
#define Calloc(type, size) \
mycalloc(__FILE__, __LINE__, size, sizeof(type))
#define Malloc(type, size) \
diff --git a/include/gensvm_optimize.h b/include/gensvm_optimize.h
new file mode 100644
index 0000000..7a23bdb
--- /dev/null
+++ b/include/gensvm_optimize.h
@@ -0,0 +1,36 @@
+/**
+ * @file gensvm_train.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for gensvm_train.c
+ *
+ * @details
+ * Contains function declarations for functions used to train a single
+ * GenModel.
+ *
+ */
+
+#ifndef GENSVM_TRAIN_H
+#define GENSVM_TRAIN_H
+
+#include "gensvm_sv.h"
+#include "gensvm_print.h"
+#include "gensvm_simplex.h"
+
+// function declarations
+void gensvm_optimize(struct GenModel *model, struct GenData *data);
+double gensvm_get_loss(struct GenModel *model, struct GenData *data,
+ double *ZV);
+void gensvm_get_update(struct GenModel *model, struct GenData *data,
+ double *B, double *ZAZ, double *ZAZV, double *ZAZVT);
+void gensvm_category_matrix(struct GenModel *model, struct GenData *data);
+void gensvm_simplex_diff(struct GenModel *model, struct GenData *dataset);
+void gensvm_calculate_errors(struct GenModel *model, struct GenData *data,
+ double *ZV);
+void gensvm_calculate_huber(struct GenModel *model);
+void gensvm_step_doubling(struct GenModel *model);
+int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, int LDB);
+int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, double *B,
+ int LDB, double *WORK, int LWORK);
+
+#endif
diff --git a/include/gensvm_pred.h b/include/gensvm_pred.h
index 97af01f..56e16e8 100644
--- a/include/gensvm_pred.h
+++ b/include/gensvm_pred.h
@@ -12,9 +12,9 @@
#ifndef GENSVM_PRED_H
#define GENSVM_PRED_H
-// forward declarations
-struct GenData;
-struct GenModel;
+// includes
+#include "gensvm_kernel.h"
+#include "gensvm_simplex.h"
// function declarations
void gensvm_predict_labels(struct GenData *testdata,
diff --git a/include/gensvm_print.h b/include/gensvm_print.h
new file mode 100644
index 0000000..fff7af5
--- /dev/null
+++ b/include/gensvm_print.h
@@ -0,0 +1,22 @@
+/**
+ * @file gensvm_print.h
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Header file for gensvm_print.c
+ *
+ * @details
+ * Function declarations for printing to stdout and stderr.
+ *
+ */
+
+#ifndef GENSVM_PRINT_H
+#define GENSVM_PRINT_H
+
+// includes
+#include "globals.h"
+
+// function declarations
+void note(const char *fmt,...);
+void err(const char *fmt,...);
+
+#endif
diff --git a/include/gensvm_queue.h b/include/gensvm_queue.h
new file mode 100644
index 0000000..e8d26d6
--- /dev/null
+++ b/include/gensvm_queue.h
@@ -0,0 +1,40 @@
+/**
+ * @file gensvm_queue.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for gensvm_queue.c
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for this queue. Function declarations
+ * for initializing and freeing the queue are also included.
+ *
+ */
+
+#ifndef GENSVM_QUEUE_H
+#define GENSVM_QUEUE_H
+
+#include "gensvm_task.h"
+
+/**
+ * @brief Simple task queue.
+ *
+ * This struct is basically just an array of pointers to Task instances,
+ * with a length and an index of the current task.
+ *
+ * @param **tasks array of pointers to Task structs
+ * @param N size of task array
+ * @param i index used for keeping track of the queue
+ */
+struct GenQueue {
+ struct GenTask **tasks;
+ long N;
+ long i;
+};
+
+// function declarations
+struct GenQueue *gensvm_init_queue();
+void gensvm_free_queue(struct GenQueue *q);
+struct GenTask *get_next_task(struct GenQueue *q);
+
+#endif
diff --git a/include/gensvm_simplex.h b/include/gensvm_simplex.h
new file mode 100644
index 0000000..9bb40b1
--- /dev/null
+++ b/include/gensvm_simplex.h
@@ -0,0 +1,18 @@
+/**
+ * @file gensvm_simplex.h
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Header file for gensvm_simplex.c
+ *
+ */
+
+#ifndef GENSVM_SIMPLEX_H
+#define GENSVM_SIMPLEX_H
+
+// includes
+#include "globals.h"
+
+// forward declarations
+void gensvm_simplex(long K, double *U);
+
+#endif
diff --git a/include/gensvm_strutil.h b/include/gensvm_strutil.h
index c51422f..efaa5ec 100644
--- a/include/gensvm_strutil.h
+++ b/include/gensvm_strutil.h
@@ -1,8 +1,8 @@
/**
- * @file strutil.h
+ * @file gensvm_strutil.h
* @author Gertjan van den Burg
* @date August, 2013
- * @brief Header file for strutil.c
+ * @brief Header file for gensvm_strutil.c
*
* @details
* Function declarations for useful string functions used in parsing
@@ -13,7 +13,7 @@
#ifndef GENSVM_STRUTIL_H
#define GENSVM_STRUTIL_H
-#include "types.h"
+#include "globals.h"
bool str_startswith(const char *str, const char *pre);
bool str_endswith(const char *str, const char *suf);
diff --git a/include/gensvm_sv.h b/include/gensvm_sv.h
index 2c7cf57..8347b95 100644
--- a/include/gensvm_sv.h
+++ b/include/gensvm_sv.h
@@ -12,6 +12,10 @@
#ifndef GENSVM_SV_H
#define GENSVM_SV_H
+// includes
+#include "gensvm_base.h"
+
+// function declarations
long gensvm_num_sv(struct GenModel *model, struct GenData *data);
#endif
diff --git a/include/gensvm_task.h b/include/gensvm_task.h
new file mode 100644
index 0000000..98c8f26
--- /dev/null
+++ b/include/gensvm_task.h
@@ -0,0 +1,53 @@
+/**
+ * @file gensvm_task.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Struct for a single task in the queue
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for the tasks in the queue.
+ * Initialization and free functions are also included.
+ *
+ */
+
+#ifndef GENSVM_TASK_H
+#define GENSVM_TASK_H
+
+#include "gensvm_base.h"
+
+/**
+ * @brief A structure for a single task in the queue.
+ *
+ * @param folds number of folds in cross validation
+ * @param ID numeric id of the task in the queue
+ * @param weight_idx parameter for the GenModel
+ * @param p parameter for the GenModel
+ * @param kappa parameter for the GenModel
+ * @param lambda parameter for the GenModel
+ * @param epsilon parameter for the GenModel
+ * @param kerneltype parameter for the GenModel
+ * @param *kernelparam parameters for the GenModel
+ * @param *train_data pointer to the training data
+ * @param *test_data pointer to the test data (if any)
+ * @param performance performance after cross validation
+ */
+struct GenTask {
+ KernelType kerneltype;
+ int weight_idx;
+ long folds;
+ long ID;
+ double p;
+ double kappa;
+ double lambda;
+ double epsilon;
+ double *kernelparam;
+ struct GenData *train_data;
+ struct GenData *test_data;
+ double performance;
+};
+
+struct GenTask *gensvm_init_task();
+void gensvm_free_task(struct GenTask *task);
+
+#endif
diff --git a/include/gensvm_timer.h b/include/gensvm_timer.h
index 29c45cd..11e61e5 100644
--- a/include/gensvm_timer.h
+++ b/include/gensvm_timer.h
@@ -12,8 +12,10 @@
#ifndef GENSVM_TIMER_H
#define GENSVM_TIMER_H
-double elapsed_time(clock_t s_time, clock_t e_time);
+// includes
+#include "globals.h"
-void get_time_string(char *buffer);
+// function declarations
+double gensvm_elapsed_time(clock_t s_time, clock_t e_time);
#endif
diff --git a/include/gensvm_train.h b/include/gensvm_train.h
deleted file mode 100644
index 466b8e2..0000000
--- a/include/gensvm_train.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * @file gensvm_train.h
- * @author Gertjan van den Burg
- * @date August, 2013
- * @brief Header file for gensvm_train.c
- *
- * @details
- * Contains function declarations for functions used to train a single
- * GenModel.
- *
- */
-
-#ifndef GENSVM_TRAIN_H
-#define GENSVM_TRAIN_H
-
-//forward declarations
-struct GenData;
-struct GenModel;
-
-// function declarations
-void gensvm_optimize(struct GenModel *model, struct GenData *data);
-
-double gensvm_get_loss(struct GenModel *model, struct GenData *data,
- double *ZV);
-
-void gensvm_get_update(struct GenModel *model, struct GenData *data,
- double *B, double *ZAZ, double *ZAZV, double *ZAZVT);
-
-#endif
diff --git a/include/gensvm_train_dataset.h b/include/gensvm_train_dataset.h
deleted file mode 100644
index 9a3fe86..0000000
--- a/include/gensvm_train_dataset.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/**
- * @file gensvm_train_dataset.h
- * @author Gertjan van den Burg
- * @date August, 2013
- * @brief Structs and functions necessary for the grid search
- *
- * @details
- * The grid search for the optimal parameters is done through a queue.
- * This file contains struct definitions for this queue and a single
- * task in a queue, as well as a structure for the complete training
- * scheme. Function declarations are also included.
- *
- */
-
-#ifndef GENSVM_TRAIN_DATASET_H
-#define GENSVM_TRAIN_DATASET_H
-
-#include "types.h"
-
-// forward declarations
-struct GenData;
-struct GenModel;
-
-/**
- * @brief A structure for a single task in the queue.
- *
- * @param folds number of folds in cross validation
- * @param ID numeric id of the task in the queue
- * @param weight_idx parameter for the GenModel
- * @param p parameter for the GenModel
- * @param kappa parameter for the GenModel
- * @param lambda parameter for the GenModel
- * @param epsilon parameter for the GenModel
- * @param kerneltype parameter for the GenModel
- * @param *kernelparam parameters for the GenModel
- * @param *train_data pointer to the training data
- * @param *test_data pointer to the test data (if any)
- * @param performance performance after cross validation
- */
-struct Task {
- KernelType kerneltype;
- int weight_idx;
- long folds;
- long ID;
- double p;
- double kappa;
- double lambda;
- double epsilon;
- double *kernelparam;
- struct GenData *train_data;
- struct GenData *test_data;
- double performance;
-};
-
-/**
- * @brief Simple task queue.
- *
- * This struct is basically just an array of pointers to Task instances,
- * with a length and an index of the current task.
- *
- * @param **tasks array of pointers to Task structs
- * @param N size of task array
- * @param i index used for keeping track of the queue
- */
-struct Queue {
- struct Task **tasks;
- long N;
- long i;
-};
-
-/**
- * @brief Structure for describing the entire grid search
- *
- * @param traintype type of training to use
- * @param kerneltype type of kernel to use throughout training
- * @param repeats number of repeats to be done after the grid
- * search to find the parameter set with the
- * most consistent high performance
- * @param folds number of folds in cross validation
- * @param Np size of the array of p values
- * @param Nl size of the array of lambda values
- * @param Nk size of the array of kappa values
- * @param Ne size of the array of epsilon values
- * @param Nw size of the array of weight_idx values
- * @param Ng size of the array of gamma values
- * @param Nc size of the array of coef values
- * @param Nd size of the array of degree values
- * @param *weight_idxs array of weight_idxs
- * @param *ps array of p values
- * @param *lambdas array of lambda values
- * @param *kappas array of kappa values
- * @param *epsilons array of epsilon values
- * @param *gammas array of gamma values
- * @param *coefs array of coef values
- * @param *degrees array of degree values
- * @param *train_data_file filename of train data file
- * @param *test_data_file filename of test data file
- *
- */
-struct Training {
- TrainType traintype;
- KernelType kerneltype;
- long repeats;
- long folds;
- long Np;
- long Nl;
- long Nk;
- long Ne;
- long Nw;
- long Ng;
- long Nc;
- long Nd;
- int *weight_idxs;
- double *ps;
- double *lambdas;
- double *kappas;
- double *epsilons;
- double *gammas;
- double *coefs;
- double *degrees;
- char *train_data_file;
- char *test_data_file;
-};
-
-void make_queue(struct Training *training, struct Queue *queue,
- struct GenData *train_data, struct GenData *test_data);
-
-struct Task *get_next_task(struct Queue *q);
-void free_queue(struct Queue *q);
-
-void consistency_repeats(struct Queue *q, long repeats, TrainType traintype);
-
-void make_model_from_task(struct Task *task, struct GenModel *model);
-void copy_model(struct GenModel *from, struct GenModel *to);
-
-void print_progress_string(struct Task *task, long N);
-
-// new
-void start_training(struct Queue *q);
-double gensvm_cross_validation(struct GenModel *model,
- struct GenData **train_folds, struct GenData **test_folds,
- int folds, long n_total);
-#endif
diff --git a/include/gensvm_util.h b/include/gensvm_util.h
deleted file mode 100644
index 5ea2198..0000000
--- a/include/gensvm_util.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * @file util.h
- * @author Gertjan van den Burg
- * @date August, 2013
- * @brief Header file for util.c
- *
- * @details
- * Function declarations for utility functions of the program.
- *
- */
-
-#ifndef GENSVM_UTIL_H
-#define GENSVM_UTIL_H
-
-// forward declarations
-struct GenData;
-struct GenModel;
-
-// function declarations
-int gensvm_check_argv(int argc, char **argv, char *str);
-int gensvm_check_argv_eq(int argc, char **argv, char *str);
-
-void note(const char *fmt,...);
-void err(const char *fmt,...);
-
-#endif
diff --git a/include/globals.h b/include/globals.h
index becde35..7fad7b1 100644
--- a/include/globals.h
+++ b/include/globals.h
@@ -18,12 +18,17 @@
#ifndef GENSVM_GLOBALS_H
#define GENSVM_GLOBALS_H
+#include "gensvm_memory.h"
+#include "types.h"
+
+#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
-
-#include "gensvm_memory.h"
+#include <math.h>
+#include <time.h>
+#include <cblas.h>
#define MAX_LINE_LENGTH 1024
@@ -33,4 +38,22 @@
#define minimum(a, b) (a) < (b) ? (a) : (b)
#endif
+// Set a matrix element (RowMajor)
+#define matrix_set(M, cols, i, j, val) M[(i)*(cols)+j] = val
+
+// Get a matrix element (RowMajor)
+#define matrix_get(M, cols, i, j) M[(i)*(cols)+j]
+
+// Add to a matrix element (RowMajor)
+#define matrix_add(M, cols, i, j, val) M[(i)*(cols)+j] += val
+
+// Multiply a matrix element (RowMajor)
+#define matrix_mul(M, cols, i, j, val) M[(i)*(cols)+j] *= val
+
+// Set a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor)
+#define matrix3_set(M, N2, N3, i, j, k, val) M[k+(N3)*(j+(N2)*(i))] = val
+
+// Get a 3D matrix element (N2 = second dim, N3 = third dim, RowMajor)
+#define matrix3_get(M, N2, N3, i, j, k) M[k+(N3)*(j+(N2)*(i))]
+
#endif
diff --git a/include/libGenSVM.h b/include/libGenSVM.h
index 9e2d4c2..146fc67 100644
--- a/include/libGenSVM.h
+++ b/include/libGenSVM.h
@@ -23,7 +23,6 @@ struct GenData;
struct GenModel;
// function declarations
-void gensvm_simplex_gen(long K, double *U);
void gensvm_category_matrix(struct GenModel *model, struct GenData *data);
void gensvm_simplex_diff(struct GenModel *model, struct GenData *dataset);
@@ -33,8 +32,5 @@ void gensvm_calculate_huber(struct GenModel *model);
void gensvm_step_doubling(struct GenModel *model);
-void gensvm_seed_model_V(struct GenModel *from_model,
- struct GenModel *to_model, struct GenData *data);
-void gensvm_initialize_weights(struct GenData *data, struct GenModel *model);
#endif
diff --git a/src/GenSVMgrid.c b/src/GenSVMgrid.c
index 89b85a7..a6c749e 100644
--- a/src/GenSVMgrid.c
+++ b/src/GenSVMgrid.c
@@ -8,9 +8,9 @@
* This is a command line interface to the parameter grid search functionality
* of the algorithm. The grid search is specified in a separate file, thereby
* reducing the number of command line arguments. See
- * read_training_from_file() for documentation on the training file.
+ * read_grid_from_file() for documentation on the grid file.
*
- * The program runs a grid search as specified in the training file. If
+ * The program runs a grid search as specified in the grid file. If
* desired the grid search can incorporate consistency checks to find the
* configuration among the best configurations which scores consistently high.
* All output is written to stdout, unless the quiet mode is specified.
@@ -19,18 +19,9 @@
*
*/
-#include <time.h>
-
-#include "globals.h"
-#include "gensvm.h"
-#include "gensvm_crossval.h"
+#include "gensvm_cmdarg.h"
#include "gensvm_io.h"
-#include "gensvm_init.h"
-#include "gensvm_pred.h"
-#include "gensvm_strutil.h"
-#include "gensvm_train.h"
-#include "gensvm_train_dataset.h"
-#include "gensvm_util.h"
+#include "gensvm_gridsearch.h"
#define MINARGS 2
@@ -39,10 +30,7 @@ extern FILE *GENSVM_OUTPUT_FILE;
// function declarations
void exit_with_help();
void parse_command_line(int argc, char **argv, char *input_filename);
-void read_training_from_file(char *input_filename, struct Training *training);
-struct Training *gensvm_init_training();
-struct Queue *gensvm_init_queue();
-void gensvm_free_training(struct Training *training);
+void read_grid_from_file(char *input_filename, struct GenGrid *grid);
/**
* @brief Help function
@@ -50,7 +38,7 @@ void gensvm_free_training(struct Training *training);
void exit_with_help()
{
printf("This is GenSVM, version %1.1f\n\n", VERSION);
- printf("Usage: trainGenSVMdataset [options] training_file\n");
+ printf("Usage: trainGenSVMdataset [options] grid_file\n");
printf("Options:\n");
printf("-h | -help : print this help.\n");
printf("-q : quiet mode (no output)\n");
@@ -62,7 +50,7 @@ void exit_with_help()
* @brief Main interface function for trainGenSVMdataset
*
* @details
- * Main interface for the command line program. A given training file which
+ * Main interface for the command line program. A given grid file which
* specifies a grid search over a single dataset is read. From this, a Queue
* is created containing all Task instances that need to be performed in the
* search. Depending on the type of dataset, either cross validation or
@@ -78,28 +66,28 @@ int main(int argc, char **argv)
{
char input_filename[MAX_LINE_LENGTH];
- struct Training *training = gensvm_init_training();
+ struct GenGrid *grid = gensvm_init_grid();
struct GenData *train_data = gensvm_init_data();
struct GenData *test_data = gensvm_init_data();
- struct Queue *q = gensvm_init_queue();
+ struct GenQueue *q = gensvm_init_queue();
if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help")
|| gensvm_check_argv_eq(argc, argv, "-h") )
exit_with_help();
parse_command_line(argc, argv, input_filename);
- note("Reading training file\n");
- read_training_from_file(input_filename, training);
+ note("Reading grid file\n");
+ read_grid_from_file(input_filename, grid);
- note("Reading data from %s\n", training->train_data_file);
- gensvm_read_data(train_data, training->train_data_file);
- if (training->traintype == TT) {
- note("Reading data from %s\n", training->test_data_file);
- gensvm_read_data(test_data, training->test_data_file);
+ note("Reading data from %s\n", grid->train_data_file);
+ gensvm_read_data(train_data, grid->train_data_file);
+ if (grid->traintype == TT) {
+ note("Reading data from %s\n", grid->test_data_file);
+ gensvm_read_data(test_data, grid->test_data_file);
}
note("Creating queue\n");
- make_queue(training, q, train_data, test_data);
+ gensvm_fill_queue(grid, q, train_data, test_data);
srand(time(NULL));
@@ -107,12 +95,12 @@ int main(int argc, char **argv)
start_training(q);
note("Training finished\n");
- if (training->repeats > 0) {
- consistency_repeats(q, training->repeats, training->traintype);
+ if (grid->repeats > 0) {
+ consistency_repeats(q, grid->repeats, grid->traintype);
}
- free_queue(q);
- gensvm_free_training(training);
+ gensvm_free_queue(q);
+ gensvm_free_grid(grid);
gensvm_free_data(train_data);
gensvm_free_data(test_data);
@@ -125,13 +113,13 @@ int main(int argc, char **argv)
*
* @details
* Few arguments can be supplied to the command line. Only quiet mode can be
- * specified, or help can be requested. The filename of the training file is
- * read from the arguments. Parsing of the training file is done separately in
- * read_training_from_file().
+ * specified, or help can be requested. The filename of the grid file is
+ * read from the arguments. Parsing of the grid file is done separately in
+ * read_grid_from_file().
*
* @param[in] argc number of command line arguments
* @param[in] argv array of command line arguments
- * @param[in] input_filename pre-allocated buffer for the training
+ * @param[in] input_filename pre-allocated buffer for the grid
* filename.
*
*/
@@ -181,21 +169,21 @@ KernelType parse_kernel_str(char *kernel_line)
}
/**
- * @brief Read the Training struct from file
+ * @brief Read the GenGrid struct from file
*
* @details
- * Read the Training struct from a file. The training file follows a specific
- * format specified in @ref spec_training_file.
+ * Read the GenGrid struct from a file. The grid file follows a specific
+ * format specified in @ref spec_grid_file.
*
* Commonly used string functions in this function are all_doubles_str() and
* all_longs_str().
*
- * @param[in] input_filename filename of the training file
- * @param[in] training Training structure to place the parsed
+ * @param[in] input_filename filename of the grid file
+ * @param[in] grid GenGrid structure to place the parsed
* parameter grid.
*
*/
-void read_training_from_file(char *input_filename, struct Training *training)
+void read_grid_from_file(char *input_filename, struct GenGrid *grid)
{
long i, nr = 0;
FILE *fid;
@@ -207,108 +195,108 @@ void read_training_from_file(char *input_filename, struct Training *training)
fid = fopen(input_filename, "r");
if (fid == NULL) {
- fprintf(stderr, "Error opening training file %s\n",
+ fprintf(stderr, "Error opening grid file %s\n",
input_filename);
exit(1);
}
- training->traintype = CV;
+ grid->traintype = CV;
while ( fgets(buffer, MAX_LINE_LENGTH, fid) != NULL ) {
Memset(params, double, MAX_LINE_LENGTH);
Memset(lparams, long, MAX_LINE_LENGTH);
if (str_startswith(buffer, "train:")) {
sscanf(buffer, "train: %s\n", train_filename);
- training->train_data_file = Calloc(char,
+ grid->train_data_file = Calloc(char,
MAX_LINE_LENGTH);
- strcpy(training->train_data_file, train_filename);
+ strcpy(grid->train_data_file, train_filename);
} else if (str_startswith(buffer, "test:")) {
sscanf(buffer, "test: %s\n", test_filename);
- training->test_data_file = Calloc(char,
+ grid->test_data_file = Calloc(char,
MAX_LINE_LENGTH);
- strcpy(training->test_data_file, test_filename);
- training->traintype = TT;
+ strcpy(grid->test_data_file, test_filename);
+ grid->traintype = TT;
} else if (str_startswith(buffer, "p:")) {
nr = all_doubles_str(buffer, 2, params);
- training->ps = Calloc(double, nr);
+ grid->ps = Calloc(double, nr);
for (i=0; i<nr; i++)
- training->ps[i] = params[i];
- training->Np = nr;
+ grid->ps[i] = params[i];
+ grid->Np = nr;
} else if (str_startswith(buffer, "lambda:")) {
nr = all_doubles_str(buffer, 7, params);
- training->lambdas = Calloc(double, nr);
+ grid->lambdas = Calloc(double, nr);
for (i=0; i<nr; i++)
- training->lambdas[i] = params[i];
- training->Nl = nr;
+ grid->lambdas[i] = params[i];
+ grid->Nl = nr;
} else if (str_startswith(buffer, "kappa:")) {
nr = all_doubles_str(buffer, 6, params);
- training->kappas = Calloc(double, nr);
+ grid->kappas = Calloc(double, nr);
for (i=0; i<nr; i++)
- training->kappas[i] = params[i];
- training->Nk = nr;
+ grid->kappas[i] = params[i];
+ grid->Nk = nr;
} else if (str_startswith(buffer, "epsilon:")) {
nr = all_doubles_str(buffer, 8, params);
- training->epsilons = Calloc(double, nr);
+ grid->epsilons = Calloc(double, nr);
for (i=0; i<nr; i++)
- training->epsilons[i] = params[i];
- training->Ne = nr;
+ grid->epsilons[i] = params[i];
+ grid->Ne = nr;
} else if (str_startswith(buffer, "weight:")) {
nr = all_longs_str(buffer, 7, lparams);
- training->weight_idxs = Calloc(int, nr);
+ grid->weight_idxs = Calloc(int, nr);
for (i=0; i<nr; i++)
- training->weight_idxs[i] = lparams[i];
- training->Nw = nr;
+ grid->weight_idxs[i] = lparams[i];
+ grid->Nw = nr;
} else if (str_startswith(buffer, "folds:")) {
nr = all_longs_str(buffer, 6, lparams);
- training->folds = lparams[0];
+ grid->folds = lparams[0];
if (nr > 1)
fprintf(stderr, "Field \"folds\" only takes "
"one value. Additional "
"fields are ignored.\n");
} else if (str_startswith(buffer, "repeats:")) {
nr = all_longs_str(buffer, 8, lparams);
- training->repeats = lparams[0];
+ grid->repeats = lparams[0];
if (nr > 1)
fprintf(stderr, "Field \"repeats\" only "
"takes one value. Additional "
"fields are ignored.\n");
} else if (str_startswith(buffer, "kernel:")) {
- training->kerneltype = parse_kernel_str(buffer);
+ grid->kerneltype = parse_kernel_str(buffer);
} else if (str_startswith(buffer, "gamma:")) {
nr = all_doubles_str(buffer, 6, params);
- if (training->kerneltype == K_LINEAR) {
+ if (grid->kerneltype == K_LINEAR) {
fprintf(stderr, "Field \"gamma\" ignored, "
"linear kernel is used.\n");
- training->Ng = 0;
+ grid->Ng = 0;
break;
}
- training->gammas = Calloc(double, nr);
+ grid->gammas = Calloc(double, nr);
for (i=0; i<nr; i++)
- training->gammas[i] = params[i];
- training->Ng = nr;
+ grid->gammas[i] = params[i];
+ grid->Ng = nr;
} else if (str_startswith(buffer, "coef:")) {
nr = all_doubles_str(buffer, 5, params);
- if (training->kerneltype == K_LINEAR ||
- training->kerneltype == K_RBF) {
+ if (grid->kerneltype == K_LINEAR ||
+ grid->kerneltype == K_RBF) {
fprintf(stderr, "Field \"coef\" ignored with "
"specified kernel.\n");
- training->Nc = 0;
+ grid->Nc = 0;
break;
}
- training->coefs = Calloc(double, nr);
+ grid->coefs = Calloc(double, nr);
for (i=0; i<nr; i++)
- training->coefs[i] = params[i];
- training->Nc = nr;
+ grid->coefs[i] = params[i];
+ grid->Nc = nr;
} else if (str_startswith(buffer, "degree:")) {
nr = all_doubles_str(buffer, 7, params);
- if (training->kerneltype != K_POLY) {
+ if (grid->kerneltype != K_POLY) {
fprintf(stderr, "Field \"degree\" ignored "
"with specified kernel.\n");
- training->Nd = 0;
+ grid->Nd = 0;
break;
}
- training->degrees = Calloc(double, nr);
+ grid->degrees = Calloc(double, nr);
for (i=0; i<nr; i++)
- training->degrees[i] = params[i];
- training->Nd = nr;
+ grid->degrees[i] = params[i];
+ grid->Nd = nr;
} else {
fprintf(stderr, "Cannot find any parameters on line: "
"%s\n", buffer);
@@ -319,62 +307,3 @@ void read_training_from_file(char *input_filename, struct Training *training)
free(lparams);
fclose(fid);
}
-
-struct Training *gensvm_init_training()
-{
- struct Training *training = Malloc(struct Training, 1);
-
- // initialize to defaults
- training->traintype = CV;
- training->kerneltype = K_LINEAR;
- training->repeats = 0;
- training->folds = 10;
- training->Np = 0;
- training->Nl = 0;
- training->Nk = 0;
- training->Ne = 0;
- training->Nw = 0;
- training->Ng = 0;
- training->Nc = 0;
- training->Nd = 0;
-
- // set arrays to NULL
- training->weight_idxs = NULL;
- training->ps = NULL;
- training->lambdas = NULL;
- training->kappas = NULL;
- training->epsilons = NULL;
- training->gammas = NULL;
- training->coefs = NULL;
- training->degrees = NULL;
- training->train_data_file = NULL;
- training->test_data_file = NULL;
-
- return training;
-}
-
-struct Queue *gensvm_init_queue()
-{
- struct Queue *q = Malloc(struct Queue, 1);
-
- q->tasks = NULL;
- q->N = 0;
- q->i = 0;
-
- return q;
-}
-
-void gensvm_free_training(struct Training *training)
-{
- free(training->weight_idxs);
- free(training->ps);
- free(training->lambdas);
- free(training->kappas);
- free(training->epsilons);
- free(training->gammas);
- free(training->coefs);
- free(training->degrees);
- free(training->train_data_file);
- free(training->test_data_file);
- free(training);
-}
diff --git a/src/GenSVMpred.c b/src/GenSVMpred.c
deleted file mode 100644
index 57680b1..0000000
--- a/src/GenSVMpred.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * 20140317:
- * THIS FUNCTION IS DEPRECATED, SINCE IT DOES NOT WORK WITH KERNELS.
- *
- */
-
-/**
- * @file GenSVM_pred.c
- * @author Gertjan van den Burg
- * @date January, 2014
- * @brief Command line interface for predicting class labels
- *
- * @details
- * This is a command line program for predicting the class labels or
- * determining the predictive performance of a pre-determined model on a given
- * test dataset. The predictive performance can be written to the screen or
- * the predicted class labels can be written to a specified output file. This
- * is done using gensvm_write_predictions().
- *
- * The specified model file must follow the specification given in
- * gensvm_write_model().
- *
- * For usage information, see the program help function.
- *
- */
-
-#include "gensvm.h"
-#include "gensvm_init.h"
-#include "gensvm_io.h"
-#include "gensvm_pred.h"
-#include "gensvm_util.h"
-
-#define MINARGS 3
-
-extern FILE *GENSVM_OUTPUT_FILE;
-
-// function declarations
-void exit_with_help();
-void parse_command_line(int argc, char **argv,
- char *input_filename, char *output_filename,
- char *model_filename);
-
-/**
- * @brief Help function
- */
-void exit_with_help()
-{
- printf("This is GenSVM, version %1.1f\n\n", VERSION);
- printf("Usage: predGenSVM [options] test_data_file model_file\n");
- printf("Options:\n");
- printf("-o output_file : write output to file\n");
- printf("-q : quiet mode (no output)\n");
- exit(0);
-}
-
-/**
- * @brief Main interface function for predGenSVM
- *
- * @details
- * Main interface for the command line program. A given model file is read and
- * a test dataset is initialized from the given data. The predictive
- * performance (hitrate) of the model on the test set is printed to the output
- * stream (default = stdout). If an output file is specified the predictions
- * are written to the file.
- *
- * @todo
- * Ensure that the program can read model files without class labels
- * specified. In that case no prediction accuracy is printed to the screen.
- *
- * @param[in] argc number of command line arguments
- * @param[in] argv array of command line arguments
- *
- */
-int main(int argc, char **argv)
-{
- long *predy;
- double performance;
-
- char input_filename[MAX_LINE_LENGTH];
- char model_filename[MAX_LINE_LENGTH];
- char output_filename[MAX_LINE_LENGTH];;
-
- if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help")
- || gensvm_check_argv_eq(argc, argv, "-h") )
- exit_with_help();
- parse_command_line(argc, argv, input_filename, output_filename,
- model_filename);
-
- // read the data and model
- struct GenModel *model = gensvm_init_model();
- struct GenData *data = gensvm_init_data();
- gensvm_read_data(data, input_filename);
- gensvm_read_model(model, model_filename);
-
- // check if the number of attributes in data equals that in model
- if (data->m != model->m) {
- fprintf(stderr, "Error: number of attributes in data (%li) "
- "does not equal the number of attributes in "
- "model (%li)\n", data->m, model->m);
- exit(1);
- } else if (data->K != model->K) {
- fprintf(stderr, "Error: number of classes in data (%li) "
- "does not equal the number of classes in "
- "model (%li)\n", data->K, model->K);
- exit(1);
- }
-
- // predict labels and performance if test data has labels
- predy = Calloc(long, data->n);
- gensvm_predict_labels(data, model, predy);
- if (data->y != NULL) {
- performance = gensvm_prediction_perf(data, predy);
- note("Predictive performance: %3.2f%%\n", performance);
- }
-
- // if output file is specified, write predictions to it
- if (gensvm_check_argv_eq(argc, argv, "-o")) {
- gensvm_write_predictions(data, predy, output_filename);
- note("Predictions written to: %s\n", output_filename);
- }
-
- // free the model, data, and predictions
- gensvm_free_model(model);
- gensvm_free_data(data);
- free(predy);
-
- return 0;
-}
-
-/**
- * @brief Parse command line arguments
- *
- * @details
- * Read the data filename and model filename from the command line arguments.
- * If specified, also read the output filename. If the quiet flag is given,
- * set the global output stream to NULL. On error, exit_with_help().
- *
- * @param[in] argc number of command line arguments
- * @param[in] argv array of command line arguments
- * @param[in] input_filename pre-allocated array for the input
- * filename
- * @param[in] output_filename pre-allocated array for the output
- * filename
- * @param[in] model_filename pre-allocated array for the model
- * filename
- *
- */
-void parse_command_line(int argc, char **argv, char *input_filename,
- char *output_filename, char *model_filename)
-{
- int i;
-
- GENSVM_OUTPUT_FILE = stdout;
-
- for (i=1; i<argc; i++) {
- if (argv[i][0] != '-') break;
- if (++i >= argc)
- exit_with_help();
- switch (argv[i-1][1]) {
- case 'o':
- strcpy(output_filename, argv[i]);
- break;
- case 'q':
- GENSVM_OUTPUT_FILE = NULL;
- i--;
- break;
- default:
- fprintf(stderr, "Unknown option: -%c\n",
- argv[i-1][1]);
- exit_with_help();
- }
- }
-
- if (i >= argc)
- exit_with_help();
-
- strcpy(input_filename, argv[i]);
- i++;
- strcpy(model_filename, argv[i]);
-}
diff --git a/src/GenSVMtrain.c b/src/GenSVMtrain.c
deleted file mode 100644
index 3bb9c09..0000000
--- a/src/GenSVMtrain.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/**
- * @file GenSVM_train.c
- * @author Gertjan van den Burg
- * @date August, 2013
- * @brief Command line interface for training a single model with GenSVM
- *
- * @details
- * This is a command line program for training a single model on a given
- * dataset. To run a grid search over a number of parameter configurations,
- * see trainGenSVMdataset.c.
- *
- */
-
-#include <time.h>
-#include <math.h>
-
-#include "globals.h"
-#include "libGenSVM.h"
-#include "gensvm.h"
-#include "gensvm_io.h"
-#include "gensvm_init.h"
-#include "gensvm_kernel.h"
-#include "gensvm_train.h"
-#include "gensvm_util.h"
-
-#define MINARGS 2
-
-extern FILE *GENSVM_OUTPUT_FILE;
-
-// function declarations
-void exit_with_help();
-void parse_command_line(int argc, char **argv, struct GenModel *model,
- char *input_filename, char *output_filename, char *model_filename);
-
-/**
- * @brief Help function
- */
-void exit_with_help()
-{
- printf("This is GenSVM, version %1.1f\n\n", VERSION);
- printf("Usage: trainGenSVM [options] training_data_file\n");
- printf("Options:\n");
- printf("-c coef : coefficient for the polynomial and sigmoid kernel\n");
- printf("-d degree : degree for the polynomial kernel\n");
- printf("-e epsilon : set the value of the stopping criterion\n");
- printf("-g gamma : parameter for the rbf, polynomial or sigmoid "
- "kernel\n");
- printf("-h | -help : print this help.\n");
- printf("-k kappa : set the value of kappa used in the Huber hinge\n");
- printf("-l lambda : set the value of lambda (lambda > 0)\n");
- printf("-m model_file : use previous model as seed for W and t\n");
- printf("-o output_file : write output to file\n");
- printf("-p p-value : set the value of p in the lp norm "
- "(1.0 <= p <= 2.0)\n");
- printf("-q : quiet mode (no output)\n");
- printf("-r rho : choose the weigth specification (1 = unit, 2 = "
- "group)\n");
- printf("-t type: kerneltype (LINEAR=0, POLY=1, RBF=2, SIGMOID=3)\n");
-
- exit(0);
-}
-
-/**
- * @brief Main interface function for trainGenSVM
- *
- * @details
- * Main interface for the command line program. A given dataset file is read
- * and a GenSVM model is trained on this data. By default the progress of the
- * computations are written to stdout. See for full options of the program the
- * help function.
- *
- * @param[in] argc number of command line arguments
- * @param[in] argv array of command line arguments
- *
- */
-int main(int argc, char **argv)
-{
- char input_filename[MAX_LINE_LENGTH];
- char model_filename[MAX_LINE_LENGTH];
- char output_filename[MAX_LINE_LENGTH];
-
- struct GenModel *model = gensvm_init_model();
- struct GenData *data = gensvm_init_data();
-
- if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help")
- || gensvm_check_argv_eq(argc, argv, "-h") )
- exit_with_help();
- parse_command_line(argc, argv, model, input_filename,
- output_filename, model_filename);
-
- // read data file
- gensvm_read_data(data, input_filename);
-
- // copy dataset parameters to model
- model->n = data->n;
- model->m = data->m;
- model->K = data->K;
- model->data_file = input_filename;
-
- // allocate model
- gensvm_allocate_model(model);
-
- // initialize kernel (if necessary)
- //gensvm_make_kernel(model, data);
-
- // reallocate model and initialize weights
- gensvm_reallocate_model(model, data->n, data->m);
- gensvm_initialize_weights(data, model);
-
- // seed the random number generator (only place in programs is in
- // command line interfaces)
- srand(time(NULL));
-
- if (gensvm_check_argv_eq(argc, argv, "-m")) {
- struct GenModel *seed_model = gensvm_init_model();
- gensvm_read_model(seed_model, model_filename);
- gensvm_seed_model_V(seed_model, model, data);
- gensvm_free_model(seed_model);
- } else {
- gensvm_seed_model_V(NULL, model, data);
- }
-
- // start training
- gensvm_optimize(model, data);
-
- // write_model to file
- if (gensvm_check_argv_eq(argc, argv, "-o")) {
- gensvm_write_model(model, output_filename);
- note("Output written to %s\n", output_filename);
- }
-
- // free model and data
- gensvm_free_model(model);
- gensvm_free_data(data);
-
- return 0;
-}
-
-/**
- * @brief Parse command line arguments
- *
- * @details
- * Process the command line arguments for the model parameters, and record
- * them in the specified GenModel. An input filename for the dataset is read
- * and if specified an output filename and a model filename for the seed
- * model.
- *
- * @param[in] argc number of command line arguments
- * @param[in] argv array of command line arguments
- * @param[in] model initialized model
- * @param[in] input_filename pre-allocated buffer for the input
- * filename
- * @param[in] output_filename pre-allocated buffer for the output
- * filename
- * @param[in] model_filename pre-allocated buffer for the model
- * filename
- *
- */
-void parse_command_line(int argc, char **argv, struct GenModel *model,
- char *input_filename, char *output_filename, char *model_filename)
-{
- int i;
- double gamma = 1.0,
- degree = 2.0,
- coef = 0.0;
-
- GENSVM_OUTPUT_FILE = stdout;
-
- // parse options
- for (i=1; i<argc; i++) {
- if (argv[i][0] != '-') break;
- if (++i>=argc) {
- exit_with_help();
- }
- switch (argv[i-1][1]) {
- case 'c':
- coef = atof(argv[i]);
- break;
- case 'd':
- degree = atof(argv[i]);
- break;
- case 'e':
- model->epsilon = atof(argv[i]);
- break;
- case 'g':
- gamma = atof(argv[i]);
- break;
- case 'k':
- model->kappa = atof(argv[i]);
- break;
- case 'l':
- model->lambda = atof(argv[i]);
- break;
- case 'm':
- strcpy(model_filename, argv[i]);
- break;
- case 'o':
- strcpy(output_filename, argv[i]);
- break;
- case 'p':
- model->p = atof(argv[i]);
- break;
- case 'r':
- model->weight_idx = atoi(argv[i]);
- break;
- case 't':
- model->kerneltype = atoi(argv[i]);
- break;
- case 'q':
- GENSVM_OUTPUT_FILE = NULL;
- i--;
- break;
- default:
- fprintf(stderr, "Unknown option: -%c\n",
- argv[i-1][1]);
- exit_with_help();
- }
- }
-
- // read input filename
- if (i >= argc)
- exit_with_help();
-
- strcpy(input_filename, argv[i]);
-
- // set kernel parameters
- switch (model->kerneltype) {
- case K_LINEAR:
- break;
- case K_POLY:
- model->kernelparam = Calloc(double, 3);
- model->kernelparam[0] = gamma;
- model->kernelparam[1] = coef;
- model->kernelparam[2] = degree;
- break;
- case K_RBF:
- model->kernelparam = Calloc(double, 1);
- model->kernelparam[0] = gamma;
- break;
- case K_SIGMOID:
- model->kernelparam = Calloc(double, 1);
- model->kernelparam[0] = gamma;
- model->kernelparam[1] = coef;
- }
-}
diff --git a/src/GenSVMtraintest.c b/src/GenSVMtraintest.c
index 0199725..47cc900 100644
--- a/src/GenSVMtraintest.c
+++ b/src/GenSVMtraintest.c
@@ -10,17 +10,11 @@
*
*/
-#include <time.h>
-
-#include "globals.h"
-#include "gensvm.h"
+#include "gensvm_cmdarg.h"
#include "gensvm_io.h"
#include "gensvm_init.h"
-#include "gensvm_kernel.h"
-#include "gensvm_train.h"
+#include "gensvm_optimize.h"
#include "gensvm_pred.h"
-#include "libGenSVM.h"
-#include "gensvm_util.h"
#define MINARGS 2
@@ -118,10 +112,10 @@ int main(int argc, char **argv)
if (gensvm_check_argv_eq(argc, argv, "-s")) {
struct GenModel *seed_model = gensvm_init_model();
gensvm_read_model(seed_model, model_inputfile);
- gensvm_seed_model_V(seed_model, model, traindata);
+ gensvm_init_V(seed_model, model, traindata);
gensvm_free_model(seed_model);
} else {
- gensvm_seed_model_V(NULL, model, traindata);
+ gensvm_init_V(NULL, model, traindata);
}
// start training
@@ -274,4 +268,3 @@ void parse_command_line(int argc, char **argv, struct GenModel *model,
model->kernelparam[1] = coef;
}
}
-
diff --git a/src/gensvm_base.c b/src/gensvm_base.c
new file mode 100644
index 0000000..eddef5c
--- /dev/null
+++ b/src/gensvm_base.c
@@ -0,0 +1,211 @@
+/**
+ * @file gensvm_base.c
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Functions for initializing GenModel and GenData structures
+ *
+ * @details
+ * This file contains functions for initializing, freeing, allocating, and
+ * reallocating a GenModel instance. It also contains functions for
+ * initializing and freeing a GenData structure. In addition, default values
+ * for these structures are defined here (and only here).
+ *
+ */
+
+#include "gensvm_base.h"
+
+/**
+ * @brief Initialize a GenData structure
+ *
+ * @details
+ * A GenData structure is initialized and default values are set.
+ * A pointer to the initialized data is returned.
+ *
+ * @returns initialized GenData
+ *
+ */
+struct GenData *gensvm_init_data()
+{
+ struct GenData *data = Malloc(struct GenData, 1);
+ data->Sigma = NULL;
+ data->y = NULL;
+ data->Z = NULL;
+ data->RAW = NULL;
+
+ // set default values
+ data->kerneltype = K_LINEAR;
+ data->kernelparam = NULL;
+
+ return data;
+}
+
+/**
+ * @brief Free allocated GenData struct
+ *
+ * @details
+ * Simply free a previously allocated GenData struct by freeing all its
+ * components. Note that the data struct itself is also freed here.
+ *
+ * @param[in] data GenData struct to free
+ *
+ */
+void gensvm_free_data(struct GenData *data)
+{
+ if (data == NULL)
+ return;
+
+ if (data->Z == data->RAW) {
+ free(data->Z);
+ } else {
+ free(data->Z);
+ free(data->RAW);
+ }
+ free(data->kernelparam);
+ free(data->y);
+ free(data->Sigma);
+ free(data);
+}
+
+/**
+ * @brief Initialize a GenModel structure
+ *
+ * @details
+ * A GenModel structure is initialized and the default value for the
+ * parameters are set. A pointer to the initialized model is returned.
+ *
+ * @returns initialized GenModel
+ */
+struct GenModel *gensvm_init_model()
+{
+ struct GenModel *model = Malloc(struct GenModel, 1);
+
+ // set default values
+ model->p = 1.0;
+ model->lambda = pow(2, -8.0);
+ model->epsilon = 1e-6;
+ model->kappa = 0.0;
+ model->weight_idx = 1;
+ model->kerneltype = K_LINEAR;
+ model->kernelparam = NULL;
+
+ model->W = NULL;
+ model->t = NULL;
+ model->V = NULL;
+ model->Vbar = NULL;
+ model->U = NULL;
+ model->UU = NULL;
+ model->Q = NULL;
+ model->H = NULL;
+ model->R = NULL;
+ model->rho = NULL;
+ model->data_file = NULL;
+
+ return model;
+}
+
+/**
+ * @brief Allocate memory for a GenModel
+ *
+ * @details
+ * This function can be used to allocate the memory needed for a GenModel. All
+ * arrays in the model are specified and initialized to 0.
+ *
+ * @param[in] model GenModel to allocate
+ *
+ */
+void gensvm_allocate_model(struct GenModel *model)
+{
+ long n = model->n;
+ long m = model->m;
+ long K = model->K;
+
+ model->W = Calloc(double, m*(K-1));
+ model->t = Calloc(double, K-1);
+ model->V = Calloc(double, (m+1)*(K-1));
+ model->Vbar = Calloc(double, (m+1)*(K-1));
+ model->U = Calloc(double, K*(K-1));
+ model->UU = Calloc(double, n*K*(K-1));
+ model->Q = Calloc(double, n*K);
+ model->H = Calloc(double, n*K);
+ model->R = Calloc(double, n*K);
+ model->rho = Calloc(double, n);
+}
+
+/**
+ * @brief Reallocate memory for GenModel
+ *
+ * @details
+ * This function can be used to reallocate existing memory for a GenModel,
+ * upon a change in the model dimensions. This is used in combination with
+ * kernels.
+ *
+ * @param[in] model GenModel to reallocate
+ * @param[in] n new value of GenModel->n
+ * @param[in] m new value of GenModel->m
+ *
+ */
+void gensvm_reallocate_model(struct GenModel *model, long n, long m)
+{
+ long K = model->K;
+
+ if (model->n == n && model->m == m)
+ return;
+ if (model->n != n) {
+ model->UU = Realloc(model->UU, double, n*K*(K-1));
+ Memset(model->UU, double, n*K*(K-1));
+
+ model->Q = Realloc(model->Q, double, n*K);
+ Memset(model->Q, double, n*K);
+
+ model->H = Realloc(model->H, double, n*K);
+ Memset(model->H, double, n*K);
+
+ model->R = Realloc(model->R, double, n*K);
+ Memset(model->R, double, n*K);
+
+ model->rho = Realloc(model->rho, double, n);
+ Memset(model->rho, double, n);
+
+ model->n = n;
+ }
+ if (model->m != m) {
+ model->W = Realloc(model->W, double, m*(K-1));
+ Memset(model->W, double, m*(K-1));
+
+ model->V = Realloc(model->V, double, (m+1)*(K-1));
+ Memset(model->V, double, (m+1)*(K-1));
+
+ model->Vbar = Realloc(model->Vbar, double, (m+1)*(K-1));
+ Memset(model->Vbar, double, (m+1)*(K-1));
+
+ model->m = m;
+ }
+}
+
+/**
+ * @brief Free allocated GenModel struct
+ *
+ * @details
+ * Simply free a previously allocated GenModel by freeing all its component
+ * arrays. Note that the model struct itself is also freed here.
+ *
+ * @param[in] model GenModel to free
+ *
+ */
+void gensvm_free_model(struct GenModel *model)
+{
+ free(model->W);
+ free(model->t);
+ free(model->V);
+ free(model->Vbar);
+ free(model->U);
+ free(model->UU);
+ free(model->Q);
+ free(model->H);
+ free(model->rho);
+ free(model->R);
+ free(model->kernelparam);
+
+ free(model);
+}
+
diff --git a/src/gensvm_cmdarg.c b/src/gensvm_cmdarg.c
new file mode 100644
index 0000000..8f796bb
--- /dev/null
+++ b/src/gensvm_cmdarg.c
@@ -0,0 +1,71 @@
+/**
+ * @file gensvm_cmdarg.c
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Functions for dealing with command line arguments
+ *
+ * @details
+ * This file contains several utility functions for coordinating input and
+ * output of data and model files.
+ *
+ */
+
+#include "gensvm_cmdarg.h"
+
+/**
+ * @brief Check if any command line arguments contain string
+ *
+ * @details
+ * Check if any of a given array of command line arguments contains a given
+ * string. If the string is found, the index of the string in argv is
+ * returned. If the string is not found, 0 is returned.
+ *
+ * This function is copied from MSVMpack/libMSVM.c.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv command line arguments
+ * @param[in] str string to find in the arguments
+ * @returns index of the string in the arguments if found, 0
+ * otherwise
+ */
+int gensvm_check_argv(int argc, char **argv, char *str)
+{
+ int i;
+ int arg_str = 0;
+ for (i=1; i<argc; i++)
+ if (strstr(argv[i], str) != NULL) {
+ arg_str = i;
+ break;
+ }
+
+ return arg_str;
+}
+
+/**
+ * @brief Check if a command line argument equals a string
+ *
+ * @details
+ * Check if any of the command line arguments is exactly equal to a given
+ * string. If so, return the index of the corresponding command line argument.
+ * If not, return 0.
+ *
+ * This function is copied from MSVMpack/libMSVM.c
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv command line arguments
+ * @param[in] str string to find in the arguments
+ * @returns index of the command line argument that corresponds to
+ * the string, 0 if none matches.
+ */
+int gensvm_check_argv_eq(int argc, char **argv, char *str)
+{
+ int i;
+ int arg_str = 0;
+ for (i=1; i<argc; i++)
+ if (strcmp(argv[i], str) == 0) {
+ arg_str = i;
+ break;
+ }
+
+ return arg_str;
+}
diff --git a/src/gensvm_copy.c b/src/gensvm_copy.c
new file mode 100644
index 0000000..501e405
--- /dev/null
+++ b/src/gensvm_copy.c
@@ -0,0 +1,49 @@
+/**
+ * @file gensvm_copy.c
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Function for copying a GenModel instance
+ *
+ */
+
+#include "gensvm_copy.h"
+
+/**
+ * @brief Copy model parameters between two GenModel structs
+ *
+ * @details
+ * The parameters copied are GenModel::weight_idx, GenModel::epsilon,
+ * GenModel::p, GenModel::kappa, and GenModel::lambda.
+ *
+ * @param[in] from GenModel to copy parameters from
+ * @param[in,out] to GenModel to copy parameters to
+ */
+void gensvm_copy_model(struct GenModel *from, struct GenModel *to)
+{
+ to->weight_idx = from->weight_idx;
+ to->epsilon = from->epsilon;
+ to->p = from->p;
+ to->kappa = from->kappa;
+ to->lambda = from->lambda;
+
+ to->kerneltype = from->kerneltype;
+ switch (to->kerneltype) {
+ case K_LINEAR:
+ break;
+ case K_POLY:
+ to->kernelparam = Malloc(double, 3);
+ to->kernelparam[0] = from->kernelparam[0];
+ to->kernelparam[1] = from->kernelparam[1];
+ to->kernelparam[2] = from->kernelparam[2];
+ break;
+ case K_RBF:
+ to->kernelparam = Malloc(double, 1);
+ to->kernelparam[0] = from->kernelparam[0];
+ break;
+ case K_SIGMOID:
+ to->kernelparam = Malloc(double, 2);
+ to->kernelparam[0] = from->kernelparam[0];
+ to->kernelparam[1] = from->kernelparam[1];
+ break;
+ }
+}
diff --git a/src/gensvm_crossval.c b/src/gensvm_cv_util.c
index 8f09cb5..d9cde09 100644
--- a/src/gensvm_crossval.c
+++ b/src/gensvm_cv_util.c
@@ -1,5 +1,5 @@
/**
- * @file crossval.c
+ * @file gensvm_cv_util.c
* @author Gertjan van den Burg
* @date January 7, 2014
* @brief Functions for cross validation
@@ -13,10 +13,7 @@
*
*/
-#include "globals.h"
-#include "gensvm.h"
-#include "gensvm_crossval.h"
-#include "gensvm_matrix.h"
+#include "gensvm_cv_util.h"
/**
* @brief Create a cross validation split vector
diff --git a/src/gensvm_matrix.c b/src/gensvm_debug.c
index c2e5986..d94711a 100644
--- a/src/gensvm_matrix.c
+++ b/src/gensvm_debug.c
@@ -1,19 +1,15 @@
/**
- * @file gensvm_matrix.c
+ * @file gensvm_debug.c
* @author Gertjan van den Burg
- * @date August, 2013
- * @brief Functions facilitating matrix access
+ * @date May, 2016
+ * @brief Functions facilitating debugging
*
* @details
- * The functions contained in this file are used when
- * accessing or writing to matrices. Seperate functions
- * exist of adding and multiplying existing matrix
- * elements, to ensure this is done in place.
+ * Defines functions useful for debugging matrices.
*
*/
-#include "gensvm_matrix.h"
-#include "gensvm_util.h"
+#include "gensvm_debug.h"
/**
* @brief print a matrix
diff --git a/src/gensvm_grid.c b/src/gensvm_grid.c
new file mode 100644
index 0000000..ea25b89
--- /dev/null
+++ b/src/gensvm_grid.c
@@ -0,0 +1,62 @@
+/**
+ * @file gensvm_grid.c
+ * @author Gertjan van den Burg
+ * @date Mayy, 2016
+ * @brief Functions for initializing GenGrid structures
+ *
+ * @details
+ * This file contains functions for initializing and freeing a GenGrid
+ * instance. In addition, default values for this structure are defined here
+ * (and only here).
+ *
+ */
+
+#include "gensvm_grid.h"
+
+struct GenGrid *gensvm_init_grid()
+{
+ struct GenGrid *grid = Malloc(struct GenGrid, 1);
+
+ // initialize to defaults
+ grid->traintype = CV;
+ grid->kerneltype = K_LINEAR;
+ grid->repeats = 0;
+ grid->folds = 10;
+ grid->Np = 0;
+ grid->Nl = 0;
+ grid->Nk = 0;
+ grid->Ne = 0;
+ grid->Nw = 0;
+ grid->Ng = 0;
+ grid->Nc = 0;
+ grid->Nd = 0;
+
+ // set arrays to NULL
+ grid->weight_idxs = NULL;
+ grid->ps = NULL;
+ grid->lambdas = NULL;
+ grid->kappas = NULL;
+ grid->epsilons = NULL;
+ grid->gammas = NULL;
+ grid->coefs = NULL;
+ grid->degrees = NULL;
+ grid->train_data_file = NULL;
+ grid->test_data_file = NULL;
+
+ return grid;
+}
+
+void gensvm_free_grid(struct GenGrid *grid)
+{
+ free(grid->weight_idxs);
+ free(grid->ps);
+ free(grid->lambdas);
+ free(grid->kappas);
+ free(grid->epsilons);
+ free(grid->gammas);
+ free(grid->coefs);
+ free(grid->degrees);
+ free(grid->train_data_file);
+ free(grid->test_data_file);
+ free(grid);
+}
diff --git a/src/gensvm_train_dataset.c b/src/gensvm_gridsearch.c
index d1650a7..deee033 100644
--- a/src/gensvm_train_dataset.c
+++ b/src/gensvm_gridsearch.c
@@ -9,74 +9,60 @@
* this file are used to find the optimal parameters.
*/
-#include <math.h>
-#include <time.h>
-
-#include "globals.h"
-#include "libGenSVM.h"
-#include "gensvm.h"
-#include "gensvm_crossval.h"
-#include "gensvm_init.h"
-#include "gensvm_kernel.h"
-#include "gensvm_matrix.h"
-#include "gensvm_train.h"
-#include "gensvm_train_dataset.h"
-#include "gensvm_pred.h"
-#include "gensvm_util.h"
-#include "gensvm_timer.h"
+#include "gensvm_gridsearch.h"
extern FILE *GENSVM_OUTPUT_FILE;
/**
- * @brief Initialize a Queue from a Training instance
+ * @brief Initialize a GenQueue from a Training instance
*
* @details
* A Training instance describes the grid to search over. This funtion
* creates all tasks that need to be performed and adds these to
- * a Queue. Each task contains a pointer to the train and test datasets
+ * a GenQueue. Each task contains a pointer to the train and test datasets
* which are supplied. Note that the tasks are created in a specific order of
* the parameters, to ensure that the GenModel::V of a previous parameter
* set provides the best possible initial estimate of GenModel::V for the next
* parameter set.
*
- * @param[in] training Training struct describing the grid search
- * @param[in] queue pointer to a Queue that will be used to
+ * @param[in] grid Training struct describing the grid search
+ * @param[in] queue pointer to a GenQueue that will be used to
* add the tasks to
* @param[in] train_data GenData of the training set
* @param[in] test_data GenData of the test set
*
*/
-void make_queue(struct Training *training, struct Queue *queue,
+void gensvm_fill_queue(struct GenGrid *grid, struct GenQueue *queue,
struct GenData *train_data, struct GenData *test_data)
{
long i, j, k;
long N, cnt = 0;
- struct Task *task;
+ struct GenTask *task;
queue->i = 0;
- N = training->Np;
- N *= training->Nl;
- N *= training->Nk;
- N *= training->Ne;
- N *= training->Nw;
+ N = grid->Np;
+ N *= grid->Nl;
+ N *= grid->Nk;
+ N *= grid->Ne;
+ N *= grid->Nw;
// these parameters are not necessarily non-zero
- N *= training->Ng > 0 ? training->Ng : 1;
- N *= training->Nc > 0 ? training->Nc : 1;
- N *= training->Nd > 0 ? training->Nd : 1;
+ N *= grid->Ng > 0 ? grid->Ng : 1;
+ N *= grid->Nc > 0 ? grid->Nc : 1;
+ N *= grid->Nd > 0 ? grid->Nd : 1;
- queue->tasks = Calloc(struct Task *, N);
+ queue->tasks = Calloc(struct GenTask *, N);
queue->N = N;
// initialize all tasks
for (i=0; i<N; i++) {
- task = Calloc(struct Task, 1);
+ task = gensvm_init_task();
task->ID = i;
task->train_data = train_data;
task->test_data = test_data;
- task->folds = training->folds;
- task->kerneltype = training->kerneltype;
- task->kernelparam = Calloc(double, training->Ng +
- training->Nc + training->Nd);
+ task->folds = grid->folds;
+ task->kerneltype = grid->kerneltype;
+ task->kernelparam = Calloc(double, grid->Ng +
+ grid->Nc + grid->Nd);
queue->tasks[i] = task;
}
@@ -87,125 +73,83 @@ void make_queue(struct Training *training, struct Queue *queue,
cnt = 1;
i = 0;
while (i < N )
- for (j=0; j<training->Np; j++)
+ for (j=0; j<grid->Np; j++)
for (k=0; k<cnt; k++) {
- queue->tasks[i]->p = training->ps[j];
+ queue->tasks[i]->p = grid->ps[j];
i++;
}
- cnt *= training->Np;
+ cnt *= grid->Np;
i = 0;
while (i < N )
- for (j=0; j<training->Nl; j++)
+ for (j=0; j<grid->Nl; j++)
for (k=0; k<cnt; k++) {
queue->tasks[i]->lambda =
- training->lambdas[j];
+ grid->lambdas[j];
i++;
}
- cnt *= training->Nl;
+ cnt *= grid->Nl;
i = 0;
while (i < N )
- for (j=0; j<training->Nk; j++)
+ for (j=0; j<grid->Nk; j++)
for (k=0; k<cnt; k++) {
- queue->tasks[i]->kappa = training->kappas[j];
+ queue->tasks[i]->kappa = grid->kappas[j];
i++;
}
- cnt *= training->Nk;
+ cnt *= grid->Nk;
i = 0;
while (i < N )
- for (j=0; j<training->Nw; j++)
+ for (j=0; j<grid->Nw; j++)
for (k=0; k<cnt; k++) {
queue->tasks[i]->weight_idx =
- training->weight_idxs[j];
+ grid->weight_idxs[j];
i++;
}
- cnt *= training->Nw;
+ cnt *= grid->Nw;
i = 0;
while (i < N )
- for (j=0; j<training->Ne; j++)
+ for (j=0; j<grid->Ne; j++)
for (k=0; k<cnt; k++) {
queue->tasks[i]->epsilon =
- training->epsilons[j];
+ grid->epsilons[j];
i++;
}
- cnt *= training->Ne;
+ cnt *= grid->Ne;
i = 0;
- while (i < N && training->Ng > 0)
- for (j=0; j<training->Ng; j++)
+ while (i < N && grid->Ng > 0)
+ for (j=0; j<grid->Ng; j++)
for (k=0; k<cnt; k++) {
queue->tasks[i]->kernelparam[0] =
- training->gammas[j];
+ grid->gammas[j];
i++;
}
- cnt *= training->Ng > 0 ? training->Ng : 1;
+ cnt *= grid->Ng > 0 ? grid->Ng : 1;
i = 0;
- while (i < N && training->Nc > 0)
- for (j=0; j<training->Nc; j++)
+ while (i < N && grid->Nc > 0)
+ for (j=0; j<grid->Nc; j++)
for (k=0; k<cnt; k++) {
queue->tasks[i]->kernelparam[1] =
- training->coefs[j];
+ grid->coefs[j];
i++;
}
- cnt *= training->Nc > 0 ? training->Nc : 1;
+ cnt *= grid->Nc > 0 ? grid->Nc : 1;
i = 0;
- while (i < N && training->Nd > 0)
- for (j=0; j<training->Nd; j++)
+ while (i < N && grid->Nd > 0)
+ for (j=0; j<grid->Nd; j++)
for (k=0; k<cnt; k++) {
queue->tasks[i]->kernelparam[2] =
- training->degrees[j];
+ grid->degrees[j];
i++;
}
}
/**
- * @brief Get new Task from Queue
- *
- * @details
- * Return a pointer to the next Task in the Queue. If no Task instances are
- * left, NULL is returned. The internal counter Queue::i is used for finding
- * the next Task.
- *
- * @param[in] q Queue instance
- * @returns pointer to next Task
- *
- */
-struct Task *get_next_task(struct Queue *q)
-{
- long i = q->i;
- if (i < q->N) {
- q->i++;
- return q->tasks[i];
- }
- return NULL;
-}
-
-/**
- * @brief Comparison function for Tasks based on performance
- *
- * @details
- * To be able to sort Task structures on the performance of their specific
- * set of parameters, this comparison function is implemented. Task structs
- * are sorted with highest performance first.
- *
- * @param[in] elem1 Task 1
- * @param[in] elem2 Task 2
- * @returns result of inequality of Task 1 performance over
- * Task 2 performance
- */
-int tasksort(const void *elem1, const void *elem2)
-{
- const struct Task *t1 = (*(struct Task **) elem1);
- const struct Task *t2 = (*(struct Task **) elem2);
- return (t1->performance > t2->performance);
-}
-
-/**
* @brief Comparison function for doubl
*
* @param[in] elem1 number 1
@@ -256,11 +200,11 @@ double prctile(double *values, long N, double p)
return boundary;
}
-struct Queue *create_top_queue(struct Queue *q)
+struct GenQueue *create_top_queue(struct GenQueue *q)
{
long i, k, N = 0;
double boundary, *perf;
- struct Queue *nq = Malloc(struct Queue, 1);
+ struct GenQueue *nq = Malloc(struct GenQueue, 1);
// find the 95th percentile of performance
perf = Calloc(double, q->N);
@@ -278,7 +222,7 @@ struct Queue *create_top_queue(struct Queue *q)
}
// create a new queue with the best tasks
- nq->tasks = Malloc(struct Task *, N);
+ nq->tasks = Malloc(struct GenTask *, N);
k = 0;
for (i=0; i<q->N; i++) {
if (q->tasks[i]->performance >= boundary)
@@ -292,17 +236,17 @@ struct Queue *create_top_queue(struct Queue *q)
/**
- * @brief Run repeats of the Task structs in Queue to find the best
+ * @brief Run repeats of the GenTask structs in GenQueue to find the best
* configuration
*
* @details
- * The best performing tasks in the supplied Queue are found by taking those
- * Task structs that have a performance greater or equal to the 95% percentile
+ * The best performing tasks in the supplied GenQueue are found by taking those
+ * GenTask structs that have a performance greater or equal to the 95% percentile
* of the performance of all tasks. These tasks are then gathered in a new
- * Queue. For each of the tasks in this new Queue the cross validation run is
+ * GenQueue. For each of the tasks in this new GenQueue the cross validation run is
* repeated a number of times.
*
- * For each of the Task configurations that are repeated the mean performance,
+ * For each of the GenTask configurations that are repeated the mean performance,
* standard deviation of the performance and the mean computation time are
* reported.
*
@@ -318,21 +262,21 @@ struct Queue *create_top_queue(struct Queue *q)
* an interval is found which contains tasks. If one or more tasks are found,
* this loop stops.
*
- * @param[in] q Queue of Task structs which have already been
- * run and have a Task::performance value
+ * @param[in] q GenQueue of GenTask structs which have already been
+ * run and have a GenTask::performance value
* @param[in] repeats Number of times to repeat the best
* configurations for consistency
* @param[in] traintype type of training to do (CV or TT)
*
*/
-void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
+void consistency_repeats(struct GenQueue *q, long repeats, TrainType traintype)
{
long i, f, r, N, *cv_idx;
double p, pi, pr, pt, *time, *std, *mean, *perf;
- struct Queue *nq;
+ struct GenQueue *nq;
struct GenData **train_folds, **test_folds;
struct GenModel *model = gensvm_init_model();
- struct Task *task = NULL;
+ struct GenTask *task = NULL;
clock_t loop_s, loop_e;
nq = create_top_queue(q);
@@ -350,7 +294,7 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
model->m = task->train_data->m;
model->K = task->train_data->K;
gensvm_allocate_model(model);
- gensvm_seed_model_V(NULL, model, task->train_data);
+ gensvm_init_V(NULL, model, task->train_data);
cv_idx = Calloc(long, task->train_data->n);
@@ -382,13 +326,13 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
p = gensvm_cross_validation(model, train_folds, test_folds,
task->folds, task->train_data->n);
loop_e = clock();
- time[i] += elapsed_time(loop_s, loop_e);
+ time[i] += gensvm_elapsed_time(loop_s, loop_e);
matrix_set(perf, repeats, i, r, p);
mean[i] += p/((double) repeats);
note("%3.3f\t", p);
// this is done because if we reuse the V it's not a
// consistency check
- gensvm_seed_model_V(NULL, model, task->train_data);
+ gensvm_init_V(NULL, model, task->train_data);
for (f=0; f<task->folds; f++) {
gensvm_free_data(train_folds[f]);
gensvm_free_data(test_folds[f]);
@@ -397,7 +341,8 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
free(test_folds);
}
for (r=0; r<repeats; r++) {
- std[i] += pow(matrix_get(perf, repeats, i, r) - mean[i], 2.0);
+ std[i] += pow(matrix_get(perf, repeats, i, r) - mean[i],
+ 2.0);
}
if (r > 1) {
std[i] /= ((double) repeats) - 1.0;
@@ -405,7 +350,8 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
} else {
std[i] = 0.0;
}
- note("(m = %3.3f, s = %3.3f, t = %3.3f)\n", mean[i], std[i], time[i]);
+ note("(m = %3.3f, s = %3.3f, t = %3.3f)\n", mean[i], std[i],
+ time[i]);
task = get_next_task(nq);
i++;
}
@@ -469,7 +415,7 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
* @param[in] oldtask the old task
* @return whether the kernel needs to be reevaluated
*/
-bool kernel_changed(struct Task *newtask, struct Task *oldtask)
+bool kernel_changed(struct GenTask *newtask, struct GenTask *oldtask)
{
int i;
if (oldtask == NULL)
@@ -495,10 +441,10 @@ bool kernel_changed(struct Task *newtask, struct Task *oldtask)
}
/**
- * @brief Run the grid search for a Queue
+ * @brief Run the grid search for a GenQueue
*
* @details
- * Given a Queue of Task struct to be trained, a grid search is launched to
+ * Given a GenQueue of GenTask struct to be trained, a grid search is launched to
* find the optimal parameter configuration. As is also done within
* cross_validation(), the optimal weights of one parameter set are used as
* initial estimates for GenModel::V in the next parameter set. Note that to
@@ -506,17 +452,17 @@ bool kernel_changed(struct Task *newtask, struct Task *oldtask)
* which tasks are considered is important. This is considered in
* make_queue().
*
- * The performance found by cross validation is stored in the Task struct.
+ * The performance found by cross validation is stored in the GenTask struct.
*
- * @param[in,out] q Queue with Task instances to run
+ * @param[in,out] q GenQueue with GenTask instances to run
*/
-void start_training(struct Queue *q)
+void start_training(struct GenQueue *q)
{
int f, folds;
double perf, current_max = 0;
- struct Task *task = get_next_task(q);
- struct Task *prevtask = NULL;
+ struct GenTask *task = get_next_task(q);
+ struct GenTask *prevtask = NULL;
struct GenModel *model = gensvm_init_model();
clock_t main_s, main_e, loop_s, loop_e;
@@ -528,7 +474,7 @@ void start_training(struct Queue *q)
model->m = task->train_data->m;
model->K = task->train_data->K;
gensvm_allocate_model(model);
- gensvm_seed_model_V(NULL, model, task->train_data);
+ gensvm_init_V(NULL, model, task->train_data);
long *cv_idx = Calloc(long, task->train_data->n);
gensvm_make_cv_split(task->train_data->n, task->folds, cv_idx);
@@ -568,7 +514,8 @@ void start_training(struct Queue *q)
current_max = maximum(current_max, perf);
note("\t%3.3f%% (%3.3fs)\t(best = %3.3f%%)\n", perf,
- elapsed_time(loop_s, loop_e), current_max);
+ gensvm_elapsed_time(loop_s, loop_e),
+ current_max);
q->tasks[task->ID]->performance = perf;
prevtask = task;
@@ -577,7 +524,7 @@ void start_training(struct Queue *q)
main_e = clock();
note("\nTotal elapsed training time: %8.8f seconds\n",
- elapsed_time(main_s, main_e));
+ gensvm_elapsed_time(main_s, main_e));
// make sure no double free occurs with the copied kernelparam
model->kernelparam = NULL;
@@ -648,37 +595,16 @@ double gensvm_cross_validation(struct GenModel *model,
/**
- * @brief Free the Queue struct
+ * @brief Copy parameters from GenTask to GenModel
*
* @details
- * Freeing the allocated memory of the Queue means freeing every Task struct
- * and then freeing the Queue.
- *
- * @param[in] q Queue to be freed
- *
- */
-void free_queue(struct Queue *q)
-{
- long i;
- for (i=0; i<q->N; i++) {
- free(q->tasks[i]->kernelparam);
- free(q->tasks[i]);
- }
- free(q->tasks);
- free(q);
-}
-
-/**
- * @brief Copy parameters from Task to GenModel
- *
- * @details
- * A Task struct only contains the parameters of the GenModel to be estimated.
+ * A GenTask struct only contains the parameters of the GenModel to be estimated.
* This function is used to copy these parameters.
*
- * @param[in] task Task instance with parameters
+ * @param[in] task GenTask instance with parameters
* @param[in,out] model GenModel to which the parameters are copied
*/
-void make_model_from_task(struct Task *task, struct GenModel *model)
+void make_model_from_task(struct GenTask *task, struct GenModel *model)
{
// copy basic model parameters
model->weight_idx = task->weight_idx;
@@ -693,46 +619,6 @@ void make_model_from_task(struct Task *task, struct GenModel *model)
}
/**
- * @brief Copy model parameters between two GenModel structs
- *
- * @details
- * The parameters copied are GenModel::weight_idx, GenModel::epsilon,
- * GenModel::p, GenModel::kappa, and GenModel::lambda.
- *
- * @param[in] from GenModel to copy parameters from
- * @param[in,out] to GenModel to copy parameters to
- */
-void copy_model(struct GenModel *from, struct GenModel *to)
-{
- to->weight_idx = from->weight_idx;
- to->epsilon = from->epsilon;
- to->p = from->p;
- to->kappa = from->kappa;
- to->lambda = from->lambda;
-
- to->kerneltype = from->kerneltype;
- switch (to->kerneltype) {
- case K_LINEAR:
- break;
- case K_POLY:
- to->kernelparam = Malloc(double, 3);
- to->kernelparam[0] = from->kernelparam[0];
- to->kernelparam[1] = from->kernelparam[1];
- to->kernelparam[2] = from->kernelparam[2];
- break;
- case K_RBF:
- to->kernelparam = Malloc(double, 1);
- to->kernelparam[0] = from->kernelparam[0];
- break;
- case K_SIGMOID:
- to->kernelparam = Malloc(double, 2);
- to->kernelparam[0] = from->kernelparam[0];
- to->kernelparam[1] = from->kernelparam[1];
- break;
- }
-}
-
-/**
* @brief Print the description of the current task on screen
*
* @details
@@ -741,11 +627,11 @@ void copy_model(struct GenModel *from, struct GenModel *to)
* parameters differ with the specified kernel, this function writes a
* parameter string depending on which kernel is used.
*
- * @param[in] task the Task specified
+ * @param[in] task the GenTask specified
* @param[in] N total number of tasks
*
*/
-void print_progress_string(struct Task *task, long N)
+void print_progress_string(struct GenTask *task, long N)
{
char buffer[MAX_LINE_LENGTH];
sprintf(buffer, "(%03li/%03li)\t", task->ID+1, N);
diff --git a/src/gensvm_init.c b/src/gensvm_init.c
index 947c60a..28f34ba 100644
--- a/src/gensvm_init.c
+++ b/src/gensvm_init.c
@@ -13,201 +13,100 @@
*
*/
-#include <math.h>
-
#include "gensvm_init.h"
-/**
- * @brief Initialize a GenModel structure
- *
- * @details
- * A GenModel structure is initialized and the default value for the
- * parameters are set. A pointer to the initialized model is returned.
- *
- * @returns initialized GenModel
- */
-struct GenModel *gensvm_init_model()
-{
- struct GenModel *model = Malloc(struct GenModel, 1);
-
- // set default values
- model->p = 1.0;
- model->lambda = pow(2, -8.0);
- model->epsilon = 1e-6;
- model->kappa = 0.0;
- model->weight_idx = 1;
- model->kerneltype = K_LINEAR;
- model->kernelparam = NULL;
-
- model->W = NULL;
- model->t = NULL;
- model->V = NULL;
- model->Vbar = NULL;
- model->U = NULL;
- model->UU = NULL;
- model->Q = NULL;
- model->H = NULL;
- model->R = NULL;
- model->rho = NULL;
- model->data_file = NULL;
-
- return model;
-}
+inline double rnd() { return (double) rand()/0x7FFFFFFF; }
/**
- * @brief Initialize a GenData structure
+ * @brief seed the matrix V from an existing model or using rand
*
* @details
- * A GenData structure is initialized and default values are set.
- * A pointer to the initialized data is returned.
- *
- * @returns initialized GenData
- *
+ * The matrix V must be seeded before the main_loop() can start.
+ * This can be done by either seeding it with random numbers or
+ * using the solution from a previous model on the same dataset
+ * as initial seed. The latter option usually allows for a
+ * significant improvement in the number of iterations necessary
+ * because the seeded model V is closer to the optimal V.
+ *
+ * @param[in] from_model GenModel from which to copy V
+ * @param[in,out] to_model GenModel to which V will be copied
*/
-struct GenData *gensvm_init_data()
+void gensvm_init_V(struct GenModel *from_model,
+ struct GenModel *to_model, struct GenData *data)
{
- struct GenData *data = Malloc(struct GenData, 1);
- data->Sigma = NULL;
- data->y = NULL;
- data->Z = NULL;
- data->RAW = NULL;
-
- // set default values
- data->kerneltype = K_LINEAR;
- data->kernelparam = NULL;
-
- return data;
+ long i, j, k;
+ double cmin, cmax, value;
+
+ long n = data->n;
+ long m = data->m;
+ long K = data->K;
+
+ if (from_model == NULL) {
+ for (i=0; i<m+1; i++) {
+ cmin = 1e100;
+ cmax = -1e100;
+ for (k=0; k<n; k++) {
+ value = matrix_get(data->Z, m+1, k, i);
+ cmin = minimum(cmin, value);
+ cmax = maximum(cmax, value);
+ }
+ for (j=0; j<K-1; j++) {
+ cmin = (abs(cmin) < 1e-10) ? -1 : cmin;
+ cmax = (abs(cmax) < 1e-10) ? 1 : cmax;
+ value = 1.0/cmin + (1.0/cmax - 1.0/cmin)*rnd();
+ matrix_set(to_model->V, K-1, i, j, value);
+ }
+ }
+ } else {
+ for (i=0; i<m+1; i++)
+ for (j=0; j<K-1; j++) {
+ value = matrix_get(from_model->V, K-1, i, j);
+ matrix_set(to_model->V, K-1, i, j, value);
+ }
+ }
}
/**
- * @brief Allocate memory for a GenModel
+ * @brief Initialize instance weights
*
* @details
- * This function can be used to allocate the memory needed for a GenModel. All
- * arrays in the model are specified and initialized to 0.
- *
- * @param[in] model GenModel to allocate
- *
+ * Instance weights can for instance be used to add additional weights to
+ * instances of certain classes. Two default weight possibilities are
+ * implemented here. The first is unit weights, where each instance gets
+ * weight 1.
+ *
+ * The second are group size correction weights, which are calculated as
+ * @f[
+ * \rho_i = \frac{n}{Kn_k} ,
+ * @f]
+ * where @f$ n_k @f$ is the number of instances in group @f$ k @f$ and
+ * @f$ y_i = k @f$.
+ *
+ * @param[in] data GenData with the dataset
+ * @param[in,out] model GenModel with the weight specification. On
+ * exit GenModel::rho contains the instance
+ * weights.
*/
-void gensvm_allocate_model(struct GenModel *model)
+void gensvm_initialize_weights(struct GenData *data, struct GenModel *model)
{
- long n = model->n;
- long m = model->m;
- long K = model->K;
-
- model->W = Calloc(double, m*(K-1));
- model->t = Calloc(double, K-1);
- model->V = Calloc(double, (m+1)*(K-1));
- model->Vbar = Calloc(double, (m+1)*(K-1));
- model->U = Calloc(double, K*(K-1));
- model->UU = Calloc(double, n*K*(K-1));
- model->Q = Calloc(double, n*K);
- model->H = Calloc(double, n*K);
- model->R = Calloc(double, n*K);
- model->rho = Calloc(double, n);
-}
+ long *groups;
+ long i;
-/**
- * @brief Reallocate memory for GenModel
- *
- * @details
- * This function can be used to reallocate existing memory for a GenModel,
- * upon a change in the model dimensions. This is used in combination with
- * kernels.
- *
- * @param[in] model GenModel to reallocate
- * @param[in] n new value of GenModel->n
- * @param[in] m new value of GenModel->m
- *
- */
-void gensvm_reallocate_model(struct GenModel *model, long n, long m)
-{
+ long n = model->n;
long K = model->K;
- if (model->n == n && model->m == m)
- return;
- if (model->n != n) {
- model->UU = Realloc(model->UU, double, n*K*(K-1));
- Memset(model->UU, double, n*K*(K-1));
-
- model->Q = Realloc(model->Q, double, n*K);
- Memset(model->Q, double, n*K);
-
- model->H = Realloc(model->H, double, n*K);
- Memset(model->H, double, n*K);
-
- model->R = Realloc(model->R, double, n*K);
- Memset(model->R, double, n*K);
-
- model->rho = Realloc(model->rho, double, n);
- Memset(model->rho, double, n);
-
- model->n = n;
+ if (model->weight_idx == 1) {
+ for (i=0; i<n; i++)
+ model->rho[i] = 1.0;
}
- if (model->m != m) {
- model->W = Realloc(model->W, double, m*(K-1));
- Memset(model->W, double, m*(K-1));
-
- model->V = Realloc(model->V, double, (m+1)*(K-1));
- Memset(model->V, double, (m+1)*(K-1));
-
- model->Vbar = Realloc(model->Vbar, double, (m+1)*(K-1));
- Memset(model->Vbar, double, (m+1)*(K-1));
-
- model->m = m;
- }
-}
-
-/**
- * @brief Free allocated GenModel struct
- *
- * @details
- * Simply free a previously allocated GenModel by freeing all its component
- * arrays. Note that the model struct itself is also freed here.
- *
- * @param[in] model GenModel to free
- *
- */
-void gensvm_free_model(struct GenModel *model)
-{
- free(model->W);
- free(model->t);
- free(model->V);
- free(model->Vbar);
- free(model->U);
- free(model->UU);
- free(model->Q);
- free(model->H);
- free(model->rho);
- free(model->R);
- free(model->kernelparam);
-
- free(model);
-}
-
-/**
- * @brief Free allocated GenData struct
- *
- * @details
- * Simply free a previously allocated GenData struct by freeing all its
- * components. Note that the data struct itself is also freed here.
- *
- * @param[in] data GenData struct to free
- *
- */
-void gensvm_free_data(struct GenData *data)
-{
- if (data == NULL)
- return;
-
- if (data->Z == data->RAW) {
- free(data->Z);
+ else if (model->weight_idx == 2) {
+ groups = Calloc(long, K);
+ for (i=0; i<n; i++)
+ groups[data->y[i]-1]++;
+ for (i=0; i<n; i++)
+ model->rho[i] = ((double) n)/((double) (groups[data->y[i]-1]*K));
} else {
- free(data->Z);
- free(data->RAW);
+ fprintf(stderr, "Unknown weight specification.\n");
+ exit(1);
}
- free(data->kernelparam);
- free(data->y);
- free(data->Sigma);
- free(data);
}
diff --git a/src/gensvm_io.c b/src/gensvm_io.c
index c4798d8..696f46f 100644
--- a/src/gensvm_io.c
+++ b/src/gensvm_io.c
@@ -6,16 +6,12 @@
*
* @details
* This file contains functions for reading and writing model files, and data
- * files.
- *
+ * files. It also contains a function for generating a string of the current
+ * time, used in writing output files.
+*
*/
-#include "globals.h"
-#include "gensvm.h"
#include "gensvm_io.h"
-#include "gensvm_matrix.h"
-#include "gensvm_strutil.h"
-#include "gensvm_timer.h"
/**
* @brief Read data from file
@@ -226,7 +222,7 @@ void gensvm_write_model(struct GenModel *model, char *output_filename)
output_filename);
exit(1);
}
- get_time_string(timestr);
+ gensvm_time_string(timestr);
// Write output to file
fprintf(fid, "Output file for GenSVM (version %1.1f)\n", VERSION);
@@ -298,3 +294,49 @@ void gensvm_write_predictions(struct GenData *data, long *predy,
fclose(fid);
}
+
+/**
+ * @brief Get time string with UTC offset
+ *
+ * @details
+ * Create a string for the current system time. Include an offset of UTC for
+ * consistency. The format of the generated string is "DDD MMM D HH:MM:SS
+ * YYYY (UTC +HH:MM)", e.g. "Fri Aug 9, 12:34:56 2013 (UTC +02:00)".
+ *
+ * @param[in,out] buffer allocated string buffer, on exit contains
+ * formatted string
+ *
+ */
+void gensvm_time_string(char *buffer)
+{
+ int diff, hours, minutes;
+ char timestr[MAX_LINE_LENGTH];
+ time_t current_time, lt, gt;
+ struct tm *lclt;
+
+ // get current time (in epoch)
+ current_time = time(NULL);
+ if (current_time == ((time_t)-1)) {
+ fprintf(stderr, "Failed to compute the current time.\n");
+ return;
+ }
+
+ // convert time to local time and create a string
+ lclt = localtime(&current_time);
+ strftime(timestr, MAX_LINE_LENGTH, "%c", lclt);
+ if (timestr == NULL) {
+ fprintf(stderr, "Failed to convert time to string.\n");
+ return;
+ }
+
+ // calculate the UTC offset including DST
+ lt = mktime(localtime(&current_time));
+ gt = mktime(gmtime(&current_time));
+ diff = -difftime(gt, lt);
+ hours = (diff/3600);
+ minutes = (diff%3600)/60;
+ if (lclt->tm_isdst == 1)
+ hours++;
+
+ sprintf(buffer, "%s (UTC %+03i:%02i)", timestr, hours, minutes);
+}
diff --git a/src/gensvm_kernel.c b/src/gensvm_kernel.c
index 1d7e5e4..a4f2277 100644
--- a/src/gensvm_kernel.c
+++ b/src/gensvm_kernel.c
@@ -11,15 +11,7 @@
*
*/
-#include <cblas.h>
-#include <math.h>
-
-#include "globals.h"
-#include "gensvm.h"
#include "gensvm_kernel.h"
-#include "gensvm_lapack.h"
-#include "gensvm_matrix.h"
-#include "gensvm_util.h"
/**
* @brief Do the preprocessing steps needed to perform kernel GenSVM
@@ -451,3 +443,46 @@ double gensvm_dot_sigmoid(double *x1, double *x2, double *kernelparam, long n)
value += kernelparam[1];
return tanh(value);
}
+
+/**
+ * @brief Compute the eigenvalues and optionally the eigenvectors of a
+ * symmetric matrix.
+ *
+ * @details
+ * This is a wrapper function around the external LAPACK function.
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/d2/d97/dsyevx_8f.html
+ *
+ *
+ */
+int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA,
+ double VL, double VU, int IL, int IU, double ABSTOL, int *M,
+ double *W, double *Z, int LDZ, double *WORK, int LWORK,
+ int *IWORK, int *IFAIL)
+{
+ extern void dsyevx_(char *JOBZ, char *RANGE, char *UPLO, int *Np,
+ double *A, int *LDAp, double *VLp, double *VUp,
+ int *ILp, int *IUp, double *ABSTOLp, int *M,
+ double *W, double *Z, int *LDZp, double *WORK,
+ int *LWORKp, int *IWORK, int *IFAIL, int *INFOp);
+ int INFO;
+ dsyevx_(&JOBZ, &RANGE, &UPLO, &N, A, &LDA, &VL, &VU, &IL, &IU, &ABSTOL,
+ M, W, Z, &LDZ, WORK, &LWORK, IWORK, IFAIL, &INFO);
+ return INFO;
+}
+
+/**
+ * @brief Determine double precision machine parameters.
+ *
+ * @details
+ * This is a wrapper function around the external LAPACK function.
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/d5/dd4/dlamch_8f.html
+ */
+double dlamch(char CMACH)
+{
+ extern double dlamch_(char *CMACH);
+ return dlamch_(&CMACH);
+}
diff --git a/src/gensvm_lapack.c b/src/gensvm_lapack.c
index 56dfc20..2a9c120 100644
--- a/src/gensvm_lapack.c
+++ b/src/gensvm_lapack.c
@@ -11,125 +11,3 @@
#include "gensvm_lapack.h"
-/**
- * @brief Solve AX = B where A is symmetric positive definite.
- *
- * @details
- * Solve a linear system of equations AX = B where A is symmetric positive
- * definite. This function uses the externel LAPACK routine dposv.
- *
- * @param[in] UPLO which triangle of A is stored
- * @param[in] N order of A
- * @param[in] NRHS number of columns of B
- * @param[in,out] A double precision array of size (LDA, N). On
- * exit contains the upper or lower factor of the
- * Cholesky factorization of A.
- * @param[in] LDA leading dimension of A
- * @param[in,out] B double precision array of size (LDB, NRHS). On
- * exit contains the N-by-NRHS solution matrix X.
- * @param[in] LDB the leading dimension of B
- * @returns info parameter which contains the status of the
- * computation:
- * - =0: success
- * - <0: if -i, the i-th argument had
- * an illegal value
- * - >0: if i, the leading minor of A
- * was not positive definite
- *
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html
- */
-int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
- int LDB)
-{
- extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A,
- int *LDAp, double *B, int *LDBp, int *INFOp);
- int INFO;
- dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO);
- return INFO;
-}
-
-/**
- * @brief Solve a system of equations AX = B where A is symmetric.
- *
- * @details
- * Solve a linear system of equations AX = B where A is symmetric. This
- * function uses the external LAPACK routine dsysv.
- *
- * @param[in] UPLO which triangle of A is stored
- * @param[in] N order of A
- * @param[in] NRHS number of columns of B
- * @param[in,out] A double precision array of size (LDA, N). On
- * exit contains the block diagonal matrix D and
- * the multipliers used to obtain the factor U or
- * L from the factorization A = U*D*U**T or
- * A = L*D*L**T.
- * @param[in] LDA leading dimension of A
- * @param[in] IPIV integer array containing the details of D
- * @param[in,out] B double precision array of size (LDB, NRHS). On
- * exit contains the N-by-NRHS matrix X
- * @param[in] LDB leading dimension of B
- * @param[out] WORK double precision array of size max(1,LWORK). On
- * exit, WORK(1) contains the optimal LWORK
- * @param[in] LWORK the length of WORK, can be used for determining
- * the optimal blocksize for dsystrf.
- * @returns info parameter which contains the status of the
- * computation:
- * - =0: success
- * - <0: if -i, the i-th argument had an
- * illegal value
- * - >0: if i, D(i, i) is exactly zero,
- * no solution can be computed.
- *
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html
- */
-int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
- double *B, int LDB, double *WORK, int LWORK)
-{
- extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A,
- int *LDAp, int *IPIV, double *B, int *LDBp,
- double *WORK, int *LWORK, int *INFOp);
- int INFO;
- dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO);
- return INFO;
-}
-
-/**
- * @brief Compute the eigenvalues and optionally the eigenvectors of a
- * symmetric matrix.
- *
- * @details
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/d2/d97/dsyevx_8f.html
- *
- *
- */
-int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA,
- double VL, double VU, int IL, int IU, double ABSTOL, int *M,
- double *W, double *Z, int LDZ, double *WORK, int LWORK,
- int *IWORK, int *IFAIL)
-{
- extern void dsyevx_(char *JOBZ, char *RANGE, char *UPLO, int *Np,
- double *A, int *LDAp, double *VLp, double *VUp,
- int *ILp, int *IUp, double *ABSTOLp, int *M,
- double *W, double *Z, int *LDZp, double *WORK,
- int *LWORKp, int *IWORK, int *IFAIL, int *INFOp);
- int INFO;
- dsyevx_(&JOBZ, &RANGE, &UPLO, &N, A, &LDA, &VL, &VU, &IL, &IU, &ABSTOL,
- M, W, Z, &LDZ, WORK, &LWORK, IWORK, IFAIL, &INFO);
- return INFO;
-}
-
-/**
- * @brief Determine double precision machine parameters.
- *
- * @details
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/d5/dd4/dlamch_8f.html
- */
-double dlamch(char CMACH)
-{
- extern double dlamch_(char *CMACH);
- return dlamch_(&CMACH);
-}
diff --git a/src/gensvm_memory.c b/src/gensvm_memory.c
index 529ef79..63c8965 100644
--- a/src/gensvm_memory.c
+++ b/src/gensvm_memory.c
@@ -7,7 +7,6 @@
*/
#include "globals.h" // imports gensvm_memory.h
-#include "gensvm_util.h"
/**
* @brief Wrapper for calloc() which warns when allocation fails
@@ -34,7 +33,7 @@ void *mycalloc(const char *file, int line, unsigned long size,
void *ptr = calloc(size, typesize);
if (!ptr) {
- err("Could not allocate memory: %d bytes (%s:%d)\n",
+ fprintf(stderr, "Couldn't allocate memory: %lu bytes (%s:%d)\n",
size, file, line);
exit(EXIT_FAILURE);
}
@@ -63,7 +62,7 @@ void *mymalloc(const char *file, int line, unsigned long size)
{
void *ptr = malloc(size);
if (!ptr) {
- err("Could not allocate memory: %d bytes (%s:%d)\n",
+ fprintf(stderr, "Couldn't allocate memory: %lu bytes (%s:%d)\n",
size, file, line);
exit(EXIT_FAILURE);
}
@@ -93,7 +92,7 @@ void *myrealloc(const char *file, int line, unsigned long size, void *var)
{
void *ptr = realloc(var, size);
if (!ptr) {
- err("Could not reallocate memory: %d bytes (%s:%d)\n",
+ fprintf(stderr, "Couldn't reallocate memory: %lu bytes (%s:%d)\n",
size, file, line);
exit(EXIT_FAILURE);
}
diff --git a/src/gensvm_train.c b/src/gensvm_optimize.c
index 371970a..70b3620 100644
--- a/src/gensvm_train.c
+++ b/src/gensvm_optimize.c
@@ -10,17 +10,7 @@
*
*/
-#include <math.h>
-#include <cblas.h>
-
-#include "globals.h"
-#include "libGenSVM.h"
-#include "gensvm.h"
-#include "gensvm_lapack.h"
-#include "gensvm_matrix.h"
-#include "gensvm_sv.h"
-#include "gensvm_train.h"
-#include "gensvm_util.h"
+#include "gensvm_optimize.h"
/**
* Maximum number of iterations of the algorithm.
@@ -71,7 +61,7 @@ void gensvm_optimize(struct GenModel *model, struct GenData *data)
note("\tepsilon = %g\n", model->epsilon);
note("\n");
- gensvm_simplex_gen(model->K, model->U);
+ gensvm_simplex(model->K, model->U);
gensvm_simplex_diff(model, data);
gensvm_category_matrix(model, data);
@@ -536,3 +526,269 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B,
}
}
}
+
+/**
+ * @brief Generate the category matrix
+ *
+ * @details
+ * Generate the category matrix R. The category matrix has 1's everywhere
+ * except at the column corresponding to the label of instance i, there the
+ * element is 0.
+ *
+ * @param[in,out] model corresponding GenModel
+ * @param[in] dataset corresponding GenData
+ *
+ */
+void gensvm_category_matrix(struct GenModel *model, struct GenData *dataset)
+{
+ long i, j;
+ long n = model->n;
+ long K = model->K;
+
+ for (i=0; i<n; i++) {
+ for (j=0; j<K; j++) {
+ if (dataset->y[i] != j+1)
+ matrix_set(model->R, K, i, j, 1.0);
+ else
+ matrix_set(model->R, K, i, j, 0.0);
+ }
+ }
+}
+
+/**
+ * @brief Generate the simplex difference matrix
+ *
+ * @details
+ * The simplex difference matrix is a 3D matrix which is constructed
+ * as follows. For each instance i, the difference vectors between the row of
+ * the simplex matrix corresponding to the class label of instance i and the
+ * other rows of the simplex matrix are calculated. These difference vectors
+ * are stored in a matrix, which is one horizontal slice of the 3D matrix.
+ *
+ * @param[in,out] model the corresponding GenModel
+ * @param[in] data the corresponding GenData
+ *
+ */
+void gensvm_simplex_diff(struct GenModel *model, struct GenData *data)
+{
+ long i, j, k;
+ double value;
+
+ long n = model->n;
+ long K = model->K;
+
+ for (i=0; i<n; i++) {
+ for (j=0; j<K-1; j++) {
+ for (k=0; k<K; k++) {
+ value = matrix_get(model->U, K-1, data->y[i]-1, j);
+ value -= matrix_get(model->U, K-1, k, j);
+ matrix3_set(model->UU, K-1, K, i, j, k, value);
+ }
+ }
+ }
+}
+
+/**
+ * @brief Use step doubling
+ *
+ * @details
+ * Step doubling can be used to speed up the maorization algorithm. Instead of
+ * using the value at the minimimum of the majorization function, the value
+ * ``opposite'' the majorization point is used. This can essentially cut the
+ * number of iterations necessary to reach the minimum in half.
+ *
+ * @param[in] model GenModel containing the augmented parameters
+ */
+void gensvm_step_doubling(struct GenModel *model)
+{
+ long i, j;
+ double value;
+
+ long m = model->m;
+ long K = model->K;
+
+ for (i=0; i<m+1; i++) {
+ for (j=0; j<K-1; j++) {
+ matrix_mul(model->V, K-1, i, j, 2.0);
+ value = - matrix_get(model->Vbar, K-1, i, j);
+ matrix_add(model->V, K-1, i, j, value);
+ }
+ }
+}
+
+/**
+ * @brief Calculate the Huber hinge errors
+ *
+ * @details
+ * For each of the scalar errors in Q the Huber hinge errors are
+ * calculated. The Huber hinge is here defined as
+ * @f[
+ * h(q) =
+ * \begin{dcases}
+ * 1 - q - \frac{\kappa + 1}{2} & \text{if } q \leq -\kappa \\
+ * \frac{1}{2(\kappa + 1)} ( 1 - q)^2 & \text{if } q \in (-\kappa, 1] \\
+ * 0 & \text{if } q > 1
+ * \end{dcases}
+ * @f]
+ *
+ * @param[in,out] model the corresponding GenModel
+ */
+void gensvm_calculate_huber(struct GenModel *model)
+{
+ long i, j;
+ double q, value;
+
+ for (i=0; i<model->n; i++) {
+ for (j=0; j<model->K; j++) {
+ q = matrix_get(model->Q, model->K, i, j);
+ value = 0.0;
+ if (q <= -model->kappa) {
+ value = 1.0 - q - (model->kappa+1.0)/2.0;
+ } else if (q <= 1.0) {
+ value = 1.0/(2.0*model->kappa+2.0)*pow(1.0 - q,
+ 2.0);
+ }
+ matrix_set(model->H, model->K, i, j, value);
+ }
+ }
+}
+
+/**
+ * @brief Calculate the scalar errors
+ *
+ * @details
+ * Calculate the scalar errors q based on the current estimate of V, and
+ * store these in Q. It is assumed that the memory for Q has already been
+ * allocated. In addition, the matrix ZV is calculated here. It is assigned
+ * to a pre-allocated block of memory, which is passed to this function.
+ *
+ * @param[in,out] model the corresponding GenModel
+ * @param[in] data the corresponding GenData
+ * @param[in,out] ZV a pointer to a memory block for ZV. On exit
+ * this block is updated with the new ZV matrix
+ * calculated with GenModel::V.
+ *
+ */
+void gensvm_calculate_errors(struct GenModel *model, struct GenData *data,
+ double *ZV)
+{
+ long i, j, k;
+ double a, value;
+
+ long n = model->n;
+ long m = model->m;
+ long K = model->K;
+
+ cblas_dgemm(
+ CblasRowMajor,
+ CblasNoTrans,
+ CblasNoTrans,
+ n,
+ K-1,
+ m+1,
+ 1.0,
+ data->Z,
+ m+1,
+ model->V,
+ K-1,
+ 0.0,
+ ZV,
+ K-1);
+
+ Memset(model->Q, double, n*K);
+ for (i=0; i<n; i++) {
+ for (j=0; j<K-1; j++) {
+ a = matrix_get(ZV, K-1, i, j);
+ for (k=0; k<K; k++) {
+ value = a * matrix3_get(model->UU, K-1, K, i,
+ j, k);
+ matrix_add(model->Q, K, i, k, value);
+ }
+ }
+ }
+}
+
+/**
+ * @brief Solve AX = B where A is symmetric positive definite.
+ *
+ * @details
+ * Solve a linear system of equations AX = B where A is symmetric positive
+ * definite. This function is a wrapper for the external LAPACK routine
+ * dposv.
+ *
+ * @param[in] UPLO which triangle of A is stored
+ * @param[in] N order of A
+ * @param[in] NRHS number of columns of B
+ * @param[in,out] A double precision array of size (LDA, N). On
+ * exit contains the upper or lower factor of the
+ * Cholesky factorization of A.
+ * @param[in] LDA leading dimension of A
+ * @param[in,out] B double precision array of size (LDB, NRHS). On
+ * exit contains the N-by-NRHS solution matrix X.
+ * @param[in] LDB the leading dimension of B
+ * @returns info parameter which contains the status of the
+ * computation:
+ * - =0: success
+ * - <0: if -i, the i-th argument had
+ * an illegal value
+ * - >0: if i, the leading minor of A
+ * was not positive definite
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html
+ */
+int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
+ int LDB)
+{
+ extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A,
+ int *LDAp, double *B, int *LDBp, int *INFOp);
+ int INFO;
+ dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO);
+ return INFO;
+}
+
+/**
+ * @brief Solve a system of equations AX = B where A is symmetric.
+ *
+ * @details
+ * Solve a linear system of equations AX = B where A is symmetric. This
+ * function is a wrapper for the external LAPACK routine dsysv.
+ *
+ * @param[in] UPLO which triangle of A is stored
+ * @param[in] N order of A
+ * @param[in] NRHS number of columns of B
+ * @param[in,out] A double precision array of size (LDA, N). On
+ * exit contains the block diagonal matrix D and
+ * the multipliers used to obtain the factor U or
+ * L from the factorization A = U*D*U**T or
+ * A = L*D*L**T.
+ * @param[in] LDA leading dimension of A
+ * @param[in] IPIV integer array containing the details of D
+ * @param[in,out] B double precision array of size (LDB, NRHS). On
+ * exit contains the N-by-NRHS matrix X
+ * @param[in] LDB leading dimension of B
+ * @param[out] WORK double precision array of size max(1,LWORK). On
+ * exit, WORK(1) contains the optimal LWORK
+ * @param[in] LWORK the length of WORK, can be used for determining
+ * the optimal blocksize for dsystrf.
+ * @returns info parameter which contains the status of the
+ * computation:
+ * - =0: success
+ * - <0: if -i, the i-th argument had an
+ * illegal value
+ * - >0: if i, D(i, i) is exactly zero,
+ * no solution can be computed.
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html
+ */
+int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
+ double *B, int LDB, double *WORK, int LWORK)
+{
+ extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A,
+ int *LDAp, int *IPIV, double *B, int *LDBp,
+ double *WORK, int *LWORK, int *INFOp);
+ int INFO;
+ dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO);
+ return INFO;
+}
diff --git a/src/gensvm_pred.c b/src/gensvm_pred.c
index 15a6be6..8a9a43e 100644
--- a/src/gensvm_pred.c
+++ b/src/gensvm_pred.c
@@ -11,14 +11,6 @@
*
*/
-#include <cblas.h>
-#include <math.h>
-
-#include "globals.h"
-#include "libGenSVM.h"
-#include "gensvm.h"
-#include "gensvm_kernel.h"
-#include "gensvm_matrix.h"
#include "gensvm_pred.h"
/**
@@ -51,7 +43,7 @@ void gensvm_predict_labels(struct GenData *testdata, struct GenModel *model,
U = Calloc(double, K*(K-1));
// Generate the simplex matrix
- gensvm_simplex_gen(K, U);
+ gensvm_simplex(K, U);
// Generate the simplex space vectors
cblas_dgemm(
diff --git a/src/gensvm_util.c b/src/gensvm_print.c
index d12a85c..2c00512 100644
--- a/src/gensvm_util.c
+++ b/src/gensvm_print.c
@@ -9,10 +9,8 @@
* output of data and model files. It also contains string functions.
*
*/
-#include <stdarg.h>
-#include "globals.h"
-#include "gensvm_util.h"
+#include "gensvm_print.h"
FILE *GENSVM_OUTPUT_FILE; ///< The #GENSVM_OUTPUT_FILE specifies the
///< output stream to which all output is
@@ -26,65 +24,6 @@ FILE *GENSVM_OUTPUT_FILE; ///< The #GENSVM_OUTPUT_FILE specifies the
///< (temporarily) setting it to NULL.
/**
- * @brief Check if any command line arguments contain string
- *
- * @details
- * Check if any of a given array of command line arguments contains a given
- * string. If the string is found, the index of the string in argv is
- * returned. If the string is not found, 0 is returned.
- *
- * This function is copied from MSVMpack/libMSVM.c.
- *
- * @param[in] argc number of command line arguments
- * @param[in] argv command line arguments
- * @param[in] str string to find in the arguments
- * @returns index of the string in the arguments if found, 0
- * otherwise
- */
-int gensvm_check_argv(int argc, char **argv, char *str)
-{
- int i;
- int arg_str = 0;
- for (i=1; i<argc; i++)
- if (strstr(argv[i], str) != NULL) {
- arg_str = i;
- break;
- }
-
- return arg_str;
-}
-
-/**
- * @brief Check if a command line argument equals a string
- *
- * @details
- * Check if any of the command line arguments is exactly equal to a given
- * string. If so, return the index of the corresponding command line argument.
- * If not, return 0.
- *
- * This function is copied from MSVMpack/libMSVM.c
- *
- * @param[in] argc number of command line arguments
- * @param[in] argv command line arguments
- * @param[in] str string to find in the arguments
- * @returns index of the command line argument that corresponds to
- * the string, 0 if none matches.
- */
-int gensvm_check_argv_eq(int argc, char **argv, char *str)
-{
- int i;
- int arg_str = 0;
- for (i=1; i<argc; i++)
- if (strcmp(argv[i], str) == 0) {
- arg_str = i;
- break;
- }
-
- return arg_str;
-}
-
-
-/**
* @brief Print a given string to the specified output stream
*
* @details
diff --git a/src/gensvm_queue.c b/src/gensvm_queue.c
new file mode 100644
index 0000000..bbf57b2
--- /dev/null
+++ b/src/gensvm_queue.c
@@ -0,0 +1,71 @@
+/**
+ * @file gensvm_queue.c
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Functions for initializing and freeing a GenQueue
+ *
+ */
+
+#include "gensvm_queue.h"
+
+/**
+ * @brief Initialize a GenQueue structure
+ *
+ * @details
+ * A GenQueue structure is initialized and the default value for the
+ * parameters are set. A pointer to the initialized queue is returned.
+ *
+ * @returns initialized GenQueue
+ */
+struct GenQueue *gensvm_init_queue()
+{
+ struct GenQueue *q = Malloc(struct GenQueue, 1);
+
+ q->tasks = NULL;
+ q->N = 0;
+ q->i = 0;
+
+ return q;
+}
+
+/**
+ * @brief Free the GenQueue struct
+ *
+ * @details
+ * Freeing the allocated memory of the GenQueue means freeing every GenTask
+ * struct and then freeing the Queue.
+ *
+ * @param[in] q GenQueue to be freed
+ *
+ */
+void gensvm_free_queue(struct GenQueue *q)
+{
+ long i;
+ for (i=0; i<q->N; i++) {
+ gensvm_free_task(q->tasks[i]);
+ }
+ free(q->tasks);
+ free(q);
+}
+
+/**
+ * @brief Get new GenTask from GenQueue
+ *
+ * @details
+ * Return a pointer to the next GenTask in the GenQueue. If no GenTask
+ * instances are left, NULL is returned. The internal counter GenQueue::i is
+ * used for finding the next GenTask.
+ *
+ * @param[in] q GenQueue instance
+ * @returns pointer to next GenTask
+ *
+ */
+struct GenTask *get_next_task(struct GenQueue *q)
+{
+ long i = q->i;
+ if (i < q->N) {
+ q->i++;
+ return q->tasks[i];
+ }
+ return NULL;
+}
diff --git a/src/gensvm_simplex.c b/src/gensvm_simplex.c
new file mode 100644
index 0000000..1fd5f14
--- /dev/null
+++ b/src/gensvm_simplex.c
@@ -0,0 +1,45 @@
+/**
+ * @file gensvm_simplex.c
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Function for generating the simplex matrix
+ *
+ * @details
+ * Contains the function for generating the simplex matrix for a given number
+ * of classes.
+ *
+ */
+
+#include "gensvm_simplex.h"
+
+/**
+ * @brief Generate matrix of simplex vertex coordinates
+ *
+ * @details
+ * Generate the simplex matrix. Each row of the created
+ * matrix contains the coordinate vector of a single
+ * vertex of the K-simplex in K-1 dimensions. The simplex
+ * generated is a special simplex with edges of length 1.
+ * The simplex matrix U must already have been allocated.
+ *
+ * @param[in] K number of classes
+ * @param[in,out] U simplex matrix of size K * (K-1)
+ */
+void gensvm_simplex(long K, double *U)
+{
+ long i, j;
+ for (i=0; i<K; i++) {
+ for (j=0; j<K-1; j++) {
+ if (i <= j) {
+ matrix_set(U, K-1, i, j,
+ -1.0/sqrt(2.0*(j+1)*(j+2)));
+ } else if (i == j+1) {
+ matrix_set(U, K-1, i, j,
+ sqrt((j+1)/(2.0*(j+2))));
+ } else {
+ matrix_set(U, K-1, i, j, 0.0);
+ }
+ }
+ }
+}
+
diff --git a/src/gensvm_strutil.c b/src/gensvm_strutil.c
index 97dec81..b4c5c54 100644
--- a/src/gensvm_strutil.c
+++ b/src/gensvm_strutil.c
@@ -1,5 +1,5 @@
/**
- * @file strutil.c
+ * @file gensvm_strutil.c
* @author Gertjan van den Burg
* @date January, 2014
* @brief Utility functions for dealing with strings
@@ -9,7 +9,6 @@
* format and checking start and ends of strings.
*/
-#include "globals.h"
#include "gensvm_strutil.h"
/**
diff --git a/src/gensvm_sv.c b/src/gensvm_sv.c
index c61347a..8a8e6d3 100644
--- a/src/gensvm_sv.c
+++ b/src/gensvm_sv.c
@@ -10,8 +10,7 @@
*
*/
-#include "gensvm.h"
-#include "gensvm_matrix.h"
+#include "gensvm_sv.h"
/**
* @brief Calculate the number of support vectors in a model
diff --git a/src/gensvm_task.c b/src/gensvm_task.c
new file mode 100644
index 0000000..587c4c4
--- /dev/null
+++ b/src/gensvm_task.c
@@ -0,0 +1,55 @@
+/**
+ * @file gensvm_task.c
+ * @author Gertjan van den Burg
+ * @date May, 2016
+ * @brief Functions for initializing and freeing a GenTask
+ *
+ */
+
+#include "gensvm_task.h"
+
+/**
+ * @brief Initialize a GenTask structure
+ *
+ * @details
+ * A GenTask structure is initialized and the default value for the
+ * parameters are set. A pointer to the initialized GenTask is returned.
+ *
+ * @returns initialized GenTask
+ */
+struct GenTask *gensvm_init_task()
+{
+ struct GenTask *t = Malloc(struct GenTask, 1);
+
+ t->kerneltype = K_LINEAR;
+ t->weight_idx = 1;
+ t->folds = 10;
+ t->ID = -1;
+ t->p = 1.0;
+ t->kappa = 0.0;
+ t->lambda = 1.0;
+ t->epsilon = 1e-6;
+ t->kernelparam = NULL;
+ t->train_data = NULL;
+ t->test_data = NULL;
+ t->performance = 0.0;
+
+ return t;
+}
+
+/**
+ * @brief Free the GenTask struct
+ *
+ * @details
+ * Freeing the allocated memory of the GenTask means freeing _only_ the
+ * kernelparam array, and the task itself. The datasets are not freed, as
+ * these are shared between all tasks.
+ *
+ * @param[in] t GenTask to be freed
+ *
+ */
+void gensvm_free_task(struct GenTask *t)
+{
+ free(t->kernelparam);
+ free(t);
+}
diff --git a/src/gensvm_timer.c b/src/gensvm_timer.c
index 9802091..04d93b1 100644
--- a/src/gensvm_timer.c
+++ b/src/gensvm_timer.c
@@ -6,13 +6,10 @@
*
* @details
* This file contains a simple function for calculating the time in seconds
- * elapsed between two clock() calls. It also contains a function for
- * generating a string of the current time, used in writing output files.
+ * elapsed between two clock() calls.
+ *
*/
-#include <time.h>
-
-#include "globals.h"
#include "gensvm_timer.h"
/**
@@ -22,53 +19,7 @@
* @param[in] e_time end time
* @returns time elapsed in seconds
*/
-double elapsed_time(clock_t s_time, clock_t e_time)
+double gensvm_elapsed_time(clock_t s_time, clock_t e_time)
{
return ((double) (e_time - s_time))/((double) CLOCKS_PER_SEC);
}
-
-/**
- * @brief Get time string with UTC offset
- *
- * @details
- * Create a string for the current system time. Include an offset of UTC for
- * consistency. The format of the generated string is "DDD MMM D HH:MM:SS
- * YYYY (UTC +HH:MM)", e.g. "Fri Aug 9, 12:34:56 2013 (UTC +02:00)".
- *
- * @param[in,out] buffer allocated string buffer, on exit contains
- * formatted string
- *
- */
-void get_time_string(char *buffer)
-{
- int diff, hours, minutes;
- char timestr[MAX_LINE_LENGTH];
- time_t current_time, lt, gt;
- struct tm *lclt;
-
- // get current time (in epoch)
- current_time = time(NULL);
- if (current_time == ((time_t)-1)) {
- fprintf(stderr, "Failed to compute the current time.\n");
- return;
- }
-
- // convert time to local time and create a string
- lclt = localtime(&current_time);
- strftime(timestr, MAX_LINE_LENGTH, "%c", lclt);
- if (timestr == NULL) {
- fprintf(stderr, "Failed to convert time to string.\n");
- return;
- }
-
- // calculate the UTC offset including DST
- lt = mktime(localtime(&current_time));
- gt = mktime(gmtime(&current_time));
- diff = -difftime(gt, lt);
- hours = (diff/3600);
- minutes = (diff%3600)/60;
- if (lclt->tm_isdst == 1)
- hours++;
-
- sprintf(buffer, "%s (UTC %+03i:%02i)", timestr, hours, minutes);
-}
diff --git a/src/libGenSVM.c b/src/libGenSVM.c
index 8c22f3e..8917a3e 100644
--- a/src/libGenSVM.c
+++ b/src/libGenSVM.c
@@ -20,308 +20,4 @@
#include "gensvm.h"
#include "gensvm_matrix.h"
-inline double rnd() { return (double) rand()/0x7FFFFFFF; }
-
-/**
- * @brief Generate matrix of simplex vertex coordinates
- *
- * @details
- * Generate the simplex matrix. Each row of the created
- * matrix contains the coordinate vector of a single
- * vertex of the K-simplex in K-1 dimensions. The simplex
- * generated is a special simplex with edges of length 1.
- * The simplex matrix U must already have been allocated.
- *
- * @param[in] K number of classes
- * @param[in,out] U simplex matrix of size K * (K-1)
- */
-void gensvm_simplex_gen(long K, double *U)
-{
- long i, j;
- for (i=0; i<K; i++) {
- for (j=0; j<K-1; j++) {
- if (i <= j) {
- matrix_set(U, K-1, i, j, -1.0/sqrt(2.0*(j+1)*(j+2)));
- } else if (i == j+1) {
- matrix_set(U, K-1, i, j, sqrt((j+1)/(2.0*(j+2))));
- } else {
- matrix_set(U, K-1, i, j, 0.0);
- }
- }
- }
-}
-
-/**
- * @brief Generate the category matrix
- *
- * @details
- * Generate the category matrix R. The category matrix has 1's everywhere
- * except at the column corresponding to the label of instance i, there the
- * element is 0.
- *
- * @param[in,out] model corresponding GenModel
- * @param[in] dataset corresponding GenData
- *
- */
-void gensvm_category_matrix(struct GenModel *model, struct GenData *dataset)
-{
- long i, j;
- long n = model->n;
- long K = model->K;
-
- for (i=0; i<n; i++) {
- for (j=0; j<K; j++) {
- if (dataset->y[i] != j+1)
- matrix_set(model->R, K, i, j, 1.0);
- else
- matrix_set(model->R, K, i, j, 0.0);
- }
- }
-}
-
-/**
- * @brief Generate the simplex difference matrix
- *
- * @details
- * The simplex difference matrix is a 3D matrix which is constructed
- * as follows. For each instance i, the difference vectors between the row of
- * the simplex matrix corresponding to the class label of instance i and the
- * other rows of the simplex matrix are calculated. These difference vectors
- * are stored in a matrix, which is one horizontal slice of the 3D matrix.
- *
- * @param[in,out] model the corresponding GenModel
- * @param[in] data the corresponding GenData
- *
- */
-void gensvm_simplex_diff(struct GenModel *model, struct GenData *data)
-{
- long i, j, k;
- double value;
-
- long n = model->n;
- long K = model->K;
-
- for (i=0; i<n; i++) {
- for (j=0; j<K-1; j++) {
- for (k=0; k<K; k++) {
- value = matrix_get(model->U, K-1, data->y[i]-1, j);
- value -= matrix_get(model->U, K-1, k, j);
- matrix3_set(model->UU, K-1, K, i, j, k, value);
- }
- }
- }
-}
-
-/**
- * @brief Calculate the scalar errors
- *
- * @details
- * Calculate the scalar errors q based on the current estimate of V, and
- * store these in Q. It is assumed that the memory for Q has already been
- * allocated. In addition, the matrix ZV is calculated here. It is assigned
- * to a pre-allocated block of memory, which is passed to this function.
- *
- * @param[in,out] model the corresponding GenModel
- * @param[in] data the corresponding GenData
- * @param[in,out] ZV a pointer to a memory block for ZV. On exit
- * this block is updated with the new ZV matrix
- * calculated with GenModel::V.
- *
- */
-void gensvm_calculate_errors(struct GenModel *model, struct GenData *data,
- double *ZV)
-{
- long i, j, k;
- double a, value;
-
- long n = model->n;
- long m = model->m;
- long K = model->K;
-
- cblas_dgemm(
- CblasRowMajor,
- CblasNoTrans,
- CblasNoTrans,
- n,
- K-1,
- m+1,
- 1.0,
- data->Z,
- m+1,
- model->V,
- K-1,
- 0.0,
- ZV,
- K-1);
-
- Memset(model->Q, double, n*K);
- for (i=0; i<n; i++) {
- for (j=0; j<K-1; j++) {
- a = matrix_get(ZV, K-1, i, j);
- for (k=0; k<K; k++) {
- value = a * matrix3_get(model->UU, K-1, K, i,
- j, k);
- matrix_add(model->Q, K, i, k, value);
- }
- }
- }
-}
-
-/**
- * @brief Calculate the Huber hinge errors
- *
- * @details
- * For each of the scalar errors in Q the Huber hinge errors are
- * calculated. The Huber hinge is here defined as
- * @f[
- * h(q) =
- * \begin{dcases}
- * 1 - q - \frac{\kappa + 1}{2} & \text{if } q \leq -\kappa \\
- * \frac{1}{2(\kappa + 1)} ( 1 - q)^2 & \text{if } q \in (-\kappa, 1] \\
- * 0 & \text{if } q > 1
- * \end{dcases}
- * @f]
- *
- * @param[in,out] model the corresponding GenModel
- */
-void gensvm_calculate_huber(struct GenModel *model)
-{
- long i, j;
- double q, value;
-
- for (i=0; i<model->n; i++) {
- for (j=0; j<model->K; j++) {
- q = matrix_get(model->Q, model->K, i, j);
- value = 0.0;
- if (q <= -model->kappa) {
- value = 1.0 - q - (model->kappa+1.0)/2.0;
- } else if (q <= 1.0) {
- value = 1.0/(2.0*model->kappa+2.0)*pow(1.0 - q, 2.0);
- }
- matrix_set(model->H, model->K, i, j, value);
- }
- }
-}
-
-/**
- * @brief seed the matrix V from an existing model or using rand
- *
- * @details
- * The matrix V must be seeded before the main_loop() can start.
- * This can be done by either seeding it with random numbers or
- * using the solution from a previous model on the same dataset
- * as initial seed. The latter option usually allows for a
- * significant improvement in the number of iterations necessary
- * because the seeded model V is closer to the optimal V.
- *
- * @param[in] from_model GenModel from which to copy V
- * @param[in,out] to_model GenModel to which V will be copied
- */
-void gensvm_seed_model_V(struct GenModel *from_model,
- struct GenModel *to_model, struct GenData *data)
-{
- long i, j, k;
- double cmin, cmax, value;
-
- long n = data->n;
- long m = data->m;
- long K = data->K;
-
- if (from_model == NULL) {
- for (i=0; i<m+1; i++) {
- cmin = 1e100;
- cmax = -1e100;
- for (k=0; k<n; k++) {
- value = matrix_get(data->Z, m+1, k, i);
- cmin = minimum(cmin, value);
- cmax = maximum(cmax, value);
- }
- for (j=0; j<K-1; j++) {
- cmin = (abs(cmin) < 1e-10) ? -1 : cmin;
- cmax = (abs(cmax) < 1e-10) ? 1 : cmax;
- value = 1.0/cmin + (1.0/cmax - 1.0/cmin)*rnd();
- matrix_set(to_model->V, K-1, i, j, value);
- }
- }
- } else {
- for (i=0; i<m+1; i++)
- for (j=0; j<K-1; j++) {
- value = matrix_get(from_model->V, K-1, i, j);
- matrix_set(to_model->V, K-1, i, j, value);
- }
- }
-}
-
-/**
- * @brief Use step doubling
- *
- * @details
- * Step doubling can be used to speed up the maorization algorithm. Instead of
- * using the value at the minimimum of the majorization function, the value
- * ``opposite'' the majorization point is used. This can essentially cut the
- * number of iterations necessary to reach the minimum in half.
- *
- * @param[in] model GenModel containing the augmented parameters
- */
-void gensvm_step_doubling(struct GenModel *model)
-{
- long i, j;
- double value;
-
- long m = model->m;
- long K = model->K;
-
- for (i=0; i<m+1; i++) {
- for (j=0; j<K-1; j++) {
- matrix_mul(model->V, K-1, i, j, 2.0);
- value = - matrix_get(model->Vbar, K-1, i, j);
- matrix_add(model->V, K-1, i, j, value);
- }
- }
-}
-
-/**
- * @brief Initialize instance weights
- *
- * @details
- * Instance weights can for instance be used to add additional weights to
- * instances of certain classes. Two default weight possibilities are
- * implemented here. The first is unit weights, where each instance gets
- * weight 1.
- *
- * The second are group size correction weights, which are calculated as
- * @f[
- * \rho_i = \frac{n}{Kn_k} ,
- * @f]
- * where @f$ n_k @f$ is the number of instances in group @f$ k @f$ and
- * @f$ y_i = k @f$.
- *
- * @param[in] data GenData with the dataset
- * @param[in,out] model GenModel with the weight specification. On
- * exit GenModel::rho contains the instance
- * weights.
- */
-void gensvm_initialize_weights(struct GenData *data, struct GenModel *model)
-{
- long *groups;
- long i;
-
- long n = model->n;
- long K = model->K;
-
- if (model->weight_idx == 1) {
- for (i=0; i<n; i++)
- model->rho[i] = 1.0;
- }
- else if (model->weight_idx == 2) {
- groups = Calloc(long, K);
- for (i=0; i<n; i++)
- groups[data->y[i]-1]++;
- for (i=0; i<n; i++)
- model->rho[i] = ((double) n)/((double) (groups[data->y[i]-1]*K));
- } else {
- fprintf(stderr, "Unknown weight specification.\n");
- exit(1);
- }
-}