aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorGertjan van den Burg <gertjanvandenburg@gmail.com>2014-01-15 00:35:21 +0100
committerGertjan van den Burg <gertjanvandenburg@gmail.com>2014-01-15 00:35:21 +0100
commitddbd423f54e2fd92659a0d277ee844659eee8ba1 (patch)
tree316a82d463009364a6cdf07892bc3e28330698db /include
parentremove note in read_data (diff)
downloadgensvm-ddbd423f54e2fd92659a0d277ee844659eee8ba1.tar.gz
gensvm-ddbd423f54e2fd92659a0d277ee844659eee8ba1.zip
added documentation, restart git usage, start implementing kernels
Diffstat (limited to 'include')
-rw-r--r--include/MSVMMaj.h46
-rw-r--r--include/crossval.h12
-rw-r--r--include/globals.h22
-rw-r--r--include/kernel.h11
-rw-r--r--include/libMSVMMaj.h17
-rw-r--r--include/msvmmaj.h98
-rw-r--r--include/msvmmaj_init.h23
-rw-r--r--include/msvmmaj_kernel.h32
-rw-r--r--include/msvmmaj_lapack.h23
-rw-r--r--include/msvmmaj_matrix.h (renamed from include/matrix.h)15
-rw-r--r--include/msvmmaj_pred.h11
-rw-r--r--include/msvmmaj_train.h12
-rw-r--r--include/msvmmaj_train_dataset.h76
-rw-r--r--include/mylapack.h11
-rw-r--r--include/parallel.h13
-rw-r--r--include/strutil.h12
-rw-r--r--include/timer.h15
-rw-r--r--include/types.h40
-rw-r--r--include/util.h15
19 files changed, 405 insertions, 99 deletions
diff --git a/include/MSVMMaj.h b/include/MSVMMaj.h
deleted file mode 100644
index de99f91..0000000
--- a/include/MSVMMaj.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef MSVMMAJ_H
-#define MSVMMAJ_H
-
-#include "globals.h"
-#include "types.h"
-
-/*
- Model structure
-*/
-struct MajModel {
- int weight_idx;
- long K;
- long n;
- long m;
- double epsilon;
- double p;
- double kappa;
- double lambda;
- double *W;
- double *t;
- double *V;
- double *Vbar;
- double *U;
- double *UU;
- double *Q;
- double *H;
- double *R;
- double *rho;
- double training_error;
- char *data_file;
- KernelType kerneltype;
- double *kernelparam;
-};
-
-/*
- Data structure
-*/
-struct MajData {
- long K;
- long n;
- long m;
- long *y;
- double *Z;
-};
-
-#endif
diff --git a/include/crossval.h b/include/crossval.h
index 0794622..0dff0b9 100644
--- a/include/crossval.h
+++ b/include/crossval.h
@@ -1,3 +1,15 @@
+/**
+ * @file crossval.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for crossval.c
+ *
+ * @details
+ * Contains function declarations for functions needed for performing cross
+ * validation on MajData structures.
+ *
+ */
+
#ifndef CROSSVAL_H
#define CROSSVAL_H
diff --git a/include/globals.h b/include/globals.h
index 8420f76..55fb6c4 100644
--- a/include/globals.h
+++ b/include/globals.h
@@ -1,5 +1,23 @@
-#ifndef GLOBALS_H
-#define GLOBALS_H
+/**
+ * @file globals.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Global definitions
+ *
+ * @details
+ * This header file contains defines and includes which are used in many
+ * parts of the program. Most notable are the Calloc, Malloc and Memset
+ * defines, which are commonly used to allocate and initialize memory. These
+ * macros are shorthands for their lowercase counterparts.
+ *
+ * Furthermore, maximum and minimum functions are defined here. These
+ * functions have their own include guards, to ensure potential linked
+ * libraries don't conflict with these definitions.
+ *
+ */
+
+#ifndef MSVMMAJ_GLOBALS_H
+#define MSVMMAJ_GLOBALS_H
#include <stdio.h>
#include <stdlib.h>
diff --git a/include/kernel.h b/include/kernel.h
deleted file mode 100644
index ac5c35d..0000000
--- a/include/kernel.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef KERNEL_H
-#define KERNEL_H
-
-#include "globals.h"
-#include "types.h"
-
-// forward declarations
-struct MajData;
-
-// function declarations
-
diff --git a/include/libMSVMMaj.h b/include/libMSVMMaj.h
index 21efc2f..b7261dc 100644
--- a/include/libMSVMMaj.h
+++ b/include/libMSVMMaj.h
@@ -1,3 +1,20 @@
+/**
+ * @file libMSVMMaj.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for the core MSVMMaj library libMSVMMaj.c
+ *
+ * @details
+ * The core computational routines for MSVMMaj are defined in libMSVMMaj.c.
+ * This file contains function declarations for these functions.
+ *
+ */
+
+/**
+ * @todo
+ * rename this file and libMSVMMaj.c to correspond with the lowercase convention.
+ * Also change the name of the include guard.
+ */
#ifndef LIBMSVMMAJ_H
#define LIBMSVMMAJ_H
diff --git a/include/msvmmaj.h b/include/msvmmaj.h
new file mode 100644
index 0000000..d67ad8b
--- /dev/null
+++ b/include/msvmmaj.h
@@ -0,0 +1,98 @@
+/**
+ * @file msvmmaj.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Definitions for common structures
+ *
+ * @details
+ * Contains documentation and declarations of MajModel and MajData.
+ *
+ */
+
+#ifndef MSVMMAJ_H
+#define MSVMMAJ_H
+
+#include "globals.h"
+#include "types.h"
+
+/**
+ * @brief A structure to represent a single MSVMMaj model.
+ *
+ * @param weight_idx which weights to use (1 = unit, 2 = group)
+ * @param K number of classes in the dataset
+ * @param n number of instances in the dataset
+ * @param m number of predictors in the dataset
+ * @param epsilon stopping criterion
+ * @param p parameter for the L_p norm
+ * @param kappa parameter for the Huber hinge
+ * @param lambda regularization parameter
+ * @param *W pointer to the weight matrix
+ * @param *t pointer to the translation vector
+ * @param *V pointer to the augmented weight matrix
+ * @param *Vbar pointer to the augmented weight matrix from a
+ * previous iteration
+ * @param *U pointer to the simplex matrix
+ * @param *UU pointer to the 3D simplex difference matrix
+ * @param *Q pointer to the error matrix
+ * @param *H pointer to the Huber weighted error matrix
+ * @param *R pointer to the 0-1 auxiliary matrix
+ * @param *rho pointer to the instance weight vector
+ * @param training_error error after training has completed
+ * @param *data_file pointer to the filename of the data
+ * @param kerneltype kernel to be used in the model
+ * @param *kernelparam pointer to the vector of kernel parameters
+ * @param use_cholesky whether the Cholesky decomposition should be
+ * used
+ *
+ */
+struct MajModel {
+ int weight_idx;
+ long K;
+ long n;
+ long m;
+ double epsilon;
+ double p;
+ double kappa;
+ double lambda;
+ double *W;
+ double *t;
+ double *V;
+ double *Vbar;
+ double *U;
+ double *UU;
+ double *Q;
+ double *H;
+ double *R;
+ double *rho;
+ double training_error;
+ char *data_file;
+ KernelType kerneltype;
+ double *kernelparam;
+ bool use_cholesky;
+};
+
+/**
+ * @brief A structure to represent the data.
+ *
+ * @param K number of classes
+ * @param n number of instances
+ * @param m number of predictors
+ * @param *y pointer to vector of class labels
+ * @param *Z pointer to augmented data matrix
+ * @param kerneltype kerneltype used in MajData::Z
+ * @param *kernelparam kernel parameters used in MajData::Z
+ * @param use_cholesky whether the Cholesky decomposition is used in MajData::Z
+ *
+ */
+struct MajData {
+ long K;
+ long n;
+ long m;
+ long *y;
+ double *Z;
+ KernelType kerneltype;
+ double *kernelparam;
+ bool use_cholesky;
+};
+
+#endif
diff --git a/include/msvmmaj_init.h b/include/msvmmaj_init.h
new file mode 100644
index 0000000..6e2e36f
--- /dev/null
+++ b/include/msvmmaj_init.h
@@ -0,0 +1,23 @@
+/**
+ * @file msvmmaj_init.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for msvmmaj_init.c
+ *
+ * @details
+ * Contains function declarations for the initialization functions for
+ * MajModel and MajData structures.
+ */
+
+#ifndef MSVMMAJ_INIT_H
+#define MSVMMAJ_INIT_H
+
+// forward declaration
+struct MajData;
+struct MajModel;
+
+struct MajModel *msvmmaj_init_model();
+
+struct MajData *msvmmaj_init_data();
+
+#endif
diff --git a/include/msvmmaj_kernel.h b/include/msvmmaj_kernel.h
new file mode 100644
index 0000000..69bf267
--- /dev/null
+++ b/include/msvmmaj_kernel.h
@@ -0,0 +1,32 @@
+/**
+ * @file msvmmaj_kernel.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for kernel functionality
+ *
+ * @details
+ * Contains function declarations for computing the kernel matrix
+ * in nonlinear MSVMMaj. Additional kernel functions should be
+ * included here and in msvmmaj_kernel.c
+ *
+ */
+
+#ifndef MSVMMAJ_KERNEL_H
+#define MSVMMAJ_KERNEL_H
+
+#include "globals.h"
+
+// forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data);
+
+double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam,
+ long n);
+double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam,
+ long n);
+double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam,
+ long n);
+#endif
diff --git a/include/msvmmaj_lapack.h b/include/msvmmaj_lapack.h
new file mode 100644
index 0000000..766a475
--- /dev/null
+++ b/include/msvmmaj_lapack.h
@@ -0,0 +1,23 @@
+/**
+ * @file msvmmaj_lapack.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_lapack.c
+ *
+ * @details
+ * Function declarations for external LAPACK functions
+ *
+ */
+
+#ifndef MSVMMAJ_LAPACK_H
+#define MSVMMAJ_LAPACK_H
+
+#include "globals.h"
+
+int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
+ int LDB);
+int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
+ double *B, int LDB, double *WORK, int LWORK);
+int dpotrf(char UPLO, int N, double *A, int LDA);
+
+#endif
diff --git a/include/matrix.h b/include/msvmmaj_matrix.h
index 5f0a441..8f5ca59 100644
--- a/include/matrix.h
+++ b/include/msvmmaj_matrix.h
@@ -1,5 +1,16 @@
-#ifndef MATRIX_H
-#define MATRIX_H
+/**
+ * @file msvmmaj_matrix.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_matrix.c
+ *
+ * @details
+ * Contains function declarations for functions useful for dealing with matrices.
+ *
+ */
+
+#ifndef MSVMMAJ_MATRIX_H
+#define MSVMMAJ_MATRIX_H
#include "globals.h"
diff --git a/include/msvmmaj_pred.h b/include/msvmmaj_pred.h
index 952389c..ce22b10 100644
--- a/include/msvmmaj_pred.h
+++ b/include/msvmmaj_pred.h
@@ -1,3 +1,14 @@
+/**
+ * @file msvmmaj_pred.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_pred.c
+ *
+ * @details
+ * Contains function declarations for prediction functions.
+ *
+ */
+
#ifndef MSVMMAJ_PRED_H
#define MSVMMAJ_PRED_H
diff --git a/include/msvmmaj_train.h b/include/msvmmaj_train.h
index 4fb198e..835100f 100644
--- a/include/msvmmaj_train.h
+++ b/include/msvmmaj_train.h
@@ -1,3 +1,15 @@
+/**
+ * @file msvmmaj_train.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_train.c
+ *
+ * @details
+ * Contains function declarations for functions used to train a single
+ * MajModel.
+ *
+ */
+
#ifndef MSVMMAJ_TRAIN_H
#define MSVMMAJ_TRAIN_H
diff --git a/include/msvmmaj_train_dataset.h b/include/msvmmaj_train_dataset.h
index fdcdb4c..5248b4a 100644
--- a/include/msvmmaj_train_dataset.h
+++ b/include/msvmmaj_train_dataset.h
@@ -1,9 +1,39 @@
+/**
+ * @file msvmmaj_train_dataset.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Structs and functions necessary for the grid search
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for this queue and a single
+ * task in a queue, as well as a structure for the complete training
+ * scheme. Function declarations are also included.
+ *
+ */
+
#ifndef MSVMMAJ_TRAIN_DATASET_H
#define MSVMMAJ_TRAIN_DATASET_H
#include "globals.h"
#include "types.h"
+/**
+ * @brief A structure for a single task in the queue.
+ *
+ * @param folds number of folds in cross validation
+ * @param ID numeric id of the task in the queue
+ * @param weight_idx parameter for the MajModel
+ * @param p parameter for the MajModel
+ * @param kappa parameter for the MajModel
+ * @param lambda parameter for the MajModel
+ * @param epsilon parameter for the MajModel
+ * @param kerneltype parameter for the MajModel
+ * @param *kernel_param kernel parameters for the MajModel
+ * @param *train_data pointer to the training data
+ * @param *test_data pointer to the test data (if any)
+ * @param performance performance after cross validation
+ */
struct Task {
KernelType kerneltype;
int weight_idx;
@@ -19,14 +49,54 @@ struct Task {
double performance;
};
+/**
+ * @brief Simple task queue.
+ *
+ * This struct is basically just an array of pointers to Task instances,
+ * with a length and an index of the current task.
+ *
+ * @param **tasks array of pointers to Task structs
+ * @param N size of task array
+ * @param i index used for keeping track of the queue
+ */
struct Queue {
struct Task **tasks;
long N;
long i;
};
+/**
+ * @brief Structure for describing the entire grid search
+ *
+ * @param traintype type of training to use
+ * @param kerneltype type of kernel to use throughout training
+ * @param repeats number of repeats to be done after the grid
+ * search to find the parameter set with the
+ * most consistent high performance
+ * @param folds number of folds in cross validation
+ * @param Np size of the array of p values
+ * @param Nl size of the array of lambda values
+ * @param Nk size of the array of kappa values
+ * @param Ne size of the array of epsilon values
+ * @param Nw size of the array of weight_idx values
+ * @param Ng size of the array of gamma values
+ * @param Nc size of the array of coef values
+ * @param Nd size of the array of degree values
+ * @param *weight_idxs array of weight_idxs
+ * @param *ps array of p values
+ * @param *lambdas array of lambda values
+ * @param *kappas array of kappa values
+ * @param *epsilons array of epsilon values
+ * @param *gammas array of gamma values
+ * @param *coefs array of coef values
+ * @param *degrees array of degree values
+ * @param *train_data_file filename of train data file
+ * @param *test_data_file filename of test data file
+ *
+ */
struct Training {
TrainType traintype;
+ KernelType kerneltype;
long repeats;
long folds;
long Np;
@@ -34,11 +104,17 @@ struct Training {
long Nk;
long Ne;
long Nw;
+ long Ng;
+ long Nc;
+ long Nd;
int *weight_idxs;
double *ps;
double *lambdas;
double *kappas;
double *epsilons;
+ double *gammas;
+ double *coefs;
+ double *degrees;
char *train_data_file;
char *test_data_file;
};
diff --git a/include/mylapack.h b/include/mylapack.h
deleted file mode 100644
index 4c79e0e..0000000
--- a/include/mylapack.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef MYLAPACK_H
-#define MYLAPACK_H
-
-#include "globals.h"
-
-int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
- int LDB);
-int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
- double *B, int LDB, double *WORK, int LWORK);
-
-#endif
diff --git a/include/parallel.h b/include/parallel.h
deleted file mode 100644
index 8747347..0000000
--- a/include/parallel.h
+++ /dev/null
@@ -1,13 +0,0 @@
-
-struct Task {
- enum KernelType kernel_type;
- int weight_idx;
- double epsilon;
- double p;
- double kappa;
- double lambda;
- double *kernel_param;
- struct MajData **data;
-}
-
-
diff --git a/include/strutil.h b/include/strutil.h
index 66722ae..740fde1 100644
--- a/include/strutil.h
+++ b/include/strutil.h
@@ -1,3 +1,15 @@
+/**
+ * @file strutil.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for strutil.c
+ *
+ * @details
+ * Function declarations for useful string functions used in parsing
+ * input files.
+ *
+ */
+
#ifndef STRUTIL_H
#define STRUTIL_H
diff --git a/include/timer.h b/include/timer.h
index 8a737e0..d4d4d23 100644
--- a/include/timer.h
+++ b/include/timer.h
@@ -1,5 +1,16 @@
-#ifndef TIMER_H
-#define TIMER_H
+/**
+ * @file timer.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for timer.c
+ *
+ * @details
+ * Function declaration for timer function used to measure computation time.
+ *
+ */
+
+#ifndef MSVMMAJ_TIMER_H
+#define MSVMMAJ_TIMER_H
#include "globals.h"
diff --git a/include/types.h b/include/types.h
index b4db8d8..f6d008b 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1,21 +1,41 @@
-#ifndef TYPES_H
-#define TYPES_H
+/**
+ * @file types.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Definitions of common types
+ *
+ * @details
+ * Here common types used throughout the program are defined.
+ *
+ */
+#ifndef MSVMMAJ_TYPES_H
+#define MSVMMAJ_TYPES_H
+
+/**
+ * @brief Implementation of true and false
+ */
typedef enum {
- false,
- true
+ false=0, /**< false keyword, corresponding to 0. */
+ true=1 /**< true keyword, corresponding to 1. */
} bool;
+/**
+ * @brief type of training used in parameter grid search
+ */
typedef enum {
- CV=0,
- TT=1
+ CV=0, /**< cross validation */
+ TT=1 /**< data with existing train/test split */
} TrainType;
+/**
+ * @brief type of kernel used in training
+ */
typedef enum {
- K_LINEAR=0,
- K_POLY=1,
- K_RBF=2,
- K_SIGMOID=3,
+ K_LINEAR=0, /**< Linear kernel */
+ K_POLY=1, /**< Polynomial kernel */
+ K_RBF=2, /**< RBF kernel */
+ K_SIGMOID=3, /**< Sigmoid kernel */
} KernelType;
#endif
diff --git a/include/util.h b/include/util.h
index facae79..995a927 100644
--- a/include/util.h
+++ b/include/util.h
@@ -1,5 +1,16 @@
-#ifndef UTIL_H
-#define UTIL_H
+/**
+ * @file util.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for util.c
+ *
+ * @details
+ * Function declarations for utility functions of the program.
+ *
+ */
+
+#ifndef MSVMMAJ_UTIL_H
+#define MSVMMAJ_UTIL_H
#include "globals.h"