aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/GenSVMgrid.c 42
-rw-r--r-- src/GenSVMtraintest.c 100
-rw-r--r-- src/gensvm_consistency.c 40
-rw-r--r-- src/gensvm_grid.c 20
-rw-r--r-- src/gensvm_gridsearch.c 7
-rw-r--r-- src/gensvm_kernel.c 24
-rw-r--r-- src/gensvm_update.c 13
7 files changed, 187 insertions, 59 deletions
diff --git a/src/GenSVMgrid.c b/src/GenSVMgrid.c
index 681b90c..14b7457 100644
--- a/src/GenSVMgrid.c
+++ b/src/GenSVMgrid.c
@@ -42,23 +42,33 @@
#include "gensvm_gridsearch.h"
#include "gensvm_consistency.h"
+/**
+ * Minimal number of command line arguments
+ */
#define MINARGS 2
extern FILE *GENSVM_OUTPUT_FILE;
extern FILE *GENSVM_ERROR_FILE;
// function declarations
-void exit_with_help();
+void exit_with_help(char **argv);
void parse_command_line(int argc, char **argv, char *input_filename);
void read_grid_from_file(char *input_filename, struct GenGrid *grid);
/**
* @brief Help function
+ *
+ * @details
+ * Print help for this program and exit. Note that VERSION is provided by the
+ * Makefile.
+ *
+ * @param[in] argv command line arguments
+ *
*/
-void exit_with_help()
+void exit_with_help(char **argv)
{
printf("This is GenSVM, version %1.1f\n\n", VERSION);
- printf("Usage: trainGenSVMdataset [options] grid_file\n");
+ printf("Usage: %s [options] grid_file\n", argv[0]);
printf("Options:\n");
printf("-h | -help : print this help.\n");
printf("-q : quiet mode (no output, not even errors!)\n");
@@ -67,7 +77,7 @@ void exit_with_help()
}
/**
- * @brief Main interface function for trainGenSVMdataset
+ * @brief Main interface function for GenSVMgrid
*
* @details
* Main interface for the command line program. A given grid file which
@@ -81,6 +91,8 @@ void exit_with_help()
* @param[in] argc number of command line arguments
* @param[in] argv array of command line arguments
*
+ * @return exit status
+ *
*/
int main(int argc, char **argv)
{
@@ -93,7 +105,7 @@ int main(int argc, char **argv)
if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help")
|| gensvm_check_argv_eq(argc, argv, "-h") )
- exit_with_help();
+ exit_with_help(argv);
parse_command_line(argc, argv, input_filename);
note("Reading grid file\n");
@@ -153,7 +165,7 @@ void parse_command_line(int argc, char **argv, char *input_filename)
for (i=1; i<argc; i++) {
if (argv[i][0] != '-') break;
if (++i>=argc)
- exit_with_help();
+ exit_with_help(argv);
switch (argv[i-1][1]) {
case 'q':
GENSVM_OUTPUT_FILE = NULL;
@@ -163,16 +175,28 @@ void parse_command_line(int argc, char **argv, char *input_filename)
default:
fprintf(stderr, "Unknown option: -%c\n",
argv[i-1][1]);
- exit_with_help();
+ exit_with_help(argv);
}
}
if (i >= argc)
- exit_with_help();
+ exit_with_help(argv);
strcpy(input_filename, argv[i]);
}
+/**
+ * @brief Parse the kernel string from the grid file
+ *
+ * @details
+ * This is a utility function for the read_grid_from_file() function, to keep
+ * the main code a bit shorter. It reads the line from the given buffer and
+ * returns the corresponding KernelType.
+ *
+ * @param[in] kernel_line line from the file with the kernel
+ * specification
+ * @return the corresponding KernelType
+ */
KernelType parse_kernel_str(char *kernel_line)
{
if (str_endswith(kernel_line, "LINEAR\n")) {
@@ -186,7 +210,7 @@ KernelType parse_kernel_str(char *kernel_line)
} else {
fprintf(stderr, "Unknown kernel specified on line: %s\n",
kernel_line);
- exit(1);
+ exit(EXIT_FAILURE);
}
}
diff --git a/src/GenSVMtraintest.c b/src/GenSVMtraintest.c
index ec0eb91..4f4cabb 100644
--- a/src/GenSVMtraintest.c
+++ b/src/GenSVMtraintest.c
@@ -33,45 +33,78 @@
#include "gensvm_train.h"
#include "gensvm_predict.h"
+/**
+ * Minimal number of command line arguments
+ */
#define MINARGS 2
extern FILE *GENSVM_OUTPUT_FILE;
extern FILE *GENSVM_ERROR_FILE;
// function declarations
-void exit_with_help();
+void exit_with_help(char **argv);
void parse_command_line(int argc, char **argv, struct GenModel *model,
char **model_inputfile, char **training_inputfile,
char **testing_inputfile, char **model_outputfile,
char **prediction_outputfile);
-void exit_with_help()
+/**
+ * @brief Help function
+ *
+ * @details
+ * Print help for this program and exit. Note that the VERSION is defined in
+ * the Makefile.
+ *
+ * @param[in] argv command line arguments
+ *
+ */
+void exit_with_help(char **argv)
{
printf("This is GenSVM, version %1.1f\n\n", VERSION);
- printf("Usage: ./gensvm [options] training_data [test_data]\n");
+ printf("Usage: %s [options] training_data [test_data]\n\n", argv[0]);
printf("Options:\n");
- printf("-c coef : coefficient for the polynomial and sigmoid kernel\n");
- printf("-d degree : degree for the polynomial kernel\n");
- printf("-e epsilon : set the value of the stopping criterion\n");
- printf("-g gamma : parameter for the rbf, polynomial or sigmoid "
- "kernel\n");
- printf("-h | -help : print this help.\n");
- printf("-k kappa : set the value of kappa used in the Huber hinge\n");
- printf("-l lambda : set the value of lambda (lambda > 0)\n");
- printf("-s seed_model_file : use previous model as seed for V\n");
- printf("-m model_output_file : write model output to file\n");
+ printf("--------\n");
+ printf("-c coef : coefficient for the polynomial and "
+ "sigmoid kernel\n");
+ printf("-d degree : degree for the polynomial kernel\n");
+ printf("-e epsilon : set the value of the stopping "
+ "criterion\n");
+ printf("-g gamma : parameter for the rbf, polynomial or "
+ "sigmoid kernel\n");
+ printf("-h | -help : print this help.\n");
+ printf("-k kappa : set the value of kappa used in the "
+ "Huber hinge\n");
+ printf("-l lambda : set the value of lambda "
+ "(lambda > 0)\n");
+ printf("-s seed_model_file : use previous model as seed for V\n");
+ printf("-m model_output_file : write model output to file "
+ "(not saved if no file provided)\n");
printf("-o prediction_output : write predictions of test data to "
- "file\n");
- printf("-p p-value : set the value of p in the lp norm "
+ "file (uses stdout if not provided)\n");
+ printf("-p p-value : set the value of p in the lp norm "
"(1.0 <= p <= 2.0)\n");
- printf("-q : quiet mode (no output, not even errors!)\n");
- printf("-r rho : choose the weigth specification (1 = unit, 2 = "
- "group)\n");
- printf("-t type: kerneltype (0=LINEAR, 1=POLY, 2=RBF, 3=SIGMOID)\n");
+ printf("-q : quiet mode (no output, not even "
+ "errors!)\n");
+ printf("-r rho : choose the weigth specification "
+ "(1 = unit, 2 = group)\n");
+ printf("-t type : kerneltype (0=LINEAR, 1=POLY, 2=RBF, "
+ "3=SIGMOID)\n");
+ printf("\n");
exit(EXIT_FAILURE);
}
+/**
+ * @brief Main interface function for GenSVMtraintest
+ *
+ * @details
+ * Main interface for the GenSVMtraintest commandline program.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ *
+ * @return exit status
+ */
int main(int argc, char **argv)
{
long i, *predy = NULL;
@@ -90,7 +123,9 @@ int main(int argc, char **argv)
if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help")
|| gensvm_check_argv_eq(argc, argv, "-h"))
- exit_with_help();
+ exit_with_help(argv);
+
+ // parse command line arguments
parse_command_line(argc, argv, model, &model_inputfile,
&training_inputfile, &testing_inputfile,
&model_outputfile, &prediction_outputfile);
@@ -162,6 +197,25 @@ int main(int argc, char **argv)
return 0;
}
+/**
+ * @brief Parse the command line arguments
+ *
+ * @details
+ * For a full overview of the command line arguments and their meaning see
+ * exit_with_help(). This function furthermore sets the default output streams
+ * to stdout/stderr, and initializes the kernel parameters if none are
+ * supplied: gamma = 1.0, degree = 2.0, coef = 0.0.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ * @param[in] model initialized GenModel struct
+ * @param[out] model_inputfile filename for the seed model
+ * @param[out] training_inputfile filename for the training data
+ * @param[out] testing_inputfile filename for the test data
+ * @param[out] model_outputfile filename for the output model
+ * @param[out] prediction_outputfile filename for the predictions
+ *
+ */
void parse_command_line(int argc, char **argv, struct GenModel *model,
char **model_inputfile, char **training_inputfile,
char **testing_inputfile, char **model_outputfile,
@@ -179,7 +233,7 @@ void parse_command_line(int argc, char **argv, struct GenModel *model,
for (i=1; i<argc; i++) {
if (argv[i][0] != '-') break;
if (++i>=argc) {
- exit_with_help();
+ exit_with_help(argv);
}
switch (argv[i-1][1]) {
case 'c':
@@ -235,11 +289,11 @@ void parse_command_line(int argc, char **argv, struct GenModel *model,
// otherwise you can't debug cmdline flags.
fprintf(stderr, "Unknown option: -%c\n",
argv[i-1][1]);
- exit_with_help();
+ exit_with_help(argv);
}
}
if (i >= argc)
- exit_with_help();
+ exit_with_help(argv);
(*training_inputfile) = Malloc(char, strlen(argv[i])+1);
strcpy((*training_inputfile), argv[i]);
diff --git a/src/gensvm_consistency.c b/src/gensvm_consistency.c
index 3881bb9..bf8cf9a 100644
--- a/src/gensvm_consistency.c
+++ b/src/gensvm_consistency.c
@@ -96,32 +96,34 @@ struct GenQueue *gensvm_top_queue(struct GenQueue *q, double percentile)
* configuration
*
* @details
- * The best performing tasks in the supplied GenQueue are found by taking those
- * GenTask structs that have a performance greater or equal to the 95% percentile
- * of the performance of all tasks. These tasks are then gathered in a new
- * GenQueue. For each of the tasks in this new GenQueue the cross validation run is
- * repeated a number of times.
+ * The best performing tasks in the supplied GenQueue are found by taking
+ * those GenTask structs that have a performance greater or equal to the given
+ * percentile of the performance of all tasks. These tasks are then gathered
+ * in a new GenQueue. For each of the tasks in this new GenQueue the cross
+ * validation run is repeated a number of times.
*
- * For each of the GenTask configurations that are repeated the mean performance,
- * standard deviation of the performance and the mean computation time are
- * reported.
+ * For each of the GenTask configurations that are repeated the mean
+ * performance, standard deviation of the performance and the mean computation
+ * time are reported.
*
- * Finally, the overall best tasks are written to the specified output. These
- * tasks are selected to have both the highest mean performance, as well as the
- * smallest standard deviation in their performance. This is done as follows.
- * First the 99th percentile of task performance and the 1st percentile of
- * standard deviation is calculated. If a task exists for which the mean
- * performance of the repeats and the standard deviation equals these values
- * respectively, this task is found to be the best and is written to the
- * output. If no such task exists, the 98th percentile of performance and the
- * 2nd percentile of standard deviation is considered. This is repeated until
- * an interval is found which contains tasks. If one or more tasks are found,
- * this loop stops.
+ * Finally, the overall best tasks are written to the specified output. These
+ * tasks are selected to have both the highest mean performance, as well as
+ * the smallest standard deviation in their performance. This is done as
+ * follows. First the 99th percentile of task performance and the 1st
+ * percentile of standard deviation is calculated. If a task exists for which
+ * the mean performance of the repeats and the standard deviation equals these
+ * values respectively, this task is found to be the best and is written to
+ * the output. If no such task exists, the 98th percentile of performance and
+ * the 2nd percentile of standard deviation is considered. This is repeated
+ * until an interval is found which contains tasks. If one or more tasks are
+ * found, this loop stops.
*
* @param[in] q GenQueue of GenTask structs which have already been
* run and have a GenTask::performance value
* @param[in] repeats Number of times to repeat the best
* configurations for consistency
+ * @param[in] percentile percentile of performance to determine which
+ * tasks to repeat
*/
void gensvm_consistency_repeats(struct GenQueue *q, long repeats,
double percentile)
diff --git a/src/gensvm_grid.c b/src/gensvm_grid.c
index 5aa1a3f..984eb96 100644
--- a/src/gensvm_grid.c
+++ b/src/gensvm_grid.c
@@ -31,6 +31,16 @@
#include "gensvm_grid.h"
+/**
+ * @brief Initialize a GenGrid structure
+ *
+ * @details
+ * This function is used to initialize a GenGrid struct, and set its default
+ * parameters. A pointer to the generated struct is returned.
+ *
+ * @return initialized GenGrid struct
+ *
+ */
struct GenGrid *gensvm_init_grid()
{
struct GenGrid *grid = Malloc(struct GenGrid, 1);
@@ -65,6 +75,16 @@ struct GenGrid *gensvm_init_grid()
return grid;
}
+/**
+ * @brief Free a GenGrid structure
+ *
+ * @details
+ * This function frees all elements of a GenGrid structure, including the
+ * GenGrid structure itself. The provided argument is set to NULL on exit.
+ *
+ * @param[in] grid a GenGrid struct to free
+ *
+ */
void gensvm_free_grid(struct GenGrid *grid)
{
free(grid->weight_idxs);
diff --git a/src/gensvm_gridsearch.c b/src/gensvm_gridsearch.c
index 7a81c51..2ac9639 100644
--- a/src/gensvm_gridsearch.c
+++ b/src/gensvm_gridsearch.c
@@ -350,8 +350,11 @@ void gensvm_train_queue(struct GenQueue *q)
* parameters differ with the specified kernel, this function writes a
* parameter string depending on which kernel is used.
*
- * @param[in] task the GenTask specified
- * @param[in] N total number of tasks
+ * @param[in] task the GenTask specified
+ * @param[in] N total number of tasks
+ * @param[in] perf performance of the current task
+ * @param[in] duration time duration of the current task
+ * @param[in] current_max current best performance
*
*/
void gensvm_gridsearch_progress(struct GenTask *task, long N, double perf,
diff --git a/src/gensvm_kernel.c b/src/gensvm_kernel.c
index 410ac5f..8f0b6da 100644
--- a/src/gensvm_kernel.c
+++ b/src/gensvm_kernel.c
@@ -32,6 +32,17 @@
#include "gensvm_kernel.h"
#include "gensvm_print.h"
+/**
+ * @brief Copy the kernel parameters from GenModel to GenData
+ *
+ * @details
+ * This is a little utility function to copy the kernel type and kernel
+ * parameters from a GenModel struct to a GenData struct.
+ *
+ * @param[in] model a GenModel struct
+ * @param[in,out] data a GenData struct
+ *
+ */
void gensvm_kernel_copy_kernelparam_to_data(struct GenModel *model,
struct GenData *data)
{
@@ -209,12 +220,13 @@ void gensvm_kernel_compute(struct GenModel *model, struct GenData *data,
* uses the highest precision eigenvalues, twice the underflow threshold (see
* dsyevx documentation).
*
- * @param[in] K the kernel matrix
- * @param[in] n the dimension of the kernel matrix
- * @param[in] cutoff mimimum ratio of eigenvalue to largest
- * eigenvalue for the eigenvector to be included
- * @param[out] P on exit contains the eigenvectors
- * @param[out] Sigma on exit contains the eigenvalues
+ * @param[in] K the kernel matrix
+ * @param[in] n the dimension of the kernel matrix
+ * @param[in] cutoff minimum ratio of eigenvalue to largest
+ * eigenvalue for the eigenvector to be
+ * included
+ * @param[out] P_ret on exit contains the eigenvectors
+ * @param[out] Sigma_ret on exit contains the eigenvalues
*
* @return the number of eigenvalues kept
*/
diff --git a/src/gensvm_update.c b/src/gensvm_update.c
index d74965c..3bf7549 100644
--- a/src/gensvm_update.c
+++ b/src/gensvm_update.c
@@ -535,6 +535,19 @@ void gensvm_get_ZAZ_ZB_sparse(struct GenModel *model, struct GenData *data,
}
+/**
+ * @brief Wrapper around calculation of Z'*A*Z and Z'*B for sparse and dense
+ *
+ * @details
+ * This is a wrapper around gensvm_get_ZAZ_ZB_dense() and
+ * gensvm_get_ZAZ_ZB_sparse(). See the documentation of those functions for
+ * more info.
+ *
+ * @param[in] model a GenModel struct
+ * @param[in] data a GenData struct
+ * @param[in] work a GenWork struct
+ *
+ */
void gensvm_get_ZAZ_ZB(struct GenModel *model, struct GenData *data,
struct GenWork *work)
{