diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/GenSVMgrid.c | 42 | ||||
| -rw-r--r-- | src/GenSVMtraintest.c | 100 | ||||
| -rw-r--r-- | src/gensvm_consistency.c | 40 | ||||
| -rw-r--r-- | src/gensvm_grid.c | 20 | ||||
| -rw-r--r-- | src/gensvm_gridsearch.c | 7 | ||||
| -rw-r--r-- | src/gensvm_kernel.c | 24 | ||||
| -rw-r--r-- | src/gensvm_update.c | 13 |
7 files changed, 187 insertions, 59 deletions
diff --git a/src/GenSVMgrid.c b/src/GenSVMgrid.c index 681b90c..14b7457 100644 --- a/src/GenSVMgrid.c +++ b/src/GenSVMgrid.c @@ -42,23 +42,33 @@ #include "gensvm_gridsearch.h" #include "gensvm_consistency.h" +/** + * Minimal number of command line arguments + */ #define MINARGS 2 extern FILE *GENSVM_OUTPUT_FILE; extern FILE *GENSVM_ERROR_FILE; // function declarations -void exit_with_help(); +void exit_with_help(char **argv); void parse_command_line(int argc, char **argv, char *input_filename); void read_grid_from_file(char *input_filename, struct GenGrid *grid); /** * @brief Help function + * + * @details + * Print help for this program and exit. Note that VERSION is provided by the + * Makefile. + * + * @param[in] argv command line arguments + * */ -void exit_with_help() +void exit_with_help(char **argv) { printf("This is GenSVM, version %1.1f\n\n", VERSION); - printf("Usage: trainGenSVMdataset [options] grid_file\n"); + printf("Usage: %s [options] grid_file\n", argv[0]); printf("Options:\n"); printf("-h | -help : print this help.\n"); printf("-q : quiet mode (no output, not even errors!)\n"); @@ -67,7 +77,7 @@ void exit_with_help() } /** - * @brief Main interface function for trainGenSVMdataset + * @brief Main interface function for GenSVMgrid * * @details * Main interface for the command line program. A given grid file which @@ -81,6 +91,8 @@ void exit_with_help() * @param[in] argc number of command line arguments * @param[in] argv array of command line arguments * + * @return exit status + * */ int main(int argc, char **argv) { @@ -93,7 +105,7 @@ int main(int argc, char **argv) if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") || gensvm_check_argv_eq(argc, argv, "-h") ) - exit_with_help(); + exit_with_help(argv); parse_command_line(argc, argv, input_filename); note("Reading grid file\n"); @@ -153,7 +165,7 @@ void parse_command_line(int argc, char **argv, char *input_filename) for (i=1; i<argc; i++) { if (argv[i][0] != '-') break; if (++i>=argc) - exit_with_help(); + exit_with_help(argv); switch (argv[i-1][1]) { case 'q': GENSVM_OUTPUT_FILE = NULL; @@ -163,16 +175,28 @@ void parse_command_line(int argc, char **argv, char *input_filename) default: fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); - exit_with_help(); + exit_with_help(argv); } } if (i >= argc) - exit_with_help(); + exit_with_help(argv); strcpy(input_filename, argv[i]); } +/** + * @brief Parse the kernel string from the training file + * + * @details + * This is a utility function for the read_grid_from_file() function, to keep + * the main code a bit shorter. It reads the line from the given buffer and + * returns the corresponding KernelType. + * + * @param[in] kernel_line line from the file with the kernel + * specification + * @return the corresponding kerneltype + */ KernelType parse_kernel_str(char *kernel_line) { if (str_endswith(kernel_line, "LINEAR\n")) { @@ -186,7 +210,7 @@ KernelType parse_kernel_str(char *kernel_line) } else { fprintf(stderr, "Unknown kernel specified on line: %s\n", kernel_line); - exit(1); + exit(EXIT_FAILURE); } } diff --git a/src/GenSVMtraintest.c b/src/GenSVMtraintest.c index ec0eb91..4f4cabb 100644 --- a/src/GenSVMtraintest.c +++ b/src/GenSVMtraintest.c @@ -33,45 +33,78 @@ #include "gensvm_train.h" #include "gensvm_predict.h" +/** + * Minimal number of command line arguments + */ #define MINARGS 2 extern FILE *GENSVM_OUTPUT_FILE; extern FILE *GENSVM_ERROR_FILE; // function declarations -void exit_with_help(); +void exit_with_help(char **argv); void parse_command_line(int argc, char **argv, struct GenModel *model, char **model_inputfile, char **training_inputfile, char **testing_inputfile, char **model_outputfile, char **prediction_outputfile); -void exit_with_help() +/** + * @brief Help function + * + * @details + * Print help for this program and exit. Note that the VERSION is defined in + * the Makefile. + * + * @param[in] argv command line arguments + * + */ +void exit_with_help(char **argv) { printf("This is GenSVM, version %1.1f\n\n", VERSION); - printf("Usage: ./gensvm [options] training_data [test_data]\n"); + printf("Usage: %s [options] training_data [test_data]\n\n", argv[0]); printf("Options:\n"); - printf("-c coef : coefficient for the polynomial and sigmoid kernel\n"); - printf("-d degree : degree for the polynomial kernel\n"); - printf("-e epsilon : set the value of the stopping criterion\n"); - printf("-g gamma : parameter for the rbf, polynomial or sigmoid " - "kernel\n"); - printf("-h | -help : print this help.\n"); - printf("-k kappa : set the value of kappa used in the Huber hinge\n"); - printf("-l lambda : set the value of lambda (lambda > 0)\n"); - printf("-s seed_model_file : use previous model as seed for V\n"); - printf("-m model_output_file : write model output to file\n"); + printf("--------\n"); + printf("-c coef : coefficient for the polynomial and " + "sigmoid kernel\n"); + printf("-d degree : degree for the polynomial kernel\n"); + printf("-e epsilon : set the value of the stopping " + "criterion\n"); + printf("-g gamma : parameter for the rbf, polynomial or " + "sigmoid kernel\n"); + printf("-h | -help : print this help.\n"); + printf("-k kappa : set the value of kappa used in the " + "Huber hinge\n"); + printf("-l lambda : set the value of lambda " + "(lambda > 0)\n"); + printf("-s seed_model_file : use previous model as seed for V\n"); + printf("-m model_output_file : write model output to file " + "(not saved if no file provided)\n"); printf("-o prediction_output : write predictions of test data to " - "file\n"); - printf("-p p-value : set the value of p in the lp norm " + "file (uses stdout if not provided)\n"); + printf("-p p-value : set the value of p in the lp norm " "(1.0 <= p <= 2.0)\n"); - printf("-q : quiet mode (no output, not even errors!)\n"); - printf("-r rho : choose the weigth specification (1 = unit, 2 = " - "group)\n"); - printf("-t type: kerneltype (0=LINEAR, 1=POLY, 2=RBF, 3=SIGMOID)\n"); + printf("-q : quiet mode (no output, not even " + "errors!)\n"); + printf("-r rho : choose the weigth specification " + "(1 = unit, 2 = group)\n"); + printf("-t type : kerneltype (0=LINEAR, 1=POLY, 2=RBF, " + "3=SIGMOID)\n"); + printf("\n"); exit(EXIT_FAILURE); } +/** + * @brief Main interface function for GenSVMtraintest + * + * @details + * Main interface for the GenSVMtraintest commandline program. + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + * @return exit status + */ int main(int argc, char **argv) { long i, *predy = NULL; @@ -90,7 +123,9 @@ int main(int argc, char **argv) if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") || gensvm_check_argv_eq(argc, argv, "-h")) - exit_with_help(); + exit_with_help(argv); + + // parse command line arguments parse_command_line(argc, argv, model, &model_inputfile, &training_inputfile, &testing_inputfile, &model_outputfile, &prediction_outputfile); @@ -162,6 +197,25 @@ int main(int argc, char **argv) return 0; } +/** + * @brief Parse the command line arguments + * + * @details + * For a full overview of the command line arguments and their meaning see + * exit_with_help(). This function furthermore sets the default output streams + * to stdout/stderr, and initializes the kernel parameters if none are + * supplied: gamma = 1.0, degree = 2.0, coef = 0.0. + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] model initialized GenModel struct + * @param[out] model_inputfile filename for the seed model + * @param[out] training_inputfile filename for the training data + * @param[out] testing_inputfile filename for the test data + * @param[out] model_outputfile filename for the output model + * @param[out] prediction_outputfile filename for the predictions + * + */ void parse_command_line(int argc, char **argv, struct GenModel *model, char **model_inputfile, char **training_inputfile, char **testing_inputfile, char **model_outputfile, @@ -179,7 +233,7 @@ void parse_command_line(int argc, char **argv, struct GenModel *model, for (i=1; i<argc; i++) { if (argv[i][0] != '-') break; if (++i>=argc) { - exit_with_help(); + exit_with_help(argv); } switch (argv[i-1][1]) { case 'c': @@ -235,11 +289,11 @@ void parse_command_line(int argc, char **argv, struct GenModel *model, // otherwise you can't debug cmdline flags. fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); - exit_with_help(); + exit_with_help(argv); } } if (i >= argc) - exit_with_help(); + exit_with_help(argv); (*training_inputfile) = Malloc(char, strlen(argv[i])+1); strcpy((*training_inputfile), argv[i]); diff --git a/src/gensvm_consistency.c b/src/gensvm_consistency.c index 3881bb9..bf8cf9a 100644 --- a/src/gensvm_consistency.c +++ b/src/gensvm_consistency.c @@ -96,32 +96,34 @@ struct GenQueue *gensvm_top_queue(struct GenQueue *q, double percentile) * configuration * * @details - * The best performing tasks in the supplied GenQueue are found by taking those - * GenTask structs that have a performance greater or equal to the 95% percentile - * of the performance of all tasks. These tasks are then gathered in a new - * GenQueue. For each of the tasks in this new GenQueue the cross validation run is - * repeated a number of times. + * The best performing tasks in the supplied GenQueue are found by taking + * those GenTask structs that have a performance greater or equal to the given + * percentile of the performance of all tasks. These tasks are then gathered + * in a new GenQueue. For each of the tasks in this new GenQueue the cross + * validation run is repeated a number of times. * - * For each of the GenTask configurations that are repeated the mean performance, - * standard deviation of the performance and the mean computation time are - * reported. + * For each of the GenTask configurations that are repeated the mean + * performance, standard deviation of the performance and the mean computation + * time are reported. * - * Finally, the overall best tasks are written to the specified output. These - * tasks are selected to have both the highest mean performance, as well as the - * smallest standard deviation in their performance. This is done as follows. - * First the 99th percentile of task performance and the 1st percentile of - * standard deviation is calculated. If a task exists for which the mean - * performance of the repeats and the standard deviation equals these values - * respectively, this task is found to be the best and is written to the - * output. If no such task exists, the 98th percentile of performance and the - * 2nd percentile of standard deviation is considered. This is repeated until - * an interval is found which contains tasks. If one or more tasks are found, - * this loop stops. + * Finally, the overall best tasks are written to the specified output. These + * tasks are selected to have both the highest mean performance, as well as + * the smallest standard deviation in their performance. This is done as + * follows. First the 99th percentile of task performance and the 1st + * percentile of standard deviation is calculated. If a task exists for which + * the mean performance of the repeats and the standard deviation equals these + * values respectively, this task is found to be the best and is written to + * the output. If no such task exists, the 98th percentile of performance and + * the 2nd percentile of standard deviation is considered. This is repeated + * until an interval is found which contains tasks. If one or more tasks are + * found, this loop stops. * * @param[in] q GenQueue of GenTask structs which have already been * run and have a GenTask::performance value * @param[in] repeats Number of times to repeat the best * configurations for consistency + * @param[in] percentile percentile of performance to determine which + * tasks to repeat */ void gensvm_consistency_repeats(struct GenQueue *q, long repeats, double percentile) diff --git a/src/gensvm_grid.c b/src/gensvm_grid.c index 5aa1a3f..984eb96 100644 --- a/src/gensvm_grid.c +++ b/src/gensvm_grid.c @@ -31,6 +31,16 @@ #include "gensvm_grid.h" +/** + * @brief Initialize a GenGrid structure + * + * @brief + * This function is used to initialize a GenGrid struct, and set its default + * parameters. A pointer to the generated struct is returned. + * + * @return initialized GenGrid struct + * + */ struct GenGrid *gensvm_init_grid() { struct GenGrid *grid = Malloc(struct GenGrid, 1); @@ -65,6 +75,16 @@ struct GenGrid *gensvm_init_grid() return grid; } +/** + * @brief Free a GenGrid structure + * + * @details + * This function frees all elements of a GenGrid structure, including the + * GenGrid structure itself. The provided argument is set to NULL on exit. + * + * @param[in] grid a GenGrid struct to free + * + */ void gensvm_free_grid(struct GenGrid *grid) { free(grid->weight_idxs); diff --git a/src/gensvm_gridsearch.c b/src/gensvm_gridsearch.c index 7a81c51..2ac9639 100644 --- a/src/gensvm_gridsearch.c +++ b/src/gensvm_gridsearch.c @@ -350,8 +350,11 @@ void gensvm_train_queue(struct GenQueue *q) * parameters differ with the specified kernel, this function writes a * parameter string depending on which kernel is used. * - * @param[in] task the GenTask specified - * @param[in] N total number of tasks + * @param[in] task the GenTask specified + * @param[in] N total number of tasks + * @param[in] perf performance of the current task + * @param[in] duration time duration of the current task + * @param[in] current_max current best performance * */ void gensvm_gridsearch_progress(struct GenTask *task, long N, double perf, diff --git a/src/gensvm_kernel.c b/src/gensvm_kernel.c index 410ac5f..8f0b6da 100644 --- a/src/gensvm_kernel.c +++ b/src/gensvm_kernel.c @@ -32,6 +32,17 @@ #include "gensvm_kernel.h" #include "gensvm_print.h" +/** + * @brief Copy the kernelparameters from GenModel to GenData + * + * @details + * This is a little utility function to copy the kernel type and kernel + * parameters from a GenModel struct to a GenData struct. + * + * @param[in] model a GenModel struct + * @param[in] data a GenData struct + * + */ void gensvm_kernel_copy_kernelparam_to_data(struct GenModel *model, struct GenData *data) { @@ -209,12 +220,13 @@ void gensvm_kernel_compute(struct GenModel *model, struct GenData *data, * uses the highest precision eigenvalues, twice the underflow threshold (see * dsyevx documentation). * - * @param[in] K the kernel matrix - * @param[in] n the dimension of the kernel matrix - * @param[in] cutoff mimimum ratio of eigenvalue to largest - * eigenvalue for the eigenvector to be included - * @param[out] P on exit contains the eigenvectors - * @param[out] Sigma on exit contains the eigenvalues + * @param[in] K the kernel matrix + * @param[in] n the dimension of the kernel matrix + * @param[in] cutoff mimimum ratio of eigenvalue to largest + * eigenvalue for the eigenvector to be + * included + * @param[out] P_ret on exit contains the eigenvectors + * @param[out] Sigma_ret on exit contains the eigenvalues * * @return the number of eigenvalues kept */ diff --git a/src/gensvm_update.c b/src/gensvm_update.c index d74965c..3bf7549 100644 --- a/src/gensvm_update.c +++ b/src/gensvm_update.c @@ -535,6 +535,19 @@ void gensvm_get_ZAZ_ZB_sparse(struct GenModel *model, struct GenData *data, } +/** + * @brief Wrapper around calculation of Z'*A*Z and Z'*B for sparse and dense + * + * @details + * This is a wrapper around gensvm_get_ZAZ_ZB_dense() and + * gensvm_get_ZAZ_ZB_sparse(). See the documentation of those functions for + * more info. + * + * @param[in] model a GenModel struct + * @param[in] data a GenData struct + * @param[in] work a GenWork struct + * + */ void gensvm_get_ZAZ_ZB(struct GenModel *model, struct GenData *data, struct GenWork *work) { |
