From ddbd423f54e2fd92659a0d277ee844659eee8ba1 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Wed, 15 Jan 2014 00:35:21 +0100 Subject: added documentation, restart git usage, start implementing kernels --- src/trainMSVMMajdataset.c | 155 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 10 deletions(-) (limited to 'src/trainMSVMMajdataset.c') diff --git a/src/trainMSVMMajdataset.c b/src/trainMSVMMajdataset.c index 7c3385c..097df85 100644 --- a/src/trainMSVMMajdataset.c +++ b/src/trainMSVMMajdataset.c @@ -1,7 +1,28 @@ +/** + * @file trainMSVMMajdataset.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Command line interface for the grid search program + * + * @details + * This is a command line interface to the parameter grid search functionality + * of the algorithm. The grid search is specified in a separate file, thereby + * reducing the number of command line arguments. See + * read_training_from_file() for documentation on the training file. + * + * The program runs a grid search as specified in the training file. If + * desired the grid search can incorporate consistency checks to find the + * configuration among the best configurations which scores consistently high. + * All output is written to stdout, unless the quiet mode is specified. + * + * For further usage information, see the program help function. 
+ * + */ + #include #include "crossval.h" -#include "MSVMMaj.h" +#include "msvmmaj.h" #include "msvmmaj_pred.h" #include "msvmmaj_train.h" #include "msvmmaj_train_dataset.h" @@ -12,11 +33,15 @@ extern FILE *MSVMMAJ_OUTPUT_FILE; +// function declarations void print_null(const char *s) {} void exit_with_help(); void parse_command_line(int argc, char **argv, char *input_filename); void read_training_from_file(char *input_filename, struct Training *training); +/** + * @brief Help function + */ void exit_with_help() { printf("This is MSVMMaj, version %1.1f\n\n", VERSION); @@ -28,6 +53,22 @@ void exit_with_help() exit(0); } +/** + * @brief Main interface function for trainMSVMMajdataset + * + * @details + * Main interface for the command line program. A given training file which + * specifies a grid search over a single dataset is read. From this, a Queue + * is created containing all Task instances that need to be performed in the + * search. Depending on the type of dataset, either cross validation or + * train/test split training is performed for all tasks. If specified, + * consistency repeats are done at the end of the grid search. Note that + * currently no output is produced other than what is written to stdout. + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + */ int main(int argc, char **argv) { char input_filename[MAX_LINE_LENGTH]; @@ -78,6 +119,21 @@ int main(int argc, char **argv) return 0; } +/** + * @brief Parse command line arguments + * + * @details + * Few arguments can be supplied to the command line. Only quiet mode can be + * specified, or help can be requested. The filename of the training file is + * read from the arguments. Parsing of the training file is done separately in + * read_training_from_file(). 
+ * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] input_filename pre-allocated buffer for the training + * filename. + * + */ void parse_command_line(int argc, char **argv, char *input_filename) { int i; @@ -94,7 +150,8 @@ void parse_command_line(int argc, char **argv, char *input_filename) i--; break; default: - fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); + fprintf(stderr, "Unknown option: -%c\n", + argv[i-1][1]); exit_with_help(); } } @@ -105,6 +162,21 @@ void parse_command_line(int argc, char **argv, char *input_filename) strcpy(input_filename, argv[i]); } +/** + * @brief Read the Training struct from file + * + * @details + * Read the Training struct from a file. The training file follows a specific + * format specified in @ref spec_training_file. + * + * Commonly used string functions in this function are all_doubles_str() and + * all_longs_str(). + * + * @param[in] input_filename filename of the training file + * @param[in] training Training structure to place the parsed + * parameter grid. 
+ * + */ void read_training_from_file(char *input_filename, struct Training *training) { long i, nr = 0; @@ -117,7 +189,8 @@ void read_training_from_file(char *input_filename, struct Training *training) fid = fopen(input_filename, "r"); if (fid == NULL) { - fprintf(stderr, "Error opening training file %s\n", input_filename); + fprintf(stderr, "Error opening training file %s\n", + input_filename); exit(1); } training->traintype = CV; @@ -126,11 +199,13 @@ void read_training_from_file(char *input_filename, struct Training *training) Memset(lparams, long, MAX_LINE_LENGTH); if (str_startswith(buffer, "train:")) { sscanf(buffer, "train: %s\n", train_filename); - training->train_data_file = Calloc(char, MAX_LINE_LENGTH); + training->train_data_file = Calloc(char, + MAX_LINE_LENGTH); strcpy(training->train_data_file, train_filename); } else if (str_startswith(buffer, "test:")) { sscanf(buffer, "test: %s\n", test_filename); - training->test_data_file = Calloc(char, MAX_LINE_LENGTH); + training->test_data_file = Calloc(char, + MAX_LINE_LENGTH); strcpy(training->test_data_file, test_filename); training->traintype = TT; } else if (str_startswith(buffer, "p:")) { @@ -167,16 +242,76 @@ void read_training_from_file(char *input_filename, struct Training *training) nr = all_longs_str(buffer, 6, lparams); training->folds = lparams[0]; if (nr > 1) - fprintf(stderr, "Field \"folds\" only takes one value. " - "Additional fields are ignored.\n"); + fprintf(stderr, "Field \"folds\" only takes " + "one value. Additional " + "fields are ignored.\n"); } else if (str_startswith(buffer, "repeats:")) { nr = all_longs_str(buffer, 8, lparams); training->repeats = lparams[0]; if (nr > 1) - fprintf(stderr, "Field \"repeats\" only takes one value. " - "Additional fields are ignored.\n"); + fprintf(stderr, "Field \"repeats\" only " + "takes one value. 
Additional " + "fields are ignored.\n"); + } else if (str_startswith(buffer, "kernel:")) { + nr = all_longs_str(buffer, 7, lparams); + if (nr > 1) + fprintf(stderr, "Field \"kernel\" only takes " + "one value. Additional " + "fields are ignored.\n"); + switch (lparams[0]) { + case 0: + training->kerneltype = K_LINEAR; + break; + case 1: + training->kerneltype = K_POLY; + break; + case 2: + training->kerneltype = K_RBF; + break; + case 3: + training->kerneltype = K_SIGMOID; + break; + } + } else if (str_startswith(buffer, "gamma:")) { + nr = all_doubles_str(buffer, 6, params); + if (training->kerneltype == K_LINEAR) { + fprintf(stderr, "Field \"gamma\" ignored, " + "linear kernel is used.\n"); + training->Ng = 0; + break; + } + training->gammas = Calloc(double, nr); + for (i=0; i<nr; i++) + training->gammas[i] = params[i]; + training->Ng = nr; + } else if (str_startswith(buffer, "coef:")) { + nr = all_doubles_str(buffer, 5, params); + if (training->kerneltype == K_LINEAR || + training->kerneltype == K_RBF) { + fprintf(stderr, "Field \"coef\" ignored with" + "specified kernel.\n"); + training->Nc = 0; + break; + } + training->coefs = Calloc(double, nr); + for (i=0; i<nr; i++) + training->coefs[i] = params[i]; + training->Nc = nr; + } else if (str_startswith(buffer, "degree:")) { + nr = all_doubles_str(buffer, 7, params); + if (training->kerneltype != K_POLY) { + fprintf(stderr, "Field \"degree\" ignored " + "with specified kernel.\n"); + training->Nd = 0; + break; + } + training->degrees = Calloc(double, nr); + for (i=0; i<nr; i++) + training->degrees[i] = params[i]; + training->Nd = nr; } else { - fprintf(stderr, "Cannot find any parameters on line: %s\n", buffer); + fprintf(stderr, "Cannot find any parameters on line: " + "%s\n", buffer); } } -- cgit v1.2.3