From 1e340d509f229120eb3aaa98c91028dc3c0d3305 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 25 Aug 2014 14:38:03 +0200 Subject: rename msvmmaj to gensvm --- src/trainMSVMMajdataset.c | 321 ---------------------------------------------- 1 file changed, 321 deletions(-) delete mode 100644 src/trainMSVMMajdataset.c (limited to 'src/trainMSVMMajdataset.c') diff --git a/src/trainMSVMMajdataset.c b/src/trainMSVMMajdataset.c deleted file mode 100644 index b9d9180..0000000 --- a/src/trainMSVMMajdataset.c +++ /dev/null @@ -1,321 +0,0 @@ -/** - * @file trainMSVMMajdataset.c - * @author Gertjan van den Burg - * @date January, 2014 - * @brief Command line interface for the grid search program - * - * @details - * This is a command line interface to the parameter grid search functionality - * of the algorithm. The grid search is specified in a separate file, thereby - * reducing the number of command line arguments. See - * read_training_from_file() for documentation on the training file. - * - * The program runs a grid search as specified in the training file. If - * desired the grid search can incorporate consistency checks to find the - * configuration among the best configurations which scores consistently high. - * All output is written to stdout, unless the quiet mode is specified. - * - * For further usage information, see the program help function. - * - */ - -#include - -#include "crossval.h" -#include "msvmmaj.h" -#include "msvmmaj_io.h" -#include "msvmmaj_init.h" -#include "msvmmaj_pred.h" -#include "msvmmaj_train.h" -#include "msvmmaj_train_dataset.h" -#include "strutil.h" -#include "util.h" - -#define MINARGS 2 - -extern FILE *MSVMMAJ_OUTPUT_FILE; - -// function declarations -void exit_with_help(); -void parse_command_line(int argc, char **argv, char *input_filename); -void read_training_from_file(char *input_filename, struct Training *training); - -/** - * @brief Help function - */ -void exit_with_help() -{ - printf("This is MSVMMaj, version %1.1f\n\n", VERSION); - printf("Usage: trainMSVMMajdataset [options] training_file\n"); - printf("Options:\n"); - printf("-h | -help : print this help.\n"); - printf("-q : quiet mode (no output)\n"); - - exit(0); -} - -/** - * @brief Main interface function for trainMSVMMajdataset - * - * @details - * Main interface for the command line program. A given training file which - * specifies a grid search over a single dataset is read. From this, a Queue - * is created containing all Task instances that need to be performed in the - * search. Depending on the type of dataset, either cross validation or - * train/test split training is performed for all tasks. If specified, - * consistency repeats are done at the end of the grid search. Note that - * currently no output is produced other than what is written to stdout. - * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * - */ -int main(int argc, char **argv) -{ - char input_filename[MAX_LINE_LENGTH]; - - struct Training *training = Malloc(struct Training, 1); - struct MajData *train_data = Malloc(struct MajData, 1); - struct MajData *test_data = Malloc(struct MajData, 1); - - if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help") - || msvmmaj_check_argv_eq(argc, argv, "-h") ) - exit_with_help(); - parse_command_line(argc, argv, input_filename); - - training->repeats = 0; - note("Reading training file\n"); - read_training_from_file(input_filename, training); - - note("Reading data from %s\n", training->train_data_file); - msvmmaj_read_data(train_data, training->train_data_file); - if (training->traintype == TT) { - note("Reading data from %s\n", training->test_data_file); - msvmmaj_read_data(test_data, training->test_data_file); - } - - note("Creating queue\n"); - struct Queue *q = Malloc(struct Queue, 1); - make_queue(training, q, train_data, test_data); - - srand(time(NULL)); - - note("Starting training\n"); - if (training->traintype == TT) - start_training_tt(q); - else - start_training_cv(q); - note("Training finished\n"); - - if (training->repeats > 0) { - consistency_repeats(q, training->repeats, training->traintype); - } - - free_queue(q); - free(training); - msvmmaj_free_data(train_data); - msvmmaj_free_data(test_data); - - note("Done.\n"); - return 0; -} - -/** - * @brief Parse command line arguments - * - * @details - * Few arguments can be supplied to the command line. Only quiet mode can be - * specified, or help can be requested. The filename of the training file is - * read from the arguments. Parsing of the training file is done separately in - * read_training_from_file(). - * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * @param[in] input_filename pre-allocated buffer for the training - * filename. - * - */ -void parse_command_line(int argc, char **argv, char *input_filename) -{ - int i; - - MSVMMAJ_OUTPUT_FILE = stdout; - - for (i=1; i=argc) - exit_with_help(); - switch (argv[i-1][1]) { - case 'q': - MSVMMAJ_OUTPUT_FILE = NULL; - i--; - break; - default: - fprintf(stderr, "Unknown option: -%c\n", - argv[i-1][1]); - exit_with_help(); - } - } - - if (i >= argc) - exit_with_help(); - - strcpy(input_filename, argv[i]); -} - -KernelType parse_kernel_str(char *kernel_line) -{ - if (str_endswith(kernel_line, "LINEAR\n")) { - return K_LINEAR; - } else if (str_endswith(kernel_line, "POLY\n")) { - return K_POLY; - } else if (str_endswith(kernel_line, "RBF\n")) { - return K_RBF; - } else if (str_endswith(kernel_line, "SIGMOID\n")) { - return K_SIGMOID; - } else { - fprintf(stderr, "Unknown kernel specified on line: %s\n", - kernel_line); - exit(1); - } -} - -/** - * @brief Read the Training struct from file - * - * @details - * Read the Training struct from a file. The training file follows a specific - * format specified in @ref spec_training_file. - * - * Commonly used string functions in this function are all_doubles_str() and - * all_longs_str(). - * - * @param[in] input_filename filename of the training file - * @param[in] training Training structure to place the parsed - * parameter grid. - * - */ -void read_training_from_file(char *input_filename, struct Training *training) -{ - long i, nr = 0; - FILE *fid; - char buffer[MAX_LINE_LENGTH]; - char train_filename[MAX_LINE_LENGTH]; - char test_filename[MAX_LINE_LENGTH]; - double *params = Calloc(double, MAX_LINE_LENGTH); - long *lparams = Calloc(long, MAX_LINE_LENGTH); - - fid = fopen(input_filename, "r"); - if (fid == NULL) { - fprintf(stderr, "Error opening training file %s\n", - input_filename); - exit(1); - } - training->traintype = CV; - while ( fgets(buffer, MAX_LINE_LENGTH, fid) != NULL ) { - Memset(params, double, MAX_LINE_LENGTH); - Memset(lparams, long, MAX_LINE_LENGTH); - if (str_startswith(buffer, "train:")) { - sscanf(buffer, "train: %s\n", train_filename); - training->train_data_file = Calloc(char, - MAX_LINE_LENGTH); - strcpy(training->train_data_file, train_filename); - } else if (str_startswith(buffer, "test:")) { - sscanf(buffer, "test: %s\n", test_filename); - training->test_data_file = Calloc(char, - MAX_LINE_LENGTH); - strcpy(training->test_data_file, test_filename); - training->traintype = TT; - } else if (str_startswith(buffer, "p:")) { - nr = all_doubles_str(buffer, 2, params); - training->ps = Calloc(double, nr); - for (i=0; ips[i] = params[i]; - training->Np = nr; - } else if (str_startswith(buffer, "lambda:")) { - nr = all_doubles_str(buffer, 7, params); - training->lambdas = Calloc(double, nr); - for (i=0; ilambdas[i] = params[i]; - training->Nl = nr; - } else if (str_startswith(buffer, "kappa:")) { - nr = all_doubles_str(buffer, 6, params); - training->kappas = Calloc(double, nr); - for (i=0; ikappas[i] = params[i]; - training->Nk = nr; - } else if (str_startswith(buffer, "epsilon:")) { - nr = all_doubles_str(buffer, 8, params); - training->epsilons = Calloc(double, nr); - for (i=0; iepsilons[i] = params[i]; - training->Ne = nr; - } else if (str_startswith(buffer, "weight:")) { - nr = all_longs_str(buffer, 7, lparams); - training->weight_idxs = Calloc(int, nr); - for (i=0; iweight_idxs[i] = lparams[i]; - training->Nw = nr; - } else if (str_startswith(buffer, "folds:")) { - nr = all_longs_str(buffer, 6, lparams); - training->folds = lparams[0]; - if (nr > 1) - fprintf(stderr, "Field \"folds\" only takes " - "one value. Additional " - "fields are ignored.\n"); - } else if (str_startswith(buffer, "repeats:")) { - nr = all_longs_str(buffer, 8, lparams); - training->repeats = lparams[0]; - if (nr > 1) - fprintf(stderr, "Field \"repeats\" only " - "takes one value. Additional " - "fields are ignored.\n"); - } else if (str_startswith(buffer, "kernel:")) { - training->kerneltype = parse_kernel_str(buffer); - } else if (str_startswith(buffer, "gamma:")) { - nr = all_doubles_str(buffer, 6, params); - if (training->kerneltype == K_LINEAR) { - fprintf(stderr, "Field \"gamma\" ignored, " - "linear kernel is used.\n"); - training->Ng = 0; - break; - } - training->gammas = Calloc(double, nr); - for (i=0; igammas[i] = params[i]; - training->Ng = nr; - } else if (str_startswith(buffer, "coef:")) { - nr = all_doubles_str(buffer, 5, params); - if (training->kerneltype == K_LINEAR || - training->kerneltype == K_RBF) { - fprintf(stderr, "Field \"coef\" ignored with " - "specified kernel.\n"); - training->Nc = 0; - break; - } - training->coefs = Calloc(double, nr); - for (i=0; icoefs[i] = params[i]; - training->Nc = nr; - } else if (str_startswith(buffer, "degree:")) { - nr = all_doubles_str(buffer, 7, params); - if (training->kerneltype != K_POLY) { - fprintf(stderr, "Field \"degree\" ignored " - "with specified kernel.\n"); - training->Nd = 0; - break; - } - training->degrees = Calloc(double, nr); - for (i=0; idegrees[i] = params[i]; - training->Nd = nr; - } else { - fprintf(stderr, "Cannot find any parameters on line: " - "%s\n", buffer); - } - } - - free(params); - free(lparams); - fclose(fid); -} -- cgit v1.2.3