/** * @file GenSVM_grid.c * @author Gertjan van den Burg * @date January, 2014 * @brief Command line interface for the grid search program * * @details * This is a command line interface to the parameter grid search functionality * of the algorithm. The grid search is specified in a separate file, thereby * reducing the number of command line arguments. See * read_training_from_file() for documentation on the training file. * * The program runs a grid search as specified in the training file. If * desired the grid search can incorporate consistency checks to find the * configuration among the best configurations which scores consistently high. * All output is written to stdout, unless the quiet mode is specified. * * For further usage information, see the program help function. * */ #include #include "crossval.h" #include "gensvm.h" #include "gensvm_io.h" #include "gensvm_init.h" #include "gensvm_pred.h" #include "gensvm_train.h" #include "gensvm_train_dataset.h" #include "strutil.h" #include "util.h" #define MINARGS 2 extern FILE *GENSVM_OUTPUT_FILE; // function declarations void exit_with_help(); void parse_command_line(int argc, char **argv, char *input_filename); void read_training_from_file(char *input_filename, struct Training *training); /** * @brief Help function */ void exit_with_help() { printf("This is GenSVM, version %1.1f\n\n", VERSION); printf("Usage: trainGenSVMdataset [options] training_file\n"); printf("Options:\n"); printf("-h | -help : print this help.\n"); printf("-q : quiet mode (no output)\n"); exit(0); } /** * @brief Main interface function for trainGenSVMdataset * * @details * Main interface for the command line program. A given training file which * specifies a grid search over a single dataset is read. From this, a Queue * is created containing all Task instances that need to be performed in the * search. Depending on the type of dataset, either cross validation or * train/test split training is performed for all tasks. If specified, * consistency repeats are done at the end of the grid search. Note that * currently no output is produced other than what is written to stdout. * * @param[in] argc number of command line arguments * @param[in] argv array of command line arguments * */ int main(int argc, char **argv) { char input_filename[MAX_LINE_LENGTH]; struct Training *training = Malloc(struct Training, 1); struct GenData *train_data = Malloc(struct GenData, 1); struct GenData *test_data = Malloc(struct GenData, 1); if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") || gensvm_check_argv_eq(argc, argv, "-h") ) exit_with_help(); parse_command_line(argc, argv, input_filename); training->repeats = 0; note("Reading training file\n"); read_training_from_file(input_filename, training); note("Reading data from %s\n", training->train_data_file); gensvm_read_data(train_data, training->train_data_file); if (training->traintype == TT) { note("Reading data from %s\n", training->test_data_file); gensvm_read_data(test_data, training->test_data_file); } note("Creating queue\n"); struct Queue *q = Malloc(struct Queue, 1); make_queue(training, q, train_data, test_data); srand(time(NULL)); note("Starting training\n"); if (training->traintype == TT) start_training_tt(q); else start_training_cv(q); note("Training finished\n"); if (training->repeats > 0) { consistency_repeats(q, training->repeats, training->traintype); } free_queue(q); free(training); gensvm_free_data(train_data); gensvm_free_data(test_data); note("Done.\n"); return 0; } /** * @brief Parse command line arguments * * @details * Few arguments can be supplied to the command line. Only quiet mode can be * specified, or help can be requested. The filename of the training file is * read from the arguments. Parsing of the training file is done separately in * read_training_from_file(). * * @param[in] argc number of command line arguments * @param[in] argv array of command line arguments * @param[in] input_filename pre-allocated buffer for the training * filename. * */ void parse_command_line(int argc, char **argv, char *input_filename) { int i; GENSVM_OUTPUT_FILE = stdout; for (i=1; i=argc) exit_with_help(); switch (argv[i-1][1]) { case 'q': GENSVM_OUTPUT_FILE = NULL; i--; break; default: fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); exit_with_help(); } } if (i >= argc) exit_with_help(); strcpy(input_filename, argv[i]); } KernelType parse_kernel_str(char *kernel_line) { if (str_endswith(kernel_line, "LINEAR\n")) { return K_LINEAR; } else if (str_endswith(kernel_line, "POLY\n")) { return K_POLY; } else if (str_endswith(kernel_line, "RBF\n")) { return K_RBF; } else if (str_endswith(kernel_line, "SIGMOID\n")) { return K_SIGMOID; } else { fprintf(stderr, "Unknown kernel specified on line: %s\n", kernel_line); exit(1); } } /** * @brief Read the Training struct from file * * @details * Read the Training struct from a file. The training file follows a specific * format specified in @ref spec_training_file. * * Commonly used string functions in this function are all_doubles_str() and * all_longs_str(). * * @param[in] input_filename filename of the training file * @param[in] training Training structure to place the parsed * parameter grid. * */ void read_training_from_file(char *input_filename, struct Training *training) { long i, nr = 0; FILE *fid; char buffer[MAX_LINE_LENGTH]; char train_filename[MAX_LINE_LENGTH]; char test_filename[MAX_LINE_LENGTH]; double *params = Calloc(double, MAX_LINE_LENGTH); long *lparams = Calloc(long, MAX_LINE_LENGTH); fid = fopen(input_filename, "r"); if (fid == NULL) { fprintf(stderr, "Error opening training file %s\n", input_filename); exit(1); } training->traintype = CV; while ( fgets(buffer, MAX_LINE_LENGTH, fid) != NULL ) { Memset(params, double, MAX_LINE_LENGTH); Memset(lparams, long, MAX_LINE_LENGTH); if (str_startswith(buffer, "train:")) { sscanf(buffer, "train: %s\n", train_filename); training->train_data_file = Calloc(char, MAX_LINE_LENGTH); strcpy(training->train_data_file, train_filename); } else if (str_startswith(buffer, "test:")) { sscanf(buffer, "test: %s\n", test_filename); training->test_data_file = Calloc(char, MAX_LINE_LENGTH); strcpy(training->test_data_file, test_filename); training->traintype = TT; } else if (str_startswith(buffer, "p:")) { nr = all_doubles_str(buffer, 2, params); training->ps = Calloc(double, nr); for (i=0; ips[i] = params[i]; training->Np = nr; } else if (str_startswith(buffer, "lambda:")) { nr = all_doubles_str(buffer, 7, params); training->lambdas = Calloc(double, nr); for (i=0; ilambdas[i] = params[i]; training->Nl = nr; } else if (str_startswith(buffer, "kappa:")) { nr = all_doubles_str(buffer, 6, params); training->kappas = Calloc(double, nr); for (i=0; ikappas[i] = params[i]; training->Nk = nr; } else if (str_startswith(buffer, "epsilon:")) { nr = all_doubles_str(buffer, 8, params); training->epsilons = Calloc(double, nr); for (i=0; iepsilons[i] = params[i]; training->Ne = nr; } else if (str_startswith(buffer, "weight:")) { nr = all_longs_str(buffer, 7, lparams); training->weight_idxs = Calloc(int, nr); for (i=0; iweight_idxs[i] = lparams[i]; training->Nw = nr; } else if (str_startswith(buffer, "folds:")) { nr = all_longs_str(buffer, 6, lparams); training->folds = lparams[0]; if (nr > 1) fprintf(stderr, "Field \"folds\" only takes " "one value. Additional " "fields are ignored.\n"); } else if (str_startswith(buffer, "repeats:")) { nr = all_longs_str(buffer, 8, lparams); training->repeats = lparams[0]; if (nr > 1) fprintf(stderr, "Field \"repeats\" only " "takes one value. Additional " "fields are ignored.\n"); } else if (str_startswith(buffer, "kernel:")) { training->kerneltype = parse_kernel_str(buffer); } else if (str_startswith(buffer, "gamma:")) { nr = all_doubles_str(buffer, 6, params); if (training->kerneltype == K_LINEAR) { fprintf(stderr, "Field \"gamma\" ignored, " "linear kernel is used.\n"); training->Ng = 0; break; } training->gammas = Calloc(double, nr); for (i=0; igammas[i] = params[i]; training->Ng = nr; } else if (str_startswith(buffer, "coef:")) { nr = all_doubles_str(buffer, 5, params); if (training->kerneltype == K_LINEAR || training->kerneltype == K_RBF) { fprintf(stderr, "Field \"coef\" ignored with " "specified kernel.\n"); training->Nc = 0; break; } training->coefs = Calloc(double, nr); for (i=0; icoefs[i] = params[i]; training->Nc = nr; } else if (str_startswith(buffer, "degree:")) { nr = all_doubles_str(buffer, 7, params); if (training->kerneltype != K_POLY) { fprintf(stderr, "Field \"degree\" ignored " "with specified kernel.\n"); training->Nd = 0; break; } training->degrees = Calloc(double, nr); for (i=0; idegrees[i] = params[i]; training->Nd = nr; } else { fprintf(stderr, "Cannot find any parameters on line: " "%s\n", buffer); } } free(params); free(lparams); fclose(fid); }