From bc7ac4f2b40cf60cd7997c28244a1f8eba4bad05 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Mon, 25 Aug 2014 14:51:55 +0200 Subject: rename execs and fix some unintended renames --- Makefile | 23 ++-- src/GenSVMgrid.c | 321 +++++++++++++++++++++++++++++++++++++++++++++++ src/GenSVMpred.c | 180 ++++++++++++++++++++++++++ src/GenSVMtrain.c | 244 +++++++++++++++++++++++++++++++++++ src/gensvm_pred.c | 4 +- src/gensvm_train.c | 10 +- src/libGenSVM.c | 6 +- src/predGenSVM.c | 180 -------------------------- src/trainGenSVM.c | 244 ----------------------------------- src/trainGenSVMdataset.c | 321 ----------------------------------------------- 10 files changed, 768 insertions(+), 765 deletions(-) create mode 100644 src/GenSVMgrid.c create mode 100644 src/GenSVMpred.c create mode 100644 src/GenSVMtrain.c delete mode 100644 src/predGenSVM.c delete mode 100644 src/trainGenSVM.c delete mode 100644 src/trainGenSVMdataset.c diff --git a/Makefile b/Makefile index 35e6864..a26d111 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CC=gcc CFLAGS=-Wall -O3 -DVERSION=$(VERSION) INCLUDE= -Iinclude LIB= -Llib -EXECS=trainGenSVM trainGenSVMdataset +EXECS=GenSVM_train GenSVM_grid .PHONY: all clean tar @@ -43,18 +43,20 @@ lib/libgensvm.a: \ src/util.o @echo libgensvm.a... -trainGenSVM: src/trainGenSVM.c lib/libgensvm.a - @$(CC) -o trainGenSVM src/trainGenSVM.c $(CFLAGS) $(INCLUDE) $(LIB)\ +GenSVM_train: src/GenSVMtrain.c lib/libgensvm.a + @$(CC) -o GenSVM_train src/GenSVMtrain.c $(CFLAGS) $(INCLUDE) $(LIB)\ -lgensvm $(LDFLAGS) - @echo trainGenSVM... + @echo GenSVM_train... -trainGenSVMdataset: src/trainGenSVMdataset.c lib/libgensvm.a - @$(CC) -o trainGenSVMdataset src/trainGenSVMdataset.c $(CFLAGS) $(INCLUDE) $(LIB) -lgensvm $(LDFLAGS) - @echo trainGenSVMdataset... +GenSVM_grid: src/GenSVMgrid.c lib/libgensvm.a + @$(CC) -o GenSVM_grid src/GenSVMgrid.c $(CFLAGS) $(INCLUDE) $(LIB) \ + -lgensvm $(LDFLAGS) + @echo GenSVM_grid... 
-predGenSVM: src/predGenSVM.c lib/libgensvm.a - @$(CC) -o predGenSVM src/predGenSVM.c $(CFLAGS) $(INCLUDE) $(LIB) -lgensvm $(LDFLAGS) - @echo predGenSVM... +GenSVM_pred: src/GenSVMpred.c lib/libgensvm.a + @$(CC) -o GenSVM_pred src/GenSVMpred.c $(CFLAGS) $(INCLUDE) $(LIB) \ + -lgensvm $(LDFLAGS) + @echo GenSVM_pred... src/crossval.o: @$(CC) -c -o src/crossval.o src/crossval.c $(CFLAGS) $(INCLUDE) @@ -78,6 +80,7 @@ src/gensvm_init.o: src/gensvm_io.o: @$(CC) -c -o $@ src/gensvm_io.c $(CFLAGS) $(INCLUDE) + @echo gensvm_io.o... src/gensvm_pred.o: @$(CC) -c -o src/gensvm_pred.o src/gensvm_pred.c $(CFLAGS) $(INCLUDE) diff --git a/src/GenSVMgrid.c b/src/GenSVMgrid.c new file mode 100644 index 0000000..eb1f477 --- /dev/null +++ b/src/GenSVMgrid.c @@ -0,0 +1,321 @@ +/** + * @file GenSVM_grid.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Command line interface for the grid search program + * + * @details + * This is a command line interface to the parameter grid search functionality + * of the algorithm. The grid search is specified in a separate file, thereby + * reducing the number of command line arguments. See + * read_training_from_file() for documentation on the training file. + * + * The program runs a grid search as specified in the training file. If + * desired the grid search can incorporate consistency checks to find the + * configuration among the best configurations which scores consistently high. + * All output is written to stdout, unless the quiet mode is specified. + * + * For further usage information, see the program help function. 
+ * + */ + +#include + +#include "crossval.h" +#include "gensvm.h" +#include "gensvm_io.h" +#include "gensvm_init.h" +#include "gensvm_pred.h" +#include "gensvm_train.h" +#include "gensvm_train_dataset.h" +#include "strutil.h" +#include "util.h" + +#define MINARGS 2 + +extern FILE *GENSVM_OUTPUT_FILE; + +// function declarations +void exit_with_help(); +void parse_command_line(int argc, char **argv, char *input_filename); +void read_training_from_file(char *input_filename, struct Training *training); + +/** + * @brief Help function + */ +void exit_with_help() +{ + printf("This is GenSVM, version %1.1f\n\n", VERSION); + printf("Usage: trainGenSVMdataset [options] training_file\n"); + printf("Options:\n"); + printf("-h | -help : print this help.\n"); + printf("-q : quiet mode (no output)\n"); + + exit(0); +} + +/** + * @brief Main interface function for trainGenSVMdataset + * + * @details + * Main interface for the command line program. A given training file which + * specifies a grid search over a single dataset is read. From this, a Queue + * is created containing all Task instances that need to be performed in the + * search. Depending on the type of dataset, either cross validation or + * train/test split training is performed for all tasks. If specified, + * consistency repeats are done at the end of the grid search. Note that + * currently no output is produced other than what is written to stdout. 
+ * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + */ +int main(int argc, char **argv) +{ + char input_filename[MAX_LINE_LENGTH]; + + struct Training *training = Malloc(struct Training, 1); + struct GenData *train_data = Malloc(struct GenData, 1); + struct GenData *test_data = Malloc(struct GenData, 1); + + if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") + || gensvm_check_argv_eq(argc, argv, "-h") ) + exit_with_help(); + parse_command_line(argc, argv, input_filename); + + training->repeats = 0; + note("Reading training file\n"); + read_training_from_file(input_filename, training); + + note("Reading data from %s\n", training->train_data_file); + gensvm_read_data(train_data, training->train_data_file); + if (training->traintype == TT) { + note("Reading data from %s\n", training->test_data_file); + gensvm_read_data(test_data, training->test_data_file); + } + + note("Creating queue\n"); + struct Queue *q = Malloc(struct Queue, 1); + make_queue(training, q, train_data, test_data); + + srand(time(NULL)); + + note("Starting training\n"); + if (training->traintype == TT) + start_training_tt(q); + else + start_training_cv(q); + note("Training finished\n"); + + if (training->repeats > 0) { + consistency_repeats(q, training->repeats, training->traintype); + } + + free_queue(q); + free(training); + gensvm_free_data(train_data); + gensvm_free_data(test_data); + + note("Done.\n"); + return 0; +} + +/** + * @brief Parse command line arguments + * + * @details + * Few arguments can be supplied to the command line. Only quiet mode can be + * specified, or help can be requested. The filename of the training file is + * read from the arguments. Parsing of the training file is done separately in + * read_training_from_file(). 
+ * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] input_filename pre-allocated buffer for the training + * filename. + * + */ +void parse_command_line(int argc, char **argv, char *input_filename) +{ + int i; + + GENSVM_OUTPUT_FILE = stdout; + + for (i=1; i<argc; i++) { + if (argv[i][0] != '-') break; + if (++i>=argc) + exit_with_help(); + switch (argv[i-1][1]) { + case 'q': + GENSVM_OUTPUT_FILE = NULL; + i--; + break; + default: + fprintf(stderr, "Unknown option: -%c\n", + argv[i-1][1]); + exit_with_help(); + } + } + + if (i >= argc) + exit_with_help(); + + strcpy(input_filename, argv[i]); +} + +KernelType parse_kernel_str(char *kernel_line) +{ + if (str_endswith(kernel_line, "LINEAR\n")) { + return K_LINEAR; + } else if (str_endswith(kernel_line, "POLY\n")) { + return K_POLY; + } else if (str_endswith(kernel_line, "RBF\n")) { + return K_RBF; + } else if (str_endswith(kernel_line, "SIGMOID\n")) { + return K_SIGMOID; + } else { + fprintf(stderr, "Unknown kernel specified on line: %s\n", + kernel_line); + exit(1); + } +} + +/** + * @brief Read the Training struct from file + * + * @details + * Read the Training struct from a file. The training file follows a specific + * format specified in @ref spec_training_file. + * + * Commonly used string functions in this function are all_doubles_str() and + * all_longs_str(). + * + * @param[in] input_filename filename of the training file + * @param[in] training Training structure to place the parsed + * parameter grid. 
+ * + */ +void read_training_from_file(char *input_filename, struct Training *training) +{ + long i, nr = 0; + FILE *fid; + char buffer[MAX_LINE_LENGTH]; + char train_filename[MAX_LINE_LENGTH]; + char test_filename[MAX_LINE_LENGTH]; + double *params = Calloc(double, MAX_LINE_LENGTH); + long *lparams = Calloc(long, MAX_LINE_LENGTH); + + fid = fopen(input_filename, "r"); + if (fid == NULL) { + fprintf(stderr, "Error opening training file %s\n", + input_filename); + exit(1); + } + training->traintype = CV; + while ( fgets(buffer, MAX_LINE_LENGTH, fid) != NULL ) { + Memset(params, double, MAX_LINE_LENGTH); + Memset(lparams, long, MAX_LINE_LENGTH); + if (str_startswith(buffer, "train:")) { + sscanf(buffer, "train: %s\n", train_filename); + training->train_data_file = Calloc(char, + MAX_LINE_LENGTH); + strcpy(training->train_data_file, train_filename); + } else if (str_startswith(buffer, "test:")) { + sscanf(buffer, "test: %s\n", test_filename); + training->test_data_file = Calloc(char, + MAX_LINE_LENGTH); + strcpy(training->test_data_file, test_filename); + training->traintype = TT; + } else if (str_startswith(buffer, "p:")) { + nr = all_doubles_str(buffer, 2, params); + training->ps = Calloc(double, nr); + for (i=0; i<nr; i++) + training->ps[i] = params[i]; + training->Np = nr; + } else if (str_startswith(buffer, "lambda:")) { + nr = all_doubles_str(buffer, 7, params); + training->lambdas = Calloc(double, nr); + for (i=0; i<nr; i++) + training->lambdas[i] = params[i]; + training->Nl = nr; + } else if (str_startswith(buffer, "kappa:")) { + nr = all_doubles_str(buffer, 6, params); + training->kappas = Calloc(double, nr); + for (i=0; i<nr; i++) + training->kappas[i] = params[i]; + training->Nk = nr; + } else if (str_startswith(buffer, "epsilon:")) { + nr = all_doubles_str(buffer, 8, params); + training->epsilons = Calloc(double, nr); + for (i=0; i<nr; i++) + training->epsilons[i] = params[i]; + training->Ne = nr; + } else if (str_startswith(buffer, "weight:")) { + nr = all_longs_str(buffer, 7, lparams); + training->weight_idxs = Calloc(int, nr); + for (i=0; 
i<nr; i++) + training->weight_idxs[i] = lparams[i]; + training->Nw = nr; + } else if (str_startswith(buffer, "folds:")) { + nr = all_longs_str(buffer, 6, lparams); + training->folds = lparams[0]; + if (nr > 1) + fprintf(stderr, "Field \"folds\" only takes " + "one value. Additional " + "fields are ignored.\n"); + } else if (str_startswith(buffer, "repeats:")) { + nr = all_longs_str(buffer, 8, lparams); + training->repeats = lparams[0]; + if (nr > 1) + fprintf(stderr, "Field \"repeats\" only " + "takes one value. Additional " + "fields are ignored.\n"); + } else if (str_startswith(buffer, "kernel:")) { + training->kerneltype = parse_kernel_str(buffer); + } else if (str_startswith(buffer, "gamma:")) { + nr = all_doubles_str(buffer, 6, params); + if (training->kerneltype == K_LINEAR) { + fprintf(stderr, "Field \"gamma\" ignored, " + "linear kernel is used.\n"); + training->Ng = 0; + break; + } + training->gammas = Calloc(double, nr); + for (i=0; i<nr; i++) + training->gammas[i] = params[i]; + training->Ng = nr; + } else if (str_startswith(buffer, "coef:")) { + nr = all_doubles_str(buffer, 5, params); + if (training->kerneltype == K_LINEAR || + training->kerneltype == K_RBF) { + fprintf(stderr, "Field \"coef\" ignored with " + "specified kernel.\n"); + training->Nc = 0; + break; + } + training->coefs = Calloc(double, nr); + for (i=0; i<nr; i++) + training->coefs[i] = params[i]; + training->Nc = nr; + } else if (str_startswith(buffer, "degree:")) { + nr = all_doubles_str(buffer, 7, params); + if (training->kerneltype != K_POLY) { + fprintf(stderr, "Field \"degree\" ignored " + "with specified kernel.\n"); + training->Nd = 0; + break; + } + training->degrees = Calloc(double, nr); + for (i=0; i<nr; i++) + training->degrees[i] = params[i]; + training->Nd = nr; + } else { + fprintf(stderr, "Cannot find any parameters on line: " + "%s\n", buffer); + } + } + + free(params); + free(lparams); + fclose(fid); +} diff --git a/src/GenSVMpred.c b/src/GenSVMpred.c new file mode 100644 index 0000000..c874aaf --- /dev/null +++ b/src/GenSVMpred.c @@ -0,0 +1,180 @@ +/* + * 
20140317: + * THIS FUNCTION IS DEPRECATED, SINCE IT DOES NOT WORK WITH KERNELS. + * + */ + +/** + * @file GenSVM_pred.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Command line interface for predicting class labels + * + * @details + * This is a command line program for predicting the class labels or + * determining the predictive performance of a pre-determined model on a given + * test dataset. The predictive performance can be written to the screen or + * the predicted class labels can be written to a specified output file. This + * is done using gensvm_write_predictions(). + * + * The specified model file must follow the specification given in + * gensvm_write_model(). + * + * For usage information, see the program help function. + * + */ + +#include "gensvm.h" +#include "gensvm_init.h" +#include "gensvm_io.h" +#include "gensvm_pred.h" +#include "util.h" + +#define MINARGS 3 + +extern FILE *GENSVM_OUTPUT_FILE; + +// function declarations +void exit_with_help(); +void parse_command_line(int argc, char **argv, + char *input_filename, char *output_filename, + char *model_filename); + +/** + * @brief Help function + */ +void exit_with_help() +{ + printf("This is GenSVM, version %1.1f\n\n", VERSION); + printf("Usage: predGenSVM [options] test_data_file model_file\n"); + printf("Options:\n"); + printf("-o output_file : write output to file\n"); + printf("-q : quiet mode (no output)\n"); + exit(0); +} + +/** + * @brief Main interface function for predGenSVM + * + * @details + * Main interface for the command line program. A given model file is read and + * a test dataset is initialized from the given data. The predictive + * performance (hitrate) of the model on the test set is printed to the output + * stream (default = stdout). If an output file is specified the predictions + * are written to the file. + * + * @todo + * Ensure that the program can read model files without class labels + * specified. 
In that case no prediction accuracy is printed to the screen. + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + */ +int main(int argc, char **argv) +{ + long *predy; + double performance; + + char input_filename[MAX_LINE_LENGTH]; + char model_filename[MAX_LINE_LENGTH]; + char output_filename[MAX_LINE_LENGTH];; + + if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") + || gensvm_check_argv_eq(argc, argv, "-h") ) + exit_with_help(); + parse_command_line(argc, argv, input_filename, output_filename, + model_filename); + + // read the data and model + struct GenModel *model = gensvm_init_model(); + struct GenData *data = gensvm_init_data(); + gensvm_read_data(data, input_filename); + gensvm_read_model(model, model_filename); + + // check if the number of attributes in data equals that in model + if (data->m != model->m) { + fprintf(stderr, "Error: number of attributes in data (%li) " + "does not equal the number of attributes in " + "model (%li)\n", data->m, model->m); + exit(1); + } else if (data->K != model->K) { + fprintf(stderr, "Error: number of classes in data (%li) " + "does not equal the number of classes in " + "model (%li)\n", data->K, model->K); + exit(1); + } + + // predict labels and performance if test data has labels + predy = Calloc(long, data->n); + gensvm_predict_labels(data, model, predy); + if (data->y != NULL) { + performance = gensvm_prediction_perf(data, predy); + note("Predictive performance: %3.2f%%\n", performance); + } + + // if output file is specified, write predictions to it + if (gensvm_check_argv_eq(argc, argv, "-o")) { + gensvm_write_predictions(data, predy, output_filename); + note("Predictions written to: %s\n", output_filename); + } + + // free the model, data, and predictions + gensvm_free_model(model); + gensvm_free_data(data); + free(predy); + + return 0; +} + +/** + * @brief Parse command line arguments + * + * @details + * Read the data filename and model 
filename from the command line arguments. + * If specified, also read the output filename. If the quiet flag is given, + * set the global output stream to NULL. On error, exit_with_help(). + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] input_filename pre-allocated array for the input + * filename + * @param[in] output_filename pre-allocated array for the output + * filename + * @param[in] model_filename pre-allocated array for the model + * filename + * + */ +void parse_command_line(int argc, char **argv, char *input_filename, + char *output_filename, char *model_filename) +{ + int i; + + GENSVM_OUTPUT_FILE = stdout; + + for (i=1; i<argc; i++) { + if (argv[i][0] != '-') break; + if (++i >= argc) + exit_with_help(); + switch (argv[i-1][1]) { + case 'o': + strcpy(output_filename, argv[i]); + break; + case 'q': + GENSVM_OUTPUT_FILE = NULL; + i--; + break; + default: + fprintf(stderr, "Unknown option: -%c\n", + argv[i-1][1]); + exit_with_help(); + } + } + + if (i >= argc) + exit_with_help(); + + strcpy(input_filename, argv[i]); + i++; + strcpy(model_filename, argv[i]); +} diff --git a/src/GenSVMtrain.c b/src/GenSVMtrain.c new file mode 100644 index 0000000..0c1c6bc --- /dev/null +++ b/src/GenSVMtrain.c @@ -0,0 +1,244 @@ +/** + * @file GenSVM_train.c + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Command line interface for training a single model with GenSVM + * + * @details + * This is a command line program for training a single model on a given + * dataset. To run a grid search over a number of parameter configurations, + * see trainGenSVMdataset.c. 
+ * + */ + +#include +#include + +#include "gensvm_kernel.h" +#include "libGenSVM.h" +#include "gensvm.h" +#include "gensvm_io.h" +#include "gensvm_init.h" +#include "gensvm_train.h" +#include "util.h" + +#define MINARGS 2 + +extern FILE *GENSVM_OUTPUT_FILE; + +// function declarations +void exit_with_help(); +void parse_command_line(int argc, char **argv, struct GenModel *model, + char *input_filename, char *output_filename, char *model_filename); + +/** + * @brief Help function + */ +void exit_with_help() +{ + printf("This is GenSVM, version %1.1f\n\n", VERSION); + printf("Usage: trainGenSVM [options] training_data_file\n"); + printf("Options:\n"); + printf("-c coef : coefficient for the polynomial and sigmoid kernel\n"); + printf("-d degree : degree for the polynomial kernel\n"); + printf("-e epsilon : set the value of the stopping criterion\n"); + printf("-g gamma : parameter for the rbf, polynomial or sigmoid " + "kernel\n"); + printf("-h | -help : print this help.\n"); + printf("-k kappa : set the value of kappa used in the Huber hinge\n"); + printf("-l lambda : set the value of lambda (lambda > 0)\n"); + printf("-m model_file : use previous model as seed for W and t\n"); + printf("-o output_file : write output to file\n"); + printf("-p p-value : set the value of p in the lp norm " + "(1.0 <= p <= 2.0)\n"); + printf("-q : quiet mode (no output)\n"); + printf("-r rho : choose the weigth specification (1 = unit, 2 = " + "group)\n"); + printf("-t type: kerneltype (LINEAR=0, POLY=1, RBF=2, SIGMOID=3)\n"); + + exit(0); +} + +/** + * @brief Main interface function for trainGenSVM + * + * @details + * Main interface for the command line program. A given dataset file is read + * and a GenSVM model is trained on this data. By default the progress of the + * computations are written to stdout. See for full options of the program the + * help function. 
+ * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + */ +int main(int argc, char **argv) +{ + char input_filename[MAX_LINE_LENGTH]; + char model_filename[MAX_LINE_LENGTH]; + char output_filename[MAX_LINE_LENGTH]; + + struct GenModel *model = gensvm_init_model(); + struct GenData *data = gensvm_init_data(); + + if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") + || gensvm_check_argv_eq(argc, argv, "-h") ) + exit_with_help(); + parse_command_line(argc, argv, model, input_filename, + output_filename, model_filename); + + // read data file + gensvm_read_data(data, input_filename); + + // copy dataset parameters to model + model->n = data->n; + model->m = data->m; + model->K = data->K; + model->data_file = input_filename; + + // allocate model + gensvm_allocate_model(model); + + // initialize kernel (if necessary) + gensvm_make_kernel(model, data); + + // reallocate model and initialize weights + gensvm_reallocate_model(model, data->n, data->m); + gensvm_initialize_weights(data, model); + + // seed the random number generator (only place in programs is in + // command line interfaces) + srand(time(NULL)); + + if (gensvm_check_argv_eq(argc, argv, "-m")) { + struct GenModel *seed_model = gensvm_init_model(); + gensvm_read_model(seed_model, model_filename); + gensvm_seed_model_V(seed_model, model, data); + gensvm_free_model(seed_model); + } else { + gensvm_seed_model_V(NULL, model, data); + } + + // start training + gensvm_optimize(model, data); + + // write_model to file + if (gensvm_check_argv_eq(argc, argv, "-o")) { + gensvm_write_model(model, output_filename); + note("Output written to %s\n", output_filename); + } + + // free model and data + gensvm_free_model(model); + gensvm_free_data(data); + + return 0; +} + +/** + * @brief Parse command line arguments + * + * @details + * Process the command line arguments for the model parameters, and record + * them in the specified GenModel. 
An input filename for the dataset is read + * and if specified an output filename and a model filename for the seed + * model. + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] model initialized model + * @param[in] input_filename pre-allocated buffer for the input + * filename + * @param[in] output_filename pre-allocated buffer for the output + * filename + * @param[in] model_filename pre-allocated buffer for the model + * filename + * + */ +void parse_command_line(int argc, char **argv, struct GenModel *model, + char *input_filename, char *output_filename, char *model_filename) +{ + int i; + double gamma = 1.0, + degree = 2.0, + coef = 0.0; + + GENSVM_OUTPUT_FILE = stdout; + + // parse options + for (i=1; i<argc; i++) { + if (argv[i][0] != '-') break; + if (++i>=argc) { + exit_with_help(); + } + switch (argv[i-1][1]) { + case 'c': + coef = atof(argv[i]); + break; + case 'd': + degree = atof(argv[i]); + break; + case 'e': + model->epsilon = atof(argv[i]); + break; + case 'g': + gamma = atof(argv[i]); + break; + case 'k': + model->kappa = atof(argv[i]); + break; + case 'l': + model->lambda = atof(argv[i]); + break; + case 'm': + strcpy(model_filename, argv[i]); + break; + case 'o': + strcpy(output_filename, argv[i]); + break; + case 'p': + model->p = atof(argv[i]); + break; + case 'r': + model->weight_idx = atoi(argv[i]); + break; + case 't': + model->kerneltype = atoi(argv[i]); + break; + case 'q': + GENSVM_OUTPUT_FILE = NULL; + i--; + break; + default: + fprintf(stderr, "Unknown option: -%c\n", + argv[i-1][1]); + exit_with_help(); + } + } + + // read input filename + if (i >= argc) + exit_with_help(); + + strcpy(input_filename, argv[i]); + + // set kernel parameters + switch (model->kerneltype) { + case K_LINEAR: + break; + case K_POLY: + model->kernelparam = Calloc(double, 3); + model->kernelparam[0] = gamma; + model->kernelparam[1] = coef; + model->kernelparam[2] = degree; + break; + case K_RBF: + model->kernelparam = Calloc(double, 1); + 
model->kernelparam[0] = gamma; + break; + case K_SIGMOID: + model->kernelparam = Calloc(double, 1); + model->kernelparam[0] = gamma; + model->kernelparam[1] = coef; + } +} diff --git a/src/gensvm_pred.c b/src/gensvm_pred.c index f331116..88678d7 100644 --- a/src/gensvm_pred.c +++ b/src/gensvm_pred.c @@ -65,7 +65,7 @@ void gensvm_predict_labels_linear(struct GenData *data, // Generate the simplex-space vectors cblas_dgemm( - CblasRowGenor, + CblasRowMajor, CblasNoTrans, CblasNoTrans, n, @@ -147,7 +147,7 @@ void gensvm_predict_labels_kernel(struct GenData *data_test, } cblas_dgemm( - CblasRowGenor, + CblasRowMajor, CblasNoTrans, CblasNoTrans, n_test, diff --git a/src/gensvm_train.c b/src/gensvm_train.c index 9deac80..4100b8a 100644 --- a/src/gensvm_train.c +++ b/src/gensvm_train.c @@ -384,7 +384,7 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, // Note that the use of dsym is faster than dspr, even // though dspr uses less memory. cblas_dsyr( - CblasRowGenor, + CblasRowMajor, CblasUpper, m+1, Avalue, @@ -394,7 +394,7 @@ void gensvm_get_update(struct GenModel *model, struct GenData *data, double *B, m+1); } // Copy upper to lower (necessary because we need to switch - // to Col-Genor order for LAPACK). + // to Col-Major order for LAPACK). /* for (i=0; iK; cblas_dgemm( - CblasRowGenor, + CblasRowMajor, CblasNoTrans, CblasNoTrans, n, @@ -255,8 +255,8 @@ void gensvm_seed_model_V(struct GenModel *from_model, * @brief Use step doubling * * @details - * Step doubling can be used to speed up the Genorization algorithm. Instead - * of using the value at the minimimum of the majorization function, the value + * Step doubling can be used to speed up the maorization algorithm. Instead of + * using the value at the minimimum of the majorization function, the value * ``opposite'' the majorization point is used. This can essentially cut the * number of iterations necessary to reach the minimum in half. 
* diff --git a/src/predGenSVM.c b/src/predGenSVM.c deleted file mode 100644 index 7fac2ef..0000000 --- a/src/predGenSVM.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * 20140317: - * THIS FUNCTION IS DEPRECATED, SINCE IT DOES NOT WORK WITH KERNELS. - * - */ - -/** - * @file predGenSVM.c - * @author Gertjan van den Burg - * @date January, 2014 - * @brief Command line interface for predicting class labels - * - * @details - * This is a command line program for predicting the class labels or - * determining the predictive performance of a pre-determined model on a given - * test dataset. The predictive performance can be written to the screen or - * the predicted class labels can be written to a specified output file. This - * is done using gensvm_write_predictions(). - * - * The specified model file must follow the specification given in - * gensvm_write_model(). - * - * For usage information, see the program help function. - * - */ - -#include "gensvm.h" -#include "gensvm_init.h" -#include "gensvm_io.h" -#include "gensvm_pred.h" -#include "util.h" - -#define MINARGS 3 - -extern FILE *GENSVM_OUTPUT_FILE; - -// function declarations -void exit_with_help(); -void parse_command_line(int argc, char **argv, - char *input_filename, char *output_filename, - char *model_filename); - -/** - * @brief Help function - */ -void exit_with_help() -{ - printf("This is GenSVM, version %1.1f\n\n", VERSION); - printf("Usage: predGenSVM [options] test_data_file model_file\n"); - printf("Options:\n"); - printf("-o output_file : write output to file\n"); - printf("-q : quiet mode (no output)\n"); - exit(0); -} - -/** - * @brief Main interface function for predGenSVM - * - * @details - * Main interface for the command line program. A given model file is read and - * a test dataset is initialized from the given data. The predictive - * performance (hitrate) of the model on the test set is printed to the output - * stream (default = stdout). 
If an output file is specified the predictions - * are written to the file. - * - * @todo - * Ensure that the program can read model files without class labels - * specified. In that case no prediction accuracy is printed to the screen. - * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * - */ -int main(int argc, char **argv) -{ - long *predy; - double performance; - - char input_filename[MAX_LINE_LENGTH]; - char model_filename[MAX_LINE_LENGTH]; - char output_filename[MAX_LINE_LENGTH];; - - if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") - || gensvm_check_argv_eq(argc, argv, "-h") ) - exit_with_help(); - parse_command_line(argc, argv, input_filename, output_filename, - model_filename); - - // read the data and model - struct GenModel *model = gensvm_init_model(); - struct GenData *data = gensvm_init_data(); - gensvm_read_data(data, input_filename); - gensvm_read_model(model, model_filename); - - // check if the number of attributes in data equals that in model - if (data->m != model->m) { - fprintf(stderr, "Error: number of attributes in data (%li) " - "does not equal the number of attributes in " - "model (%li)\n", data->m, model->m); - exit(1); - } else if (data->K != model->K) { - fprintf(stderr, "Error: number of classes in data (%li) " - "does not equal the number of classes in " - "model (%li)\n", data->K, model->K); - exit(1); - } - - // predict labels and performance if test data has labels - predy = Calloc(long, data->n); - gensvm_predict_labels(data, model, predy); - if (data->y != NULL) { - performance = gensvm_prediction_perf(data, predy); - note("Predictive performance: %3.2f%%\n", performance); - } - - // if output file is specified, write predictions to it - if (gensvm_check_argv_eq(argc, argv, "-o")) { - gensvm_write_predictions(data, predy, output_filename); - note("Predictions written to: %s\n", output_filename); - } - - // free the model, data, and predictions - 
gensvm_free_model(model); - gensvm_free_data(data); - free(predy); - - return 0; -} - -/** - * @brief Parse command line arguments - * - * @details - * Read the data filename and model filename from the command line arguments. - * If specified, also read the output filename. If the quiet flag is given, - * set the global output stream to NULL. On error, exit_with_help(). - * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * @param[in] input_filename pre-allocated array for the input - * filename - * @param[in] output_filename pre-allocated array for the output - * filename - * @param[in] model_filename pre-allocated array for the model - * filename - * - */ -void parse_command_line(int argc, char **argv, char *input_filename, - char *output_filename, char *model_filename) -{ - int i; - - GENSVM_OUTPUT_FILE = stdout; - - for (i=1; i<argc; i++) { - if (argv[i][0] != '-') break; - if (++i >= argc) - exit_with_help(); - switch (argv[i-1][1]) { - case 'o': - strcpy(output_filename, argv[i]); - break; - case 'q': - GENSVM_OUTPUT_FILE = NULL; - i--; - break; - default: - fprintf(stderr, "Unknown option: -%c\n", - argv[i-1][1]); - exit_with_help(); - } - } - - if (i >= argc) - exit_with_help(); - - strcpy(input_filename, argv[i]); - i++; - strcpy(model_filename, argv[i]); -} diff --git a/src/trainGenSVM.c b/src/trainGenSVM.c deleted file mode 100644 index eb75f5d..0000000 --- a/src/trainGenSVM.c +++ /dev/null @@ -1,244 +0,0 @@ -/** - * @file trainGenSVM.c - * @author Gertjan van den Burg - * @date August, 2013 - * @brief Command line interface for training a single model with GenSVM - * - * @details - * This is a command line program for training a single model on a given - * dataset. To run a grid search over a number of parameter configurations, - * see trainGenSVMdataset.c. 
- * - */ - -#include -#include - -#include "gensvm_kernel.h" -#include "libGenSVM.h" -#include "gensvm.h" -#include "gensvm_io.h" -#include "gensvm_init.h" -#include "gensvm_train.h" -#include "util.h" - -#define MINARGS 2 - -extern FILE *GENSVM_OUTPUT_FILE; - -// function declarations -void exit_with_help(); -void parse_command_line(int argc, char **argv, struct GenModel *model, - char *input_filename, char *output_filename, char *model_filename); - -/** - * @brief Help function - */ -void exit_with_help() -{ - printf("This is GenSVM, version %1.1f\n\n", VERSION); - printf("Usage: trainGenSVM [options] training_data_file\n"); - printf("Options:\n"); - printf("-c coef : coefficient for the polynomial and sigmoid kernel\n"); - printf("-d degree : degree for the polynomial kernel\n"); - printf("-e epsilon : set the value of the stopping criterion\n"); - printf("-g gamma : parameter for the rbf, polynomial or sigmoid " - "kernel\n"); - printf("-h | -help : print this help.\n"); - printf("-k kappa : set the value of kappa used in the Huber hinge\n"); - printf("-l lambda : set the value of lambda (lambda > 0)\n"); - printf("-m model_file : use previous model as seed for W and t\n"); - printf("-o output_file : write output to file\n"); - printf("-p p-value : set the value of p in the lp norm " - "(1.0 <= p <= 2.0)\n"); - printf("-q : quiet mode (no output)\n"); - printf("-r rho : choose the weigth specification (1 = unit, 2 = " - "group)\n"); - printf("-t type: kerneltype (LINEAR=0, POLY=1, RBF=2, SIGMOID=3)\n"); - - exit(0); -} - -/** - * @brief Main interface function for trainGenSVM - * - * @details - * Main interface for the command line program. A given dataset file is read - * and a GenSVM model is trained on this data. By default the progress of the - * computations are written to stdout. See for full options of the program the - * help function. 
- * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * - */ -int main(int argc, char **argv) -{ - char input_filename[MAX_LINE_LENGTH]; - char model_filename[MAX_LINE_LENGTH]; - char output_filename[MAX_LINE_LENGTH]; - - struct GenModel *model = gensvm_init_model(); - struct GenData *data = gensvm_init_data(); - - if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") - || gensvm_check_argv_eq(argc, argv, "-h") ) - exit_with_help(); - parse_command_line(argc, argv, model, input_filename, - output_filename, model_filename); - - // read data file - gensvm_read_data(data, input_filename); - - // copy dataset parameters to model - model->n = data->n; - model->m = data->m; - model->K = data->K; - model->data_file = input_filename; - - // allocate model - gensvm_allocate_model(model); - - // initialize kernel (if necessary) - gensvm_make_kernel(model, data); - - // reallocate model and initialize weights - gensvm_reallocate_model(model, data->n, data->m); - gensvm_initialize_weights(data, model); - - // seed the random number generator (only place in programs is in - // command line interfaces) - srand(time(NULL)); - - if (gensvm_check_argv_eq(argc, argv, "-m")) { - struct GenModel *seed_model = gensvm_init_model(); - gensvm_read_model(seed_model, model_filename); - gensvm_seed_model_V(seed_model, model, data); - gensvm_free_model(seed_model); - } else { - gensvm_seed_model_V(NULL, model, data); - } - - // start training - gensvm_optimize(model, data); - - // write_model to file - if (gensvm_check_argv_eq(argc, argv, "-o")) { - gensvm_write_model(model, output_filename); - note("Output written to %s\n", output_filename); - } - - // free model and data - gensvm_free_model(model); - gensvm_free_data(data); - - return 0; -} - -/** - * @brief Parse command line arguments - * - * @details - * Process the command line arguments for the model parameters, and record - * them in the specified GenModel. 
An input filename for the dataset is read - * and if specified an output filename and a model filename for the seed - * model. - * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * @param[in] model initialized model - * @param[in] input_filename pre-allocated buffer for the input - * filename - * @param[in] output_filename pre-allocated buffer for the output - * filename - * @param[in] model_filename pre-allocated buffer for the model - * filename - * - */ -void parse_command_line(int argc, char **argv, struct GenModel *model, - char *input_filename, char *output_filename, char *model_filename) -{ - int i; - double gamma = 1.0, - degree = 2.0, - coef = 0.0; - - GENSVM_OUTPUT_FILE = stdout; - - // parse options - for (i=1; i=argc) { - exit_with_help(); - } - switch (argv[i-1][1]) { - case 'c': - coef = atof(argv[i]); - break; - case 'd': - degree = atof(argv[i]); - break; - case 'e': - model->epsilon = atof(argv[i]); - break; - case 'g': - gamma = atof(argv[i]); - break; - case 'k': - model->kappa = atof(argv[i]); - break; - case 'l': - model->lambda = atof(argv[i]); - break; - case 'm': - strcpy(model_filename, argv[i]); - break; - case 'o': - strcpy(output_filename, argv[i]); - break; - case 'p': - model->p = atof(argv[i]); - break; - case 'r': - model->weight_idx = atoi(argv[i]); - break; - case 't': - model->kerneltype = atoi(argv[i]); - break; - case 'q': - GENSVM_OUTPUT_FILE = NULL; - i--; - break; - default: - fprintf(stderr, "Unknown option: -%c\n", - argv[i-1][1]); - exit_with_help(); - } - } - - // read input filename - if (i >= argc) - exit_with_help(); - - strcpy(input_filename, argv[i]); - - // set kernel parameters - switch (model->kerneltype) { - case K_LINEAR: - break; - case K_POLY: - model->kernelparam = Calloc(double, 3); - model->kernelparam[0] = gamma; - model->kernelparam[1] = coef; - model->kernelparam[2] = degree; - break; - case K_RBF: - model->kernelparam = Calloc(double, 1); - 
model->kernelparam[0] = gamma; - break; - case K_SIGMOID: - model->kernelparam = Calloc(double, 1); - model->kernelparam[0] = gamma; - model->kernelparam[1] = coef; - } -} diff --git a/src/trainGenSVMdataset.c b/src/trainGenSVMdataset.c deleted file mode 100644 index 2882c8f..0000000 --- a/src/trainGenSVMdataset.c +++ /dev/null @@ -1,321 +0,0 @@ -/** - * @file trainGenSVMdataset.c - * @author Gertjan van den Burg - * @date January, 2014 - * @brief Command line interface for the grid search program - * - * @details - * This is a command line interface to the parameter grid search functionality - * of the algorithm. The grid search is specified in a separate file, thereby - * reducing the number of command line arguments. See - * read_training_from_file() for documentation on the training file. - * - * The program runs a grid search as specified in the training file. If - * desired the grid search can incorporate consistency checks to find the - * configuration among the best configurations which scores consistently high. - * All output is written to stdout, unless the quiet mode is specified. - * - * For further usage information, see the program help function. 
- * - */ - -#include - -#include "crossval.h" -#include "gensvm.h" -#include "gensvm_io.h" -#include "gensvm_init.h" -#include "gensvm_pred.h" -#include "gensvm_train.h" -#include "gensvm_train_dataset.h" -#include "strutil.h" -#include "util.h" - -#define MINARGS 2 - -extern FILE *GENSVM_OUTPUT_FILE; - -// function declarations -void exit_with_help(); -void parse_command_line(int argc, char **argv, char *input_filename); -void read_training_from_file(char *input_filename, struct Training *training); - -/** - * @brief Help function - */ -void exit_with_help() -{ - printf("This is GenSVM, version %1.1f\n\n", VERSION); - printf("Usage: trainGenSVMdataset [options] training_file\n"); - printf("Options:\n"); - printf("-h | -help : print this help.\n"); - printf("-q : quiet mode (no output)\n"); - - exit(0); -} - -/** - * @brief Main interface function for trainGenSVMdataset - * - * @details - * Main interface for the command line program. A given training file which - * specifies a grid search over a single dataset is read. From this, a Queue - * is created containing all Task instances that need to be performed in the - * search. Depending on the type of dataset, either cross validation or - * train/test split training is performed for all tasks. If specified, - * consistency repeats are done at the end of the grid search. Note that - * currently no output is produced other than what is written to stdout. 
- * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * - */ -int main(int argc, char **argv) -{ - char input_filename[MAX_LINE_LENGTH]; - - struct Training *training = Malloc(struct Training, 1); - struct GenData *train_data = Malloc(struct GenData, 1); - struct GenData *test_data = Malloc(struct GenData, 1); - - if (argc < MINARGS || gensvm_check_argv(argc, argv, "-help") - || gensvm_check_argv_eq(argc, argv, "-h") ) - exit_with_help(); - parse_command_line(argc, argv, input_filename); - - training->repeats = 0; - note("Reading training file\n"); - read_training_from_file(input_filename, training); - - note("Reading data from %s\n", training->train_data_file); - gensvm_read_data(train_data, training->train_data_file); - if (training->traintype == TT) { - note("Reading data from %s\n", training->test_data_file); - gensvm_read_data(test_data, training->test_data_file); - } - - note("Creating queue\n"); - struct Queue *q = Malloc(struct Queue, 1); - make_queue(training, q, train_data, test_data); - - srand(time(NULL)); - - note("Starting training\n"); - if (training->traintype == TT) - start_training_tt(q); - else - start_training_cv(q); - note("Training finished\n"); - - if (training->repeats > 0) { - consistency_repeats(q, training->repeats, training->traintype); - } - - free_queue(q); - free(training); - gensvm_free_data(train_data); - gensvm_free_data(test_data); - - note("Done.\n"); - return 0; -} - -/** - * @brief Parse command line arguments - * - * @details - * Few arguments can be supplied to the command line. Only quiet mode can be - * specified, or help can be requested. The filename of the training file is - * read from the arguments. Parsing of the training file is done separately in - * read_training_from_file(). 
- * - * @param[in] argc number of command line arguments - * @param[in] argv array of command line arguments - * @param[in] input_filename pre-allocated buffer for the training - * filename. - * - */ -void parse_command_line(int argc, char **argv, char *input_filename) -{ - int i; - - GENSVM_OUTPUT_FILE = stdout; - - for (i=1; i=argc) - exit_with_help(); - switch (argv[i-1][1]) { - case 'q': - GENSVM_OUTPUT_FILE = NULL; - i--; - break; - default: - fprintf(stderr, "Unknown option: -%c\n", - argv[i-1][1]); - exit_with_help(); - } - } - - if (i >= argc) - exit_with_help(); - - strcpy(input_filename, argv[i]); -} - -KernelType parse_kernel_str(char *kernel_line) -{ - if (str_endswith(kernel_line, "LINEAR\n")) { - return K_LINEAR; - } else if (str_endswith(kernel_line, "POLY\n")) { - return K_POLY; - } else if (str_endswith(kernel_line, "RBF\n")) { - return K_RBF; - } else if (str_endswith(kernel_line, "SIGMOID\n")) { - return K_SIGMOID; - } else { - fprintf(stderr, "Unknown kernel specified on line: %s\n", - kernel_line); - exit(1); - } -} - -/** - * @brief Read the Training struct from file - * - * @details - * Read the Training struct from a file. The training file follows a specific - * format specified in @ref spec_training_file. - * - * Commonly used string functions in this function are all_doubles_str() and - * all_longs_str(). - * - * @param[in] input_filename filename of the training file - * @param[in] training Training structure to place the parsed - * parameter grid. 
- * - */ -void read_training_from_file(char *input_filename, struct Training *training) -{ - long i, nr = 0; - FILE *fid; - char buffer[MAX_LINE_LENGTH]; - char train_filename[MAX_LINE_LENGTH]; - char test_filename[MAX_LINE_LENGTH]; - double *params = Calloc(double, MAX_LINE_LENGTH); - long *lparams = Calloc(long, MAX_LINE_LENGTH); - - fid = fopen(input_filename, "r"); - if (fid == NULL) { - fprintf(stderr, "Error opening training file %s\n", - input_filename); - exit(1); - } - training->traintype = CV; - while ( fgets(buffer, MAX_LINE_LENGTH, fid) != NULL ) { - Memset(params, double, MAX_LINE_LENGTH); - Memset(lparams, long, MAX_LINE_LENGTH); - if (str_startswith(buffer, "train:")) { - sscanf(buffer, "train: %s\n", train_filename); - training->train_data_file = Calloc(char, - MAX_LINE_LENGTH); - strcpy(training->train_data_file, train_filename); - } else if (str_startswith(buffer, "test:")) { - sscanf(buffer, "test: %s\n", test_filename); - training->test_data_file = Calloc(char, - MAX_LINE_LENGTH); - strcpy(training->test_data_file, test_filename); - training->traintype = TT; - } else if (str_startswith(buffer, "p:")) { - nr = all_doubles_str(buffer, 2, params); - training->ps = Calloc(double, nr); - for (i=0; ips[i] = params[i]; - training->Np = nr; - } else if (str_startswith(buffer, "lambda:")) { - nr = all_doubles_str(buffer, 7, params); - training->lambdas = Calloc(double, nr); - for (i=0; ilambdas[i] = params[i]; - training->Nl = nr; - } else if (str_startswith(buffer, "kappa:")) { - nr = all_doubles_str(buffer, 6, params); - training->kappas = Calloc(double, nr); - for (i=0; ikappas[i] = params[i]; - training->Nk = nr; - } else if (str_startswith(buffer, "epsilon:")) { - nr = all_doubles_str(buffer, 8, params); - training->epsilons = Calloc(double, nr); - for (i=0; iepsilons[i] = params[i]; - training->Ne = nr; - } else if (str_startswith(buffer, "weight:")) { - nr = all_longs_str(buffer, 7, lparams); - training->weight_idxs = Calloc(int, nr); - for (i=0; 
iweight_idxs[i] = lparams[i]; - training->Nw = nr; - } else if (str_startswith(buffer, "folds:")) { - nr = all_longs_str(buffer, 6, lparams); - training->folds = lparams[0]; - if (nr > 1) - fprintf(stderr, "Field \"folds\" only takes " - "one value. Additional " - "fields are ignored.\n"); - } else if (str_startswith(buffer, "repeats:")) { - nr = all_longs_str(buffer, 8, lparams); - training->repeats = lparams[0]; - if (nr > 1) - fprintf(stderr, "Field \"repeats\" only " - "takes one value. Additional " - "fields are ignored.\n"); - } else if (str_startswith(buffer, "kernel:")) { - training->kerneltype = parse_kernel_str(buffer); - } else if (str_startswith(buffer, "gamma:")) { - nr = all_doubles_str(buffer, 6, params); - if (training->kerneltype == K_LINEAR) { - fprintf(stderr, "Field \"gamma\" ignored, " - "linear kernel is used.\n"); - training->Ng = 0; - break; - } - training->gammas = Calloc(double, nr); - for (i=0; igammas[i] = params[i]; - training->Ng = nr; - } else if (str_startswith(buffer, "coef:")) { - nr = all_doubles_str(buffer, 5, params); - if (training->kerneltype == K_LINEAR || - training->kerneltype == K_RBF) { - fprintf(stderr, "Field \"coef\" ignored with " - "specified kernel.\n"); - training->Nc = 0; - break; - } - training->coefs = Calloc(double, nr); - for (i=0; icoefs[i] = params[i]; - training->Nc = nr; - } else if (str_startswith(buffer, "degree:")) { - nr = all_doubles_str(buffer, 7, params); - if (training->kerneltype != K_POLY) { - fprintf(stderr, "Field \"degree\" ignored " - "with specified kernel.\n"); - training->Nd = 0; - break; - } - training->degrees = Calloc(double, nr); - for (i=0; idegrees[i] = params[i]; - training->Nd = nr; - } else { - fprintf(stderr, "Cannot find any parameters on line: " - "%s\n", buffer); - } - } - - free(params); - free(lparams); - fclose(fid); -} -- cgit v1.2.3