aboutsummaryrefslogtreecommitdiff
path: root/src/trainMSVMMajdataset.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/trainMSVMMajdataset.c')
-rw-r--r--src/trainMSVMMajdataset.c155
1 files changed, 145 insertions, 10 deletions
diff --git a/src/trainMSVMMajdataset.c b/src/trainMSVMMajdataset.c
index 7c3385c..097df85 100644
--- a/src/trainMSVMMajdataset.c
+++ b/src/trainMSVMMajdataset.c
@@ -1,7 +1,28 @@
+/**
+ * @file trainMSVMMajdataset.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Command line interface for the grid search program
+ *
+ * @details
+ * This is a command line interface to the parameter grid search functionality
+ * of the algorithm. The grid search is specified in a separate file, thereby
+ * reducing the number of command line arguments. See
+ * read_training_from_file() for documentation on the training file.
+ *
+ * The program runs a grid search as specified in the training file. If
+ * desired, the grid search can incorporate consistency checks to find, among
+ * the best configurations, the one that scores consistently high. All output
+ * is written to stdout, unless quiet mode is specified.
+ *
+ * For further usage information, see the program help function.
+ *
+ */
+
#include <time.h>
#include "crossval.h"
-#include "MSVMMaj.h"
+#include "msvmmaj.h"
#include "msvmmaj_pred.h"
#include "msvmmaj_train.h"
#include "msvmmaj_train_dataset.h"
@@ -12,11 +33,15 @@
extern FILE *MSVMMAJ_OUTPUT_FILE;
+// function declarations
void print_null(const char *s) {} // intentionally empty: ignores s and prints nothing
void exit_with_help();
void parse_command_line(int argc, char **argv, char *input_filename);
void read_training_from_file(char *input_filename, struct Training *training);
+/**
+ * @brief Help function
+ */
void exit_with_help()
{
printf("This is MSVMMaj, version %1.1f\n\n", VERSION);
@@ -28,6 +53,22 @@ void exit_with_help()
exit(0);
}
+/**
+ * @brief Main interface function for trainMSVMMajdataset
+ *
+ * @details
+ * Main interface for the command line program. A given training file which
+ * specifies a grid search over a single dataset is read. From this, a Queue
+ * is created containing all Task instances that need to be performed in the
+ * search. Depending on the type of dataset, either cross validation or
+ * train/test split training is performed for all tasks. If specified,
+ * consistency repeats are done at the end of the grid search. Note that
+ * currently no output is produced other than what is written to stdout.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ *
+ */
int main(int argc, char **argv)
{
char input_filename[MAX_LINE_LENGTH];
@@ -78,6 +119,21 @@ int main(int argc, char **argv)
return 0;
}
+/**
+ * @brief Parse command line arguments
+ *
+ * @details
+ * Only a few arguments can be supplied on the command line: quiet mode can be
+ * specified, or help can be requested. The filename of the training file is
+ * read from the arguments. Parsing of the training file is done separately in
+ * read_training_from_file().
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ * @param[out] input_filename pre-allocated buffer that receives the
+ * training filename.
+ *
+ */
void parse_command_line(int argc, char **argv, char *input_filename)
{
int i;
@@ -94,7 +150,8 @@ void parse_command_line(int argc, char **argv, char *input_filename)
i--;
break;
default:
- fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]);
+ fprintf(stderr, "Unknown option: -%c\n",
+ argv[i-1][1]);
exit_with_help();
}
}
@@ -105,6 +162,21 @@ void parse_command_line(int argc, char **argv, char *input_filename)
strcpy(input_filename, argv[i]);
}
+/**
+ * @brief Read the Training struct from file
+ *
+ * @details
+ * Read the Training struct from a file. The training file follows the
+ * format described in @ref spec_training_file.
+ *
+ * Commonly used string functions in this function are all_doubles_str() and
+ * all_longs_str().
+ *
+ * @param[in] input_filename filename of the training file
+ * @param[in,out] training Training structure in which the parsed
+ * parameter grid is stored.
+ *
+ */
void read_training_from_file(char *input_filename, struct Training *training)
{
long i, nr = 0;
@@ -117,7 +189,8 @@ void read_training_from_file(char *input_filename, struct Training *training)
fid = fopen(input_filename, "r");
if (fid == NULL) {
- fprintf(stderr, "Error opening training file %s\n", input_filename);
+ fprintf(stderr, "Error opening training file %s\n",
+ input_filename);
exit(1);
}
training->traintype = CV;
@@ -126,11 +199,13 @@ void read_training_from_file(char *input_filename, struct Training *training)
Memset(lparams, long, MAX_LINE_LENGTH);
if (str_startswith(buffer, "train:")) {
sscanf(buffer, "train: %s\n", train_filename);
- training->train_data_file = Calloc(char, MAX_LINE_LENGTH);
+ training->train_data_file = Calloc(char,
+ MAX_LINE_LENGTH);
strcpy(training->train_data_file, train_filename);
} else if (str_startswith(buffer, "test:")) {
sscanf(buffer, "test: %s\n", test_filename);
- training->test_data_file = Calloc(char, MAX_LINE_LENGTH);
+ training->test_data_file = Calloc(char,
+ MAX_LINE_LENGTH);
strcpy(training->test_data_file, test_filename);
training->traintype = TT;
} else if (str_startswith(buffer, "p:")) {
@@ -167,16 +242,76 @@ void read_training_from_file(char *input_filename, struct Training *training)
nr = all_longs_str(buffer, 6, lparams);
training->folds = lparams[0];
if (nr > 1)
- fprintf(stderr, "Field \"folds\" only takes one value. "
- "Additional fields are ignored.\n");
+ fprintf(stderr, "Field \"folds\" only takes "
+ "one value. Additional "
+ "fields are ignored.\n");
} else if (str_startswith(buffer, "repeats:")) {
nr = all_longs_str(buffer, 8, lparams);
training->repeats = lparams[0];
if (nr > 1)
- fprintf(stderr, "Field \"repeats\" only takes one value. "
- "Additional fields are ignored.\n");
+ fprintf(stderr, "Field \"repeats\" only "
+ "takes one value. Additional "
+ "fields are ignored.\n");
+ } else if (str_startswith(buffer, "kernel:")) {
+ nr = all_longs_str(buffer, 7, lparams);
+ if (nr > 1)
+ fprintf(stderr, "Field \"kernel\" only takes "
+ "one value. Additional "
+ "fields are ignored.\n");
+ switch (lparams[0]) {
+ case 0:
+ training->kerneltype = K_LINEAR;
+ break;
+ case 1:
+ training->kerneltype = K_POLY;
+ break;
+ case 2:
+ training->kerneltype = K_RBF;
+ break;
+ case 3:
+ training->kerneltype = K_SIGMOID;
+ break;
+ }
+ } else if (str_startswith(buffer, "gamma:")) {
+ nr = all_doubles_str(buffer, 6, params);
+ if (training->kerneltype == K_LINEAR) {
+ fprintf(stderr, "Field \"gamma\" ignored, "
+ "linear kernel is used.\n");
+ training->Ng = 0;
+ break;
+ }
+ training->gammas = Calloc(double, nr);
+ for (i=0; i<nr; i++)
+ training->gammas[i] = params[i];
+ training->Ng = nr;
+ } else if (str_startswith(buffer, "coef:")) {
+ nr = all_doubles_str(buffer, 5, params);
+ if (training->kerneltype == K_LINEAR ||
+ training->kerneltype == K_RBF) {
+ fprintf(stderr, "Field \"coef\" ignored with"
+ "specified kernel.\n");
+ training->Nc = 0;
+ break;
+ }
+ training->coefs = Calloc(double, nr);
+ for (i=0; i<nr; i++)
+ training->coefs[i] = params[i];
+ training->Nc = nr;
+ } else if (str_startswith(buffer, "degree:")) {
+ nr = all_doubles_str(buffer, 7, params);
+ if (training->kerneltype != K_POLY) {
+ fprintf(stderr, "Field \"degree\" ignored "
+ "with specified kernel.\n");
+ training->Nd = 0;
+ break;
+ }
+ training->degrees = Calloc(double, nr);
+ for (i=0; i<nr; i++)
+ training->degrees[i] = params[i];
+ training->Nd = nr;
} else {
- fprintf(stderr, "Cannot find any parameters on line: %s\n", buffer);
+ fprintf(stderr, "Cannot find any parameters on line: "
+ "%s\n", buffer);
}
}