aboutsummaryrefslogtreecommitdiff
path: root/src/util.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/util.c')
-rw-r--r--src/util.c224
1 files changed, 200 insertions, 24 deletions
diff --git a/src/util.c b/src/util.c
index 101cb00..8e4b806 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1,19 +1,55 @@
+/**
+ * @file util.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Utility functions
+ *
+ * @details
+ * This file contains several utility functions for coordinating input and
+ * output of data and model files. It also contains string functions.
+ *
+ * @todo
+ * Pull this apart.
+ *
+ */
#include <math.h>
#include <stdarg.h>
#include <time.h>
-#include "matrix.h"
-#include "MSVMMaj.h"
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
#include "strutil.h"
#include "util.h"
-FILE *MSVMMAJ_OUTPUT_FILE;
-
-/*
- Read the data from the data_file. The data matrix X is augmented
- with a column of ones, to get the matrix Z.
-*/
+FILE *MSVMMAJ_OUTPUT_FILE; ///< The #MSVMMAJ_OUTPUT_FILE specifies the
+ ///< output stream to which all output is
+ ///< written. This is done through the
+ ///< internal (!)
+ ///< function msvmmaj_print_string(). The
+ ///< advantage of using a global output
+ ///< stream variable is that the output can
+ ///< temporarily be suppressed by importing
+ ///< this variable through @c extern and
+ ///< (temporarily) setting it to NULL.
+
+/**
+ * @brief Read data from file
+ *
+ * @details
+ * Read the data from the data_file. The data matrix X is augmented
+ * with a column of ones, to get the matrix Z. The data is expected
+ * to follow a specific format, which is specified in the @ref spec_data_file.
+ * The class labels are corrected internally to correspond to the interval
+ * [1 .. K], where K is the total number of classes.
+ *
+ * @todo
+ * Make sure that this function allows datasets without class labels for
+ * testing.
+ *
+ * @param[in,out] dataset initialized MajData struct
+ * @param[in] data_file filename of the data file.
+ */
void msvmmaj_read_data(struct MajData *dataset, char *data_file)
{
FILE *fid;
@@ -22,7 +58,7 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file)
long nr = 0; // used to check consistency of data
double value;
long K = 0;
- long min_y = 1000;
+ long min_y = 1000000;
char buf[MAX_LINE_LENGTH];
@@ -79,13 +115,15 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file)
dataset->y[i]++;
K++;
} else if (min_y < 0 ) {
- fprintf(stderr, "ERROR: wrong class labels in %s, minimum value is: %ld\n",
+ fprintf(stderr, "ERROR: wrong class labels in %s, minimum "
+ "value is: %ld\n",
data_file, min_y);
exit(0);
}
if (nr < n * m) {
- fprintf(stderr, "ERROR: not enough data found in %s\n", data_file);
+ fprintf(stderr, "ERROR: not enough data found in %s\n",
+ data_file);
exit(0);
}
@@ -98,6 +136,19 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file)
dataset->K = K;
}
+/**
+ * @brief Read model from file
+ *
+ * @details
+ * Read a MajModel from a model file. The MajModel struct must have been
+ * initalized elswhere. The model file is expected to follow the @ref
+ * spec_model_file. The easiest way to generate a model file is through
+ * msvmmaj_write_model(), which can for instance be used in trainMSVMMaj.c.
+ *
+ * @param[in,out] model initialized MajModel
+ * @param[in] model_filename filename of the model file
+ *
+ */
void msvmmaj_read_model(struct MajModel *model, char *model_filename)
{
long i, j, nr = 0;
@@ -108,7 +159,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
fid = fopen(model_filename, "r");
if (fid == NULL) {
- fprintf(stderr, "Error opening model file %s\n", model_filename);
+ fprintf(stderr, "Error opening model file %s\n",
+ model_filename);
exit(1);
}
// skip the first four lines
@@ -120,7 +172,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
model->lambda = get_fmt_double(fid, model_filename, "lambda = %lf");
model->kappa = get_fmt_double(fid, model_filename, "kappa = %lf");
model->epsilon = get_fmt_double(fid, model_filename, "epsilon = %lf");
- model->weight_idx = (int) get_fmt_long(fid, model_filename, "weight_idx = %li");
+ model->weight_idx = (int) get_fmt_long(fid, model_filename,
+ "weight_idx = %li");
// skip to data section
for (i=0; i<2; i++)
@@ -128,7 +181,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
// read filename of data file
if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) {
- fprintf(stderr, "Error reading model file %s\n", model_filename);
+ fprintf(stderr, "Error reading model file %s\n",
+ model_filename);
exit(1);
}
sscanf(buffer, "filename = %s\n", data_filename);
@@ -153,12 +207,25 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
}
if (nr != (model->m+1)*(model->K-1)) {
fprintf(stderr, "Error reading model file %s. "
- "Not enough elements of V found.\n", model_filename);
+ "Not enough elements of V found.\n",
+ model_filename);
exit(1);
}
-
}
+/**
+ * @brief Write model to file
+ *
+ * @details
+ * Write a MajModel to a file. The current time is specified in the file in
+ * UTC + offset. The model file further corresponds to the @ref
+ * spec_model_file.
+ *
+ * @param[in] model MajModel which contains an estimate for
+ * MajModel::V
+ * @param[in] output_filename the output file to write the model to
+ *
+ */
void msvmmaj_write_model(struct MajModel *model, char *output_filename)
{
FILE *fid;
@@ -171,7 +238,8 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename)
// open output file
fid = fopen(output_filename, "w");
if (fid == NULL) {
- fprintf(stderr, "Error opening output file %s", output_filename);
+ fprintf(stderr, "Error opening output file %s",
+ output_filename);
exit(1);
}
@@ -201,7 +269,8 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename)
// Write output to file
fprintf(fid, "Output file for MSVMMaj (version %1.1f)\n", VERSION);
- fprintf(fid, "Generated on: %s (UTC %+03i:%02i)\n\n", timestr, hours, minutes);
+ fprintf(fid, "Generated on: %s (UTC %+03i:%02i)\n\n",
+ timestr, hours, minutes);
fprintf(fid, "Model:\n");
fprintf(fid, "p = %15.16f\n", model->p);
fprintf(fid, "lambda = %15.16f\n", model->lambda);
@@ -218,35 +287,71 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename)
fprintf(fid, "Output:\n");
for (i=0; i<model->m+1; i++) {
for (j=0; j<model->K-1; j++) {
- fprintf(fid, "%+15.16f ", matrix_get(model->V, model->K-1, i, j));
+ fprintf(fid, "%+15.16f ",
+ matrix_get(model->V,
+ model->K-1, i, j));
}
fprintf(fid, "\n");
}
fclose(fid);
-
}
-void msvmmaj_write_predictions(struct MajData *data, long *predy, char *output_filename)
+/**
+ * @brief Write predictions to file
+ *
+ * @details
+ * Write the given predictions to an output file, such that the resulting file
+ * corresponds to the @ref spec_data_file.
+ *
+ * @param[in] data MajData with the original instances
+ * @param[in] predy predictions of the class labels of the
+ * instances in the given MajData. Note that the
+ * order of the instances is assumed to be the
+ * same.
+ * @param[in] output_filename the file to which the predictions are written
+ *
+ */
+void msvmmaj_write_predictions(struct MajData *data, long *predy,
+ char *output_filename)
{
long i, j;
FILE *fid;
fid = fopen(output_filename, "w");
if (fid == NULL) {
- fprintf(stderr, "Error opening output file %s", output_filename);
+ fprintf(stderr, "Error opening output file %s",
+ output_filename);
exit(1);
}
for (i=0; i<data->n; i++) {
for (j=0; j<data->m; j++)
- fprintf(fid, "%f ", matrix_get(data->Z, data->m+1, i, j+1));
+ fprintf(fid, "%f ",
+ matrix_get(data->Z,
+ data->m+1, i, j+1));
fprintf(fid, "%li\n", predy[i]);
}
fclose(fid);
}
+/**
+ * @brief Check if any command line arguments contain string
+ *
+ * @details
+ * Check if any of a given array of command line arguments contains a given
+ * string. If the string is found, the index of the string in argv is
+ * returned. If the string is not found, 0 is returned.
+ *
+ * This function is copied from MSVMpack/libMSVM.c.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv command line arguments
+ * @param[in] str string to find in the arguments
+ * @returns index of the string in the arguments if found, 0
+ * otherwise
+ */
int msvmmaj_check_argv(int argc, char **argv, char *str)
{
int i;
@@ -260,6 +365,22 @@ int msvmmaj_check_argv(int argc, char **argv, char *str)
return arg_str;
}
+/**
+ * @brief Check if a command line argument equals a string
+ *
+ * @details
+ * Check if any of the command line arguments is exactly equal to a given
+ * string. If so, return the index of the corresponding command line argument.
+ * If not, return 0.
+ *
+ * This function is copied from MSVMpack/libMSVM.c
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv command line arguments
+ * @param[in] str string to find in the arguments
+ * @returns index of the command line argument that corresponds to
+ * the string, 0 if none matches.
+ */
int msvmmaj_check_argv_eq(int argc, char **argv, char *str)
{
int i;
@@ -274,6 +395,19 @@ int msvmmaj_check_argv_eq(int argc, char **argv, char *str)
}
+/**
+ * @brief Print a given string to the specified output stream
+ *
+ * @details
+ * This function is used to print a given string to the output stream
+ * specified by #MSVMMAJ_OUTPUT_FILE. The stream is flushed after the string
+ * is written to the stream. If #MSVMMAJ_OUTPUT_FILE is NULL, nothing is
+ * written. Note that this function is only used by note(), it should never be
+ * used directly.
+ *
+ * @param[in] s string to write to the stream
+ *
+ */
static void msvmmaj_print_string(const char *s)
{
if (MSVMMAJ_OUTPUT_FILE != NULL) {
@@ -282,6 +416,19 @@ static void msvmmaj_print_string(const char *s)
}
}
+/**
+ * @brief Parse a formatted string and write to the output stream
+ *
+ * @details
+ * This function is a replacement of fprintf(), such that the output stream
+ * does not have to be specified at each function call. The functionality is
+ * exactly the same however. Writing the formatted string to the output stream
+ * is handled by msvmmaj_print_string().
+ *
+ * @param[in] fmt String format
+ * @param[in] ... variable argument list for the string format
+ *
+ */
void note(const char *fmt,...)
{
char buf[BUFSIZ];
@@ -292,6 +439,16 @@ void note(const char *fmt,...)
(*msvmmaj_print_string)(buf);
}
+/**
+ * @brief Allocate memory for a MajModel
+ *
+ * @details
+ * This function can be used to allocate the memory needed for a MajModel. All
+ * arrays in the model are specified and initialized to 0.
+ *
+ * @param[in] model MajModel to allocate
+ *
+ */
void msvmmaj_allocate_model(struct MajModel *model)
{
long n = model->n;
@@ -360,6 +517,16 @@ void msvmmaj_allocate_model(struct MajModel *model)
}
+/**
+ * @brief Free allocated MajModel struct
+ *
+ * @details
+ * Simply free a previously allocated MajModel by freeing all its component
+ * arrays. Note that the model struct itself is also freed here.
+ *
+ * @param[in] model MajModel to free
+ *
+ */
void msvmmaj_free_model(struct MajModel *model)
{
free(model->W);
@@ -376,10 +543,19 @@ void msvmmaj_free_model(struct MajModel *model)
free(model);
}
+/**
+ * @brief Free allocated MajData struct
+ *
+ * @details
+ * Simply free a previously allocated MajData struct by freeing all its
+ * components. Note that the data struct itself is also freed here.
+ *
+ * @param[in] data MajData struct to free
+ *
+ */
void msvmmaj_free_data(struct MajData *data)
{
free(data->Z);
free(data->y);
free(data);
}
-