/**
* @file gensvm_init.c
* @author G.J.J. van den Burg
* @date 2014-01-07
* @brief Functions for initializing model and data structures
* @details
*
* This file contains functions for initializing a GenModel instance
* and a GenData instance. In addition, default values for these
* structures are defined here (and only here). Functions for allocating
* memory for the model structure and freeing of the model and data structures
* are also included.
*
* @copyright
Copyright 2016, G.J.J. van den Burg.
This file is part of GenSVM.
GenSVM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GenSVM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GenSVM. If not, see <http://www.gnu.org/licenses/>.
*/
#include "gensvm_init.h"
#include "gensvm_print.h"
/**
 * @brief Seed the matrix V from an existing model or using rand
 *
 * @details
 * The matrix V must be seeded before the main_loop() can start.
 * This can be done by either seeding it with random numbers or
 * using the solution from a previous model on the same dataset
 * as initial seed. The latter option usually allows for a
 * significant improvement in the number of iterations necessary
 * because the seeded model V is closer to the optimal V.
 *
 * Random seeding is used when no seed model is supplied (@p from_model is
 * NULL), or when the seed model's dimensions (GenModel::m or GenModel::K)
 * differ from those of @p to_model. In that case the rows of V are seeded
 * with random numbers between the inverse of the minimum and the inverse of
 * the maximum of the corresponding column of Z. This is done to center the
 * product of the two in the simplex space. A column minimum or maximum that
 * is (numerically) zero is replaced by -1 or 1 respectively before it is
 * inverted.
 *
 * The random numbers are drawn with rand(), so the result depends on the
 * state of that generator at the time of the call.
 *
 * The data matrix is read from GenData::Z when it is non-NULL (dense), and
 * from GenData::spZ otherwise (sparse).
 *
 * @param[in] 		from_model 	GenModel from which to copy V, may be
 * 					NULL
 * @param[in,out] 	to_model 	GenModel to which V will be copied
 * @param[in] 		data 		GenData structure with the data
 */
void gensvm_init_V(struct GenModel *from_model,
		struct GenModel *to_model, struct GenData *data)
{
	long i, j, k, jj_start, jj_end, jj;
	double cmin, cmax, value, rnd;
	double *col_min = NULL,
	       *col_max = NULL;

	// if no model is supplied, or the dimensions of the supplied model
	// don't match, then we use random initialization.
	if (from_model == NULL || from_model->m != to_model->m ||
			from_model->K != to_model->K) {
		col_min = Calloc(double, to_model->m+1);
		col_max = Calloc(double, to_model->m+1);
		// start from sentinels that any actual value will replace
		for (j=0; j<to_model->m+1; j++) {
			col_min[j] = 1.0e100;
			col_max[j] = -1.0e100;
		}

		if (data->Z == NULL) {
			// sparse matrix: ia[i] and ia[i+1] delimit the stored
			// entries of row i, ja[jj] is the column of entry jj
			long *visit_count = Calloc(long, to_model->m+1);
			for (i=0; i<data->spZ->n_row; i++) {
				jj_start = data->spZ->ia[i];
				jj_end = data->spZ->ia[i+1];

				for (jj=jj_start; jj<jj_end; jj++) {
					j = data->spZ->ja[jj];
					value = data->spZ->values[jj];

					col_min[j] = minimum(col_min[j], value);
					col_max[j] = maximum(col_max[j], value);
					visit_count[j]++;
				}
			}

			// correction in case the minimum or maximum is 0: a
			// column with fewer stored entries than rows holds at
			// least one implicit zero
			for (j=0; j<to_model->m+1; j++) {
				if (visit_count[j] < data->spZ->n_row) {
					col_min[j] = minimum(col_min[j], 0.0);
					col_max[j] = maximum(col_max[j], 0.0);
				}
			}
			free(visit_count);
		} else {
			// dense matrix
			// NOTE(review): the number of rows is taken from
			// to_model->n; confirm that this equals data->n.
			for (i=0; i<to_model->n; i++) {
				for (j=0; j<to_model->m+1; j++) {
					value = matrix_get(data->Z,
							to_model->m+1, i, j);
					col_min[j] = minimum(col_min[j], value);
					col_max[j] = maximum(col_max[j], value);
				}
			}
		}

		for (j=0; j<to_model->m+1; j++) {
			// avoid dividing by a (numerically) zero extreme
			cmin = (fabs(col_min[j]) < 1e-10) ? -1 : col_min[j];
			cmax = (fabs(col_max[j]) < 1e-10) ? 1 : col_max[j];
			for (k=0; k<to_model->K-1; k++) {
				// rnd in [0, 1] interpolates between 1/cmin
				// and 1/cmax
				rnd = ((double) rand()) / ((double) RAND_MAX);
				value = 1.0/cmin + (1.0/cmax - 1.0/cmin)*rnd;
				matrix_set(to_model->V, to_model->K-1, j, k,
						value);
			}
		}

		free(col_min);
		free(col_max);
	} else {
		// dimensions agree: copy V element by element
		for (i=0; i<to_model->m+1; i++) {
			for (j=0; j<to_model->K-1; j++) {
				value = matrix_get(from_model->V,
						from_model->K-1, i, j);
				matrix_set(to_model->V, to_model->K-1, i, j,
						value);
			}
		}
	}
}
/**
 * @brief Initialize instance weights
 *
 * @details
 * Instance weights can for instance be used to add additional weights to
 * instances of certain classes. Two default weight possibilities are
 * implemented here. The first is unit weights, where each instance gets
 * weight 1 (GenModel::weight_idx equal to 1).
 *
 * The second are group size correction weights (GenModel::weight_idx equal
 * to 2), which are calculated as
 * @f[
 * 	\rho_i = \frac{n}{Kn_k} ,
 * @f]
 * where @f$ n_k @f$ is the number of instances in group @f$ k @f$ and
 * @f$ y_i = k @f$. The class labels in GenData::y are used as 1-based
 * indices into the group counts.
 *
 * For any other value of GenModel::weight_idx an error message is printed
 * and the program exits with EXIT_FAILURE.
 *
 * @param[in] 		data 	GenData with the dataset
 * @param[in,out] 	model 	GenModel with the weight specification. On
 * 				exit GenModel::rho contains the instance
 * 				weights.
 */
void gensvm_initialize_weights(struct GenData *data, struct GenModel *model)
{
	long *groups = NULL;
	long i;

	long n = model->n;
	long K = model->K;

	if (model->weight_idx == 1) {
		// unit weights
		for (i=0; i<n; i++) {
			model->rho[i] = 1.0;
		}
	}
	else if (model->weight_idx == 2) {
		// group size correction weights: first count the number of
		// instances per class, then weigh each instance by n/(K*n_k)
		groups = Calloc(long, K);
		for (i=0; i<n; i++) {
			groups[data->y[i]-1]++;
		}
		for (i=0; i<n; i++) {
			model->rho[i] = ((double) n)/((double) (
						groups[data->y[i]-1]*K));
		}
	} else {
		// LCOV_EXCL_START
		err("[GenSVM Error]: Unknown weight specification.\n");
		exit(EXIT_FAILURE);
		// LCOV_EXCL_STOP
	}

	// groups is still NULL for unit weights; free(NULL) is a no-op
	free(groups);
}