add sparse matrices to GenSVM and reorganize update functionality

author: Gertjan van den Burg <burg@ese.eur.nl> 2016-10-14 18:35:38 +0200
committer: Gertjan van den Burg <burg@ese.eur.nl> 2016-10-14 18:35:38 +0200
commit: e34123e1055c26d740148cefdb8d1b90208e424e (patch)
tree: 51c62b010f4beddaa5cd8259fd420a433a8fd1b1 /src
parent: documentation fixes (diff)
download: gensvm-e34123e1055c26d740148cefdb8d1b90208e424e.tar.gz
gensvm-e34123e1055c26d740148cefdb8d1b90208e424e.zip
7 files changed, 877 insertions, 499 deletions
diff --git a/src/GenSVMtraintest.c b/src/GenSVMtraintest.c
index f70fa97..cc0284e 100644
--- a/src/GenSVMtraintest.c
+++ b/src/GenSVMtraintest.c
@@ -83,7 +83,10 @@ int main(int argc, char **argv)
 	strcpy(model->data_file, training_inputfile);
 
 	// seed the random number generator
-	srand(time(NULL));
+	//srand(time(NULL));
+	//
+	// XXX temporary
+	srand(2135);
 
 	// load a seed model from file if it is specified
 	if (gensvm_check_argv_eq(argc, argv, "-s")) {
diff --git a/src/gensvm_base.c b/src/gensvm_base.c
index cef0a3c..e4fc20a 100644
--- a/src/gensvm_base.c
+++ b/src/gensvm_base.c
@@ -30,6 +30,7 @@ struct GenData *gensvm_init_data()
 	data->Sigma = NULL;
 	data->y = NULL;
 	data->Z = NULL;
+	data->spZ = NULL;
 	data->RAW = NULL;
 
 	// set default values
@@ -54,6 +55,9 @@ void gensvm_free_data(struct GenData *data)
 	if (data == NULL)
 		return;
 
+	if (data->spZ != NULL)
+		gensvm_free_sparse(data->spZ);
+
 	if (data->Z == data->RAW) {
 		free(data->Z);
 	} else {
diff --git a/src/gensvm_init.c b/src/gensvm_init.c
index aa96ca6..e0f44c1 100644
--- a/src/gensvm_init.c
+++ b/src/gensvm_init.c
@@ -33,30 +33,67 @@
 void gensvm_init_V(struct GenModel *from_model,
 	       	struct GenModel *to_model, struct GenData *data)
 {
-	long i, j, k;
+	long i, j, k, jj_start, jj_end, jj;
 	double cmin, cmax, value, rnd;
+	double *col_min = NULL,
+	       *col_max = NULL;
 
 	long n = data->n;
 	long m = data->m;
 	long K = data->K;
 
 	if (from_model == NULL) {
-		for (i=0; i<m+1; i++) {
-			cmin = 1e100;
-			cmax = -1e100;
-			for (k=0; k<n; k++) {
-				value = matrix_get(data->Z, m+1, k, i);
-				cmin = minimum(cmin, value);
-				cmax = maximum(cmax, value);
+		col_min = Calloc(double, m+1);
+		col_max = Calloc(double, m+1);
+		for (j=0; j<m+1; j++) {
+			col_min[j] = 1.0e100;
+			col_max[j] = -1.0e100;
+		}
+
+		if (data->Z == NULL) {
+			// sparse matrix
+			int *visit_count = Calloc(int, m+1);
+			for (i=0; i<n; i++) {
+				jj_start = data->spZ->ia[i];
+				jj_end = data->spZ->ia[i+1];
+				for (jj=jj_start; jj<jj_end; jj++) {
+					j = data->spZ->ja[jj];
+					value = data->spZ->values[jj];
+
+					col_min[j] = minimum(col_min[j], value);
+					col_max[j] = maximum(col_max[j], value);
+					visit_count[j]++;
+				}
 			}
-			for (j=0; j<K-1; j++) {
-				cmin = (abs(cmin) < 1e-10) ? -1 : cmin;
-				cmax = (abs(cmax) < 1e-10) ? 1 : cmax;
-				rnd = ((double) rand()) / RAND_MAX;
+			// correction in case the minimum or maximum is 0
+			for (j=0; j<m+1; j++) {
+				if (visit_count[j] < n) {
+					col_min[j] = minimum(col_min[j], 0.0);
+					col_max[j] = maximum(col_max[j], 0.0);
+				}
+			}
+			free(visit_count);
+		} else {
+			// dense matrix
+			for (i=0; i<n; i++) {
+				for (j=0; j<m+1; j++) {
+					value = matrix_get(data->Z, m+1, i, j);
+					col_min[j] = minimum(col_min[j], value);
+					col_max[j] = maximum(col_max[j], value);
+				}
+			}
+		}
+		for (j=0; j<m+1; j++) {
+			cmin = (fabs(col_min[j]) < 1e-10) ? -1 : col_min[j];
+			cmax = (fabs(col_max[j]) < 1e-10) ? 1 : col_max[j];
+			for (k=0; k<K-1; k++) {
+				rnd = ((double) rand()) / ((double) RAND_MAX);
 				value = 1.0/cmin + (1.0/cmax - 1.0/cmin)*rnd;
-				matrix_set(to_model->V, K-1, i, j, value);
+				matrix_set(to_model->V, K-1, j, k, value);
 			}
 		}
+		free(col_min);
+		free(col_max);
 	} else {
 		for (i=0; i<m+1; i++)
 			for (j=0; j<K-1; j++) {
diff --git a/src/gensvm_io.c b/src/gensvm_io.c
index a574654..a9ab734 100644
--- a/src/gensvm_io.c
+++ b/src/gensvm_io.c
@@ -12,7 +12,6 @@
  */
 #include <limits.h>
 #include "gensvm_io.h"
-#include "gensvm_print.h"
 
 /**
  * @brief Read data from file
@@ -139,6 +138,15 @@ void gensvm_read_data(struct GenData *dataset, char *data_file)
 	dataset->K = K;
 	dataset->Z = dataset->RAW;
 
+	if (gensvm_could_sparse(dataset->Z, n, m+1)) {
+		note("Converting to sparse ... ");
+		dataset->spZ = gensvm_dense_to_sparse(dataset->Z, n, m+1);
+		note("done.\n");
+		free(dataset->RAW);
+		dataset->RAW = NULL;
+		dataset->Z = NULL;
+	}
+
 	free(uniq_y);
 }
 
diff --git a/src/gensvm_optimize.c b/src/gensvm_optimize.c
index e517332..e83fa47 100644
--- a/src/gensvm_optimize.c
+++ b/src/gensvm_optimize.c
@@ -167,395 +167,6 @@ double gensvm_get_loss(struct GenModel *model, struct GenData *data,
 }
 
 /**
- * @brief Calculate the value of omega for a single instance
- *
- * @details
- * This function calculates the value of the @f$ \omega_i @f$ variable for a
- * single instance, where
- * @f[
- * 	\omega_i = \frac{1}{p} \left( \sum_{j \neq y_i} h^p\left(
- * 	\overline{q}_i^{(y_i j)} \right)  \right)^{1/p-1}
- * @f]
- * Note that his function uses the precalculated values from GenModel::H and
- * GenModel::R to speed up the computation.
- *
- * @param[in] 	model 	GenModel structure with the current model
- * @param[in] 	i 	index of the instance for which to calculate omega
- * @returns 		the value of omega for instance i
- *
- */
-double gensvm_calculate_omega(struct GenModel *model, struct GenData *data,
-		long i)
-{
-	long j;
-	double h, omega = 0.0,
-	       p = model->p;
-
-	for (j=0; j<model->K; j++) {
-		if (j == (data->y[i]-1))
-			continue;
-		h = matrix_get(model->H, model->K, i, j);
-		omega += pow(h, p);
-	}
-	omega = (1.0/p)*pow(omega, 1.0/p - 1.0);
-
-	return omega;
-}
-
-/**
- * @brief Check if we can do simple majorization for a given instance
- *
- * @details
- * A simple majorization is possible if at most one of the Huberized hinge
- * errors is nonzero for an instance. This is checked here. For this we
- * compute the product of the Huberized error for all @f$j \neq y_i@f$ and
- * check if strictly less than 2 are nonzero. See also the @ref update_math.
- *
- * @param[in] 	model 	GenModel structure with the current model
- * @param[in] 	i 	index of the instance for which to check
- * @returns 		whether or not we can do simple majorization
- *
- */
-bool gensvm_majorize_is_simple(struct GenModel *model, struct GenData *data,
-		long i)
-{
-	long j;
-	double h, value = 0;
-	for (j=0; j<model->K; j++) {
-		if (j == (data->y[i]-1))
-			continue;
-		h = matrix_get(model->H, model->K, i, j);
-		value += (h > 0) ? 1 : 0;
-		if (value > 1)
-			return false;
-	}
-	return true;
-}
-
-/**
- * @brief Compute majorization coefficients for non-simple instance
- *
- * @details
- * In this function we compute the majorization coefficients needed for an
- * instance with a non-simple majorization (@f$\varepsilon_i = 0@f$). In this
- * function, we distinguish a number of cases depending on the value of
- * GenModel::p and the respective value of @f$\overline{q}_i^{(y_ij)}@f$. Note
- * that the linear coefficient is of the form @f$b - a\overline{q}@f$, but
- * often the second term is included in the definition of @f$b@f$, so it can
- * be optimized out. The output argument \p b_aq contains this difference
- * therefore in one go. More details on this function can be found in the @ref
- * update_math. See also gensvm_calculate_ab_simple().
- *
- * @param[in] 	model 	GenModel structure with the current model
- * @param[in] 	i 	index for the instance
- * @param[in] 	j 	index for the class
- * @param[out] 	*a 	output argument for the quadratic coefficient
- * @param[out]  *b_aq 	output argument for the linear coefficient.
- *
- */
-void gensvm_calculate_ab_non_simple(struct GenModel *model, long i, long j,
-		double *a, double *b_aq)
-{
-	double q = matrix_get(model->Q, model->K, i, j);
-	double p = model->p;
-	double kappa = model->kappa;
-	const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0);
-
-	if (2.0 - model->p < 1e-2) {
-		if (q <= - kappa) {
-			*b_aq = 0.5 - kappa/2.0 - q;
-		} else if ( q <= 1.0) {
-			*b_aq = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0));
-		} else {
-			*b_aq = 0;
-		}
-		*a = 1.5;
-	} else {
-		if (q <= (p + kappa - 1.0)/(p - 2.0)) {
-			*a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0);
-		} else if (q <= 1.0) {
-			*a = a2g2;
-		} else {
-			*a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 -
-						kappa/2.0 - q), p - 2.0);
-			*b_aq = (*a)*(2.0*q + kappa - 1.0)/(p - 2.0) +
-				0.5*p*pow(p/(p - 2.0)*(0.5 - kappa/2.0 - q),
-						p - 1.0);
-		}
-		if (q <= -kappa) {
-			*b_aq = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0);
-		} else if ( q <= 1.0) {
-			*b_aq = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p);
-		}
-	}
-}
-
-/**
- * @brief Compute majorization coefficients for simple instances
- *
- * @details
- * In this function we compute the majorization coefficients needed for an
- * instance with a simple majorization. This corresponds to the non-simple
- * majorization for the case where GenModel::p equals 1. Due to this condition
- * the majorization coefficients are quite simple to compute.  Note that the
- * linear coefficient of the majorization is of the form @f$b -
- * a\overline{q}@f$, but often the second term is included in the definition
- * of @f$b@f$, so it can be optimized out. For more details see the @ref
- * update_math, and gensvm_calculate_ab_non_simple().
- *
- * @param[in] 	model 	GenModel structure with the current model
- * @param[in] 	i 	index for the instance
- * @param[in] 	j 	index for the class
- * @param[out] 	*a 	output argument for the quadratic coefficient
- * @param[out] 	*b_aq 	output argument for the linear coefficient
- *
- */
-void gensvm_calculate_ab_simple(struct GenModel *model, long i, long j,
-		double *a, double *b_aq)
-{
-	double q = matrix_get(model->Q, model->K, i, j);
-
-	if (q <= - model->kappa) {
-		*a = 0.25/(0.5 - model->kappa/2.0 - q);
-		*b_aq = 0.5;
-	} else if (q <= 1.0) {
-		*a = 1.0/(2.0*model->kappa + 2.0);
-		*b_aq = (1.0 - q)*(*a);
-	} else {
-		*a = -0.25/(0.5 - model->kappa/2.0 - q);
-		*b_aq = 0;
-	}
-}
-
-/**
- * @brief Compute the alpha_i and beta_i for an instance
- *
- * @details
- * This computes the @f$\alpha_i@f$ value for an instance, and simultaneously 
- * updating the row of the B matrix corresponding to that
- * instance (the @f$\boldsymbol{\beta}_i'@f$). The advantage of doing this at 
- * the same time is that we can compute the a and b values simultaneously in 
- * the gensvm_calculate_ab_simple() and gensvm_calculate_ab_non_simple()
- * functions.
- *
- * The computation is done by first checking whether simple majorization is
- * possible for this instance. If so, the @f$\omega_i@f$ value is set to 1.0,
- * otherwise this value is computed. If simple majorization is possible, the
- * coefficients a and b_aq are computed by gensvm_calculate_ab_simple(),
- * otherwise they're computed by gensvm_calculate_ab_non_simple(). Next, the
- * beta_i updated through the efficient BLAS daxpy function, and part of the 
- * value of @f$\alpha_i@f$ is computed. The final value of @f$\alpha_i@f$ is 
- * returned.
- *
- * @param[in] 		model 	GenModel structure with the current model
- * @param[in] 		i 	index of the instance to update
- * @param[out] 		beta	beta vector of linear coefficients (assumed to
- * 				be allocated elsewhere, initialized here)
- * @returns 			the @f$\alpha_i@f$ value of this instance
- *
- */
-double gensvm_get_alpha_beta(struct GenModel *model, struct GenData *data,
-		long i, double *beta)
-{
-	bool simple;
-	long j,
-	     K = model->K;
-	double omega, a, b_aq = 0.0,
-	       alpha = 0.0;
-	double *uu_row = NULL;
-	const double in = 1.0/((double) model->n);
-
-	simple = gensvm_majorize_is_simple(model, data, i);
-	omega = simple ? 1.0 : gensvm_calculate_omega(model, data, i);
-
-	Memset(beta, double, K-1);
-	for (j=0; j<K; j++) {
-		// skip the class y_i = k
-		if (j == (data->y[i]-1))
-			continue;
-
-		// calculate the a_ijk and (b_ijk - a_ijk q_i^(kj)) values
-		if (simple) {
-			gensvm_calculate_ab_simple(model, i, j, &a, &b_aq);
-		} else {
-			gensvm_calculate_ab_non_simple(model, i, j, &a, &b_aq);
-		}
-
-		// daxpy on beta and UU
-		// daxpy does: y = a*x + y
-		// so y = beta, UU_row = x, a = factor
-		b_aq *= model->rho[i] * omega * in;
-		uu_row = &model->UU[((data->y[i]-1)*K+j)*(K-1)];
-		cblas_daxpy(K-1, b_aq, uu_row, 1, beta, 1);
-
-		// increment Avalue
-		alpha += a;
-	}
-	alpha *= omega * model->rho[i] * in;
-	return alpha;
-}
-
-/**
- * @brief Perform a single step of the majorization algorithm to update V
- *
- * @details
- * This function contains the main update calculations of the algorithm. These
- * calculations are necessary to find a new update V. The calculations exist of
- * recalculating the majorization coefficients for all instances and all
- * classes, and solving a linear system to find V.
- *
- * Because the function gensvm_get_update() is always called after a call to
- * gensvm_get_loss() with the same GenModel::V, it is unnecessary to calculate
- * the updated errors GenModel::Q and GenModel::H here too. This saves on
- * computation time.
- *
- * In calculating the majorization coefficients we calculate the elements of a
- * diagonal matrix A with elements
- * @f[
- * 	A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
- * 		\varepsilon_i a_{ijk}^{(1)} + (1 - \varepsilon_i) \omega_i
- * 		a_{ijk}^{(p)} \right],
- * @f]
- * where @f$ k = y_i @f$.
- * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it
- * is efficient to update a matrix ZAZ through consecutive rank 1 updates with
- * a single element of A and the corresponding row of Z. The BLAS function
- * dsyr is used for this.
- *
- * The B matrix is has rows
- * @f[
- * 	\boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
- * 		\varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)}
- * 			\overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i)
- * 		\omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)}
- * 			\overline{q}_i^{(kj)} \right) \right]
- * 		\boldsymbol{\delta}_{kj}'
- * @f]
- * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$,
- * and one for when it is 0. The 3D simplex difference matrix is used here, in
- * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$.
- *
- * Finally, the following system is solved
- * @f[
- * 	(\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} =
- * 		(\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}'
- * 		\textbf{B})
- * @f]
- * solving this system is done through dposv().
- *
- * @todo
- * Consider using CblasColMajor everywhere
- *
- * @param[in,out] 	model 	model to be updated
- * @param[in] 		data 	data used in model
- * @param[in] 		work 	allocated workspace to use
- */
-void gensvm_get_update(struct GenModel *model, struct GenData *data, 
-		struct GenWork *work)
-{
-	int status;
-	long i, j;
-	double alpha, sqalpha;
-
-	long n = model->n;
-	long m = model->m;
-	long K = model->K;
-
-	gensvm_reset_work(work);
-
-	// generate Z'*A*Z and Z'*B by rank 1 operations
-	for (i=0; i<n; i++) {
-		alpha = gensvm_get_alpha_beta(model, data, i, work->beta);
-
-		// calculate row of matrix LZ, which is a scalar 
-		// multiplication of sqrt(alpha_i) and row z_i' of Z
-		// Note that we use the fact that the first column of Z is 
-		// always 1, by only computing the product for m values and 
-		// copying the first element over.
-		sqalpha = sqrt(alpha);
-		work->LZ[i*(m+1)] = sqalpha;
-		cblas_daxpy(m, sqalpha, &data->Z[i*(m+1)+1], 1, 
-				&work->LZ[i*(m+1)+1], 1);
-
-		// rank 1 update of matrix Z'*B
-		// Note: LDA is the second dimension of ZB because of
-		// Row-Major order
-		cblas_dger(CblasRowMajor, m+1, K-1, 1, &data->Z[i*(m+1)], 1,
-				work->beta, 1, work->ZB, K-1);
-	}
-
-	// calculate Z'*A*Z by symmetric multiplication of LZ with itself 
-	// (ZAZ = (LZ)' * (LZ)
-	cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, m+1, n, 1.0,
-			work->LZ, m+1, 0.0, work->ZAZ, m+1);
-
-	// Calculate right-hand side of system we want to solve
-	// dsymm performs ZB := 1.0 * (ZAZ) * Vbar + 1.0 * ZB
-	// the right-hand side is thus stored in ZB after this call
-	// Note: LDB and LDC are second dimensions of the matrices due to
-	// Row-Major order
-	cblas_dsymm(CblasRowMajor, CblasLeft, CblasUpper, m+1, K-1, 1, 
-			work->ZAZ, m+1, model->V, K-1, 1.0, work->ZB, K-1);
-
-	// Calculate left-hand side of system we want to solve
-	// Add lambda to all diagonal elements except the first one. Recall
-	// that ZAZ is of size m+1 and is symmetric.
-	for (i=m+2; i<=m*(m+2); i+=m+2)
-		work->ZAZ[i] += model->lambda;
-
-	// Lapack uses column-major order, so we transform the ZB matrix to
-	// correspond to this.
-	for (i=0; i<m+1; i++)
-		for (j=0; j<K-1; j++)
-			work->ZBc[j*(m+1)+i] = work->ZB[i*(K-1)+j];
-
-	// Solve the system using dposv. Note that above the upper triangular
-	// part has always been used in row-major order for ZAZ. This
-	// corresponds to the lower triangular part in column-major order.
-	status = dposv('L', m+1, K-1, work->ZAZ, m+1, work->ZBc, m+1);
-
-	// Use dsysv as fallback, for when the ZAZ matrix is not positive
-	// semi-definite for some reason (perhaps due to rounding errors).
-	// This step shouldn't be necessary but is included for safety.
-	if (status != 0) {
-		err("[GenSVM Warning]: Received nonzero status from "
-				"dposv: %i\n", status);
-		int *IPIV = Malloc(int, m+1);
-		double *WORK = Malloc(double, 1);
-		status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc, 
-				m+1, WORK, -1);
-
-		int LWORK = WORK[0];
-		WORK = Realloc(WORK, double, LWORK);
-		status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc, 
-				m+1, WORK, LWORK);
-		if (status != 0)
-			err("[GenSVM Warning]: Received nonzero "
-					"status from dsysv: %i\n", status);
-		free(WORK);
-		WORK = NULL;
-		free(IPIV);
-		IPIV = NULL;
-	}
-
-	// the solution is now stored in ZBc, in column-major order. Here we
-	// convert this back to row-major order
-	for (i=0; i<m+1; i++)
-		for (j=0; j<K-1; j++)
-			work->ZB[i*(K-1)+j] = work->ZBc[j*(m+1)+i];
-
-	// copy the old V to Vbar and the new solution to V
-	for (i=0; i<m+1; i++) {
-		for (j=0; j<K-1; j++) {
-			matrix_set(model->Vbar, K-1, i, j,
-					matrix_get(model->V, K-1, i, j));
-			matrix_set(model->V, K-1, i, j,
-					matrix_get(work->ZB, K-1, i, j));
-		}
-	}
-}
-
-/**
  * @brief Use step doubling
  *
  * @details
@@ -644,24 +255,12 @@ void gensvm_calculate_errors(struct GenModel *model, struct GenData *data,
 	double q, *uu_row = NULL;
 
 	long n = model->n;
-	long m = model->m;
 	long K = model->K;
 
-	cblas_dgemm(
-			CblasRowMajor,
-			CblasNoTrans,
-			CblasNoTrans,
-			n,
-			K-1,
-			m+1,
-			1.0,
-			data->Z,
-			m+1,
-			model->V,
-			K-1,
-			0,
-			ZV,
-			K-1);
+	if (data->spZ == NULL)
+		gensvm_calculate_ZV_dense(model, data, ZV);
+	else
+		gensvm_calculate_ZV_sparse(model, data, ZV);
 
 	for (i=0; i<n; i++) {
 		for (j=0; j<K; j++) {
@@ -674,87 +273,41 @@ void gensvm_calculate_errors(struct GenModel *model, struct GenData *data,
 	}
 }
 
-/**
- * @brief Solve AX = B where A is symmetric positive definite.
- *
- * @details
- * Solve a linear system of equations AX = B where A is symmetric positive
- * definite. This function is a wrapper for the external  LAPACK routine
- * dposv.
- *
- * @param[in] 		UPLO 	which triangle of A is stored
- * @param[in] 		N 	order of A
- * @param[in] 		NRHS 	number of columns of B
- * @param[in,out] 	A 	double precision array of size (LDA, N). On
- * 				exit contains the upper or lower factor of the
- * 				Cholesky factorization of A.
- * @param[in] 		LDA 	leading dimension of A
- * @param[in,out] 	B 	double precision array of size (LDB, NRHS). On
- * 				exit contains the N-by-NRHS solution matrix X.
- * @param[in] 		LDB 	the leading dimension of B
- * @returns 			info parameter which contains the status of the
- * 				computation:
- * 					- =0: 	success
- * 					- <0: 	if -i, the i-th argument had
- * 						an illegal value
- * 					- >0: 	if i, the leading minor of A
- * 						was not positive definite
- *
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html
- */
-int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
-		int LDB)
+void gensvm_calculate_ZV_sparse(struct GenModel *model, 
+		struct GenData *data, double *ZV)
 {
-	extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A,
-			int *LDAp, double *B, int *LDBp, int *INFOp);
-	int INFO;
-	dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO);
-	return INFO;
+	long i, j, jj, jj_start, jj_end, K,
+	    n_row = data->spZ->n_row;
+	double z_ij;
+
+	K = model->K;
+
+	int *Zia = data->spZ->ia;
+	int *Zja = data->spZ->ja;
+	double *vals = data->spZ->values;
+
+	for (i=0; i<n_row; i++) {
+		jj_start = Zia[i];
+		jj_end = Zia[i+1];
+
+		for (jj=jj_start; jj<jj_end; jj++) {
+			j = Zja[jj];
+			z_ij = vals[jj];
+
+			cblas_daxpy(K-1, z_ij, &model->V[j*(K-1)], 1, 
+					&ZV[i*(K-1)], 1);
+		}
+	}
 }
 
-/**
- * @brief Solve a system of equations AX = B where A is symmetric.
- *
- * @details
- * Solve a linear system of equations AX = B where A is symmetric. This
- * function is a wrapper for the external LAPACK routine dsysv.
- *
- * @param[in] 		UPLO 	which triangle of A is stored
- * @param[in] 		N 	order of A
- * @param[in] 		NRHS 	number of columns of B
- * @param[in,out] 	A 	double precision array of size (LDA, N). On
- * 				exit contains the block diagonal matrix D and
- * 				the multipliers used to obtain the factor U or
- * 				L from the factorization A = U*D*U**T or
- * 				A = L*D*L**T.
- * @param[in] 		LDA 	leading dimension of A
- * @param[in] 		IPIV 	integer array containing the details of D
- * @param[in,out] 	B 	double precision array of size (LDB, NRHS). On
- * 				exit contains the N-by-NRHS matrix X
- * @param[in] 		LDB 	leading dimension of B
- * @param[out] 		WORK 	double precision array of size max(1,LWORK). On
- * 				exit, WORK(1) contains the optimal LWORK
- * @param[in] 		LWORK 	the length of WORK, can be used for determining
- * 				the optimal blocksize for dsystrf.
- * @returns 			info parameter which contains the status of the
- * 				computation:
- * 					- =0: 	success
- * 					- <0: 	if -i, the i-th argument had an
- * 						illegal value
- * 					- >0: 	if i, D(i, i) is exactly zero,
- * 						no solution can be computed.
- *
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html
- */
-int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
-		double *B, int LDB, double *WORK, int LWORK)
+void gensvm_calculate_ZV_dense(struct GenModel *model, 
+		struct GenData *data, double *ZV)
 {
-	extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A,
-			int *LDAp, int *IPIV, double *B, int *LDBp,
-			double *WORK, int *LWORK, int *INFOp);
-	int INFO;
-	dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO);
-	return INFO;
+	long n = model->n;
+	long m = model->m;
+	long K = model->K;
+
+	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, K-1, m+1, 
+			1.0, data->Z, m+1, model->V, K-1, 0, ZV, K-1);
 }
+
diff --git a/src/gensvm_sparse.c b/src/gensvm_sparse.c
new file mode 100644
index 0000000..ebc2c9d
--- /dev/null
+++ b/src/gensvm_sparse.c
@@ -0,0 +1,181 @@
+/**
+ * @file gensvm_sparse.c
+ * @author Gertjan van den Burg
+ * @date October 11, 2016
+ * @brief Functions for dealing with sparse data matrices
+ *
+ */
+
+#include "gensvm_sparse.h"
+
+/**
+ * @brief Initialize a GenSparse structure
+ *
+ * @details
+ * A GenSparse structure is used to hold a sparse data matrix. We work with
+ * Compressed Row Storage (CSR) storage, also known as old Yale format.
+ *
+ * @return 	initialized GenSparse instance
+ */
+struct GenSparse *gensvm_init_sparse()
+{
+	struct GenSparse *sp = Malloc(struct GenSparse, 1);
+	sp->nnz = 0;
+	sp->n_row = 0;
+	sp->n_col = 0;
+
+	sp->values = NULL;
+	sp->ia = NULL;
+	sp->ja = NULL;
+
+	return sp;
+}
+
+/**
+ * @brief Free an allocated GenSparse structure
+ *
+ * @details
+ * Simply free a previously allocated GenSparse structure by freeing all of
+ * its components. Finally, the structure itself is freed, and the pointer is
+ * set to NULL for safety.
+ *
+ * @param[in] 	sp 	GenSparse structure to free
+ */
+void gensvm_free_sparse(struct GenSparse *sp)
+{
+	free(sp->values);
+	free(sp->ia);
+	free(sp->ja);
+	free(sp);
+	sp = NULL;
+}
+
+/**
+ * @brief Count the number of nonzeros in a matrix
+ *
+ * @details
+ * This is a utility function to count the number of nonzeros in a dense 
+ * matrix. This is simply done by comparing with 0.0.
+ *
+ * @param[in] 	A 	a dense matrix (RowMajor order)
+ * @param[in] 	rows 	the number of rows of A
+ * @param[in] 	cols 	the number of columns of A
+ *
+ * @return 		the number of nonzeros in A
+ */
+long gensvm_count_nnz(double *A, long rows, long cols)
+{
+	long i, nnz = 0;
+
+	for (i=0; i<rows*cols; i++)
+		nnz += (A[i] != 0.0) ? 1 : 0;
+	return nnz;
+}
+
+/**
+ * @brief Check if it is worthwile to convert to a sparse matrix
+ *
+ * @details
+ * It is only worth to convert to a sparse matrix if the amount of sparsity is
+ * sufficient. For this to be the case, the number of nonzeros must be
+ * smaller than @f$(n(m - 1) - 1)/2@f$. This is tested here. If the amount of
+ * nonzero entries is small enough, the function returns the number of
+ * nonzeros. If it is too big, it returns -1.
+ *
+ * @param[in] 	A 	matrix in dense format (RowMajor order)
+ * @param[in] 	rows 	number of rows of A
+ * @param[in] 	cols 	number of columns of A
+ *
+ * @return
+ */
+bool gensvm_could_sparse(double *A, long rows, long cols)
+{
+	long nnz = gensvm_count_nnz(A, rows, cols);
+
+	if (nnz < (rows*(cols-1.0)-1.0)/2.0) {
+		return true;
+	}
+	return false;
+}
+
+
+/**
+ * @brief Convert a dense matrix to a GenSparse structure if advantageous
+ *
+ * @details
+ * This utility function can be used to convert a dense matrix to a sparse
+ * matrix in the form of a GenSparse struture. Note that the allocated memory
+ * must be freed by the caller. The user should first check whether using a
+ * sparse matrix is worth it by calling gensvm_could_sparse().
+ *
+ * @param[in] 	A 	a dense matrix in RowMajor order
+ * @param[in] 	rows 	number of rows of the matrix A
+ * @param[in] 	cols 	number of columns of the matrix A
+ *
+ * @return 		a GenSparse struct
+ */
+struct GenSparse *gensvm_dense_to_sparse(double *A, long rows, long cols)
+{
+	double value;
+	int row_cnt;
+	long i, j, cnt, nnz = 0;
+	struct GenSparse *spA = NULL;
+
+	nnz = gensvm_count_nnz(A, rows, cols);
+
+	spA = gensvm_init_sparse();
+
+	spA->nnz = nnz;
+	spA->n_row = rows;
+	spA->n_col = cols;
+	spA->values = Calloc(double, nnz);
+	spA->ia = Calloc(int, rows+1);
+	spA->ja = Calloc(int, nnz);
+
+	cnt = 0;
+	spA->ia[0] = 0;
+	for (i=0; i<rows; i++) {
+		row_cnt = 0;
+		for (j=0; j<cols; j++) {
+			value = matrix_get(A, cols, i, j);
+			if (value != 0) {
+				row_cnt++;
+				spA->values[cnt] = value;
+				spA->ja[cnt] = j;
+				cnt++;
+			}
+		}
+		spA->ia[i+1] = spA->ia[i] + row_cnt;
+	}
+
+	return spA;
+}
+
+/**
+ * @brief Convert a GenSparse structure to a dense matrix
+ *
+ * @details
+ * This function converts a GenSparse structure back to a normal dense matrix
+ * in RowMajor order. Note that the allocated memory must be freed by the
+ * caller.
+ *
+ * @param[in] 	A 	a GenSparse structure
+ *
+ * @return 		a dense matrix
+ */
+double *gensvm_sparse_to_dense(struct GenSparse *A)
+{
+	double value;
+	long i, j, jj;
+	double *B = Calloc(double, (A->n_row)*(A->n_col));
+	for (i=0; i<A->n_row; i++) {
+		for (jj=A->ia[i]; jj<A->ia[i+1]; jj++) {
+			j = A->ja[jj];
+			value = A->values[jj];
+			matrix_set(B, A->n_col, i, j, value);
+		}
+	}
+
+	return B;
+}
+
diff --git a/src/gensvm_update.c b/src/gensvm_update.c
new file mode 100644
index 0000000..289f50c
--- /dev/null
+++ b/src/gensvm_update.c
@@ -0,0 +1,592 @@
+/**
+ * @file gensvm_update.c
+ * @author Gertjan van den Burg
+ * @date 2016-10-14
+ * @brief Functions for getting an update of the majorization algorithm
+
+ * Copyright (C)
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+ */
+
+#include "gensvm_update.h"
+
+/**
+ * @brief Calculate the value of omega for a single instance
+ *
+ * @details
+ * This function calculates the value of the @f$ \omega_i @f$ variable for a
+ * single instance, where
+ * @f[
+ * 	\omega_i = \frac{1}{p} \left( \sum_{j \neq y_i} h^p\left(
+ * 	\overline{q}_i^{(y_i j)} \right)  \right)^{1/p-1}
+ * @f]
+ * Note that his function uses the precalculated values from GenModel::H and
+ * GenModel::R to speed up the computation.
+ *
+ * @param[in] 	model 	GenModel structure with the current model
+ * @param[in] 	i 	index of the instance for which to calculate omega
+ * @returns 		the value of omega for instance i
+ *
+ */
+double gensvm_calculate_omega(struct GenModel *model, struct GenData *data,
+		long i)
+{
+	long j;
+	double h, omega = 0.0,
+	       p = model->p;
+
+	for (j=0; j<model->K; j++) {
+		if (j == (data->y[i]-1))
+			continue;
+		h = matrix_get(model->H, model->K, i, j);
+		omega += pow(h, p);
+	}
+	omega = (1.0/p)*pow(omega, 1.0/p - 1.0);
+
+	return omega;
+}
+
+/**
+ * @brief Check if we can do simple majorization for a given instance
+ *
+ * @details
+ * A simple majorization is possible if at most one of the Huberized hinge
+ * errors is nonzero for an instance. This is checked here. For this we
+ * compute the product of the Huberized error for all @f$j \neq y_i@f$ and
+ * check if strictly less than 2 are nonzero. See also the @ref update_math.
+ *
+ * @param[in] 	model 	GenModel structure with the current model
+ * @param[in] 	i 	index of the instance for which to check
+ * @returns 		whether or not we can do simple majorization
+ *
+ */
+bool gensvm_majorize_is_simple(struct GenModel *model, struct GenData *data,
+		long i)
+{
+	long j;
+	double h, value = 0;
+	for (j=0; j<model->K; j++) {
+		if (j == (data->y[i]-1))
+			continue;
+		h = matrix_get(model->H, model->K, i, j);
+		value += (h > 0) ? 1 : 0;
+		if (value > 1)
+			return false;
+	}
+	return true;
+}
+
+/**
+ * @brief Compute majorization coefficients for non-simple instance
+ *
+ * @details
+ * In this function we compute the majorization coefficients needed for an
+ * instance with a non-simple majorization (@f$\varepsilon_i = 0@f$). In this
+ * function, we distinguish a number of cases depending on the value of
+ * GenModel::p and the respective value of @f$\overline{q}_i^{(y_ij)}@f$. Note
+ * that the linear coefficient is of the form @f$b - a\overline{q}@f$, but
+ * often the second term is included in the definition of @f$b@f$, so it can
+ * be optimized out. The output argument \p b_aq contains this difference
+ * therefore in one go. More details on this function can be found in the @ref
+ * update_math. See also gensvm_calculate_ab_simple().
+ *
+ * @param[in] 	model 	GenModel structure with the current model
+ * @param[in] 	i 	index for the instance
+ * @param[in] 	j 	index for the class
+ * @param[out] 	*a 	output argument for the quadratic coefficient
+ * @param[out]  *b_aq 	output argument for the linear coefficient.
+ *
+ */
+void gensvm_calculate_ab_non_simple(struct GenModel *model, long i, long j,
+		double *a, double *b_aq)
+{
+	double q = matrix_get(model->Q, model->K, i, j);
+	double p = model->p;
+	double kappa = model->kappa;
+	const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0);
+
+	if (2.0 - model->p < 1e-2) {
+		if (q <= - kappa) {
+			*b_aq = 0.5 - kappa/2.0 - q;
+		} else if ( q <= 1.0) {
+			*b_aq = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0));
+		} else {
+			*b_aq = 0;
+		}
+		*a = 1.5;
+	} else {
+		if (q <= (p + kappa - 1.0)/(p - 2.0)) {
+			*a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0);
+		} else if (q <= 1.0) {
+			*a = a2g2;
+		} else {
+			*a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 -
+						kappa/2.0 - q), p - 2.0);
+			*b_aq = (*a)*(2.0*q + kappa - 1.0)/(p - 2.0) +
+				0.5*p*pow(p/(p - 2.0)*(0.5 - kappa/2.0 - q),
+						p - 1.0);
+		}
+		if (q <= -kappa) {
+			*b_aq = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0);
+		} else if ( q <= 1.0) {
+			*b_aq = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p);
+		}
+	}
+}
+
+/**
+ * @brief Compute majorization coefficients for simple instances
+ *
+ * @details
+ * In this function we compute the majorization coefficients needed for an
+ * instance with a simple majorization. This corresponds to the non-simple
+ * majorization for the case where GenModel::p equals 1. Due to this condition
+ * the majorization coefficients are quite simple to compute.  Note that the
+ * linear coefficient of the majorization is of the form @f$b -
+ * a\overline{q}@f$, but often the second term is included in the definition
+ * of @f$b@f$, so it can be optimized out. For more details see the @ref
+ * update_math, and gensvm_calculate_ab_non_simple().
+ *
+ * @param[in] 	model 	GenModel structure with the current model
+ * @param[in] 	i 	index for the instance
+ * @param[in] 	j 	index for the class
+ * @param[out] 	*a 	output argument for the quadratic coefficient
+ * @param[out] 	*b_aq 	output argument for the linear coefficient
+ *
+ */
+void gensvm_calculate_ab_simple(struct GenModel *model, long i, long j,
+		double *a, double *b_aq)
+{
+	double q = matrix_get(model->Q, model->K, i, j);
+
+	if (q <= - model->kappa) {
+		*a = 0.25/(0.5 - model->kappa/2.0 - q);
+		*b_aq = 0.5;
+	} else if (q <= 1.0) {
+		*a = 1.0/(2.0*model->kappa + 2.0);
+		*b_aq = (1.0 - q)*(*a);
+	} else {
+		*a = -0.25/(0.5 - model->kappa/2.0 - q);
+		*b_aq = 0;
+	}
+}
+
+/**
+ * @brief Compute the alpha_i and beta_i for an instance
+ *
+ * @details
+ * This computes the @f$\alpha_i@f$ value for an instance, and simultaneously
+ * updating the row of the B matrix corresponding to that
+ * instance (the @f$\boldsymbol{\beta}_i'@f$). The advantage of doing this at
+ * the same time is that we can compute the a and b values simultaneously in
+ * the gensvm_calculate_ab_simple() and gensvm_calculate_ab_non_simple()
+ * functions.
+ *
+ * The computation is done by first checking whether simple majorization is
+ * possible for this instance. If so, the @f$\omega_i@f$ value is set to 1.0,
+ * otherwise this value is computed. If simple majorization is possible, the
+ * coefficients a and b_aq are computed by gensvm_calculate_ab_simple(),
+ * otherwise they're computed by gensvm_calculate_ab_non_simple(). Next, the
+ * beta_i updated through the efficient BLAS daxpy function, and part of the
+ * value of @f$\alpha_i@f$ is computed. The final value of @f$\alpha_i@f$ is
+ * returned.
+ *
+ * @param[in] 		model 	GenModel structure with the current model
+ * @param[in] 		i 	index of the instance to update
+ * @param[out] 		beta	beta vector of linear coefficients (assumed to
+ * 				be allocated elsewhere, initialized here)
+ * @returns 			the @f$\alpha_i@f$ value of this instance
+ *
+ */
+double gensvm_get_alpha_beta(struct GenModel *model, struct GenData *data,
+		long i, double *beta)
+{
+	bool simple;
+	long j,
+	     K = model->K;
+	double omega, a, b_aq = 0.0,
+	       alpha = 0.0;
+	double *uu_row = NULL;
+	const double in = 1.0/((double) model->n);
+
+	simple = gensvm_majorize_is_simple(model, data, i);
+	omega = simple ? 1.0 : gensvm_calculate_omega(model, data, i);
+
+	Memset(beta, double, K-1);
+	for (j=0; j<K; j++) {
+		// skip the class y_i = k
+		if (j == (data->y[i]-1))
+			continue;
+
+		// calculate the a_ijk and (b_ijk - a_ijk q_i^(kj)) values
+		if (simple) {
+			gensvm_calculate_ab_simple(model, i, j, &a, &b_aq);
+		} else {
+			gensvm_calculate_ab_non_simple(model, i, j, &a, &b_aq);
+		}
+
+		// daxpy on beta and UU
+		// daxpy does: y = a*x + y
+		// so y = beta, UU_row = x, a = factor
+		b_aq *= model->rho[i] * omega * in;
+		uu_row = &model->UU[((data->y[i]-1)*K+j)*(K-1)];
+		cblas_daxpy(K-1, b_aq, uu_row, 1, beta, 1);
+
+		// increment Avalue
+		alpha += a;
+	}
+	alpha *= omega * model->rho[i] * in;
+	return alpha;
+}
+
+/**
+ * @brief Perform a single step of the majorization algorithm to update V
+ *
+ * @details
+ * This function contains the main update calculations of the algorithm. These
+ * calculations are necessary to find a new update V. The calculations exist of
+ * recalculating the majorization coefficients for all instances and all
+ * classes, and solving a linear system to find V.
+ *
+ * Because the function gensvm_get_update() is always called after a call to
+ * gensvm_get_loss() with the same GenModel::V, it is unnecessary to calculate
+ * the updated errors GenModel::Q and GenModel::H here too. This saves on
+ * computation time.
+ *
+ * In calculating the majorization coefficients we calculate the elements of a
+ * diagonal matrix A with elements
+ * @f[
+ * 	A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
+ * 		\varepsilon_i a_{ijk}^{(1)} + (1 - \varepsilon_i) \omega_i
+ * 		a_{ijk}^{(p)} \right],
+ * @f]
+ * where @f$ k = y_i @f$.
+ * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it
+ * is efficient to update a matrix ZAZ through consecutive rank 1 updates with
+ * a single element of A and the corresponding row of Z. The BLAS function
+ * dsyr is used for this.
+ *
+ * The B matrix is has rows
+ * @f[
+ * 	\boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
+ * 		\varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)}
+ * 			\overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i)
+ * 		\omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)}
+ * 			\overline{q}_i^{(kj)} \right) \right]
+ * 		\boldsymbol{\delta}_{kj}'
+ * @f]
+ * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$,
+ * and one for when it is 0. The 3D simplex difference matrix is used here, in
+ * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$.
+ *
+ * Finally, the following system is solved
+ * @f[
+ * 	(\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} =
+ * 		(\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}'
+ * 		\textbf{B})
+ * @f]
+ * solving this system is done through dposv().
+ *
+ * @todo
+ * Consider using CblasColMajor everywhere
+ *
+ * @param[in,out] 	model 	model to be updated
+ * @param[in] 		data 	data used in model
+ * @param[in] 		work 	allocated workspace to use
+ */
+void gensvm_get_update(struct GenModel *model, struct GenData *data,
+		struct GenWork *work)
+{
+	int status;
+	long i, j;
+
+	long m = model->m;
+	long K = model->K;
+
+	gensvm_get_ZAZ_ZB(model, data, work);
+
+	// Calculate right-hand side of system we want to solve
+	// dsymm performs ZB := 1.0 * (ZAZ) * Vbar + 1.0 * ZB
+	// the right-hand side is thus stored in ZB after this call
+	// Note: LDB and LDC are second dimensions of the matrices due to
+	// Row-Major order
+	cblas_dsymm(CblasRowMajor, CblasLeft, CblasUpper, m+1, K-1, 1,
+			work->ZAZ, m+1, model->V, K-1, 1.0, work->ZB, K-1);
+
+	// Calculate left-hand side of system we want to solve
+	// Add lambda to all diagonal elements except the first one. Recall
+	// that ZAZ is of size m+1 and is symmetric.
+	for (i=m+2; i<=m*(m+2); i+=m+2)
+		work->ZAZ[i] += model->lambda;
+
+	// Lapack uses column-major order, so we transform the ZB matrix to
+	// correspond to this.
+	for (i=0; i<m+1; i++)
+		for (j=0; j<K-1; j++)
+			work->ZBc[j*(m+1)+i] = work->ZB[i*(K-1)+j];
+
+	// Solve the system using dposv. Note that above the upper triangular
+	// part has always been used in row-major order for ZAZ. This
+	// corresponds to the lower triangular part in column-major order.
+	status = dposv('L', m+1, K-1, work->ZAZ, m+1, work->ZBc, m+1);
+
+	// Use dsysv as fallback, for when the ZAZ matrix is not positive
+	// semi-definite for some reason (perhaps due to rounding errors).
+	// This step shouldn't be necessary but is included for safety.
+	if (status != 0) {
+		err("[GenSVM Warning]: Received nonzero status from "
+				"dposv: %i\n", status);
+		int *IPIV = Malloc(int, m+1);
+		double *WORK = Malloc(double, 1);
+		status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc,
+				m+1, WORK, -1);
+
+		int LWORK = WORK[0];
+		WORK = Realloc(WORK, double, LWORK);
+		status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc,
+				m+1, WORK, LWORK);
+		if (status != 0)
+			err("[GenSVM Warning]: Received nonzero "
+					"status from dsysv: %i\n", status);
+		free(WORK);
+		WORK = NULL;
+		free(IPIV);
+		IPIV = NULL;
+	}
+
+	// the solution is now stored in ZBc, in column-major order. Here we
+	// convert this back to row-major order
+	for (i=0; i<m+1; i++)
+		for (j=0; j<K-1; j++)
+			work->ZB[i*(K-1)+j] = work->ZBc[j*(m+1)+i];
+
+	// copy the old V to Vbar and the new solution to V
+	for (i=0; i<m+1; i++) {
+		for (j=0; j<K-1; j++) {
+			matrix_set(model->Vbar, K-1, i, j,
+					matrix_get(model->V, K-1, i, j));
+			matrix_set(model->V, K-1, i, j,
+					matrix_get(work->ZB, K-1, i, j));
+		}
+	}
+}
+
+/**
+ * @brief Calculate Z'*A*Z and Z'*B for dense matrices
+ *
+ * @details
+ * This function calculates the matrices Z'*A*Z and Z'*B for the case where Z
+ * is stored as a dense matrix. It calculates the Z'*A*Z product by
+ * constructing a matrix LZ = (A^(1/2) * Z), and calculating (LZ)'*(LZ) with
+ * the BLAS dsyrk function. The matrix Z'*B is calculated with successive
+ * rank-1 updates using the BLAS dger function. These functions came out as
+ * the most efficient way to do these computations in several simulation
+ * studies.
+ *
+ * @param[in] 		model 	a GenModel holding the current model
+ * @param[in] 		data 	a GenData with the data
+ * @param[in,out] 	work 	an allocated GenWork structure, contains
+ * 				updated ZAZ and ZB matrices on exit.
+ */
+void gensvm_get_ZAZ_ZB_dense(struct GenModel *model, struct GenData *data,
+		struct GenWork *work)
+{
+	long i;
+	double alpha, sqalpha;
+
+	long n = model->n;
+	long m = model->m;
+	long K = model->K;
+
+	// generate Z'*A*Z and Z'*B by rank 1 operations
+	for (i=0; i<n; i++) {
+		alpha = gensvm_get_alpha_beta(model, data, i, work->beta);
+
+		// calculate row of matrix LZ, which is a scalar
+		// multiplication of sqrt(alpha_i) and row z_i' of Z
+		// Note that we use the fact that the first column of Z is
+		// always 1, by only computing the product for m values and
+		// copying the first element over.
+		sqalpha = sqrt(alpha);
+		work->LZ[i*(m+1)] = sqalpha;
+		cblas_daxpy(m, sqalpha, &data->Z[i*(m+1)+1], 1,
+				&work->LZ[i*(m+1)+1], 1);
+
+		// rank 1 update of matrix Z'*B
+		// Note: LDA is the second dimension of ZB because of
+		// Row-Major order
+		cblas_dger(CblasRowMajor, m+1, K-1, 1, &data->Z[i*(m+1)], 1,
+				work->beta, 1, work->ZB, K-1);
+	}
+
+	// calculate Z'*A*Z by symmetric multiplication of LZ with itself
+	// (ZAZ = (LZ)' * (LZ)
+	cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, m+1, n, 1.0,
+			work->LZ, m+1, 0.0, work->ZAZ, m+1);
+}
+
+/**
+ * @brief Calculate Z'*A*Z and Z'*B for sparse matrices
+ *
+ * @details
+ * This function calculates the matrices Z'*A*Z and Z'*B for the case where Z 
+ * is stored as a CSR sparse matrix (GenSparse structure). It computes only 
+ * the products of the Z'*A*Z matrix that need to be computed, and updates the 
+ * Z'*B matrix row-wise for each non-zero element of a row of Z, using a BLAS 
+ * daxpy call.
+ *
+ * @sa
+ * gensvm_get_ZAZ_ZB()
+ * gensvm_get_ZAZ_ZB_dense()
+ *
+ * @param[in] 		model 	a GenModel holding the current model
+ * @param[in] 		data 	a GenData with the data
+ * @param[in,out] 	work 	an allocated GenWork structure, contains
+ * 				updated ZAZ and ZB matrices on exit.
+ */
+void gensvm_get_ZAZ_ZB_sparse(struct GenModel *model, struct GenData *data,
+		struct GenWork *work)
+{
+	long i, j, k, jj, kk, jj_start, jj_end, K,
+	     n_row = data->spZ->n_row,
+	     n_col = data->spZ->n_col;
+	double alpha, z_ij;
+
+	K = model->K;
+
+	int *Zia = data->spZ->ia;
+	int *Zja = data->spZ->ja;
+	double *vals = data->spZ->values;
+
+	for (i=0; i<n_row; i++) {
+		alpha = gensvm_get_alpha_beta(model, data, i, work->beta);
+
+		jj_start = Zia[i];
+		jj_end = Zia[i+1];
+
+		for (jj=jj_start; jj<jj_end; jj++) {
+			j = Zja[jj];
+			z_ij = vals[jj];
+			cblas_daxpy(K-1, z_ij, work->beta, 1,
+					&work->ZB[j*(K-1)], 1);
+			z_ij *= alpha;
+			for (kk=jj; kk<jj_end; kk++) {
+				k = Zja[kk];
+				matrix_add(work->ZAZ, n_col, j, k,
+						z_ij*vals[kk]);
+			}
+		}
+	}
+}
+
+
+void gensvm_get_ZAZ_ZB(struct GenModel *model, struct GenData *data,
+		struct GenWork *work)
+{
+	gensvm_reset_work(work);
+
+	if (data->spZ == NULL) {
+		gensvm_get_ZAZ_ZB_dense(model, data, work);
+	} else {
+		gensvm_get_ZAZ_ZB_sparse(model, data, work);
+	}
+}
+
+/**
+ * @brief Solve AX = B where A is symmetric positive definite.
+ *
+ * @details
+ * Solve a linear system of equations AX = B where A is symmetric positive
+ * definite. This function is a wrapper for the external  LAPACK routine
+ * dposv.
+ *
+ * @param[in] 		UPLO 	which triangle of A is stored
+ * @param[in] 		N 	order of A
+ * @param[in] 		NRHS 	number of columns of B
+ * @param[in,out] 	A 	double precision array of size (LDA, N). On
+ * 				exit contains the upper or lower factor of the
+ * 				Cholesky factorization of A.
+ * @param[in] 		LDA 	leading dimension of A
+ * @param[in,out] 	B 	double precision array of size (LDB, NRHS). On
+ * 				exit contains the N-by-NRHS solution matrix X.
+ * @param[in] 		LDB 	the leading dimension of B
+ * @returns 			info parameter which contains the status of the
+ * 				computation:
+ * 					- =0: 	success
+ * 					- <0: 	if -i, the i-th argument had
+ * 						an illegal value
+ * 					- >0: 	if i, the leading minor of A
+ * 						was not positive definite
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html
+ */
+int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
+		int LDB)
+{
+	extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A,
+			int *LDAp, double *B, int *LDBp, int *INFOp);
+	int INFO;
+	dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO);
+	return INFO;
+}
+
+/**
+ * @brief Solve a system of equations AX = B where A is symmetric.
+ *
+ * @details
+ * Solve a linear system of equations AX = B where A is symmetric. This
+ * function is a wrapper for the external LAPACK routine dsysv.
+ *
+ * @param[in] 		UPLO 	which triangle of A is stored
+ * @param[in] 		N 	order of A
+ * @param[in] 		NRHS 	number of columns of B
+ * @param[in,out] 	A 	double precision array of size (LDA, N). On
+ * 				exit contains the block diagonal matrix D and
+ * 				the multipliers used to obtain the factor U or
+ * 				L from the factorization A = U*D*U**T or
+ * 				A = L*D*L**T.
+ * @param[in] 		LDA 	leading dimension of A
+ * @param[in] 		IPIV 	integer array containing the details of D
+ * @param[in,out] 	B 	double precision array of size (LDB, NRHS). On
+ * 				exit contains the N-by-NRHS matrix X
+ * @param[in] 		LDB 	leading dimension of B
+ * @param[out] 		WORK 	double precision array of size max(1,LWORK). On
+ * 				exit, WORK(1) contains the optimal LWORK
+ * @param[in] 		LWORK 	the length of WORK, can be used for determining
+ * 				the optimal blocksize for dsystrf.
+ * @returns 			info parameter which contains the status of the
+ * 				computation:
+ * 					- =0: 	success
+ * 					- <0: 	if -i, the i-th argument had an
+ * 						illegal value
+ * 					- >0: 	if i, D(i, i) is exactly zero,
+ * 						no solution can be computed.
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html
+ */
+int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
+		double *B, int LDB, double *WORK, int LWORK)
+{
+	extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A,
+			int *LDAp, int *IPIV, double *B, int *LDBp,
+			double *WORK, int *LWORK, int *INFOp);
+	int INFO;
+	dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO);
+	return INFO;
+}
author	Gertjan van den Burg <burg@ese.eur.nl>	2016-10-14 18:35:38 +0200
committer	Gertjan van den Burg <burg@ese.eur.nl>	2016-10-14 18:35:38 +0200
commit	e34123e1055c26d740148cefdb8d1b90208e424e (patch)
tree	51c62b010f4beddaa5cd8259fd420a433a8fd1b1 /src
parent	documentation fixes (diff)
download	gensvm-e34123e1055c26d740148cefdb8d1b90208e424e.tar.gz gensvm-e34123e1055c26d740148cefdb8d1b90208e424e.zip