diff options
| author | Gertjan van den Burg <burg@ese.eur.nl> | 2016-12-07 16:39:55 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <burg@ese.eur.nl> | 2016-12-07 16:39:55 +0100 |
| commit | 918d463103215207b9d9975bb6c0ea75754da6f9 (patch) | |
| tree | e3e466a3a281e223700d1ce2b06c3e5e0f99e935 /src/gensvm_checks.c | |
| parent | throw warning when using sparse matrices with kernels (diff) | |
| download | gensvm-918d463103215207b9d9975bb6c0ea75754da6f9.tar.gz gensvm-918d463103215207b9d9975bb6c0ea75754da6f9.zip | |
moved check for class labels to separate module
Diffstat (limited to 'src/gensvm_checks.c')
| -rw-r--r-- | src/gensvm_checks.c | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/src/gensvm_checks.c b/src/gensvm_checks.c new file mode 100644 index 0000000..0f7c499 --- /dev/null +++ b/src/gensvm_checks.c @@ -0,0 +1,77 @@ +/** + * @file gensvm_checks.c + * @author G.J.J. van den Burg + * @date 2016-12-07 + * @brief Sanity checks used to ensure inputs are as expected + * + * @copyright + Copyright 2016, G.J.J. van den Burg. + + This file is part of GenSVM. + + GenSVM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + GenSVM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GenSVM. If not, see <http://www.gnu.org/licenses/>. + + */ + +#include "gensvm_checks.h" + +/** + * @brief Check if the labels are contiguous on [1 .. K] + * + * @details + * The GenSVM library currently requires that the labels that are supplied in + * a dataset are contigous on the interval [1 .. K] and have no gaps. This is + * required because the dimensionality of the problem is directly related to + * the maximum class label K. This function checks if the labels are indeed in + * the desired range. 
+ * + * @param[in] data a GenData struct with the current data + * + * @return whether the labels are contiguous or not + */ +bool gensvm_check_outcome_contiguous(struct GenData *data) +{ + bool in_uniq, is_contiguous = true; + long i, j, K = 1; + long max_y = -1, + min_y = LONG_MAX; + long *uniq_y = Calloc(long, K); + uniq_y[0] = data->y[0]; + + for (i=1; i<data->n; i++) { + in_uniq = false; + for (j=0; j<K; j++) { + if (uniq_y[j] == data->y[i]) { + in_uniq = true; + break; + } + } + + if (!in_uniq) { + uniq_y = Realloc(uniq_y, long, K+1); + uniq_y[K++] = data->y[i]; + } + + max_y = maximum(max_y, data->y[i]); + min_y = minimum(min_y, data->y[i]); + } + + if (min_y < 1 || max_y > K) { + is_contiguous = false; + } + + free(uniq_y); + + return is_contiguous; +} |
