From 5ba8b8652acd7756216552a38f4a07b049d74d4e Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Tue, 15 Jan 2019 12:14:22 +0000 Subject: Move wrapper to better folder structure --- gensvm/core.py | 2 +- gensvm/cython_wrapper/wrapper.pxd | 136 +++++++++++++++++++++++ gensvm/cython_wrapper/wrapper.pyx | 223 ++++++++++++++++++++++++++++++++++++++ gensvm/gridsearch.py | 2 +- setup.py | 16 +-- src/wrapper.pxd | 136 ----------------------- src/wrapper.pyx | 223 -------------------------------------- 7 files changed, 367 insertions(+), 371 deletions(-) create mode 100644 gensvm/cython_wrapper/wrapper.pxd create mode 100644 gensvm/cython_wrapper/wrapper.pyx delete mode 100644 src/wrapper.pxd delete mode 100644 src/wrapper.pyx diff --git a/gensvm/core.py b/gensvm/core.py index a729bea..77a3a7f 100644 --- a/gensvm/core.py +++ b/gensvm/core.py @@ -18,7 +18,7 @@ from sklearn.utils import check_X_y, check_random_state from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import check_is_fitted -from . import wrapper +from .cython_wrapper import wrapper def _fit_gensvm(X, y, n_class, p, lmd, kappa, epsilon, weights, kernel, gamma, diff --git a/gensvm/cython_wrapper/wrapper.pxd b/gensvm/cython_wrapper/wrapper.pxd new file mode 100644 index 0000000..441c15b --- /dev/null +++ b/gensvm/cython_wrapper/wrapper.pxd @@ -0,0 +1,136 @@ +cimport numpy as np + +# Includes + +cdef extern from "gensvm_globals.h": + # Stuff for kerneltype + ctypedef enum KernelType: + pass + +cdef extern from "gensvm_sparse.h": + # stuff for GenSparse + + cdef struct GenSparse: + long nnz + long n_row + long n_col + double *values + long *ia + long *ja + + GenSparse *gensvm_init_sparse() + void gensvm_free_sparse(GenSparse *) + +cdef extern from "gensvm_base.h": + + cdef struct GenData: + long K + long n + long m + long r + long *y + double *Z + GenSparse *spZ + double *RAW + double *Sigma + KernelType kerneltype + double *kernelparam + + cdef struct GenModel: + int weight_idx + long K + long n + long m + double epsilon + double p + double kappa + double lmd + double *V + double *Vbar + double *U + double *UU + double *Q + double *H + double *rho + double training_error + KernelType kerneltype + double *kernelparam + double kernel_eigen_cutoff + + GenModel *gensvm_init_model() + void gensvm_free_model(GenModel *) + + GenData *gensvm_init_data() + void gensvm_free_data(GenData *) + + +cdef extern from "gensvm_task.h": + + cdef struct GenTask: + long ID + long folds + GenData *train_data + GenData *test_data + + KernelType kerneltype + int weight_idx + double p + double kappa + double lmd + double epsilon + double gamma + double coef + double degree + double max_iter + + double performance + double duration + long *predictions + + GenTask *gensvm_init_task() + gensvm_free_task(GenTask *) + +cdef extern from "gensvm_train.h": + + void gensvm_train(GenModel *, GenData *, GenModel *) nogil + +cdef extern from "gensvm_sv.h": + + long gensvm_num_sv(GenModel *) + +cdef extern from "gensvm_queue.h": + + cdef struct GenQueue: + GenTask **tasks + long N + long i + + GenQueue *gensvm_init_queue() + void gensvm_free_queue(GenQueue *) + +cdef extern from "gensvm_helper.c": + + ctypedef char* char_const_ptr "char const *" + void set_model(GenModel *, double, double, double, double, int, int, + double, double, double, double, long, long) + void set_seed_model(GenModel *, double, double, double, double, int, int, + double, double, double, double, long, long, char *, long, long) + void set_data(GenData *, char *, char *, np.npy_intp *, long) + void set_task(GenTask *, int, GenData *, int, double, double, double, + double, double, int, double, double, double, long) + char_const_ptr check_model(GenModel *) + void copy_V(void *, GenModel *) + long get_iter_count(GenModel *) + double get_training_error(GenModel *) + int get_status(GenModel *) + long get_n(GenModel *) + long get_m(GenModel *) + long get_K(GenModel *) + void free_data(GenData *) + void set_verbosity(int) + void gensvm_predict(char *, char *, long, long, long, char *) nogil + void gensvm_train_q_helper(GenQueue *, char *, int) nogil + void set_queue(GenQueue *, long, GenTask **) + double get_task_duration(GenTask *) + double get_task_performance(GenTask *) + void copy_task_predictions(GenTask *, char *, long) diff --git a/gensvm/cython_wrapper/wrapper.pyx b/gensvm/cython_wrapper/wrapper.pyx new file mode 100644 index 0000000..4ded637 --- /dev/null +++ b/gensvm/cython_wrapper/wrapper.pyx @@ -0,0 +1,223 @@ +""" +Wrapper for GenSVM + +Not implemented yet: + - vector of instance weights + - class weights + - seed model + - max_iter = -1 for unlimited + +""" + +from __future__ import print_function + +from libc.stdlib cimport malloc, free + +import numpy as np +cimport numpy as np + +cimport wrapper + +np.import_array() + +GENSVM_KERNEL_TYPES = ["linear", "poly", "rbf", "sigmoid"] + +def train_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] X, + np.ndarray[np.int_t, ndim=1, mode='c'] y, + long n_class, + double p=1.0, + double lmd=pow(2, -8), + double kappa=0.0, + double epsilon=1e-6, + int weight_idx=1, + str kernel='linear', + double gamma=1.0, + double coef=0.0, + double degree=2.0, + double kernel_eigen_cutoff=1e-8, + int max_iter=100000000, + int random_seed=-1, + np.ndarray[np.float64_t, ndim=2, mode='c'] seed_V=None + ): + """ + """ + + # Initialize model and data + cdef GenModel *model = gensvm_init_model() + cdef GenData *data = gensvm_init_data() + cdef GenModel *seed_model = gensvm_init_model() + cdef long n_obs + cdef long n_var + + # get the kernel index + kernel_index = GENSVM_KERNEL_TYPES.index(kernel) + + # get the number of classes + n_obs = X.shape[0] + n_var = X.shape[1] + + # Set the data + set_data(data, X.data, y.data, X.shape, n_class) + + # Set the model + set_model(model, p, lmd, kappa, epsilon, weight_idx, kernel_index, degree, + gamma, coef, kernel_eigen_cutoff, max_iter, random_seed) + + if not seed_V is None: + set_seed_model(seed_model, p, lmd, kappa, epsilon, weight_idx, + kernel_index, degree, gamma, coef, kernel_eigen_cutoff, + max_iter, random_seed, seed_V.data, n_var, n_class) + else: + gensvm_free_model(seed_model) + seed_model = NULL + + # Check the parameters + error_msg = check_model(model) + if error_msg: + gensvm_free_model(model) + free_data(data) + error_repl = error_msg.decode('utf-8') + raise ValueError(error_repl) + + # Do the actual training + with nogil: + gensvm_train(model, data, seed_model) + + # update the number of variables (this may have changed due to kernel) + n_var = get_m(model) + + # copy the results + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V + V = np.empty((n_var+1, n_class-1)) + copy_V(V.data, model) + + # get other results from model + iter_count = get_iter_count(model) + training_error = get_training_error(model) + fit_status = get_status(model) + n_SV = gensvm_num_sv(model) + + # free model and data + gensvm_free_model(model); + free_data(data); + + return (V, n_SV, iter_count, training_error, fit_status) + + +def predict_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] X, + np.ndarray[np.float64_t, ndim=2, mode='c'] V + ): + """ + """ + + cdef long n_test_obs + cdef long n_var + cdef long n_class + + n_test_obs = X.shape[0] + n_var = X.shape[1] + n_class = V.shape[1] + 1 + + # output vector + cdef np.ndarray[np.int_t, ndim=1, mode='c'] predictions + predictions = np.empty((n_test_obs, ), dtype=np.int) + + # do the prediction + with nogil: + gensvm_predict(X.data, V.data, n_test_obs, n_var, n_class, + predictions.data) + + return predictions + +def grid_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] X, + np.ndarray[np.int_t, ndim=1, mode='c'] y, + candidate_params, + int store_predictions, + np.ndarray[np.int_t, ndim=1, mode='c'] cv_idx, + int n_folds, + ): + """ + """ + + cdef GenQueue *queue = gensvm_init_queue() + cdef GenData *data = gensvm_init_data() + cdef GenTask *task + cdef long n_obs + cdef long n_var + cdef long n_class + cdef long n_tasks = len(candidate_params) + + # get the number of classes + classes = np.unique(y) + n_obs = X.shape[0] + n_var = X.shape[1] + n_class = classes.shape[0] + + set_data(data, X.data, y.data, X.shape, n_class) + + cdef GenTask **tasks = malloc(n_tasks * sizeof(GenTask *)) + + ID = 0 + for candidate in candidate_params: + params = { + 'p': 1.0, + 'lmd': 1e-5, + 'kappa': 0.0, + 'epsilon': 1e-6, + 'weight_idx': 1, + 'kernel': GENSVM_KERNEL_TYPES.index('linear'), + 'gamma': 1.0, + 'coef': 0.0, + 'degree': 2.0, + 'max_iter': 1e8 + } + params.update(candidate) + if 'kernel' in candidate: + params['kernel'] = GENSVM_KERNEL_TYPES.index(candidate['kernel']) + if 'weights' in candidate: + params['weight_idx'] = 1 if candidate['weights'] == 'unit' else 2 + + task = gensvm_init_task() + set_task(task, ID, data, n_folds, params['p'], params['lmd'], + params['kappa'], params['epsilon'], params['weight_idx'], + params['kernel'], params['degree'], params['gamma'], + params['coef'], params['max_iter']) + + tasks[ID] = task + ID += 1 + + set_queue(queue, n_tasks, tasks) + + with nogil: + gensvm_train_q_helper(queue, cv_idx.data, store_predictions) + + cdef np.ndarray[np.int_t, ndim=1, mode='c'] pred + + results = dict() + results['params'] = [] + results['duration'] = [] + results['scores'] = [] + results['predictions'] = [] + for ID in range(n_tasks): + results['params'].append(candidate_params[ID]) + results['duration'].append(get_task_duration(tasks[ID])) + results['scores'].append(get_task_performance(tasks[ID])) + if store_predictions: + pred = np.zeros((n_obs, ), dtype=np.int) + copy_task_predictions(tasks[ID], pred.data, n_obs) + results['predictions'].append(pred.copy()) + + gensvm_free_queue(queue) + free_data(data) + + return results + + +def set_verbosity_wrap(int verbosity): + """ + Control verbosity of gensvm library + """ + set_verbosity(verbosity) diff --git a/gensvm/gridsearch.py b/gensvm/gridsearch.py index e49d3ce..d5ea31e 100644 --- a/gensvm/gridsearch.py +++ b/gensvm/gridsearch.py @@ -25,7 +25,7 @@ from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import indexable -from . import wrapper +from .cython_wrapper import wrapper from .core import GenSVM from .sklearn_util import (_skl_format_cv_results, _skl_check_scorers, _skl_check_is_fitted, _skl_grid_score) diff --git a/setup.py b/setup.py index 1d8a873..c70b81d 100644 --- a/setup.py +++ b/setup.py @@ -94,18 +94,14 @@ def configuration(): # Wrapper code in Cython uses the .pyx extension if we want to USE_CYTHON, # otherwise it ends in .c. - wrappers = [ - os.path.join('src', 'wrapper.pyx'), - ] - if not USE_CYTHON: - wrappers = [os.path.splitext(w)[0] + '.c' for w in wrappers] + wrapper_extension = "*.pyx" if USE_CYTHON else "*.c" # Sources include the C/Cython code from the wrapper and the source code of # the C library - gensvm_sources = wrappers[:] - gensvm_sources.append([ - os.path.join('src', 'gensvm', 'src', '*.c'), - ]) + gensvm_sources = [ + os.path.join("gensvm", "cython_wrapper", wrapper_extension), + os.path.join("src", "gensvm", "src", "*.c"), + ] # Dependencies are the header files of the C library and any potential # helper code between the library and the Cython code @@ -115,7 +111,7 @@ def configuration(): ] from numpy import get_include - config.add_extension('wrapper', + config.add_extension('cython_wrapper.wrapper', sources=gensvm_sources, libraries=cblas_libs, include_dirs=[ diff --git a/src/wrapper.pxd b/src/wrapper.pxd deleted file mode 100644 index 441c15b..0000000 --- a/src/wrapper.pxd +++ /dev/null @@ -1,136 +0,0 @@ -cimport numpy as np - -# Includes - -cdef extern from "gensvm_globals.h": - # Stuff for kerneltype - ctypedef enum KernelType: - pass - -cdef extern from "gensvm_sparse.h": - # stuff for GenSparse - - cdef struct GenSparse: - long nnz - long n_row - long n_col - double *values - long *ia - long *ja - - GenSparse *gensvm_init_sparse() - void gensvm_free_sparse(GenSparse *) - -cdef extern from "gensvm_base.h": - - cdef struct GenData: - long K - long n - long m - long r - long *y - double *Z - GenSparse *spZ - double *RAW - double *Sigma - KernelType kerneltype - double *kernelparam - - cdef struct GenModel: - int weight_idx - long K - long n - long m - double epsilon - double p - double kappa - double lmd - double *V - double *Vbar - double *U - double *UU - double *Q - double *H - double *rho - double training_error - KernelType kerneltype - double *kernelparam - double kernel_eigen_cutoff - - GenModel *gensvm_init_model() - void gensvm_free_model(GenModel *) - - GenData *gensvm_init_data() - void gensvm_free_data(GenData *) - - -cdef extern from "gensvm_task.h": - - cdef struct GenTask: - long ID - long folds - GenData *train_data - GenData *test_data - - KernelType kerneltype - int weight_idx - double p - double kappa - double lmd - double epsilon - double gamma - double coef - double degree - double max_iter - - double performance - double duration - long *predictions - - GenTask *gensvm_init_task() - gensvm_free_task(GenTask *) - -cdef extern from "gensvm_train.h": - - void gensvm_train(GenModel *, GenData *, GenModel *) nogil - -cdef extern from "gensvm_sv.h": - - long gensvm_num_sv(GenModel *) - -cdef extern from "gensvm_queue.h": - - cdef struct GenQueue: - GenTask **tasks - long N - long i - - GenQueue *gensvm_init_queue() - void gensvm_free_queue(GenQueue *) - -cdef extern from "gensvm_helper.c": - - ctypedef char* char_const_ptr "char const *" - void set_model(GenModel *, double, double, double, double, int, int, - double, double, double, double, long, long) - void set_seed_model(GenModel *, double, double, double, double, int, int, - double, double, double, double, long, long, char *, long, long) - void set_data(GenData *, char *, char *, np.npy_intp *, long) - void set_task(GenTask *, int, GenData *, int, double, double, double, - double, double, int, double, double, double, long) - char_const_ptr check_model(GenModel *) - void copy_V(void *, GenModel *) - long get_iter_count(GenModel *) - double get_training_error(GenModel *) - int get_status(GenModel *) - long get_n(GenModel *) - long get_m(GenModel *) - long get_K(GenModel *) - void free_data(GenData *) - void set_verbosity(int) - void gensvm_predict(char *, char *, long, long, long, char *) nogil - void gensvm_train_q_helper(GenQueue *, char *, int) nogil - void set_queue(GenQueue *, long, GenTask **) - double get_task_duration(GenTask *) - double get_task_performance(GenTask *) - void copy_task_predictions(GenTask *, char *, long) diff --git a/src/wrapper.pyx b/src/wrapper.pyx deleted file mode 100644 index 4ded637..0000000 --- a/src/wrapper.pyx +++ /dev/null @@ -1,223 +0,0 @@ -""" -Wrapper for GenSVM - -Not implemented yet: - - vector of instance weights - - class weights - - seed model - - max_iter = -1 for unlimited - -""" - -from __future__ import print_function - -from libc.stdlib cimport malloc, free - -import numpy as np -cimport numpy as np - -cimport wrapper - -np.import_array() - -GENSVM_KERNEL_TYPES = ["linear", "poly", "rbf", "sigmoid"] - -def train_wrap( - np.ndarray[np.float64_t, ndim=2, mode='c'] X, - np.ndarray[np.int_t, ndim=1, mode='c'] y, - long n_class, - double p=1.0, - double lmd=pow(2, -8), - double kappa=0.0, - double epsilon=1e-6, - int weight_idx=1, - str kernel='linear', - double gamma=1.0, - double coef=0.0, - double degree=2.0, - double kernel_eigen_cutoff=1e-8, - int max_iter=100000000, - int random_seed=-1, - np.ndarray[np.float64_t, ndim=2, mode='c'] seed_V=None - ): - """ - """ - - # Initialize model and data - cdef GenModel *model = gensvm_init_model() - cdef GenData *data = gensvm_init_data() - cdef GenModel *seed_model = gensvm_init_model() - cdef long n_obs - cdef long n_var - - # get the kernel index - kernel_index = GENSVM_KERNEL_TYPES.index(kernel) - - # get the number of classes - n_obs = X.shape[0] - n_var = X.shape[1] - - # Set the data - set_data(data, X.data, y.data, X.shape, n_class) - - # Set the model - set_model(model, p, lmd, kappa, epsilon, weight_idx, kernel_index, degree, - gamma, coef, kernel_eigen_cutoff, max_iter, random_seed) - - if not seed_V is None: - set_seed_model(seed_model, p, lmd, kappa, epsilon, weight_idx, - kernel_index, degree, gamma, coef, kernel_eigen_cutoff, - max_iter, random_seed, seed_V.data, n_var, n_class) - else: - gensvm_free_model(seed_model) - seed_model = NULL - - # Check the parameters - error_msg = check_model(model) - if error_msg: - gensvm_free_model(model) - free_data(data) - error_repl = error_msg.decode('utf-8') - raise ValueError(error_repl) - - # Do the actual training - with nogil: - gensvm_train(model, data, seed_model) - - # update the number of variables (this may have changed due to kernel) - n_var = get_m(model) - - # copy the results - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V - V = np.empty((n_var+1, n_class-1)) - copy_V(V.data, model) - - # get other results from model - iter_count = get_iter_count(model) - training_error = get_training_error(model) - fit_status = get_status(model) - n_SV = gensvm_num_sv(model) - - # free model and data - gensvm_free_model(model); - free_data(data); - - return (V, n_SV, iter_count, training_error, fit_status) - - -def predict_wrap( - np.ndarray[np.float64_t, ndim=2, mode='c'] X, - np.ndarray[np.float64_t, ndim=2, mode='c'] V - ): - """ - """ - - cdef long n_test_obs - cdef long n_var - cdef long n_class - - n_test_obs = X.shape[0] - n_var = X.shape[1] - n_class = V.shape[1] + 1 - - # output vector - cdef np.ndarray[np.int_t, ndim=1, mode='c'] predictions - predictions = np.empty((n_test_obs, ), dtype=np.int) - - # do the prediction - with nogil: - gensvm_predict(X.data, V.data, n_test_obs, n_var, n_class, - predictions.data) - - return predictions - -def grid_wrap( - np.ndarray[np.float64_t, ndim=2, mode='c'] X, - np.ndarray[np.int_t, ndim=1, mode='c'] y, - candidate_params, - int store_predictions, - np.ndarray[np.int_t, ndim=1, mode='c'] cv_idx, - int n_folds, - ): - """ - """ - - cdef GenQueue *queue = gensvm_init_queue() - cdef GenData *data = gensvm_init_data() - cdef GenTask *task - cdef long n_obs - cdef long n_var - cdef long n_class - cdef long n_tasks = len(candidate_params) - - # get the number of classes - classes = np.unique(y) - n_obs = X.shape[0] - n_var = X.shape[1] - n_class = classes.shape[0] - - set_data(data, X.data, y.data, X.shape, n_class) - - cdef GenTask **tasks = malloc(n_tasks * sizeof(GenTask *)) - - ID = 0 - for candidate in candidate_params: - params = { - 'p': 1.0, - 'lmd': 1e-5, - 'kappa': 0.0, - 'epsilon': 1e-6, - 'weight_idx': 1, - 'kernel': GENSVM_KERNEL_TYPES.index('linear'), - 'gamma': 1.0, - 'coef': 0.0, - 'degree': 2.0, - 'max_iter': 1e8 - } - params.update(candidate) - if 'kernel' in candidate: - params['kernel'] = GENSVM_KERNEL_TYPES.index(candidate['kernel']) - if 'weights' in candidate: - params['weight_idx'] = 1 if candidate['weights'] == 'unit' else 2 - - task = gensvm_init_task() - set_task(task, ID, data, n_folds, params['p'], params['lmd'], - params['kappa'], params['epsilon'], params['weight_idx'], - params['kernel'], params['degree'], params['gamma'], - params['coef'], params['max_iter']) - - tasks[ID] = task - ID += 1 - - set_queue(queue, n_tasks, tasks) - - with nogil: - gensvm_train_q_helper(queue, cv_idx.data, store_predictions) - - cdef np.ndarray[np.int_t, ndim=1, mode='c'] pred - - results = dict() - results['params'] = [] - results['duration'] = [] - results['scores'] = [] - results['predictions'] = [] - for ID in range(n_tasks): - results['params'].append(candidate_params[ID]) - results['duration'].append(get_task_duration(tasks[ID])) - results['scores'].append(get_task_performance(tasks[ID])) - if store_predictions: - pred = np.zeros((n_obs, ), dtype=np.int) - copy_task_predictions(tasks[ID], pred.data, n_obs) - results['predictions'].append(pred.copy()) - - gensvm_free_queue(queue) - free_data(data) - - return results - - -def set_verbosity_wrap(int verbosity): - """ - Control verbosity of gensvm library - """ - set_verbosity(verbosity) -- cgit v1.2.3