diff options
| field | value | date |
|---|---|---|
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2017-12-12 20:18:28 -0500 |
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2017-12-12 20:18:28 -0500 |
| commit | 7ed6c4ac3ea5c409c073f1db3e62d989ffe5f351 (patch) | |
| tree | 574f193b67438ba739be0f41af0d89bb0fa56a2c /src | |
| parent | update library for python package (diff) | |
| download | pygensvm-7ed6c4ac3ea5c409c073f1db3e62d989ffe5f351.tar.gz pygensvm-7ed6c4ac3ea5c409c073f1db3e62d989ffe5f351.zip | |
added gridsearch and extended gensvm class
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/wrapper.pxd | 47 |
| -rw-r--r-- | src/wrapper.pyx | 110 |
2 files changed, 151 insertions, 6 deletions
diff --git a/src/wrapper.pxd b/src/wrapper.pxd index be4d5f5..441c15b 100644 --- a/src/wrapper.pxd +++ b/src/wrapper.pxd @@ -21,7 +21,6 @@ cdef extern from "gensvm_sparse.h": GenSparse *gensvm_init_sparse() void gensvm_free_sparse(GenSparse *) - cdef extern from "gensvm_base.h": cdef struct GenData: @@ -64,6 +63,33 @@ cdef extern from "gensvm_base.h": GenData *gensvm_init_data() void gensvm_free_data(GenData *) + +cdef extern from "gensvm_task.h": + + cdef struct GenTask: + long ID + long folds + GenData *train_data + GenData *test_data + + KernelType kerneltype + int weight_idx + double p + double kappa + double lmd + double epsilon + double gamma + double coef + double degree + double max_iter + + double performance + double duration + long *predictions + + GenTask *gensvm_init_task() + gensvm_free_task(GenTask *) + cdef extern from "gensvm_train.h": void gensvm_train(GenModel *, GenData *, GenModel *) nogil @@ -72,12 +98,26 @@ cdef extern from "gensvm_sv.h": long gensvm_num_sv(GenModel *) +cdef extern from "gensvm_queue.h": + + cdef struct GenQueue: + GenTask **tasks + long N + long i + + GenQueue *gensvm_init_queue() + void gensvm_free_queue(GenQueue *) + cdef extern from "gensvm_helper.c": ctypedef char* char_const_ptr "char const *" void set_model(GenModel *, double, double, double, double, int, int, double, double, double, double, long, long) + void set_seed_model(GenModel *, double, double, double, double, int, int, + double, double, double, double, long, long, char *, long, long) void set_data(GenData *, char *, char *, np.npy_intp *, long) + void set_task(GenTask *, int, GenData *, int, double, double, double, + double, double, int, double, double, double, long) char_const_ptr check_model(GenModel *) void copy_V(void *, GenModel *) long get_iter_count(GenModel *) @@ -89,3 +129,8 @@ cdef extern from "gensvm_helper.c": void free_data(GenData *) void set_verbosity(int) void gensvm_predict(char *, char *, long, long, long, char *) nogil + void 
gensvm_train_q_helper(GenQueue *, char *, int) nogil + void set_queue(GenQueue *, long, GenTask **) + double get_task_duration(GenTask *) + double get_task_performance(GenTask *) + void copy_task_predictions(GenTask *, char *, long) diff --git a/src/wrapper.pyx b/src/wrapper.pyx index 1d84b59..4ded637 100644 --- a/src/wrapper.pyx +++ b/src/wrapper.pyx @@ -11,6 +11,8 @@ Not implemented yet: from __future__ import print_function +from libc.stdlib cimport malloc, free + import numpy as np cimport numpy as np @@ -23,6 +25,7 @@ GENSVM_KERNEL_TYPES = ["linear", "poly", "rbf", "sigmoid"] def train_wrap( np.ndarray[np.float64_t, ndim=2, mode='c'] X, np.ndarray[np.int_t, ndim=1, mode='c'] y, + long n_class, double p=1.0, double lmd=pow(2, -8), double kappa=0.0, @@ -34,25 +37,25 @@ def train_wrap( double degree=2.0, double kernel_eigen_cutoff=1e-8, int max_iter=100000000, - int random_seed=-1): + int random_seed=-1, + np.ndarray[np.float64_t, ndim=2, mode='c'] seed_V=None + ): """ """ # Initialize model and data cdef GenModel *model = gensvm_init_model() cdef GenData *data = gensvm_init_data() + cdef GenModel *seed_model = gensvm_init_model() cdef long n_obs cdef long n_var - cdef long n_class # get the kernel index kernel_index = GENSVM_KERNEL_TYPES.index(kernel) # get the number of classes - classes = np.unique(y) n_obs = X.shape[0] n_var = X.shape[1] - n_class = classes.shape[0] # Set the data set_data(data, X.data, y.data, X.shape, n_class) @@ -61,6 +64,14 @@ def train_wrap( set_model(model, p, lmd, kappa, epsilon, weight_idx, kernel_index, degree, gamma, coef, kernel_eigen_cutoff, max_iter, random_seed) + if not seed_V is None: + set_seed_model(seed_model, p, lmd, kappa, epsilon, weight_idx, + kernel_index, degree, gamma, coef, kernel_eigen_cutoff, + max_iter, random_seed, seed_V.data, n_var, n_class) + else: + gensvm_free_model(seed_model) + seed_model = NULL + # Check the parameters error_msg = check_model(model) if error_msg: @@ -71,7 +82,10 @@ def train_wrap( # Do 
the actual training with nogil: - gensvm_train(model, data, NULL) + gensvm_train(model, data, seed_model) + + # update the number of variables (this may have changed due to kernel) + n_var = get_m(model) # copy the results cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V @@ -90,6 +104,7 @@ def train_wrap( return (V, n_SV, iter_count, training_error, fit_status) + def predict_wrap( np.ndarray[np.float64_t, ndim=2, mode='c'] X, np.ndarray[np.float64_t, ndim=2, mode='c'] V @@ -116,6 +131,91 @@ def predict_wrap( return predictions +def grid_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] X, + np.ndarray[np.int_t, ndim=1, mode='c'] y, + candidate_params, + int store_predictions, + np.ndarray[np.int_t, ndim=1, mode='c'] cv_idx, + int n_folds, + ): + """ + """ + + cdef GenQueue *queue = gensvm_init_queue() + cdef GenData *data = gensvm_init_data() + cdef GenTask *task + cdef long n_obs + cdef long n_var + cdef long n_class + cdef long n_tasks = len(candidate_params) + + # get the number of classes + classes = np.unique(y) + n_obs = X.shape[0] + n_var = X.shape[1] + n_class = classes.shape[0] + + set_data(data, X.data, y.data, X.shape, n_class) + + cdef GenTask **tasks = <GenTask **>malloc(n_tasks * sizeof(GenTask *)) + + ID = 0 + for candidate in candidate_params: + params = { + 'p': 1.0, + 'lmd': 1e-5, + 'kappa': 0.0, + 'epsilon': 1e-6, + 'weight_idx': 1, + 'kernel': GENSVM_KERNEL_TYPES.index('linear'), + 'gamma': 1.0, + 'coef': 0.0, + 'degree': 2.0, + 'max_iter': 1e8 + } + params.update(candidate) + if 'kernel' in candidate: + params['kernel'] = GENSVM_KERNEL_TYPES.index(candidate['kernel']) + if 'weights' in candidate: + params['weight_idx'] = 1 if candidate['weights'] == 'unit' else 2 + + task = gensvm_init_task() + set_task(task, ID, data, n_folds, params['p'], params['lmd'], + params['kappa'], params['epsilon'], params['weight_idx'], + params['kernel'], params['degree'], params['gamma'], + params['coef'], params['max_iter']) + + tasks[ID] = task + ID += 1 + + 
set_queue(queue, n_tasks, tasks) + + with nogil: + gensvm_train_q_helper(queue, cv_idx.data, store_predictions) + + cdef np.ndarray[np.int_t, ndim=1, mode='c'] pred + + results = dict() + results['params'] = [] + results['duration'] = [] + results['scores'] = [] + results['predictions'] = [] + for ID in range(n_tasks): + results['params'].append(candidate_params[ID]) + results['duration'].append(get_task_duration(tasks[ID])) + results['scores'].append(get_task_performance(tasks[ID])) + if store_predictions: + pred = np.zeros((n_obs, ), dtype=np.int) + copy_task_predictions(tasks[ID], pred.data, n_obs) + results['predictions'].append(pred.copy()) + + gensvm_free_queue(queue) + free_data(data) + + return results + + def set_verbosity_wrap(int verbosity): """ Control verbosity of gensvm library |
