about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2017-12-12 20:18:28 -0500
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2017-12-12 20:18:28 -0500
commit: 7ed6c4ac3ea5c409c073f1db3e62d989ffe5f351 (patch)
tree: 574f193b67438ba739be0f41af0d89bb0fa56a2c /src
parent: update library for python package (diff)
download: pygensvm-7ed6c4ac3ea5c409c073f1db3e62d989ffe5f351.tar.gz
pygensvm-7ed6c4ac3ea5c409c073f1db3e62d989ffe5f351.zip
added gridsearch and extended gensvm class
Diffstat (limited to 'src')
-rw-r--r-- src/wrapper.pxd 47
-rw-r--r-- src/wrapper.pyx 110
2 files changed, 151 insertions, 6 deletions
diff --git a/src/wrapper.pxd b/src/wrapper.pxd
index be4d5f5..441c15b 100644
--- a/src/wrapper.pxd
+++ b/src/wrapper.pxd
@@ -21,7 +21,6 @@ cdef extern from "gensvm_sparse.h":
GenSparse *gensvm_init_sparse()
void gensvm_free_sparse(GenSparse *)
-
cdef extern from "gensvm_base.h":
cdef struct GenData:
@@ -64,6 +63,33 @@ cdef extern from "gensvm_base.h":
GenData *gensvm_init_data()
void gensvm_free_data(GenData *)
+
+cdef extern from "gensvm_task.h":  # C declarations for the GenTask struct used by grid_wrap
+
+    cdef struct GenTask:
+        long ID
+        long folds
+        GenData *train_data
+        GenData *test_data
+
+        KernelType kerneltype
+        int weight_idx
+        double p
+        double kappa
+        double lmd
+        double epsilon
+        double gamma
+        double coef
+        double degree
+        double max_iter  # NOTE(review): set_task passes max_iter as long -- confirm against gensvm_task.h
+
+        double performance
+        double duration
+        long *predictions
+
+    GenTask *gensvm_init_task()
+    void gensvm_free_task(GenTask *)  # explicit void: an untyped return is declared as a Python object
+
cdef extern from "gensvm_train.h":
void gensvm_train(GenModel *, GenData *, GenModel *) nogil
@@ -72,12 +98,26 @@ cdef extern from "gensvm_sv.h":
long gensvm_num_sv(GenModel *)
+cdef extern from "gensvm_queue.h":  # C declarations for the GenQueue struct used by grid_wrap
+
+ cdef struct GenQueue:
+ GenTask **tasks  # presumably the array of task pointers handed over via set_queue -- confirm
+ long N  # presumably the number of tasks in the queue -- confirm against gensvm_queue.h
+ long i  # presumably the index of the next task to run -- confirm against gensvm_queue.h
+
+ GenQueue *gensvm_init_queue()  # returns the GenQueue pointer that grid_wrap fills with set_queue
+ void gensvm_free_queue(GenQueue *)  # called by grid_wrap once the results have been collected
+
cdef extern from "gensvm_helper.c":
ctypedef char* char_const_ptr "char const *"
void set_model(GenModel *, double, double, double, double, int, int,
double, double, double, double, long, long)
+ void set_seed_model(GenModel *, double, double, double, double, int, int,
+ double, double, double, double, long, long, char *, long, long)
void set_data(GenData *, char *, char *, np.npy_intp *, long)
+ void set_task(GenTask *, int, GenData *, int, double, double, double,
+ double, double, int, double, double, double, long)
char_const_ptr check_model(GenModel *)
void copy_V(void *, GenModel *)
long get_iter_count(GenModel *)
@@ -89,3 +129,8 @@ cdef extern from "gensvm_helper.c":
void free_data(GenData *)
void set_verbosity(int)
void gensvm_predict(char *, char *, long, long, long, char *) nogil
+ void gensvm_train_q_helper(GenQueue *, char *, int) nogil
+ void set_queue(GenQueue *, long, GenTask **)
+ double get_task_duration(GenTask *)
+ double get_task_performance(GenTask *)
+ void copy_task_predictions(GenTask *, char *, long)
diff --git a/src/wrapper.pyx b/src/wrapper.pyx
index 1d84b59..4ded637 100644
--- a/src/wrapper.pyx
+++ b/src/wrapper.pyx
@@ -11,6 +11,8 @@ Not implemented yet:
from __future__ import print_function
+from libc.stdlib cimport malloc, free
+
import numpy as np
cimport numpy as np
@@ -23,6 +25,7 @@ GENSVM_KERNEL_TYPES = ["linear", "poly", "rbf", "sigmoid"]
def train_wrap(
np.ndarray[np.float64_t, ndim=2, mode='c'] X,
np.ndarray[np.int_t, ndim=1, mode='c'] y,
+ long n_class,
double p=1.0,
double lmd=pow(2, -8),
double kappa=0.0,
@@ -34,25 +37,25 @@ def train_wrap(
double degree=2.0,
double kernel_eigen_cutoff=1e-8,
int max_iter=100000000,
- int random_seed=-1):
+ int random_seed=-1,
+ np.ndarray[np.float64_t, ndim=2, mode='c'] seed_V=None
+ ):
"""
"""
# Initialize model and data
cdef GenModel *model = gensvm_init_model()
cdef GenData *data = gensvm_init_data()
+ cdef GenModel *seed_model = gensvm_init_model()
cdef long n_obs
cdef long n_var
- cdef long n_class
# get the kernel index
kernel_index = GENSVM_KERNEL_TYPES.index(kernel)
# get the number of classes
- classes = np.unique(y)
n_obs = X.shape[0]
n_var = X.shape[1]
- n_class = classes.shape[0]
# Set the data
set_data(data, X.data, y.data, X.shape, n_class)
@@ -61,6 +64,14 @@ def train_wrap(
set_model(model, p, lmd, kappa, epsilon, weight_idx, kernel_index, degree,
gamma, coef, kernel_eigen_cutoff, max_iter, random_seed)
+ if not seed_V is None:
+ set_seed_model(seed_model, p, lmd, kappa, epsilon, weight_idx,
+ kernel_index, degree, gamma, coef, kernel_eigen_cutoff,
+ max_iter, random_seed, seed_V.data, n_var, n_class)
+ else:
+ gensvm_free_model(seed_model)
+ seed_model = NULL
+
# Check the parameters
error_msg = check_model(model)
if error_msg:
@@ -71,7 +82,10 @@ def train_wrap(
# Do the actual training
with nogil:
- gensvm_train(model, data, NULL)
+ gensvm_train(model, data, seed_model)
+
+ # update the number of variables (this may have changed due to kernel)
+ n_var = get_m(model)
# copy the results
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V
@@ -90,6 +104,7 @@ def train_wrap(
return (V, n_SV, iter_count, training_error, fit_status)
+
def predict_wrap(
np.ndarray[np.float64_t, ndim=2, mode='c'] X,
np.ndarray[np.float64_t, ndim=2, mode='c'] V
@@ -116,6 +131,91 @@ def predict_wrap(
return predictions
+def grid_wrap(
+        np.ndarray[np.float64_t, ndim=2, mode='c'] X,
+        np.ndarray[np.int_t, ndim=1, mode='c'] y,
+        candidate_params,
+        int store_predictions,
+        np.ndarray[np.int_t, ndim=1, mode='c'] cv_idx,
+        int n_folds,
+        ):
+    """Run each entry of candidate_params as a GenTask on one queue and return a
+    dict with per-candidate 'params', 'duration', 'scores', 'predictions' lists."""
+
+    cdef GenQueue *queue = gensvm_init_queue()
+    cdef GenData *data = gensvm_init_data()
+    cdef GenTask *task
+    cdef long n_obs
+    cdef long n_var
+    cdef long n_class
+    cdef long n_tasks = len(candidate_params)
+
+    # get the number of classes
+    classes = np.unique(y)
+    n_obs = X.shape[0]
+    n_var = X.shape[1]
+    n_class = classes.shape[0]
+
+    set_data(data, X.data, y.data, X.shape, n_class)
+
+    cdef GenTask **tasks = <GenTask **>malloc(n_tasks * sizeof(GenTask *))
+
+    ID = 0
+    for candidate in candidate_params:
+        params = {  # defaults; entries of the candidate dict override them below
+            'p': 1.0,
+            'lmd': 1e-5,
+            'kappa': 0.0,
+            'epsilon': 1e-6,
+            'weight_idx': 1,
+            'kernel': GENSVM_KERNEL_TYPES.index('linear'),
+            'gamma': 1.0,
+            'coef': 0.0,
+            'degree': 2.0,
+            'max_iter': 100000000,  # integer like train_wrap's default; set_task takes a long
+        }
+        params.update(candidate)
+        if 'kernel' in candidate:  # the C side takes the kernel index, not its name
+            params['kernel'] = GENSVM_KERNEL_TYPES.index(candidate['kernel'])
+        if 'weights' in candidate:  # 'unit' maps to index 1, anything else to 2
+            params['weight_idx'] = 1 if candidate['weights'] == 'unit' else 2
+
+        task = gensvm_init_task()
+        set_task(task, ID, data, n_folds, params['p'], params['lmd'],
+                params['kappa'], params['epsilon'], params['weight_idx'],
+                params['kernel'], params['degree'], params['gamma'],
+                params['coef'], params['max_iter'])
+
+        tasks[ID] = task
+        ID += 1
+
+    set_queue(queue, n_tasks, tasks)
+
+    with nogil:
+        gensvm_train_q_helper(queue, cv_idx.data, store_predictions)
+
+    cdef np.ndarray[np.int_t, ndim=1, mode='c'] pred
+
+    results = dict()
+    results['params'] = []
+    results['duration'] = []
+    results['scores'] = []
+    results['predictions'] = []  # stays empty unless store_predictions is set
+    for ID in range(n_tasks):
+        results['params'].append(candidate_params[ID])
+        results['duration'].append(get_task_duration(tasks[ID]))
+        results['scores'].append(get_task_performance(tasks[ID]))
+        if store_predictions:
+            pred = np.zeros((n_obs, ), dtype=np.int_)  # np.int alias is gone from NumPy; np.int_ matches np.int_t
+            copy_task_predictions(tasks[ID], pred.data, n_obs)
+            results['predictions'].append(pred)  # fresh array each iteration, so no copy is needed
+
+    gensvm_free_queue(queue)
+    free_data(data)
+
+    return results
+
+
def set_verbosity_wrap(int verbosity):
"""
Control verbosity of gensvm library