From 3e269c1c6369af3ffbae031d096c29cb9f0a1e76 Mon Sep 17 00:00:00 2001 From: Gertjan van den Burg Date: Sat, 7 Oct 2017 15:35:44 +0200 Subject: rearrange and update setup.py --- .gitmodules | 2 +- Makefile | 2 +- gensvm/pyx_gensvm.pxd | 91 ------------------------------------- gensvm/pyx_gensvm.pyx | 123 -------------------------------------------------- gensvm/src/gensvm | 1 - setup.py | 56 +++++++++++++++++------ src/gensvm | 1 + src/pyx_gensvm.pxd | 91 +++++++++++++++++++++++++++++++++++++ src/pyx_gensvm.pyx | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 260 insertions(+), 230 deletions(-) delete mode 100644 gensvm/pyx_gensvm.pxd delete mode 100644 gensvm/pyx_gensvm.pyx delete mode 160000 gensvm/src/gensvm create mode 160000 src/gensvm create mode 100644 src/pyx_gensvm.pxd create mode 100644 src/pyx_gensvm.pyx diff --git a/.gitmodules b/.gitmodules index 130a83c..084eddf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "gensvm/src/gensvm"] - path = gensvm/src/gensvm + path = src/gensvm url = https://github.com/GjjvdBurg/GenSVM diff --git a/Makefile b/Makefile index cb8022d..8d8bb6c 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ install2: ## Install for the current user using the python2 command python2 setup.py build_ext --inplace python2 setup.py install --user -test: develop ## Run nosetests using the default nosetests command +test: in ## Run nosetests using the default nosetests command nosetests -v test2: develop2 ## Run nosetests using the nosetests2 command diff --git a/gensvm/pyx_gensvm.pxd b/gensvm/pyx_gensvm.pxd deleted file mode 100644 index be4d5f5..0000000 --- a/gensvm/pyx_gensvm.pxd +++ /dev/null @@ -1,91 +0,0 @@ -cimport numpy as np - -# Includes - -cdef extern from "gensvm_globals.h": - # Stuff for kerneltype - ctypedef enum KernelType: - pass - -cdef extern from "gensvm_sparse.h": - # stuff for GenSparse - - cdef struct GenSparse: - long nnz - long n_row - long n_col - double *values - long 
*ia - long *ja - - GenSparse *gensvm_init_sparse() - void gensvm_free_sparse(GenSparse *) - - -cdef extern from "gensvm_base.h": - - cdef struct GenData: - long K - long n - long m - long r - long *y - double *Z - GenSparse *spZ - double *RAW - double *Sigma - KernelType kerneltype - double *kernelparam - - cdef struct GenModel: - int weight_idx - long K - long n - long m - double epsilon - double p - double kappa - double lmd - double *V - double *Vbar - double *U - double *UU - double *Q - double *H - double *rho - double training_error - KernelType kerneltype - double *kernelparam - double kernel_eigen_cutoff - - GenModel *gensvm_init_model() - void gensvm_free_model(GenModel *) - - GenData *gensvm_init_data() - void gensvm_free_data(GenData *) - -cdef extern from "gensvm_train.h": - - void gensvm_train(GenModel *, GenData *, GenModel *) nogil - -cdef extern from "gensvm_sv.h": - - long gensvm_num_sv(GenModel *) - -cdef extern from "gensvm_helper.c": - - ctypedef char* char_const_ptr "char const *" - void set_model(GenModel *, double, double, double, double, int, int, - double, double, double, double, long, long) - void set_data(GenData *, char *, char *, np.npy_intp *, long) - char_const_ptr check_model(GenModel *) - void copy_V(void *, GenModel *) - long get_iter_count(GenModel *) - double get_training_error(GenModel *) - int get_status(GenModel *) - long get_n(GenModel *) - long get_m(GenModel *) - long get_K(GenModel *) - void free_data(GenData *) - void set_verbosity(int) - void gensvm_predict(char *, char *, long, long, long, char *) nogil diff --git a/gensvm/pyx_gensvm.pyx b/gensvm/pyx_gensvm.pyx deleted file mode 100644 index 394d4ca..0000000 --- a/gensvm/pyx_gensvm.pyx +++ /dev/null @@ -1,123 +0,0 @@ -""" -Wrapper for GenSVM - -Not implemented yet: - - vector of instance weights - - class weights - - seed model - - max_iter = -1 for unlimited - -""" - -from __future__ import print_function - -import numpy as np -cimport numpy as np - -cimport pyx_gensvm 
- -np.import_array() - -GENSVM_KERNEL_TYPES = ["linear", "poly", "rbf", "sigmoid"] - -def train_wrap( - np.ndarray[np.float64_t, ndim=2, mode='c'] X, - np.ndarray[np.int_t, ndim=1, mode='c'] y, - double p=1.0, - double lmd=pow(2, -8), - double kappa=0.0, - double epsilon=1e-6, - int weight_idx=1, - str kernel='linear', - double gamma=1.0, - double coef=0.0, - double degree=2.0, - double kernel_eigen_cutoff=1e-8, - int max_iter=100000000, - int random_seed=-1): - """ - """ - - # Initialize model and data - cdef GenModel *model = gensvm_init_model() - cdef GenData *data = gensvm_init_data() - cdef long n_obs - cdef long n_var - cdef long n_class - - # get the kernel index - kernel_index = GENSVM_KERNEL_TYPES.index(kernel) - - # get the number of classes - classes = np.unique(y) - n_obs = X.shape[0] - n_var = X.shape[1] - n_class = classes.shape[0] - - # Set the data - set_data(data, X.data, y.data, X.shape, n_class) - - # Set the model - set_model(model, p, lmd, kappa, epsilon, weight_idx, kernel_index, degree, - gamma, coef, kernel_eigen_cutoff, max_iter, random_seed) - - # Check the parameters - error_msg = check_model(model) - if error_msg: - gensvm_free_model(model) - free_data(data) - error_repl = error_msg.decode('utf-8') - raise ValueError(error_repl) - - # Do the actual training - with nogil: - gensvm_train(model, data, NULL) - - # copy the results - cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V - V = np.empty((n_var+1, n_class-1)) - copy_V(V.data, model) - - # get other results from model - iter_count = get_iter_count(model) - training_error = get_training_error(model) - fit_status = get_status(model) - n_SV = gensvm_num_sv(model) - - # free model and data - gensvm_free_model(model); - free_data(data); - - return (V, n_SV, iter_count, training_error, fit_status) - -def predict_wrap( - np.ndarray[np.float64_t, ndim=2, mode='c'] X, - np.ndarray[np.float64_t, ndim=2, mode='c'] V - ): - """ - """ - - cdef long n_test_obs - cdef long n_var - cdef long n_class 
- - n_test_obs = X.shape[0] - n_var = X.shape[1] - n_class = V.shape[1] + 1 - - # output vector - cdef np.ndarray[np.int_t, ndim=1, mode='c'] predictions - predictions = np.empty((n_test_obs, ), dtype=np.int) - - # do the prediction - with nogil: - gensvm_predict(X.data, V.data, n_test_obs, n_var, n_class, - predictions.data) - - return predictions - -def set_verbosity_wrap(int verbosity): - """ - Control verbosity of gensvm library - """ - set_verbosity(verbosity) diff --git a/gensvm/src/gensvm b/gensvm/src/gensvm deleted file mode 160000 index 1f32ecf..0000000 --- a/gensvm/src/gensvm +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1f32ecf1d2414bf8e8107a95552c1164498a9771 diff --git a/setup.py b/setup.py index 6d369ef..e9e223b 100644 --- a/setup.py +++ b/setup.py @@ -7,41 +7,71 @@ import numpy from numpy.distutils.core import setup from numpy.distutils.misc_util import Configuration -from sklearn._build_utils import get_blas_info, maybe_cythonize_extensions +from sklearn._build_utils import get_blas_info +# Set this to True to enable building extensions using Cython. Set it to False +# to build extensions from the C file (that was previously generated using +# Cython). Set it to 'auto' to build with Cython if available, otherwise from +# the C file. 
+USE_CYTHON = 'auto' -def configuration(parent_package='', top_path=None): - config = Configuration('gensvm', parent_package, top_path) +# If we are in a release, we never use Cython directly +IS_RELEASE = os.path.exists('PKG-INFO') +if IS_RELEASE: + USE_CYTHON = False + +# If we do want to use Cython, we double check if it is available +if USE_CYTHON: + try: + from Cython.Build import cythonize + except ImportError: + if USE_CYTHON == 'auto': + USE_CYTHON = False + else: + raise + + +def configuration(): + config = Configuration('gensvm', '', None) - # gensvm module cblas_libs, blas_info = get_blas_info() if os.name == 'posix': cblas_libs.append('m') + # Wrapper code in Cython uses the .pyx extension if we want to USE_CYTHON, + # otherwise it ends in .c. If you have more Cython code, you may want to + # extend this a bit + wrapper = 'pyx_gensvm.pyx' if USE_CYTHON else 'pyx_gensvm.c' + + # Sources include the C/Cython code from the wrapper and the source code of + # the C library gensvm_sources = [ - os.path.join('gensvm', 'pyx_gensvm.pyx'), - os.path.join('gensvm', 'src', 'gensvm', 'src', '*.c'), + os.path.join('src', wrapper), + os.path.join('src', 'gensvm', 'src', '*.c'), ] + # Dependencies are the header files of the C library and any potential + # helper code between the library and the Cython code gensvm_depends = [ - os.path.join('gensvm', 'src', 'gensvm', 'include', '*.h'), - os.path.join('gensvm', 'src', 'gensvm', 'gensvm_helper.c') + os.path.join('src', 'gensvm', 'include', '*.h'), + os.path.join('src', 'gensvm', 'gensvm_helper.c') ] config.add_extension('pyx_gensvm', sources=gensvm_sources, libraries=cblas_libs, include_dirs=[ - os.path.join('gensvm', 'src', 'gensvm'), - os.path.join('gensvm', 'src', 'gensvm', 'include'), + os.path.join('src', 'gensvm'), + os.path.join('src', 'gensvm', 'include'), numpy.get_include(), blas_info.pop('include_dirs', [])], extra_compile_args=blas_info.pop('extra_compile_args', []), depends=gensvm_depends, 
**blas_info) - # end gensvm module - maybe_cythonize_extensions(top_path, config) + # Cythonize if necessary + if USE_CYTHON: + config.ext_modules = cythonize(config.ext_modules) return config @@ -55,7 +85,7 @@ if __name__ == '__main__': version = re.search("__version__ = '([^']+)'", open('gensvm/__init__.py').read()).group(1) - attr = configuration(top_path='').todict() + attr = configuration().todict() attr['description'] = 'Python package for the GenSVM classifier' attr['long_description'] = read('README.rst') diff --git a/src/gensvm b/src/gensvm new file mode 160000 index 0000000..1f32ecf --- /dev/null +++ b/src/gensvm @@ -0,0 +1 @@ +Subproject commit 1f32ecf1d2414bf8e8107a95552c1164498a9771 diff --git a/src/pyx_gensvm.pxd b/src/pyx_gensvm.pxd new file mode 100644 index 0000000..be4d5f5 --- /dev/null +++ b/src/pyx_gensvm.pxd @@ -0,0 +1,91 @@ +cimport numpy as np + +# Includes + +cdef extern from "gensvm_globals.h": + # Stuff for kerneltype + ctypedef enum KernelType: + pass + +cdef extern from "gensvm_sparse.h": + # stuff for GenSparse + + cdef struct GenSparse: + long nnz + long n_row + long n_col + double *values + long *ia + long *ja + + GenSparse *gensvm_init_sparse() + void gensvm_free_sparse(GenSparse *) + + +cdef extern from "gensvm_base.h": + + cdef struct GenData: + long K + long n + long m + long r + long *y + double *Z + GenSparse *spZ + double *RAW + double *Sigma + KernelType kerneltype + double *kernelparam + + cdef struct GenModel: + int weight_idx + long K + long n + long m + double epsilon + double p + double kappa + double lmd + double *V + double *Vbar + double *U + double *UU + double *Q + double *H + double *rho + double training_error + KernelType kerneltype + double *kernelparam + double kernel_eigen_cutoff + + GenModel *gensvm_init_model() + void gensvm_free_model(GenModel *) + + GenData *gensvm_init_data() + void gensvm_free_data(GenData *) + +cdef extern from "gensvm_train.h": + + void gensvm_train(GenModel *, GenData *, GenModel *) 
nogil + +cdef extern from "gensvm_sv.h": + + long gensvm_num_sv(GenModel *) + +cdef extern from "gensvm_helper.c": + + ctypedef char* char_const_ptr "char const *" + void set_model(GenModel *, double, double, double, double, int, int, + double, double, double, double, long, long) + void set_data(GenData *, char *, char *, np.npy_intp *, long) + char_const_ptr check_model(GenModel *) + void copy_V(void *, GenModel *) + long get_iter_count(GenModel *) + double get_training_error(GenModel *) + int get_status(GenModel *) + long get_n(GenModel *) + long get_m(GenModel *) + long get_K(GenModel *) + void free_data(GenData *) + void set_verbosity(int) + void gensvm_predict(char *, char *, long, long, long, char *) nogil diff --git a/src/pyx_gensvm.pyx b/src/pyx_gensvm.pyx new file mode 100644 index 0000000..394d4ca --- /dev/null +++ b/src/pyx_gensvm.pyx @@ -0,0 +1,123 @@ +""" +Wrapper for GenSVM + +Not implemented yet: + - vector of instance weights + - class weights + - seed model + - max_iter = -1 for unlimited + +""" + +from __future__ import print_function + +import numpy as np +cimport numpy as np + +cimport pyx_gensvm + +np.import_array() + +GENSVM_KERNEL_TYPES = ["linear", "poly", "rbf", "sigmoid"] + +def train_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] X, + np.ndarray[np.int_t, ndim=1, mode='c'] y, + double p=1.0, + double lmd=pow(2, -8), + double kappa=0.0, + double epsilon=1e-6, + int weight_idx=1, + str kernel='linear', + double gamma=1.0, + double coef=0.0, + double degree=2.0, + double kernel_eigen_cutoff=1e-8, + int max_iter=100000000, + int random_seed=-1): + """ + """ + + # Initialize model and data + cdef GenModel *model = gensvm_init_model() + cdef GenData *data = gensvm_init_data() + cdef long n_obs + cdef long n_var + cdef long n_class + + # get the kernel index + kernel_index = GENSVM_KERNEL_TYPES.index(kernel) + + # get the number of classes + classes = np.unique(y) + n_obs = X.shape[0] + n_var = X.shape[1] + n_class = classes.shape[0] + + # 
Set the data + set_data(data, X.data, y.data, X.shape, n_class) + + # Set the model + set_model(model, p, lmd, kappa, epsilon, weight_idx, kernel_index, degree, + gamma, coef, kernel_eigen_cutoff, max_iter, random_seed) + + # Check the parameters + error_msg = check_model(model) + if error_msg: + gensvm_free_model(model) + free_data(data) + error_repl = error_msg.decode('utf-8') + raise ValueError(error_repl) + + # Do the actual training + with nogil: + gensvm_train(model, data, NULL) + + # copy the results + cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V + V = np.empty((n_var+1, n_class-1)) + copy_V(V.data, model) + + # get other results from model + iter_count = get_iter_count(model) + training_error = get_training_error(model) + fit_status = get_status(model) + n_SV = gensvm_num_sv(model) + + # free model and data + gensvm_free_model(model); + free_data(data); + + return (V, n_SV, iter_count, training_error, fit_status) + +def predict_wrap( + np.ndarray[np.float64_t, ndim=2, mode='c'] X, + np.ndarray[np.float64_t, ndim=2, mode='c'] V + ): + """ + """ + + cdef long n_test_obs + cdef long n_var + cdef long n_class + + n_test_obs = X.shape[0] + n_var = X.shape[1] + n_class = V.shape[1] + 1 + + # output vector + cdef np.ndarray[np.int_t, ndim=1, mode='c'] predictions + predictions = np.empty((n_test_obs, ), dtype=np.int) + + # do the prediction + with nogil: + gensvm_predict(X.data, V.data, n_test_obs, n_var, n_class, + predictions.data) + + return predictions + +def set_verbosity_wrap(int verbosity): + """ + Control verbosity of gensvm library + """ + set_verbosity(verbosity) -- cgit v1.2.3