give the wrapper a better name

author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2017-10-07 15:39:12 +0200
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2017-10-07 15:39:12 +0200
commit: 3179373ad91245d8712c97be5add387d1b8e2304 (patch)
tree: a622b0d73a3b8306a2674a2a4d975700d7183dbc /gensvm/core.py
parent: rearrange and update setup.py (diff)
download: pygensvm-3179373ad91245d8712c97be5add387d1b8e2304.tar.gz
pygensvm-3179373ad91245d8712c97be5add387d1b8e2304.zip
1 files changed, 190 insertions, 0 deletions
diff --git a/gensvm/core.py b/gensvm/core.py
new file mode 100644
index 0000000..7594eba
--- /dev/null
+++ b/gensvm/core.py
@@ -0,0 +1,190 @@
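+# -*- coding: utf-8 -*-
+
+"""
+Core of the Python interface to GenSVM: the ``GenSVM`` estimator class and
+the helper function that calls the ``wrapper`` module to train it.
+"""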
+
+from __future__ import print_function, division
+
+import warnings
+
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.exceptions import ConvergenceWarning, FitFailedWarning
+from sklearn.preprocessing import LabelEncoder
+from sklearn.utils import check_X_y, check_array, check_random_state
+from sklearn.utils.multiclass import type_of_target
+from sklearn.utils.validation import check_is_fitted
+
+from . import wrapper
+
+
+def _fit_gensvm(X, y, p, lmd, kappa, epsilon, weight_idx, kernel, gamma, coef,
+        degree, kernel_eigen_cutoff, verbose, max_iter, random_state=None):
+    """Train a GenSVM model by calling the ``wrapper`` module.
+
+    Parameters
+    ----------
+    X, y
+        Training data and labels, handed to ``wrapper.train_wrap`` as is.
+
+    p, lmd, kappa, epsilon, weight_idx
+        Loss and optimization settings, handed to ``wrapper.train_wrap`` as
+        is.
+
+    kernel, gamma, coef, degree, kernel_eigen_cutoff
+        Kernel settings, handed to ``wrapper.train_wrap`` as is.
+
+    verbose : int
+        Verbosity level given to ``wrapper.set_verbosity_wrap``. The warnings
+        listed below are only issued when this is larger than 0.
+
+    max_iter : int or float
+        Maximum number of iterations. It is converted with ``int`` before it
+        is handed to the wrapper, because the ``GenSVM`` estimator documents
+        it as an int but uses the float ``1e8`` as its default.
+
+    random_state : optional (default=None)
+        Anything accepted by ``sklearn.utils.check_random_state``. It is used
+        to draw the integer seed that is handed to the wrapper.
+
+    Returns
+    -------
+    coef_
+        All rows of the coefficient matrix returned by the wrapper except the
+        first one.
+
+    intercept_
+        The first row of the coefficient matrix returned by the wrapper.
+
+    n_iter_
+        Iteration count as returned by the wrapper.
+
+    n_SV_
+        Number of support vectors as returned by the wrapper.
+
+    Warns
+    -----
+    FitFailedWarning
+        If the wrapper returns status 1 and ``verbose > 0``.
+
+    ConvergenceWarning
+        If the wrapper returns status 2 and ``verbose > 0``.
+
+    """
+
+    # process the random state
+    rnd = check_random_state(random_state)
+
+    # set the verbosity in GenSVM
+    wrapper.set_verbosity_wrap(verbose)
+
+    # run the actual training; the training error that the wrapper also
+    # returns is not used here
+    raw_coef_, n_SV_, n_iter_, _training_error, status_ = wrapper.train_wrap(
+            X, y, p, lmd, kappa, epsilon, weight_idx, kernel, gamma, coef,
+            degree, kernel_eigen_cutoff, int(max_iter),
+            rnd.randint(np.iinfo('i').max))
+
+    # process output
+    if status_ == 1 and verbose > 0:
+        warnings.warn("GenSVM optimization prematurely ended due to an "
+                "incorrect step in the optimization algorithm.",
+                FitFailedWarning)
+
+    if status_ == 2 and verbose > 0:
+        warnings.warn("GenSVM failed to converge, increase "
+                "the number of iterations.", ConvergenceWarning)
+
+    # the first row of the raw matrix holds the constants, the remaining rows
+    # hold the feature weights
+    coef_ = raw_coef_[1:, :]
+    intercept_ = raw_coef_[0, :]
+
+    return coef_, intercept_, n_iter_, n_SV_
+
+
+class GenSVM(BaseEstimator):
+    """Generalized Multiclass Support Vector Machine Classification.
+
+    This class implements the basic GenSVM classifier. GenSVM is a generalized
+    multiclass SVM which is flexible in the weighting of misclassification
+    errors. It is this flexibility that makes it perform well on diverse
+    datasets.
+
+    The methods of this class use the GenSVM C library for the actual
+    computations.
+
+    Parameters
+    ----------
+    p : float, optional (default=1.0)
+        Parameter for the L_p norm of the loss function (1.0 <= p <= 2.0)
+
+    lmd : float, optional (default=1e-5)
+        Parameter for the regularization term of the loss function (lmd > 0)
+
+    kappa : float, optional (default=0.0)
+        Parameter for the hinge function in the loss function (kappa > -1.0)
+
+    epsilon : float, optional (default=1e-6)
+        Parameter handed to the GenSVM optimization routine (epsilon > 0).
+        Presumably the stopping tolerance; confirm against the GenSVM C
+        library.
+
+    weight_idx : int, optional (default=1)
+        Type of sample weights to use (1 = unit weights, 2 = size correction
+        weights)
+
+    kernel : string, optional (default='linear')
+        Specify the kernel type to use in the classifier. It must be one of
+        'linear', 'poly', 'rbf', or 'sigmoid'.
+
+    gamma : float, optional (default=1.0)
+        Kernel parameter for the rbf, poly, and sigmoid kernel
+
+    coef : float, optional (default=0.0)
+        Kernel parameter for the poly and sigmoid kernel
+
+    degree : float, optional (default=2.0)
+        Kernel parameter for the poly kernel
+
+    kernel_eigen_cutoff : float, optional (default=1e-8)
+        Cutoff point for the reduced eigendecomposition used with
+        kernel-GenSVM. Eigenvectors for which the ratio between their
+        corresponding eigenvalue and the largest eigenvalue is smaller than the
+        cutoff will be dropped.
+
+    verbose : int, (default=0)
+        Enable verbose output. Warnings about a failed or non-converged
+        optimization are only issued when this is larger than 0.
+
+    random_state : optional (default=None)
+        Anything accepted by ``sklearn.utils.check_random_state``. It is used
+        to draw the random seed that is handed to the GenSVM library.
+
+    max_iter : int, (default=1e8)
+        The maximum number of iterations to be run.
+
+
+    Attributes
+    ----------
+    coef_ : array, shape = [n_features, n_classes-1]
+        Weights assigned to the features (coefficients in the primal problem)
+
+    intercept_ : array, shape = [n_classes-1]
+        Constants in the decision function
+
+    n_iter_ : int
+        The number of iterations that were run during training.
+
+    n_support_ : int
+        The number of support vectors that were found
+
+    encoder : LabelEncoder
+        Encoder fitted on the training labels. It is used to map the labels
+        to the 1 to n_class range that GenSVM expects, and back.
+
+
+    References
+    ----------
+    * Van den Burg, G.J.J. and Groenen, P.J.F.. GenSVM: A Generalized
+    Multiclass Support Vector Machine. Journal of Machine Learning Research,
+    17(225):1--42, 2016.
+
+    """
+
+    def __init__(self, p=1.0, lmd=1e-5, kappa=0.0, epsilon=1e-6, weight_idx=1,
+            kernel='linear', gamma=1.0, coef=0.0, degree=2.0,
+            kernel_eigen_cutoff=1e-8, verbose=0, random_state=None,
+            max_iter=1e8):
+        self.p = p
+        self.lmd = lmd
+        self.kappa = kappa
+        self.epsilon = epsilon
+        self.weight_idx = weight_idx
+        self.kernel = kernel
+        self.gamma = gamma
+        self.coef = coef
+        self.degree = degree
+        self.kernel_eigen_cutoff = kernel_eigen_cutoff
+        self.verbose = verbose
+        self.random_state = random_state
+        self.max_iter = max_iter
+
+
+    def fit(self, X, y):
+        """Fit the GenSVM model on the given training data.
+
+        Parameters
+        ----------
+        X : array-like
+            Training data. It is validated with ``check_X_y`` and converted to
+            a C-ordered float64 array; sparse input is not accepted.
+
+        y : array-like
+            Class labels. The label type must be "binary" or "multiclass".
+
+        Returns
+        -------
+        self : object
+            The fitted estimator.
+
+        Raises
+        ------
+        ValueError
+            If p, kappa, lmd, or epsilon is outside its allowed range, or if
+            the label type is not allowed.
+
+        """
+        if not 1.0 <= self.p <= 2.0:
+            raise ValueError("Value for p should be within [1, 2]; got p = %r"
+                    % self.p)
+        if not self.kappa > -1.0:
+            raise ValueError("Value for kappa should be larger than -1; got "
+                    "kappa = %r" % self.kappa)
+        if not self.lmd > 0:
+            raise ValueError("Value for lmd should be larger than 0; got "
+                    "lmd = %r" % self.lmd)
+        if not self.epsilon > 0:
+            raise ValueError("Value for epsilon should be larger than 0; got "
+                    "epsilon = %r" % self.epsilon)
+        X, y_org = check_X_y(X, y, accept_sparse=False, dtype=np.float64,
+                order="C")
+
+        y_type = type_of_target(y_org)
+        if y_type not in ["binary", "multiclass"]:
+            raise ValueError("Label type not allowed for GenSVM: %r" % y_type)
+
+        # This is necessary because GenSVM expects classes to go from 1 to
+        # n_class, whereas the encoder yields labels that start at 0
+        self.encoder = LabelEncoder()
+        y = self.encoder.fit_transform(y_org) + 1
+
+        self.coef_, self.intercept_, self.n_iter_, self.n_support_ = \
+                _fit_gensvm(X, y, self.p, self.lmd, self.kappa, self.epsilon,
+                        self.weight_idx, self.kernel, self.gamma, self.coef,
+                        self.degree, self.kernel_eigen_cutoff, self.verbose,
+                        self.max_iter, self.random_state)
+        return self
+
+
+    def predict(self, X):
+        """Predict the class labels for the given data.
+
+        Parameters
+        ----------
+        X : array-like
+            Data to predict. It is validated with ``check_array`` using the
+            same settings as in ``fit`` (C-ordered float64, no sparse input).
+
+        Returns
+        -------
+        outcome : array
+            Predicted labels, in the form of the labels given to ``fit``.
+
+        """
+        check_is_fitted(self, "coef_")
+
+        # Validate the input in the same way as fit does, so that the wrapper
+        # receives the same kind of array in both cases
+        X = check_array(X, accept_sparse=False, dtype=np.float64, order="C")
+
+        # Rebuild the full coefficient matrix, with the constants as first row
+        V = np.vstack((self.intercept_, self.coef_))
+        predictions = wrapper.predict_wrap(X, V)
+
+        # Transform the classes back to the original form; the wrapper works
+        # with labels from 1 to n_class, the encoder with labels from 0
+        outcome = self.encoder.inverse_transform(predictions - 1)
+
+        return outcome
author	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2017-10-07 15:39:12 +0200
committer	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2017-10-07 15:39:12 +0200
commit	3179373ad91245d8712c97be5add387d1b8e2304 (patch)
tree	a622b0d73a3b8306a2674a2a4d975700d7183dbc /gensvm/core.py
parent	rearrange and update setup.py (diff)
download	pygensvm-3179373ad91245d8712c97be5add387d1b8e2304.tar.gz pygensvm-3179373ad91245d8712c97be5add387d1b8e2304.zip