path: root/gensvm
author     Gertjan van den Burg <gertjanvandenburg@gmail.com>  2019-01-15 12:21:24 +0000
committer  Gertjan van den Burg <gertjanvandenburg@gmail.com>  2019-01-15 12:21:24 +0000
commit     d1ddd504802072d930170b802d2cf98fb309cd46 (patch)
tree       2421ba88a37d686eca467cf85960ab7da4ae991a /gensvm
parent     Move wrapper to better folder structure (diff)
download   pygensvm-d1ddd504802072d930170b802d2cf98fb309cd46.tar.gz
           pygensvm-d1ddd504802072d930170b802d2cf98fb309cd46.zip
Code formatting with Black
Diffstat (limited to 'gensvm')
-rw-r--r--  gensvm/__init__.py        2
-rw-r--r--  gensvm/core.py          179
-rw-r--r--  gensvm/gridsearch.py    251
-rw-r--r--  gensvm/sklearn_util.py  148
-rw-r--r--  gensvm/util.py            2
5 files changed, 370 insertions, 212 deletions
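
The commit records only the reformatted output; the changes below are mostly string-quote normalization and call/signature re-wrapping. A minimal sketch of reproducing this style of reformatting through Black's Python API follows; the Black version and line length used for this commit are not recorded here, so the 79-character limit is an assumption inferred from where the rewrapped lines below break.

# Hypothetical reproduction of the reformatting in this commit (not part of the repo).
# Assumes a Black release that exposes FileMode/format_str; line length is a guess.
import black

source = 'def f(a,b):\n    return {"a":a,"b":b}\n'
formatted = black.format_str(source, mode=black.FileMode(line_length=79))
print(formatted)
# def f(a, b):
#     return {"a": a, "b": b}
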
diff --git a/gensvm/__init__.py b/gensvm/__init__.py
index 712da42..430b929 100644
--- a/gensvm/__init__.py
+++ b/gensvm/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-__version__ = '0.1.7'
+__version__ = "0.1.7"
from .core import GenSVM
from .gridsearch import GenSVMGridSearchCV
diff --git a/gensvm/core.py b/gensvm/core.py
index 77a3a7f..edd5236 100644
--- a/gensvm/core.py
+++ b/gensvm/core.py
@@ -21,9 +21,25 @@ from sklearn.utils.validation import check_is_fitted
from .cython_wrapper import wrapper
-def _fit_gensvm(X, y, n_class, p, lmd, kappa, epsilon, weights, kernel, gamma,
- coef, degree, kernel_eigen_cutoff, verbose, max_iter,
- random_state=None, seed_V=None):
+def _fit_gensvm(
+ X,
+ y,
+ n_class,
+ p,
+ lmd,
+ kappa,
+ epsilon,
+ weights,
+ kernel,
+ gamma,
+ coef,
+ degree,
+ kernel_eigen_cutoff,
+ verbose,
+ max_iter,
+ random_state=None,
+ seed_V=None,
+):
# process the random state
rnd = check_random_state(random_state)
@@ -32,23 +48,41 @@ def _fit_gensvm(X, y, n_class, p, lmd, kappa, epsilon, weights, kernel, gamma,
wrapper.set_verbosity_wrap(verbose)
# convert the weight index
- weight_idx = 1 if weights == 'unit' else 2
+ weight_idx = 1 if weights == "unit" else 2
# run the actual training
raw_coef_, n_SV_, n_iter_, training_error_, status_ = wrapper.train_wrap(
- X, y, n_class, p, lmd, kappa, epsilon, weight_idx, kernel, gamma,
- coef, degree, kernel_eigen_cutoff, max_iter,
- rnd.randint(np.iinfo('i').max), seed_V)
+ X,
+ y,
+ n_class,
+ p,
+ lmd,
+ kappa,
+ epsilon,
+ weight_idx,
+ kernel,
+ gamma,
+ coef,
+ degree,
+ kernel_eigen_cutoff,
+ max_iter,
+ rnd.randint(np.iinfo("i").max),
+ seed_V,
+ )
# process output
if status_ == 1 and verbose > 0:
- warnings.warn("GenSVM optimization prematurely ended due to a "
- "incorrect step in the optimization algorithm.",
- FitFailedWarning)
+ warnings.warn(
+ "GenSVM optimization prematurely ended due to a "
+ "incorrect step in the optimization algorithm.",
+ FitFailedWarning,
+ )
if status_ == 2 and verbose > 0:
- warnings.warn("GenSVM failed to converge, increase "
- "the number of iterations.", ConvergenceWarning)
+ warnings.warn(
+ "GenSVM failed to converge, increase " "the number of iterations.",
+ ConvergenceWarning,
+ )
coef_ = raw_coef_[1:, :]
intercept_ = raw_coef_[0, :]
@@ -141,32 +175,53 @@ class GenSVM(BaseEstimator, ClassifierMixin):
"""
- def __init__(self, p=1.0, lmd=1e-5, kappa=0.0, epsilon=1e-6,
- weights='unit', kernel='linear', gamma='auto', coef=1.0,
- degree=2.0, kernel_eigen_cutoff=1e-8, verbose=0, random_state=None,
- max_iter=1e8):
+ def __init__(
+ self,
+ p=1.0,
+ lmd=1e-5,
+ kappa=0.0,
+ epsilon=1e-6,
+ weights="unit",
+ kernel="linear",
+ gamma="auto",
+ coef=1.0,
+ degree=2.0,
+ kernel_eigen_cutoff=1e-8,
+ verbose=0,
+ random_state=None,
+ max_iter=1e8,
+ ):
if not 1.0 <= p <= 2.0:
- raise ValueError("Value for p should be within [1, 2]; got p = %r"
- % p)
+ raise ValueError(
+ "Value for p should be within [1, 2]; got p = %r" % p
+ )
if not kappa > -1.0:
- raise ValueError("Value for kappa should be larger than -1; got "
- "kappa = %r" % kappa)
+ raise ValueError(
+ "Value for kappa should be larger than -1; got "
+ "kappa = %r" % kappa
+ )
if not lmd > 0:
- raise ValueError("Value for lmd should be larger than 0; got "
- "lmd = %r" % lmd)
+ raise ValueError(
+ "Value for lmd should be larger than 0; got " "lmd = %r" % lmd
+ )
if not epsilon > 0:
- raise ValueError("Value for epsilon should be larger than 0; got "
- "epsilon = %r" % epsilon)
+ raise ValueError(
+ "Value for epsilon should be larger than 0; got "
+ "epsilon = %r" % epsilon
+ )
if gamma == 0.0:
raise ValueError("A gamma value of 0.0 is invalid")
- if not weights in ('unit', 'group'):
- raise ValueError("Unknown weight parameter specified. Should be "
- "'unit' or 'group'; got %r" % weights)
- if not kernel in ('linear', 'rbf', 'poly', 'sigmoid'):
- raise ValueError("Unknown kernel specified. Should be "
- "'linear', 'rbf', 'poly', or 'sigmoid'; got %r" % kernel)
-
+ if not weights in ("unit", "group"):
+ raise ValueError(
+ "Unknown weight parameter specified. Should be "
+ "'unit' or 'group'; got %r" % weights
+ )
+ if not kernel in ("linear", "rbf", "poly", "sigmoid"):
+ raise ValueError(
+ "Unknown kernel specified. Should be "
+ "'linear', 'rbf', 'poly', or 'sigmoid'; got %r" % kernel
+ )
self.p = p
self.lmd = lmd
@@ -182,7 +237,6 @@ class GenSVM(BaseEstimator, ClassifierMixin):
self.random_state = random_state
self.max_iter = max_iter
-
def fit(self, X, y, seed_V=None):
"""Fit the GenSVM model on the given data
@@ -219,44 +273,69 @@ class GenSVM(BaseEstimator, ClassifierMixin):
Returns self.
"""
- X, y_org = check_X_y(X, y, accept_sparse=False, dtype=np.float64,
- order="C")
+ X, y_org = check_X_y(
+ X, y, accept_sparse=False, dtype=np.float64, order="C"
+ )
y_type = type_of_target(y_org)
if y_type not in ["binary", "multiclass"]:
raise ValueError("Label type not allowed for GenSVM: %r" % y_type)
- if self.gamma == 'auto':
+ if self.gamma == "auto":
gamma = 1 / X.shape[1]
else:
gamma = self.gamma
- # This is necessary because GenSVM expects classes to go from 1 to
+ # This is necessary because GenSVM expects classes to go from 1 to
# n_class
self.encoder = LabelEncoder()
y = self.encoder.fit_transform(y_org)
y += 1
n_class = len(np.unique(y))
- if not seed_V is None and self.kernel != 'linear':
- warnings.warn("Warm starts are only supported for the "
- "linear kernel. The seed_V parameter will be ignored.")
+ if not seed_V is None and self.kernel != "linear":
+ warnings.warn(
+ "Warm starts are only supported for the "
+ "linear kernel. The seed_V parameter will be ignored."
+ )
seed_V = None
if not seed_V is None:
n_samples, n_features = X.shape
if seed_V.shape[1] + 1 > n_class:
n_class = seed_V.shape[1]
- if seed_V.shape[0] - 1 != n_features or (seed_V.shape[1] + 1 <
- n_class):
- raise ValueError("Seed V must have shape [%i, %i], "
- "but has shape [%i, %i]" % (n_features+1, n_class-1,
- seed_V.shape[0], seed_V.shape[1]))
-
- self.coef_, self.intercept_, self.n_iter_, self.n_support_ = \
- _fit_gensvm(X, y, n_class, self.p, self.lmd, self.kappa,
- self.epsilon, self.weights, self.kernel, gamma,
- self.coef, self.degree, self.kernel_eigen_cutoff,
- self.verbose, self.max_iter, self.random_state, seed_V)
+ if seed_V.shape[0] - 1 != n_features or (
+ seed_V.shape[1] + 1 < n_class
+ ):
+ raise ValueError(
+ "Seed V must have shape [%i, %i], "
+ "but has shape [%i, %i]"
+ % (
+ n_features + 1,
+ n_class - 1,
+ seed_V.shape[0],
+ seed_V.shape[1],
+ )
+ )
+
+ self.coef_, self.intercept_, self.n_iter_, self.n_support_ = _fit_gensvm(
+ X,
+ y,
+ n_class,
+ self.p,
+ self.lmd,
+ self.kappa,
+ self.epsilon,
+ self.weights,
+ self.kernel,
+ gamma,
+ self.coef,
+ self.degree,
+ self.kernel_eigen_cutoff,
+ self.verbose,
+ self.max_iter,
+ self.random_state,
+ seed_V,
+ )
return self
def predict(self, X):
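
The core.py hunks above only re-wrap the GenSVM estimator without changing behaviour. For orientation, here is a minimal usage sketch based on the constructor, fit, and predict signatures visible in this diff; the dataset and the parameter values are arbitrary illustrations, not part of the commit.

# Usage sketch of the estimator reformatted above; parameter names come from the
# __init__ signature in this diff, the iris data is only a stand-in example.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

from gensvm import GenSVM

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

clf = GenSVM(p=1.5, lmd=1e-5, kappa=0.0, weights="unit", kernel="linear")
clf.fit(X_train, y_train)
print("accuracy:", np.mean(clf.predict(X_test) == y_test))
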
diff --git a/gensvm/gridsearch.py b/gensvm/gridsearch.py
index d5ea31e..dc835f9 100644
--- a/gensvm/gridsearch.py
+++ b/gensvm/gridsearch.py
@@ -27,29 +27,33 @@ from sklearn.utils.validation import indexable
from .cython_wrapper import wrapper
from .core import GenSVM
-from .sklearn_util import (_skl_format_cv_results, _skl_check_scorers,
- _skl_check_is_fitted, _skl_grid_score)
+from .sklearn_util import (
+ _skl_format_cv_results,
+ _skl_check_scorers,
+ _skl_check_is_fitted,
+ _skl_grid_score,
+)
def _sort_candidate_params(candidate_params):
- if any(('epsilon' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('epsilon'), reverse=True)
- if any(('p' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('p'))
- if any(('lmd' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('lmd'))
- if any(('kappa' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('kappa'))
- if any(('weights' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('weights'))
- if any(('gamma' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('gamma'))
- if any(('degree' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('degree'))
- if any(('coef' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('coef'))
- if any(('kernel' in p for p in candidate_params)):
- candidate_params.sort(key=itemgetter('kernel'))
+ if any(("epsilon" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("epsilon"), reverse=True)
+ if any(("p" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("p"))
+ if any(("lmd" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("lmd"))
+ if any(("kappa" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("kappa"))
+ if any(("weights" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("weights"))
+ if any(("gamma" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("gamma"))
+ if any(("degree" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("degree"))
+ if any(("coef" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("coef"))
+ if any(("kernel" in p for p in candidate_params)):
+ candidate_params.sort(key=itemgetter("kernel"))
def _validate_param_grid(param_grid):
@@ -61,32 +65,32 @@ def _validate_param_grid(param_grid):
"""
# the conditions that the parameters must satisfy
conditions = {
- 'p': lambda x : 1.0 <= x <= 2.0,
- 'kappa': lambda x : x > -1.0,
- 'lmd': lambda x : x > 0,
- 'epsilon': lambda x : x > 0,
- 'gamma' : lambda x : x != 0,
- 'weights' : lambda x : x in ['unit', 'group'],
- }
+ "p": lambda x: 1.0 <= x <= 2.0,
+ "kappa": lambda x: x > -1.0,
+ "lmd": lambda x: x > 0,
+ "epsilon": lambda x: x > 0,
+ "gamma": lambda x: x != 0,
+ "weights": lambda x: x in ["unit", "group"],
+ }
for param in conditions:
if param in param_grid:
if not all(map(conditions[param], param_grid[param])):
raise ValueError(
- "Invalid value in grid for parameter: %s." % (param)
- )
+ "Invalid value in grid for parameter: %s." % (param)
+ )
class _MockEstimator(ClassifierMixin):
- #This mock estimator facilitates the use of the Scorer class of
- #Scikit-Learn. Basically, we want to use the _score function of
- #sklearn.model_selection._validation, but we don't keep track of the
- #individual estimators in the GenSVM C grid search code. With this wrapper
- #we can mock an estimator for the _score function.
+ # This mock estimator facilitates the use of the Scorer class of
+ # Scikit-Learn. Basically, we want to use the _score function of
+ # sklearn.model_selection._validation, but we don't keep track of the
+ # individual estimators in the GenSVM C grid search code. With this wrapper
+ # we can mock an estimator for the _score function.
- #The ClassifierMixin adds the score method to the estimator. This allows us
- #to leave scoring=None as the default to the GenSVMGridSearchCV class and
- #ends up using the accuracy_score metric.
+ # The ClassifierMixin adds the score method to the estimator. This allows us
+ # to leave scoring=None as the default to the GenSVMGridSearchCV class and
+ # ends up using the accuracy_score metric.
def __init__(self, predictions):
self.predictions = predictions
@@ -95,11 +99,17 @@ class _MockEstimator(ClassifierMixin):
return self.predictions
-def _format_results(results, cv_idx, true_y, scorers, iid,
- return_train_score=True,
- return_n_test_samples=True,
- return_times=True,
- return_parameters=False):
+def _format_results(
+ results,
+ cv_idx,
+ true_y,
+ scorers,
+ iid,
+ return_train_score=True,
+ return_n_test_samples=True,
+ return_times=True,
+ return_parameters=False,
+):
"""Format the results from the grid search
Parameters
@@ -118,49 +128,52 @@ def _format_results(results, cv_idx, true_y, scorers, iid,
"""
out = []
- candidate_params = results['params']
+ candidate_params = results["params"]
n_candidates = len(candidate_params)
n_splits = len(np.unique(cv_idx))
is_multimetric = not callable(scorers)
- # Out must be a list of dicts of size n_params x n_splits that iterates
+ # Out must be a list of dicts of size n_params x n_splits that iterates
# over the params in the list and for each param iterates over the splits.
- for param, duration, predictions in zip(results['params'],
- results['duration'], results['predictions']):
+ for param, duration, predictions in zip(
+ results["params"], results["duration"], results["predictions"]
+ ):
for test_idx in np.unique(cv_idx):
ret = []
score_time = 0
if return_train_score:
- train_pred = predictions[cv_idx != test_idx, ]
- y_train = true_y[cv_idx != test_idx, ]
+ train_pred = predictions[cv_idx != test_idx,]
+ y_train = true_y[cv_idx != test_idx,]
train_mock = _MockEstimator(train_pred)
start_time = time.time()
- train_scores = _score(train_mock, None, y_train, scorers,
- is_multimetric)
+ train_scores = _score(
+ train_mock, None, y_train, scorers, is_multimetric
+ )
score_time += time.time() - start_time
ret.append(train_scores)
- test_pred = predictions[cv_idx == test_idx, ]
- y_test = true_y[cv_idx == test_idx, ]
+ test_pred = predictions[cv_idx == test_idx,]
+ y_test = true_y[cv_idx == test_idx,]
test_mock = _MockEstimator(test_pred)
start_time = time.time()
- test_scores = _score(test_mock, None, y_test, scorers,
- is_multimetric)
+ test_scores = _score(
+ test_mock, None, y_test, scorers, is_multimetric
+ )
score_time += time.time() - start_time
ret.append(test_scores)
if return_n_test_samples:
ret.append(len(y_test))
if return_times:
- # Note, the C library returns the duration for a task (i.e. all
- # splits). The _skkl_format_cv_results() computes the mean of
- # the values, which should represent the average time per
- # split. To compute this correctly, we here divide by the
- # number of splits. Since we calculate the mean later, the mean
- # is still correct, but this is not the exact fit_time for this
+ # Note, the C library returns the duration for a task (i.e. all
+ # splits). The _skkl_format_cv_results() computes the mean of
+ # the values, which should represent the average time per
+ # split. To compute this correctly, we here divide by the
+ # number of splits. Since we calculate the mean later, the mean
+ # is still correct, but this is not the exact fit_time for this
# fold.
fit_time = duration / n_splits
ret.extend([fit_time, score_time])
@@ -169,14 +182,31 @@ def _format_results(results, cv_idx, true_y, scorers, iid,
out.append(ret)
- cv_results_ = _skl_format_cv_results(out, return_train_score,
- candidate_params, n_candidates, n_splits, scorers, iid)
+ cv_results_ = _skl_format_cv_results(
+ out,
+ return_train_score,
+ candidate_params,
+ n_candidates,
+ n_splits,
+ scorers,
+ iid,
+ )
return cv_results_
-def _fit_grid_gensvm(X, y, groups, candidate_params, scorers, cv, refit,
- verbose, return_train_score, iid):
+def _fit_grid_gensvm(
+ X,
+ y,
+ groups,
+ candidate_params,
+ scorers,
+ cv,
+ refit,
+ verbose,
+ return_train_score,
+ iid,
+):
"""Utility function for fitting the grid search for GenSVM
This function sorts the parameter grid for optimal computation speed, sets
@@ -193,35 +223,30 @@ def _fit_grid_gensvm(X, y, groups, candidate_params, scorers, cv, refit,
"""
# sort the candidate params
- # the optimal order of the parameters from inner to outer loop is: epsilon,
+ # the optimal order of the parameters from inner to outer loop is: epsilon,
# p, lambda, kappa, weights, kernel, ???
_sort_candidate_params(candidate_params)
# set the verbosity in GenSVM
wrapper.set_verbosity_wrap(verbose)
- # NOTE: The C library can compute the accuracy score and destroy the exact
- # predictions, but this doesn't allow us to compute the score per fold. So
+ # NOTE: The C library can compute the accuracy score and destroy the exact
+ # predictions, but this doesn't allow us to compute the score per fold. So
# we always want to get the raw predictions for each grid point.
store_predictions = True
# Convert the cv variable to a cv_idx array
cv = check_cv(cv, y, classifier=True)
n_folds = cv.get_n_splits(X, y, groups)
- cv_idx = np.zeros((X.shape[0], ), dtype=np.int_) - 1
+ cv_idx = np.zeros((X.shape[0],), dtype=np.int_) - 1
fold_idx = 0
for train, test in cv.split(X, y, groups):
- cv_idx[test, ] = fold_idx
+ cv_idx[test,] = fold_idx
fold_idx += 1
results_ = wrapper.grid_wrap(
- X,
- y,
- candidate_params,
- int(store_predictions),
- cv_idx,
- int(n_folds),
- )
+ X, y, candidate_params, int(store_predictions), cv_idx, int(n_folds)
+ )
cv_results_ = _format_results(results_, cv_idx, y, scorers, iid)
return cv_results_, n_folds
@@ -449,8 +474,16 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin):
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html
"""
- def __init__(self, param_grid, scoring=None, iid=True, cv=None, refit=True,
- verbose=0, return_train_score=True):
+ def __init__(
+ self,
+ param_grid,
+ scoring=None,
+ iid=True,
+ cv=None,
+ refit=True,
+ verbose=0,
+ return_train_score=True,
+ ):
self.param_grid = param_grid
_check_param_grid(self.param_grid)
@@ -466,7 +499,6 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin):
def _get_param_iterator(self):
return ParameterGrid(self.param_grid)
-
def fit(self, X, y, groups=None):
"""Run GenSVM grid search with all sets of parameters
@@ -491,14 +523,15 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin):
"""
- X, y_orig = check_X_y(X, y, accept_sparse=False, dtype=np.float64,
- order="C")
+ X, y_orig = check_X_y(
+ X, y, accept_sparse=False, dtype=np.float64, order="C"
+ )
y_type = type_of_target(y_orig)
if y_type not in ["binary", "multiclass"]:
raise ValueError("Label type not allowed for GenSVM: %r" % y_type)
- # This is necessary because GenSVM expects classes to go from 1 to
+ # This is necessary because GenSVM expects classes to go from 1 to
# n_class
self.encoder = LabelEncoder()
y = self.encoder.fit_transform(y_orig)
@@ -507,13 +540,23 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin):
candidate_params = list(self._get_param_iterator())
scorers, self.multimetric_, refit_metric = _skl_check_scorers(
- self.scoring, self.refit)
+ self.scoring, self.refit
+ )
X, y, groups = indexable(X, y, groups)
- results, n_splits = _fit_grid_gensvm(X, y, groups, candidate_params,
- scorers, self.cv, self.refit, self.verbose,
- self.return_train_score, self.iid)
+ results, n_splits = _fit_grid_gensvm(
+ X,
+ y,
+ groups,
+ candidate_params,
+ scorers,
+ self.cv,
+ self.refit,
+ self.verbose,
+ self.return_train_score,
+ self.iid,
+ )
self.cv_results_ = results
@@ -524,23 +567,23 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin):
self.best_index_ = results["rank_test_%s" % refit_metric].argmin()
self.best_params_ = candidate_params[self.best_index_]
self.best_score_ = results["mean_test_%s" % refit_metric][
- self.best_index_]
+ self.best_index_
+ ]
if self.refit:
self.best_estimator_ = GenSVM(**self.best_params_)
- # y_orig because GenSVM fit must know the conversion for predict to
+ # y_orig because GenSVM fit must know the conversion for predict to
# work correctly
self.best_estimator_.fit(X, y_orig)
## Store the only scorer not as a dict for single metric evaluation
- self.scorer_ = scorers if self.multimetric_ else scorers['score']
+ self.scorer_ = scorers if self.multimetric_ else scorers["score"]
self.cv_results_ = results
self.n_splits_ = n_splits
return self
-
def score(self, X, y):
"""Compute the score on the test data given the true labels
@@ -559,9 +602,15 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin):
score : float
"""
- _skl_check_is_fitted(self, 'score', self.refit)
- return _skl_grid_score(X, y, self.scorer_, self.best_estimator_,
- self.refit, self.multimetric_)
+ _skl_check_is_fitted(self, "score", self.refit)
+ return _skl_grid_score(
+ X,
+ y,
+ self.scorer_,
+ self.best_estimator_,
+ self.refit,
+ self.multimetric_,
+ )
def predict(self, X):
"""Predict the class labels on the test data
@@ -578,7 +627,7 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin):
Predicted class labels of the data in X.
"""
- _skl_check_is_fitted(self, 'predict', self.refit)
+ _skl_check_is_fitted(self, "predict", self.refit)
return self.best_estimator_.predict(X)
@@ -609,11 +658,11 @@ def load_default_grid():
used as input for the :class:`.GenSVMGridSearchCV` class.
"""
pg = {
- 'lmd': [pow(2, x) for x in range(-18, 19, 2)],
- 'kappa': [-0.9, 0.5, 5.0],
- 'p': [1.0, 1.5, 2.0],
- 'weights': ['unit', 'group'],
- 'epsilon': [1e-8],
- 'kernel': ['linear']
- }
+ "lmd": [pow(2, x) for x in range(-18, 19, 2)],
+ "kappa": [-0.9, 0.5, 5.0],
+ "p": [1.0, 1.5, 2.0],
+ "weights": ["unit", "group"],
+ "epsilon": [1e-8],
+ "kernel": ["linear"],
+ }
return pg
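
As with core.py, the gridsearch.py changes are purely cosmetic. A hypothetical end-to-end sketch of the GenSVMGridSearchCV interface touched above is shown below; it uses a small made-up parameter grid instead of load_default_grid (whose full grid is much larger), and the dataset is again only an example.

# Illustration of the grid-search interface reformatted above (not from the commit).
from sklearn.datasets import load_wine

from gensvm import GenSVMGridSearchCV

X, y = load_wine(return_X_y=True)

# small illustrative grid; names and valid ranges follow _validate_param_grid above
param_grid = {
    "p": [1.0, 2.0],
    "lmd": [1e-6, 1e-4, 1e-2],
    "kappa": [-0.9, 0.0, 1.0],
    "weights": ["unit", "group"],
}

gs = GenSVMGridSearchCV(param_grid, cv=5, refit=True)
gs.fit(X, y)
print(gs.best_params_, gs.best_score_)
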
diff --git a/gensvm/sklearn_util.py b/gensvm/sklearn_util.py
index 0829bf6..05d9618 100644
--- a/gensvm/sklearn_util.py
+++ b/gensvm/sklearn_util.py
@@ -68,16 +68,30 @@ from sklearn.model_selection._validation import _aggregate_score_dicts
from sklearn.utils.fixes import MaskedArray
from sklearn.utils.validation import check_is_fitted
-def _skl_format_cv_results(out, return_train_score, candidate_params,
- n_candidates, n_splits, scorers, iid):
+
+def _skl_format_cv_results(
+ out,
+ return_train_score,
+ candidate_params,
+ n_candidates,
+ n_splits,
+ scorers,
+ iid,
+):
# if one choose to see train score, "out" will contain train score info
if return_train_score:
- (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,
- score_time) = zip(*out)
+ (
+ train_score_dicts,
+ test_score_dicts,
+ test_sample_counts,
+ fit_time,
+ score_time,
+ ) = zip(*out)
else:
- (test_score_dicts, test_sample_counts, fit_time,
- score_time) = zip(*out)
+ (test_score_dicts, test_sample_counts, fit_time, score_time) = zip(
+ *out
+ )
# test_score_dicts and train_score dicts are lists of dictionaries and
# we make them into dict of lists
@@ -91,35 +105,39 @@ def _skl_format_cv_results(out, return_train_score, candidate_params,
"""A small helper to store the scores/times to the cv_results_"""
# When iterated first by splits, then by parameters
# We want `array` to have `n_candidates` rows and `n_splits` cols.
- array = np.array(array, dtype=np.float64).reshape(n_candidates,
- n_splits)
+ array = np.array(array, dtype=np.float64).reshape(
+ n_candidates, n_splits
+ )
if splits:
for split_i in range(n_splits):
# Uses closure to alter the results
- results["split%d_%s"
- % (split_i, key_name)] = array[:, split_i]
+ results["split%d_%s" % (split_i, key_name)] = array[:, split_i]
array_means = np.average(array, axis=1, weights=weights)
- results['mean_%s' % key_name] = array_means
+ results["mean_%s" % key_name] = array_means
# Weighted std is not directly available in numpy
- array_stds = np.sqrt(np.average((array -
- array_means[:, np.newaxis]) ** 2,
- axis=1, weights=weights))
- results['std_%s' % key_name] = array_stds
+ array_stds = np.sqrt(
+ np.average(
+ (array - array_means[:, np.newaxis]) ** 2,
+ axis=1,
+ weights=weights,
+ )
+ )
+ results["std_%s" % key_name] = array_stds
if rank:
results["rank_%s" % key_name] = np.asarray(
- get_ranks(-array_means), dtype=np.int32)
+ get_ranks(-array_means), dtype=np.int32
+ )
- _store('fit_time', fit_time)
- _store('score_time', score_time)
+ _store("fit_time", fit_time)
+ _store("score_time", score_time)
# Use one MaskedArray and mask all the places where the param is not
# applicable for that candidate. Use defaultdict as each candidate may
# not contain all the params
- param_results = defaultdict(partial(MaskedArray,
- np.empty(n_candidates,),
- mask=True,
- dtype=object))
+ param_results = defaultdict(
+ partial(MaskedArray, np.empty(n_candidates), mask=True, dtype=object)
+ )
for cand_i, params in enumerate(candidate_params):
for name, value in params.items():
# An all masked empty array gets created for the key
@@ -129,19 +147,25 @@ def _skl_format_cv_results(out, return_train_score, candidate_params,
results.update(param_results)
# Store a list of param dicts at the key 'params'
- results['params'] = candidate_params
+ results["params"] = candidate_params
# NOTE test_sample counts (weights) remain the same for all candidates
- test_sample_counts = np.array(test_sample_counts[:n_splits],
- dtype=np.int)
+ test_sample_counts = np.array(test_sample_counts[:n_splits], dtype=np.int)
for scorer_name in scorers.keys():
# Computed the (weighted) mean and std for test scores alone
- _store('test_%s' % scorer_name, test_scores[scorer_name],
- splits=True, rank=True,
- weights=test_sample_counts if iid else None)
+ _store(
+ "test_%s" % scorer_name,
+ test_scores[scorer_name],
+ splits=True,
+ rank=True,
+ weights=test_sample_counts if iid else None,
+ )
if return_train_score:
- _store('train_%s' % scorer_name, train_scores[scorer_name],
- splits=True)
+ _store(
+ "train_%s" % scorer_name,
+ train_scores[scorer_name],
+ splits=True,
+ )
return results
@@ -149,44 +173,49 @@ def _skl_format_cv_results(out, return_train_score, candidate_params,
def _skl_check_scorers(scoring, refit):
scorers, multimetric_ = _check_multimetric_scoring(
- GenSVM(), scoring=scoring)
+ GenSVM(), scoring=scoring
+ )
if multimetric_:
if refit is not False and (
- not isinstance(refit, six.string_types) or
- # This will work for both dict / list (tuple)
- refit not in scorers):
- raise ValueError("For multi-metric scoring, the parameter "
- "refit must be set to a scorer key "
- "to refit an estimator with the best "
- "parameter setting on the whole data and "
- "make the best_* attributes "
- "available for that metric. If this is not "
- "needed, refit should be set to False "
- "explicitly. %r was passed." % refit)
+ not isinstance(refit, six.string_types)
+ or
+ # This will work for both dict / list (tuple)
+ refit not in scorers
+ ):
+ raise ValueError(
+ "For multi-metric scoring, the parameter "
+ "refit must be set to a scorer key "
+ "to refit an estimator with the best "
+ "parameter setting on the whole data and "
+ "make the best_* attributes "
+ "available for that metric. If this is not "
+ "needed, refit should be set to False "
+ "explicitly. %r was passed." % refit
+ )
else:
refit_metric = refit
else:
- refit_metric = 'score'
+ refit_metric = "score"
return scorers, multimetric_, refit_metric
def _skl_check_is_fitted(estimator, method_name, refit):
if not refit:
- raise NotFittedError('This %s instance was initialized '
- 'with refit=False. %s is '
- 'available only after refitting on the best '
- 'parameters. You can refit an estimator '
- 'manually using the ``best_parameters_`` '
- 'attribute'
- % (type(estimator).__name__, method_name))
+ raise NotFittedError(
+ "This %s instance was initialized "
+ "with refit=False. %s is "
+ "available only after refitting on the best "
+ "parameters. You can refit an estimator "
+ "manually using the ``best_parameters_`` "
+ "attribute" % (type(estimator).__name__, method_name)
+ )
else:
- check_is_fitted(estimator, 'best_estimator_')
-
+ check_is_fitted(estimator, "best_estimator_")
def _skl_grid_score(X, y, scorer_, best_estimator_, refit, multimetric_):
- """Returns the score on the given data, if the estimator has been
+ """Returns the score on the given data, if the estimator has been
refit.
This uses the score defined by ``scoring`` where provided, and the
@@ -206,9 +235,10 @@ def _skl_grid_score(X, y, scorer_, best_estimator_, refit, multimetric_):
-------
score : float
"""
- if scorer_ is None:
- raise ValueError("No score function explicitly defined, "
- "and the estimator doesn't provide one %s"
- % best_estimator_)
- score = scorer_[refit] if multimetric_ else scorer_
- return score(best_estimator_, X, y)
+ if scorer_ is None:
+ raise ValueError(
+ "No score function explicitly defined, "
+ "and the estimator doesn't provide one %s" % best_estimator_
+ )
+ score = scorer_[refit] if multimetric_ else scorer_
+ return score(best_estimator_, X, y)
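
One detail in the _store helper above is worth spelling out: NumPy provides np.average with weights but no weighted standard deviation, so the code derives it from a second weighted average of squared deviations. A standalone sketch of that computation, with made-up scores and fold sizes, is given below.

# Standalone sketch of the weighted mean/std computation used in _store above.
import numpy as np

# scores laid out as in _store: n_candidates rows, n_splits columns
array = np.array([[0.90, 0.80, 0.95],
                  [0.70, 0.75, 0.72]])
weights = np.array([50, 50, 40])  # e.g. per-fold test sizes when iid=True

array_means = np.average(array, axis=1, weights=weights)
# weighted std built from a second weighted average, as in the diff
array_stds = np.sqrt(
    np.average((array - array_means[:, np.newaxis]) ** 2, axis=1, weights=weights)
)
print(array_means, array_stds)
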
diff --git a/gensvm/util.py b/gensvm/util.py
index 8d2a3e4..0b7cd1d 100644
--- a/gensvm/util.py
+++ b/gensvm/util.py
@@ -24,7 +24,7 @@ def get_ranks(x):
x = np.ravel(np.asarray(x))
l = len(x)
r = 1
- ranks = np.zeros((l, ))
+ ranks = np.zeros((l,))
while not all([k is None for k in x]):
m = min([k for k in x if not k is None])
idx = [1 if k == m else 0 for k in x]