diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-03-06 17:49:40 +0000 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2020-03-06 17:49:40 +0000 |
| commit | 10fabb58010d80f10ffe5de58faf8eea90c223b9 (patch) | |
| tree | a4630cd6cc17437949827062a2b9639640bbbf73 | |
| parent | Merge branch 'travis_releases' (diff) | |
| parent | Documentation improvements (diff) | |
| download | pygensvm-10fabb58010d80f10ffe5de58faf8eea90c223b9.tar.gz pygensvm-10fabb58010d80f10ffe5de58faf8eea90c223b9.zip | |
Merge branch 'docs'
| -rw-r--r-- | docs/auto_functions.rst | 2 | ||||
| -rw-r--r-- | docs/changelog.rst | 1 | ||||
| -rw-r--r-- | docs/cls_gensvm.rst | 2 | ||||
| -rw-r--r-- | docs/cls_gensvm.txt | 139 | ||||
| -rw-r--r-- | docs/cls_gridsearch.rst | 2 | ||||
| -rw-r--r-- | docs/cls_gridsearch.txt | 285 | ||||
| -rw-r--r-- | docs/generate_autodocs.py | 22 | ||||
| -rw-r--r-- | docs/index.rst | 35 | ||||
| -rw-r--r-- | docs/kernels.rst (renamed from docs/kernels.txt) | 0 | ||||
| -rw-r--r-- | docs/readme.rst | 1 | ||||
| -rw-r--r-- | gensvm/core.py | 33 | ||||
| -rw-r--r-- | gensvm/gridsearch.py | 59 |
12 files changed, 103 insertions, 478 deletions
diff --git a/docs/auto_functions.rst b/docs/auto_functions.rst index 2a6596f..de8438d 100644 --- a/docs/auto_functions.rst +++ b/docs/auto_functions.rst @@ -1,3 +1,5 @@ +Parameter Grids +=============== .. py:function:: load_grid_tiny() :noindex: diff --git a/docs/changelog.rst b/docs/changelog.rst new file mode 100644 index 0000000..446252e --- /dev/null +++ b/docs/changelog.rst @@ -0,0 +1 @@ +.. include:: ./CHANGELOG.rst diff --git a/docs/cls_gensvm.rst b/docs/cls_gensvm.rst index b4bc9a7..80a2b25 100644 --- a/docs/cls_gensvm.rst +++ b/docs/cls_gensvm.rst @@ -1,3 +1,5 @@ +GenSVM +====== .. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=1.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0) :noindex: diff --git a/docs/cls_gensvm.txt b/docs/cls_gensvm.txt deleted file mode 100644 index b4bc9a7..0000000 --- a/docs/cls_gensvm.txt +++ /dev/null @@ -1,139 +0,0 @@ - -.. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=1.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0) - :noindex: - :module: gensvm.core - - Generalized Multiclass Support Vector Machine Classification. - - This class implements the basic GenSVM classifier. GenSVM is a generalized - multiclass SVM which is flexible in the weighting of misclassification - errors. It is this flexibility that makes it perform well on diverse - datasets. - - The :func:`~GenSVM.fit` and :func:`~GenSVM.predict` methods of this class - use the GenSVM C library for the actual computations. - - :param p: Parameter for the L_p norm of the loss function (1.0 <= p <= 2.0) - :type p: float, optional (default=1.0) - :param lmd: Parameter for the regularization term of the loss function (lmd > 0) - :type lmd: float, optional (default=1e-5) - :param kappa: Parameter for the hinge function in the loss function (kappa > -1.0) - :type kappa: float, optional (default=0.0) - :param weights: Type of sample weights to use. Options are 'unit' for unit weights and - 'group' for group size correction weights (equation 4 in the paper). - - It is also possible to provide an explicit vector of sample weights - through the :func:`~GenSVM.fit` method. If so, it will override the - setting provided here. - :type weights: string, optional (default='unit') - :param kernel: Specify the kernel type to use in the classifier. It must be one of - 'linear', 'poly', 'rbf', or 'sigmoid'. - :type kernel: string, optional (default='linear') - :param gamma: Kernel parameter for the rbf, poly, and sigmoid kernel. If gamma is - 'auto' then 1/n_features will be used. See `Kernels in GenSVM - <gensvm_kernels_>`_ for the exact implementation of the kernels. - :type gamma: float, optional (default='auto') - :param coef: Kernel parameter for the poly and sigmoid kernel. See `Kernels in - GenSVM <gensvm_kernels_>`_ for the exact implementation of the kernels. - :type coef: float, optional (default=1.0) - :param degree: Kernel parameter for the poly kernel. See `Kernels in GenSVM - <gensvm_kernels_>`_ for the exact implementation of the kernels. - :type degree: float, optional (default=2.0) - :param kernel_eigen_cutoff: Cutoff point for the reduced eigendecomposition used with nonlinear - GenSVM. Eigenvectors for which the ratio between their corresponding - eigenvalue and the largest eigenvalue is smaller than the cutoff will - be dropped. - :type kernel_eigen_cutoff: float, optional (default=1e-8) - :param verbose: Enable verbose output - :type verbose: int, (default=0) - :param random_state: The seed for the random number generation used for initialization where - necessary. See the documentation of - ``sklearn.utils.check_random_state`` for more info. - :type random_state: None, int, instance of RandomState - :param max_iter: The maximum number of iterations to be run. - :type max_iter: int, (default=1e8) - - .. attribute:: coef_ - - *array, shape = [n_features, n_classes-1]* -- Weights assigned to the features (coefficients in the primal problem) - - .. attribute:: intercept_ - - *array, shape = [n_classes-1]* -- Constants in the decision function - - .. attribute:: combined_coef_ - - *array, shape = [n_features+1, n_classes-1]* -- Combined weights matrix for the seed_V parameter to the fit method - - .. attribute:: n_iter_ - - *int* -- The number of iterations that were run during training. - - .. attribute:: n_support_ - - *int* -- The number of support vectors that were found - - .. attribute:: SVs_ - - *array, shape = [n_observations, ]* -- Index vector that marks the support vectors (1 = SV, 0 = no SV) - - .. seealso:: - - :class:`.GenSVMGridSearchCV` - Helper class to run an efficient grid search for GenSVM. - - .. _gensvm_kernels: - https://gensvm.readthedocs.io/en/latest/#kernels-in-gensvm - - - - .. py:method:: GenSVM.fit(X, y, sample_weight=None, seed_V=None) - :noindex: - :module: gensvm.core - - Fit the GenSVM model on the given data - - The model can be fit with or without a seed matrix (``seed_V``). This - can be used to provide warm starts for the algorithm. - - :param X: The input data. It is expected that only numeric data is given. - :type X: array, shape = (n_observations, n_features) - :param y: The label vector, labels can be numbers or strings. - :type y: array, shape = (n_observations, ) - :param sample_weight: Array of weights that are assigned to individual samples. If not - provided, then the weight specification in the constructor is used - ('unit' or 'group'). - :type sample_weight: array, shape = (n_observations, ) - :param seed_V: Seed coefficient array to use as a warm start for the optimization. - It can for instance be the :attr:`combined_coef_ - <.GenSVM.combined_coef_>` attribute of a different GenSVM model. - This is only supported for the linear kernel. - - NOTE: the size of the seed_V matrix is ``n_features+1`` by - ``n_classes - 1``. The number of columns of ``seed_V`` is leading - for the number of classes in the model. For example, if ``y`` - contains 3 different classes and ``seed_V`` has 3 columns, we - assume that there are actually 4 classes in the problem but one - class is just represented in this training data. This can be useful - for problems were a certain class has only a few samples. - :type seed_V: array, shape = (n_features+1, n_classes-1), optional - - :returns: **self** -- Returns self. - :rtype: object - - - .. py:method:: GenSVM.predict(X, trainX=None) - :noindex: - :module: gensvm.core - - Predict the class labels on the given data - - :param X: Data for which to predict the labels - :type X: array, shape = [n_test_samples, n_features] - :param trainX: Only for nonlinear prediction with kernels: the training data used - to train the model. - :type trainX: array, shape = [n_train_samples, n_features] - - :returns: **y_pred** -- Predicted class labels of the data in X. - :rtype: array, shape = (n_samples, ) - diff --git a/docs/cls_gridsearch.rst b/docs/cls_gridsearch.rst index 6a2c05e..13cce92 100644 --- a/docs/cls_gridsearch.rst +++ b/docs/cls_gridsearch.rst @@ -1,3 +1,5 @@ +GenSVMGridSearchCV +================== .. py:class:: GenSVMGridSearchCV(param_grid='tiny', scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True) :noindex: diff --git a/docs/cls_gridsearch.txt b/docs/cls_gridsearch.txt deleted file mode 100644 index 6a2c05e..0000000 --- a/docs/cls_gridsearch.txt +++ /dev/null @@ -1,285 +0,0 @@ - -.. py:class:: GenSVMGridSearchCV(param_grid='tiny', scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True) - :noindex: - :module: gensvm.gridsearch - - GenSVM cross validated grid search - - This class implements efficient GenSVM grid search with cross validation. - One of the strong features of GenSVM is that seeding the classifier - properly can greatly reduce total training time. This class ensures that - the grid search is done in the most efficient way possible. - - The implementation of this class is based on the `GridSearchCV`_ class in - scikit-learn. The documentation of the various parameters is therefore - mostly the same. This is done to provide the user with a familiar and - easy-to-use interface to doing a grid search with GenSVM. A separate class - was needed to benefit from the fast low-level C implementation of grid - search in the GenSVM library. - - :param param_grid: If a string, it must be either 'tiny', 'small', or 'full' to load the - predefined parameter grids (see the functions :func:`load_grid_tiny`, - :func:`load_grid_small`, and :func:`load_grid_full`). - - Otherwise, a dictionary of parameter names (strings) as keys and lists - of parameter settings to evaluate as values, or a list of such dicts. - The GenSVM model will be evaluated at all combinations of the - parameters. - :type param_grid: string, dict, or list of dicts - :param scoring: A single string (see :ref:`scoring_parameter`) or a callable (see - :ref:`scoring`) to evaluate the predictions on the test set. - - For evaluating multiple metrics, either give a list of (unique) strings - or a dict with names as keys and callables as values. - - NOTE that when using custom scorers, each scorer should return a single - value. Metric functions returning a list/array of values can be wrapped - into multiple scorers that return one value each. - - If None, the `accuracy_score`_ is used. - :type scoring: string, callable, list/tuple, dict or None - :param iid: If True, the data is assumed to be identically distributed across the - folds, and the loss minimized is the total loss per sample and not the - mean loss across the folds. - :type iid: boolean, default=True - :param cv: Determines the cross-validation splitting strategy. Possible inputs for - cv are: - - - None, to use the default 5-fold cross validation, - - integer, to specify the number of folds in a `(Stratified)KFold`, - - An object to be used as a cross-validation generator. - - An iterable yielding train, test splits. - - For integer/None inputs, :class:`StratifiedKFold - <sklearn.model_selection.StratifiedKFold>` is used. In all other - cases, :class:`KFold <sklearn.model_selection.KFold>` is used. - - Refer to the `scikit-learn User Guide on cross validation`_ for the - various strategies that can be used here. - - NOTE: At the moment, the ShuffleSplit and StratifiedShuffleSplit are - not supported in this class. If you need these, you can use the GenSVM - classifier directly with the GridSearchCV object from scikit-learn. - (these methods require significant changes in the low-level library - before they can be supported). - :type cv: int, cross-validation generator or an iterable, optional - :param refit: Refit the GenSVM estimator with the best found parameters on the whole - dataset. - - For multiple metric evaluation, this needs to be a string denoting the - scorer to be used to find the best parameters for refitting the - estimator at the end. - - The refitted estimator is made available at the `:attr:best_estimator_ - <.GenSVMGridSearchCV.best_estimator_>` attribute and allows the user to - use the :func:`~GenSVMGridSearchCV.predict` method directly on this - :class:`.GenSVMGridSearchCV` instance. - - Also for multiple metric evaluation, the attributes :attr:`best_index_ - <.GenSVMGridSearchCV.best_index_>`, :attr:`best_score_ - <.GenSVMGridSearchCV.best_score_>` and :attr:`best_params_ - <.GenSVMGridSearchCV:best_params_>` will only be available if ``refit`` - is set and all of them will be determined w.r.t this specific scorer. - - See ``scoring`` parameter to know more about multiple metric - evaluation. - :type refit: boolean, or string, default=True - :param verbose: Controls the verbosity: the higher, the more messages. - :type verbose: integer - :param return_train_score: If ``False``, the :attr:`cv_results_ <.GenSVMGridSearchCV.cv_results_>` - attribute will not include training scores. - :type return_train_score: boolean, default=True - - .. rubric:: Examples - - >>> from gensvm import GenSVMGridSearchCV - >>> from sklearn.datasets import load_iris - >>> iris = load_iris() - >>> param_grid = {'p': [1.0, 2.0], 'kappa': [-0.9, 0.0, 1.0]} - >>> clf = GenSVMGridSearchCV(param_grid) - >>> clf.fit(iris.data, iris.target) - GenSVMGridSearchCV(cv=None, iid=True, - param_grid={'p': [1.0, 2.0], 'kappa': [-0.9, 0.0, 1.0]}, - refit=True, return_train_score=True, scoring=None, verbose=0) - - .. attribute:: cv_results_ - - *dict of numpy (masked) ndarrays* -- A dict with keys as column headers and values as columns, that can be - imported into a pandas `DataFrame`_. - - For instance the below given table - - +------------+-----------+------------+-----------------+---+---------+ - |param_kernel|param_gamma|param_degree|split0_test_score|...|rank_t...| - +============+===========+============+=================+===+=========+ - | 'poly' | -- | 2 | 0.8 |...| 2 | - +------------+-----------+------------+-----------------+---+---------+ - | 'poly' | -- | 3 | 0.7 |...| 4 | - +------------+-----------+------------+-----------------+---+---------+ - | 'rbf' | 0.1 | -- | 0.8 |...| 3 | - +------------+-----------+------------+-----------------+---+---------+ - | 'rbf' | 0.2 | -- | 0.9 |...| 1 | - +------------+-----------+------------+-----------------+---+---------+ - - will be represented by a ``cv_results_`` dict of:: - - { - 'param_kernel': masked_array(data = ['poly', 'poly', 'rbf', 'rbf'], - mask = [False False False False]...) - 'param_gamma': masked_array(data = [-- -- 0.1 0.2], - mask = [ True True False False]...), - 'param_degree': masked_array(data = [2.0 3.0 -- --], - mask = [False False True True]...), - 'split0_test_score' : [0.8, 0.7, 0.8, 0.9], - 'split1_test_score' : [0.82, 0.5, 0.7, 0.78], - 'mean_test_score' : [0.81, 0.60, 0.75, 0.82], - 'std_test_score' : [0.02, 0.01, 0.03, 0.03], - 'rank_test_score' : [2, 4, 3, 1], - 'split0_train_score' : [0.8, 0.9, 0.7], - 'split1_train_score' : [0.82, 0.5, 0.7], - 'mean_train_score' : [0.81, 0.7, 0.7], - 'std_train_score' : [0.03, 0.03, 0.04], - 'mean_fit_time' : [0.73, 0.63, 0.43, 0.49], - 'std_fit_time' : [0.01, 0.02, 0.01, 0.01], - 'mean_score_time' : [0.007, 0.06, 0.04, 0.04], - 'std_score_time' : [0.001, 0.002, 0.003, 0.005], - 'params' : [{'kernel': 'poly', 'degree': 2}, ...], - } - - NOTE: - - The key ``'params'`` is used to store a list of parameter settings - dicts for all the parameter candidates. - - The ``mean_fit_time``, ``std_fit_time``, ``mean_score_time`` and - ``std_score_time`` are all in seconds. - - For multi-metric evaluation, the scores for all the scorers are - available in the :attr:`cv_results_ <.GenSVMGridSearchCV.cv_results_>` - dict at the keys ending with that scorer's name (``'_<scorer_name>'``) - instead of ``'_score'`` shown above. ('split0_test_precision', - 'mean_train_precision' etc.) - - .. attribute:: best_estimator_ - - *estimator or dict* -- Estimator that was chosen by the search, i.e. estimator which gave - highest score (or smallest loss if specified) on the left out data. Not - available if ``refit=False``. - - See ``refit`` parameter for more information on allowed values. - - .. attribute:: best_score_ - - *float* -- Mean cross-validated score of the best_estimator - - For multi-metric evaluation, this is present only if ``refit`` is - specified. - - .. attribute:: best_params_ - - *dict* -- Parameter setting that gave the best results on the hold out data. - - For multi-metric evaluation, this is present only if ``refit`` is - specified. - - .. attribute:: best_index_ - - *int* -- The index (of the ``cv_results_`` arrays) which corresponds to the best - candidate parameter setting. - - The dict at ``search.cv_results_['params'][search.best_index_]`` gives - the parameter setting for the best model, that gives the highest mean - score (``search.best_score_``). - - For multi-metric evaluation, this is present only if ``refit`` is - specified. - - .. attribute:: scorer_ - - *function or a dict* -- Scorer function used on the held out data to choose the best parameters - for the model. - - For multi-metric evaluation, this attribute holds the validated - ``scoring`` dict which maps the scorer key to the scorer callable. - - .. attribute:: n_splits_ - - *int* -- The number of cross-validation splits (folds/iterations). - - .. rubric:: Notes - - The parameters selected are those that maximize the score of the left out - data, unless an explicit score is passed in which case it is used instead. - - .. seealso:: - - `ParameterGrid`_: - Generates all the combinations of a hyperparameter grid. - - :class:`.GenSVM`: - The GenSVM classifier - - .. _GridSearchCV: - http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html - .. _accuracy_score: - http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html - .. _scikit-learn User Guide on cross validation: - http://scikit-learn.org/stable/modules/cross_validation.html - - .. _ParameterGrid: - http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterGrid.html - .. _DataFrame: - https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html - - - .. py:method:: GenSVMGridSearchCV.fit(X, y, groups=None) - :noindex: - :module: gensvm.gridsearch - - Run GenSVM grid search with all sets of parameters - - :param X: Training data, where n_samples is the number of observations and - n_features is the number of features. - :type X: array-like, shape = (n_samples, n_features) - :param y: Target vector for the training data. - :type y: array-like, shape = (n_samples, ) - :param groups: Group labels for the samples used while splitting the dataset into - train/test sets. - :type groups: array-like, with shape (n_samples, ), optional - - :returns: **self** -- Return self. - :rtype: object - - - .. py:method:: GenSVMGridSearchCV.predict(X, trainX=None) - :noindex: - :module: gensvm.gridsearch - - Predict the class labels on the test data - - :param X: Test data, where n_samples is the number of observations and - n_features is the number of features. - :type X: array-like, shape = (n_samples, n_features) - :param trainX: Only for nonlinear prediction with kernels: the training data used - to train the model. - :type trainX: array, shape = [n_train_samples, n_features] - - :returns: **y_pred** -- Predicted class labels of the data in X. - :rtype: array-like, shape = (n_samples, ) - - - .. py:method:: GenSVMGridSearchCV.score(X, y) - :noindex: - :module: gensvm.gridsearch - - Compute the score on the test data given the true labels - - :param X: Test data, where n_samples is the number of observations and - n_features is the number of features. - :type X: array-like, shape = (n_samples, n_features) - :param y: True labels for the test data. - :type y: array-like, shape = (n_samples, ) - - :returns: **score** - :rtype: float - diff --git a/docs/generate_autodocs.py b/docs/generate_autodocs.py index a0544ef..a2f7e49 100644 --- a/docs/generate_autodocs.py +++ b/docs/generate_autodocs.py @@ -42,11 +42,11 @@ FULL_NAMES = { } OUTPUT_FILES = { - "GenSVMGridSearchCV": os.path.join(DOCDIR, "cls_gridsearch.txt"), - "GenSVM": os.path.join(DOCDIR, "cls_gensvm.txt"), - "load_grid_tiny": os.path.join(DOCDIR, "auto_functions.txt"), - "load_grid_small": os.path.join(DOCDIR, "auto_functions.txt"), - "load_grid_full": os.path.join(DOCDIR, "auto_functions.txt"), + "GenSVMGridSearchCV": os.path.join(DOCDIR, "cls_gridsearch.rst"), + "GenSVM": os.path.join(DOCDIR, "cls_gensvm.rst"), + "load_grid_tiny": os.path.join(DOCDIR, "auto_functions.rst"), + "load_grid_small": os.path.join(DOCDIR, "auto_functions.rst"), + "load_grid_full": os.path.join(DOCDIR, "auto_functions.rst"), } @@ -83,11 +83,13 @@ def generate_class_autodoc(app, cls): documenter.generate(all_members=True) with open(OUTPUT_FILES[cls], "w") as fid: + fid.write(cls + '\n') + fid.write(''.join(['=']*len(cls)) + '\n') for line in ad.result: fid.write(line + "\n") -def generate_func_autodoc(app, func): +def generate_func_autodoc(app, func, add_title=True): ad = AutoDirective( name="autofunc", arguments=[FULL_NAMES[func]], @@ -108,7 +110,11 @@ def generate_func_autodoc(app, func): documenter = FunctionDocumenter(ad, ad.arguments[0]) documenter.generate(all_members=True) + title = 'Parameter Grids' with open(OUTPUT_FILES[func], "a") as fid: + if add_title: + fid.write(title + '\n') + fid.write(''.join(['=']*len(title)) + '\n') for line in ad.result: fid.write(line + "\n") @@ -121,8 +127,10 @@ def main(): app = load_app() for cls in CLASSES: generate_class_autodoc(app, cls) + add_title = True for func in FUNCTIONS: - generate_func_autodoc(app, func) + generate_func_autodoc(app, func, add_title=add_title) + add_title = False if __name__ == "__main__": diff --git a/docs/index.rst b/docs/index.rst index 6845d73..55b6a10 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,29 +3,24 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -.. toctree:: - -.. include:: ./README.rst - -Classes -------- - -GenSVM -^^^^^^ - -.. include:: ./cls_gensvm.txt +.. include:: ./readme.rst -GenSVMGridSearchCV -^^^^^^^^^^^^^^^^^^ - -.. include:: ./cls_gridsearch.txt +.. toctree:: + :maxdepth: 2 -Functions ---------- + readme -.. include:: ./auto_functions.txt +.. toctree:: + :caption: API Documentation + :maxdepth: 2 -.. include:: ./kernels.txt + cls_gensvm + cls_gridsearch + auto_functions + kernels -.. include:: ./CHANGELOG.rst +.. toctree:: + :caption: Further Documentation + :maxdepth: 2 + changelog diff --git a/docs/kernels.txt b/docs/kernels.rst index 479b6c0..479b6c0 100644 --- a/docs/kernels.txt +++ b/docs/kernels.rst diff --git a/docs/readme.rst b/docs/readme.rst new file mode 100644 index 0000000..d62c29e --- /dev/null +++ b/docs/readme.rst @@ -0,0 +1 @@ +.. include:: ./README.rst diff --git a/gensvm/core.py b/gensvm/core.py index 45d59ad..169a30c 100644 --- a/gensvm/core.py +++ b/gensvm/core.py @@ -104,7 +104,7 @@ class GenSVM(BaseEstimator, ClassifierMixin): errors. It is this flexibility that makes it perform well on diverse datasets. - The :func:`~GenSVM.fit` and :func:`~GenSVM.predict` methods of this class + The :meth:`~GenSVM.fit` and :meth:`~GenSVM.predict` methods of this class use the GenSVM C library for the actual computations. Parameters @@ -123,7 +123,7 @@ class GenSVM(BaseEstimator, ClassifierMixin): 'group' for group size correction weights (equation 4 in the paper). It is also possible to provide an explicit vector of sample weights - through the :func:`~GenSVM.fit` method. If so, it will override the + through the :meth:`~GenSVM.fit` method. If so, it will override the setting provided here. kernel : string, optional (default='linear') @@ -183,7 +183,7 @@ class GenSVM(BaseEstimator, ClassifierMixin): See Also -------- - :class:`.GenSVMGridSearchCV`: + :class:`~.gridsearch.GenSVMGridSearchCV`: Helper class to run an efficient grid search for GenSVM. @@ -257,8 +257,8 @@ class GenSVM(BaseEstimator, ClassifierMixin): def fit(self, X, y, sample_weight=None, seed_V=None): """Fit the GenSVM model on the given data - The model can be fit with or without a seed matrix (``seed_V``). This - can be used to provide warm starts for the algorithm. + The model can be fit with or without a seed matrix (`seed_V`). This can + be used to provide warm starts for the algorithm. Parameters ---------- @@ -280,14 +280,13 @@ class GenSVM(BaseEstimator, ClassifierMixin): <.GenSVM.combined_coef_>` attribute of a different GenSVM model. This is only supported for the linear kernel. - NOTE: the size of the seed_V matrix is ``n_features+1`` by - ``n_classes - 1``. The number of columns of ``seed_V`` is leading - for the number of classes in the model. For example, if ``y`` - contains 3 different classes and ``seed_V`` has 3 columns, we - assume that there are actually 4 classes in the problem but one - class is just represented in this training data. This can be useful - for problems were a certain class has only a few samples. - + NOTE: the size of the seed_V matrix is `n_features+1` by `n_classes + - 1`. The number of columns of `seed_V` is leading for the number + of classes in the model. For example, if `y` contains 3 different + classes and `seed_V` has 3 columns, we assume that there are + actually 4 classes in the problem but one class is just + represented in this training data. This can be useful for + problems were a certain class has only a few samples. Returns ------- @@ -354,7 +353,13 @@ class GenSVM(BaseEstimator, ClassifierMixin): ) ) - self.coef_, self.intercept_, self.n_iter_, self.n_support_, self.SVs_ = _fit_gensvm( + ( + self.coef_, + self.intercept_, + self.n_iter_, + self.n_support_, + self.SVs_, + ) = _fit_gensvm( X, y, n_class, diff --git a/gensvm/gridsearch.py b/gensvm/gridsearch.py index 22125a4..550ae64 100644 --- a/gensvm/gridsearch.py +++ b/gensvm/gridsearch.py @@ -65,7 +65,7 @@ def _validate_param_grid(param_grid): """Check if the parameter values are valid This basically does the same checks as in the constructor of the - :class:`core.GenSVM` class, but for the entire parameter grid. + :class:`~.core.GenSVM` class, but for the entire parameter grid. """ # the conditions that the parameters must satisfy @@ -169,16 +169,24 @@ def _format_results( score_time = 0 if return_train_score: - train_pred = predictions[cv_idx != test_idx,] - y_train = true_y[cv_idx != test_idx,] + train_pred = predictions[ + cv_idx != test_idx, + ] + y_train = true_y[ + cv_idx != test_idx, + ] train_score, score_t = _wrap_score( train_pred, y_train, scorers, is_multimetric ) score_time += score_t ret.append(train_score) - test_pred = predictions[cv_idx == test_idx,] - y_test = true_y[cv_idx == test_idx,] + test_pred = predictions[ + cv_idx == test_idx, + ] + y_test = true_y[ + cv_idx == test_idx, + ] test_score, score_t = _wrap_score( test_pred, y_test, scorers, is_multimetric ) @@ -232,7 +240,7 @@ def _fit_grid_gensvm( Returns ------- cv_results_ : dict - The cross validation results. See :func:`~GenSVMGridSearchCV.fit`. + The cross validation results. See :meth:`~GenSVMGridSearchCV.fit`. """ @@ -349,7 +357,7 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin): The refitted estimator is made available at the `:attr:best_estimator_ <.GenSVMGridSearchCV.best_estimator_>` attribute and allows the user to - use the :func:`~GenSVMGridSearchCV.predict` method directly on this + use the :meth:`~GenSVMGridSearchCV.predict` method directly on this :class:`.GenSVMGridSearchCV` instance. Also for multiple metric evaluation, the attributes :attr:`best_index_ @@ -623,7 +631,7 @@ class GenSVMGridSearchCV(BaseEstimator, MetaEstimatorMixin): and not self.best_params_["kernel"] == "linear" and not "gamma" in self.best_params_ ): - self.best_params_["gamma"] = 1. / X.shape[1] + self.best_params_["gamma"] = 1.0 / X.shape[1] self.best_estimator_ = GenSVM(**self.best_params_) # y_orig because GenSVM fit must know the conversion for predict to # work correctly @@ -711,9 +719,24 @@ def load_grid_tiny(): """ pg = [ - {"p": [2.0], "kappa": [5.0], "lmd": [pow(2, -16)], "weights": ["unit"]}, - {"p": [2.0], "kappa": [5.0], "lmd": [pow(2, -18)], "weights": ["unit"]}, - {"p": [2.0], "kappa": [0.5], "lmd": [pow(2, -18)], "weights": ["unit"]}, + { + "p": [2.0], + "kappa": [5.0], + "lmd": [pow(2, -16)], + "weights": ["unit"], + }, + { + "p": [2.0], + "kappa": [5.0], + "lmd": [pow(2, -18)], + "weights": ["unit"], + }, + { + "p": [2.0], + "kappa": [0.5], + "lmd": [pow(2, -18)], + "weights": ["unit"], + }, { "p": [2.0], "kappa": [5.0], @@ -726,7 +749,12 @@ def load_grid_tiny(): "lmd": [pow(2, -18)], "weights": ["unit"], }, - {"p": [2.0], "kappa": [5.0], "lmd": [pow(2, -14)], "weights": ["unit"]}, + { + "p": [2.0], + "kappa": [5.0], + "lmd": [pow(2, -14)], + "weights": ["unit"], + }, { "p": [2.0], "kappa": [0.5], @@ -739,7 +767,12 @@ def load_grid_tiny(): "lmd": [pow(2, -18)], "weights": ["unit"], }, - {"p": [2.0], "kappa": [0.5], "lmd": [pow(2, -16)], "weights": ["unit"]}, + { + "p": [2.0], + "kappa": [0.5], + "lmd": [pow(2, -16)], + "weights": ["unit"], + }, { "p": [2.0], "kappa": [0.5], |
