diff options
| author | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-05-30 18:39:05 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <gertjanvandenburg@gmail.com> | 2019-05-30 18:39:05 +0100 |
| commit | 47116a4682edb1f22d00da06802cc3eff40bf5bd (patch) | |
| tree | 1f8bcbb1b86e112eefed5a6dd4fe0ea1541183d7 /docs | |
| parent | Merge branch 'master' of github.com:GjjvdBurg/PyGenSVM (diff) | |
| download | pygensvm-47116a4682edb1f22d00da06802cc3eff40bf5bd.tar.gz pygensvm-47116a4682edb1f22d00da06802cc3eff40bf5bd.zip | |
Update documentation
Diffstat (limited to 'docs')
| -rw-r--r-- | docs/auto_functions.rst | 47 | ||||
| -rw-r--r-- | docs/cls_gensvm.rst | 33 | ||||
| -rw-r--r-- | docs/cls_gridsearch.rst | 28 | ||||
| -rw-r--r-- | docs/generate_autodocs.py | 92 |
4 files changed, 148 insertions, 52 deletions
diff --git a/docs/auto_functions.rst b/docs/auto_functions.rst index 3ba1fff..2a6596f 100644 --- a/docs/auto_functions.rst +++ b/docs/auto_functions.rst @@ -1,9 +1,52 @@ -.. py:function:: load_default_grid() +.. py:function:: load_grid_tiny() :noindex: :module: gensvm.gridsearch - Load the default parameter grid for GenSVM + Load a tiny parameter grid for the GenSVM grid search + + This function returns a parameter grid to use in the GenSVM grid search. + This grid was obtained by analyzing the experiments done for the GenSVM + paper and selecting the configurations that achieve accuracy within the + 95th percentile on over 90% of the datasets. It is a good start for a + parameter search with a reasonably high chance of achieving good + performance on most datasets. + + Note that this grid is only tested to work well in combination with the + linear kernel. + + :returns: **pg** -- List of 10 parameter configurations that are likely to perform + reasonably well. + :rtype: list + + +.. py:function:: load_grid_small() + :noindex: + :module: gensvm.gridsearch + + Load a small parameter grid for GenSVM + + This function loads a default parameter grid to use for the GenSVM + gridsearch. It contains all possible combinations of the following + parameter sets:: + + pg = { + 'p': [1.0, 1.5, 2.0], + 'lmd': [1e-8, 1e-6, 1e-4, 1e-2, 1], + 'kappa': [-0.9, 0.5, 5.0], + 'weights': ['unit', 'group'], + } + + :returns: **pg** -- Mapping from parameters to lists of values for those parameters. To be + used as input for the :class:`.GenSVMGridSearchCV` class. + :rtype: dict + + +.. py:function:: load_grid_full() + :noindex: + :module: gensvm.gridsearch + + Load the full parameter grid for GenSVM This is the parameter grid used in the GenSVM paper to run the grid search experiments. 
It uses a large grid for the ``lmd`` regularization parameter diff --git a/docs/cls_gensvm.rst b/docs/cls_gensvm.rst index fc19bf4..b4bc9a7 100644 --- a/docs/cls_gensvm.rst +++ b/docs/cls_gensvm.rst @@ -1,5 +1,5 @@ -.. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=0.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0) +.. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=1.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0) :noindex: :module: gensvm.core @@ -21,6 +21,10 @@ :type kappa: float, optional (default=0.0) :param weights: Type of sample weights to use. Options are 'unit' for unit weights and 'group' for group size correction weights (equation 4 in the paper). + + It is also possible to provide an explicit vector of sample weights + through the :func:`~GenSVM.fit` method. If so, it will override the + setting provided here. :type weights: string, optional (default='unit') :param kernel: Specify the kernel type to use in the classifier. It must be one of 'linear', 'poly', 'rbf', or 'sigmoid'. @@ -31,7 +35,7 @@ :type gamma: float, optional (default='auto') :param coef: Kernel parameter for the poly and sigmoid kernel. See `Kernels in GenSVM <gensvm_kernels_>`_ for the exact implementation of the kernels. - :type coef: float, optional (default=0.0) + :type coef: float, optional (default=1.0) :param degree: Kernel parameter for the poly kernel. See `Kernels in GenSVM <gensvm_kernels_>`_ for the exact implementation of the kernels. :type degree: float, optional (default=2.0) @@ -42,6 +46,10 @@ :type kernel_eigen_cutoff: float, optional (default=1e-8) :param verbose: Enable verbose output :type verbose: int, (default=0) + :param random_state: The seed for the random number generation used for initialization where + necessary. 
See the documentation of + ``sklearn.utils.check_random_state`` for more info. + :type random_state: None, int, instance of RandomState :param max_iter: The maximum number of iterations to be run. :type max_iter: int, (default=1e8) @@ -65,6 +73,10 @@ *int* -- The number of support vectors that were found + .. attribute:: SVs_ + + *array, shape = [n_observations, ]* -- Index vector that marks the support vectors (1 = SV, 0 = no SV) + .. seealso:: :class:`.GenSVMGridSearchCV` @@ -75,7 +87,7 @@ - .. py:method:: GenSVM.fit(X, y, seed_V=None) + .. py:method:: GenSVM.fit(X, y, sample_weight=None, seed_V=None) :noindex: :module: gensvm.core @@ -88,6 +100,10 @@ :type X: array, shape = (n_observations, n_features) :param y: The label vector, labels can be numbers or strings. :type y: array, shape = (n_observations, ) + :param sample_weight: Array of weights that are assigned to individual samples. If not + provided, then the weight specification in the constructor is used + ('unit' or 'group'). + :type sample_weight: array, shape = (n_observations, ) :param seed_V: Seed coefficient array to use as a warm start for the optimization. It can for instance be the :attr:`combined_coef_ <.GenSVM.combined_coef_>` attribute of a different GenSVM model. @@ -106,15 +122,18 @@ :rtype: object - .. py:method:: GenSVM.predict(X) + .. py:method:: GenSVM.predict(X, trainX=None) :noindex: :module: gensvm.core Predict the class labels on the given data - :param X: - :type X: array, shape = [n_samples, n_features] + :param X: Data for which to predict the labels + :type X: array, shape = [n_test_samples, n_features] + :param trainX: Only for nonlinear prediction with kernels: the training data used + to train the model. + :type trainX: array, shape = [n_train_samples, n_features] - :returns: **y_pred** + :returns: **y_pred** -- Predicted class labels of the data in X. 
:rtype: array, shape = (n_samples, ) diff --git a/docs/cls_gridsearch.rst b/docs/cls_gridsearch.rst index 8708123..6a2c05e 100644 --- a/docs/cls_gridsearch.rst +++ b/docs/cls_gridsearch.rst @@ -1,5 +1,5 @@ -.. py:class:: GenSVMGridSearchCV(param_grid, scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True) +.. py:class:: GenSVMGridSearchCV(param_grid='tiny', scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True) :noindex: :module: gensvm.gridsearch @@ -17,10 +17,15 @@ was needed to benefit from the fast low-level C implementation of grid search in the GenSVM library. - :param param_grid: Dictionary of parameter names (strings) as keys and lists of parameter - settings to evaluate as values, or a list of such dicts. The GenSVM - model will be evaluated at all combinations of the parameters. - :type param_grid: dict or list of dicts + :param param_grid: If a string, it must be either 'tiny', 'small', or 'full' to load the + predefined parameter grids (see the functions :func:`load_grid_tiny`, + :func:`load_grid_small`, and :func:`load_grid_full`). + + Otherwise, a dictionary of parameter names (strings) as keys and lists + of parameter settings to evaluate as values, or a list of such dicts. + The GenSVM model will be evaluated at all combinations of the + parameters. + :type param_grid: string, dict, or list of dicts :param scoring: A single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`) to evaluate the predictions on the test set. @@ -40,7 +45,7 @@ :param cv: Determines the cross-validation splitting strategy. Possible inputs for cv are: - - None, to use the default 3-fold cross validation, + - None, to use the default 5-fold cross validation, - integer, to specify the number of folds in a `(Stratified)KFold`, - An object to be used as a cross-validation generator. - An iterable yielding train, test splits. 
@@ -51,6 +56,12 @@ Refer to the `scikit-learn User Guide on cross validation`_ for the various strategies that can be used here. + + NOTE: At the moment, the ShuffleSplit and StratifiedShuffleSplit are + not supported in this class. If you need these, you can use the GenSVM + classifier directly with the GridSearchCV object from scikit-learn. + (these methods require significant changes in the low-level library + before they can be supported). :type cv: int, cross-validation generator or an iterable, optional :param refit: Refit the GenSVM estimator with the best found parameters on the whole dataset. @@ -240,7 +251,7 @@ :rtype: object - .. py:method:: GenSVMGridSearchCV.predict(X) + .. py:method:: GenSVMGridSearchCV.predict(X, trainX=None) :noindex: :module: gensvm.gridsearch @@ -249,6 +260,9 @@ :param X: Test data, where n_samples is the number of observations and n_features is the number of features. :type X: array-like, shape = (n_samples, n_features) + :param trainX: Only for nonlinear prediction with kernels: the training data used + to train the model. + :type trainX: array, shape = [n_train_samples, n_features] :returns: **y_pred** -- Predicted class labels of the data in X. 
:rtype: array-like, shape = (n_samples, ) diff --git a/docs/generate_autodocs.py b/docs/generate_autodocs.py index b2c9fb6..1aa8f7d 100644 --- a/docs/generate_autodocs.py +++ b/docs/generate_autodocs.py @@ -15,52 +15,64 @@ import os from docutils.statemachine import StringList, ViewList -from sphinx.ext.autodoc import (AutoDirective, ClassDocumenter, Options, - FunctionDocumenter) +from sphinx.ext.autodoc import ( + AutoDirective, + ClassDocumenter, + Options, + FunctionDocumenter, +) from sphinx.application import Sphinx from sphinx.environment import BuildEnvironment -BASE_DIR = '/home/gertjan/Dropbox/phd/research/msvm/python/start_here/' -DOCDIR = os.path.join(BASE_DIR, 'gensvm', 'docs') +HERE_DIR = os.path.dirname(os.path.abspath(__file__)) +BASE_DIR = os.path.abspath(os.path.join(HERE_DIR, "..", "..")) -CLASSES = [ - 'GenSVMGridSearchCV', - 'GenSVM' - ] +DOCDIR = os.path.join(BASE_DIR, "gensvm", "docs") -FUNCTIONS = [ - 'load_default_grid' - ] +CLASSES = ["GenSVMGridSearchCV", "GenSVM"] + +FUNCTIONS = ["load_grid_tiny", "load_grid_small", "load_grid_full"] FULL_NAMES = { - 'GenSVM': 'gensvm.core.GenSVM', - 'GenSVMGridSearchCV': 'gensvm.gridsearch.GenSVMGridSearchCV', - 'load_default_grid': 'gensvm.gridsearch.load_default_grid' - } + "GenSVM": "gensvm.core.GenSVM", + "GenSVMGridSearchCV": "gensvm.gridsearch.GenSVMGridSearchCV", + "load_grid_tiny": "gensvm.gridsearch.load_grid_tiny", + "load_grid_small": "gensvm.gridsearch.load_grid_small", + "load_grid_full": "gensvm.gridsearch.load_grid_full", +} OUTPUT_FILES = { - 'GenSVMGridSearchCV': os.path.join(DOCDIR, 'cls_gridsearch.rst'), - 'GenSVM': os.path.join(DOCDIR, 'cls_gensvm.rst'), - 'load_default_grid': os.path.join(DOCDIR, 'auto_functions.rst') - } + "GenSVMGridSearchCV": os.path.join(DOCDIR, "cls_gridsearch.rst"), + "GenSVM": os.path.join(DOCDIR, "cls_gensvm.rst"), + "load_grid_tiny": os.path.join(DOCDIR, "auto_functions.rst"), + "load_grid_small": os.path.join(DOCDIR, "auto_functions.rst"), + 
"load_grid_full": os.path.join(DOCDIR, "auto_functions.rst"), +} def load_app(): srcdir = DOCDIR[:] confdir = DOCDIR[:] - outdir = os.path.join(BASE_DIR, 'gensvm_docs', 'html') - doctreedir = os.path.join(BASE_DIR, 'gensvm_docs', 'doctrees') - buildername = 'html' + outdir = os.path.join(BASE_DIR, "gensvm_docs", "html") + doctreedir = os.path.join(BASE_DIR, "gensvm_docs", "doctrees") + buildername = "html" app = Sphinx(srcdir, confdir, outdir, doctreedir, buildername) return app def generate_class_autodoc(app, cls): - ad = AutoDirective(name='autoclass', arguments=[FULL_NAMES[cls]], - options={'noindex': True}, content=StringList([], items=[]), - lineno=0, content_offset=1, block_text='', state=None, - state_machine=None) + ad = AutoDirective( + name="autoclass", + arguments=[FULL_NAMES[cls]], + options={"noindex": True}, + content=StringList([], items=[]), + lineno=0, + content_offset=1, + block_text="", + state=None, + state_machine=None, + ) ad.env = BuildEnvironment(app) ad.genopt = Options(noindex=True) @@ -70,16 +82,23 @@ def generate_class_autodoc(app, cls): documenter = ClassDocumenter(ad, ad.arguments[0]) documenter.generate(all_members=True) - with open(OUTPUT_FILES[cls], 'w') as fid: + with open(OUTPUT_FILES[cls], "w") as fid: for line in ad.result: - fid.write(line + '\n') + fid.write(line + "\n") def generate_func_autodoc(app, func): - ad = AutoDirective(name='autofunc', arguments=[FULL_NAMES[func]], - options={'noindex': True}, content=StringList([], items=[]), - lineno=0, content_offset=1, block_text='', state=None, - state_machine=None) + ad = AutoDirective( + name="autofunc", + arguments=[FULL_NAMES[func]], + options={"noindex": True}, + content=StringList([], items=[]), + lineno=0, + content_offset=1, + block_text="", + state=None, + state_machine=None, + ) ad.env = BuildEnvironment(app) ad.genopt = Options(noindex=True) @@ -89,15 +108,16 @@ def generate_func_autodoc(app, func): documenter = FunctionDocumenter(ad, ad.arguments[0]) 
documenter.generate(all_members=True) - with open(OUTPUT_FILES[func], 'a') as fid: + with open(OUTPUT_FILES[func], "a") as fid: for line in ad.result: - fid.write(line + '\n') + fid.write(line + "\n") def main(): for of in OUTPUT_FILES: fname = OUTPUT_FILES[of] - os.unlink(fname) + if os.path.exists(fname): + os.unlink(fname) app = load_app() for cls in CLASSES: generate_class_autodoc(app, cls) @@ -105,5 +125,5 @@ def main(): generate_func_autodoc(app, func) -if __name__ == '__main__': +if __name__ == "__main__": main() |
