author     Gertjan van den Burg <gertjanvandenburg@gmail.com>  2019-05-30 18:39:05 +0100
committer  Gertjan van den Burg <gertjanvandenburg@gmail.com>  2019-05-30 18:39:05 +0100
commit     47116a4682edb1f22d00da06802cc3eff40bf5bd (patch)
tree       1f8bcbb1b86e112eefed5a6dd4fe0ea1541183d7 /docs
parent     Merge branch 'master' of github.com:GjjvdBurg/PyGenSVM (diff)
Update documentation
Diffstat (limited to 'docs')
-rw-r--r--  docs/auto_functions.rst    47
-rw-r--r--  docs/cls_gensvm.rst        33
-rw-r--r--  docs/cls_gridsearch.rst    28
-rw-r--r--  docs/generate_autodocs.py  92
4 files changed, 148 insertions, 52 deletions
diff --git a/docs/auto_functions.rst b/docs/auto_functions.rst
index 3ba1fff..2a6596f 100644
--- a/docs/auto_functions.rst
+++ b/docs/auto_functions.rst
@@ -1,9 +1,52 @@
-.. py:function:: load_default_grid()
+.. py:function:: load_grid_tiny()
:noindex:
:module: gensvm.gridsearch
- Load the default parameter grid for GenSVM
+ Load a tiny parameter grid for the GenSVM grid search
+
+ This function returns a parameter grid to use in the GenSVM grid search.
+ This grid was obtained by analyzing the experiments done for the GenSVM
+ paper and selecting the configurations that achieve accuracy within the
+ 95th percentile on over 90% of the datasets. It is a good start for a
+ parameter search with a reasonably high chance of achieving good
+ performance on most datasets.
+
+ Note that this grid is only tested to work well in combination with the
+ linear kernel.
+
+ :returns: **pg** -- List of 10 parameter configurations that are likely to perform
+ reasonably well.
+ :rtype: list
+
+
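
As an illustration of how the tiny grid documented above might be used, the
returned list can be handed to GenSVMGridSearchCV as its param_grid. This is
a sketch only: the iris data, the best_params_ attribute, and the assumption
that the returned list is accepted directly as a param_grid are not taken
from this diff::

    # Sketch: run the GenSVM grid search over the predefined tiny grid.
    from sklearn.datasets import load_iris

    from gensvm.gridsearch import GenSVMGridSearchCV, load_grid_tiny

    X, y = load_iris(return_X_y=True)

    pg = load_grid_tiny()  # list of 10 parameter configurations
    gs = GenSVMGridSearchCV(param_grid=pg)
    gs.fit(X, y)
    print(gs.best_params_)  # assumed scikit-learn style attribute
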
+.. py:function:: load_grid_small()
+ :noindex:
+ :module: gensvm.gridsearch
+
+ Load a small parameter grid for GenSVM
+
+ This function loads a default parameter grid to use for the GenSVM
+ grid search. It contains all possible combinations of the following
+ parameter sets::
+
+ pg = {
+ 'p': [1.0, 1.5, 2.0],
+ 'lmd': [1e-8, 1e-6, 1e-4, 1e-2, 1],
+ 'kappa': [-0.9, 0.5, 5.0],
+ 'weights': ['unit', 'group'],
+ }
+
+ :returns: **pg** -- Mapping from parameters to lists of values for those parameters. To be
+ used as input for the :class:`.GenSVMGridSearchCV` class.
+ :rtype: dict
+
+
+.. py:function:: load_grid_full()
+ :noindex:
+ :module: gensvm.gridsearch
+
+ Load the full parameter grid for GenSVM
This is the parameter grid used in the GenSVM paper to run the grid search
experiments. It uses a large grid for the ``lmd`` regularization parameter
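
Since load_grid_small returns a plain dictionary (and load_grid_full
presumably does the same), the predefined grids have the same shape as a
user-defined grid. A minimal sketch, where the two-parameter custom grid is
purely illustrative::

    # Sketch: predefined and hand-built grids are interchangeable as the
    # param_grid argument of GenSVMGridSearchCV.
    from gensvm.gridsearch import (
        GenSVMGridSearchCV,
        load_grid_full,
        load_grid_small,
    )

    pg_small = load_grid_small()  # dict: parameter name -> list of values
    pg_full = load_grid_full()    # larger grid used in the GenSVM paper

    # An equivalent, hand-built grid restricted to two parameters.
    pg_custom = {"p": [1.0, 2.0], "lmd": [1e-6, 1e-2]}

    gs = GenSVMGridSearchCV(param_grid=pg_custom)
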
diff --git a/docs/cls_gensvm.rst b/docs/cls_gensvm.rst
index fc19bf4..b4bc9a7 100644
--- a/docs/cls_gensvm.rst
+++ b/docs/cls_gensvm.rst
@@ -1,5 +1,5 @@
-.. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=0.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0)
+.. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=1.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0)
:noindex:
:module: gensvm.core
@@ -21,6 +21,10 @@
:type kappa: float, optional (default=0.0)
:param weights: Type of sample weights to use. Options are 'unit' for unit weights and
'group' for group size correction weights (equation 4 in the paper).
+
+ It is also possible to provide an explicit vector of sample weights
+ through the :func:`~GenSVM.fit` method. If so, it will override the
+ setting provided here.
:type weights: string, optional (default='unit')
:param kernel: Specify the kernel type to use in the classifier. It must be one of
'linear', 'poly', 'rbf', or 'sigmoid'.
@@ -31,7 +35,7 @@
:type gamma: float, optional (default='auto')
:param coef: Kernel parameter for the poly and sigmoid kernel. See `Kernels in
GenSVM <gensvm_kernels_>`_ for the exact implementation of the kernels.
- :type coef: float, optional (default=0.0)
+ :type coef: float, optional (default=1.0)
:param degree: Kernel parameter for the poly kernel. See `Kernels in GenSVM
<gensvm_kernels_>`_ for the exact implementation of the kernels.
:type degree: float, optional (default=2.0)
@@ -42,6 +46,10 @@
:type kernel_eigen_cutoff: float, optional (default=1e-8)
:param verbose: Enable verbose output
:type verbose: int, (default=0)
+ :param random_state: The seed for the random number generation used for initialization where
+ necessary. See the documentation of
+ ``sklearn.utils.check_random_state`` for more info.
+ :type random_state: None, int, instance of RandomState
:param max_iter: The maximum number of iterations to be run.
:type max_iter: int, (default=1e8)
@@ -65,6 +73,10 @@
*int* -- The number of support vectors that were found
+ .. attribute:: SVs_
+
+ *array, shape = [n_observations, ]* -- Index vector that marks the support vectors (1 = SV, 0 = no SV)
+
.. seealso::
:class:`.GenSVMGridSearchCV`
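
To make the weighting and random_state behaviour documented above concrete,
a minimal sketch follows; the iris data is a stand-in and not part of this
diff, and SVs_ is the attribute introduced in this commit::

    # Sketch: constructor-level weighting versus explicit sample weights.
    import numpy as np
    from sklearn.datasets import load_iris

    from gensvm.core import GenSVM

    X, y = load_iris(return_X_y=True)

    # The 'group' weighting from the constructor is used by default.
    clf = GenSVM(p=1.5, lmd=1e-4, kappa=0.5, weights="group", random_state=42)
    clf.fit(X, y)

    # An explicit sample_weight vector passed to fit() overrides that setting.
    w = np.ones(len(y))
    clf.fit(X, y, sample_weight=w)

    print(clf.SVs_)  # 0/1 vector marking the support vectors
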
@@ -75,7 +87,7 @@
- .. py:method:: GenSVM.fit(X, y, seed_V=None)
+ .. py:method:: GenSVM.fit(X, y, sample_weight=None, seed_V=None)
:noindex:
:module: gensvm.core
@@ -88,6 +100,10 @@
:type X: array, shape = (n_observations, n_features)
:param y: The label vector, labels can be numbers or strings.
:type y: array, shape = (n_observations, )
+ :param sample_weight: Array of weights that are assigned to individual samples. If not
+ provided, then the weight specification in the constructor is used
+ ('unit' or 'group').
+ :type sample_weight: array, shape = (n_observations, )
:param seed_V: Seed coefficient array to use as a warm start for the optimization.
It can for instance be the :attr:`combined_coef_
<.GenSVM.combined_coef_>` attribute of a different GenSVM model.
@@ -106,15 +122,18 @@
:rtype: object
- .. py:method:: GenSVM.predict(X)
+ .. py:method:: GenSVM.predict(X, trainX=None)
:noindex:
:module: gensvm.core
Predict the class labels on the given data
- :param X:
- :type X: array, shape = [n_samples, n_features]
+ :param X: Data for which to predict the labels
+ :type X: array, shape = [n_test_samples, n_features]
+ :param trainX: Only for nonlinear prediction with kernels: the training data used
+ to train the model.
+ :type trainX: array, shape = [n_train_samples, n_features]
- :returns: **y_pred**
+ :returns: **y_pred** -- Predicted class labels of the data in X.
:rtype: array, shape = (n_samples, )
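
A sketch of the warm start (seed_V) and nonlinear prediction (trainX)
described above; the train/test split, the rbf kernel, and the choice of
data are illustrative assumptions::

    # Sketch: seed one model from another, then predict with a kernel model,
    # which requires passing the training data via trainX.
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    from gensvm.core import GenSVM

    X, y = load_iris(return_X_y=True)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

    base = GenSVM(kernel="rbf")
    base.fit(X_tr, y_tr)

    # Warm start: reuse the combined coefficients of the first model.
    warm = GenSVM(kernel="rbf")
    warm.fit(X_tr, y_tr, seed_V=base.combined_coef_)

    # Nonlinear prediction needs the training data at predict time.
    y_pred = warm.predict(X_te, trainX=X_tr)
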
diff --git a/docs/cls_gridsearch.rst b/docs/cls_gridsearch.rst
index 8708123..6a2c05e 100644
--- a/docs/cls_gridsearch.rst
+++ b/docs/cls_gridsearch.rst
@@ -1,5 +1,5 @@
-.. py:class:: GenSVMGridSearchCV(param_grid, scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True)
+.. py:class:: GenSVMGridSearchCV(param_grid='tiny', scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True)
:noindex:
:module: gensvm.gridsearch
@@ -17,10 +17,15 @@
was needed to benefit from the fast low-level C implementation of grid
search in the GenSVM library.
- :param param_grid: Dictionary of parameter names (strings) as keys and lists of parameter
- settings to evaluate as values, or a list of such dicts. The GenSVM
- model will be evaluated at all combinations of the parameters.
- :type param_grid: dict or list of dicts
+ :param param_grid: If a string, it must be either 'tiny', 'small', or 'full' to load the
+ predefined parameter grids (see the functions :func:`load_grid_tiny`,
+ :func:`load_grid_small`, and :func:`load_grid_full`).
+
+ Otherwise, a dictionary of parameter names (strings) as keys and lists
+ of parameter settings to evaluate as values, or a list of such dicts.
+ The GenSVM model will be evaluated at all combinations of the
+ parameters.
+ :type param_grid: string, dict, or list of dicts
:param scoring: A single string (see :ref:`scoring_parameter`) or a callable (see
:ref:`scoring`) to evaluate the predictions on the test set.
@@ -40,7 +45,7 @@
:param cv: Determines the cross-validation splitting strategy. Possible inputs for
cv are:
- - None, to use the default 3-fold cross validation,
+ - None, to use the default 5-fold cross validation,
- integer, to specify the number of folds in a `(Stratified)KFold`,
- An object to be used as a cross-validation generator.
- An iterable yielding train, test splits.
@@ -51,6 +56,12 @@
Refer to the `scikit-learn User Guide on cross validation`_ for the
various strategies that can be used here.
+
+ NOTE: At the moment, the ShuffleSplit and StratifiedShuffleSplit are
+ not supported in this class. If you need these, you can use the GenSVM
+ classifier directly with the GridSearchCV object from scikit-learn.
+ (These methods require significant changes in the low-level library
+ before they can be supported.)
:type cv: int, cross-validation generator or an iterable, optional
:param refit: Refit the GenSVM estimator with the best found parameters on the whole
dataset.
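
The note on ShuffleSplit above recommends falling back to scikit-learn's own
grid search; a sketch of that workaround, with an illustrative dataset and a
small hand-picked grid::

    # Sketch: use the plain GenSVM estimator with scikit-learn's GridSearchCV
    # when a ShuffleSplit-style splitter is needed.
    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit

    from gensvm.core import GenSVM

    X, y = load_iris(return_X_y=True)

    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    grid = {"p": [1.0, 1.5, 2.0], "lmd": [1e-6, 1e-4, 1e-2]}

    search = GridSearchCV(GenSVM(), grid, cv=cv)
    search.fit(X, y)
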
@@ -240,7 +251,7 @@
:rtype: object
- .. py:method:: GenSVMGridSearchCV.predict(X)
+ .. py:method:: GenSVMGridSearchCV.predict(X, trainX=None)
:noindex:
:module: gensvm.gridsearch
@@ -249,6 +260,9 @@
:param X: Test data, where n_samples is the number of observations and
n_features is the number of features.
:type X: array-like, shape = (n_samples, n_features)
+ :param trainX: Only for nonlinear prediction with kernels: the training data used
+ to train the model.
+ :type trainX: array, shape = [n_train_samples, n_features]
:returns: **y_pred** -- Predicted class labels of the data in X.
:rtype: array-like, shape = (n_samples, )
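
Putting the new string form of param_grid together with the predict method
documented above, a minimal end-to-end sketch (iris as a stand-in dataset)::

    # Sketch: the string shorthand selects one of the predefined grids; with
    # the default linear kernel no trainX is needed at predict time.
    from sklearn.datasets import load_iris

    from gensvm.gridsearch import GenSVMGridSearchCV

    X, y = load_iris(return_X_y=True)

    gs = GenSVMGridSearchCV(param_grid="tiny", cv=5, refit=True)
    gs.fit(X, y)
    y_pred = gs.predict(X)
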
diff --git a/docs/generate_autodocs.py b/docs/generate_autodocs.py
index b2c9fb6..1aa8f7d 100644
--- a/docs/generate_autodocs.py
+++ b/docs/generate_autodocs.py
@@ -15,52 +15,64 @@ import os
from docutils.statemachine import StringList, ViewList
-from sphinx.ext.autodoc import (AutoDirective, ClassDocumenter, Options,
- FunctionDocumenter)
+from sphinx.ext.autodoc import (
+ AutoDirective,
+ ClassDocumenter,
+ Options,
+ FunctionDocumenter,
+)
from sphinx.application import Sphinx
from sphinx.environment import BuildEnvironment
-BASE_DIR = '/home/gertjan/Dropbox/phd/research/msvm/python/start_here/'
-DOCDIR = os.path.join(BASE_DIR, 'gensvm', 'docs')
+HERE_DIR = os.path.dirname(os.path.abspath(__file__))
+BASE_DIR = os.path.abspath(os.path.join(HERE_DIR, "..", ".."))
-CLASSES = [
- 'GenSVMGridSearchCV',
- 'GenSVM'
- ]
+DOCDIR = os.path.join(BASE_DIR, "gensvm", "docs")
-FUNCTIONS = [
- 'load_default_grid'
- ]
+CLASSES = ["GenSVMGridSearchCV", "GenSVM"]
+
+FUNCTIONS = ["load_grid_tiny", "load_grid_small", "load_grid_full"]
FULL_NAMES = {
- 'GenSVM': 'gensvm.core.GenSVM',
- 'GenSVMGridSearchCV': 'gensvm.gridsearch.GenSVMGridSearchCV',
- 'load_default_grid': 'gensvm.gridsearch.load_default_grid'
- }
+ "GenSVM": "gensvm.core.GenSVM",
+ "GenSVMGridSearchCV": "gensvm.gridsearch.GenSVMGridSearchCV",
+ "load_grid_tiny": "gensvm.gridsearch.load_grid_tiny",
+ "load_grid_small": "gensvm.gridsearch.load_grid_small",
+ "load_grid_full": "gensvm.gridsearch.load_grid_full",
+}
OUTPUT_FILES = {
- 'GenSVMGridSearchCV': os.path.join(DOCDIR, 'cls_gridsearch.rst'),
- 'GenSVM': os.path.join(DOCDIR, 'cls_gensvm.rst'),
- 'load_default_grid': os.path.join(DOCDIR, 'auto_functions.rst')
- }
+ "GenSVMGridSearchCV": os.path.join(DOCDIR, "cls_gridsearch.rst"),
+ "GenSVM": os.path.join(DOCDIR, "cls_gensvm.rst"),
+ "load_grid_tiny": os.path.join(DOCDIR, "auto_functions.rst"),
+ "load_grid_small": os.path.join(DOCDIR, "auto_functions.rst"),
+ "load_grid_full": os.path.join(DOCDIR, "auto_functions.rst"),
+}
def load_app():
srcdir = DOCDIR[:]
confdir = DOCDIR[:]
- outdir = os.path.join(BASE_DIR, 'gensvm_docs', 'html')
- doctreedir = os.path.join(BASE_DIR, 'gensvm_docs', 'doctrees')
- buildername = 'html'
+ outdir = os.path.join(BASE_DIR, "gensvm_docs", "html")
+ doctreedir = os.path.join(BASE_DIR, "gensvm_docs", "doctrees")
+ buildername = "html"
app = Sphinx(srcdir, confdir, outdir, doctreedir, buildername)
return app
def generate_class_autodoc(app, cls):
- ad = AutoDirective(name='autoclass', arguments=[FULL_NAMES[cls]],
- options={'noindex': True}, content=StringList([], items=[]),
- lineno=0, content_offset=1, block_text='', state=None,
- state_machine=None)
+ ad = AutoDirective(
+ name="autoclass",
+ arguments=[FULL_NAMES[cls]],
+ options={"noindex": True},
+ content=StringList([], items=[]),
+ lineno=0,
+ content_offset=1,
+ block_text="",
+ state=None,
+ state_machine=None,
+ )
ad.env = BuildEnvironment(app)
ad.genopt = Options(noindex=True)
@@ -70,16 +82,23 @@ def generate_class_autodoc(app, cls):
documenter = ClassDocumenter(ad, ad.arguments[0])
documenter.generate(all_members=True)
- with open(OUTPUT_FILES[cls], 'w') as fid:
+ with open(OUTPUT_FILES[cls], "w") as fid:
for line in ad.result:
- fid.write(line + '\n')
+ fid.write(line + "\n")
def generate_func_autodoc(app, func):
- ad = AutoDirective(name='autofunc', arguments=[FULL_NAMES[func]],
- options={'noindex': True}, content=StringList([], items=[]),
- lineno=0, content_offset=1, block_text='', state=None,
- state_machine=None)
+ ad = AutoDirective(
+ name="autofunc",
+ arguments=[FULL_NAMES[func]],
+ options={"noindex": True},
+ content=StringList([], items=[]),
+ lineno=0,
+ content_offset=1,
+ block_text="",
+ state=None,
+ state_machine=None,
+ )
ad.env = BuildEnvironment(app)
ad.genopt = Options(noindex=True)
@@ -89,15 +108,16 @@ def generate_func_autodoc(app, func):
documenter = FunctionDocumenter(ad, ad.arguments[0])
documenter.generate(all_members=True)
- with open(OUTPUT_FILES[func], 'a') as fid:
+ with open(OUTPUT_FILES[func], "a") as fid:
for line in ad.result:
- fid.write(line + '\n')
+ fid.write(line + "\n")
def main():
for of in OUTPUT_FILES:
fname = OUTPUT_FILES[of]
- os.unlink(fname)
+ if os.path.exists(fname):
+ os.unlink(fname)
app = load_app()
for cls in CLASSES:
generate_class_autodoc(app, cls)
@@ -105,5 +125,5 @@ def main():
generate_func_autodoc(app, func)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()