Update documentation

author: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2019-05-30 18:39:05 +0100
committer: Gertjan van den Burg <gertjanvandenburg@gmail.com> 2019-05-30 18:39:05 +0100
commit: 47116a4682edb1f22d00da06802cc3eff40bf5bd (patch)
tree: 1f8bcbb1b86e112eefed5a6dd4fe0ea1541183d7 /docs
parent: Merge branch 'master' of github.com:GjjvdBurg/PyGenSVM (diff)
download: pygensvm-47116a4682edb1f22d00da06802cc3eff40bf5bd.tar.gz
pygensvm-47116a4682edb1f22d00da06802cc3eff40bf5bd.zip
4 files changed, 148 insertions, 52 deletions
diff --git a/docs/auto_functions.rst b/docs/auto_functions.rst
index 3ba1fff..2a6596f 100644
--- a/docs/auto_functions.rst
+++ b/docs/auto_functions.rst
@@ -1,9 +1,52 @@
 
-.. py:function:: load_default_grid()
+.. py:function:: load_grid_tiny()
    :noindex:
    :module: gensvm.gridsearch
 
-   Load the default parameter grid for GenSVM
+   Load a tiny parameter grid for the GenSVM grid search
+   
+   This function returns a parameter grid to use in the GenSVM grid search.
+   This grid was obtained by analyzing the experiments done for the GenSVM
+   paper and selecting the configurations that achieve accuracy within the
+   95th percentile on over 90% of the datasets. It is a good start for a
+   parameter search with a reasonably high chance of achieving good
+   performance on most datasets.
+   
+   Note that this grid is only tested to work well in combination with the
+   linear kernel.
+   
+   :returns: **pg** -- List of 10 parameter configurations that are likely to perform
+             reasonably well.
+   :rtype: list
+   
+
+.. py:function:: load_grid_small()
+   :noindex:
+   :module: gensvm.gridsearch
+
+   Load a small parameter grid for GenSVM
+   
+   This function loads a default parameter grid to use for the GenSVM
+   grid search. It contains all possible combinations of the following
+   parameter sets::
+   
+       pg = {
+           'p': [1.0, 1.5, 2.0],
+           'lmd': [1e-8, 1e-6, 1e-4, 1e-2, 1],
+           'kappa': [-0.9, 0.5, 5.0],
+           'weights': ['unit', 'group'],
+       }
+   
+   :returns: **pg** -- Mapping from parameters to lists of values for those parameters. To be
+             used as input for the :class:`.GenSVMGridSearchCV` class.
+   :rtype: dict
+   
+
+.. py:function:: load_grid_full()
+   :noindex:
+   :module: gensvm.gridsearch
+
+   Load the full parameter grid for GenSVM
    
    This is the parameter grid used in the GenSVM paper to run the grid search
    experiments. It uses a large grid for the ``lmd`` regularization parameter
diff --git a/docs/cls_gensvm.rst b/docs/cls_gensvm.rst
index fc19bf4..b4bc9a7 100644
--- a/docs/cls_gensvm.rst
+++ b/docs/cls_gensvm.rst
@@ -1,5 +1,5 @@
 
-.. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=0.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0)
+.. py:class:: GenSVM(p=1.0, lmd=1e-05, kappa=0.0, epsilon=1e-06, weights='unit', kernel='linear', gamma='auto', coef=1.0, degree=2.0, kernel_eigen_cutoff=1e-08, verbose=0, random_state=None, max_iter=100000000.0)
    :noindex:
    :module: gensvm.core
 
@@ -21,6 +21,10 @@
    :type kappa: float, optional (default=0.0)
    :param weights: Type of sample weights to use. Options are 'unit' for unit weights and
                    'group' for group size correction weights (equation 4 in the paper).
+   
+                   It is also possible to provide an explicit vector of sample weights
+                   through the :func:`~GenSVM.fit` method. If so, it will override the
+                   setting provided here.
    :type weights: string, optional (default='unit')
    :param kernel: Specify the kernel type to use in the classifier. It must be one of
                   'linear', 'poly', 'rbf', or 'sigmoid'.
@@ -31,7 +35,7 @@
    :type gamma: float, optional (default='auto')
    :param coef: Kernel parameter for the poly and sigmoid kernel. See `Kernels in
                 GenSVM <gensvm_kernels_>`_ for the exact implementation of the kernels.
-   :type coef: float, optional (default=0.0)
+   :type coef: float, optional (default=1.0)
    :param degree: Kernel parameter for the poly kernel. See `Kernels in GenSVM
                   <gensvm_kernels_>`_ for the exact implementation of the kernels.
    :type degree: float, optional (default=2.0)
@@ -42,6 +46,10 @@
    :type kernel_eigen_cutoff: float, optional (default=1e-8)
    :param verbose: Enable verbose output
    :type verbose: int, (default=0)
+   :param random_state: The seed for the random number generation used for initialization where
+                        necessary. See the documentation of
+                        ``sklearn.utils.check_random_state`` for more info.
+   :type random_state: None, int, instance of RandomState
    :param max_iter: The maximum number of iterations to be run.
    :type max_iter: int, (default=1e8)
    
@@ -65,6 +73,10 @@
    
       *int* -- The number of support vectors that were found
    
+   .. attribute:: SVs_
+   
+      *array, shape = [n_observations, ]* -- Index vector that marks the support vectors (1 = SV, 0 = no SV)
+   
    .. seealso::
    
       :class:`.GenSVMGridSearchCV`
@@ -75,7 +87,7 @@
    
    
    
-   .. py:method:: GenSVM.fit(X, y, seed_V=None)
+   .. py:method:: GenSVM.fit(X, y, sample_weight=None, seed_V=None)
       :noindex:
       :module: gensvm.core
    
@@ -88,6 +100,10 @@
       :type X: array, shape = (n_observations, n_features)
       :param y: The label vector, labels can be numbers or strings.
       :type y: array, shape = (n_observations, )
+      :param sample_weight: Array of weights that are assigned to individual samples. If not
+                            provided, then the weight specification in the constructor is used
+                            ('unit' or 'group').
+      :type sample_weight: array, shape = (n_observations, )
       :param seed_V: Seed coefficient array to use as a warm start for the optimization.
                      It can for instance be the :attr:`combined_coef_
                      <.GenSVM.combined_coef_>` attribute of a different GenSVM model.
@@ -106,15 +122,18 @@
       :rtype: object
       
    
-   .. py:method:: GenSVM.predict(X)
+   .. py:method:: GenSVM.predict(X, trainX=None)
       :noindex:
       :module: gensvm.core
    
       Predict the class labels on the given data
       
-      :param X:
-      :type X: array, shape = [n_samples, n_features]
+      :param X: Data for which to predict the labels
+      :type X: array, shape = [n_test_samples, n_features]
+      :param trainX: Only for nonlinear prediction with kernels: the training data used
+                     to train the model.
+      :type trainX: array, shape = [n_train_samples, n_features]
       
-      :returns: **y_pred**
+      :returns: **y_pred** -- Predicted class labels of the data in X.
       :rtype: array, shape = (n_samples, )
       
diff --git a/docs/cls_gridsearch.rst b/docs/cls_gridsearch.rst
index 8708123..6a2c05e 100644
--- a/docs/cls_gridsearch.rst
+++ b/docs/cls_gridsearch.rst
@@ -1,5 +1,5 @@
 
-.. py:class:: GenSVMGridSearchCV(param_grid, scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True)
+.. py:class:: GenSVMGridSearchCV(param_grid='tiny', scoring=None, iid=True, cv=None, refit=True, verbose=0, return_train_score=True)
    :noindex:
    :module: gensvm.gridsearch
 
@@ -17,10 +17,15 @@
    was needed to benefit from the fast low-level C implementation of grid
    search in the GenSVM library.
    
-   :param param_grid: Dictionary of parameter names (strings) as keys and lists of parameter
-                      settings to evaluate as values, or a list of such dicts. The GenSVM
-                      model will be evaluated at all combinations of the parameters.
-   :type param_grid: dict or list of dicts
+   :param param_grid: If a string, it must be either 'tiny', 'small', or 'full' to load the
+                      predefined parameter grids (see the functions :func:`load_grid_tiny`,
+                      :func:`load_grid_small`, and :func:`load_grid_full`).
+   
+                      Otherwise, a dictionary of parameter names (strings) as keys and lists
+                      of parameter settings to evaluate as values, or a list of such dicts.
+                      The GenSVM model will be evaluated at all combinations of the
+                      parameters.
+   :type param_grid: string, dict, or list of dicts
    :param scoring: A single string (see :ref:`scoring_parameter`) or a callable (see
                    :ref:`scoring`) to evaluate the predictions on the test set.
    
@@ -40,7 +45,7 @@
    :param cv: Determines the cross-validation splitting strategy. Possible inputs for
               cv are:
    
-                - None, to use the default 3-fold cross validation,
+                - None, to use the default 5-fold cross validation,
                 - integer, to specify the number of folds in a `(Stratified)KFold`,
                 - An object to be used as a cross-validation generator.
                 - An iterable yielding train, test splits.
@@ -51,6 +56,12 @@
    
               Refer to the `scikit-learn User Guide on cross validation`_ for the
               various strategies that can be used here.
+   
+              NOTE: At the moment, the ShuffleSplit and StratifiedShuffleSplit are
+              not supported in this class. If you need these, you can use the GenSVM
+              classifier directly with the GridSearchCV object from scikit-learn.
+              (These methods require significant changes in the low-level library
+              before they can be supported.)
    :type cv: int, cross-validation generator or an iterable, optional
    :param refit: Refit the GenSVM estimator with the best found parameters on the whole
                  dataset.
@@ -240,7 +251,7 @@
       :rtype: object
       
    
-   .. py:method:: GenSVMGridSearchCV.predict(X)
+   .. py:method:: GenSVMGridSearchCV.predict(X, trainX=None)
       :noindex:
       :module: gensvm.gridsearch
    
@@ -249,6 +260,9 @@
       :param X: Test data, where n_samples is the number of observations and
                 n_features is the number of features.
       :type X: array-like, shape = (n_samples, n_features)
+      :param trainX: Only for nonlinear prediction with kernels: the training data used
+                     to train the model.
+      :type trainX: array, shape = [n_train_samples, n_features]
       
       :returns: **y_pred** -- Predicted class labels of the data in X.
       :rtype: array-like, shape = (n_samples, )
diff --git a/docs/generate_autodocs.py b/docs/generate_autodocs.py
index b2c9fb6..1aa8f7d 100644
--- a/docs/generate_autodocs.py
+++ b/docs/generate_autodocs.py
@@ -15,52 +15,64 @@ import os
 
 from docutils.statemachine import StringList, ViewList
 
-from sphinx.ext.autodoc import (AutoDirective, ClassDocumenter, Options, 
-        FunctionDocumenter)
+from sphinx.ext.autodoc import (
+    AutoDirective,
+    ClassDocumenter,
+    Options,
+    FunctionDocumenter,
+)
 from sphinx.application import Sphinx
 from sphinx.environment import BuildEnvironment
 
-BASE_DIR = '/home/gertjan/Dropbox/phd/research/msvm/python/start_here/'
-DOCDIR = os.path.join(BASE_DIR, 'gensvm', 'docs')
+HERE_DIR = os.path.dirname(os.path.abspath(__file__))
+BASE_DIR = os.path.abspath(os.path.join(HERE_DIR, "..", ".."))
 
-CLASSES = [
-        'GenSVMGridSearchCV',
-        'GenSVM'
-        ]
+DOCDIR = os.path.join(BASE_DIR, "gensvm", "docs")
 
-FUNCTIONS = [
-        'load_default_grid'
-        ]
+CLASSES = ["GenSVMGridSearchCV", "GenSVM"]
+
+FUNCTIONS = ["load_grid_tiny", "load_grid_small", "load_grid_full"]
 
 FULL_NAMES = {
-        'GenSVM': 'gensvm.core.GenSVM',
-        'GenSVMGridSearchCV': 'gensvm.gridsearch.GenSVMGridSearchCV',
-        'load_default_grid': 'gensvm.gridsearch.load_default_grid'
-        }
+    "GenSVM": "gensvm.core.GenSVM",
+    "GenSVMGridSearchCV": "gensvm.gridsearch.GenSVMGridSearchCV",
+    "load_grid_tiny": "gensvm.gridsearch.load_grid_tiny",
+    "load_grid_small": "gensvm.gridsearch.load_grid_small",
+    "load_grid_full": "gensvm.gridsearch.load_grid_full",
+}
 
 OUTPUT_FILES = {
-        'GenSVMGridSearchCV': os.path.join(DOCDIR, 'cls_gridsearch.rst'),
-        'GenSVM': os.path.join(DOCDIR, 'cls_gensvm.rst'),
-        'load_default_grid': os.path.join(DOCDIR, 'auto_functions.rst')
-        }
+    "GenSVMGridSearchCV": os.path.join(DOCDIR, "cls_gridsearch.rst"),
+    "GenSVM": os.path.join(DOCDIR, "cls_gensvm.rst"),
+    "load_grid_tiny": os.path.join(DOCDIR, "auto_functions.rst"),
+    "load_grid_small": os.path.join(DOCDIR, "auto_functions.rst"),
+    "load_grid_full": os.path.join(DOCDIR, "auto_functions.rst"),
+}
 
 
 def load_app():
     srcdir = DOCDIR[:]
     confdir = DOCDIR[:]
-    outdir = os.path.join(BASE_DIR, 'gensvm_docs', 'html')
-    doctreedir = os.path.join(BASE_DIR, 'gensvm_docs', 'doctrees')
-    buildername = 'html'
+    outdir = os.path.join(BASE_DIR, "gensvm_docs", "html")
+    doctreedir = os.path.join(BASE_DIR, "gensvm_docs", "doctrees")
+    buildername = "html"
 
     app = Sphinx(srcdir, confdir, outdir, doctreedir, buildername)
     return app
 
 
 def generate_class_autodoc(app, cls):
-    ad = AutoDirective(name='autoclass', arguments=[FULL_NAMES[cls]], 
-            options={'noindex': True}, content=StringList([], items=[]), 
-            lineno=0, content_offset=1, block_text='', state=None, 
-            state_machine=None)
+    ad = AutoDirective(
+        name="autoclass",
+        arguments=[FULL_NAMES[cls]],
+        options={"noindex": True},
+        content=StringList([], items=[]),
+        lineno=0,
+        content_offset=1,
+        block_text="",
+        state=None,
+        state_machine=None,
+    )
 
     ad.env = BuildEnvironment(app)
     ad.genopt = Options(noindex=True)
@@ -70,16 +82,23 @@ def generate_class_autodoc(app, cls):
     documenter = ClassDocumenter(ad, ad.arguments[0])
     documenter.generate(all_members=True)
 
-    with open(OUTPUT_FILES[cls], 'w') as fid:
+    with open(OUTPUT_FILES[cls], "w") as fid:
         for line in ad.result:
-            fid.write(line + '\n')
+            fid.write(line + "\n")
 
 
 def generate_func_autodoc(app, func):
-    ad = AutoDirective(name='autofunc', arguments=[FULL_NAMES[func]], 
-            options={'noindex': True}, content=StringList([], items=[]), 
-            lineno=0, content_offset=1, block_text='', state=None, 
-            state_machine=None)
+    ad = AutoDirective(
+        name="autofunc",
+        arguments=[FULL_NAMES[func]],
+        options={"noindex": True},
+        content=StringList([], items=[]),
+        lineno=0,
+        content_offset=1,
+        block_text="",
+        state=None,
+        state_machine=None,
+    )
 
     ad.env = BuildEnvironment(app)
     ad.genopt = Options(noindex=True)
@@ -89,15 +108,16 @@ def generate_func_autodoc(app, func):
     documenter = FunctionDocumenter(ad, ad.arguments[0])
     documenter.generate(all_members=True)
 
-    with open(OUTPUT_FILES[func], 'a') as fid:
+    with open(OUTPUT_FILES[func], "a") as fid:
         for line in ad.result:
-            fid.write(line + '\n')
+            fid.write(line + "\n")
 
 
 def main():
     for of in OUTPUT_FILES:
         fname = OUTPUT_FILES[of]
-        os.unlink(fname)
+        if os.path.exists(fname):
+            os.unlink(fname)
     app = load_app()
     for cls in CLASSES:
         generate_class_autodoc(app, cls)
@@ -105,5 +125,5 @@ def main():
         generate_func_autodoc(app, func)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
author	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2019-05-30 18:39:05 +0100
committer	Gertjan van den Burg <gertjanvandenburg@gmail.com>	2019-05-30 18:39:05 +0100
commit	47116a4682edb1f22d00da06802cc3eff40bf5bd (patch)
tree	1f8bcbb1b86e112eefed5a6dd4fe0ea1541183d7 /docs
parent	Merge branch 'master' of github.com:GjjvdBurg/PyGenSVM (diff)
download	pygensvm-47116a4682edb1f22d00da06802cc3eff40bf5bd.tar.gz pygensvm-47116a4682edb1f22d00da06802cc3eff40bf5bd.zip