aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile42
-rw-r--r--data/README4
-rw-r--r--doc/Doxyfile1893
-rw-r--r--doc/mainpage.c23
-rw-r--r--doc/specifications.c170
-rw-r--r--include/MSVMMaj.h46
-rw-r--r--include/crossval.h12
-rw-r--r--include/globals.h22
-rw-r--r--include/kernel.h11
-rw-r--r--include/libMSVMMaj.h17
-rw-r--r--include/msvmmaj.h98
-rw-r--r--include/msvmmaj_init.h23
-rw-r--r--include/msvmmaj_kernel.h32
-rw-r--r--include/msvmmaj_lapack.h23
-rw-r--r--include/msvmmaj_matrix.h (renamed from include/matrix.h)15
-rw-r--r--include/msvmmaj_pred.h11
-rw-r--r--include/msvmmaj_train.h12
-rw-r--r--include/msvmmaj_train_dataset.h76
-rw-r--r--include/mylapack.h11
-rw-r--r--include/parallel.h13
-rw-r--r--include/strutil.h12
-rw-r--r--include/timer.h15
-rw-r--r--include/types.h40
-rw-r--r--include/util.h15
-rw-r--r--src/crossval.c63
-rw-r--r--src/kernel.c85
-rw-r--r--src/libMSVMMaj.c133
-rw-r--r--src/matrix.c77
-rw-r--r--src/msvmmaj_init.c64
-rw-r--r--src/msvmmaj_kernel.c195
-rw-r--r--src/msvmmaj_lapack.c129
-rw-r--r--src/msvmmaj_matrix.c153
-rw-r--r--src/msvmmaj_pred.c27
-rw-r--r--src/msvmmaj_train.c202
-rw-r--r--src/msvmmaj_train_dataset.c406
-rw-r--r--src/mylapack.c49
-rw-r--r--src/predMSVMMaj.c89
-rw-r--r--src/strutil.c87
-rw-r--r--src/timer.c18
-rw-r--r--src/trainMSVMMaj.c145
-rw-r--r--src/trainMSVMMajdataset.c155
-rw-r--r--src/util.c224
-rw-r--r--training/glass.training5
-rw-r--r--training/iris.training8
-rw-r--r--training/nursery.training4
-rw-r--r--training/vehicle.training7
46 files changed, 4417 insertions, 544 deletions
diff --git a/Makefile b/Makefile
index b28019d..6bef5a9 100644
--- a/Makefile
+++ b/Makefile
@@ -11,8 +11,8 @@ all: lib/libmsvmmaj.a $(EXECS)
override LDFLAGS+=-lblas -llapack -lm
-lib/libmsvmmaj.a: src/libMSVMMaj.o src/util.o src/matrix.o src/mylapack.o src/strutil.o src/crossval.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o
- @ar rcs lib/libmsvmmaj.a src/libMSVMMaj.o src/util.o src/matrix.o src/mylapack.o src/strutil.o src/crossval.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o
+lib/libmsvmmaj.a: src/libMSVMMaj.o src/util.o src/msvmmaj_matrix.o src/msvmmaj_lapack.o src/strutil.o src/crossval.o src/msvmmaj_init.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o src/msvmmaj_kernel.o
+ @ar rcs lib/libmsvmmaj.a src/libMSVMMaj.o src/util.o src/msvmmaj_matrix.o src/msvmmaj_lapack.o src/strutil.o src/crossval.o src/msvmmaj_init.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o src/msvmmaj_kernel.o
@echo libmsvmmaj.a...
trainMSVMMaj: src/trainMSVMMaj.c lib/libmsvmmaj.a
@@ -27,21 +27,29 @@ predMSVMMaj: src/predMSVMMaj.c lib/libmsvmmaj.a
@$(CC) -o predMVSMMaj src/predMSVMMaj.c $(CFLAGS) $(INCLUDE) $(LIB) -lmsvmmaj $(LDFLAGS)
@echo predMSVMMaj...
+src/crossval.o:
+ @$(CC) -c -o src/crossval.o src/crossval.c $(CFLAGS) $(INCLUDE)
+ @echo crossval.o...
+
+src/msvmmaj_kernel.o:
+ @$(CC) -c -o src/msvmmaj_kernel.o src/msvmmaj_kernel.c $(CFLAGS) $(INCLUDE)
+ @echo msvmmaj_kernel.o...
+
src/libMSVMMaj.o:
@$(CC) -c -o src/libMSVMMaj.o src/libMSVMMaj.c $(CFLAGS) $(INCLUDE)
@echo libMSVMMaj.o...
-src/util.o:
- @$(CC) -c -o src/util.o src/util.c $(CFLAGS) $(INCLUDE)
- @echo util.o...
+src/msvmmaj_matrix.o:
+ @$(CC) -c -o src/msvmmaj_matrix.o src/msvmmaj_matrix.c $(CFLAGS) $(INCLUDE)
+ @echo msvmmaj_matrix.o...
-src/matrix.o:
- @$(CC) -c -o src/matrix.o src/matrix.c $(CFLAGS) $(INCLUDE)
- @echo matrix.o...
+src/msvmmaj_init.o:
+ @$(CC) -c -o src/msvmmaj_init.o src/msvmmaj_init.c $(CFLAGS) $(INCLUDE)
+ @echo msvmmaj_init.o...
-src/crossval.o:
- @$(CC) -c -o src/crossval.o src/crossval.c $(CFLAGS) $(INCLUDE)
- @echo crossval.o...
+src/msvmmaj_pred.o:
+ @$(CC) -c -o src/msvmmaj_pred.o src/msvmmaj_pred.c $(CFLAGS) $(INCLUDE)
+ @echo msvmmaj_pred.o...
src/msvmmaj_train.o:
@$(CC) -c -o src/msvmmaj_train.o src/msvmmaj_train.c $(CFLAGS) $(INCLUDE)
@@ -51,12 +59,8 @@ src/msvmmaj_train_dataset.o:
@$(CC) -c -o src/msvmmaj_train_dataset.o src/msvmmaj_train_dataset.c $(CFLAGS) $(INCLUDE)
@echo msvmmaj_train_dataset.o...
-src/msvmmaj_pred.o:
- @$(CC) -c -o src/msvmmaj_pred.o src/msvmmaj_pred.c $(CFLAGS) $(INCLUDE)
- @echo msvmmaj_pred.o...
-
-src/mylapack.o:
- @$(CC) -c -o src/mylapack.o src/mylapack.c $(CFLAGS) $(INCLUDE)
+src/msvmmaj_lapack.o:
+ @$(CC) -c -o src/msvmmaj_lapack.o src/msvmmaj_lapack.c $(CFLAGS) $(INCLUDE)
@echo mylapack.o...
src/strutil.o:
@@ -67,5 +71,9 @@ src/timer.o:
@$(CC) -c -o src/timer.o src/timer.c $(CFLAGS) $(INCLUDE)
@echo timer.o...
+src/util.o:
+ @$(CC) -c -o src/util.o src/util.c $(CFLAGS) $(INCLUDE)
+ @echo util.o...
+
clean:
rm -rf $(EXECS) *.o src/*.o lib/*.a
diff --git a/data/README b/data/README
new file mode 100644
index 0000000..5c38a90
--- /dev/null
+++ b/data/README
@@ -0,0 +1,4 @@
+All datasets downloaded from the UCI repository.
+
+All datasets scaled to the interval [-1, 1] in Matlab and saved in
+full available precision (16 digits).
diff --git a/doc/Doxyfile b/doc/Doxyfile
new file mode 100644
index 0000000..5b43a2b
--- /dev/null
+++ b/doc/Doxyfile
@@ -0,0 +1,1893 @@
+# Doxyfile 1.8.4
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed
+# in front of the TAG it is preceding.
+# All text after a hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or sequence of words) that should
+# identify the project. Note that if you do not use Doxywizard you need
+# to put quotes around the project name if it contains spaces.
+
+PROJECT_NAME = "MSVMMaj"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer
+# a quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF =
+
+# With the PROJECT_LOGO tag one can specify a logo or icon that is
+# included in the documentation. The maximum height of the logo should not
+# exceed 55 pixels and the maximum width should not exceed 200 pixels.
+# Doxygen will copy the logo to the output directory.
+
+PROJECT_LOGO =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY =
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Latvian, Lithuanian, Norwegian, Macedonian,
+# Persian, Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic,
+# Slovak, Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before file names in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip. Note that you specify absolute paths here, but also
+# relative paths, which will be relative from the directory where doxygen is
+# started.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF = NO
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 4
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES = "TODO=\todo"
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding
+# "class=itcl::class" will allow you to use the command class in the
+# itcl::class meaning.
+
+TCL_SUBST =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension,
+# and language is one of the parsers supported by doxygen: IDL, Java,
+# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, and VHDL.
+# For instance, to make doxygen treat .inc files as Fortran files (default
+# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note
+# that for custom extensions you also need to set FILE_PATTERNS otherwise the
+# files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all
+# comments according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you
+# can mix doxygen, HTML, and XML commands with Markdown formatting.
+# Disable only in case of backward compatibilities issues.
+
+MARKDOWN_SUPPORT = YES
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word
+# or globally by setting AUTOLINK_SUPPORT to NO.
+
+AUTOLINK_SUPPORT = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); vs.
+# func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES (the
+# default) will make doxygen replace the get and set methods by a property in
+# the documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
+# unions are shown inside the group in which they are included (e.g. using
+# @ingroup) instead of on a separate page (for HTML and Man pages) or
+# section (for LaTeX and RTF).
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
+# unions with only public data fields or simple typedef fields will be shown
+# inline in the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO (the default), structs, classes, and unions are shown on a separate
+# page (for HTML and Man pages) or section (for LaTeX and RTF).
+
+INLINE_SIMPLE_STRUCTS = NO
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can
+# be an expensive process and often the same symbol appears multiple times in
+# the code, doxygen keeps a cache of pre-resolved symbols. If the cache is too
+# small doxygen will become slower. If the cache is too large, memory is wasted.
+# The cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid
+# range is 0..9, the default is 0, corresponding to a cache size of 2^16 = 65536
+# symbols.
+
+LOOKUP_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags, respectively, are set to YES.
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
+# scope will be included in the documentation.
+
+EXTRACT_PACKAGE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespaces are hidden.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
+# will sort the (brief and detailed) documentation of class members so that
+# constructors and destructors are listed first. If set to NO (the default)
+# the constructors will appear in the respective orders defined by
+# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
+# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
+# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
+# do proper type resolution of all parameters of a function it will reject a
+# match between the prototype and the implementation of a member function even
+# if there is only one candidate or it is obvious which candidate to choose
+# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
+# will still accept a match between prototype and implementation in such cases.
+
+STRICT_PROTO_MATCHING = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if section-label ... \endif
+# and \cond section-label ... \endcond blocks.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or macro consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and macros in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option.
+# You can optionally specify a file name after the option, if omitted
+# DoxygenLayout.xml will be used as the name of the layout file.
+
+LAYOUT_FILE =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files
+# containing the references data. This must be a list of .bib files. The
+# .bib extension is automatically appended if omitted. Using this command
+# requires the bibtex tool to be installed. See also
+# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
+# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
+# feature you need bibtex and perl available in the search path. Do not use
+# file names with spaces, bibtex cannot handle them.
+
+CITE_BIB_FILES =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = "../include" \
+ "../src" \
+ "."
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
+# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
+# *.f90 *.f *.for *.vhd *.vhdl
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be ignored.
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty or if
+# none of the patterns match the file name, INPUT_FILTER is applied.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERNS (if any)
+# and it is also possible to disable source filtering for a specific pattern
+# using *.ext= (so without naming a filter). This option only has effect when
+# FILTER_SOURCE_FILES is enabled.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C, C++ and Fortran comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header. Note that when using a custom header you are responsible
+# for the proper inclusion of any scripts and style sheets that doxygen
+# needs, which is dependent on the configuration options used.
+# It is advised to generate a default header using "doxygen -w html
+# header.html footer.html stylesheet.css YourConfigFile" and then modify
+# that header. Note that the header is subject to change so you typically
+# have to redo this when upgrading to a newer version of doxygen or when
+# changing the value of configuration settings such as GENERATE_TREEVIEW!
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If left blank doxygen will
+# generate a default style sheet. Note that it is recommended to use
+# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this
+# tag will in the future become obsolete.
+
+HTML_STYLESHEET =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional
+# user-defined cascading style sheet that is included after the standard
+# style sheets created by doxygen. Using this option one can overrule
+# certain style aspects. This is preferred over using HTML_STYLESHEET
+# since it does not replace the standard style sheet and is therefore more
+# robust against future updates. Doxygen will copy the style sheet file to
+# the output directory.
+
+HTML_EXTRA_STYLESHEET =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that
+# the files will be copied as-is; there are no commands or markers available.
+
+HTML_EXTRA_FILES =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
+# Doxygen will adjust the colors in the style sheet and background images
+# according to this color. Hue is specified as an angle on a colorwheel,
+# see http://en.wikipedia.org/wiki/Hue for more information.
+# For instance the value 0 represents red, 60 is yellow, 120 is green,
+# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
+# The allowed range is 0 to 359.
+
+HTML_COLORSTYLE_HUE = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
+# the colors in the HTML output. For a value of 0 the output will use
+# grayscales only. A value of 255 will produce the most vivid colors.
+
+HTML_COLORSTYLE_SAT = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
+# the luminance component of the colors in the HTML output. Values below
+# 100 gradually make the output lighter, whereas values above 100 make
+# the output darker. The value divided by 100 is the actual gamma applied,
+# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
+# and 100 does not change the gamma.
+
+HTML_COLORSTYLE_GAMMA = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of
+# entries shown in the various tree structured indices initially; the user
+# can expand and collapse entries dynamically later on. Doxygen will expand
+# the tree to such a level that at most the specified number of entries are
+# visible (unless a fully collapsed tree already exceeds this amount).
+# So setting the number of entries to 1 will produce a fully collapsed tree by
+# default. 0 is a special value representing an infinite number of entries
+# and will result in a fully expanded tree by default.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+
+GENERATE_DOCSET = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely
+# identify the documentation publisher. This should be a reverse domain-name
+# style string, e.g. com.mycompany.MyDocSet.documentation.
+
+DOCSET_PUBLISHER_ID = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+
+DOCSET_PUBLISHER_NAME = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
+# that can be used as input for Qt's qhelpgenerator to generate a
+# Qt Compressed Help (.qch) of the generated HTML documentation.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
+# add. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
+# Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
+# Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+# will be generated, which together with the HTML files, form an Eclipse help
+# plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
+# the help appears.
+
+GENERATE_ECLIPSEHELP = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
+# at top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it. Since the tabs have the same information as the
+# navigation tree you can set this option to YES if you already set
+# GENERATE_TREEVIEW to YES.
+
+DISABLE_INDEX = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+# Since the tree basically has the same information as the tab index you
+# could consider setting DISABLE_INDEX to YES when enabling this option.
+
+GENERATE_TREEVIEW = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
+# (range [0,1..20]) that doxygen will group on one line in the generated HTML
+# documentation. Note that a value of 0 will completely suppress the enum
+# values from appearing in the overview section.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
+# links to external symbols imported via tag files in a separate window.
+
+EXT_LINKS_IN_WINDOW = NO
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are
+# not supported properly for IE 6.0, but are supported on all modern browsers.
+# Note that when changing this option you need to delete any form_*.png files
+# in the HTML output before the changes have effect.
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
+# (see http://www.mathjax.org) which uses client side Javascript for the
+# rendering instead of using prerendered bitmaps. Use this if you do not
+# have LaTeX installed or if you want formulas to look prettier in the HTML
+# output. When enabled you may also need to install MathJax separately and
+# configure the path to it using the MATHJAX_RELPATH option.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and
+# SVG. The default value is HTML-CSS, which is slower, but has the best
+# compatibility.
+
+MATHJAX_FORMAT = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the
+# HTML output directory using the MATHJAX_RELPATH option. The destination
+# directory should contain the MathJax.js script. For instance, if the mathjax
+# directory is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to
+# the MathJax Content Delivery Network so you can quickly see the result without
+# installing MathJax.
+# However, it is strongly recommended to install a local
+# copy of MathJax from http://www.mathjax.org before deployment.
+
+MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension
+# names that should be enabled during MathJax rendering.
+
+MATHJAX_EXTENSIONS =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript
+# pieces of code that will be used on startup of the MathJax code.
+
+MATHJAX_CODEFILE =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box
+# for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using
+# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
+# (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript.
+# There are two flavours of web server based search depending on the
+# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
+# searching and an index file used by the script. When EXTERNAL_SEARCH is
+# enabled the indexing and searching needs to be provided by external tools.
+# See the manual for details.
+
+SERVER_BASED_SEARCH = NO
+
+# When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain
+# the search results. Doxygen ships with an example indexer (doxyindexer) and
+# search engine (doxysearch.cgi) which are based on the open source search
+# engine library Xapian. See the manual for configuration details.
+
+EXTERNAL_SEARCH = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+# Doxygen ships with an example search engine (doxysearch) which is based on
+# the open source search engine library Xapian. See the manual for configuration
+# details.
+
+SEARCHENGINE_URL =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+
+SEARCHDATA_FILE = searchdata.xml
+
+# When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+
+EXTERNAL_SEARCH_ID =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
+# to a relative location where the documentation can be found.
+# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ...
+
+EXTRA_SEARCH_MAPPINGS =
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, letter, legal and
+# executive. If left blank a4 will be used.
+
+PAPER_TYPE = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES = amsmath \
+ mathtools
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
+# the generated latex document. The footer should contain everything after
+# the last chapter. If it is left blank doxygen will generate a
+# standard footer. Notice: only use this tag if you know what you are doing!
+
+LATEX_FOOTER =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images
+# or other source files which should be copied to the LaTeX output directory.
+# Note that the files will be copied as-is; there are no commands or markers
+# available.
+
+LATEX_EXTRA_FILES =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include
+# source code with syntax highlighting in the LaTeX output.
+# Note that which sources are shown also depends on other settings
+# such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
+# http://en.wikipedia.org/wiki/BibTeX for more info.
+
+LATEX_BIB_STYLE = plain
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load style sheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES Doxygen will generate DOCBOOK files
+# that can be used to generate PDF.
+
+GENERATE_DOCBOOK = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the DOCBOOK pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it. If left blank docbook will be used as the default path.
+
+DOCBOOK_OUTPUT = docbook
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# pointed to by INCLUDE_PATH will be searched when a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition that
+# overrules the definition found in the source code.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all references to function-like macros
+# that are alone on a line, have an all uppercase name, and do not end with a
+# semicolon, because these will confuse the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles. For each
+# tag file the location of the external documentation should be added. The
+# format of a tag file without this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths
+# or URLs. Note that each tag file must have a unique name (where the name does
+# NOT include the path). If a tag file is not located in the directory in which
+# doxygen is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed
+# in the related pages index. If set to NO, only the current project's
+# pages will be listed.
+
+EXTERNAL_PAGES = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option also works with HAVE_DOT disabled, but it is recommended to
+# install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = YES
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
+# allowed to run in parallel. When set to 0 (the default) doxygen will
+# base this on the number of processors available in the system. You can set it
+# explicitly to a value larger than 0 to get control over the balance
+# between CPU load and processing speed.
+
+DOT_NUM_THREADS = 0
+
+# By default doxygen will use the Helvetica font for all dot files that
+# doxygen generates. When you want a differently looking font you can specify
+# the font name using DOT_FONTNAME. You need to make sure dot is able to find
+# the font, which can be done by putting it in a standard location or by setting
+# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
+# directory containing the font.
+
+DOT_FONTNAME = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the Helvetica font.
+# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
+# set the path where dot can find it.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct group dependencies.
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside
+# the class node. If there are many fields or methods and many nodes the
+# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS
+# threshold limits the number of items for each type to make the size more
+# manageable. Set this to 0 for no limit. Note that the threshold may be
+# exceeded by 50% before the limit is enforced.
+
+UML_LIMIT_NUM_FIELDS = 10
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will generate a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are svg, png, jpg, or gif.
+# If left blank png will be used. If you choose svg you need to set
+# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible in IE 9+ (other browsers do not have this requirement).
+
+DOT_IMAGE_FORMAT = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+# Note that this requires a modern browser other than Internet Explorer.
+# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
+# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
+# visible. Older versions of IE do not have SVG support.
+
+INTERACTIVE_SVG = NO
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the
+# \mscfile command).
+
+MSCFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = YES
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
diff --git a/doc/mainpage.c b/doc/mainpage.c
new file mode 100644
index 0000000..e3c4057
--- /dev/null
+++ b/doc/mainpage.c
@@ -0,0 +1,23 @@
+/**
+ * @mainpage MSVMMaj C Package Documentation
+ * @author Gertjan van den Burg (add future safe email address)
+ * @date January, 2014
+ * @version 0.01a
+ *
+ * @section intro_sec Introduction
+ *
+ * This is the introduction
+ *
+ * @section Usage
+ *
+ * Describe the usage of the program here.
+ *
+ * @section License
+ *
+ * Put some license information here.
+ *
+ * @section Reference
+ *
+ * Reference to the paper published. Include BibTeX entry as code block here.
+ *
+ */
diff --git a/doc/specifications.c b/doc/specifications.c
new file mode 100644
index 0000000..5b5a8ae
--- /dev/null
+++ b/doc/specifications.c
@@ -0,0 +1,170 @@
+/**
+ * @page spec_training_file Training Input File Specification
+ *
+ * This page specifies the training file that can be parsed by
+ * read_training_from_file(). Below is an example training file.
+ *
+ * @verbatim
+ train: /path/to/training/dataset.txt
+ test: /path/to/test/dataset.txt
+ p: 1.0 1.5 2.0
+ kappa: -0.9 0.0 1.0
+ lambda: 64 16 4 1 0.25 0.0625 0.015625 0.00390625 0.0009765625 0.000244140625
+ epsilon: 1e-6
+ weight: 1 2
+ folds: 10
+ kernel: LINEAR
+ gamma: 1e-3 1e-1 1e1 1e3
+ coef: 1.0 2.0
+ degree: 2.0 3.0
+ @endverbatim
+ *
+ * Note that with a @c LINEAR kernel specification, the @c gamma, @c coef, and
+ * @c degree parameters do not need to be specified. The above merely shows
+ * all available parameters that can be specified in the grid search. Below,
+ * each of the parameters is described in more detail. Arguments followed by
+ * an asterisk are optional.
+ *
+ * @c train: @n
+ * The location of the training dataset file. See @ref spec_data_file for the
+ * specification of a dataset file.
+ *
+ * @c test:* @n
+ * The location of a test dataset file. See @ref spec_data_file for the
+ * specification of a dataset file. This is optional; if specified, the
+ * train/test split will be used for training.
+ *
+ * @c p: @n
+ * The values of the @c p parameter of the algorithm to search over. The @c p
+ * parameter is used in the @f$ \ell_p @f$ norm over the Huber weighted scalar
+ * misclassification errors. Note: @f$ 1 \leq p \leq 2 @f$.
+ *
+ * @c kappa: @n
+ * The values of the @c kappa parameter of the algorithm to search over. The
+ * @c kappa parameter is used in the Huber hinge error over the scalar
+ * misclassification errors. Note: @f$ \kappa > -1 @f$.
+ *
+ * @c lambda: @n
+ * The values of the @c lambda parameter of the algorithm to search over. The
+ * @c lambda parameter is used in the regularization term of the loss
+ * function. Note: @f$ \lambda > 0 @f$.
+ *
+ * @c epsilon: @n
+ * The values of the @c epsilon parameter of the algorithm to search over. The
+ * @c epsilon parameter is used as the stopping parameter in the majorization
+ * algorithm. Note that it often suffices to use only one epsilon value. Using
+ * more than one value increases the size of the grid search considerably.
+ *
+ * @c weight: @n
+ * The weight specifications for the algorithm to use. Two weight
+ * specifications are implemented: the unit weights (index = 1) and the group
+ * size correction weights (index = 2). See also msvmmaj_initialize_weights().
+ *
+ * @c folds: @n
+ * The number of cross validation folds to use.
+ *
+ * @c kernel:* @n
+ * Kernel to use in training. Only one kernel can be specified. See KernelType
+ * for available kernel functions. Note: if multiple kernel types are
+ * specified on this line, only the last value will be used (see the
+ * implementation of parse_kernel_str() for details). If no kernel is
+ * specified, the @c LINEAR kernel will be used.
+ *
+ * @c gamma:* @n
+ * Gamma parameters for the @c RBF, @c POLY, and @c SIGMOID kernels. This
+ * parameter is only optional if the @c LINEAR kernel is specified. See
+ * msvmmaj_compute_rbf(), msvmmaj_compute_poly(), and
+ * msvmmaj_compute_sigmoid() for kernel specifications.
+ *
+ * @c coef:* @n
+ * Coefficients for the @c POLY and @c SIGMOID kernels. This parameter is only
+ * optional if the @c LINEAR or @c RBF kernels are used. See
+ * msvmmaj_compute_poly() and msvmmaj_compute_sigmoid() for kernel
+ * specifications.
+ *
+ * @c degree:* @n
+ * Degrees to search over in the grid search when the @c POLY kernel is
+ * specified. With other kernel specifications this parameter is unnecessary.
+ * See msvmmaj_compute_poly() for the polynomial kernel specification.
+ *
+ */
+
+
+/**
+ * @page spec_data_file Data File Specification
+ *
+ * This page describes the input file format for a dataset. This specification
+ * is used by msvmmaj_read_data() and msvmmaj_write_predictions(). The data
+ * file specification is the same as that used in <a
+ * href="http://www.loria.fr/~lauer/MSVMpack/MSVMpack.html">MSVMpack</a>
+ * (verified in v. 1.3).
+ *
+ * The file is expected to be as follows
+ * @verbatim
+n
+m
+x_11 x_12 ... x_1m y_1
+x_21 x_22 ... x_2m y_2
+...
+x_n1 x_n2 ... x_nm y_n
+@endverbatim
+ *
+ * Here, @c n denotes the number of instances and @c m denotes the number of
+ * predictors. The class labels @c y_i are expected in the final column of
+ * each line.
+ *
+ * As an example, below the first 5 lines of the iris dataset are shown.
+ *
+ * @verbatim
+150
+4
+5.10000 3.50000 1.40000 0.20000 1.00000
+4.90000 3.00000 1.40000 0.20000 1.00000
+4.70000 3.20000 1.30000 0.20000 1.00000
+@endverbatim
+ *
+ */
+
+/**
+ * @page spec_model_file Model File Specification
+ *
+ * This page describes the input file format for a MajModel. This
+ * specification is used by msvmmaj_read_model() and msvmmaj_write_model().
+ * The model file is designed to fully reproduce a MajModel.
+ *
+ * The model output file follows the format
+ * @verbatim
+Output file for MSVMMaj (version 0.1)
+Generated on: Tue Jan 14 12:00:00 2014 (UTC +01:00)
+
+Model:
+p = 2.00
+lambda = 0.001
+kappa = 1.0
+epsilon = 1e-06
+weight_idx = 1
+
+Data:
+filename = /path/to/data_file.txt
+n = 150
+m = 4
+K = 3
+
+Output:
+-0.7693429935131153 -1.9335141926875414
++0.3425555992439160 +1.0939198172438194
++0.3100589593140404 +0.9872012663780092
++0.1319873613546321 +0.1207806485439152
++0.8052481376988456 +0.6507524553955120
+@endverbatim
+ *
+ * The first two lines of the file mainly serve a logging purpose, and are
+ * ignored when reading the model file. The model section fully describes the
+ * model parameters. Next, the data section describes the data file that was
+ * used in training and the size of the dataset. Finally, the output section
+ * shows the augmented weight matrix MajModel::V, in row-major order.
+ *
+ * @todo
+ * Write kernel specification to model file as well and adjust the format
+ * above.
+ */
diff --git a/include/MSVMMaj.h b/include/MSVMMaj.h
deleted file mode 100644
index de99f91..0000000
--- a/include/MSVMMaj.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef MSVMMAJ_H
-#define MSVMMAJ_H
-
-#include "globals.h"
-#include "types.h"
-
-/*
- Model structure
-*/
-struct MajModel {
- int weight_idx;
- long K;
- long n;
- long m;
- double epsilon;
- double p;
- double kappa;
- double lambda;
- double *W;
- double *t;
- double *V;
- double *Vbar;
- double *U;
- double *UU;
- double *Q;
- double *H;
- double *R;
- double *rho;
- double training_error;
- char *data_file;
- KernelType kerneltype;
- double *kernelparam;
-};
-
-/*
- Data structure
-*/
-struct MajData {
- long K;
- long n;
- long m;
- long *y;
- double *Z;
-};
-
-#endif
diff --git a/include/crossval.h b/include/crossval.h
index 0794622..0dff0b9 100644
--- a/include/crossval.h
+++ b/include/crossval.h
@@ -1,3 +1,15 @@
+/**
+ * @file crossval.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for crossval.c
+ *
+ * @details
+ * Contains function declarations for functions needed for performing cross
+ * validation on MajData structures.
+ *
+ */
+
#ifndef CROSSVAL_H
#define CROSSVAL_H
diff --git a/include/globals.h b/include/globals.h
index 8420f76..55fb6c4 100644
--- a/include/globals.h
+++ b/include/globals.h
@@ -1,5 +1,23 @@
-#ifndef GLOBALS_H
-#define GLOBALS_H
+/**
+ * @file globals.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Global definitions
+ *
+ * @details
+ * This header file contains defines and includes which are used in many
+ * parts of the program. Most notable are the Calloc, Malloc and Memset
+ * defines, which are commonly used to allocate memory. These functions
+ * are shorthands for their lowercase counterparts.
+ *
+ * Furthermore, a maximum and minimum function are defined here. These
+ * functions have their own include guards, to ensure potential linked
+ * libraries don't conflict with these definitions.
+ *
+ */
+
+#ifndef MSVMMAJ_GLOBALS_H
+#define MSVMMAJ_GLOBALS_H
#include <stdio.h>
#include <stdlib.h>
diff --git a/include/kernel.h b/include/kernel.h
deleted file mode 100644
index ac5c35d..0000000
--- a/include/kernel.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef KERNEL_H
-#define KERNEL_H
-
-#include "globals.h"
-#include "types.h"
-
-// forward declarations
-struct MajData;
-
-// function declarations
-
diff --git a/include/libMSVMMaj.h b/include/libMSVMMaj.h
index 21efc2f..b7261dc 100644
--- a/include/libMSVMMaj.h
+++ b/include/libMSVMMaj.h
@@ -1,3 +1,20 @@
+/**
+ * @file libMSVMMaj.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for the core MSVMMaj library libMSVMMaj.c
+ *
+ * @details
+ * The core computational routines for MSVMMaj are defined in libMSVMMaj.c.
+ * This file contains function declarations for these functions.
+ *
+ */
+
+/**
+ * @todo
+ * rename this file and libMSVMMaj.c to correspond with the lowercase convention.
+ * Also change the name of the include guard.
+ */
#ifndef LIBMSVMMAJ_H
#define LIBMSVMMAJ_H
diff --git a/include/msvmmaj.h b/include/msvmmaj.h
new file mode 100644
index 0000000..d67ad8b
--- /dev/null
+++ b/include/msvmmaj.h
@@ -0,0 +1,98 @@
+/**
+ * @file msvmmaj.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Definitions for common structures
+ *
+ * @details
+ * Contains documentation and declarations of MajModel and MajData.
+ *
+ */
+
+#ifndef MSVMMAJ_H
+#define MSVMMAJ_H
+
+#include "globals.h"
+#include "types.h"
+
+/**
+ * @brief A structure to represent a single MSVMMaj model.
+ *
+ * @details
+ * Default values for p, lambda, epsilon, kappa, weight_idx, kerneltype and
+ * use_cholesky are set in msvmmaj_init_model().
+ *
+ * @param weight_idx which weights to use (1 = unit, 2 = group)
+ * @param K number of classes in the dataset
+ * @param n number of instances in the dataset
+ * @param m number of predictors in the dataset. When a
+ * nonlinear kernel is used this is set to n by
+ * msvmmaj_make_kernel().
+ * @param epsilon stopping criterion
+ * @param p parameter for the L_p norm
+ * @param kappa parameter for the Huber hinge
+ * @param lambda regularization parameter
+ * @param *W pointer to the weight matrix
+ * @param *t pointer to the translation vector
+ * @param *V pointer to the augmented weight matrix
+ * @param *Vbar pointer to the augmented weight matrix from a
+ * previous iteration
+ * @param *U pointer to the simplex matrix
+ * @param *UU pointer to the 3D simplex difference matrix
+ * @param *Q pointer to the error matrix
+ * @param *H pointer to the Huber weighted error matrix
+ * @param *R pointer to the 0-1 auxiliary matrix
+ * @param *rho pointer to the instance weight vector
+ * @param training_error error after training has completed
+ * @param *data_file pointer to the filename of the data
+ * @param kerneltype kernel to be used in the model
+ * @param *kernelparam pointer to the vector of kernel parameters:
+ * (gamma, c, d) for the polynomial kernel,
+ * (gamma) for the RBF kernel and (gamma, c) for
+ * the sigmoid kernel
+ * @param use_cholesky whether the Cholesky decomposition should be
+ * used
+ *
+ */
+struct MajModel {
+ int weight_idx;
+ long K;
+ long n;
+ long m;
+ double epsilon;
+ double p;
+ double kappa;
+ double lambda;
+ double *W;
+ double *t;
+ double *V;
+ double *Vbar;
+ double *U;
+ double *UU;
+ double *Q;
+ double *H;
+ double *R;
+ double *rho;
+ double training_error;
+ char *data_file;
+ KernelType kerneltype;
+ double *kernelparam;
+ bool use_cholesky;
+};
+
+/**
+ * @brief A structure to represent the data.
+ *
+ * @details
+ * The data matrix Z is accessed in row-major order with m+1 columns per
+ * instance. When a nonlinear kernel is used, msvmmaj_make_kernel() replaces
+ * Z by a matrix with n+1 columns (a column of ones followed by the kernel
+ * matrix) and sets m to n.
+ *
+ * @param K number of classes
+ * @param n number of instances
+ * @param m number of predictors (number of columns of Z minus
+ * one)
+ * @param *y pointer to vector of class labels
+ * @param *Z pointer to augmented data matrix
+ * @param kerneltype kerneltype used in MajData::Z
+ * @param *kernelparam kernel parameters used in MajData::Z
+ * @param use_cholesky whether the Cholesky decomposition is used in MajData::Z
+ *
+ */
+struct MajData {
+ long K;
+ long n;
+ long m;
+ long *y;
+ double *Z;
+ KernelType kerneltype;
+ double *kernelparam;
+ bool use_cholesky;
+};
+
+#endif
diff --git a/include/msvmmaj_init.h b/include/msvmmaj_init.h
new file mode 100644
index 0000000..6e2e36f
--- /dev/null
+++ b/include/msvmmaj_init.h
@@ -0,0 +1,23 @@
+/**
+ * @file msvmmaj_init.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for msvmmaj_init.c
+ *
+ * @details
+ * Contains function declarations for the initialization functions for
+ * MajModel and MajData structures.
+ */
+
+#ifndef MSVMMAJ_INIT_H
+#define MSVMMAJ_INIT_H
+
+// forward declaration
+struct MajData;
+struct MajModel;
+
+struct MajModel *msvmmaj_init_model();
+
+struct MajData *msvmmaj_init_data();
+
+#endif
diff --git a/include/msvmmaj_kernel.h b/include/msvmmaj_kernel.h
new file mode 100644
index 0000000..69bf267
--- /dev/null
+++ b/include/msvmmaj_kernel.h
@@ -0,0 +1,32 @@
+/**
+ * @file msvmmaj_kernel.h
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Header file for kernel functionality
+ *
+ * @details
+ * Contains function declarations for computing the kernel matrix
+ * in nonlinear MSVMMaj. Additional kernel functions should be
+ * included here and in msvmmaj_kernel.c
+ *
+ */
+
+#ifndef MSVMMAJ_KERNEL_H
+#define MSVMMAJ_KERNEL_H
+
+#include "globals.h"
+
+// forward declarations
+struct MajData;
+struct MajModel;
+
+// function declarations
+void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data);
+
+double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam,
+ long n);
+double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam,
+ long n);
+double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam,
+ long n);
+#endif
diff --git a/include/msvmmaj_lapack.h b/include/msvmmaj_lapack.h
new file mode 100644
index 0000000..766a475
--- /dev/null
+++ b/include/msvmmaj_lapack.h
@@ -0,0 +1,23 @@
+/**
+ * @file msvmmaj_lapack.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_lapack.c
+ *
+ * @details
+ * Function declarations for external LAPACK functions
+ *
+ */
+
+#ifndef MSVMMAJ_LAPACK_H
+#define MSVMMAJ_LAPACK_H
+
+#include "globals.h"
+
+int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
+ int LDB);
+int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
+ double *B, int LDB, double *WORK, int LWORK);
+int dpotrf(char UPLO, int N, double *A, int LDA);
+
+#endif
diff --git a/include/matrix.h b/include/msvmmaj_matrix.h
index 5f0a441..8f5ca59 100644
--- a/include/matrix.h
+++ b/include/msvmmaj_matrix.h
@@ -1,5 +1,16 @@
-#ifndef MATRIX_H
-#define MATRIX_H
+/**
+ * @file msvmmaj_matrix.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_matrix.c
+ *
+ * @details
+ * Contains function declarations for functions useful for dealing with matrices.
+ *
+ */
+
+#ifndef MSVMMAJ_MATRIX_H
+#define MSVMMAJ_MATRIX_H
#include "globals.h"
diff --git a/include/msvmmaj_pred.h b/include/msvmmaj_pred.h
index 952389c..ce22b10 100644
--- a/include/msvmmaj_pred.h
+++ b/include/msvmmaj_pred.h
@@ -1,3 +1,14 @@
+/**
+ * @file msvmmaj_pred.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_pred.c
+ *
+ * @details
+ * Contains function declarations for prediction functions.
+ *
+ */
+
#ifndef MSVMMAJ_PRED_H
#define MSVMMAJ_PRED_H
diff --git a/include/msvmmaj_train.h b/include/msvmmaj_train.h
index 4fb198e..835100f 100644
--- a/include/msvmmaj_train.h
+++ b/include/msvmmaj_train.h
@@ -1,3 +1,15 @@
+/**
+ * @file msvmmaj_train.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for msvmmaj_train.c
+ *
+ * @details
+ * Contains function declarations for functions used to train a single
+ * MajModel.
+ *
+ */
+
#ifndef MSVMMAJ_TRAIN_H
#define MSVMMAJ_TRAIN_H
diff --git a/include/msvmmaj_train_dataset.h b/include/msvmmaj_train_dataset.h
index fdcdb4c..5248b4a 100644
--- a/include/msvmmaj_train_dataset.h
+++ b/include/msvmmaj_train_dataset.h
@@ -1,9 +1,39 @@
+/**
+ * @file msvmmaj_train_dataset.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Structs and functions necessary for the grid search
+ *
+ * @details
+ * The grid search for the optimal parameters is done through a queue.
+ * This file contains struct definitions for this queue and a single
+ * task in a queue, as well as a structure for the complete training
+ * scheme. Function declarations are also included.
+ *
+ */
+
#ifndef MSVMMAJ_TRAIN_DATASET_H
#define MSVMMAJ_TRAIN_DATASET_H
#include "globals.h"
#include "types.h"
+/**
+ * @brief A structure for a single task in the queue.
+ *
+ * @param folds number of folds in cross validation
+ * @param ID numeric id of the task in the queue
+ * @param weight_idx parameter for the MajModel
+ * @param p parameter for the MajModel
+ * @param kappa parameter for the MajModel
+ * @param lambda parameter for the MajModel
+ * @param epsilon parameter for the MajModel
+ * @param kerneltype parameter for the MajModel
+ * @param *kernel_param parameters for the MajModel
+ * @param *train_data pointer to the training data
+ * @param *test_data pointer to the test data (if any)
+ * @param performance performance after cross validation
+ */
struct Task {
KernelType kerneltype;
int weight_idx;
@@ -19,14 +49,54 @@ struct Task {
double performance;
};
+/**
+ * @brief Simple task queue.
+ *
+ * This struct is basically just an array of pointers to Task instances,
+ * with a length and an index of the current task.
+ *
+ * @param **tasks array of pointers to Task structs
+ * @param N size of task array
+ * @param i index used for keeping track of the queue
+ */
struct Queue {
struct Task **tasks;
long N;
long i;
};
+/**
+ * @brief Structure for describing the entire grid search
+ *
+ * @param traintype type of training to use
+ * @param kerneltype type of kernel to use throughout training
+ * @param repeats number of repeats to be done after the grid
+ * search to find the parameter set with the
+ * most consistent high performance
+ * @param folds number of folds in cross validation
+ * @param Np size of the array of p values
+ * @param Nl size of the array of lambda values
+ * @param Nk size of the array of kappa values
+ * @param Ne size of the array of epsilon values
+ * @param Nw size of the array of weight_idx values
+ * @param Ng size of the array of gamma values
+ * @param Nc size of the array of coef values
+ * @param Nd size of the array of degree values
+ * @param *weight_idxs array of weight_idxs
+ * @param *ps array of p values
+ * @param *lambdas array of lambda values
+ * @param *kappas array of kappa values
+ * @param *epsilons array of epsilon values
+ * @param *gammas array of gamma values
+ * @param *coefs array of coef values
+ * @param *degrees array of degree values
+ * @param *train_data_file filename of train data file
+ * @param *test_data_file filename of test data file
+ *
+ */
struct Training {
TrainType traintype;
+ KernelType kerneltype;
long repeats;
long folds;
long Np;
@@ -34,11 +104,17 @@ struct Training {
long Nk;
long Ne;
long Nw;
+ long Ng;
+ long Nc;
+ long Nd;
int *weight_idxs;
double *ps;
double *lambdas;
double *kappas;
double *epsilons;
+ double *gammas;
+ double *coefs;
+ double *degrees;
char *train_data_file;
char *test_data_file;
};
diff --git a/include/mylapack.h b/include/mylapack.h
deleted file mode 100644
index 4c79e0e..0000000
--- a/include/mylapack.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef MYLAPACK_H
-#define MYLAPACK_H
-
-#include "globals.h"
-
-int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
- int LDB);
-int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
- double *B, int LDB, double *WORK, int LWORK);
-
-#endif
diff --git a/include/parallel.h b/include/parallel.h
deleted file mode 100644
index 8747347..0000000
--- a/include/parallel.h
+++ /dev/null
@@ -1,13 +0,0 @@
-
-struct Task {
- enum KernelType kernel_type;
- int weight_idx;
- double epsilon;
- double p;
- double kappa;
- double lambda;
- double *kernel_param;
- struct MajData **data;
-}
-
-
diff --git a/include/strutil.h b/include/strutil.h
index 66722ae..740fde1 100644
--- a/include/strutil.h
+++ b/include/strutil.h
@@ -1,3 +1,15 @@
+/**
+ * @file strutil.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for strutil.c
+ *
+ * @details
+ * Function declarations for useful string functions used in parsing
+ * input files.
+ *
+ */
+
#ifndef STRUTIL_H
#define STRUTIL_H
diff --git a/include/timer.h b/include/timer.h
index 8a737e0..d4d4d23 100644
--- a/include/timer.h
+++ b/include/timer.h
@@ -1,5 +1,16 @@
-#ifndef TIMER_H
-#define TIMER_H
+/**
+ * @file timer.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for timer.c
+ *
+ * @details
+ * Function declaration for timer function used to measure computation time.
+ *
+ */
+
+#ifndef MSVMMAJ_TIMER_H
+#define MSVMMAJ_TIMER_H
#include "globals.h"
diff --git a/include/types.h b/include/types.h
index b4db8d8..f6d008b 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1,21 +1,41 @@
-#ifndef TYPES_H
-#define TYPES_H
+/**
+ * @file types.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Definitions of common types
+ *
+ * @details
+ * Here common types used throughout the program are defined.
+ *
+ */
+#ifndef MSVMMAJ_TYPES_H
+#define MSVMMAJ_TYPES_H
+
+/**
+ * @brief Implementation of true and false
+ */
typedef enum {
- false,
- true
+ false=0, /**< false keyword, corresponding to 0. */
+ true=1 /**< true keyword, corresponding to 1. */
} bool;
+/**
+ * @brief type of training used in parameter grid search
+ */
typedef enum {
- CV=0,
- TT=1
+ CV=0, /**< cross validation */
+ TT=1 /**< data with existing train/test split */
} TrainType;
+/**
+ * @brief type of kernel used in training
+ */
typedef enum {
- K_LINEAR=0,
- K_POLY=1,
- K_RBF=2,
- K_SIGMOID=3,
+ K_LINEAR=0, /**< Linear kernel */
+ K_POLY=1, /**< Polynomial kernel */
+ K_RBF=2, /**< RBF kernel */
+ K_SIGMOID=3, /**< Sigmoid kernel */
} KernelType;
#endif
diff --git a/include/util.h b/include/util.h
index facae79..995a927 100644
--- a/include/util.h
+++ b/include/util.h
@@ -1,5 +1,16 @@
-#ifndef UTIL_H
-#define UTIL_H
+/**
+ * @file util.h
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Header file for util.c
+ *
+ * @details
+ * Function declarations for utility functions of the program.
+ *
+ */
+
+#ifndef MSVMMAJ_UTIL_H
+#define MSVMMAJ_UTIL_H
#include "globals.h"
diff --git a/src/crossval.c b/src/crossval.c
index 9a3c1cc..10e3051 100644
--- a/src/crossval.c
+++ b/src/crossval.c
@@ -1,7 +1,40 @@
+/**
+ * @file crossval.c
+ * @author Gertjan van den Burg
+ * @date January 7, 2014
+ * @brief Functions for cross validation
+ *
+ * @details
+ * This file contains functions for performing cross validation. The function
+ * msvmmaj_make_cv_split() creates a cross validation vector for non-stratified
+ * cross validation. The function msvmmaj_get_tt_split() creates a train and
+ * test dataset from a given dataset and a pre-determined CV partition vector.
+ * See individual function documentation for details.
+ *
+ */
+
#include "crossval.h"
-#include "matrix.h"
-#include "MSVMMaj.h"
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
+/**
+ * @brief Create a cross validation split vector
+ *
+ * @details
+ * A pre-allocated vector of length N is created which can be used to define
+ * cross validation splits. The folds contain between
+ * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$
+ * instances. An instance is mapped to a partition randomly until all folds
+ * contain @f$ N \% folds @f$ instances. The zero fold then contains
+ * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$
+ * instances are then distributed over the first @f$ N \% folds @f$ folds.
+ *
+ * @param[in] N number of instances
+ * @param[in] folds number of folds
+ * @param[in,out] cv_idx array of size N which contains the fold index
+ * for each observation on exit
+ *
+ */
void msvmmaj_make_cv_split(long N, long folds, long *cv_idx)
{
long i, j, idx;
@@ -30,6 +63,26 @@ void msvmmaj_make_cv_split(long N, long folds, long *cv_idx)
}
}
+
+/**
+ * @brief Create train and test datasets for a CV split
+ *
+ * @details
+ * Given a MajData structure for the full dataset, a previously created
+ * cross validation split vector and a fold index, a training and test dataset
+ * are created.
+ *
+ * @param[in] full_data a MajData structure for the entire
+ * dataset
+ * @param[in,out] train_data an initialized MajData structure which
+ * on exit contains the training dataset
+ * @param[in,out] test_data an initialized MajData structure which
+ * on exit contains the test dataset
+ * @param[in] cv_idx a vector of cv partitions created by
+ * msvmmaj_make_cv_split()
+ * @param[in] fold_idx index of the fold which becomes the
+ * test dataset
+ */
void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data,
struct MajData *test_data, long *cv_idx, long fold_idx)
{
@@ -67,13 +120,15 @@ void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data,
test_data->y[k] = full_data->y[i];
for (j=0; j<m+1; j++)
matrix_set(test_data->Z, m+1, k, j,
- matrix_get(full_data->Z, m+1, i, j));
+ matrix_get(full_data->Z, m+1,
+ i, j));
k++;
} else {
train_data->y[l] = full_data->y[i];
for (j=0; j<m+1; j++)
matrix_set(train_data->Z, m+1, l, j,
- matrix_get(full_data->Z, m+1, i, j));
+ matrix_get(full_data->Z, m+1,
+ i, j));
l++;
}
}
diff --git a/src/kernel.c b/src/kernel.c
deleted file mode 100644
index ee64871..0000000
--- a/src/kernel.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * @file kernel.c
- * @author Gertjan van den Burg (burg@ese.eur.nl)
- * @date October 18, 2013
- * @brief Defines main functions for use of kernels in MSVMMaj.
- *
- * @details
- * Functions for constructing different kernels using user-supplied
- * parameters. Also contains the functions for decomposing the
- * kernel matrix using several decomposition methods.
- *
- */
-#include <math.h>
-
-#include "kernel.h"
-
-void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data)
-{
- switch (model->kerneltype) {
- case K_LINEAR:
- break;
- case K_POLY:
- msvmmaj_make_kernel_poly(model, data);
- break;
- case K_RBF:
- msvmmaj_make_kernel_rbf(model, data);
- break;
- case K_SIGMOID:
- msvmmaj_make_kernel_sigmoid(model, data);
- break;
- }
-}
-
-void msvmmaj_make_kernel_rbf(struct MajModel *model, struct MajData *data)
-{
- long i, j;
- long n = model->n;
- double value;
- double *x1, *x2;
- double *K = Calloc(double, n*(n+1));
-
- for (i=0; i<n; i++) {
- for (j=0; j<n; j++) {
- x1 = &data->Z[i*(data->m+1)+1];
- x2 = &data->Z[j*(data->m+1)+1];
- value = msvmmaj_compute_rbf(x1, x2, model->kernelparam, n);
- matrix_set(K, n+1, i, j+1, value);
- }
- matrix_set(K, n+1, i, 0, 1.0);
- }
-
- free(data->Z);
- data->Z = K;
- data->m = n;
- model->m = n;
-}
-
-/**
- * Implements k(x, z) = exp( -gamma * || x - z ||^2)
- */
-double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam, long n)
-{
- long i;
- double value = 0.0;
-
- for (i=0; i<n; i++)
- value += (x1[i] - x2[i]) * (x1[i] - x2[i]);
- value *= -kernelparam[0];
- return exp(value);
-}
-
-/**
- * Implements k(x, z) = (gamma * <x, z> + c)^degree
- */
-double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam, long n)
-{
- long i;
- double value = 0.0;
- for (i=0; i<n; i++)
- value += x1[i]*x2[i];
- value *= kernelparam[0];
- value += kernelparam[1];
- for (i=1; i<(int kernelparam[2]); i++)
- value *= value;
- :w
diff --git a/src/libMSVMMaj.c b/src/libMSVMMaj.c
index 9544830..a0bef97 100644
--- a/src/libMSVMMaj.c
+++ b/src/libMSVMMaj.c
@@ -1,6 +1,6 @@
/**
* @file libMSVMMaj.c
- * @author Gertjan van den Burg (burg@ese.eur.nl)
+ * @author Gertjan van den Burg
* @date August 8, 2013
* @brief Main functions for the MSVMMaj algorithm
*
@@ -16,24 +16,23 @@
#include <math.h>
#include "libMSVMMaj.h"
-#include "MSVMMaj.h"
-#include "matrix.h"
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
inline double rnd() { return (double) rand()/0x7FFFFFFF; }
/**
- * @name msvmmaj_simplex_gen
* @brief Generate matrix of simplex vertex coordinates
- * @ingroup libMSVMMaj
*
+ * @details
* Generate the simplex matrix. Each row of the created
* matrix contains the coordinate vector of a single
* vertex of the K-simplex in K-1 dimensions. The simplex
* generated is a special simplex with edges of length 1.
* The simplex matrix U must already have been allocated.
*
- * @param [in] K number of classes
- * @param [in,out] U simplex matrix of size K * (K-1)
+ * @param[in] K number of classes
+ * @param[in,out] U simplex matrix of size K * (K-1)
*/
void msvmmaj_simplex_gen(long K, double *U)
{
@@ -51,10 +50,18 @@ void msvmmaj_simplex_gen(long K, double *U)
}
}
-/*!
- Generate the category matrix R. The category matrix has 1's everywhere
- except at the column corresponding to the label of instance i.
-*/
+/**
+ * @brief Generate the category matrix
+ *
+ * @details
+ * Generate the category matrix R. The category matrix has 1's everywhere
+ * except at the column corresponding to the label of instance i, there the
+ * element is 0.
+ *
+ * @param[in,out] model corresponding MajModel
+ * @param[in] dataset corresponding MajData
+ *
+ */
void msvmmaj_category_matrix(struct MajModel *model, struct MajData *dataset)
{
long i, j;
@@ -70,8 +77,19 @@ void msvmmaj_category_matrix(struct MajModel *model, struct MajData *dataset)
}
}
-/*!
- * Simplex diff
+/**
+ * @brief Generate the simplex difference matrix
+ *
+ * @details
+ * The simplex difference matrix is a 3D matrix which is constructed
+ * as follows. For each instance i, the difference vectors between the row of
+ * the simplex matrix corresponding to the class label of instance i and the
+ * other rows of the simplex matrix are calculated. These difference vectors
+ * are stored in a matrix, which is one horizontal slice of the 3D matrix.
+ *
+ * @param[in,out] model the corresponding MajModel
+ * @param[in] data the corresponding MajData
+ *
*/
void msvmmaj_simplex_diff(struct MajModel *model, struct MajData *data)
{
@@ -92,13 +110,22 @@ void msvmmaj_simplex_diff(struct MajModel *model, struct MajData *data)
}
}
-/*!
- Calculate the errors Q based on the current value of V.
- It is assumed that the memory for Q has already been allocated.
- In addition, the matrix ZV is calculated here. It is assigned to a
- pre-allocated block of memory, since it would be inefficient to keep
- reassigning this block at every iteration.
-*/
+/**
+ * @brief Calculate the scalar errors
+ *
+ * @details
+ * Calculate the scalar errors q based on the current estimate of V, and
+ * store these in Q. It is assumed that the memory for Q has already been
+ * allocated. In addition, the matrix ZV is calculated here. It is assigned
+ * to a pre-allocated block of memory, which is passed to this function.
+ *
+ * @param[in,out] model the corresponding MajModel
+ * @param[in] data the corresponding MajData
+ * @param[in,out] ZV a pointer to a memory block for ZV. On exit
+ * this block is updated with the new ZV matrix
+ * calculated with MajModel::V.
+ *
+ */
void msvmmaj_calculate_errors(struct MajModel *model, struct MajData *data, double *ZV)
{
long i, j, k;
@@ -136,9 +163,23 @@ void msvmmaj_calculate_errors(struct MajModel *model, struct MajData *data, doub
}
}
-/*!
- Calculate the Huber hinge errors for each error in the matrix Q.
-*/
+/**
+ * @brief Calculate the Huber hinge errors
+ *
+ * @details
+ * For each of the scalar errors in Q the Huber hinge errors are
+ * calculated. The Huber hinge is here defined as
+ * @f[
+ * h(q) =
+ * \begin{dcases}
+ * 1 - q - \frac{\kappa + 1}{2} & \text{if } q \leq -\kappa \\
+ * \frac{1}{2(\kappa + 1)} ( 1 - q)^2 & \text{if } q \in (-\kappa, 1] \\
+ * 0 & \text{if } q > 1
+ * \end{dcases}
+ * @f]
+ *
+ * @param[in,out] model the corresponding MajModel
+ */
void msvmmaj_calculate_huber(struct MajModel *model)
{
long i, j;
@@ -159,10 +200,9 @@ void msvmmaj_calculate_huber(struct MajModel *model)
}
/**
- * @name msvmmaj_seed_model_V
* @brief seed the matrix V from an existing model or using rand
- * @ingroup libMSVMMaj
*
+ * @details
* The matrix V must be seeded before the main_loop() can start.
* This can be done by either seeding it with random numbers or
* using the solution from a previous model on the same dataset
@@ -170,8 +210,8 @@ void msvmmaj_calculate_huber(struct MajModel *model)
* significant improvement in the number of iterations necessary
* because the seeded model V is closer to the optimal V.
*
- * @param [in] from_model model from which to copy V
- * @param [in,out] to_model model to which V will be copied
+ * @param[in] from_model MajModel from which to copy V
+ * @param[in,out] to_model MajModel to which V will be copied
*/
void msvmmaj_seed_model_V(struct MajModel *from_model, struct MajModel *to_model)
{
@@ -193,10 +233,17 @@ void msvmmaj_seed_model_V(struct MajModel *from_model, struct MajModel *to_model
}
}
-/*!
- * Step doubling
+/**
+ * @brief Use step doubling
+ *
+ * @details
+ * Step doubling can be used to speed up the Majorization algorithm. Instead
+ * of using the value at the minimum of the majorization function, the value
+ * ``opposite'' the majorization point is used. This can essentially cut the
+ * number of iterations necessary to reach the minimum in half.
+ *
+ * @param[in,out] model MajModel containing the augmented parameters
*/
-
void msvmmaj_step_doubling(struct MajModel *model)
{
long i, j;
@@ -207,15 +254,33 @@ void msvmmaj_step_doubling(struct MajModel *model)
for (i=0; i<m+1; i++) {
for (j=0; j<K-1; j++) {
matrix_mul(model->V, K-1, i, j, 2.0);
- matrix_add(model->V, K-1, i, j, -matrix_get(model->Vbar, K-1, i, j));
+ matrix_add(model->V, K-1, i, j,
+ -matrix_get(model->Vbar, K-1, i, j));
}
}
}
-/*!
- * initialize_weights
+/**
+ * @brief Initialize instance weights
+ *
+ * @details
+ * Instance weights can, for example, be used to add additional weights to
+ * instances of certain classes. Two default weight possibilities are
+ * implemented here. The first is unit weights, where each instance gets
+ * weight 1.
+ *
+ * The second are group size correction weights, which are calculated as
+ * @f[
+ * \rho_i = \frac{n}{Kn_k} ,
+ * @f]
+ * where @f$ n_k @f$ is the number of instances in group @f$ k @f$ and
+ * @f$ y_i = k @f$.
+ *
+ * @param[in] data MajData with the dataset
+ * @param[in,out] model MajModel with the weight specification. On
+ * exit MajModel::rho contains the instance
+ * weights.
*/
-
void msvmmaj_initialize_weights(struct MajData *data, struct MajModel *model)
{
long *groups;
diff --git a/src/matrix.c b/src/matrix.c
deleted file mode 100644
index 8803e8b..0000000
--- a/src/matrix.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/**
- * @file matrix.c
- * @author Gertjan van den Burg (burg@ese.eur.nl)
- * @date August 8, 2013
- * @brief Functions facilitating matrix access
- *
- * @details
- * The functions contained in this file are used when
- * accessing or writing to matrices. Seperate functions
- * exist of adding and multiplying existing matrix
- * elements, to ensure this is done in place.
- *
- */
-
-#include "matrix.h"
-#include "util.h"
-
-/**
- * @name matrix_set
- * @brief Set element of matrix
- * @ingroup matrix
- *
- * Row-Major order is used to set a matrix element. Since matrices
- * of type double are most common in MSVMMaj, this function only
- * deals with that type.
- *
- * @param [in] M matrix to set element of
- * @param [in] cols number of columns of M
- * @param [in] i row index of element to write to
- * @param [in] j column index of element to write to
- * @param [out] val value to write to specified element of M
- */
-void matrix_set(double *M, long cols, long i, long j, double val)
-{
- M[i*cols+j] = val;
-}
-
-double matrix_get(double *M, long cols, long i, long j)
-{
- return M[i*cols+j];
-}
-
-void matrix_add(double *M, long cols, long i, long j, double val)
-{
- M[i*cols+j] += val;
-}
-
-void matrix_mul(double *M, long cols, long i, long j, double val)
-{
- M[i*cols+j] *= val;
-}
-
-void matrix3_set(double *M, long N2, long N3, long i, long j,
- long k, double val)
-{
- M[k+N3*(j+N2*i)] = val;
-}
-
-double matrix3_get(double *M, long N2, long N3, long i, long j,
- long k)
-{
- return M[k+N3*(j+N2*i)];
-}
-
-
-void print_matrix(double *M, long rows, long cols)
-{
- long i, j;
-
- for (i=0; i<rows; i++) {
- for (j=0; j<cols; j++)
- note("%8.8f ", matrix_get(M, cols, i, j));
- note("\n");
- }
- note("\n");
-}
-
diff --git a/src/msvmmaj_init.c b/src/msvmmaj_init.c
new file mode 100644
index 0000000..14278f9
--- /dev/null
+++ b/src/msvmmaj_init.c
@@ -0,0 +1,64 @@
+/**
+ * @file msvmmaj_init.c
+ * @author Gertjan van den Burg
+ * @date January 7, 2014
+ * @brief Functions for initializing model and data structures
+ *
+ * @details
+ * This file contains functions for initializing a MajModel instance
+ * and a MajData instance. In addition, default values for these
+ * structures are defined here (and only here).
+ *
+ */
+
+#include <math.h>
+
+#include "msvmmaj.h"
+#include "msvmmaj_init.h"
+
+/**
+ * @brief Initialize a MajModel structure
+ *
+ * @details
+ * A MajModel structure is allocated and the default values for the
+ * parameters are set. The dimensions are set to zero and all pointer
+ * members are set to NULL, so that members which have not been allocated
+ * yet can be recognized and safely passed to free(). A pointer to the
+ * initialized model is returned.
+ *
+ * @returns initialized MajModel
+ */
+struct MajModel *msvmmaj_init_model()
+{
+	struct MajModel *model = Malloc(struct MajModel, 1);
+
+	// dimensions are unknown until a dataset is attached
+	model->K = 0;
+	model->n = 0;
+	model->m = 0;
+
+	// no memory is owned by the model yet
+	model->W = NULL;
+	model->t = NULL;
+	model->V = NULL;
+	model->Vbar = NULL;
+	model->U = NULL;
+	model->UU = NULL;
+	model->Q = NULL;
+	model->H = NULL;
+	model->R = NULL;
+	model->rho = NULL;
+	model->data_file = NULL;
+	model->kernelparam = NULL;
+
+	// set default values
+	model->p = 1.0;
+	model->lambda = pow(2, -8.0);
+	model->epsilon = 1e-6;
+	model->kappa = 0.0;
+	model->weight_idx = 1;
+	model->kerneltype = K_LINEAR;
+	model->use_cholesky = false;
+
+	return model;
+}
+
+/**
+ * @brief Initialize a MajData structure
+ *
+ * @details
+ * A MajData structure is allocated and default values are set. The
+ * dimensions are set to zero and all pointer members are set to NULL. The
+ * latter is required because msvmmaj_make_kernel() passes
+ * MajData::kernelparam to free() before assigning new kernel parameters.
+ * A pointer to the initialized data is returned.
+ *
+ * @returns initialized MajData
+ *
+ */
+struct MajData *msvmmaj_init_data()
+{
+	struct MajData *data = Malloc(struct MajData, 1);
+
+	// dimensions are unknown until a dataset is read
+	data->K = 0;
+	data->n = 0;
+	data->m = 0;
+
+	// no memory is owned by the data yet
+	data->y = NULL;
+	data->Z = NULL;
+	data->kernelparam = NULL;
+
+	// set default values
+	data->kerneltype = K_LINEAR;
+	data->use_cholesky = false;
+
+	return data;
+}
+
diff --git a/src/msvmmaj_kernel.c b/src/msvmmaj_kernel.c
new file mode 100644
index 0000000..6238fc1
--- /dev/null
+++ b/src/msvmmaj_kernel.c
@@ -0,0 +1,195 @@
+/**
+ * @file msvmmaj_kernel.c
+ * @author Gertjan van den Burg
+ * @date October 18, 2013
+ * @brief Defines main functions for use of kernels in MSVMMaj.
+ *
+ * @details
+ * Functions for constructing different kernels using user-supplied
+ * parameters. Also contains the functions for decomposing the
+ * kernel matrix using several decomposition methods.
+ *
+ */
+#include <math.h>
+
+#include "msvmmaj.h"
+#include "msvmmaj_kernel.h"
+#include "msvmmaj_lapack.h"
+#include "msvmmaj_matrix.h"
+#include "util.h"
+
+/**
+ * @brief Create the kernel matrix
+ *
+ * @details
+ * Create a kernel matrix based on the kerneltype specified in the model.
+ * Kernel parameters are assumed to be specified in MajModel::kernelparam.
+ * For the linear kernel this function returns immediately. For all other
+ * kernels MajData::Z is replaced by a matrix with n rows and n+1 columns,
+ * where the first column contains ones and the remaining columns contain
+ * the kernel matrix. If MajModel::use_cholesky is set, dpotrf() is applied
+ * to the kernel matrix before it is copied. On exit MajData::m and
+ * MajModel::m are both equal to n, and the kernel type, the kernel
+ * parameters and the Cholesky flag of the model are recorded in the data.
+ *
+ * The program is terminated with exit status 1 if the kernel type is
+ * unknown, if the Cholesky decomposition fails, or if MajData::Z can not
+ * be resized.
+ *
+ * @note MajData::kernelparam is passed to free() by this function, it must
+ * therefore be either NULL or a pointer to heap memory on entry.
+ *
+ * @param[in,out] 	model 	MajModel specifying the parameters. On exit
+ * 				MajModel::m is set to n.
+ * @param[in,out] 	data 	MajData specifying the data. On exit
+ * 				MajData::Z holds the augmented kernel matrix.
+ *
+ */
+void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data)
+{
+	long i, j;
+	long n = model->n;
+	long nparam;
+	double value;
+	double *x1, *x2;
+	double *K, *Z, *param;
+	double (*kernel)(double *, double *, double *, long);
+
+	// select the kernel function and its number of parameters once,
+	// instead of for every pair of instances
+	switch (model->kerneltype) {
+		case K_LINEAR:
+			return;
+		case K_POLY:
+			kernel = msvmmaj_compute_poly;
+			nparam = 3;
+			break;
+		case K_RBF:
+			kernel = msvmmaj_compute_rbf;
+			nparam = 1;
+			break;
+		case K_SIGMOID:
+			kernel = msvmmaj_compute_sigmoid;
+			nparam = 2;
+			break;
+		default:
+			fprintf(stderr, "Unknown kernel type in "
+					"msvmmaj_make_kernel\n");
+			exit(1);
+	}
+
+	// Calloc takes a number of elements, not a number of bytes
+	K = Calloc(double, n*n);
+
+	// the kernel matrix is symmetric, so only the pairs with j >= i are
+	// evaluated. The first column of Z is skipped when taking the rows.
+	for (i=0; i<n; i++) {
+		x1 = &data->Z[i*(data->m+1)+1];
+		for (j=i; j<n; j++) {
+			x2 = &data->Z[j*(data->m+1)+1];
+			value = kernel(x1, x2, model->kernelparam, data->m);
+			matrix_set(K, n, i, j, value);
+			matrix_set(K, n, j, i, value);
+		}
+	}
+
+	// get cholesky if necessary.
+	if (model->use_cholesky == true) {
+		int status = dpotrf('L', n, K, n);
+		if (status != 0) {
+			fprintf(stderr, "Error (%i) computing Cholesky "
+					"decomposition of kernel matrix.\n",
+					status);
+			exit(1);
+		}
+		note("Got Cholesky.\n");
+	}
+
+	// copy kernel/cholesky to data. The result of realloc is checked
+	// before it replaces the old pointer.
+	Z = realloc(data->Z, n*(n+1)*(sizeof(double)));
+	if (Z == NULL) {
+		fprintf(stderr, "Failed to allocate memory for the kernel "
+				"matrix in msvmmaj_make_kernel\n");
+		exit(1);
+	}
+	data->Z = Z;
+	for (i=0; i<n; i++) {
+		for (j=0; j<n; j++)
+			matrix_set(data->Z, n+1, i, j+1,
+					matrix_get(K, n, i, j));
+		matrix_set(data->Z, n+1, i, 0, 1.0);
+	}
+	data->m = n;
+
+	// let data know what it's made of. The new parameters are copied
+	// before the previous ones are released.
+	param = Calloc(double, nparam);
+	for (i=0; i<nparam; i++)
+		param[i] = model->kernelparam[i];
+	free(data->kernelparam);
+	data->kernelparam = param;
+	data->kerneltype = model->kerneltype;
+	data->use_cholesky = model->use_cholesky;
+
+	model->m = n;
+	free(K);
+}
+
+/**
+ * @brief Evaluate the RBF kernel for a pair of vectors
+ *
+ * @details
+ * The squared Euclidean distance between the two vectors is accumulated
+ * and used to evaluate
+ * @f[
+ * 	k(x_1, x_2) = \exp( -\gamma \| x_1 - x_2 \|^2 )
+ * @f]
+ * where @f$ \gamma @f$ is the first element of the kernel parameter array.
+ *
+ * @param[in] 	x1 		first vector
+ * @param[in] 	x2 		second vector
+ * @param[in] 	kernelparam 	array of kernel parameters (gamma is first
+ * 				element)
+ * @param[in] 	n 		length of the vectors x1 and x2
+ * @returns 			kernel evaluation
+ */
+double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam, long n)
+{
+	long idx;
+	double diff;
+	double sqdist = 0.0;
+
+	for (idx=0; idx<n; idx++) {
+		diff = x1[idx] - x2[idx];
+		sqdist += diff * diff;
+	}
+
+	return exp(sqdist * -kernelparam[0]);
+}
+
+/**
+ * @brief Evaluate the polynomial kernel for a pair of vectors
+ *
+ * @details
+ * The inner product of the two vectors is accumulated and used to evaluate
+ * @f[
+ * 	k(x_1, x_2) = ( \gamma \langle x_1, x_2 \rangle + c)^d
+ * @f]
+ * where @f$ \gamma @f$, @f$ c @f$ and @f$ d @f$ are kernel parameters. The
+ * degree @f$ d @f$ is converted to an integer before it is used.
+ *
+ * @param[in] 	x1 		first vector
+ * @param[in] 	x2 		second vector
+ * @param[in] 	kernelparam 	array of kernel parameters (gamma, c, d)
+ * @param[in] 	n 		length of the vectors x1 and x2
+ * @returns 			kernel evaluation
+ */
+double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam, long n)
+{
+	long idx;
+	int degree = (int) kernelparam[2];
+	double base;
+	double dot = 0.0;
+
+	for (idx=0; idx<n; idx++)
+		dot += x1[idx]*x2[idx];
+
+	base = dot * kernelparam[0];
+	base += kernelparam[1];
+	return pow(base, degree);
+}
+
+/**
+ * @brief Evaluate the sigmoid kernel for a pair of vectors
+ *
+ * @details
+ * The inner product of the two vectors is accumulated and used to evaluate
+ * @f[
+ * 	k(x_1, x_2) = \tanh( \gamma \langle x_1 , x_2 \rangle + c)
+ * @f]
+ * where @f$ \gamma @f$ and @f$ c @f$ are kernel parameters.
+ *
+ * @param[in] 	x1 		first vector
+ * @param[in] 	x2 		second vector
+ * @param[in] 	kernelparam 	array of kernel parameters (gamma, c)
+ * @param[in] 	n 		length of the vectors x1 and x2
+ * @returns 			kernel evaluation
+ */
+double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam, long n)
+{
+	long idx;
+	double arg;
+	double dot = 0.0;
+
+	for (idx=0; idx<n; idx++)
+		dot += x1[idx]*x2[idx];
+
+	arg = dot * kernelparam[0];
+	arg += kernelparam[1];
+	return tanh(arg);
+}
diff --git a/src/msvmmaj_lapack.c b/src/msvmmaj_lapack.c
new file mode 100644
index 0000000..9ca8dab
--- /dev/null
+++ b/src/msvmmaj_lapack.c
@@ -0,0 +1,129 @@
+/**
+ * @file msvmmaj_lapack.c
+ * @author Gertjan van den Burg
+ * @date August 9, 2013
+ * @brief Utility functions for interacting with LAPACK
+ *
+ * @details
+ * Functions in this file are auxiliary functions which make it easier
+ * to use LAPACK functions from liblapack.
+ */
+
+#include "msvmmaj_lapack.h"
+
+/**
+ * @brief Solve AX = B where A is symmetric positive definite.
+ *
+ * @details
+ * Solve a linear system of equations AX = B where A is symmetric positive
+ * definite. This function uses the external LAPACK routine dposv.
+ *
+ * The external routine dposv_ takes every argument through a pointer, so the
+ * by-value arguments of this wrapper are passed by address. The INFO value
+ * written by dposv_ is handed back as the return value.
+ *
+ * @param[in] UPLO which triangle of A is stored
+ * @param[in] N order of A
+ * @param[in] NRHS number of columns of B
+ * @param[in,out] A double precision array of size (LDA, N). On
+ * exit contains the upper or lower factor of the
+ * Cholesky factorization of A.
+ * @param[in] LDA leading dimension of A
+ * @param[in,out] B double precision array of size (LDB, NRHS). On
+ * exit contains the N-by-NRHS solution matrix X.
+ * @param[in] LDB the leading dimension of B
+ * @returns info parameter which contains the status of the
+ * computation:
+ * - =0: success
+ * - <0: if -i, the i-th argument had
+ * an illegal value
+ * - >0: if i, the leading minor of A
+ * was not positive definite
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html
+ */
+int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
+ int LDB)
+{
+ extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A,
+ int *LDAp, double *B, int *LDBp, int *INFOp);
+ int INFO;
+ dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO);
+ return INFO;
+}
+
+/**
+ * @brief Solve a system of equations AX = B where A is symmetric.
+ *
+ * @details
+ * Solve a linear system of equations AX = B where A is symmetric. This
+ * function uses the external LAPACK routine dsysv.
+ *
+ * The external routine dsysv_ takes every argument through a pointer, so the
+ * by-value arguments of this wrapper are passed by address. The INFO value
+ * written by dsysv_ is handed back as the return value.
+ *
+ * @param[in] UPLO which triangle of A is stored
+ * @param[in] N order of A
+ * @param[in] NRHS number of columns of B
+ * @param[in,out] A double precision array of size (LDA, N). On
+ * exit contains the block diagonal matrix D and
+ * the multipliers used to obtain the factor U or
+ * L from the factorization A = U*D*U**T or
+ * A = L*D*L**T.
+ * @param[in] LDA leading dimension of A
+ * @param[out] IPIV integer array of size N. On exit contains the
+ * details of the interchanges and the block
+ * structure of D
+ * @param[in,out] B double precision array of size (LDB, NRHS). On
+ * exit contains the N-by-NRHS matrix X
+ * @param[in] LDB leading dimension of B
+ * @param[out] WORK double precision array of size max(1,LWORK). On
+ * exit, WORK(1) contains the optimal LWORK
+ * @param[in] LWORK the length of WORK, can be used for determining
+ * the optimal blocksize for dsytrf.
+ * @returns info parameter which contains the status of the
+ * computation:
+ * - =0: success
+ * - <0: if -i, the i-th argument had an
+ * illegal value
+ * - >0: if i, D(i, i) is exactly zero,
+ * no solution can be computed.
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html
+ */
+int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
+ double *B, int LDB, double *WORK, int LWORK)
+{
+ extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A,
+ int *LDAp, int *IPIV, double *B, int *LDBp,
+ double *WORK, int *LWORK, int *INFOp);
+ int INFO;
+ dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO);
+ return INFO;
+}
+
+/**
+ * @brief Compute the Cholesky factorization of a real symmetric positive
+ * definite matrix.
+ *
+ * @details
+ * This function uses the external LAPACK routine dpotrf.
+ *
+ * The external routine dpotrf_ takes every argument through a pointer, so the
+ * by-value arguments of this wrapper are passed by address. The INFO value
+ * written by dpotrf_ is handed back as the return value.
+ *
+ * @param[in] UPLO which triangle of A is stored
+ * @param[in] N order of A
+ * @param[in,out] A double precision array of size (LDA, N). On
+ * exit contains the factor U or L of the Cholesky
+ * factorization
+ * @param[in] LDA leading dimension of A
+ * @returns info parameter which contains the status of the
+ * computation:
+ * - =0: success
+ * - <0: if -i, the i-th argument had an
+ * illegal value
+ * - >0: if i, the leading minor of
+ * order i is not positive
+ * definite
+ *
+ * See the LAPACK documentation at:
+ * http://www.netlib.org/lapack/explore-html/d0/d8a/dpotrf_8f.html
+ */
+int dpotrf(char UPLO, int N, double *A, int LDA)
+{
+ extern void dpotrf_(char *UPLO, int *N, double *A, int *LDA, int *INFOp);
+ int INFO;
+ dpotrf_(&UPLO, &N, A, &LDA, &INFO);
+ return INFO;
+}
diff --git a/src/msvmmaj_matrix.c b/src/msvmmaj_matrix.c
new file mode 100644
index 0000000..ffa0c21
--- /dev/null
+++ b/src/msvmmaj_matrix.c
@@ -0,0 +1,153 @@
+/**
+ * @file msvmmaj_matrix.c
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Functions facilitating matrix access
+ *
+ * @details
+ * The functions contained in this file are used when
+ * accessing or writing to matrices. Separate functions
+ * exist for adding to and multiplying existing matrix
+ * elements, to ensure this is done in place.
+ *
+ */
+
+#include "msvmmaj_matrix.h"
+#include "util.h"
+
+/**
+ * @brief Write a value to a matrix element
+ *
+ * @details
+ * The matrix is addressed in row-major order: element (i, j) lives at
+ * offset i*cols + j of M. Only matrices of type double are supported.
+ *
+ * @param[out] 	M 	matrix to write to
+ * @param[in] 	cols 	number of columns of M
+ * @param[in] 	i 	row index of the element to write
+ * @param[in] 	j 	column index of the element to write
+ * @param[in] 	val 	value stored in element (i, j) of M
+ */
+void matrix_set(double *M, long cols, long i, long j, double val)
+{
+	double *elem = M + (i*cols + j);
+
+	*elem = val;
+}
+
+/**
+ * @brief Read a value from a matrix
+ *
+ * @details
+ * The matrix is addressed in row-major order: element (i, j) lives at
+ * offset i*cols + j of M.
+ *
+ * @param[in] 	M 	matrix to read from
+ * @param[in] 	cols 	number of columns of M
+ * @param[in] 	i 	row index (starting from 0)
+ * @param[in] 	j 	column index (starting from 0)
+ * @returns 		matrix element at (i, j)
+ */
+double matrix_get(double *M, long cols, long i, long j)
+{
+	const long offset = i*cols + j;
+
+	return *(M + offset);
+}
+
+/**
+ * @brief Increase a matrix element by a value
+ *
+ * @details
+ * Adds val in place to element (i, j) of the row-major matrix M, so that
+ * no separate get and set calls are needed.
+ *
+ * @param[in,out] 	M 	matrix
+ * @param[in] 		cols 	number of columns of M
+ * @param[in] 		i 	row index (starting from 0)
+ * @param[in] 		j 	column index (starting from 0)
+ * @param[in] 		val 	value to add to matrix element (i, j)
+ */
+void matrix_add(double *M, long cols, long i, long j, double val)
+{
+	const long offset = i*cols + j;
+
+	M[offset] = M[offset] + val;
+}
+
+/**
+ * @brief Scale a matrix element by a value
+ *
+ * @details
+ * Multiplies element (i, j) of the row-major matrix M in place by val, so
+ * that no separate get and set calls are needed.
+ *
+ * @param[in,out] 	M 	matrix
+ * @param[in] 		cols 	number of columns of M
+ * @param[in] 		i 	row index (starting from 0)
+ * @param[in] 		j 	column index (starting from 0)
+ * @param[in] 		val 	value to multiply matrix element (i, j) with
+ */
+void matrix_mul(double *M, long cols, long i, long j, double val)
+{
+	double *elem = &M[i*cols + j];
+
+	*elem = *elem * val;
+}
+
+/**
+ * @brief Write a value to an element of a 3D matrix
+ *
+ * @details
+ * The 3D matrix is addressed in row-major order: element (i, j, k) lives at
+ * offset (i*N2 + j)*N3 + k of M.
+ *
+ * @param[out] 	M 	matrix
+ * @param[in] 	N2 	second dimension of M
+ * @param[in] 	N3 	third dimension of M
+ * @param[in] 	i 	index along first dimension
+ * @param[in] 	j 	index along second dimension
+ * @param[in] 	k 	index along third dimension
+ * @param[in] 	val 	value to set element (i, j, k) to
+ *
+ * See:
+ * http://en.wikipedia.org/wiki/Row-major_order
+ */
+void matrix3_set(double *M, long N2, long N3, long i, long j,
+		long k, double val)
+{
+	const long plane = j + N2*i;
+	const long offset = k + N3*plane;
+
+	M[offset] = val;
+}
+
+/**
+ * @brief Read an element of a 3D matrix
+ *
+ * @details
+ * The 3D matrix is addressed in row-major order: element (i, j, k) lives at
+ * offset (i*N2 + j)*N3 + k of M.
+ *
+ * @param[in] 	M 	matrix
+ * @param[in] 	N2 	second dimension of M
+ * @param[in] 	N3 	third dimension of M
+ * @param[in] 	i 	index along first dimension
+ * @param[in] 	j 	index along second dimension
+ * @param[in] 	k 	index along third dimension
+ * @returns 		value at the (i, j, k) element of M
+ */
+double matrix3_get(double *M, long N2, long N3, long i, long j,
+		long k)
+{
+	const long plane = j + N2*i;
+	const long offset = k + N3*plane;
+
+	return M[offset];
+}
+
+/**
+ * @brief Print a matrix
+ *
+ * @details
+ * Debug helper which writes the row-major matrix M through note(), one
+ * matrix row per output line, followed by an empty line.
+ *
+ * @param[in] 	M 	matrix
+ * @param[in] 	rows 	number of rows of M
+ * @param[in] 	cols 	number of columns of M
+ */
+void print_matrix(double *M, long rows, long cols)
+{
+	long r = 0, c;
+
+	while (r < rows) {
+		// element (r, c) is stored at offset r*cols + c
+		for (c=0; c<cols; c++)
+			note("%8.8f ", M[r*cols+c]);
+		note("\n");
+		r++;
+	}
+	note("\n");
+}
diff --git a/src/msvmmaj_pred.c b/src/msvmmaj_pred.c
index 5f1b1ae..98b6e0a 100644
--- a/src/msvmmaj_pred.c
+++ b/src/msvmmaj_pred.c
@@ -1,31 +1,36 @@
/**
* @file msvmmaj_pred.c
- * @author Gertjan van den Burg (burg@ese.eur.nl)
+ * @author Gertjan van den Burg
* @date August 9, 2013
* @brief Main functions for predicting class labels..
*
+ * @details
+ * This file contains functions for predicting the class labels of instances
+ * and a function for calculating the predictive performance (hitrate) of
+ * a prediction given true class labels.
+ *
*/
#include <cblas.h>
#include "libMSVMMaj.h"
-#include "MSVMMaj.h"
-#include "matrix.h"
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
#include "msvmmaj_pred.h"
/**
- * @name predict_labels
* @brief Predict class labels of data given and output in predy
*
+ * @details
* The labels are predicted by mapping each instance in data to the
* simplex space using the matrix V in the given model. Next, for each
* instance the nearest simplex vertex is determined using an Euclidean
* norm. The nearest simplex vertex determines the predicted class label,
- * which is recorded in predy
+ * which is recorded in predy.
*
- * @param [in] data data to predict labels for
- * @param [in] model model with optimized V
- * @param [out] predy pre-allocated vector to record predictions in
+ * @param[in] data MajData to predict labels for
+ * @param[in] model MajModel with optimized V
+ * @param[out] predy pre-allocated vector to record predictions in
*/
void msvmmaj_predict_labels(struct MajData *data, struct MajModel *model, long *predy)
{
@@ -84,15 +89,15 @@ void msvmmaj_predict_labels(struct MajData *data, struct MajModel *model, long *
}
/**
- * @name msvmmaj_prediction_perf
* @brief Calculate the predictive performance (percentage correct)
*
+ * @details
* The predictive performance is calculated by simply counting the number
* of correctly classified samples and dividing by the total number of
* samples, multiplying by 100.
*
- * @param [in] data the dataset with known labels
- * @param [in] predy the predicted class labels
+ * @param[in] data the MajData dataset with known labels
+ * @param[in] predy the predicted class labels
*
* @returns percentage correctly classified.
*/
diff --git a/src/msvmmaj_train.c b/src/msvmmaj_train.c
index 272d86a..97ee6a1 100644
--- a/src/msvmmaj_train.c
+++ b/src/msvmmaj_train.c
@@ -1,6 +1,6 @@
/**
* @file msvmmaj_train.c
- * @author Gertjan van den Burg (burg@ese.eur.nl)
+ * @author Gertjan van den Burg
* @date August 9, 2013
* @brief Main functions for training the MSVMMaj solution.
*
@@ -13,25 +13,34 @@
#include <math.h>
#include <cblas.h>
-#include "msvmmaj_train.h"
-#include "MSVMMaj.h"
#include "libMSVMMaj.h"
-#include "mylapack.h"
-#include "matrix.h"
+#include "msvmmaj.h"
+#include "msvmmaj_lapack.h"
+#include "msvmmaj_matrix.h"
+#include "msvmmaj_train.h"
#include "util.h"
+/**
+ * Maximum number of iterations of the algorithm.
+ */
#define MAX_ITER 1000000
/**
- * @name msvmmaj_optimize
* @brief The main training loop for MSVMMaj
*
- * The msvmmaj_optimize() function is the main training function. This function
+ * @details
+ * This function is the main training function. This function
* handles the optimization of the model with the given model parameters, with
- * the data given. On return the matrix model->V contains the optimal weight matrix.
+ * the data given. On return the matrix MajModel::V contains the optimal
+ * weight matrix.
+ *
+ * In this function, step doubling is used in the majorization algorithm after
+ * a burn-in of 50 iterations. If the training is finished, MajModel::t and
+ * MajModel::W are extracted from MajModel::V.
*
- * @param [in,out] model the model to be trained. Contains optimal V on exit.
- * @param [in] data the data to train the model with.
+ * @param[in,out] model the MajModel to be trained. Contains optimal
+ * V on exit.
+ * @param[in] data the MajData to train the model with.
*/
void msvmmaj_optimize(struct MajModel *model, struct MajData *data)
{
@@ -49,7 +58,7 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data)
double *ZAZVT = Calloc(double, (m+1)*(K-1));
note("Starting main loop.\n");
- note("MajDataset:\n");
+ note("Dataset:\n");
note("\tn = %i\n", n);
note("\tm = %i\n", m);
note("\tK = %i\n", K);
@@ -78,8 +87,8 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data)
L = msvmmaj_get_loss(model, data, ZV);
if (it%50 == 0)
- note("iter = %li, L = %15.16f, Lbar = %15.16f, reldiff = %15.16f\n",
- it, L, Lbar, (Lbar - L)/L);
+ note("iter = %li, L = %15.16f, Lbar = %15.16f, "
+ "reldiff = %15.16f\n", it, L, Lbar, (Lbar - L)/L);
it++;
}
@@ -91,7 +100,8 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data)
model->t[i] = matrix_get(model->V, K-1, 0, i);
for (i=1; i<m+1; i++)
for (j=0; j<K-1; j++)
- matrix_set(model->W, K-1, i-1, j, matrix_get(model->V, K-1, i, j));
+ matrix_set(model->W, K-1, i-1, j,
+ matrix_get(model->V, K-1, i, j));
free(B);
free(ZV);
free(ZAZ);
@@ -100,19 +110,22 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data)
}
/**
- * @name msvmmaj_get_loss
- * @brief calculate the current value of the loss function
+ * @brief Calculate the current value of the loss function
*
- * The current loss value is calculated based on the matrix V in the given
- * model.
+ * @details
+ * The current loss function value is calculated based on the matrix V in the
+ * given model. Note that the matrix ZV is passed explicitly to avoid having
+ * to reallocate memory at every step.
*
- * @param [in] model model structure which holds the current estimate V
- * @param [in] data data structure
- * @param [in,out] ZV pre-allocated matrix ZV which is updated on output
- *
- * @return the current value of the loss function
+ * @param[in] model MajModel structure which holds the current
+ * estimate V
+ * @param[in] data MajData structure
+ * @param[in,out] ZV pre-allocated matrix ZV which is updated on
+ * output
+ * @returns the current value of the loss function
*/
-double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV)
+double msvmmaj_get_loss(struct MajModel *model, struct MajData *data,
+ double *ZV)
{
long i, j;
long n = data->n;
@@ -151,10 +164,52 @@ double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV
}
/**
- * @name msvmmaj_get_update
- * @brief perform a single step of the majorization algorithm to update V
+ * @brief Perform a single step of the majorization algorithm to update V
+ *
+ * @details
+ * This function contains the main update calculations of the algorithm. These
+ * calculations are necessary to find a new update V. The calculations consist of
+ * recalculating the majorization coefficients for all instances and all
+ * classes, and solving a linear system to find V.
+ *
+ * Because the function msvmmaj_get_update() is always called after a call to
+ * msvmmaj_get_loss() with the same MajModel::V, it is unnecessary to calculate
+ * the updated errors MajModel::Q and MajModel::H here too. This saves on
+ * computation time.
*
- * details
+ * In calculating the majorization coefficients we calculate the elements of a
+ * diagonal matrix A with elements
+ * @f[
+ * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
+ * \varepsilon_i a_{ijk}^{(p)} + (1 - \varepsilon_i) \omega_i
+ * a_{ijk}^{(p)} \right],
+ * @f]
+ * where @f$ k = y_i @f$.
+ * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it is
+ * efficient to update a matrix ZAZ through consecutive rank 1 updates with
+ * a single element of A and the corresponding row of Z. The BLAS function
+ * dsyr is used for this.
+ *
+ * The B matrix has rows
+ * @f[
+ * \boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
+ * \varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)}
+ * \overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i)
+ * \omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)}
+ * \overline{q}_i^{(kj)} \right) \right]
+ * \boldsymbol{\delta}_{kj}'
+ * @f]
+ * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$,
+ * and one for when it is 0. The 3D simplex difference matrix is used here, in
+ * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$.
+ *
+ * Finally, the following system is solved
+ * @f[
+ * (\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} =
+ * (\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}'
+ * \textbf{B})
+ * @f]
+ * solving this system is done through dposv().
*
* @param [in,out] model model to be updated
* @param [in] data data used in model
@@ -166,9 +221,6 @@ double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV
void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
double *ZAZ, double *ZAZV, double *ZAZVT)
{
- // Because msvmmaj_update is always called after a call to
- // msvmmaj_get_loss() with the latest V, it is unnecessary to recalculate
- // the matrix ZV, the errors Q, or the Huber errors H. Awesome!
int status, class;
long i, j, k;
double Avalue, Bvalue;
@@ -182,11 +234,14 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
double p = model->p;
double *rho = model->rho;
+ // constants which are used often throughout
const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0);
const double in = 1.0/((double) n);
+ // clear matrices
Memset(B, double, n*(K-1));
Memset(ZAZ, double, (m+1)*(m+1));
+
b = 0;
for (i=0; i<n; i++) {
value = 0;
@@ -215,7 +270,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
b = 0;
}
for (k=0; k<K-1; k++) {
- Bvalue = in*rho[i]*b*matrix3_get(model->UU, K-1, K, i, k, j);
+ Bvalue = in*rho[i]*b*matrix3_get(
+ model->UU, K-1, K, i, k, j);
matrix_add(B, K-1, i, k, Bvalue);
}
Avalue += a*matrix_get(model->R, K, i, j);
@@ -227,13 +283,27 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
if (q <= -kappa) {
b = 0.5 - kappa/2.0 - q;
} else if ( q <= 1.0) {
- b = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0));
+ b = pow(1.0 - q, 3.0)/(
+ 2.0*pow(kappa + 1.0,
+ 2.0));
} else {
b = 0;
}
for (k=0; k<K-1; k++) {
- Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j);
- matrix_add(B, K-1, i, k, Bvalue);
+ Bvalue = in*rho[i]*omega*b*
+ matrix3_get(
+ model->UU,
+ K-1,
+ K,
+ i,
+ k,
+ j);
+ matrix_add(
+ B,
+ K-1,
+ i,
+ k,
+ Bvalue);
}
}
Avalue = 1.5*(K - 1.0);
@@ -241,23 +311,51 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
for (j=0; j<K; j++) {
q = matrix_get(model->Q, K, i, j);
if (q <= (p + kappa - 1.0)/(p - 2.0)) {
- a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0);
+ a = 0.25*pow(p, 2.0)*pow(
+ 0.5 - kappa/2.0 - q,
+ p - 2.0);
} else if (q <= 1.0) {
a = a2g2;
} else {
- a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 2.0);
- b = a*(2.0*q + kappa - 1.0)/(p - 2.0) + 0.5*p*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 1.0);
+ a = 0.25*pow(p, 2.0)*pow(
+ (p/(p - 2.0))*
+ (0.5 - kappa/2.0 - q),
+ p - 2.0);
+ b = a*(2.0*q + kappa - 1.0)/
+ (p - 2.0) +
+ 0.5*p*pow(
+ p/(p - 2.0)*
+ (0.5 - kappa/
+ 2.0 - q),
+ p - 1.0);
}
if (q <= -kappa) {
- b = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0);
+ b = 0.5*p*pow(
+ 0.5 - kappa/2.0 - q,
+ p - 1.0);
} else if ( q <= 1.0) {
- b = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p);
+ b = p*pow(1.0 - q,
+ 2.0*p - 1.0)/
+ pow(2*kappa+2.0, p);
}
for (k=0; k<K-1; k++) {
- Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j);
- matrix_add(B, K-1, i, k, Bvalue);
+ Bvalue = in*rho[i]*omega*b*
+ matrix3_get(
+ model->UU,
+ K-1,
+ K,
+ i,
+ k,
+ j);
+ matrix_add(
+ B,
+ K-1,
+ i,
+ k,
+ Bvalue);
}
- Avalue += a*matrix_get(model->R, K, i, j);
+ Avalue += a*matrix_get(model->R,
+ K, i, j);
}
}
Avalue *= omega;
@@ -352,7 +450,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
// This step should not be necessary, as the matrix
// ZAZ is positive semi-definite by definition. It
// is included for safety.
- fprintf(stderr, "Received nonzero status from dposv: %i\n", status);
+ fprintf(stderr, "Received nonzero status from dposv: %i\n",
+ status);
int *IPIV = malloc((m+1)*sizeof(int));
double *WORK = malloc(1*sizeof(double));
status = dsysv(
@@ -379,7 +478,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
WORK,
sizeof(WORK)/sizeof(double));
if (status != 0)
- fprintf(stderr, "Received nonzero status from dsysv: %i\n", status);
+ fprintf(stderr, "Received nonzero status from "
+ "dsysv: %i\n", status);
}
// Return to Row-major order. The matrix ZAZVT contains the
@@ -403,8 +503,18 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B,
for (i=0; i<m+1; i++) {
for (j=0; j<K-1; j++) {
- matrix_set(model->Vbar, K-1, i, j, matrix_get(model->V, K-1, i, j));
- matrix_set(model->V, K-1, i, j, matrix_get(ZAZV, K-1, i, j));
+ matrix_set(
+ model->Vbar,
+ K-1,
+ i,
+ j,
+ matrix_get(model->V, K-1, i, j));
+ matrix_set(
+ model->V,
+ K-1,
+ i,
+ j,
+ matrix_get(ZAZV, K-1, i, j));
}
}
}
diff --git a/src/msvmmaj_train_dataset.c b/src/msvmmaj_train_dataset.c
index 2da8bee..4f5f4d9 100644
--- a/src/msvmmaj_train_dataset.c
+++ b/src/msvmmaj_train_dataset.c
@@ -1,22 +1,53 @@
+/**
+ * @file msvmmaj_train_dataset.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Functions for finding the optimal parameters for the dataset
+ *
+ * @details
+ * The MSVMMaj algorithm takes a number of parameters. The functions in
+ * this file are used to find the optimal parameters.
+ */
+
#include <math.h>
#include <time.h>
#include "crossval.h"
#include "libMSVMMaj.h"
-#include "matrix.h"
+#include "msvmmaj.h"
+#include "msvmmaj_init.h"
+#include "msvmmaj_matrix.h"
#include "msvmmaj_train.h"
#include "msvmmaj_train_dataset.h"
#include "msvmmaj_pred.h"
-#include "MSVMMaj.h"
#include "util.h"
#include "timer.h"
extern FILE *MSVMMAJ_OUTPUT_FILE;
+/**
+ * @brief Initialize a Queue from a Training instance
+ *
+ * @details
+ * A Training instance describes the grid to search over. This function
+ * creates all tasks that need to be performed and adds these to
+ * a Queue. Each task contains a pointer to the train and test datasets
+ * which are supplied. Note that the tasks are created in a specific order of
+ * the parameters, to ensure that the MajModel::V of a previous parameter
+ * set provides the best possible initial estimate of MajModel::V for the next
+ * parameter set.
+ *
+ * @param[in] training Training struct describing the grid search
+ * @param[in] queue pointer to a Queue that will be used to
+ * add the tasks to
+ * @param[in] train_data MajData of the training set
+ * @param[in] test_data MajData of the test set
+ *
+ */
void make_queue(struct Training *training, struct Queue *queue,
struct MajData *train_data, struct MajData *test_data)
{
- long i, j, k, l, m;
+ long i, j, k;
long N, cnt = 0;
struct Task *task;
queue->i = 0;
@@ -26,30 +57,122 @@ void make_queue(struct Training *training, struct Queue *queue,
N *= training->Nk;
N *= training->Ne;
N *= training->Nw;
+ // these parameters are not necessarily non-zero
+ N *= training->Ng > 0 ? training->Ng : 1;
+ N *= training->Nc > 0 ? training->Nc : 1;
+ N *= training->Nd > 0 ? training->Nd : 1;
queue->tasks = Malloc(struct Task *, N);
queue->N = N;
- for (i=0; i<training->Ne; i++)
+ // initialize all tasks
+ for (i=0; i<N; i++) {
+ task = Malloc(struct Task, 1);
+ task->ID = i;
+ task->train_data = train_data;
+ task->test_data = test_data;
+ task->folds = training->folds;
+ task->kerneltype = training->kerneltype;
+ task->kernel_param = Calloc(double, training->Ng +
+ training->Nc + training->Nd);
+ queue->tasks[i] = task;
+ }
+
+ // These loops mimic a large nested for loop. The advantage is that
+ // Nd, Nc and Ng which are on the outside of the nested for loop can
+ // now be zero, without large modification (see below). Whether this
+ // is indeed better than the nested for loop has not been tested.
+ cnt = 1;
+ i = 0;
+ while (i < N )
+ for (j=0; j<training->Np; j++)
+ for (k=0; k<cnt; k++) {
+ queue->tasks[i]->p = training->ps[j];
+ i++;
+ }
+
+ cnt *= training->Np;
+ i = 0;
+ while (i < N )
+ for (j=0; j<training->Nl; j++)
+ for (k=0; k<cnt; k++) {
+ queue->tasks[i]->lambda =
+ training->lambdas[j];
+ i++;
+ }
+
+ cnt *= training->Nl;
+ i = 0;
+ while (i < N )
+ for (j=0; j<training->Nk; j++)
+ for (k=0; k<cnt; k++) {
+ queue->tasks[i]->kappa = training->kappas[j];
+ i++;
+ }
+
+ cnt *= training->Nk;
+ i = 0;
+ while (i < N )
for (j=0; j<training->Nw; j++)
- for (k=0; k<training->Nk; k++)
- for (l=0; l<training->Nl; l++)
- for (m=0; m<training->Np; m++) {
- task = Malloc(struct Task, 1);
- task->epsilon = training->epsilons[i];
- task->weight_idx = training->weight_idxs[j];
- task->kappa = training->kappas[k];
- task->lambda = training->lambdas[l];
- task->p = training->ps[m];
- task->train_data = train_data;
- task->test_data = test_data;
- task->folds = training->folds;
- task->ID = cnt;
- queue->tasks[cnt] = task;
- cnt++;
- }
+ for (k=0; k<cnt; k++) {
+ queue->tasks[i]->weight_idx =
+ training->weight_idxs[j];
+ i++;
+ }
+
+ cnt *= training->Nw;
+ i = 0;
+ while (i < N )
+ for (j=0; j<training->Ne; j++)
+ for (k=0; k<cnt; k++) {
+ queue->tasks[i]->epsilon =
+ training->epsilons[j];
+ i++;
+ }
+
+ cnt *= training->Ne;
+ i = 0;
+ while (i < N && training->Ng > 0)
+ for (j=0; j<training->Ng; j++)
+ for (k=0; k<cnt; k++) {
+ queue->tasks[i]->kernel_param[0] =
+ training->gammas[j];
+ i++;
+ }
+
+ cnt *= training->Ng > 0 ? training->Ng : 1;
+ i = 0;
+ while (i < N && training->Nc > 0)
+ for (j=0; j<training->Nc; j++)
+ for (k=0; k<cnt; k++) {
+ queue->tasks[i]->kernel_param[1] =
+ training->coefs[j];
+ i++;
+ }
+
+	// cnt is the run length for the next parameter: the product of the
+	// grid sizes of all parameters assigned so far. The coef dimension
+	// therefore contributes Nc (when it is used at all), not Ng.
+	cnt *= training->Nc > 0 ? training->Nc : 1;
+	i = 0;
+	// assign each degree to runs of cnt consecutive tasks, cycling
+	// through the degrees until all N tasks are covered
+	while (i < N && training->Nd > 0)
+		for (j=0; j<training->Nd; j++)
+			for (k=0; k<cnt; k++) {
+				queue->tasks[i]->kernel_param[2] =
+					training->degrees[j];
+				i++;
+			}
}
+/**
+ * @brief Get new Task from Queue
+ *
+ * @details
+ * Return a pointer to the next Task in the Queue. If no Task instances are
+ * left, NULL is returned. The internal counter Queue::i is used for finding
+ * the next Task.
+ *
+ * @param[in] q Queue instance
+ * @returns pointer to next Task
+ *
+ */
struct Task *get_next_task(struct Queue *q)
{
long i = q->i;
@@ -60,6 +183,19 @@ struct Task *get_next_task(struct Queue *q)
return NULL;
}
+/**
+ * @brief Comparison function for Tasks based on performance
+ *
+ * @details
+ * To be able to sort Task structures on the performance of their specific
+ * set of parameters, this comparison function is implemented. When used with
+ * qsort(), Task structs are sorted in ascending order of performance, so the
+ * best performing tasks end up at the end of the array.
+ *
+ * @param[in] elem1 Task 1
+ * @param[in] elem2 Task 2
+ * @returns result of inequality of Task 1 performance over
+ * Task 2 performance
+ */
int tasksort(const void *elem1, const void *elem2)
{
const struct Task *t1 = (*(struct Task **) elem1);
@@ -67,6 +203,16 @@ int tasksort(const void *elem1, const void *elem2)
return (t1->performance > t2->performance);
}
+/**
+ * @brief Comparison function for doubles
+ *
+ * @details
+ * Similar to tasksort() only now for two doubles.
+ *
+ * @param[in] elem1 number 1
+ * @param[in] elem2 number 2
+ * @returns comparison of number 1 larger than number 2
+ */
int doublesort(const void *elem1, const void *elem2)
{
const double t1 = (*(double *) elem1);
@@ -74,7 +220,20 @@ int doublesort(const void *elem1, const void *elem2)
return t1 > t2;
}
-
+/**
+ * @brief Calculate the percentile of an array of doubles
+ *
+ * @details
+ * The percentile of performance is used to find the top performing
+ * configurations. Since no standard definition of the percentile exists, we
+ * use the method used in MATLAB and Octave. Since calculating the percentile
+ * requires a sorted list of the values, a local copy is made first.
+ *
+ * @param[in] values array of doubles
+ * @param[in] N length of the array
+ * @param[in] p percentile to calculate ( 0 <= p <= 1.0 ).
+ * @returns the p-th percentile of the values
+ */
double prctile(double *values, long N, double p)
{
long i;
@@ -94,16 +253,50 @@ double prctile(double *values, long N, double p)
return boundary;
}
+/**
+ * @brief Run repeats of the Task structs in Queue to find the best
+ * configuration
+ *
+ * @details
+ * The best performing tasks in the supplied Queue are found by taking those
+ * Task structs that have a performance greater or equal to the 95% percentile
+ * of the performance of all tasks. These tasks are then gathered in a new
+ * Queue. For each of the tasks in this new Queue the cross validation run is
+ * repeated a number of times.
+ *
+ * For each of the Task configurations that are repeated the mean performance,
+ * standard deviation of the performance and the mean computation time are
+ * reported.
+ *
+ * Finally, the overall best tasks are written to the specified output. These
+ * tasks are selected to have both the highest mean performance, as well as the
+ * smallest standard deviation in their performance. This is done as follows.
+ * First the 99th percentile of task performance and the 1st percentile of
+ * standard deviation is calculated. If a task exists for which the mean
+ * performance of the repeats and the standard deviation equals these values
+ * respectively, this task is found to be the best and is written to the
+ * output. If no such task exists, the 98th percentile of performance and the
+ * 2nd percentile of standard deviation is considered. This is repeated until
+ * an interval is found which contains tasks. If one or more tasks are found,
+ * this loop stops.
+ *
+ * @param[in] q Queue of Task structs which have already been
+ * run and have a Task::performance value
+ * @param[in] repeats Number of times to repeat the best
+ * configurations for consistency
+ * @param[in] traintype type of training to do (CV or TT)
+ *
+ */
void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
{
long i, r, N;
double p, pi, pr, boundary, time, *std, *mean, *perf;
struct Queue *nq = Malloc(struct Queue, 1);
- struct MajModel *model = Malloc(struct MajModel, 1);
+ struct MajModel *model = msvmmaj_init_model();
struct Task *task = Malloc(struct Task, 1);
clock_t loop_s, loop_e;
- // calculate the percentile (Matlab style)
+ // calculate the performance percentile (Matlab style)
qsort(q->tasks, q->N, sizeof(struct Task *), tasksort);
p = 0.95*q->N + 0.5;
pi = maximum(minimum(floor(p), q->N-1), 1);
@@ -111,7 +304,9 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
boundary = (1 - pr)*q->tasks[((long) pi)-1]->performance;
boundary += pr*q->tasks[((long) pi)]->performance;
note("boundary determined at: %f\n", boundary);
-
+
+ // find the number of tasks that perform at least as good as the 95th
+ // percentile
N = 0;
for (i=0; i<q->N; i++)
if (q->tasks[i]->performance >= boundary)
@@ -121,12 +316,14 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
mean = Calloc(double, N);
perf = Calloc(double, N*repeats);
+ // create a new task queue with the tasks which perform well
nq->tasks = Malloc(struct Task *, N);
for (i=q->N-1; i>q->N-N-1; i--)
nq->tasks[q->N-i-1] = q->tasks[i];
nq->N = N;
nq->i = 0;
+ // for each task run the consistency repeats
for (i=0; i<N; i++) {
task = get_next_task(nq);
make_model_from_task(task, model);
@@ -140,7 +337,8 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
for (r=0; r<repeats; r++) {
if (traintype == CV) {
loop_s = clock();
- p = cross_validation(model, NULL, task->train_data, task->folds);
+ p = cross_validation(model, NULL,
+ task->train_data, task->folds);
loop_e = clock();
time += elapsed_time(loop_s, loop_e);
matrix_set(perf, repeats, i, r, p);
@@ -152,15 +350,24 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
note("%3.3f\t", p);
}
for (r=0; r<repeats; r++) {
- std[i] += pow(matrix_get(perf, repeats, i, r) - mean[i], 2);
+ std[i] += pow(matrix_get(
+ perf,
+ repeats,
+ i,
+ r) - mean[i],
+ 2.0);
}
std[i] /= ((double) repeats) - 1.0;
std[i] = sqrt(std[i]);
- note("(m = %3.3f, s = %3.3f, t = %3.3f)\n", mean[i], std[i], time);
+ note("(m = %3.3f, s = %3.3f, t = %3.3f)\n",
+ mean[i], std[i], time);
}
+ // find the best overall configurations: those with high average
+ // performance and low deviation in the performance
note("\nBest overall configuration(s):\n");
- note("ID\tweights\tepsilon\t\tp\t\tkappa\t\tlambda\t\tmean_perf\tstd_perf\n");
+ note("ID\tweights\tepsilon\t\tp\t\tkappa\t\tlambda\t\t"
+ "mean_perf\tstd_perf\n");
p = 0.0;
bool breakout = false;
while (breakout == false) {
@@ -168,13 +375,17 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
pr = prctile(std, N, p/100.0);
for (i=0; i<N; i++)
if ((pi - mean[i] < 0.0001) && (std[i] - pr < 0.0001)) {
- note("(%li)\tw = %li\te = %f\tp = %f\tk = %f\tl = %f\t"
+ note("(%li)\tw = %li\te = %f\tp = %f\t"
+ "k = %f\tl = %f\t"
"mean: %3.3f\tstd: %3.3f\n",
nq->tasks[i]->ID,
nq->tasks[i]->weight_idx,
- nq->tasks[i]->epsilon, nq->tasks[i]->p,
- nq->tasks[i]->kappa, nq->tasks[i]->lambda,
- mean[i], std[i]);
+ nq->tasks[i]->epsilon,
+ nq->tasks[i]->p,
+ nq->tasks[i]->kappa,
+ nq->tasks[i]->lambda,
+ mean[i],
+ std[i]);
breakout = true;
}
p += 1.0;
@@ -187,6 +398,30 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype)
free(mean);
}
+/**
+ * @brief Run cross validation with a seed model
+ *
+ * @details
+ * This is an implementation of cross validation which uses the optimal
+ * parameters MajModel::V of a previous fold as initial conditions for
+ * MajModel::V of the next fold. An initial seed for V can be given through the
+ * seed_model parameter. If seed_model is NULL, random starting values are
+ * used.
+ *
+ * @todo
+ * The seed model shouldn't have to be allocated completely, since only V is
+ * used.
+ * @todo
+ * There must be some inefficiencies here because the fold model is allocated
+ * at every fold. This would be detrimental with large datasets.
+ *
+ * @param[in] model MajModel with the configuration to train
+ * @param[in] seed_model MajModel with a seed for MajModel::V
+ * @param[in] data MajData with the dataset
+ * @param[in] folds number of cross validation folds
+ * @returns performance (hitrate) of the configuration on
+ * cross validation
+ */
double cross_validation(struct MajModel *model, struct MajModel *seed_model,
struct MajData *data, long folds)
{
@@ -202,7 +437,7 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model,
double *performance = Calloc(double, folds);
if (seed_model == NULL) {
- seed_model = Malloc(struct MajModel, 1);
+ seed_model = msvmmaj_init_model();
seed_model->n = 0; // we never use anything other than V
seed_model->m = model->m;
seed_model->K = model->K;
@@ -211,34 +446,40 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model,
fs = true;
}
- train_data = Malloc(struct MajData, 1);
- test_data = Malloc(struct MajData, 1);
-
+ train_data = msvmmaj_init_data();
+ test_data = msvmmaj_init_data();
+ // create splits
msvmmaj_make_cv_split(model->n, folds, cv_idx);
+
for (f=0; f<folds; f++) {
msvmmaj_get_tt_split(data, train_data, test_data, cv_idx, f);
-
- fold_model = Malloc(struct MajModel, 1);
+ // initialize a model for this fold and copy the model
+ // parameters
+ fold_model = msvmmaj_init_model();
copy_model(model, fold_model);
fold_model->n = train_data->n;
fold_model->m = train_data->m;
fold_model->K = train_data->K;
-
+
+ // allocate, initialize and seed the fold model
msvmmaj_allocate_model(fold_model);
msvmmaj_initialize_weights(train_data, fold_model);
msvmmaj_seed_model_V(seed_model, fold_model);
-
+
+ // train the model (without output)
fid = MSVMMAJ_OUTPUT_FILE;
MSVMMAJ_OUTPUT_FILE = NULL;
msvmmaj_optimize(fold_model, train_data);
MSVMMAJ_OUTPUT_FILE = fid;
+ // calculate predictive performance on test set
predy = Calloc(long, test_data->n);
msvmmaj_predict_labels(test_data, fold_model, predy);
performance[f] = msvmmaj_prediction_perf(test_data, predy);
total_perf += performance[f]/((double) folds);
+ // seed the seed model with the fold model
msvmmaj_seed_model_V(fold_model, seed_model);
free(predy);
@@ -250,6 +491,7 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model,
msvmmaj_free_model(fold_model);
}
+ // if a seed model was allocated before, free it.
if (fs)
msvmmaj_free_model(seed_model);
free(train_data);
@@ -261,12 +503,28 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model,
}
+/**
+ * @brief Run the grid search for a cross validation dataset
+ *
+ * @details
+ * Given a Queue of Task structs to be trained, a grid search is launched to
+ * find the optimal parameter configuration. As is also done within
+ * cross_validation(), the optimal weights of one parameter set are used as
+ * initial estimates for MajModel::V in the next parameter set. Note that to
+ * optimally exploit this feature of the optimization algorithm, the order in
+ * which tasks are considered is important. This is considered in
+ * make_queue().
+ *
+ * The performance found by cross validation is stored in the Task struct.
+ *
+ * @param[in,out] q Queue with Task instances to run
+ */
void start_training_cv(struct Queue *q)
{
double perf, current_max = 0;
struct Task *task = get_next_task(q);
- struct MajModel *seed_model = Malloc(struct MajModel, 1);
- struct MajModel *model = Malloc(struct MajModel, 1);
+ struct MajModel *seed_model = msvmmaj_init_model();
+ struct MajModel *model = msvmmaj_init_model();
clock_t main_s, main_e, loop_s, loop_e;
model->n = task->train_data->n;
@@ -282,13 +540,16 @@ void start_training_cv(struct Queue *q)
main_s = clock();
while (task) {
- note("(%03li/%03li)\tw = %li\te = %f\tp = %f\tk = %f\t l = %f\t",
- task->ID+1, q->N, task->weight_idx, task->epsilon,
+ note("(%03li/%03li)\tw = %li\te = %f\tp = %f\tk = %f\t "
+ "l = %f\t",
+ task->ID+1, q->N, task->weight_idx,
+ task->epsilon,
task->p, task->kappa, task->lambda);
make_model_from_task(task, model);
loop_s = clock();
- perf = cross_validation(model, seed_model, task->train_data, task->folds);
+ perf = cross_validation(model, seed_model, task->train_data,
+ task->folds);
loop_e = clock();
current_max = maximum(current_max, perf);
@@ -308,6 +569,23 @@ void start_training_cv(struct Queue *q)
msvmmaj_free_model(seed_model);
}
+/**
+ * @brief Run the grid search for a train/test dataset
+ *
+ * @details
+ * This function is similar to start_training_cv(), except that the
+ * pre-determined training set is used only once, and the pre-determined test
+ * set is used for validation.
+ *
+ * @todo
+ * It would probably be better to train the model on the training set using
+ * cross validation and only use the test set when comparing with other
+ * methods. The way it is now, you're finding out which parameters predict
+ * _this_ test set best, which is not what you want.
+ *
+ * @param[in] q Queue with Task structs to run
+ *
+ */
void start_training_tt(struct Queue *q)
{
FILE *fid;
@@ -317,7 +595,7 @@ void start_training_tt(struct Queue *q)
double total_perf, current_max = 0;
struct Task *task = get_next_task(q);
- struct MajModel *seed_model = Malloc(struct MajModel, 1);
+ struct MajModel *seed_model = msvmmaj_init_model();
clock_t main_s, main_e;
clock_t loop_s, loop_e;
@@ -334,7 +612,7 @@ void start_training_tt(struct Queue *q)
c+1, q->N, task->weight_idx, task->epsilon,
task->p, task->kappa, task->lambda);
loop_s = clock();
- struct MajModel *model = Malloc(struct MajModel, 1);
+ struct MajModel *model = msvmmaj_init_model();
make_model_from_task(task, model);
model->n = task->train_data->n;
@@ -374,15 +652,37 @@ void start_training_tt(struct Queue *q)
msvmmaj_free_model(seed_model);
}
+/**
+ * @brief Free the Queue struct
+ *
+ * @details
+ * Freeing the allocated memory of the Queue means freeing every Task struct
+ * and then freeing the Queue.
+ *
+ * @param[in] q Queue to be freed
+ *
+ */
void free_queue(struct Queue *q)
{
long i;
- for (i=0; i<q->N; i++)
+ for (i=0; i<q->N; i++) {
+ free(q->tasks[i]->kernel_param);
free(q->tasks[i]);
+ }
free(q->tasks);
free(q);
}
+/**
+ * @brief Copy parameters from Task to MajModel
+ *
+ * @details
+ * A Task struct only contains the parameters of the MajModel to be estimated.
+ * This function is used to copy these parameters.
+ *
+ * @param[in] task Task instance with parameters
+ * @param[in,out] model MajModel to which the parameters are copied
+ */
void make_model_from_task(struct Task *task, struct MajModel *model)
{
model->weight_idx = task->weight_idx;
@@ -392,6 +692,16 @@ void make_model_from_task(struct Task *task, struct MajModel *model)
model->lambda = task->lambda;
}
+/**
+ * @brief Copy model parameters between two MajModel structs
+ *
+ * @details
+ * The parameters copied are MajModel::weight_idx, MajModel::epsilon,
+ * MajModel::p, MajModel::kappa, and MajModel::lambda.
+ *
+ * @param[in] from MajModel to copy parameters from
+ * @param[in,out] to MajModel to copy parameters to
+ */
void copy_model(struct MajModel *from, struct MajModel *to)
{
to->weight_idx = from->weight_idx;
diff --git a/src/mylapack.c b/src/mylapack.c
deleted file mode 100644
index 4a9cf81..0000000
--- a/src/mylapack.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * @file mylapack.c
- * @author Gertjan van den Burg (burg@ese.eur.nl)
- * @date August 9, 2013
- * @brief Utility functions for interacting with LAPACK
- *
- * @details
- * Functions in this file are auxiliary functions which make it easier
- * to use LAPACK functions from liblapack.
- */
-
-#include "mylapack.h"
-
-/**
- * @name dposv
- * @brief Solve a system of equations AX = B where A is symmetric positive definite.
- * @ingroup libMSVMMaj
- *
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html
- */
-int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
- int LDB)
-{
- extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A,
- int *LDAp, double *B, int *LDBp, int *INFOp);
- int INFO;
- dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO);
- return INFO;
-}
-
-/**
- * @name dsysv
- * @brief Solve a system of equations AX = B where A is symmetric.
- * @ingroup libMSVMMaj
- *
- * See the LAPACK documentation at:
- * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html
- */
-int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
- double *B, int LDB, double *WORK, int LWORK)
-{
- extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A,
- int *LDAp, int *IPIV, double *B, int *LDBp,
- double *WORK, int *LWORK, int *INFOp);
- int INFO;
- dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO);
- return INFO;
-}
diff --git a/src/predMSVMMaj.c b/src/predMSVMMaj.c
index 966c7c0..3e3a101 100644
--- a/src/predMSVMMaj.c
+++ b/src/predMSVMMaj.c
@@ -1,17 +1,42 @@
+/**
+ * @file predMSVMMaj.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Command line interface for predicting class labels
+ *
+ * @details
+ * This is a command line program for predicting the class labels or
+ * determining the predictive performance of a pre-determined model on a given
+ * test dataset. The predictive performance can be written to the screen or
+ * the predicted class labels can be written to a specified output file. This
+ * is done using msvmmaj_write_predictions().
+ *
+ * The specified model file must follow the specification given in
+ * msvmmaj_write_model().
+ *
+ * For usage information, see the program help function.
+ *
+ */
+
+#include "msvmmaj.h"
+#include "msvmmaj_init.h"
#include "msvmmaj_pred.h"
-#include "MSVMMaj.h"
#include "util.h"
#define MINARGS 3
extern FILE *MSVMMAJ_OUTPUT_FILE;
+// function declarations
void print_null(const char *s) {}
void exit_with_help();
-void parse_command_line(int argc, char **argv, struct MajModel *model,
+void parse_command_line(int argc, char **argv,
char *input_filename, char *output_filename,
char *model_filename);
+/**
+ * @brief Help function
+ */
void exit_with_help()
{
printf("This is MSVMMaj, version %1.1f\n\n", VERSION);
@@ -22,6 +47,24 @@ void exit_with_help()
exit(0);
}
+/**
+ * @brief Main interface function for predMSVMMaj
+ *
+ * @details
+ * Main interface for the command line program. A given model file is read and
+ * a test dataset is initialized from the given data. The predictive
+ * performance (hitrate) of the model on the test set is printed to the output
+ * stream (default = stdout). If an output file is specified the predictions
+ * are written to the file.
+ *
+ * @todo
+ * Ensure that the program can read data files without class labels
+ * specified. In that case no prediction accuracy is printed to the screen.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ *
+ */
int main(int argc, char **argv)
{
long *predy;
@@ -31,16 +74,15 @@ int main(int argc, char **argv)
char model_filename[MAX_LINE_LENGTH];
char output_filename[MAX_LINE_LENGTH];;
- struct MajModel *model = Malloc(struct MajModel, 1);
- struct MajData *data = Malloc(struct MajData, 1);
-
if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help")
|| msvmmaj_check_argv_eq(argc, argv, "-h") )
exit_with_help();
- parse_command_line(argc, argv, model, input_filename, output_filename,
+ parse_command_line(argc, argv, input_filename, output_filename,
model_filename);
- // TODO: make sure that read_data allows for files without labels
+ // read the data and model
+ struct MajModel *model = msvmmaj_init_model();
+ struct MajData *data = msvmmaj_init_data();
msvmmaj_read_data(data, input_filename);
msvmmaj_read_model(model, model_filename);
@@ -50,8 +92,14 @@ int main(int argc, char **argv)
"does not equal the number of attributes in "
"model (%li)\n", data->m, model->m);
exit(1);
+ } else if (data->K != model->K) {
+ fprintf(stderr, "Error: number of classes in data (%li) "
+ "does not equal the number of classes in "
+ "model (%li)\n", data->K, model->K);
+ exit(1);
}
+ // predict labels and performance if test data has labels
predy = Calloc(long, data->n);
msvmmaj_predict_labels(data, model, predy);
if (data->y != NULL) {
@@ -59,11 +107,13 @@ int main(int argc, char **argv)
note("Predictive performance: %3.2f%%\n", performance);
}
+ // if output file is specified, write predictions to it
if (msvmmaj_check_argv_eq(argc, argv, "-o")) {
msvmmaj_write_predictions(data, predy, output_filename);
note("Predictions written to: %s\n", output_filename);
}
+ // free the model, data, and predictions
msvmmaj_free_model(model);
msvmmaj_free_data(data);
free(predy);
@@ -71,8 +121,26 @@ int main(int argc, char **argv)
return 0;
}
-void parse_command_line(int argc, char **argv, struct MajModel *model,
- char *input_filename, char *output_filename, char *model_filename)
+/**
+ * @brief Parse command line arguments
+ *
+ * @details
+ * Read the data filename and model filename from the command line arguments.
+ * If specified, also read the output filename. If the quiet flag is given,
+ * set the global output stream to NULL. On error, exit_with_help().
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ * @param[out] input_filename pre-allocated array for the input
+ * filename
+ * @param[out] output_filename pre-allocated array for the output
+ * filename
+ * @param[out] model_filename pre-allocated array for the model
+ * filename
+ *
+ */
+void parse_command_line(int argc, char **argv, char *input_filename,
+ char *output_filename, char *model_filename)
{
int i;
@@ -91,7 +159,8 @@ void parse_command_line(int argc, char **argv, struct MajModel *model,
i--;
break;
default:
- fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]);
+ fprintf(stderr, "Unknown option: -%c\n",
+ argv[i-1][1]);
exit_with_help();
}
}
diff --git a/src/strutil.c b/src/strutil.c
index ae96239..ca4181f 100644
--- a/src/strutil.c
+++ b/src/strutil.c
@@ -1,5 +1,24 @@
+/**
+ * @file strutil.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Utility functions for dealing with strings
+ *
+ * @details
+ * This file contains functions for reading files, reading strings from a
+ * format and checking the start and end of strings.
+ */
+
#include "strutil.h"
+/**
+ * @brief Check if a string starts with a prefix
+ *
+ * @param[in] str string
+ * @param[in] pre prefix
+ * @returns boolean, true if string starts with prefix, false
+ * otherwise
+ */
bool str_startswith(const char *str, const char *pre)
{
size_t lenpre = strlen(pre),
@@ -7,19 +26,41 @@ bool str_startswith(const char *str, const char *pre)
return lenstr < lenpre ? false : strncmp(pre, str, lenpre) == 0;
}
+/**
+ * @brief Check if a string ends with a suffix
+ *
+ * @param[in] str string
+ * @param[in] suf suffix
+ * @returns boolean, true if string ends with suffix, false
+ * otherwise
+ */
bool str_endswith(const char *str, const char *suf)
{
size_t lensuf = strlen(suf),
lenstr = strlen(str);
- return lenstr < lensuf ? false : strncmp(str + lenstr - lensuf, suf, lensuf) == 0;
+ return lenstr < lensuf ? false : strncmp(str + lenstr - lensuf, suf,
+ lensuf) == 0;
}
+/**
+ * @brief Move to next line in file
+ *
+ * @param[in] fid File opened for reading
+ * @param[in] filename name of the file pointed to by fid
+ */
void next_line(FILE *fid, char *filename)
{
char buffer[MAX_LINE_LENGTH];
get_line(fid, filename, buffer);
}
+/**
+ * @brief Read line to buffer
+ *
+ * @param[in] fid File opened for reading
+ * @param[in] filename name of the file
+ * @param[in,out] buffer allocated buffer to read to
+ */
void get_line(FILE *fid, char *filename, char *buffer)
{
if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) {
@@ -28,6 +69,14 @@ void get_line(FILE *fid, char *filename, char *buffer)
}
}
+/**
+ * @brief Read a double from file following a format
+ *
+ * @param[in] fid File opened for reading
+ * @param[in] filename Name of the file
+ * @param[in] fmt Format containing a float format
+ * @returns value read (if any)
+ */
double get_fmt_double(FILE *fid, char *filename, const char *fmt)
{
char buffer[MAX_LINE_LENGTH];
@@ -38,6 +87,14 @@ double get_fmt_double(FILE *fid, char *filename, const char *fmt)
return value;
}
+/**
+ * @brief Read a long integer from file following a format
+ *
+ * @param[in] fid File opened for reading
+ * @param[in] filename Name of the file
+ * @param[in] fmt Format containing a long integer format
+ * @returns value read (if any)
+ */
long get_fmt_long(FILE *fid, char *filename, const char *fmt)
{
char buffer[MAX_LINE_LENGTH];
@@ -48,6 +105,20 @@ long get_fmt_long(FILE *fid, char *filename, const char *fmt)
return value;
}
+/**
+ * @brief Read all doubles in a given buffer
+ *
+ * @details
+ * This function is used to read a line of doubles from a buffer. All the
+ * doubles found are stored in a pre-allocated array.
+ *
+ * @param[in] buffer a string buffer
+ * @param[in] offset an offset of the string to start looking for
+ * doubles
+ * @param[out] all_doubles pre-allocated array of doubles (should be large
+ * enough)
+ * @returns number of doubles read
+ */
long all_doubles_str(char *buffer, long offset, double *all_doubles)
{
double value;
@@ -69,6 +140,20 @@ long all_doubles_str(char *buffer, long offset, double *all_doubles)
return i;
}
+/**
+ * @brief Read all longs in a given buffer
+ *
+ * @details
+ * This function is used to read a line of longs from a buffer. All the
+ * longs found are stored in a pre-allocated array.
+ *
+ * @param[in] buffer a string buffer
+ * @param[in] offset an offset of the string to start looking for
+ * longs
+ * @param[out] all_longs pre-allocated array of longs (should be large
+ * enough)
+ * @returns number of longs read
+ */
long all_longs_str(char *buffer, long offset, long *all_longs)
{
long value;
diff --git a/src/timer.c b/src/timer.c
index 2187fb2..3a763a0 100644
--- a/src/timer.c
+++ b/src/timer.c
@@ -1,7 +1,25 @@
+/**
+ * @file timer.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Function for calculating time difference
+ *
+ * @details
+ * This file contains a simple function for calculating the time in seconds
+ * elapsed between two clock() calls.
+ */
+
#include <time.h>
#include "timer.h"
+/**
+ * @brief Calculate the time between two clocks
+ *
+ * @param[in] s_time starting time
+ * @param[in] e_time end time
+ * @returns time elapsed in seconds
+ */
double elapsed_time(clock_t s_time, clock_t e_time)
{
return ((double) (e_time - s_time))/((double) CLOCKS_PER_SEC);
diff --git a/src/trainMSVMMaj.c b/src/trainMSVMMaj.c
index b4b74df..e045a6c 100644
--- a/src/trainMSVMMaj.c
+++ b/src/trainMSVMMaj.c
@@ -1,54 +1,93 @@
+/**
+ * @file trainMSVMMaj.c
+ * @author Gertjan van den Burg
+ * @date August, 2013
+ * @brief Command line interface for training a single model with MSVMMaj
+ *
+ * @details
+ * This is a command line program for training a single model on a given
+ * dataset. To run a grid search over a number of parameter configurations,
+ * see trainMSVMMajdataset.c.
+ *
+ */
+
#include <time.h>
#include <math.h>
+#include "msvmmaj_kernel.h"
#include "libMSVMMaj.h"
+#include "msvmmaj.h"
+#include "msvmmaj_init.h"
#include "msvmmaj_train.h"
#include "util.h"
-#include "MSVMMaj.h"
#define MINARGS 2
extern FILE *MSVMMAJ_OUTPUT_FILE;
+// function declarations
void print_null(const char *s) {}
void exit_with_help();
void parse_command_line(int argc, char **argv, struct MajModel *model,
char *input_filename, char *output_filename, char *model_filename);
+/**
+ * @brief Help function
+ */
void exit_with_help()
{
printf("This is MSVMMaj, version %1.1f\n\n", VERSION);
printf("Usage: trainMSVMMaj [options] training_data_file\n");
printf("Options:\n");
+ printf("-c coef : coefficient for the polynomial and sigmoid kernel\n");
+ printf("-d degree : degree for the polynomial kernel\n");
printf("-e epsilon : set the value of the stopping criterion\n");
+ printf("-g gamma : parameter for the rbf, polynomial or sigmoid "
+ "kernel\n");
printf("-h | -help : print this help.\n");
printf("-k kappa : set the value of kappa used in the Huber hinge\n");
printf("-l lambda : set the value of lambda (lambda > 0)\n");
printf("-m model_file : use previous model as seed for W and t\n");
printf("-o output_file : write output to file\n");
- printf("-p p-value : set the value of p in the lp norm (1.0 <= p <= 2.0)\n");
+ printf("-p p-value : set the value of p in the lp norm "
+ "(1.0 <= p <= 2.0)\n");
printf("-q : quiet mode (no output)\n");
- printf("-r rho : choose the weigth specification (1 = unit, 2 = group)\n");
+ printf("-r rho : choose the weigth specification (1 = unit, 2 = "
+ "group)\n");
+ printf("-t type: kerneltype (LINEAR=0, POLY=1, RBF=2, SIGMOID=3)\n");
+ printf("-u use_cholesky: use cholesky decomposition when using "
+ "kernels (0 = false, 1 = true). Default 0.\n");
exit(0);
}
-/*
- Main
-*/
+/**
+ * @brief Main interface function for trainMSVMMaj
+ *
+ * @details
+ * Main interface for the command line program. A given dataset file is read
+ * and an MSVMMaj model is trained on this data. By default the progress of the
+ * computations is written to stdout. For the full list of program options,
+ * see the help function.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ *
+ */
int main(int argc, char **argv)
{
char input_filename[MAX_LINE_LENGTH];
char model_filename[MAX_LINE_LENGTH];
char output_filename[MAX_LINE_LENGTH];
- struct MajModel *model = Malloc(struct MajModel, 1);
- struct MajData *data = Malloc(struct MajData, 1);
+ struct MajModel *model = msvmmaj_init_model();
+ struct MajData *data = msvmmaj_init_data();
if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help")
|| msvmmaj_check_argv_eq(argc, argv, "-h") )
exit_with_help();
- parse_command_line(argc, argv, model, input_filename, output_filename, model_filename);
+ parse_command_line(argc, argv, model, input_filename,
+ output_filename, model_filename);
// read data file
msvmmaj_read_data(data, input_filename);
@@ -59,22 +98,25 @@ int main(int argc, char **argv)
model->K = data->K;
model->data_file = input_filename;
+ // initialize kernel (if necessary)
+ msvmmaj_make_kernel(model, data);
+
// allocate model and initialize weights
msvmmaj_allocate_model(model);
msvmmaj_initialize_weights(data, model);
+ // seed the random number generator (the command line interfaces are
+ // the only place in the programs where this is done)
srand(time(NULL));
if (msvmmaj_check_argv_eq(argc, argv, "-m")) {
- struct MajModel *seed_model = Malloc(struct MajModel, 1);
+ struct MajModel *seed_model = msvmmaj_init_model();
msvmmaj_read_model(seed_model, model_filename);
msvmmaj_seed_model_V(seed_model, model);
msvmmaj_free_model(seed_model);
} else {
msvmmaj_seed_model_V(NULL, model);
}
- // initialize kernel (if necessary)
- // msvmmaj_make_kernel(model, data);
// start training
msvmmaj_optimize(model, data);
@@ -92,18 +134,34 @@ int main(int argc, char **argv)
return 0;
}
+/**
+ * @brief Parse command line arguments
+ *
+ * @details
+ * Process the command line arguments for the model parameters, and record
+ * them in the specified MajModel. An input filename for the dataset is read
+ * and if specified an output filename and a model filename for the seed
+ * model.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ * @param[in,out] model initialized model
+ * @param[out] input_filename pre-allocated buffer for the input
+ * filename
+ * @param[out] output_filename pre-allocated buffer for the output
+ * filename
+ * @param[out] model_filename pre-allocated buffer for the model
+ * filename
+ *
+ */
void parse_command_line(int argc, char **argv, struct MajModel *model,
char *input_filename, char *output_filename, char *model_filename)
{
- int i;
-
- // default values
- model->p = 1.0;
- model->lambda = pow(2, -8.0);
- model->epsilon = 1e-6;
- model->kappa = 0.0;
- model->weight_idx = 1;
-
+ int i, tmp;
+ double gamma = 1.0,
+ degree = 2.0,
+ coef = 0.0;
+
MSVMMAJ_OUTPUT_FILE = stdout;
// parse options
@@ -113,9 +171,18 @@ void parse_command_line(int argc, char **argv, struct MajModel *model,
exit_with_help();
}
switch (argv[i-1][1]) {
+ case 'c':
+ coef = atof(argv[i]);
+ break;
+ case 'd':
+ degree = atof(argv[i]);
+ break;
case 'e':
model->epsilon = atof(argv[i]);
break;
+ case 'g':
+ gamma = atof(argv[i]);
+ break;
case 'k':
model->kappa = atof(argv[i]);
break;
@@ -134,20 +201,50 @@ void parse_command_line(int argc, char **argv, struct MajModel *model,
case 'r':
model->weight_idx = atoi(argv[i]);
break;
+ case 't':
+ model->kerneltype = atoi(argv[i]);
+ break;
+ case 'u':
+ tmp = atoi(argv[i]);
+ if (!(tmp == 1 || tmp == 0))
+ fprintf(stderr, "Unknown value %i for"
+ " use_cholesky", tmp);
+ model->use_cholesky = (tmp == 1) ? true : false;
+ break;
case 'q':
MSVMMAJ_OUTPUT_FILE = NULL;
i--;
break;
default:
- fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]);
+ fprintf(stderr, "Unknown option: -%c\n",
+ argv[i-1][1]);
exit_with_help();
}
}
-
+
// read input filename
if (i >= argc)
exit_with_help();
strcpy(input_filename, argv[i]);
-}
+ // set kernel parameters
+ switch (model->kerneltype) {
+ case K_LINEAR:
+ break;
+ case K_POLY:
+ model->kernelparam = Calloc(double, 3);
+ model->kernelparam[0] = gamma;
+ model->kernelparam[1] = coef;
+ model->kernelparam[2] = degree;
+ break;
+ case K_RBF:
+ model->kernelparam = Calloc(double, 1);
+ model->kernelparam[0] = gamma;
+ break;
+ case K_SIGMOID:
+ model->kernelparam = Calloc(double, 1);
+ model->kernelparam[0] = gamma;
+ model->kernelparam[1] = coef;
+ }
+}
diff --git a/src/trainMSVMMajdataset.c b/src/trainMSVMMajdataset.c
index 7c3385c..097df85 100644
--- a/src/trainMSVMMajdataset.c
+++ b/src/trainMSVMMajdataset.c
@@ -1,7 +1,28 @@
+/**
+ * @file trainMSVMMajdataset.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Command line interface for the grid search program
+ *
+ * @details
+ * This is a command line interface to the parameter grid search functionality
+ * of the algorithm. The grid search is specified in a separate file, thereby
+ * reducing the number of command line arguments. See
+ * read_training_from_file() for documentation on the training file.
+ *
+ * The program runs a grid search as specified in the training file. If
+ * desired the grid search can incorporate consistency checks to find the
+ * configuration among the best configurations which scores consistently high.
+ * All output is written to stdout, unless the quiet mode is specified.
+ *
+ * For further usage information, see the program help function.
+ *
+ */
+
#include <time.h>
#include "crossval.h"
-#include "MSVMMaj.h"
+#include "msvmmaj.h"
#include "msvmmaj_pred.h"
#include "msvmmaj_train.h"
#include "msvmmaj_train_dataset.h"
@@ -12,11 +33,15 @@
extern FILE *MSVMMAJ_OUTPUT_FILE;
+// function declarations
void print_null(const char *s) {}
void exit_with_help();
void parse_command_line(int argc, char **argv, char *input_filename);
void read_training_from_file(char *input_filename, struct Training *training);
+/**
+ * @brief Help function
+ */
void exit_with_help()
{
printf("This is MSVMMaj, version %1.1f\n\n", VERSION);
@@ -28,6 +53,22 @@ void exit_with_help()
exit(0);
}
+/**
+ * @brief Main interface function for trainMSVMMajdataset
+ *
+ * @details
+ * Main interface for the command line program. A given training file which
+ * specifies a grid search over a single dataset is read. From this, a Queue
+ * is created containing all Task instances that need to be performed in the
+ * search. Depending on the type of dataset, either cross validation or
+ * train/test split training is performed for all tasks. If specified,
+ * consistency repeats are done at the end of the grid search. Note that
+ * currently no output is produced other than what is written to stdout.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ *
+ */
int main(int argc, char **argv)
{
char input_filename[MAX_LINE_LENGTH];
@@ -78,6 +119,21 @@ int main(int argc, char **argv)
return 0;
}
+/**
+ * @brief Parse command line arguments
+ *
+ * @details
+ * Few arguments can be supplied to the command line. Only quiet mode can be
+ * specified, or help can be requested. The filename of the training file is
+ * read from the arguments. Parsing of the training file is done separately in
+ * read_training_from_file().
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv array of command line arguments
+ * @param[out] input_filename pre-allocated buffer for the training
+ * filename.
+ *
+ */
void parse_command_line(int argc, char **argv, char *input_filename)
{
int i;
@@ -94,7 +150,8 @@ void parse_command_line(int argc, char **argv, char *input_filename)
i--;
break;
default:
- fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]);
+ fprintf(stderr, "Unknown option: -%c\n",
+ argv[i-1][1]);
exit_with_help();
}
}
@@ -105,6 +162,21 @@ void parse_command_line(int argc, char **argv, char *input_filename)
strcpy(input_filename, argv[i]);
}
+/**
+ * @brief Read the Training struct from file
+ *
+ * @details
+ * Read the Training struct from a file. The training file follows a specific
+ * format specified in @ref spec_training_file.
+ *
+ * Commonly used string functions in this function are all_doubles_str() and
+ * all_longs_str().
+ *
+ * @param[in] input_filename filename of the training file
+ * @param[in,out] training Training structure in which the parsed
+ * parameter grid is stored.
+ *
+ */
void read_training_from_file(char *input_filename, struct Training *training)
{
long i, nr = 0;
@@ -117,7 +189,8 @@ void read_training_from_file(char *input_filename, struct Training *training)
fid = fopen(input_filename, "r");
if (fid == NULL) {
- fprintf(stderr, "Error opening training file %s\n", input_filename);
+ fprintf(stderr, "Error opening training file %s\n",
+ input_filename);
exit(1);
}
training->traintype = CV;
@@ -126,11 +199,13 @@ void read_training_from_file(char *input_filename, struct Training *training)
Memset(lparams, long, MAX_LINE_LENGTH);
if (str_startswith(buffer, "train:")) {
sscanf(buffer, "train: %s\n", train_filename);
- training->train_data_file = Calloc(char, MAX_LINE_LENGTH);
+ training->train_data_file = Calloc(char,
+ MAX_LINE_LENGTH);
strcpy(training->train_data_file, train_filename);
} else if (str_startswith(buffer, "test:")) {
sscanf(buffer, "test: %s\n", test_filename);
- training->test_data_file = Calloc(char, MAX_LINE_LENGTH);
+ training->test_data_file = Calloc(char,
+ MAX_LINE_LENGTH);
strcpy(training->test_data_file, test_filename);
training->traintype = TT;
} else if (str_startswith(buffer, "p:")) {
@@ -167,16 +242,76 @@ void read_training_from_file(char *input_filename, struct Training *training)
nr = all_longs_str(buffer, 6, lparams);
training->folds = lparams[0];
if (nr > 1)
- fprintf(stderr, "Field \"folds\" only takes one value. "
- "Additional fields are ignored.\n");
+ fprintf(stderr, "Field \"folds\" only takes "
+ "one value. Additional "
+ "fields are ignored.\n");
} else if (str_startswith(buffer, "repeats:")) {
nr = all_longs_str(buffer, 8, lparams);
training->repeats = lparams[0];
if (nr > 1)
- fprintf(stderr, "Field \"repeats\" only takes one value. "
- "Additional fields are ignored.\n");
+ fprintf(stderr, "Field \"repeats\" only "
+ "takes one value. Additional "
+ "fields are ignored.\n");
+ } else if (str_startswith(buffer, "kernel:")) {
+ nr = all_longs_str(buffer, 7, lparams);
+ if (nr > 1)
+ fprintf(stderr, "Field \"kernel\" only takes "
+ "one value. Additional "
+ "fields are ignored.\n");
+ switch (lparams[0]) {
+ case 0:
+ training->kerneltype = K_LINEAR;
+ break;
+ case 1:
+ training->kerneltype = K_POLY;
+ break;
+ case 2:
+ training->kerneltype = K_RBF;
+ break;
+ case 3:
+ training->kerneltype = K_SIGMOID;
+ break;
+ }
+ } else if (str_startswith(buffer, "gamma:")) {
+ nr = all_doubles_str(buffer, 6, params);
+ if (training->kerneltype == K_LINEAR) {
+ fprintf(stderr, "Field \"gamma\" ignored, "
+ "linear kernel is used.\n");
+ training->Ng = 0;
+ break;
+ }
+ training->gammas = Calloc(double, nr);
+ for (i=0; i<nr; i++)
+ training->gammas[i] = params[i];
+ training->Ng = nr;
+ } else if (str_startswith(buffer, "coef:")) {
+ nr = all_doubles_str(buffer, 5, params);
+ if (training->kerneltype == K_LINEAR ||
+ training->kerneltype == K_RBF) {
+ fprintf(stderr, "Field \"coef\" ignored with"
+ "specified kernel.\n");
+ training->Nc = 0;
+ break;
+ }
+ training->coefs = Calloc(double, nr);
+ for (i=0; i<nr; i++)
+ training->coefs[i] = params[i];
+ training->Nc = nr;
+ } else if (str_startswith(buffer, "degree:")) {
+ nr = all_doubles_str(buffer, 7, params);
+ if (training->kerneltype != K_POLY) {
+ fprintf(stderr, "Field \"degree\" ignored "
+ "with specified kernel.\n");
+ training->Nd = 0;
+ break;
+ }
+ training->degrees = Calloc(double, nr);
+ for (i=0; i<nr; i++)
+ training->degrees[i] = params[i];
+ training->Nd = nr;
} else {
- fprintf(stderr, "Cannot find any parameters on line: %s\n", buffer);
+ fprintf(stderr, "Cannot find any parameters on line: "
+ "%s\n", buffer);
}
}
diff --git a/src/util.c b/src/util.c
index 101cb00..8e4b806 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1,19 +1,55 @@
+/**
+ * @file util.c
+ * @author Gertjan van den Burg
+ * @date January, 2014
+ * @brief Utility functions
+ *
+ * @details
+ * This file contains several utility functions for coordinating input and
+ * output of data and model files. It also contains string functions.
+ *
+ * @todo
+ * Pull this apart.
+ *
+ */
#include <math.h>
#include <stdarg.h>
#include <time.h>
-#include "matrix.h"
-#include "MSVMMaj.h"
+#include "msvmmaj.h"
+#include "msvmmaj_matrix.h"
#include "strutil.h"
#include "util.h"
-FILE *MSVMMAJ_OUTPUT_FILE;
-
-/*
- Read the data from the data_file. The data matrix X is augmented
- with a column of ones, to get the matrix Z.
-*/
+FILE *MSVMMAJ_OUTPUT_FILE; ///< The #MSVMMAJ_OUTPUT_FILE specifies the
+ ///< output stream to which all output is
+ ///< written. This is done through the
+ ///< internal (!)
+ ///< function msvmmaj_print_string(). The
+ ///< advantage of using a global output
+ ///< stream variable is that the output can
+ ///< temporarily be suppressed by importing
+ ///< this variable through @c extern and
+ ///< (temporarily) setting it to NULL.
+
+/**
+ * @brief Read data from file
+ *
+ * @details
+ * Read the data from the data_file. The data matrix X is augmented
+ * with a column of ones, to get the matrix Z. The data is expected
+ * to follow a specific format, which is specified in the @ref spec_data_file.
+ * The class labels are corrected internally to correspond to the interval
+ * [1 .. K], where K is the total number of classes.
+ *
+ * @todo
+ * Make sure that this function allows datasets without class labels for
+ * testing.
+ *
+ * @param[in,out] dataset initialized MajData struct
+ * @param[in] data_file filename of the data file.
+ */
void msvmmaj_read_data(struct MajData *dataset, char *data_file)
{
FILE *fid;
@@ -22,7 +58,7 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file)
long nr = 0; // used to check consistency of data
double value;
long K = 0;
- long min_y = 1000;
+ long min_y = 1000000;
char buf[MAX_LINE_LENGTH];
@@ -79,13 +115,15 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file)
dataset->y[i]++;
K++;
} else if (min_y < 0 ) {
- fprintf(stderr, "ERROR: wrong class labels in %s, minimum value is: %ld\n",
+ fprintf(stderr, "ERROR: wrong class labels in %s, minimum "
+ "value is: %ld\n",
data_file, min_y);
exit(0);
}
if (nr < n * m) {
- fprintf(stderr, "ERROR: not enough data found in %s\n", data_file);
+ fprintf(stderr, "ERROR: not enough data found in %s\n",
+ data_file);
exit(0);
}
@@ -98,6 +136,19 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file)
dataset->K = K;
}
+/**
+ * @brief Read model from file
+ *
+ * @details
+ * Read a MajModel from a model file. The MajModel struct must have been
+ * initialized elsewhere. The model file is expected to follow the @ref
+ * spec_model_file. The easiest way to generate a model file is through
+ * msvmmaj_write_model(), which can for instance be used in trainMSVMMaj.c.
+ *
+ * @param[in,out] model initialized MajModel
+ * @param[in] model_filename filename of the model file
+ *
+ */
void msvmmaj_read_model(struct MajModel *model, char *model_filename)
{
long i, j, nr = 0;
@@ -108,7 +159,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
fid = fopen(model_filename, "r");
if (fid == NULL) {
- fprintf(stderr, "Error opening model file %s\n", model_filename);
+ fprintf(stderr, "Error opening model file %s\n",
+ model_filename);
exit(1);
}
// skip the first four lines
@@ -120,7 +172,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
model->lambda = get_fmt_double(fid, model_filename, "lambda = %lf");
model->kappa = get_fmt_double(fid, model_filename, "kappa = %lf");
model->epsilon = get_fmt_double(fid, model_filename, "epsilon = %lf");
- model->weight_idx = (int) get_fmt_long(fid, model_filename, "weight_idx = %li");
+ model->weight_idx = (int) get_fmt_long(fid, model_filename,
+ "weight_idx = %li");
// skip to data section
for (i=0; i<2; i++)
@@ -128,7 +181,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
// read filename of data file
if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) {
- fprintf(stderr, "Error reading model file %s\n", model_filename);
+ fprintf(stderr, "Error reading model file %s\n",
+ model_filename);
exit(1);
}
sscanf(buffer, "filename = %s\n", data_filename);
@@ -153,12 +207,25 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename)
}
if (nr != (model->m+1)*(model->K-1)) {
fprintf(stderr, "Error reading model file %s. "
- "Not enough elements of V found.\n", model_filename);
+ "Not enough elements of V found.\n",
+ model_filename);
exit(1);
}
-
}
+/**
+ * @brief Write model to file
+ *
+ * @details
+ * Write a MajModel to a file. The current time is specified in the file in
+ * UTC + offset. The model file further corresponds to the @ref
+ * spec_model_file.
+ *
+ * @param[in] model MajModel which contains an estimate for
+ * MajModel::V
+ * @param[in] output_filename the output file to write the model to
+ *
+ */
void msvmmaj_write_model(struct MajModel *model, char *output_filename)
{
FILE *fid;
@@ -171,7 +238,8 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename)
// open output file
fid = fopen(output_filename, "w");
if (fid == NULL) {
- fprintf(stderr, "Error opening output file %s", output_filename);
+ fprintf(stderr, "Error opening output file %s",
+ output_filename);
exit(1);
}
@@ -201,7 +269,8 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename)
// Write output to file
fprintf(fid, "Output file for MSVMMaj (version %1.1f)\n", VERSION);
- fprintf(fid, "Generated on: %s (UTC %+03i:%02i)\n\n", timestr, hours, minutes);
+ fprintf(fid, "Generated on: %s (UTC %+03i:%02i)\n\n",
+ timestr, hours, minutes);
fprintf(fid, "Model:\n");
fprintf(fid, "p = %15.16f\n", model->p);
fprintf(fid, "lambda = %15.16f\n", model->lambda);
@@ -218,35 +287,71 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename)
fprintf(fid, "Output:\n");
for (i=0; i<model->m+1; i++) {
for (j=0; j<model->K-1; j++) {
- fprintf(fid, "%+15.16f ", matrix_get(model->V, model->K-1, i, j));
+ fprintf(fid, "%+15.16f ",
+ matrix_get(model->V,
+ model->K-1, i, j));
}
fprintf(fid, "\n");
}
fclose(fid);
-
}
-void msvmmaj_write_predictions(struct MajData *data, long *predy, char *output_filename)
+/**
+ * @brief Write predictions to file
+ *
+ * @details
+ * Write the given predictions to an output file, such that the resulting file
+ * corresponds to the @ref spec_data_file.
+ *
+ * @param[in] data MajData with the original instances
+ * @param[in] predy predictions of the class labels of the
+ * instances in the given MajData. Note that the
+ * order of the instances is assumed to be the
+ * same.
+ * @param[in] output_filename the file to which the predictions are written
+ *
+ */
+void msvmmaj_write_predictions(struct MajData *data, long *predy,
+ char *output_filename)
{
long i, j;
FILE *fid;
fid = fopen(output_filename, "w");
if (fid == NULL) {
- fprintf(stderr, "Error opening output file %s", output_filename);
+ fprintf(stderr, "Error opening output file %s",
+ output_filename);
exit(1);
}
for (i=0; i<data->n; i++) {
for (j=0; j<data->m; j++)
- fprintf(fid, "%f ", matrix_get(data->Z, data->m+1, i, j+1));
+ fprintf(fid, "%f ",
+ matrix_get(data->Z,
+ data->m+1, i, j+1));
fprintf(fid, "%li\n", predy[i]);
}
fclose(fid);
}
+/**
+ * @brief Check if any command line arguments contain string
+ *
+ * @details
+ * Check if any of a given array of command line arguments contains a given
+ * string. If the string is found, the index of the string in argv is
+ * returned. If the string is not found, 0 is returned.
+ *
+ * This function is copied from MSVMpack/libMSVM.c.
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv command line arguments
+ * @param[in] str string to find in the arguments
+ * @returns index of the string in the arguments if found, 0
+ * otherwise
+ */
int msvmmaj_check_argv(int argc, char **argv, char *str)
{
int i;
@@ -260,6 +365,22 @@ int msvmmaj_check_argv(int argc, char **argv, char *str)
return arg_str;
}
+/**
+ * @brief Check if a command line argument equals a string
+ *
+ * @details
+ * Check if any of the command line arguments is exactly equal to a given
+ * string. If so, return the index of the corresponding command line argument.
+ * If not, return 0.
+ *
+ * This function is copied from MSVMpack/libMSVM.c
+ *
+ * @param[in] argc number of command line arguments
+ * @param[in] argv command line arguments
+ * @param[in] str string to find in the arguments
+ * @returns index of the command line argument that corresponds to
+ * the string, 0 if none matches.
+ */
int msvmmaj_check_argv_eq(int argc, char **argv, char *str)
{
int i;
@@ -274,6 +395,19 @@ int msvmmaj_check_argv_eq(int argc, char **argv, char *str)
}
+/**
+ * @brief Print a given string to the specified output stream
+ *
+ * @details
+ * This function is used to print a given string to the output stream
+ * specified by #MSVMMAJ_OUTPUT_FILE. The stream is flushed after the string
+ * is written to the stream. If #MSVMMAJ_OUTPUT_FILE is NULL, nothing is
+ * written. Note that this function is only used by note(); it should never
+ * be called directly.
+ *
+ * @param[in] s string to write to the stream
+ *
+ */
static void msvmmaj_print_string(const char *s)
{
if (MSVMMAJ_OUTPUT_FILE != NULL) {
@@ -282,6 +416,19 @@ static void msvmmaj_print_string(const char *s)
}
}
+/**
+ * @brief Parse a formatted string and write to the output stream
+ *
+ * @details
+ * This function is a replacement for fprintf(), such that the output stream
+ * does not have to be specified at each function call. The functionality is
+ * otherwise exactly the same. Writing the formatted string to the output stream
+ * is handled by msvmmaj_print_string().
+ *
+ * @param[in] fmt String format
+ * @param[in] ... variable argument list for the string format
+ *
+ */
void note(const char *fmt,...)
{
char buf[BUFSIZ];
@@ -292,6 +439,16 @@ void note(const char *fmt,...)
(*msvmmaj_print_string)(buf);
}
+/**
+ * @brief Allocate memory for a MajModel
+ *
+ * @details
+ * This function can be used to allocate the memory needed for a MajModel. All
+ * arrays in the model are specified and initialized to 0.
+ *
+ * @param[in] model MajModel to allocate
+ *
+ */
void msvmmaj_allocate_model(struct MajModel *model)
{
long n = model->n;
@@ -360,6 +517,16 @@ void msvmmaj_allocate_model(struct MajModel *model)
}
+/**
+ * @brief Free allocated MajModel struct
+ *
+ * @details
+ * Simply free a previously allocated MajModel by freeing all its component
+ * arrays. Note that the model struct itself is also freed here.
+ *
+ * @param[in] model MajModel to free
+ *
+ */
void msvmmaj_free_model(struct MajModel *model)
{
free(model->W);
@@ -376,10 +543,19 @@ void msvmmaj_free_model(struct MajModel *model)
free(model);
}
+/**
+ * @brief Free allocated MajData struct
+ *
+ * @details
+ * Simply free a previously allocated MajData struct by freeing all its
+ * components. Note that the data struct itself is also freed here.
+ *
+ * @param[in] data MajData struct to free
+ *
+ */
void msvmmaj_free_data(struct MajData *data)
{
free(data->Z);
free(data->y);
free(data);
}
-
diff --git a/training/glass.training b/training/glass.training
index a5053f8..a531ebc 100644
--- a/training/glass.training
+++ b/training/glass.training
@@ -1,7 +1,8 @@
-data: ./data/glass.train
+train: ./data/glass.train
p: 1.0 1.5 2.0
-kappa: -0.9 0.0 0.5 1.0 5.0
+kappa: -0.9 0.0 1.0
lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625
epsilon: 1e-6
weight: 1 2
folds: 10
+repeats: 10
diff --git a/training/iris.training b/training/iris.training
new file mode 100644
index 0000000..f89a72e
--- /dev/null
+++ b/training/iris.training
@@ -0,0 +1,8 @@
+train: ./data/iris.train
+p: 1.0 1.5 2.0
+kappa: -0.9 0.0 1.0
+lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625
+epsilon: 1e-6
+weight: 1 2
+folds: 10
+repeats: 10
diff --git a/training/nursery.training b/training/nursery.training
index e0d016b..3fe8dde 100644
--- a/training/nursery.training
+++ b/training/nursery.training
@@ -5,3 +5,7 @@ lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.0039
epsilon: 1e-6
weight: 1
folds: 10
+kernel: 0
+gamma: 0.1 0.2 0.3
+coef: 1.0 2.0
+degree: 1.0 2.0 3.0
diff --git a/training/vehicle.training b/training/vehicle.training
index b0a2fb6..f8760b7 100644
--- a/training/vehicle.training
+++ b/training/vehicle.training
@@ -1,7 +1,8 @@
-data: ./data/vehicle.train
-p: 1.0 1.25 1.5 1.75 2.0
-kappa: -0.9 0.0 0.5 1.0 5.0
+train: ./data/vehicle.train
+p: 1.0 1.5 2.0
+kappa: -0.9 0.0 1.00
lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625
epsilon: 1e-6
weight: 1 2
folds: 10
+repeats: 10