diff options
46 files changed, 4417 insertions, 544 deletions
@@ -11,8 +11,8 @@ all: lib/libmsvmmaj.a $(EXECS) override LDFLAGS+=-lblas -llapack -lm -lib/libmsvmmaj.a: src/libMSVMMaj.o src/util.o src/matrix.o src/mylapack.o src/strutil.o src/crossval.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o - @ar rcs lib/libmsvmmaj.a src/libMSVMMaj.o src/util.o src/matrix.o src/mylapack.o src/strutil.o src/crossval.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o +lib/libmsvmmaj.a: src/libMSVMMaj.o src/util.o src/msvmmaj_matrix.o src/msvmmaj_lapack.o src/strutil.o src/crossval.o src/msvmmaj_init.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o src/msvmmaj_kernel.o + @ar rcs lib/libmsvmmaj.a src/libMSVMMaj.o src/util.o src/msvmmaj_matrix.o src/msvmmaj_lapack.o src/strutil.o src/crossval.o src/msvmmaj_init.o src/msvmmaj_train.o src/msvmmaj_train_dataset.o src/msvmmaj_pred.o src/timer.o src/msvmmaj_kernel.o @echo libmsvmmaj.a... trainMSVMMaj: src/trainMSVMMaj.c lib/libmsvmmaj.a @@ -27,21 +27,29 @@ predMSVMMaj: src/predMSVMMaj.c lib/libmsvmmaj.a @$(CC) -o predMVSMMaj src/predMSVMMaj.c $(CFLAGS) $(INCLUDE) $(LIB) -lmsvmmaj $(LDFLAGS) @echo predMSVMMaj... +src/crossval.o: + @$(CC) -c -o src/crossval.o src/crossval.c $(CFLAGS) $(INCLUDE) + @echo crossval.o... + +src/msvmmaj_kernel.o: + @$(CC) -c -o src/msvmmaj_kernel.o src/msvmmaj_kernel.c $(CFLAGS) $(INCLUDE) + @echo msvmmaj_kernel.o... + src/libMSVMMaj.o: @$(CC) -c -o src/libMSVMMaj.o src/libMSVMMaj.c $(CFLAGS) $(INCLUDE) @echo libMSVMMaj.o... -src/util.o: - @$(CC) -c -o src/util.o src/util.c $(CFLAGS) $(INCLUDE) - @echo util.o... +src/msvmmaj_matrix.o: + @$(CC) -c -o src/msvmmaj_matrix.o src/msvmmaj_matrix.c $(CFLAGS) $(INCLUDE) + @echo msvmmaj_matrix.o... -src/matrix.o: - @$(CC) -c -o src/matrix.o src/matrix.c $(CFLAGS) $(INCLUDE) - @echo matrix.o... +src/msvmmaj_init.o: + @$(CC) -c -o src/msvmmaj_init.o src/msvmmaj_init.c $(CFLAGS) $(INCLUDE) + @echo msvmmaj_init.o... 
-src/crossval.o: - @$(CC) -c -o src/crossval.o src/crossval.c $(CFLAGS) $(INCLUDE) - @echo crossval.o... +src/msvmmaj_pred.o: + @$(CC) -c -o src/msvmmaj_pred.o src/msvmmaj_pred.c $(CFLAGS) $(INCLUDE) + @echo msvmmaj_pred.o... src/msvmmaj_train.o: @$(CC) -c -o src/msvmmaj_train.o src/msvmmaj_train.c $(CFLAGS) $(INCLUDE) @@ -51,12 +59,8 @@ src/msvmmaj_train_dataset.o: @$(CC) -c -o src/msvmmaj_train_dataset.o src/msvmmaj_train_dataset.c $(CFLAGS) $(INCLUDE) @echo msvmmaj_train_dataset.o... -src/msvmmaj_pred.o: - @$(CC) -c -o src/msvmmaj_pred.o src/msvmmaj_pred.c $(CFLAGS) $(INCLUDE) - @echo msvmmaj_pred.o... - -src/mylapack.o: - @$(CC) -c -o src/mylapack.o src/mylapack.c $(CFLAGS) $(INCLUDE) +src/msvmmaj_lapack.o: + @$(CC) -c -o src/msvmmaj_lapack.o src/msvmmaj_lapack.c $(CFLAGS) $(INCLUDE) @echo mylapack.o... src/strutil.o: @@ -67,5 +71,9 @@ src/timer.o: @$(CC) -c -o src/timer.o src/timer.c $(CFLAGS) $(INCLUDE) @echo timer.o... +src/util.o: + @$(CC) -c -o src/util.o src/util.c $(CFLAGS) $(INCLUDE) + @echo util.o... + clean: rm -rf $(EXECS) *.o src/*.o lib/*.a diff --git a/data/README b/data/README new file mode 100644 index 0000000..5c38a90 --- /dev/null +++ b/data/README @@ -0,0 +1,4 @@ +All datasets downloaded from the UCI repository. + +All datasets scaled to the interval [-1, 1] in Matlab and saved in +full available precision (16 digits). diff --git a/doc/Doxyfile b/doc/Doxyfile new file mode 100644 index 0000000..5b43a2b --- /dev/null +++ b/doc/Doxyfile @@ -0,0 +1,1893 @@ +# Doxyfile 1.8.4 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed +# in front of the TAG it is preceding . +# All text after a hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" "). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or sequence of words) that should +# identify the project. Note that if you do not use Doxywizard you need +# to put quotes around the project name if it contains spaces. + +PROJECT_NAME = "MSVMMaj" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or icon that is +# included in the documentation. The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used.
+ +OUTPUT_DIRECTORY = + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Latvian, Lithuanian, Norwegian, Macedonian, +# Persian, Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, +# Slovak, Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. 
Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. Note that you specify absolute paths here, but also +# relative paths, which will be relative from the directory where doxygen is +# started. 
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. 
+ +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = "TODO=\todo" + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. 
+ +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, +# and language is one of the parsers supported by doxygen: IDL, Java, +# Javascript, CSharp, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, +# C++. For instance to make doxygen treat .inc files as Fortran files (default +# is PHP), and .f files as C (default is Fortran), use: inc=Fortran f=C. Note +# that for custom extensions you also need to set FILE_PATTERNS otherwise the +# files are not read by doxygen. + +EXTENSION_MAPPING = + +# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all +# comments according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you +# can mix doxygen, HTML, and XML commands with Markdown formatting. +# Disable only in case of backward compatibility issues. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word +# or globally by setting AUTOLINK_SUPPORT to NO. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.)
but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES (the +# default) will make doxygen replace the get and set methods by a property in +# the documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. 
+ +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields or simple typedef fields will be shown +# inline in the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO (the default), structs, classes, and unions are shown on a separate +# page (for HTML and Man pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can +# be an expensive process and often the same symbol appears multiple times in +# the code, doxygen keeps a cache of pre-resolved symbols. If the cache is too +# small doxygen will become slower. If the cache is too large, memory is wasted. +# The cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid +# range is 0..9, the default is 0, corresponding to a cache size of 2^16 = 65536 +# symbols.
+ +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. 
+ +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. 
+ +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. 
If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if section-label ... \endif +# and \cond section-label ... 
\endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. 
The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files +# containing the references data. This must be a list of .bib files. The +# .bib extension is automatically appended if omitted. Using this command +# requires the bibtex tool to be installed. See also +# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style +# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this +# feature you need bibtex and perl available in the search path. Do not use +# file names with spaces, bibtex cannot handle them. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = "../include" \ + "../src" \ + "." + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. 
Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.)
that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be ignored. +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis.
+# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# none of the patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation.
+ +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C, C++ and Fortran comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. 
Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If left blank doxygen will +# generate a default style sheet. Note that it is recommended to use +# HTML_EXTRA_STYLESHEET instead of this one, as it is more robust and this +# tag will in the future become obsolete. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional +# user-defined cascading style sheet that is included after the standard +# style sheets created by doxygen. Using this option one can overrule +# certain style aspects. This is preferred over using HTML_STYLESHEET +# since it does not replace the standard style sheet and is therefore more +# robust against future updates. Doxygen will copy the style sheet file to +# the output directory. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the style sheet and background images +# according to this color.
Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of +# entries shown in the various tree structured indices initially; the user +# can expand and collapse entries dynamically later on. Doxygen will expand +# the tree to such a level that at most the specified number of entries are +# visible (unless a fully collapsed tree already exceeds this amount). +# So setting the number of entries 1 will produce a full collapsed tree by +# default. 
0 is a special value representing an infinite number of entries +# and will result in a fully expanded tree by default. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely +# identify the documentation publisher. This should be a reverse domain-name +# style string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+ +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. 
+ +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> +# Qt Help Project / Custom Filters</a>. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> +# Qt Help Project / Filter Attributes</a>. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin.
To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. Since the tabs have the same information as the +# navigation tree you can set this option to YES if you already set +# GENERATE_TREEVIEW to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. +# Since the tree basically has the same information as the tab index you +# could consider setting DISABLE_INDEX to YES when enabling this option. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section.
+ +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want formulas to look prettier in the HTML +# output. When enabled you may also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. Supported types are HTML-CSS, NativeMML (i.e. MathML) and +# SVG. The default value is HTML-CSS, which is slower, but has the best +# compatibility.
+ +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to +# the MathJax Content Delivery Network so you can quickly see the result without +# installing MathJax. +# However, it is strongly recommended to install a local +# copy of MathJax from http://www.mathjax.org before deployment. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension +# names that should be enabled during MathJax rendering. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript +# pieces of code that will be used on startup of the MathJax code. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a web server instead of a web client using Javascript. +# There are two flavours of web server based search depending on the +# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for +# searching and an index file used by the script.
When EXTERNAL_SEARCH is +# enabled the indexing and searching needs to be provided by external tools. +# See the manual for details. + +SERVER_BASED_SEARCH = NO + +# When EXTERNAL_SEARCH is enabled doxygen will no longer generate the PHP +# script for searching. Instead the search results are written to an XML file +# which needs to be processed by an external indexer. Doxygen will invoke an +# external search engine pointed to by the SEARCHENGINE_URL option to obtain +# the search results. Doxygen ships with an example indexer (doxyindexer) and +# search engine (doxysearch.cgi) which are based on the open source search +# engine library Xapian. See the manual for configuration details. + +EXTERNAL_SEARCH = NO + +# The SEARCHENGINE_URL should point to a search engine hosted by a web server +# which will return the search results when EXTERNAL_SEARCH is enabled. +# Doxygen ships with an example search engine (doxysearch) which is based on +# the open source search engine library Xapian. See the manual for configuration +# details. + +SEARCHENGINE_URL = + +# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed +# search data is written to a file for indexing by an external tool. With the +# SEARCHDATA_FILE tag the name of this file can be specified. + +SEARCHDATA_FILE = searchdata.xml + +# When SERVER_BASED_SEARCH AND EXTERNAL_SEARCH are both enabled the +# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is +# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple +# projects and redirect the results back to the right project. + +EXTERNAL_SEARCH_ID = + +# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen +# projects other than the one defined by this configuration file, but that are +# all added to the same external search index. Each project needs to have a +# unique id set via EXTERNAL_SEARCH_ID.
The search mapping then maps the id +# to a relative location where the documentation can be found. +# The format is: EXTRA_SEARCH_MAPPINGS = id1=loc1 id2=loc2 ... + +EXTRA_SEARCH_MAPPINGS = + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4 will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output.
+ +EXTRA_PACKAGES = amsmath \ + mathtools + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images +# or other source files which should be copied to the LaTeX output directory. +# Note that the files will be copied as-is; there are no commands or markers +# available. + +LATEX_EXTRA_FILES = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. 
+ +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load style sheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. 
+ +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. 
+ +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- + +# If the GENERATE_DOCBOOK tag is set to YES Doxygen will generate DOCBOOK files +# that can be used to generate PDF. + +GENERATE_DOCBOOK = NO + +# The DOCBOOK_OUTPUT tag is used to specify where the DOCBOOK pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in +# front of it. If left blank docbook will be used as the default path. + +DOCBOOK_OUTPUT = docbook + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. 
Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code.
+ +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. For each +# tag file the location of the external documentation should be added. The +# format of a tag file without this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths +# or URLs. Note that each tag file must have a unique name (where the name does +# NOT include the path). If a tag file is not located in the directory in which +# doxygen is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed +# in the related pages index. If set to NO, only the current project's +# pages will be listed. 
+ +EXTERNAL_PAGES = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = YES + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system.
You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside +# the class node. 
If there are many fields or methods and many nodes the +# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS +# threshold limits the number of items for each type to make the size more +# manageable. Set this to 0 for no limit. Note that the threshold may be +# exceeded by 50% before the limit is enforced. + +UML_LIMIT_NUM_FIELDS = 10 + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. 
+ +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. + +INTERACTIVE_SVG = NO + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. 
Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lie further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs.
+ +DOT_CLEANUP = YES diff --git a/doc/mainpage.c b/doc/mainpage.c new file mode 100644 index 0000000..e3c4057 --- /dev/null +++ b/doc/mainpage.c @@ -0,0 +1,23 @@ +/** + * @mainpage MSVMMaj C Package Documentation + * @author Gertjan van den Burg (add future safe email address) + * @date January, 2014 + * @version 0.01a + * + * @section intro_sec Introduction + * + * This is the introduction + * + * @section Usage + * + * Describe the usage of the program here. + * + * @section License + * + * Put some license information here. + * + * @section Reference + * + * Reference to the paper published. Include BibTeX entry as code block here. + * + */ diff --git a/doc/specifications.c b/doc/specifications.c new file mode 100644 index 0000000..5b5a8ae --- /dev/null +++ b/doc/specifications.c @@ -0,0 +1,170 @@ +/** + * @page spec_training_file Training Input File Specification + * + * This page specifies the training file that can be parsed by + * read_training_from_file(). Below is an example training file. + * + * @verbatim + train: /path/to/training/dataset.txt + test: /path/to/test/dataset.txt + p: 1.0 1.5 2.0 + kappa: -0.9 0.0 1.0 + lambda: 64 16 4 1 0.25 0.0625 0.015625 0.00390625 0.0009765625 0.000244140625 + epsilon: 1e-6 + weight: 1 2 + folds: 10 + kernel: LINEAR + gamma: 1e-3 1e-1 1e1 1e3 + coef: 1.0 2.0 + degree: 2.0 3.0 + @endverbatim + * + * Note that with a @c LINEAR kernel specification, the @c gamma, @c coef, and + * @c degree parameters do not need to be specified. The above merely shows + * all available parameters that can be specified in the grid search. Below, + * each of the parameters is described in more detail. Arguments followed by + * an asterisk are optional. + * + * @c train: @n + * The location of the training dataset file. See @ref spec_data_file for the + * specification of a dataset file. + * + * @c test:* @n + * The location of a test dataset file. See @ref spec_data_file for the + * specification of a dataset file.
This is optional; if specified, the + * train/test split will be used for training. + * + * @c p: @n + * The values of the @c p parameter of the algorithm to search over. The @c p + * parameter is used in the @f$ \ell_p @f$ norm over the Huber weighted scalar + * misclassification errors. Note: @f$ 1 \leq p \leq 2 @f$. + * + * @c kappa: @n + * The values of the @c kappa parameter of the algorithm to search over. The + * @c kappa parameter is used in the Huber hinge error over the scalar + * misclassification errors. Note: @f$ \kappa > -1 @f$. + * + * @c lambda: @n + * The values of the @c lambda parameter of the algorithm to search over. The + * @c lambda parameter is used in the regularization term of the loss + * function. Note: @f$ \lambda > 0 @f$. + * + * @c epsilon: @n + * The values of the @c epsilon parameter of the algorithm to search over. The + * @c epsilon parameter is used as the stopping parameter in the majorization + * algorithm. Note that it often suffices to use only one epsilon value. Using + * more than one value increases the size of the grid search considerably. + * + * @c weight: @n + * The weight specifications for the algorithm to use. Two weight + * specifications are implemented: the unit weights (index = 1) and the group + * size correction weights (index = 2). See also msvmmaj_initialize_weights(). + * + * @c folds: @n + * The number of cross validation folds to use. + * + * @c kernel:* @n + * Kernel to use in training. Only one kernel can be specified. See KernelType + * for available kernel functions. Note: if multiple kernel types are + * specified on this line, only the last value will be used (see the + * implementation of parse_kernel_str() for details). If no kernel is + * specified, the @c LINEAR kernel will be used. + * + * @c gamma:* @n + * Gamma parameters for the @c RBF, @c POLY, and @c SIGMOID kernels. This + * parameter is only optional if the @c LINEAR kernel is specified.
See + * msvmmaj_compute_rbf(), msvmmaj_compute_poly(), and + * msvmmaj_compute_sigmoid() for kernel specifications. + * + * @c coef:* @n + * Coefficients for the @c POLY and @c SIGMOID kernels. This parameter is only + * optional if the @c LINEAR or @c RBF kernels are used. See + * msvmmaj_compute_poly() and msvmmaj_compute_sigmoid() for kernel + * specifications. + * + * @c degree:* @n + * Degrees to search over in the grid search when the @c POLY kernel is + * specified. With other kernel specifications this parameter is unnecessary. + * See msvmmaj_compute_poly() for the polynomial kernel specification. + * + */ + + +/** + * @page spec_data_file Data File Specification + * + * This page describes the input file format for a dataset. This specification + * is used by msvmmaj_read_data() and msvmmaj_write_predictions(). The data + * file specification is the same as that used in <a + * href="http://www.loria.fr/~lauer/MSVMpack/MSVMpack.html">MSVMpack</a> + * (verified in v. 1.3). + * + * The file is expected to be as follows + * @verbatim +n +m +x_11 x_12 ... x_1m y_1 +x_21 x_22 ... x_2m y_2 +... +x_n1 x_n2 ... x_nm y_n +@endverbatim + * + * Here, @c n denotes the number of instances and @c m denotes the number of + * predictors. The class labels @c y_i are expected in the final column of + * each line. + * + * As an example, below the first 5 lines of the iris dataset are shown. + * + * @verbatim +150 +4 +5.10000 3.50000 1.40000 0.20000 1.00000 +4.90000 3.00000 1.40000 0.20000 1.00000 +4.70000 3.20000 1.30000 0.20000 1.00000 +@endverbatim + * + */ + +/** + * @page spec_model_file Model File Specification + * + * This page describes the input file format for a MajModel. This + * specification is used by msvmmaj_read_model() and msvmmaj_write_model(). + * The model file is designed to fully reproduce a MajModel. 
+ * + * The model output file follows the format + * @verbatim +Output file for MSVMMaj (version 0.1) +Generated on: Tue Jan 14 12:00:00 2014 (UTC +01:00) + +Model: +p = 2.00 +lambda = 0.001 +kappa = 1.0 +epsilon = 1e-06 +weight_idx = 1 + +Data: +filename = /path/to/data_file.txt +n = 150 +m = 4 +K = 3 + +Output: +-0.7693429935131153 -1.9335141926875414 ++0.3425555992439160 +1.0939198172438194 ++0.3100589593140404 +0.9872012663780092 ++0.1319873613546321 +0.1207806485439152 ++0.8052481376988456 +0.6507524553955120 +@endverbatim + * + * The first two lines of the file mainly serve a logging purpose, and are + * ignored when reading the model file. The model section fully describes the + * model parameters. Next, the data section describes the data file that was + * used in training and the size of the dataset. Finally, the output section + * shows the augmented weight matrix MajModel::V, in row-major order. + * + * @todo + * Write kernel specification to model file as well and adjust the format + * above. 
+ */ diff --git a/include/MSVMMaj.h b/include/MSVMMaj.h deleted file mode 100644 index de99f91..0000000 --- a/include/MSVMMaj.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef MSVMMAJ_H -#define MSVMMAJ_H - -#include "globals.h" -#include "types.h" - -/* - Model structure -*/ -struct MajModel { - int weight_idx; - long K; - long n; - long m; - double epsilon; - double p; - double kappa; - double lambda; - double *W; - double *t; - double *V; - double *Vbar; - double *U; - double *UU; - double *Q; - double *H; - double *R; - double *rho; - double training_error; - char *data_file; - KernelType kerneltype; - double *kernelparam; -}; - -/* - Data structure -*/ -struct MajData { - long K; - long n; - long m; - long *y; - double *Z; -}; - -#endif diff --git a/include/crossval.h b/include/crossval.h index 0794622..0dff0b9 100644 --- a/include/crossval.h +++ b/include/crossval.h @@ -1,3 +1,15 @@ +/** + * @file crossval.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for crossval.c + * + * @details + * Contains function declarations for functions needed for performing cross + * validation on MajData structures. + * + */ + #ifndef CROSSVAL_H #define CROSSVAL_H diff --git a/include/globals.h b/include/globals.h index 8420f76..55fb6c4 100644 --- a/include/globals.h +++ b/include/globals.h @@ -1,5 +1,23 @@ -#ifndef GLOBALS_H -#define GLOBALS_H +/** + * @file globals.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Global definitions + * + * @details + * This header file contains defines and includes which are used in many + * parts of the program. Most notable are the Calloc, Malloc and Memset + * defines, which are commonly used to allocate memory. These functions + * are shorthands for their lowercase counterparts. + * + * Furthermore, a maximum and minimum function are defined here. These + * functions have their own include guards, to ensure potential linked + * libraries don't conflict with these definitions. 
+ * + */ + +#ifndef MSVMMAJ_GLOBALS_H +#define MSVMMAJ_GLOBALS_H #include <stdio.h> #include <stdlib.h> diff --git a/include/kernel.h b/include/kernel.h deleted file mode 100644 index ac5c35d..0000000 --- a/include/kernel.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef KERNEL_H -#define KERNEL_H - -#include "globals.h" -#include "types.h" - -// forward declarations -struct MajData; - -// function declarations - diff --git a/include/libMSVMMaj.h b/include/libMSVMMaj.h index 21efc2f..b7261dc 100644 --- a/include/libMSVMMaj.h +++ b/include/libMSVMMaj.h @@ -1,3 +1,20 @@ +/** + * @file libMSVMMaj.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for the core MSVMMaj library libMSVMMaj.c + * + * @details + * The core computational routines for MSVMMaj are defined in libMSVMMaj.c. + * This file contains function declarations for these functions. + * + */ + +/** + * @todo + * rename this file and libMSVMMaj.c to correspond with the lowercase convention. + * Also change the name of the include guard. + */ #ifndef LIBMSVMMAJ_H #define LIBMSVMMAJ_H diff --git a/include/msvmmaj.h b/include/msvmmaj.h new file mode 100644 index 0000000..d67ad8b --- /dev/null +++ b/include/msvmmaj.h @@ -0,0 +1,98 @@ +/** + * @file msvmmaj.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Definitions for common structures + * + * @details + * Contains documentation and declarations of MajModel and MajData. + * + */ + +#ifndef MSVMMAJ_H +#define MSVMMAJ_H + +#include "globals.h" +#include "types.h" + +/** + * @brief A structure to represent a single MSVMMaj model. 
+ * + * @param weight_idx which weights to use (1 = unit, 2 = group) + * @param K number of classes in the dataset + * @param n number of instances in the dataset + * @param m number of predictors in the dataset + * @param epsilon stopping criterion + * @param p parameter for the L_p norm + * @param kappa parameter for the Huber hinge + * @param lambda regularization parameter + * @param *W pointer to the weight matrix + * @param *t pointer to the translation vector + * @param *V pointer to the augmented weight matrix + * @param *Vbar pointer to the augmented weight matrix from a + * previous iteration + * @param *U pointer to the simplex matrix + * @param *UU pointer to the 3D simplex difference matrix + * @param *Q pointer to the error matrix + * @param *H pointer to the Huber weighted error matrix + * @param *R pointer to the 0-1 auxiliary matrix + * @param *rho pointer to the instance weight vector + * @param training_error error after training has completed + * @param *data_file pointer to the filename of the data + * @param kerneltype kernel to be used in the model + * @param kernelparam pointer to the vector of kernel parameters + * @param use_cholesky whether the Cholesky decomposition should be + * used + * + */ +struct MajModel { + int weight_idx; + long K; + long n; + long m; + double epsilon; + double p; + double kappa; + double lambda; + double *W; + double *t; + double *V; + double *Vbar; + double *U; + double *UU; + double *Q; + double *H; + double *R; + double *rho; + double training_error; + char *data_file; + KernelType kerneltype; + double *kernelparam; + bool use_cholesky; +}; + +/** + * @brief A structure to represent the data. 
+ * + * @param K number of classes + * @param n number of instances + * @param m number of predictors + * @param *y pointer to vector of class labels + * @param *Z pointer to augmented data matrix + * @param kerneltype kerneltype used in MajData::Z + * @param *kernelparam kernel parameters used in MajData::Z + * @param use_cholesky whether the Cholesky decomposition is used in MajData::Z + * + */ +struct MajData { + long K; + long n; + long m; + long *y; + double *Z; + KernelType kerneltype; + double *kernelparam; + bool use_cholesky; +}; + +#endif diff --git a/include/msvmmaj_init.h b/include/msvmmaj_init.h new file mode 100644 index 0000000..6e2e36f --- /dev/null +++ b/include/msvmmaj_init.h @@ -0,0 +1,23 @@ +/** + * @file msvmmaj_init.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for msvmmaj_init.c + * + * @details + * Contains function declarations for the initialization functions for + * MajModel and MajData structures. + */ + +#ifndef MSVMMAJ_INIT_H +#define MSVMMAJ_INIT_H + +// forward declaration +struct MajData; +struct MajModel; + +struct MajModel *msvmmaj_init_model(); + +struct MajData *msvmmaj_init_data(); + +#endif diff --git a/include/msvmmaj_kernel.h b/include/msvmmaj_kernel.h new file mode 100644 index 0000000..69bf267 --- /dev/null +++ b/include/msvmmaj_kernel.h @@ -0,0 +1,32 @@ +/** + * @file msvmmaj_kernel.h + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Header file for kernel functionality + * + * @details + * Contains function declarations for computing the kernel matrix + * in nonlinear MSVMMaj. 
Additional kernel functions should be + * included here and in msvmmaj_kernel.c + * + */ + +#ifndef MSVMMAJ_KERNEL_H +#define MSVMMAJ_KERNEL_H + +#include "globals.h" + +// forward declarations +struct MajData; +struct MajModel; + +// function declarations +void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data); + +double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam, + long n); +double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam, + long n); +double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam, + long n); +#endif diff --git a/include/msvmmaj_lapack.h b/include/msvmmaj_lapack.h new file mode 100644 index 0000000..766a475 --- /dev/null +++ b/include/msvmmaj_lapack.h @@ -0,0 +1,23 @@ +/** + * @file msvmmaj_lapack.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_lapack.c + * + * @details + * Function declarations for external LAPACK functions + * + */ + +#ifndef MSVMMAJ_LAPACK_H +#define MSVMMAJ_LAPACK_H + +#include "globals.h" + +int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, + int LDB); +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, + double *B, int LDB, double *WORK, int LWORK); +int dpotrf(char UPLO, int N, double *A, int LDA); + +#endif diff --git a/include/matrix.h b/include/msvmmaj_matrix.h index 5f0a441..8f5ca59 100644 --- a/include/matrix.h +++ b/include/msvmmaj_matrix.h @@ -1,5 +1,16 @@ -#ifndef MATRIX_H -#define MATRIX_H +/** + * @file msvmmaj_matrix.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_matrix.c + * + * @details + * Contains function declarations for functions useful for dealing with matrices. 
+ * + */ + +#ifndef MSVMMAJ_MATRIX_H +#define MSVMMAJ_MATRIX_H #include "globals.h" diff --git a/include/msvmmaj_pred.h b/include/msvmmaj_pred.h index 952389c..ce22b10 100644 --- a/include/msvmmaj_pred.h +++ b/include/msvmmaj_pred.h @@ -1,3 +1,14 @@ +/** + * @file msvmmaj_pred.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_pred.c + * + * @details + * Contains function declarations for prediction functions. + * + */ + #ifndef MSVMMAJ_PRED_H #define MSVMMAJ_PRED_H diff --git a/include/msvmmaj_train.h b/include/msvmmaj_train.h index 4fb198e..835100f 100644 --- a/include/msvmmaj_train.h +++ b/include/msvmmaj_train.h @@ -1,3 +1,15 @@ +/** + * @file msvmmaj_train.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for msvmmaj_train.c + * + * @details + * Contains function declarations for functions used to train a single + * MajModel. + * + */ + #ifndef MSVMMAJ_TRAIN_H #define MSVMMAJ_TRAIN_H diff --git a/include/msvmmaj_train_dataset.h b/include/msvmmaj_train_dataset.h index fdcdb4c..5248b4a 100644 --- a/include/msvmmaj_train_dataset.h +++ b/include/msvmmaj_train_dataset.h @@ -1,9 +1,39 @@ +/** + * @file msvmmaj_train_dataset.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Structs and functions necessary for the grid search + * + * @details + * The grid search for the optimal parameters is done through a queue. + * This file contains struct definitions for this queue and a single + * task in a queue, as well as a structure for the complete training + * scheme. Function declarations are also included. + * + */ + #ifndef MSVMMAJ_TRAIN_DATASET_H #define MSVMMAJ_TRAIN_DATASET_H #include "globals.h" #include "types.h" +/** + * @brief A structure for a single task in the queue. 
+ * + * @param folds number of folds in cross validation + * @param ID numeric id of the task in the queue + * @param weight_idx parameter for the MajModel + * @param p parameter for the MajModel + * @param kappa parameter for the MajModel + * @param lambda parameter for the MajModel + * @param epsilon parameter for the MajModel + * @param kerneltype parameter for the MajModel + * @param *kernel_param parameters for the MajModel + * @param *train_data pointer to the training data + * @param *test_data pointer to the test data (if any) + * @param performance performance after cross validation + */ struct Task { KernelType kerneltype; int weight_idx; @@ -19,14 +49,54 @@ struct Task { double performance; }; +/** + * @brief Simple task queue. + * + * This struct is basically just an array of pointers to Task instances, + * with a length and an index of the current task. + * + * @param **tasks array of pointers to Task structs + * @param N size of task array + * @param i index used for keeping track of the queue + */ struct Queue { struct Task **tasks; long N; long i; }; +/** + * @brief Structure for describing the entire grid search + * + * @param traintype type of training to use + * @param kerneltype type of kernel to use throughout training + * @param repeats number of repeats to be done after the grid + * search to find the parameter set with the + * most consistent high performance + * @param folds number of folds in cross validation + * @param Np size of the array of p values + * @param Nl size of the array of lambda values + * @param Nk size of the array of kappa values + * @param Ne size of the array of epsilon values + * @param Nw size of the array of weight_idx values + * @param Ng size of the array of gamma values + * @param Nc size of the array of coef values + * @param Nd size of the array of degree values + * @param *weight_idxs array of weight_idxs + * @param *ps array of p values + * @param *lambdas array of lambda values + * @param *kappas array of 
kappa values + * @param *epsilons array of epsilon values + * @param *gammas array of gamma values + * @param *coefs array of coef values + * @param *degrees array of degree values + * @param *train_data_file filename of train data file + * @param *test_data_file filename of test data file + * + */ struct Training { TrainType traintype; + KernelType kerneltype; long repeats; long folds; long Np; @@ -34,11 +104,17 @@ struct Training { long Nk; long Ne; long Nw; + long Ng; + long Nc; + long Nd; int *weight_idxs; double *ps; double *lambdas; double *kappas; double *epsilons; + double *gammas; + double *coefs; + double *degrees; char *train_data_file; char *test_data_file; }; diff --git a/include/mylapack.h b/include/mylapack.h deleted file mode 100644 index 4c79e0e..0000000 --- a/include/mylapack.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef MYLAPACK_H -#define MYLAPACK_H - -#include "globals.h" - -int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, - int LDB); -int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, - double *B, int LDB, double *WORK, int LWORK); - -#endif diff --git a/include/parallel.h b/include/parallel.h deleted file mode 100644 index 8747347..0000000 --- a/include/parallel.h +++ /dev/null @@ -1,13 +0,0 @@ - -struct Task { - enum KernelType kernel_type; - int weight_idx; - double epsilon; - double p; - double kappa; - double lambda; - double *kernel_param; - struct MajData **data; -} - - diff --git a/include/strutil.h b/include/strutil.h index 66722ae..740fde1 100644 --- a/include/strutil.h +++ b/include/strutil.h @@ -1,3 +1,15 @@ +/** + * @file strutil.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for strutil.c + * + * @details + * Function declarations for useful string functions used in parsing + * input files. 
+ * + */ + #ifndef STRUTIL_H #define STRUTIL_H diff --git a/include/timer.h b/include/timer.h index 8a737e0..d4d4d23 100644 --- a/include/timer.h +++ b/include/timer.h @@ -1,5 +1,16 @@ -#ifndef TIMER_H -#define TIMER_H +/** + * @file timer.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for timer.c + * + * @details + * Function declaration for timer function used to measure computation time. + * + */ + +#ifndef MSVMMAJ_TIMER_H +#define MSVMMAJ_TIMER_H #include "globals.h" diff --git a/include/types.h b/include/types.h index b4db8d8..f6d008b 100644 --- a/include/types.h +++ b/include/types.h @@ -1,21 +1,41 @@ -#ifndef TYPES_H -#define TYPES_H +/** + * @file types.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Definitions of common types + * + * @details + * Here common types used throughout the program are defined. + * + */ +#ifndef MSVMMAJ_TYPES_H +#define MSVMMAJ_TYPES_H + +/** + * @brief Implementation of true and false + */ typedef enum { - false, - true + false=0, /**< false keyword, corresponding to 0. */ + true=1 /**< true keyword, corresponding to 1. */ } bool; +/** + * @brief type of training used in parameter grid search + */ typedef enum { - CV=0, - TT=1 + CV=0, /**< cross validation */ + TT=1 /**< data with existing train/test split */ } TrainType; +/** + * @brief type of kernel used in training + */ typedef enum { - K_LINEAR=0, - K_POLY=1, - K_RBF=2, - K_SIGMOID=3, + K_LINEAR=0, /**< Linear kernel */ + K_POLY=1, /**< Polynomial kernel */ + K_RBF=2, /**< RBF kernel */ + K_SIGMOID=3, /**< Sigmoid kernel */ } KernelType; #endif diff --git a/include/util.h b/include/util.h index facae79..995a927 100644 --- a/include/util.h +++ b/include/util.h @@ -1,5 +1,16 @@ -#ifndef UTIL_H -#define UTIL_H +/** + * @file util.h + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Header file for util.c + * + * @details + * Function declarations for utility functions of the program. 
+ * + */ + +#ifndef MSVMMAJ_UTIL_H +#define MSVMMAJ_UTIL_H #include "globals.h" diff --git a/src/crossval.c b/src/crossval.c index 9a3c1cc..10e3051 100644 --- a/src/crossval.c +++ b/src/crossval.c @@ -1,7 +1,40 @@ +/** + * @file crossval.c + * @author Gertjan van den Burg + * @date January 7, 2014 + * @brief Functions for cross validation + * + * @details + * This file contains functions for performing cross validation. The function + * msvmmaj_make_cv_split() creates a cross validation vector for non-stratified + * cross validation. The function msvmmaj_get_tt_split() creates a train and + * test dataset from a given dataset and a pre-determined CV partition vector. + * See individual function documentation for details. + * + */ + #include "crossval.h" -#include "matrix.h" -#include "MSVMMaj.h" +#include "msvmmaj.h" +#include "msvmmaj_matrix.h" +/** + * @brief Create a cross validation split vector + * + * @details + * A pre-allocated vector of length N is created which can be used to define + * cross validation splits. The folds contain between + * @f$ \lfloor N / folds \rfloor @f$ and @f$ \lceil N / folds \rceil @f$ + * instances. An instance is mapped to a partition randomly until all folds + * contain @f$ N \% folds @f$ instances. The zero fold then contains + * @f$ N / folds + N \% folds @f$ instances. These remaining @f$ N \% folds @f$ + * instances are then distributed over the first @f$ N \% folds @f$ folds. 
+ * + * @param[in] N number of instances + * @param[in] folds number of folds + * @param[in,out] cv_idx array of size N which contains the fold index + * for each observation on exit + * + */ void msvmmaj_make_cv_split(long N, long folds, long *cv_idx) { long i, j, idx; @@ -30,6 +63,26 @@ void msvmmaj_make_cv_split(long N, long folds, long *cv_idx) } } + +/** + * @brief Create train and test datasets for a CV split + * + * @details + * Given a MajData structure for the full dataset, a previously created + * cross validation split vector and a fold index, a training and test dataset + * are created. + * + * @param[in] full_data a MajData structure for the entire + * dataset + * @param[in,out] train_data an initialized MajData structure which + * on exit contains the training dataset + * @param[in,out] test_data an initialized MajData structure which + * on exit contains the test dataset + * @param[in] cv_idx a vector of cv partitions created by + * msvmmaj_make_cv_split() + * @param[in] fold_idx index of the fold which becomes the + * test dataset + */ void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data, struct MajData *test_data, long *cv_idx, long fold_idx) { @@ -67,13 +120,15 @@ void msvmmaj_get_tt_split(struct MajData *full_data, struct MajData *train_data, test_data->y[k] = full_data->y[i]; for (j=0; j<m+1; j++) matrix_set(test_data->Z, m+1, k, j, - matrix_get(full_data->Z, m+1, i, j)); + matrix_get(full_data->Z, m+1, + i, j)); k++; } else { train_data->y[l] = full_data->y[i]; for (j=0; j<m+1; j++) matrix_set(train_data->Z, m+1, l, j, - matrix_get(full_data->Z, m+1, i, j)); + matrix_get(full_data->Z, m+1, + i, j)); l++; } } diff --git a/src/kernel.c b/src/kernel.c deleted file mode 100644 index ee64871..0000000 --- a/src/kernel.c +++ /dev/null @@ -1,85 +0,0 @@ -/** - * @file kernel.c - * @author Gertjan van den Burg (burg@ese.eur.nl) - * @date October 18, 2013 - * @brief Defines main functions for use of kernels in MSVMMaj. 
- * - * @details - * Functions for constructing different kernels using user-supplied - * parameters. Also contains the functions for decomposing the - * kernel matrix using several decomposition methods. - * - */ -#include <math.h> - -#include "kernel.h" - -void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data) -{ - switch (model->kerneltype) { - case K_LINEAR: - break; - case K_POLY: - msvmmaj_make_kernel_poly(model, data); - break; - case K_RBF: - msvmmaj_make_kernel_rbf(model, data); - break; - case K_SIGMOID: - msvmmaj_make_kernel_sigmoid(model, data); - break; - } -} - -void msvmmaj_make_kernel_rbf(struct MajModel *model, struct MajData *data) -{ - long i, j; - long n = model->n; - double value; - double *x1, *x2; - double *K = Calloc(double, n*(n+1)); - - for (i=0; i<n; i++) { - for (j=0; j<n; j++) { - x1 = &data->Z[i*(data->m+1)+1]; - x2 = &data->Z[j*(data->m+1)+1]; - value = msvmmaj_compute_rbf(x1, x2, model->kernelparam, n); - matrix_set(K, n+1, i, j+1, value); - } - matrix_set(K, n+1, i, 0, 1.0); - } - - free(data->Z); - data->Z = K; - data->m = n; - model->m = n; -} - -/** - * Implements k(x, z) = exp( -gamma * || x - z ||^2) - */ -double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam, long n) -{ - long i; - double value = 0.0; - - for (i=0; i<n; i++) - value += (x1[i] - x2[i]) * (x1[i] - x2[i]); - value *= -kernelparam[0]; - return exp(value); -} - -/** - * Implements k(x, z) = (gamma * <x, z> + c)^degree - */ -double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam, long n) -{ - long i; - double value = 0.0; - for (i=0; i<n; i++) - value += x1[i]*x2[i]; - value *= kernelparam[0]; - value += kernelparam[1]; - for (i=1; i<(int kernelparam[2]); i++) - value *= value; - :w diff --git a/src/libMSVMMaj.c b/src/libMSVMMaj.c index 9544830..a0bef97 100644 --- a/src/libMSVMMaj.c +++ b/src/libMSVMMaj.c @@ -1,6 +1,6 @@ /** * @file libMSVMMaj.c - * @author Gertjan van den Burg (burg@ese.eur.nl) + * @author Gertjan 
van den Burg * @date August 8, 2013 * @brief Main functions for the MSVMMaj algorithm * @@ -16,24 +16,23 @@ #include <math.h> #include "libMSVMMaj.h" -#include "MSVMMaj.h" -#include "matrix.h" +#include "msvmmaj.h" +#include "msvmmaj_matrix.h" inline double rnd() { return (double) rand()/0x7FFFFFFF; } /** - * @name msvmmaj_simplex_gen * @brief Generate matrix of simplex vertex coordinates - * @ingroup libMSVMMaj * + * @details * Generate the simplex matrix. Each row of the created * matrix contains the coordinate vector of a single * vertex of the K-simplex in K-1 dimensions. The simplex * generated is a special simplex with edges of length 1. * The simplex matrix U must already have been allocated. * - * @param [in] K number of classes - * @param [in,out] U simplex matrix of size K * (K-1) + * @param[in] K number of classes + * @param[in,out] U simplex matrix of size K * (K-1) */ void msvmmaj_simplex_gen(long K, double *U) { @@ -51,10 +50,18 @@ void msvmmaj_simplex_gen(long K, double *U) } } -/*! - Generate the category matrix R. The category matrix has 1's everywhere - except at the column corresponding to the label of instance i. -*/ +/** + * @brief Generate the category matrix + * + * @details + * Generate the category matrix R. The category matrix has 1's everywhere + * except at the column corresponding to the label of instance i, there the + * element is 0. + * + * @param[in,out] model corresponding MajModel + * @param[in] dataset corresponding MajData + * + */ void msvmmaj_category_matrix(struct MajModel *model, struct MajData *dataset) { long i, j; @@ -70,8 +77,19 @@ void msvmmaj_category_matrix(struct MajModel *model, struct MajData *dataset) } } -/*! - * Simplex diff +/** + * @brief Generate the simplex difference matrix + * + * @details + * The simplex difference matrix is a 3D matrix which is constructed + * as follows. 
For each instance i, the difference vectors between the row of + * the simplex matrix corresponding to the class label of instance i and the + * other rows of the simplex matrix are calculated. These difference vectors + * are stored in a matrix, which is one horizontal slice of the 3D matrix. + * + * @param[in,out] model the corresponding MajModel + * @param[in] data the corresponding MajData + * */ void msvmmaj_simplex_diff(struct MajModel *model, struct MajData *data) { @@ -92,13 +110,22 @@ void msvmmaj_simplex_diff(struct MajModel *model, struct MajData *data) } } -/*! - Calculate the errors Q based on the current value of V. - It is assumed that the memory for Q has already been allocated. - In addition, the matrix ZV is calculated here. It is assigned to a - pre-allocated block of memory, since it would be inefficient to keep - reassigning this block at every iteration. -*/ +/** + * @brief Calculate the scalar errors + * + * @details + * Calculate the scalar errors q based on the current estimate of V, and + * store these in Q. It is assumed that the memory for Q has already been + * allocated. In addition, the matrix ZV is calculated here. It is assigned + * to a pre-allocated block of memory, which is passed to this function. + * + * @param[in,out] model the corresponding MajModel + * @param[in] data the corresponding MajData + * @param[in,out] ZV a pointer to a memory block for ZV. On exit + * this block is updated with the new ZV matrix + * calculated with MajModel::V. + * + */ void msvmmaj_calculate_errors(struct MajModel *model, struct MajData *data, double *ZV) { long i, j, k; @@ -136,9 +163,23 @@ void msvmmaj_calculate_errors(struct MajModel *model, struct MajData *data, doub } } -/*! - Calculate the Huber hinge errors for each error in the matrix Q. -*/ +/** + * @brief Calculate the Huber hinge errors + * + * @details + * For each of the scalar errors in Q the Huber hinge errors are + * calculated. 
The Huber hinge is here defined as + * @f[ + * h(q) = + * \begin{dcases} + * 1 - q - \frac{\kappa + 1}{2} & \text{if } q \leq -\kappa \\ + * \frac{1}{2(\kappa + 1)} ( 1 - q)^2 & \text{if } q \in (-\kappa, 1] \\ + * 0 & \text{if } q > 1 + * \end{dcases} + * @f] + * + * @param[in,out] model the corresponding MajModel + */ void msvmmaj_calculate_huber(struct MajModel *model) { long i, j; @@ -159,10 +200,9 @@ void msvmmaj_calculate_huber(struct MajModel *model) } /** - * @name msvmmaj_seed_model_V * @brief seed the matrix V from an existing model or using rand - * @ingroup libMSVMMaj * + * @details * The matrix V must be seeded before the main_loop() can start. * This can be done by either seeding it with random numbers or * using the solution from a previous model on the same dataset @@ -170,8 +210,8 @@ void msvmmaj_calculate_huber(struct MajModel *model) * significant improvement in the number of iterations necessary * because the seeded model V is closer to the optimal V. * - * @param [in] from_model model from which to copy V - * @param [in,out] to_model model to which V will be copied + * @param[in] from_model MajModel from which to copy V + * @param[in,out] to_model MajModel to which V will be copied */ void msvmmaj_seed_model_V(struct MajModel *from_model, struct MajModel *to_model) { @@ -193,10 +233,17 @@ void msvmmaj_seed_model_V(struct MajModel *from_model, struct MajModel *to_model } } -/*! - * Step doubling +/** + * @brief Use step doubling + * + * @details + * Step doubling can be used to speed up the Majorization algorithm. Instead + * of using the value at the minimimum of the majorization function, the value + * ``opposite'' the majorization point is used. This can essentially cut the + * number of iterations necessary to reach the minimum in half. 
+ * + * @param[in] model MajModel containing the augmented parameters */ - void msvmmaj_step_doubling(struct MajModel *model) { long i, j; @@ -207,15 +254,33 @@ void msvmmaj_step_doubling(struct MajModel *model) for (i=0; i<m+1; i++) { for (j=0; j<K-1; j++) { matrix_mul(model->V, K-1, i, j, 2.0); - matrix_add(model->V, K-1, i, j, -matrix_get(model->Vbar, K-1, i, j)); + matrix_add(model->V, K-1, i, j, + -matrix_get(model->Vbar, K-1, i, j)); } } } -/*! - * initialize_weights +/** + * @brief Initialize instance weights + * + * @details + * Instance weights can for instance be used to add additional weights to + * instances of certain classes. Two default weight possibilities are + * implemented here. The first is unit weights, where each instance gets + * weight 1. + * + * The second are group size correction weights, which are calculated as + * @f[ + * \rho_i = \frac{n}{Kn_k} , + * @f] + * where @f$ n_k @f$ is the number of instances in group @f$ k @f$ and + * @f$ y_i = k @f$. + * + * @param[in] data MajData with the dataset + * @param[in,out] model MajModel with the weight specification. On + * exit MajModel::rho contains the instance + * weights. */ - void msvmmaj_initialize_weights(struct MajData *data, struct MajModel *model) { long *groups; diff --git a/src/matrix.c b/src/matrix.c deleted file mode 100644 index 8803e8b..0000000 --- a/src/matrix.c +++ /dev/null @@ -1,77 +0,0 @@ -/** - * @file matrix.c - * @author Gertjan van den Burg (burg@ese.eur.nl) - * @date August 8, 2013 - * @brief Functions facilitating matrix access - * - * @details - * The functions contained in this file are used when - * accessing or writing to matrices. Seperate functions - * exist of adding and multiplying existing matrix - * elements, to ensure this is done in place. - * - */ - -#include "matrix.h" -#include "util.h" - -/** - * @name matrix_set - * @brief Set element of matrix - * @ingroup matrix - * - * Row-Major order is used to set a matrix element. 
Since matrices - * of type double are most common in MSVMMaj, this function only - * deals with that type. - * - * @param [in] M matrix to set element of - * @param [in] cols number of columns of M - * @param [in] i row index of element to write to - * @param [in] j column index of element to write to - * @param [out] val value to write to specified element of M - */ -void matrix_set(double *M, long cols, long i, long j, double val) -{ - M[i*cols+j] = val; -} - -double matrix_get(double *M, long cols, long i, long j) -{ - return M[i*cols+j]; -} - -void matrix_add(double *M, long cols, long i, long j, double val) -{ - M[i*cols+j] += val; -} - -void matrix_mul(double *M, long cols, long i, long j, double val) -{ - M[i*cols+j] *= val; -} - -void matrix3_set(double *M, long N2, long N3, long i, long j, - long k, double val) -{ - M[k+N3*(j+N2*i)] = val; -} - -double matrix3_get(double *M, long N2, long N3, long i, long j, - long k) -{ - return M[k+N3*(j+N2*i)]; -} - - -void print_matrix(double *M, long rows, long cols) -{ - long i, j; - - for (i=0; i<rows; i++) { - for (j=0; j<cols; j++) - note("%8.8f ", matrix_get(M, cols, i, j)); - note("\n"); - } - note("\n"); -} - diff --git a/src/msvmmaj_init.c b/src/msvmmaj_init.c new file mode 100644 index 0000000..14278f9 --- /dev/null +++ b/src/msvmmaj_init.c @@ -0,0 +1,64 @@ +/** + * @file msvmmaj_init.c + * @author Gertjan van den Burg + * @date January 7, 2014 + * @brief Functions for initializing model and data structures + * + * @details + * This file contains functions for initializing a MajModel instance + * and a MajData instance. In addition, default values for these + * structures are defined here (and only here). + * + */ + +#include <math.h> + +#include "msvmmaj.h" +#include "msvmmaj_init.h" + +/** + * @brief Initialize a MajModel structure + * + * @details + * A MajModel structure is initialized and the default value for the + * parameters are set. A pointer to the initialized model is returned. 
+ * + * @returns initialized MajModel + */ +struct MajModel *msvmmaj_init_model() +{ + struct MajModel *model = Malloc(struct MajModel, 1); + + // set default values + model->p = 1.0; + model->lambda = pow(2, -8.0); + model->epsilon = 1e-6; + model->kappa = 0.0; + model->weight_idx = 1; + model->kerneltype = K_LINEAR; + model->use_cholesky = false; + + return model; +} + +/** + * @brief Initialize a MajData structure + * + * @details + * A MajData structure is initialized and default values are set. + * A pointer to the initialized data is returned. + * + * @returns initialized MajData + * + */ +struct MajData *msvmmaj_init_data() +{ + struct MajData *data = Malloc(struct MajData, 1); + + // set default values + data->kerneltype = K_LINEAR; + data->use_cholesky = false; + + return data; +} + diff --git a/src/msvmmaj_kernel.c b/src/msvmmaj_kernel.c new file mode 100644 index 0000000..6238fc1 --- /dev/null +++ b/src/msvmmaj_kernel.c @@ -0,0 +1,195 @@ +/** + * @file msvmmaj_kernel.c + * @author Gertjan van den Burg + * @date October 18, 2013 + * @brief Defines main functions for use of kernels in MSVMMaj. + * + * @details + * Functions for constructing different kernels using user-supplied + * parameters. Also contains the functions for decomposing the + * kernel matrix using several decomposition methods. + * + */ +#include <math.h> + +#include "msvmmaj.h" +#include "msvmmaj_kernel.h" +#include "msvmmaj_lapack.h" +#include "msvmmaj_matrix.h" +#include "util.h" + +/** + * @brief Create the kernel matrix + * + * Create a kernel matrix based on the specified kerneltype. Kernel parameters + * are assumed to be specified in the model. + * + * @param[in] model MajModel specifying the parameters + * @param[in] data MajData specifying the data. 
+ * + */ +void msvmmaj_make_kernel(struct MajModel *model, struct MajData *data) +{ + if (model->kerneltype == K_LINEAR) + return; + + long i, j; + long n = model->n; + double value; + double *x1, *x2; + double *K = Calloc(double, n*n*sizeof(double)); + + for (i=0; i<n; i++) { + for (j=i; j<n; j++) { + x1 = &data->Z[i*(data->m+1)+1]; + x2 = &data->Z[j*(data->m+1)+1]; + if (model->kerneltype == K_POLY) + value = msvmmaj_compute_poly(x1, x2, + model->kernelparam, data->m); + else if (model->kerneltype == K_RBF) + value = msvmmaj_compute_rbf(x1, x2, + model->kernelparam, data->m); + else if (model->kerneltype == K_SIGMOID) + value = msvmmaj_compute_rbf(x1, x2, + model->kernelparam, data->m); + else { + fprintf(stderr, "Unknown kernel type in " + "msvmmaj_make_kernel\n"); + exit(1); + } + matrix_set(K, n, i, j, value); + matrix_set(K, n, j, i, value); + } + } + + // get cholesky if necessary. + if (model->use_cholesky == true) { + int status = dpotrf('L', n, K, n); + if (status != 0) { + fprintf(stderr, "Error (%i) computing Cholesky " + "decomposition of kernel matrix.\n", + status); + exit(0); + } + note("Got Cholesky.\n"); + } + + // copy kernel/cholesky to data + data->Z = realloc(data->Z, n*(n+1)*(sizeof(double))); + for (i=0; i<n; i++) { + for (j=0; j<n; j++) + matrix_set(data->Z, n+1, i, j+1, + matrix_get(K, n, i, j)); + matrix_set(data->Z, n+1, i, 0, 1.0); + } + data->m = n; + + // let data know what it's made of + data->kerneltype = model->kerneltype; + free(data->kernelparam); + switch (model->kerneltype) { + case K_LINEAR: + break; + case K_POLY: + data->kernelparam = Calloc(double, 3); + data->kernelparam[0] = model->kernelparam[0]; + data->kernelparam[1] = model->kernelparam[1]; + data->kernelparam[2] = model->kernelparam[2]; + break; + case K_RBF: + data->kernelparam = Calloc(double, 1); + data->kernelparam[0] = model->kernelparam[0]; + break; + case K_SIGMOID: + data->kernelparam = Calloc(double, 2); + data->kernelparam[0] = model->kernelparam[0]; + 
data->kernelparam[1] = model->kernelparam[1]; + } + data->use_cholesky = model->use_cholesky; + model->m = n; + free(K); +} + +/** + * @brief Compute the RBF kernel between two vectors + * + * @details + * The RBF kernel is computed between two vectors. This kernel is defined as + * @f[ + * k(x_1, x_2) = \exp( -\gamma \| x_1 - x_2 \|^2 ) + * @f] + * where @f$ \gamma @f$ is a kernel parameter specified. + * + * @param[in] x1 first vector + * @param[in] x2 second vector + * @param[in] kernelparam array of kernel parameters (gamma is first + * element) + * @param[in] n length of the vectors x1 and x2 + * @returns kernel evaluation + */ +double msvmmaj_compute_rbf(double *x1, double *x2, double *kernelparam, long n) +{ + long i; + double value = 0.0; + + for (i=0; i<n; i++) + value += (x1[i] - x2[i]) * (x1[i] - x2[i]); + value *= -kernelparam[0]; + return exp(value); +} + +/** + * @brief Compute the polynomial kernel between two vectors + * + * @details + * The polynomial kernel is computed between two vectors. This kernel is + * defined as + * @f[ + * k(x_1, x_2) = ( \gamma \langle x_1, x_2 \rangle + c)^d + * @f] + * where @f$ \gamma @f$, @f$ c @f$ and @f$ d @f$ are kernel parameters. + * + * @param[in] x1 first vector + * @param[in] x2 second vector + * @param[in] kernelparam array of kernel parameters (gamma, c, d) + * @param[in] n length of the vectors x1 and x2 + * @returns kernel evaluation + */ +double msvmmaj_compute_poly(double *x1, double *x2, double *kernelparam, long n) +{ + long i; + double value = 0.0; + for (i=0; i<n; i++) + value += x1[i]*x2[i]; + value *= kernelparam[0]; + value += kernelparam[1]; + return pow(value, ((int) kernelparam[2])); +} + +/** + * @brief Compute the sigmoid kernel between two vectors + * + * @details + * The sigmoid kernel is computed between two vectors. This kernel is defined + * as + * @f[ + * k(x_1, x_2) = \tanh( \gamma \langle x_1 , x_2 \rangle + c) + * @f] + * where @f$ \gamma @f$ and @f$ c @f$ are kernel parameters. 
+ * + * @param[in] x1 first vector + * @param[in] x2 second vector + * @param[in] kernelparam array of kernel parameters (gamma, c) + * @param[in] n length of the vectors x1 and x2 + * @returns kernel evaluation + */ +double msvmmaj_compute_sigmoid(double *x1, double *x2, double *kernelparam, long n) +{ + long i; + double value = 0.0; + for (i=0; i<n; i++) + value += x1[i]*x2[i]; + value *= kernelparam[0]; + value += kernelparam[1]; + return tanh(value); +} diff --git a/src/msvmmaj_lapack.c b/src/msvmmaj_lapack.c new file mode 100644 index 0000000..9ca8dab --- /dev/null +++ b/src/msvmmaj_lapack.c @@ -0,0 +1,129 @@ +/** + * @file msvmmaj_lapack.c + * @author Gertjan van den Burg + * @date August 9, 2013 + * @brief Utility functions for interacting with LAPACK + * + * @details + * Functions in this file are auxiliary functions which make it easier + * to use LAPACK functions from liblapack. + */ + +#include "msvmmaj_lapack.h" + +/** + * @brief Solve AX = B where A is symmetric positive definite. + * + * @details + * Solve a linear system of equations AX = B where A is symmetric positive + * definite. This function uses the external LAPACK routine dposv. + * + * @param[in] UPLO which triangle of A is stored + * @param[in] N order of A + * @param[in] NRHS number of columns of B + * @param[in,out] A double precision array of size (LDA, N). On + * exit contains the upper or lower factor of the + * Cholesky factorization of A. + * @param[in] LDA leading dimension of A + * @param[in,out] B double precision array of size (LDB, NRHS). On + * exit contains the N-by-NRHS solution matrix X. 
+ * @param[in] LDB the leading dimension of B + * @returns info parameter which contains the status of the + * computation: + * - =0: success + * - <0: if -i, the i-th argument had + * an illegal value + * - >0: if i, the leading minor of A + * was not positive definite + * + * See the LAPACK documentation at: + * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html + */ +int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, + int LDB) +{ + extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A, + int *LDAp, double *B, int *LDBp, int *INFOp); + int INFO; + dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO); + return INFO; +} + +/** + * @brief Solve a system of equations AX = B where A is symmetric. + * + * @details + * Solve a linear system of equations AX = B where A is symmetric. This + * function uses the external LAPACK routine dsysv. + * + * @param[in] UPLO which triangle of A is stored + * @param[in] N order of A + * @param[in] NRHS number of columns of B + * @param[in,out] A double precision array of size (LDA, N). On + * exit contains the block diagonal matrix D and + * the multipliers used to obtain the factor U or + * L from the factorization A = U*D*U**T or + * A = L*D*L**T. + * @param[out] IPIV integer array containing the details of D + * @param[in,out] B double precision array of size (LDB, NRHS). On + * exit contains the N-by-NRHS matrix X + * @param[in] LDB leading dimension of B + * @param[out] WORK double precision array of size max(1,LWORK). On + * exit, WORK(1) contains the optimal LWORK + * @param[in] LWORK the length of WORK, can be used for determining + * the optimal blocksize for dsytrf. + * @returns info parameter which contains the status of the + * computation: + * - =0: success + * - <0: if -i, the i-th argument had an + * illegal value + * - >0: if i, D(i, i) is exactly zero, + * no solution can be computed. 
+ * + * See the LAPACK documentation at: + * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html + */ +int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, + double *B, int LDB, double *WORK, int LWORK) +{ + extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A, + int *LDAp, int *IPIV, double *B, int *LDBp, + double *WORK, int *LWORK, int *INFOp); + int INFO; + dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO); + return INFO; +} + +/** + * @brief Compute the Cholesky factorization of a real symmetric positive + * definite matrix. + * + * @details + * This function uses the external LAPACK routine dpotrf. + * + * @param[in] UPLO which triangle of A is stored + * @param[in] N order of A + * @param[in,out] A double precision array of size (LDA, N). On + * exit contains the factor U or L of the Cholesky + * factorization + * @param[in] LDA leading dimension of A + * @returns info parameter which contains the status of the + * computation: + * - =0: success + * - <0: if -i, the i-th argument had an + * illegal value + * - >0: if i, the leading minor of + * order i is not positive + * definite + * + * See the LAPACK documentation at: + * http://www.netlib.org/lapack/explore-html/d0/d8a/dpotrf_8f.html + */ +int dpotrf(char UPLO, int N, double *A, int LDA) +{ + extern void dpotrf_(char *UPLO, int *N, double *A, int *LDA, int *INFOp); + int INFO; + dpotrf_(&UPLO, &N, A, &LDA, &INFO); + return INFO; +} diff --git a/src/msvmmaj_matrix.c b/src/msvmmaj_matrix.c new file mode 100644 index 0000000..ffa0c21 --- /dev/null +++ b/src/msvmmaj_matrix.c @@ -0,0 +1,153 @@ +/** + * @file msvmmaj_matrix.c + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Functions facilitating matrix access + * + * @details + * The functions contained in this file are used when + * accessing or writing to matrices. 
Seperate functions + * exist of adding and multiplying existing matrix + * elements, to ensure this is done in place. + * + */ + +#include "msvmmaj_matrix.h" +#include "util.h" + +/** + * @brief Set element of matrix + * + * @details + * Row-Major order is used to set a matrix element. Since matrices + * of type double are most common in MSVMMaj, this function only + * deals with that type. + * + * @param[in] M matrix to set element of + * @param[in] cols number of columns of M + * @param[in] i row index of element to write to + * @param[in] j column index of element to write to + * @param[out] val value to write to specified element of M + */ +void matrix_set(double *M, long cols, long i, long j, double val) +{ + M[i*cols+j] = val; +} + +/** + * @brief Retrieve value from matrix + * + * @details + * Return a value from a matrix using row-major order. + * + * @param[in] M matrix to retrieve value from + * @param[in] cols number of columns of M + * @param[in] i row index (starting from 0) + * @param[in] j column index (starting from 0) + * @returns matrix element at (i, j) + */ +double matrix_get(double *M, long cols, long i, long j) +{ + return M[i*cols+j]; +} + +/** + * @brief Add value to matrix element + * + * @details + * This function is added to efficiently add values to matrix + * elements, without having to use get and set methods. + * + * @param[in] M matrix + * @param[in] cols number of columns of M + * @param[in] i row index (starting from 0) + * @param[in] j column index (starting from 0) + * @param[in] val value to add to matrix element (i, j) + */ +void matrix_add(double *M, long cols, long i, long j, double val) +{ + M[i*cols+j] += val; +} + +/** + * @brief Multiply matrix element by value + * + * @details + * This function is added to efficiently multiply a matrix element + * by a certain value, without having to use get and set methods. 
+ * + * @param[in] M matrix + * @param[in] cols number of columns of M + * @param[in] i row index (starting from 0) + * @param[in] j column index (starting from 0) + * @param[in] val value to multiply matrix element (i, j) with + */ +void matrix_mul(double *M, long cols, long i, long j, double val) +{ + M[i*cols+j] *= val; +} + +/** + * @brief Set element of 3D matrix + * + * @details + * Set an element of a 3D matrix using row-major order. + * + * @param[in] M matrix + * @param[in] N2 second dimension of M + * @param[in] N3 third dimension of M + * @param[in] i index along first dimension + * @param[in] j index along second dimension + * @param[in] k index along third dimension + * @param[in] val value to set element (i, j, k) to + * + * See: + * http://en.wikipedia.org/wiki/Row-major_order + */ +void matrix3_set(double *M, long N2, long N3, long i, long j, + long k, double val) +{ + M[k+N3*(j+N2*i)] = val; +} + +/** + * @brief Get element of 3D matrix + * + * @details + * Retrieve an element from a 3D matrix. 
+ * + * @param[in] M matrix + * @param[in] N2 second dimension of M + * @param[in] N3 third dimension of M + * @param[in] i index along first dimension + * @param[in] j index along second dimension + * @param[in] k index along third dimension + * @returns value at the (i, j, k) element of M + */ +double matrix3_get(double *M, long N2, long N3, long i, long j, + long k) +{ + return M[k+N3*(j+N2*i)]; +} + +/** + * @brief print a matrix + * + * @details + * Debug function to print a matrix + * + * @param[in] M matrix + * @param[in] rows number of rows of M + * @param[in] cols number of columns of M + */ +void print_matrix(double *M, long rows, long cols) +{ + long i, j; + + for (i=0; i<rows; i++) { + for (j=0; j<cols; j++) + note("%8.8f ", matrix_get(M, cols, i, j)); + note("\n"); + } + note("\n"); +} diff --git a/src/msvmmaj_pred.c b/src/msvmmaj_pred.c index 5f1b1ae..98b6e0a 100644 --- a/src/msvmmaj_pred.c +++ b/src/msvmmaj_pred.c @@ -1,31 +1,36 @@ /** * @file msvmmaj_pred.c - * @author Gertjan van den Burg (burg@ese.eur.nl) + * @author Gertjan van den Burg * @date August 9, 2013 * @brief Main functions for predicting class labels.. * + * @details + * This file contains functions for predicting the class labels of instances + * and a function for calculating the predictive performance (hitrate) of + * a prediction given true class labels. + * */ #include <cblas.h> #include "libMSVMMaj.h" -#include "MSVMMaj.h" -#include "matrix.h" +#include "msvmmaj.h" +#include "msvmmaj_matrix.h" #include "msvmmaj_pred.h" /** - * @name predict_labels * @brief Predict class labels of data given and output in predy * + * @details * The labels are predicted by mapping each instance in data to the * simplex space using the matrix V in the given model. Next, for each * instance the nearest simplex vertex is determined using an Euclidean * norm. The nearest simplex vertex determines the predicted class label, - * which is recorded in predy + * which is recorded in predy. 
* - * @param [in] data data to predict labels for - * @param [in] model model with optimized V - * @param [out] predy pre-allocated vector to record predictions in + * @param[in] data MajData to predict labels for + * @param[in] model MajModel with optimized V + * @param[out] predy pre-allocated vector to record predictions in */ void msvmmaj_predict_labels(struct MajData *data, struct MajModel *model, long *predy) { @@ -84,15 +89,15 @@ void msvmmaj_predict_labels(struct MajData *data, struct MajModel *model, long * } /** - * @name msvmmaj_prediction_perf * @brief Calculate the predictive performance (percentage correct) * + * @details * The predictive performance is calculated by simply counting the number * of correctly classified samples and dividing by the total number of * samples, multiplying by 100. * - * @param [in] data the dataset with known labels - * @param [in] predy the predicted class labels + * @param[in] data the MajData dataset with known labels + * @param[in] predy the predicted class labels * * @returns percentage correctly classified. */ diff --git a/src/msvmmaj_train.c b/src/msvmmaj_train.c index 272d86a..97ee6a1 100644 --- a/src/msvmmaj_train.c +++ b/src/msvmmaj_train.c @@ -1,6 +1,6 @@ /** * @file msvmmaj_train.c - * @author Gertjan van den Burg (burg@ese.eur.nl) + * @author Gertjan van den Burg * @date August 9, 2013 * @brief Main functions for training the MSVMMaj solution. * @@ -13,25 +13,34 @@ #include <math.h> #include <cblas.h> -#include "msvmmaj_train.h" -#include "MSVMMaj.h" #include "libMSVMMaj.h" -#include "mylapack.h" -#include "matrix.h" +#include "msvmmaj.h" +#include "msvmmaj_lapack.h" +#include "msvmmaj_matrix.h" +#include "msvmmaj_train.h" #include "util.h" +/** + * Maximum number of iterations of the algorithm. + */ #define MAX_ITER 1000000 /** - * @name msvmmaj_optimize * @brief The main training loop for MSVMMaj * - * The msvmmaj_optimize() function is the main training function. 
This function + * @details + * This function is the main training function. This function * handles the optimization of the model with the given model parameters, with - * the data given. On return the matrix model->V contains the optimal weight matrix. + * the data given. On return the matrix MajModel::V contains the optimal + * weight matrix. + * + * In this function, step doubling is used in the majorization algorithm after + * a burn-in of 50 iterations. If the training is finished, MajModel::t and + * MajModel::W are extracted from MajModel::V. * - * @param [in,out] model the model to be trained. Contains optimal V on exit. - * @param [in] data the data to train the model with. + * @param[in,out] model the MajModel to be trained. Contains optimal + * V on exit. + * @param[in] data the MajData to train the model with. */ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) { @@ -49,7 +58,7 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) double *ZAZVT = Calloc(double, (m+1)*(K-1)); note("Starting main loop.\n"); - note("MajDataset:\n"); + note("Dataset:\n"); note("\tn = %i\n", n); note("\tm = %i\n", m); note("\tK = %i\n", K); @@ -78,8 +87,8 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) L = msvmmaj_get_loss(model, data, ZV); if (it%50 == 0) - note("iter = %li, L = %15.16f, Lbar = %15.16f, reldiff = %15.16f\n", - it, L, Lbar, (Lbar - L)/L); + note("iter = %li, L = %15.16f, Lbar = %15.16f, " + "reldiff = %15.16f\n", it, L, Lbar, (Lbar - L)/L); it++; } @@ -91,7 +100,8 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) model->t[i] = matrix_get(model->V, K-1, 0, i); for (i=1; i<m+1; i++) for (j=0; j<K-1; j++) - matrix_set(model->W, K-1, i-1, j, matrix_get(model->V, K-1, i, j)); + matrix_set(model->W, K-1, i-1, j, + matrix_get(model->V, K-1, i, j)); free(B); free(ZV); free(ZAZ); @@ -100,19 +110,22 @@ void msvmmaj_optimize(struct MajModel *model, struct MajData *data) } /** - * @name 
msvmmaj_get_loss - * @brief calculate the current value of the loss function + * @brief Calculate the current value of the loss function * - * The current loss value is calculated based on the matrix V in the given - * model. + * @details + * The current loss function value is calculated based on the matrix V in the + * given model. Note that the matrix ZV is passed explicitly to avoid having + * to reallocate memory at every step. * - * @param [in] model model structure which holds the current estimate V - * @param [in] data data structure - * @param [in,out] ZV pre-allocated matrix ZV which is updated on output - * - * @return the current value of the loss function + * @param[in] model MajModel structure which holds the current + * estimate V + * @param[in] data MajData structure + * @param[in,out] ZV pre-allocated matrix ZV which is updated on + * output + * @returns the current value of the loss function */ -double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV) +double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, + double *ZV) { long i, j; long n = data->n; @@ -151,10 +164,52 @@ double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV } /** - * @name msvmmaj_get_update - * @brief perform a single step of the majorization algorithm to update V + * @brief Perform a single step of the majorization algorithm to update V + * + * @details + * This function contains the main update calculations of the algorithm. These + * calculations are necessary to find a new update V. The calculations exist of + * recalculating the majorization coefficients for all instances and all + * classes, and solving a linear system to find V. + * + * Because the function msvmmaj_get_update() is always called after a call to + * msvmmaj_get_loss() with the same MajModel::V, it is unnecessary to calculate + * the updated errors MajModel::Q and MajModel::H here too. This saves on + * computation time. 
* - * details + * In calculating the majorization coefficients we calculate the elements of a + * diagonal matrix A with elements + * @f[ + * A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ + * \varepsilon_i a_{ijk}^{(p)} + (1 - \varepsilon_i) \omega_i + * a_{ijk}^{(p)} \right], + * @f] + * where @f$ k = y_i @f$. + * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it is + * efficient to update a matrix ZAZ through consecutive rank 1 updates with + * a single element of A and the corresponding row of Z. The BLAS function + * dsyr is used for this. + * + * The B matrix is has rows + * @f[ + * \boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[ + * \varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)} + * \overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i) + * \omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)} + * \overline{q}_i^{(kj)} \right) \right] + * \boldsymbol{\delta}_{kj}' + * @f] + * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$, + * and one for when it is 0. The 3D simplex difference matrix is used here, in + * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$. + * + * Finally, the following system is solved + * @f[ + * (\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} = + * (\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}' + * \textbf{B}) + * @f] + * solving this system is done through dposv(). * * @param [in,out] model model to be updated * @param [in] data data used in model @@ -166,9 +221,6 @@ double msvmmaj_get_loss(struct MajModel *model, struct MajData *data, double *ZV void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, double *ZAZ, double *ZAZV, double *ZAZVT) { - // Because msvmmaj_update is always called after a call to - // msvmmaj_get_loss() with the latest V, it is unnecessary to recalculate - // the matrix ZV, the errors Q, or the Huber errors H. Awesome! 
int status, class; long i, j, k; double Avalue, Bvalue; @@ -182,11 +234,14 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, double p = model->p; double *rho = model->rho; + // constants which are used often throughout const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0); const double in = 1.0/((double) n); + // clear matrices Memset(B, double, n*(K-1)); Memset(ZAZ, double, (m+1)*(m+1)); + b = 0; for (i=0; i<n; i++) { value = 0; @@ -215,7 +270,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, b = 0; } for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*b*matrix3_get(model->UU, K-1, K, i, k, j); + Bvalue = in*rho[i]*b*matrix3_get( + model->UU, K-1, K, i, k, j); matrix_add(B, K-1, i, k, Bvalue); } Avalue += a*matrix_get(model->R, K, i, j); @@ -227,13 +283,27 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, if (q <= -kappa) { b = 0.5 - kappa/2.0 - q; } else if ( q <= 1.0) { - b = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0)); + b = pow(1.0 - q, 3.0)/( + 2.0*pow(kappa + 1.0, + 2.0)); } else { b = 0; } for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); - matrix_add(B, K-1, i, k, Bvalue); + Bvalue = in*rho[i]*omega*b* + matrix3_get( + model->UU, + K-1, + K, + i, + k, + j); + matrix_add( + B, + K-1, + i, + k, + Bvalue); } } Avalue = 1.5*(K - 1.0); @@ -241,23 +311,51 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, for (j=0; j<K; j++) { q = matrix_get(model->Q, K, i, j); if (q <= (p + kappa - 1.0)/(p - 2.0)) { - a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0); + a = 0.25*pow(p, 2.0)*pow( + 0.5 - kappa/2.0 - q, + p - 2.0); } else if (q <= 1.0) { a = a2g2; } else { - a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 2.0); - b = a*(2.0*q + kappa - 1.0)/(p - 2.0) + 0.5*p*pow((p/(p - 2.0))*(0.5 - kappa/2.0 - q), p - 1.0); + a = 0.25*pow(p, 2.0)*pow( + (p/(p - 2.0))* + 
(0.5 - kappa/2.0 - q), + p - 2.0); + b = a*(2.0*q + kappa - 1.0)/ + (p - 2.0) + + 0.5*p*pow( + p/(p - 2.0)* + (0.5 - kappa/ + 2.0 - q), + p - 1.0); } if (q <= -kappa) { - b = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0); + b = 0.5*p*pow( + 0.5 - kappa/2.0 - q, + p - 1.0); } else if ( q <= 1.0) { - b = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p); + b = p*pow(1.0 - q, + 2.0*p - 1.0)/ + pow(2*kappa+2.0, p); } for (k=0; k<K-1; k++) { - Bvalue = in*rho[i]*omega*b*matrix3_get(model->UU, K-1, K, i, k, j); - matrix_add(B, K-1, i, k, Bvalue); + Bvalue = in*rho[i]*omega*b* + matrix3_get( + model->UU, + K-1, + K, + i, + k, + j); + matrix_add( + B, + K-1, + i, + k, + Bvalue); } - Avalue += a*matrix_get(model->R, K, i, j); + Avalue += a*matrix_get(model->R, + K, i, j); } } Avalue *= omega; @@ -352,7 +450,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, // This step should not be necessary, as the matrix // ZAZ is positive semi-definite by definition. It // is included for safety. - fprintf(stderr, "Received nonzero status from dposv: %i\n", status); + fprintf(stderr, "Received nonzero status from dposv: %i\n", + status); int *IPIV = malloc((m+1)*sizeof(int)); double *WORK = malloc(1*sizeof(double)); status = dsysv( @@ -379,7 +478,8 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, WORK, sizeof(WORK)/sizeof(double)); if (status != 0) - fprintf(stderr, "Received nonzero status from dsysv: %i\n", status); + fprintf(stderr, "Received nonzero status from " + "dsysv: %i\n", status); } // Return to Row-major order. 
The matrix ZAZVT contains the @@ -403,8 +503,18 @@ void msvmmaj_get_update(struct MajModel *model, struct MajData *data, double *B, for (i=0; i<m+1; i++) { for (j=0; j<K-1; j++) { - matrix_set(model->Vbar, K-1, i, j, matrix_get(model->V, K-1, i, j)); - matrix_set(model->V, K-1, i, j, matrix_get(ZAZV, K-1, i, j)); + matrix_set( + model->Vbar, + K-1, + i, + j, + matrix_get(model->V, K-1, i, j)); + matrix_set( + model->V, + K-1, + i, + j, + matrix_get(ZAZV, K-1, i, j)); } } } diff --git a/src/msvmmaj_train_dataset.c b/src/msvmmaj_train_dataset.c index 2da8bee..4f5f4d9 100644 --- a/src/msvmmaj_train_dataset.c +++ b/src/msvmmaj_train_dataset.c @@ -1,22 +1,53 @@ +/** + * @file msvmmaj_train_dataset.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Functions for finding the optimal parameters for the dataset + * + * @details + * The MSVMMaj algorithm takes a number of parameters. The functions in + * this file are used to find the optimal parameters. + */ + #include <math.h> #include <time.h> #include "crossval.h" #include "libMSVMMaj.h" -#include "matrix.h" +#include "msvmmaj.h" +#include "msvmmaj_init.h" +#include "msvmmaj_matrix.h" #include "msvmmaj_train.h" #include "msvmmaj_train_dataset.h" #include "msvmmaj_pred.h" -#include "MSVMMaj.h" #include "util.h" #include "timer.h" extern FILE *MSVMMAJ_OUTPUT_FILE; +/** + * @brief Initialize a Queue from a Training instance + * + * @details + * A Training instance describes the grid to search over. This funtion + * creates all tasks that need to be performed and adds these to + * a Queue. Each task contains a pointer to the train and test datasets + * which are supplied. Note that the tasks are created in a specific order of + * the parameters, to ensure that the MajModel::V of a previous parameter + * set provides the best possible initial estimate of MajModel::V for the next + * parameter set. 
+ * + * @param[in] training Training struct describing the grid search + * @param[in] queue pointer to a Queue that will be used to + * add the tasks to + * @param[in] train_data MajData of the training set + * @param[in] test_data MajData of the test set + * + */ void make_queue(struct Training *training, struct Queue *queue, struct MajData *train_data, struct MajData *test_data) { - long i, j, k, l, m; + long i, j, k; long N, cnt = 0; struct Task *task; queue->i = 0; @@ -26,30 +57,122 @@ void make_queue(struct Training *training, struct Queue *queue, N *= training->Nk; N *= training->Ne; N *= training->Nw; + // these parameters are not necessarily non-zero + N *= training->Ng > 0 ? training->Ng : 1; + N *= training->Nc > 0 ? training->Nc : 1; + N *= training->Nd > 0 ? training->Nd : 1; queue->tasks = Malloc(struct Task *, N); queue->N = N; - for (i=0; i<training->Ne; i++) + // initialize all tasks + for (i=0; i<N; i++) { + task = Malloc(struct Task, 1); + task->ID = i; + task->train_data = train_data; + task->test_data = test_data; + task->folds = training->folds; + task->kerneltype = training->kerneltype; + task->kernel_param = Calloc(double, training->Ng + + training->Nc + training->Nd); + queue->tasks[i] = task; + } + + // These loops mimick a large nested for loop. The advantage is that + // Nd, Nc and Ng which are on the outside of the nested for loop can + // now be zero, without large modification (see below). Whether this + // is indeed better than the nested for loop has not been tested. 
+ cnt = 1; + i = 0; + while (i < N ) + for (j=0; j<training->Np; j++) + for (k=0; k<cnt; k++) { + queue->tasks[i]->p = training->ps[j]; + i++; + } + + cnt *= training->Np; + i = 0; + while (i < N ) + for (j=0; j<training->Nl; j++) + for (k=0; k<cnt; k++) { + queue->tasks[i]->lambda = + training->lambdas[j]; + i++; + } + + cnt *= training->Nl; + i = 0; + while (i < N ) + for (j=0; j<training->Nk; j++) + for (k=0; k<cnt; k++) { + queue->tasks[i]->kappa = training->kappas[j]; + i++; + } + + cnt *= training->Nk; + i = 0; + while (i < N ) for (j=0; j<training->Nw; j++) - for (k=0; k<training->Nk; k++) - for (l=0; l<training->Nl; l++) - for (m=0; m<training->Np; m++) { - task = Malloc(struct Task, 1); - task->epsilon = training->epsilons[i]; - task->weight_idx = training->weight_idxs[j]; - task->kappa = training->kappas[k]; - task->lambda = training->lambdas[l]; - task->p = training->ps[m]; - task->train_data = train_data; - task->test_data = test_data; - task->folds = training->folds; - task->ID = cnt; - queue->tasks[cnt] = task; - cnt++; - } + for (k=0; k<cnt; k++) { + queue->tasks[i]->weight_idx = + training->weight_idxs[j]; + i++; + } + + cnt *= training->Nw; + i = 0; + while (i < N ) + for (j=0; j<training->Ne; j++) + for (k=0; k<cnt; k++) { + queue->tasks[i]->epsilon = + training->epsilons[j]; + i++; + } + + cnt *= training->Ne; + i = 0; + while (i < N && training->Ng > 0) + for (j=0; j<training->Ng; j++) + for (k=0; k<cnt; k++) { + queue->tasks[i]->kernel_param[0] = + training->gammas[j]; + i++; + } + + cnt *= training->Ng > 0 ? training->Ng : 1; + i = 0; + while (i < N && training->Nc > 0) + for (j=0; j<training->Nc; j++) + for (k=0; k<cnt; k++) { + queue->tasks[i]->kernel_param[1] = + training->coefs[j]; + i++; + } + + cnt *= training->Nc > 0 ? 
training->Ng : 1; + i = 0; + while (i < N && training->Nd > 0) + for (j=0; j<training->Nd; j++) + for (k=0; k<cnt; k++) { + queue->tasks[i]->kernel_param[2] = + training->degrees[j]; + i++; + } } +/** + * @brief Get new Task from Queue + * + * @details + * Return a pointer to the next Task in the Queue. If no Task instances are + * left, NULL is returned. The internal counter Queue::i is used for finding + * the next Task. + * + * @param[in] q Queue instance + * @returns pointer to next Task + * + */ struct Task *get_next_task(struct Queue *q) { long i = q->i; @@ -60,6 +183,19 @@ struct Task *get_next_task(struct Queue *q) return NULL; } +/** + * @brief Comparison function for Tasks based on performance + * + * @details + * To be able to sort Task structures on the performance of their specific + * set of parameters, this comparison function is implemented. Task structs + * are sorted with highest performance first. + * + * @param[in] elem1 Task 1 + * @param[in] elem2 Task 2 + * @returns result of inequality of Task 1 performance over + * Task 2 performance + */ int tasksort(const void *elem1, const void *elem2) { const struct Task *t1 = (*(struct Task **) elem1); @@ -67,6 +203,16 @@ int tasksort(const void *elem1, const void *elem2) return (t1->performance > t2->performance); } +/** + * @brief Comparison function for doubles + * + * @details + * Similar to tasksort() only now for two doubles. + * + * @param[in] elem1 number 1 + * @param[in] elem2 number 2 + * @returns comparison of number 1 larger than number 2 + */ int doublesort(const void *elem1, const void *elem2) { const double t1 = (*(double *) elem1); @@ -74,7 +220,20 @@ int doublesort(const void *elem1, const void *elem2) return t1 > t2; } - +/** + * @brief Calculate the percentile of an array of doubles + * + * @details + * The percentile of performance is used to find the top performing + * configurations. 
Since no standard definition of the percentile exists, we + * use the method used in MATLAB and Octave. Since calculating the percentile + * requires a sorted list of the values, a local copy is made first. + * + * @param[in] values array of doubles + * @param[in] N length of the array + * @param[in] p percentile to calculate ( 0 <= p <= 1.0 ). + * @returns the p-th percentile of the values + */ double prctile(double *values, long N, double p) { long i; @@ -94,16 +253,50 @@ double prctile(double *values, long N, double p) return boundary; } +/** + * @brief Run repeats of the Task structs in Queue to find the best + * configuration + * + * @details + * The best performing tasks in the supplied Queue are found by taking those + * Task structs that have a performance greater or equal to the 95% percentile + * of the performance of all tasks. These tasks are then gathered in a new + * Queue. For each of the tasks in this new Queue the cross validation run is + * repeated a number of times. + * + * For each of the Task configurations that are repeated the mean performance, + * standard deviation of the performance and the mean computation time are + * reported. + * + * Finally, the overall best tasks are written to the specified output. These + * tasks are selected to have both the highest mean performance, as well as the + * smallest standard deviation in their performance. This is done as follows. + * First the 99th percentile of task performance and the 1st percentile of + * standard deviation is calculated. If a task exists for which the mean + * performance of the repeats and the standard deviation equals these values + * respectively, this task is found to be the best and is written to the + * output. If no such task exists, the 98th percentile of performance and the + * 2nd percentile of standard deviation is considered. This is repeated until + * an interval is found which contains tasks. If one or more tasks are found, + * this loop stops. 
+ * + * @param[in] q Queue of Task structs which have already been + * run and have a Task::performance value + * @param[in] repeats Number of times to repeat the best + * configurations for consistency + * @param[in] traintype type of training to do (CV or TT) + * + */ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) { long i, r, N; double p, pi, pr, boundary, time, *std, *mean, *perf; struct Queue *nq = Malloc(struct Queue, 1); - struct MajModel *model = Malloc(struct MajModel, 1); + struct MajModel *model = msvmmaj_init_model(); struct Task *task = Malloc(struct Task, 1); clock_t loop_s, loop_e; - // calculate the percentile (Matlab style) + // calculate the performance percentile (Matlab style) qsort(q->tasks, q->N, sizeof(struct Task *), tasksort); p = 0.95*q->N + 0.5; pi = maximum(minimum(floor(p), q->N-1), 1); @@ -111,7 +304,9 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) boundary = (1 - pr)*q->tasks[((long) pi)-1]->performance; boundary += pr*q->tasks[((long) pi)]->performance; note("boundary determined at: %f\n", boundary); - + + // find the number of tasks that perform at least as good as the 95th + // percentile N = 0; for (i=0; i<q->N; i++) if (q->tasks[i]->performance >= boundary) @@ -121,12 +316,14 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) mean = Calloc(double, N); perf = Calloc(double, N*repeats); + // create a new task queue with the tasks which perform well nq->tasks = Malloc(struct Task *, N); for (i=q->N-1; i>q->N-N-1; i--) nq->tasks[q->N-i-1] = q->tasks[i]; nq->N = N; nq->i = 0; + // for each task run the consistency repeats for (i=0; i<N; i++) { task = get_next_task(nq); make_model_from_task(task, model); @@ -140,7 +337,8 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) for (r=0; r<repeats; r++) { if (traintype == CV) { loop_s = clock(); - p = cross_validation(model, NULL, task->train_data, task->folds); + p = 
cross_validation(model, NULL, + task->train_data, task->folds); loop_e = clock(); time += elapsed_time(loop_s, loop_e); matrix_set(perf, repeats, i, r, p); @@ -152,15 +350,24 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) note("%3.3f\t", p); } for (r=0; r<repeats; r++) { - std[i] += pow(matrix_get(perf, repeats, i, r) - mean[i], 2); + std[i] += pow(matrix_get( + perf, + repeats, + i, + r) - mean[i], + 2.0); } std[i] /= ((double) repeats) - 1.0; std[i] = sqrt(std[i]); - note("(m = %3.3f, s = %3.3f, t = %3.3f)\n", mean[i], std[i], time); + note("(m = %3.3f, s = %3.3f, t = %3.3f)\n", + mean[i], std[i], time); } + // find the best overall configurations: those with high average + // performance and low deviation in the performance note("\nBest overall configuration(s):\n"); - note("ID\tweights\tepsilon\t\tp\t\tkappa\t\tlambda\t\tmean_perf\tstd_perf\n"); + note("ID\tweights\tepsilon\t\tp\t\tkappa\t\tlambda\t\t" + "mean_perf\tstd_perf\n"); p = 0.0; bool breakout = false; while (breakout == false) { @@ -168,13 +375,17 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) pr = prctile(std, N, p/100.0); for (i=0; i<N; i++) if ((pi - mean[i] < 0.0001) && (std[i] - pr < 0.0001)) { - note("(%li)\tw = %li\te = %f\tp = %f\tk = %f\tl = %f\t" + note("(%li)\tw = %li\te = %f\tp = %f\t" + "k = %f\tl = %f\t" "mean: %3.3f\tstd: %3.3f\n", nq->tasks[i]->ID, nq->tasks[i]->weight_idx, - nq->tasks[i]->epsilon, nq->tasks[i]->p, - nq->tasks[i]->kappa, nq->tasks[i]->lambda, - mean[i], std[i]); + nq->tasks[i]->epsilon, + nq->tasks[i]->p, + nq->tasks[i]->kappa, + nq->tasks[i]->lambda, + mean[i], + std[i]); breakout = true; } p += 1.0; @@ -187,6 +398,30 @@ void consistency_repeats(struct Queue *q, long repeats, TrainType traintype) free(mean); } +/** + * @brief Run cross validation with a seed model + * + * @details + * This is an implementation of cross validation which uses the optimal + * parameters MajModel::V of a previous fold as 
initial conditions for + * MajModel::V of the next fold. An initial seed for V can be given through the + * seed_model parameter. If seed_model is NULL, random starting values are + * used. + * + * @todo + * The seed model shouldn't have to be allocated completely, since only V is + * used. + * @todo + * There must be some inefficiencies here because the fold model is allocated + * at every fold. This would be detrimental with large datasets. + * + * @param[in] model MajModel with the configuration to train + * @param[in] seed_model MajModel with a seed for MajModel::V + * @param[in] data MajData with the dataset + * @param[in] folds number of cross validation folds + * @returns performance (hitrate) of the configuration on + * cross validation + */ double cross_validation(struct MajModel *model, struct MajModel *seed_model, struct MajData *data, long folds) { @@ -202,7 +437,7 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model, double *performance = Calloc(double, folds); if (seed_model == NULL) { - seed_model = Malloc(struct MajModel, 1); + seed_model = msvmmaj_init_model(); seed_model->n = 0; // we never use anything other than V seed_model->m = model->m; seed_model->K = model->K; @@ -211,34 +446,40 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model, fs = true; } - train_data = Malloc(struct MajData, 1); - test_data = Malloc(struct MajData, 1); - + train_data = msvmmaj_init_data(); + test_data = msvmmaj_init_data(); + // create splits msvmmaj_make_cv_split(model->n, folds, cv_idx); + for (f=0; f<folds; f++) { msvmmaj_get_tt_split(data, train_data, test_data, cv_idx, f); - - fold_model = Malloc(struct MajModel, 1); + // initialize a model for this fold and copy the model + // parameters + fold_model = msvmmaj_init_model(); copy_model(model, fold_model); fold_model->n = train_data->n; fold_model->m = train_data->m; fold_model->K = train_data->K; - + + // allocate, initialize and seed the fold model 
msvmmaj_allocate_model(fold_model); msvmmaj_initialize_weights(train_data, fold_model); msvmmaj_seed_model_V(seed_model, fold_model); - + + // train the model (without output) fid = MSVMMAJ_OUTPUT_FILE; MSVMMAJ_OUTPUT_FILE = NULL; msvmmaj_optimize(fold_model, train_data); MSVMMAJ_OUTPUT_FILE = fid; + // calculate predictive performance on test set predy = Calloc(long, test_data->n); msvmmaj_predict_labels(test_data, fold_model, predy); performance[f] = msvmmaj_prediction_perf(test_data, predy); total_perf += performance[f]/((double) folds); + // seed the seed model with the fold model msvmmaj_seed_model_V(fold_model, seed_model); free(predy); @@ -250,6 +491,7 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model, msvmmaj_free_model(fold_model); } + // if a seed model was allocated before, free it. if (fs) msvmmaj_free_model(seed_model); free(train_data); @@ -261,12 +503,28 @@ double cross_validation(struct MajModel *model, struct MajModel *seed_model, } +/** + * @brief Run the grid search for a cross validation dataset + * + * @details + * Given a Queue of Task struct to be trained, a grid search is launched to + * find the optimal parameter configuration. As is also done within + * cross_validation(), the optimal weights of one parameter set are used as + * initial estimates for MajModel::V in the next parameter set. Note that to + * optimally exploit this feature of the optimization algorithm, the order in + * which tasks are considered is important. This is considered in + * make_queue(). + * + * The performance found by cross validation is stored in the Task struct. 
+ * + * @param[in,out] q Queue with Task instances to run + */ void start_training_cv(struct Queue *q) { double perf, current_max = 0; struct Task *task = get_next_task(q); - struct MajModel *seed_model = Malloc(struct MajModel, 1); - struct MajModel *model = Malloc(struct MajModel, 1); + struct MajModel *seed_model = msvmmaj_init_model(); + struct MajModel *model = msvmmaj_init_model(); clock_t main_s, main_e, loop_s, loop_e; model->n = task->train_data->n; @@ -282,13 +540,16 @@ void start_training_cv(struct Queue *q) main_s = clock(); while (task) { - note("(%03li/%03li)\tw = %li\te = %f\tp = %f\tk = %f\t l = %f\t", - task->ID+1, q->N, task->weight_idx, task->epsilon, + note("(%03li/%03li)\tw = %li\te = %f\tp = %f\tk = %f\t " + "l = %f\t", + task->ID+1, q->N, task->weight_idx, + task->epsilon, task->p, task->kappa, task->lambda); make_model_from_task(task, model); loop_s = clock(); - perf = cross_validation(model, seed_model, task->train_data, task->folds); + perf = cross_validation(model, seed_model, task->train_data, + task->folds); loop_e = clock(); current_max = maximum(current_max, perf); @@ -308,6 +569,23 @@ void start_training_cv(struct Queue *q) msvmmaj_free_model(seed_model); } +/** + * @brief Run the grid search for a train/test dataset + * + * @details + * This function is similar to start_training_cv(), except that the + * pre-determined training set is used only once, and the pre-determined test + * set is used for validation. + * + * @todo + * It would probably be better to train the model on the training set using + * cross validation and only use the test set when comparing with other + * methods. The way it is now, you're finding out which parameters predict + * _this_ test set best, which is not what you want. 
+ * + * @param[in] q Queue with Task structs to run + * + */ void start_training_tt(struct Queue *q) { FILE *fid; @@ -317,7 +595,7 @@ void start_training_tt(struct Queue *q) double total_perf, current_max = 0; struct Task *task = get_next_task(q); - struct MajModel *seed_model = Malloc(struct MajModel, 1); + struct MajModel *seed_model = msvmmaj_init_model(); clock_t main_s, main_e; clock_t loop_s, loop_e; @@ -334,7 +612,7 @@ void start_training_tt(struct Queue *q) c+1, q->N, task->weight_idx, task->epsilon, task->p, task->kappa, task->lambda); loop_s = clock(); - struct MajModel *model = Malloc(struct MajModel, 1); + struct MajModel *model = msvmmaj_init_model(); make_model_from_task(task, model); model->n = task->train_data->n; @@ -374,15 +652,37 @@ void start_training_tt(struct Queue *q) msvmmaj_free_model(seed_model); } +/** + * @brief Free the Queue struct + * + * @details + * Freeing the allocated memory of the Queue means freeing every Task struct + * and then freeing the Queue. + * + * @param[in] q Queue to be freed + * + */ void free_queue(struct Queue *q) { long i; - for (i=0; i<q->N; i++) + for (i=0; i<q->N; i++) { + free(q->tasks[i]->kernel_param); free(q->tasks[i]); + } free(q->tasks); free(q); } +/** + * @brief Copy parameters from Task to MajModel + * + * @details + * A Task struct only contains the parameters of the MajModel to be estimated. + * This function is used to copy these parameters. 
+ * + * @param[in] task Task instance with parameters + * @param[in,out] model MajModel to which the parameters are copied + */ void make_model_from_task(struct Task *task, struct MajModel *model) { model->weight_idx = task->weight_idx; @@ -392,6 +692,16 @@ void make_model_from_task(struct Task *task, struct MajModel *model) model->lambda = task->lambda; } +/** + * @brief Copy model parameters between two MajModel structs + * + * @details + * The parameters copied are MajModel::weight_idx, MajModel::epsilon, + * MajModel::p, MajModel::kappa, and MajModel::lambda. + * + * @param[in] from MajModel to copy parameters from + * @param[in,out] to MajModel to copy parameters to + */ void copy_model(struct MajModel *from, struct MajModel *to) { to->weight_idx = from->weight_idx; diff --git a/src/mylapack.c b/src/mylapack.c deleted file mode 100644 index 4a9cf81..0000000 --- a/src/mylapack.c +++ /dev/null @@ -1,49 +0,0 @@ -/** - * @file mylapack.c - * @author Gertjan van den Burg (burg@ese.eur.nl) - * @date August 9, 2013 - * @brief Utility functions for interacting with LAPACK - * - * @details - * Functions in this file are auxiliary functions which make it easier - * to use LAPACK functions from liblapack. - */ - -#include "mylapack.h" - -/** - * @name dposv - * @brief Solve a system of equations AX = B where A is symmetric positive definite. - * @ingroup libMSVMMaj - * - * See the LAPACK documentation at: - * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html - */ -int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B, - int LDB) -{ - extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A, - int *LDAp, double *B, int *LDBp, int *INFOp); - int INFO; - dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO); - return INFO; -} - -/** - * @name dsysv - * @brief Solve a system of equations AX = B where A is symmetric. 
- * @ingroup libMSVMMaj - * - * See the LAPACK documentation at: - * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html - */ -int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV, - double *B, int LDB, double *WORK, int LWORK) -{ - extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A, - int *LDAp, int *IPIV, double *B, int *LDBp, - double *WORK, int *LWORK, int *INFOp); - int INFO; - dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO); - return INFO; -} diff --git a/src/predMSVMMaj.c b/src/predMSVMMaj.c index 966c7c0..3e3a101 100644 --- a/src/predMSVMMaj.c +++ b/src/predMSVMMaj.c @@ -1,17 +1,42 @@ +/** + * @file predMSVMMaj.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Command line interface for predicting class labels + * + * @details + * This is a command line program for predicting the class labels or + * determining the predictive performance of a pre-determined model on a given + * test dataset. The predictive performance can be written to the screen or + * the predicted class labels can be written to a specified output file. This + * is done using msvmmaj_write_predictions(). + * + * The specified model file must follow the specification given in + * msvmmaj_write_model(). + * + * For usage information, see the program help function. 
+ * + */ + +#include "msvmmaj.h" +#include "msvmmaj_init.h" #include "msvmmaj_pred.h" -#include "MSVMMaj.h" #include "util.h" #define MINARGS 3 extern FILE *MSVMMAJ_OUTPUT_FILE; +// function declarations void print_null(const char *s) {} void exit_with_help(); -void parse_command_line(int argc, char **argv, struct MajModel *model, +void parse_command_line(int argc, char **argv, char *input_filename, char *output_filename, char *model_filename); +/** + * @brief Help function + */ void exit_with_help() { printf("This is MSVMMaj, version %1.1f\n\n", VERSION); @@ -22,6 +47,24 @@ void exit_with_help() exit(0); } +/** + * @brief Main interface function for predMSVMMaj + * + * @details + * Main interface for the command line program. A given model file is read and + * a test dataset is initialized from the given data. The predictive + * performance (hitrate) of the model on the test set is printed to the output + * stream (default = stdout). If an output file is specified the predictions + * are written to the file. + * + * @todo + * Ensure that the program can read model files without class labels + * specified. In that case no prediction accuracy is printed to the screen. 
+ * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + */ int main(int argc, char **argv) { long *predy; @@ -31,16 +74,15 @@ int main(int argc, char **argv) char model_filename[MAX_LINE_LENGTH]; char output_filename[MAX_LINE_LENGTH];; - struct MajModel *model = Malloc(struct MajModel, 1); - struct MajData *data = Malloc(struct MajData, 1); - if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help") || msvmmaj_check_argv_eq(argc, argv, "-h") ) exit_with_help(); - parse_command_line(argc, argv, model, input_filename, output_filename, + parse_command_line(argc, argv, input_filename, output_filename, model_filename); - // TODO: make sure that read_data allows for files without labels + // read the data and model + struct MajModel *model = msvmmaj_init_model(); + struct MajData *data = msvmmaj_init_data(); msvmmaj_read_data(data, input_filename); msvmmaj_read_model(model, model_filename); @@ -50,8 +92,14 @@ int main(int argc, char **argv) "does not equal the number of attributes in " "model (%li)\n", data->m, model->m); exit(1); + } else if (data->K != model->K) { + fprintf(stderr, "Error: number of classes in data (%li) " + "does not equal the number of classes in " + "model (%li)\n", data->K, model->K); + exit(1); } + // predict labels and performance if test data has labels predy = Calloc(long, data->n); msvmmaj_predict_labels(data, model, predy); if (data->y != NULL) { @@ -59,11 +107,13 @@ int main(int argc, char **argv) note("Predictive performance: %3.2f%%\n", performance); } + // if output file is specified, write predictions to it if (msvmmaj_check_argv_eq(argc, argv, "-o")) { msvmmaj_write_predictions(data, predy, output_filename); note("Predictions written to: %s\n", output_filename); } + // free the model, data, and predictions msvmmaj_free_model(model); msvmmaj_free_data(data); free(predy); @@ -71,8 +121,26 @@ int main(int argc, char **argv) return 0; } -void parse_command_line(int argc, char 
**argv, struct MajModel *model, - char *input_filename, char *output_filename, char *model_filename) +/** + * @brief Parse command line arguments + * + * @details + * Read the data filename and model filename from the command line arguments. + * If specified, also read the output filename. If the quiet flag is given, + * set the global output stream to NULL. On error, exit_with_help(). + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] input_filename pre-allocated array for the input + * filename + * @param[in] output_filename pre-allocated array for the output + * filename + * @param[in] model_filename pre-allocated array for the model + * filename + * + */ +void parse_command_line(int argc, char **argv, char *input_filename, + char *output_filename, char *model_filename) { int i; @@ -91,7 +159,8 @@ void parse_command_line(int argc, char **argv, struct MajModel *model, i--; break; default: - fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); + fprintf(stderr, "Unknown option: -%c\n", + argv[i-1][1]); exit_with_help(); } } diff --git a/src/strutil.c b/src/strutil.c index ae96239..ca4181f 100644 --- a/src/strutil.c +++ b/src/strutil.c @@ -1,5 +1,24 @@ +/** + * @file strutil.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Utility functions for dealing with strings + * + * @details + * This file contains functions for reading files, reading strings from a + * format and checking start and ends of strings. + */ + #include "strutil.h" +/** + * @brief Check if a string starts with a prefix + * + * @param[in] str string + * @param[in] pre prefix + * @returns boolean, true if string starts with prefix, false + * otherwise + */ bool str_startswith(const char *str, const char *pre) { size_t lenpre = strlen(pre), @@ -7,19 +26,41 @@ bool str_startswith(const char *str, const char *pre) return lenstr < lenpre ? 
false : strncmp(pre, str, lenpre) == 0; } +/** + * @brief Check if a string ends with a suffix + * + * @param[in] str string + * @param[in] suf suffix + * @returns boolean, true if string ends with suffix, false + * otherwise + */ bool str_endswith(const char *str, const char *suf) { size_t lensuf = strlen(suf), lenstr = strlen(str); - return lenstr < lensuf ? false : strncmp(str + lenstr - lensuf, suf, lensuf) == 0; + return lenstr < lensuf ? false : strncmp(str + lenstr - lensuf, suf, + lensuf) == 0; } +/** + * @brief Move to next line in file + * + * @param[in] fid File opened for reading + * @param[in] filename name of the file pointed to by fid + */ void next_line(FILE *fid, char *filename) { char buffer[MAX_LINE_LENGTH]; get_line(fid, filename, buffer); } +/** + * @brief Read line to buffer + * + * @param[in] fid File opened for reading + * @param[in] filename name of the file + * @param[in,out] buffer allocated buffer to read to + */ void get_line(FILE *fid, char *filename, char *buffer) { if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) { @@ -28,6 +69,14 @@ void get_line(FILE *fid, char *filename, char *buffer) } } +/** + * @brief Read a double from file following a format + * + * @param[in] fid File opened for reading + * @param[in] filename Name of the file + * @param[in] fmt Format containing a float format + * @returns value read (if any) + */ double get_fmt_double(FILE *fid, char *filename, const char *fmt) { char buffer[MAX_LINE_LENGTH]; @@ -38,6 +87,14 @@ double get_fmt_double(FILE *fid, char *filename, const char *fmt) return value; } +/** + * @brief Read a long integer from file following a format + * + * @param[in] fid File opened for reading + * @param[in] filename Name of the file + * @param[in] fmt Format containing a long integer format + * @returns value read (if any) + */ long get_fmt_long(FILE *fid, char *filename, const char *fmt) { char buffer[MAX_LINE_LENGTH]; @@ -48,6 +105,20 @@ long get_fmt_long(FILE *fid, char *filename, const char 
*fmt) return value; } +/** + * @brief Read all doubles in a given buffer + * + * @details + * This function is used to read a line of doubles from a buffer. All the + * doubles found are stored in a pre-allocated array. + * + * @param[in] buffer a string buffer + * @param[in] offset an offset of the string to start looking for + * doubles + * @param[in] all_doubles pre-allocated array of doubles (should be large + * enough) + * @returns number of doubles read + */ long all_doubles_str(char *buffer, long offset, double *all_doubles) { double value; @@ -69,6 +140,20 @@ long all_doubles_str(char *buffer, long offset, double *all_doubles) return i; } +/** + * @brief Read all longs in a given buffer + * + * @details + * This function is used to read a line of longs from a buffer. All the + * longs found are stored in a pre-allocated array. + * + * @param[in] buffer a string buffer + * @param[in] offset an offset of the string to start looking for + * longs + * @param[in] all_longs pre-allocated array of longs (should be large + * enough) + * @returns number of longs read + */ long all_longs_str(char *buffer, long offset, long *all_longs) { long value; diff --git a/src/timer.c b/src/timer.c index 2187fb2..3a763a0 100644 --- a/src/timer.c +++ b/src/timer.c @@ -1,7 +1,25 @@ +/** + * @file timer.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Function for calculating time difference + * + * @details + * This file contains a simple function for calculating the time in seconds + * elapsed between two clock() calls. 
+ */ + #include <time.h> #include "timer.h" +/** + * @brief Calculate the time between two clocks + * + * @param[in] s_time starting time + * @param[in] e_time end time + * @returns time elapsed in seconds + */ double elapsed_time(clock_t s_time, clock_t e_time) { return ((double) (e_time - s_time))/((double) CLOCKS_PER_SEC); diff --git a/src/trainMSVMMaj.c b/src/trainMSVMMaj.c index b4b74df..e045a6c 100644 --- a/src/trainMSVMMaj.c +++ b/src/trainMSVMMaj.c @@ -1,54 +1,93 @@ +/** + * @file trainMSVMMaj.c + * @author Gertjan van den Burg + * @date August, 2013 + * @brief Command line interface for training a single model with MSVMMaj + * + * @details + * This is a command line program for training a single model on a given + * dataset. To run a grid search over a number of parameter configurations, + * see trainMSVMMajdataset.c. + * + */ + #include <time.h> #include <math.h> +#include "msvmmaj_kernel.h" #include "libMSVMMaj.h" +#include "msvmmaj.h" +#include "msvmmaj_init.h" #include "msvmmaj_train.h" #include "util.h" -#include "MSVMMaj.h" #define MINARGS 2 extern FILE *MSVMMAJ_OUTPUT_FILE; +// function declarations void print_null(const char *s) {} void exit_with_help(); void parse_command_line(int argc, char **argv, struct MajModel *model, char *input_filename, char *output_filename, char *model_filename); +/** + * @brief Help function + */ void exit_with_help() { printf("This is MSVMMaj, version %1.1f\n\n", VERSION); printf("Usage: trainMSVMMaj [options] training_data_file\n"); printf("Options:\n"); + printf("-c coef : coefficient for the polynomial and sigmoid kernel\n"); + printf("-d degree : degree for the polynomial kernel\n"); printf("-e epsilon : set the value of the stopping criterion\n"); + printf("-g gamma : parameter for the rbf, polynomial or sigmoid " + "kernel\n"); printf("-h | -help : print this help.\n"); printf("-k kappa : set the value of kappa used in the Huber hinge\n"); printf("-l lambda : set the value of lambda (lambda > 0)\n"); printf("-m 
model_file : use previous model as seed for W and t\n"); printf("-o output_file : write output to file\n"); - printf("-p p-value : set the value of p in the lp norm (1.0 <= p <= 2.0)\n"); + printf("-p p-value : set the value of p in the lp norm " + "(1.0 <= p <= 2.0)\n"); printf("-q : quiet mode (no output)\n"); - printf("-r rho : choose the weigth specification (1 = unit, 2 = group)\n"); + printf("-r rho : choose the weigth specification (1 = unit, 2 = " + "group)\n"); + printf("-t type: kerneltype (LINEAR=0, POLY=1, RBF=2, SIGMOID=3)\n"); + printf("-u use_cholesky: use cholesky decomposition when using " + "kernels (0 = false, 1 = true). Default 0.\n"); exit(0); } -/* - Main -*/ +/** + * @brief Main interface function for trainMSVMMaj + * + * @details + * Main interface for the command line program. A given dataset file is read + * and a MSVMMaj model is trained on this data. By default the progress of the + * computations is written to stdout. For the full list of program options, see the + * help function. 
+ * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + */ int main(int argc, char **argv) { char input_filename[MAX_LINE_LENGTH]; char model_filename[MAX_LINE_LENGTH]; char output_filename[MAX_LINE_LENGTH]; - struct MajModel *model = Malloc(struct MajModel, 1); - struct MajData *data = Malloc(struct MajData, 1); + struct MajModel *model = msvmmaj_init_model(); + struct MajData *data = msvmmaj_init_data(); if (argc < MINARGS || msvmmaj_check_argv(argc, argv, "-help") || msvmmaj_check_argv_eq(argc, argv, "-h") ) exit_with_help(); - parse_command_line(argc, argv, model, input_filename, output_filename, model_filename); + parse_command_line(argc, argv, model, input_filename, + output_filename, model_filename); // read data file msvmmaj_read_data(data, input_filename); @@ -59,22 +98,25 @@ int main(int argc, char **argv) model->K = data->K; model->data_file = input_filename; + // initialize kernel (if necessary) + msvmmaj_make_kernel(model, data); + // allocate model and initialize weights msvmmaj_allocate_model(model); msvmmaj_initialize_weights(data, model); + // seed the random number generator (only place in programs is in + // command line interfaces) srand(time(NULL)); if (msvmmaj_check_argv_eq(argc, argv, "-m")) { - struct MajModel *seed_model = Malloc(struct MajModel, 1); + struct MajModel *seed_model = msvmmaj_init_model(); msvmmaj_read_model(seed_model, model_filename); msvmmaj_seed_model_V(seed_model, model); msvmmaj_free_model(seed_model); } else { msvmmaj_seed_model_V(NULL, model); } - // initialize kernel (if necessary) - // msvmmaj_make_kernel(model, data); // start training msvmmaj_optimize(model, data); @@ -92,18 +134,34 @@ int main(int argc, char **argv) return 0; } +/** + * @brief Parse command line arguments + * + * @details + * Process the command line arguments for the model parameters, and record + * them in the specified MajModel. 
An input filename for the dataset is read + * and if specified an output filename and a model filename for the seed + * model. + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] model initialized model + * @param[in] input_filename pre-allocated buffer for the input + * filename + * @param[in] output_filename pre-allocated buffer for the output + * filename + * @param[in] model_filename pre-allocated buffer for the model + * filename + * + */ void parse_command_line(int argc, char **argv, struct MajModel *model, char *input_filename, char *output_filename, char *model_filename) { - int i; - - // default values - model->p = 1.0; - model->lambda = pow(2, -8.0); - model->epsilon = 1e-6; - model->kappa = 0.0; - model->weight_idx = 1; - + int i, tmp; + double gamma = 1.0, + degree = 2.0, + coef = 0.0; + MSVMMAJ_OUTPUT_FILE = stdout; // parse options @@ -113,9 +171,18 @@ void parse_command_line(int argc, char **argv, struct MajModel *model, exit_with_help(); } switch (argv[i-1][1]) { + case 'c': + coef = atof(argv[i]); + break; + case 'd': + degree = atof(argv[i]); + break; case 'e': model->epsilon = atof(argv[i]); break; + case 'g': + gamma = atof(argv[i]); + break; case 'k': model->kappa = atof(argv[i]); break; @@ -134,20 +201,50 @@ void parse_command_line(int argc, char **argv, struct MajModel *model, case 'r': model->weight_idx = atoi(argv[i]); break; + case 't': + model->kerneltype = atoi(argv[i]); + break; + case 'u': + tmp = atoi(argv[i]); + if (!(tmp == 1 || tmp == 0)) + fprintf(stderr, "Unknown value %i for" + " use_cholesky", tmp); + model->use_cholesky = (tmp == 1) ? 
true : false; + break; case 'q': MSVMMAJ_OUTPUT_FILE = NULL; i--; break; default: - fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); + fprintf(stderr, "Unknown option: -%c\n", + argv[i-1][1]); exit_with_help(); } } - + // read input filename if (i >= argc) exit_with_help(); strcpy(input_filename, argv[i]); -} + // set kernel parameters + switch (model->kerneltype) { + case K_LINEAR: + break; + case K_POLY: + model->kernelparam = Calloc(double, 3); + model->kernelparam[0] = gamma; + model->kernelparam[1] = coef; + model->kernelparam[2] = degree; + break; + case K_RBF: + model->kernelparam = Calloc(double, 1); + model->kernelparam[0] = gamma; + break; + case K_SIGMOID: + model->kernelparam = Calloc(double, 1); + model->kernelparam[0] = gamma; + model->kernelparam[1] = coef; + } +} diff --git a/src/trainMSVMMajdataset.c b/src/trainMSVMMajdataset.c index 7c3385c..097df85 100644 --- a/src/trainMSVMMajdataset.c +++ b/src/trainMSVMMajdataset.c @@ -1,7 +1,28 @@ +/** + * @file trainMSVMMajdataset.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Command line interface for the grid search program + * + * @details + * This is a command line interface to the parameter grid search functionality + * of the algorithm. The grid search is specified in a separate file, thereby + * reducing the number of command line arguments. See + * read_training_from_file() for documentation on the training file. + * + * The program runs a grid search as specified in the training file. If + * desired the grid search can incorporate consistency checks to find the + * configuration among the best configurations which scores consistently high. + * All output is written to stdout, unless the quiet mode is specified. + * + * For further usage information, see the program help function. 
+ * + */ + #include <time.h> #include "crossval.h" -#include "MSVMMaj.h" +#include "msvmmaj.h" #include "msvmmaj_pred.h" #include "msvmmaj_train.h" #include "msvmmaj_train_dataset.h" @@ -12,11 +33,15 @@ extern FILE *MSVMMAJ_OUTPUT_FILE; +// function declarations void print_null(const char *s) {} void exit_with_help(); void parse_command_line(int argc, char **argv, char *input_filename); void read_training_from_file(char *input_filename, struct Training *training); +/** + * @brief Help function + */ void exit_with_help() { printf("This is MSVMMaj, version %1.1f\n\n", VERSION); @@ -28,6 +53,22 @@ void exit_with_help() exit(0); } +/** + * @brief Main interface function for trainMSVMMajdataset + * + * @details + * Main interface for the command line program. A given training file which + * specifies a grid search over a single dataset is read. From this, a Queue + * is created containing all Task instances that need to be performed in the + * search. Depending on the type of dataset, either cross validation or + * train/test split training is performed for all tasks. If specified, + * consistency repeats are done at the end of the grid search. Note that + * currently no output is produced other than what is written to stdout. + * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * + */ int main(int argc, char **argv) { char input_filename[MAX_LINE_LENGTH]; @@ -78,6 +119,21 @@ int main(int argc, char **argv) return 0; } +/** + * @brief Parse command line arguments + * + * @details + * Few arguments can be supplied to the command line. Only quiet mode can be + * specified, or help can be requested. The filename of the training file is + * read from the arguments. Parsing of the training file is done separately in + * read_training_from_file(). 
+ * + * @param[in] argc number of command line arguments + * @param[in] argv array of command line arguments + * @param[in] input_filename pre-allocated buffer for the training + * filename. + * + */ void parse_command_line(int argc, char **argv, char *input_filename) { int i; @@ -94,7 +150,8 @@ void parse_command_line(int argc, char **argv, char *input_filename) i--; break; default: - fprintf(stderr, "Unknown option: -%c\n", argv[i-1][1]); + fprintf(stderr, "Unknown option: -%c\n", + argv[i-1][1]); exit_with_help(); } } @@ -105,6 +162,21 @@ void parse_command_line(int argc, char **argv, char *input_filename) strcpy(input_filename, argv[i]); } +/** + * @brief Read the Training struct from file + * + * @details + * Read the Training struct from a file. The training file follows a specific + * format specified in @ref spec_training_file. + * + * Commonly used string functions in this function are all_doubles_str() and + * all_longs_str(). + * + * @param[in] input_filename filename of the training file + * @param[in] training Training structure to place the parsed + * parameter grid. 
+ * + */ void read_training_from_file(char *input_filename, struct Training *training) { long i, nr = 0; @@ -117,7 +189,8 @@ void read_training_from_file(char *input_filename, struct Training *training) fid = fopen(input_filename, "r"); if (fid == NULL) { - fprintf(stderr, "Error opening training file %s\n", input_filename); + fprintf(stderr, "Error opening training file %s\n", + input_filename); exit(1); } training->traintype = CV; @@ -126,11 +199,13 @@ void read_training_from_file(char *input_filename, struct Training *training) Memset(lparams, long, MAX_LINE_LENGTH); if (str_startswith(buffer, "train:")) { sscanf(buffer, "train: %s\n", train_filename); - training->train_data_file = Calloc(char, MAX_LINE_LENGTH); + training->train_data_file = Calloc(char, + MAX_LINE_LENGTH); strcpy(training->train_data_file, train_filename); } else if (str_startswith(buffer, "test:")) { sscanf(buffer, "test: %s\n", test_filename); - training->test_data_file = Calloc(char, MAX_LINE_LENGTH); + training->test_data_file = Calloc(char, + MAX_LINE_LENGTH); strcpy(training->test_data_file, test_filename); training->traintype = TT; } else if (str_startswith(buffer, "p:")) { @@ -167,16 +242,76 @@ void read_training_from_file(char *input_filename, struct Training *training) nr = all_longs_str(buffer, 6, lparams); training->folds = lparams[0]; if (nr > 1) - fprintf(stderr, "Field \"folds\" only takes one value. " - "Additional fields are ignored.\n"); + fprintf(stderr, "Field \"folds\" only takes " + "one value. Additional " + "fields are ignored.\n"); } else if (str_startswith(buffer, "repeats:")) { nr = all_longs_str(buffer, 8, lparams); training->repeats = lparams[0]; if (nr > 1) - fprintf(stderr, "Field \"repeats\" only takes one value. " - "Additional fields are ignored.\n"); + fprintf(stderr, "Field \"repeats\" only " + "takes one value. 
Additional " + "fields are ignored.\n"); + } else if (str_startswith(buffer, "kernel:")) { + nr = all_longs_str(buffer, 7, lparams); + if (nr > 1) + fprintf(stderr, "Field \"kernel\" only takes " + "one value. Additional " + "fields are ignored.\n"); + switch (lparams[0]) { + case 0: + training->kerneltype = K_LINEAR; + break; + case 1: + training->kerneltype = K_POLY; + break; + case 2: + training->kerneltype = K_RBF; + break; + case 3: + training->kerneltype = K_SIGMOID; + break; + } + } else if (str_startswith(buffer, "gamma:")) { + nr = all_doubles_str(buffer, 6, params); + if (training->kerneltype == K_LINEAR) { + fprintf(stderr, "Field \"gamma\" ignored, " + "linear kernel is used.\n"); + training->Ng = 0; + break; + } + training->gammas = Calloc(double, nr); + for (i=0; i<nr; i++) + training->gammas[i] = params[i]; + training->Ng = nr; + } else if (str_startswith(buffer, "coef:")) { + nr = all_doubles_str(buffer, 5, params); + if (training->kerneltype == K_LINEAR || + training->kerneltype == K_RBF) { + fprintf(stderr, "Field \"coef\" ignored with" + "specified kernel.\n"); + training->Nc = 0; + break; + } + training->coefs = Calloc(double, nr); + for (i=0; i<nr; i++) + training->coefs[i] = params[i]; + training->Nc = nr; + } else if (str_startswith(buffer, "degree:")) { + nr = all_doubles_str(buffer, 7, params); + if (training->kerneltype != K_POLY) { + fprintf(stderr, "Field \"degree\" ignored " + "with specified kernel.\n"); + training->Nd = 0; + break; + } + training->degrees = Calloc(double, nr); + for (i=0; i<nr; i++) + training->degrees[i] = params[i]; + training->Nd = nr; } else { - fprintf(stderr, "Cannot find any parameters on line: %s\n", buffer); + fprintf(stderr, "Cannot find any parameters on line: " + "%s\n", buffer); } } @@ -1,19 +1,55 @@ +/** + * @file util.c + * @author Gertjan van den Burg + * @date January, 2014 + * @brief Utility functions + * + * @details + * This file contains several utility functions for coordinating input and + * 
output of data and model files. It also contains string functions. + * + * @todo + * Pull this apart. + * + */ #include <math.h> #include <stdarg.h> #include <time.h> -#include "matrix.h" -#include "MSVMMaj.h" +#include "msvmmaj.h" +#include "msvmmaj_matrix.h" #include "strutil.h" #include "util.h" -FILE *MSVMMAJ_OUTPUT_FILE; - -/* - Read the data from the data_file. The data matrix X is augmented - with a column of ones, to get the matrix Z. -*/ +FILE *MSVMMAJ_OUTPUT_FILE; ///< The #MSVMMAJ_OUTPUT_FILE specifies the + ///< output stream to which all output is + ///< written. This is done through the + ///< internal (!) + ///< function msvmmaj_print_string(). The + ///< advantage of using a global output + ///< stream variable is that the output can + ///< temporarily be suppressed by importing + ///< this variable through @c extern and + ///< (temporarily) setting it to NULL. + +/** + * @brief Read data from file + * + * @details + * Read the data from the data_file. The data matrix X is augmented + * with a column of ones, to get the matrix Z. The data is expected + * to follow a specific format, which is specified in the @ref spec_data_file. + * The class labels are corrected internally to correspond to the interval + * [1 .. K], where K is the total number of classes. + * + * @todo + * Make sure that this function allows datasets without class labels for + * testing. + * + * @param[in,out] dataset initialized MajData struct + * @param[in] data_file filename of the data file. 
+ */ void msvmmaj_read_data(struct MajData *dataset, char *data_file) { FILE *fid; @@ -22,7 +58,7 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file) long nr = 0; // used to check consistency of data double value; long K = 0; - long min_y = 1000; + long min_y = 1000000; char buf[MAX_LINE_LENGTH]; @@ -79,13 +115,15 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file) dataset->y[i]++; K++; } else if (min_y < 0 ) { - fprintf(stderr, "ERROR: wrong class labels in %s, minimum value is: %ld\n", + fprintf(stderr, "ERROR: wrong class labels in %s, minimum " + "value is: %ld\n", data_file, min_y); exit(0); } if (nr < n * m) { - fprintf(stderr, "ERROR: not enough data found in %s\n", data_file); + fprintf(stderr, "ERROR: not enough data found in %s\n", + data_file); exit(0); } @@ -98,6 +136,19 @@ void msvmmaj_read_data(struct MajData *dataset, char *data_file) dataset->K = K; } +/** + * @brief Read model from file + * + * @details + * Read a MajModel from a model file. The MajModel struct must have been + * initialized elsewhere. The model file is expected to follow the @ref + * spec_model_file. The easiest way to generate a model file is through + * msvmmaj_write_model(), which can for instance be used in trainMSVMMaj.c. 
+ * + * @param[in,out] model initialized MajModel + * @param[in] model_filename filename of the model file + * + */ void msvmmaj_read_model(struct MajModel *model, char *model_filename) { long i, j, nr = 0; @@ -108,7 +159,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename) fid = fopen(model_filename, "r"); if (fid == NULL) { - fprintf(stderr, "Error opening model file %s\n", model_filename); + fprintf(stderr, "Error opening model file %s\n", + model_filename); exit(1); } // skip the first four lines @@ -120,7 +172,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename) model->lambda = get_fmt_double(fid, model_filename, "lambda = %lf"); model->kappa = get_fmt_double(fid, model_filename, "kappa = %lf"); model->epsilon = get_fmt_double(fid, model_filename, "epsilon = %lf"); - model->weight_idx = (int) get_fmt_long(fid, model_filename, "weight_idx = %li"); + model->weight_idx = (int) get_fmt_long(fid, model_filename, + "weight_idx = %li"); // skip to data section for (i=0; i<2; i++) @@ -128,7 +181,8 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename) // read filename of data file if (fgets(buffer, MAX_LINE_LENGTH, fid) == NULL) { - fprintf(stderr, "Error reading model file %s\n", model_filename); + fprintf(stderr, "Error reading model file %s\n", + model_filename); exit(1); } sscanf(buffer, "filename = %s\n", data_filename); @@ -153,12 +207,25 @@ void msvmmaj_read_model(struct MajModel *model, char *model_filename) } if (nr != (model->m+1)*(model->K-1)) { fprintf(stderr, "Error reading model file %s. " - "Not enough elements of V found.\n", model_filename); + "Not enough elements of V found.\n", + model_filename); exit(1); } - } +/** + * @brief Write model to file + * + * @details + * Write a MajModel to a file. The current time is specified in the file in + * UTC + offset. The model file further corresponds to the @ref + * spec_model_file. 
+ * + * @param[in] model MajModel which contains an estimate for + * MajModel::V + * @param[in] output_filename the output file to write the model to + * + */ void msvmmaj_write_model(struct MajModel *model, char *output_filename) { FILE *fid; @@ -171,7 +238,8 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename) // open output file fid = fopen(output_filename, "w"); if (fid == NULL) { - fprintf(stderr, "Error opening output file %s", output_filename); + fprintf(stderr, "Error opening output file %s", + output_filename); exit(1); } @@ -201,7 +269,8 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename) // Write output to file fprintf(fid, "Output file for MSVMMaj (version %1.1f)\n", VERSION); - fprintf(fid, "Generated on: %s (UTC %+03i:%02i)\n\n", timestr, hours, minutes); + fprintf(fid, "Generated on: %s (UTC %+03i:%02i)\n\n", + timestr, hours, minutes); fprintf(fid, "Model:\n"); fprintf(fid, "p = %15.16f\n", model->p); fprintf(fid, "lambda = %15.16f\n", model->lambda); @@ -218,35 +287,71 @@ void msvmmaj_write_model(struct MajModel *model, char *output_filename) fprintf(fid, "Output:\n"); for (i=0; i<model->m+1; i++) { for (j=0; j<model->K-1; j++) { - fprintf(fid, "%+15.16f ", matrix_get(model->V, model->K-1, i, j)); + fprintf(fid, "%+15.16f ", + matrix_get(model->V, + model->K-1, i, j)); } fprintf(fid, "\n"); } fclose(fid); - } -void msvmmaj_write_predictions(struct MajData *data, long *predy, char *output_filename) +/** + * @brief Write predictions to file + * + * @details + * Write the given predictions to an output file, such that the resulting file + * corresponds to the @ref spec_data_file. + * + * @param[in] data MajData with the original instances + * @param[in] predy predictions of the class labels of the + * instances in the given MajData. Note that the + * order of the instances is assumed to be the + * same. 
+ * @param[in] output_filename the file to which the predictions are written + * + */ +void msvmmaj_write_predictions(struct MajData *data, long *predy, + char *output_filename) { long i, j; FILE *fid; fid = fopen(output_filename, "w"); if (fid == NULL) { - fprintf(stderr, "Error opening output file %s", output_filename); + fprintf(stderr, "Error opening output file %s", + output_filename); exit(1); } for (i=0; i<data->n; i++) { for (j=0; j<data->m; j++) - fprintf(fid, "%f ", matrix_get(data->Z, data->m+1, i, j+1)); + fprintf(fid, "%f ", + matrix_get(data->Z, + data->m+1, i, j+1)); fprintf(fid, "%li\n", predy[i]); } fclose(fid); } +/** + * @brief Check if any command line arguments contain string + * + * @details + * Check if any of a given array of command line arguments contains a given + * string. If the string is found, the index of the string in argv is + * returned. If the string is not found, 0 is returned. + * + * This function is copied from MSVMpack/libMSVM.c. + * + * @param[in] argc number of command line arguments + * @param[in] argv command line arguments + * @param[in] str string to find in the arguments + * @returns index of the string in the arguments if found, 0 + * otherwise + */ int msvmmaj_check_argv(int argc, char **argv, char *str) { int i; @@ -260,6 +365,22 @@ int msvmmaj_check_argv(int argc, char **argv, char *str) return arg_str; } +/** + * @brief Check if a command line argument equals a string + * + * @details + * Check if any of the command line arguments is exactly equal to a given + * string. If so, return the index of the corresponding command line argument. + * If not, return 0. + * + * This function is copied from MSVMpack/libMSVM.c + * + * @param[in] argc number of command line arguments + * @param[in] argv command line arguments + * @param[in] str string to find in the arguments + * @returns index of the command line argument that corresponds to + * the string, 0 if none matches. 
+ */ int msvmmaj_check_argv_eq(int argc, char **argv, char *str) { int i; @@ -274,6 +395,19 @@ int msvmmaj_check_argv_eq(int argc, char **argv, char *str) } +/** + * @brief Print a given string to the specified output stream + * + * @details + * This function is used to print a given string to the output stream + * specified by #MSVMMAJ_OUTPUT_FILE. The stream is flushed after the string + * is written to the stream. If #MSVMMAJ_OUTPUT_FILE is NULL, nothing is + * written. Note that this function is only used by note(), it should never be + * used directly. + * + * @param[in] s string to write to the stream + * + */ static void msvmmaj_print_string(const char *s) { if (MSVMMAJ_OUTPUT_FILE != NULL) { @@ -282,6 +416,19 @@ static void msvmmaj_print_string(const char *s) } } +/** + * @brief Parse a formatted string and write to the output stream + * + * @details + * This function is a replacement of fprintf(), such that the output stream + * does not have to be specified at each function call. The functionality is + * exactly the same however. Writing the formatted string to the output stream + * is handled by msvmmaj_print_string(). + * + * @param[in] fmt String format + * @param[in] ... variable argument list for the string format + * + */ void note(const char *fmt,...) { char buf[BUFSIZ]; @@ -292,6 +439,16 @@ void note(const char *fmt,...) (*msvmmaj_print_string)(buf); } +/** + * @brief Allocate memory for a MajModel + * + * @details + * This function can be used to allocate the memory needed for a MajModel. All + * arrays in the model are specified and initialized to 0. + * + * @param[in] model MajModel to allocate + * + */ void msvmmaj_allocate_model(struct MajModel *model) { long n = model->n; @@ -360,6 +517,16 @@ void msvmmaj_allocate_model(struct MajModel *model) } +/** + * @brief Free allocated MajModel struct + * + * @details + * Simply free a previously allocated MajModel by freeing all its component + * arrays. 
Note that the model struct itself is also freed here. + * + * @param[in] model MajModel to free + * + */ void msvmmaj_free_model(struct MajModel *model) { free(model->W); @@ -376,10 +543,19 @@ void msvmmaj_free_model(struct MajModel *model) free(model); } +/** + * @brief Free allocated MajData struct + * + * @details + * Simply free a previously allocated MajData struct by freeing all its + * components. Note that the data struct itself is also freed here. + * + * @param[in] data MajData struct to free + * + */ void msvmmaj_free_data(struct MajData *data) { free(data->Z); free(data->y); free(data); } - diff --git a/training/glass.training b/training/glass.training index a5053f8..a531ebc 100644 --- a/training/glass.training +++ b/training/glass.training @@ -1,7 +1,8 @@ -data: ./data/glass.train +train: ./data/glass.train p: 1.0 1.5 2.0 -kappa: -0.9 0.0 0.5 1.0 5.0 +kappa: -0.9 0.0 1.0 lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625 epsilon: 1e-6 weight: 1 2 folds: 10 +repeats: 10 diff --git a/training/iris.training b/training/iris.training new file mode 100644 index 0000000..f89a72e --- /dev/null +++ b/training/iris.training @@ -0,0 +1,8 @@ +train: ./data/iris.train +p: 1.0 1.5 2.0 +kappa: -0.9 0.0 1.0 +lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625 +epsilon: 1e-6 +weight: 1 2 +folds: 10 +repeats: 10 diff --git a/training/nursery.training b/training/nursery.training index e0d016b..3fe8dde 100644 --- a/training/nursery.training +++ b/training/nursery.training @@ -5,3 +5,7 @@ lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.0039 epsilon: 1e-6 weight: 1 folds: 10 +kernel: 0 +gamma: 0.1 0.2 0.3 +coef: 1.0 2.0 +degree: 1.0 2.0 3.0 diff --git a/training/vehicle.training b/training/vehicle.training index b0a2fb6..f8760b7 100644 --- a/training/vehicle.training +++ 
b/training/vehicle.training @@ -1,7 +1,8 @@ -data: ./data/vehicle.train -p: 1.0 1.25 1.5 1.75 2.0 -kappa: -0.9 0.0 0.5 1.0 5.0 +train: ./data/vehicle.train +p: 1.0 1.5 2.0 +kappa: -0.9 0.0 1.00 lambda: 64 32 16 8 4 2 1 0.5 0.25 0.125 0.0625 0.03125 0.015625 0.0078125 0.00390625 0.001953125 0.0009765625 0.00048828125 0.000244140625 epsilon: 1e-6 weight: 1 2 folds: 10 +repeats: 10 |
