add msvmmaj matlab file to git

author: Gertjan van den Burg <burg@ese.eur.nl> 2016-12-06 15:06:59 +0100
committer: Gertjan van den Burg <burg@ese.eur.nl> 2016-12-06 15:06:59 +0100
commit: c8a6b540d60d612f892f8441671674faef9d7e1f (patch)
tree: 31338117ae48428d4092a2e496eb03860f9ab5f6 /tests
parent: add unit tests for gensvm_train (diff)
download: gensvm-c8a6b540d60d612f892f8441671674faef9d7e1f.tar.gz
gensvm-c8a6b540d60d612f892f8441671674faef9d7e1f.zip
1 files changed, 361 insertions, 0 deletions
diff --git a/tests/aux/msvmmaj.m b/tests/aux/msvmmaj.m
new file mode 100644
index 0000000..0d14970
--- /dev/null
+++ b/tests/aux/msvmmaj.m
@@ -0,0 +1,361 @@
+% % MSVMMaj: Multiclass SVM using Iterative Majorization and Huber hinge
+% %          errors.
+
+function [W, t] = msvmmaj(X, y, varargin)
+  % Initialize variable arguments
+  numvarargs = length(varargin);
+  MAXARGS = 8;
+  
+  if numvarargs > MAXARGS
+      error('msvmmaj:TooManyInputArguments', ...
+          'At most %d arguments allowed', MAXARGS);
+  end
+  
+  optargs = {ones(length(y),1), ...   % rho
+      1.0, ...                        % p
+      0.5, ...                        % kappa
+      2^(-3), ...                     % lambda
+      3e-10, ...                      % epsilon
+      'show', ...                     % mode
+      0, ...                          % debug
+      [], ...                         % seedV
+      };
+  optargs(1:numvarargs) = varargin;
+  [rho, p, kappa, lambda, epsilon, mode, debug_mode, seedV] = optargs{:};
+  
+  % Initialize constants
+  [n, m] = size(X);
+  % fix labels makes the labels in y consecutive integers.
+  [y, K] = fix_labels(y);
+  % Lets perform a sanity check here for labels
+  if (K ~= max(y))
+      error('msvmmaj:InputError', ...
+          'Label error, please check input.');
+  end
+  % Initialize common matrices
+  Z = [ones(n, 1) X];
+  U = SimplexGen(K);
+  R = categorymat(y, K);
+  if ~isempty(seedV)
+      if isequal(size(seedV), [m+1, K-1])
+          V = seedV;
+      else
+          V = rand(m+1, K-1);
+      end
+  else
+      V = rand(m+1, K-1);
+  end
+  
+  % Initialize the UU 3d matrix. Each of the K sheets contains the
+  % differences between one of the K categories and the class label
+  % of an object.
+  UU = zeros(n, K-1, K);
+  for jj=1:K
+      UU(:, :, jj) = U(y, :) - U(jj*ones(n,1), :);
+  end
+  
+  L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, V);
+  Lbar = L + 2.0*epsilon*L;
+  it = 0;
+  fprintf("L = %15.16f\tLbar = %15.16f\n", L, Lbar);
+  
+  while (it == 0 || (Lbar - L)/L > epsilon)
+      [newV, oldL] = getUpdate(Z, y, rho, p, kappa, lambda, UU, R, V, debug_mode);
+      if debug_mode
+          fprintf('\tOldLoss error: diff = %15.16f\n', abs(L - oldL));
+      end
+  
+      % use step doubling after a short burn in
+      if (it > 50)
+          newV = 2*newV - V;
+      end
+      Lbar = L;
+      L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, newV);
+      if (rem(it, 1) == 0 && strcmp(mode, 'show'))
+          fprintf('%i %15.16f %15.16f %15.16f\n', it, L, Lbar, (Lbar - L)/L);
+      end
+  
+      V = newV;
+      it = it + 1;
+  
+  end
+  
+  t = V(1,:)';
+  W = V(2:end, :);
+  
+  if strcmp(mode, 'show')
+      fprintf('%i %15.16f %15.16f %15.16f\n', it, L, Lbar, (Lbar - L)/L);
+  end
+  if Lbar < L
+      warning('MSVMptest:NegStep', ...
+          'Negative step occured, theoretically not possible.');
+  end
+end
+
+function [newV, oldL] = getUpdate(Z, y, rho, p, kappa, lambda, UU, R, V, debug)
+  % Initialize constants
+  [n, m] = size(Z);
+  m = m - 1;
+  [~, K] = size(R);
+  
+  % Calculate initial errors
+  ZV = Z*V;
+  q = zeros(n, K);
+  for jj = 1:K
+      q(:, jj) = sum(ZV.*UU(:,:,jj), 2);
+  end
+  
+  % Calculate Huber hinge errors
+  G1 = (q <= -kappa);
+  G2 = (q <= 1) & (~G1);
+  H = (1 - q - (kappa+1)/2).*G1 + (1/(2*kappa + 2))*((1 - q).^2).*G2;
+  
+  % Split objects in Category 1 and 2
+  C = sum((H.*R)>0, 2)<=1;
+  C1i = find(C>0); % for some reason this is faster than C==1
+  C2i = find(C<1); % for some reason this is faster than C==0
+  
+  % We are going to do the calculations separately for each category (1,2)
+  % so we separate all important variables as well.
+  Q1 = q(C1i, :);
+  Q2 = q(C2i, :);
+  
+  R1 = R(C1i, :);
+  R2 = R(C2i, :);
+  
+  P1 = rho(C1i, :);
+  P2 = rho(C2i, :);
+  
+  y1 = y(C1i, :);
+  
+  H1 = H(C1i, :);
+  H2 = H(C2i, :);
+  
+  ZV1 = ZV(C1i, :);
+  ZV2 = ZV(C2i, :);
+  
+  UU1 = UU(C1i, :, :);
+  UU2 = UU(C2i, :, :);
+  
+  n1 = length(y1);
+  n2 = n - n1;
+  
+  %% First create all matrices for the Case 1 objects.
+  if debug
+      % Check the first majorization of the Case 1 objects.
+      TT1 = sum((H1.^p).*R1,2).^(1/p);
+      TT2 = sum(H1.*R1,2);
+      if abs(sum(TT1) - sum(TT2))/n1 > eps
+          fprintf('\tFirst Case 1: diff = %15.16f\n', abs(sum(TT1) - sum(TT2))/n1);
+      end
+  end
+  
+  % First do the majorization for the Case 1 objects (p = 1 in Huber maj.)
+  G1 = (Q1 <=-kappa);
+  G2 = (Q1 <= 1) & (~G1);
+  G3 = ~(G1|G2);
+  
+  % calculate dummy variables
+  Phi = 1 - Q1 - (kappa + 1)/2;
+  Psi = (1 - Q1)/sqrt(2*kappa + 2);
+  iPhi = 1./Phi;
+  
+  a1 = 1/4*iPhi.*(G1 - G3) + (1/(2*kappa + 2))*G2;
+  a1(isnan(a1)) = 0; % necessary because Inf*0 = NaN and we need 0
+  
+  b1 = a1.*Q1 + 1/2*G1 + ((Psi.^2)./(1 - Q1)).*G2;
+  
+  B1 = zeros(n1, K-1);
+  for jj=1:K
+      B1 = B1 + ((b1(:, jj) - (a1(:, jj).*Q1(:, jj)))*ones(1, K-1)).*UU1(:,:,jj);
+  end
+  
+  if debug
+      % constant terms in quadratic majorization
+      c1 = a1.*(Q1.^2) + (1-kappa)/2*G1 + ...
+          ((Psi.^2).*(1 + 2.*Q1./(1 - Q1))).*G2;
+  
+      TT1 = a1.*(Q1.^2) - 2*b1.*Q1 + c1;
+      TT2 = H1;
+      D = sum(sum(abs(TT1 - TT2)))/n1;
+      if D > eps
+          fprintf('\tSecond Case 1: diff = %15.16f\n', D);
+      end
+  
+      % Case 1 constant terms in total majorization
+      Gamma1 = 1/n * sum(P1.*sum(c1.*R1,2));
+      Gamma1 = Gamma1 + 1/n * sum(P1.*sum(ZV1.^2,2).*sum(a1.*R1,2));
+      Gamma1 = Gamma1 - 1/n * sum(P1.*sum(a1.*(Q1.^2).*R1,2));
+  
+      clear c1 TT1 TT2 D
+  end
+  
+  %% Now create all matrices for the Case 2 objects.
+  % We can now safely delete a number of matrices from memory
+  clear G1 G2 G3 Phi Psi iPhi b1
+  
+  G1a = (Q2 <= (p+kappa-1)/(p-2));
+  G2a = (Q2 <= 1)&(~G1a);
+  G3a = ~(G1a|G2a);
+  
+  G1b = (Q2 <= -kappa);
+  G2b = (Q2 <= 1) & (~G1b);
+  G3b = ~(G1b|G2b);
+  
+  Phi = 1 - Q2 - (kappa+1)/2;
+  Psi = (1 - Q2)/sqrt(2*kappa + 2);
+  if p~=2
+      Chi = (p*Q2 + kappa - 1)/(p - 2);
+  end
+  
+  omega = (1/p)*(sum((H2.^p).*R2,2)).^(1/p - 1);
+  
+  if debug
+      % First majorization test (p-th root)
+      TT1 = sum((H2.^p).*R2,2).^(1/p);
+      TT2 = omega.*sum((H2.^p).*R2,2) + (1 - 1/p)*(sum((H2.^p).*R2,2)).^(1/p);
+      D = sum(abs(TT1 - TT2))/n2;
+      if D > eps
+          fprintf('\tFirst Case 2: diff = %15.16f\n', D);
+      end
+  end
+  
+  % Some parameters are different when p = 2, we recognize this here.
+  if p~=2
+      a2 = (1/4 * p^2 * Phi.^(p-2)).*G1a + ...
+          (1/4 * p * (2*p - 1) * ((kappa+1)/2)^(p-2)).*G2a + ...
+          (1/4 * p^2 * (p*Phi/(p-2)).^(p-2)).*G3a;
+      a2(isnan(a2)) = 0; % We need Inf*0 = 0.
+  else
+      a2 = 3/2*ones(n2, K);
+  end
+  
+  b2 = (a2.*Q2 + 1/2*p*(Phi.^(p-1))).*G1b + ...
+      (a2.*Q2 + p*(Psi.^(2*p))./(1 - Q2)).*G2b;
+  if p~=2
+      b2 = b2 + (a2.*Chi + 1/2*p*(p*Phi/(p-2)).^(p-1)).*G3b;
+  else
+      b2 = b2 + 3/2*Q2.*G3b;
+  end
+  
+  B2 = zeros(n2, K-1);
+  for jj=1:K
+      B2 = B2 + ((b2(:, jj) - (a2(:, jj).*Q2(:, jj)))*ones(1, K-1)).*UU2(:,:,jj);
+  end
+  
+  if debug
+      c2 = (a2.*(Q2.^2) + Phi.^p + p*Q2.*(Phi.^(p-1))).*G1b + ...
+          (a2.*(Q2.^2) + (Psi.^(2*p)).*(1 + (2*p*Q2)./(1 - Q2))).*G2b;
+      if p~=2
+          c2 = c2 + (a2.*(Chi.^2) + p*Chi.*(p*Phi/(p - 2)).^(p-1) + (p*Phi/(p-2)).^p).*G3b;
+      else
+          c2 = c2 + 3/2*(Q2.^2).*G3b;
+      end
+  
+      TT1 = a2.*(Q2.^2) - 2*b2.*Q2 + c2;
+      TT2 = H2.^p;
+      D = sum(sum(abs(TT1 - TT2)))/n2;
+      if D>eps
+          fprintf('\tSecond Case 2: diff = %15.16f\n', D);
+      end
+  
+      % Case 2 constant terms in majorization
+      Gamma2 = 1/n * (1 - 1/p) * sum(P2.*(sum((H2.^p).*R2,2).^(1/p)));
+      Gamma2 = Gamma2 + 1/n * sum(P2.*omega.*sum(c2.*R2,2));
+      Gamma2 = Gamma2 + 1/n * sum(P2.*omega.*sum(ZV2.^2,2).*sum(a2.*R2,2));
+      Gamma2 = Gamma2 - 1/n * sum(P2.*omega.*sum((Q2.^2).*a2,2));
+  end
+  
+  %% Collect the two classes in a single matrix and calculate update
+  A(C1i, :) = P1.*sum(a1.*R1,2);
+  A(C2i, :) = P2.*omega.*sum(a2.*R2,2);
+  
+  B(C1i, :) = (P1*ones(1, K-1)).*B1;
+  B(C2i, :) = ((P2.*omega)*ones(1, K-1)).*B2;
+  
+  A = 1/n*A;
+  B = 1/n*B;
+  
+  J = eye(m+1); J(1,1) = 0;
+  
+  ZAZ = Z'*((A*ones(1, m+1)).*Z);
+  newV = (ZAZ + lambda*J)\(ZAZ*V + Z'*B);
+  
+  if debug
+      oldL = trace((V - 2*V)'*Z'*diag(A)*Z*V) - 2*trace(B'*Z*V) + Gamma1 + ...
+          Gamma2 + lambda*trace(V'*J*V);
+  else
+      oldL = 0;
+  end
+
+end
+
+function L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, V)
+  % Initialize constants
+  [n, m] = size(Z); % note m = m + 1
+  K = max(y);
+  J = eye(m); J(1,1) = 0;
+  
+  % Create the errors
+  q = zeros(n, K);
+  ZV = Z*V;
+  for jj = 1:K
+      q(:,jj) = sum(ZV.*UU(:,:,jj), 2);
+  end
+  
+  % Create logical matrices for calculating the Huber function
+  G1 = (q <= -kappa);
+  G2 = (q <= 1) & (~G1);
+  H = (1 - q - (kappa+1)/2).*G1 + (1/(2*kappa+2))*((1-q).^2).*G2;
+  
+  % Evaluate loss function
+  L = sum((H.^p).*R, 2).^(1/p);
+  L = 1/n*sum(rho.*L);
+  fprintf("L after rho: %.16f\t", L);
+  fprintf("reg: %.16f\t", trace(V'*J*V));
+  L = L + lambda*trace(V'*J*V);
+  fprintf("L with reg: %.16f\n", L);
+end
+
+function R = categorymat(y, K)
+  I = eye(K);
+  n = length(y);
+  R = zeros(n, K);
+  for ii = 1:n
+      R(ii, :) = I(y(ii, 1), :);
+  end
+  R = ~logical(R);
+
+end
+
+function U = SimplexGen(K)
+  U = zeros(K, K-1);
+  for ii=1:K
+      for jj=1:K-1
+          if ii<=jj
+              U(ii,jj) = -1/sqrt(2*(jj^2 + jj));
+          elseif ii==jj+1
+              U(ii,jj) = jj/sqrt(2*(jj^2 + jj));
+          end
+      end
+  end
+end
+
+function [newy, K] = fix_labels(y)
+% % This function fixes the labels in y such that the labels become
+% % consecutive.
+% % Thus if we have y = [1, 1, 5, 2, 2, 5] the vector returned
+% % will be [1, 1, 3, 2, 2, 3]
+  
+  u = unique(y);
+  K = length(u);
+  newy = zeros(length(y), 1);
+  
+  for ii = 1:K
+      idx = find(y == u(ii));
+      for jj = idx
+          newy(jj) = ii;
+      end
+  end
+
+end
author	Gertjan van den Burg <burg@ese.eur.nl>	2016-12-06 15:06:59 +0100
committer	Gertjan van den Burg <burg@ese.eur.nl>	2016-12-06 15:06:59 +0100
commit	c8a6b540d60d612f892f8441671674faef9d7e1f (patch)
tree	31338117ae48428d4092a2e496eb03860f9ab5f6 /tests
parent	add unit tests for gensvm_train (diff)
download	gensvm-c8a6b540d60d612f892f8441671674faef9d7e1f.tar.gz gensvm-c8a6b540d60d612f892f8441671674faef9d7e1f.zip