% % MSVMMaj: Multiclass SVM using Iterative Majorization and Huber hinge % % errors. function [W, t] = msvmmaj(X, y, varargin) % Initialize variable arguments numvarargs = length(varargin); MAXARGS = 8; if numvarargs > MAXARGS error('msvmmaj:TooManyInputArguments', ... 'At most %d arguments allowed', MAXARGS); end optargs = {ones(length(y),1), ... % rho 1.0, ... % p 0.5, ... % kappa 2^(-3), ... % lambda 3e-10, ... % epsilon 'show', ... % mode 0, ... % debug [], ... % seedV }; optargs(1:numvarargs) = varargin; [rho, p, kappa, lambda, epsilon, mode, debug_mode, seedV] = optargs{:}; % Initialize constants [n, m] = size(X); % fix labels makes the labels in y consecutive integers. [y, K] = fix_labels(y); % Lets perform a sanity check here for labels if (K ~= max(y)) error('msvmmaj:InputError', ... 'Label error, please check input.'); end % Initialize common matrices Z = [ones(n, 1) X]; U = SimplexGen(K); R = categorymat(y, K); if ~isempty(seedV) if isequal(size(seedV), [m+1, K-1]) V = seedV; else V = rand(m+1, K-1); end else V = rand(m+1, K-1); end % Initialize the UU 3d matrix. Each of the K sheets contains the % differences between one of the K categories and the class label % of an object. UU = zeros(n, K-1, K); for jj=1:K UU(:, :, jj) = U(y, :) - U(jj*ones(n,1), :); end L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, V); Lbar = L + 2.0*epsilon*L; it = 0; fprintf("L = %15.16f\tLbar = %15.16f\n", L, Lbar); while (it == 0 || (Lbar - L)/L > epsilon) [newV, oldL] = getUpdate(Z, y, rho, p, kappa, lambda, UU, R, V, debug_mode); if debug_mode fprintf('\tOldLoss error: diff = %15.16f\n', abs(L - oldL)); end % use step doubling after a short burn in if (it > 50) newV = 2*newV - V; end Lbar = L; L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, newV); if (rem(it, 1) == 0 && strcmp(mode, 'show')) fprintf('%i %15.16f %15.16f %15.16f\n', it, L, Lbar, (Lbar - L)/L); end V = newV; it = it + 1; end t = V(1,:)'; W = V(2:end, :); if strcmp(mode, 'show') fprintf('%i %15.16f %15.16f %15.16f\n', it, L, Lbar, (Lbar - L)/L); end if Lbar < L warning('MSVMptest:NegStep', ... 'Negative step occured, theoretically not possible.'); end end function [newV, oldL] = getUpdate(Z, y, rho, p, kappa, lambda, UU, R, V, debug) % Initialize constants [n, m] = size(Z); m = m - 1; [~, K] = size(R); % Calculate initial errors ZV = Z*V; q = zeros(n, K); for jj = 1:K q(:, jj) = sum(ZV.*UU(:,:,jj), 2); end % Calculate Huber hinge errors G1 = (q <= -kappa); G2 = (q <= 1) & (~G1); H = (1 - q - (kappa+1)/2).*G1 + (1/(2*kappa + 2))*((1 - q).^2).*G2; % Split objects in Category 1 and 2 C = sum((H.*R)>0, 2)<=1; C1i = find(C>0); % for some reason this is faster than C==1 C2i = find(C<1); % for some reason this is faster than C==0 % We are going to do the calculations separately for each category (1,2) % so we separate all important variables as well. Q1 = q(C1i, :); Q2 = q(C2i, :); R1 = R(C1i, :); R2 = R(C2i, :); P1 = rho(C1i, :); P2 = rho(C2i, :); y1 = y(C1i, :); H1 = H(C1i, :); H2 = H(C2i, :); ZV1 = ZV(C1i, :); ZV2 = ZV(C2i, :); UU1 = UU(C1i, :, :); UU2 = UU(C2i, :, :); n1 = length(y1); n2 = n - n1; %% First create all matrices for the Case 1 objects. if debug % Check the first majorization of the Case 1 objects. TT1 = sum((H1.^p).*R1,2).^(1/p); TT2 = sum(H1.*R1,2); if abs(sum(TT1) - sum(TT2))/n1 > eps fprintf('\tFirst Case 1: diff = %15.16f\n', abs(sum(TT1) - sum(TT2))/n1); end end % First do the majorization for the Case 1 objects (p = 1 in Huber maj.) G1 = (Q1 <=-kappa); G2 = (Q1 <= 1) & (~G1); G3 = ~(G1|G2); % calculate dummy variables Phi = 1 - Q1 - (kappa + 1)/2; Psi = (1 - Q1)/sqrt(2*kappa + 2); iPhi = 1./Phi; a1 = 1/4*iPhi.*(G1 - G3) + (1/(2*kappa + 2))*G2; a1(isnan(a1)) = 0; % necessary because Inf*0 = NaN and we need 0 b1 = a1.*Q1 + 1/2*G1 + ((Psi.^2)./(1 - Q1)).*G2; B1 = zeros(n1, K-1); for jj=1:K B1 = B1 + ((b1(:, jj) - (a1(:, jj).*Q1(:, jj)))*ones(1, K-1)).*UU1(:,:,jj); end if debug % constant terms in quadratic majorization c1 = a1.*(Q1.^2) + (1-kappa)/2*G1 + ... ((Psi.^2).*(1 + 2.*Q1./(1 - Q1))).*G2; TT1 = a1.*(Q1.^2) - 2*b1.*Q1 + c1; TT2 = H1; D = sum(sum(abs(TT1 - TT2)))/n1; if D > eps fprintf('\tSecond Case 1: diff = %15.16f\n', D); end % Case 1 constant terms in total majorization Gamma1 = 1/n * sum(P1.*sum(c1.*R1,2)); Gamma1 = Gamma1 + 1/n * sum(P1.*sum(ZV1.^2,2).*sum(a1.*R1,2)); Gamma1 = Gamma1 - 1/n * sum(P1.*sum(a1.*(Q1.^2).*R1,2)); clear c1 TT1 TT2 D end %% Now create all matrices for the Case 2 objects. % We can now safely delete a number of matrices from memory clear G1 G2 G3 Phi Psi iPhi b1 G1a = (Q2 <= (p+kappa-1)/(p-2)); G2a = (Q2 <= 1)&(~G1a); G3a = ~(G1a|G2a); G1b = (Q2 <= -kappa); G2b = (Q2 <= 1) & (~G1b); G3b = ~(G1b|G2b); Phi = 1 - Q2 - (kappa+1)/2; Psi = (1 - Q2)/sqrt(2*kappa + 2); if p~=2 Chi = (p*Q2 + kappa - 1)/(p - 2); end omega = (1/p)*(sum((H2.^p).*R2,2)).^(1/p - 1); if debug % First majorization test (p-th root) TT1 = sum((H2.^p).*R2,2).^(1/p); TT2 = omega.*sum((H2.^p).*R2,2) + (1 - 1/p)*(sum((H2.^p).*R2,2)).^(1/p); D = sum(abs(TT1 - TT2))/n2; if D > eps fprintf('\tFirst Case 2: diff = %15.16f\n', D); end end % Some parameters are different when p = 2, we recognize this here. if p~=2 a2 = (1/4 * p^2 * Phi.^(p-2)).*G1a + ... (1/4 * p * (2*p - 1) * ((kappa+1)/2)^(p-2)).*G2a + ... (1/4 * p^2 * (p*Phi/(p-2)).^(p-2)).*G3a; a2(isnan(a2)) = 0; % We need Inf*0 = 0. else a2 = 3/2*ones(n2, K); end b2 = (a2.*Q2 + 1/2*p*(Phi.^(p-1))).*G1b + ... (a2.*Q2 + p*(Psi.^(2*p))./(1 - Q2)).*G2b; if p~=2 b2 = b2 + (a2.*Chi + 1/2*p*(p*Phi/(p-2)).^(p-1)).*G3b; else b2 = b2 + 3/2*Q2.*G3b; end B2 = zeros(n2, K-1); for jj=1:K B2 = B2 + ((b2(:, jj) - (a2(:, jj).*Q2(:, jj)))*ones(1, K-1)).*UU2(:,:,jj); end if debug c2 = (a2.*(Q2.^2) + Phi.^p + p*Q2.*(Phi.^(p-1))).*G1b + ... (a2.*(Q2.^2) + (Psi.^(2*p)).*(1 + (2*p*Q2)./(1 - Q2))).*G2b; if p~=2 c2 = c2 + (a2.*(Chi.^2) + p*Chi.*(p*Phi/(p - 2)).^(p-1) + (p*Phi/(p-2)).^p).*G3b; else c2 = c2 + 3/2*(Q2.^2).*G3b; end TT1 = a2.*(Q2.^2) - 2*b2.*Q2 + c2; TT2 = H2.^p; D = sum(sum(abs(TT1 - TT2)))/n2; if D>eps fprintf('\tSecond Case 2: diff = %15.16f\n', D); end % Case 2 constant terms in majorization Gamma2 = 1/n * (1 - 1/p) * sum(P2.*(sum((H2.^p).*R2,2).^(1/p))); Gamma2 = Gamma2 + 1/n * sum(P2.*omega.*sum(c2.*R2,2)); Gamma2 = Gamma2 + 1/n * sum(P2.*omega.*sum(ZV2.^2,2).*sum(a2.*R2,2)); Gamma2 = Gamma2 - 1/n * sum(P2.*omega.*sum((Q2.^2).*a2,2)); end %% Collect the two classes in a single matrix and calculate update A(C1i, :) = P1.*sum(a1.*R1,2); A(C2i, :) = P2.*omega.*sum(a2.*R2,2); B(C1i, :) = (P1*ones(1, K-1)).*B1; B(C2i, :) = ((P2.*omega)*ones(1, K-1)).*B2; A = 1/n*A; B = 1/n*B; J = eye(m+1); J(1,1) = 0; ZAZ = Z'*((A*ones(1, m+1)).*Z); newV = (ZAZ + lambda*J)\(ZAZ*V + Z'*B); if debug oldL = trace((V - 2*V)'*Z'*diag(A)*Z*V) - 2*trace(B'*Z*V) + Gamma1 + ... Gamma2 + lambda*trace(V'*J*V); else oldL = 0; end end function L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, V) % Initialize constants [n, m] = size(Z); % note m = m + 1 K = max(y); J = eye(m); J(1,1) = 0; % Create the errors q = zeros(n, K); ZV = Z*V; for jj = 1:K q(:,jj) = sum(ZV.*UU(:,:,jj), 2); end % Create logical matrices for calculating the Huber function G1 = (q <= -kappa); G2 = (q <= 1) & (~G1); H = (1 - q - (kappa+1)/2).*G1 + (1/(2*kappa+2))*((1-q).^2).*G2; % Evaluate loss function L = sum((H.^p).*R, 2).^(1/p); L = 1/n*sum(rho.*L); fprintf("L after rho: %.16f\t", L); fprintf("reg: %.16f\t", trace(V'*J*V)); L = L + lambda*trace(V'*J*V); fprintf("L with reg: %.16f\n", L); end function R = categorymat(y, K) I = eye(K); n = length(y); R = zeros(n, K); for ii = 1:n R(ii, :) = I(y(ii, 1), :); end R = ~logical(R); end function U = SimplexGen(K) U = zeros(K, K-1); for ii=1:K for jj=1:K-1 if ii<=jj U(ii,jj) = -1/sqrt(2*(jj^2 + jj)); elseif ii==jj+1 U(ii,jj) = jj/sqrt(2*(jj^2 + jj)); end end end end function [newy, K] = fix_labels(y) % % This function fixes the labels in y such that the labels become % % consecutive. % % Thus if we have y = [1, 1, 5, 2, 2, 5] the vector returned % % will be [1, 1, 3, 2, 2, 3] u = unique(y); K = length(u); newy = zeros(length(y), 1); for ii = 1:K idx = find(y == u(ii)); for jj = idx newy(jj) = ii; end end end