diff options
| author | Gertjan van den Burg <burg@ese.eur.nl> | 2016-12-06 15:06:59 +0100 |
|---|---|---|
| committer | Gertjan van den Burg <burg@ese.eur.nl> | 2016-12-06 15:06:59 +0100 |
| commit | c8a6b540d60d612f892f8441671674faef9d7e1f (patch) | |
| tree | 31338117ae48428d4092a2e496eb03860f9ab5f6 /tests | |
| parent | add unit tests for gensvm_train (diff) | |
| download | gensvm-c8a6b540d60d612f892f8441671674faef9d7e1f.tar.gz gensvm-c8a6b540d60d612f892f8441671674faef9d7e1f.zip | |
add msvmmaj matlab file to git
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/aux/msvmmaj.m | 361 |
1 files changed, 361 insertions, 0 deletions
diff --git a/tests/aux/msvmmaj.m b/tests/aux/msvmmaj.m new file mode 100644 index 0000000..0d14970 --- /dev/null +++ b/tests/aux/msvmmaj.m @@ -0,0 +1,361 @@ +% % MSVMMaj: Multiclass SVM using Iterative Majorization and Huber hinge +% % errors. + +function [W, t] = msvmmaj(X, y, varargin) + % Initialize variable arguments + numvarargs = length(varargin); + MAXARGS = 8; + + if numvarargs > MAXARGS + error('msvmmaj:TooManyInputArguments', ... + 'At most %d arguments allowed', MAXARGS); + end + + optargs = {ones(length(y),1), ... % rho + 1.0, ... % p + 0.5, ... % kappa + 2^(-3), ... % lambda + 3e-10, ... % epsilon + 'show', ... % mode + 0, ... % debug + [], ... % seedV + }; + optargs(1:numvarargs) = varargin; + [rho, p, kappa, lambda, epsilon, mode, debug_mode, seedV] = optargs{:}; + + % Initialize constants + [n, m] = size(X); + % fix labels makes the labels in y consecutive integers. + [y, K] = fix_labels(y); + % Lets perform a sanity check here for labels + if (K ~= max(y)) + error('msvmmaj:InputError', ... + 'Label error, please check input.'); + end + % Initialize common matrices + Z = [ones(n, 1) X]; + U = SimplexGen(K); + R = categorymat(y, K); + if ~isempty(seedV) + if isequal(size(seedV), [m+1, K-1]) + V = seedV; + else + V = rand(m+1, K-1); + end + else + V = rand(m+1, K-1); + end + + % Initialize the UU 3d matrix. Each of the K sheets contains the + % differences between one of the K categories and the class label + % of an object. + UU = zeros(n, K-1, K); + for jj=1:K + UU(:, :, jj) = U(y, :) - U(jj*ones(n,1), :); + end + + L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, V); + Lbar = L + 2.0*epsilon*L; + it = 0; + fprintf("L = %15.16f\tLbar = %15.16f\n", L, Lbar); + + while (it == 0 || (Lbar - L)/L > epsilon) + [newV, oldL] = getUpdate(Z, y, rho, p, kappa, lambda, UU, R, V, debug_mode); + if debug_mode + fprintf('\tOldLoss error: diff = %15.16f\n', abs(L - oldL)); + end + + % use step doubling after a short burn in + if (it > 50) + newV = 2*newV - V; + end + Lbar = L; + L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, newV); + if (rem(it, 1) == 0 && strcmp(mode, 'show')) + fprintf('%i %15.16f %15.16f %15.16f\n', it, L, Lbar, (Lbar - L)/L); + end + + V = newV; + it = it + 1; + + end + + t = V(1,:)'; + W = V(2:end, :); + + if strcmp(mode, 'show') + fprintf('%i %15.16f %15.16f %15.16f\n', it, L, Lbar, (Lbar - L)/L); + end + if Lbar < L + warning('MSVMptest:NegStep', ... + 'Negative step occured, theoretically not possible.'); + end +end + +function [newV, oldL] = getUpdate(Z, y, rho, p, kappa, lambda, UU, R, V, debug) + % Initialize constants + [n, m] = size(Z); + m = m - 1; + [~, K] = size(R); + + % Calculate initial errors + ZV = Z*V; + q = zeros(n, K); + for jj = 1:K + q(:, jj) = sum(ZV.*UU(:,:,jj), 2); + end + + % Calculate Huber hinge errors + G1 = (q <= -kappa); + G2 = (q <= 1) & (~G1); + H = (1 - q - (kappa+1)/2).*G1 + (1/(2*kappa + 2))*((1 - q).^2).*G2; + + % Split objects in Category 1 and 2 + C = sum((H.*R)>0, 2)<=1; + C1i = find(C>0); % for some reason this is faster than C==1 + C2i = find(C<1); % for some reason this is faster than C==0 + + % We are going to do the calculations separately for each category (1,2) + % so we separate all important variables as well. + Q1 = q(C1i, :); + Q2 = q(C2i, :); + + R1 = R(C1i, :); + R2 = R(C2i, :); + + P1 = rho(C1i, :); + P2 = rho(C2i, :); + + y1 = y(C1i, :); + + H1 = H(C1i, :); + H2 = H(C2i, :); + + ZV1 = ZV(C1i, :); + ZV2 = ZV(C2i, :); + + UU1 = UU(C1i, :, :); + UU2 = UU(C2i, :, :); + + n1 = length(y1); + n2 = n - n1; + + %% First create all matrices for the Case 1 objects. + if debug + % Check the first majorization of the Case 1 objects. + TT1 = sum((H1.^p).*R1,2).^(1/p); + TT2 = sum(H1.*R1,2); + if abs(sum(TT1) - sum(TT2))/n1 > eps + fprintf('\tFirst Case 1: diff = %15.16f\n', abs(sum(TT1) - sum(TT2))/n1); + end + end + + % First do the majorization for the Case 1 objects (p = 1 in Huber maj.) + G1 = (Q1 <=-kappa); + G2 = (Q1 <= 1) & (~G1); + G3 = ~(G1|G2); + + % calculate dummy variables + Phi = 1 - Q1 - (kappa + 1)/2; + Psi = (1 - Q1)/sqrt(2*kappa + 2); + iPhi = 1./Phi; + + a1 = 1/4*iPhi.*(G1 - G3) + (1/(2*kappa + 2))*G2; + a1(isnan(a1)) = 0; % necessary because Inf*0 = NaN and we need 0 + + b1 = a1.*Q1 + 1/2*G1 + ((Psi.^2)./(1 - Q1)).*G2; + + B1 = zeros(n1, K-1); + for jj=1:K + B1 = B1 + ((b1(:, jj) - (a1(:, jj).*Q1(:, jj)))*ones(1, K-1)).*UU1(:,:,jj); + end + + if debug + % constant terms in quadratic majorization + c1 = a1.*(Q1.^2) + (1-kappa)/2*G1 + ... + ((Psi.^2).*(1 + 2.*Q1./(1 - Q1))).*G2; + + TT1 = a1.*(Q1.^2) - 2*b1.*Q1 + c1; + TT2 = H1; + D = sum(sum(abs(TT1 - TT2)))/n1; + if D > eps + fprintf('\tSecond Case 1: diff = %15.16f\n', D); + end + + % Case 1 constant terms in total majorization + Gamma1 = 1/n * sum(P1.*sum(c1.*R1,2)); + Gamma1 = Gamma1 + 1/n * sum(P1.*sum(ZV1.^2,2).*sum(a1.*R1,2)); + Gamma1 = Gamma1 - 1/n * sum(P1.*sum(a1.*(Q1.^2).*R1,2)); + + clear c1 TT1 TT2 D + end + + %% Now create all matrices for the Case 2 objects. + % We can now safely delete a number of matrices from memory + clear G1 G2 G3 Phi Psi iPhi b1 + + G1a = (Q2 <= (p+kappa-1)/(p-2)); + G2a = (Q2 <= 1)&(~G1a); + G3a = ~(G1a|G2a); + + G1b = (Q2 <= -kappa); + G2b = (Q2 <= 1) & (~G1b); + G3b = ~(G1b|G2b); + + Phi = 1 - Q2 - (kappa+1)/2; + Psi = (1 - Q2)/sqrt(2*kappa + 2); + if p~=2 + Chi = (p*Q2 + kappa - 1)/(p - 2); + end + + omega = (1/p)*(sum((H2.^p).*R2,2)).^(1/p - 1); + + if debug + % First majorization test (p-th root) + TT1 = sum((H2.^p).*R2,2).^(1/p); + TT2 = omega.*sum((H2.^p).*R2,2) + (1 - 1/p)*(sum((H2.^p).*R2,2)).^(1/p); + D = sum(abs(TT1 - TT2))/n2; + if D > eps + fprintf('\tFirst Case 2: diff = %15.16f\n', D); + end + end + + % Some parameters are different when p = 2, we recognize this here. + if p~=2 + a2 = (1/4 * p^2 * Phi.^(p-2)).*G1a + ... + (1/4 * p * (2*p - 1) * ((kappa+1)/2)^(p-2)).*G2a + ... + (1/4 * p^2 * (p*Phi/(p-2)).^(p-2)).*G3a; + a2(isnan(a2)) = 0; % We need Inf*0 = 0. + else + a2 = 3/2*ones(n2, K); + end + + b2 = (a2.*Q2 + 1/2*p*(Phi.^(p-1))).*G1b + ... + (a2.*Q2 + p*(Psi.^(2*p))./(1 - Q2)).*G2b; + if p~=2 + b2 = b2 + (a2.*Chi + 1/2*p*(p*Phi/(p-2)).^(p-1)).*G3b; + else + b2 = b2 + 3/2*Q2.*G3b; + end + + B2 = zeros(n2, K-1); + for jj=1:K + B2 = B2 + ((b2(:, jj) - (a2(:, jj).*Q2(:, jj)))*ones(1, K-1)).*UU2(:,:,jj); + end + + if debug + c2 = (a2.*(Q2.^2) + Phi.^p + p*Q2.*(Phi.^(p-1))).*G1b + ... + (a2.*(Q2.^2) + (Psi.^(2*p)).*(1 + (2*p*Q2)./(1 - Q2))).*G2b; + if p~=2 + c2 = c2 + (a2.*(Chi.^2) + p*Chi.*(p*Phi/(p - 2)).^(p-1) + (p*Phi/(p-2)).^p).*G3b; + else + c2 = c2 + 3/2*(Q2.^2).*G3b; + end + + TT1 = a2.*(Q2.^2) - 2*b2.*Q2 + c2; + TT2 = H2.^p; + D = sum(sum(abs(TT1 - TT2)))/n2; + if D>eps + fprintf('\tSecond Case 2: diff = %15.16f\n', D); + end + + % Case 2 constant terms in majorization + Gamma2 = 1/n * (1 - 1/p) * sum(P2.*(sum((H2.^p).*R2,2).^(1/p))); + Gamma2 = Gamma2 + 1/n * sum(P2.*omega.*sum(c2.*R2,2)); + Gamma2 = Gamma2 + 1/n * sum(P2.*omega.*sum(ZV2.^2,2).*sum(a2.*R2,2)); + Gamma2 = Gamma2 - 1/n * sum(P2.*omega.*sum((Q2.^2).*a2,2)); + end + + %% Collect the two classes in a single matrix and calculate update + A(C1i, :) = P1.*sum(a1.*R1,2); + A(C2i, :) = P2.*omega.*sum(a2.*R2,2); + + B(C1i, :) = (P1*ones(1, K-1)).*B1; + B(C2i, :) = ((P2.*omega)*ones(1, K-1)).*B2; + + A = 1/n*A; + B = 1/n*B; + + J = eye(m+1); J(1,1) = 0; + + ZAZ = Z'*((A*ones(1, m+1)).*Z); + newV = (ZAZ + lambda*J)\(ZAZ*V + Z'*B); + + if debug + oldL = trace((V - 2*V)'*Z'*diag(A)*Z*V) - 2*trace(B'*Z*V) + Gamma1 + ... + Gamma2 + lambda*trace(V'*J*V); + else + oldL = 0; + end + +end + +function L = getLoss(Z, y, rho, p, kappa, lambda, UU, R, V) + % Initialize constants + [n, m] = size(Z); % note m = m + 1 + K = max(y); + J = eye(m); J(1,1) = 0; + + % Create the errors + q = zeros(n, K); + ZV = Z*V; + for jj = 1:K + q(:,jj) = sum(ZV.*UU(:,:,jj), 2); + end + + % Create logical matrices for calculating the Huber function + G1 = (q <= -kappa); + G2 = (q <= 1) & (~G1); + H = (1 - q - (kappa+1)/2).*G1 + (1/(2*kappa+2))*((1-q).^2).*G2; + + % Evaluate loss function + L = sum((H.^p).*R, 2).^(1/p); + L = 1/n*sum(rho.*L); + fprintf("L after rho: %.16f\t", L); + fprintf("reg: %.16f\t", trace(V'*J*V)); + L = L + lambda*trace(V'*J*V); + fprintf("L with reg: %.16f\n", L); +end + +function R = categorymat(y, K) + I = eye(K); + n = length(y); + R = zeros(n, K); + for ii = 1:n + R(ii, :) = I(y(ii, 1), :); + end + R = ~logical(R); + +end + +function U = SimplexGen(K) + U = zeros(K, K-1); + for ii=1:K + for jj=1:K-1 + if ii<=jj + U(ii,jj) = -1/sqrt(2*(jj^2 + jj)); + elseif ii==jj+1 + U(ii,jj) = jj/sqrt(2*(jj^2 + jj)); + end + end + end +end + +function [newy, K] = fix_labels(y) +% % This function fixes the labels in y such that the labels become +% % consecutive. +% % Thus if we have y = [1, 1, 5, 2, 2, 5] the vector returned +% % will be [1, 1, 3, 2, 2, 3] + + u = unique(y); + K = length(u); + newy = zeros(length(y), 1); + + for ii = 1:K + idx = find(y == u(ii)); + for jj = idx + newy(jj) = ii; + end + end + +end |
