% Trains a network with one hidden layer of logistic units and a softmax
% output layer by full-batch gradient descent on a Cross-Entropy error
% function, periodically reporting training/test cost and error counts.
%
% You must define the following global variables before running:
%   data        (must be a matrix with one row per training case).
%   targets     (must be a matrix with one row per training case).
%   testdata    (must be a matrix with one row per training case).
%   testtargets (must be a matrix with one row per training case).
%   restart     (set it to 1 to initialize the weights & epoch number).
%   maxepoch    (set it to be the number of training epochs.
%                Increase maxepoch to continue training).
% The script also reads two variables it does not set itself:
%   numhid      (number of hidden units; sizes the weight matrices).
%   epsilon     (step size that multiplies every gradient in the update).
%
% NOTE(review): on a continued run (restart==0) the loop begins at the value
% `epoch` was left with, i.e. the last epoch number of the previous run, so
% that epoch number is executed twice -- confirm whether this is intended.

%%%%%%% GLOBAL VARIABLES: SET BY EDITING THIS FILE %%%
initialweightsize = .01;
errorprintfreq = 10;    % how often to show the error.
weightprintfreq = 500;  % how often to show the weights.
%%%%%%% END OF GLOBAL VARIABLES FOR YOU TO SET %%%%%

fprintf(1, 'numhid=%3i initw= %1.3f epsilon= %1.4f \n', ...
        numhid, initialweightsize, epsilon);

numcases = size(data,1);
numin    = size(data,2);
numout   = size(targets,2);
tiny = exp(-200);  % added inside log() so that log(0) is never evaluated

%%%%% BEGIN INITIALIZATION %%%%%%%
if restart==1
  restart = 0;
  epoch = 1;
  % inhid is the matrix of input-to-hidden weights,
  % hidout the matrix of hidden-to-output weights.
  inhid     = initialweightsize*randn(numin, numhid);
  hidout    = initialweightsize*randn(numhid, numout);
  hidbiases = initialweightsize*randn(1, numhid);
  outbiases = initialweightsize*randn(1, numout);
end
%%%%% END INITIALIZATION %%%%%%%%%

alltE = [];       %% will be used to keep record of test cost
allterrors = [];  %% will be used to keep record of number of test errors

for epoch = epoch:maxepoch

  %%%%%% BEGIN FORWARD PASS %%%%%%%%%
  % First we do forward passes for all of the cases in
  % parallel by using one case per row in various matrices.
  % repmat(M,V,H) replicates M, V times Vertically and H times Horizontally.
  hidsum = data*inhid + repmat(hidbiases, numcases, 1);
  % Element-wise logistic: each row of hidacts is the activity of
  % all hidden units for a particular training case.
  hidacts = 1./(1+exp(-hidsum));
  outsum = hidacts*hidout + repmat(outbiases, numcases, 1);

  % Now we do a softmax on the outsums. The row maximum is subtracted
  % before exponentiating: this leaves the normalised outputs unchanged
  % mathematically, but keeps exp() from overflowing to Inf (which would
  % turn the outputs, the cost and then all the weights into NaN).
  unnormalisedoutputs = exp(outsum - repmat(max(outsum,[],2), 1, numout));
  rowsums = sum(unnormalisedoutputs,2);
  outputs = unnormalisedoutputs./repmat(rowsums,1,numout);
  %%%%%% END FORWARD PASS %%%%%%%%%

  residuals = outputs-targets;
  % The error derivatives with respect to the input to each output unit.
  dEbydoutsum = residuals;
  E = - sum(sum(targets.*log(tiny+outputs)));

  if rem(epoch,errorprintfreq)==0
    % A unit is "guessed" when its output equals the largest in its row
    % (ties therefore mark several units in the same row).
    guesses = (outputs - repmat(max(outputs,[],2), 1, numout)) >= 0;
    errors = sum(sum(targets)) - sum(sum(targets.*guesses));
    fprintf(1, 'epoch= %5i , errs %3i, E=%6.3f ', ...
            epoch, errors, E);

    % Test-set sizes get their own variables so that the training-set
    % numin/numout are not overwritten inside the loop.
    tnumcases = size(testdata,1);
    tnumout   = size(testtargets,2);

    %%%%%% BEGIN FORWARD PASS FOR TEST CASES %%%%%%%%%
    thidsum  = testdata*inhid + repmat(hidbiases, tnumcases, 1);
    thidacts = 1./(1+exp(-thidsum));
    toutsum  = thidacts*hidout + repmat(outbiases, tnumcases, 1);
    % Same overflow-safe softmax as for the training cases.
    tunnormalisedoutputs = exp(toutsum - repmat(max(toutsum,[],2), 1, tnumout));
    trowsums = sum(tunnormalisedoutputs,2);
    toutputs = tunnormalisedoutputs./repmat(trowsums,1,tnumout);
    %%%%%% END FORWARD PASS FOR TEST CASES %%%%%%%%%

    tresiduals = toutputs-testtargets;  % not used further in this script
    tE = - sum(sum(testtargets.*log(tiny+toutputs)));
    alltE = [alltE;tE];  %% inefficient but easy to program
    testguesses = (toutputs - repmat(max(toutputs,[],2), 1, tnumout)) >= 0;
    terrors = sum(sum(testtargets)) ...
              - sum(sum(testtargets.*testguesses));
    allterrors = [allterrors;terrors];
    fprintf(1, 'terrs= %3i, tE=%6.5f \n', ...
            terrors, tE);
  end

  %%%%%% BEGIN BACKWARD PASS %%%%%%
  dEbydhidacts = dEbydoutsum*(hidout');
  dEbydhidsum  = dEbydhidacts.*hidacts.*(1-hidacts);
  % sum(X,1) sums over cases (rows) even when there is only one case,
  % where a plain sum(X) would collapse the row vector to a scalar.
  dEbydoutbiases = sum(dEbydoutsum,1);
  dEbydhidout    = hidacts'*dEbydoutsum;  % derivatives for hidden-to-output weights
  dEbydhidbiases = sum(dEbydhidsum,1);
  dEbydinhid     = data'*dEbydhidsum;
  %%%%%% END BACKWARD PASS %%%%%%%

  %%%%%% UPDATE WEIGHTS AND BIASES %%%%%%
  inhid     = inhid     - epsilon*dEbydinhid;
  hidout    = hidout    - epsilon*dEbydhidout;
  hidbiases = hidbiases - epsilon*dEbydhidbiases;
  outbiases = outbiases - epsilon*dEbydoutbiases;

  if rem(epoch,weightprintfreq)==0
    showweights(inhid,hidout);  % defined outside this file
    drawnow;
  end
end

%% x axis is in units of epochs/errorprintfreq.
figure(3); clf;
plot(alltE);
title(sprintf('cross-entropy test error with numhid= %3i', numhid));
xlabel(sprintf('epochs times %3i',errorprintfreq));
legend(sprintf('min value %4.0f',min(alltE)));

figure(4); clf;
numerrs = size(allterrors,1);
%% clips values greater than 800 to expand vertical scale.
clippedterrors = min(allterrors, 800);
plot(clippedterrors);
title(sprintf('number of test errors with numhid= %3i', numhid));
xlabel(sprintf('epochs times %3i',errorprintfreq));
legend(sprintf('min value %4i',min(clippedterrors)));
drawnow;