function [ww,vv,yy,trerr,tsterr] = trainmlp(xtrn,ytrn,xtst,ytst,ww,vv,maxiters,alpha)
% [ww,vv,yy,trainerr,tsterr] =
%        trainmlp(xtrain,ytrain,xtest,ytest,ww,vv,maxiters,alpha)
%
% Trains a one-hidden-layer neural network with inputs xtrain, desired
% outputs ytrain, input-to-hidden weights ww and hidden-to-output
% weights vv.
%
% Uses gradient descent with momentum and an adaptive stepsize, and calls
% bprop to compute the outputs, the error and the gradients. A step is
% accepted only if it strictly lowers the training error; an accepted step
% grows the stepsize by 5%, a rejected step halves it and clears the
% momentum. Training stops after maxiters accepted steps or when the
% stepsize has shrunk to zero.
%
% If present, the test error on inputs xtest and outputs ytest is
% monitored during training, but not used to adjust weights
% (you can pass these as empty matrices [] if you want).
%
% Inputs:
%   xtrain   d by N matrix, each column is an input case
%   ytrain   D by N matrix, each column is an output case
%   xtest    test inputs  (or [])
%   ytest    test outputs (or [])
%   ww       d+1 by H matrix, each column gives the weights into one
%            hidden unit; the last row of ww holds the hidden biases
%   vv       H+1 by D matrix, each column gives the weights into one
%            output unit; the last row of vv holds the output biases
%   maxiters maximum number of accepted steps (default 1e4)
%   alpha    momentum (default 0.95)
%
% Outputs:
%   ww,vv    weights after the last accepted step
%   yy       first output of bprop at the returned weights
%            ([] if no step was ever accepted)
%   trainerr training error after each accepted step
%   tsterr   test error after each accepted step ([] without test data)
%
% An example training might go like this:
%
%   H=12; ww0=1e-4*randn(size(xtrn,1)+1,H); vv0=1e-4*randn(H+1,size(ytrn,1));
%   [ww,vv,yy,trainerr,testerr] = trainmlp(xtrn,ytrn,xtst,ytst,ww0,vv0,1e4,0.1)

more off;

[xdim,N] = size(xtrn);
ydim     = size(ytrn,1);
H        = size(ww,2);
% Expected shapes (not enforced here; bprop is left to complain):
%   size(ytrn,2)==N, size(ww,1)==xdim+1, size(vv,1)==H+1, size(vv,2)==ydim

if (nargin<8 || isempty(alpha)),    alpha=.95;     end   % momentum
if (nargin<7 || isempty(maxiters)), maxiters=1e4;  end   % maximum accepted steps

fprintf(1,'-->training with %d inputs, %d outputs, %d examples\n',xdim,ydim,N);
fprintf(1,'-->using %d hiddens, momentum=%g, maxiters=%d\n',H,alpha,maxiters);

eta    = median(abs([ww(:);vv(:)]))/100;    % initial stepsize
iter   = 1;
errold = Inf;

dw   = zeros(size(ww));  dv   = zeros(size(vv));   % momentum terms
dedw = zeros(size(ww));  dedv = zeros(size(vv));   % gradients at accepted weights

% Give every output a value up front so the function returns cleanly even
% when the loop never runs or no test data is supplied.
yy = []; trerr = []; tsterr = [];

havetest = ~isempty(xtst) && ~isempty(ytst);

while (eta>0 && iter<=maxiters)

  % Proposed weight updates with momentum. On the first pass both the
  % gradients and the momentum are zero, so this evaluates the error at
  % the initial weights.
  dw = -eta*(1-alpha)*dedw + alpha*dw;
  dv = -eta*(1-alpha)*dedv + alpha*dv;
  wwtry = ww + dw;
  vvtry = vv + dv;

  % Evaluate the trial point without touching the accepted state, so a
  % rejection needs no retraction and cannot leave ww, vv, yy or the
  % gradients out of sync with each other.
  [yytry,thiserr,dedwtry,dedvtry] = bprop(wwtry,vvtry,xtrn,ytrn);

  if ~(thiserr<errold)                  % reject: no improvement, or error is NaN
    eta = eta/2;                        % lower the stepsize
    dw  = zeros(size(dw));              % zero momentum
    dv  = zeros(size(dv));
  else                                  % accept this step
    ww = wwtry;  vv = vvtry;  yy = yytry;
    dedw = dedwtry;  dedv = dedvtry;
    fprintf(1,'iter: %6d eta: %1.5e err: %1.5e diff: %1.5e\n',...
            iter,eta,thiserr,thiserr-errold);      % print status
    eta    = eta*1.05;                  % increase stepsize
    errold = thiserr;
    if (nargout>3)
      trerr(iter) = thiserr;            % record training error
    end
    if (nargout>4 && havetest)
      [junk,tsterr(iter)] = bprop(ww,vv,xtst,ytst);  % compute test error
    end
    iter = iter+1;                      % count accepted steps only
  end

end