%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% CSC 411: Machine Learning and Data Mining
% Tutorial 2 (January 26th): Example for the decision tree algorithm
% The data example is taken from http://decisiontrees.net/?q=node/16
%
% Another short, good Matlab tutorial:
%   http://cs.gmu.edu/~kosecka/cs803/MatlabTutorialCode.html
% For the complete Matlab reference:
%   http://www.mathworks.com/access/helpdesk/help/techdoc/matlab.html
%
% Rui Yan < ruiyan AT cs DOT toronto DOT edu >, January 26th 2007.
%
% What this script does:
%   1. Computes the entropy of the response over the 14 records.
%   2. Computes, for each of the four attributes, the entropy of every
%      attribute value and the resulting information gain
%          Gain(A) = E_S - sum_v ( n_v / 14 ) * Entropy(A = v)
%   3. Reports which attribute has the largest gain, plots the four
%      gains as a bar chart and saves the workspace to decision_tree.mat.
%
% Statements deliberately left without a trailing semicolon echo their
% value to the command window so the intermediate numbers can be followed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
clear;

% Attribute names, in the same order as the rows of Entropy and the
% entries of Gain. Used for the printed result and the bar chart labels.
attribute_names = {'District', 'House Type', 'Income', 'Previous Customers'};

% Entropy for the response (class proportions 9/14 and 5/14)
E_S = -9/14*log2(9/14) - 5/14*log2(5/14)

% Gain vector for holding the gain values of the four attributes
Gain = [0 0 0 0];
% Entropy matrix for holding all the entropy values (4x3: one row per
% attribute, one column per attribute value; unused entries stay 0)
Entropy = zeros(4,3);

%%%%%%%%%%%%%% Entropy and Gain Value for 'District' Attribute %%%%%%%%%
% Entropy for District = Suburban
Entropy(1,1) = -0.6*log2(0.6) - 0.4*log2(0.4)
% Entropy for District = Rural (pure subset, so the entropy is zero)
Entropy(1,2) = -log2(1)
% Entropy for District = Urban
Entropy(1,3) = -0.6*log2(0.6) - 0.4*log2(0.4)
% Gain value for the 'District' attribute.
% Weights are the subset sizes over 14. A 0.6/0.4 class split needs a
% multiple of 5 records, so Suburban and Urban each hold 5 records and
% the pure Rural subset holds the remaining 4 (5 + 4 + 5 = 14).
Gain(1) = E_S - (5/14 * Entropy(1,1) + 4/14 * Entropy(1,2) + 5/14 * Entropy(1,3))

%%%%%%%%%%%%%% Entropy and Gain Value for 'House Type' Attribute %%%%%%%
% Entropy for House Type = Detached
Entropy(2,1) = -0.5*log2(0.5) - 0.5*log2(0.5)
% Entropy for House Type = Semi-detached
Entropy(2,2) = -0.8*log2(0.8) - 0.2*log2(0.2)
% Entropy for House Type = Terrace
Entropy(2,3) = -0.6*log2(0.6) - 0.4*log2(0.4)
% Gain value for the 'House Type' attribute
Gain(2) = E_S - (4/14 * Entropy(2,1) + 5/14 * Entropy(2,2) + 5/14 * Entropy(2,3))

%%%%%%%%%%%%%% Entropy and Gain Value for 'Income' Attribute %%%%%%%%%%%
% Entropy for Income = High
Entropy(3,1) = -4/7*log2(4/7) - 3/7*log2(3/7)
% Entropy for Income = Low
Entropy(3,2) = -1/7*log2(1/7) - 6/7*log2(6/7)
% Gain value for the 'Income' attribute
Gain(3) = E_S - (7/14 * Entropy(3,1) + 7/14 * Entropy(3,2))

%%%%%%%%%%%%%% Entropy and Gain Value for 'Previous Customers' Attribute
% Entropy for Previous Customers = No
Entropy(4,1) = -2/8*log2(2/8) - 6/8*log2(6/8)
% Entropy for Previous Customers = Responded
Entropy(4,2) = -3/6*log2(3/6) - 3/6*log2(3/6)
% Gain value for the 'Previous Customers' attribute
Gain(4) = E_S - (8/14 * Entropy(4,1) + 6/14 * Entropy(4,2))

% Sort Gain in ascending order; the last index is the attribute with the
% largest gain.
[ordered_Gain, index_Gain] = sort(Gain);
disp(['The ''' attribute_names{index_Gain(end)} ''' attribute has the largest Gain value']);

% Draw a figure.
% bar() on a vector draws a single series, so a four-entry legend would
% label only the first name; name each bar on the x-axis instead.
figure;
bar(Gain');
set(gca, 'XTick', 1:numel(Gain), 'XTickLabel', attribute_names);
xlabel('Four Attributes: District, House Type,Income, and Previous Customers');
ylabel('Gain value');
title('Gain value comparisons');

% Save the Matlab workspace
save decision_tree.mat