function lez_weka()
% LEZ_WEKA  K-fold cross-validation demo comparing two Weka k-NN classifiers
% on simulated two-class Gaussian data, followed (below) by confidence
% intervals and a significance test on the per-fold correct classification
% rates (CCR).
%
% Requires: Statistics Toolbox (mvnrnd, tinv), Weka on the Java classpath,
% and the helpers matlab2weka / train_weka_classif_affective /
% plot_distribution2 on the MATLAB path.

% Run once per session to expose Weka to MATLAB:
%javaaddpath('/Users/snrxe/PHD/VideogamesWk/weka-3-6-1/weka.jar')

close all;

% ---- Generate simulated data: TWO class problem --------------------
% class1 ~ N(M1, S1), class2 ~ N(M2, S2)
S1 = [1, 0.9; 0.9, 2];
S2 = [6, 0.2; 0.2, 2];
M1 = [5, 8];
M2 = [2, 7];

n = 100;                              % samples per class (total = 2*n)
X = [mvnrnd(M1, S1, n); mvnrnd(M2, S2, n)];
C = [zeros(n, 1) + 1; zeros(n, 1) + 2];   % class labels: 1 then 2

% Plot the distribution of the two features, colored by class
plot_distribution2(X, [1, 2], C);

% ---- K-fold cross-validation ---------------------------------------
K = 10;                               % number of folds
dim = 2 * n / K;                      % validation samples per fold
ccr  = zeros(1, K);                   % per-fold CCR, classifier 1
ccr2 = zeros(1, K);                   % per-fold CCR, classifier 2

for k = 1:K
    % Fold k is the validation set; everything else is training data
    val_sel = false(1, 2 * n);
    val_sel(dim * (k - 1) + 1 : dim * k) = true;
    train_sel = ~val_sel;

    Xtr  = X(train_sel, :);  Ctr  = C(train_sel);
    Xval = X(val_sel,   :);  Cval = C(val_sel);

    % Two K-NN classifiers with K=5 neighbors
    % (NOTE: this K is totally different from the number K of folds!)
    classifier  = {'weka.classifiers.lazy.IBk', {'-K', '5', '-W', '0'}};
    classifier2 = {'weka.classifiers.lazy.IBk', {'-K', '5', '-W', '0'}};

    % Build datasets in Weka format (last column is the class attribute)
    train      = matlab2weka('tr', {'feat1', 'feat2', 'class'}, [Xtr,  Ctr]);
    validation = matlab2weka('vl', {'feat1', 'feat2', 'class'}, [Xval, Cval]);

    % Train both classifiers on the training fold
    [cl]  = train_weka_classif_affective(train, [], classifier{1, 1},  classifier{1, 2});
    [cl2] = train_weka_classif_affective(train, [], classifier2{1, 1}, classifier2{1, 2});

    % Evaluate on the validation fold, accumulating 2x2 confusion matrices
    m  = zeros(2, 2);
    m2 = zeros(2, 2);
    class_dist  = zeros(dim, 2);
    class_dist2 = zeros(dim, 2);
    for t = 0:validation.numInstances - 1
        % distributionForInstance returns the posterior over both classes;
        % the predicted class is the argmax. (Rounding only the first
        % posterior, as the original code did, yields the invalid index 0
        % whenever class 1 has posterior < 0.5.)
        class_dist(t + 1, :) = cl.distributionForInstance(validation.instance(t));
        [~, pred] = max(class_dist(t + 1, :));
        m(Cval(t + 1), pred) = m(Cval(t + 1), pred) + 1;

        class_dist2(t + 1, :) = cl2.distributionForInstance(validation.instance(t));
        [~, pred2] = max(class_dist2(t + 1, :));
        m2(Cval(t + 1), pred2) = m2(Cval(t + 1), pred2) + 1;
    end

    % Per-fold performance: correct classification rate = trace(m) / #samples
    ccr(k)  = sum(diag(m))  / dim;
    ccr2(k) = sum(diag(m2)) / dim;
end

n = 2 * n;   % total number of samples (kept for the statistics below)
% ---- Evaluate confidence intervals on the per-fold CCRs ------------
% NOTE: the sample here is the set of K fold-level CCR values, so the
% sample size and the t degrees of freedom must come from the number of
% folds (numel(ccr) = K), NOT from the total number of data points n.
gamma = 0.95;                         % confidence level: P(-A < T < A) = 0.95
rates  = {ccr, ccr2};
labels = {'classifier 1', 'classifier 2'};
for i = 1:2
    nf   = numel(rates{i});           % number of folds (sample size)
    xbar = mean(rates{i});            % mean CCR
    s    = std(rates{i});             % CCR standard deviation
    t    = tinv((1 + gamma) / 2, nf - 1);   % P(T <= A) = (1+gamma)/2
    lb = xbar - t * s / sqrt(nf);
    ub = xbar + t * s / sqrt(nf);
    fprintf('Final performance %s: %f <= CCR <= %f\n', labels{i}, lb, ub);
end

% ---- Are the two classifiers different? ----------------------------
% Two-sample t-test on the per-fold CCRs. H_0: equal mean CCR.
nf   = numel(ccr);                    % number of folds
xbar = mean(ccr);
ybar = mean(ccr2);
sx = var(ccr);
sy = var(ccr2);

alpha = 0.05;                         % significance of the test
t  = tinv(1 - alpha / 2, nf - 1);     % critical value, nf-1 d.o.f.
se = sqrt((sx + sy) / nf);            % standard error of (xbar - ybar)
% (The original code re-applied sqrt and the division by the sample size
% to a quantity that was already a standard error, shrinking the interval
% and inflating the statistic inconsistently.)

fprintf('t=%.3f\n', t);
fprintf('s=%.3f\n', se);
fprintf('xbar-ybar = %f\n', xbar - ybar);

lb = -t * se;
ub =  t * se;
fprintf('IC = (%.3f,%.3f)\n', lb, ub);

if (abs(xbar - ybar) / se >= t)
    fprintf('H_0 rejected, Classifier are not equal!\n')
else
    fprintf('H_0 accepted, Classifier are equal \n')
end