load Dataset
% Question 1: experiment with the data
h1 = figure;
gscatter(Dataset(:,1),Dataset(:,2),Dataset(:,3),'br','xo')
h2 = figure;
boxplot(Dataset)
x1_min = min(Dataset(:,1))
x1_max = max(Dataset(:,1))
x2_min = min(Dataset(:,2))
x2_max = max(Dataset(:,2))
x1_mean = mean(Dataset(:,1))
x2_mean = mean(Dataset(:,2))
x1_median = median(Dataset(:,1))
x2_median = median(Dataset(:,2))
x1_mode = mode(Dataset(:,1))
x2_mode = mode(Dataset(:,2))
x1_skewness = skewness(Dataset(:,1))
x2_skewness = skewness(Dataset(:,2))
x1_kurtosis = kurtosis(Dataset(:,1))
x2_kurtosis = kurtosis(Dataset(:,2))
ZScores = zscore(Dataset(:,1:2));
ZScoresOver3 = find(abs(ZScores) > 3);   % indices of potential outliers (|z| > 3); was misnamed ZScoresLess3
% Maximum-likelihood estimates of mean and variance
% (Gaussian_ML_estimate is an external helper function).
[x1_m_hat, x1_S_hat] = Gaussian_ML_estimate(Dataset(:,1));
[x2_m_hat, x2_S_hat] = Gaussian_ML_estimate(Dataset(:,2));
figure; h3 = histfit(Dataset(:,1));   % new figure each time, so h4 does not overwrite h3
figure; h4 = histfit(Dataset(:,2));
h5 = figure;
probplot('normal',Dataset(:,1))
h6 = figure;
probplot('normal',Dataset(:,2))

% Question 2: use the fitdist function to derive the probability density
% function of the data.
pd = fitdist(Dataset(:,1),'ExtremeValue')
pd = fitdist(Dataset(:,2),'ExtremeValue')
pd = fitdist(Dataset(:,1),'GeneralizedExtremeValue')
pd = fitdist(Dataset(:,2),'GeneralizedExtremeValue')
pd = fitdist(Dataset(:,1),'Logistic')
pd = fitdist(Dataset(:,2),'Logistic')
pd = fitdist(Dataset(:,1),'Normal')
pd = fitdist(Dataset(:,2),'Normal')
pd = fitdist(Dataset(:,1),'Rayleigh')
pd = fitdist(Dataset(:,2),'Rayleigh')
pd = fitdist(Dataset(:,1),'tLocationScale')
pd = fitdist(Dataset(:,2),'tLocationScale')
pd_x1 = fitdist(Dataset(:,1),'Kernel')
pd_x2 = fitdist(Dataset(:,2),'Kernel')
[pdf_x1,x1_i,bw_x1] = ksdensity(Dataset(:,1));
x1_f = fit(x1_i',pdf_x1','poly6')
x1_ci = confint(x1_f)
[pdf_x2,x2_i,bw_x2] = ksdensity(Dataset(:,2));
x2_f = fit(x2_i',pdf_x2','poly9')
x2_ci = confint(x2_f)
xtrain = Dataset(1:80,1:2);
ytrain = Dataset(1:80,3);
xtest = Dataset(81:100,1:2);
ks_width = [pd_x1.BandWidth pd_x2.BandWidth];
NBModel = fitNaiveBayes(xtrain,ytrain,'Distribution','kernel','KSWidth',ks_width)
ytest = NBModel.predict(xtest)
% Compute the misclassification error (the proportion of misclassified
% observations) on the training set. The class labels are numeric (-1/1),
% so compare with ~= rather than strcmp, which only applies to strings.
bad = (NBModel.predict(xtrain) ~= ytrain);
NBResubErr = sum(bad) / 80
% Estimate the true test error for NB using 10-fold stratified cross-validation.
% First use cvpartition to generate 10 disjoint stratified subsets.
cp = cvpartition(ytrain,'k',10)
% Retrain the model on each training fold; reusing the already-trained
% NBModel here would leak training information into the error estimate.
NBClassFun = @(xtrain,ytrain,xtest) predict( ...
    fitNaiveBayes(xtrain,ytrain,'Distribution','kernel','KSWidth',ks_width),xtest);
NBCVErr = crossval('mcr',xtrain,ytrain,'predfun',NBClassFun,'partition',cp)

% Question 3: use the Basic Fitting Tool to derive the equation that fits
% the data.
ldaClass = classify(xtrain,xtrain,ytrain);
% Compute the misclassification error (the proportion of misclassified
% observations) on the training set.
bad = (ldaClass ~= ytrain);
ldaResubErr = sum(bad) / 80
qdaClass = classify(xtrain,xtrain,ytrain,'quadratic');
bad_quad = (qdaClass ~= ytrain);
qdaResubErr = sum(bad_quad) / 80   % was sum(bad)/80, which reused the LDA errors
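% Optional check (a minimal sketch; it assumes ldaClass, qdaClass, and
% NBModel from above are still in the workspace): a confusion matrix breaks
% the single resubstitution error rate down by class, which the scalar
% error rate hides.
[ldaConf, classOrder] = confusionmat(ytrain,ldaClass)   % rows = true class, columns = predicted
qdaConf = confusionmat(ytrain,qdaClass)
nbConf  = confusionmat(ytrain,NBModel.predict(xtrain))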
% Estimate the true test error for LDA and QDA using 10-fold stratified
% cross-validation. First use cvpartition to generate 10 disjoint
% stratified subsets.
cp = cvpartition(ytrain,'k',10)
ldaClassFun = @(xtrain,ytrain,xtest)(classify(xtest,xtrain,ytrain));
ldaCVErr = crossval('mcr',xtrain,ytrain,'predfun',ldaClassFun,'partition',cp)
qdaClassFun = @(xtrain,ytrain,xtest)(classify(xtest,xtrain,ytrain,'quadratic'));
qdaCVErr = crossval('mcr',xtrain,ytrain,'predfun',qdaClassFun,'partition',cp)

% The linear-kernel SVM classifier:
SVMStruct = svmtrain(xtrain,ytrain)
ytest = svmclassify(SVMStruct,xtest)
bad = (svmclassify(SVMStruct,xtrain) ~= ytrain);
SVMResubErr = sum(bad) / 80
cp = cvpartition(ytrain,'k',10)
% Retrain the SVM on each training fold; reusing the fixed SVMStruct would
% bias the cross-validation estimate.
SVMClassFun = @(xtrain,ytrain,xtest) svmclassify(svmtrain(xtrain,ytrain),xtest);
SVMCVErr = crossval('mcr',xtrain,ytrain,'predfun',SVMClassFun,'partition',cp)

% The quadratic-kernel SVM classifier:
SVMStruct = svmtrain(xtrain,ytrain,'kernel_function','quadratic')
ytest = svmclassify(SVMStruct,xtest)
bad = (svmclassify(SVMStruct,xtrain) ~= ytrain);
Q_SVMResubErr = sum(bad) / 80
cp = cvpartition(ytrain,'k',10)
SVMClassFun = @(xtrain,ytrain,xtest) svmclassify( ...
    svmtrain(xtrain,ytrain,'kernel_function','quadratic'),xtest);
Q_SVMCVErr = crossval('mcr',xtrain,ytrain,'predfun',SVMClassFun,'partition',cp)

% Non-linear classifiers
% The polynomial-kernel SVM classifier:
SVMStruct = svmtrain(xtrain,ytrain,'kernel_function','polynomial')
ytest = svmclassify(SVMStruct,xtest)
bad = (svmclassify(SVMStruct,xtrain) ~= ytrain);
P_SVMResubErr = sum(bad) / 80
cp = cvpartition(ytrain,'k',10)
SVMClassFun = @(xtrain,ytrain,xtest) svmclassify( ...
    svmtrain(xtrain,ytrain,'kernel_function','polynomial'),xtest);
P_SVMCVErr = crossval('mcr',xtrain,ytrain,'predfun',SVMClassFun,'partition',cp)

% The RBF-kernel SVM classifier:
SVMStruct = svmtrain(xtrain,ytrain,'kernel_function','rbf')
ytest = svmclassify(SVMStruct,xtest)
bad = (svmclassify(SVMStruct,xtrain) ~= ytrain);
R_SVMResubErr = sum(bad) / 80
cp = cvpartition(ytrain,'k',10)
SVMClassFun = @(xtrain,ytrain,xtest) svmclassify( ...
    svmtrain(xtrain,ytrain,'kernel_function','rbf'),xtest);
R_SVMCVErr = crossval('mcr',xtrain,ytrain,'predfun',SVMClassFun,'partition',cp)

% The multilayer-perceptron-kernel SVM classifier:
SVMStruct = svmtrain(xtrain,ytrain,'kernel_function','mlp')
bad = (svmclassify(SVMStruct,xtrain) ~= ytrain);
MP_SVMResubErr = sum(bad) / 80
cp = cvpartition(ytrain,'k',10)
SVMClassFun = @(xtrain,ytrain,xtest) svmclassify( ...
    svmtrain(xtrain,ytrain,'kernel_function','mlp'),xtest);
MP_SVMCVErr = crossval('mcr',xtrain,ytrain,'predfun',SVMClassFun,'partition',cp)

% Another way to train an SVM, using the newer fitcsvm interface:
SVMModel1 = fitcsvm(xtrain,ytrain,'KernelFunction','polynomial','Standardize',true);
% Compute the scores over a grid
d = 0.02; % step size of the grid
[x1Grid,x2Grid] = meshgrid(min(xtrain(:,1)):d:max(xtrain(:,1)), ...
    min(xtrain(:,2)):d:max(xtrain(:,2)));
xGrid = [x1Grid(:),x2Grid(:)]; % the grid
[~,scores1] = predict(SVMModel1,xGrid); % the scores
figure;
h(1:2) = gscatter(xtrain(:,1),xtrain(:,2),ytrain);
hold on
h(3) = plot(xtrain(SVMModel1.IsSupportVector,1), ...
    xtrain(SVMModel1.IsSupportVector,2),'ko','MarkerSize',10); % support vectors
contour(x1Grid,x2Grid,reshape(scores1(:,2),size(x1Grid)),[0 0],'k'); % decision boundary
title('Scatter Diagram with the Decision Boundary')
legend({'-1','1','Support Vectors'},'Location','Best');
hold off
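% Summary (a minimal sketch; it assumes the *ResubErr and *CVErr variables
% computed above are still in the workspace). Putting the resubstitution
% and cross-validation errors side by side makes overfitting visible: a
% classifier whose CV error sits far above its resubstitution error is
% fitting noise in the training set.
clfNames = {'NaiveBayes';'LDA';'QDA';'SVM-linear';'SVM-quadratic'; ...
            'SVM-polynomial';'SVM-rbf';'SVM-mlp'};
resubErr = [NBResubErr; ldaResubErr; qdaResubErr; SVMResubErr; ...
            Q_SVMResubErr; P_SVMResubErr; R_SVMResubErr; MP_SVMResubErr];
cvErr    = [NBCVErr; ldaCVErr; qdaCVErr; SVMCVErr; ...
            Q_SVMCVErr; P_SVMCVErr; R_SVMCVErr; MP_SVMCVErr];
results  = table(resubErr,cvErr,'RowNames',clfNames)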