"""Comparing different classifiers on the digits dataset."""

import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.ensemble import (AdaBoostClassifier, RandomForestClassifier,
                              VotingClassifier)
from sklearn.linear_model import RidgeClassifier, SGDClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


def plot_confusion_matrix(cnf_matrix, classifier_type):
    """Render *cnf_matrix* as a heatmap titled with *classifier_type*.

    Reads the module-level ``classes`` array for the axis tick labels;
    blocks on ``plt.show()`` until the figure window is closed.
    """
    tick_marks = range(len(classes))
    plt.title('Confusion matrix for ' + classifier_type)
    plt.imshow(cnf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.colorbar()
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)
    plt.show()


digits = load_digits()
X, y = digits.data, digits.target
classes = digits.target_names

# NOTE(review): plain ordered 80/20 split — no shuffling or stratification.
# model_selection.train_test_split would make that explicit; kept as-is to
# preserve the accuracies quoted at the bottom of the file.
sample_count = len(X)
boundary = int(0.8 * sample_count)
X_train, X_test = X[:boundary], X[boundary:]
y_train, y_test = y[:boundary], y[boundary:]

classifier_types = ['Stochastic gradient descent', 'Ridge', 'Random forest',
                    'AdaBoost', 'Voting']
sgd = SGDClassifier()
rdg = RidgeClassifier()
rfc = RandomForestClassifier(random_state=1)
abc = AdaBoostClassifier(learning_rate=0.1)
# Hard (majority-vote) ensemble over the four base estimators above.
vc = VotingClassifier(
    estimators=[('sgd', sgd), ('rdg', rdg), ('rfc', rfc), ('abc', abc)])
classifiers = [sgd, rdg, rfc, abc, vc]

# Fix: iterate names and models in lockstep with zip instead of indexing a
# parallel list by enumerate position — same pairing, no manual bookkeeping.
for classifier_type, cls in zip(classifier_types, classifiers):
    cls.fit(X_train, y_train)
    y_pred = cls.predict(X_test)
    print(classifier_type + ' classifier accuracy: {0:.3f}'.format(
        accuracy_score(y_test, y_pred)))
    cnf_matrix = confusion_matrix(y_test, y_pred)
    plot_confusion_matrix(cnf_matrix, classifier_type)

"""
Stochastic gradient descent classifier accuracy: 0.886
Ridge classifier accuracy: 0.858
Random forest classifier accuracy: 0.878
AdaBoost classifier accuracy: 0.769
Voting classifier accuracy: 0.894
Results across runs may vary.
"""