IRIS ML Project
Using the Iris dataset to predict the class of an iris flower with several different ML algorithms.
| import pandas | |
| from pandas.plotting import scatter_matrix | |
| import matplotlib.pyplot as plt | |
| from sklearn import model_selection | |
| from sklearn.metrics import classification_report | |
| from sklearn.metrics import confusion_matrix | |
| from sklearn.metrics import accuracy_score | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.neighbors import KNeighborsClassifier | |
| from sklearn.discriminant_analysis import LinearDiscriminantAnalysis | |
| from sklearn.naive_bayes import GaussianNB | |
| from sklearn.svm import SVC | |
# Load the Iris measurements straight from the UCI repository. Passing
# `names=` supplies the column labels, so the first line of the file is
# read as data rather than as a header.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
names = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class']
dataset = pandas.read_csv(url, names=names)

# Quick textual overview, printed in this order:
#   1. (rows, columns) of the frame
#   2. the first 10 rows
#   3. summary statistics from describe()
#   4. number of rows per value of the 'class' column
for summary in (
    dataset.shape,
    dataset.head(10),
    dataset.describe(),
    dataset.groupby('class').size(),
):
    print(summary)
# Exploratory plots of the dataset.

# Box-and-whisker plots, one subplot per plotted column, arranged in a
# 2x2 grid with independent axes.
dataset.plot(kind='box', subplots=True, layout=(2, 2), sharex=False, sharey=False)
# With subplots, plt.title() would label only the current axes (a single
# subplot); suptitle() places the heading on the figure as a whole.
plt.suptitle('MY IRIS PLOT')
plt.show()

# Histograms of the columns.
dataset.hist()
plt.show()

# Matrix of pairwise scatter plots between the columns.
scatter_matrix(dataset)
plt.show()
# Settings shared by the split below and by the later model evaluation.
seed = 6                # fixed random_state so the split is repeatable
scoring = 'accuracy'    # metric name passed to cross_val_score later on
validation_size = 0.20  # fraction of the rows held out as the test set

# Separate the raw values into features and labels.
array = dataset.values
X = array[:, 0:4]       # first four columns (the measurements)
Y = array[:, 4]         # fifth column (the 'class' label)

# Hold back `validation_size` of the rows for testing.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)
# Spot-check candidates: (short label, unfitted estimator) pairs, each
# estimator constructed with its default arguments.
models = [
    ('LR', LogisticRegression()),
    ('CART', DecisionTreeClassifier()),
    ('KNN', KNeighborsClassifier()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]
# Evaluate each model in turn with 10-fold cross-validation on the
# training data, printing "<label>: <mean score> (<std of scores>)".
results = []
names = []  # NOTE: rebinds `names`, which earlier held the CSV column labels

# shuffle=True is required whenever random_state is given: current
# scikit-learn raises ValueError for KFold(random_state=...) without
# shuffling. With an integer seed, split() yields the same folds on every
# call, so a single splitter is built once and shared by all models.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring
    )
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
Comments
Post a Comment