Understand AIML Decision Tree
In [6]:
#@title Run this cell to complete the setup for this Notebook
from IPython import get_ipython
# Handle to the running IPython shell; setup() and submit_notebook() use it to run magics.
ipython = get_ipython()
# Base name of this notebook; ".ipynb" is appended where a file name is needed.
notebook="M0W2_EXP_1_Decision_Tree_Zoo" #name of the notebook
def setup():
# ipython.magic("sx pip3 install torch")
ipython.magic("sx wget https://cdn.talentsprint.com/aiml/Experiment_related_data/Zoo_New.csv")
ipython.magic("sx apt-get install graphviz")
ipython.magic("sx pip install graphviz")
print ("Setup completed successfully")
return
def submit_notebook():
    """Register or submit this notebook with the grading dashboard.

    Reads the module-level ``submission_id``:

    * If it is empty, posts the learner id, notebook name and password and
      returns the ``record_id`` of a successful response.  On failure the
      reason is printed and ``None`` is returned.
    * If it is set and the Answer, Complexity, Additional and Concepts
      responses are all present, posts them together with the base64-encoded
      notebook file, prints a receipt and returns ``submission_id``.
    * If it is set but a response is missing, returns ``submission_id``
      unchanged so the caller does not lose the existing record.
    """
    # Same arguments as before, issued through the non-deprecated run_line_magic API.
    ipython.run_line_magic("notebook", "-e " + notebook + ".ipynb")
    import base64
    import json
    import requests

    url = "https://dashboard.talentsprint.com/xp/app/save_notebook_attempts"
    if not submission_id:
        data = {"id" : getId(), "notebook" : notebook, "mobile" : getPassword()}
        r = requests.post(url, data = data)
        r = json.loads(r.text)
        # .get() keeps a response without a "status" key on the error paths
        # below instead of raising KeyError.
        if r.get("status") == "Success":
            return r["record_id"]
        elif "err" in r:
            print(r["err"])
            return None
        else:
            print ("Something is wrong, the notebook will not be submitted for grading")
            return None
    elif getAnswer() and getComplexity() and getAdditional() and getConcepts():
        # Context manager closes the notebook file once it has been read.
        with open(notebook + ".ipynb", "rb") as f:
            file_hash = base64.b64encode(f.read())
        data = {"complexity" : Complexity, "additional" :Additional,
                "concepts" : Concepts, "record_id" : submission_id,
                "answer" : Answer, "id" : Id, "file_hash" : file_hash,
                "notebook" : notebook}
        r = requests.post(url, data = data)
        r = json.loads(r.text)
        print("Your submission is successful.")
        print("Ref Id:", submission_id)
        print("Date of submission: ", r["date"])
        print("Time of submission: ", r["time"])
        print("For any queries/discrepancies, please connect with mentors through the chat icon in LMS dashboard.")
        return submission_id
    else:
        # The original evaluated `submission_id` without returning it, so the
        # function returned None and the caller overwrote the existing id.
        return submission_id
def getAdditional():
    """Return the global ``Additional`` response if it is defined and truthy.

    Otherwise prints a reminder and returns ``None``.
    """
    try:
        response = Additional
    except NameError:
        response = None
    if not response:
        print ("Please answer Additional Question")
        return None
    return response
def getComplexity():
    """Return the global ``Complexity`` response.

    If the name is not defined, prints a reminder and returns ``None``.
    """
    try:
        rating = Complexity
    except NameError:
        print ("Please answer Complexity Question")
        rating = None
    return rating
def getConcepts():
    """Return the global ``Concepts`` response.

    If the name is not defined, prints a reminder and returns ``None``.
    """
    try:
        understood = Concepts
    except NameError:
        print ("Please answer Concepts Question")
        understood = None
    return understood
def getAnswer():
    """Return the global ``Answer`` response.

    If the name is not defined, prints a reminder and returns ``None``.
    """
    try:
        chosen = Answer
    except NameError:
        print ("Please answer Question")
        chosen = None
    return chosen
def getId():
    """Return the global ``Id`` if it is defined and truthy, else ``None``."""
    try:
        learner_id = Id
    except NameError:
        return None
    # An empty/falsy value is reported the same way as an undefined one.
    return learner_id or None
def getPassword():
    """Return the global ``password`` if it is defined and truthy, else ``None``."""
    try:
        secret = password
    except NameError:
        return None
    # An empty/falsy value is reported the same way as an undefined one.
    return secret or None
submission_id = None

### Setup
# Without both credentials nothing is registered and setup is skipped.
if not (getPassword() and getId()):
    print ("Please complete Id and Password cells before running setup")
else:
    submission_id = submit_notebook()
    # Only prepare the environment once the dashboard returned a record id.
    if submission_id:
        setup()
In [0]:
import pandas as pd
import numpy as np
import graphviz
from sklearn.tree import export_graphviz
In [0]:
# Read the CSV with explicit column labels, then discard 'animal_name':
# it is only a string naming the animal and adds no information for
# classification in this context.
dataset = pd.read_csv(
    'Zoo_New.csv',
    names=[
        'animal_name', 'hair', 'feathers', 'eggs', 'milk',
        'airbone', 'aquatic', 'predator', 'toothed', 'backbone',
        'breathes', 'venomous', 'fins', 'legs', 'tail', 'domestic', 'catsize', 'class',
    ],
).drop(columns='animal_name')
In [29]:
# Preview the first rows of the loaded frame.
dataset.head()
Out[29]:
In [9]:
# Distinct values that occur in the 'class' (target) column.
np.unique(dataset['class'].values)
Out[9]:
In [10]:
# Display the 'class' column of every row.
dataset['class']
Out[10]:
In [11]:
# (rows, columns) of the frame after 'animal_name' was dropped.
dataset.shape
Out[11]:
In [0]:
def train_test_split(dataset, train_size=80):
    """Split a DataFrame positionally into training and testing parts.

    Args:
        dataset: DataFrame to split; rows are taken in their existing order.
        train_size: Number of leading rows placed in the training part.
            Defaults to 80, the split point that used to be hard-coded.

    Returns:
        Tuple ``(training_data, testing_data)``.  Both parts are re-indexed
        from 0 so later code does not run into mismatched row labels.
    """
    training_data = dataset.iloc[:train_size].reset_index(drop=True)
    testing_data = dataset.iloc[train_size:].reset_index(drop=True)
    return training_data, testing_data
# One call yields both parts of the split.
training_data, testing_data = train_test_split(dataset)
In [0]:
# np.asarray yields the same array as `.values` for a DataFrame, but also
# accepts an ndarray, so re-running this cell no longer raises AttributeError.
training_data = np.asarray(training_data)
In [0]:
# np.asarray yields the same array as `.values` for a DataFrame, but also
# accepts an ndarray, so re-running this cell no longer raises AttributeError.
testing_data = np.asarray(testing_data)
In [0]:
from sklearn import tree
In [0]:
# Fixed seed: the classifier permutes features at each split, so without
# random_state the fitted tree (and the accuracy) can differ between runs.
clf = tree.DecisionTreeClassifier(random_state=0)
In [0]:
# Fit on columns 0-14 as features and column 16 as the target.
# NOTE(review): the slice stops before column 15 ('catsize' in the names list
# passed to read_csv), so that column is never used as a feature; confirm intended.
clf = clf.fit(training_data[:,:15],training_data[:,16])
In [0]:
# Predict with the same feature columns (0-14) that were used for fitting.
pred = clf.predict(testing_data[:,:15])
In [0]:
from sklearn.metrics import accuracy_score
In [20]:
# Compare the predictions against column 16 (the target) of the test rows.
accuracy_score(testing_data[:,16], pred)
Out[20]:
In [21]:
import os

save_dot = "output" + ".dot"
save_png = "output"+ ".png"
# export_graphviz returns the DOT text only when out_file is None; with a
# file name it returns None, which left `graph` wrapping nothing.  Ask for
# the text, write the .dot file ourselves, and build the Source from it.
dot_source = export_graphviz(clf, out_file=None, filled = True,
                             feature_names=(list(dataset.columns))[:-2])
with open(save_dot, "w", encoding="utf-8") as dot_file:
    dot_file.write(dot_source)
graph = graphviz.Source(dot_source)
# Render the .dot file to PNG with the Graphviz command-line tool; the exit
# status is the cell's displayed value.
os.system("dot -T png -o " + save_png + " " + save_dot)
Out[21]:
In [22]:
import matplotlib.pyplot as plt

# Show the PNG written to `save_png` on a large canvas without grid lines.
plt.figure(figsize=(20,20))
plt.grid(False)
# The stray trailing comma that wrapped this call in a throwaway tuple is removed.
plt.imshow(plt.imread(save_png))
plt.show()
In [23]:
def feature_importance_chart(clf, classifier_name, feature_names):
    """Plot the tree's unnormalized feature importances as horizontal bars.

    Args:
        clf: Fitted estimator exposing ``tree_.compute_feature_importances``.
        classifier_name: Accepted for interface compatibility; not used.
        feature_names: Names paired positionally with the importances.
            Surplus names beyond the number of importances are ignored.
    """
    raw_importances = clf.tree_.compute_feature_importances(normalize=False)
    # Sort (importance, name) pairs ascending, then split them back into
    # two parallel tuples.
    ranked_pairs = sorted(zip(raw_importances, feature_names))
    sorted_feature_importances, sorted_feature_names = zip(*ranked_pairs)

    bar_positions = range(len(sorted_feature_importances))
    tick_labels = [
        "{}: {:.3}".format(name, importance)
        for name, importance in zip(sorted_feature_names, sorted_feature_importances)
    ]

    plt.figure(figsize=(16, 9))
    plt.barh(bar_positions, sorted_feature_importances)
    plt.yticks(bar_positions, tick_labels)
    plt.title("Feature importance for the tree")
    plt.show()
# Pass exactly the feature names the tree was exported with (all columns but
# the last two) instead of relying on zip() silently dropping the extras.
feature_importance_chart(clf, "simple tree", list(dataset.columns)[:-2])
In [0]:
#### Your code here
def train_test_split(dataset, train_size=50):
    """Split a DataFrame positionally into training and testing parts.

    This redefines the helper of the same name from the earlier cell, with a
    50-row default instead of 80.

    Args:
        dataset: DataFrame to split; rows are taken in their existing order.
        train_size: Number of leading rows placed in the training part.
            Defaults to 50, the split point that used to be hard-coded here.

    Returns:
        Tuple ``(training_data, testing_data)``.  Both parts are re-indexed
        from 0 so later code does not run into mismatched row labels.
    """
    training_data = dataset.iloc[:train_size].reset_index(drop=True)
    testing_data = dataset.iloc[train_size:].reset_index(drop=True)
    return training_data, testing_data
# One call yields both parts of the split.
training_data, testing_data = train_test_split(dataset)
In [0]:
# np.asarray yields the same arrays as `.values` for DataFrames, but also
# accepts ndarrays, so re-running this cell no longer raises AttributeError.
training_data = np.asarray(training_data)
testing_data = np.asarray(testing_data)
In [26]:
from sklearn import tree
from sklearn.metrics import accuracy_score

# Fixed seed: the classifier permutes features at each split, so without
# random_state the fitted tree (and the accuracy) can differ between runs.
clf = tree.DecisionTreeClassifier(random_state=0)
# Columns 0-14 are the features and column 16 is the target.
# NOTE(review): column 15 ('catsize' in the names list passed to read_csv)
# is never used as a feature; confirm intended.
clf = clf.fit(training_data[:,:15],training_data[:,16])
pred = clf.predict(testing_data[:,:15])
# Compare the predictions against the target column of the test rows.
accuracy_score(testing_data[:,16], pred)
Out[26]:
In [27]:
import os

save_dot = "output" + ".dot"
save_png = "output"+ ".png"
# export_graphviz returns the DOT text only when out_file is None; with a
# file name it returns None, which left `graph` wrapping nothing.  Ask for
# the text, write the .dot file ourselves, and build the Source from it.
dot_source = export_graphviz(clf, out_file=None, filled = True,
                             feature_names=(list(dataset.columns))[:-2])
with open(save_dot, "w", encoding="utf-8") as dot_file:
    dot_file.write(dot_source)
graph = graphviz.Source(dot_source)
# Render the .dot file to PNG with the Graphviz command-line tool; the exit
# status is the cell's displayed value.
os.system("dot -T png -o " + save_png + " " + save_dot)
Out[27]:
In [28]:
import matplotlib.pyplot as plt

# Show the PNG written to `save_png` on a large canvas without grid lines.
plt.figure(figsize=(20,20))
plt.grid(False)
# The stray trailing comma that wrapped this call in a throwaway tuple is removed.
plt.imshow(plt.imread(save_png))
plt.show()
Comments
Post a Comment