from sklearn import tree
from sklearn import metrics
import numpy as np
import csv
import csv
def readFileThroughCSV(filename):
    """Load a labelled numeric dataset from a CSV file using the csv module.

    The first row of the file is treated as a header and discarded. Blank
    lines are skipped, and every remaining cell is converted to float.

    Args:
        filename: Path of the comma-delimited file to read.

    Returns:
        A tuple ``(c, d)``: ``c`` is a 1-D float array holding the first
        column (the classification) and ``d`` is a 2-D float array holding
        all remaining columns.

    Raises:
        ValueError: If a cell cannot be converted to float or the rows have
            differing lengths.
        IndexError: If the file contains no data rows after the header.
    """
    # The context manager closes the file even when parsing raises;
    # newline="" is what the csv module documents for csv.reader input.
    with open(filename, newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        next(reader, None)  # discard the header row
        # csv.reader yields [] for a blank line; keeping it would make the
        # rows ragged and break the float conversion below.
        rows = [row for row in reader if row]
    data = np.array(rows).astype(float)
    # First column is the classification, the rest are the features.
    c = data[:, 0]
    d = data[:, 1:]
    return (c, d)
import pandas as pd
def readFileThroughPandas(filename):
    """Load a labelled dataset from a CSV file using pandas.

    The file is parsed once. Its first row is used by pandas as the header,
    the first column is returned as the classification and every remaining
    column is returned as feature data, however many columns there are.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(cnum, dnum)``: ``cnum`` is a 1-D NumPy array holding the
        first column and ``dnum`` is a 2-D NumPy array holding the
        remaining columns.
    """
    frame = pd.read_csv(filename)
    # Positional slicing keeps this independent of the column names and of
    # the number of feature columns in the file.
    # .to_numpy() may be used instead of .values on recent pandas versions.
    cnum = frame.iloc[:, 0].values
    dnum = frame.iloc[:, 1:].values
    return (cnum, dnum)
# Load the classifications (c) and the feature matrix (d).
(c, d) = readFileThroughPandas("multicommodity.csv")
#(c,d) = readFileThroughCSV("multicommodity.csv")
print(c.shape)
print(d.shape)

# The first n_train rows are used for training; every later row is held out
# for testing.
# NOTE(review): a previous comment said 80% of the dataset is used for
# training, but the split is a fixed 300 rows -- confirm the intended ratio.
n_train = 300
# One definition of the held-out rows, shared by the accuracy score and the
# confusion matrix so both always describe the same samples.
indices = range(n_train, len(c))

clf = tree.DecisionTreeClassifier()
clf.fit(X=d[:n_train, :], y=c[:n_train])

# score() returns accuracy
print("Training accuracy", clf.score(X=d[:n_train, :], y=c[:n_train]))
print("Testing accuracy", clf.score(X=d[indices, :], y=c[indices]))

# Confusion matrix of true vs. predicted classes on the held-out rows.
c_predicted = clf.predict(d[indices, :])
m = metrics.confusion_matrix(c[indices], c_predicted)
print(m)