In [12]:

```
from sklearn import ensemble
from sklearn import metrics
import numpy as np
import csv
```

In [13]:

```
def readFileThroughCSV(filename):
    """Load a comma-separated numeric file and split it into labels and features.

    The first row of the file is discarded (treated as a header) and every
    remaining cell is converted to float. Completely blank lines are ignored.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    (c, d) : tuple of numpy.ndarray
        c -- 1-D array holding the last column (the classification label).
        d -- 2-D array holding every column except the first and the last.
             NOTE(review): column 0 is dropped; presumably it is a row id --
             confirm against the dataset.

    Raises
    ------
    ValueError
        If a cell cannot be converted to float.
    """
    # newline='' is what the csv module expects for files handed to csv.reader;
    # the with-block guarantees the handle is closed even if parsing fails.
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        # drop the first (header) row; the default keeps an empty file from raising here
        next(reader, None)
        # csv.reader yields [] for blank lines, which would make the array ragged
        rows = [row for row in reader if row]
    data = np.array(rows).astype(float)
    # last column is the classification label
    c = data[:, -1]
    # everything between the first and the last column is feature data
    d = data[:, 1:-1]
    return (c, d)
```

In [14]:

```
# c holds the labels, d the feature matrix returned by the loader
c, d = readFileThroughCSV("loanacceptance.csv")
# report the shape of each array, one per line
print(c.shape, d.shape, sep="\n")
```

In [7]:

```
# Only the first 400 rows are used for training (noted as 80% of the dataset);
# the remaining rows are held out for evaluation.
# random_state is pinned so that re-running the notebook on a fresh kernel
# rebuilds the same forest and reproduces the same scores.
clf = ensemble.RandomForestClassifier(n_estimators=10, random_state=0)
clf.fit(X=d[0:400, :], y=c[0:400])
```

Out[7]:

In [8]:

```
# For a classifier, clf.score returns the mean accuracy on the data it is given
print("Training accuracy", clf.score(X=d[:400, :], y=c[:400]))
# Same metric on the held-out rows: RandomForestClassifier.score is accuracy,
# not R-squared (R-squared is what regressors report from .score)
print("Testing accuracy (clf.score)", clf.score(X=d[400:, :], y=c[400:]))
# Cross-check by predicting the held-out rows explicitly; `indices` and
# `c_predicted` are kept as notebook globals
indices = range(400, 500)
c_predicted = clf.predict(d[indices, :])
print("Testing accuracy", metrics.accuracy_score(c[indices], c_predicted))
```

In [9]:

```
# Confusion matrix for the held-out rows: rows are true labels, columns are predictions
m = metrics.confusion_matrix(y_true=c[indices], y_pred=c_predicted)
print(m)
```