In [15]:

```
from sklearn import svm
from sklearn import preprocessing
from sklearn import metrics
import numpy as np
import csv
```

In [16]:

```
def readFileThroughCSV(filename):
    """Read a CSV data file and split it into labels and features.

    Parameters
    ----------
    filename : str
        Path to a CSV file whose first row is a header, whose last
        column is the classification label, and whose remaining
        columns are numeric attributes (column 0 is dropped from the
        features).

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(c, d)`` where ``c`` is the last column (labels) and ``d``
        is columns ``1:-1`` (all attributes except column 0 and the
        label column), both as float arrays.
    """
    # Context manager guarantees the file is closed even if parsing
    # raises (the original manual close() leaked the handle on error).
    # newline="" is the csv-module-recommended mode for reading.
    with open(filename, newline="") as csvfile:
        readerobject = csv.reader(csvfile, delimiter=',')
        rows = list(readerobject)
    # Drop the header row, then convert to a numeric array.
    data = np.array(rows[1:]).astype(float)
    # Extract last column, which is the classification label.
    c = data[:, -1]
    # Remaining data: skip column 0 and the label column.
    d = data[:, 1:-1]
    return (c, d)
```

In [31]:

```
import pandas as pd
def readFileThroughPandas(filename):
    """Load the loan data set and split it into labels and features.

    Parameters
    ----------
    filename : str
        Path to a CSV file containing the loan-acceptance columns.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(labels, features)`` where labels come from the
        "Loan Granted" column and features are the six attribute
        columns in a fixed order.
    """
    # Reads the entire data file; alternatively,
    # pd.read_csv(filename, usecols=np.arange(1, 8)) would read
    # columns 1 to 7, leaving out the 0th column.
    frame = pd.read_csv(filename)
    feature_columns = [
        "Marital Status",
        "Kids",
        "Annual Household Salary",
        "Loan Amount",
        "Car owner",
        "Education Level",
    ]
    labels = frame["Loan Granted"].to_numpy()
    features = frame[feature_columns].to_numpy()
    return (labels, features)
```

In [33]:

```
(c,d) = readFileThroughPandas("loanacceptance.csv")
# Shapes of the label vector and the feature matrix.
for arr in (c, d):
    print(arr.shape)
# Underlying container types (to_numpy() should yield numpy arrays).
for arr in (c, d):
    print(type(arr))
```

In [27]:

```
# Create an SVM classification object with a linear kernel.
# Note that 'rbf' is the default kernel in svm.SVC;
# C is a regularization parameter to avoid overfitting.
clf = svm.SVC(kernel='linear',C=1)
# Standardize each attribute to zero mean and unit variance, because the
# attributes are of significantly different orders of magnitude.
# (preprocessing.scale performs z-score standardization, NOT 0-1 min-max
# scaling as the original comment claimed.)
# NOTE(review): the scaler statistics are computed over ALL rows before
# the train/test split below, so test-set information leaks into
# training — fitting a scaler on rows 0:400 only would avoid this.
d = preprocessing.scale(d)
# Fit the SVM only on the first 400 data points; the rest are held out.
clf.fit(X=d[0:400,:],y=c[0:400])
# In case of more than 2 classes, note that multiclass is done based on one-vs-one in svm.SVC
```

Out[27]:

In [28]:

```
# clf.score returns the mean accuracy on the given data.
train_accuracy = clf.score(X=d[:400, :], y=c[:400])
test_accuracy = clf.score(X=d[400:, :], y=c[400:])
print("Training accuracy", train_accuracy)
print("Testing accuracy", test_accuracy)
```

In [29]:

```
# Confusion matrix for the held-out rows 400-499.
test_rows = range(400, 500)
predictions = clf.predict(d[test_rows, :])
confusion = metrics.confusion_matrix(c[test_rows], predictions)
print(confusion)
```