In [16]:
import matplotlib.pyplot as plt
import numpy as np


## Read and plot the image¶

In [17]:
img = plt.imread("imagedata.jpg")
plt.axis('off')
plt.imshow(img)
plt.show()


## If you do not use the plot function then note that img is a three dimensional (RGB) object.¶

In [18]:
print(img.shape)
print(img)

(564, 400, 3)
[[[179 173 177]
[180 174 178]
[180 174 178]
...
[181 172 175]
[178 172 174]
[176 170 172]]

[[181 175 179]
[181 175 179]
[181 175 179]
...
[179 170 173]
[179 170 173]
[177 171 173]]

[[183 177 181]
[182 176 180]
[181 175 179]
...
[178 169 174]
[179 170 173]
[181 172 175]]

...

[[160 124 102]
[148 113  91]
[148 117  96]
...
[111  78  59]
[121  86  67]
[110  75  55]]

[[168 132 110]
[152 120  99]
[146 115  94]
...
[115  82  63]
[123  88  69]
[114  79  60]]

[[163 127 105]
[155 123 102]
[148 117  97]
...
[109  76  57]
[123  87  71]
[122  86  70]]]


## Reshape img into a matrix¶

In [19]:
#The array has 564 rows each of pixel 400x3.
#Reshape it into the form of a matrix that PCA can understand. # 1200 = 400 * 3

img_reshaped = np.reshape(img, (564, 1200))
print(img_reshaped.shape)
print(img_reshaped)

(564, 1200)
[[179 173 177 ... 176 170 172]
[181 175 179 ... 177 171 173]
[183 177 181 ... 181 172 175]
...
[160 124 102 ... 110  75  55]
[168 132 110 ... 114  79  60]
[163 127 105 ... 122  86  70]]


## Make the data centred at origin¶

In [20]:
img_mean = img_reshaped.mean(axis=0)
img_reshaped = img_reshaped-img_mean


## Apply Principal Component Analysis¶

In [21]:
from sklearn.decomposition import PCA

components=40
pca = PCA(n_components=components)
pca.fit(img_reshaped)

Out[21]:
PCA(copy=True, iterated_power='auto', n_components=40, random_state=None,
svd_solver='auto', tol=0.0, whiten=False)

### To get back from T coordinate system to W coordinate system one needs to do as follows, X=TW'. Note that if W is the complete matrix then X is the exact representation, otherwise not. Also remember to do the de-normalization of X to get to the original values.¶

In [22]:
#Following represents the values in the new coordinate system T=XW
img_transformed_coordinate = pca.transform(img_reshaped)
print(img_transformed_coordinate.shape)

#To go back to the old coordinate systesm, either use the inbuilt command or do the operations manually X=TW'
#img_original_coordinate = pca.inverse_transform(img_transformed_coordinate)
img_original_coordinate = img_transformed_coordinate.dot(pca.components_)

#Shifting the mean to original values
img_original_coordinate = img_original_coordinate+img_mean

#Reshaping the matrix to 564*400*3
img_final = np.reshape(img_original_coordinate, (564,400,3))
img_final = img_final.astype('int')

img_final[img_final<0] = 0
img_final[img_final>255] = 255

plt.axis('off')
plt.imshow(img_final)
plt.savefig('imagedata_new.jpg')
plt.show()

(564, 40)


## Data Compression Achieved¶

In [23]:
#Number of values required to store the original image
original_number_of_values = 564*400*3
#Number of values required to store the new image
#Values in transformed coordinate + principal component values + mean values
new_number_of_values = 564*components+1200*components+components

space_required_in_percentage = new_number_of_values/original_number_of_values*100
print("%.2f" % space_required_in_percentage)

10.43