# Exercise 1: Projection onto the principal components
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

s = np.array([[2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0],
              [2.3, 2.7], [2.0, 1.6], [1.0, 1.1], [1.5, 1.6], [1.1, 0.9]])
pca = PCA(n_components=1)
s1 = pca.fit_transform(s)

# Method 1: project the data onto the first principal component by hand
plt.xlim(0, 3.5)
plt.ylim(0, 3.5)
plt.gca().set_aspect('equal', adjustable='box')
plt.plot(s[:, 0], s[:, 1], 'ro')
pca1 = PCA(n_components=2)
pca1.fit(s)
x = np.linspace(0, 3.5)
# first principal axis (black line) through the mean; components_ rows are the PCs
y = pca1.components_[0][1] / pca1.components_[0][0] * x \
    + pca1.mean_[1] - pca1.components_[0][1] / pca1.components_[0][0] * pca1.mean_[0]
plt.plot(x, y, 'k-')
# second principal axis (red line) through the mean
y = pca1.components_[1][1] / pca1.components_[1][0] * x \
    + pca1.mean_[1] - pca1.components_[1][1] / pca1.components_[1][0] * pca1.mean_[0]
plt.plot(x, y, 'r-')
# step 1: center the data
s2 = np.zeros([10, 2])
for i in range(len(s)):
    s2[i] = s[i] - pca1.mean_
# step 2: express the centered data in the principal-component basis
c = np.dot(s2, pca1.components_.transpose())
# step 3: discard the second component
c[:, 1] = 0
# step 4: map back to the original coordinates
c1 = np.dot(c, pca1.components_)
# step 5: add the mean back and plot the projected points
plt.plot(c1[:, 0] + pca1.mean_[0], c1[:, 1] + pca1.mean_[1], 'bo')
plt.show()

# Method 2: let scikit-learn do the projection with inverse_transform
plt.xlim(0, 3.5)
plt.ylim(0, 3.5)
plt.gca().set_aspect('equal', adjustable='box')
plt.plot(s[:, 0], s[:, 1], 'ro')
pca1 = PCA(n_components=2)
pca1.fit(s)
x = np.linspace(0, 3.5)
y = pca1.components_[0][1] / pca1.components_[0][0] * x \
    + pca1.mean_[1] - pca1.components_[0][1] / pca1.components_[0][0] * pca1.mean_[0]
plt.plot(x, y, 'k-')
y = pca1.components_[1][1] / pca1.components_[1][0] * x \
    + pca1.mean_[1] - pca1.components_[1][1] / pca1.components_[1][0] * pca1.mean_[0]
plt.plot(x, y, 'r-')
c1 = pca.inverse_transform(s1)
plt.plot(c1[:, 0], c1[:, 1], 'bo')
plt.show()

# Exercise 2: visualization of the images
import numpy as np
from sklearn.decomposition import PCA
from sklearn import datasets
import matplotlib.pyplot as plt

# load the handwritten-digit data from the database
digits = datasets.load_digits()
print(digits.keys())
print(digits.data.shape)
# assignment
X, y = digits.data, digits.target
# define the PCA
pca = PCA(n_components=2)
# reduce the features to 2 components
X_proj = pca.fit_transform(X)
# only about 28% of the variance is retained by the first 2 PCs
print(np.sum(pca.explained_variance_ratio_))
# plot the principal components as a scatter plot
plt.scatter(X_proj[:, 0], X_proj[:, 1], c=y)
plt.colorbar()
plt.show()

# Exercise 3: preprocess the data
import numpy as np
from sklearn.decomposition import PCA
from sklearn import datasets
import matplotlib.pyplot as plt

# load the dataset and keep only the digits 1 and 8
digits = datasets.load_digits()
data, target = digits.data, digits.target
X = data[np.logical_or(target == 1, target == 8), :]
y = target[np.logical_or(target == 1, target == 8)]
# define the PCA
pca = PCA(n_components=2)
# plot the principal components as a scatter plot
X_proj = pca.fit_transform(X)
plt.scatter(X_proj[:, 0], X_proj[:, 1], c=y)
plt.show()

## move to a new cell
# print the amount of variance retained by the first 2 PCs
print(np.sum(pca.explained_variance_ratio_))
# change n_components to a fraction: keep enough components to explain 50% of the variance
pca = PCA(n_components=0.50)
# reduce the feature dimensions
x = pca.fit_transform(X)
# print the estimated number of components
print(pca.n_components_)
# print the amount of variance actually retained
print(np.sum(pca.explained_variance_ratio_))

# Exercise 4: application
from sklearn import svm, model_selection

clf = svm.SVC(kernel='rbf', gamma=0.001)
scores = model_selection.cross_val_score(clf, x, y, cv=6)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
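## Optional sketch (not part of the original exercise): the PCA reduction and the SVM
## can also be chained with sklearn.pipeline.make_pipeline, so the PCA is refit inside
## every cross-validation fold instead of once on the full data. This assumes the digits
## subset X, y from Exercise 3 is still in memory; the names pipe and pipe_scores are
## illustrative only.
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(PCA(n_components=0.50), svm.SVC(kernel='rbf', gamma=0.001))
pipe_scores = model_selection.cross_val_score(pipe, X, y, cv=6)
print("Pipeline accuracy: %0.2f (+/- %0.2f)" % (pipe_scores.mean(), pipe_scores.std() * 2))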
## move to a new cell
# plot the decision surface (this assumes the reduced data x has exactly 2 features)
clf.fit(x, y)
plt.scatter(x[:, 0], x[:, 1], c=y, zorder=10, cmap=plt.cm.Paired, edgecolor='k', s=30)
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
# create a mesh to plot in
xx, yy = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
# put the result into a color plot
Z = Z.reshape(xx.shape)
plt.pcolormesh(xx, yy, Z > 0, cmap=plt.cm.Paired)
plt.contour(xx, yy, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
            levels=[-0.5, 0, 0.5])
plt.show()

# Exercise 5: data compression
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

img = plt.imread("sample_BW.png")
print(img.shape)
imgT = np.transpose(img)  # transposed copy (not used below)
plt.imshow(img, cmap=plt.cm.gray)
plt.show()

## move to a new cell
# keep the first 100 principal components of the image rows and reconstruct
pca = PCA(n_components=100, svd_solver='full')
pca.fit(img)
nd = pca.transform(img)
ni = pca.inverse_transform(nd)
plt.imshow(ni, cmap=plt.cm.gray)
plt.show()
print(np.shape(nd))
print(ni.shape)
print(ni)

## move to a new cell
# repeat the compression by hand with a truncated SVD
# center the image rows (the sample image has 317 rows)
for i in range(317):
    img[i, :] = img[i, :] - pca.mean_
U, S, V = np.linalg.svd(img)
# keep the first 100 singular values/vectors and reconstruct
z = np.dot(np.eye(100) * S[:100], V[:100, :])
Z = np.dot(U[:, :100], z)
# add the mean back
for i in range(317):
    Z[i, :] = Z[i, :] + pca.mean_
plt.imshow(Z, cmap=plt.cm.gray)
plt.show()
print(Z.shape)
print(Z)
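## Optional check (not part of the original exercise): the manual truncated-SVD
## reconstruction Z should match PCA's inverse_transform result ni up to
## floating-point error. This assumes the arrays ni and Z from the two cells
## above are still in memory; the tolerance is a rough choice for float32 data.
print(np.max(np.abs(ni - Z)))
print(np.allclose(ni, Z, atol=1e-4))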