# Exercise 1: Projection onto the principal components
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

s = np.array([[2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0],
              [2.3, 2.7], [2.0, 1.6], [1.0, 1.1], [1.5, 1.6], [1.1, 0.9]])
pca = PCA(n_components=1)
s1 = pca.fit_transform(s)

# Method 1: project the data onto the first principal component by hand
plt.xlim(0, 3.5)
plt.ylim(0, 3.5)
plt.gca().set_aspect('equal', adjustable='box')
plt.plot(s[:, 0], s[:, 1], 'ro')
pca1 = PCA(n_components=2)
pca1.fit(s)
x = np.linspace(0, 3.5)
# first principal axis (black line) through the mean; components_ rows are the PCs
y = pca1.components_[0][1] / pca1.components_[0][0] * x \
    + pca1.mean_[1] - pca1.components_[0][1] / pca1.components_[0][0] * pca1.mean_[0]
plt.plot(x, y, 'k-')
# second principal axis (red line) through the mean
y = pca1.components_[1][1] / pca1.components_[1][0] * x \
    + pca1.mean_[1] - pca1.components_[1][1] / pca1.components_[1][0] * pca1.mean_[0]
plt.plot(x, y, 'r-')
# step 1: center the data
s2 = np.zeros([10, 2])
for i in range(len(s)):
    s2[i] = s[i] - pca1.mean_
# step 2: express the centered data in the principal-component basis
c = np.dot(s2, pca1.components_.transpose())
# step 3: discard the second component
c[:, 1] = 0
# step 4: map back to the original coordinates
c1 = np.dot(c, pca1.components_)
# step 5: add the mean back and plot the projected points
plt.plot(c1[:, 0] + pca1.mean_[0], c1[:, 1] + pca1.mean_[1], 'bo')
plt.show()

# Method 2: let scikit-learn do the projection with inverse_transform
plt.xlim(0, 3.5)
plt.ylim(0, 3.5)
plt.gca().set_aspect('equal', adjustable='box')
plt.plot(s[:, 0], s[:, 1], 'ro')
pca1 = PCA(n_components=2)
pca1.fit(s)
x = np.linspace(0, 3.5)
y = pca1.components_[0][1] / pca1.components_[0][0] * x \
    + pca1.mean_[1] - pca1.components_[0][1] / pca1.components_[0][0] * pca1.mean_[0]
plt.plot(x, y, 'k-')
y = pca1.components_[1][1] / pca1.components_[1][0] * x \
    + pca1.mean_[1] - pca1.components_[1][1] / pca1.components_[1][0] * pca1.mean_[0]
plt.plot(x, y, 'r-')
c1 = pca.inverse_transform(s1)
plt.plot(c1[:, 0], c1[:, 1], 'bo')
plt.show()

# Exercise 2: visualization of the images
import numpy as np
from sklearn.decomposition import PCA
from sklearn import datasets
import matplotlib.pyplot as plt

# load the handwritten-digit data from the database
digits = datasets.load_digits()
print(digits.keys())
print(digits.data.shape)
# assignment
X, y = digits.data, digits.target
# define the PCA
pca = PCA(n_components=2)
# reduce the features to 2 components
X_proj = pca.fit_transform(X)
# only about 28% of the variance is retained by the first 2 PCs
print(np.sum(pca.explained_variance_ratio_))
# plot the principal components as a scatter plot
plt.scatter(X_proj[:, 0], X_proj[:, 1], c=y)
plt.colorbar()
plt.show()

# Exercise 3: preprocess the data
import numpy as np
from sklearn.decomposition import PCA
from sklearn import datasets
import matplotlib.pyplot as plt

# load the dataset and keep only the digits 1 and 8
digits = datasets.load_digits()
data, target = digits.data, digits.target
X = data[np.logical_or(target == 1, target == 8), :]
y = target[np.logical_or(target == 1, target == 8)]
# define the PCA
pca = PCA(n_components=2)
# plot the principal components as a scatter plot
X_proj = pca.fit_transform(X)
plt.scatter(X_proj[:, 0], X_proj[:, 1], c=y)
plt.show()

## move to a new cell
# print the amount of variance retained by the first 2 PCs
print(np.sum(pca.explained_variance_ratio_))
# change n_components to a fraction: keep enough components to explain 50% of the variance
pca = PCA(n_components=0.50)
# reduce the feature dimensions
x = pca.fit_transform(X)
# print the estimated number of components
print(pca.n_components_)
# print the amount of variance actually retained
print(np.sum(pca.explained_variance_ratio_))

# Exercise 4: application
from sklearn import svm, model_selection

clf = svm.SVC(kernel='rbf', gamma=0.001)
scores = model_selection.cross_val_score(clf, x, y, cv=6)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
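## Optional sketch (not part of the original exercise): the PCA reduction and the SVM
## can also be chained with sklearn.pipeline.make_pipeline, so the PCA is refit inside
## every cross-validation fold instead of once on the full data. This assumes the digits
## subset X, y from Exercise 3 is still in memory; the names pipe and pipe_scores are
## illustrative only.
from sklearn.pipeline import make_pipeline

pipe = make_pipeline(PCA(n_components=0.50), svm.SVC(kernel='rbf', gamma=0.001))
pipe_scores = model_selection.cross_val_score(pipe, X, y, cv=6)
print("Pipeline accuracy: %0.2f (+/- %0.2f)" % (pipe_scores.mean(), pipe_scores.std() * 2))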
## move to a new cell
# plot the decision surface (this assumes the reduced data x has exactly 2 features)
clf.fit(x, y)
plt.scatter(x[:, 0], x[:, 1], c=y, zorder=10, cmap=plt.cm.Paired, edgecolor='k', s=30)
x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
# create a mesh to plot in
xx, yy = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
# put the result into a color plot
Z = Z.reshape(xx.shape)
plt.pcolormesh(xx, yy, Z > 0, cmap=plt.cm.Paired)
plt.contour(xx, yy, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
            levels=[-0.5, 0, 0.5])
plt.show()

# Exercise 5: data compression
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

img = plt.imread("sample_BW.png")
print(img.shape)
imgT = np.transpose(img)  # transposed copy (not used below)
plt.imshow(img, cmap=plt.cm.gray)
plt.show()

## move to a new cell
# keep the first 100 principal components of the image rows and reconstruct
pca = PCA(n_components=100, svd_solver='full')
pca.fit(img)
nd = pca.transform(img)
ni = pca.inverse_transform(nd)
plt.imshow(ni, cmap=plt.cm.gray)
plt.show()
print(np.shape(nd))
print(ni.shape)
print(ni)

## move to a new cell
# repeat the compression by hand with a truncated SVD
# center the image rows (the sample image has 317 rows)
for i in range(317):
    img[i, :] = img[i, :] - pca.mean_
U, S, V = np.linalg.svd(img)
# keep the first 100 singular values/vectors and reconstruct
z = np.dot(np.eye(100) * S[:100], V[:100, :])
Z = np.dot(U[:, :100], z)
# add the mean back
for i in range(317):
    Z[i, :] = Z[i, :] + pca.mean_
plt.imshow(Z, cmap=plt.cm.gray)
plt.show()
print(Z.shape)
print(Z)
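## Optional check (not part of the original exercise): the manual truncated-SVD
## reconstruction Z should match PCA's inverse_transform result ni up to
## floating-point error. This assumes the arrays ni and Z from the two cells
## above are still in memory; the tolerance is a rough choice for float32 data.
print(np.max(np.abs(ni - Z)))
print(np.allclose(ni, Z, atol=1e-4))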