机器学习 August 20, 2018

5-4 使用PCA进行降噪

Words count 3.1k Reading time 3 mins. Read count 0

import numpy as np
import matplotlib.pyplot as plt
# Synthetic 2-D data set: 100 points scattered around the line y = 0.75 * x + 3.
# The x values are drawn uniformly from [0, 100); the y values get zero-mean
# Gaussian noise with standard deviation 5 added on top of the line.
feature = np.random.uniform(0., 100., size=100)
noise = np.random.normal(0, 5, size=100)
X = np.column_stack((feature, 0.75 * feature + 3. + noise))
# Scatter-plot the raw points: first column of X on the x axis, second column
# on the y axis.
plt.scatter(X[:,0],X[:,1])
plt.show()

from sklearn.decomposition import PCA
# Fit a one-component PCA on X; fit() returns the estimator itself, so the
# construction and the fit can be chained.
pca = PCA(n_components=1).fit(X)

# Project X onto the single principal axis, then map the projection back into
# the original 2-D space.
X_reduction = pca.transform(X)
X_restore = pca.inverse_transform(X_reduction)

# Plot the restored points: first column on the x axis, second on the y axis.
plt.scatter(*X_restore.T)
plt.show()

手写数字识别的例子

from sklearn import datasets
# Load the digits data set: one row of pixel values per sample in `data`,
# and the matching class label (0-9) in `target`.
digits = datasets.load_digits()
X, y = digits.data, digits.target

# Corrupt every pixel with zero-mean Gaussian noise (standard deviation 4).
noisy_digits = X + np.random.normal(0, 4, size=X.shape)

# Take the first ten noisy samples of each class 0..9 and stack them in class
# order, so that each block of ten consecutive rows holds a single digit.
example_digits = np.vstack(
    [noisy_digits[y == digit, :][:10] for digit in range(10)]
)
example_digits.shape
(100, 64)
def plot_digits(data):
    """Display rows of *data* as 8x8 grayscale images in a 10x10 grid.

    Each row is reshaped to (8, 8) and drawn in its own tick-less panel
    using the 'binary' colormap with the color range fixed to 0..16.

    Args:
        data: Iterable of array rows with 64 values each (for example a
            2-D array of shape (n, 64)). Only the first 100 rows are
            drawn. If fewer than 100 rows are given, the remaining panels
            are left empty instead of raising IndexError.
    """
    _, axes = plt.subplots(
        10, 10, figsize=(10, 10),
        subplot_kw={'xticks': [], 'yticks': []},
        gridspec_kw=dict(hspace=0.1, wspace=0.1),
    )
    # zip() stops at the shorter of the two sequences, so short inputs do not
    # index past the end of `data` and long inputs are truncated to the grid.
    for ax, image in zip(axes.flat, data):
        ax.imshow(image.reshape(8, 8),
                  cmap='binary', interpolation='nearest', clim=(0, 16))
    plt.show()
# Show the noisy sample digits before any filtering.
plot_digits(example_digits)

# A float n_components in (0, 1) asks PCA to keep just enough components to
# explain that fraction of the variance (here 50%), per scikit-learn's PCA
# documentation.
pca = PCA(0.5)
pca.fit(example_digits)
# Recorded notebook output: repr of the fitted estimator.
PCA(copy=True, iterated_power='auto', n_components=0.5, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
# Number of components actually selected; the recorded run below kept 8.
pca.n_components_
8
# Project the noisy digits onto the kept components and map them back to
# pixel space; the discarded components are dropped in the round trip.
components = pca.transform(example_digits)
filtered_digits = pca.inverse_transform(components)
# Show the filtered digits for comparison with the noisy ones plotted earlier.
plot_digits(filtered_digits)

from sklearn.datasets import fetch_lfw_people
# Load the Labeled Faces in the Wild (LFW) people data set.
# NOTE(review): this call may download the data on first use — confirm
# against the scikit-learn documentation before running offline.
faces = fetch_lfw_people()

# List the fields exposed by the returned object (recorded output below).
faces.keys()
dict_keys(['data', 'images', 'target', 'target_names', 'DESCR'])
# Shape of the flattened data matrix; the recorded output below shows 13233
# rows with 2914 values each.
faces.data.shape
(13233, 2914)
0%