机器学习 August 20, 2018

6-5 模型正则化 Regularization

Word count 3.9k · Reading time 4 mins · Read count 0

模型正则化:限制参数的大小

岭回归

import numpy as np
import matplotlib.pyplot as plt
# Seed NumPy's global RNG so the synthetic data set below is reproducible.
np.random.seed(42)
# 100 sample positions drawn uniformly from the interval [-3, 3).
x = np.random.uniform(-3.0,3.0,size=100)
# Column-vector form of x (one feature per row); this 2-D array is what gets
# passed to train_test_split and the estimators further down.
X = x.reshape(-1,1)
# Targets follow the line y = 0.5 * x + 3 with standard-normal noise added.
y = 0.5 * x + 3 + np.random.normal(0,1,size=100)
# Visual check of the raw samples.
plt.scatter(x, y)
plt.show()

# 使用多项式回归

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
def PolynomiaRegression(degree):
    """Build an unfitted polynomial-regression pipeline.

    The pipeline chains three steps: polynomial feature expansion up to
    ``degree``, standardization of the expanded features, and an ordinary
    least-squares linear regression.

    Args:
        degree: Degree passed to ``PolynomialFeatures``.

    Returns:
        A ``Pipeline`` with steps named "poly", "std_scaler" and "line_reg".
    """
    expand = PolynomialFeatures(degree=degree)
    scale = StandardScaler()
    regress = LinearRegression()
    steps = [
        ("poly", expand),
        ("std_scaler", scale),
        ("line_reg", regress),
    ]
    return Pipeline(steps)
from sklearn.model_selection import train_test_split
# Seed NumPy's global RNG right before the split; train_test_split is called
# without random_state, so this is presumably what keeps the split reproducible.
np.random.seed(666)
X_train,X_test,y_train,y_test = train_test_split(X,y)
from sklearn.metrics import mean_squared_error
# Degree-20 polynomial regression with no regularization, fitted on the
# training portion only.
poly_reg = PolynomiaRegression(degree=20)
poly_reg.fit(X_train,y_train) 
# Evaluate on the held-out test portion.
y_poly_predict = poly_reg.predict(X_test)
mean_squared_error(y_test,y_poly_predict)
# The bare number below appears to be the recorded notebook output of the
# expression above (test-set MSE); as code it is a no-op expression statement.
167.9401086729357
# 100 evenly spaced points over [-3, 3], shaped as a column vector, used to
# draw the fitted curve.
X_plot = np.linspace(-3,3,100).reshape(100,1)
y_plot = poly_reg.predict(X_plot)
plt.scatter(x,y)
plt.plot(X_plot[:,0],y_plot,color='r')
# Fix the visible window to x in [-3, 3] and y in [0, 6].
plt.axis([-3,3,0,6])
plt.show()

def plot_model(model):
    """Draw the sample scatter together with the model's prediction curve.

    Predicts on 100 evenly spaced points across [-3, 3] and overlays the
    result, in red, on a scatter plot of the module-level ``x`` / ``y`` data.
    The view is fixed to x in [-3, 3] and y in [0, 6], and the figure is
    shown immediately.

    Args:
        model: Object whose ``predict`` accepts a ``(100, 1)`` array.
    """
    grid = np.linspace(-3, 3, 100)
    # predict() is given a column vector, one sample per row.
    curve = model.predict(grid[:, np.newaxis])

    plt.scatter(x, y)
    plt.plot(grid, curve, color='r')
    plt.axis([-3, 3, 0, 6])
    plt.show()
# Redraw the unregularized degree-20 fit through the reusable helper.
plot_model(poly_reg)

# 使用岭回归

from sklearn.linear_model import Ridge
def RidgeRegression(degree,alpha):
    """Build an unfitted polynomial ridge-regression pipeline.

    The pipeline chains polynomial feature expansion up to ``degree``,
    standardization of the expanded features, and a ``Ridge`` regressor.

    Args:
        degree: Degree passed to ``PolynomialFeatures``.
        alpha: Regularization strength passed to ``Ridge``.

    Returns:
        A ``Pipeline`` with steps named "poly", "std_scaler" and "line_reg".
    """
    expand = PolynomialFeatures(degree=degree)
    scale = StandardScaler()
    regress = Ridge(alpha=alpha)
    steps = [
        ("poly", expand),
        ("std_scaler", scale),
        ("line_reg", regress),
    ]
    return Pipeline(steps)
# Experiment 1: degree 20 with a very small penalty (alpha = 0.0001).
ridge1_reg = RidgeRegression(20,0.0001)
ridge1_reg.fit(X_train,y_train)
y1_predict = ridge1_reg.predict(X_test)
mean_squared_error(y_test,y1_predict)
# The bare number below appears to be the recorded notebook output (test MSE).
1.323349275406402
plot_model(ridge1_reg)

# Experiment 2: same degree, alpha = 1.
ridge2_reg = RidgeRegression(20,1)
ridge2_reg.fit(X_train,y_train)
y2_predict = ridge2_reg.predict(X_test)
mean_squared_error(y_test,y2_predict)
# Recorded output (test MSE); the lowest of the three ridge runs.
1.1888759304218448
plot_model(ridge2_reg)

# Experiment 3: same degree, alpha = 100.
ridge3_reg = RidgeRegression(20,100)
ridge3_reg.fit(X_train,y_train)
y3_predict = ridge3_reg.predict(X_test)
mean_squared_error(y_test,y3_predict)
# Recorded output (test MSE); higher again than the alpha = 1 run.
1.3196456113086197
plot_model(ridge3_reg)

0%