机器学习 August 20, 2018

3-2 实现 Simple Linear Regression

Words count 3.8k Reading time 3 mins. Read count 0

import numpy as np
import matplotlib.pyplot as plt
# Toy training set: five (x, y) samples.
x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([1.0, 3.0, 2.0, 3.0, 5.0])

# Scatter the raw samples on fixed axes (0..6 in both directions).
plt.scatter(x, y)
plt.axis([0, 6, 0, 6])
plt.show()

# Least-squares fit of y ~ a * x + b using an explicit Python loop.

# Means of the inputs and of the targets.
x_mean = np.mean(x)
y_mean = np.mean(y)

# num: numerator of the slope,   sum((x_i - x_mean) * (y_i - y_mean))
# d:   denominator of the slope, sum((x_i - x_mean) ** 2)
num = 0.0
d = 0.0
for x_i, y_i in zip(x, y):
    x_dev = x_i - x_mean
    num += x_dev * (y_i - y_mean)
    d += x_dev ** 2

# Slope first; the intercept is then chosen so that the line passes
# through the point (x_mean, y_mean).
a = num / d
b = y_mean - a * x_mean
a
0.8
b
0.39999999999999947
# Fitted values for every training input, using slope a and intercept b.
y_hat = a * x + b

# Raw samples as points, fitted line in red, same fixed axes as before.
plt.scatter(x, y)
plt.plot(x, y_hat, color="r")
plt.axis([0, 6, 0, 6])
plt.show()

# Evaluate the fitted line at a single new input.
x_predict = 6
y_predict = a * x_predict + b
y_predict
5.2

使用我们自己的SimpleLinearRegression

from script.SimpleLinearRegression import SimpleLinearRegression1

# Create the SimpleLinearRegression1 model imported from the project's
# script package and fit it on the toy samples x, y.
reg1 = SimpleLinearRegression1()
reg1.fit(x,y)
SimpleLinearRegression1()
reg1.predict(np.array([x_predict]))
array([5.2])
reg1.a_
0.8
reg1.b_
0.39999999999999947
# Predictions of reg1 for every training input.
y_hat1 = reg1.predict(x)

# Raw samples as points, reg1's predictions as a red line.
plt.scatter(x, y)
plt.plot(x, y_hat1, color="r")
plt.axis([0, 6, 0, 6])
plt.show()

向量化实现SimpleLinearRegression

from script.SimpleLinearRegression import SimpleLinearRegression2
# Create the SimpleLinearRegression2 model (presented by the section heading
# as the vectorized implementation) and fit it on the same samples x, y.
reg2 = SimpleLinearRegression2()
reg2.fit(x,y)
SimpleLinearRegression2()
reg2.a_
0.8
reg2.b_
0.39999999999999947
# Predict with reg2, the SimpleLinearRegression2 instance this section is
# about, so the plotted line comes from that model rather than from reg1.
y_hat2 = reg2.predict(x)

# Raw samples as points, reg2's predictions as a red line.
plt.scatter(x, y)
plt.plot(x, y_hat2, color="r")
plt.axis([0, 6, 0, 6])
plt.show()

向量化实现的性能测试

m = 1000000
big_x = np.random.random(size=m)
big_y = big_x * 2.0 + 3.0 + np.random.normal(size=m)
%timeit reg1.fit(big_x,big_y)
%timeit reg2.fit(big_x,big_y)
910 ms ± 7.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
20.5 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
reg1.a_
1.9896828127473971
reg1.b_
3.0065742729200817
0%