import numpy as np
import matplotlib.pyplot as plt
# Five sample points used throughout this walkthrough, stored as float arrays.
x = np.array([1, 2, 3, 4, 5], dtype=float)
y = np.array([1, 3, 2, 3, 5], dtype=float)
# Scatter the raw samples inside a fixed viewing window.
plt.scatter(x, y)
plt.axis([0, 6, 0, 6])  # [xmin, xmax, ymin, ymax]
plt.show()
# Least-squares fit of the line y = a * x + b, written as an explicit loop.
# Mean of x.
x_mean = np.mean(x)
# Mean of y.
y_mean = np.mean(y)
# Numerator: running sum of (x_i - x_mean) * (y_i - y_mean).
num = 0.0
# Denominator: running sum of (x_i - x_mean) ** 2.
d = 0.0
for x_i, y_i in zip(x, y):
    # Both accumulators are updated once per sample pair.
    num += (x_i - x_mean) * (y_i - y_mean)
    d += (x_i - x_mean) ** 2
# Slope first, then the intercept that makes the line pass through
# the point (x_mean, y_mean).
a = num / d
b = y_mean - a * x_mean
a
0.8
b
0.39999999999999947
# Evaluate the fitted line at the training inputs and draw it in red
# on top of the sample scatter.
y_hat = b + a * x
plt.scatter(x, y)
plt.plot(x, y_hat, color="r")
plt.axis([0, 6, 0, 6])  # [xmin, xmax, ymin, ymax]
plt.show()
# Use the fitted slope and intercept to predict y for a new input.
x_predict = 6
y_predict = b + a * x_predict
y_predict
5.2
使用我们自己的SimpleLinearRegression
from script.SimpleLinearRegression import SimpleLinearRegression1
# Create the project's SimpleLinearRegression1 model and fit it on the
# same samples used above.
# NOTE(review): presumably this is the loop-based implementation, given the
# later "vectorized" SimpleLinearRegression2 — confirm in the script module.
reg1 = SimpleLinearRegression1()
reg1.fit(x,y)
SimpleLinearRegression1()
# Predict for the single new input, passed as a one-element array.
reg1.predict(np.array([x_predict]))
array([5.2])
reg1.a_
0.8
reg1.b_
0.39999999999999947
# Predictions of reg1 on the training inputs, drawn as a red line
# over the sample scatter.
y_hat1 = reg1.predict(x)
plt.scatter(x, y)
plt.plot(x, y_hat1, color="r")
plt.axis([0, 6, 0, 6])  # [xmin, xmax, ymin, ymax]
plt.show()
向量化实现SimpleLinearRegression
from script.SimpleLinearRegression import SimpleLinearRegression2
# Create the project's SimpleLinearRegression2 model and fit it on the
# same samples.
# NOTE(review): the surrounding heading describes this as the vectorized
# implementation — confirm in the script module.
reg2 = SimpleLinearRegression2()
reg2.fit(x,y)
SimpleLinearRegression2()
reg2.a_
0.8
reg2.b_
0.39999999999999947
# Predictions of the second model (reg2) on the training inputs, drawn as
# a red line over the sample scatter. The line must come from reg2 so this
# plot actually exercises the model fitted in this section.
y_hat2 = reg2.predict(x)
plt.scatter(x, y)
plt.plot(x, y_hat2, color="r")
plt.axis([0, 6, 0, 6])  # [xmin, xmax, ymin, ymax]
plt.show()
向量化实现的性能测试
# Large synthetic dataset for timing: inputs drawn by np.random.random,
# targets following y = 2x + 3 plus noise drawn by np.random.normal.
m = 10 ** 6
big_x = np.random.random(size=m)
big_y = 3.0 + 2.0 * big_x + np.random.normal(size=m)
# IPython %timeit magics: time fitting each model on the large dataset.
# These lines only run inside IPython/Jupyter, not plain Python.
%timeit reg1.fit(big_x,big_y)
%timeit reg2.fit(big_x,big_y)
910 ms ± 7.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
20.5 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
reg1.a_
1.9896828127473971
reg1.b_
3.0065742729200817