Depicting the Relationship Between L, w, and b with a Contour Plot

In the previous section we essentially completed the machine learning modeling process. This section introduces a supplementary tool: the contour plot. Whereas the loss curve depicts the relationship between the loss and the number of iterations, the contour plot depicts the relationship among all three of L, w, and b.
The supplementary code follows:

# The code above is from the previous section
# Draw the contour plot
import matplotlib.animation as animation
theta0_vals = np.linspace(-2, 3, 100)
theta1_vals = np.linspace(-3, 3, 100)
J_vals = np.zeros((theta0_vals.size, theta1_vals.size))

for t1, element in enumerate(theta0_vals):
    for t2, element2 in enumerate(theta1_vals):
        bias = element     # theta0 goes on the x axis, labeled 'Bias' below
        weight = element2  # theta1 goes on the y axis, labeled 'Weight' below
        J_vals[t1, t2] = loss_function(X_train, y_train, weight, bias)

J_vals = J_vals.T
A, B = np.meshgrid(theta0_vals, theta1_vals)
C = J_vals

fig = plt.figure(figsize=(12, 5))
plt.subplot(121)
plt.plot(X_train, y_train, 'ro', label='Training data')
plt.title('Sales Prediction')
plt.axis([X_train.min()-X_train.std(), X_train.max()+X_train.std(),
          y_train.min()-y_train.std(), y_train.max()+y_train.std()])
plt.grid(axis='both')
plt.xlabel('Wechat Ads Volume (X1)')
plt.ylabel('Sales Volume (Y)')
plt.legend(loc='lower right')

line, = plt.plot([], [], 'b-', label='Current Hypothesis')
annotation = plt.text(-2, 3, '', fontsize=20, color='green')
annotation.set_animated(True)

plt.subplot(122)
cp = plt.contour(A, B, C)
plt.colorbar(cp)
plt.title('Contour Plot')  # plt.contour draws line contours, not filled ones
plt.xlabel('Bias')
plt.ylabel('Weight')
track, = plt.plot([], [], 'r-')
point, = plt.plot([], [], 'ro')

plt.tight_layout()
plt.close()

def init():
    line.set_data([], [])
    track.set_data([], [])
    point.set_data([], [])
    annotation.set_text('')
    return line, track, point, annotation

def animate(i):
    # Left panel: regression line for the i-th set of parameters
    fit1_X = np.linspace(X_train.min()-X_train.std(),
                         X_train.max()+X_train.std(), 1000)
    fit1_y = bias_history[i] + weight_history[i] * fit1_X

    # Right panel: the (bias, weight) path over the first i iterations
    fit2_X = bias_history.T[:i]
    fit2_y = weight_history.T[:i]

    track.set_data(fit2_X, fit2_y)
    line.set_data(fit1_X, fit1_y)
    point.set_data([bias_history[i]], [weight_history[i]])

    annotation.set_text('Cost = %.4f' % (loss_history[i]))
    return line, track, point, annotation

anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=50, interval=0, blit=True)

anim.save('animation.gif', writer='imagemagick', fps=500)

# Display the contour-plot animation
import io
import base64
from IPython.display import HTML

filename = 'animation.gif'

video = io.open(filename, 'r+b').read()
encoded = base64.b64encode(video)
HTML(data='''<img src="data:image/gif;base64,{0}" type="gif" />'''.format(encoded.decode('ascii')))
  • Note that anim.save('animation.gif', writer='imagemagick', fps=500) raises an error in Colab: the imagemagick dependency has to be installed first, and it seems Colab only lets you install extra dependencies with a Colab Pro subscription... in any case, it didn't work when I tried it.
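A possible workaround (a minimal sketch, untested here): matplotlib ships its own PillowWriter, which writes GIFs through the Pillow library (already a matplotlib dependency) and needs no imagemagick install.

# Save the GIF with matplotlib's built-in PillowWriter instead of imagemagick
from matplotlib.animation import PillowWriter

anim.save('animation.gif', writer=PillowWriter(fps=10))  # fps=10 is an arbitrary choice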

This is the final animation shown with the source code:

Implementing a Multivariate Linear Regression Model

Multivariate means the features are multi-dimensional. Using subscripts to number the features, we construct the hypothesis function from the following multivariate (multi-variable) linear equation.

y' = h(x) = b + w_1x_1 + w_2x_2 + w_3x_3


In machine learning programming, this formula can be implemented in vectorized form.

y' = h(x) = w^T \cdot x + b

The formula can be simplified one step further: treat b as the weight w_0, which requires introducing a matching x_0, so the formula becomes:

y' = h(x) = w_0x_0 + w_1x_1 + w_2x_2 + w_3x_3

where:

w_0x_0 = b \times 1 = b

This is abbreviated as:

h(x) = w^T \cdot x
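As a concrete illustration of this bias trick, here is a minimal sketch (the numeric values are made up for demonstration):

import numpy as np

w = np.array([3.2, 0.6, 0.2, 0.1])      # w[0] plays the role of the bias b
x = np.array([1.0, 250.0, 50.0, 50.0])  # x[0] is fixed to 1 so that w[0]*x[0] = b

y_hat = np.dot(w, x)  # equivalent to b + w1*x1 + w2*x2 + w3*x3
print(y_hat)          # 3.2 + 150.0 + 10.0 + 5.0 = 168.2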

The complete code is as follows:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Read in the data
# df stands for DataFrame, a pandas data structure
df_ads = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/advertising.csv')
df_ads.head() # .head() returns the first 5 rows of the dataset for a quick preview

import matplotlib.pyplot as plt
import seaborn as sns # A statistical data-visualization library
# Heatmap of the pairwise correlations among all labels and features
sns.heatmap(df_ads.corr(), cmap='YlGnBu', annot=True)
plt.show()

sns.pairplot(df_ads,
             x_vars=['wechat', 'weibo', 'others'],
             y_vars='sales',
             height=4, aspect=1, kind='scatter')
plt.show()
X = np.array(df_ads) # Build the feature set
X = np.delete(X, [3], axis=1) # Drop column 3 (sales) so X holds only the three features
y = np.array(df_ads.sales) # Build the label set
print("Rank of tensor X:", X.ndim)
print("Shape of X:", X.shape)
print("Contents of X:\n", X)

# y = y.reshape(len(y), 1)
y = y.reshape(-1, 1) # Equivalent to the statement above
print("Shape of tensor y:", y.shape)

# Shape of tensor y: (200, 1)

# Split the dataset 80% (training set) / 20% (test set)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
def scalar(train, test): # Define a min-max normalization function to compress the data
    min = train.min(axis=0)
    max = train.max(axis=0)
    gap = max - min # Difference between the maximum and the minimum
    train -= min # Subtract the minimum from every value
    train /= gap # Divide every value by the max-min gap
    test -= min # Apply the training-set minimum to the test set
    test /= gap # Apply the training-set max-min gap to the test set
    return train, test

def min_max_gap(train): # Record the statistics needed to undo the scaling later
    min = train.min(axis=0)
    max = train.max(axis=0)
    gap = max - min
    return min, max, gap

y_min, y_max, y_gap = min_max_gap(y_train)
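These saved statistics let us map scaled predictions back to the original units later. A minimal sketch of the inverse transform (the helper name unscale is hypothetical, not from the source):

def unscale(y_scaled, y_min, y_gap):  # hypothetical helper: invert the min-max scaling
    return y_scaled * y_gap + y_min

The hard-coded constants 23.8 and 3.2 in the prediction step at the end of this section are presumably exactly these y_gap and y_min values for this dataset.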
X_train_original = X_train.copy() # Keep a copy of the training data, used later to normalize the data we want to predict on

X_train, X_test = scalar(X_train, X_test)
y_train, y_test = scalar(y_train, y_test)


f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True, figsize=(20, 4))

ax1.plot(X_train[:, 0:1], y_train, 'ro', label='WeChat Training data')
ax1.set_title('WeChat')
ax1.legend()

ax2.plot(X_train[:, 1:2], y_train, 'bo', label='Weibo Training data')
ax2.set_title('Weibo')
ax2.legend()

ax3.plot(X_train[:, 2:3], y_train, 'go', label='Others Training data')
ax3.set_title('Others')
ax3.legend()

plt.show()
  • Here we visualize scatter plots for each of the three feature dimensions.
x0_train = np.ones((len(X_train), 1)) # All-ones column of X_train's length, so the dot product picks up the bias
X_train = np.append(x0_train, X_train, axis=1) # Prepend the column of 1s to X
x0_test = np.ones((len(X_test), 1)) # All-ones column of X_test's length, so the dot product picks up the bias
X_test = np.append(x0_test, X_test, axis=1) # Prepend the column of 1s to X
print("Shape of tensor X:", X_train.shape)

# Shape of tensor X: (160, 4)
# Loss function
def loss_function(X, y, W): # Hand-coded mean squared error function
    y_hat = X.dot(W.T)
    loss = y_hat.reshape((len(y_hat), 1)) - y # Difference between each y' and the true y in the training set
    cost = np.sum(loss ** 2) / (2*len(X)) # Code implementation of the MSE function
    return cost

# Gradient descent
def gradient_descent(X, y, W, lr, iterations):
    l_history = np.zeros(iterations) # Initialize the array recording the loss during gradient descent
    W_history = np.zeros((iterations, len(W))) # Initialize the array recording the weights during gradient descent
    for iter in range(iterations): # Gradient descent iterations, i.e. how many steps down the staircase
        y_hat = X.dot(W.T)
        loss = y_hat.reshape((len(y_hat), 1)) - y
        derivative_W = X.T.dot(loss)/len(X) # Gradient of the loss with respect to W
        derivative_W = derivative_W.reshape(len(W))
        W = W - lr * derivative_W # Update the weights
        l_history[iter] = loss_function(X, y, W) # Record the loss for this iteration
        W_history[iter] = W # Record the weights for this iteration
    return l_history, W_history # Return the data recorded during gradient descent
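One way to gain confidence in the hand-coded derivative_W is to check it numerically against finite differences. A minimal sketch (assuming the X_train, y_train, and loss_function defined above):

def numerical_gradient(X, y, W, eps=1e-6):  # central finite-difference approximation
    grad = np.zeros(len(W))
    for j in range(len(W)):
        W_plus, W_minus = W.astype(float).copy(), W.astype(float).copy()
        W_plus[j] += eps
        W_minus[j] -= eps
        grad[j] = (loss_function(X, y, W_plus) - loss_function(X, y, W_minus)) / (2*eps)
    return grad

W0 = np.array([0.5, 1.0, 1.0, 1.0])
loss0 = X_train.dot(W0.T).reshape(-1, 1) - y_train
analytic = (X_train.T.dot(loss0) / len(X_train)).reshape(len(W0)) # same formula as in gradient_descent
print(np.allclose(analytic, numerical_gradient(X_train, y_train, W0), atol=1e-5)) # expect True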
# Set the initial parameter values
iteration = 300 # 300 iterations
alpha = 0.25 # Initial learning rate of 0.25
weight = np.array([0.5, 1, 1, 1]) # Weights
print("Current loss:", loss_function(X_train, y_train, weight))

# Current loss: 0.8039183733604857
# Define the linear regression model
def linear_regression(X, y, weight, alpha, iteration):
    loss_history, weight_history = gradient_descent(X, y, weight, alpha, iteration)
    print("Final training loss:", loss_history[-1]) # Print the final loss
    y_pred = X.dot(weight_history[-1]).reshape(-1, 1) # Reshape to a column so the subtraction below doesn't broadcast to an N x N matrix
    training_acc = 100 - np.mean(np.abs(y_pred - y)*100) # Ad hoc "accuracy": 100 minus the mean absolute error (in percent) on the 0-1 scaled labels
    print("Linear regression training accuracy: {:.2f}".format(training_acc))
    return loss_history, weight_history
# Call the linear regression model defined above
loss_history, weight_history = linear_regression(X_train, y_train, weight, alpha, iteration) # Train the machine

# Plot how the loss and the weights change during gradient descent
plt.figure(figsize=(12, 4))
plt.subplot(121)
plt.plot(loss_history, 'g--', label='Loss Curve')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.legend()

plt.subplot(122)
plt.plot(weight_history, 'b--', label='Weight Curve')
plt.xlabel('Iteration')
plt.ylabel('Weight')
plt.show()

print(weight_history[-1])

# Final training loss: 0.0020534948072892627
# Linear regression training accuracy: 75.99
# [0.05592132 0.64217276 0.22460039 0.0889083 ]

The loss and weight curves change as shown in the figure:
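Note that the training accuracy printed above is an ad hoc metric. A more standard regression measure is the coefficient of determination R²; a minimal sketch using sklearn.metrics (assuming the variables from the code above):

from sklearn.metrics import r2_score

y_pred = X_train.dot(weight_history[-1]).reshape(-1, 1) # predictions with the final weights
print("R^2 on the training set:", r2_score(y_train, y_pred))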

X_plan = [250, 50, 50] # Planned ad spend on WeChat, Weibo, and the other channels
X_train, X_plan = scalar(X_train_original, X_plan) # Normalize the new data with the training-set statistics
X_plan = np.append([1], X_plan) # Prepend the 1 that matches the bias weight
y_plan = np.dot(weight_history[-1], X_plan) # Prediction on the 0-1 scale
y_value = y_plan*23.8 + 3.2 # Undo the min-max scaling; 23.8 and 3.2 are presumably the y_gap and y_min computed earlier
print("Predicted sales:", y_value, "thousand yuan")

# Predicted sales: 7.944991957387483 thousand yuan

Exercises

1. Complete the same task with sklearn's linear regression function. (A starter sketch follows this list.)

2. Besides the Linear Regression algorithm introduced earlier, the sklearn library offers two variants: Ridge Regression and Lasso Regression. Try both of these linear regression algorithms on this lesson's dataset.

3. Import the exercise dataset from Lesson 3 (the Boston housing dataset bundled with Keras) and, using the methods introduced in this lesson, perform linear regression and predict the labels.
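For exercise 1, a minimal starter sketch (assuming the X_train, X_test, y_train, y_test split from this lesson; this is a hint, not the official solution):

from sklearn.linear_model import LinearRegression, Ridge, Lasso

model = LinearRegression() # swap in Ridge(alpha=1.0) or Lasso(alpha=0.1) for exercise 2
model.fit(X_train, y_train) # sklearn fits the intercept itself, so the column of 1s is not needed
print("R^2 on the test set:", model.score(X_test, y_test))
print("Weights:", model.coef_, "Bias:", model.intercept_)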