Wrapping and Applying Stochastic Gradient Descent
Stochastic gradient descent (SGD) is an optimization algorithm widely used in machine learning. It repeatedly adjusts the model parameters based on the training data, with the goal of minimizing a loss function. In this section we wrap an implementation of stochastic gradient descent in a class and show how to apply it to a linear regression task.
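For reference, the per-sample update rule that the fit_sgd method below implements can be written as follows. The notation is ours, not the original post's: theta is the parameter vector (intercept plus coefficients), (x_i, y_i) is one randomly chosen training sample with x_i augmented by a leading 1, and the learning rate follows the t0/t1 schedule used in the code:

```latex
% One SGD step for the squared-error loss on a single sample (x_i, y_i)
\theta \leftarrow \theta \;-\; \eta_t \cdot 2\, x_i \left( x_i^{\top} \theta - y_i \right),
\qquad
\eta_t = \frac{t_0}{t + t_1}
```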
To implement stochastic gradient descent, we first build a wrapper class, LinearRegression, around the linear regression model. Its main methods are:

- fit_normal: fit the model with the closed-form normal equation;
- fit_gd: fit the model with batch gradient descent;
- fit_sgd: fit the model with stochastic gradient descent;
- predict: make predictions on new data;
- score: compute the R² score on a test set.
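The class imports r2_score from a companion metrics module that is not shown in the post. A minimal sketch of that helper, assuming the standard R² definition (one minus the ratio of mean squared error to the variance of the true values), could look like this:

```python
# metrics.py -- minimal sketch; the original post's version is not shown
import numpy as np


def mean_squared_error(y_true, y_predict):
    """Mean of the squared differences between true and predicted values."""
    return np.sum((y_true - y_predict) ** 2) / len(y_true)


def r2_score(y_true, y_predict):
    """R^2 = 1 - MSE(y_true, y_predict) / Var(y_true)."""
    return 1 - mean_squared_error(y_true, y_predict) / np.var(y_true)
```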
The implementation of the model class itself is as follows:
```python
import numpy as np
from metrics import r2_score


class LinearRegression:

    def __init__(self):
        self.coef_ = None          # coefficients
        self.interception_ = None  # intercept
        self._theta = None         # full parameter vector

    def fit_normal(self, X_train, y_train):
        """Train the linear regression model with the normal equation."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the number of rows in X_train must equal the size of y_train"
        # Add a column of ones for the intercept term
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        # Closed-form solution of the normal equation
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4):
        """Train the linear regression model with batch gradient descent."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the number of rows in X_train must equal the size of y_train"

        def J(theta, X_b, y):
            # Mean squared error loss; guard against numerical overflow
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(X_b)
            except:
                return float('inf')

        def dJ(theta, X_b, y):
            # Gradient of the MSE loss with respect to theta
            return X_b.T.dot(X_b.dot(theta) - y) * 2 / len(X_b)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8):
            theta = initial_theta
            i_iter = 0
            while i_iter < n_iters:
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                # Stop when the loss no longer changes meaningfully
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break
                i_iter += 1
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def fit_sgd(self, X_train, y_train, n_iters=5, t0=5, t1=50):
        """Train the linear regression model with stochastic gradient descent."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the number of rows in X_train must equal the size of y_train"
        assert n_iters >= 1, "n_iters must be at least 1"

        def dJ_sgd(theta, X_b_i, y_i):
            # Gradient estimated from a single sample
            return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2

        def sgd(X_b, y, initial_theta, n_iters, t0=5, t1=50):

            def learning_rate(t):
                # Decaying learning-rate schedule
                return t0 / (t + t1)

            theta = initial_theta
            m = len(X_b)
            for cur_iter in range(n_iters):
                # Reshuffle the samples at the start of every pass
                indexes = np.random.permutation(m)
                X_b_new = X_b[indexes]
                y_new = y[indexes]
                for i in range(m):
                    gradient = dJ_sgd(theta, X_b_new[i], y_new[i])
                    theta = theta - learning_rate(cur_iter * m + i) * gradient
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = sgd(X_b, y_train, initial_theta, n_iters, t0, t1)
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Predict targets for the given feature matrix."""
        assert self.interception_ is not None and self.coef_ is not None, \
            "the model must be fitted before predicting"
        assert X_predict.shape[1] == len(self.coef_), \
            "X_predict must have the same number of features as the training data"
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Compute the R² score of the model on a test set."""
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression"
```

Stochastic gradient descent shuffles the sample order on every pass and updates the parameters using one sample at a time. The noise injected by this random sampling can help the optimizer escape local optima in non-convex problems; for the convex linear regression loss, its main benefit is that each update is far cheaper than a full batch gradient step. The following example trains a linear regression model with the fit_sgd method defined above:
```python
import numpy as np

# Initialize the linear regression model
model = LinearRegression()

# Training data
X_train = np.array([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
y_train = np.array([2.0, 3.0, 4.0, 5.0])

# Train the model with stochastic gradient descent
model.fit_sgd(X_train, y_train, n_iters=5, t0=5, t1=50)

# Inspect the fitted parameters
print("Coefficients:", model.coef_)
print("Intercept:", model.interception_)

# Predict on new data
X_predict = np.array([[1.0, 2.0], [2.0, 3.0]])
y_predict = model.predict(X_predict)
print("Predictions:", y_predict)

# The original snippet referenced an undefined X_test/y_test;
# with no separate test split here, we evaluate R² on the training data
print("R² score:", model.score(X_train, y_train))
```

In this section we implemented stochastic gradient descent as a reusable class and demonstrated it on a linear regression task. Because each update touches only a single, randomly chosen sample, SGD can make training considerably cheaper, especially on large datasets; a small sanity-check comparison of fit_gd and fit_sgd on synthetic data is sketched below. In future posts we will continue to explore gradient descent and its extensions.
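The following sketch is not from the original post: it assumes the LinearRegression class above has been saved to a module (hypothetically named LinearRegression.py, next to the metrics helper), generates synthetic data with known parameters, and checks that both fit_gd and fit_sgd recover roughly the true coefficient (4) and intercept (3). The printed timings are only for reference; which method is faster depends heavily on the data size and iteration counts.

```python
import time
import numpy as np
from LinearRegression import LinearRegression  # hypothetical module layout

np.random.seed(666)
m = 10000
x = np.random.normal(size=m)
X = x.reshape(-1, 1)
y = 4.0 * x + 3.0 + np.random.normal(0.0, 1.0, size=m)  # true coef 4, intercept 3

# Batch gradient descent: every update uses all m samples
gd_model = LinearRegression()
start = time.time()
gd_model.fit_gd(X, y)
print("fit_gd  time: %.3fs, coef=%s, intercept=%.3f"
      % (time.time() - start, gd_model.coef_, gd_model.interception_))

# Stochastic gradient descent: each update uses a single random sample
sgd_model = LinearRegression()
start = time.time()
sgd_model.fit_sgd(X, y, n_iters=5)
print("fit_sgd time: %.3fs, coef=%s, intercept=%.3f"
      % (time.time() - start, sgd_model.coef_, sgd_model.interception_))
```

Both runs should report a coefficient close to 4 and an intercept close to 3; the SGD estimate will be somewhat noisier because it never uses the full gradient.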