线性回归

我们通过线性回归算法找出自变量和因变量间的最佳线性关系，图形上可以确定一条最佳直线。

这条最佳直线就是回归线。这个回归关系可以用𝑌=𝑎𝑋+𝑏Y=aX+b表示。

一般使用均方误差计算损失函数

import numpy as np
from matplotlib import pyplot as plt

class Line:
    def __init__(self, data):
        self.w = 1
        self.b = 0
        self.learning_rate = 0.01（就是每次趋近与最佳w的步长）
        self.fig, (self.ax1, self.ax2) = plt.subplots(2, 1)
        self.loss_list = []

    def get_data(self, data):
        self.X = np.array(data)[:, 0]
        self.y = np.array(data)[:, 1]
		#sigmoid函数
    def predict(self, x):
        return self.w * x + self.b
    
    def train(self, epoch_times):
        for epoch in range(epoch_times):
            total_loss = 0
            for x, y in zip(self.X, self.y):
                y_pred = self.predict(x)
                # 计算导数（导数w的**符号**即为方向，**数值正的就太远了，负的就太近了**）
                gradient_w = -2 * x * (y - y_pred)
                gradient_b = -2 * (y - y_pred)
                # 更新w，x
                #所求就是最佳的w即(斜率)，在**这里正的相减，负的相加**
                self.w -= self.learning_rate * gradient_w
                self.b -= self.learning_rate * gradient_b
                # 计算损失函数
                loss = (y - y_pred) ** 2
                total_loss += loss
            epoch_loss = total_loss / len(self.X)
            self.loss_list.append(epoch_loss)
            if epoch % 10 == 0:
                print(f"loss: {epoch_loss}")
                self.plot()
        plt.ioff()
        plt.show()
		#绘图
    def plot(self):
        plt.ion()  # Enable interactive mode
        self.ax2.clear()
        self.ax1.clear()
        x = np.linspace(0, 10, 100)
        self.ax1.scatter(self.X, self.y, c="g")
        self.ax1.plot(x, self.predict(x), c="b")
        self.ax2.plot(list(range(len(self.loss_list))), self.loss_list)
        plt.show()
        plt.pause(0.1)

if __name__ == "__main__":  
    # Input data
    data = [(1, 1), (1.8, 2), (2.5, 3), (4.2, 4), (5, 5), (6, 6), (7, 7)]
    s = Line(data)
    s.get_data(data)
    s.train(100)

使用sklearn模块

import numpy as np
from sklearn.linear_model import LinearRegression

# 创建一些示例数据
X = np.array([[1], [2], [3], [4], [5]])  # 自变量
y = np.array([2, 3, 4, 4, 6])  # 因变量
# 创建线性回归模型
model = LinearRegression()

# 拟合模型
model.fit(X, y)

# 打印回归系数和截距
print("回归系数 (斜率):", model.coef_)
print("截距:", model.intercept_)

# 预测新数据点
new_data_point = np.array([[6]])  # 要预测的新数据点
predicted_value = model.predict(new_data_point)
print("预测值:", predicted_value)

预测房屋价格

from sklearn import datasets
from sklearn.linear_model import LinearRegression

# .fetch_california_housing() 加载加利福尼亚州住房数据集
loaded_data = datasets.fetch_california_housing()
# .data 数据集中的特征数据
data_X = loaded_data.data
# .target 数据集中的标签数据
data_y = loaded_data.target
# 创建线性回归模型
model = LinearRegression()
# 拟合模型
# .fit() 方法接受两个参数：特征数据和标签数据
model.fit(data_X, data_y)

# 预测前四所房屋价格
# .predict() 方法接受一个参数：特征数据
print(model.predict(data_X[:4, :]))
# 真实价格
print(data_y[:4])

效果评估

print(model.get_params())# 获取模型参数
# //{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}
print(model.score(data_X, data_y))
# // 0.606232685199805
# 这意味着数据集中因变量的 60% 的变异性已得到考虑，而其余 40% 的变异性仍未得到解释。
# 打印回归系数和截距
print("回归系数 (斜率):", model.coef_)
print("截距:", model.intercept_)

线性回归​

一般使用均方误差计算损失函数​

使用sklearn模块​

预测房屋价格​

效果评估​

线性回归

一般使用均方误差计算损失函数

使用sklearn模块

预测房屋价格

效果评估