NumPy实现线性回归

2024-09-01 17:04
文章标签 实现 回归 线性 numpy

本文主要是介绍NumPy实现线性回归,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

1 单变量线性回归

1.1 sklearn实现(最小二乘法)

import os
import sys

import matplotlib.pyplot as plt
import pandas as pd

# The CSV is expected in the current working directory. os.path.join picks the
# right separator for the host OS instead of a hard-coded Windows backslash.
current_dir = os.getcwd()
path = os.path.join(current_dir, "Salary Data.csv")


def plot_data(path):
    """Show a scatter plot of salary against years of experience.

    Args:
        path: Location of a CSV file containing the columns
            "Experience Years" and "Salary".

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    table = pd.read_csv(path)
    experience = table["Experience Years"]
    salary = table["Salary"]

    plt.figure(figsize=(8, 6))
    plt.scatter(experience, salary, color="blue", label="Data points")
    # The axes are experience (x) and salary (y); the old title said "year".
    plt.title("Experience vs Salary")
    plt.xlabel("Experience (Years)")
    plt.ylabel("Salary")
    plt.grid(True)
    plt.legend()
    plt.show()
# Single-variable linear regression with scikit-learn (ordinary least squares).
# These imports were fused onto the end of comment lines in the original text,
# which turned them into dead comments; they are restored here.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

plot_data(path)

table = pd.read_csv(path)
y = table['Salary']
x = table[['Experience Years']]  # double brackets -> DataFrame, x.shape=(40,1)
z = table['Experience Years']    # single brackets -> Series,    z.shape=(40,)
# z is unused below; it only contrasts the two indexing forms.

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=2529
)
# Shapes noted by the author: (28, 1) (28,) (12, 1) (12,)

model = LinearRegression()
model.fit(x_train, y_train)

print( model.intercept_ )  # 26596.961311068262
print( model.coef_ )       # [9405.61663234]

# Evaluate on the held-out split.
y_pred = model.predict(x_test)
mse = mean_squared_error(y_test, y_pred)
print( "mse = ", mse )          # 24141421.671440993
r2 = r2_score(y_test, y_pred)
print( "r2 = ", r2 )            # 0.960233432146844

# Draw the fitted line over the full data set.
y_whole_pred = model.predict(x)
# x.iloc[:, 0] could also be written as x or x["Experience Years"]
plt.scatter(x.iloc[:, 0], y, color="blue", label="Data points")
plt.plot(x, y_whole_pred, color="red", linewidth=2, label='linear regression')
plt.xlabel("Experience (Years)")
plt.ylabel("Salary")
plt.legend()
plt.show()

1.2 NumPy实现(梯度下降法)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
import sys


def plot_data(path):
    """Show a scatter plot of salary against years of experience.

    Args:
        path: Location of a CSV file containing the columns
            "Experience Years" and "Salary".

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    table = pd.read_csv(path)
    experience = table["Experience Years"]
    salary = table["Salary"]

    plt.figure(figsize=(8, 6))
    plt.scatter(experience, salary, color="blue", label="Data points")
    # The axes are experience (x) and salary (y); the old title said "year".
    plt.title("Experience vs Salary")
    plt.xlabel("Experience (Years)")
    plt.ylabel("Salary")
    plt.grid(True)
    plt.legend()
    plt.show()


class MyLinearReg:
    """Linear regression trained with batch gradient descent.

    Attributes are plain public fields:
        lr: Step size applied to every gradient update.
        epochs: Number of full passes over the training data.
        w: Weight vector of shape (n_features,), or None before ``fit``.
        b: Scalar bias, or None before ``fit``.
        loss_history: Mean squared error recorded at each epoch of the most
            recent ``fit`` call.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.w = None
        self.b = None
        self.loss_history = []

    def fit(self, X, y):
        """Learn ``w`` and ``b`` by minimising the mean squared error.

        Args:
            X: 2-D array-like of shape (m, n); one row per sample.
            y: 1-D array-like of m target values.

        Raises:
            ValueError: If ``X`` is not 2-D (the shape cannot be unpacked
                into ``m, n``).
        """
        # Accept lists / pandas objects as well as ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        m, n = X.shape
        self.w = np.zeros(n)
        self.b = 0.0
        # Parameters restart from zero, so the history must restart too;
        # otherwise a second fit() would append to the previous run's curve.
        self.loss_history = []

        for epoch in range(self.epochs):
            # X(m,n) @ w(n,) -> (m,): matrix-vector product, bias broadcast.
            y_pred = X @ self.w + self.b   # y_pred(m,)
            residual = y_pred - y          # residual(m,)

            dcost_dw = (1 / m) * (X.T @ residual)
            dcost_db = np.sum((1 / m) * residual)

            self.w -= self.lr * dcost_dw
            self.b -= self.lr * dcost_db

            # The logged loss uses the predictions made before this update.
            mean_loss = np.mean(residual ** 2)
            self.loss_history.append(mean_loss)

            if epoch % 100 == 99:
                print(f"Epoch {epoch} loss: {mean_loss}")

        # NOTE: the spelling below is kept so the program output still matches
        # the run recorded later in this article.
        print("Trainning finished.")
        print("Final parameters:", "Slope w=", self.w, " Bias b=", self.b)
        # Sample run: Final parameters: Slope w= [9853.19132896]  Bias b= 23780.770014707407

    def predict(self, X):
        """Return ``X @ w + b`` for a feature matrix ``X`` of shape (m, n)."""
        return X @ self.w + self.b

    def get_params(self):
        """Return the learned parameters as the tuple ``(w, b)``."""
        return self.w, self.b


# Optional: call plot_data(path) once `path` has been defined below to preview
# the raw data before training.
# Single-variable linear regression with the hand-written MyLinearReg
# (gradient descent), evaluated with scikit-learn's metrics.
from sklearn.metrics import mean_squared_error, r2_score

current_dir = os.getcwd()
# os.path.join picks the right separator for the host OS instead of a
# hard-coded Windows backslash.
path = os.path.join(current_dir, "Salary Data.csv")
table = pd.read_csv(path)
x = table["Experience Years"].values  # x(40,)
y = table["Salary"].values            # y(40,)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
# Shapes noted by the author: x_train (32,), x_test (8,), y_train (32,), y_test (8,)

# MyLinearReg.fit unpacks X.shape into (m, n), so the 1-D feature vectors must
# become single-column matrices. In the original text these reshapes were
# fused onto a comment line and therefore never executed.
x_train = x_train.reshape(-1, 1)
x_test = x_test.reshape(-1, 1)

model = MyLinearReg()
model.fit(x_train, y_train)

# Evaluate on the held-out split.
y_pred = model.predict(x_test)
mse = mean_squared_error(y_test, y_pred)
print( "mse = ", mse )          # mse =  43053815.910611115
r2 = r2_score(y_test, y_pred)
print( "r2 = ", r2 )           # r2 =  0.9165907194371214

# Draw the fitted line over the full data set.
X = x.reshape(-1, 1)
y_whole_pred = model.predict(X)
# x is already a 1-D ndarray here, so it can be passed to matplotlib directly.
plt.scatter(x, y, color="blue", label="Data points")
plt.plot(x, y_whole_pred, color="red", linewidth=2, label='linear regression')
plt.xlabel("Experience (Years)")
plt.ylabel("Salary")
plt.legend()
plt.show()
Epoch 99 loss: 111815444.20061775
Epoch 199 loss: 81534511.03025383
Epoch 299 loss: 61760636.04682423
Epoch 399 loss: 48848017.74472436
Epoch 499 loss: 40415896.49608463
Epoch 599 loss: 34909602.800390095
Epoch 699 loss: 31313915.621658318
Epoch 799 loss: 28965881.353634194
Epoch 899 loss: 27432581.973080143
Epoch 999 loss: 26431315.92580659
Trainning finished.
Final parameters: Slope w= [9853.19132896]  Bias b= 23780.770014707407
mse =  43053815.910611115
r2 =  0.9165907194371214

2 多变量线性回归

2.1 sklearn实现(最小二乘法)

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import sys


def draw_PairwiseScatter(x, y):
    """Scatter every feature column of ``x`` against the target ``y``.

    One subplot is drawn per feature on a 5-column grid. The grid has at
    least 3 rows (the layout originally hard-coded) and grows when there are
    more than 15 features, which previously made ``plt.subplot`` fail.

    Args:
        x: 2-D array of shape (n_samples, n_features); indexed as ``x[:, i]``.
        y: Target values, one per sample.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    num_features = x.shape[1]
    n_cols = 5
    # Ceiling division without importing math; never fewer than 3 rows.
    n_rows = max(3, (num_features + n_cols - 1) // n_cols)

    plt.figure(figsize=(15, 10))
    for i in range(num_features):
        plt.subplot(n_rows, n_cols, i + 1)  # subplot indices start at 1
        plt.scatter(x[:, i], y, marker='o', color="green", s=15, alpha=0.5)
        plt.xlabel("Feature {}".format(i + 1))
        plt.ylabel("Label")
        plt.title("Feature {} vs Target".format(i + 1))
    plt.tight_layout()
    plt.show()
def draw_real_pred(x, y, model):
    """Overlay model predictions on the per-feature scatter plots.

    For each feature column, the actual targets are drawn in green and the
    values returned by ``model.predict(x)`` in red. The grid has 5 columns and
    at least 3 rows, growing when there are more than 15 features.

    Args:
        x: 2-D array of shape (n_samples, n_features); passed unchanged to
            ``model.predict`` and indexed as ``x[:, i]`` for plotting.
        y: Actual target values, one per sample.
        model: Any object exposing ``predict(x)``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    y_pred_whole = model.predict(x)
    num_features = x.shape[1]
    n_cols = 5
    # Ceiling division without importing math; never fewer than 3 rows.
    n_rows = max(3, (num_features + n_cols - 1) // n_cols)

    plt.figure(figsize=(15, 10))
    for i in range(num_features):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.scatter(x[:, i], y, marker='o', color="green", s=15, alpha=0.5)
        plt.scatter(x[:, i], y_pred_whole, marker="o", color="red", s=15, alpha=0.5)
        plt.xlabel("Feature {}".format(i + 1))
        plt.ylabel("Label")
        plt.title("Feature {} vs Target".format(i + 1))
    plt.tight_layout()
    plt.show()


current_dir = os.getcwd()
# Multi-variable linear regression on the Boston housing data with
# scikit-learn. These imports were fused into other lines (some into comments)
# in the original text; they are restored here.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# os.path.join picks the right separator for the host OS instead of a
# hard-coded Windows backslash.
path = os.path.join(current_dir, "Boston.csv")
house = pd.read_csv(path)

y = house['MEDV']                  # (506,)
X = house.drop(['MEDV'], axis=1)   # (506,13)
X = np.array(X)
y = np.array(y)

draw_PairwiseScatter(X, y)

model = LinearRegression()

x_train, x_test, y_train, y_test = train_test_split(
    X, y, train_size=0.7, random_state=2529
)
# Shapes noted by the author: (354, 13) (152, 13) (354,) (152,)

# Ordinary least squares is solved in closed form rather than by gradient
# descent, so the features do not need to be standardized first.
# Normal equation: theta = (X^T X)^(-1) X^T y
model.fit(x_train, y_train)
print(model.intercept_)
print(model.coef_)

y_pred = model.predict(x_test)

# MAE is symmetric in its two arguments, so the (y_pred, y_test) order used
# here gives the same value as sklearn's documented (y_true, y_pred) order.
print( "mean_absolute_error(y_pred,y_test):", mean_absolute_error(y_pred,y_test) )

# For regressors, score() reports R^2, so the next two prints should agree.
print ( model.score(x_test,y_test) )
r2 = r2_score(y_test, y_pred)
print(r2)  # 0.6551914852365524

draw_real_pred(X, y, model)

2.2 NumPy实现(梯度下降法) 

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import sys


def draw_PairwiseScatter(x, y):
    """Scatter every feature column of ``x`` against the target ``y``.

    One subplot is drawn per feature on a 5-column grid. The grid has at
    least 3 rows (the layout originally hard-coded) and grows when there are
    more than 15 features, which previously made ``plt.subplot`` fail.

    Args:
        x: 2-D array of shape (n_samples, n_features); indexed as ``x[:, i]``.
        y: Target values, one per sample.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    num_features = x.shape[1]
    n_cols = 5
    # Ceiling division without importing math; never fewer than 3 rows.
    n_rows = max(3, (num_features + n_cols - 1) // n_cols)

    plt.figure(figsize=(15, 10))
    for i in range(num_features):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.scatter(x[:, i], y, marker='o', color="green", s=15, alpha=0.5)
        plt.xlabel("Feature {}".format(i + 1))
        plt.ylabel("Label")
        plt.title("Feature {} vs Target".format(i + 1))
    plt.tight_layout()
    plt.show()


def draw_real_pred(x, y, model):
    """Overlay model predictions on the per-feature scatter plots.

    For each feature column, the actual targets are drawn in green and the
    values returned by ``model.predict(x)`` in red.

    Args:
        x: 2-D array of shape (n_samples, n_features); passed unchanged to
            ``model.predict`` and indexed as ``x[:, i]`` for plotting.
        y: Actual target values, one per sample.
        model: Any object exposing ``predict(x)``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    y_pred_whole = model.predict(x)
    num_features = x.shape[1]
    n_cols = 5
    n_rows = max(3, (num_features + n_cols - 1) // n_cols)

    plt.figure(figsize=(15, 10))
    for i in range(num_features):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.scatter(x[:, i], y, marker='o', color="green", s=15, alpha=0.5)
        plt.scatter(x[:, i], y_pred_whole, marker='o', color="red", s=15, alpha=0.5)
        plt.xlabel("Feature {}".format(i + 1))
        plt.ylabel("Label")
        plt.title("Feature {} vs Target".format(i + 1))
    plt.tight_layout()
    plt.show()


class MultipleLinear:
    """Multi-variable linear regression trained with batch gradient descent.

    The intercept is learned as ``theta[0]`` by prepending a column of ones
    to the feature matrix in both ``fit`` and ``predict``.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.theta = None         # set by fit(): [intercept, w_1, ..., w_n]
        self.cost_history = None  # set by fit(): one cost value per epoch

    def fit(self, X, y):
        """Learn ``theta`` from the training data.

        Args:
            X: 2-D array-like of shape (m, n) without a bias column.
            y: 1-D array-like of m target values.
        """
        # Accept lists / pandas objects as well as ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Prepend the bias column so the intercept is part of theta.
        X = np.hstack((np.ones((X.shape[0], 1)), X))
        m, n = X.shape
        self.theta = np.zeros(n)
        self.cost_history = []

        for epoch in range(self.epochs):
            y_pred = X @ self.theta
            gradient = X.T @ (y_pred - y)
            self.theta -= self.learning_rate * gradient * (1 / m)

            # The cost is evaluated after the update of this epoch.
            cost = self.compute_cost(X, y)
            self.cost_history.append(cost)

            if epoch % 100 == 99:
                print(f"Epoch {epoch} cost: {cost}")

        print("Training complete")
        print ( self.theta )

    def predict(self, X):
        """Return predictions for a 2-D ``X`` of shape (m, n), no bias column."""
        X = np.asarray(X, dtype=float)
        m, _ = X.shape
        X = np.hstack((np.ones((m, 1)), X))
        return X @ self.theta

    def compute_cost(self, X, y):
        """Return the halved mean squared error, ``sum(err**2) / (2 * m)``.

        Args:
            X: Feature matrix that already includes the leading bias column
               (its column count must match ``len(self.theta)``).
            y: Target values, one per row of ``X``.
        """
        m = X.shape[0]
        y_pred = X @ self.theta
        sq_errors = (y_pred - y) ** 2
        cost = 1 / (2 * m) * np.sum(sq_errors)
        return cost


current_dir = os.getcwd()
# Multi-variable linear regression on the Boston housing data with the
# hand-written MultipleLinear (gradient descent). These imports were fused
# into other lines (some into comments) in the original text; they are
# restored here.
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# os.path.join picks the right separator for the host OS instead of a
# hard-coded Windows backslash.
path = os.path.join(current_dir, "Boston.csv")
house = pd.read_csv(path)

y = house['MEDV']                  # (506,)
X = house.drop(['MEDV'], axis=1)   # (506,13)
X = np.array(X)
y = np.array(y)

draw_PairwiseScatter(X, y)

x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Shapes noted by the author: (404, 13) (102, 13) (404,) (102,)

# The scaler is fitted on the training split only and then reused for every
# other input, so no test-set statistics leak into training.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)  # (404,13)
x_test_scaled = scaler.transform(x_test)        # (102,13)

model = MultipleLinear()
model.fit(x_train_scaled, y_train)

y_pred = model.predict(x_test_scaled)
r2 = r2_score(y_test, y_pred)
print("r2 = ", r2)       # r2 =  0.6543244875135051

# The model was trained on standardized features, so the full data set has to
# go through the same scaler before prediction. Passing raw X, as before,
# produced meaningless predictions. The x-axes therefore show standardized
# feature values.
draw_real_pred(scaler.transform(X), y, model)

这篇关于NumPy实现线性回归的文章就介绍到这儿,希望我们推荐的文章对开发者们有所帮助!



http://www.chinasem.cn/article/1127533

相关文章

hdu1043(八数码问题,广搜 + hash(实现状态压缩) )

利用康拓展开将一个排列映射成一个自然数,然后就变成了普通的广搜题。 #include<iostream>#include<algorithm>#include<string>#include<stack>#include<queue>#include<map>#include<stdio.h>#include<stdlib.h>#include<ctype.h>#inclu

【C++】_list常用方法解析及模拟实现

相信自己的力量,只要对自己始终保持信心,尽自己最大努力去完成任何事,就算事情最终结果是失败了,努力了也不留遗憾。💓💓💓 目录   ✨说在前面 🍋知识点一:什么是list? •🌰1.list的定义 •🌰2.list的基本特性 •🌰3.常用接口介绍 🍋知识点二:list常用接口 •🌰1.默认成员函数 🔥构造函数(⭐) 🔥析构函数 •🌰2.list对象

【Prometheus】PromQL向量匹配实现不同标签的向量数据进行运算

✨✨ 欢迎大家来到景天科技苑✨✨ 🎈🎈 养成好习惯,先赞后看哦~🎈🎈 🏆 作者简介:景天科技苑 🏆《头衔》:大厂架构师,华为云开发者社区专家博主,阿里云开发者社区专家博主,CSDN全栈领域优质创作者,掘金优秀博主,51CTO博客专家等。 🏆《博客》:Python全栈,前后端开发,小程序开发,人工智能,js逆向,App逆向,网络系统安全,数据分析,Django,fastapi

让树莓派智能语音助手实现定时提醒功能

最初的时候是想直接在rasa 的chatbot上实现,因为rasa本身是带有remindschedule模块的。不过经过一番折腾后,忽然发现,chatbot上实现的定时,语音助手不一定会有响应。因为,我目前语音助手的代码设置了长时间无应答会结束对话,这样一来,chatbot定时提醒的触发就不会被语音助手获悉。那怎么让语音助手也具有定时提醒功能呢? 我最后选择的方法是用threading.Time

Android实现任意版本设置默认的锁屏壁纸和桌面壁纸(两张壁纸可不一致)

客户有些需求需要设置默认壁纸和锁屏壁纸  在默认情况下 这两个壁纸是相同的  如果需要默认的锁屏壁纸和桌面壁纸不一样 需要额外修改 Android13实现 替换默认桌面壁纸: 将图片文件替换frameworks/base/core/res/res/drawable-nodpi/default_wallpaper.*  (注意不能是bmp格式) 替换默认锁屏壁纸: 将图片资源放入vendo

C#实战|大乐透选号器[6]:实现实时显示已选择的红蓝球数量

哈喽,你好啊,我是雷工。 关于大乐透选号器在前面已经记录了5篇笔记,这是第6篇; 接下来实现实时显示当前选中红球数量,蓝球数量; 以下为练习笔记。 01 效果演示 当选择和取消选择红球或蓝球时,在对应的位置显示实时已选择的红球、蓝球的数量; 02 标签名称 分别设置Label标签名称为:lblRedCount、lblBlueCount

Kubernetes PodSecurityPolicy:PSP能实现的5种主要安全策略

Kubernetes PodSecurityPolicy:PSP能实现的5种主要安全策略 1. 特权模式限制2. 宿主机资源隔离3. 用户和组管理4. 权限提升控制5. SELinux配置 💖The Begin💖点点关注,收藏不迷路💖 Kubernetes的PodSecurityPolicy(PSP)是一个关键的安全特性,它在Pod创建之前实施安全策略,确保P

工厂ERP管理系统实现源码(JAVA)

工厂进销存管理系统是一个集采购管理、仓库管理、生产管理和销售管理于一体的综合解决方案。该系统旨在帮助企业优化流程、提高效率、降低成本,并实时掌握各环节的运营状况。 在采购管理方面,系统能够处理采购订单、供应商管理和采购入库等流程,确保采购过程的透明和高效。仓库管理方面,实现库存的精准管理,包括入库、出库、盘点等操作,确保库存数据的准确性和实时性。 生产管理模块则涵盖了生产计划制定、物料需求计划、

C++——stack、queue的实现及deque的介绍

目录 1.stack与queue的实现 1.1stack的实现  1.2 queue的实现 2.重温vector、list、stack、queue的介绍 2.1 STL标准库中stack和queue的底层结构  3.deque的简单介绍 3.1为什么选择deque作为stack和queue的底层默认容器  3.2 STL中对stack与queue的模拟实现 ①stack模拟实现

基于51单片机的自动转向修复系统的设计与实现

文章目录 前言资料获取设计介绍功能介绍设计清单具体实现截图参考文献设计获取 前言 💗博主介绍:✌全网粉丝10W+,CSDN特邀作者、博客专家、CSDN新星计划导师,一名热衷于单片机技术探索与分享的博主、专注于 精通51/STM32/MSP430/AVR等单片机设计 主要对象是咱们电子相关专业的大学生,希望您们都共创辉煌!✌💗 👇🏻 精彩专栏 推荐订阅👇🏻 单片机