5. Gradient Descent, Newton's Method, and Gauss-Newton Iteration

2024-08-26 23:32

This article introduces gradient descent, Newton's method, and the Gauss-Newton iteration, with C++ reference implementations of each.

1. Gradient Descent
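
Gradient descent minimizes a differentiable cost function by repeatedly stepping against its gradient. For the linear model $h_\theta(x) = \theta_0 x_0 + \theta_1 x_1$ fitted by least squares over $m$ samples, the update with learning rate $\alpha$ is

$$\theta_j \leftarrow \theta_j + \frac{\alpha}{m} \sum_{i=1}^{m} \left( y^{(i)} - h_\theta(x^{(i)}) \right) x_j^{(i)}$$

Batch gradient descent sums over all $m$ samples before each step; stochastic gradient descent updates from one sample at a time. Both variants appear in the code in section 4.
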
2. Newton's Method
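
Newton's method solves a nonlinear system $F(x) = 0$ by linearizing $F$ at the current iterate and solving the linearization exactly:

$$x_{k+1} = x_k - J_F(x_k)^{-1} F(x_k)$$

where $J_F$ is the Jacobian of $F$. The iteration stops when successive iterates are sufficiently close; near a root with a nonsingular Jacobian, convergence is quadratic. (Applied to the gradient of a scalar function, the same step with the Hessian in place of $J_F$ minimizes that function.)
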
3. Gauss-Newton Iteration
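
Gauss-Newton specializes Newton's method to nonlinear least squares. With residuals $r_i(\theta) = y_i - f(x_i; \theta)$ and $J$ the Jacobian of the model with respect to the parameters, it approximates the Hessian of $\frac{1}{2}\sum_i r_i^2$ by $J^\top J$, giving the update

$$\theta \leftarrow \theta + (J^\top J)^{-1} J^\top r$$

so no second derivatives are needed. This is exactly the step computed in the OpenCV code in section 4.
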
4. Code

1. Gradient descent code

Batch gradient descent in C++:

/*
Parameters to fit: theta0, theta1
Model: y = theta0*x0 + theta1*x1
*/
#include <iostream>
using namespace std;

int main()
{
    float matrix[4][2] = { { 1, 1 }, { 2, 1 }, { 2, 2 }, { 3, 4 } };  // inputs (x0, x1)
    float result[4] = { 5, 6.99, 12.02, 18 };                         // observed outputs
    float theta[2] = { 0, 0 };                                        // initial parameters
    float loss = 10.0;

    for (int i = 0; i < 10000 && loss > 0.0000001; ++i)
    {
        float ErrorSum = 0.0;
        float cost[2] = { 0.0, 0.0 };
        // Accumulate the gradient over the whole batch.
        for (int j = 0; j < 4; ++j)
        {
            float h = 0.0;
            for (int k = 0; k < 2; ++k)
            {
                h += matrix[j][k] * theta[k];
            }
            ErrorSum = result[j] - h;
            for (int k = 0; k < 2; ++k)
            {
                cost[k] += ErrorSum * matrix[j][k];  // gradient contribution of sample j
            }
        }
        // One step with learning rate 0.01, averaged over the 4 samples.
        for (int k = 0; k < 2; ++k)
        {
            theta[k] = theta[k] + 0.01 * cost[k] / 4;
        }
        cout << "theta[0]=" << theta[0] << "\n" << "theta[1]=" << theta[1] << endl;

        // Recompute the squared loss over all samples.
        loss = 0.0;
        for (int j = 0; j < 4; ++j)
        {
            float h2 = 0.0;
            for (int k = 0; k < 2; ++k)
            {
                h2 += matrix[j][k] * theta[k];
            }
            loss += (h2 - result[j]) * (h2 - result[j]);
        }
        cout << "loss=" << loss << endl;
    }
    return 0;
}
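
Each outer iteration accumulates the error of all four samples into cost[] and then takes a single averaged step. Because the data are not exactly linear, the squared loss levels off (at roughly 3.16 for this data set) rather than reaching the 1e-7 stopping threshold, so the loop runs for its full 10000 iterations.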

2. Stochastic gradient descent in C++:

/*
Parameters to fit: theta0, theta1
Model: y = theta0*x0 + theta1*x1
*/
#include <iostream>
using namespace std;

int main()
{
    float matrix[4][2] = { { 1, 1 }, { 2, 1 }, { 2, 2 }, { 3, 4 } };
    float result[4] = { 5, 6.99, 12.02, 18 };
    float theta[2] = { 0, 0 };
    float loss = 10.0;

    for (int i = 0; i < 1000 && loss > 0.00001; ++i)
    {
        float ErrorSum = 0.0;
        int j = i % 4;  // cycle through the samples one at a time
        float h = 0.0;
        for (int k = 0; k < 2; ++k)
        {
            h += matrix[j][k] * theta[k];
        }
        ErrorSum = result[j] - h;
        // Update the parameters from this single sample.
        for (int k = 0; k < 2; ++k)
        {
            theta[k] = theta[k] + 0.001 * ErrorSum * matrix[j][k];
        }
        cout << "theta[0]=" << theta[0] << "\n" << "theta[1]=" << theta[1] << endl;

        // The loss is still evaluated over all samples.
        loss = 0.0;
        for (int j = 0; j < 4; ++j)
        {
            float sum = 0.0;
            for (int k = 0; k < 2; ++k)
            {
                sum += matrix[j][k] * theta[k];
            }
            loss += (sum - result[j]) * (sum - result[j]);
        }
        cout << "loss=" << loss << endl;
    }
    return 0;
}
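
Since the model has only two parameters, it is easy to check where both loops should end up. Below is a minimal sketch (added here for verification) that solves the same least-squares problem in closed form via the normal equations $(A^\top A)\theta = A^\top b$, using Cramer's rule for the resulting 2x2 system:

// Illustrative check: closed-form least squares for y = theta0*x0 + theta1*x1
#include <iostream>
using namespace std;

int main()
{
    float matrix[4][2] = { { 1, 1 }, { 2, 1 }, { 2, 2 }, { 3, 4 } };  // same data as above
    float result[4] = { 5, 6.99, 12.02, 18 };

    // Accumulate A^T A and A^T b.
    double AtA[2][2] = { { 0, 0 }, { 0, 0 } }, Atb[2] = { 0, 0 };
    for (int i = 0; i < 4; ++i)
        for (int r = 0; r < 2; ++r)
        {
            Atb[r] += matrix[i][r] * result[i];
            for (int c = 0; c < 2; ++c)
                AtA[r][c] += matrix[i][r] * matrix[i][c];
        }

    // Solve the 2x2 system (A^T A) theta = A^T b by Cramer's rule.
    double det = AtA[0][0] * AtA[1][1] - AtA[0][1] * AtA[1][0];
    double theta0 = (Atb[0] * AtA[1][1] - AtA[0][1] * Atb[1]) / det;
    double theta1 = (AtA[0][0] * Atb[1] - Atb[0] * AtA[1][0]) / det;
    cout << "theta[0]=" << theta0 << " theta[1]=" << theta1 << endl;
    return 0;
}

For this data the minimizer works out to roughly theta[0] = 2.34 and theta[1] = 2.89, which is the value both gradient-descent loops drift toward.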

3. Newton's method code

Example: solve the following system of two equations in two unknowns:

x*x-2*x-y+0.5=0;  (1)

x*x+4*y*y-4=0;  (2)
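
Writing the system as $F(x, y) = 0$ with $F_1 = x^2 - 2x - y + 0.5$ and $F_2 = x^2 + 4y^2 - 4$, the Jacobian the code evaluates is

$$J(x, y) = \begin{pmatrix} 2x - 2 & -1 \\ 2x & 8y \end{pmatrix}$$

and each Newton step computes $x_1 = x_0 - J^{-1} F(x_0)$, with $J$ inverted by Gauss-Jordan elimination.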

#include <iostream>
#include <cmath>

#define N 2              // number of equations in the nonlinear system
#define Epsilon 0.0001   // upper bound on the 1-norm of the difference vector
#define Max 1000         // maximum number of iterations

using namespace std;

const int N2 = 2 * N;

void func_y(float xx[N], float yy[N]);              // evaluate the vector function yy[N] at xx
void func_y_jacobian(float xx[N], float yy[N][N]);  // evaluate the Jacobian matrix yy[N][N]
void inv_jacobian(float yy[N][N], float inv[N][N]); // invert the Jacobian matrix into inv
void NewtonFunc(float x0[N], float inv[N][N], float y0[N], float x1[N]);  // compute the next iterate x1 from x0

int main()
{
    float x0[N] = { 2.0, 0.5 }, y0[N], jacobian[N][N], invjacobian[N][N], x1[N], errornorm;
    // Here the initial guess happens to satisfy equation (1); this is not required.
    int i, iter = 0;

    cout << "Initial approximate solution vector:" << endl;
    for (i = 0; i < N; i++)
    {
        cout << x0[i] << " ";
    }
    cout << endl << endl;

    while (iter < Max)
    {
        iter = iter + 1;
        cout << "Iteration " << iter << endl;
        func_y(x0, y0);                       // evaluate the vector function
        func_y_jacobian(x0, jacobian);        // evaluate the Jacobian
        inv_jacobian(jacobian, invjacobian);  // invert the Jacobian
        NewtonFunc(x0, invjacobian, y0, x1);  // Newton step: x1 = x0 - J^{-1} F(x0)

        errornorm = 0;
        for (i = 0; i < N; i++)
            errornorm = errornorm + fabs(x1[i] - x0[i]);
        if (errornorm < Epsilon)
            break;
        for (i = 0; i < N; i++)
            x0[i] = x1[i];
    }
    return 0;
}

void func_y(float xx[N], float yy[N])  // evaluate the vector function
{
    float x, y;
    int i;
    x = xx[0];
    y = xx[1];
    yy[0] = x*x - 2*x - y + 0.5;
    yy[1] = x*x + 4*y*y - 4;
    cout << "Function value vector:" << endl;
    for (i = 0; i < N; i++)
        cout << yy[i] << "  ";
    cout << endl;
}

void func_y_jacobian(float xx[N], float yy[N][N])  // evaluate the Jacobian
{
    float x, y;
    int i, j;
    x = xx[0];
    y = xx[1];
    // Partial derivatives of each equation with respect to x and y.
    yy[0][0] = 2*x - 2;
    yy[0][1] = -1;
    yy[1][0] = 2*x;
    yy[1][1] = 8*y;
    cout << "Jacobian matrix:" << endl;
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            cout << yy[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl;
}

void inv_jacobian(float yy[N][N], float inv[N][N])  // invert the Jacobian by Gauss-Jordan elimination
{
    float aug[N][N2], L;
    int i, j, k;
    cout << "Computing the inverse of the Jacobian:" << endl;
    // Build the augmented matrix [J | I].
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            aug[i][j] = yy[i][j];
        }
        for (j = N; j < N2; j++)
        {
            if (j == i + N) aug[i][j] = 1;
            else aug[i][j] = 0;
        }
    }
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N2; j++)
        {
            cout << aug[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl;
    // Forward elimination.
    for (i = 0; i < N; i++)
    {
        for (k = i + 1; k < N; k++)
        {
            L = -aug[k][i] / aug[i][i];
            for (j = i; j < N2; j++)
            {
                aug[k][j] = aug[k][j] + L * aug[i][j];
            }
        }
    }
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N2; j++)
        {
            cout << aug[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl;
    // Backward elimination.
    for (i = N - 1; i > 0; i--)
    {
        for (k = i - 1; k >= 0; k--)
        {
            L = -aug[k][i] / aug[i][i];
            for (j = N2 - 1; j >= 0; j--)
            {
                aug[k][j] = aug[k][j] + L * aug[i][j];
            }
        }
    }
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N2; j++)
        {
            cout << aug[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl;
    // Normalize each row by its pivot.
    for (i = N - 1; i >= 0; i--)
    {
        for (j = N2 - 1; j >= 0; j--)
        {
            aug[i][j] = aug[i][j] / aug[i][i];
        }
    }
    // The right half of the augmented matrix is now the inverse.
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N2; j++)
        {
            cout << aug[i][j] << " ";
        }
        cout << endl;
        for (j = N; j < N2; j++)
        {
            inv[i][j - N] = aug[i][j];
        }
    }
    cout << endl;
    cout << "Inverse of the Jacobian:" << endl;
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            cout << inv[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl;
}

void NewtonFunc(float x0[N], float inv[N][N], float y0[N], float x1[N])
{
    int i, j;
    float sum = 0;
    // x1 = x0 - inv(J) * F(x0)
    for (i = 0; i < N; i++)
    {
        sum = 0;
        for (j = 0; j < N; j++)
        {
            sum = sum + inv[i][j] * y0[j];
        }
        x1[i] = x0[i] - sum;
    }
    cout << "Approximate solution vector:" << endl;
    for (i = 0; i < N; i++)
    {
        cout << x1[i] << "  ";
    }
    cout << endl;
}
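
From the starting point (2.0, 0.5) the iteration should converge within a few steps to approximately x = 1.90, y = 0.31; substituting these values back into (1) and (2) gives residuals near zero. (The system has other roots; which one Newton's method finds depends on the starting guess.)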

4. Gauss-Newton iteration code

#include <cstdio>
#include <cmath>
#include <opencv2/core/core.hpp>

using namespace std;
using namespace cv;

const double DERIV_STEP = 1e-5;
const int MAX_ITER = 100;

void GaussNewton(double (*Func)(const Mat &input, const Mat params),
                 const Mat &inputs, const Mat &outputs, Mat params);

double Deriv(double (*Func)(const Mat &input, const Mat params),
             const Mat &input, const Mat params, int n);

// The user defines their model function here
double Func(const Mat &input, const Mat params);

int main()
{
    // For this demo we fit the function F = A*exp(t*B); there are 2 parameters: A, B
    int num_params = 2;

    int total_data = 8;
    Mat inputs(total_data, 1, CV_64F);
    Mat outputs(total_data, 1, CV_64F);

    // Load observation data
    for (int i = 0; i < total_data; i++) {
        inputs.at<double>(i, 0) = i + 1;  // load year
    }
    // Load America population
    outputs.at<double>(0, 0) = 8.3;
    outputs.at<double>(1, 0) = 11.0;
    outputs.at<double>(2, 0) = 14.7;
    outputs.at<double>(3, 0) = 19.7;
    outputs.at<double>(4, 0) = 26.7;
    outputs.at<double>(5, 0) = 35.2;
    outputs.at<double>(6, 0) = 44.4;
    outputs.at<double>(7, 0) = 55.9;

    // Guess the parameters; the guess should be close to the true value,
    // else the method can fail for very sensitive functions!
    Mat params(num_params, 1, CV_64F);
    params.at<double>(0, 0) = 6;    // initial guess
    params.at<double>(1, 0) = 0.3;

    GaussNewton(Func, inputs, outputs, params);

    printf("Parameters from GaussNewton: %f %f\n",
           params.at<double>(0, 0), params.at<double>(1, 0));
    return 0;
}

double Func(const Mat &input, const Mat params)
{
    // Assumes input is a single-row matrix and params is a column matrix
    double A = params.at<double>(0, 0);
    double B = params.at<double>(1, 0);
    double x = input.at<double>(0, 0);
    return A * exp(x * B);
}

// Compute the partial derivative with respect to the n-th parameter
double Deriv(double (*Func)(const Mat &input, const Mat params),
             const Mat &input, const Mat params, int n)
{
    // Central difference approximation of the derivative
    Mat params1 = params.clone();
    Mat params2 = params.clone();
    params1.at<double>(n, 0) -= DERIV_STEP;
    params2.at<double>(n, 0) += DERIV_STEP;
    double p1 = Func(input, params1);
    double p2 = Func(input, params2);
    return (p2 - p1) / (2 * DERIV_STEP);
}

void GaussNewton(double (*Func)(const Mat &input, const Mat params),
                 const Mat &inputs, const Mat &outputs, Mat params)
{
    int m = inputs.rows;
    int n = inputs.cols;
    int num_params = params.rows;

    Mat r(m, 1, CV_64F);            // residual vector
    Mat Jf(m, num_params, CV_64F);  // Jacobian of Func()
    Mat input(1, n, CV_64F);        // single-row input
    double last_mse = 0;

    for (int i = 0; i < MAX_ITER; i++) {
        double mse = 0;
        for (int j = 0; j < m; j++) {
            for (int k = 0; k < n; k++) {
                // Copy the independent-variable vector (the year)
                input.at<double>(0, k) = inputs.at<double>(j, k);
            }
            // Difference between observed and estimated population
            r.at<double>(j, 0) = outputs.at<double>(j, 0) - Func(input, params);
            mse += r.at<double>(j, 0) * r.at<double>(j, 0);
            for (int k = 0; k < num_params; k++) {
                Jf.at<double>(j, k) = Deriv(Func, input, params, k);
            }
        }
        mse /= m;
        // Quit when the change in mse is very small
        if (fabs(mse - last_mse) < 1e-8) {
            break;
        }
        // Gauss-Newton step: delta = (Jf^T Jf)^{-1} Jf^T r
        Mat delta = ((Jf.t() * Jf)).inv() * Jf.t() * r;
        params += delta;
        printf("%d %f\n", i, mse);
        last_mse = mse;
    }
}

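A second example reuses the same GaussNewton routine to fit the four-parameter model F = A*sin(Bx) + C*cos(Dx) to synthetically generated data, which illustrates how sensitive the method is to the initial guess:
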
#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <opencv2/core/core.hpp>

using namespace std;
using namespace cv;

const double DERIV_STEP = 1e-5;
const int MAX_ITER = 100;

void GaussNewton(double (*Func)(const Mat &input, const Mat params),
                 const Mat &inputs, const Mat &outputs, Mat params);

double Deriv(double (*Func)(const Mat &input, const Mat params),
             const Mat &input, const Mat params, int n);

double Func(const Mat &input, const Mat params);

int main()
{
    // For this demo we fit the function F = A*sin(Bx) + C*cos(Dx); there are 4 parameters: A, B, C, D
    int num_params = 4;

    // Generate random data using these parameters
    int total_data = 100;
    double A = 5;
    double B = 1;
    double C = 10;
    double D = 2;

    Mat inputs(total_data, 1, CV_64F);
    Mat outputs(total_data, 1, CV_64F);

    for (int i = 0; i < total_data; i++) {
        double x = -10.0 + 20.0 * rand() / (1.0 + RAND_MAX);  // random in [-10, 10]
        double y = A * sin(B * x) + C * cos(D * x);
        // Add some noise
        // y += -1.0 + 2.0 * rand() / (1.0 + RAND_MAX);
        inputs.at<double>(i, 0) = x;
        outputs.at<double>(i, 0) = y;
    }

    // Guess the parameters; the guess should be close to the true value,
    // else the method can fail for very sensitive functions!
    Mat params(num_params, 1, CV_64F);
    params.at<double>(0, 0) = 1;
    params.at<double>(1, 0) = 1;
    params.at<double>(2, 0) = 8;  // changing this to 1 will cause it not to find the solution: too far away
    params.at<double>(3, 0) = 1;

    GaussNewton(Func, inputs, outputs, params);

    printf("True parameters: %f %f %f %f\n", A, B, C, D);
    printf("Parameters from GaussNewton: %f %f %f %f\n",
           params.at<double>(0, 0), params.at<double>(1, 0),
           params.at<double>(2, 0), params.at<double>(3, 0));
    return 0;
}

double Func(const Mat &input, const Mat params)
{
    // Assumes input is a single-row matrix and params is a column matrix
    double A = params.at<double>(0, 0);
    double B = params.at<double>(1, 0);
    double C = params.at<double>(2, 0);
    double D = params.at<double>(3, 0);
    double x = input.at<double>(0, 0);
    return A * sin(B * x) + C * cos(D * x);
}

double Deriv(double (*Func)(const Mat &input, const Mat params),
             const Mat &input, const Mat params, int n)
{
    // Central difference approximation of the derivative of the n-th parameter
    Mat params1 = params.clone();
    Mat params2 = params.clone();
    params1.at<double>(n, 0) -= DERIV_STEP;
    params2.at<double>(n, 0) += DERIV_STEP;
    double p1 = Func(input, params1);
    double p2 = Func(input, params2);
    return (p2 - p1) / (2 * DERIV_STEP);
}

void GaussNewton(double (*Func)(const Mat &input, const Mat params),
                 const Mat &inputs, const Mat &outputs, Mat params)
{
    int m = inputs.rows;
    int n = inputs.cols;
    int num_params = params.rows;

    Mat r(m, 1, CV_64F);            // residual vector
    Mat Jf(m, num_params, CV_64F);  // Jacobian of Func()
    Mat input(1, n, CV_64F);        // single-row input
    double last_mse = 0;

    for (int i = 0; i < MAX_ITER; i++) {
        double mse = 0;
        for (int j = 0; j < m; j++) {
            for (int k = 0; k < n; k++) {
                input.at<double>(0, k) = inputs.at<double>(j, k);
            }
            r.at<double>(j, 0) = outputs.at<double>(j, 0) - Func(input, params);
            mse += r.at<double>(j, 0) * r.at<double>(j, 0);
            for (int k = 0; k < num_params; k++) {
                Jf.at<double>(j, k) = Deriv(Func, input, params, k);
            }
        }
        mse /= m;
        // Quit when the change in mse is very small
        if (fabs(mse - last_mse) < 1e-8) {
            break;
        }
        // Gauss-Newton step: delta = (Jf^T Jf)^{-1} Jf^T r
        Mat delta = ((Jf.t() * Jf)).inv() * Jf.t() * r;
        params += delta;
        printf("%f\n", mse);
        last_mse = mse;
    }
}
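
Both examples form the step by explicitly inverting Jf^T * Jf. That is fine at this size, but for larger problems it is numerically preferable to solve the normal equations with a linear solver instead (in OpenCV, for example, cv::solve with DECOMP_CHOLESKY), and adding a damping term to Jf^T * Jf (the Levenberg-Marquardt modification) makes the iteration far less sensitive to a poor starting guess.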

Source: 梯度下降法,牛顿法,高斯-牛顿迭代法,附代码实现 (Naruto_Q, CSDN blog)

