This post walks through adding a CenterLoss_layer to vanilla Caffe. Hopefully it is a useful reference for anyone tackling the same task.
On top of a plain classification baseline, CenterLoss_layer can buy a few points of accuracy (about 6 points in my own tests) while adding essentially no forward-pass time. A cheap win!
The theory is not covered here; it is easy to find online. This post only shows how to add a new layer such as CenterLoss_layer to Caffe.
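For orientation only (see Wen et al., ECCV 2016 for the derivation): over a mini-batch of M samples with features x_i and labels y_i, the layer computes

$$L_C = \frac{1}{2M}\sum_{i=1}^{M}\left\lVert x_i - c_{y_i}\right\rVert_2^2,$$

where c_{y_i} is the learned center of class y_i. This is exactly the dot / M_ / 2 computed in Forward_cpu below.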
Step 1: modify caffe.proto to add the new message
1. Inside message LayerParameter { ... }, add the field below. The ID 147 is only an example; if it is already taken in your copy of caffe.proto, use the next free one and update the "next available layer-specific ID" comment above LayerParameter accordingly:
optional CenterLossParameter center_loss_param = 147;
2. At the end of caffe.proto, add:
message CenterLossParameter {
  optional uint32 num_output = 1;  // The number of outputs for the layer
  optional FillerParameter center_filler = 2;  // The filler for the centers
  // The first axis to be lumped into a single inner product computation;
  // all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 3 [default = 1];
}
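Once caffe.proto is regenerated during the build, these three fields map one-to-one onto a center_loss_param block in a prototxt. A minimal sketch (the values here are illustrative):

center_loss_param {
  num_output: 10                    # number of classes, one center each
  center_filler { type: "xavier" }  # how to initialize the centers
  axis: 1                           # flatten everything after the batch axis
}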
Step 2: add the header file center_loss_layer.hpp
#ifndef CAFFE_CENTER_LOSS_LAYER_HPP_
#define CAFFE_CENTER_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

template <typename Dtype>
class CenterLossLayer : public LossLayer<Dtype> {
 public:
  explicit CenterLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "CenterLoss"; }
  // bottom[0]: features, bottom[1]: labels
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return -1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  int M_;  // batch size
  int K_;  // feature dimension, bottom[0]->count(axis)
  int N_;  // number of classes (num_output)
  Blob<Dtype> distance_;       // caches x_i - c_{y_i} for the backward pass
  Blob<Dtype> variation_sum_;  // per-class accumulator for the center update
};

}  // namespace caffe

#endif  // CAFFE_CENTER_LOSS_LAYER_HPP_
Step 3: add center_loss_layer.cpp and center_loss_layer.cu. First, center_loss_layer.cpp:
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/center_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.center_loss_param().num_output();
  N_ = num_output;
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.center_loss_param().axis());
  // Dimensions starting from "axis" are flattened into a single
  // length-K_ vector. For example, if bottom[0]'s shape is (N, C, H, W)
  // and axis == 1, each sample's feature has dimension K_ = C*H*W.
  K_ = bottom[0]->count(axis);
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(1);
    // Initialize the centers: one K_-dimensional center per class
    vector<int> center_shape(2);
    center_shape[0] = N_;
    center_shape[1] = K_;
    this->blobs_[0].reset(new Blob<Dtype>(center_shape));
    // fill the centers
    shared_ptr<Filler<Dtype> > center_filler(GetFiller<Dtype>(
        this->layer_param_.center_loss_param().center_filler()));
    center_filler->Fill(this->blobs_[0].get());
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void CenterLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // The label blob must hold exactly one value per sample.
  CHECK_EQ(bottom[1]->channels(), 1);
  CHECK_EQ(bottom[1]->height(), 1);
  CHECK_EQ(bottom[1]->width(), 1);
  M_ = bottom[0]->num();
  // LossLayer<Dtype>::Reshape sets up the scalar loss output.
  LossLayer<Dtype>::Reshape(bottom, top);
  distance_.ReshapeLike(*bottom[0]);
  variation_sum_.ReshapeLike(*this->blobs_[0]);
}

template <typename Dtype>
void CenterLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  const Dtype* center = this->blobs_[0]->cpu_data();
  Dtype* distance_data = distance_.mutable_cpu_data();
  // Compute the i-th row of the distance matrix:
  // D(i,:) = X(i,:) - C(y(i),:)
  for (int i = 0; i < M_; i++) {
    const int label_value = static_cast<int>(label[i]);
    caffe_sub(K_, bottom_data + i * K_, center + label_value * K_,
        distance_data + i * K_);
  }
  Dtype dot = caffe_cpu_dot(M_ * K_, distance_.cpu_data(),
      distance_.cpu_data());
  Dtype loss = dot / M_ / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
}

template <typename Dtype>
void CenterLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Gradient with respect to the centers
  if (this->param_propagate_down_[0]) {
    const Dtype* label = bottom[1]->cpu_data();
    Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
    Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
    const Dtype* distance_data = distance_.cpu_data();
    // Accumulate \sum_{i: y_i == j} (c_j - x_i) for every class j.
    caffe_set(N_ * K_, (Dtype)0., variation_sum_.mutable_cpu_data());
    for (int n = 0; n < N_; n++) {
      int count = 0;
      for (int m = 0; m < M_; m++) {
        const int label_value = static_cast<int>(label[m]);
        if (label_value == n) {
          count++;
          caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_,
              variation_sum_data + n * K_);
        }
      }
      caffe_axpy(K_, (Dtype)1. / (count + (Dtype)1.),
          variation_sum_data + n * K_, center_diff + n * K_);
    }
  }
  // Gradient with respect to bottom data
  if (propagate_down[0]) {
    caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
    caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_,
        bottom[0]->mutable_cpu_diff());
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
}

#ifdef CPU_ONLY
STUB_GPU(CenterLossLayer);
#endif

INSTANTIATE_CLASS(CenterLossLayer);
REGISTER_LAYER_CLASS(CenterLoss);

}  // namespace caffe
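For reference, Backward_cpu implements the two update rules from the paper rather than the raw analytic gradient. With δ(·) the indicator function:

$$\frac{\partial L_C}{\partial x_i} = \frac{1}{M}\,(x_i - c_{y_i}), \qquad \Delta c_j = \frac{\sum_{i=1}^{M}\delta(y_i = j)\,(c_j - x_i)}{1 + \sum_{i=1}^{M}\delta(y_i = j)}.$$

The bottom diff is additionally scaled by the loss weight (top[0]->cpu_diff()[0]); the +1 in the denominator damps the center update for classes with few (or zero) samples in the batch.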
Next, center_loss_layer.cu. Note one fix relative to the snippet that is floating around: variation_sum_ must expose its GPU pointer (mutable_gpu_data()) when passed to caffe_gpu_set and to the kernel, not mutable_cpu_data().

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/center_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
__global__ void Compute_distance_data_gpu(int nthreads, const int K,
    const Dtype* bottom, const Dtype* label, const Dtype* center,
    Dtype* distance) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    int m = index / K;
    int k = index % K;
    const int label_value = static_cast<int>(label[m]);
    // distance(i) = x(i) - c_{y(i)}
    distance[index] = bottom[index] - center[label_value * K + k];
  }
}

template <typename Dtype>
__global__ void Compute_center_diff_gpu(int nthreads, const int M,
    const int K, const Dtype* label, const Dtype* distance,
    Dtype* variation_sum, Dtype* center_diff) {
  // One thread per class: accumulate the residuals of the samples
  // belonging to that class, then apply the damped update.
  CUDA_KERNEL_LOOP(index, nthreads) {
    int count = 0;
    for (int m = 0; m < M; m++) {
      const int label_value = static_cast<int>(label[m]);
      if (label_value == index) {
        count++;
        for (int k = 0; k < K; k++) {
          variation_sum[index * K + k] -= distance[m * K + k];
        }
      }
    }
    for (int k = 0; k < K; k++) {
      center_diff[index * K + k] =
          variation_sum[index * K + k] / (count + (Dtype)1.);
    }
  }
}

template <typename Dtype>
void CenterLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int nthreads = M_ * K_;
  Compute_distance_data_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
      CAFFE_CUDA_NUM_THREADS>>>(nthreads, K_, bottom[0]->gpu_data(),
      bottom[1]->gpu_data(), this->blobs_[0]->gpu_data(),
      distance_.mutable_gpu_data());
  Dtype dot;
  caffe_gpu_dot(M_ * K_, distance_.gpu_data(), distance_.gpu_data(), &dot);
  Dtype loss = dot / M_ / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
}

template <typename Dtype>
void CenterLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  int nthreads = N_;
  // Use the GPU pointer here: the original snippet passed
  // mutable_cpu_data(), which hands a host pointer to device code.
  caffe_gpu_set(N_ * K_, (Dtype)0., variation_sum_.mutable_gpu_data());
  Compute_center_diff_gpu<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
      CAFFE_CUDA_NUM_THREADS>>>(nthreads, M_, K_, bottom[1]->gpu_data(),
      distance_.gpu_data(), variation_sum_.mutable_gpu_data(),
      this->blobs_[0]->mutable_gpu_diff());
  if (propagate_down[0]) {
    caffe_gpu_scale(M_ * K_, top[0]->cpu_diff()[0] / M_,
        distance_.gpu_data(), bottom[0]->mutable_gpu_diff());
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(CenterLossLayer);

}  // namespace caffe
Step 4: rebuild Caffe. Put center_loss_layer.hpp under include/caffe/layers/ and center_loss_layer.cpp / center_loss_layer.cu under src/caffe/layers/, then rebuild (e.g. make clean && make all -j8 for a Makefile build, or re-run cmake and make for a CMake build).
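After the rebuild, the layer can be used from a prototxt, normally paired with a SoftmaxWithLoss on the same features. A usage sketch (not from the original post; the blob names fc5/fc6, the class count, and the loss_weight are illustrative):

layer {
  name: "center_loss"
  type: "CenterLoss"
  bottom: "fc5"        # the feature to be pulled toward its class center
  bottom: "label"
  top: "center_loss"
  param { lr_mult: 1 decay_mult: 0 }  # learning-rate multiplier for the centers
  center_loss_param {
    num_output: 10
    center_filler { type: "xavier" }
  }
  loss_weight: 0.008   # the lambda balancing center loss against softmax loss
}
layer {
  name: "softmax_loss"
  type: "SoftmaxWithLoss"
  bottom: "fc6"        # the classification logits
  bottom: "label"
  top: "softmax_loss"
}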
Any questions, feel free to reach me on QQ 2258205918 (samylee) or WeChat samylee_csdn.
That wraps up adding a CenterLoss_layer to vanilla Caffe. Hope it helps!