17.3.9 caffe自定义一个Layer会遇到的一些理解方面的问题

xiaoxiao2021-03-25 109

对于一个新手而言，在想要自己定义一个层的时候，常常会遇到许多理解方面的障碍。在阅读具体Layer的hpp和cpp文件之前，应该先了解该Layer在/src/caffe/proto/caffe.proto文件中的参数定义。

// Parameters of the CenterLoss layer.
message CenterLossParameter {
  // The number of outputs for the layer. The layer allocates one center
  // row per output, i.e. the centers blob has shape (num_output, K).
  optional uint32 num_output = 1;
  // The filler used to initialize the centers.
  optional FillerParameter center_filler = 2;
  // The first axis to be lumped into the flattened feature vector:
  // the dimensions from `axis` onward are multiplied together to give the
  // per-sample feature length K.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 3 [default = 1];
}

从上面的代码中可以看出，centerloss层包含三个参数：num_output、center_filler和axis。num_output表示输出的个数，在cpp文件中它决定了中心点的个数（中心点blob的形状为num_output × K_）。center_filler表示用来初始化中心点的filler。axis表示从哪一个维度开始，把其后的所有维度展平成一个特征向量。一个常见的blob数据包括（N，C，H，W）四个维度，例如输入的维度是(N, C, H, W)且axis取默认值1时，每个样本的特征长度为C * H * W。在了解了参数的定义之后，应该开始对hpp文件进行阅读，先了解该Layer包括哪些功能。hpp文件一般出现在/include/caffe/layers/里面。一些头文件的包含暂且不考虑。

namespace caffe {

/**
 * @brief Loss layer that penalizes the squared distance between each input
 *        feature vector and a learned center selected by the sample's label.
 *
 * Takes exactly two bottom blobs: bottom[0] holds the features and bottom[1]
 * holds one label per sample. The centers are stored in this->blobs_[0] with
 * shape (N_, K_) by the implementation file.
 */
template <typename Dtype>
class CenterLossLayer : public LossLayer<Dtype> {
 public:
  explicit CenterLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param) {}
  /// Reads num_output / axis from the layer parameters and creates and fills
  /// the centers blob when it does not exist yet.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /// Validates the label blob shape, records the batch size and resizes the
  /// scratch buffers.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /// Layer type string used for registration and log messages.
  virtual inline const char* type() const { return "CenterLoss"; }
  /// Exactly two inputs are required: features and labels.
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  /// -1: by Caffe convention no exact top-blob count is enforced
  /// (the forward pass still writes the loss to top[0]).
  virtual inline int ExactNumTopBlobs() const { return -1; }

 protected:
  /// Computes the per-sample differences to the centers and the scalar loss.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /// GPU counterpart of Forward_cpu; its definition is not part of the
  /// listed .cpp file (a stub is generated there when CPU_ONLY is defined).
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /// Accumulates the gradient for the centers and for the input features.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  /// GPU counterpart of Backward_cpu; its definition is not part of the
  /// listed .cpp file (a stub is generated there when CPU_ONLY is defined).
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  int M_;  ///< Number of samples in the batch (bottom[0]->num()).
  int K_;  ///< Length of one flattened feature vector (bottom[0]->count(axis)).
  int N_;  ///< num_output, i.e. the number of center rows.
  /// Per-sample difference X(i,:) - C(y(i),:); shaped like bottom[0].
  Blob<Dtype> distance_;
  /// Per-center accumulator used for the center gradient; shaped like the
  /// centers blob.
  Blob<Dtype> variation_sum_;
};

}  // namespace caffe

从代码中可以看出，对于一个loss层而言，应该包括LayerSetUp（），Forward_cpu（），Forward_gpu（），Backward_cpu（），Backward_gpu（）。在声明内还包括几个inline函数，第一个返回表示层类型的字符串，后面两个分别表示输入和输出的个数：centerloss需要恰好2个输入（特征和标签）；ExactNumTopBlobs（）返回-1，按Caffe的约定表示不对输出个数做精确限制，并不是没有输出（前向传播会把loss写入top[0]）。除了上面的几个，centerloss层还定义了Reshape（）和几个变量。几个变量通过对cpp文件的阅读了解到其对应的含义（命名沿用了矩阵乘法(M, K) * (K, N) = (M, N)的记号）：M_ ：表示样本的个数；K_ ：表示单个输入特征的长度；N_ ：表示num_output，即中心点的个数；distance_ ：表示每个样本的特征与其类别中心之差；variation_sum_ ：表示按类别累加的差值之和，用于计算中心点的梯度。

在理解了centerloss层主要包括的功能之后，接下来看各个函数的具体实现。对应的cpp文件在/src/caffe/layers/中。包含的头文件暂时不考虑。

namespace caffe {

/**
 * @brief One-time setup: reads the layer parameters and creates the centers.
 *
 * Sets N_ from num_output and K_ from the flattened size of bottom[0]
 * starting at `axis`. Unless the parameter blobs already exist, allocates
 * the (N_, K_) centers blob and fills it with the configured center_filler.
 *
 * @param bottom bottom[0] is the feature blob whose shape determines K_.
 * @param top    unused here.
 */
template <typename Dtype>
void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.center_loss_param().num_output();
  N_ = num_output;
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.center_loss_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, every sample becomes a feature vector of length CHW.
  K_ = bottom[0]->count(axis);
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(1);
    // Initialize the weight: one K_-long center per output.
    vector<int> center_shape(2);
    center_shape[0] = N_;
    center_shape[1] = K_;
    this->blobs_[0].reset(new Blob<Dtype>(center_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > center_filler(GetFiller<Dtype>(
        this->layer_param_.center_loss_param().center_filler()));
    center_filler->Fill(this->blobs_[0].get());
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

/**
 * @brief Validates the label blob and sizes the scratch buffers.
 *
 * Requires bottom[1] to have channels == height == width == 1, records the
 * batch size in M_, delegates to LossLayer::Reshape, then shapes distance_
 * like bottom[0] and variation_sum_ like the centers blob.
 */
template <typename Dtype>
void CenterLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(bottom[1]->channels(), 1);
  CHECK_EQ(bottom[1]->height(), 1);
  CHECK_EQ(bottom[1]->width(), 1);
  // NOTE(review): M_ is num() while K_ is count(axis); together they tile
  // bottom[0] exactly only when axis == 1 -- confirm before using other axes.
  M_ = bottom[0]->num();
  // The base class handles the top blob (behavior defined in LossLayer, not
  // shown in this file); the buffers below are private scratch space.
  LossLayer<Dtype>::Reshape(bottom, top);
  distance_.ReshapeLike(*bottom[0]);
  variation_sum_.ReshapeLike(*this->blobs_[0]);
}

/**
 * @brief Computes loss = sum_i ||X(i,:) - C(y(i),:)||^2 / (2 * M_).
 *
 * Stores the per-sample differences in distance_ (reused by Backward_cpu)
 * and writes the scalar loss to top[0]. Aborts via CHECK if a label does not
 * address one of the N_ centers.
 *
 * @param bottom bottom[0]: features, bottom[1]: one label per sample.
 * @param top    top[0] receives the loss value.
 */
template <typename Dtype>
void CenterLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* label = bottom[1]->cpu_data();
  const Dtype* center = this->blobs_[0]->cpu_data();
  Dtype* distance_data = distance_.mutable_cpu_data();

  // the i-th distance_data
  for (int i = 0; i < M_; i++) {
    const int label_value = static_cast<int>(label[i]);
    // An out-of-range label would read outside the (N_, K_) centers blob.
    CHECK_GE(label_value, 0) << "Label of sample " << i << " is negative.";
    CHECK_LT(label_value, N_) << "Label of sample " << i
        << " exceeds the number of centers (num_output).";
    // D(i,:) = X(i,:) - C(y(i),:)
    caffe_sub(K_, bottom_data + i * K_, center + label_value * K_,
        distance_data + i * K_);
  }
  Dtype dot = caffe_cpu_dot(M_ * K_, distance_.cpu_data(),
      distance_.cpu_data());
  Dtype loss = dot / M_ / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
}

/**
 * @brief Accumulates gradients for the centers and the input features.
 *
 * Centers: for every center n, adds
 *   -(sum over samples with label n of D(i,:)) / (1 + count_n)
 * to the center diff, where D is the distance_ buffer from the forward pass.
 * Features: bottom[0] diff = D * top[0] diff / M_.
 * Backpropagation to the labels (bottom[1]) is a fatal error.
 */
template <typename Dtype>
void CenterLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // Gradient with respect to centers
  if (this->param_propagate_down_[0]) {
    const Dtype* label = bottom[1]->cpu_data();
    Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();
    Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();
    const Dtype* distance_data = distance_.cpu_data();

    // \sum_{y_i==j}
    caffe_set(N_ * K_, Dtype(0), variation_sum_data);
    // Single pass over the batch with per-center counters instead of
    // rescanning all samples for every center; samples are still visited in
    // ascending order, so each center accumulates in the same order.
    vector<int> count(N_, 0);
    for (int m = 0; m < M_; m++) {
      const int label_value = static_cast<int>(label[m]);
      // Labels that match no center contribute nothing.
      if (label_value < 0 || label_value >= N_) {
        continue;
      }
      count[label_value]++;
      caffe_sub(K_, variation_sum_data + label_value * K_,
          distance_data + m * K_, variation_sum_data + label_value * K_);
    }
    for (int n = 0; n < N_; n++) {
      // The +1 keeps the scale finite for centers with no sample in the batch.
      caffe_axpy(K_, Dtype(1) / (count[n] + Dtype(1)),
          variation_sum_data + n * K_, center_diff + n * K_);
    }
  }
  // Gradient with respect to bottom data
  if (propagate_down[0]) {
    caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());
    caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_,
        bottom[0]->mutable_cpu_diff());
  }
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
}

#ifdef CPU_ONLY
STUB_GPU(CenterLossLayer);
#endif

INSTANTIATE_CLASS(CenterLossLayer);
REGISTER_LAYER_CLASS(CenterLoss);

}  // namespace caffe

首先对于LayerSetUp（）函数：该函数主要完成对centerloss层参数的获取和处理。首先N_取num_output的值，K_取从axis开始的各个维度长度的乘积。然后判断权重（中心点）是否需要初始化：如果需要，就按(N_, K_)的形状创建中心点blob，并用center_filler参数指定的filler对其进行填充。最后设置param_propagate_down_。在参数初始化之后，就是对前向和后向传播的理解。前向传播比较好理解，主要的难点在于后向传播。未完待续！

转载请注明原文地址: https://ju.6miu.com/read-13109.html

技术

最新回复(0)