• outputs=self.activation_function(final_inputs)#设置最终层的输出为最终层的输入经过S函数激活 #反向传播误差 #output layer error is the(target-actual) output_errors=targets-final_outputs#输出误差=目标值-...
#训练集http://www.pjreddie.com/media/files/mnist_train.csv
#测试集http://www.pjreddie.com/media/files/mnist_test.csv
import numpy
#scipy.special for the sigmoid function expit()
import scipy.special
import matplotlib.pyplot

#neural network class definition
class neuralNetwork:
    """Three-layer (input / hidden / output) feed-forward neural network.

    Both layers use the logistic sigmoid as activation and the network is
    trained one sample at a time by backpropagating the output error.

    Attributes:
        inodes, hnodes, onodes: number of input, hidden and output nodes.
        wih: weight matrix input -> hidden, shape (hnodes, inodes).
        who: weight matrix hidden -> output, shape (onodes, hnodes).
        lr: learning rate used to scale every weight update.
        activation_function: element-wise activation (sigmoid).
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Initialise layer sizes, random weights and the learning rate."""
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # Random link weights, each drawn uniformly from [-0.5, 0.5).
        # NOTE: an alternative initialisation is to sample from a normal
        # distribution instead, e.g.
        #   numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        #   numpy.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))
        self.wih = numpy.random.rand(self.hnodes, self.inodes) - 0.5
        self.who = numpy.random.rand(self.onodes, self.hnodes) - 0.5

        self.lr = learningrate

        # The activation function is the sigmoid, provided by scipy as expit().
        self.activation_function = scipy.special.expit

    def _forward(self, inputs):
        """Propagate a column-vector input through both layers.

        Returns:
            (hidden_outputs, final_outputs), each a 2-D column array.
        """
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_function(hidden_inputs)

        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_function(final_inputs)
        return hidden_outputs, final_outputs

    def train(self, input_list, targets_list):
        """Run one gradient step on a single (input, target) sample.

        Args:
            input_list: sequence of `inodes` input values.
            targets_list: sequence of `onodes` target values.
        """
        # Convert both sequences to 2-D column vectors.
        inputs = numpy.array(input_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # Output layer error is (target - actual).
        output_errors = targets - final_outputs
        # Hidden layer error is the output error split by the link weights and
        # recombined at the hidden nodes. It must be computed BEFORE `who` is
        # updated below, so that it uses the weights of the forward pass.
        hidden_errors = numpy.dot(self.who.T, output_errors)

        # Update the hidden -> output weights; y * (1 - y) is the sigmoid slope.
        self.who += self.lr * numpy.dot(
            output_errors * final_outputs * (1.0 - final_outputs),
            numpy.transpose(hidden_outputs),
        )

        # Update the input -> hidden weights.
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            numpy.transpose(inputs),
        )

    def query(self, inputs_list):
        """Return the network output (column array of `onodes` values) for one input."""
        inputs = numpy.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs

# Number of input, hidden and output nodes. `input_nodes` must equal the
# number of CSV fields that follow the label in each record.
input_nodes = 784
hidden_nodes = 200
output_nodes = 10

# Learning rate
learning_rate = 0.3

# Create an instance of the neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# Load the MNIST training data CSV file into a list of lines (mind the path).
# The file must actually be read before it is closed; `with` closes it for us.
with open("c:/Users/we/Desktop/we/mnist_dataset/mnist_train_100.csv", "r") as training_data_file:
    training_data_list = training_data_file.readlines()

# epochs is the number of times the training data set is used for training
epochs = 5

for e in range(epochs):
    # Go through all records in the training data set
    for record in training_data_list:
        if not record.strip():
            # Ignore blank lines (e.g. a trailing newline at end of file).
            continue
        all_values = record.strip().split(',')
        # Scale and shift the inputs from 0..255 into 0.01..1.00.
        # numpy.asfarray was removed in NumPy 2.0; asarray(dtype=float) is equivalent.
        inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
        # Target vector: 0.01 everywhere except 0.99 at the index of the label,
        # which is the first value of the record.
        targets = numpy.zeros(output_nodes) + 0.01
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)

# Load the MNIST test data CSV file into a list of lines (ideally keep the
# data set next to this program). The file must be read before it is closed.
with open("c:/Users/we/Desktop/we/mnist_dataset/mnist_test_10.csv", 'r') as test_data_file:
    test_data_list = test_data_file.readlines()

# Test the neural network

# Scorecard for how well the network performs, initially empty:
# 1 for every correctly classified record, 0 otherwise.
scorecard = []

# Go through all the records in the test data set
for record in test_data_list:
    if not record.strip():
        # Ignore blank lines (e.g. a trailing newline at end of file).
        continue
    # Split the record at the ',' commas
    all_values = record.strip().split(',')
    # The correct answer is the first value
    correct_label = int(all_values[0])
    # Scale and shift the inputs from 0..255 into 0.01..1.00.
    # numpy.asfarray was removed in NumPy 2.0; asarray(dtype=float) is equivalent.
    inputs = (numpy.asarray(all_values[1:], dtype=float) / 255.0 * 0.99) + 0.01
    # Query the network
    outputs = n.query(inputs)
    # The index of the highest output value corresponds to the predicted label
    label = numpy.argmax(outputs)
    # Append correct (1) or incorrect (0) to the scorecard
    scorecard.append(1 if label == correct_label else 0)

# Calculate the performance score: the fraction of correct answers
scorecard_array = numpy.asarray(scorecard)
print('performance=', scorecard_array.sum() / scorecard_array.size)


• 反向传播 目标： 快速理解反向传播并推导 分析： 2层神经网络如图1所示，包含一个隐层和输出层，手推反向传播，取一个神经元的传播过程进行推导。 动手之前： 假定某个样本在网络中的一个传播过程如图2所示，隐层...
反向传播
**参考链接：** https://blog.csdn.net/qq_37644877/article/details/105431392
目标：
快速理解反向传播并推导
分析：
2层神经网络如图1所示，包含一个隐层和输出层，手推反向传播，取一个神经元的传播过程进行推导。

动手之前：
假定某个样本在网络中的一个传播过程如图2所示，隐层激活函数使用sigmoid，输出层不使用非线性激活函数，优化目标是均方误差mse，即损失为：


• 论文：Identity Mappings in Deep Residual ...但是反向传播是为了对权重和偏置进行更新，论文中只是分析了对特征x的梯度。 自己对权重的梯度做了分析。 以一个小的示例表达： 其中分为两类，一是对输出层...
论文：Identity Mappings in Deep Residual Networks（https://arxiv.org/pdf/1603.05027.pdf）

其中对resnet反向传播进行了分析，给出以下公式：

但是反向传播是为了对权重和偏置进行更新，论文中只是分析了对特征x的梯度。

自己对权重的梯度做了分析。

以一个小的示例表达：

其中分为两类，一是对输出层的更新，直接进行求解。

另一个是对中间隐含层的梯度求解，利用论文中给出的公式能够很好的写出。

• 前向运算、反向运算 } loss /= param_.iter_size(); // average the loss across iterations for smoothed reporting UpdateSmoothedLoss(loss, start_iter, average_loss); if (display) { LOG_IF(INFO...
还是以mnist手写字符，lenet5为例。
如上图所示，左边为9个layer层，右边为每层的top blob 的输出（feature map）的维度。

那么问题来了，训练的参数 $w_{ij}$ 和 $b_i$ 存在哪些变量里呢？
1）有需要用BP算法进行训练参数的层(layer)，内部都会有一个成员变量layer::blobs_，其中blobs_[0]存放 $w_{ij}$ 及 $\Delta w_{ij}$，blobs_[1]存放 $b_i$ 及 $\Delta b_i$。
2) 网络层内部成员变量 Net::blobs_内部存放数据及 $\delta$ 值。

在lenet5网络中，只有两个卷积层（conv1,conv2）和两个全链接层（ip1,ip2）内部有需要训练的参数。因此lenet5网络需要8个blobs_存放训练参数，Net内部变量 net::learnable_params_与此8个blobs_共享内存。且，各层的layer::blobs_变量在layer::setup()中分配内存和初始化，在layer::Backward_cpu()中计算 $\Delta w_{ij}$ 和 $\Delta b_i$，后续准备编写自己网络的童鞋需要重写此函数。最后调用Net::update()函数，更新 $w_{ij}$。
以全链接层为例：
///ip层 blobs_ 开辟内存及初始化
/**
 * Set up the inner-product (fully connected) layer: read its parameters and,
 * unless parameter blobs already exist, allocate and fill blobs_.
 *
 * After this call:
 *   - N_ is the configured number of outputs and K_ is the flattened input
 *     dimension, bottom[0]->count(axis).
 *   - blobs_[0] holds the weights, shaped (N_, K_), or (K_, N_) when
 *     transpose_ is set; it is filled by the configured weight filler.
 *   - blobs_[1] (only when bias_term_ is set) holds the bias, shaped (N_),
 *     filled by the configured bias filler.
 *   - param_propagate_down_ has one `true` entry per parameter blob.
 *
 * @param bottom input blobs; only bottom[0] is inspected here.
 * @param top    output blobs; not used by this function.
 */
template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();
  transpose_ = this->layer_param_.inner_product_param().transpose();
  N_ = num_output;
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  K_ = bottom[0]->count(axis);
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    if (transpose_) {
      weight_shape[0] = K_;
      weight_shape[1] = N_;
    } else {
      weight_shape[0] = N_;
      weight_shape[1] = K_;
    }
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights (weight w initialization)
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));  // bias initialization
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

权重变化量计算：

// CPU backward pass of the inner-product (fully connected) layer.
//
// Depending on the propagate flags it computes up to three gradients from the
// top diff (top[0]->cpu_diff()):
//   1. the weight gradient, written to blobs_[0]'s diff,
//   2. the bias gradient, written to blobs_[1]'s diff (only if bias_term_),
//   3. the gradient w.r.t. the input, written to bottom[0]'s diff.
//
// NOTE(review): the comments below assume caffe_cpu_gemm / caffe_cpu_gemv
// follow the BLAS convention (transA, transB, M, N, K, alpha, A, B, beta, C);
// confirm against their declarations. Under that assumption beta == 1 means
// "accumulate into the existing diff" and beta == 0 means "overwrite".
//
// top:            output blobs; top[0]'s diff is the incoming gradient.
// propagate_down: propagate_down[0] selects whether the input gradient is computed.
// bottom:         input blobs; bottom[0]'s data is read, its diff may be written.
template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
if (this->param_propagate_down_[0]) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* bottom_data = bottom[0]->cpu_data();
// Gradient with respect to weight
if (transpose_) {
// Transposed weight layout: result is K_ x N_ (bottom_data^T * top_diff).
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
K_, N_, M_,
(Dtype)1., bottom_data, top_diff,
(Dtype)1., this->blobs_[0]->mutable_cpu_diff());
} else {
// Default weight layout: result is N_ x K_ (top_diff^T * bottom_data).
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
N_, K_, M_,
(Dtype)1., top_diff, bottom_data,
(Dtype)1., this->blobs_[0]->mutable_cpu_diff());  // compute this layer's weight update (delta W)
}
}
if (bias_term_ && this->param_propagate_down_[1]) {
const Dtype* top_diff = top[0]->cpu_diff();
// Gradient with respect to bias
caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
bias_multiplier_.cpu_data(), (Dtype)1.,
this->blobs_[1]->mutable_cpu_diff());           // compute this layer's bias update (delta b)
}
if (propagate_down[0]) {
const Dtype* top_diff = top[0]->cpu_diff();
// Gradient with respect to bottom data
if (transpose_) {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
M_, K_, N_,
(Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
(Dtype)0., bottom[0]->mutable_cpu_diff());
} else {
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,   /// compute the delta (bottom diff) handed to the layer below
M_, K_, N_,
(Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
(Dtype)0., bottom[0]->mutable_cpu_diff());
}
}
}

总的训练流程在Solver::Step（）函数实现中：

/**
 * Run up to `iters` training iterations.
 *
 * Each pass of the loop: clears the parameter diffs, optionally runs the test
 * nets, accumulates loss and gradients over param_.iter_size()
 * forward/backward passes, updates the smoothed loss, optionally logs it,
 * notifies the callbacks, applies the weight update and increments iter_.
 * A snapshot is saved when the snapshot interval is hit or one was requested.
 *
 * The loop ends early when a stop is requested, either while testing
 * (requested_early_exit_) or through GetRequestedAction().
 *
 * @param iters number of iterations to run, counted from the current iter_.
 */
template <typename Dtype>
void Solver<Dtype>::Step(int iters) {
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;
  int average_loss = this->param_.average_loss();
  losses_.clear();
  smoothed_loss_ = 0;

  while (iter_ < stop_iter) {  // one pass of this loop is one training iteration
    // zero-init the params: reset the diff buffers of the net's parameters
    net_->ClearParamDiffs();
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())
        && Caffe::root_solver()) {
      TestAll();
      if (requested_early_exit_) {
        // Break out of the while loop because stop was requested while testing.
        break;
      }
    }

    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_start();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());
    // accumulate the loss and gradient
    Dtype loss = 0;
    for (int i = 0; i < param_.iter_size(); ++i) {
      loss += net_->ForwardBackward();  // key step: forward pass + backward pass
    }
    loss /= param_.iter_size();
    // average the loss across iterations for smoothed reporting; without this
    // call smoothed_loss_ would stay at 0 and the log line below would always
    // report "loss = 0".
    UpdateSmoothedLoss(loss, start_iter, average_loss);
    if (display) {
      // Log line of the form "Iteration <iter_>, loss = <smoothed loss>"
      LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
          << ", loss = " << smoothed_loss_;
      const vector<Blob<Dtype>*>& result = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < result.size(); ++j) {
        const Dtype* result_vec = result[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        // One "Train net output #k" log line per element of each output blob
        for (int k = 0; k < result[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * result_vec[k] << " loss)";
          }
          LOG_IF(INFO, Caffe::root_solver()) << "    Train net output #"
              << score_index++ << ": " << output_name << " = "
              << result_vec[k] << loss_msg_stream.str();
        }
      }
    }  // if (display)
    // Tell the callbacks that the gradients of this iteration are complete
    // (the loop body was missing, so the callbacks were never notified).
    for (int i = 0; i < callbacks_.size(); ++i) {
      callbacks_[i]->on_gradients_ready();
    }
    ApplyUpdate();  // backward pass finished: update all weights in one step

    // Increment the internal iter_ counter -- its value should always indicate
    // the number of times the weights have been updated.
    ++iter_;

    SolverAction::Enum request = GetRequestedAction();

    // Save a snapshot if needed.
    if ((param_.snapshot()
         && iter_ % param_.snapshot() == 0
         && Caffe::root_solver()) ||
         (request == SolverAction::SNAPSHOT)) {
      Snapshot();
    }
    if (SolverAction::STOP == request) {
      requested_early_exit_ = true;
      // Break out of training loop.
      break;
    }
  }  // end while
}  // end Step()
...