I'm creating ANN from scratch to better understand how it works. It is supposed to take numerical inputs from 0 to 1 and output either 0 or 1. However I'm stuck, because output neuron goes to maximal possible value (in this case 1) and stays there. I cannot see any errors in backprog algorithm, but I'm a beginer in deep learing so that might be the issue. Here is backprog code:
void backpropagation(float target, float output)
{
//counting staring delta
std::vector<std::vector<float>> vectors_of_delta;
std::vector<std::vector<std::vector<float>>> vectors_of_old_weights;
for(int i = 0; i<number_of_layers_-2; i++) //pushing in dummie vectors to better navigate through it later
{
std::vector<std::vector<float>> temp;
std::vector<float> temp2;
vectors_of_old_weights.push_back(temp);
vectors_of_delta.push_back(temp2);
}
//counting small starting delta
std::vector<float> starting_small_delta_vector;
for(int i = 0; i<neurons_[number_of_layers_-2].size(); i++)
{
float delta = -(target/output)*(1/(1 + exp(-neurons_[number_of_layers_-2][i].get_z())))*(1- (1/(1 + exp(-neurons_[number_of_layers_-2][i].get_z()))));
starting_small_delta_vector.push_back(delta);
}
//in convolution will have to iterate over it because there will be more outputs
std::vector<std::vector<float>> placeholder_vector_weights;
placeholder_vector_weights.push_back(neurons_[number_of_layers_-2][0].get_weigts());
vectors_of_old_weights.push_back(placeholder_vector_weights);
vectors_of_delta.push_back(starting_small_delta_vector);
//
for(int i = 0; i<neurons_[number_of_layers_-2].size(); i++)
{
neurons_[number_of_layers_-2][i].set_bias(learning_rate_ * starting_small_delta_vector[i]);
std::vector<float> difference_vector;
for(int j = 0; j < neurons_[number_of_layers_-2][i].get_number_of_weigts(); j++)
{
difference_vector.push_back(learning_rate_ * starting_small_delta_vector[i] * (vector_of_outputs_for_backpropagation[number_of_layers_-3][i]));
}
neurons_[number_of_layers_-2][i].set_weights(difference_vector);
}
// finishing counting starting delta
for(int i = number_of_layers_-3; i > 0; i--)//iteration over layers
{
std::vector<float> delta_vector;
for(int j = 0; j < neurons_[i].size(); j++)//iteration over neurons
{
float sum_of_weights_and_deltas = 0;
for(int k = 0; k < vectors_of_old_weights[i+1].size(); k++ )//iteration over layers
{
sum_of_weights_and_deltas += vectors_of_old_weights[i+1][k][j] * vectors_of_delta[i+1][k];
}
float delta = sum_of_weights_and_deltas * (1/(1+exp(-(neurons_[i][j].get_z())))) * (1-(1/(1+exp(-(neurons_[i][j].get_z())))));
delta_vector.push_back(delta);
neurons_[i][j].set_bias(learning_rate_ * delta);
vectors_of_delta.push_back(delta_vector);
std::vector<float> difference_vector;
for(int k = 0; k < neurons_[i][j].get_number_of_weigts(); k++)
{
difference_vector.push_back(learning_rate_ * delta * vector_of_outputs_for_backpropagation[i-1][k]);// * vector_of_outputs_for_backpropagation[i-1][k]
}
neurons_[i][j].set_weights(difference_vector);
}
vectors_of_delta.push_back(delta_vector);
}
}
I use math from this article: https://medium.com/@tiago.tmleite/neural-networks-multilayer-perceptron-and-the-backpropagation-algorithm-a5cd5b904fde
Rest of code and input data are in this repo: https://github.com/SzymonPabjan/simple_ANN
question from:
https://stackoverflow.com/questions/65861527/artificial-neural-network-written-from-scratch-in-c-stuck-on-maximal-possible 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…