Asked by: Felix  Asked: 11/18/2023  Modified: 11/18/2023  Views: 35
Backpropagation in Neural Network for XOR data
Q:
I need to implement a neural network using only numpy. It has two inputs, one hidden layer that uses ReLU as the activation function, and an output layer that uses sigmoid as the activation. The loss I need to use is binary cross-entropy. When I train my NN, the output is around 0.5 for every input. I think my problem is in the backpropagation, but I'm not sure whether I implemented it correctly, or whether the error is somewhere else in my code. Maybe someone can help me.
import numpy as np
from tqdm import tqdm

X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
y = np.array([[0],
              [1],
              [1],
              [0]])

def ReLU(x):
    return np.maximum(0, x)

def d_ReLU(x):
    return np.where(x > 0, 1, 0)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

def binary_cross_entropy(y, y_pred):
    loss = np.mean(-(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)))
    return loss

def d_binary_cross_entropy(y, y_pred):
    loss = np.where(y == 1, -1/y_pred, 1/(1 - y_pred))
    return loss

class NeuralNetwork():
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.weights_hidden = np.random.uniform(size=(input_size, hidden_size))
        self.weights_output = np.random.uniform(size=(hidden_size, output_size))

    def forward_pass(self, X):
        self.output_hidden = ReLU(np.dot(X, self.weights_hidden))
        output = sigmoid(np.dot(self.output_hidden, self.weights_output))
        return output

    def backward_pass(self, X, y, y_pred):
        output_delta = d_binary_cross_entropy(y, y_pred) * d_sigmoid(y_pred)
        hidden_error = output_delta.dot(self.weights_output.T)
        hidden_delta = hidden_error * d_ReLU(self.output_hidden)
        self.weights_output -= self.learning_rate * self.output_hidden.T.dot(output_delta)
        self.weights_hidden -= self.learning_rate * X.T.dot(hidden_delta)

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            output = self.forward_pass(X)
            loss = binary_cross_entropy(y, output)
            self.backward_pass(X, y, output)
            print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}")
        print(self.forward_pass(X))
A:
0 votes
Harun Cetin
11/18/2023
#1
A few corrections are needed in the code:
First, if the ReLU function returns exactly 0 for negative pre-activations, those units lose their gradient. To avoid this, keep a relatively small slope on the negative side instead, i.e. use a leaky ReLU (the code below multiplies by 0.00001).
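To see this concretely, here is a minimal sketch (not part of the original post) comparing what a hidden unit with a negative pre-activation outputs under the strict ReLU and under the leaky variant used below; a unit whose activation is exactly 0 contributes nothing to the output_hidden.T.dot(output_delta) weight update, so its outgoing weight never moves:

import numpy as np

z = -0.3                              # pre-activation of a hidden unit that landed in the negative range

strict = np.maximum(0, z)             # strict ReLU: activation is exactly 0.0
leaky = np.maximum(z * 0.00001, z)    # leaky variant used below: -3e-06, small but nonzero

print(strict, leaky)
# with activation 0.0 the unit adds nothing to output_hidden.T.dot(output_delta);
# the leaky variant keeps a tiny signal flowing so the unit can still recover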
Second, you should choose a smaller learning rate and a larger number of epochs to get proper results.
The corrected, working code is below:
import numpy as np
from tqdm import tqdm

X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
y = np.array([[0],
              [1],
              [1],
              [0]])

def ReLU(x):
    return np.maximum(x * 0.00001, x)

def d_ReLU(x):
    return np.where(x > 0, 1, 0)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

def binary_cross_entropy(y, y_pred):
    loss = np.mean(-(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)))
    return loss

def d_binary_cross_entropy(y, y_pred):
    loss = np.where(y == 1, -1/y_pred, 1/(1 - y_pred))
    return loss

class NeuralNetwork():
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.weights_hidden = np.random.uniform(size=(input_size, hidden_size))
        self.weights_output = np.random.uniform(size=(hidden_size, output_size))

    def forward_pass(self, X):
        self.output_hidden = ReLU(np.dot(X, self.weights_hidden))
        output = sigmoid(np.dot(self.output_hidden, self.weights_output))
        return output

    def backward_pass(self, X, y, y_pred):
        output_delta = d_binary_cross_entropy(y, y_pred) * d_sigmoid(y_pred)
        hidden_error = output_delta.dot(self.weights_output.T)
        hidden_delta = hidden_error * d_ReLU(self.output_hidden)
        self.weights_output -= self.learning_rate * self.output_hidden.T.dot(output_delta)
        self.weights_hidden -= self.learning_rate * X.T.dot(hidden_delta)

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            output = self.forward_pass(X)
            loss = binary_cross_entropy(y, output)
            self.backward_pass(X, y, output)
            print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}")
        r = self.forward_pass(X)
        x = [0 if i <= 0.5 else 1 for i in r]
        print(r)
        print(x)

nn = NeuralNetwork(2, 10, 1, 0.001)
nn.train(X, y, 10000)
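If you also want to check the backward pass itself (the original doubt in the question), a finite-difference gradient check is a standard debugging trick: nudge one weight, recompute the loss, and compare the numerical slope with the gradient the code applies. A minimal sketch, assuming the definitions above; the seed and the fresh net instance exist only for this check:

np.random.seed(0)                      # only so the check is reproducible
net = NeuralNetwork(2, 10, 1, 0.001)   # fresh, untrained network

# gradient of the loss w.r.t. one output weight, as backward_pass computes it
y_pred = net.forward_pass(X)
output_delta = d_binary_cross_entropy(y, y_pred) * d_sigmoid(y_pred)
analytic = net.output_hidden.T.dot(output_delta)[0, 0]

# numerical slope of the same loss via central differences on that weight
eps = 1e-5
net.weights_output[0, 0] += eps
loss_plus = binary_cross_entropy(y, net.forward_pass(X))
net.weights_output[0, 0] -= 2 * eps
loss_minus = binary_cross_entropy(y, net.forward_pass(X))
net.weights_output[0, 0] += eps        # restore the original weight
numerical = (loss_plus - loss_minus) / (2 * eps)

# if the backward pass matches the loss, these two numbers agree closely;
# a large mismatch points at the part of the gradient computation to revisit
print(analytic, numerical)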
Comments