提问人:Amr Tamer 提问时间:11/13/2023 最后编辑:Amr Tamer 更新时间:11/14/2023 访问量:20
使用 numpy 的手动后道具没有得到好的结果
Manual back prop using numpy not getting me good results
问:
import numpy as np
def sigmoid(z):
return 1/(1+np.exp(-z))
def derivative_sigmoid(z):
return sigmoid(z) * (1 - sigmoid(z))
weights = []
dweights = []
bias = []
dbias = []
z = []
dz = []
act = []
dact = []
lr = 0.001
input_shape = 6
output_shape = 2
shapes = [input_shape] + [400, 400, 400] + [output_shape]
x=np.array([[1,0,0,0,0,0],[0,1,0,0,0,0],[0,0,1,0,0,0],[0,0,0,1,0,0],[0,0,0,0,1,0],[0,0,0,0,0,1],[1,1,0,0,0,0],[0,1,1,0,0,0],
[0,0,1,1,0,0],[0,0,0,1,1,0],[0,0,0,0,1,1],[1,1,1,0,0,0],[0,1,1,1,0,0],[0,0,1,1,1,0],[0,0,0,1,1,1],[1,1,1,1,0,0],
[0,1,1,1,1,0],[0,0,1,1,1,1],[1,1,1,1,1,0],[0,1,1,1,1,1],[1,1,1,1,1,1],[1,0,1,1,1,1],[1,1,0,1,1,1],[1,1,1,0,1,1],
[1,1,1,1,0,1],[1,1,1,1,1,0],[1,0,0,1,1,1],[1,1,0,0,1,1],[1,1,1,0,0,1],[1,1,1,1,0,0],[1,0,1,0,1,0],[1,1,1,0,1,0],
[1,0,1,1,1,0],[1,1,0,0,1,0],[1,0,0,0,1,0],[1,1,1,0,1,0],[1,0,1,0,1,1],[1,0,1,1,1,0],[0,1,1,0,1,0],[0,0,1,0,1,0],
[0,0,1,0,1,1],[0,1,1,0,1,0],[1,0,1,0,1,1],[1,1,1,0,1,0]])
y=np.array([[1],[0],[0],[1],[1],[1],[1],[1],[0],[1],[1],[0],[1],[0],[0],[1],[1],[0],[1],[1],[0],[1],[1],[0],[1],[1],[0],[1],
[1],[0],[1],[1],[0],[0],[1],[0],[1],[0],[0],[1],[1],[0],[1],[1]]).ravel()
batch_size=len(x)
for i in range(len(shapes)-1):
weights.append(np.random.randn(shapes[i], shapes[i+1]))
bias.append(np.zeros(shapes[i+1]))
dweights.append(np.zeros_like(weights[i]))
dbias.append(np.zeros_like(bias[i]))
z.append(np.zeros((batch_size, shapes[i+1])))
dz.append(np.zeros((batch_size, shapes[i+1])))
act.append(np.zeros((batch_size, shapes[i+1])))
dact.append(np.zeros((batch_size, shapes[i+1])))
for i in range(1000):
z[0] = x @ weights[0] + bias[0]
act[0] = sigmoid(z[0])
z[1] = act[0] @ weights[1] + bias[1]
act[1] = sigmoid(z[1])
z[2] = act[1] @ weights[2] + bias[2]
act[2] = sigmoid(z[2])
z[3] = act[2] @ weights[3] + bias[3]
act[3] = sigmoid(z[3])
logits = act[3]
logits_max = logits.max(axis=1, keepdims=True)
norm_logits = logits - logits_max
counts = np.exp(norm_logits)
counts_sum = counts.sum(axis=1, keepdims=True)
counts_sum_inv = counts_sum**-1
probs = counts * counts_sum_inv
logprobs = np.log(probs)
cost = -logprobs[np.arange(batch_size), y].mean()
if i % 100 == 0 or i == 999:
print(f'At step {i}: cost is {cost}')
dlogprobs = np.zeros_like(probs)
dlogprobs[np.arange(batch_size), y] = -1/batch_size
dprobs = (1/probs) * dlogprobs
dcounts = counts_sum_inv * dprobs
dcounts_sum_inv = (dcounts * dprobs).sum(1, keepdims=True)
dcounts_sum = (-counts_sum**-2) * dcounts_sum_inv
dcounts += np.ones_like(counts) * dcounts_sum
dnorm_logits = counts * dcounts
dlogits = dnorm_logits.copy()
dlogits_max = -dnorm_logits.sum(1, keepdims=True)
indices = logits.argmax(axis=1)
num_classes = logits.shape[1]
one_hot = np.zeros((batch_size, num_classes))
one_hot[np.arange(len(indices)), indices] = 1
dlogits += one_hot * dlogits_max
dact[3] = dlogits
dz[3] = derivative_sigmoid(z[3]) * dact[3]
dact[2] = dz[3] @ weights[3].T
dweights[3] = act[2].T @ dz[3]
dbias[3] = dz[3].sum(axis=0)
dz[2] = derivative_sigmoid(z[2]) * dact[2]
dact[1] = dz[2] @ weights[2].T
dweights[2] = act[1].T @ dz[2]
dbias[2] = dz[2].sum(axis=0)
dz[1] = derivative_sigmoid(z[1]) * dact[1]
dact[0] = dz[1] @ weights[1].T
dweights[1] = act[0].T @ dz[1]
dbias[1] = dz[1].sum(axis=0)
dz[0] = derivative_sigmoid(z[0]) * dact[0]
dweights[0] = x.T @ dz[0]
dbias[0] = dz[0].sum(axis=0)
for i in range(len(weights)):
weights[i] -= lr * dweights[i]
bias[i] -= lr * dbias[i]
print(cost)
我正在尝试仅使用 numpy 通过 MLP 执行手动后道具。但是,我的实现无法正常工作。成本函数并没有减少太多,然后开始增加,因此代码中显然存在一些数学错误。你能告诉我我的错误在哪里吗?我没有收到运行时错误,我尝试调试代码,但在矩阵中发现错误可能很困难。我找不到更多的错误,因为我不知道它是实现还是计算问题。
我尝试通过代码进行调试并修复了一些错误,但找不到更多错误。任何帮助都是值得赞赏的。
答: 暂无答案
评论