Asked by: winnie · Asked: 11/3/2023 · Updated: 11/3/2023 · Views: 13
My MLP with ReLU and Sigmoid is not working with XOR samples
Q:
I am trying to implement an MLP with different activation functions, but I am not sure why it does not work on the XOR samples with Sigmoid.
This is the class I use to implement the different activation functions:
import numpy as np


class ActivationFunction:
    def __init__(self, function_type):
        self.function_type = function_type

    def function(self, x):
        if self.function_type == 'sigmoid':
            return self.sigmoid(x)
        elif self.function_type == 'tanh':
            return self.tanh(x)
        elif self.function_type == 'relu':
            return self.relu(x)
        elif self.function_type == 'softmax':
            return self.softmax(x)

    def derivative(self, x):
        if self.function_type == 'sigmoid':
            return self.sigmoid_derivative(x)
        elif self.function_type == 'tanh':
            return self.tanh_derivative(x)
        elif self.function_type == 'relu':
            return self.relu_derivative(x)
        elif self.function_type == 'softmax':
            return self.softmax_derivative(x)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        sigmoid = self.sigmoid(x)
        return sigmoid * (1 - sigmoid)

    def tanh(self, x):
        return np.tanh(x)

    def tanh_derivative(self, x):
        return 1 - np.tanh(x)**2

    def relu(self, x):
        return np.where(x >= 0, x, 0)

    def relu_derivative(self, x):
        return np.where(x >= 0, 1, 0)

    def softmax(self, x):
        exp = np.exp(x)
        return exp / np.sum(exp, axis=1, keepdims=True)

    def softmax_derivative(self, x):
        softmax = self.softmax(x)
        return softmax * (1 - softmax)
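On its own, this class seems to behave as I expect. Here is a quick sanity check (this snippet is only illustrative, it is not part of the class):

# Hypothetical sanity check of the activation functions, not part of the class.
act = ActivationFunction('sigmoid')
x = np.array([[-2.0, 0.0, 2.0]])
print(act.function(x))    # approximately [[0.119, 0.5, 0.881]]
print(act.derivative(x))  # sigmoid(x) * (1 - sigmoid(x)) for each entry

relu = ActivationFunction('relu')
print(relu.function(np.array([[-1.0, 3.0]])))    # [[0., 3.]]
print(relu.derivative(np.array([[-1.0, 3.0]])))  # [[0, 1]]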
And this is my multilayer perceptron implementation (with 1 hidden layer):
class MLP():
    def __init__(
        self, eta, alpha, max_epoch,
        n, k, func_act_h='sigmoid', func_act_o='sigmoid',
        eps=1e-5, range_w=0.05
    ):
        self.eta = eta
        self.alpha = alpha
        self.max_epoch = max_epoch
        self.eps = eps
        self.range_w = range_w
        self.n = n
        self.k = k
        self.func_act_h = ActivationFunction(func_act_h)
        self.func_act_o = ActivationFunction(func_act_o)
        self.w, self.w_h, self.w_o = None, None, None
        self.prev_w_h, self.prev_w_o = None, None
        self.errors = None
        self.errors_val = None

    def train(self, X, d, train_val=0):
        if train_val != 0:
            X, X_val, d, d_val = stratified_split(X, d, test_size=train_val)
        self.w_h = np.random.uniform(size=(X.shape[1] + 1, self.n + 1))
        self.w_o = np.random.uniform(size=(self.n + 1, self.k))
        self.prev_w_h = np.zeros(np.shape(self.w_h))
        self.prev_w_o = np.zeros(np.shape(self.w_o))
        self.errors = np.zeros(self.max_epoch)
        if train_val != 0:
            self.errors_val = np.zeros(self.max_epoch)
        X = np.c_[np.ones(X.shape[0]), X]
        for epoch in range(self.max_epoch):
            for i in range(X.shape[0]):
                # Forward pass
                y_h = self.func_act_h.function(np.dot(X[i], self.w_h))
                y_o = self.func_act_o.function(np.dot(y_h, self.w_o))
                # Backward pass
                delta_o = (d[i] - y_o) * self.func_act_o.derivative(y_o)
                delta_h = self.func_act_h.derivative(y_h) * \
                    np.dot(self.w_o, delta_o)
                # Update weights (with momentum)
                new_w_o = self.eta * \
                    np.outer(y_h, delta_o) + self.alpha * self.prev_w_o
                new_w_h = self.eta * np.outer(X[i], delta_h) + \
                    self.alpha * self.prev_w_h
                self.w_o += new_w_o
                self.w_h += new_w_h
                self.prev_w_o = new_w_o
                self.prev_w_h = new_w_h

    def predict(self, X):
        X = np.c_[np.ones(X.shape[0]), X]
        y_h = self.func_act_h.function(np.dot(X, self.w_h))
        y_o = self.func_act_o.function(np.dot(y_h, self.w_o))
        return y_o
I have tried changing the number of neurons in the hidden layer, different weight initialization ranges, and different learning rates, but I still get outputs of roughly 0.5 for every sample.
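For completeness, this is roughly how I call it on the XOR data; the hyperparameter values below are just one of the combinations I tried:

# Hypothetical driver script; the exact hyperparameters are only an example.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
d = np.array([[0], [1], [1], [0]])

mlp = MLP(eta=0.1, alpha=0.9, max_epoch=5000, n=4, k=1,
          func_act_h='relu', func_act_o='sigmoid')
mlp.train(X, d)
print(mlp.predict(X))  # in my runs, every row comes out close to 0.5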
Answers: No answers yet