
My MLP with ReLU and Sigmoid is not working with XOR samples

Asked by winnie on 11/3/2023 · Modified 11/3/2023 · Viewed 13 times

Q:

I am trying to implement an MLP with different activation functions, but I'm not sure why it isn't working on the XOR samples with Sigmoid:

Here is the class I use to implement the different activation functions:


    import numpy as np


    class ActivationFunction:
    
        def __init__(self, function_type):
            self.function_type = function_type
    
        def function(self, x):
            if self.function_type == 'sigmoid':
                return self.sigmoid(x)
            elif self.function_type == 'tanh':
                return self.tanh(x)
            elif self.function_type == 'relu':
                return self.relu(x)
            elif self.function_type == 'softmax':
                return self.softmax(x)
    
        def derivative(self, x):
            if self.function_type == 'sigmoid':
                return self.sigmoid_derivative(x)
            elif self.function_type == 'tanh':
                return self.tanh_derivative(x)
            elif self.function_type == 'relu':
                return self.relu_derivative(x)
            elif self.function_type == 'softmax':
                return self.softmax_derivative(x)
    
        def sigmoid(self, x):
            return 1 / (1 + np.exp(-x))
    
        def sigmoid_derivative(self, x):
            sigmoid = self.sigmoid(x)
            return sigmoid * (1 - sigmoid)
    
        def tanh(self, x):
            return np.tanh(x)
    
        def tanh_derivative(self, x):
            return 1 - np.tanh(x)**2
    
        def relu(self, x):
            return np.where(x >= 0, x, 0)
    
        def relu_derivative(self, x):
            return np.where(x >= 0, 1, 0)
    
        def softmax(self, x):
            # Shift by the row-wise max for numerical stability
            exp = np.exp(x - np.max(x, axis=1, keepdims=True))
            return exp / np.sum(exp, axis=1, keepdims=True)

        def softmax_derivative(self, x):
            # Element-wise form (diagonal of the Jacobian only)
            softmax = self.softmax(x)
            return softmax * (1 - softmax)
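
As a quick sanity check on the class above, here is a minimal sketch (not part of the original post) comparing the analytic sigmoid derivative against a central finite difference; the two should agree when the derivative is evaluated on the pre-activation input:

    import numpy as np

    # Hypothetical check, assuming the ActivationFunction class above
    act = ActivationFunction('sigmoid')
    x = np.linspace(-3.0, 3.0, 7)
    h = 1e-6
    numeric = (act.function(x + h) - act.function(x - h)) / (2 * h)
    analytic = act.derivative(x)
    print(np.max(np.abs(numeric - analytic)))  # ~1e-10 if they agree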

Here is my multilayer perceptron implementation (with 1 hidden layer):


    class MLP():
    
        def __init__(
            self, eta, alpha, max_epoch,
            n, k, func_act_h='sigmoid', func_act_o='sigmoid',
            eps=1e-5, range_w=0.05
        ):
            self.eta = eta
            self.alpha = alpha
            self.max_epoch = max_epoch
            self.eps = eps
            self.range_w = range_w
            self.n = n
            self.k = k
            self.func_act_h = ActivationFunction(func_act_h)
            self.func_act_o = ActivationFunction(func_act_o)
    
            self.w, self.w_h, self.w_o = None, None, None
            self.prev_w_h, self.prev_w_o = None, None
            self.errors = None
            self.errors_val = None
    
        def train(self, X, d, train_val=0):
            if train_val != 0:
                X, X_val, d, d_val = stratified_split(X, d, test_size=train_val)
    
            # Weights are drawn from [0, 1); note that range_w is never
            # applied here
            self.w_h = np.random.uniform(size=(X.shape[1] + 1, self.n + 1))
            self.w_o = np.random.uniform(size=(self.n + 1, self.k))
            self.prev_w_h = np.zeros(np.shape(self.w_h))
            self.prev_w_o = np.zeros(np.shape(self.w_o))
    
            self.errors = np.zeros(self.max_epoch)
            if train_val != 0:
                self.errors_val = np.zeros(self.max_epoch)
    
            # Prepend a bias column of ones to the inputs
            X = np.c_[np.ones(X.shape[0]), X]
    
            for epoch in range(self.max_epoch):
                for i in range(X.shape[0]):
                    # Forward pass: hidden activations, then output activations
                    y_h = self.func_act_h.function(np.dot(X[i], self.w_h))
                    y_o = self.func_act_o.function(np.dot(y_h, self.w_o))

                    # Backward pass; the derivatives are evaluated on the
                    # activations y_o and y_h, not on the pre-activations
                    delta_o = (d[i] - y_o) * self.func_act_o.derivative(y_o)
                    delta_h = self.func_act_h.derivative(y_h) * \
                        np.dot(self.w_o, delta_o)

                    # Update weights with momentum
                    new_w_o = self.eta * \
                        np.outer(y_h, delta_o) + self.alpha * self.prev_w_o
                    new_w_h = self.eta * np.outer(X[i], delta_h) + \
                        self.alpha * self.prev_w_h
                    self.w_o += new_w_o
                    self.w_h += new_w_h
                    self.prev_w_o = new_w_o
                    self.prev_w_h = new_w_h
    
        def predict(self, X):
            X = np.c_[np.ones(X.shape[0]), X]
            y_h = self.func_act_h.function(np.dot(X, self.w_h))
            y_o = self.func_act_o.function(np.dot(y_h, self.w_o))
    
            return y_o

I have tried changing the number of neurons in the hidden layer, different weight ranges, and different learning rates, but I still get ~0.5 for every sample.
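
For context, here is a minimal reproduction sketch of the setup described; the hyperparameter values are illustrative assumptions, not taken from the post:

    import numpy as np

    # Hypothetical reproduction: the four XOR samples, sigmoid everywhere.
    # eta, alpha, max_epoch and n are guessed values, not from the post.
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    d = np.array([[0], [1], [1], [0]])

    mlp = MLP(eta=0.5, alpha=0.9, max_epoch=5000, n=4, k=1,
              func_act_h='sigmoid', func_act_o='sigmoid')
    mlp.train(X, d)
    print(mlp.predict(X))  # reportedly stays near 0.5 for every sample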

neural-network artificial-intelligence perceptron mlp

Comments


A: No answers yet