LSTM Model Not Improving

Asked by: MAHADEV BHANDARI · Asked: 10/6/2023 · Updated: 10/6/2023 · Views: 22

Q:

I have been working on this LSTM model for over two weeks now, and it is not showing any progress. The classification report is pitiful.

I had to add a dummy class -1 for each padded row, to make sure the sequence generated for every unit is as long as the number of rows available for the unit with the most rows. I sometimes suspect this is what causes the problem, but I see no choice other than assigning that class -1 to the all-zero rows, which are the dummy rows. To make sure the model does not learn from these rows, I supply a sample weight, which is a boolean array.
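In other words, the padding scheme looks roughly like this (a minimal sketch with made-up shapes, not my actual data):

import numpy as np

# One unit with 3 real timesteps, padded out to max_len = 5.
labels = np.array([2, 1, 0])         # real labels for this unit
padded = np.full(5, -1)              # dummy class -1 everywhere
padded[:len(labels)] = labels        # real labels at the front

# Boolean sample weight: True for real rows, False for padding,
# so the loss ignores the dummy timesteps.
sample_weight = padded != -1
print(padded)         # [ 2  1  0 -1 -1]
print(sample_weight)  # [ True  True  True False False]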

In the classification report, every metric for that class -1 is zero. For the other, real classes, the highest value I get when evaluating on the test data is 0.36, which is useless.

I am new to deep learning and trying to learn how to implement LSTMs. I am trying to apply an LSTM to a dataset from Kaggle.

Kaggle link

The basic task I am trying to accomplish is predicting the remaining useful life (RUL) of turbofan engines.

First, I preprocess the data with the following:

import pandas as pd


class DataPreprocessor:
    def __init__(self,
                 train_dataframe: pd.DataFrame,
                 test_dataframe: pd.DataFrame):
        """
        A class to preprocess the train and test dataframes.
        """
        required_cols = ['unit_number', 'time_in_cycles', 'setting_1', 'setting_2', 
                       'T24', 'T30', 'T50', 'P30', 'Nf', 'Nc', 'Ps30', 'phi', 
                       'NRf', 'NRc', 'BPR', 'htBleed', 'W31', 'W32']
        missing_columns = [col for col in required_cols if col not in train_dataframe.columns]

        assert not missing_columns, f"The given Dataframe is missing the following columns: {', '.join(missing_columns)}"
        
        self.train_dataframe = train_dataframe
        self.test_dataframe = test_dataframe
        
    def extract_rul(self):
        # RUL = max cycles observed for the unit minus the current cycle.
        fd_rul_train = self.train_dataframe.groupby("unit_number")["time_in_cycles"].max().reset_index()
        fd_rul_train.columns = ["unit_number", "max_cycles"]
        new_df_train = self.train_dataframe.merge(fd_rul_train, on="unit_number", how="left")
        new_df_train["RUL"] = new_df_train["max_cycles"] - new_df_train["time_in_cycles"]
        new_df_train.drop("max_cycles", axis=1, inplace=True)
        self.train_dataframe = new_df_train

        fd_rul_test = self.test_dataframe.groupby("unit_number")["time_in_cycles"].max().reset_index()
        fd_rul_test.columns = ["unit_number", "max_cycles"]
        new_df_test = self.test_dataframe.merge(fd_rul_test, on="unit_number", how="left")
        new_df_test["RUL"] = new_df_test["max_cycles"] - new_df_test["time_in_cycles"]
        new_df_test.drop("max_cycles", axis=1, inplace=True)
        self.test_dataframe = new_df_test

    def select_useful_cols(self,
                           low_corr: float = 0.20,
                           high_corr: float = 0.90):

        # Find and drop columns with low correlation to the target.
        low_corr_cols = [col for col in self.train_dataframe.columns
                         if col != "RUL" and col != "unit_number" and
                         abs(self.train_dataframe[col].corr(self.train_dataframe["RUL"])) < low_corr]
        self.train_dataframe.drop(columns=low_corr_cols, inplace=True)
        self.test_dataframe.drop(columns=low_corr_cols, inplace=True)

        # Find pairs of highly correlated columns and drop one column per pair.
        high_corr_pairs = [(self.train_dataframe.columns[i], self.train_dataframe.columns[j])
                           for i in range(len(self.train_dataframe.columns) - 1)
                           for j in range(i + 1, len(self.train_dataframe.columns) - 1)
                           if abs(self.train_dataframe.iloc[:, i].corr(self.train_dataframe.iloc[:, j])) > high_corr]
        to_drop = list({pair[0] for pair in high_corr_pairs})
        self.train_dataframe.drop(to_drop, axis=1, inplace=True)
        self.test_dataframe.drop(to_drop, axis=1, inplace=True)

    def rul_classifier(self,
                       min_point: int = -1,
                       max_life: int = 800,
                       mid_life: int = 100,
                       low_life: int = 50,
                       deadly: int = 30):

        self.train_dataframe["RUL_Class"] = pd.cut(self.train_dataframe["RUL"],
                                                   bins=[min_point, deadly, low_life, mid_life, max_life],
                                                   labels=["Deadly", "Final Warning", "Life-Halfway", "Brand-New"])
        self.test_dataframe["RUL_Class"] = pd.cut(self.test_dataframe["RUL"],
                                                  bins=[min_point, deadly, low_life, mid_life, max_life],
                                                  labels=["Deadly", "Final Warning", "Life-Halfway", "Brand-New"])

        self.bins = {"Deadly": f"0-{deadly}",
                     "Final Warning": f"{deadly + 1}-{low_life}",
                     "Life-Halfway": f"{low_life + 1}-{mid_life}",
                     "Brand-New": f"{mid_life + 1}+"}


    def process_data(self,
                     low_corr: float = 0.20,
                     high_corr: float = 0.90,
                     min_point: int = -1,
                     max_life: int = 800,
                     mid_life: int = 100,
                     low_life: int = 50,
                     deadly: int = 30):

        self.extract_rul()
        # self.select_useful_cols(low_corr, high_corr)
        self.rul_classifier(min_point, max_life, mid_life, low_life, deadly)
        return self.train_dataframe, self.test_dataframe, self.bins


dataprocessor = DataPreprocessor(fd001_train, fd001_test)
train_data_processed, test_data_processed, bins = dataprocessor.process_data()
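As a sanity check on the binning, pd.cut with these bin edges assigns the labels like this (a toy RUL series, not my real data):

import pandas as pd

rul = pd.Series([5, 30, 45, 80, 300])
labels = pd.cut(rul,
                bins=[-1, 30, 50, 100, 800],
                labels=["Deadly", "Final Warning", "Life-Halfway", "Brand-New"])
print(labels.tolist())
# ['Deadly', 'Deadly', 'Final Warning', 'Life-Halfway', 'Brand-New']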

When I found a class imbalance, I used the SMOTE technique to balance it:

train_data_processed_new = train_data_processed.copy(deep=True)
test_data_processed_new = test_data_processed.copy(deep=True)


# !pip install imbalanced-learn
import numpy as np
from collections import Counter
from imblearn.over_sampling import SMOTE
from scipy.spatial.distance import jensenshannon


X = train_data_processed_new.iloc[:,:-1]
y = train_data_processed_new.iloc[:,-1]


# Try several k_neighbors values for SMOTE and keep the one whose
# resampled features stay closest (lowest median Jensen-Shannon
# divergence) to the original feature distribution.
k_vals = range(1, 14)
js_divergence = []


for k in k_vals:
    smote = SMOTE(sampling_strategy="auto", random_state=72, k_neighbors=k)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    js_divert = np.median([jensenshannon(X.iloc[:, i].sample(n=X.shape[0] // 2, random_state=42),
                                         X_resampled.iloc[:, i].sample(n=X.shape[0] // 2,
                                                                       random_state=42))
                           for i in range(X.shape[1])])
    js_divergence.append(js_divert)

optimum_k = k_vals[np.argmin(js_divergence)]


smote = SMOTE(sampling_strategy="auto", random_state=72, k_neighbors=optimum_k)
X_resampled, y_resampled = smote.fit_resample(X, y)
smote_train_data_processed_new = pd.concat([X_resampled, y_resampled], axis=1)
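To confirm the resampling actually balanced the classes, I compare the label counts before and after (this is what Counter is imported for):

# Counts after SMOTE should all equal the largest original class count.
print("before:", Counter(y))
print("after: ", Counter(y_resampled))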

Then I started building the sequences:

import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, StandardScaler

train_data_processed_new_features = smote_train_data_processed_new.iloc[:, 2:-2]
test_data_processed_new_features = test_data_processed_new.iloc[:, 2:-2]


train_target = smote_train_data_processed_new.iloc[:, -1]
test_target = test_data_processed_new.iloc[:, -1]


lblencoder = LabelEncoder()
lblencoder.fit(train_target)
train_target_encoded = lblencoder.transform(train_target)
test_target_encoded = lblencoder.transform(test_target)


scaler = StandardScaler()
scaler.fit(train_data_processed_new_features)

train_features_scaled = scaler.transform(train_data_processed_new_features)
test_features_scaled = scaler.transform(test_data_processed_new_features)


# Prepend the (unscaled) unit number as column 0 so that rows can be
# grouped back into per-unit sequences below.
train_features_scaled = np.insert(train_features_scaled, 0, smote_train_data_processed_new["unit_number"].values, axis=1)
test_features_scaled = np.insert(test_features_scaled, 0, test_data_processed_new["unit_number"].values, axis=1)


# Group rows by unit number into one sequence (and one label list) per unit.
unit_number_dict = {}
label_dict = {}
for i in range(len(train_features_scaled)):
    unit_number = train_features_scaled[i][0]
    if unit_number not in unit_number_dict:
        unit_number_dict[unit_number] = []
        label_dict[unit_number] = []
    unit_number_dict[unit_number].append(train_features_scaled[i][1:])
    label_dict[unit_number].append(train_target_encoded[i])

train_sequence = list(unit_number_dict.values())
train_target = list(label_dict.values())


# Zero-pad every sequence to the length of the longest unit, and pad
# the targets with the dummy class -1.
max_len = np.max([len(x) for x in train_sequence])
padded_train_seq = np.zeros(shape=(len(train_sequence), max_len, len(train_sequence[0][0])))
padded_train_target = np.full((len(train_sequence), max_len, 1), -1)

for i, seq in enumerate(train_sequence):
    padded_train_seq[i, :len(seq), :] = seq

for i, lbl in enumerate(train_target):
    lbl_reshaped = np.array(lbl).reshape(-1, 1)
    padded_train_target[i, :len(lbl_reshaped), :] = lbl_reshaped


# Boolean mask: True where a timestep has at least one non-zero feature,
# False for the all-zero padded timesteps.
sample_weight = tf.math.logical_not(tf.reduce_all(tf.math.equal(padded_train_seq, 0), axis=-1))


# Same grouping and padding for the test set, reusing the training max_len.
unit_number_dict = {}
label_dict = {}
for i in range(len(test_features_scaled)):
    unit_number = test_features_scaled[i][0]
    if unit_number not in unit_number_dict:
        unit_number_dict[unit_number] = []
        label_dict[unit_number] = []
    unit_number_dict[unit_number].append(test_features_scaled[i][1:])
    label_dict[unit_number].append(test_target_encoded[i])

test_sequence = list(unit_number_dict.values())
test_target = list(label_dict.values())

padded_test_seq = np.zeros(shape=(len(test_sequence), max_len, len(train_sequence[0][0])))
padded_test_target = np.full((len(test_sequence), max_len, 1), -1)
for i, seq in enumerate(test_sequence):
    padded_test_seq[i, :len(seq), :] = seq
for i, lbl in enumerate(test_target):
    lbl_reshaped = np.array(lbl).reshape(-1, 1)
    padded_test_target[i, :len(lbl_reshaped), :] = lbl_reshaped


# Reshape the mask to (units, max_len, 1) to match the target shape.
sample_weight = np.reshape(sample_weight, (padded_train_target.shape))
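At this point the shapes should agree on (units, max_len): (units, max_len, features) for the sequences, and (units, max_len, 1) for the targets and the sample weights. A quick check:

print(padded_train_seq.shape)     # (n_units, max_len, n_features)
print(padded_train_target.shape)  # (n_units, max_len, 1)
print(sample_weight.shape)        # (n_units, max_len, 1)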

Then the LSTM model:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (InputLayer, LSTM, BatchNormalization,
                                     Dropout, Dense, TimeDistributed)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical

# `opt` and `custom_loss` are defined elsewhere in my notebook.
l2_reg_dense = 0.75
l2_reg_lstm = 0.75
model = Sequential()
model.add(InputLayer((padded_train_seq.shape[1], padded_train_seq.shape[2])))

model.add(LSTM(units=100, return_sequences=True, kernel_regularizer=l2(l2_reg_lstm)))
model.add(LSTM(units=50, return_sequences=True, kernel_regularizer=l2(l2_reg_lstm)))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(LSTM(units=50, return_sequences=True, kernel_regularizer=l2(l2_reg_lstm)))
model.add(LSTM(units=25, return_sequences=True, kernel_regularizer=l2(l2_reg_lstm)))
model.add(BatchNormalization())
model.add(Dropout(0.2))


model.add(TimeDistributed(Dense(units=100, activation="gelu", kernel_regularizer=l2(l2_reg_dense))))
model.add(TimeDistributed(Dense(units=len(lblencoder.classes_), activation="softmax", kernel_regularizer=l2(l2_reg_dense))))

model.compile(optimizer=opt, loss=custom_loss, metrics=["accuracy"], weighted_metrics=["accuracy"])
model.summary()


callbacks = [tf.keras.callbacks.ReduceLROnPlateau(patience=3, verbose=1, min_lr=1e-4, monitor="val_accuracy"),
             tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, monitor="val_loss")]


# One-hot encode the padded targets for the softmax output.
y_train_onehot = to_categorical(padded_train_target, num_classes=len(lblencoder.classes_))


batch_size = 8
epochs = 130
history = model.fit(x=padded_train_seq,
                    y=y_train_onehot,
                    batch_size=batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    validation_split=0.3,
                    sample_weight=sample_weight)
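This is roughly how I build the classification report from the per-timestep outputs (a sketch using sklearn's classification_report; the mask line shows how the padded -1 timesteps could be kept out of the report entirely):

from sklearn.metrics import classification_report

# Per-timestep class probabilities -> predicted labels.
y_pred = model.predict(padded_test_seq).argmax(axis=-1).ravel()
y_true = padded_test_target.ravel()

# Keep only real timesteps; padded ones carry the dummy label -1.
mask = y_true != -1
print(classification_report(y_true[mask], y_pred[mask],
                            labels=list(range(len(lblencoder.classes_))),
                            target_names=lblencoder.classes_))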

No matter what changes I make, the model simply does not work. The classification report looks pitiful, and I have been working on this for the past two weeks with no progress at all, so I need help from this wonderful community to improve it. I am confused about whether I made a mistake somewhere in the data processing or whether the problem is with the model itself.

python · deep-learning · nlp · lstm · sequence


A: No answers yet