无法使用 mlflow/hyperopt 中的log_params记录 lightGBM 参数-解网

问：

我正在使用 hyperopt 来优化 lightGBM 的超参数。我使用的代码如下所示。我正在尝试在目标函数中使用 log_params（）记录超参数。

from sklearn.metrics import f1_score
import lightgbm as lgbm
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval, Trials, SparkTrials
from hyperopt.pyll.base import scope 
import mlflow


lgbm_space = {
        'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
        'n_estimators': hp.choice('n_estimators', np.arange(400, 1000, 50, dtype=int)), 
        'learning_rate' : hp.quniform('learning_rate', 0.02, 0.5, 0.02), 
        'max_depth': scope.int(hp.quniform('max_depth', 2, 16, 1)),
        'num_leaves': hp.choice("num_leaves", np.arange(10, 80, 5, dtype=int)),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 1.0),
        'subsample': hp.uniform('subsample', 0.7, 1.0), 
        'min_child_samples': hp.choice('min_child_samples', np.arange(10, 50, 5, dtype=int))

}

search_space = lgbm_space
run_name = "run_optimization" 
max_eval = 100

#define objective function
def objective (search_space):
    model = lgbm.LGBMClassifier( **search_space, class_weight='balanced', n_jobs=-1, random_state=123 )      
    model.fit(X_train, y_train,            
           eval_set= [ ( X_val, y_val) ], 
           early_stopping_rounds= 10, 
           verbose=False)    
    y_pred = model.predict_proba(X_val)[:,1]   
    f1 = f1_score(y_val, (y_pred>0.5).astype(int) )
    mlflow.log_metric('f1 score', f1)
    mlflow.log_params(search_space)
    score = 1 - f1
    
    return {'loss': score, 'status': STATUS_OK, 'model': model, 'params': search_space}

spark_trials = Trials()
with mlflow.start_run(run_name = run_name):
    best_params = hyperopt.fmin(
                    fn = objective,
                    space = search_space,
                    algo = tpe.suggest,
                    max_evals = max_eval, 
                    trials = spark_trials )

我收到一些错误消息，如下所示：

INVALID_PARAMETER_VALUE: Parameter with key colsample_bytree was already logged with a value of 0.9523828639856076. The attempted new value was 0.7640043300157543

我不确定我做错了什么。

python mlflow lightgbm 超选择

import numpy as np
from sklearn.metrics import f1_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import lightgbm as lgbm
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval, Trials, SparkTrials
from hyperopt.pyll.base import scope 
import mlflow

iris = load_iris()
X_train, X_val, y_train, y_val = train_test_split(iris.data, iris.target, stratify=iris.target)


lgbm_space = {
        'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
        'n_estimators': hp.choice('n_estimators', np.arange(400, 1000, 50, dtype=int)), 
        'learning_rate' : hp.quniform('learning_rate', 0.02, 0.5, 0.02), 
        'max_depth': scope.int(hp.quniform('max_depth', 2, 16, 1)),
        'num_leaves': hp.choice("num_leaves", np.arange(10, 80, 5, dtype=int)),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.7, 1.0),
        'subsample': hp.uniform('subsample', 0.7, 1.0), 
        'min_child_samples': hp.choice('min_child_samples', np.arange(10, 50, 5, dtype=int))

}

search_space = lgbm_space
run_name = "run_optimization" 
max_eval = 2

#define objective function
def objective (search_space):
    model = lgbm.LGBMClassifier( **search_space, class_weight='balanced', n_jobs=-1, random_state=123 )
    callbacks = [lgbm.early_stopping(2, verbose=-10), lgbm.log_evaluation(period=0)]
    with mlflow.start_run(nested=True):
        model.fit(X_train, y_train,
                  eval_set= [(X_val, y_val)],
                  callbacks = callbacks
                  # early_stopping_rounds= 10, 
               # verbose=False
                 )    
        y_pred = model.predict_proba(X_val)[:,1]   
        f1 = f1_score(y_val, (y_pred>0.5).astype(int), average='weighted')
        mlflow.log_metric('f1 score', f1)
        score = 1 - f1
        mlflow.log_params(search_space)

    return {'loss': score, 'status': STATUS_OK, 'model': model} #'params': search_space}

spark_trials = Trials()
with mlflow.start_run(run_name = run_name, nested=True):
    best_params = hyperopt.fmin(
                    fn = objective,
                    space = search_space,
                    algo = tpe.suggest,
                    max_evals = max_eval, 
                    trials = spark_trials)
print("Best value found: ", best_params)

上一个：在 Python 中对角化矩阵时保持对角线元素的顺序

下一个：当pyspark数据帧有嵌套列时，如何将NONES转换为空字符串？

无法使用 mlflow/hyperopt 中的log_params记录 lightGBM 参数

Cannot log lightGBM parameter using log_params in mlflow/hyperopt

评论