提问人:zesla 提问时间:11/8/2023 最后编辑:Nimanthazesla 更新时间:11/17/2023 访问量:85
无法使用 mlflow/hyperopt 中的log_params记录 lightGBM 参数
Cannot log lightGBM parameter using log_params in mlflow/hyperopt
问:
我正在使用 hyperopt 来优化 lightGBM 的超参数。我使用的代码如下所示。 我正在尝试在目标函数中使用 log_params() 记录超参数。
from sklearn.metrics import f1_score
import lightgbm as lgbm
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval, Trials, SparkTrials
from hyperopt.pyll.base import scope
import mlflow
# Settings for the optimisation run.
run_name = "run_optimization"
max_eval = 100

# Hyperopt search space for the LightGBM classifier; each key is passed to
# LGBMClassifier as a keyword argument by the objective function.
# NOTE(review): `np` is used below but is not imported in this snippet --
# presumably `import numpy as np` was executed earlier; confirm.
lgbm_space = {
    "boosting_type": hp.choice("boosting_type", ["gbdt", "dart", "goss"]),
    # Candidates 400, 450, ..., 950.
    "n_estimators": hp.choice(
        "n_estimators", np.arange(400, 1000, 50, dtype=int)
    ),
    "learning_rate": hp.quniform("learning_rate", 0.02, 0.5, 0.02),
    # scope.int wraps the quniform expression so the value is cast to int.
    "max_depth": scope.int(hp.quniform("max_depth", 2, 16, 1)),
    # Candidates 10, 15, ..., 75.
    "num_leaves": hp.choice("num_leaves", np.arange(10, 80, 5, dtype=int)),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.7, 1.0),
    "subsample": hp.uniform("subsample", 0.7, 1.0),
    # Candidates 10, 15, ..., 45.
    "min_child_samples": hp.choice(
        "min_child_samples", np.arange(10, 50, 5, dtype=int)
    ),
}
search_space = lgbm_space
# Objective function handed to hyperopt.
def objective(search_space):
    """Fit a LightGBM classifier with the sampled parameters and score it.

    Reads X_train, y_train, X_val and y_val from the enclosing scope, logs
    the F1 score and the sampled parameters to MLflow, and returns a dict
    whose ``loss`` is ``1 - f1``.

    NOTE(review): log_params is called on every trial without opening a
    separate run per trial; the reported error ("... was already logged with
    a value of ...") is consistent with all trials writing to the same
    run -- confirm.
    """
    classifier = lgbm.LGBMClassifier(
        **search_space,
        class_weight="balanced",
        n_jobs=-1,
        random_state=123,
    )
    classifier.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        early_stopping_rounds=10,
        verbose=False,
    )

    # Threshold the second probability column at 0.5 to get 0/1 labels.
    class1_proba = classifier.predict_proba(X_val)[:, 1]
    predicted = (class1_proba > 0.5).astype(int)
    f1 = f1_score(y_val, predicted)

    mlflow.log_metric("f1 score", f1)
    mlflow.log_params(search_space)

    return {
        "loss": 1 - f1,
        "status": STATUS_OK,
        "model": classifier,
        "params": search_space,
    }
# Trial history for the search (a plain Trials object despite the name).
spark_trials = Trials()

# Run the whole search inside one MLflow run named by run_name.
with mlflow.start_run(run_name=run_name):
    best_params = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=max_eval,
        trials=spark_trials,
    )
我收到一些错误消息,如下所示:
INVALID_PARAMETER_VALUE: Parameter with key colsample_bytree was already logged with a value of 0.9523828639856076. The attempted new value was 0.7640043300157543
我不确定我做错了什么。
答:
1赞
Kinjal
11/16/2023
#1
在 objective 函数中添加了 with mlflow.start_run(nested=True):。这里也提出了一个类似的问题。现在,代码会为每次评估创建一个单独的文件夹,其中包含该次评估的参数和指标。
import numpy as np
from sklearn.metrics import f1_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import lightgbm as lgbm
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval, Trials, SparkTrials
from hyperopt.pyll.base import scope
import mlflow
# Settings for the optimisation run (only two evaluations for this demo).
run_name = "run_optimization"
max_eval = 2

# Iris data split into training and validation parts, stratified by label.
iris = load_iris()
X_train, X_val, y_train, y_val = train_test_split(
    iris.data,
    iris.target,
    stratify=iris.target,
)

# Hyperopt search space for the LightGBM classifier; each key is passed to
# LGBMClassifier as a keyword argument by the objective function.
lgbm_space = {
    "boosting_type": hp.choice("boosting_type", ["gbdt", "dart", "goss"]),
    # Candidates 400, 450, ..., 950.
    "n_estimators": hp.choice(
        "n_estimators", np.arange(400, 1000, 50, dtype=int)
    ),
    "learning_rate": hp.quniform("learning_rate", 0.02, 0.5, 0.02),
    # scope.int wraps the quniform expression so the value is cast to int.
    "max_depth": scope.int(hp.quniform("max_depth", 2, 16, 1)),
    # Candidates 10, 15, ..., 75.
    "num_leaves": hp.choice("num_leaves", np.arange(10, 80, 5, dtype=int)),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.7, 1.0),
    "subsample": hp.uniform("subsample", 0.7, 1.0),
    # Candidates 10, 15, ..., 45.
    "min_child_samples": hp.choice(
        "min_child_samples", np.arange(10, 50, 5, dtype=int)
    ),
}
search_space = lgbm_space
# Objective function handed to hyperopt.
def objective(search_space):
    """Train one LightGBM candidate and log it to its own nested MLflow run.

    Reads X_train, y_train, X_val and y_val from the enclosing scope.

    Args:
        search_space: Parameter values sampled by hyperopt for this trial;
            forwarded to LGBMClassifier as keyword arguments and logged as
            the run's parameters.

    Returns:
        A dict with ``loss`` (``1 - weighted F1`` on the validation split),
        ``status`` (STATUS_OK) and the fitted ``model``.
    """
    model = lgbm.LGBMClassifier(
        **search_space, class_weight="balanced", n_jobs=-1, random_state=123
    )
    callbacks = [
        # verbose must be a real boolean: any non-zero number is truthy and
        # would turn the early-stopping messages on instead of off.
        lgbm.early_stopping(2, verbose=False),
        lgbm.log_evaluation(period=0),
    ]

    # One nested run per trial, so each trial logs its own parameter values
    # instead of colliding with values already logged in the parent run.
    with mlflow.start_run(nested=True):
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            callbacks=callbacks,
        )
        # Predict class labels directly. Thresholding one probability column
        # only yields 0/1 labels, which is wrong for targets with more than
        # two classes such as iris.
        y_pred = model.predict(X_val)
        f1 = f1_score(y_val, y_pred, average="weighted")
        mlflow.log_metric("f1 score", f1)
        mlflow.log_params(search_space)

    return {"loss": 1 - f1, "status": STATUS_OK, "model": model}
# Trial history for the search (a plain Trials object despite the name).
spark_trials = Trials()

# Parent run for the search; each trial opens its own nested run inside the
# objective function.
with mlflow.start_run(run_name=run_name, nested=True):
    best_params = hyperopt.fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=max_eval,
        trials=spark_trials,
    )

# fmin reports hp.choice parameters as indices into their option lists;
# space_eval maps the result back to the actual parameter values.
best_config = space_eval(search_space, best_params)
print("Best value found: ", best_config)
评论
wandb
wandb
start_run
run_name
log_params
start_run
objective