2023-03-09 18:07:47
Держи тогда для старта мой код из одного из последних проектов:
import pathlib
from collections import Counter
from functools import partial

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import optuna
import optuna.integration.lightgbm as lgbo
import scipy as sp
import seaborn as sns
import shap
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import (cohen_kappa_score, mean_squared_error, confusion_matrix,
                             ConfusionMatrixDisplay, roc_auc_score, precision_recall_curve,
                             f1_score, roc_curve, auc, PrecisionRecallDisplay)
from sklearn.model_selection import StratifiedKFold, train_test_split
def objective(trial, data=X_train, target=y_train):
    """Optuna objective: out-of-fold 5-fold CV RMSE of a LightGBM regressor.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample hyperparameters.
    data : pandas.DataFrame, default X_train
        Feature matrix; defaults bind the module-level training data, so
        existing calls (``study.optimize(objective, ...)``) behave as before.
    target : sequence, default y_train
        Labels, indexable by the integer positions produced by the fold split.

    Returns
    -------
    float
        Out-of-fold RMSE across all folds (lower is better).
    """
    params = {
        'application': 'regression',
        'boosting': 'gbdt',
        'metric': 'rmse',
        'n_jobs': 4,
        'reg_alpha': trial.suggest_float('reg_alpha', 0.001, 10.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.001, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 11, 333),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'max_depth': trial.suggest_int('max_depth', 5, 64),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.005, 0.01, 0.02, 0.05, 0.1]),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 0.5),
        'n_estimators': trial.suggest_int('n_estimators', 2000, 8000),
        'cat_smooth': trial.suggest_int('cat_smooth', 10, 100),
        'cat_l2': trial.suggest_int('cat_l2', 1, 20),
        'min_data_per_group': trial.suggest_int('min_data_per_group', 50, 200),
    }
    # Fixed (non-searched) training settings.
    early_stop = 200
    verbose_eval = 200
    n_splits = 5

    # NOTE(review): StratifiedKFold assumes the target is discrete enough to
    # stratify on, even though the model is a regressor — confirm upstream.
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    oof_pred = np.zeros(data.shape[0])

    for train_index, valid_index in kfold.split(data, target):
        X_tr = data.iloc[train_index, :]
        X_val = data.iloc[valid_index, :]
        # Positional indexing kept as a comprehension so plain lists and
        # integer-indexed sequences behave exactly as before.
        y_tr = [target[idx] for idx in train_index]
        y_val = [target[idx] for idx in valid_index]
        print(Counter(y_tr), Counter(y_val))

        d_train = lgb.Dataset(X_tr, label=y_tr, categorical_feature=cat_col)
        d_valid = lgb.Dataset(X_val, label=y_val, categorical_feature=cat_col)
        print('training LGB:')
        # The early_stopping / log_evaluation callbacks replace the
        # `early_stopping_rounds` / `verbose_eval` kwargs removed in LightGBM 4.x.
        model = lgb.train(params,
                          train_set=d_train,
                          valid_sets=[d_train, d_valid],
                          callbacks=[lgb.early_stopping(early_stop),
                                     lgb.log_evaluation(verbose_eval)])
        oof_pred[valid_index] = model.predict(X_val, num_iteration=model.best_iteration)

    # np.sqrt instead of mean_squared_error(..., squared=False): the `squared`
    # kwarg is deprecated/removed in recent scikit-learn releases.
    return float(np.sqrt(mean_squared_error(target, oof_pred)))
# Run the hyperparameter search: minimize the CV RMSE returned by `objective`.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=2)

# Summarize the search results.
trial_count = len(study.trials)
print('Number of finished trials: ', trial_count)
print('Best trial: ', study.best_trial.params)
print('Best value: ', study.best_value)

# Persist an interactive slice plot of the search space.
# NOTE(review): the filename says "optimization_history" but the figure is a
# slice plot — consider renaming the file or switching to plot_optimization_history.
report_path = pathlib.Path('optuna_optimization_history.html')
optuna.visualization.plot_slice(study).write_html(report_path, include_plotlyjs='cdn')
2 views · 15:07