Adjust LightGBM example

This commit is contained in:
Alex Burlacu 2023-05-25 17:43:13 +03:00
parent 813777a4cc
commit a8746de9eb
2 changed files with 93 additions and 63 deletions

View File

@@ -1,11 +1,14 @@
# ClearML - Example of LightGBM integration # ClearML - Example of LightGBM integration
# #
import lightgbm as lgb import lightgbm as lgb
import matplotlib.pyplot as plt
import pandas as pd import pandas as pd
from sklearn.metrics import mean_squared_error from sklearn.metrics import mean_squared_error
from clearml import Task from clearml import Task
def main():
# Connecting ClearML with the current process, # Connecting ClearML with the current process,
# from here on everything is logged automatically # from here on everything is logged automatically
task = Task.init(project_name="examples", task_name="LightGBM") task = Task.init(project_name="examples", task_name="LightGBM")
@@ -14,7 +17,6 @@ print('Loading data...')
# Load or create your dataset # Load or create your dataset
df_train = pd.read_csv( df_train = pd.read_csv(
'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train', 'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train',
header=None, sep='\t' header=None, sep='\t'
@@ -38,24 +40,32 @@ params = {
'boosting_type': 'gbdt', 'boosting_type': 'gbdt',
'objective': 'regression', 'objective': 'regression',
'metric': {'l2', 'l1'}, 'metric': {'l2', 'l1'},
'num_leaves': 31, 'num_leaves': 200,
'max_depth': 0,
'learning_rate': 0.05, 'learning_rate': 0.05,
'feature_fraction': 0.9, 'feature_fraction': 0.9,
'bagging_fraction': 0.8, 'bagging_fraction': 0.8,
'bagging_freq': 5, 'bagging_freq': 5,
'verbose': 0, 'verbose': 0,
'force_col_wise': True, 'force_col_wise': True,
'deterministic': True,
} }
evals_result = {} # to record eval results for plotting
print('Starting training...') print('Starting training...')
# Train # Train
gbm = lgb.train( gbm = lgb.train(
params, params,
lgb_train, lgb_train,
num_boost_round=20, num_boost_round=500,
valid_sets=[lgb_eval], valid_sets=[lgb_train, lgb_eval],
callbacks=[lgb.early_stopping(stopping_rounds=5)], feature_name=[f'f{i + 1}' for i in range(X_train.shape[-1])],
categorical_feature=[21],
callbacks=[
lgb.record_evaluation(evals_result),
],
) )
print('Saving model...') print('Saving model...')
@@ -63,6 +73,21 @@ print('Saving model...')
# Save model to file # Save model to file
gbm.save_model('model.txt') gbm.save_model('model.txt')
print('Plotting metrics recorded during training...')
ax = lgb.plot_metric(evals_result, metric='l1')
plt.show()
print('Plotting feature importances...')
ax = lgb.plot_importance(gbm, max_num_features=10)
plt.show()
print('Plotting split value histogram...')
ax = lgb.plot_split_value_histogram(gbm, feature='f26', bins='auto')
plt.show()
print('Loading model to predict...') print('Loading model to predict...')
# Load model to predict # Load model to predict
@@ -73,3 +98,7 @@ y_pred = bst.predict(X_test)
# Eval with loaded model # Eval with loaded model
print("The rmse of loaded model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5) print("The rmse of loaded model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5)
if __name__ == '__main__':
main()

View File

@@ -1,4 +1,5 @@
lightgbm lightgbm
scikit-learn scikit-learn
pandas pandas
matplotlib
clearml clearml