From a8746de9ebc266f86ccec201b04e0dfae1251d29 Mon Sep 17 00:00:00 2001
From: Alex Burlacu <alex.burlacu@clear.ml>
Date: Thu, 25 May 2023 17:43:13 +0300
Subject: [PATCH] Adjust LightGBM example

---
 .../frameworks/lightgbm/lightgbm_example.py   | 155 +++++++++++-------
 examples/frameworks/lightgbm/requirements.txt |   1 +
 2 files changed, 93 insertions(+), 63 deletions(-)

diff --git a/examples/frameworks/lightgbm/lightgbm_example.py b/examples/frameworks/lightgbm/lightgbm_example.py
index 16034374..8e1614cb 100644
--- a/examples/frameworks/lightgbm/lightgbm_example.py
+++ b/examples/frameworks/lightgbm/lightgbm_example.py
@@ -1,75 +1,104 @@
 # ClearML - Example of LightGBM integration
 #
 import lightgbm as lgb
+import matplotlib.pyplot as plt
 import pandas as pd
 from sklearn.metrics import mean_squared_error
 
 from clearml import Task
 
-# Connecting ClearML with the current process,
-# from here on everything is logged automatically
-task = Task.init(project_name="examples", task_name="LightGBM")
 
-print('Loading data...')
+def main():
+    # Connecting ClearML with the current process,
+    # from here on everything is logged automatically
+    task = Task.init(project_name="examples", task_name="LightGBM")
 
-# Load or create your dataset
+    print('Loading data...')
+
+    # Load or create your dataset
+
+    df_train = pd.read_csv(
+        'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train',
+        header=None, sep='\t'
+    )
+    df_test = pd.read_csv(
+        'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.test',
+        header=None, sep='\t'
+    )
+
+    y_train = df_train[0]
+    y_test = df_test[0]
+    X_train = df_train.drop(0, axis=1)
+    X_test = df_test.drop(0, axis=1)
+
+    # Create dataset for lightgbm
+    lgb_train = lgb.Dataset(X_train, y_train)
+    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+
+    # Specify your configurations as a dict
+    params = {
+        'boosting_type': 'gbdt',
+        'objective': 'regression',
+        'metric': {'l2', 'l1'},
+        'num_leaves': 200,
+        'max_depth': 0,
+        'learning_rate': 0.05,
+        'feature_fraction': 0.9,
+        'bagging_fraction': 0.8,
+        'bagging_freq': 5,
+        'verbose': 0,
+        'force_col_wise': True,
+        'deterministic': True,
+    }
+
+    evals_result = {}  # to record eval results for plotting
+
+    print('Starting training...')
+
+    # Train
+    gbm = lgb.train(
+        params,
+        lgb_train,
+        num_boost_round=500,
+        valid_sets=[lgb_train, lgb_eval],
+        feature_name=[f'f{i + 1}' for i in range(X_train.shape[-1])],
+        categorical_feature=[21],
+        callbacks=[
+            lgb.record_evaluation(evals_result),
+        ],
+    )
+
+    print('Saving model...')
+
+    # Save model to file
+    gbm.save_model('model.txt')
+
+    print('Plotting metrics recorded during training...')
+
+    ax = lgb.plot_metric(evals_result, metric='l1')
+    plt.show()
+
+    print('Plotting feature importances...')
+
+    ax = lgb.plot_importance(gbm, max_num_features=10)
+    plt.show()
+
+    print('Plotting split value histogram...')
+
+    ax = lgb.plot_split_value_histogram(gbm, feature='f26', bins='auto')
+    plt.show()
+
+    print('Loading model to predict...')
+
+    # Load model to predict
+    bst = lgb.Booster(model_file='model.txt')
+
+    # Can only predict with the best iteration (or the saving iteration)
+    y_pred = bst.predict(X_test)
+
+    # Eval with loaded model
+    print("The rmse of loaded model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5)
 
 
-df_train = pd.read_csv(
-    'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.train',
-    header=None, sep='\t'
-)
-df_test = pd.read_csv(
-    'https://raw.githubusercontent.com/microsoft/LightGBM/master/examples/regression/regression.test',
-    header=None, sep='\t'
-)
-
-y_train = df_train[0]
-y_test = df_test[0]
-X_train = df_train.drop(0, axis=1)
-X_test = df_test.drop(0, axis=1)
-
-# Create dataset for lightgbm
-lgb_train = lgb.Dataset(X_train, y_train)
-lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
-
-# Specify your configurations as a dict
-params = {
-    'boosting_type': 'gbdt',
-    'objective': 'regression',
-    'metric': {'l2', 'l1'},
-    'num_leaves': 31,
-    'learning_rate': 0.05,
-    'feature_fraction': 0.9,
-    'bagging_fraction': 0.8,
-    'bagging_freq': 5,
-    'verbose': 0,
-    'force_col_wise': True,
-}
-
-print('Starting training...')
-
-# Train
-gbm = lgb.train(
-    params,
-    lgb_train,
-    num_boost_round=20,
-    valid_sets=[lgb_eval],
-    callbacks=[lgb.early_stopping(stopping_rounds=5)],
-)
-
-print('Saving model...')
-
-# Save model to file
-gbm.save_model('model.txt')
-
-print('Loading model to predict...')
-
-# Load model to predict
-bst = lgb.Booster(model_file='model.txt')
-
-# Can only predict with the best iteration (or the saving iteration)
-y_pred = bst.predict(X_test)
-
-# Eval with loaded model
-print("The rmse of loaded model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5)
+if __name__ == '__main__':
+    main()
diff --git a/examples/frameworks/lightgbm/requirements.txt b/examples/frameworks/lightgbm/requirements.txt
index ac7ee02e..8508c428 100644
--- a/examples/frameworks/lightgbm/requirements.txt
+++ b/examples/frameworks/lightgbm/requirements.txt
@@ -1,4 +1,5 @@
 lightgbm
 scikit-learn
 pandas
+matplotlib
 clearml
\ No newline at end of file