mirror of
				https://github.com/clearml/clearml
				synced 2025-06-26 18:16:07 +00:00 
			
		
		
		
	Add scikit-learn example
This commit is contained in:
		
							parent
							
								
									ebf08c52c5
								
							
						
					
					
						commit
						bfaf9459e0
					
				| @ -12,7 +12,7 @@ import matplotlib.pyplot as plt | ||||
| 
 | ||||
| from trains import Task | ||||
| 
 | ||||
| task = Task.init(project_name="examples", task_name="joblib test") | ||||
| task = Task.init(project_name="examples", task_name="scikit-learn joblib example") | ||||
| 
 | ||||
| iris = datasets.load_iris() | ||||
| X = iris.data | ||||
							
								
								
									
										153
									
								
								examples/frameworks/scikit-learn/sklearn_matplotlib_example.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								examples/frameworks/scikit-learn/sklearn_matplotlib_example.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,153 @@ | ||||
| import matplotlib.pyplot as plt | ||||
| import numpy as np | ||||
| from sklearn.datasets import load_digits | ||||
| from sklearn.model_selection import ShuffleSplit | ||||
| from sklearn.model_selection import learning_curve | ||||
| from sklearn.naive_bayes import GaussianNB | ||||
| from sklearn.svm import SVC | ||||
| 
 | ||||
| from trains import Task | ||||
| 
 | ||||
| 
 | ||||
| def plot_learning_curve(estimator, title, X, y, axes=None, ylim=None, cv=None, n_jobs=None, | ||||
|                         train_sizes=np.linspace(.1, 1.0, 5)): | ||||
|     """ | ||||
|     Generate 3 plots: the test and training learning curve, the training | ||||
|     samples vs fit times curve, the fit times vs score curve. | ||||
| 
 | ||||
|     Parameters | ||||
|     ---------- | ||||
|     estimator : object type that implements the "fit" and "predict" methods | ||||
|         An object of that type which is cloned for each validation. | ||||
| 
 | ||||
|     title : string | ||||
|         Title for the chart. | ||||
| 
 | ||||
|     X : array-like, shape (n_samples, n_features) | ||||
|         Training vector, where n_samples is the number of samples and | ||||
|         n_features is the number of features. | ||||
| 
 | ||||
|     y : array-like, shape (n_samples) or (n_samples, n_features), optional | ||||
|         Target relative to X for classification or regression; | ||||
|         None for unsupervised learning. | ||||
| 
 | ||||
|     axes : array of 3 axes, optional (default=None) | ||||
|         Axes to use for plotting the curves. | ||||
| 
 | ||||
|     ylim : tuple, shape (ymin, ymax), optional | ||||
|         Defines minimum and maximum yvalues plotted. | ||||
| 
 | ||||
|     cv : int, cross-validation generator or an iterable, optional | ||||
|         Determines the cross-validation splitting strategy. | ||||
|         Possible inputs for cv are: | ||||
| 
 | ||||
|           - None, to use the default 5-fold cross-validation, | ||||
|           - integer, to specify the number of folds. | ||||
|           - :term:`CV splitter`, | ||||
|           - An iterable yielding (train, test) splits as arrays of indices. | ||||
| 
 | ||||
|         For integer/None inputs, if ``y`` is binary or multiclass, | ||||
|         :class:`StratifiedKFold` used. If the estimator is not a classifier | ||||
|         or if ``y`` is neither binary nor multiclass, :class:`KFold` is used. | ||||
| 
 | ||||
|         Refer :ref:`User Guide <cross_validation>` for the various | ||||
|         cross-validators that can be used here. | ||||
| 
 | ||||
|     n_jobs : int or None, optional (default=None) | ||||
|         Number of jobs to run in parallel. | ||||
|         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. | ||||
|         ``-1`` means using all processors. See :term:`Glossary <n_jobs>` | ||||
|         for more details. | ||||
| 
 | ||||
|     train_sizes : array-like, shape (n_ticks,), dtype float or int | ||||
|         Relative or absolute numbers of training examples that will be used to | ||||
|         generate the learning curve. If the dtype is float, it is regarded as a | ||||
|         fraction of the maximum size of the training set (that is determined | ||||
|         by the selected validation method), i.e. it has to be within (0, 1]. | ||||
|         Otherwise it is interpreted as absolute sizes of the training sets. | ||||
|         Note that for classification the number of samples usually have to | ||||
|         be big enough to contain at least one sample from each class. | ||||
|         (default: np.linspace(0.1, 1.0, 5)) | ||||
|     """ | ||||
|     if axes is None: | ||||
|         _, axes = plt.subplots(1, 3, figsize=(20, 5)) | ||||
| 
 | ||||
|     axes[0].set_title(title) | ||||
|     if ylim is not None: | ||||
|         axes[0].set_ylim(*ylim) | ||||
|     axes[0].set_xlabel("Training examples") | ||||
|     axes[0].set_ylabel("Score") | ||||
| 
 | ||||
|     train_sizes, train_scores, test_scores, fit_times, _ = \ | ||||
|         learning_curve(estimator, X, y, cv=cv, n_jobs=n_jobs, | ||||
|                        train_sizes=train_sizes, | ||||
|                        return_times=True) | ||||
|     train_scores_mean = np.mean(train_scores, axis=1) | ||||
|     train_scores_std = np.std(train_scores, axis=1) | ||||
|     test_scores_mean = np.mean(test_scores, axis=1) | ||||
|     test_scores_std = np.std(test_scores, axis=1) | ||||
|     fit_times_mean = np.mean(fit_times, axis=1) | ||||
|     fit_times_std = np.std(fit_times, axis=1) | ||||
| 
 | ||||
|     # Plot learning curve | ||||
|     axes[0].grid() | ||||
|     axes[0].fill_between(train_sizes, train_scores_mean - train_scores_std, | ||||
|                          train_scores_mean + train_scores_std, alpha=0.1, | ||||
|                          color="r") | ||||
|     axes[0].fill_between(train_sizes, test_scores_mean - test_scores_std, | ||||
|                          test_scores_mean + test_scores_std, alpha=0.1, | ||||
|                          color="g") | ||||
|     axes[0].plot(train_sizes, train_scores_mean, 'o-', color="r", | ||||
|                  label="Training score") | ||||
|     axes[0].plot(train_sizes, test_scores_mean, 'o-', color="g", | ||||
|                  label="Cross-validation score") | ||||
|     axes[0].legend(loc="best") | ||||
| 
 | ||||
|     # Plot n_samples vs fit_times | ||||
|     axes[1].grid() | ||||
|     axes[1].plot(train_sizes, fit_times_mean, 'o-') | ||||
|     axes[1].fill_between(train_sizes, fit_times_mean - fit_times_std, | ||||
|                          fit_times_mean + fit_times_std, alpha=0.1) | ||||
|     axes[1].set_xlabel("Training examples") | ||||
|     axes[1].set_ylabel("fit_times") | ||||
|     axes[1].set_title("Scalability of the model") | ||||
| 
 | ||||
|     # Plot fit_time vs score | ||||
|     axes[2].grid() | ||||
|     axes[2].plot(fit_times_mean, test_scores_mean, 'o-') | ||||
|     axes[2].fill_between(fit_times_mean, test_scores_mean - test_scores_std, | ||||
|                          test_scores_mean + test_scores_std, alpha=0.1) | ||||
|     axes[2].set_xlabel("fit_times") | ||||
|     axes[2].set_ylabel("Score") | ||||
|     axes[2].set_title("Performance of the model") | ||||
| 
 | ||||
|     return plt | ||||
| 
 | ||||
| 
 | ||||
| Task.init('examples', 'scikit-learn matplotlib example') | ||||
| 
 | ||||
| fig, fig_axes = plt.subplots(1, 3, figsize=(30, 10)) | ||||
| 
 | ||||
| X, y = load_digits(return_X_y=True) | ||||
| 
 | ||||
| title = "Learning Curves (Naive Bayes)" | ||||
| # Cross validation with 100 iterations to get smoother mean test and train | ||||
| # score curves, each time with 20% data randomly selected as a validation set. | ||||
| cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0) | ||||
| 
 | ||||
| estimator = GaussianNB() | ||||
| plot_learning_curve(estimator, title, X, y, axes=fig_axes, ylim=(0.7, 1.01), cv=cv, n_jobs=4) | ||||
| 
 | ||||
| plt.show() | ||||
| 
 | ||||
| fig, fig_axes = plt.subplots(1, 3, figsize=(30, 10)) | ||||
| 
 | ||||
| title = r"Learning Curves (SVM, RBF kernel, $\gamma=0.001$)" | ||||
| # SVC is more expensive so we do a lower number of CV iterations: | ||||
| cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0) | ||||
| estimator = SVC(gamma=0.001) | ||||
| plot_learning_curve(estimator, title, X, y, axes=fig_axes, ylim=(0.7, 1.01), cv=cv, n_jobs=4) | ||||
| 
 | ||||
| plt.show() | ||||
| 
 | ||||
| print('done') | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 allegroai
						allegroai