From 01d337f1aa1dfe73d0b0271b7c156d83856b316a Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Mon, 5 Aug 2024 15:46:11 +0300
Subject: [PATCH] Black formatting

---
 examples/datasets/csv_dataset_creation.py    |  5 +--
 examples/datasets/data_ingestion.py          | 20 +++--------
 examples/datasets/dataset_creation.py        |  8 ++---
 examples/datasets/dataset_folder_syncing.py  | 10 +++---
 .../datasets/multi_parent_child_dataset.py   | 17 ++++-----
 .../datasets/single_parent_child_dataset.py  | 13 +++----
 .../urbansounds_dataset_preprocessing.py     | 36 +++++--------------
 examples/datasets/urbansounds_get_data.py    |  8 ++---
 8 files changed, 42 insertions(+), 75 deletions(-)

diff --git a/examples/datasets/csv_dataset_creation.py b/examples/datasets/csv_dataset_creation.py
index 8dbb4999..5aec21a2 100644
--- a/examples/datasets/csv_dataset_creation.py
+++ b/examples/datasets/csv_dataset_creation.py
@@ -6,7 +6,8 @@ def main():
 
     print("STEP1 : Downloading CSV dataset")
     csv_file_path = manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv")
+        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv"
+    )
 
     print("STEP2 : Creating a dataset")
     # By default, clearml data uploads to the clearml fileserver. Adding output_uri argument to the create() method
@@ -23,5 +24,5 @@ def main():
     print("We are done, have a great day :)")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/examples/datasets/data_ingestion.py b/examples/datasets/data_ingestion.py
index 638d01b2..6157da87 100644
--- a/examples/datasets/data_ingestion.py
+++ b/examples/datasets/data_ingestion.py
@@ -42,19 +42,13 @@ dataset_path = Dataset.get(
 
 # Dataset and Dataloader initializations
 transform = transforms.Compose([transforms.ToTensor()])
-trainset = datasets.CIFAR10(
-    root=dataset_path, train=True, download=False, transform=transform
-)
+trainset = datasets.CIFAR10(root=dataset_path, train=True, download=False, transform=transform)
 trainloader = torch.utils.data.DataLoader(
     trainset, batch_size=params.get("batch_size", 4), shuffle=True, num_workers=10
 )
 
-testset = datasets.CIFAR10(
-    root=dataset_path, train=False, download=False, transform=transform
-)
-testloader = torch.utils.data.DataLoader(
-    testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10
-)
+testset = datasets.CIFAR10(root=dataset_path, train=False, download=False, transform=transform)
+testloader = torch.utils.data.DataLoader(testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10)
 
 classes = (
     "plane",
@@ -87,14 +81,10 @@ def predictions_gt_images_handler(engine, logger, *args, **kwargs):
         ax = fig.add_subplot(num_x, num_y, idx + 1, xticks=[], yticks=[])
         ax.imshow(trans(x[idx]))
         ax.set_title(
-            "{0} {1:.1f}% (label: {2})".format(
-                classes[preds], probs * 100, classes[y[idx]]
-            ),
+            "{0} {1:.1f}% (label: {2})".format(classes[preds], probs * 100, classes[y[idx]]),
             color=("green" if preds == y[idx] else "red"),
         )
-    logger.writer.add_figure(
-        "predictions vs actuals", figure=fig, global_step=engine.state.epoch
-    )
+    logger.writer.add_figure("predictions vs actuals", figure=fig, global_step=engine.state.epoch)
 
 
 class Net(nn.Module):
diff --git a/examples/datasets/dataset_creation.py b/examples/datasets/dataset_creation.py
index 9fa9cc92..6a4395ce 100644
--- a/examples/datasets/dataset_creation.py
+++ b/examples/datasets/dataset_creation.py
@@ -3,13 +3,9 @@ from clearml import StorageManager, Dataset
 
 manager = StorageManager()
-dataset_path = manager.get_local_copy(
-    remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
-)
+dataset_path = manager.get_local_copy(remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
 
-dataset = Dataset.create(
-    dataset_name="cifar_dataset", dataset_project="dataset_examples"
-)
+dataset = Dataset.create(dataset_name="cifar_dataset", dataset_project="dataset_examples")
 
 # Prepare and clean data here before it is added to the dataset
 
diff --git a/examples/datasets/dataset_folder_syncing.py b/examples/datasets/dataset_folder_syncing.py
index a039642a..cefb9de8 100644
--- a/examples/datasets/dataset_folder_syncing.py
+++ b/examples/datasets/dataset_folder_syncing.py
@@ -8,8 +8,9 @@ from clearml import Dataset, StorageManager
 
 def download_mnist_dataset():
     manager = StorageManager()
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"
 
@@ -28,7 +29,8 @@ def main():
 
     print("STEP3 : Creating the dataset")
     mnist_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)")
+        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)"
+    )
 
     print("STEP4 : Syncing train dataset")
     shutil.copytree(mnist_dataset_train, mnist_train_path)  # Populating dataset folder with TRAIN images
@@ -46,5 +48,5 @@ def main():
     print("We are done, have a great day :)")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/examples/datasets/multi_parent_child_dataset.py b/examples/datasets/multi_parent_child_dataset.py
index 51f83207..a9d2dcf5 100644
--- a/examples/datasets/multi_parent_child_dataset.py
+++ b/examples/datasets/multi_parent_child_dataset.py
@@ -7,29 +7,30 @@ def main():
     manager = StorageManager()
 
     print("STEP1 : Downloading mnist dataset")
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"
 
     print("STEP2 : Creating the training dataset")
-    train_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset")
+    train_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset")
     train_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN")
     train_dataset.upload()
     train_dataset.finalize()
 
     print("STEP3 : Creating the testing dataset")
-    test_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset")
+    test_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset")
     test_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST")
     test_dataset.upload()
     test_dataset.finalize()
 
     print("STEP4 : Create a child dataset with both mnist train and test data")
     child_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Complete Dataset",
-        parent_datasets=[train_dataset.id, test_dataset.id])
+        dataset_project="dataset_examples/MNIST",
+        dataset_name="MNIST Complete Dataset",
+        parent_datasets=[train_dataset.id, test_dataset.id],
+    )
 
     child_dataset.upload()
     child_dataset.finalize()
diff --git a/examples/datasets/single_parent_child_dataset.py b/examples/datasets/single_parent_child_dataset.py
index 664cd91c..88700f66 100644
--- a/examples/datasets/single_parent_child_dataset.py
+++ b/examples/datasets/single_parent_child_dataset.py
@@ -7,21 +7,22 @@ def main():
     manager = StorageManager()
 
     print("STEP1 : Downloading mnist dataset")
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"
 
     print("STEP2 : Creating the training dataset")
-    mnist_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Training Dataset")
+    mnist_dataset = Dataset.create(dataset_project="dataset_examples", dataset_name="MNIST Training Dataset")
    mnist_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN")
     mnist_dataset.upload()
     mnist_dataset.finalize()
 
     print("STEP3 : Create a child dataset of mnist dataset using TEST Dataset")
     child_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id])
+        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id]
+    )
     child_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST")
     child_dataset.upload()
     child_dataset.finalize()
@@ -29,5 +30,5 @@ def main():
     print("We are done, have a great day :)")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/examples/datasets/urbansounds_dataset_preprocessing.py b/examples/datasets/urbansounds_dataset_preprocessing.py
index 7230afb9..19b418d5 100644
--- a/examples/datasets/urbansounds_dataset_preprocessing.py
+++ b/examples/datasets/urbansounds_dataset_preprocessing.py
@@ -43,9 +43,7 @@ class PreProcessor:
         # Make sure all spectrograms are the same size
         fixed_length = 3 * (self.configuration["resample_freq"] // 200)
         if melspectogram_db.shape[2] < fixed_length:
-            melspectogram_db = torch.nn.functional.pad(
-                melspectogram_db, (0, fixed_length - melspectogram_db.shape[2])
-            )
+            melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2]))
         else:
             melspectogram_db = melspectogram_db[:, :, :fixed_length]
 
@@ -64,16 +62,10 @@ class DataSetBuilder:
             alias="Raw Dataset",
         )
         # This will return the pandas dataframe we added in the previous task
-        self.metadata = (
-            Task.get_task(task_id=self.original_dataset._task.id)
-            .artifacts["metadata"]
-            .get()
-        )
+        self.metadata = Task.get_task(task_id=self.original_dataset._task.id).artifacts["metadata"].get()
         # This will download the data and return a local path to the data
         self.original_dataset_path = Path(
-            self.original_dataset.get_mutable_local_copy(
-                self.configuration["dataset_path"], overwrite=True
-            )
+            self.original_dataset.get_mutable_local_copy(self.configuration["dataset_path"], overwrite=True)
         )
 
         # Prepare a preprocessor that will handle each sample one by one
@@ -114,33 +106,23 @@ class DataSetBuilder:
         # audio side by side in the debug sample UI)
         for i, (_, data) in tqdm(enumerate(self.metadata.iterrows())):
             _, audio_file_path, label = data.tolist()
-            sample, sample_freq = torchaudio.load(
-                self.original_dataset_path / audio_file_path, normalize=True
-            )
+            sample, sample_freq = torchaudio.load(self.original_dataset_path / audio_file_path, normalize=True)
             spectrogram = self.preprocessor.preprocess_sample(sample, sample_freq)
             # Get only the filename and replace the extension, we're saving an image here
             new_file_name = os.path.basename(audio_file_path).replace(".wav", ".npy")
             # Get the correct folder, basically the original dataset folder + the new filename
-            spectrogram_path = (
-                self.original_dataset_path
-                / os.path.dirname(audio_file_path)
-                / new_file_name
-            )
+            spectrogram_path = self.original_dataset_path / os.path.dirname(audio_file_path) / new_file_name
             # Save the numpy array to disk
             np.save(spectrogram_path, spectrogram)
 
             # Log every 10th sample as a debug sample to the UI, so we can manually check it
             if i % 10 == 0:
                 # Convert the numpy array to a viewable JPEG
-                rgb_image = mpl.colormaps["viridis"](
-                    spectrogram[0, :, :].detach().numpy() * 255
-                )[:, :, :3]
+                rgb_image = mpl.colormaps["viridis"](spectrogram[0, :, :].detach().numpy() * 255)[:, :, :3]
                 title = os.path.splitext(os.path.basename(audio_file_path))[0]
 
                 # Report the image and the original sound, so they can be viewed side by side
-                self.preprocessed_dataset.get_logger().report_image(
-                    title=title, series="spectrogram", image=rgb_image
-                )
+                self.preprocessed_dataset.get_logger().report_image(title=title, series="spectrogram", image=rgb_image)
                 self.preprocessed_dataset.get_logger().report_media(
                     title=title,
                     series="original_audio",
@@ -152,9 +134,7 @@ class DataSetBuilder:
         # Again add some visualizations to the task
         self.log_dataset_statistics()
         # We still want the metadata
-        self.preprocessed_dataset._task.upload_artifact(
-            name="metadata", artifact_object=self.metadata
-        )
+        self.preprocessed_dataset._task.upload_artifact(name="metadata", artifact_object=self.metadata)
         self.preprocessed_dataset.finalize(auto_upload=True)
 
 
diff --git a/examples/datasets/urbansounds_get_data.py b/examples/datasets/urbansounds_get_data.py
index 9f442077..55e6f352 100644
--- a/examples/datasets/urbansounds_get_data.py
+++ b/examples/datasets/urbansounds_get_data.py
@@ -28,9 +28,7 @@ def get_urbansound8k():
         "https://allegro-datasets.s3.amazonaws.com/clearml/UrbanSound8K.zip",
         extract_archive=True,
     )
-    path_to_urbansound8k_csv = (
-        Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv"
-    )
+    path_to_urbansound8k_csv = Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv"
     path_to_urbansound8k_audio = Path(path_to_urbansound8k) / "UrbanSound8K" / "audio"
 
     return path_to_urbansound8k_csv, path_to_urbansound8k_audio
@@ -38,9 +36,7 @@ def log_dataset_statistics(dataset, metadata):
     histogram_data = metadata["class"].value_counts()
-    dataset.get_logger().report_table(
-        title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata
-    )
+    dataset.get_logger().report_table(title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata)
     dataset.get_logger().report_histogram(
         title="Class distribution",
         series="Class distribution",