Mirror of https://github.com/clearml/clearml (synced 2025-06-23 01:55:38 +00:00)
Black formatting

commit 01d337f1aa (parent 028adb45fb)
@@ -6,7 +6,8 @@ def main():
     print("STEP1 : Downloading CSV dataset")
     csv_file_path = manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv")
+        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv"
+    )

     print("STEP2 : Creating a dataset")
     # By default, clearml data uploads to the clearml fileserver. Adding output_uri argument to the create() method
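
Aside (not part of the commit): a minimal sketch of the flow this hunk reformats, downloading a file through StorageManager's local cache and registering it as a ClearML dataset. The dataset name below is an assumption for illustration; as the in-code comment notes, uploads go to the ClearML fileserver unless output_uri is passed to create().

    from clearml import StorageManager, Dataset

    # Download (and cache) the raw CSV locally
    csv_file_path = StorageManager.get_local_copy(
        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/Iris_Species.csv"
    )

    # output_uri is optional; the default target is the ClearML fileserver
    dataset = Dataset.create(dataset_name="Iris Dataset", dataset_project="dataset_examples")
    dataset.add_files(path=csv_file_path)
    dataset.upload()
    dataset.finalize()
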
@@ -23,5 +24,5 @@ def main():
     print("We are done, have a great day :)")


-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
@@ -42,19 +42,13 @@ dataset_path = Dataset.get(
 # Dataset and Dataloader initializations
 transform = transforms.Compose([transforms.ToTensor()])

-trainset = datasets.CIFAR10(
-    root=dataset_path, train=True, download=False, transform=transform
-)
+trainset = datasets.CIFAR10(root=dataset_path, train=True, download=False, transform=transform)
 trainloader = torch.utils.data.DataLoader(
     trainset, batch_size=params.get("batch_size", 4), shuffle=True, num_workers=10
 )

-testset = datasets.CIFAR10(
-    root=dataset_path, train=False, download=False, transform=transform
-)
-testloader = torch.utils.data.DataLoader(
-    testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10
-)
+testset = datasets.CIFAR10(root=dataset_path, train=False, download=False, transform=transform)
+testloader = torch.utils.data.DataLoader(testset, batch_size=params.get("batch_size", 4), shuffle=False, num_workers=10)

 classes = (
     "plane",
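
Aside: the hunk header shows this file starts from dataset_path = Dataset.get(. A hedged sketch of how that path is typically obtained, so torchvision can load CIFAR-10 with download=False (the dataset name and project are assumptions, taken from the creation script later in this commit):

    from clearml import Dataset

    # Fetch the ClearML-managed CIFAR-10 copy; the return value is a local cache folder
    dataset_path = Dataset.get(
        dataset_project="dataset_examples", dataset_name="cifar_dataset"
    ).get_local_copy()
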
@@ -87,14 +81,10 @@ def predictions_gt_images_handler(engine, logger, *args, **kwargs):
         ax = fig.add_subplot(num_x, num_y, idx + 1, xticks=[], yticks=[])
         ax.imshow(trans(x[idx]))
         ax.set_title(
-            "{0} {1:.1f}% (label: {2})".format(
-                classes[preds], probs * 100, classes[y[idx]]
-            ),
+            "{0} {1:.1f}% (label: {2})".format(classes[preds], probs * 100, classes[y[idx]]),
             color=("green" if preds == y[idx] else "red"),
         )
-    logger.writer.add_figure(
-        "predictions vs actuals", figure=fig, global_step=engine.state.epoch
-    )
+    logger.writer.add_figure("predictions vs actuals", figure=fig, global_step=engine.state.epoch)


 class Net(nn.Module):
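
Aside: logger.writer here is a TensorBoard SummaryWriter (exposed by the Ignite TensorboardLogger). A small standalone sketch of the add_figure call the reformatted line makes; the log directory and figure content are placeholders:

    import matplotlib.pyplot as plt
    from torch.utils.tensorboard import SummaryWriter

    writer = SummaryWriter(log_dir="runs/debug")
    fig = plt.figure()
    plt.plot([0, 1], [0, 1])  # any matplotlib figure works
    writer.add_figure("predictions vs actuals", figure=fig, global_step=1)
    writer.close()
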
@@ -3,13 +3,9 @@ from clearml import StorageManager, Dataset

 manager = StorageManager()

-dataset_path = manager.get_local_copy(
-    remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
-)
+dataset_path = manager.get_local_copy(remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")

-dataset = Dataset.create(
-    dataset_name="cifar_dataset", dataset_project="dataset_examples"
-)
+dataset = Dataset.create(dataset_name="cifar_dataset", dataset_project="dataset_examples")

 # Prepare and clean data here before it is added to the dataset

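
Aside: get_local_copy extracts archives by default, so dataset_path points at the unpacked cifar-10-python folder. The script's continuation is not shown in this hunk; a hedged sketch of the steps that usually follow a create() call:

    dataset.add_files(path=dataset_path)  # register the extracted files
    dataset.upload()                      # push them to storage
    dataset.finalize()                    # lock this dataset version
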
@@ -8,8 +8,9 @@ from clearml import Dataset, StorageManager

 def download_mnist_dataset():
     manager = StorageManager()
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"

@@ -28,7 +29,8 @@ def main():

     print("STEP3 : Creating the dataset")
     mnist_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)")
+        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset (Syncing Example)"
+    )

     print("STEP4 : Syncing train dataset")
     shutil.copytree(mnist_dataset_train, mnist_train_path)  # Populating dataset folder with TRAIN images
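
Aside: this example copies images into a local folder and then, per its name, syncs that folder into the dataset. The sync step itself is outside this hunk; a hedged sketch assuming the usual Dataset.sync_folder API, with the names taken from the hunk:

    # sync_folder diffs the local folder against the dataset and stages adds/removals
    mnist_dataset.sync_folder(local_path=mnist_train_path)
    mnist_dataset.upload()
    mnist_dataset.finalize()
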
@@ -46,5 +48,5 @@ def main():
     print("We are done, have a great day :)")


-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
@@ -7,29 +7,30 @@ def main():
     manager = StorageManager()

     print("STEP1 : Downloading mnist dataset")
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"

     print("STEP2 : Creating the training dataset")
-    train_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset")
+    train_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Training Dataset")
     train_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN")
     train_dataset.upload()
     train_dataset.finalize()

     print("STEP3 : Creating the testing dataset")
-    test_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset")
+    test_dataset = Dataset.create(dataset_project="dataset_examples/MNIST", dataset_name="MNIST Testing Dataset")
     test_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST")
     test_dataset.upload()
     test_dataset.finalize()

     print("STEP4 : Create a child dataset with both mnist train and test data")
     child_dataset = Dataset.create(
-        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Complete Dataset",
-        parent_datasets=[train_dataset.id, test_dataset.id])
+        dataset_project="dataset_examples/MNIST",
+        dataset_name="MNIST Complete Dataset",
+        parent_datasets=[train_dataset.id, test_dataset.id],
+    )
     child_dataset.upload()
     child_dataset.finalize()

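
Aside: a child dataset with two parents contains the union of its parents' files. A hedged sketch of consuming the merged dataset afterwards (project and dataset names are taken from the hunk):

    from clearml import Dataset

    complete = Dataset.get(
        dataset_project="dataset_examples/MNIST", dataset_name="MNIST Complete Dataset"
    )
    print(len(complete.list_files()))       # TRAIN/* and TEST/* entries from both parents
    local_path = complete.get_local_copy()  # read-only cached copy
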
@@ -7,21 +7,22 @@ def main():
     manager = StorageManager()

     print("STEP1 : Downloading mnist dataset")
-    mnist_dataset = Path(manager.get_local_copy(
-        remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST"))
+    mnist_dataset = Path(
+        manager.get_local_copy(remote_url="https://allegro-datasets.s3.amazonaws.com/datasets/MNIST.zip", name="MNIST")
+    )
     mnist_dataset_train = mnist_dataset / "TRAIN"
     mnist_dataset_test = mnist_dataset / "TEST"

     print("STEP2 : Creating the training dataset")
-    mnist_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Training Dataset")
+    mnist_dataset = Dataset.create(dataset_project="dataset_examples", dataset_name="MNIST Training Dataset")
     mnist_dataset.add_files(path=mnist_dataset_train, dataset_path="TRAIN")
     mnist_dataset.upload()
     mnist_dataset.finalize()

     print("STEP3 : Create a child dataset of mnist dataset using TEST Dataset")
     child_dataset = Dataset.create(
-        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id])
+        dataset_project="dataset_examples", dataset_name="MNIST Complete Dataset", parent_datasets=[mnist_dataset.id]
+    )
     child_dataset.add_files(path=mnist_dataset_test, dataset_path="TEST")
     child_dataset.upload()
     child_dataset.finalize()
@@ -29,5 +30,5 @@ def main():
     print("We are done, have a great day :)")


-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
@@ -43,9 +43,7 @@ class PreProcessor:
         # Make sure all spectrograms are the same size
         fixed_length = 3 * (self.configuration["resample_freq"] // 200)
         if melspectogram_db.shape[2] < fixed_length:
-            melspectogram_db = torch.nn.functional.pad(
-                melspectogram_db, (0, fixed_length - melspectogram_db.shape[2])
-            )
+            melspectogram_db = torch.nn.functional.pad(melspectogram_db, (0, fixed_length - melspectogram_db.shape[2]))
         else:
             melspectogram_db = melspectogram_db[:, :, :fixed_length]

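
Aside: torch.nn.functional.pad pads the last dimension first, and a 2-tuple means (pad_left, pad_right). A tiny worked example of the right-padding used above, with placeholder sizes:

    import torch
    import torch.nn.functional as F

    t = torch.ones(1, 128, 100)  # e.g. a spectrogram 100 frames long
    fixed_length = 120
    padded = F.pad(t, (0, fixed_length - t.shape[2]))  # zero-pad on the right
    print(padded.shape)  # torch.Size([1, 128, 120])
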
@@ -64,16 +62,10 @@ class DataSetBuilder:
             alias="Raw Dataset",
         )
         # This will return the pandas dataframe we added in the previous task
-        self.metadata = (
-            Task.get_task(task_id=self.original_dataset._task.id)
-            .artifacts["metadata"]
-            .get()
-        )
+        self.metadata = Task.get_task(task_id=self.original_dataset._task.id).artifacts["metadata"].get()
         # This will download the data and return a local path to the data
         self.original_dataset_path = Path(
-            self.original_dataset.get_mutable_local_copy(
-                self.configuration["dataset_path"], overwrite=True
-            )
+            self.original_dataset.get_mutable_local_copy(self.configuration["dataset_path"], overwrite=True)
         )

         # Prepare a preprocessor that will handle each sample one by one
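
Aside: a ClearML dataset is backed by a task, so artifacts attached to that task travel with the dataset; that is what the reformatted one-liner retrieves. A hedged sketch of the artifact round-trip (the artifact name "metadata" comes from the hunk; the project, task name, and dataframe contents are illustrative):

    import pandas as pd
    from clearml import Task

    # Producer side: attach a dataframe artifact to a task
    task = Task.init(project_name="dataset_examples", task_name="produce metadata")
    df = pd.DataFrame({"file": ["a.wav"], "class": ["dog_bark"]})
    task.upload_artifact(name="metadata", artifact_object=df)

    # Consumer side: fetch the dataframe back by task id
    metadata = Task.get_task(task_id=task.id).artifacts["metadata"].get()
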
@@ -114,33 +106,23 @@ class DataSetBuilder:
         # audio side by side in the debug sample UI)
         for i, (_, data) in tqdm(enumerate(self.metadata.iterrows())):
             _, audio_file_path, label = data.tolist()
-            sample, sample_freq = torchaudio.load(
-                self.original_dataset_path / audio_file_path, normalize=True
-            )
+            sample, sample_freq = torchaudio.load(self.original_dataset_path / audio_file_path, normalize=True)
             spectrogram = self.preprocessor.preprocess_sample(sample, sample_freq)
             # Get only the filename and replace the extension, we're saving an image here
             new_file_name = os.path.basename(audio_file_path).replace(".wav", ".npy")
             # Get the correct folder, basically the original dataset folder + the new filename
-            spectrogram_path = (
-                self.original_dataset_path
-                / os.path.dirname(audio_file_path)
-                / new_file_name
-            )
+            spectrogram_path = self.original_dataset_path / os.path.dirname(audio_file_path) / new_file_name
             # Save the numpy array to disk
             np.save(spectrogram_path, spectrogram)

             # Log every 10th sample as a debug sample to the UI, so we can manually check it
             if i % 10 == 0:
                 # Convert the numpy array to a viewable JPEG
-                rgb_image = mpl.colormaps["viridis"](
-                    spectrogram[0, :, :].detach().numpy() * 255
-                )[:, :, :3]
+                rgb_image = mpl.colormaps["viridis"](spectrogram[0, :, :].detach().numpy() * 255)[:, :, :3]
                 title = os.path.splitext(os.path.basename(audio_file_path))[0]

                 # Report the image and the original sound, so they can be viewed side by side
-                self.preprocessed_dataset.get_logger().report_image(
-                    title=title, series="spectrogram", image=rgb_image
-                )
+                self.preprocessed_dataset.get_logger().report_image(title=title, series="spectrogram", image=rgb_image)
                 self.preprocessed_dataset.get_logger().report_media(
                     title=title,
                     series="original_audio",
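
Aside: mpl.colormaps["viridis"] maps an array to RGBA floats, and slicing [:, :, :3] drops the alpha channel so the result can be reported as an RGB image. A small standalone sketch (random data stands in for spectrogram[0, :, :]):

    import numpy as np
    import matplotlib as mpl

    spec = np.random.rand(128, 120)  # stand-in for a single-channel spectrogram
    rgb_image = mpl.colormaps["viridis"](spec)[:, :, :3]
    print(rgb_image.shape)  # (128, 120, 3), float values in [0, 1]
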
@@ -152,9 +134,7 @@ class DataSetBuilder:
         # Again add some visualizations to the task
         self.log_dataset_statistics()
         # We still want the metadata
-        self.preprocessed_dataset._task.upload_artifact(
-            name="metadata", artifact_object=self.metadata
-        )
+        self.preprocessed_dataset._task.upload_artifact(name="metadata", artifact_object=self.metadata)
         self.preprocessed_dataset.finalize(auto_upload=True)


@@ -28,9 +28,7 @@ def get_urbansound8k():
         "https://allegro-datasets.s3.amazonaws.com/clearml/UrbanSound8K.zip",
         extract_archive=True,
     )
-    path_to_urbansound8k_csv = (
-        Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv"
-    )
+    path_to_urbansound8k_csv = Path(path_to_urbansound8k) / "UrbanSound8K" / "metadata" / "UrbanSound8K.csv"
     path_to_urbansound8k_audio = Path(path_to_urbansound8k) / "UrbanSound8K" / "audio"

     return path_to_urbansound8k_csv, path_to_urbansound8k_audio
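
Aside: the truncated call above is StorageManager.get_local_copy, which downloads into a local cache and, with extract_archive=True, unpacks the zip so the returned path is the extraction folder. A hedged sketch of the full call:

    from clearml import StorageManager

    path_to_urbansound8k = StorageManager.get_local_copy(
        "https://allegro-datasets.s3.amazonaws.com/clearml/UrbanSound8K.zip",
        extract_archive=True,
    )
    # Repeated calls hit the cache instead of re-downloading
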
@@ -38,9 +36,7 @@ def get_urbansound8k():

 def log_dataset_statistics(dataset, metadata):
     histogram_data = metadata["class"].value_counts()
-    dataset.get_logger().report_table(
-        title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata
-    )
+    dataset.get_logger().report_table(title="Raw Dataset Metadata", series="Raw Dataset Metadata", table_plot=metadata)
     dataset.get_logger().report_histogram(
         title="Class distribution",
         series="Class distribution",
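
Aside: the report_histogram call is cut off by the hunk boundary, so this completion is an assumption. A hedged sketch of how value_counts() typically feeds it, with the counts as values and the class names as x-axis labels:

    histogram_data = metadata["class"].value_counts()
    dataset.get_logger().report_histogram(
        title="Class distribution",
        series="Class distribution",
        values=histogram_data.values.tolist(),
        xlabels=histogram_data.index.tolist(),
    )
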