Creating iterables is very common in machine learning code.
Below are two code snippets. The first creates an iterable dataloader. The second shows how to convert a generator into a dataloader.
The details of the code are not important; they simply demonstrate that these concepts are directly useful in data science.
# A use case where we are creating an iterable dataloader
import torch
from torch.utils.data import DataLoader, Dataset

# Define a custom dataset
class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

# Define some dummy data: 1000 tensors of 5 random values each
data = [torch.randn(5) for _ in range(1000)]

# Create a DataLoader
batch_size = 32
dataloader = DataLoader(CustomDataset(data), batch_size=batch_size)

# Iterate over the DataLoader to get batches of data
# dataloader is an iterable object
for batch in dataloader:
    print(batch.shape)  # torch.Size([32, 5]) (the last batch holds the remaining 8 samples)
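Because the DataLoader is an iterable, you can also step through it manually with Python's iterator protocol instead of a for loop. A minimal sketch, reusing the dataloader defined above:

# iter() returns an iterator over the DataLoader,
# and each next() call yields one batch.
batch_iter = iter(dataloader)
first_batch = next(batch_iter)
print(first_batch.shape)  # torch.Size([32, 5])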
# Convert a generator into a PyTorch dataset
import torch
from torch.utils.data import DataLoader, IterableDataset

class IterDataset(IterableDataset):
    def __init__(self, generator):
        self.generator = generator

    def __iter__(self):
        # Calling the generator function returns a fresh iterator
        return self.generator()

def gen():
    for x in range(10):
        yield x

dataset = IterDataset(gen)
for i in DataLoader(dataset, batch_size=2):
    print(i)  # tensor([0, 1]), tensor([2, 3]), ...
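One caveat: with an IterableDataset, each DataLoader worker process receives its own copy of the dataset, so loading with num_workers > 0 would yield every element once per worker. The usual fix is to shard the stream per worker using torch.utils.data.get_worker_info(). A minimal sketch under that assumption (ShardedIterDataset is a hypothetical name, for illustration only):

from torch.utils.data import DataLoader, IterableDataset, get_worker_info

class ShardedIterDataset(IterableDataset):
    # Illustrative dataset that splits range(n) across DataLoader workers
    def __init__(self, n):
        self.n = n

    def __iter__(self):
        info = get_worker_info()
        if info is None:
            # Single-process loading: yield everything
            return iter(range(self.n))
        # Multi-process loading: each worker yields a disjoint slice
        return iter(range(info.id, self.n, info.num_workers))

for i in DataLoader(ShardedIterDataset(10), batch_size=2, num_workers=2):
    print(i)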