Simple Dataset
import torch
from torch.utils.data import Dataset
from torchvision import transforms
Datasets
class toy_set(Dataset):
    """Toy dataset in which every sample is the same ``(x, y)`` pair.

    Each feature row is ``[2., 2.]`` and each target row is ``[1.]``.

    Args:
        length: Number of samples held by the dataset.
        transform: Optional callable applied to each ``(x, y)`` tuple before
            it is returned. ``None`` (the default) disables transformation.
    """

    def __init__(self, length=100, transform=None):
        super().__init__()
        self.x = 2 * torch.ones(length, 2)
        self.y = torch.ones(length, 1)
        self.len = length
        self.transform = transform

    def __getitem__(self, index):
        """Return the ``(x, y)`` sample at ``index``, transformed if requested."""
        sample = self.x[index], self.y[index]
        # Compare against None instead of relying on truthiness: a callable
        # that happens to be falsy (for example one defining __bool__ or
        # __len__) must still be applied.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len
# Build a dataset with the default length and no transform.
dataset = toy_set()
len(dataset)  # 100
dataset[0]  # (tensor([2., 2.]), tensor([1.]))

# Show the first three samples; every sample is identical by construction.
for i in range(3):
    x, y = dataset[i]
    print(i, 'x:', x, 'y:', y)
# Expected output:
# 0 x: tensor([2., 2.]) y: tensor([1.])
# 1 x: tensor([2., 2.]) y: tensor([1.])
# 2 x: tensor([2., 2.]) y: tensor([1.])
Transforms
Create callable classes that transform the samples of a dataset.
class add_mult(object):
    """Callable transform that shifts x and scales y.

    Args:
        addx: Value added to the first element of the sample (x).
        muly: Factor the second element of the sample (y) is multiplied by.
    """

    def __init__(self, addx=1, muly=1):
        self.addx, self.muly = addx, muly

    def __call__(self, sample):
        """Return ``(x + addx, y * muly)`` for ``sample = (x, y)``."""
        features, target = sample[0], sample[1]
        return features + self.addx, target * self.muly
# First, apply the transform by hand to a sample taken from a dataset that
# was built with transform=None.
dataset = toy_set()
dataset[0]  # (tensor([2., 2.]), tensor([1.]))
a_m = add_mult()
x_, y_ = a_m(dataset[0])  # x_ is tensor([3., 3.]), y_ is tensor([1.])

# Second, hand the transform object to the dataset so it is applied to
# every sample on retrieval.
dataset_ = toy_set(length=100, transform=a_m)
dataset_[0]  # (tensor([3., 3.]), tensor([1.]))
Transforms Compose
In this case, we create the class “mult”, which multiplies every element of both tensors in a sample (x and y) by the value mul.
class mult(object):
    """Callable transform that scales both elements of a sample.

    Args:
        mul: Factor that both x and y are multiplied by.
    """

    def __init__(self, mul=100):
        self.mul = mul

    def __call__(self, sample):
        """Return ``(x * mul, y * mul)`` for ``sample = (x, y)``."""
        features, target = sample[0], sample[1]
        factor = self.mul
        return features * factor, target * factor
We create a transforms Compose object. In the constructor, we place a list. The first element of the list is an instance of the first transform (created by calling its constructor); the second element of the list is an instance of the second transform. We can apply the transform on the data directly. The function takes the input elements of the dataset, applies the first transform, applies the second transform, and returns the output as a tuple containing the tensors. We can also pass the Compose object directly to the dataset constructor. Each time we retrieve a sample, the original tensor is passed to the Compose object, the first transform is applied, then the second transform is applied.
# Signature: Compose(transforms: List[Callable])
# The transforms run in list order: add_mult first, then mult.
data_transform = transforms.Compose([add_mult(), mult()])
dataset_ = toy_set(length=100, transform=data_transform)
dataset_[0]  # (tensor([300., 300.]), tensor([100.]))