What does
if self.transforms:
    data = self.transforms(data)
do? I don't understand the logic behind this line. What is the condition it is checking?
I'm reading an article on creating a custom dataset with PyTorch, based on the implementation below:
# custom dataset
class MNISTDataset(Dataset):
    def __init__(self, images, labels=None, transforms=None):
        self.X = images
        self.y = labels
        self.transforms = transforms

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        data = self.X.iloc[i, :]
        data = np.asarray(data).astype(np.uint8).reshape(28, 28, 1)

        if self.transforms:
            data = self.transforms(data)

        if self.y is not None:
            return (data, self.y[i])
        else:
            return data

train_data = MNISTDataset(train_images, train_labels, transform)
test_data = MNISTDataset(test_images, test_labels, transform)

# dataloaders
trainloader = DataLoader(train_data, batch_size=128, shuffle=True)
testloader = DataLoader(test_data, batch_size=128, shuffle=True)
Thank you! I'm basically trying to understand why this works and how it applies the transforms to the data.
The dataset MNISTDataset can optionally be initialized with a transform function. If such a transform function is given, it is saved in self.transforms; otherwise the attribute keeps its default value None. When an item is fetched with __getitem__, the code first checks whether self.transforms is truthy, i.e. whether it can be coerced to True. A callable object (such as a transform) evaluates to True, while None evaluates to False. So if a transform was provided it is applied to data; otherwise self.transforms was never given and no transform function is applied.
Here's a general example, out of a torch/torchvision context:
def do(x, callback=None):
    if callback:  # will be True if callback is a function/lambda
        return callback(x)
    return x

do(2)                           # returns 2
do(2, callback=lambda x: 2*x)   # returns 4
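Back in the torchvision context, the transform passed to MNISTDataset above could be built like the following minimal sketch (this assumes torchvision is installed; the original article may construct its pipeline differently):

from torchvision import transforms

# ToTensor converts the (28, 28, 1) uint8 array produced by __getitem__
# into a float tensor of shape (1, 28, 28) with values scaled to [0, 1]
transform = transforms.Compose([
    transforms.ToTensor(),
])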
Related
I ran a trial with my dataset; this is my complete code:
data_root = 'D:/AuxiliaryDocuments/NYU/'

raw_data_transforms = transforms.Compose([#transforms.ToPILImage(),
                                          transforms.CenterCrop((224, 101)),
                                          transforms.ToTensor()])
depth_data_transforms = transforms.Compose([transforms.CenterCrop((74, 55)),
                                            transforms.ToTensor()])

filename_txt = {'image_train': 'image_train.txt', 'image_test': 'image_test.txt',
                'depth_train': 'depth_train.txt', 'depth_test': 'depth_test.txt'}

class Mydataset(Dataset):
    def __init__(self, data_root, transformation, data_type):
        self.transform = transformation
        self.image_path_txt = filename_txt[data_type]
        self.sample_list = list()
        f = open(data_root + '/' + data_type + '/' + self.image_path_txt)
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            line = line.replace(';', '')
            self.sample_list.append(line)
        f.close()

    def __getitem__(self, index):
        item = self.sample_list[index]
        img = Image.open(item)
        if self.transform is not None:
            img = self.transform(img)
        idx = index
        print(type(img))
        return idx, img

    def __len__(self):
        return len(self.sample_list)
Printing the type of img gives <class 'torch.Tensor'>. Then I used the code below:
test = Mydataset(data_root, raw_data_transforms, 'image_train')
test_1 = Mydataset(data_root, depth_data_transforms, 'depth_train')
test2 = DataLoader(test, batch_size=4, num_workers=0, shuffle=False)
test_2 = DataLoader(test_1, batch_size=4, num_workers=0, shuffle=False)
and printed the information:
for idx, data in enumerate(test_2):
    print(idx, data)
    print(type(data))
but the type of data is <class 'list'>, while what I need is a tensor.
This is the expected output. DataLoader in your case is supposed to return a list: the output of the DataLoader is (inputs batch, labels batch), with one batched entry per item returned by your Dataset's __getitem__.
e.g.
for idx, data in enumerate(test_dataloader):
    if idx == 0:
        print(type(data))
        print(len(data), data[0].shape, data[1].shape)

<class 'list'>
2 torch.Size([64, 1, 28, 28]) torch.Size([64])
Here, the 64 labels correspond to the 64 images in the batch.
In order to pass it to the model, you can do
# If you return img first in your Dataset:
return img, idx

# Either:
for idx, data in enumerate(test_dataloader):
    # pass inputs to model
    out = model(data[0])
    # your labels are data[1]

# Or:
for idx, (inputs, labels) in enumerate(test_dataloader):
    # pass inputs to model
    out = model(inputs)
    # your labels are in the "labels" variable
I have a problem with this method, which should return both the training and the validation dataset, and examine them to return the index that corresponds to the first occurrence of each class in CIFAR10. My attempt fails with "'dict_values' object is not subscriptable". This is the code:
def get_cifar10_dataset():
    """
    Should create the cifar 10 network and identify the dataset index of the first time each new class
    appears
    :return: tuple of training and validation dataset as well as label indices
    :rtype: (gluon.data.Dataset, dict[int:int], gluon.data.Dataset, dict[int:int])
    """
    train_data = None
    val_data = None

    # YOUR CODE HERE
    train_data = datasets.CIFAR10(train=True, root=M5_IMAGES)
    val_data = datasets.CIFAR10(train=False, root=M5_IMAGES)
You are asked to return a dictionary with labels and the corresponding indices. Using the following function can solve your problem:
def get_idx_dict(data):
    lis = []
    idx = []
    indices = {}
    for i in range(len(data)):
        if data[i][1] not in lis:
            lis.append(data[i][1])
            idx.append(i)
    indices = {lis[i]: idx[i] for i in range(len(lis))}
    return indices
The function returns a dictionary with the desired output. Use it on the data from the train and validation sets:
train_indices = get_idx_dict(train_data)
val_indices = get_idx_dict(val_data)
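As a quick sanity check, here is the function applied to a toy list of (data, label) pairs standing in for a real gluon Dataset (the labels here are made up):

toy_data = [(None, 3), (None, 1), (None, 3), (None, 0), (None, 1)]
print(get_idx_dict(toy_data))  # {3: 0, 1: 1, 0: 3} -- label -> index of first occurrence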
You can do it like this:
def get_cifar10_dataset():
    """
    Should create the cifar 10 network and identify the dataset index of the first time each new class appears
    :return: tuple of training and validation dataset as well as label indices
    :rtype: (gluon.data.Dataset, dict[int:int], gluon.data.Dataset, dict[int:int])
    """
    train_data = None
    val_data = None
    train_indices = {}
    val_indices = {}

    # Use `root=M5_IMAGES` for your dataset
    train_data = gluon.data.vision.datasets.CIFAR10(train=True, root=M5_IMAGES)
    val_data = gluon.data.vision.datasets.CIFAR10(train=False, root=M5_IMAGES)

    # for train
    for i in range(len(train_data)):
        if train_data[i][1] not in train_indices:
            train_indices[train_data[i][1]] = i

    # for valid
    for i in range(len(val_data)):
        if val_data[i][1] not in val_indices:
            val_indices[val_data[i][1]] = i

    # raise NotImplementedError()
    return train_data, train_indices, val_data, val_indices
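A usage sketch (M5_IMAGES is assumed to be defined by the assignment; the printed values are hypothetical):

train_data, train_indices, val_data, val_indices = get_cifar10_dataset()
print(train_indices)  # e.g. {6: 0, 9: 1, ...} -- class label -> index of its first occurrence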
I am coding a 2048 game via pygame. Below is the relevant section of my code:
class Data():
    def __init__(self):
        self.data = getnull()
        self.score = 0

    def updatesprites(self):  # EXP
        spritelist = [[], [], [], []]
        for count in range(4):   # for row loop
            for i in range(4):   # per column loop
                if self.data[count][i] != 0:
                    spritelist[count] += newSprite(str(self.data[count][i]) + ".png")  # error occurs here
                    spritelist[count][i].move(15 + i*115, 15 + count*115)
                    showSprite(spritelist[count][i])

class newSprite(pygame.sprite.Sprite):
    def __init__(self, filename):
        pygame.sprite.Sprite.__init__(self)
        self.images = []
        self.images.append(loadImage(filename))
        self.image = pygame.Surface.copy(self.images[0])
        self.currentImage = 0
        self.rect = self.image.get_rect()
        self.rect.topleft = (0, 0)
        self.mask = pygame.mask.from_surface(self.image)
        self.angle = 0

    def addImage(self, filename):
        self.images.append(loadImage(filename))

    def move(self, xpos, ypos, centre=False):
        if centre:
            self.rect.center = [xpos, ypos]
        else:
            self.rect.topleft = [xpos, ypos]
----------------Main-------------------
from functions import *
from config import *
from pygame_functions import *
import pygame

screenSize(475, 475)                     # call screen init
gameboard = newSprite("game board.png")  # create board
showSprite(gameboard)
game = Data()
game.updatesprites()

while True:
    pass
When game.updatesprites() is called, a "newSprite object is not iterable" error is raised in Data.updatesprites.
+ concatenates lists and strings, and adds numbers.
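For example, this standalone snippet (not from your game) shows why the error appears: += on a list expects an iterable on the right-hand side.

li = [1, 2]
li += [3]   # fine: extends the list with an iterable -> [1, 2, 3]
li += 4     # TypeError: 'int' object is not iterable,
            # just like your "newSprite object is not iterable"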
What you are trying to do is append an element to a list. This is done as follows:

li.append(element)  # adds the element to the end of the list
Or in your case:
spritelist[count].append(newSprite(str(self.data[count][i]) + ".png"))
Another solution: you could create a new type that lets you add elements the way you were trying to. Note that __iadd__ must return self, otherwise the += assignment would set the list to None:

class UglyList(list):
    def __iadd__(self, other):
        self.append(other)
        return self

You'd also need to change another line here:

spritelist = [UglyList() for i in range(4)]
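A quick demonstration of UglyList outside the pygame context:

ul = UglyList()
ul += 'abc'     # appends the whole string as a single element
ul += [1, 2]    # appends the list itself as a single element
print(ul)       # ['abc', [1, 2]]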
I applied Logistic Regression on the train set after splitting the data set into test and train sets, but I got the above error. I tried to work it out, and when I print my response vector y_train in the console it shows integer values like 0 or 1. But when I write it to a file, I find the values are floats like 0.0 and 1.0. If that's the problem, how can I overcome it?
lenreg = LogisticRegression()
print y_train[0:10]
y_train.to_csv(path='ytard.csv')
lenreg.fit(X_train, y_train)
y_pred = lenreg.predict(X_test)
print metrics.accuracy_score(y_test, y_pred)
The stack trace is as follows:
Traceback (most recent call last):
  File "/home/amey/prog/pd.py", line 82, in <module>
    lenreg.fit(X_train, y_train)
  File "/usr/lib/python2.7/dist-packages/sklearn/linear_model/logistic.py", line 1154, in fit
    self.max_iter, self.tol, self.random_state)
  File "/usr/lib/python2.7/dist-packages/sklearn/svm/base.py", line 885, in _fit_liblinear
    " class: %r" % classes_[0])
ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0.0
Meanwhile, I came across this link, which was unanswered. Is there a solution?
The problem here is that your y_train vector, for whatever reason, only has zeros. It is actually not your fault, and it's kind of a bug (I think). The classifier needs 2 classes or else it throws this error.
It makes sense: if your y_train vector only has zeros (i.e. only 1 class), then the classifier doesn't really need to do any work, since all predictions should just be that one class.
In my opinion the classifier should still complete, just predict the one class (all zeros in this case), and throw a warning, but it doesn't. It throws the error instead.
A way to check for this condition is like this:

lenreg = LogisticRegression()
print y_train[0:10]
y_train.to_csv(path='ytard.csv')

# y_train sums to 0 (all zeros) or to its own length (all ones)
# exactly when it contains a single class
if np.sum(y_train) in [len(y_train), 0]:
    print "all one class"
    # do something else
else:
    # OK to proceed
    lenreg.fit(X_train, y_train)
    y_pred = lenreg.predict(X_test)
    print metrics.accuracy_score(y_test, y_pred)
To overcome the problem more easily, I would recommend just including more samples in your test set, like 100 or 1000 instead of 10.
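A related option, not mentioned in the original answer, is the stratify parameter of sklearn's train_test_split, which preserves the class proportions in both splits (a minimal sketch, assuming X and y are your full feature matrix and label vector):

from sklearn.model_selection import train_test_split

# stratify=y keeps the class ratio of y in both splits, so y_train
# cannot end up with a single class (as long as each class has at
# least two samples in y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42)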
I had the same problem using learning_curve:
train_sizes, train_scores, test_scores = learning_curve(
    estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes,
    scoring="f1", random_state=RANDOM_SEED, shuffle=True)

Add the shuffle parameter, which randomizes the sets.
This doesn't prevent the error from happening, but it is a way to increase the chances of having both classes in the subsets used by the function.
I found it was because only 1's or 0's wound up in my y_test, since my sample size was really small. Try changing your test_size value.
# python3
import numpy as np
from sklearn.svm import LinearSVC

def upgrade_to_work_with_single_class(SklearnPredictor):
    class UpgradedPredictor(SklearnPredictor):
        def __init__(self, *args, **kwargs):
            self._single_class_label = None
            super().__init__(*args, **kwargs)

        @staticmethod
        def _has_only_one_class(y):
            return len(np.unique(y)) == 1

        def _fitted_on_single_class(self):
            return self._single_class_label is not None

        def fit(self, X, y=None):
            if self._has_only_one_class(y):
                self._single_class_label = y[0]
            else:
                super().fit(X, y)
            return self

        def predict(self, X):
            if self._fitted_on_single_class():
                return np.full(X.shape[0], self._single_class_label)
            else:
                return super().predict(X)

    return UpgradedPredictor

LinearSVC = upgrade_to_work_with_single_class(LinearSVC)
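A quick usage sketch of the wrapped class (the data here is made up):

X = np.array([[0.0], [1.0], [2.0]])
y = np.array([5, 5, 5])                 # a single class: would normally raise
clf = LinearSVC().fit(X, y)             # stores the lone label instead of fitting
print(clf.predict(np.array([[3.0]])))   # [5]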
Or the hard way (more correct):
import numpy as np
from sklearn.svm import LinearSVC
from copy import deepcopy, copy
from functools import wraps

def copy_class(cls):
    copy_cls = type(f'{cls.__name__}', cls.__bases__, dict(cls.__dict__))
    for name, attr in cls.__dict__.items():
        try:
            hash(attr)
        except TypeError:
            # Assume lack of __hash__ implies mutability. This is NOT
            # a bullet proof assumption but good in many cases.
            setattr(copy_cls, name, deepcopy(attr))
    return copy_cls

def upgrade_to_work_with_single_class(SklearnPredictor):
    SklearnPredictor = copy_class(SklearnPredictor)
    original_init = deepcopy(SklearnPredictor.__init__)
    original_fit = deepcopy(SklearnPredictor.fit)
    original_predict = deepcopy(SklearnPredictor.predict)

    @staticmethod
    def _has_only_one_class(y):
        return len(np.unique(y)) == 1

    def _fitted_on_single_class(self):
        return self._single_class_label is not None

    @wraps(SklearnPredictor.__init__)
    def new_init(self, *args, **kwargs):
        self._single_class_label = None
        original_init(self, *args, **kwargs)

    @wraps(SklearnPredictor.fit)
    def new_fit(self, X, y=None):
        if self._has_only_one_class(y):
            self._single_class_label = y[0]
        else:
            original_fit(self, X, y)
        return self

    @wraps(SklearnPredictor.predict)
    def new_predict(self, X):
        if self._fitted_on_single_class():
            return np.full(X.shape[0], self._single_class_label)
        else:
            return original_predict(self, X)

    setattr(SklearnPredictor, '_has_only_one_class', _has_only_one_class)
    setattr(SklearnPredictor, '_fitted_on_single_class', _fitted_on_single_class)
    SklearnPredictor.__init__ = new_init
    SklearnPredictor.fit = new_fit
    SklearnPredictor.predict = new_predict
    return SklearnPredictor

LinearSVC = upgrade_to_work_with_single_class(LinearSVC)
You can find the indices of the first (or any) occurrence of each of the classes, concatenate them on top of the arrays, and delete them from their original positions; that way there will be at least one instance of each class in the training set. A sketch of this idea follows.
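A minimal sketch of that idea, assuming X and y are NumPy arrays (the helper name is made up):

import numpy as np

def put_one_of_each_class_first(X, y):
    # indices of the first occurrence of each class
    _, first_idx = np.unique(y, return_index=True)
    # all remaining indices, still in ascending order
    rest = np.setdiff1d(np.arange(len(y)), first_idx)
    order = np.concatenate([first_idx, rest])
    # any leading slice of the result now contains every class at least once
    return X[order], y[order]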
This error is related to the dataset you are using: the dataset contains only one class, for example 1/benign, whereas it must contain two classes, 1 and 0, or Benign and Attack.
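A one-line guard for this condition (a sketch, assuming y is your label array):

import numpy as np

assert len(np.unique(y)) >= 2, "training labels contain only one class"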
I have this class:
class Tumor(object):
    """
    Wrapper for the tumor data points.

    Attributes:
        idNum = ID number for the tumor (is unique) (int)
        malignant = label for this tumor (either 'M' for malignant
            or 'B' for benign) (string)
        featureNames = names of all features used in this Tumor
            instance (list of strings)
        featureVals = values of all features used in this Tumor
            instance, same order as featureNames (list of floats)
    """
    def __init__(self, idNum, malignant, featureNames, featureVals):
        self.idNum = idNum
        self.label = malignant
        self.featureNames = featureNames
        self.featureVals = featureVals

    def distance(self, other):
        dist = 0.0
        for i in range(len(self.featureVals)):
            dist += abs(self.featureVals[i] - other.featureVals[i])**2
        return dist**0.5

    def getLabel(self):
        return self.label

    def getFeatures(self):
        return self.featureVals

    def getFeatureNames(self):
        return self.featureNames

    def __str__(self):
        return str(self.idNum) + ', ' + str(self.label) + ', ' \
            + str(self.featureVals)
and I am trying to use an instance of it in another function later in my code:
def train_model(train_set):
    """
    Trains a logistic regression model with the given dataset

    train_set (list): list of data points of type Tumor

    Returns a model of type sklearn.linear_model.LogisticRegression
    fit to the training data
    """
    tumor = Tumor()
    features = tumor.getFeatures()
    labels = tumor.getLabel()
    log_reg = sklearn.linear_model.LogisticRegression(train_set)
    model = log_reg.fit(features, labels)
    return model
However, I keep getting this error when I test my code:
TypeError: __init__() takes exactly 5 arguments (1 given)
I understand that I am not passing the five arguments when I create the instance of Tumor in train_model, but how can I do so?
Arguments to the __init__ (or __new__, if you're using that) just go, predictably, where you create the instance in train_model:
tumor = Tumor(idNum, malignant, featureNames, featureVals)
Of course, you actually need values for all of these, as they are all required arguments.
You don't need to include self, however, as that first argument is taken care of automatically.
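For instance, here is a hedged sketch of what train_model might look like once it uses the Tumor instances it is given instead of constructing a new one (this assumes each element of train_set is an already-built Tumor, as the docstring says):

import sklearn.linear_model

def train_model(train_set):
    # gather one feature vector and one label per Tumor in the training set
    features = [tumor.getFeatures() for tumor in train_set]
    labels = [tumor.getLabel() for tumor in train_set]
    # LogisticRegression takes no dataset argument in its constructor;
    # the data goes to fit() instead
    log_reg = sklearn.linear_model.LogisticRegression()
    return log_reg.fit(features, labels)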