Splitting dataset - python-2.7

I am new to OpenCV and Python.
I am trying to create a sudoku solver in OpenCV and want to use this image as my dataset for recognizing the digits in a sudoku.
I want the entire image to be used as the dataset.
sudoku digits dataset
Dimensions: 468x108
This image has 39 digits in each row and 9 such rows, one row per digit [1..9].
image = cv2.imread('images/digits_sudoku4.png')
image = cv2.resize(image, None, fx=2, fy=2)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cells = [np.hsplit(row, 39) for row in np.vsplit(gray, 12)]
# Convert the list to a NumPy array
x = np.array(cells)
print("The shape of our cells array: " + str(x.shape))
train = x.astype(np.float32)
# Create labels for the train and test data
k = [1, 2, 3, 4, 5, 6, 7, 8, 9]
train_labels = np.repeat(k, 468)[:, np.newaxis]
# Initiate kNN, train the data, then test it with test data for k=3
knn = cv2.KNearest()
knn.train(train, train_labels)
#ret, result, neighbors, distance = knn.find_nearest(test, k=3)
# Now we check the accuracy of classification
# For that, compare the result with test_labels and check which are wrong
'''
matches = result == test_labels
correct = np.count_nonzero(matches)
accuracy = correct * (100.0 / result.size)
print("Accuracy is = %.2f" % accuracy + "%")
'''
cv2.imshow('Sudoku', gray)
cv2.waitKey(0)
cv2.destroyAllWindows()
I am facing this error on the line knn.train(train, train_labels):
cv2.error: /build/opencv-SviWsf/opencv-2.4.9.1+dfsg/modules/ml/src/inner_functions.cpp:857: error: (-5) train data must be floating-point matrix in function cvCheckTrainData
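For reference, the OpenCV 2.4 ML API expects kNN training samples as a 2-D float32 matrix of shape (n_samples, n_features), so a 4-D cells array fails this check even after astype(np.float32). A minimal hedged sketch of the usual fix, assuming x has shape (rows, cols, cell_h, cell_w) as produced by the vsplit/hsplit above:

# Flatten each cell image into one row vector: (rows*cols, cell_h*cell_w)
train = x.reshape(-1, x.shape[2] * x.shape[3]).astype(np.float32)
# One float32 label per sample; note np.repeat(k, 468) would yield 4212 labels
# for 468 samples, so repeat each digit train.shape[0] // len(k) times instead
# (the layout must match the order the digits appear in your image)
train_labels = np.repeat(np.array(k, dtype=np.float32), train.shape[0] // len(k))[:, np.newaxis]

This is a sketch under those assumptions, not a drop-in replacement.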
Please help me out.
Thank you.

Related

Pytorch error when training an image captioning model Dimension out of range

I was training a model, but it says there are not enough values to unpack in the decoder. Here is the training step:
def train(loader, encoder, decoder, optimizer, criterion, epoch):
    loss_tracker, acc_tracker = AvgMeter(), AvgMeter()
    encoder.train()
    decoder.train()
    pbar = tqdm.tqdm(enumerate(loader), total=len(loader))
    for i, (images, target_sequences, sequence_lengths) in pbar:
        images = images.to(device)
        target_sequences = target_sequences.to(device)
        sequence_lengths = sequence_lengths.to(device)
        # Forward prop.
        logits, alphas, sorted_target_sequences, sorted_decode_lengths, sorted_indices = decoder(images, target_sequences, sequence_lengths)
        # Since we decoded starting with <sos>, the targets are all words after <sos>, up to <eos>
        sorted_target_sequences = sorted_target_sequences[:, 1:]
        # Remove paddings
        logits = pack_padded_sequence(logits, sorted_decode_lengths).data
        sorted_target_sequences = pack_padded_sequence(sorted_target_sequences, sorted_decode_lengths, batch_first=True).data
        # Calculate loss
        loss = criterion(logits, sorted_target_sequences)
        # Add doubly stochastic attention regularization
        loss += alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()
        # Back prop.
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients
        if grad_clip is not None:
            clip_gradient(optimizer, grad_clip)
        # Update weights
        optimizer.step()
        # Track metrics
        loss_tracker.update(loss.item(), sum(sorted_decode_lengths))
        acc_tracker.update(accuracy(logits, sorted_target_sequences, 5), sum(sorted_decode_lengths))
        # Update progressbar description
        pbar.set_description(f'Epoch: {epoch + 1:03d} - train_loss: {loss_tracker.avg:.3f} - train_acc: {acc_tracker.avg:.3f}%')
    return loss_tracker.avg, acc_tracker.avg
    # , train_bleu1, train_bleu2, train_bleu3, train_bleu4
I tried to solve the error by changing the unpacking to for i, (images, (target_sequences, sequence_lengths)) in pbar:, but then the error moved to this:
<ipython-input-89-edf18c02b224> in process_training(encoder, decoder, optimizer, criterion, train_loader, valid_loader, field, alpha_c, start_epoch, n_epochs, grad_clip, model_name, last_improv)
28 # Train step
29 train_loss, train_acc = train(loader=train_loader, encoder=encoder, decoder=decoder, optimizer=optimizer, criterion=criterion,
---> 30 epoch=epoch)
31 # Validation step
32 val_loss, val_acc, bleu1, bleu2, bleu3, bleu4 = validate(encoder=encoder,decoder=decoder, criterion=criterion, loader=valid_loader,
<ipython-input-102-96da9ec6e1d4> in train(loader, encoder, decoder, optimizer, criterion, epoch)
9 sequence_lengths = sequence_lengths.to(device)
10 # Forward prop.
---> 11 logits, alphas, sorted_target_sequences, sorted_decode_lengths, sorted_indices = decoder(images, target_sequences, sequence_lengths)
12 # Since we decoded starting with <sos>, the targets are all words after <sos>, up to <eos>
13 sorted_target_sequences = sorted_target_sequences[:, 1:]
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-97-da50cbd97d62> in forward(self, encoder_out, encoded_captions, caption_lengths)
88 # caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
89
---> 90 caption_lengths, sort_ind = caption_lengths.squeeze(1). sort(dim=0, descending=True)
91
92
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Here is the code I use in the decoder to forward the input. The error is in the line caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True), which sorts the caption data.
def forward(self, encoder_out, encoded_captions, caption_lengths):
    """
    Forward propagation.
    :param encoder_out: encoded images, a tensor of dimension (batch_size, enc_image_size, enc_image_size, encoder_dim)
    :param encoded_captions: encoded captions, a tensor of dimension (batch_size, max_caption_length)
    :param caption_lengths: caption lengths, a tensor of dimension (batch_size, 1)
    :return: scores for vocabulary, sorted encoded captions, decode lengths, weights, sort indices
    """
    batch_size = encoder_out.size(0)
    encoder_dim = encoder_out.size(-1)
    vocab_size = self.vocab_size
    # Flatten image
    encoder_out = encoder_out.view(batch_size, -1, encoder_dim)  # (batch_size, num_pixels, encoder_dim)
    num_pixels = encoder_out.size(1)
    # Sort input data by decreasing lengths; why? apparent below
    caption_lengths, sort_ind = caption_lengths.squeeze(1).sort(dim=0, descending=True)
    encoder_out = encoder_out[sort_ind]
    encoded_captions = encoded_captions[sort_ind]
    # Embedding
    embeddings = self.embedding(encoded_captions)  # (batch_size, max_caption_length, embed_dim)
    # Initialize LSTM state
    h, c = self.init_hidden_state(encoder_out)  # (batch_size, decoder_dim)
    # We won't decode at the <end> position, since we've finished generating as soon as we generate <end>
    # So, decoding lengths are actual lengths - 1
    decode_lengths = (caption_lengths - 1).tolist()
    # Create tensors to hold word prediction scores and alphas
    predictions = torch.zeros(batch_size, max(decode_lengths), vocab_size).to(device)
    alphas = torch.zeros(batch_size, max(decode_lengths), num_pixels).to(device)
    # At each time-step, decode by
    # attention-weighing the encoder's output based on the decoder's previous hidden state output
    # then generate a new word in the decoder with the previous word and the attention weighted encoding
    for t in range(max(decode_lengths)):
        batch_size_t = sum([l > t for l in decode_lengths])
        attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t],
                                                            h[:batch_size_t])
        gate = self.sigmoid(self.f_beta(h[:batch_size_t]))  # gating scalar, (batch_size_t, encoder_dim)
        attention_weighted_encoding = gate * attention_weighted_encoding
        h, c = self.decode_step(
            torch.cat([embeddings[:batch_size_t, t, :], attention_weighted_encoding], dim=1),
            (h[:batch_size_t], c[:batch_size_t]))  # (batch_size_t, decoder_dim)
        preds = self.fc(self.dropout(h))  # (batch_size_t, vocab_size)
        predictions[:batch_size_t, t, :] = preds
        alphas[:batch_size_t, t, :] = alpha
    return predictions, encoded_captions, decode_lengths, alphas,
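A hedged observation on the two errors, not a definitive fix: the IndexError usually means caption_lengths arrives as a 1-D tensor of shape (batch_size,) rather than the documented (batch_size, 1), so squeeze(1) refers to a dimension that does not exist; and the original "not enough values to unpack" is consistent with the training step unpacking five return values while this forward returns only four (sort_ind is missing from the return statement). A minimal sketch of a defensive version of the sort line:

# Works for both (batch_size,) and (batch_size, 1) length tensors
caption_lengths = caption_lengths.view(-1)
caption_lengths, sort_ind = caption_lengths.sort(dim=0, descending=True)

together with returning sort_ind as the fifth value at the end of forward, so the unpacking in train() matches.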

Find homography for stitching

I’m working on the following task:
I have 6 fisheye cameras and would like to produce a 360-degree stitched image.
After carrying out the calibration procedure with findChessboardCorners and calibrateCamera, I obtained the intrinsic and extrinsic matrices.
Starting from the 6 images with the fisheye effect, I obtained the 6 planar images through the fisheye.initUndistortRectifyMap function.
Two of the planar images are reported below.
Now I should do the stitching to get a 360-degree image.
I tried to do this using the cv2.createStitcher function, but it doesn’t always work; moreover, I would like to have access to the homography matrix to determine the static matrices of the system.
So I tried to calculate the homography matrix by identifying, with the SIFT algorithm, the common keypoints between two images and keeping the keypoints that best match.
I then stitched the two images using the warpPerspective function.
I believe the procedure is correct up to the calculation of the keypoints, but I do not understand why the final result is not good.
In fact, in the stitching attempt the second image comes out completely deformed, with its perspective changed and part of the right image lost.
Here there is the code:
import cv2
import numpy as np

def cvshow(name, img):
    cv2.imshow(name, img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def sift_kp(image):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.xfeatures2d.SIFT_create()
    kp, des = sift.detectAndCompute(image, None)
    kp_image = cv2.drawKeypoints(gray_image, kp, None)
    return kp_image, kp, des

def get_good_match(des1, des2):
    bf = cv2.BFMatcher()
    matches = bf.knnMatch(des1, des2, k=2)  # des1 is the template image, des2 is the matching image
    matches = sorted(matches, key=lambda x: x[0].distance / x[1].distance)
    good = []
    for m, n in matches:
        if m.distance < 0.55 * n.distance:
            good.append(m)
    return good

def drawMatches(imageA, imageB, kpsA, kpsB, matches, status):
    # Initialize the visualization picture, connecting pictures A and B left and right
    (hA, wA) = imageA.shape[:2]
    (hB, wB) = imageB.shape[:2]
    vis = np.zeros((max(hA, hB), wA + wB, 3), dtype="uint8")
    vis[0:hA, 0:wA] = imageA
    vis[0:hB, wA:] = imageB
    # Traverse jointly and draw the matching pairs
    for ((trainIdx, queryIdx), s) in zip(matches, status):
        # When the point pair is matched successfully, draw it on the visualization
        if s == 1:
            # Draw the matching pair
            ptA = (int(kpsA[queryIdx][0]), int(kpsA[queryIdx][1]))
            ptB = (int(kpsB[trainIdx][0]) + wA, int(kpsB[trainIdx][1]))
            cv2.line(vis, ptA, ptB, (0, 255, 0), 1)
    # Return the visualization result
    return vis

# Panorama stitching
def siftimg_rightlignment(img_right, img_left):
    _, kp1, des1 = sift_kp(img_right)
    _, kp2, des2 = sift_kp(img_left)
    goodMatch = get_good_match(des1, des2)
    # When there are more than 4 filtered matching pairs: calculate the perspective transformation matrix
    if len(goodMatch) > 4:
        # Get the point coordinates of the matching pairs
        ptsA = np.float32([kp1[m.queryIdx].pt for m in goodMatch]).reshape(-1, 1, 2)
        ptsB = np.float32([kp2[m.trainIdx].pt for m in goodMatch]).reshape(-1, 1, 2)
        ransacReprojThreshold = 4
        H, status = cv2.findHomography(ptsA, ptsB, cv2.RANSAC, ransacReprojThreshold)
        print(H)
        #H = np.array([[-3.95002617e-01,-7.49813070e-02, 4.43642683e+02], [-4.06655962e-01,5.27365057e-01, 1.20636875e+02],[-1.60149798e-03, -3.69708507e-05, 1.00000000e+00]])
        # findHomography uses RANSAC to select the best sets of matching points and then computes H, a 3x3 matrix
        # Warp the right picture's point of view; result is the transformed picture
        result = cv2.warpPerspective(img_right, H, (img_right.shape[1] + img_left.shape[1], img_right.shape[0]))
        cvshow('result_medium', result)
        # Paste the left picture onto the left end of the result picture
        result[0:img_left.shape[0], 0:img_left.shape[1]] = img_left
        return result

# Feature matching + panoramic stitching
# Read the pictures to stitch (note the placement of the left and right pictures);
# the right-hand picture is the one that gets transformed
img_left = cv2.imread(r'\planar\0.png')
img_right = cv2.imread(r'\planar\5.png')
img_right = cv2.resize(img_right, None, fx=0.5, fy=0.3)
# Ensure that the two images are the same size
img_left = cv2.resize(img_left, (img_right.shape[1], img_right.shape[0]))
kpimg_right, kp1, des1 = sift_kp(img_right)
kpimg_left, kp2, des2 = sift_kp(img_left)
# Display the original image and the image after keypoint detection at the same time
cvshow('img_left', np.hstack((img_left, kpimg_left)))
cvshow('img_right', np.hstack((img_right, kpimg_right)))
goodMatch = get_good_match(des1, des2)
all_goodmatch_img = cv2.drawMatches(img_right, kp1, img_left, kp2, goodMatch, None, flags=2)
# goodmatch_img shows only the first matches, goodMatch[:10]
goodmatch_img = cv2.drawMatches(img_right, kp1, img_left, kp2, goodMatch[:10], None, flags=2)
cvshow('Keypoint Matches1', all_goodmatch_img)
cvshow('Keypoint Matches2', goodmatch_img)
# Stitch the pictures into a panorama
result = siftimg_rightlignment(img_right, img_left)
cvshow('result', result)
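One hedged observation that may explain the deformation: a single homography relates two views exactly only when the scene is planar or the cameras share a center of projection (pure rotation), neither of which holds for a multi-camera fisheye rig looking at a general scene. A common workaround is to warp each undistorted image onto a cylinder before matching, so the residual alignment between neighbors is close to a pure translation. A rough sketch of such a cylindrical pre-warp, assuming K is the 3x3 intrinsic matrix from calibrateCamera (cylindrical_warp is a hypothetical helper, not part of the code above):

def cylindrical_warp(img, K):
    # Project img onto a cylinder using intrinsics K (sketch; assumes no skew)
    h, w = img.shape[:2]
    y_i, x_i = np.indices((h, w))  # destination pixel grid
    pts = np.stack([x_i, y_i, np.ones_like(x_i)], axis=-1).reshape(-1, 3).astype(np.float64)
    pts = pts @ np.linalg.inv(K).T  # normalized camera coordinates
    # Interpret x as the angle around the cylinder, keep y, then reproject through K
    cyl = np.stack([np.sin(pts[:, 0]), pts[:, 1], np.cos(pts[:, 0])], axis=-1)
    proj = cyl @ K.T
    proj = proj[:, :2] / proj[:, 2:3]
    maps = proj.reshape(h, w, 2).astype(np.float32)
    return cv2.remap(img, maps[..., 0], maps[..., 1],
                     interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)

After this pre-warp, estimating a translation (or a much gentler homography) on the overlap tends to behave better than warpPerspective applied directly to the planar images.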

Computing gradients for outputs taken from intermediate layers and updating weights using optimizer

I am trying to implement the architecture below and am not sure I am applying gradient tape properly.
In the architecture we can see that outputs are taken from multiple layers, shown in the blue boxes. Each blue box is termed a loss branch in the paper and contains two losses, namely cross entropy and L2 loss. I wrote the architecture in TensorFlow 2 and am using gradient tape for custom training. One thing I am not sure about is how I should update the losses using gradient tape.
I have two queries:
How am I supposed to use gradient tape for multiple losses in this scenario? I am interested in seeing code!
For instance, consider the 3rd blue box (3rd loss branch) in the image, where we take inputs from the conv 13 layer and get two outputs, one for classification and the other for regression.
So after computing the losses, how am I supposed to update the weights: should I update all the layers above (from conv 1 to conv 13), or only the layers that fed conv 13 (conv 11, 12 and 13)?
I am also attaching a link where I posted a question yesterday in detail.
Below is the snippet which I have tried for gradient descent. Please correct me if I am wrong.
images = batch.data[0]
images = (images - 127.5) / 127.5
targets = batch.label
with tensorflow.GradientTape() as tape:
    outputs = self.net(images)
    loss = self.loss_criterion(outputs, targets)
    self.scheduler(i, self.optimizer)
grads = tape.gradient(loss, self.net.trainable_variables)
self.optimizer.apply_gradients(zip(grads, self.net.trainable_variables))
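For the first query, one common pattern (a minimal sketch, assuming loss_criterion returns the list of per-branch losses built below) is to sum the branch losses into a single scalar inside the tape and take one gradient. Backpropagation then updates exactly the layers on the forward path of each branch, so for the 3rd branch the gradients flow back through conv 1 to conv 13 automatically; there is no need to restrict the update by hand:

with tensorflow.GradientTape() as tape:
    outputs = self.net(images)
    branch_losses = self.loss_criterion(outputs, targets)  # list of per-branch scalars
    total_loss = tensorflow.add_n(branch_losses)           # one scalar to differentiate
grads = tape.gradient(total_loss, self.net.trainable_variables)
self.optimizer.apply_gradients(zip(grads, self.net.trainable_variables))

(tape.gradient also accepts a list of targets and sums their gradients, so passing branch_losses directly is equivalent.)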
Below is the code for custom loss function which is used as loss_criterion above.
losses = []
for i in range(self.num_output_scales):
    pred_score = outputs[i * 2]
    pred_bbox = outputs[i * 2 + 1]
    gt_mask = targets[i * 2]
    gt_label = targets[i * 2 + 1]
    pred_score_softmax = tensorflow.nn.softmax(pred_score, axis=1)
    loss_mask = tensorflow.ones(pred_score_softmax.shape, tensorflow.float32)
    if self.hnm_ratio > 0:
        pos_flag = (gt_label[:, 0, :, :] > 0.5)
        pos_num = tensorflow.math.reduce_sum(tensorflow.cast(pos_flag, dtype=tensorflow.float32))
        if pos_num > 0:
            neg_flag = (gt_label[:, 1, :, :] > 0.5)
            neg_num = tensorflow.math.reduce_sum(tensorflow.cast(neg_flag, dtype=tensorflow.float32))
            neg_num_selected = min(int(self.hnm_ratio * pos_num), int(neg_num))
            neg_prob = tensorflow.where(neg_flag, pred_score_softmax[:, 1, :, :],
                                        tensorflow.zeros_like(pred_score_softmax[:, 1, :, :]))
            neg_prob_sort = tensorflow.sort(tensorflow.reshape(neg_prob, shape=(1, -1)), direction='ASCENDING')
            prob_threshold = neg_prob_sort[0][int(neg_num_selected)]
            neg_grad_flag = (neg_prob <= prob_threshold)
            loss_mask = tensorflow.concat([tensorflow.expand_dims(pos_flag, axis=1),
                                           tensorflow.expand_dims(neg_grad_flag, axis=1)], axis=1)
        else:
            neg_choice_ratio = 0.1
            neg_num_selected = int(tensorflow.cast(tensorflow.size(pred_score_softmax[:, 1, :, :]), dtype=tensorflow.float32) * neg_choice_ratio)
            neg_prob = pred_score_softmax[:, 1, :, :]
            neg_prob_sort = tensorflow.sort(tensorflow.reshape(neg_prob, shape=(1, -1)), direction='ASCENDING')
            prob_threshold = neg_prob_sort[0][int(neg_num_selected)]
            neg_grad_flag = (neg_prob <= prob_threshold)
            loss_mask = tensorflow.concat([tensorflow.expand_dims(pos_flag, axis=1),
                                           tensorflow.expand_dims(neg_grad_flag, axis=1)], axis=1)
    pred_score_softmax_masked = tensorflow.where(loss_mask, pred_score_softmax,
                                                 tensorflow.zeros_like(pred_score_softmax, dtype=tensorflow.float32))
    pred_score_log = tensorflow.math.log(pred_score_softmax_masked)
    score_cross_entropy = - tensorflow.where(loss_mask, gt_label[:, :2, :, :],
                                             tensorflow.zeros_like(gt_label[:, :2, :, :], dtype=tensorflow.float32)) * pred_score_log
    loss_score = (tensorflow.math.reduce_sum(score_cross_entropy) /
                  tensorflow.cast(tensorflow.size(score_cross_entropy), tensorflow.float32))
    mask_bbox = gt_mask[:, 2:6, :, :]
    predict_bbox = pred_bbox * mask_bbox
    label_bbox = gt_label[:, 2:6, :, :] * mask_bbox
    # L2 loss of boxes
    # loss_bbox = tensorflow.math.reduce_sum(tensorflow.nn.l2_loss((label_bbox - predict_bbox)) ** 2) / 2
    loss_bbox = mse(label_bbox, predict_bbox) / tensorflow.math.reduce_sum(mask_bbox)
    # Adding only the losses relevant to a branch and sending them for back prop
    losses.append(loss_score + loss_bbox)
    # losses.append(loss_bbox)
    # Adding all losses and sending to back prop: Approach 1
    # loss_cls += loss_score
    # loss_reg += loss_bbox
    # loss_branch.append(loss_score)
    # loss_branch.append(loss_bbox)
    # loss = loss_cls + loss_reg
return losses
I am not getting any error, but my losses aren't minimizing. Here is the log from my training.
Could someone please help me fix this?

keras custom activation to drop under certain conditions

I am trying to drop (zero out) the values less than 1 and greater than -1 in my custom activation, like below.
def ScoreActivationFromSigmoid(x, target_min=1, target_max=9):
    condition = K.tf.logical_and(K.tf.less(x, 1), K.tf.greater(x, -1))
    case_true = K.tf.reshape(K.tf.zeros([x.shape[1] * x.shape[2]], tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
    case_false = x
    changed_x = K.tf.where(condition, case_true, case_false)
    activated_x = K.sigmoid(changed_x)
    score = activated_x * (target_max - target_min) + target_min
    return score
The data has 3 dimensions: batch_size x sequence_length x number of features.
But I got this error:
InvalidArgumentError: Inputs to operation activation_51/Select of type Select must have the same size and shape. Input 0: [1028,300,64] != input 1: [1,300,64]
[[{{node activation_51/Select}} = Select[T=DT_FLOAT, _class=["loc:#training_88/Adam/gradients/activation_51/Select_grad/Select_1"], _device="/job:localhost/replica:0/task:0/device:GPU:0"](activation_51/LogicalAnd, activation_51/Reshape, dense_243/add)]]
[[{{node metrics_92/acc/Mean_1/_9371}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_473_metrics_92/acc/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I understand what the problem is: the custom activation function cannot find the proper batch size of the inputs. But I don't know how to control it.
Can anyone fix this or suggest other methods to replace some of the element values under certain conditions?
The error message I got when running your code is:
ValueError: Cannot reshape a tensor with 19200 elements to shape [1028,300,64] (19737600 elements) for 'Reshape_8' (op: 'Reshape') with input shapes: [19200], [3] and with input tensors computed as partial shapes: input[1] = [1028,300,64].
And the problem is that you cannot reshape a tensor of shape [x.shape[1] * x.shape[2]] to (K.tf.shape(x)[0], x.shape[1], x.shape[2]), because their element counts are different.
So the solution is simply to create a zero tensor of the right shape.
This line:
case_true = K.tf.reshape(K.tf.zeros([x.shape[1] * x.shape[2]], tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
should be replaced with:
case_true = K.tf.reshape(K.tf.zeros([x.shape[0] * x.shape[1] * x.shape[2]], K.tf.float32), shape=(K.tf.shape(x)[0], x.shape[1], x.shape[2]))
or using K.tf.zeros_like:
case_true = K.tf.zeros_like(x)
Workable code:
import keras.backend as K
import numpy as np

def ScoreActivationFromSigmoid(x, target_min=1, target_max=9):
    condition = K.tf.logical_and(K.tf.less(x, 1), K.tf.greater(x, -1))
    case_true = K.tf.zeros_like(x)
    case_false = x
    changed_x = K.tf.where(condition, case_true, case_false)
    activated_x = K.tf.sigmoid(changed_x)
    score = activated_x * (target_max - target_min) + target_min
    return score

with K.tf.Session() as sess:
    x = K.tf.placeholder(K.tf.float32, shape=(1028, 300, 64), name='x')
    score = sess.run(ScoreActivationFromSigmoid(x), feed_dict={'x:0': np.random.randn(1028, 300, 64)})
    print(score)
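For readers on TensorFlow 2, a hedged equivalent without sessions or placeholders (a sketch assuming eager execution, the TF2 default):

import numpy as np
import tensorflow as tf

def score_activation(x, target_min=1.0, target_max=9.0):
    # Zero out values strictly between -1 and 1, then rescale a sigmoid to [target_min, target_max]
    condition = tf.logical_and(x < 1.0, x > -1.0)
    changed_x = tf.where(condition, tf.zeros_like(x), x)
    return tf.sigmoid(changed_x) * (target_max - target_min) + target_min

score = score_activation(tf.constant(np.random.randn(1028, 300, 64), dtype=tf.float32))
print(score.shape)  # (1028, 300, 64)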

scikit-learn RandomForestClassifier - How to interpret tree output?

I have the code below, but I just don't understand how to interpret the tree output from the RandomForestClassifier: how the gini was calculated given the samples, and how the totals in the 'value' lists can be higher than the initial samples of 3.
I am comparing this output to a DecisionTreeClassifier, which I can understand and interpret.
Any help is appreciated, thanks!
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
import numpy as np
from sklearn.externals.six import StringIO
import pydot

# Data
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([0, 1, 1, 0])

# Create object classifiers
clf = RandomForestClassifier()
clf_tree = tree.DecisionTreeClassifier()

# Fit data
clf_tree.fit(X, Y)
clf.fit(X, Y)

# Save data
dot_data = StringIO()
tree.export_graphviz(clf_tree, out_file=dot_data)
graph = pydot.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("orig_tree.pdf")

i_tree = 0
for tree_in_forest in clf.estimators_:
    dot_data = StringIO()
    tree.export_graphviz(tree_in_forest, out_file=dot_data)
    graph = pydot.graph_from_dot_data(dot_data.getvalue())
    f_name = 'tree_' + str(i_tree) + '.pdf'
    graph.write_pdf(f_name)
    i_tree += 1
The decision tree:
http://i.stack.imgur.com/XZ7vU.png
A tree from the RandomForestClassifier:
http://i.stack.imgur.com/Bb5t9.png
How was the gini calculated, given the samples?
The gini is computed in exactly the same way for the random forest and the decision tree: it is the impurity of the node (for classification the Gini impurity, 1 minus the sum of squared class proportions; for regression, the variance).
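As a concrete check (a small illustrative sketch, not part of the original answer): for this XOR dataset the root node holds 2 samples of each class, so its impurity is 1 - (0.5^2 + 0.5^2) = 0.5, which is the root gini shown in the exported trees:

def gini(counts):
    # Gini impurity: 1 - sum of squared class proportions
    p = np.asarray(counts, dtype=float) / np.sum(counts)
    return 1.0 - np.sum(p ** 2)

print(gini([2, 2]))  # 0.5 at the root (2 samples of class 0, 2 of class 1)
print(gini([0, 2]))  # 0.0 at a pure leaf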
How can the totals in the 'value' lists be higher than the initial 3 samples?
In the case of classification, the value attribute corresponds to the number of samples reaching the node.
In the case of a random forest, each tree is grown on a bootstrap sample, i.e. samples drawn with replacement: a tree still draws as many samples as the original dataset, but some samples appear several times and others not at all (on average about 2/3 of the distinct original samples appear in a given tree). A sample drawn twice is counted twice, which is why a 'value' total can exceed the number of distinct samples reaching the node.
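A tiny hedged illustration of the bootstrap (illustrative only; the exact draw depends on scikit-learn's internal random state):

import numpy as np

rng = np.random.RandomState(0)
n_samples = 4
# Draw a bootstrap sample: n_samples indices with replacement from {0, ..., 3}
idx = rng.randint(0, n_samples, size=n_samples)
print(idx)  # e.g. [0 3 1 0] -- sample 0 drawn twice, sample 2 never
print(np.bincount(idx, minlength=n_samples))  # occurrence count per original sample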