Related
I am browsing through mir.ndslice docs trying to figure out how to do a simple row selection by column.
In numpy I would do:
# Build a 4x6 array of random integers drawn with np.random.randint(0, 20, ...).
a = np.random.randint(0, 20, [4, 6])
# array([[ 8, 5, 4, 18, 1, 4],
# [ 2, 18, 15, 7, 18, 19],
# [16, 5, 4, 6, 11, 11],
# [15, 1, 14, 6, 1, 4]])
a[a[:,2] > 10] # boolean-mask selection: keep the rows whose third column (index 2) is > 10
# array([[ 2, 18, 15, 7, 18, 19],
# [15, 1, 14, 6, 1, 4]])
Using mir library I naively tried:
import std.range;
import std.random;
import mir.ndslice;
// Generate 24 values with uniform(0, 20) and view them as a 4-row by 6-column slice.
auto a = generate!(() => uniform(0, 20)).take(24).array.sliced(4,6);
// [[12, 19, 3, 10, 19, 11],
// [19, 0, 0, 13, 9, 1],
// [ 0, 0, 4, 13, 1, 2],
// [ 6, 19, 14, 18, 14, 18]]
// numpy-style mask attempt: `a[0..$,2]` is a 1-D Slice and it is compared directly
// with an int, which is the expression the compiler error quoted below complains about.
a[a[0..$,2] > 10];
But got
Error: incompatible types for `((ulong __dollar = a.length();) , a.opIndex(a.opSlice(0LU, __dollar), 2)) > (10)`: `Slice!(int*, 1LU, cast(mir_slice_kind)0)` and `int`
dmd failed with exit code 1.
So, I went through the docs and couldn't find anything that would look like np.where or similar. Is it even possible in mir?
I have the following code, which uses Keras 1.2 and TensorFlow 1.1. When I run it, I get an error.
import numpy as np
import keras
from keras import backend as K
from keras import initializers
from keras.models import Sequential, Model, load_model, save_model
from keras.layers.core import Dense, Lambda, Activation
from keras.layers import Embedding, Input, Dense, Multiply, Reshape, Flatten
from keras.optimizers import Adagrad, Adam, SGD, RMSprop
from keras.regularizers import l2
from sklearn.metrics import average_precision_score
from sklearn.metrics import auc
def init_normal(shape, name=None):
    """Return a ``lecun_uniform`` initializer created with ``seed=None``.

    ``shape`` and ``name`` are accepted but are not used by the body; the
    value returned is whatever ``initializers.lecun_uniform`` produces.
    """
    lecun_initializer = initializers.lecun_uniform(seed=None)
    return lecun_initializer
def get_model(num_a, num_b, num_c, dim, regs=(0, 0, 0)):
    """Build a three-input embedding model with a single sigmoid output.

    Each input (``a``, ``b``, ``c``) is one int32 index. It is looked up in
    its own embedding table and flattened; the three latent vectors are then
    multiplied element-wise and fed to a one-unit sigmoid ``Dense`` layer.

    Args:
        num_a: ``input_dim`` of the embedding table for input ``a``.
        num_b: ``input_dim`` of the embedding table for input ``b``.
        num_c: ``input_dim`` of the embedding table for input ``c``.
        dim: ``output_dim`` shared by the three embedding tables.
        regs: Sequence of three L2 factors, applied to the ``a``, ``b`` and
            ``c`` embeddings in that order. The default is an immutable
            tuple so it cannot be mutated between calls.

    Returns:
        The uncompiled ``Model`` taking ``[a, b, c]`` and producing the
        ``prediction`` layer output.
    """
    vocab_sizes = (num_a, num_b, num_c)
    inputs = [
        Input(shape=(1,), dtype='int32', name=label)
        for label in ('a', 'b', 'c')
    ]

    latents = []
    for position, (index_input, vocab_size) in enumerate(zip(inputs, vocab_sizes)):
        table = Embedding(input_dim=vocab_size, output_dim=dim,
                          embeddings_initializer='uniform',
                          W_regularizer=l2(regs[position]), input_length=1)
        latents.append(Flatten()(table(index_input)))

    # All three latent vectors take part in the product. The previous code
    # multiplied the ``b`` vector twice and never used the ``c`` vector.
    predict_vector = Multiply()(latents)
    prediction = Dense(1, activation='sigmoid', init='lecun_uniform',
                       name='prediction')(predict_vector)
    return Model(input=inputs, output=prediction)
def evaluate_model(model, test_pos, test_neg):
    """Score ``model`` on positive and negative ``(a, b, c)`` triples.

    Positive triples are labelled 1 and negative triples 0. The model is
    asked for a prediction on every triple, and the predictions are compared
    with the labels.

    Args:
        model: Object whose ``predict([a, b, c], batch_size=..., verbose=...)``
            returns one score per sample.
        test_pos: Iterable of indexable triples labelled as positives.
        test_neg: Iterable of indexable triples labelled as negatives.

    Returns:
        Tuple ``(average_precision, roc_auc)``.

    Raises:
        ValueError: Raised by scikit-learn when ROC AUC is undefined, e.g.
            when only one class is present in the labels.
    """
    # Imported locally so this function does not depend on the module-level
    # name ``auc``. ``sklearn.metrics.auc`` integrates an arbitrary (x, y)
    # curve and is not a ROC AUC when given labels and scores.
    from sklearn.metrics import roc_auc_score

    # The module-level globals are kept because other code may read them.
    global _model
    global _test_pos
    global _test_neg
    _model = model
    _test_pos = test_pos
    _test_neg = test_neg
    print(_test_neg)

    positives = list(_test_pos)
    negatives = list(_test_neg)
    samples = positives + negatives
    labels = [1] * len(positives) + [0] * len(negatives)

    a = np.array([item[0] for item in samples])
    b = np.array([item[1] for item in samples])
    c = np.array([item[2] for item in samples])

    predictions = _model.predict([a, b, c],
                                 batch_size=100, verbose=0)
    # Flatten so the metrics receive a 1-D score vector even when the model
    # returns an (n, 1) column.
    scores = np.ravel(predictions)
    return average_precision_score(labels, scores), roc_auc_score(labels, scores)
# Build the model with embedding sizes 4, 8 and 12, embedding dimension 2 and no L2 penalty.
model = get_model(4, 8, 12, 2, [0,0,0])
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy')
# Each triple is (a, b, c).
# NOTE(review): the largest indices used below are a=4, b=8 and c=12, i.e. equal to the
# sizes passed to get_model above; confirm these are meant to be valid embedding indices.
pos_test = [[0, 0, 2], [4, 8, 8], [2, 5, 4], [0, 0, 0]]
neg_test = [[3, 3, 2], [2, 1, 8], [1, 4, 1], [3, 3, 12]]
# NOTE(review): assigning to `auc` rebinds the name imported from sklearn.metrics, so a
# second call to evaluate_model would find a number there instead of the function.
aupr, auc = evaluate_model(model, pos_test, neg_test)
print(aupr, auc)
However, it gives me the following error. Is there any way to fix it?
InvalidArgumentError (see above for traceback): indices[1,0] = 4 is not in [0, 4)
[[Node: embedding_4/embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:#embedding_4/embeddings"], validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](embedding_4/embeddings/read, _recv_a_1_0)]]
The problem is that you defined the embedding input_dim values as 4, 8 and 12, while they should be 5, 9 and 13, because input_dim in an Embedding layer must be max_index + 1. This is also clearly stated in the Keras docs:
Size of the vocabulary, i.e. maximum integer index + 1.
How to fix the issue?
Change get_model method to:
model = get_model(5, 9, 13, 2, [0, 0, 0])
Or alternatively change index of data to:
pos_test = [[0, 0, 2], [3, 7, 7], [2, 5, 4], [0, 0, 0]]
neg_test = [[3, 3, 2], [2, 1, 7], [1, 4, 1], [3, 3, 11]]
I have a list with 10 records, and each record has one or more elements with 3 categories like below:
list = [('0.4', 2, 'doc4.txt'),('0.04', 13, 'doc4.txt'), ('0.5', 4, 'doc4.txt')]
[('0.5', 6, 'doc3.txt'),('0.04', 13, 'doc3.txt'), ('0.5', 4, 'doc3.txt')]
[('0.6', 8, 'doc2.txt')]
[('0.4', 2, 'doc5.txt'), ('1.0', 7, 'doc5.txt')]
[('0.2', 2, 'doc6.txt'), ('0.4', 2, 'doc6.txt'),('0.8', 2, 'doc6.txt'), ('0.34', 5, 'doc6.txt'),('0.76', 4, 'doc6.txt'), ('0.5', 3, 'doc6.txt')]
[('0.3', 7, 'doc9.txt')]
[('0.1', 8, 'doc12.txt')]
[('0.3', 9, 'doc11.txt'),('1.0', 8, 'doc11.txt')]
[('0.9', 7, 'doc22.txt')]
[('0.3', 7, 'doc24.txt')]
You may notice that the third element is the same text for every tuple within a record. There are 10 categories, as the list consists of 10 records.
According to the structure of the list:
For example, [('0.6', 8, 'doc2.txt')]
First element, '0.6' represents X-axis value in the range of [0.1 -> 1.0]
Second element of an integer represents Y-axis value in graph
Third element, 'doc2.txt' represents the Category name in graph
The list should be plotted as the image below,
I've been trying with several approaches, but still couldn't figure that out
>>> plt.scatter(*zip(*list))
>>> plt.xlabel('X-Axis')
>>> plt.ylabel('Y-Axis')
>>> plt.show()
I think you can just keep the list as it is and iterate over it. You'd then produce a scatter plot for each sublist in the outer list, as the items from the sublist should share the same marker, color and legend label.
import matplotlib.pyplot as plt
# Don't call a variable "list" or "print" or any other Python built-in name.
# Each inner list is one category: tuples of (x as string, y, category label).
liste = [
    [('0.4', 2, 'doc4.txt'), ('0.04', 13, 'doc4.txt'), ('0.5', 4, 'doc4.txt')],
    [('0.5', 6, 'doc3.txt'), ('0.04', 13, 'doc3.txt'), ('0.5', 4, 'doc3.txt')],
    [('0.6', 8, 'doc2.txt')],
    [('0.4', 2, 'doc5.txt'), ('1.0', 7, 'doc5.txt')],
    [('0.2', 2, 'doc6.txt'), ('0.4', 2, 'doc6.txt'), ('0.8', 2, 'doc6.txt'),
     ('0.34', 5, 'doc6.txt'), ('0.76', 4, 'doc6.txt'), ('0.5', 3, 'doc6.txt')],
    [('0.3', 7, 'doc9.txt')],
    [('0.1', 8, 'doc12.txt')],
    [('0.3', 9, 'doc11.txt'), ('1.0', 8, 'doc11.txt')],
    [('0.9', 7, 'doc22.txt')],
    [('0.3', 7, 'doc24.txt')],
]

# u"..." literals give the same strings as the former ur"..." ones on
# Python 2 and, unlike ur"...", are also valid syntax on Python 3.
markers = [u"$\u25A1$", u"$\u25A0$", u"$\u25B2$", u"$\u25E9$"]
colors = ["k", "crimson", "#112b77"]

fig, ax = plt.subplots()
# One scatter call per sublist so its points share marker, colour and label.
# The marker cycles every sublist; the colour changes every four sublists.
for i, sublist in enumerate(liste):
    x, y, cat = zip(*sublist)
    ax.scatter([float(value) for value in x], y, s=64,
               c=colors[(i // 4) % 3],
               marker=markers[i % 4], label=cat[0])

# Place the legend just outside the right edge of the axes and leave room for it.
ax.legend(bbox_to_anchor=(1.01, 1), borderaxespad=0)
plt.subplots_adjust(left=0.1, right=0.8)
plt.show()
There are multiple issues. Your assignment of list makes no sense (presumably you forgot some brackets). Also, you really shouldn't reuse built-in names like "list". You should not represent floats as strings (your x coordinates). You cannot simply unpack a list into plt.scatter and hope that all of these issues magically work themselves out.
Below is some code showing how to pass your data properly (I use plot instead of scatter, as plot accepts proper colour names).
import numpy as np
import matplotlib.pyplot as plt
# 'list' is a bad name for a variable as it overwrites the list() built-in function
# -> rename to data
from collections import OrderedDict

# Each inner list is one category: tuples of (x as string, y, category label).
data = [
    [('0.4', 2, 'doc4.txt'), ('0.04', 13, 'doc4.txt'), ('0.5', 4, 'doc4.txt')],
    [('0.5', 6, 'doc3.txt'), ('0.04', 13, 'doc3.txt'), ('0.5', 4, 'doc3.txt')],
    [('0.6', 8, 'doc2.txt')],
    [('0.4', 2, 'doc5.txt'), ('1.0', 7, 'doc5.txt')],
    [('0.2', 2, 'doc6.txt'), ('0.4', 2, 'doc6.txt'), ('0.8', 2, 'doc6.txt'),
     ('0.34', 5, 'doc6.txt'), ('0.76', 4, 'doc6.txt'), ('0.5', 3, 'doc6.txt')],
    [('0.3', 7, 'doc9.txt')],
    [('0.1', 8, 'doc12.txt')],
    [('0.3', 9, 'doc11.txt'), ('1.0', 8, 'doc11.txt')],
    [('0.9', 7, 'doc22.txt')],
    [('0.3', 7, 'doc24.txt')],
]

# flatten nested list
flat = [item for sublist in data for item in sublist]

# convert strings to numbers
numeric = [(float(x), y, label) for (x, y, label) in flat]

# Map each label to its list of (x, y) points. An OrderedDict keeps the labels
# in first-seen order, so the colour given to each label is the same on every
# run and on every Python version. The raw ``data`` list is left untouched.
points_by_label = OrderedDict()
for (x, y, label) in numeric:
    points_by_label.setdefault(label, []).append((x, y))

# initialise figure
fig, ax = plt.subplots(1, 1)
colors = ['blue', 'red', 'yellow', 'green', 'orange', 'brown', 'violet', 'magenta', 'white', 'black']

# populate figure; .items() works on both Python 2 and 3 (.iteritems() is Python 2 only)
for color, (label, xy) in zip(colors, points_by_label.items()):
    x, y = np.array(xy).T
    ax.plot(x, y, 'o', label=label, color=color)

ax.set_xlim(0, 1.1)
ax.set_ylim(0, 16)
ax.legend(numpoints=1)
plt.show()
There is Model with ManyToMany field:
class Number(models.Model):
    """A single stored integer, referenced by ``MyModel.numbers_set``."""

    # Qualified through ``models`` to match how the sibling model is declared.
    current_number = models.IntegerField()
class MyModel(models.Model):
    """A model holding a many-to-many set of ``Number`` rows."""

    # Django's many-to-many field class is ``ManyToManyField``;
    # ``models.ManyToMany`` does not exist.
    numbers_set = models.ManyToManyField(Number)
For example we have such dataset:
my_model_1.numbers_set = [1, 2, 3, 4]
my_model_2.numbers_set = [2, 3, 4, 5]
my_model_3.numbers_set = [3, 4, 5, 6]
my_model_4.numbers_set = [4, 5, 6, 7]
my_model_5.numbers_set = [4, 5, 6, 7]
I'm looking for a way to aggregate MyModel by amount of same numbers.
For example, group the MyModel objects that share at least 3 numbers in their numbers_set:
[
[my_model_1, my_model_2],
[my_model_2, my_model_3],
[my_model_3, my_model_4, my_model_5],
]
If you are using Postgres 9.4 (or later) and Django 1.9 (or later), it is better to use JSONField() rather than a ManyToMany relation. For indexing, use jsonb indexing in Postgres, which gives you efficient queries for fetching the data. Check here
I'm trying to plot a graph that shows the average call duration for each minute of each day, for 7 days, in the same plot. For now I'm defining the function that returns the data to be plotted according to these conditions, but I always get a list of empty lists. Can anyone help me find the bug? (acc is just a sample of the data from the global database.)
This is the function:
import time
import calendar
from datetime import datetime
from itertools import repeat
# Sample call records: each dict carries a start time ("start_uepoch") and a "duration".
acc=[{u'switch_id': 3, u'hangup_cause_id': 7, u'start_uepoch': datetime(2015, 5, 8, 13, 32, 1), u'duration': 32}, {u'switch_id': 3, u'hangup_cause_id': 10, u'start_uepoch': datetime(2015, 5, 8, 13, 32, 8), u'duration': 20}, {u'switch_id': 3, u'hangup_cause_id': 10, u'start_uepoch': datetime(2015, 5, 8, 13, 32, 10), u'duration': 17}]
# Reference date: today's year, month and day.
t = datetime.now()
y = t.year
m = t.month
d = t.day
# donnees collects one `data` list per value of k (7 in total).
donnees=[]
for k in range(7):
try:
m = t.month
data=[]
liste=[]
liste_time=[]
# Keep the [start, duration] pairs whose date matches year y, month m and day number d-k.
for i in acc:
if (i["start_uepoch"].year == y and i["start_uepoch"].month == m and i["start_uepoch"].day == d-k):
liste.append([i["start_uepoch"],i["duration"]])
# Walk every (hour, minute) pair and gather the durations of the calls that started in it.
for q in range(24):
for mnt in range(60):
liste2=[]
ACD=0
somme_duration=0
n=0
for p in liste:
if (p[0].hour==q and p[0].minute == mnt):
liste2.append(p[1])
# `temps` is only ever assigned here, i.e. when at least one record matched.
temps=p[0]
if len(liste2)!=0:
# Average the collected durations, rounded to two decimals.
for j in liste2:
somme_duration+=j
n+=1
ACD=round((float(somme_duration)/n)*100)/100
liste_time.append(calendar.timegm(temps.timetuple()))
data.append(ACD)
else:
# NOTE(review): `temps` is read here even though no record matched this minute;
# if it was never assigned, this line raises and control jumps to the except below.
liste_time.append(calendar.timegm(temps.timetuple()))
data.append(0)
# NOTE(review): the bare except silently discards any exception raised above.
except:
pass
donnees.append(data)
print donnees
This is due to your try / except block, which hides the error raised when temps is used before it has been assigned. If you remove it and set temps = None before the inner loop, that solves your issue:
import time
import calendar
from datetime import datetime
from itertools import repeat
from datetime import timedelta

# Sample call records: each dict carries a start time ("start_uepoch") and a "duration".
acc = [
    {u'switch_id': 3, u'hangup_cause_id': 7,
     u'start_uepoch': datetime(2015, 5, 8, 13, 32, 1), u'duration': 32},
    {u'switch_id': 3, u'hangup_cause_id': 10,
     u'start_uepoch': datetime(2015, 5, 8, 13, 32, 8), u'duration': 20},
    {u'switch_id': 3, u'hangup_cause_id': 10,
     u'start_uepoch': datetime(2015, 5, 8, 13, 32, 10), u'duration': 17},
]


def average_duration_per_minute(records, day):
    """Average the call durations of ``records`` per minute for one day.

    Args:
        records: Iterable of dicts with a ``"start_uepoch"`` datetime and a
            numeric ``"duration"``.
        day: ``datetime.date``; only calls that started on it are kept.

    Returns:
        ``(timestamps, averages)``: two parallel lists ordered by hour and
        minute, with one entry per minute that has at least one call.
        Minutes without calls produce no entry. Each timestamp is
        ``calendar.timegm`` of the last matching call (in input order) for
        that minute, and each average is rounded to two decimals.
    """
    calls_by_minute = {}
    for record in records:
        start = record["start_uepoch"]
        if start.date() != day:
            continue
        calls_by_minute.setdefault((start.hour, start.minute), []).append(
            (start, record["duration"]))

    timestamps = []
    averages = []
    for minute_key in sorted(calls_by_minute):
        calls = calls_by_minute[minute_key]
        total = sum(duration for _, duration in calls)
        averages.append(round((float(total) / len(calls)) * 100) / 100.0)
        timestamps.append(calendar.timegm(calls[-1][0].timetuple()))
    return timestamps, averages


t = datetime.now()
donnees = []
for k in range(7):
    # Subtract whole days from today instead of comparing day numbers with
    # ``d - k``, which stops matching as soon as the window crosses a month
    # boundary.
    day = (t - timedelta(days=k)).date()
    liste_time, data = average_duration_per_minute(acc, day)
    donnees.append(data)
print(donnees)