python itertools groupby find the max value - python-2.7

Use:
from itertools import groupby
from operator import itemgetter
like this:
Input:
test = {('a','b'):1,('a','c'):2,('a','d'):3,('x','b'):4,('x','c'):5}
Find the max value in each group, grouping by key[0].
Output:
output_test = {('a','d'):3,('x','c'):5}

To do this using itertools.groupby, assuming you do not care which entry is returned when there are multiple entries with the same max value:
import itertools

test = {('a', 'b'): 1, ('a', 'c'): 2, ('a', 'd'): 3, ('x', 'b'): 4, ('x', 'c'): 5}
output_test = {('a', 'd'): 3, ('x', 'c'): 5}
grouped = itertools.groupby(sorted(test.iteritems()), lambda x: x[0][0])
maxEntries = {x[0]: x[1] for x in {max(v, key=lambda q: q[1]) for k, v in grouped}}
print maxEntries
print maxEntries == output_test
Outputs:
{('x', 'c'): 5, ('a', 'd'): 3}
True
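For comparison, the itemgetter import from the question can do the per-group max selection; a minimal sketch of the same answer written that way:
import itertools
from operator import itemgetter

test = {('a', 'b'): 1, ('a', 'c'): 2, ('a', 'd'): 3, ('x', 'b'): 4, ('x', 'c'): 5}
# sort so that groupby sees each first-key group as one consecutive run
pairs = sorted(test.iteritems())
grouped = itertools.groupby(pairs, lambda kv: kv[0][0])
# keep the (key, value) pair with the largest value in each group
maxEntries = dict(max(group, key=itemgetter(1)) for _, group in grouped)
print maxEntries  # {('x', 'c'): 5, ('a', 'd'): 3}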

A related groupby one-liner gives the length of the longest run of consecutive equal items (items is renamed from input to avoid shadowing the builtin):
from itertools import groupby
max(sum(1 for _ in g) for k, g in groupby(items))

Related

InvalidArgumentError indices[i,0] = x is not in [0, x) in keras

I have the following code using Keras 1.2 and TensorFlow 1.1. Running it produces an error.
import numpy as np
import keras
from keras import backend as K
from keras import initializers
from keras.models import Sequential, Model, load_model, save_model
from keras.layers.core import Dense, Lambda, Activation
from keras.layers import Embedding, Input, Dense, Multiply, Reshape, Flatten
from keras.optimizers import Adagrad, Adam, SGD, RMSprop
from keras.regularizers import l2
from sklearn.metrics import average_precision_score
from sklearn.metrics import auc

def init_normal(shape, name=None):
    return initializers.lecun_uniform(seed=None)

def get_model(num_a, num_b, num_c, dim, regs=[0, 0, 0]):
    a = Input(shape=(1,), dtype='int32', name='a')
    b = Input(shape=(1,), dtype='int32', name='b')
    c = Input(shape=(1,), dtype='int32', name='c')
    Embedding_a = Embedding(input_dim=num_a, output_dim=dim,
                            embeddings_initializer='uniform', W_regularizer=l2(regs[0]), input_length=1)
    Embedding_b = Embedding(input_dim=num_b, output_dim=dim,
                            embeddings_initializer='uniform', W_regularizer=l2(regs[1]), input_length=1)
    Embedding_c = Embedding(input_dim=num_c, output_dim=dim,
                            embeddings_initializer='uniform', W_regularizer=l2(regs[2]), input_length=1)
    a_latent = Flatten()(Embedding_a(a))
    b_latent = Flatten()(Embedding_b(b))
    c_latent = Flatten()(Embedding_c(c))
    predict_vector = Multiply()([a_latent, b_latent, b_latent])
    prediction = Dense(1, activation='sigmoid', init='lecun_uniform', name='prediction')(predict_vector)
    model = Model(input=[a, b, c], output=prediction)
    return model

def evaluate_model(model, test_pos, test_neg):
    global _model
    global _test_pos
    global _test_neg
    _model = model
    _test_pos = test_pos
    _test_neg = test_neg
    print(_test_neg)
    a, b, c, labels = [], [], [], []
    for item in _test_pos:
        a.append(item[0])
        b.append(item[1])
        c.append(item[2])
        labels.append(1)
    for item in _test_neg:
        a.append(item[0])
        b.append(item[1])
        c.append(item[2])
        labels.append(0)
    a = np.array(a)
    b = np.array(b)
    c = np.array(c)
    predictions = _model.predict([a, b, c], batch_size=100, verbose=0)
    return average_precision_score(labels, predictions), auc(labels, predictions)

model = get_model(4, 8, 12, 2, [0, 0, 0])
model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy')
pos_test = [[0, 0, 2], [4, 8, 8], [2, 5, 4], [0, 0, 0]]
neg_test = [[3, 3, 2], [2, 1, 8], [1, 4, 1], [3, 3, 12]]
aupr, auc = evaluate_model(model, pos_test, neg_test)
print(aupr, auc)
However, it gives me this error. Is there any way to fix it?
InvalidArgumentError (see above for traceback): indices[1,0] = 4 is not in [0, 4)
[[Node: embedding_4/embedding_lookup = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _class=["loc:@embedding_4/embeddings"], validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](embedding_4/embeddings/read, _recv_a_1_0)]]
The problem is that you defined the embedding input_dim as 4, 8 and 12, while it should be 5, 9 and 13, because input_dim of an Embedding layer must be max_index + 1. This is also clearly stated in the Keras docs:
Size of the vocabulary, i.e. maximum integer index + 1.
How to fix the issue?
Change the get_model call to:
model = get_model(5, 9, 13, 2, [0, 0, 0])
Or alternatively change the indices in the data so the maximum index stays below input_dim:
pos_test = [[0, 0, 2], [3, 7, 7], [2, 5, 4], [0, 0, 0]]
neg_test = [[3, 3, 2], [2, 1, 7], [1, 4, 1], [3, 3, 11]]
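If the index ranges aren't known up front, it's safer to derive each input_dim from the data itself; a minimal sketch using the test lists above:
data = pos_test + neg_test
num_a = max(row[0] for row in data) + 1  # vocabulary size = max index + 1
num_b = max(row[1] for row in data) + 1
num_c = max(row[2] for row in data) + 1
model = get_model(num_a, num_b, num_c, 2, [0, 0, 0])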

Django annotate group by month

How can I sum price grouped by month?
I tried:
import itertools
qs = Contract.objects.values('created', 'proposal__price')
grouped = itertools.groupby(qs, lambda d: d.get('created').strftime('%Y-%m'))
data = [{'month': month, 'quant': sum(list(this_day))} for month, this_day in grouped]
print(data)
But the result is not what I expected.
I need a result similar to this:
[{'month': '2016-04', 'quant': 8650}, {'month': '2016-05', 'quant': 9050}]
The items your this_day yields inside sum(list(this_day)) are dicts, so you need to pull out the numeric values with a list comprehension. Example:
>>> import itertools
>>> from django.contrib.auth.models import User
>>> li = User.objects.all().values('date_joined', 'username')
>>> gr = itertools.groupby(li, lambda d: d.get('date_joined').strftime('%Y-%m'))
>>> dt = [{'m': m, 'q': sum([len(x['username']) for x in q])} for m, q in gr]
>>> dt
[{'m': '2005-06', 'q': 11}, {'m': '2006-10', 'q': 22},
{'m': '2005-06', 'q': 179}, {'m': '2006-08', 'q': 10},
{'m': '2006-09', 'q': 30}, {'m': '2005-06', 'q': 74}, ... ]
Note that itertools.groupby only groups consecutive items, which is why the same month appears more than once above; order the queryset by the grouping field first to get one entry per month. For your code, probably something like this:
data = [{'month': month, 'quant': sum([x['proposal__price'] for x in this_day])}
for month, this_day in grouped]
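Putting both points together, a sketch with the queryset ordered first so each month appears exactly once (assuming the Contract model from the question):
import itertools

qs = (Contract.objects
      .order_by('created')                     # groupby needs consecutive keys
      .values('created', 'proposal__price'))
grouped = itertools.groupby(qs, lambda d: d['created'].strftime('%Y-%m'))
data = [{'month': month,
         'quant': sum(x['proposal__price'] for x in this_day)}
        for month, this_day in grouped]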
Start by extracting the month and all your values
from django.db import connection
from django.db.models import Sum

select = {'month': connection.ops.date_trunc_sql('month', 'created')}
qs = Contract.objects.extra(select=select).values('month').annotate(my_total=Sum('proposal__price'))
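On Django 1.10+ the same truncation can be done without extra(); a sketch using TruncMonth (assuming the same Contract model):
from django.db.models import Sum
from django.db.models.functions import TruncMonth

qs = (Contract.objects
      .annotate(month=TruncMonth('created'))    # truncate created to the month
      .values('month')
      .annotate(my_total=Sum('proposal__price'))
      .order_by('month'))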
Now we can use a small helper to group by a dict key and sum selected values, like so:
from collections import defaultdict, Counter

def solve(dataset, group_by_key, sum_value_keys):
    dic = defaultdict(Counter)
    for item in dataset:
        key = item[group_by_key]
        vals = {k: item[k] for k in sum_value_keys}
        dic[key].update(vals)
    return dic
Apply it to your queryset's newly annotated my_total, grouped by month:
solved = solve(qs, 'month', ['my_total'])
And you'll have grouped sums by month (month being a datetime object you can manipulate to meet your needs):
for i in solved: print(i, ":", solved[i]['my_total'])
>>> datetime.datetime(2015, 9, 1, 0, 0, tzinfo=<UTC>) : 67614.23
>>> datetime.datetime(2015, 1, 1, 0, 0, tzinfo=<UTC>) : 54792.39
Now you can extract those values :)
My code, adapted from @C14L:
import itertools
# from .models import Contract
c = Contract.objects.all().order_by('created').values('created', 'proposal__price')  # order first, so groupby sees consecutive months
gr = itertools.groupby(c, lambda d: d.get('created').strftime('%Y-%m'))
dt = [{'month': month, 'quant': sum([x['proposal__price'] for x in quant])} for month, quant in gr]
dt
Thanks.

ValueError: Tensor A must be from the same graph as Tensor B

I'm doing text matching using TensorFlow. Before I call tf.nn.embedding_lookup(word_embedding_matrix, combine_result), I have to combine some words from two sentences (get m words from sentence S1 and also m words from sentence S2, then combine them together as "combine_result"). But when the code gets to tf.nn.embedding_lookup(word_embedding_matrix, combine_result), it gives me the error:
ValueError: Tensor("Reshape_7:0", shape=(1, 6), dtype=int32) must be
from the same graph as Tensor("word_embedding_matrix:0", shape=(26320,
50), dtype=float32_ref).
The code is as below:
import tensorflow as tf
import numpy as np
import os
import time
import datetime
import data_helpers

NUM_CLASS = 2
SEQUENCE_LENGTH = 47

# Placeholders for input, output and dropout
input_x = tf.placeholder(tf.int32, [None, 2, SEQUENCE_LENGTH], name="input_x")
input_y = tf.placeholder(tf.float32, [None, NUM_CLASS], name="input_y")
dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

def n_grams(text, window_size):
    text_left_window = []
    # text_left_window = tf.convert_to_tensor(text_left_window, dtype=tf.int32)
    for z in range(SEQUENCE_LENGTH - 2):
        text_left = tf.slice(text, [z], [window_size])
        text_left_window = tf.concat(0, [text_left_window, text_left])
    text_left_window = tf.reshape(text_left_window, [-1, window_size])
    return text_left_window

def inference(vocab_size, embedding_size, batch_size, slide_window_size, conv_window_size):
    # Embedding layer
    word_embedding_matrix = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                                        name="word_embedding_matrix")
    # convo_unit = tf.Variable(tf.random_uniform([slide_window_size*2, ], -1.0, 1.0), name="convo_unit")
    text_comp_result = []
    for x in range(batch_size):
        # input_x_slice_reshape = [[1 1 1...]
        #                          [2 2 2...]]
        input_x_slice = tf.slice(input_x, [x, 0, 0], [1, 2, SEQUENCE_LENGTH])
        input_x_slice_reshape = tf.reshape(input_x_slice, [2, SEQUENCE_LENGTH])
        # text_left_flat: [294, 6, 2, 6, 2, 57, 2, 57, 147, 57, 147, 5, 147, 5, 2,...], length = SEQUENCE_LENGTH
        # text_right_flat: [17, 2, 2325, 2, 2325, 5366, 2325, 5366, 81, 5366, 81, 1238,...]
        text_left = tf.slice(input_x_slice_reshape, [0, 0], [1, SEQUENCE_LENGTH])
        text_left_flat = tf.reshape(text_left, [-1])
        text_right = tf.slice(input_x_slice_reshape, [1, 0], [1, SEQUENCE_LENGTH])
        text_right_flat = tf.reshape(text_right, [-1])
        # extract both texts.
        # text_left_window: [[294, 6, 2], [6, 2, 57], [2, 57, 147], [57, 147, 5], [147, 5, 2],...]
        # text_right_window: [[17, 2, 2325], [2, 2325, 5366], [2325, 5366, 81], [5366, 81, 1238],...]
        text_left_window = n_grams(text_left_flat, slide_window_size)
        text_right_window = n_grams(text_right_flat, slide_window_size)
        text_left_window_sha = text_left_window.get_shape()
        print 'text_left_window_sha:', text_left_window_sha
        # composite the slices
        text_comp_list = []
        # text_comp_list = tf.convert_to_tensor(text_comp_list, dtype=tf.float32)
        for l in range(SEQUENCE_LENGTH - slide_window_size + 1):
            text_left_slice = tf.slice(text_left_window, [l, 0], [1, slide_window_size])
            text_left_slice_flat = tf.reshape(text_left_slice, [-1])
            for r in range(SEQUENCE_LENGTH - slide_window_size + 1):
                text_right_slice = tf.slice(text_right_window, [r, 0], [1, slide_window_size])
                text_right_slice_flat = tf.reshape(text_right_slice, [-1])
                # convo_unit = [294, 6, 2, 17, 2, 2325]
                convo_unit = tf.concat(0, [text_left_slice_flat, text_right_slice_flat])
                convo_unit_reshape = tf.reshape(convo_unit, [-1, slide_window_size * 2])
                # convo_unit_shape_val = convo_unit_reshape.get_shape()
                # print 'convo_unit_shape_val:', convo_unit_shape_val
                embedded_chars = tf.nn.embedding_lookup(word_embedding_matrix, convo_unit_reshape)
                embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)
                ...
Could someone please help me? Thank you very much!
Yaroslav answered in a comment above; moving it to an answer:
This error happens when you create a new default graph. Try to do tf.reset_default_graph() before the computation and do not create any more graphs (i.e., no further calls to tf.Graph()).
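A minimal sketch of the failure mode and the fix (the names here are illustrative, not from the question's code):
import tensorflow as tf

tf.reset_default_graph()  # start from a single clean default graph

# Both tensors now live in the same (default) graph, so combining them
# in an op such as tf.nn.embedding_lookup is legal.
embeddings = tf.Variable(tf.random_uniform([100, 50], -1.0, 1.0), name="embeddings")
indices = tf.placeholder(tf.int32, [None], name="indices")
looked_up = tf.nn.embedding_lookup(embeddings, indices)

# The ValueError appears when one tensor is built in a different graph, e.g.:
# g = tf.Graph()
# with g.as_default():
#     other = tf.placeholder(tf.int32, [None])  # lives in g, not the default graph
# tf.nn.embedding_lookup(embeddings, other)     # ValueError: must be from the same graph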

writing a list with multiple data to a csv file in separate columns in python

import csv
from itertools import izip

if l > 0:
    for i in range(0, l):
        combined.append(str(questionList[i]).encode('utf-8') + str(viewList[i]).encode('utf-8'))
        # viewcsv.append(str(viewList[i]).encode('utf-8'))
        # quescsv.append(str(questionList[i]).encode('utf-8'))
    with open('collect.csv', 'a') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter='\n')
        spamwriter.writerow(combined)
        # spamwriter.writerows(izip(quescsv, viewcsv))
    return 1
else:
    return 0
I need to generate a CSV file and fill it with data from two or more lists in separate columns, not a single column. Currently I'm trying to combine two lists into one list (combined) and use that as input for writing, but I haven't got the desired output.
I have tried many things, including the fieldnames way and the izip way, but in vain.
Eg:
questionList    viewList
4               3 views
5               0 views
The numbers used are just for example.
Probably, you need something like this:
import csv

X = [1, 2, 3, 4, 5]
Y = [2, 3, 5, 7, 11]
Z = ['two', 'three', 'five', 'seven', 'eleven']

with open('collect.csv', 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    for row in zip(X, Y, Z):
        writer.writerow(row)
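For reference, the resulting collect.csv then contains one zipped triple per row:
1,2,two
2,3,three
3,5,five
4,7,seven
5,11,eleven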
Or, if each list should become a row instead of a column:
import csv

X = [1, 2, 3, 4, 5]
Y = [2, 3, 5, 7, 11]
Z = ['two', 'three', 'five', 'seven', 'eleven']

with open('collect.csv', 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(X)
    writer.writerow(Y)
    writer.writerow(Z)
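Applied to the question's own data, the column-wise version would look like this; a sketch assuming questionList and viewList are plain Python lists of equal length:
import csv
from itertools import izip  # Python 2; use the builtin zip on Python 3

questionList = [4, 5]
viewList = ['3 views', '0 views']

with open('collect.csv', 'wb') as csvfile:  # 'wb' for the csv module on Python 2
    writer = csv.writer(csvfile)
    writer.writerow(['questionList', 'viewList'])  # header row
    for question, view in izip(questionList, viewList):
        writer.writerow([question, view])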

How to count the number of zeros in Python?

My code is currently written as:
import random
convert = {0:0,1:1,2:2,3:3,4:0,5:1,6:2,7:1}
rows = [[convert[random.randint(0,7)] for _ in range(5)] for _ in range(5)]
numgood = 25 - rows.count(0)
print numgood
>> 25
It always comes out as 25, even though rows clearly does contain 0's.
Have you printed rows?
It's [[0, 1, 0, 0, 2], [1, 2, 0, 1, 2], [3, 1, 1, 1, 1], [1, 0, 0, 1, 0], [0, 3, 2, 0, 1]], so you have a nested list there.
If you want to count the number of 0's in those nested lists, you could try:
import random
convert = {0:0, 1:1, 2:2, 3:3, 4:0, 5:1, 6:2, 7:1}
rows = [[convert[random.randint(0, 7)] for _ in range(5)] for _ in range(5)]
numgood = 25 - sum(e.count(0) for e in rows)
print numgood
Output:
18
rows doesn't contain any zeroes; it contains lists, not integers.
>>> row = [1,2,3]
>>> type(row)
<type 'list'>
>>> row.count(2)
1
>>> rows = [[1,2,3],[4,5,6]]
>>> rows.count(2)
0
>>> rows.count([1,2,3])
1
To count the number of zeroes in any of the lists in rows, you could use a generator expression:
>>> rows = [[1,2,3],[4,5,6], [0,0,8]]
>>> sum(x == 0 for row in rows for x in row)
2
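Equivalently, you can flatten the nested list with itertools.chain first and count on the flat list; a small sketch:
from itertools import chain

rows = [[1, 2, 3], [4, 5, 6], [0, 0, 8]]
zeros = list(chain.from_iterable(rows)).count(0)  # flatten, then count
print zeros  # 2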
You could also use numpy:
import numpy as np
import random

convert = {0:0, 1:1, 2:2, 3:3, 4:0, 5:1, 6:2, 7:1}
rows = [[convert[random.randint(0, 7)] for _ in range(5)] for _ in range(5)]
# count_nonzero counts the nonzero cells, i.e. 25 minus the number of zeros
numgood = np.count_nonzero(rows)
print numgood
Output (varies from run to run, since rows is random):
9