I have a function pre_raw() and pandas data train_raw.values. In Python 3 I would write:
with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(pre_raw, train_raw.values)
How do I write this in Python 2?
Thanks.
import multiprocessing

procs = []
for val in train_raw.values:
    p = multiprocessing.Process(target=pre_raw, args=(val,))
    procs.append(p)
    p.start()
for p in procs:
    p.join()
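Note that this spawns one process per row, and processes rather than threads. If you want a closer equivalent of ThreadPoolExecutor.map() -- a fixed-size pool of threads -- Python 2 also ships multiprocessing.pool.ThreadPool. A minimal sketch, assuming pre_raw and train_raw from the question:
from multiprocessing.pool import ThreadPool

pool = ThreadPool()                  # defaults to one thread per CPU
pool.map(pre_raw, train_raw.values)  # blocks until all calls complete
pool.close()
pool.join()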
I was writing a script in Python using the google-cloud-datastore module to upload data from my CSV to Datastore. The script seems to work fine, but there is a problem I'm stuck on: the integer values from my CSV are being stored as floating-point numbers. Is this the default way data is sent to Datastore, or am I doing something wrong?
Here's my code:
import sys
import getopt

import pandas as pd
from google.cloud import datastore


def write_dict_chunks(data, SIZE=100):
    log_count = 0
    datastore_client = datastore.Client()
    task_key = datastore_client.key(kind)
    for i in xrange(0, len(data), SIZE):
        entities = []
        for each_entry in data[i : i+SIZE]:
            nan_check = lambda v: v if str(v) != 'nan' else None
            string_check = lambda v: v.decode('utf-8') if isinstance(v, str) else v
            write_row = {k: nan_check(string_check(v)) for k, v in each_entry.iteritems()}
            entity = datastore.Entity(key=task_key)
            entity.update(write_row)
            entities.append(entity)
        datastore_client.put_multi(entities)
        log_count += len(entities)
        print 'Wrote {} entities to datastore'.format(log_count)

try:
    opts, args = getopt.getopt(sys.argv[1:], "ho:v", ["kind=", "filepath="])
    if len(args) > 0:
        for each in args:
            print 'Unrecognized argument: ' + each
        sys.exit(2)
except getopt.GetoptError as err:
    # print help information and exit:
    print str(err)  # will print something like "option -a not recognized"
    print 'Usage: python parse_csv.py --kind=kind_name --filepath=path_to_csv'

kind = None
filepath = None
for option, argument in opts:
    if option in '--kind':
        kind = argument
    elif option in '--filepath':
        filepath = argument

df = pd.read_csv(filepath)
df = df.to_dict(orient='records')
write_dict_chunks(df)
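A likely cause, though the question doesn't confirm it, is pandas rather than Datastore: read_csv upcasts integer columns that contain NaN to float64, so the values are already floats before they reach the client library. A sketch of coercing whole-number floats back to int in the row-building step (int_check is a name introduced here, not part of the original code):
def int_check(v):
    # whole-number floats (e.g. 42.0 from pandas' float64 upcast)
    # are converted back to int before the entity is written
    if isinstance(v, float) and v.is_integer():
        return int(v)
    return v

write_row = {k: int_check(nan_check(string_check(v)))
             for k, v in each_entry.iteritems()}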
Here is my code:
t = int(input())
from itertools import permutations
for i in range(0, t):
    a = input()
    sum = 0
    count = 0
    for p in permutations(a):
        sum = sum + int(''.join(p))
        count = count + 1
    print(str(count) + " " + str(sum))
It works fine in Python 3.x but not in Python 2.7.
t = int(input())
from itertools import permutations
for i in range(0, t):
    a = raw_input()
    sum = 0
    count = 0
    for p in permutations(a):
        sum = sum + int(''.join(p))
        count = count + 1
    print(str(count) + " " + str(sum))
Change input to raw_input on line 4.
In Python 2.x, input() evaluates what the user types as a Python expression (it is equivalent to eval(raw_input())), so typing digits yields an int, which permutations() cannot iterate over; raw_input() returns the line as a str.
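A common pattern, not part of the original answer, for making one script run under both versions is to alias the reader once at the top (read_line is a made-up name):
try:
    read_line = raw_input   # Python 2: returns the raw string
except NameError:
    read_line = input       # Python 3: input() already returns a string

t = int(read_line())
a = read_line()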
This is my testing code:
import mock
import unittest


def check_method_return(input):
    return_value = input.ops.list()
    if not return_value:
        return False
    return return_value


def check_method_len(input):
    return_value = input.ops.list()
    if len(return_value) < 1:
        return False
    return return_value


class TestMockReturnValue(unittest.TestCase):
    def test_mock_return(self):
        fake_input = mock.MagicMock()
        fake_input().ops.list.return_value = []
        result = check_method_return(fake_input)
        self.assertFalse(result)

    def test_mock_len(self):
        fake_input = mock.MagicMock()
        fake_input().ops.list.return_value = []
        result = check_method_len(fake_input)
        self.assertFalse(result)


if __name__ == '__main__':
    test_empty = []
    if not test_empty:
        print("empty list equals to False")
    unittest.main()
Running it produces:
empty list equals to False
.F
======================================================================
FAIL: test_mock_return (__main__.TestMockReturnValue)
----------------------------------------------------------------------
Traceback (most recent call last):
File "test_mock.py", line 31, in test_mock_return
self.assertFalse(result)
AssertionError: <MagicMock name='mock.ops.list()' id='140459969939728'> is not false
----------------------------------------------------------------------
Ran 2 tests in 0.005s
FAILED (failures=1)
Because an empty list is falsy in an if test, check_method_return should behave exactly the same as check_method_len in the real world.
So, my question is:
Is there a way to make the unit test pass for check_method_return?
If this is the case, here is the solution. The difference between the two forms: fake_input() is a call, so it configures the child mock fake_input.return_value, while the functions under test call input.ops.list() on the object itself and never call input(). The return value therefore has to be set on fake_input.ops.list:
# this mocks away input.ops.list()
fake_input.ops.list.return_value = []
# this does NOT mock away input.ops.list(); it configures fake_input().ops.list()
fake_input().ops.list.return_value = []
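With that fix, a passing version of the failing test looks like this (a sketch; the test name is mine):
def test_mock_return_fixed(self):
    fake_input = mock.MagicMock()
    # configure the attribute on the mock itself, not on its return value
    fake_input.ops.list.return_value = []
    result = check_method_return(fake_input)
    self.assertFalse(result)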
The following IPython session shows the difference between the two ways of setting the return value and may help to understand it better:
[gliang#www ~]$ ipython
Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56)
IPython 0.13.2 -- An enhanced Interactive Python.
In [1]: import unittest
In [2]: import mock
In [4]: fake_input = mock.MagicMock()
In [5]: fake_input().ops.list.return_value= []
In [6]: print fake_input().ops.list.return_value
[]
In [7]: print fake_input.ops.list.return_value
<MagicMock name='mock.ops.list()' id='15160848'>
In [8]: fake_input2 = mock.MagicMock()
In [9]: fake_input2.ops.list.return_value = []
In [10]: print fake_input2.ops.list.return_value
[]
In [11]: quit()
I have data that I pickled under Python 2.7 like this:
#!/usr/bin/env python2
# coding=utf-8
import datetime
import pickle

data = {1: datetime.date(2014, 3, 18),
        'string-key': u'ünicode-string'}
pickle.dump(data, open('file.pickle', 'wb'))
The only way I found to load this in Python 3.4 is:
data = pickle.load(open('file.pickle', "rb"), encoding='bytes')
Now my unicode strings are fine, but the str dict keys are bytes. print(repr(data)) gives:
{1: datetime.date(2014, 3, 18), b'string-key': 'ünicode-string'}
Does anybody have an idea how to get around rewriting my code as data[b'string-key'] or converting all existing files?
This is not a real answer, only a workaround. It re-pickles the data under Python 3.4 (this doesn't work in 3.3):
#!/usr/bin/env python3
import glob
import pickle


def bytes_to_unicode(ob):
    t = type(ob)
    if t in (list, tuple):
        l = [str(i, 'utf-8') if type(i) is bytes else i for i in ob]
        l = [bytes_to_unicode(i) if type(i) in (list, tuple, dict) else i for i in l]
        ro = tuple(l) if t is tuple else l
    elif t is dict:
        byte_keys = [i for i in ob if type(i) is bytes]
        for bk in byte_keys:
            v = ob[bk]
            del ob[bk]
            ob[str(bk, 'utf-8')] = v
        for k in ob:
            if type(ob[k]) is bytes:
                ob[k] = str(ob[k], 'utf-8')
            elif type(ob[k]) in (list, tuple, dict):
                ob[k] = bytes_to_unicode(ob[k])
        ro = ob
    else:
        ro = ob
        print("unprocessed object: {0} {1}".format(t, ob))
    return ro


for fn in glob.glob('*.pickle'):
    data = pickle.load(open(fn, 'rb'), encoding='bytes')
    ndata = bytes_to_unicode(data)
    pickle.dump(ndata, open(fn + '3', 'wb'))
The Python docs say:
The pickle serialization format is guaranteed to be backwards compatible across Python releases.
I didn't find a way to pickle.load Python-2.7 pickled data in Python 3.3 -- not even data that contained only ints and dates.
Have a look at the implementation.
You can subclass the Unpickler and override the byte-string deserialization to produce strings.
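A simpler route, not mentioned in the answers above: pass a real codec instead of 'bytes'. The pickle documentation notes that encoding='latin1' is required for unpickling datetime, date and time instances pickled by Python 2, and since latin-1 maps all 256 byte values, old str keys come back as str while the datetime state survives. Whether this is safe for your data is an assumption; keys that were UTF-8 encoded non-ASCII byte strings would be mis-decoded:
import pickle

with open('file.pickle', 'rb') as f:
    data = pickle.load(f, encoding='latin1')

print(repr(data))
# {1: datetime.date(2014, 3, 18), 'string-key': 'ünicode-string'}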
I'd like to run the processes in parallel, so I commented out the p.join() in the __main__ section.
What are the consequences of not having a .join(), or better yet, should I be using a different approach for parallel multiprocessing?
import multiprocessing

def worker(num):
    x = 0
    for i in range(10000):
        x += 1
    print x, num

if __name__ == '__main__':
    for i in range(4):
        p = multiprocessing.Process(target=worker, args=(i,))
        p.start()
        # p.join()
Join the processes after starting all of them. start() returns immediately, so the workers still run in parallel; join() only makes the parent wait for them to finish. (Calling p.join() inside the start loop is what would serialize them.)
if __name__ == '__main__':
    procs = []
    for i in range(4):
        p = multiprocessing.Process(target=worker, args=(i,))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
If you run multiple similar tasks, you can use multiprocessing.Pool.
if __name__ == '__main__':
    pool = multiprocessing.Pool()  # defaults to one worker per CPU
    pool.map(worker, range(4))     # blocks until all calls return
    pool.close()
    pool.join()
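For completeness, and going beyond the original answer: on Python 3 the same pattern is usually written with concurrent.futures, whose context manager does the close/join bookkeeping on exit:
from concurrent.futures import ProcessPoolExecutor

def worker(num):
    x = 0
    for i in range(10000):
        x += 1
    print(x, num)

if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        # the with-block waits for every submitted call before exiting
        executor.map(worker, range(4))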