Here is the code:
# database_extractor.py
class DatabaseExtractor(object):
    def __init__(self, ..):
        ...

    def run_extraction(self):
        # runs a SQL query to extract data to a file
# driver.py
from multiprocessing import Process
from database_extractor import DatabaseExtractor

def extract_func(db_extractor):
    db_extractor.run_extraction()

if __name__ == "__main__":
    db1 = DatabaseExtractor(..)
    db2 = DatabaseExtractor(..)
    db3 = DatabaseExtractor(..)
    db4 = DatabaseExtractor(..)
    db5 = DatabaseExtractor(..)
    db6 = DatabaseExtractor(..)
    db7 = DatabaseExtractor(..)
    db8 = DatabaseExtractor(..)
    worker_l = [Process(target=extract_func, args=[db1]),
                Process(target=extract_func, args=[db2]),
                Process(target=extract_func, args=[db3]),
                Process(target=extract_func, args=[db4]),
                Process(target=extract_func, args=[db5]),
                Process(target=extract_func, args=[db6]),
                Process(target=extract_func, args=[db7]),
                Process(target=extract_func, args=[db8])]
    for worker in worker_l: worker.start()
    for worker in worker_l: worker.join()
(In reality, the instances of DatabaseExtractor are being generated based on an input config file, so there could be more than 8 processes running)
I referred to the SO post (Reference), quoting the accepted answer: "You'll either want to join your processes individually outside of your for loop (e.g., by storing them in a list and then iterating over it) or use something like multiprocessing.Pool and apply_async with a callback". Even though I did exactly that, all my processes are running sequentially. The reason I know this is that 4 of the instances have queries that run for a couple of hours, and when one of them is kicked off, I do not see the other queries populating their respective output files. How can I force parallel execution of the instances?
My guess is that something is happening at the DB layer. The example below shows that everything works as expected as far as the processes are concerned, so I would recommend checking your database locking etc.
from multiprocessing import Process
from random import randint
from time import sleep
def wait_proc(i, s):
print "%d - Working for %d seconds" % (i,s)
sleep(s)
print "%d - Done." % (i,)
wait_l = [Process(target=wait_proc, args=[i,randint(5,15)]) for i in range(10)]
for w in wait_l:
w.start()
for w in wait_l:
w.join()
print "All done."
I would like to run two functions in parallel: one batches images (streaming 25 images at a time for processing) and another processes the batched images. They need to run at the same time.
So I have a main function for batching images, BatchStreaming(self), and one for processing, BatchProcessing(self, b_num). BatchStreaming is working well. After streaming 25 images, batch processing needs to start. So there are two things that must happen in parallel:
(1) The while loop in BatchStreaming needs to continue with the next batch of images.
(2) At the same time, the current batch of images needs to be processed.
I am confused about whether I should use a process or a thread. I prefer a process, as I would like to utilize all CPU cores (because of the GIL, Python threads effectively run on one core at a time).
Then I have two issues:
(1) The process has to join back to the main program before it can proceed, but I need to continue with the next batch of images.
(2) In the following program, when BatchProcessing(self, b_num) is called, I get this exception:
Caught Main Exception
(<class 'TypeError'>, TypeError("'module' object is not callable",), <traceback object at 0x7f98635dcfc8>)
What could be the issue?
The code is as follows.
import multiprocessing as MultiProcess
import time
import vid_streamv3 as vs
import cv2
import sys
import numpy as np
import os
BATCHSIZE=25
CHANNEL=3
HEIGHT=480
WIDTH=640
ORGHEIGHT=1080
ORGWIDTH=1920
class ProcessPipeline:
def __init__(self):
#Current Cam
self.camProcess = None
self.cam_queue = MultiProcess.Queue(maxsize=100)
self.stopbit = None
self.camlink = 'rtsp://root:pass#192.168.0.90/axis-media/media.amp?camera=1' #Add your RTSP cam link
self.framerate = 25
self.fullsize_batch1=np.zeros((BATCHSIZE, ORGHEIGHT, ORGWIDTH, CHANNEL), dtype=np.uint8)
self.fullsize_batch2=np.zeros((BATCHSIZE, ORGHEIGHT, ORGWIDTH, CHANNEL), dtype=np.uint8)
self.batch1_is_processed=False
def BatchStreaming(self):
#get all cams
time.sleep(3)
self.stopbit = MultiProcess.Event()
self.camProcess = vs.StreamCapture(self.camlink,
self.stopbit,
self.cam_queue,
self.framerate)
self.camProcess.start()
count=0
try:
while True:
if not self.cam_queue.empty():
cmd, val = self.cam_queue.get()
if cmd == vs.StreamCommands.FRAME:
if val is not None:
print('streaming starts ')
if(self.batch1_is_processed == False):
self.fullsize_batch1[count]=val
else:
self.fullsize_batch2[count]=val
count=count+1
if(count>=25):
if(self.batch1_is_processed == False):#to start process for inference and post processing for batch 1
self.batch1_is_processed = True
print('batch 1 process')
p = MultiProcess(target=self.BatchProcessing, args=(1,))
else:#to start process for inference and post processing for batch 2
self.batch1_is_processed = False
print('batch 2 process')
p = MultiProcess(target=self.BatchProcessing, args=(2,))
p.start()
print('BatchProcessing start')
p.join()
print('BatchProcessing join')
count=0
cv2.imshow('Cam: ' + self.camlink, val)
cv2.waitKey(1)
except KeyboardInterrupt:
print('Caught Keyboard interrupt')
except:
e = sys.exc_info()
print('Caught Main Exception')
print(e)
self.StopStreaming()
cv2.destroyAllWindows()
def StopStreaming(self):
print('in stopCamStream')
if self.stopbit is not None:
self.stopbit.set()
while not self.cam_queue.empty():
try:
_ = self.cam_queue.get()
except:
break
self.cam_queue.close()
print("before camProcess.join()")
self.camProcess.join()
print("after camProcess.join()")
def BatchProcessing(self, b_num):
print('module name:', __name__)
if hasattr(os, 'getppid'): # only available on Unix
print('parent process:', os.getppid())
print('process id:', os.getpid())
if __name__ == "__main__":
mc = ProcessPipeline()
mc.BatchStreaming()
I used Event signalling, as shown below. That is more straightforward for my application: when the batching loop has collected enough images, it signals the batch-processing side. (The TypeError itself comes from MultiProcess being the multiprocessing module, not a class, so MultiProcess(target=...) tries to call a module; creating a process would need MultiProcess.Process(target=...).)
#event_tut.py
import random, time
from threading import Event, Thread
event = Event()
def waiter(event, nloops):
count=0
while(count<10):
print("%s. Waiting for the flag to be set." % (i+1))
event.wait() # Blocks until the flag becomes true.
print("Wait complete at:", time.ctime())
event.clear() # Resets the flag.
print('wait exit')
count=count+1
def setter(event, nloops):
for i in range(nloops):
time.sleep(random.randrange(2, 5)) # Sleeps for some time.
event.set()
threads = []
nloops = 10
threads.append(Thread(target=waiter, args=(event, nloops)))
threads[-1].start()
threads.append(Thread(target=setter, args=(event, nloops)))
threads[-1].start()
for thread in threads:
thread.join()
print("All done.")
I created two processes: one spawns multiple threads and is responsible for writing data to a Queue, the other reads data from the Queue. It deadlocks frequently under heavy load and less often otherwise; adding a sleep in the run method of the write module (see the commented-out lines) makes the deadlock rarer. My code is below.
Environment: Python 2.7
main.py
from multiprocessing import Process,Queue
from write import write
from read import read
if __name__ == "__main__":
record_queue = Queue()
table_queue = Queue()
pw = Process(target=write,args=[record_queue, table_queue])
pr = Process(target=read,args=[record_queue, table_queue])
pw.start()
pr.start()
pw.join()
pr.join()
write.py
from concurrent.futures import ThreadPoolExecutor, as_completed
def write(record_queue, table_queue):
thread_num = 3
pool = ThreadPoolExecutor(thread_num)
futures = [pool.submit(run, record_queue, table_queue) for _ in range (thread_num)]
results = [r.result() for r in as_completed(futures)]
def run(record_queue, table_queue):
while True:
if table_queue.empty():
break
table = table_queue.get()
        # uncommenting the sleep below makes the deadlock less likely
#import time
#import random
#time.sleep(random.randint(1, 3))
process_with_table(record_queue, table_queue, table)
def process_with_table(record_queue, table_queue, table):
#for short
for item in [x for x in range(1000)]:
record_queue.put(item)
read.py
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import Queue
def read(record_queue, table_queue):
count = 0
while True:
item = record_queue.get()
count += 1
print ("item: ", item)
if count == 4:
break
I googled it and there are similar questions on SO, but I can't see how they relate to my code. Can anyone help? Thanks.
I seem to have found a solution: change the run method in the write module to:
import multiprocessing.queues   # provides the Empty exception raised by get(timeout=...)
import time

def run(record_queue, table_queue):
    while True:
        try:
            if table_queue.empty():
                break
            table = table_queue.get(timeout=3)
            process_with_table(record_queue, table_queue, table)
        except multiprocessing.queues.Empty:
            time.sleep(0.1)
and I no longer see a deadlock or blocking on the get method.
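For reference (this is not part of the original post), the multiprocessing docs warn that a process which has put items on a Queue may not exit until that data has been flushed and consumed, so a reader that breaks after 4 items and a parent that then join()s the writer can hang on its own. A sentinel-based sketch that drains everything looks like this:
# drain_sketch.py - illustrative only, not the original code
from multiprocessing import Process, Queue

SENTINEL = None

def write(record_queue):
    for item in range(1000):
        record_queue.put(item)
    record_queue.put(SENTINEL)      # tell the reader there is nothing more

def read(record_queue):
    while True:
        item = record_queue.get()   # blocks until something arrives
        if item is SENTINEL:
            break
        # process the item here

if __name__ == "__main__":
    record_queue = Queue()
    pw = Process(target=write, args=(record_queue,))
    pr = Process(target=read, args=(record_queue,))
    pw.start()
    pr.start()
    pr.join()   # the reader drains the queue, so the writer can flush and exit
    pw.join()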
I am trying to open pickle files that have data within them, then update an MSSQL table with that data. It was taking forever, 10 days to update 1,000,000 rows, so I wrote a script for more parallelism. The more processes I run it with, the more errors I get, like this:
(<class 'pyodbc.Error'>, Error('40001', '[40001] [Microsoft][ODBC SQL Server Dri
ver][SQL Server]Transaction (Process ID 93) was deadlocked on lock resources wit
h another process and has been chosen as the deadlock victim. Rerun the transact
ion. (1205) (SQLExecDirectW)'), <traceback object at 0x0000000002791808>)
As you can see in my code, I keep retrying the update until it is successful, and even sleep for a second here:
while True:
try:
updated = cursor.execute(update,'Yes', fileName+'.'+ext, dt, size,uniqueID )
break
except:
time.sleep(1)
print sys.exc_info()
Is this because, when you use the multiprocessing module on Windows, it spawns new processes rather than using os.fork?
Is there a way to do this that will provide more of a speed-up?
I was told that the table can handle way more transactions than this...
#!C:/Python/python.exe -u
import pyodbc,re,pickle,os,glob,sys,time
from multiprocessing import Lock, Process, Queue, current_process
def UpDater(pickleQueue):
for pi in iter(pickleQueue.get, 'STOP'):
name = current_process().name
f=pi
cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=database.windows.net;DATABASE=DB;UID=user;PWD=pwd');
cursor = cnxn.cursor()
update = ("""UPDATE DocumentList
SET Downloaded=?, DownLoadedAs=?,DownLoadedWhen=?,DownLoadedSizeKB=?
WHERE DocNumberSequence=?""")
r = re.compile('\d+')
pkl_file = open(pi, 'rb')
meta = pickle.load(pkl_file)
fileName = meta[0][0]
pl = r.findall(fileName)
l= int(len(pl)-1)
ext = meta[0][1]
url = meta[0][2]
uniqueID = pl[l]
dt = meta[0][4]
size = meta[0][5]
while True:
try:
updated = cursor.execute(update,'Yes', fileName+'.'+ext, dt, size,uniqueID )
break
except:
time.sleep(1)
print sys.exc_info()
print uniqueID
cnxn.commit()
pkl_file.close()
os.remove(fileName+'.pkl')
cnxn.close()
if __name__ == '__main__':
os.chdir('Pickles')
pickles = glob.glob("*.pkl")
pickleQueue=Queue();processes =[];
for item in pickles:
pickleQueue.put(item)
workers = int(sys.argv[1]);
for x in xrange(workers):
p = Process(target=UpDater,args=(pickleQueue,))
p.start()
processes.append(p)
pickleQueue.put('STOP')
for p in processes:
p.join()
I am using Windows 7 and python 2.7 Anaconda Distribution
EDIT
The answer below suggesting row locks stopped the error from happening. However, the updates were still slow; it turned out an old-fashioned index on the primary key was needed for a 100x speed-up.
A few things to try. Using sleeps is a bad idea. First, could you try row level locking?
update = ("""UPDATE DocumentList WITH (ROWLOCK)
SET Downloaded=?, DownLoadedAs=?,DownLoadedWhen=?,DownLoadedSizeKB=?
WHERE DocNumberSequence=? """)
Another option would be to wrap each in a transaction:
update = ("""
BEGIN TRANSACTION my_trans;
UPDATE DocumentList
SET Downloaded=?, DownLoadedAs=?,DownLoadedWhen=?,DownLoadedSizeKB=?
WHERE DocNumberSequence=?;
    COMMIT TRANSACTION my_trans;
""")
Would either of these solutions work for you?
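Combined with either of those, it can also help to retry only on the actual deadlock error instead of using a bare except with a sleep. The sketch below is not from the original answer; it keys off the SQLSTATE '40001' visible in the traceback and backs off a little between attempts.
# retry_sketch.py - illustrative only; assumes the same cursor and update statement as above
import time
import pyodbc

def execute_with_retry(cursor, update, params, max_tries=5):
    for attempt in range(max_tries):
        try:
            cursor.execute(update, params)
            return True
        except pyodbc.Error as e:
            # '40001' is the SQLSTATE for the deadlock-victim error (1205)
            if e.args[0] != '40001':
                raise                        # anything else is a real failure
            time.sleep(0.5 * (attempt + 1))  # back off before retrying
    return False
It would be called as execute_with_retry(cursor, update, ('Yes', fileName + '.' + ext, dt, size, uniqueID)) in place of the while True loop.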
I have a very long series of similar CSV files (14 GB altogether). I need to open each file, replace certain characters, and write the fixed version to a new file. I want to use the processing power of my multicore computer. I tried mp.Pool and mp.Process/mp.Queue. The Pool version works, but the Queue approach produces this error:
IOError: [Errno 22] invalid mode ('r') or filename: '<multiprocessing.queues.Queue object at 0x0000000002775A90>'
This is a simplified version of my Pool code:
import os
import pandas as pd
import multiprocessing as mp
def fixer(a_file):
lines = []
opened_file = open(a_file)
for each_line in opened_file:
lines.append(each_line.replace('mad', 'rational'))
opened_file.close()
df = pd.DataFrame(lines)
#some pandas magics here
df.to_csv(a_file[:-4] + '_fixed.csv')
if __name__ == "__main__":
my_path = os.getcwd()
my_files = list(os.walk(my_path))[0][2] #I just get the list of file names here
processors = mp.cpu_count()
pool = mp.Pool(processes = processors) # I set as many processes as processors my computer has.
pool.map(fixer, my_files)
And this is the one for the Queue approach:
import os
import pandas as pd
import multiprocessing as mp
def fixer(a_file):
lines = []
opened_file = open(a_file)
for each_line in opened_file:
lines.append(each_line.replace('mad', 'rational'))
opened_file.close()
df = pd.DataFrame(lines)
#some pandas magics here
df.to_csv(a_file[:-4] + '_fixed.csv')
if __name__ == "__main__":
my_path = os.getcwd()
my_files = list(os.walk(my_path))[0][2] #I just get the list of file names here
processors = mp.cpu_count()
queue = mp.Queue()
for each_file in my_files:
queue.put(each_file)
processes = [mp.Process(target = fixer, args=(queue,)) for core in range(processors)]
for process in processes:
process.start()
for process in processes:
process.join()
I would appreciate an example that makes the Queue version work. In a second processing step, before the files are written, I need the processes to produce an intermediate result and do some calculations on it; that is why I need the queues.
The problem in the Queue script was that I was not getting the next element from the Queue, but passing the whole Queue to the fixer function. This is solved by assigning the value of queue.get() to a variable inside fixer:
import os
import pandas as pd
import multiprocessing as mp
def fixer(a_queue):
a_file = a_queue.get()
lines = []
opened_file = open(a_file)
for each_line in opened_file:
lines.append(each_line.replace('mad', 'rational'))
opened_file.close()
df = pd.DataFrame(lines)
#some pandas magics here
df.to_csv(a_file[:-4] + '_fixed.csv')
if __name__ == "__main__":
my_path = os.getcwd()
my_files = list(os.walk(my_path))[0][2] #I just get the list of file names here
processors = mp.cpu_count()
queue = mp.Queue()
for each_file in my_files:
queue.put(each_file)
processes = [mp.Process(target = fixer, args=(queue,)) for core in range(processors)]
for process in processes:
process.start()
for process in processes:
process.join()
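One caveat with the version above (worth noting, although it is not what the answer set out to fix): each worker calls a_queue.get() exactly once, so at most cpu_count() files get fixed per run. A sketch that keeps each worker pulling file names until a sentinel arrives, with the pandas step left out for brevity:
# queue_loop_sketch.py - illustrative variant, not the original answer
import os
import multiprocessing as mp

STOP = None

def fixer(a_queue):
    while True:
        a_file = a_queue.get()
        if a_file is STOP:                   # sentinel: no more work
            break
        with open(a_file) as opened_file:
            lines = [line.replace('mad', 'rational') for line in opened_file]
        with open(a_file[:-4] + '_fixed.csv', 'w') as out:
            out.writelines(lines)

if __name__ == "__main__":
    my_files = list(os.walk(os.getcwd()))[0][2]
    processors = mp.cpu_count()
    queue = mp.Queue()
    for each_file in my_files:
        queue.put(each_file)
    for _ in range(processors):
        queue.put(STOP)                      # one sentinel per worker
    processes = [mp.Process(target=fixer, args=(queue,)) for _ in range(processors)]
    for process in processes:
        process.start()
    for process in processes:
        process.join()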
I've been using the script below to run some tasks in parallel on an Ubuntu server with 16 processors. It actually works, but I have a few questions about it:
What is the code actually doing?
The more workers I set up, the faster the script runs, but what is the limit on the number of workers? I've run it with 100.
How could I improve it?
#!/usr/bin/env python
from multiprocessing import Process, Queue
from executable import run_model
from database import DB
import numpy as np
def worker(work_queue, db_conection):
try:
for phone in iter(work_queue.get, 'STOP'):
registers_per_number = retrieve_CDRs(phone, db_conection)
run_model(np.array(registers_per_number), db_conection)
#print("The phone %s was already run" % (phone))
except Exception:
pass
return True
def retrieve_CDRs(phone, db_conection):
return db_conection.retrieve_data_by_person(phone)
def main():
phone_numbers = np.genfromtxt("../listado.csv", dtype="int")[:2000]
workers = 16
work_queue = Queue()
processes = []
#print("Process started with %s" % (workers))
for phone in phone_numbers:
work_queue.put(phone)
#print("Phone %s put at the queue" % (phone))
#print("The queue %s" % (work_queue))
for w in xrange(workers):
#print("The worker %s" % (w))
# new conection to data base
db_conection = DB()
p = Process(target=worker, args=(work_queue, db_conection))
p.start()
#print("Process %s started" % (p))
processes.append(p)
work_queue.put('STOP')
for p in processes:
p.join()
if __name__ == '__main__':
main()
Cheers!
First, start from the main function:
It creates a NumPy array of 2000 integer phone numbers read from a CSV file.
Then it creates a few variables and lists.
Next, it builds a queue containing all the phone numbers extracted from the CSV file.
Next, for each of the 16 workers, it creates its own DB connection, sets up the process arguments, and starts the worker process.
Hope that helps you understand the code. What you have is multiprocessing in a master/worker style, and it does behave like parallel processing, so adding workers makes it faster, but only up to a point. With 2000 phone numbers in the queue there is no point in having more than 2000 workers; any extras would simply sit idle, and idle workers only reduce performance. In practice the useful limit is usually much lower, roughly the number of CPU cores and whatever concurrent load the database can handle. Improving the script mostly comes down to keeping every worker busy and minimizing idle ones.
Hope that helps. Cheers!
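To make the third question ("How could I improve it?") a bit more concrete, here is a sketch of two small refinements: catch and log failures per phone number instead of letting one bad record silently end a whole worker, and make sure there is exactly one 'STOP' sentinel per worker so every process shuts down cleanly. It reuses the question's own modules (executable.run_model, database.DB) and is only an illustration, not a tested drop-in.
# worker_sketch.py - illustrative refinements to the script above
from multiprocessing import Process, Queue
import traceback
import numpy as np
from executable import run_model
from database import DB

def worker(work_queue, db_conection):
    for phone in iter(work_queue.get, 'STOP'):
        try:
            registers_per_number = db_conection.retrieve_data_by_person(phone)
            run_model(np.array(registers_per_number), db_conection)
        except Exception:
            traceback.print_exc()    # log the failure instead of hiding it
    return True

def main(workers=16):
    phone_numbers = np.genfromtxt("../listado.csv", dtype="int")[:2000]
    work_queue = Queue()
    for phone in phone_numbers:
        work_queue.put(phone)
    for _ in range(workers):
        work_queue.put('STOP')       # exactly one sentinel per worker
    processes = []
    for _ in range(workers):
        p = Process(target=worker, args=(work_queue, DB()))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

if __name__ == '__main__':
    main()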