I want to use ThreadPoolExecutor in Django and reuse each thread's DB connection, to avoid creating a new DB connection for every sub_task. The problem is that the DB connections are not closed after the ThreadPoolExecutor is shut down. I know I can close the connection at the end of sub_task, but with that solution a connection is created for each task and never reused. There is an initializer parameter on ThreadPoolExecutor, but there is no on_destroy counterpart that gets called when a thread is destroyed.
main_task runs under Celery in my setup.
def sub_task():
    # some db operations
    ...

def main_task(max_workers):
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for i in range(10):
            executor.submit(sub_task)
I wrote a custom ThreadPoolExecutor that keeps a list of the threads' DB connections (each thread's initializer appends its connection handlers to the list) and closes them all when the executor shuts down. Note that if you want to use your own initializer function too, be careful to pass it as a keyword argument, not a positional one.
import traceback
from concurrent.futures import ThreadPoolExecutor

from django import db

class DBSafeThreadPoolExecutor(ThreadPoolExecutor):
    def generate_initializer(self, initializer):
        def new_initializer(*args, **kwargs):
            self, *args = args
            try:
                if initializer is not None:
                    initializer(*args, **kwargs)
            finally:
                self.on_thread_init()
        return new_initializer

    def on_thread_init(self):
        # Remember this thread's connection handlers so they can be
        # closed later from the thread that shuts the executor down.
        for curr_conn in db.connections.all():
            curr_conn.connection = None
            self.threads_db_conns.append(curr_conn)

    def on_executor_shutdown(self):
        for curr_conn in self.threads_db_conns:
            try:
                # Allow closing a connection owned by another thread.
                curr_conn.inc_thread_sharing()
                curr_conn.close()
            except Exception:
                print(f'error while closing connection {curr_conn.alias}')
                traceback.print_exc()

    def __init__(self, *args, **kwargs):
        kwargs['initializer'] = self.generate_initializer(kwargs.get('initializer'))
        kwargs['initargs'] = (self,) + (kwargs.get('initargs') or ())
        self.threads_db_conns = []
        super().__init__(*args, **kwargs)

    def shutdown(self, *args, **kwargs):
        super().shutdown(*args, **kwargs)
        self.on_executor_shutdown()
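A minimal sketch of how this might be used from the Celery task in the question (sub_task and max_workers are the names from the original snippet):

def main_task(max_workers):
    # Connections are opened once per worker thread, reused across all
    # submitted sub_tasks, and closed when the executor shuts down.
    with DBSafeThreadPoolExecutor(max_workers=max_workers) as executor:
        for i in range(10):
            executor.submit(sub_task)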
So I have a system where users can be part of models called boxes, through a Member model.
Members have their own set of roles, which in turn have their own permissions.
I have specific methods that determine which set of permissions a member has in a box.
So now I have a websocket group named 'box_{box_id}' to which members can connect. Outbound events, such as box-related model creation, are sent to this group.
However, some members should not receive certain events, based on the permissions they have.
This is a sample message denoting an event that would be sent to the group:
{'event': EVENT TYPE,
 'data': EVENT DATA}
So now, for example, a user cannot listen to an event of type UPLOAD_CREATE if they don't have the READ_UPLOADS permission in the box.
How can I implement such checks using Django Channels?
EDIT
import json

from channels.generic.websocket import AsyncWebsocketConsumer
from django.core.exceptions import ObjectDoesNotExist
from django.db.models.signals import post_delete, post_save
from django.dispatch import receiver

class LocalEventsConsumer(AsyncWebsocketConsumer):
    """
    An ASGI consumer for box-specific (local) event sending.
    Any valid member of the given box can connect to this consumer.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.box_id = self.scope['url_route']['kwargs']['box_id']
        self.events_group_name = 'box_%s_events' % self.box_id
        self.overwrites_cache = {}
        self.permissions_cache = set()
        # need to update the caches on role and overwrite updates

    async def connect(self):
        try:
            # we cache the member object on connection
            # to help check permissions later on during
            # firing of events
            member_kwargs = {
                'user': self.scope['user'],
                'box__id': self.box_id,
            }
            self.member = api_models.Member.objects.get(**member_kwargs)
            self.permissions_cache = self.member.base_permissions
        except ObjectDoesNotExist:
            # we reject the connection if the
            # box-id passed in the url was invalid
            # or the user isn't a member of the box yet
            await self.close()
            return
        await self.channel_layer.group_add(self.events_group_name, self.channel_name)
        await self.accept()

    async def disconnect(self, close_code):
        await self.channel_layer.group_discard(self.events_group_name, self.channel_name)

    async def fire_event(self, event: dict):
        member_permissions = self.get_event_permissions(event)
        required_permissions = event.pop('listener_permissions', set())
        if required_permissions <= member_permissions:
            await self.send(text_data=json.dumps(event))

    def get_event_permissions(self, event):
        # handle permission caching throughout
        # the life of the user's connection
        overwrite_channel = event['data'].get('channel', None)
        if not overwrite_channel:
            # we only need to calculate overwrites when channel-specific
            # events are triggered, like UPLOAD_CREATE and OVERWRITE_DELETE
            return self.permissions_cache
        overwrite_cache = self.overwrites_cache.get(overwrite_channel.id, None)
        if not overwrite_cache:
            overwrite_cache = self.member.permissions.get_overwrites(overwrite_channel)
            self.overwrites_cache[overwrite_channel.id] = overwrite_cache
        return overwrite_cache

    @receiver(post_delete, sender=api_models.MemberRole)
    @receiver(post_save, sender=api_models.MemberRole)
    def update_permissions_cache(self, instance=None, **kwargs):
        if instance.member == self.member:
            self.permissions_cache = self.member.base_permissions

    @receiver(post_delete, sender=api_models.Overwrite)
    @receiver(post_save, sender=api_models.Overwrite)
    def update_overwrites_cache(self, instance=None, **kwargs):
        overwrite_cache = self.overwrites_cache.get(instance.channel.id, None)
        if instance.role in self.member.roles.all() and overwrite_cache:
            self.overwrites_cache[instance.channel.id] = self.member.permissions.get_overwrites(instance.channel)
This is my current consumer. I send messages with the fire_event type from outside the consumer. However, every time I need to get the permissions I have to make a trip to the database, so I've implemented this permission caching system to mitigate that. Should it be altered?
You can check for these permissions in the method that sends the data to the client. Since all members belong to the same channel group, you cannot filter at the level of sending to the group, at least to the best of my knowledge. So you can do something like this:
from asgiref.sync import async_to_sync

def receive(self, event):
    # update box
    ...
    # notify the members
    async_to_sync(self.channel_layer.group_send)(
        f'box_{self.box.id}',
        {'type': 'notify_box_update', 'event': EVENT TYPE, 'data': EVENT DATA},
    )

def notify_box_update(self, event):
    if has_permission(self.user, event['event'], self.box):
        self.send(event)
Here, the notify event is sent to the group via the channel_layer, but only users with the proper permission get it sent to them downstream. You can implement the has_permission function somewhere in your code to check the permission given the user, box, and event type.
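A minimal sketch of what has_permission could look like; the EVENT_PERMISSIONS mapping and the member lookup below are assumptions for illustration, not part of the original code (base_permissions is borrowed from the question's Member model):

# Hypothetical mapping from event type to the permission required to hear it.
EVENT_PERMISSIONS = {
    'UPLOAD_CREATE': 'READ_UPLOADS',
    # ...
}

def has_permission(user, event_type, box):
    required = EVENT_PERMISSIONS.get(event_type)
    if required is None:
        # events with no required permission are visible to everyone
        return True
    member = box.member_set.filter(user=user).first()
    return member is not None and required in member.base_permissions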
I tried using a Celery Task and a Django REST framework Serializer in the same class, with multiple inheritance.
from celery import Task
from rest_framework import serializers

class ReceiveSerializer(Task, serializers.Serializer):
    def run(self, source, *args, **kwargs):
        self.save()

    def save(self, **kwargs):
        # call long running save method
        ...
I got this error:
File "<>\serializers.py", line 217, in <module>
class ReceiveSerializer(Task, serializers.Serializer):
File "<>\workspace\www\lib\site-packages\celery-3.1.20-py2.7.egg\celery\app\task.py", line 199, in __new_
_
tasks.register(new(cls, name, bases, attrs))
TypeError: Error when calling the metaclass bases
metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases
The save method has to create a long list of objects in the DB (sometimes taking more than 5 minutes), and I don't want the user to wait that long. Is there any way to do this?
This could be possible using a mixin:
from celery import shared_task
from django.apps import apps

class YourMixin:
    # if you want to trigger the task on save:
    def save(self, *args, **kwargs):
        ret = super().save(*args, **kwargs)
        some_task.apply_async((
            self.__class__.__name__,
            self.pk,
        ))
        return ret

@shared_task
def some_task(model_name, model_id):
    my_model = apps.get_model('django_app_name.{}'.format(model_name))
    obj = my_model.objects.get(pk=model_id)
    # run the long-running save logic on obj here
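For completeness, a sketch of the serializer side under this approach: keep the serializer a plain DRF Serializer (so there is no metaclass conflict) and hand the slow work to a Celery task. long_running_save is a hypothetical task name here, and the data passed to it must be JSON-serializable:

from celery import shared_task
from rest_framework import serializers

@shared_task
def long_running_save(data):
    # create the long list of objects here, off the request cycle
    ...

class ReceiveSerializer(serializers.Serializer):
    # a plain serializer: no Task base class, hence no metaclass conflict
    def save(self, **kwargs):
        # the caller returns immediately; the worker does the slow save
        long_running_save.delay(dict(self.validated_data))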
This is a follow-up to my unanswered question here. The code, exactly as it is below, crashes with a segfault (copy/paste it onto your system and run it). When I remove type=QtCore.Qt.DirectConnection from one or both of the signal connect calls (thereby using QtCore.Qt.AutoConnection instead), everything runs the way it should: a widget appears, showing five progress bars filling up and then emptying in an endless loop.
from PySide import QtCore, QtGui
import time

class Worker(QtCore.QThread):
    sig_worker_update_progress = QtCore.Signal(int, int)

    def __init__(self, thread_id, *args, **kwargs):
        super(Worker, self).__init__(*args, **kwargs)
        self.thread_id = thread_id
        self.stop_requested = False

    def slot_interrupt(self):
        self.stop_requested = True

    def run(self):
        progress = 0
        while True:
            self.sig_worker_update_progress.emit(self.thread_id, progress % 100)
            progress += 1
            if self.stop_requested:
                break
            else:
                time.sleep(0.1)

class Controller(QtCore.QObject):
    sig_controller_update_progress = QtCore.Signal(int, int)

    def __init__(self, num_workers, *args, **kwargs):
        super(Controller, self).__init__(*args, **kwargs)
        self.workers = []
        for i in range(num_workers):
            self.workers.append(Worker(i))
            self.workers[i].sig_worker_update_progress.connect(
                self.slot_worker_update_progress,
                type=QtCore.Qt.DirectConnection)
        for worker in self.workers:
            worker.start()

    def slot_worker_update_progress(self, thread_id, progress):
        # Do
        # Stuff
        self.sig_controller_update_progress.emit(thread_id, progress)

class Monitor(QtGui.QWidget):
    def __init__(self, num_workers, *args, **kwargs):
        super(Monitor, self).__init__(*args, **kwargs)
        main_layout = QtGui.QVBoxLayout()
        self.setLayout(main_layout)
        self.progress_bars = []
        for _ in range(num_workers):
            progress_bar = QtGui.QProgressBar()
            main_layout.addWidget(progress_bar)
            self.progress_bars.append(progress_bar)
        self.controller = Controller(num_workers)
        self.controller.sig_controller_update_progress.connect(
            self.slot_controller_update_progress,
            type=QtCore.Qt.DirectConnection)

    def slot_controller_update_progress(self, thread_id, progress):
        self.progress_bars[thread_id].setValue(progress)

if __name__ == "__main__":
    app = QtGui.QApplication([])
    monitor = Monitor(5)
    monitor.show()
    app.exec_()
Why does using two nested DirectConnection signals cause a segfault? If Qt does not want you to do that, why isn't a more informative error given?
I am using PySide v1.2.2 which wraps the Qt 4.8 framework.
I found a satisfying explanation here. Apparently, emitting a signal over a DirectConnection is equivalent to a direct function call. So when both signals are DirectConnect-ed, the GUI ends up being updated on a Worker thread, and as mentioned in my other question, threads other than the GUI thread are not allowed to touch the GUI. The issue is NOT with nesting DirectConnections per se.
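A minimal sketch of the fix that explanation implies, using the code from the question: make the hop that ends in the GUI queued (or simply drop the type argument to get AutoConnection, as noted above), so the slot runs on the GUI thread:

# In Monitor.__init__: the controller's signal is emitted on a Worker
# thread, so queue it onto the GUI thread instead of calling directly.
self.controller.sig_controller_update_progress.connect(
    self.slot_controller_update_progress,
    type=QtCore.Qt.QueuedConnection)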
I have a model that sends a signal:
import datetime

class WMTransaction(models.Model):
    def save(self, *args, **kwargs):
        if self.status == 'completed':
            self.completed = datetime.datetime.now()
            try:
                old = WMTransaction.objects.get(pk=self.pk)
                if old.status == 'processing':
                    print('sending signal')
                    payment_done.send(self)
            except WMTransaction.DoesNotExist:
                pass
        super(WMTransaction, self).save(*args, **kwargs)
Also I have receivers in 2 modules:
@receiver(payment_done, dispatch_uid="make_this_signal_unique", weak=False)
def subscribe(sender, **kwargs):
    print('subscribing')
    # processing
And:
@receiver(payment_done, dispatch_uid="this_signal_is_also_unique", weak=False)
def buy(sender, **kwargs):
    print('buying')
    # processing
The problem is that the subscribe function is called, and buy isn't... Both modules are in installed apps, and other functions from these modules work correctly. What's the problem with the signals?
Has module_B actually been imported, so that the definition of buy gets executed? Check payment_done.receivers just before the payment_done.send line.
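For example (a sketch; MyAppConfig and the receivers module name are placeholders): print the registered receivers just before sending, and if buy is missing, force its module to be imported at startup from the app's AppConfig.ready():

# debug: should list both subscribe and buy
print(payment_done.receivers)
payment_done.send(self)

from django.apps import AppConfig

class MyAppConfig(AppConfig):
    name = 'module_b'  # hypothetical app label

    def ready(self):
        # importing the module executes its @receiver decorators,
        # which is what registers buy with payment_done
        from . import receivers  # noqa: F401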
I have a situation where, when one of my models (MyModel) is saved, I want to check a field and trigger the same change in any other instance with the same some_key.
The code works fine, but it recursively triggers the signal. As a result I am wasting CPU/DB/API calls. I basically want to bypass the signal during the .save(). Any suggestions?
class MyModel(models.Model):
    # bah
    some_field = #
    some_key = #

# in package code __init__.py
@receiver(post_save, sender=MyModel)
def my_model_post_processing(sender, instance, **kwargs):
    # do some unrelated logic...
    logic = 'fun! '

    # if something has changed... update any other row with the same key
    cascade_update = MyModel.objects.exclude(id=instance.id).filter(
        some_key=instance.some_key)
    for c in cascade_update:
        c.some_field = instance.some_field
        c.save()
Disconnect the signal before calling save and then reconnect it afterwards:
post_save.disconnect(my_receiver_function, sender=MyModel)
instance.save()
post_save.connect(my_receiver_function, sender=MyModel)
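If you go this route, a small context manager keeps the disconnect/connect pair from being forgotten; a sketch (note the receiver stays disconnected for all threads while the block runs, so this is not safe under concurrent writes):

from contextlib import contextmanager

from django.db.models.signals import post_save

@contextmanager
def signal_disabled(signal, receiver_func, sender):
    # temporarily disconnect receiver_func, reconnecting even on error
    signal.disconnect(receiver_func, sender=sender)
    try:
        yield
    finally:
        signal.connect(receiver_func, sender=sender)

with signal_disabled(post_save, my_receiver_function, MyModel):
    instance.save()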
Disconnecting a signal is not a DRY and consistent solution compared to, say, using update() instead of save().
To bypass signal firing on your model, a simple way to go is to set an attribute on the current instance to prevent upcoming signals from firing.
This can be done using a simple decorator that checks whether the given instance has the 'skip_signal' attribute and, if so, prevents the method from being called:
from functools import wraps

def skip_signal(signal_func):
    @wraps(signal_func)
    def _decorator(sender, instance, **kwargs):
        if hasattr(instance, 'skip_signal'):
            return None
        return signal_func(sender, instance, **kwargs)
    return _decorator
Based on your example, that gives us:
from django.db.models.signals import post_save
from django.dispatch import receiver

@receiver(post_save, sender=MyModel)
@skip_signal
def my_model_post_save(sender, instance, **kwargs):
    instance.some_field = my_value
    # Here we flag the instance with 'skip_signal'
    # and my_model_post_save won't be called again
    # thanks to our decorator, avoiding any signal recursion.
    instance.skip_signal = True
    instance.save()
Hope this helps.
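One caveat (my addition, not from the original answer): the flag stays set on the instance after save(), so any later save() of the same in-memory object would also skip the receiver unless you remove it:

instance.skip_signal = True
try:
    instance.save()
finally:
    # remove the flag so future saves of this instance fire the signal again
    del instance.skip_signal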
A solution may be to use the update() method to bypass the signal:

MyModel.objects.exclude(id=instance.id).filter(
    some_key=instance.some_key).update(some_field=instance.some_field)
"Be aware that the update() method is converted directly to an SQL statement. It is a bulk operation for direct updates. It doesn't run any save() methods on your models, or emit the pre_save or post_save signals"
You could move the related-objects update code into the MyModel.save method; no playing with signals is needed then:
class MyModel(models.Model):
    some_field = #
    some_key = #

    def save(self, *args, **kwargs):
        super(MyModel, self).save(*args, **kwargs)
        for c in MyModel.objects.exclude(id=self.id).filter(some_key=self.some_key):
            c.some_field = self.some_field
            c.save()
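Note that each c.save() re-enters this same save() method, so two rows sharing a some_key can keep triggering each other. A bulk update() (a sketch with the same field names) avoids that re-entry, at the cost of skipping save() and signals on the related rows:

def save(self, *args, **kwargs):
    super(MyModel, self).save(*args, **kwargs)
    # update() issues a single SQL UPDATE and never calls save() again,
    # so there is no recursion between rows sharing some_key
    MyModel.objects.exclude(id=self.id).filter(
        some_key=self.some_key).update(some_field=self.some_field)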