Django asyncio for saving (big amount of) objects - Nothing saved - django

I want to fetch categories from a Magento API and display them in a template. In the same time, I want to save them in DB for an ulterior use.
Categories are too many and the render of the template takes more than 30 sec.
I start to learn using asyncio but couldn't get my way with it. I surely missed something.
First, my URL leads to the function that retrieves the categories
#login_required
def get_categories(request):
Category.objects.all().delete()
try:
cats = fetch_categories()
tree = cats['children_data']
except:
print('erreur : impossible de récupérer les catégories (fetch_categories)')
asyncio.run(parse_categories(tree))
return render(request, 'categories/categories_list.html', {'tree': tree})
When I get the "categories tree", I send it to
async def parse_categories(tree):
for lvl1 in tree:
all_tasks = []
asyncio.create_task(save_cat(lvl1))
# main products categories (turbo, injectors ...)
for lvl2 in lvl1['children_data']:
asyncio.create_task(save_cat(lvl2))
# sub categories like RENAULT, DACIA
for lvl3 in lvl2['children_data']:
asyncio.create_task(save_cat(lvl3))
for lvl4 in lvl3['children_data']:
asyncio.create_task(save_cat(lvl4))
for lvl5 in lvl4['children_data']:
asyncio.create_task(save_cat(lvl5))
My save() function is async. I'm not sure it should be. Before I started using async, it was working.
async def save_cat(cat):
cat_id = cat['id']
new_cat = Category()
new_cat.id = cat_id
new_cat.name = cat.get('name', None)
new_cat.parent = cat.get('parent_id', None)
new_cat.url = cat.get('path', None)
new_cat.is_active = cat.get('is_active', None)
new_cat.position = cat.get('position', None)
new_cat.level = cat.get('level', None)
new_cat.save()
When I run, no error. The context is well sent to the template and displays well. But no category is saved.
I also tried to make a task list with asyncio.create_task in each level and execute the loop at the end of parse_categories() like said in this thread, without success.
all_tasks.append(asyncio.create_task(save_cat(lvl1)))
[...]
responses = asyncio.gather(*all_tasks, return_exceptions=True)
loop = asyncio.get_event_loop()
loop.run_until_complete(responses)
loop.close()
Any clue to solve my case will be welcome

Related

How to optimize a server request that has to send back several items with dynamic attributes that need to be calculated every time a user requests it?

I have an Angular UI app connecting to a Django API that uses GraphQL (using Graphene) and Postgres for DB.
My application has many courses and each course can have several chapters. The users signing in can see access courses and not others because a course could have a prerequisite. So they will see a course listed but it will be "locked" for them and a message will say that they need to complete the particular prerequisite before it can be accessed. Like this, we need some other attributes to be sent along with the list of courses:-
'locked' - Boolean - whether a course is locked for the current logged-in user or not.
'status' - ENUM - PENDING/SUBMITTED/GRADED/RETURNED/FLAGGED
'completed' - Boolean - whether the course is completed or not
When a user requests the list of courses, these 3 attributes are calculated for each item in the list before it is compiled and sent back to the user.
And this is done for each of the chapters inside the course too. And the chapter might contain upto 30 chapters or so. So this really takes a LOT of time!
I've implemented caching as well, but because these values change often (eg. when the user completes a chapter) they are constantly invalidated and it doesn't make sense to keep these attributes server-side cached to begin with.
Here's the code for how the chapters are processed for the query for list of chapters:-
#login_required
#user_passes_test(lambda user: has_access(user, RESOURCES['CHAPTER'], ACTIONS['LIST']))
def resolve_chapters(root, info, course_id=None, searchField=None, limit=None, offset=None, **kwargs):
current_user = info.context.user
# Checking if this is cached
cache_entity = CHAPTER_CACHE[0]
cache_key = generate_chapters_cache_key(cache_entity, searchField, limit, offset, course_id, current_user)
cached_response = fetch_cache(cache_entity, cache_key)
if cached_response:
return cached_response
# If not cached...
qs = rows_accessible(current_user, RESOURCES['CHAPTER'], {'course_id': course_id})
if searchField is not None:
filter = (
Q(searchField__icontains=searchField.lower())
)
qs = qs.filter(filter)
if offset is not None:
qs = qs[offset:]
if limit is not None:
qs = qs[:limit]
set_cache(cache_entity, cache_key, qs)
return qs
And I'm using this code to dynamically insert the three attributes into each item in the list of chapters that the above code returns:-
class ChapterType(DjangoObjectType):
completed = graphene.Boolean()
completion_status = graphene.String()
locked = graphene.String()
def resolve_completed(self, info):
user = info.context.user
completed = CompletedChapters.objects.filter(participant_id=user.id, chapter_id=self.id).exists()
return completed
def resolve_completion_status(self, info):
user = info.context.user
status = ExerciseSubmission.StatusChoices.PENDING
try:
completed = CompletedChapters.objects.get(participant_id=user.id, chapter_id=self.id)
status = completed.status
except:
pass
return status
def resolve_locked(self, info):
user = info.context.user
locked = is_chapter_locked(user, self)
return locked
class Meta:
model = Chapter
And the method is_chapter_locked() is quite complex in itself:-
def is_chapter_locked(user, chapter):
locked = None
# Letting the user see it if they are a grader
user_role = user.role.name;
grader = user_role == USER_ROLES_NAMES['GRADER']
# Checking if the user is the author of the course or a grader
if chapter.course.instructor.id == user.id or grader:
# If yes, we mark it as unlocked
return locked
course_locked = is_course_locked(user, chapter.course) # Checking if this belongs to a course that is locked
if course_locked:
# If the course is locked, we immediately return locked is true
locked = 'This chapter is locked for you'
return locked
# If the course is unlocked we
completed_chapters = CompletedChapters.objects.all().filter(participant_id=user.id)
required_chapters = MandatoryChapters.objects.all().filter(chapter_id=chapter.id)
required_chapter_ids = required_chapters.values_list('requirement_id',flat=True)
completed_chapter_ids = completed_chapters.values_list('chapter_id',flat=True)
pending_chapter_ids = []
for id in required_chapter_ids:
if id not in completed_chapter_ids:
pending_chapter_ids.append(id)
if pending_chapter_ids:
locked = 'To view this chapter, you must have completed '
pending_chapters_list = ''
for id in pending_chapter_ids:
try:
chapter= Chapter.objects.get(pk=id, active=True)
if pending_chapters_list != '':
pending_chapters_list += ', '
pending_chapters_list += '"' + str(chapter.section.index) +'.'+str(chapter.index)+'. '+chapter.title +'"'
except:
pass
locked += pending_chapters_list
return locked
As can be seen, there is a lot of dynamic processing that is done for fetching the list of chapters. And this is taking a considerably long time, even with caching of the query from the database before the dynamic attributes are calculated.
I am looking for strategies to minimize the dynamic calculation. What kind of an approach works best for performance optimizations in situations like this?
Thank you.

How to use session timeout in django rest view?

I am implementing a view for a game using Django REST's APIView. I am very new to Django and have never done this before so I'm not sure how to implement this.
The main idea is that a game only lasts 5 minutes. I am sending a resource to the user and creating a session object. This view. should be unavailable after 5 minutes. Is there such a thing as a view timeout?
Will the session timeout then work for the post request as well or do I need to implement it there as well?
This is my view:
The out commented code at the end is what I was thinking of doing. Can I even do it in the view directly? How else can I do this and test it?
views.py
class GameView(APIView):
"""
API View that retrieves the game,
retrieves an game round as well as a random resource per round
allows users to post tags that are verified and saved accordingly to either the Tag or Tagging table
"""
def get(self, request, *args, **kwargs):
current_score = 0
if not isinstance(request.user, CustomUser):
current_user_id = 1
else:
current_user_id = request.user.pk
random_resource = Resource.objects.all().order_by('?').first()
resource_serializer = ResourceSerializer(random_resource)
gameround = Gameround.objects.create(user_id=current_user_id,
gamesession=gamesession,
created=datetime.now(),
score=current_score)
gameround_serializer = GameroundSerializer(gameround)
return Response({'resource': resource_serializer.data,
'gameround': gameround_serializer.data,
})
# TODO: handle timeout after 5 min!
# now = timezone.now()
# end_of_game = start_time + timezone.timedelta(minutes=5)
# if :
# return Response({'resource': resource_serializer.data, 'gameround': gameround_serializer.data,})
# else:
# return Response(status=status.HTTP_408_REQUEST_TIMEOUT)
*Testing the out commented code in Postman always leads to a 408_request_timeout.

Django function for views takes too long

I'm currently using a Docker & Django setup. I have to fill a database with data from API requests. I was hoping to do this everytime you went on a certain page (pretty easy: just have your views.py call the function that fills the database and voila).
But the problem is, the function takes a long time, several minutes from within django (and about half the time with Spyder).
So I usually just get a TimeOut and the page never loads (I admit I have a lot of API requests being made).
I've read some stuff on using Celery but am not quite sure how it's supposed to work.
Anyone know how I could get around this to be able to load the database?
Edit: some code
Views.py
def index(request):
fill_db()
context = {}
context['segment'] = 'index'
html_template = loader.get_template( 'index.html' )
return HttpResponse(html_template.render(context, request))
fill_db function
def fill_db():
fill_agencies()
fill_companies()
fill_contracts()
fill_orders()
fill_projects()
fill_resources()
Example of a fill function:
r = pip._vendor.requests.get(BASE_URL+EXTENSION,auth=(USER,PASS))
data0 = json.loads(r.text)
conn = sqlite3.connect('/app/database.sqlite3')
c = conn.cursor()
for client in data0['data']:
BoondID = client['id']
name = client['attributes']['name']
expertiseArea = client['attributes']['expertiseArea']
town = client['attributes']['town']
country = client['attributes']['country']
mainManager = client['relationships']['mainManager']['data']['id']
values = (BoondID, name, expertiseArea, town, country, mainManager)
c.execute("INSERT OR REPLACE INTO COMPANIES (BoondID,name,expertiseArea,town,country,mainManager) VALUES (?,?,?,?,?,?);", values)
conn.commit()
conn.close()
Solved.
I used python's threading library.
I defined
agencies_thread = threading.Thread(target=fill_agencies, name="Database Updater")
and called agencies_thread.start() inside my views function.
This works fine.

Is there a method to create a persistent variable in my Django view function?

I'm currently grabbing a variable 'my_var' via GET when my view function is called, as shown. I need to query by this variable in my POST method.
def article_delete_view(request, pk):
my_var = request.GET.get('my_var') #GET THE VARIABLE from referring data-url
obj = Listing.objects.get(type=type) # I could query here if I wanted (illustration only)
article = get_object_or_404(Article, pk=pk)
data = dict()
if request.method == 'POST':
article.delete()
data['form_is_valid'] = True
articles = Article.objects.all()
obj = Listing.objects.get(name=my_var) #THIS DOES NOT WORK, since 'type' is not passed on submit.
context = {'articles':articles, 'obj':obj}
data['article_table'] = render_to_string('article_table.html', context)
else:
context = {'article':article}
data['html_form'] = render_to_string('article_delete.html', context, request=request)
return JsonResponse(data)
Is there a best-practice way to make that variable persist when POST is called on a submit? I know declaring a global is a bad idea. Thought about writing it to memory (attached to article) but that feels like a hack. Appending it to the pk argument then splitting it out feels even worse. Yuck. I could get my <input> method to pass my_var but to do so I'd have to refactor a lot of other things to make it happen. Are django session variables the answer here? Any perspective is helpful. Thanks in advance.
UPDATE & SOLUTION:
So to contextualize, the issue here is that I want to query a specific model instance within a view that does not have that model instance info (pk or id) passed to it as an argument. I can GET that variable through the data-url that fires the view, but only once. It was not usable in POST, or on any subsequent call to article_delete_view.
Imagine you're trying to make a peanut butter sandwich, but you can only retrieve one item at a time from the cupboard. You get the peanut butter on the first visit to the cupboard, but when you open the cupboard again to get the bread, the peanut butter disappears. Session variables turned out to be a solution for saving the peanut butter, you know, like a little shelf. A rolling wheelie shelf. Anyway:
def article_delete_view(request, pk):
my_var = request.GET.get('my_var')
if my_var is not None: #This prevents my_var from being lost subsequent function calls
request.session['session_var'] = my_var = request.GET.get('my_var') # create session variable
else:
pass
article = get_object_or_404(Article, pk=pk)
data = dict()
if request.method == 'POST':
article.delete()
data['form_is_valid'] = True
articles = Article.objects.all()
session_var = request.session.get('session_var', None) # grab the session variable here
obj = Listing.objects.get(name=session_var) # NOW THIS WORKS!
context = {'articles':articles, 'obj':obj}
data['article_table'] = render_to_string('article_table.html', context)
else:
context = {'article':article}
data['html_form'] = render_to_string('article_delete.html', context, request=request)
return JsonResponse(data)
The if that follows getting my_var prevents the session variable from being written as null on each subsequent call to the view, since GET only gets the variable the first time. If anyone is curious where that session_var is actually coming from, here's the source (obj.slug):
<button type="button" data-url="{% url 'article_delete' article.id %}?my_var={{obj.slug}}">Name</button>
I'm using GET to grab it from a template where it's in scope, so I can query with it in article_delete_view. Passing it in as a URL parameter did not work, since that pattern doesn't match anything in my app. So far this is working great. If anybody has any other ideas and/or can tell me why this is a bad idea, please post.

mongoengine know when to delete document

New to django. I'm doing my best to implement CRUD using Django, mongodb, and mongoengine. I'm able to query the database and render my page with the correct information from the database. I'm also able to change some document fields using javascript and do an Ajax POST back to the original Django View class with the correct csrf token.
The data payload I'm sending back and forth is a list of each Document Model (VirtualPageModel) serialized to json (each element contains ObjectId string along with the other specific fields from the Model.)
This is where it starts getting murky. In order to update the original document in my View Class post function I do an additional query using the object id and loop through the dictionary items, setting the respective fields each time. I then call save and any new data is pushed to the Mongo collection correctly.
I'm not sure if what I'm doing to update existing documents is correct or in the spirit of django's abstracted database operations. The deeper I get the more I feel like I'm not using some fundamental facility earlier on (provided by either django or mongoengine) and because of this I'm having to make things up further downstream.
The way my code is now I would not be able to create a new document (although that's easy enough to fix). However what I'm really curious about is how I would know when to delete a document which existed in the initial query, but was removed by the user/javascript code? Am I overthinking things and the contents of my POST should contain a list of ObjectIds to delete (sounds like a security risk although this would be an internal tool.)
I was assuming that my View Class might maintain either the original document objects (or simply ObjectIds) it queried and I could do my comparisions off of that set, but I can't seem to get that information to persist (as a class variable in VolumeSplitterView) from its inception to when I received the POST at the end.
I would appreciate if anyone could take a look at my code. It really seems like the "ease of use" facilities of Django start to break when paired with Mongo and/or a sufficiently complex Model schema which needs to be directly available to javascript as opposed to simple Forms.
I was going to use this dev work to become django battle-hardened in order to tackle a future app which will be much more complicated and important. I can hack on this thing all day and make it functional, but what I'm really interested in is anyone's experience in using Django + MongoDB + MongoEngine to implement CRUD on a Database Schema which is not vary Form-centric (think more nested metadata).
Thanks.
model.py: uses mongoengine Field types.
class MongoEncoder(JSONEncoder):
def default(self, o):
if isinstance(o, VirtualPageModel):
data_dict = (o.to_mongo()).to_dict()
if isinstance(data_dict.get('_id'), ObjectId):
data_dict.update({'_id': str(data_dict.get('_id'))})
return data_dict
else:
return JSONEncoder.default(self, o)
class SubTypeModel(EmbeddedDocument):
filename = StringField(max_length=200, required=True)
page_num = IntField(required=True)
class VirtualPageModel(Document):
volume = StringField(max_length=200, required=True)
start_physical_page_num = IntField()
physical_pages = ListField(EmbeddedDocumentField(SubTypeModel),
default=list)
error_msg = ListField(StringField(),
default=list)
def save(self, *args, **kwargs):
print('In save: {}'.format(kwargs))
for k, v in kwargs.items():
if k == 'physical_pages':
self.physical_pages = []
for a_page in v:
tmp_pp = SubTypeModel()
for p_k, p_v in a_page.items():
setattr(tmp_pp, p_k, p_v)
self.physical_pages.append(tmp_pp)
else:
setattr(self, k, v)
return super(VirtualPageModel, self).save(*args, **kwargs)
views.py: My attempt at a view
class VolumeSplitterView(View):
#initial = {'key': 'value'}
template_name = 'click_model/index.html'
vol = None
start = 0
end = 20
def get(self, request, *args, **kwargs):
self.vol = self.kwargs.get('vol', None)
records = self.get_records()
records = records[self.start:self.end]
vp_json_list = []
img_filepaths = []
for vp in records:
vp_json = json.dumps(vp, cls=MongoEncoder)
vp_json_list.append(vp_json)
for pp in vp.physical_pages:
filepath = get_file_path(vp, pp.filename)
img_filepaths.append(filepath)
data_dict = {
'img_filepaths': img_filepaths,
'vp_json_list': vp_json_list
}
return render_to_response(self.template_name,
{'data_dict': data_dict},
RequestContext(request))
def get_records(self):
return VirtualPageModel.objects(volume=self.vol)
def post(self, request, *args, **kwargs):
if request.is_ajax:
vp_dict_list = json.loads(request.POST.get('data', []))
for vp_dict in vp_dict_list:
o_id = vp_dict.pop('_id')
original_doc = VirtualPageModel.objects.get(id=o_id)
try:
original_doc.save(**vp_dict)
except Exception:
print(traceback.format_exc())