How to create a dataframe from a model (Django) - django

I am developing an app in Django.
I have this model
class my_model(models.Model):
    """Model with three optional text fields (the source for the dataframe)."""

    # Each field accepts both NULL and empty values (blank=True, null=True).
    Field_A = models.CharField(max_length=256, blank=True, null=True)
    Field_B = models.CharField(max_length=25, blank=True, null=True)
    Field_C = models.TextField(blank=True, null=True)
I want to create a dataframe whose column names are the model's field names,
with one row per model object and, in each column, that object's value for the corresponding field.
How can I do it? Is there a single command to do it? Or do I have to iterate?
EDIT: Here is the raw and inelegant solution that I have found so far:
import pandas as pd
entries = my_model.objects.all()
# Names of every field reported by the model's _meta API; these become the
# dataframe columns.
columns_names = [field.name for field in my_model._meta.get_fields()]
L_GI = len(entries)
# Iterate over the objects and collect one dict per row. The dataframe is
# built once at the end: DataFrame.append was removed in pandas 2.0, and
# appending row by row copies the whole frame on every iteration.
rows = [
    {"Field_A": element.Field_A, "Field_B": element.Field_B, "Field_C": element.Field_C}
    for element in entries
]
# Columns listed in columns_names but absent from the row dicts are left
# empty (NaN), exactly as they were with the append-based version.
GI = pd.DataFrame(rows, columns=columns_names)
I bet there is a faster way that avoids iteration. Any suggestions?

Nice question
I think you are forced to iterate them.
I have implemented it in 3 different ways so that you can choose your favorite one
import time
import pandas as pd
from django.core import serializers
class PandasModelMixin(models.Model):
    """Abstract model mixin that exports model rows as a pandas DataFrame.

    Three alternative class methods are provided; each one prints its own
    execution time and returns the resulting DataFrame.
    """

    class Meta:
        abstract = True

    @classmethod
    def as_dataframe(cls, queryset=None, field_list=None):
        """Build a DataFrame by reading field values straight off each object.

        Args:
            queryset: objects to export; defaults to ``cls.objects.all()``.
            field_list: field names to use as columns; defaults to the names
                returned by ``cls._meta._get_fields(reverse=False)``.

        Returns:
            A DataFrame with one row per object and one column per field name.
        """
        t1 = time.time()
        if queryset is None:
            queryset = cls.objects.all()
        if field_list is None:
            # NOTE(review): _get_fields is a private Django API; kept as-is so
            # the default column set does not change.
            field_list = [_field.name for _field in cls._meta._get_fields(reverse=False)]
        data = [
            [obj.serializable_value(column) for column in field_list]
            for obj in queryset
        ]
        df = pd.DataFrame(data, columns=field_list)
        # Parentheses are required: "%" binds tighter than "-", so without
        # them the string would be formatted first and then have t1
        # subtracted from it (TypeError).
        print("Execution time without serialization: %s" % (time.time() - t1))
        return df

    @classmethod
    def as_dataframe_using_django_serializer(cls, queryset=None):
        """Build a DataFrame from the output of Django's 'python' serializer.

        Only the entries under each serialized object's ``'fields'`` key are
        used as columns.

        Args:
            queryset: objects to export; defaults to ``cls.objects.all()``.

        Returns:
            A DataFrame with one row per object, or an empty DataFrame when
            the queryset has no rows.
        """
        t1 = time.time()
        if queryset is None:
            queryset = cls.objects.all()
        if queryset.exists():
            serialized_models = serializers.serialize(format='python', queryset=queryset)
            serialized_objects = [s['fields'] for s in serialized_models]
            # Materialise the dict views so pandas receives plain lists.
            data = [list(x.values()) for x in serialized_objects]
            columns = list(serialized_objects[0].keys())
            df = pd.DataFrame(data, columns=columns)
        else:
            # Without this else branch the populated frame built above would
            # always be overwritten by an empty one.
            df = pd.DataFrame()
        print("Execution time using Django serializer: %s" % (time.time() - t1))
        return df

    @classmethod
    def as_dataframe_using_drf_serializer(cls, queryset=None, drf_serializer=None, field_list=None):
        """Build a DataFrame using a Django REST Framework serializer.

        Args:
            queryset: objects to export; defaults to ``cls.objects.all()``.
            drf_serializer: serializer class to use; when omitted, a
                ``ModelSerializer`` for ``cls`` is created on the fly.
            field_list: fields for the generated serializer (``'__all__'``
                when omitted); only read when ``drf_serializer`` is None.

        Returns:
            A DataFrame with one row per object and one column per serializer
            field.
        """
        # Imported locally; inside this method the name shadows the
        # module-level django.core ``serializers``.
        from rest_framework import serializers

        t1 = time.time()
        if queryset is None:
            queryset = cls.objects.all()
        if drf_serializer is None:
            class CustomModelSerializer(serializers.ModelSerializer):
                class Meta:
                    model = cls
                    fields = field_list or '__all__'

            drf_serializer = CustomModelSerializer
        serialized_objects = drf_serializer(queryset, many=True).data
        data = [list(x.values()) for x in serialized_objects]
        columns = list(drf_serializer().get_fields().keys())
        df = pd.DataFrame(data, columns=columns)
        print("Execution time using DjangoRestFramework serializer: %s" % (time.time() - t1))
        return df
So inherit your Model in this way:
class MyModel(PandasModelMixin):
    """Example model that gains the ``as_dataframe*`` class methods from the mixin."""

    # Every field accepts both NULL and empty values.
    field_a = models.CharField(max_length=256, blank=True, null=True)
    field_b = models.CharField(max_length=25, blank=True, null=True)
    field_c = models.TextField(blank=True, null=True)
and try the code in this way:
>> MyModel.as_dataframe()
>> MyModel.as_dataframe_using_django_serializer()
>> MyModel.as_dataframe_using_drf_serializer()
I have tried my code using a Model with 450 instances and 15 columns and I had these results:
Execution time without serialization: 0.07040905952453613
Execution time using Django serializer: 0.07644820213317871
Execution time using DjangoRestFramework serializer: 0.12314629554748535
N.B.
I'm using Django 2.2 and Python 3.6.5

Related

DRF Get likes and dislikes grouped by day

I have models named Post and Like. How can I get JSON with the count of likes and dislikes grouped by date (the date field in the Like model)?
Here is my models.py
class Post(models.Model):
    """A post written by a user, with a title and a body."""

    # Deleting the author deletes their posts as well (CASCADE).
    author = models.ForeignKey(User, on_delete=models.CASCADE)
    title = models.CharField(max_length=255)
    body = models.TextField()
    # Set once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)

    def __str__(self):
        """Represent the post by its title."""
        return self.title
class Like(models.Model):
    """A user's reaction ('like' or 'dislike') to a post."""

    # Allowed values for the ``like`` field, as (stored value, label) pairs.
    LIKE = (
        ('like', 'like'),
        ('dislike', 'dislike'),
    )

    user = models.ForeignKey(User, on_delete=models.CASCADE)
    # Reachable from a Post instance as ``post.likes``.
    post = models.ForeignKey(Post, on_delete=models.CASCADE, related_name='likes')
    like = models.CharField(max_length=255, choices=LIKE)
    # auto_now=True: the date is refreshed every time the row is saved.
    date = models.DateField(auto_now=True)
Here is my serializers.py:
class AnaliticsSerializer(serializers.ModelSerializer):
    """Serializer exposing every field of the Like model."""

    class Meta:
        model = Like
        fields = '__all__'
Here is my views.py:
class AnaliticView(ListAPIView):
    """List endpoint for Like rows that can be filtered by ``date``."""

    queryset = Like.objects.all()
    serializer_class = AnaliticsSerializer
    # Filtering is delegated to django-filter, restricted to the date field.
    filter_backends = [DjangoFilterBackend]
    filter_fields = ['date']
This is the result I want:
[
{
"date": "2020-11-14",
"total_likes": 25,
"total_dislikes": 17
},
{
"date": "2020-11-15",
"total_likes": 38,
"total_dislikes": 8
},
{
"date": "2020-11-18",
"total_likes": 11,
"total_dislikes": 0
}
]
Here's a working example of one basic approach to this. It should give you the idea.
The analytics code shouldn't really be in the view. Also, some of the grouping and counting might be offloaded to the database, using advanced ORM querying like this.
views.py
from collections import Counter
from datetime import datetime, timedelta
from itertools import groupby
from django_filters import rest_framework as filters
from rest_framework.generics import GenericAPIView
from rest_framework.response import Response
from likes.filters import DateRangeFilterSet
from likes.models import Like
class AnaliticView(GenericAPIView):
    """Return per-day totals of likes and dislikes as a JSON list."""

    queryset = Like.objects.all()
    filter_backends = (filters.DjangoFilterBackend,)
    filterset_class = DateRangeFilterSet

    def get(self, request, format=None):
        """Group the filtered Like rows by date and count each reaction.

        Returns a Response whose payload is a list of dicts with the keys
        'date', 'total_likes' and 'total_dislikes'.
        """
        # groupby only merges adjacent items, so the rows must be ordered by
        # date before grouping.
        likes_in_date_order = self.filter_queryset(self.get_queryset()).order_by('date')

        def day_of(like):
            return like.date.strftime("%Y-%m-%d")

        analytics = []
        for day, likes_on_day in groupby(likes_in_date_order, day_of):
            # Counter returns 0 for a reaction that never occurred that day.
            tally = Counter(entry.like for entry in likes_on_day)
            analytics.append(
                {
                    'date': day,
                    'total_likes': tally['like'],
                    'total_dislikes': tally['dislike'],
                }
            )
        return Response(analytics)
Like I said in the comments, it would be possible to create a lightweight class with attributes for date and the two totals, and pass a list of instances of that to a serializer to get the response data. In my opinion, that's overkill as you can just build a dictionary that is easily serialised into JSON.
Update:
I've switched to a GenericAPIView, which is a superclass of ListAPIView, because it supports filter backends. I have added a FilterSet that filters by date_from and date_to:
filters.py
from django_filters import rest_framework as filters
from likes import models
class DateRangeFilterSet(filters.FilterSet):
    """Filter Like rows to an inclusive date range."""

    # date_from keeps rows with date >= value; date_to keeps rows with date <= value.
    date_from = filters.DateFilter(field_name='date', lookup_expr='gte')
    date_to = filters.DateFilter(field_name='date', lookup_expr='lte')

    class Meta:
        model = models.Like
        fields = ('date_from', 'date_to')

Django FilterSet with distinct AND order_by

I want to use django-filter to create a FilterSet with distinct results for field A (Minutes.case), ordered by field B (case__case_filed_date). The database is PostgreSQL.
Commented lines in class MinutesListView are things I've tried (in various configurations).
models.py
class Case(models.Model):
    """A case, identified by its unique case number."""

    case_filed_date = models.DateField()
    case_number = models.CharField(max_length=25, unique=True)
    as_of_timestamp = models.DateTimeField()

    def __str__(self):
        """Represent the case by its case number."""
        return self.case_number
class Minutes(models.Model):
    """A minute entry attached to a Case."""

    # Deleting the case deletes its minute entries as well (CASCADE).
    case = models.ForeignKey(Case, on_delete=models.CASCADE)
    # The descriptive fields below are all optional.
    minute_entry_text = models.TextField(blank=True, null=True)
    minute_entry_date = models.DateField(blank=True, null=True)
    minute_entry_type_text = models.CharField(max_length=255, blank=True, null=True)
filters.py
class MinuteFilterSet(df.FilterSet):
    """Case-insensitive substring filters over minute entries."""

    # Matches against the related case's number; distinct=True is passed to
    # the filter.
    case__case_number = df.CharFilter(lookup_expr='icontains', label='Case Number', distinct=True)
    minute_entry_text = df.CharFilter(lookup_expr='icontains', label='Minutes Text')

    class Meta:
        model = Minutes
        fields = ['minute_entry_text']
        # NOTE(review): whether Meta.order_by is honoured depends on the
        # installed django-filter version - confirm.
        order_by = ['-case__case_filed_date']
views.py:
class FilteredListView(ListView):
    """ListView whose queryset is narrowed by a django-filter FilterSet.

    Subclasses set ``filterset_class``; the instantiated filterset is stored
    on the view and exposed to the template as ``filterset``.
    """

    filterset_class = None

    def get_queryset(self):
        """Return the base queryset after applying the request's GET filters."""
        base_queryset = super().get_queryset()
        # Keep the filterset on the view instance so get_context_data can
        # hand it to the template later.
        self.filterset = self.filterset_class(self.request.GET, queryset=base_queryset)
        return self.filterset.qs

    def get_context_data(self, **kwargs):
        """Add the filterset (which provides the filter form) to the context."""
        context = super().get_context_data(**kwargs)
        context.update(filterset=self.filterset)
        return context
class MinutesListView(FilteredListView):
    """Paginated, filterable list of minute entries, one per case."""

    filterset_class = filters.MinuteFilterSet
    paginate_by = 25
    # Earlier attempts, kept for reference:
    # ordering = '-case__case_filed_date'
    # queryset = Minutes.objects.all()
    # distinct('case') takes field names only on PostgreSQL.
    queryset = Minutes.objects.distinct('case')
The current code shows distinct, unordered results. When I'm able to get ordered results, the cases (case_number) are duplicated. I've read the docs regarding distinct() with order_by() in Django/PostgreSQL but I'm still missing something.

django-elasticsearch-dsl unable to filter with multiple model fields

models.py
class PostJob(models.Model):
    """A job posting with a title, a description and its key skills."""

    job_title = models.CharField(max_length=256)
    job_description = models.TextField()
    key_skills = models.TextField()

    def __str__(self):
        """Represent the posting by its job title."""
        return self.job_title
documents.py
from django_elasticsearch_dsl import DocType, Index
from .models import PostJob
# Elasticsearch index that holds the job documents.
jobs = Index('jobs')


# The decorator registers the document on the ``jobs`` index; it must be an
# "@" decorator, not a "#" comment, or the registration never happens.
@jobs.doc_type
class JobsDocument(DocType):
    """Elasticsearch document built from the PostJob model."""

    class Meta:
        model = PostJob
        # Model fields that are copied into the document.
        fields = [
            'job_title',
            'job_description',
            'key_skills',
        ]
views.py
from .documents import JobsDocument
def search_jobs(request):
    """Search job documents using the ``q`` GET parameter.

    Returns a JSON error payload when ``q`` is absent, a JSON list of
    matching jobs when ``q`` is non-empty, and None when ``q`` is an empty
    string.
    """
    q = request.GET.get('q')
    if q is None:
        return JsonResponse({"code":500,"msg":"query sting not found"})
    if not q:
        return None

    build_query = JobsDocument.search().query
    # NOTE(review): Python's ``or`` returns its first truthy operand, so the
    # job_title query is only used if the key_skills search object is falsy.
    jobs = build_query("match", key_skills=q) or build_query("match", job_title=q)
    results = [
        {
            "job_title": hit.job_title,
            "description": hit.job_description,
            "key_skills": hit.key_skills,
        }
        for hit in jobs
    ]
    # safe=False is needed because the payload is a list, not a dict.
    return JsonResponse(results, safe=False)
In Django, using 'django-elasticsearch-dsl', I am trying to search across multiple model fields.
Here I want to filter on multiple fields, key_skills and job_title,
but results come back only for key_skills; it doesn't match job_description.
For job_title: if my job_title is "python developer", nothing comes back when I search for only "developer". It comes back only when I search for the complete "python developer", including the whitespace.
Please have a look into it.

Django Complex Query: combined output

models.py
class DeviceType(models.Model):
    """A kind of device, identified by its unique name."""

    device_type = models.CharField(max_length=200, unique=True)

    def __str__(self):
        """Represent the device type by its name."""
        return self.device_type
class Device(models.Model):
    """A physical device with its type, serial number and current user."""

    # Both foreign keys point at a named column (to_field) of the target model.
    device_type = models.ForeignKey(DeviceType, to_field='device_type')
    serial_number = models.CharField(max_length=200, unique=True)
    in_use_by = models.ForeignKey(User, to_field='username')
    # brand and model fall back to "-" when no value is given.
    brand = models.CharField(max_length=200, default="-", null=False)
    model = models.CharField(max_length=200, default="-", null=False)
    type_number = models.CharField(max_length=200, blank=True, null=True)
    mac_address = models.CharField(max_length=200, blank=True, null=True)
Above is my models.py file. I want to write a query such that I get the output in the following format:
device_type-serial_number-model-brand
Processing in Python
We can generate a list containing such strings and let Python construct these strings as follows:
# values_list yields one tuple of the four values per device; '-'.join turns
# each tuple into "device_type-serial_number-model-brand".
# The model is named Device, and the outer list(...) call must be closed.
list(map(
    '-'.join,
    Device.objects.values_list(
        'device_type__device_type',
        'serial_number',
        'model',
        'brand'
    )
))
This will return a list of strings.
Processing at the database
We can also perform the concatenation at the database, and then we have a queryset of strings:
from django.db.models import F, Value
from django.db.models.functions import Concat
# Concatenate the four values in the database, separated by '-', and return
# only the resulting string for each row. The model is named Device.
Device.objects.annotate(
    text=Concat(
        F('device_type__device_type'),
        Value('-'),
        F('serial_number'),
        Value('-'),
        F('model'),
        Value('-'),
        F('brand')
    )
).values_list('text', flat=True)

get dictionary from queryset

I have below query where I want to fetch 'specs' which is a dictionary but the type of envi_dict is a Queryset class. How do I fetch the dictionary from this queryset? Any help is much appreciated.
envi_dict = Environment.objects.values('specs')
Result
<QuerySet [({u'CPU Model': u'Dell', u'RAM': 1000, u'CPU': 400},), ({u'CPU Model': u'Dell', u'RAM': 1000, u'CPU': 400},)]>, <class 'django.db.models.query.QuerySet'>, )
I tried Environment.objects.filter(title=item.title).values('specs') and also Environment.objects.get('specs') but I am still getting a queryset.
Edit: Below is models.py
class CommonModel(models.Model):
    """Abstract base model with authoring metadata and two JSON payloads."""

    author = models.ForeignKey('auth.User',)
    title = models.CharField(max_length=400)
    comments = models.TextField(blank=True)
    # The default must be a callable: a literal {} is a single dict object
    # shared by every instance that relies on the default, whereas ``dict``
    # creates a fresh empty dict each time.
    requirements = JSONField(default=dict)
    specs = JSONField(default=dict)
    created_date = models.DateTimeField(default=timezone.now)
    updated_date = models.DateTimeField(blank=True, null=True)

    class Meta:
        abstract = True

    def update(self):
        """Stamp ``updated_date`` with the current time and save the row."""
        self.updated_date = timezone.now()
        self.save()

    def __str__(self):
        """Represent the object by its title."""
        return self.title
class Estimate(CommonModel):
gp_code = models.TextField(default='Unknown')
inputs = models.TextField(blank=True)
...
def __str__(self):
return self.title
class Environment(CommonModel):
estimate = models.ForeignKey(Estimate,related_name='environments')
logic = PythonCodeField(blank=True, null=True)
...
Build a list of dicts with model instance.title as key and the specs as value by iterating over all Environment model instances.
# One single-entry dict per Environment row, mapping its title to its specs.
[{environment.title: environment.specs} for environment in Environment.objects.all()]
Use Django model_to_dict
If you need to convert a single model instance into a dictionary, use
model_to_dict.
If you need to convert every row of a queryset into dictionaries, use Model.objects.values() or django.core.serializers
using model_to_dict
from django.forms.models import model_to_dict
qs = Environment.objects.filter(title=item.title)
# model_to_dict expects a single model instance, not a QuerySet, so take the
# first match; first() returns None when nothing matches.
instance = qs.first()
if instance is not None:
    qs_dict = model_to_dict(instance)  # {id:1,'estimate':'some-estimate-data','logic':'some-logic-data'}
    # Do something here with qs_dict
else:
    # No matching Environment -- handle the not-found case here.
    pass