Django - Optimize grouping

Django - Optimize grouping - django

I have a model:
from django.db import models
class Product(models.Model):
    """A product row identified by SKU and PLU, linked to one pack type."""

    sku = models.IntegerField()
    plu = models.CharField()
    # Deleting the referenced PackTypes row deletes this product as well.
    pack_type = models.ForeignKey(PackTypes, on_delete=models.CASCADE)
I need to group them into data structure:
{
< plu_1 >: {
< sku_1 >: [
< pack_type_id_1 >,
< pack_type_id_2 >,
...
],
< sku_2 >: [],
...
},
<plu_2>: {
...
}
}
The code that does it:
from collections import defaultdict


def dict_with_list():
    """Return an empty ``sku -> [pack_type_id, ...]`` mapping."""
    return defaultdict(list)


# plu -> sku -> [pack_type_id, ...]
result = defaultdict(dict_with_list)
# The column order requested here must match the tuple unpacking below,
# otherwise the plu and sku keys end up swapped.
products = Product.objects.values_list('plu', 'sku', 'pack_type_id')
for plu, sku, pack_type_id in products:
    result[plu][sku].append(pack_type_id)
The problem is that, because the Product model contains a lot of records, this code is slow (> 5 seconds).
How could I optimize the code to be faster?

You can let the database do the ordering and then group the sorted rows with itertools.groupby:
from itertools import groupby
from operator import itemgetter

# groupby() only merges *adjacent* rows, so the rows are ordered on the same
# columns, in the same order, that are used as grouping keys below.
products = Product.objects.values_list('plu', 'sku', 'pack_type_id').order_by(
    'plu', 'sku'
)
# plu -> sku -> [pack_type_id, ...]
# Note the argument order: groupby(iterable, key).
items = {
    plu: {
        sku: [row[2] for row in sku_rows]
        for sku, sku_rows in groupby(plu_rows, key=itemgetter(1))
    }
    for plu, plu_rows in groupby(products, key=itemgetter(0))
}
That being said, if the amount of data is really huge, you should use some form of pagination to retrieve the data.

Related

whoosh schema in haystack and django

I am trying to integrate a Whoosh searcher into a Django project. I saw that you can do that using Haystack, but I don't know yet how to add my custom Whoosh index to the searcher. My schema uses ID, KEYWORD and TEXT fields, but in reality they all hold text. I used these field types because they suit my search needs for each of the documents. How do I use this schema in Haystack?
PS: A solution without Haystack is ok too.
Here is my whoosh schema/writer/searcher
import pandas as pd
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
from whoosh.query import *
def nan2none(x):
    """Return ``None`` if ``pd.isna(x)`` is true, otherwise return *x* unchanged."""
    return None if pd.isna(x) else x
# --- Build the index -------------------------------------------------------
# The first CSV column is used as the DataFrame index and is written to
# field "a" of every document.
df = pd.read_csv("df.csv", index_col=[0])
schema = Schema(
    a=ID(stored=True),  # the only field declared with stored=True
    b=KEYWORD(lowercase=True),
    c=TEXT,
    d=KEYWORD(lowercase=True),
)
ix = create_in("indexdir", schema)
writer = ix.writer()
for index, row in df.iterrows():
    # Missing cells are passed as None instead of NaN.
    writer.add_document(
        a=index,
        b=nan2none(row['b']),
        c=nan2none(row['c']),
        d=nan2none(row['d']),
    )
writer.commit()

# --- Search ----------------------------------------------------------------
search_term = "hobbit"
with ix.searcher() as searcher:
    # One parser per field; each query targets the field it is named after.
    a_query = QueryParser("a", ix.schema).parse(search_term)
    b_query = QueryParser("b", ix.schema).parse(search_term)
    c_query = QueryParser("c", ix.schema).parse(search_term)
    d_var_query = QueryParser("d", ix.schema, termclass=Variations).parse(search_term)
    d_fuzz_query = QueryParser("d", ix.schema, termclass=FuzzyTerm).parse(search_term)
    # A document matches when any of the per-field queries matches.
    query = Or([a_query, b_query, c_query, d_var_query, d_fuzz_query])
    results = searcher.search(query, limit=None)
    # Results are read while the searcher is still open.
    print(results)
    for res in results:
        print(res)
But in my django model all the documents I am adding above are CharField as follows:
class ModelLetters(models.Model):
    """Model with four character columns, ``a`` to ``d``."""

    a = models.CharField(max_length=50)
    b = models.CharField(max_length=100)
    c = models.CharField(max_length=100)
    d = models.CharField(max_length=250)
Whereas my haystack index is as follows (all CharField too):
from haystack import indexes
from appmanager.model.model_letters import ModelLetters
class LettersIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index over ``ModelLetters``.

    ``text`` is the document field (rendered from a template); ``a`` to ``d``
    are copied from the model attributes of the same name.
    """

    text = indexes.CharField(document=True, use_template=True)
    a = indexes.CharField(model_attr="a")
    b = indexes.CharField(model_attr="b")
    c = indexes.CharField(model_attr="c")
    d = indexes.CharField(model_attr="d")

    class Meta:
        model = ModelLetters
        fields = ["a", "b", "c", "d"]

    def get_model(self):
        """Return the model class this index covers."""
        return ModelLetters

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        return self.get_model().objects.all()

How to auto populate data from other model and how to add calculated fields?

I am learning Django and I have not been able to properly do two things within the clearance model:
Retrieve the name field that corresponds to the selected IMO number.
Autopopulate a date field with the current day plus 7 days.
Any ideas what I am doing wrong? Here is my code:
from django.db import models
from django.core.exceptions import ValidationError
from django.utils import timezone
from datetime import timedelta, datetime
def imo_validator(value):
    """Validate that *value* lies in the range 0..9999999 (inclusive).

    Raises:
        ValidationError: if *value* is negative or greater than 9999999.
    """
    if value < 0 or value > 9999999:
        raise ValidationError(
            'This is not a valid IMO number',
            params={'value': value},
        )
class ship(models.Model):
    """A ship, uniquely identified by its IMO number; default ordering is by ``imo``."""

    # Unique and range-checked by imo_validator.
    imo = models.IntegerField(unique=True, validators=[imo_validator])
    name = models.CharField(max_length=20)
    rpm = models.FloatField()
    power = models.FloatField()
    main_engine = models.IntegerField()

    class Meta:
        ordering = ['imo']

    def __str__(self):
        return "{}, (IMO:{})".format(self.name, self.imo)
class clearance(models.Model):
    """Clearance request for a ship at a port; default ordering is by ``eta``."""

    STATUSES = [
        ('PENDING', 'PENDING'),
        ('REJECTED', 'REJECTED'),
        ('APPROVED', 'APPROVED'),
    ]
    PORTS = [
        ('PACAN', 'PACAN'),
        ('PABLB', 'PABLB'),
        ('PACCT', 'PACCT'),
        ('PAANP', 'PAANP'),
        ('PAANA', 'PAANA'),
    ]
    date_of_request = models.DateField(default=timezone.now, blank=False, editable=True)
    # PROTECT: a ship that still has clearances cannot be deleted.
    imo = models.ForeignKey(ship, on_delete=models.PROTECT)
    port = models.CharField(max_length=20, null=True, choices=PORTS)
    eta = models.DateField(null=False)
    # NOTE: the next two lines are the attempts this question asks about and
    # are reproduced unchanged; they do not work as written.
    name = ship.name.get(imo=imo)
    calculated_eta = models.DateField(datetime.today + timedelta(days=1))
    aduanas = models.FileField(blank=True)
    aduanas_ok = models.CharField(max_length=15, default='PENDING', choices=STATUSES, editable=False)
    minsa = models.FileField(blank=True)
    minsa_ok = models.CharField(max_length=15, default='PENDING', choices=STATUSES, editable=False)

    def __str__(self):
        return "{}, ETA:{}".format(self.imo, self.eta)

    class Meta:
        ordering = ['eta']

To add a default to a DateField that is 7 days in the future, you need to create a function that returns the date 7 days from now and then pass that function to the "default" parameter of the field:
def seven_days_from_now():
    """Return the calendar date seven days after today.

    Intended to be passed (uncalled) as a field ``default`` so that it is
    evaluated each time a default value is needed.
    """
    # Imported locally so the function works whether the module does
    # ``import datetime`` or ``from datetime import datetime``.
    from datetime import date, timedelta

    return date.today() + timedelta(days=7)
class clearance(models.Model):
    ...
    # Pass the function itself (no parentheses) so it is called per default.
    calculated_eta = models.DateField(default=seven_days_from_now)
    ...
Your "name" field should be a property that returns the name of the associated "imo"
class clearance(models.Model):
    ...

    @property
    def name(self):
        """Name of the ship referenced by the ``imo`` foreign key."""
        return self.imo.name

    ...

My django keep looping the code when runserver

Sorry if this is a real newbie question; I have just started learning programming.
I put this in my models.py:
from django.db import models
from . import func
class Materials(models.Model):
    """A material type with its name and price; ``updated`` is refreshed on every save."""

    typeId = models.IntegerField()
    typeName = models.CharField(max_length=250)
    price = models.FloatField()
    updated = models.DateTimeField(auto_now=True)

    class Meta:
        # Trailing comma required: ('-typeId') is just a string, not a tuple.
        ordering = ('-typeId',)

    def __str__(self):
        # __str__ must return a str; typeId is an integer.
        return str(self.typeId)
def insert_data_once():
    """Create and save one ``Materials`` row per row returned by ``func.material_id()``.

    ``func.sell_min`` is called once per row to obtain the price.
    """
    for row in func.material_id():
        material = Materials(
            typeId=row[0],
            typeName=row[1],
            price=func.sell_min(row[0]),
        )
        material.save()
insert_data_once()
here is func.py
import requests
from xml.etree import ElementTree
import sqlite3
def material_id():
    """Return the distinct ``(materialTypeID, typeName)`` rows of published types.

    Reads from the SQLite database file ``eve.db``.

    Returns:
        list of tuples, one per distinct material.
    """
    from contextlib import closing

    command = 'SELECT DISTINCT invTypeMaterials.materialTypeID, invTypes.typeName FROM invTypeMaterials ' \
              'INNER JOIN invTypes ON invTypeMaterials.materialTypeID = invTypes.typeID ' \
              'WHERE invTypes.Published = 1'
    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect('eve.db')) as conn:
        return conn.execute(command).fetchall()
def sell_min(type_id):
    """Fetch the market statistics for *type_id* and return its ``min`` value as a float.

    Args:
        type_id: value placed in the ``typeid`` query parameter.

    Raises:
        ValueError: if the response contains no ``min`` element at the
            expected position.
    """
    url = 'https://api.evemarketer.com/ec/marketstat?typeid=' + str(
        type_id) + '&regionlimit=10000002&usesystem=30000142'
    response = requests.get(url)
    root = ElementTree.fromstring(response.content)
    # NOTE(review): root[0][0][1] is presumably the sell section of the
    # response - confirm against the API output.
    prices = [child.text for child in root[0][0][1].iter('min')]
    if not prices:
        raise ValueError('no <min> element in market response for type ' + str(type_id))
    # Use the last <min> encountered.
    return float(prices[-1])
Where should I run the insert_data_once function? In models.py the function keeps looping and I can't run manage.py runserver.
thank you

How to group result by query in orm

Can I group results on the basis of a field in the User model?
models.py
class User(AbstractUser):
USERTYPE = (('1','type_1'),('2','type_2'),....)
user_type = models.CharField(max_length=2,choices=USERTYPE)
.....
views.py
# The default model manager is called `objects`.
User.objects.all().values('first_name', 'last_name')
How can I get all users' data grouped by their type, in the format below, using only a Django ORM query?
{
"type_1":[
{
"first_name":"abc",
"last_name":"xzy"
},
{
"first_name":"abcd",
"last_name":"wxzy"
}
],
"type_2":[
{
"first_name":"abcdd",
"last_name":"xzddy"
},
{
"first_name":"absdcd",
"last_name":"wxsdzy"
}
]
}

You can not do that in a query itself. But you can do some post-processing on the query:
from itertools import groupby
from operator import methodcaller

# groupby() only merges adjacent items, so the queryset has to be ordered by
# the field the groups are derived from.
qs = User.objects.order_by('user_type')
# display name of the user type -> list of {'first_name', 'last_name'} dicts
result = {
    k: [{'first_name': u.first_name, 'last_name': u.last_name} for u in us]
    for k, us in groupby(qs, methodcaller('get_user_type_display'))
}
Here we order the queryset by the user_type field and then let groupby(..) build groups based on the get_user_type_display() method. We can then build a sublist for each such group.

Django DRF - Group by date

I need to group the result of a queryset by date on DRF
""" Django model """
class Matches(models.Model):
    """One match result: player name, platform, mode, kill count and timestamp."""

    name = models.CharField(max_length=100)
    platform = models.CharField(max_length=100)
    mode = models.CharField(max_length=100)
    kills = models.IntegerField()
    date = models.DateTimeField()
""" Serializer """
class MatchesSerializer(serializers.ModelSerializer):
    """Serializes ``models.Matches`` with the fields listed in ``Meta.fields``."""

    class Meta:
        model = models.Matches
        fields = ('name', 'platform', 'mode', 'kills', 'date')
""" views """
class Matches(generics.ListAPIView):
    """List the matches whose ``name`` equals the ``name`` URL keyword argument."""

    serializer_class = serializers.MatchesSerializer
    filter_backends = (filters.OrderingFilter,)
    lookup_field = 'name'
    ordering = ('-date',)

    def get_queryset(self):
        """Return the ``Matches`` rows filtered by the ``name`` URL kwarg."""
        username = self.kwargs['name']
        return models.Matches.objects.filter(name=username)
Desired output (just an example):
[
{
'date':'2019-01-01',
'data':[
{
'platform':'ps4',
'mode':'solo',
'kills':10,
'date':'2019-01-01 10:00:00'
},
{
'platform':'ps4',
'mode':'duo',
'kills':10,
'date':'2019-01-01 12:00:00'
},
{
'platform':'ps4',
'mode':'squad',
'kills':10,
'date':'2019-01-01 11:00:00'
},
]
},
{
'date':'2019-01-02',
'data':[
{
'platform':'ps4',
'mode':'solo',
'kills':1,
'date':'2019-01-02 10:00:00'
},
{
'platform':'ps4',
'mode':'duo',
'kills':2,
'date':'2019-01-02 12:00:00'
},
{
'platform':'ps4',
'mode':'squad',
'kills':3,
'date':'2019-01-02 11:00:00'
},
]
}
]
For me, the easy solution is to write a raw query in Django and create a serializer, but that does not feel very Pythonic...
So it appears that DRF has some beautiful way to make it look nice, maybe using to_representation...

I used the itertools.groupby iterator. Check my code below.
from itertools import groupby

# Each event is [date, label]; the events are already sorted by date, which
# groupby() requires because it only merges adjacent items.
events = [["2020-04-01", "A"], ["2020-04-01", "B"], ["2020-04-02", "C"], ["2020-04-02", "D"]]
# date -> list of labels for that date
output = {
    day: [event[1] for event in day_events]
    for day, day_events in groupby(events, key=lambda x: x[0])
}
The output will be grouped by date as follows
{'2020-04-01': ['A', 'B'], '2020-04-02': ['C', 'D']}
Please make sure you order by date first.

You can use the function raw from Django ORM
results = Matches.objects.raw('SELECT * FROM myapp_matches GROUP BY date')

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Django - Optimize grouping - django

Related

whoosh schema in haystack and django

How to auto populate data from other model and how to add calculated fields?

My django keep looping the code when runserver

How to group result by query in orm

Django DRF - Group by date

Categories

Resources