Django - Optimize grouping - django

I have a model:
from django.db import models
class Product(models.Model):
    """Product record holding a SKU, a PLU and a reference to one pack type."""

    sku = models.IntegerField()
    plu = models.CharField()
    # on_delete=CASCADE: removing the referenced PackTypes row removes this row too.
    pack_type = models.ForeignKey(PackTypes, on_delete=models.CASCADE)
I need to group them into data structure:
{
< plu_1 >: {
< sku_1 >: [
< pack_type_id_1 >,
< pack_type_id_2 >,
...
],
< sku_2 >: [],
...
},
<plu_2>: {
...
}
}
The code that does it:
def dict_with_list():
    """Return a fresh ``defaultdict(list)`` used as the inner (per-PLU) mapping."""
    return defaultdict(list)


# Target shape: result[plu][sku] -> list of pack_type_id values.
result = defaultdict(dict_with_list)
# The column order requested here must match the tuple unpacking in the loop;
# asking for 'sku' first while unpacking it as `plu` silently swaps the keys.
products = Product.objects.values_list('plu', 'sku', 'pack_type_id')
for plu, sku, pack_type_id in products:
    result[plu][sku].append(pack_type_id)
The problem is that the Product model contains a lot of records, so this code is slow (> 5 seconds).
How could I optimize the code to be faster?

You can let the database do the ordering and then group the already-sorted rows with itertools.groupby:
from operator import itemgetter
from itertools import groupby
# Fetch the columns in nesting order (plu -> sku -> pack_type_id) and let the
# database sort them: groupby only merges *adjacent* rows with equal keys.
products = Product.objects.values_list('plu', 'sku', 'pack_type_id').order_by(
    'plu', 'sku'
)
# groupby takes the iterable first and the key function second.
items = {
    plu: {
        sku: [row[2] for row in sku_rows]
        for sku, sku_rows in groupby(plu_rows, itemgetter(1))
    }
    for plu, plu_rows in groupby(products, itemgetter(0))
}
That being said, if the amount of data is really huge, you should use some form of pagination to retrieve the data.

Related

whoosh schema in haystack and django

I am trying to integrate a Whoosh searcher into a Django project. I saw that this can be done with Haystack, but I don't yet know how to plug my custom Whoosh index into it. My schema uses ID, KEYWORD and TEXT fields, although all of them are really just text; I chose these field types because they suit my search needs for each document. How do I use this schema in Haystack?
PS: A solution without Haystack is ok too.
Here is my whoosh schema/writer/searcher
import pandas as pd
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
from whoosh.query import *
def nan2none(x):
    """Return ``None`` when ``pd.isna(x)`` is true, otherwise return ``x`` unchanged."""
    if pd.isna(x):
        return None
    return x
# Load the documents; the first CSV column becomes the DataFrame index.
df = pd.read_csv("df.csv", index_col=[0])

# Only field `a` is stored, so it is the only value carried by search hits.
schema = Schema(a=ID(stored=True),
                b=KEYWORD(lowercase=True),
                c=TEXT,
                d=KEYWORD(lowercase=True))
ix = create_in("indexdir", schema)

# Index every row, mapping NaN cells to None via nan2none.
writer = ix.writer()
for index, row in df.iterrows():
    writer.add_document(a=index,
                        b=nan2none(row['b']),
                        c=nan2none(row['c']),
                        d=nan2none(row['d']))
writer.commit()

search_term = "hobbit"
with ix.searcher() as searcher:
    # One parser per field so the same term is matched against each of them.
    a_query = QueryParser("a", ix.schema).parse(search_term)
    b_query = QueryParser("b", ix.schema).parse(search_term)
    # Must target field "c"; parsing against "b" again would leave "c" unsearched.
    c_query = QueryParser("c", ix.schema).parse(search_term)
    d_var_query = QueryParser("d", ix.schema, termclass=Variations).parse(search_term)
    d_fuzz_query = QueryParser("d", ix.schema, termclass=FuzzyTerm).parse(search_term)
    query = Or([a_query, b_query, c_query, d_var_query, d_fuzz_query])
    results = searcher.search(query, limit=None)
    print(results)
    for res in results:
        print(res)
But in my django model all the documents I am adding above are CharField as follows:
class ModelLetters(models.Model):
    """Letter record with four character fields (a-d)."""

    a = models.CharField(max_length=50)
    b = models.CharField(max_length=100)
    c = models.CharField(max_length=100)
    d = models.CharField(max_length=250)
Whereas my haystack index is as follows (all CharField too):
from haystack import indexes
from appmanager.model.model_letters import ModelLetters
class LettersIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack index for ModelLetters: fields a-d plus a templated document field."""

    # Document field (document=True) whose content comes from a template.
    text = indexes.CharField(document=True, use_template=True)
    # Each index field below reads the same-named attribute of the model.
    a = indexes.CharField(model_attr="a")
    b = indexes.CharField(model_attr="b")
    c = indexes.CharField(model_attr="c")
    d = indexes.CharField(model_attr="d")

    class Meta:
        model = ModelLetters
        fields = ["a", "b", "c", "d"]

    def get_model(self):
        """Return the model class covered by this index."""
        return ModelLetters

    def index_queryset(self, using=None):
        """Used when the entire index for model is updated."""
        return self.get_model().objects.all()

How to auto populate data from other model and how to add calculated fields?

I am learning Django and I have not been able to do two things properly within the clearance model:
Retrieve the name field that corresponds to the selected IMO number.
Auto-populate a date field with the current day plus 7 days.
Any ideas what I am doing wrong? Here is my code:
from django.db import models
from django.core.exceptions import ValidationError
from django.utils import timezone
from datetime import timedelta, datetime
def imo_validator(value):
    """Raise ValidationError unless ``value`` lies in the range 0..9999999 inclusive."""
    out_of_range = value < 0 or value > 9999999
    if not out_of_range:
        return
    raise ValidationError(
        'This is not a valid IMO number',
        params={'value': value},
    )
class ship(models.Model):
    """Vessel record with a unique, validated IMO number and engine figures."""

    imo = models.IntegerField(unique=True, validators=[imo_validator])
    name = models.CharField(max_length=20)
    rpm = models.FloatField()
    power = models.FloatField()
    main_engine = models.IntegerField()

    class Meta:
        ordering = ['imo']

    def __str__(self):
        """Render as '<name>, (IMO:<imo>)'."""
        return f"{self.name}, (IMO:{self.imo})"
def _default_calculated_eta():
    """Return the date seven days after today; default for ``clearance.calculated_eta``."""
    return datetime.today().date() + timedelta(days=7)


class clearance(models.Model):
    """Port clearance request for a ship, with per-authority document status."""

    STATUSES = [
        ('PENDING', 'PENDING'),
        ('REJECTED', 'REJECTED'),
        ('APPROVED', 'APPROVED'),
    ]
    PORTS = [
        ('PACAN', 'PACAN'),
        ('PABLB', 'PABLB'),
        ('PACCT', 'PACCT'),
        ('PAANP', 'PAANP'),
        ('PAANA', 'PAANA'),
    ]
    date_of_request = models.DateField(default=timezone.now, blank=False, editable=True)
    imo = models.ForeignKey(ship, on_delete=models.PROTECT)
    port = models.CharField(max_length=20, null=True, choices=PORTS)
    eta = models.DateField(null=False)
    # The default must be a callable so the date is computed for each new row;
    # an expression evaluated in the class body would run once, at import time.
    calculated_eta = models.DateField(default=_default_calculated_eta)
    aduanas = models.FileField(blank=True)
    aduanas_ok = models.CharField(max_length=15, default='PENDING', choices=STATUSES, editable=False)
    minsa = models.FileField(blank=True)
    minsa_ok = models.CharField(max_length=15, default='PENDING', choices=STATUSES, editable=False)

    @property
    def name(self):
        """Name of the ship referenced by the ``imo`` foreign key."""
        # Resolved per instance; a class-body lookup cannot know which ship is meant.
        return self.imo.name

    def __str__(self):
        return "{}, ETA:{}".format(self.imo, self.eta)

    class Meta:
        ordering = ['eta']
To add a default to a DateField that is 7 days in the future you need to create a function that returns the date 7 days in the future and then pass that to the "default" parameter of the field
def seven_days_from_now():
    """Return today's date shifted seven days into the future."""
    one_week = datetime.timedelta(days=7)
    today = datetime.date.today()
    return today + one_week
class clearance(models.Model):
    ...
    # Pass the function object itself (no call parentheses) as the default.
    calculated_eta = models.DateField(default=seven_days_from_now)
    ...
Your "name" field should be a property that returns the name of the associated "imo"
class clearance(models.Model):
    ...

    @property
    def name(self):
        """Name of the ship referenced by the ``imo`` foreign key."""
        return self.imo.name

    ...

My django keep looping the code when runserver

Sorry if this is a real beginner question; I have only just started learning to program.
I put this in my models.py:
from django.db import models
from . import func
class Materials(models.Model):
    """Material type with its name, a price and a last-updated timestamp."""

    typeId = models.IntegerField()
    typeName = models.CharField(max_length=250)
    price = models.FloatField()
    updated = models.DateTimeField(auto_now=True)

    class Meta:
        # Trailing comma makes this a one-element tuple; ('-typeId') is just a string.
        ordering = ('-typeId',)

    def __str__(self):
        """Return the type id as text (``__str__`` must return a str)."""
        return str(self.typeId)
def insert_data_once():
    """Save one Materials row per entry returned by ``func.material_id()``.

    Each entry supplies the type id at index 0 and the type name at index 1;
    the price is fetched with ``func.sell_min`` for that type id.
    """
    for material in func.material_id():
        Materials(
            typeId=material[0],
            typeName=material[1],
            price=func.sell_min(material[0]),
        ).save()


# This call sits at module level, so it executes whenever the module is imported.
insert_data_once()
here is func.py
import requests
from xml.etree import ElementTree
import sqlite3
def material_id():
    """Return the distinct (materialTypeID, typeName) rows of published types.

    Reads the SQLite database file ``eve.db`` in the current working directory.

    Returns:
        A list of ``(materialTypeID, typeName)`` tuples.
    """
    command = 'SELECT DISTINCT invTypeMaterials.materialTypeID, invTypes.typeName FROM invTypeMaterials ' \
              'INNER JOIN invTypes ON invTypeMaterials.materialTypeID = invTypes.typeID ' \
              'WHERE invTypes.Published = 1'
    conn = sqlite3.connect('eve.db')
    try:
        return conn.execute(command).fetchall()
    finally:
        # Always release the connection, even when the query raises.
        conn.close()
def sell_min(type_id):
    """Fetch the minimum sell price for ``type_id`` from the evemarketer API.

    Args:
        type_id: Type id placed in the ``typeid`` query parameter.

    Returns:
        The text of the last ``min`` element found under ``root[0][0][1]``
        of the XML response, converted to float.

    Raises:
        requests.RequestException: on timeout, connection or HTTP errors.
        ValueError: if the response holds no usable ``min`` element.
    """
    URL = 'https://api.evemarketer.com/ec/marketstat?typeid=' + str(
        type_id) + '&regionlimit=10000002&usesystem=30000142'
    # Without a timeout a stalled server would block the caller indefinitely.
    response = requests.get(URL, timeout=10)
    response.raise_for_status()
    root = ElementTree.fromstring(response.content)
    price_text = None
    for child in root[0][0][1].iter('min'):
        price_text = child.text
    if price_text is None:
        raise ValueError('no sell "min" price found for type id {}'.format(type_id))
    return float(price_text)
Where should I run the insert_data_once function? In models.py the function keeps being executed again and again, and I can't run manage.py runserver.
thank you

How to group result by query in orm

Can I group results on the basis of a field in the User model?
models.py
class User(AbstractUser):
USERTYPE = (('1','type_1'),('2','type_2'),....)
user_type = models.CharField(max_length=2,choices=USERTYPE)
.....
views.py
User.object.all().values('first_name','last_name')
How can I get all users' data grouped by their type, in the format below, using only a Django ORM query?
{
"type_1":[
{
"first_name":"abc",
"last_name":"xzy"
},
{
"first_name":"abcd",
"last_name":"wxzy"
}
],
"type_2":[
{
"first_name":"abcdd",
"last_name":"xzddy"
},
{
"first_name":"absdcd",
"last_name":"wxsdzy"
}
]
}
You can not do that in a query itself. But you can do some post-processing on the query:
from itertools import groupby
from operator import methodcaller
# The default manager is `objects`. Ordering by user_type puts equal types
# next to each other, which groupby requires to form a single group per type.
qs = User.objects.order_by('user_type')
result = {
    type_label: [
        {'first_name': user.first_name, 'last_name': user.last_name}
        for user in users
    ]
    for type_label, users in groupby(qs, methodcaller('get_user_type_display'))
}
We here thus order the queryset by the user_type field, and then let groupby(..) make groups based on the get_user_type_display() method. Then we can make sublists for each such group.

Django DRF - Group by date

I need to group the result of a queryset by date on DRF
""" Django model """
class Matches(models.Model):
    """Match record: name, platform, mode, kill count and a date/time."""

    name = models.CharField(max_length=100)
    platform = models.CharField(max_length=100)
    mode = models.CharField(max_length=100)
    kills = models.IntegerField()
    date = models.DateTimeField()
""" Serializer """
class MatchesSerializer(serializers.ModelSerializer):
    """Model serializer for Matches exposing name, platform, mode, kills and date."""

    class Meta:
        model = models.Matches
        fields = ('name', 'platform', 'mode', 'kills', 'date')
""" views """
class Matches(generics.ListAPIView):
    """List view returning the Matches rows whose name equals the ``name`` URL kwarg."""

    serializer_class = serializers.MatchesSerializer
    filter_backends = (filters.OrderingFilter,)
    lookup_field = 'name'
    ordering = ('-date',)

    def get_queryset(self):
        """Return the Matches queryset filtered by ``self.kwargs['name']``."""
        username = self.kwargs['name']
        return models.Matches.objects.filter(name=username)
Desired output (just an example):
[
{
'date':'2019-01-01',
'data':[
{
'platform':'ps4',
'mode':'solo',
'kills':10,
'date':'2019-01-01 10:00:00'
},
{
'platform':'ps4',
'mode':'duo',
'kills':10,
'date':'2019-01-01 12:00:00'
},
{
'platform':'ps4',
'mode':'squad',
'kills':10,
'date':'2019-01-01 11:00:00'
},
]
},
{
'date':'2019-01-02',
'data':[
{
'platform':'ps4',
'mode':'solo',
'kills':1,
'date':'2019-01-02 10:00:00'
},
{
'platform':'ps4',
'mode':'duo',
'kills':2,
'date':'2019-01-02 12:00:00'
},
{
'platform':'ps4',
'mode':'squad',
'kills':3,
'date':'2019-01-02 11:00:00'
},
]
}
]
For me, the easy solution is to write a raw query in Django and create a serializer for it, but that does not feel very Pythonic...
So it appears that DRF has some beautiful way to make it look nice, maybe using to_representation...
I used the itertools.groupby iterator. Check my code below.
from itertools import groupby
events = [["2020-04-01", "A"], ["2020-04-01", "B"], ["2020-04-02", "C"], ["2020-04-02", "D"]]
# groupby only merges adjacent items, so `events` must already be ordered by date.
# Maps each date (item[0]) to the list of values (item[1]) recorded for it.
output = {
    day: [event[1] for event in day_events]
    for day, day_events in groupby(events, lambda x: x[0])
}
The output will be grouped by date as follows
{'2020-04-01': ['A', 'B'], '2020-04-02': ['C', 'D']}
Please make sure you order the data by date first, because groupby only groups consecutive items.
You can use the function raw from Django ORM
results = Matches.objects.raw('SELECT * FROM myapp_matches GROUP BY date')