I'm making a django app that calculates popularity from google trends.
I tried to annotate the calculated values from graph data.
However, it took about 5 seconds to execute this queryset. (Using sqlite3, postgresql)
I have a few questions here.
I wonder why it is so slow.
I wonder if it can be improved.
annotate code
class DefaultCalculator(BaseCalculator):
    """Calculator that expresses an item's score relative to two standards.

    The final value is ``self_score`` as a percentage of the sum of
    ``self_score``, ``std_score`` and ``std2_score``.
    """

    # NOTE(review): each average below traverses its own ``date_points``
    # relation. When all of them are annotated on one queryset the joined
    # rows are multiplied before grouping, which is a likely cause of slow
    # queries -- confirm with EXPLAIN and consider per-score subqueries.

    def annotate_score(self, queryset, name):
        """Annotate ``queryset`` with the relative score under ``name``.

        The intermediate ``self_score``, ``std_score`` and ``std2_score``
        annotations stay on the returned queryset.
        """
        annotated = super().annotate_score(queryset, 'self_score')
        annotated = self.annotate_std_score(annotated, 'std_score')
        annotated = self.annotate_std2_score(annotated, 'std2_score')
        total = F('std_score') + F('std2_score') + F('self_score')
        percentage = F('self_score') / total * 100
        return annotated.annotate(**{name: percentage})

    def annotate_std_score(self, queryset, name):
        """Annotate the average recent graph value of ``standard`` as ``name``.

        Only date points from the last ``self.base_days`` days are averaged.
        """
        cutoff = now() - timedelta(days=self.base_days)
        recent_only = Q(standard__graph__date_points__date__gte=cutoff)
        average = Avg(
            'standard__graph__date_points__value',
            filter=recent_only,
        )
        return queryset.annotate(**{name: average})

    def annotate_std2_score(self, queryset, name):
        """Annotate the average recent graph value of ``standard2`` as ``name``.

        Only date points from the last ``self.base_days`` days are averaged.
        """
        cutoff = now() - timedelta(days=self.base_days)
        recent_only = Q(standard2__graph__date_points__date__gte=cutoff)
        average = Avg(
            'standard2__graph__date_points__value',
            filter=recent_only,
        )
        return queryset.annotate(**{name: average})
query
SELECT
"django_popularity_popularity"."id",
"django_popularity_popularity"."mid",
"django_popularity_popularity"."created",
"django_popularity_popularity"."updated",
"django_popularity_popularity"."title",
"django_popularity_popularity"."type",
"django_popularity_popularity"."geo",
"django_popularity_popularity"."standard_id",
"django_popularity_popularity"."standard2_id",
"django_popularity_popularity"."graph_id",
1080 AS "base_days",
AVG(
"django_popularity_dategraphpoint"."value"
) FILTER (
WHERE
"django_popularity_dategraphpoint"."date" >= 2019 - 09 - 30
) AS "score",
AVG(T6."value") FILTER (
WHERE
T6."date" >= 2019 - 09 - 30
) AS "std_score",
AVG(T9."value") FILTER (
WHERE
T9."date" >= 2019 - 09 - 30
) AS "std2_score"
FROM
"django_popularity_popularity"
INNER JOIN "django_popularity_graph" ON (
"django_popularity_popularity"."graph_id" = "django_popularity_graph"."id"
)
LEFT OUTER JOIN "django_popularity_dategraphpoint" ON (
"django_popularity_graph"."id" = "django_popularity_dategraphpoint"."graph_id"
)
INNER JOIN "django_popularity_standard" ON (
"django_popularity_popularity"."standard_id" = "django_popularity_standard"."id"
)
INNER JOIN "django_popularity_graph" T5 ON (
"django_popularity_standard"."graph_id" = T5."id"
)
LEFT OUTER JOIN "django_popularity_dategraphpoint" T6 ON (T5."id" = T6."graph_id")
INNER JOIN "django_popularity_standard" T7 ON (
"django_popularity_popularity"."standard2_id" = T7."id"
)
INNER JOIN "django_popularity_graph" T8 ON (T7."graph_id" = T8."id")
LEFT OUTER JOIN "django_popularity_dategraphpoint" T9 ON (T8."id" = T9."graph_id")
GROUP BY
"django_popularity_popularity"."id",
"django_popularity_popularity"."mid",
"django_popularity_popularity"."created",
"django_popularity_popularity"."updated",
"django_popularity_popularity"."title",
"django_popularity_popularity"."type",
"django_popularity_popularity"."geo",
"django_popularity_popularity"."standard_id",
"django_popularity_popularity"."standard2_id",
"django_popularity_popularity"."graph_id"
full source
Related
I have a model like this:
class Priority(models.Model):
    """Model with a numeric base value and two JSON columns."""

    # Numeric base value; defaults to 0.
    base = models.FloatField(default=0)
    # JSON payload for the job.
    job = models.JSONField()
    # JSON payload for the users.
    users = models.JSONField()
and both job and users are similar.
For example, job = {'a':1,'b':2} and users = {'c':3,'d':4}.
I want to get the sum ( base + job__a + users__c)
how can I write the filter statement,
and raw sql is fine too.
Thanks
You should accomplish this (updated):
# Annotate every Priority row with base + job['a'] + users['c'].
# Each JSON value is read as text and cast to float. Both lookups fall back
# to 0.0 so that a missing key (SQL NULL) cannot turn the whole sum into NULL.
queryset = Priority.objects.annotate(
    a=Coalesce(
        Cast(KeyTextTransform('a', 'job'), output_field=FloatField()),
        Cast(V(0.0), output_field=FloatField()),
    ),
    c=Coalesce(
        Cast(KeyTextTransform('c', 'users'), output_field=FloatField()),
        Cast(V(0.0), output_field=FloatField()),
    ),
).annotate(
    # Plain per-row arithmetic: no aggregate (and so no GROUP BY) is needed.
    sum=F('base') + F('a') + F('c'),
)
for item in queryset:
    print(item.sum)
I have a table TickerStatement, which contains financial statements about companies
class Statements(models.TextChoices):
    """
    Supported statements.

    Each member's stored value is identical to its name.
    """
    capital_lease_obligations = 'capital_lease_obligations'
    net_income = 'net_income'
    price = 'price'
    total_assets = 'total_assets'
    short_term_debt = 'short_term_debt'
    total_long_term_debt = 'total_long_term_debt'
    total_revenue = 'total_revenue'
    total_shareholder_equity = 'total_shareholder_equity'
class TickerStatement(TimeStampMixin):
    """
    Model that represents ticker financial statements.

    One row holds a single named statement value for one ticker at one
    fiscal date.
    """
    # Which statement this row stores; restricted to the Statements choices.
    name = models.CharField(choices=Statements.choices, max_length=50)
    # Fiscal date the value applies to.
    fiscal_date_ending = models.DateField()
    # Statement value; precision comes from MAX_DIGITS / DECIMAL_PLACES.
    value = models.DecimalField(max_digits=MAX_DIGITS, decimal_places=DECIMAL_PLACES)
    # Owning ticker; rows are deleted together with it (CASCADE).
    ticker = models.ForeignKey(Ticker, on_delete=models.CASCADE, null=False,
                               related_name='ticker_statements')
And now I'm trying to calculate a multiplier. The formula looks like:
(short_term_debt + total_long_term_debt) / total_shareholder_equity
I wrote a raw SQL query
-- Computes (total_long_term_debt + short_term_debt) / total_shareholder_equity
-- per fiscal_date_ending for ticker 12, for dates on or after 2015-09-03.
-- The outer query supplies the total_long_term_debt rows ("debt").
SELECT "fin_tickerstatement"."fiscal_date_ending",
t2.equity AS "equity",
value AS "debt",
short_term_debt AS "short_term_debt",
(value + short_term_debt) / t2.equity AS "result"
FROM "fin_tickerstatement"
JOIN
-- t2: total_shareholder_equity value per fiscal date.
(SELECT "fin_tickerstatement"."fiscal_date_ending",
fin_tickerstatement.value AS "equity"
FROM "fin_tickerstatement"
WHERE ("fin_tickerstatement"."ticker_id" = 12
AND "fin_tickerstatement"."fiscal_date_ending" >= date'2015-09-03'
AND "fin_tickerstatement"."name" = 'total_shareholder_equity')
GROUP BY "fin_tickerstatement"."fiscal_date_ending",
fin_tickerstatement.value
ORDER BY "fin_tickerstatement"."fiscal_date_ending" DESC) t2
ON fin_tickerstatement.fiscal_date_ending = t2.fiscal_date_ending
JOIN
-- t3: short_term_debt value per fiscal date.
(SELECT "fin_tickerstatement"."fiscal_date_ending",
fin_tickerstatement.value AS "short_term_debt"
FROM "fin_tickerstatement"
WHERE ("fin_tickerstatement"."ticker_id" = 12
AND "fin_tickerstatement"."fiscal_date_ending" >= date'2015-09-03'
AND "fin_tickerstatement"."name" = 'short_term_debt')
GROUP BY "fin_tickerstatement"."fiscal_date_ending",
fin_tickerstatement.value
ORDER BY "fin_tickerstatement"."fiscal_date_ending" DESC) t3
ON fin_tickerstatement.fiscal_date_ending = t3.fiscal_date_ending
-- Restrict the outer rows to the long-term-debt statement of the same ticker.
WHERE ("fin_tickerstatement"."ticker_id" = 12
AND "fin_tickerstatement"."fiscal_date_ending" >= date'2015-09-03'
AND "fin_tickerstatement"."name" = 'total_long_term_debt')
GROUP BY "fin_tickerstatement"."fiscal_date_ending",
equity,
debt,
short_term_debt
ORDER BY "fin_tickerstatement"."fiscal_date_ending" DESC;
and have no idea how to translate it into Django ORM. Maybe you have some ideas or know some Django plugins that can help me.
The only way to solve this problem is to install django-query-builder.
I am facing this issue while querying data from the database. I am not using a serializer; instead I use a cursor with a raw query in a manager, as you can see.
view.py
#login_required
def index(request):
    """Return the customer list as a JSON response (superusers only).

    Args:
        request: the incoming HttpRequest; ``request.user`` must be a
            superuser.

    Returns:
        A JsonResponse of the form ``{"customers": [...]}``.

    Raises:
        Http404: if the requesting user is not a superuser.
    """
    # Imported locally so the block does not depend on the module's imports.
    from django.http import Http404

    if not request.user.is_superuser:
        # ``raise(404)`` would raise a TypeError: an int is not an exception.
        raise Http404
    customers = Customer.objects.Customer_List()
    # Pass the rows straight to JsonResponse. Its default encoder
    # (DjangoJSONEncoder) serializes datetime, date, timedelta and Decimal,
    # which plain ``json.dumps`` rejects with "not JSON serializable".
    return JsonResponse({'customers': customers})
model.py
class CustomerListManager(models.Manager):
    """Manager that exposes a raw-SQL customer report."""

    def Customer_List(self):
        """Run the customer report query and return its rows.

        Returns:
            A list of dicts, one per result row, keyed by the column names
            reported by the cursor. Values are whatever the database driver
            returns for each column.
        """
        from django.db import connection
        with connection.cursor() as cursor:
            # One row per customer. The two outer SELECT layers derive
            # "perivous_30_days" (last_60_days - last_30_days) and "pace"
            # (last_30_days - perivous_30_days) from the innermost aggregates.
            cursor.execute("""
select
customer_id
,last_30_days-perivous_30_days pace
,first_name
,last_name
,company
,tags
,date_created
,latest_order_date
,first_order_date
,customer_group
,store_type
,assigned_to
,total_orders
,days_active
,ofa
,ofaa
,total_spent
,average_order_value
,status
from (
select
customer_id
,last_60_days-last_30_days perivous_30_days
,last_30_days
,first_name
,last_name
,company
,tags
,date_created
,latest_order_date
,first_order_date
,customer_group
,store_type
,assigned_to
,total_orders
,days_active
,ofa
,ofaa
,total_spent
,average_order_value
,status
from
(select
c.customer_id
,c.first_name
,c.last_name
,c.company
,c.tags
,c.date_created
,max(s.date_created) latest_order_date
,min(s.date_created) first_order_date
,g.name as customer_group
,c.store_type
,a.username assigned_to
,(select count(s1.sale_id)
from sales_sale s1
where s1.customer_id =c.customer_id) total_orders
--,max(s.date_created) - min(s.date_created) days_active
,( select max(s.date_created) - min(s.date_created)
from sales_sale s
where c.customer_id=s.customer_id ) days_active
,(select (max(s.date_created) - min(s.date_created))/count(s.sale_id) OFA
from sales_sale s
where s.customer_id =c.customer_id) ofa
,(select (current_date - max(s.date_created))- ( (max(s.date_created) - min(s.date_created))/count(s.sale_id) ) OFA_threshold
from sales_sale s
where s.customer_id =c.customer_id) ofaa
,(select sum(s1.total_inc_tax) total_spent
from sales_sale s1
where s1.customer_id =c.customer_id
and status != 'Cancelled' ) total_spent
,(select count(s1.sale_id)
from sales_sale s1
where s1.customer_id = c.customer_id
and s1.date_created > current_date - interval '30' day
) last_30_days
,(select count(s1.sale_id)
from sales_sale s1
where s1.customer_id =c.customer_id
and s1.date_created > current_date - interval '60' day
) last_60_days
,(select avg(s1.total_inc_tax) average_order_value
from sales_sale s1
where s1.customer_id =c.customer_id) average_order_value
,current_date - max(s.date_created) status
from customers_customer c
left outer join customers_customergroup g on c.customer_group_id=g.id
left outer join auth_user a on c.assigned_to_id=a.id
left outer join sales_sale s on c.customer_id=s.customer_id
left outer join sales_saleitem si on s.id=si.sale_id
-- where c.customer_id =5
group by c.customer_id,c.first_name,c.last_name,c.company
,c.store_credit,c.date_created,g.name,a.username
order by c.customer_id
)total
--where total.total_order >= 1
)total2
""")
            # Column names come from the DB-API cursor description.
            columns = [col[0] for col in cursor.description]
            # Pair each row's values with the column names.
            result = [
                dict(zip(columns, row))
                for row in cursor.fetchall()
            ]
            # NOTE(review): prints the entire result set on every call;
            # looks like leftover debugging output -- confirm and remove.
            print(result)
            return result
Error:
TypeError at /customers/
Object of type 'datetime' is not JSON serializable
I am trying to query data from the database using a cursor in a manager (Django model).
I am trying to add two new fields to my django product queryset and I do something like this
# Products of the company whose name or code contains the search term.
products = Product.objects.filter(company=company).filter(
    Q(name__icontains=search_term) | Q(code__icontains=search_term)
)
# Attach both computed columns with a single extra() call.
products = products.extra(
    select={
        "is_distributed": product_is_distributed_query(False),
        "expense_type": product_expense_type_query(False),
    }
)
But the two queries (product_is_distributed_query and product_expense_type_query) are identical just returning different fields. Can I do this in only one extra(select{}) by using just one query to return both fields?
The query would be like this:
def product_is_distributed_query(use_product_code):
    """Build the SQL subquery reporting a product's distribution state.

    The generated SQL is correlated with an outer ``core_product`` row and
    selects ``is_distributed`` together with the matching expense type name.

    Args:
        use_product_code: when true, ``stock_non_product`` rows must match
            the product's code in addition to its name.

    Returns:
        The SQL text as a string.
    """
    # Optional extra join condition, spliced into the ON clause below.
    code_condition = "AND cp.code = snp.code" if use_product_code else ""
    head = """
select vt.is_distributed, coalesce(et.name, '')
from (
SELECT
cp.id,
CASE WHEN SUM(coalesce(snp.id, 0)) > 0 THEN true
ELSE false
END as is_distributed,
max(coalesce(snp.expense_type_id, 0)) as maxId1
FROM
core_product AS cp
LEFT JOIN
stock_non_product AS snp
ON (cp.name = snp.name """
    # The outer join must use the alias actually exposed by ``vt`` (maxId1);
    # the previous ``vt.maxId`` named a column that does not exist.
    tail = """
AND cp.company_id = snp.company_id AND snp.is_deleted = false)
LEFT JOIN expense_type as et ON(snp.expense_type_id = et.id AND snp.company_id = et.company_id)
WHERE
cp.id = core_product.id
AND cp.company_id = core_product.company_id
-- AND snp.company_id = core_product.company_id
GROUP BY
cp.id
) as vt
-- LEFT JOIN stock_non_product AS snp ON(snp.id = maxId)
LEFT JOIN expense_type as et ON(et.id = vt.maxId1)
"""
    return head + code_condition + tail
instead of being two separated queries that are identical to this one but return one or the other field
Let's assume that I have a model:
class MyModel(...):
    """Example model describing an interval with an optional end."""

    # Start of the interval (required).
    start = models.DateTimeField()
    # End of the interval; may be NULL in the database and blank in forms.
    stop = models.DateTimeField(null=True, blank=True)
And I have also two records:
start=2012-01-01 7:00:00 stop=2012-01-01 14:00:00
start=2012-01-01 7:00:03 stop=2012-01-01 23:59:59
Now I want to find the second record, so the start datetime should be between start and stop, and stop should have the time 23:59:59. How do I build such a query?
Some more info:
I think this requires F object. I want to find all records where start -> time is between another start -> time and stop -> time, and stop -> time is 23:59:59, and date is the same like in start
You can use range and extra:
from django.db.models import Q
# Match rows whose start falls inside either of the two date ranges.
q1 = Q(start__range=(start_date_1, end_date_1))
# This was previously assigned to q1 again, which left q2 undefined.
q2 = Q(start__range=(start_date_2, end_date_2))
# Raw WHERE fragment requiring the stop time to be exactly 23:59:59.
# It uses the model's ``stop`` column and passes the values as parameters
# instead of interpolating them into the SQL text.
query = (
    "EXTRACT(hour from stop) = %s"
    " and EXTRACT(minute from stop) = %s"
    " and EXTRACT(second from stop) = %s"
)
MyModel.objects.filter(q1 | q2).extra(where=[query], params=[23, 59, 59])
Notice: Posted before hard answer requirement changed 'time is 23:59:59, and date is the same like in start'
To perform the query: "start datetime should be between start and stop"
MyModel.objects.filter(start__gte=obj1.start, start__lte=obj1.stop)
I don't quite understand your second condition, though. Do you want it to match only objects with hour 23:59:59, but for any day?
# Moment that must lie inside a record's [start, stop] interval.
dt = '2012-01-01 8:00:00'
stop_hour = '23'
stop_minute = '59'
stop_sec = '59'
# Raw WHERE fragment: stop must fall on exactly 23:59:59 (any day).
# The mapping keys must match the %(...)s placeholders exactly.
where = 'HOUR(stop) = %(hour)s AND MINUTE(stop) = %(minute)s AND SECOND(stop) = %(second)s' \
    % {'hour': stop_hour, 'minute': stop_minute, 'second': stop_sec}
# start <= dt <= stop. The previous comparisons were inverted and could
# only match a record whose start and stop both equal dt.
objects = MyModel.objects.filter(start__lte=dt, stop__gte=dt) \
    .extra(where=[where])