Django count on many-to-many - django

I have the models:
class Keyword(models.Model):
    """A tag that can be attached to any number of articles."""

    keyword = models.CharField(max_length=355, blank=True)


class Article(models.Model):
    """An article tagged with zero or more keywords."""

    title = models.TextField(blank=True)
    # Keyword is declared first so the name exists when this field is built.
    # `null` has no effect on a ManyToManyField (Django reports fields.W340),
    # so only `blank=True` is kept.
    keywords = models.ManyToManyField(Keyword, blank=True)
I want to get a count of how many articles have each keyword. In essence I want to have a list of keywords where I can get each ones count to give it a relative weighting.
I have tried:
keyword_list=Article.objects.all().annotate(key_count=Count('keywords__keyword'))
but
keyword_list[0].key_count
just seems to give me the number of different keywords each article has? Is it somehow a reverse lookup?
Any help would be much appreciated.
UPDATE
So I got it working:
def keyword_list(request):
    """Return all keywords, each with an article count and a relative weight.

    Every keyword gets two extra attributes: ``count`` (number of articles
    using it) and ``weight`` (a float between 0 and MAX_WEIGHT, scaled
    linearly between the smallest and largest count).

    Returns a dict with a single ``'keywords'`` entry holding the list of
    keyword objects in alphabetical order; the list is empty when there are
    no keywords.
    """
    MAX_WEIGHT = 5
    # Materialise once so the attributes set below live on the very objects
    # that are returned.
    keywords = list(Keyword.objects.order_by('keyword'))
    if not keywords:
        return {'keywords': keywords}
    # NOTE: this runs one COUNT query per keyword, so the number of queries
    # grows with the number of keywords.
    for keyword in keywords:
        keyword.count = Article.objects.filter(keywords=keyword).count()
    counts = [keyword.count for keyword in keywords]
    min_count = min(counts)
    max_count = max(counts)
    # When every keyword has the same count the spread is 0; fall back to 1
    # so the division below is defined (all weights then come out as 0).
    spread = float(max_count - min_count) or 1.0
    for keyword in keywords:
        keyword.weight = MAX_WEIGHT * (keyword.count - min_count) / spread
    return {'keywords': keywords}
but the view results in a hideous number of queries. I have tried implementing the suggestions given here (thanks), but this is the only method which seems to work at the moment. However, I must be doing something wrong, as I now have 400+ queries!
UPDATE
Wooh! Finally got it working:
def keyword_list(request):
    """Return the 20 most used keywords with a count and an integer weight.

    Each entry is a dict with the keys ``'keyword'``, ``'count'`` and
    ``'weight'``; the weight is an int between 0 and MAX_WEIGHT scaled
    linearly between the smallest and largest count in the selection.

    Returns a dict with a single ``'keywords'`` entry holding the list,
    ordered by descending count; the list is empty when nothing matches.
    """
    MAX_WEIGHT = 5
    # NOTE(review): 'keyword_set' has to be the query name of the reverse
    # relation from Keyword to Article -- confirm against the
    # ManyToManyField's related_name.
    keywords_with_article_counts = Keyword.objects.all().annotate(count=Count('keyword_set'))
    # Get keyword and count, limited to the top 20 by count. Evaluating into
    # a list runs exactly one query and keeps the dicts mutable in place.
    keywords = list(
        keywords_with_article_counts.values('keyword', 'count').order_by('-count')[:20]
    )
    if not keywords:
        return {'keywords': keywords}
    counts = [entry['count'] for entry in keywords]
    min_count = min(counts)
    max_count = max(counts)
    # Identical counts give a spread of 0; use 1 so the division is defined.
    spread = float(max_count - min_count) or 1.0
    for entry in keywords:
        entry['weight'] = int(MAX_WEIGHT * (entry['count'] - min_count) / spread)
    return {'keywords': keywords}

Since you want the number of articles that have each keyword, you have to do it the other way:
>>> Keyword.objects.all().annotate(article_count=models.Count('article'))[0].article_count
2

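This is the same as the answer from Vebjorn Ljosa, but with a little context. The string passed to Count is the query name of the reverse many-to-many relationship: it is article_set only if the ManyToManyField declares related_name='article_set'; without a related_name, Django uses the lowercased model name, article.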
keywords_with_article_counts = Keyword.objects.all().annotate(article_count=Count('article_set'))
To illustrate your results, it would be easier to return the .values():
keywords_with_article_counts.values('keyword', 'article_count')
Which would return a list of dictionaries that would look something like this:
[{'article_count': 36, 'keyword': u'bacon'},
{'article_count': 4, 'keyword': u'unicorns'},
{'article_count': 8, 'keyword': u'python'}]

I don't know how you would do it efficiently, but if you just need to get it done:
# Print the number of articles attached to each keyword.
# Runs a separate count() call for every keyword in the loop.
keywords = Keyword.objects.all()
for keyword in keywords:
print 'Total Articles: %d' % (Article.objects.filter(keywords=keyword).count())

Related

Django filter using Q and multiple fields with different values

I am trying to generate a result that satisfies with the filter query below:
indicators = request.GET.getlist('indicators[]')
fmrprofiles = FMRPriority.objects.all()
q_objects = Q()
obj_filters = []
for indicator in indicators:
split_i = indicator.split('_')
if len(split_i) == 5:
if not any(d['indicator'] == split_i[1] for d in obj_filters):
obj_filters.append({
'indicator': split_i[1],
'scores': []
})
for o in obj_filters:
if split_i[1] == o['indicator']:
o['scores'].append(int(split_i[4]))
for obj in obj_filters:
print (obj['scores'])
q_objects.add(Q(pcindicator__id = int(obj['indicator'])) & Q(score__in=obj['scores']), Q.AND)
print (q_objects)
fmrprofiles = fmrprofiles.values('fmr__id','fmr__road_name').filter(q_objects).order_by('-fmr__date_validated')
print (fmrprofiles.query)
Basically, indicators is a list e.g. ['indicator_1_scoring_1_5', 'indicator_1_scoring_1_4', 'indicator_2_scoring_2_5']
I wanted to filter FMRPriority with these following fields:
pcindicator
score
e.g. pcindicator is equal 1 and scores selected are 5,4..another selection pcindicator is equal to 2 and scores selected are 3.
The query q_objects.add(Q(pcindicator__id = int(obj['indicator'])) & Q(score__in=obj['scores']), Q.AND) returns empty set..i have tried also the raw sql, same result.
Model:
class FMRPriority(models.Model):
fmr = models.ForeignKey(FMRProfile, verbose_name=_("FMR Project"), on_delete=models.CASCADE)
pcindicator = models.ForeignKey(PCIndicator, verbose_name=_("Priority Indicator"), on_delete=models.PROTECT)
score = models.FloatField(_("Score"))
I solved this by combining the conditions with OR, counting how many times each id occurs, and then excluding the rows whose count is not equal to the number of filters:
# Build one (indicator AND scores) condition per selected indicator and OR
# them together, so a row matching any of the selections is kept.
for obj in obj_filters:
    print (obj['scores'])
    q_objects.add(
        Q(fmrpriority__pcindicator__id=int(obj['indicator']))
        & Q(fmrpriority__score__in=obj['scores']),
        Q.OR,
    )
# Count the matched rows per group and keep only the groups that matched
# every selection (count equal to the number of filters).
fmrprofiles = (
    fmrprofiles.values(*vals_to_display)
    .filter(q_objects)
    .annotate(
        num_ins=Count('id'),
        # ... further annotations elided in the original answer ...
    )
    .filter(num_ins=len(obj_filters))
    .order_by('rank', 'road_name')
)

How to make sorting in DB by the number of items in the ListField?

I have the next model:
class Departments(Document):
_id = fields.ObjectIdField()
name = fields.StringField(blank=True, null=True)
department_id = fields.StringField(blank=True, null=True) # Added
list_of_users = fields.ListField(blank=True, null=True)
list_of_workstations = fields.ListField(blank=True, null=True)
As you can see list_of_users and list_of_workstations are lists of items.
I wrote a code in Python, which takes all data from DB, put it into dict and then sorts as I need, but it works too slow.
How can I sort Departments right in the DB by the length of list_of_users or list_of_workstations or by ratio of list_of_users/list_of_workstations, something like:
departments = DepartmentStats.objects.order_by(len(list_of_users)).dsc
or
departments = DepartmentStats.objects.order_by(len(list_of_users)/len(list_of_workstations)).dsc
?
For your first request, use annotation like Umut Gunebakan told you in his comment. But I'm not sure about Count() on a ListField:
departments = DepartmentStats.objects.all().annotate(num_list_users=Count('list_of_users')).order_by('-num_list_users')
For a desc order by, you just need to add the sign '-' (minus).
https://docs.djangoproject.com/en/1.10/ref/models/querysets/#order-by
The second request will be :
# Order departments by the ratio of user count to workstation count,
# highest first.
departments = DepartmentStats.objects.all().annotate(
    user_per_workstation=Count('list_of_users') / Count('list_of_workstations')
).order_by('-user_per_workstation')
UPDATE: (Mongoengine used)
With mongoengine you need to get the item frequencies and sort the result:
Check this part of the documentation - further aggregation
list_user_freqs = DepartmentStats.objects.item_frequencies('list_of_users', normalize=True)
from operator import itemgetter
list_user_freqs_sroted = sorted(list_user_freqs.items(), key=itemgetter(1), reverse=True)
If someone needs raw query:
# Sort departments by how many users they list, largest first.
departments = DepartmentStats._get_collection().aggregate([
    {"$project": {
        "department_id": 1,
        "name": 1,
        "list_of_users": 1,
        # Sorting on the array itself compares its elements, not its length,
        # so project the element count explicitly. $ifNull treats a missing
        # or null list as empty, which $size alone would reject.
        "num_users": {"$size": {"$ifNull": ["$list_of_users", []]}},
    }},
    {"$sort": {"num_users": -1}},
])
and the case, when the result must be sorted by the ratio list_of_users/list_of_workstations
# Element counts of both lists; a missing or null list counts as empty.
users_count = {"$size": {"$ifNull": ["$list_of_users", []]}}
workstations_count = {"$size": {"$ifNull": ["$list_of_workstations", []]}}

# Sort departments by users per workstation, largest ratio first.
departments = DepartmentStats._get_collection().aggregate([
    {"$project": {
        "department_id": 1,
        "name": 1,
        "list_of_users": 1,
        # $divide fails on a zero divisor, so departments without any
        # workstation get a null ratio (which sorts last in descending order).
        "len_list_of_items": {"$cond": [
            {"$eq": [workstations_count, 0]},
            None,
            {"$divide": [users_count, workstations_count]},
        ]},
    }},
    {"$sort": {"len_list_of_items": -1}},
])

django ratings app , negative scoring

models.py
class Restaurant(models.Model):
    """A restaurant rated on food, service and ambience."""

    food_rating = RatingField(range=2, weight=5, can_change_vote=True, allow_delete=True, allow_anonymous=True)
    service_rating = RatingField(range=2, weight=5, can_change_vote=True, allow_delete=True, allow_anonymous=True)
    # NOTE(review): "ratiing" looks like a typo; the name is kept because
    # renaming the field changes the attribute other code refers to.
    ambience_ratiing = RatingField(range=2, weight=5, can_change_vote=True, allow_delete=True, allow_anonymous=True)
view.py code
r = Restaurant.objects.get(pk=1)
r.food_rating.add(score = -1 , user = request.user , ip_address =request.META.get('HTTP_REFERER'))
print r.food_rating.score
error
djangoratings.exceptions.InvalidRating: -1 is not a valid choice for food_rating
doubt
my food_rating field is eligible to take two scores , how am i supposed to change the score so that i can implement vote up and vote down feature , on vote up , i should be able to add 1 to the existing score and on vote down i should be able to subtract a vote , please help , thanks in advance
The problem comes from this script:
if score < 0 or score > self.field.range:
raise InvalidRating("%s is not a valid choice for %s" % (score, self.field.name))
Short answer: convert the [-x:y] interval you want to use for display into [-x+x:y+x] in your code to avoid this problem. If you wanted [-5:5], then use [-5+5:5+5], which is [0:10]. If you wanted [-50:100], then use [-50+50:100+50] = [0:150], and so on ... It's a simple formula that shouldn't be a problem for a programmer ;)
Long answer: either fork djangoratings, or open an issue asking for a setting that enables negative ratings ... which the maintainer will probably reject because of the simple interval-conversion workaround. Here are some more concrete examples:
class Restaurant(models.Model):
# blabla :)
ambience_rating = RatingField(range=5, weight=5,can_change_vote = True,allow_delete = True,allow_anonymous = True)
def get_adjusted_ambiance_rating(self):
return self.ambience_rating - 3
So, if ambience_rating is "1" (the lowest score), get_adjusted_ambiance_rating() will return -2.
If ambience_rating is "5" (the highest score), get_adjusted_ambiance_rating() will return 2.
Adapt this example/trick to your needs.
You should probably make a single method for all ratings:
def get_adjusted_rating(self, which):
return getattr(self, '%s_rating' % which) - 3
Callable as such:
# The argument is the field name without its "_rating" suffix, so it has to
# be spelled exactly like the model field (ambience_rating -> 'ambience').
restaurant.get_adjusted_rating('ambience')
restaurant.get_adjusted_rating('food')
# etc ...
And maybe a template filter:
@register.filter
def get_adjusted_rating(restaurant, which):
    """Template filter: return ``restaurant.get_adjusted_rating(which)``.

    ``which`` is the rating name passed as the filter argument. The function
    must be decorated with ``register.filter`` to be usable in templates.
    """
    return restaurant.get_adjusted_rating(which)
Usable as such:
{# The argument must match the model field prefix: ambience_rating -> "ambience" #}
{{ restaurant|get_adjusted_rating:"ambience" }}
{{ restaurant|get_adjusted_rating:"food" }}
{# etc, etc #}
More details about template filters.

Pseudo-random ordering in django queryset

Suppose I have a QuerySet that returns 10 objects, 3 of which will be displayed in the following positions:
[ display 1 position ] [ display 2 position ] [ display 3 position ]
The model representing it is as follows:
class FeaturedContent(models.Model):
image = models.URLField()
position = models.PositiveSmallIntegerField(blank=True, null=True)
where position can be either 1, 2, 3, or unspecified (Null).
I want to be able to order the QuerySet randomly EXCEPT FOR the objects with a specified position. However, I can't order it by doing:
featured_content = FeaturedContent.objects.order_by('-position', '?')
Because if I had one item that had position = 2, and all the other items were Null, then the item would appear in position 1 instead of position 2.
How would I do this ordering?
Thinking about this, perhaps it would be best to have the data as a dict instead of a list, something like:
`{'1': item or null, '2': item or null, '3': item or null, '?': [list of other items]}`
If you use a db backend that does random ordering efficiently you could do it like this:
# This will hold the result: keys '1'..'3' map to an item (or None), and '?'
# maps to the random items that were not needed to fill a slot.
featured_dict = {}
# Index the positioned items by their position so each slot is a direct
# lookup and the query's ordering cannot cause a slot to be missed.
# If two items share a position, the first one returned is used.
pinned = {}
for item in FeaturedContent.objects.filter(position__isnull=False):
    pinned.setdefault(item.position, item)
# Evaluate the random ordering exactly once so every index below refers to
# the same shuffled sequence.
featured_rand = list(FeaturedContent.objects.filter(position__isnull=True).order_by('?'))
rand_index = 0
for pos in range(1, 4):
    content = pinned.get(pos)
    if content is None and rand_index < len(featured_rand):
        # Empty slot: fill it with the next random item, if any is left.
        content = featured_rand[rand_index]
        rand_index += 1
    featured_dict[str(pos)] = content
# Slicing a list past its end is safe and simply yields [].
featured_dict['?'] = featured_rand[rand_index:]
If you just want to iterate over the queryset you can have two querysets, order them and chain them.
import itertools
qs1 = FeaturedContent.objects.filter(position__isnull=False).order_by('-position')
qs2 = FeaturedContent.objects.filter(position__isnull=True).order_by('?')
featured_content = itertools.chain(qs1, qs2)
for item in featured_content:
#do something with qs item
print item
Update:
Since you are asking to make sure position determines the order and the "blank" spaces are replaced randomly by elements with null positions, you can build the list slot by slot, provided the featured list you want to obtain is not too large (20 in this case):
SLOTS = 20  # number of positions in the featured list

# Fetch all positioned items in one query instead of one get() per slot.
pinned = {}
for item in FeaturedContent.objects.filter(position__range=(1, SLOTS)):
    pinned[item.position] = item  # assuming position is unique
# At most SLOTS fillers can ever be needed; shuffle them once in the database.
rands = list(FeaturedContent.objects.filter(position__isnull=True).order_by('?')[:SLOTS])

featured = []
for i in range(1, SLOTS + 1):
    if i in pinned:
        featured.append(pinned[i])
    elif rands:
        featured.append(rands.pop())
    # else: neither a positioned item nor a filler is left; the slot is skipped.
I would post process it, doing a merge sort between the ordered and unordered records.
EDIT:
The beginnings of a generator for this:
def posgen(posseq, arbseq, posattr='position', startpos=1):
    """Merge positioned items with filler items into a single sequence.

    Slots are numbered from ``startpos`` upwards. A slot whose number equals
    the ``posattr`` attribute of the next item of ``posseq`` yields that
    item; any other slot yields the next item of ``arbseq``.

    ``posseq`` must be ordered by ascending position. Both arguments may be
    any iterable (iterators are accepted as before).

    Exhaustion is handled explicitly instead of leaking StopIteration (which
    becomes a RuntimeError inside a generator on Python 3.7+):

    * when ``posseq`` runs out, the rest of ``arbseq`` is yielded in order;
    * when ``arbseq`` runs out, the remaining positioned items are yielded
      in order, since the gaps before them can no longer be filled.
    """
    posseq = iter(posseq)
    arbseq = iter(arbseq)
    exhausted = object()  # sentinel: None could be a legitimate item

    posel = next(posseq, exhausted)
    for cur in itertools.count(startpos):
        if posel is exhausted:
            # No positioned items left: everything else is filler.
            for item in arbseq:
                yield item
            return
        if getattr(posel, posattr) == cur:
            yield posel
            posel = next(posseq, exhausted)
            continue
        filler = next(arbseq, exhausted)
        if filler is exhausted:
            # Nothing left to fill gaps with: flush the positioned items.
            yield posel
            for item in posseq:
                yield item
            return
        yield filler
Note that there are lots of error conditions possible in this code (hint: StopIteration).

Django charts - date&time axis

I have one model which looks like this:
class Measurement(models.Model):
date = models.DateField('date')
time = models.TimeField('time')
Q = models.DecimalField(max_digits=10, decimal_places=6)
P = models.DecimalField(max_digits=10, decimal_places=6)
f = models.DecimalField(max_digits=10, decimal_places=6)
In my views, I would like to represent it. So I made this function:
def plotMeas(request):
    """Render the most recent measurements as a PNG with two stacked plots.

    The upper plot shows Q and the lower plot shows P, both against
    "minutes ago". At most MAX_POINTS of the newest measurements (by primary
    key) are drawn.

    Returns an HttpResponse with content type image/png.
    """
    MAX_POINTS = 100
    # NOTE(review): the x axis treats consecutive rows as 10 minutes apart,
    # as the original code did -- confirm the sampling interval.
    MINUTES_PER_SAMPLE = 10

    # Fetch the newest rows in a single query, then restore oldest-first
    # order. This does not rely on primary keys being contiguous from 1.
    rows = list(Measurement.objects.order_by('-pk').values_list('P', 'Q')[:MAX_POINTS])
    rows.reverse()

    P = [p_value for p_value, _ in rows]
    Q = [q_value for _, q_value in rows]
    # The last (newest) sample is 0 minutes ago, the one before it 10, etc.
    count = len(rows)
    t = [(count - 1 - i) * MINUTES_PER_SAMPLE for i in range(count)]

    # Construct the graph
    fig = Figure()
    q = fig.add_subplot(211)
    q.set_xlabel("time (minutes ago)")
    q.set_ylabel("Q (VAR)")
    p = fig.add_subplot(212)
    p.set_xlabel("time (minutes ago)")
    p.set_ylabel("P (W)")
    p.plot(t, P, 'go-')
    q.plot(t, Q, 'o-')

    canvas = FigureCanvas(fig)
    response = HttpResponse(content_type='image/png')
    canvas.print_png(response)
    return response
However, I would like that the horizontal axis would show the date and the time (saved in the model). Does anyone know how to do it?
Have a look at the documentation for plot_date. Conveniently plot_date takes similar arguments to plot. A call might look like:
p.plot_date(sequence_of_datetime_objects, y_axis_values, 'go-')
Using matplotlib.dates you can then customize the format of your x-axis labels.
A simple example:
The following will specify that the x-axis displays only every third month in the format Jan '09 (assuming English-speaking locale).
p.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
p.xaxis.set_major_formatter(mdates.DateFormatter("%b '%y"))
Since you have dates and times stored separately you may either want to
change your model to use a DateTimeField, or
use Python to combine them.
For example:
import datetime as dt
t1 = dt.time(21,0,1,2) # 21:00:01.000002 (the fourth argument is microseconds)
d1 = dt.date.today()
dt1 = dt.datetime.combine(d1,t1)
# result when today is 2011-04-15: datetime.datetime(2011, 4, 15, 21, 0, 1, 2)
To iterate over two sequences and combine them you might use zip (code for illustrative purposes only, not necessarily optimized):
# Pair each date with the time at the same index and merge the two into a
# single datetime; zip stops at the shorter of the two sequences.
sequence_of_datetime_objects = [
    dt.datetime.combine(a_date, a_time)
    for a_date, a_time in zip(sequence_of_date_objects, sequence_of_time_objects)
]
Feel free to open another question if you get stuck implementing the specifics.