Forcing Django to use INNER JOIN instead of LEFT OUTER JOIN - django

I have implemented search in my Django application to allow searching by more than one field. This results in Django always using a LEFT OUTER JOIN, which in my case generates the wrong results. However, when I change the SQL generated from a LEFT OUTER JOIN to an INNER JOIN, it returns the correct result.
I think it has to do with the way the Q objects are combined in my code below.
from django.db import models, transaction
...
def construct_search(field_name):
    """Translate a search-field spec into a Django ORM lookup string.

    A leading prefix character selects the lookup type:

    * ``^`` -> ``<field>__istartswith``
    * ``=`` -> ``<field>__iexact``
    * ``#`` -> ``<field>__search``

    Without a prefix the lookup is ``<field>__icontains``.
    """
    prefix_lookups = {'^': 'istartswith', '=': 'iexact', '#': 'search'}
    # Slicing (instead of indexing) keeps an empty field name from raising.
    lookup = prefix_lookups.get(field_name[:1])
    if lookup is None:
        return "%s__icontains" % field_name
    return "%s__%s" % (field_name[1:], lookup)
class CoreSearchMixin(object):
    """Add multi-field text search to a view.

    Subclasses must define ``search_fields = [field_1, ..., field_n]`` where
    each field is a string naming a field, optionally starting with one of
    the following prefix characters:

    '^': the field must start with the search term, case insensitive
    '=': the field must exactly equal the search term, case insensitive
    '#': full-text search

    If no prefix is given, any field value that contains the search term
    will match.
    """

    search_fields = None
    search_form_class = SearchForm

    @property
    def search_form(self):
        """Return the search form built from the current request's data.

        The form is created on first access and cached on the instance, so
        ``is_valid()`` and ``cleaned_data`` are read from the same object.
        """
        if '_search_form' not in self.__dict__:
            # request.GET or request.POST, depending on the HTTP method.
            data = getattr(self.request, self.request.method)
            self._search_form = self.search_form_class(data)
        return self._search_form

    def get_query_help_message(self):
        """Return a comma separated list of the searched fields' verbose
        names, to help the user create a search.
        """
        fields = []
        if self.search_fields:
            for search_field in self.search_fields:
                field = get_field_from_path(self.model, search_field)
                fields.append(field.verbose_name.title())
        return ",".join(fields)

    def get_filtered_queryset(self, queryset):
        """Filter ``queryset`` by the submitted search query.

        Stores the cleaned query (or ``None`` when the form is invalid) on
        ``self.query``. A row matches when any whitespace-separated term
        matches any of ``search_fields``. The queryset is returned unchanged
        when there is nothing to search for.
        """
        from functools import reduce
        import operator

        form = self.search_form
        self.query = form.cleaned_data['q'] if form.is_valid() else None
        if not (self.search_fields and self.query):
            return queryset

        # Materialise the lookups: they are reused for every search term,
        # and a generator would be exhausted after the first one.
        orm_lookups = [
            construct_search(str(search_field).replace('.', '__'))
            for search_field in self.search_fields
        ]
        # Build the Q objects eagerly so each one keeps its own term.
        or_queries = [
            models.Q(**{orm_lookup: bit})
            for bit in self.query.split()
            for orm_lookup in orm_lookups
        ]
        if not or_queries:
            # e.g. a whitespace-only query: nothing to filter on.
            return queryset
        return queryset.filter(reduce(operator.or_, or_queries))

    def get_context_data(self, **kwargs):
        """Add the search form, help message and search fields to the context."""
        return super(CoreSearchMixin, self).get_context_data(
            search_form=self.search_form,
            query_help_message=self.get_query_help_message(),
            search_fields=self.search_fields,
            **kwargs
        )
How can I ensure that an INNER JOIN is used instead of a LEFT OUTER JOIN in the case of my code above?

According to your question, you want to search over multiple fields. However, following the logic you have, the first result that is found in the OR sequence is returned—without returning possible subsequent matches in the OR sequence; remember, the OR operator stops evaluating upon a truthy result.
In order to convert your OUTER LEFT JOINs to INNER JOINs you would need to have AND/OR Q object permutations of search fields combinations (optimal?), or query them separately and do an intersection on the result (sub-optimal), or write the SQL yourself (sub-optimal).
PS: I've run into this issue before while writing a Datatables API wrapper for use with Django.
PS: I'd consider refactoring, and further commenting your code—specifically get_filtered_queryset; it took a few minutes for me to wrap my head around what was going on here.

Related

Django: Easiest way to join all numerical data into one QuerySet when doing alphabetical pagination?

I'm trying to sort query results into alphabetical sections, like so:
This works with the following code:
def get_context(self, request):
    """Add one page entry per digit and letter, each with its matching shows."""
    context = super().get_context(request)
    # Published shows, ordered alphabetically
    shows = ShowPage.objects.child_of(self).live().order_by("name")
    pages = []
    for symbol in "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ":
        matching = shows.filter(name__istartswith=symbol.upper())
        pages.append({"letter": symbol, "shows": matching})
    context["pages"] = pages
    return context
The next step is to combine all shows that start with any number into just one group labeled "0-9".
The following does what I want, but it's awfully verbose and I'm wondering if there's an easier way I just don't know about:
def get_context(self, request):
    """Group shows by initial, merging all digit initials into one "0 - 9" page.

    ``context["pages"]`` becomes a list of ``{"letter", "shows"}`` dicts:
    the combined digit group first, then one entry per letter A-Z.
    """
    # Get published shows, ordered alphabetically
    context = super().get_context(request)
    shows = ShowPage.objects.child_of(self).live().order_by("name")
    pages = [{"letter": i,
              "shows": shows.filter(name__istartswith=i.upper())}
             for i in "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
    digits = {"letter": "0 - 9", "shows": []}
    letters = []
    for page in pages:
        if page["letter"].isdigit():
            # Collect the shows of every digit page into the single group.
            digits["shows"].extend(page["shows"])
        else:
            letters.append(page)
    context["pages"] = [digits] + letters
    return context
Any ideas?
Just an easy one-line of regex in the field search to see if the name string starts with a digit. Also istartswith is case-insensitive so you don't need the i.upper().
def get_context(self, request):
    """Add alphabetical show pages plus one combined "0-9" page to the context.

    Every entry of ``context['pages']`` has a ``'letter'`` label and a
    ``'shows'`` queryset.
    """
    # Get published shows, ordered alphabetically
    context = super().get_context(request)
    shows = ShowPage.objects.child_of(self).live().order_by("name")
    context['pages'] = [
        {'letter': letter, 'shows': shows.filter(name__istartswith=letter)}
        for letter in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    ]
    # Only the first character matters, so anchor on a single leading digit;
    # this also matches one-character names.
    context['pages'].append(
        {'letter': '0-9', 'shows': shows.filter(name__regex=r'^[0-9]')}
    )
    return context

How can i test the correct setup of a queryset with randomly ordered elements?

Assume i have the following function:
def select_queryset(value_to_decide_upon):
    """ this function returns either any of some querysets or nothing """
    if value_to_decide_upon == 1:
        return User._default_manager.all()
    if value_to_decide_upon == 2:
        return User._default_manager.filter(pk__in=[some_more_values])
    if value_to_decide_upon == n-1:
        # Randomly ordered variant of the full queryset.
        return User._default_manager.all().order_by('?')
    if value_to_decide_upon == n:
        return None
Now here is the question: this function has a randomly ordered queryset as a return value:
queryset = User._default_manager.all().order_by('?')
Now the only important thing about that function is: it has to return the correct queryset. Is there a way to access the queryset in such way that i can, ie. do something like this:
class TestQuerysetSelection(TestCase):
    def test_return_value(self):
        # this, of course, will always fail:
        expected = User._default_manager.all().order_by('?')
        self.assertEqual(select_queryset(n-1), expected)
        # and this is not working as well
        self.assertEqual(templatetag.queryset.order_by, '?')
So how can i test if the function returns the correct queryset, without brute force comparing to all other querysets?
I think the most straightforward way is to compare sets (which are unordered).
# Sets ignore ordering, so this only checks that both querysets
# contain the same rows.
self.assertEqual(
set(select_queryset(n-1)),
set(User._default_manager.all())
)
You could override order by and sort by pk, so you'll have order in your querysets and not random order. I'm not sure though what's the best way to compare querysets.
class TestQuerysetSelection(TestCase):
    def test_return_value(self):
        # Add `order_by('id')` to sort by id, replacing the random order, and
        # evaluate both sides to lists: two QuerySet objects do not compare
        # equal by content.
        self.assertEqual(
            list(select_queryset(n-1).order_by('id')),
            list(User._default_manager.all().order_by('id')),
        )
Try
# Compare the SQL (and its parameters) that each queryset would run,
# rather than the rows it returns.
qs1.query.sql_with_params() == qs2.query.sql_with_params()
# or simply compare the rendered query strings
str(qs1.query) == str(qs2.query)
# applied to the function under test
self.assertEqual(select_queryset(n-1).query.sql_with_params(),
User._default_manager.all().order_by('?').query.sql_with_params())
Furthermore, I think you need a more concise function to return these querysets.

How can I add exceptions for sorting Django models?

I've got a line in python:
# Streams that have related info, ordered by descending score and then
# by the info title.
streams = Stream.objects.filter(info__isnull = False)\
.order_by('-score', 'info__title')
I feed that to a RequestContext
# Split the same ordered queryset into online and offline streams for
# the template context.
c = RequestContext(request, {
'online_streams': streams.filter(online = True),
'offline_streams': streams.filter(online = False)
})
It sorts a table of ~50 rows by their score and title. This is good, but there is one row I would like to always be top. Simply fetching it beforehand and filtering it out of the line above, then give it to the RequestContext separately won't work, since I'd have to modify the template. Also I might change which row should be on top later, or perhaps make it multiple rows instead.
Is there a filter argument I can insert that says something in the lines of "Order all rows by score then title, except row ID 8, which should always be top"? Or could I perhaps change the order of the QuerySet manually after the filtering?
Thanks for any suggestions
You can override Model manager method:
class MyManager(models.Manager):
    """Manager whose ``filter`` drops the row with id 8 and re-orders the rest."""

    def filter(self, *args, **kwargs):
        matched = super(MyManager, self).filter(*args, **kwargs)
        return matched.exclude(id=8).order_by(...)
then in model
class Stream(models.Model):
    ...
    # Route the default ``objects`` manager through MyManager.
    objects = MyManager()
EDIT
to make sure it is included you can query:
from django.db.models import Q
# OR the pinned row (id=8) into the filter so it is always part of the result.
return super(MyManager, self).filter(Q(id=8) | Q(*args, **kwargs)).order_by(...)
I couldn't find a way to use query sets to do this, so I ended up converting the query set to a list and sorting it using the magic compare function. It ended up looking something like this:
The magic sorter, checking for names that should be stuck "on top":
# Comparison magic
# Comparison magic
def __cmp__(self, other):
    """Three-way comparison: 1 if ``self`` ranks higher, -1 if lower, 0 if tied.

    Ranking, highest first: names listed in ``stream_ontop``, then higher
    score, then alphabetically earlier info title. Intended for a
    descending (``reverse=True``) sort.
    """
    # On top: only decisive when exactly one of the two is pinned, so that
    # comparing two pinned streams gives a consistent answer in both
    # directions and falls through to score and title.
    self_on_top = self.name in stream_ontop
    other_on_top = other.name in stream_ontop
    if self_on_top != other_on_top:
        return 1 if self_on_top else -1
    # Score
    if self.score > other.score:
        return 1
    if self.score < other.score:
        return -1
    # Title alphabetical (inverted, because the sort is descending)
    if self.info.title > other.info.title:
        return -1
    if self.info.title < other.info.title:
        return 1
    return 0
Then I fetched and sorted them like this from my model class
# Custom sorting
@staticmethod
def getAllSorted(online):
    """Return the streams with the given ``online`` state as a sorted list.

    Only streams that have info are included. Ordering comes from the
    model's ``__cmp__``, applied in descending order.
    """
    from functools import cmp_to_key

    streams = list(Stream.objects.filter(online=online, info__isnull=False))
    # Call __cmp__ explicitly: a key-less sort only honours it on Python 2.
    streams.sort(key=cmp_to_key(lambda a, b: a.__cmp__(b)), reverse=True)
    return streams

Django, auto generating unique model fields and recursively calling auto generator if not unique

I am working on a Django project where a Thing would have a unique 10 digit Key, in addition to the standard auto incrementing ID integerfield. I use a simple random number function to create it. [I'm sure there's a better way to do this too]
When a Thing is created, a 10 digit Key is created. I use the .validate_unique() to check the Key's uniqueness. If it's not unique, is there a simple way I can recursively call the Key generator (makeKey()) until it passes? Code follows:
Models.py:
class Thing(models.Model):
    """A named thing identified by a unique numeric key."""
    name = models.CharField(max_length=50)
    # BigIntegerField: a 10-digit key can exceed the 32-bit range that
    # IntegerField is guaranteed to store.
    key = models.BigIntegerField(unique=True)
Views.py:
def makeKey():
    """Return a random integer that always has exactly 10 decimal digits."""
    # Drawing from [10**9, 10**10 - 1] rules out a leading zero, which
    # would silently produce a shorter key once converted to int.
    return random.randint(10**9, 10**10 - 1)
def createThing(request):
    """Create a Thing from a submitted form.

    On POST with a valid form, a generated key is attached, uniqueness is
    validated, the object is saved and a redirect is returned. In every
    other case (GET, invalid form, or a failure while saving) the form is
    rendered again.
    """
    if request.method == 'POST':
        f = ThingForm(request.POST)
        # Only build and save the object when the form actually validated.
        if f.is_valid():
            try:
                newF = f.save(commit=False)
                newF.key = makeKey()
                newF.validate_unique(exclude=None)
                newF.save()
                return HttpResponseRedirect(redirect)
            except Exception as error:
                print("Failed in register %s" % (error,))
    else:
        f = ThingForm()
    return render_to_response('thing_form.html', {'f': f})
Thank you
No need for recursion here - a basic while loop will do the trick.
# Build the instance without writing it yet: `key` is required and unique,
# so it has to be set before the first save.
newF = f.save(commit=False)
# Keep drawing keys until one is found that no existing Thing uses.
while True:
    key = makeKey()
    if not Thing.objects.filter(key=key).exists():
        break
newF.key = key
newF.save()

Next previous links from a query set / generic views

I have a quite simple query set and a related generic views:
# Extra arguments handed to the generic object_detail view: the objects to
# look up, the template to render, and the template object name.
f_detail = {
'queryset': Foto.objects.all(),
'template_name': 'foto_dettaglio.html',
"template_object_name" : "foto",
}
urlpatterns = patterns('',
# Detail page for a single Foto, selected by the numeric object_id in the URL.
(r'^foto/(?P<object_id>\d+)/$', list_detail.object_detail, f_detail, ),
)
Just a template for generating a detail page of a photo: so there's no view.
Is there an easy way to have a link to previous | next element in the template
without manually coding a view?
Something like:
{% if foto.next_item %}
Next
{% endif %}
class Foto(model):
    ...

    def get_next(self):
        """Return the first Foto with a greater id, or False if there is none."""
        next = Foto.objects.filter(id__gt=self.id)
        if not next:
            return False
        return next.first()

    def get_prev(self):
        """Return the Foto with the closest smaller id, or False if there is none."""
        prev = Foto.objects.filter(id__lt=self.id).order_by('-id')
        if not prev:
            return False
        return prev.first()
You can tweak these to your liking. I just looked at your question again... to make it easier than having the if statement, you could make the methods return the markup for the link to the next/prev if there is one, otherwise return nothing. Then you'd just do foto.get_next etc. Also remember that querysets are lazy, so you're not actually getting tons of items in next/prev.
The Foto version above has a couple of shortcomings:
Doing a boolean evaluation like if next: can be slow since it basically loads the entire QuerySet result. Use next.exists() or the try/except like in my version.
The get_prev() result is wrong because you need to reverse the ordering in this case.
So FWIW here is my version, which is for a generic primary key:
def get_next(self):
    """
    Get the next object by primary key order.

    Returns False when no object has a greater primary key.
    """
    # Order explicitly: without it, [0] follows the model's default
    # ordering and is not guaranteed to be the closest greater pk.
    following = self.__class__.objects.filter(pk__gt=self.pk).order_by('pk')
    try:
        return following[0]
    except IndexError:
        return False
def get_prev(self):
    """
    Get the previous object by primary key order.

    Returns False when no object has a smaller primary key.
    """
    model = self.__class__
    # Descending pk order puts the closest smaller pk first.
    earlier = model.objects.filter(pk__lt=self.pk).order_by('-pk')
    try:
        found = earlier[0]
    except IndexError:
        return False
    return found
If you'll accept Model.objects.all() as your queryset, and you are ok with grabbing next / previous items by a date field (usually a 'created' field with auto_now_add=True will give the same order as object id's), you can use get_next_by_foo() and get_previous_by_foo(), where 'foo' is the date field.
For next / previous links from a more complicated QuerySet, using the Paginator with threshold set to one seems like it might be the best option.