Check url status without opening it - django

Right now, when a URL is opened without a trailing slash (example.com/blog), a slash is automatically appended at the end via a 301 redirect. The question is: can I make the check first see whether the page exists without the slash (example.com/blog)? If so, open it. If not, check whether the page exists with a slash (example.com/blog/), without issuing a 301 just to find out. If it does, redirect with a 301; if not, return a 404.
Now just if there is no page (example.com/blog), then a slash is added to the end first (example.com/blog/), 301 redirects go and only then a 404 error is thrown. In this case, the 404 error must be thrown immediately, without a 301 redirect.
The dispatch was rewritten as follows.
def is_normal_slash_count(url):
    """Report whether ``url`` ends with exactly one slash.

    Returns a ``(is_normal, slash_count)`` tuple: ``slash_count`` is the
    length of the run of ``/`` characters at the end of ``url`` and
    ``is_normal`` is ``True`` only when that run has length 1.
    """
    # rstrip() drops the whole trailing run at once, so the difference in
    # length is the number of trailing slashes.
    trailing = len(url) - len(url.rstrip('/'))
    return (trailing == 1, trailing)
def replace_bad_slash(url, slash_count):
    """Collapse the run of trailing slashes in ``url`` to a single slash.

    ``slash_count`` is the number of slashes ``url`` ends with (the second
    value returned by ``is_normal_slash_count``). URLs ending in fewer than
    two slashes are returned unchanged.
    """
    if slash_count < 2:
        return url
    if slash_count == 2:
        # Kept for backward compatibility: this branch has always collapsed
        # every '//' in the URL, not only the trailing one.
        return url.replace('//', '/')
    # Only touch the trailing run. Deleting every run of (slash_count - 1)
    # slashes, as was done before, glued path segments together
    # ('/a//b///' became '/ab/').
    return url.rstrip('/') + '/'
def normalize_url(url):
    """Return ``url`` normalised so that it ends with a trailing slash.

    URLs of length 0 or 1 (e.g. ``/``) are returned untouched. A URL with no
    trailing slash gets one appended; a URL ending in several slashes is
    handed to ``replace_bad_slash``.
    """
    if len(url) <= 1:
        return url
    if not url.endswith('/'):
        return url + '/'
    # e.g. /contacts//// -> /contacts/
    has_single_slash, trailing = is_normal_slash_count(url)
    if has_single_slash:
        return url
    return replace_bad_slash(url, trailing)
def is_bad_url(url):
    """Tell whether ``url`` needs its trailing slashes normalised.

    True when ``url`` is longer than one character and does not end with
    exactly one slash (it ends with none, or with two or more).
    """
    if len(url) <= 1:
        return False
    has_single_slash, _ = is_normal_slash_count(url)
    return not has_single_slash
class RedirectMixinView:
    """View mixin that may answer with a permanent redirect before dispatch."""

    def dispatch(self, *args, **kwargs):
        """Redirect permanently when a rule or slash normalisation applies.

        A ``RedirectSettings`` row whose ``url_from`` equals the request path
        takes precedence; otherwise a path with a bad trailing-slash count is
        redirected to its normalised form. In every other case the request is
        dispatched normally.
        """
        path = self.request.path
        rule = RedirectSettings.objects.filter(url_from=path).first()
        if rule:
            target = rule.url_to
        elif is_bad_url(path):
            target = normalize_url(path)
        else:
            return super(RedirectMixinView, self).dispatch(*args, **kwargs)
        return redirect(target, permanent=True)
Is this realistic?
I am thinking along the lines of writing a middleware for this.
Updated
projects.urls
url(r'^page/', include('pages.urls')),
pages.urls
url(r'^$', PageView.as_view(), name='page'),
test
try:
resolve('/page/')
except:
raise Http404
return redirect('/page/')
I tried /page/, /page, page/, page, http://127.0.0.1:8000/page/, http://127.0.0.1:8000/page

You need to remove RedirectMixinView from LandingView.
Comment out the middleware CommonMiddleware.
Add RedirectMiddleware to the list of middleware (preferably in the top).
Create RedirectMiddleware
The code is written jointly with #dirkgroten (most of his contribution).
import re

from django.http import Http404, HttpResponsePermanentRedirect
from django.urls import resolve
class RedirectMiddleware(object):
    """Redirect a 404 to its slash-normalised URL, but only if that URL works.

    When the wrapped handler answers 404, the request path is cleaned up
    (runs of slashes collapsed, exactly one trailing slash). If the cleaned
    path differs from the requested one, resolves to a view, and that view
    answers 200, a permanent redirect to the cleaned URL (query string kept)
    is returned. Otherwise the original 404 is passed through.

    NOTE(review): the candidate view is executed as a probe for every
    request method; confirm that is acceptable for non-GET requests.
    """

    response_redirect_class = HttpResponsePermanentRedirect
    # Matches any run of slashes so it can be collapsed into a single one.
    _slash_run = re.compile(r'/+')

    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        response = self.get_response(request)
        if response.status_code == 404:
            redirect_response = self._redirect_if_slashed_url_works(request)
            if redirect_response is not None:
                return redirect_response
        # Add the Content-Length header to non-streaming responses if not
        # already set.
        if not response.streaming and not response.has_header('Content-Length'):
            response['Content-Length'] = str(len(response.content))
        return response

    def _redirect_if_slashed_url_works(self, request):
        """Return a redirect to the cleaned URL, or None to keep the 404."""
        original_path = request.path
        cleaned = self._slash_run.sub('/', original_path)
        candidate = cleaned if cleaned.endswith('/') else cleaned + '/'
        if candidate == original_path:
            # The request already used the cleaned URL and it was a 404;
            # probing again could only redirect the client to the same URL.
            return None
        # get_full_path() is built from request.path, so point it at the
        # cleaned path (without the slash that force_append_slash adds) and
        # restore the real value afterwards.
        request.path = candidate[:-1]
        try:
            # Adds the slash and keeps the query parameters.
            full_path = request.get_full_path(force_append_slash=True)
            # resolve() matches URL paths only, so it must not see the
            # query string.
            match = resolve(candidate)
            probe = match.func(request, *match.args, **match.kwargs)
        except Http404:
            # Covers Resolver404 (a subclass) as well as a view that raises
            # Http404 itself, e.g. for a missing object.
            return None
        finally:
            request.path = original_path
        if probe.status_code == 200:
            return self.response_redirect_class(full_path)
        return None
Add to the nginx config of the project:
if ($request_uri ~* "\/\/") {
rewrite ^/(.*) $scheme://$host/$1 permanent;
}
# merge_slashes off;
It does what you need, and also removes duplicate slashes if this page exists.

First make sure you set APPEND_SLASH to False in your settings.py. This will disable the automatic 301 redirects to the URLs with slash.
Then use resolve() to check if the URL with slash exists before redirecting. Do this in a Middleware class where you handle the case that the response status code is 404.
from django.urls import resolve
try:
resolve(url_with_slash)
except Resolver404:
raise Http404
return redirect(url_with_slash)
Note that resolve(url) will not raise an exception when there is a path matching the url, even if the view might afterwards still raise a 404. This is the case for example if you have a DetailView for an object where the object's pk is in the URL. Say you have /objects/<pk>/ as the path to show your objects, then the url /objects/4/ will always match even if object with pk=4 does not exist. The view will still raise a 404 after the redirect.
So if you really want to also catch those 404's, you could actually call the view function yourself to check the response:
try:
r = resolve(url_with_slash)
response = r.func(request, args=r.args, kwargs=r.kwargs)
if response.status_code == 200:
return redirect(url_with_slash)
except Resolver404:
pass

Related

Django redirect and modify GET parameters

I am implementing magic tokens and would like clean URLs. As a consequence, I would like to remove the token from the URL upon a successful user authentication. This is my attempt:
def authenticate_via_token(get_response):
    """Middleware factory: authenticate a session from a ``token`` GET parameter.

    For a session that is not yet marked as authenticated, the ``token``
    query parameter is looked up via ``MagicToken.fetch_by_token``. On a hit
    the token's email is stored in the session and the ``token`` entry of
    ``request.GET`` is set to ``None``; on a miss "invalid token" is printed.
    The request is then passed on to ``get_response`` in every case.
    """
    def middleware(request):
        token = None
        if not request.session.get('authenticated', None):
            token = request.GET.get('token', None)
        if token:
            magic_token = MagicToken.fetch_by_token(token)
            if not magic_token:
                print("invalid token")
            else:
                request.session['authenticated'] = magic_token.email
                # Unlock the query dict, blank the token, lock it again.
                query = request.GET
                if not query._mutable:
                    query._mutable = True
                query['token'] = None
                query._mutable = False
        return get_response(request)
    return middleware
IE, I would like to send /products/product-detail/3?token=piyMlVMrmYblRwHcgwPEee --> /products/product-detail/3
It's possible that there may be additional GET parameters and I would like to keep them. Any input would be appreciated!
This is the solution I ended up going for:
from django.urls import resolve, reverse
import urllib
def drop_get_param(request, param):
    """Return the URL of ``request`` with query parameter ``param`` removed.

    Helpful for redirecting while dropping a specific parameter. All other
    query parameters are kept, including repeated ones. If ``param`` is not
    present the URL is rebuilt unchanged.
    """
    resolution = resolve(request.path_info)  # simulate resolving the request
    remaining = request.GET.copy()  # mutable copy of the parameters
    remaining.pop(param, None)  # tolerate a parameter that is not there
    # view_name (unlike url_name) carries the namespace; pass positional
    # arguments through too so patterns with unnamed groups reverse.
    url = reverse(
        resolution.view_name,
        args=resolution.args,
        kwargs=resolution.kwargs,
    )
    if remaining:
        # QueryDict.urlencode() keeps every value of a repeated key; the
        # generic urlencode() only sees the last value of each key.
        url += '?' + remaining.urlencode()
    return url

How to redirect to previous page after language changing?

When I am trying to use next it doesn't work because in next-url there are old language code so language doesn't change.
my template:
en
ru
my url:
path('language-change/<user_language>/', views.set_language_from_url, name="set_language_from_url"),
my view:
def set_language_from_url(request, user_language):
    """Switch to ``user_language`` and redirect to the ``next`` URL.

    The language is activated and stored in the session. ``next`` is read
    from POST first, then from GET, and falls back to ``/``.
    """
    translation.activate(user_language)
    request.session[translation.LANGUAGE_SESSION_KEY] = user_language
    fallback = request.GET.get('next', '/')
    return redirect(request.POST.get('next', fallback))
Use redirect_to = request.META.get('HTTP_REFERER', '')
You can use Django's built in set_language_view.
This is a view that can be used to change the language of the user, and when passing redirect_to, it will automatically build the correct url for the next page to go to.
You can check out the example provided in the docs.
You can also simply change your code so that `next` carries not the path of the request but the name of the URL to redirect to, like this (assuming you have a URL in your urlpatterns named 'view_page'):
en
ru
Your view:
def set_language_from_url(request, user_language):
    """Switch to ``user_language`` and redirect to the URL named by ``next``.

    ``next`` (POST first, then GET) is expected to hold a URL name; when it
    is missing the user is sent to ``/``.
    """
    translation.activate(user_language)
    request.session[translation.LANGUAGE_SESSION_KEY] = user_language
    redirect_to = request.POST.get('next', request.GET.get('next', '/'))
    # redirect() reverses URL names itself and passes plain paths through,
    # so the '/' fallback works; reverse('/') would raise NoReverseMatch.
    return redirect(redirect_to)
Edit:
For returning to the current request instead of predefined request (I have not tested this, so I don't know if it will work):
You could try resolving the path of the current request, and reversing it again, passing in the correct arguments. This should return the exact url as before, only with a different language prefix.
en
ru
def set_language_from_url(request, user_language):
    """Switch to ``user_language`` and redirect back to the same page.

    ``next`` (POST first, then GET, default ``/``) is resolved to its view
    and reversed again after the language switch, so the redirect target
    carries the new language prefix.
    """
    redirect_to = request.POST.get('next', request.GET.get('next', '/'))
    # Resolve before switching: with i18n_patterns the language prefix that
    # resolve() accepts is the one of the currently active language, and
    # ``next`` still carries the old prefix.
    resolved_url = resolve(redirect_to)
    translation.activate(user_language)
    request.session[translation.LANGUAGE_SESSION_KEY] = user_language
    # reverse() takes the URL arguments via ``args=``/``kwargs=``; unpacking
    # them into the call would be read as reverse()'s own parameters.
    return redirect(reverse(
        resolved_url.view_name,
        args=resolved_url.args,
        kwargs=resolved_url.kwargs,
    ))

Scrapy get request url in parse

How can I get the request url in Scrapy's parse() function? I have a lot of urls in start_urls and some of them redirect my spider to homepage and as result I have an empty item. So I need something like item['start_url'] = request.url to store these urls. I'm using the BaseSpider.
The 'response' variable that's passed to parse() has the info you want. You shouldn't need to override anything.
eg. (EDITED)
def parse(self, response):
    """Print the URL of the request that produced ``response``."""
    # Call form of print: valid on Python 3 and, with a single argument,
    # prints the same text on Python 2.
    print("URL: " + response.request.url)
The request object is accessible from the response object, therefore you can do the following:
def parse(self, response):
    # Record the URL of the request that led to this response.
    # NOTE(review): `item` is not defined in this snippet; presumably it is
    # created earlier in the spider. Confirm before use.
    item['start_url'] = response.request.url
You do not need to store the requested URLs somewhere yourself; note also that Scrapy does not process URLs in the same order as they are listed in start_urls.
By using below,
response.request.meta['redirect_urls']
will give you the list of redirects that happened, e.g. ['http://requested_url','https://redirected_url','https://final_redirected_url']
To access first URL from above list, you can use
response.request.meta['redirect_urls'][0]
For more, see doc.scrapy.org mentioned as :
RedirectMiddleware
This middleware handles redirection of requests based on response status.
The urls which the request goes through (while being redirected) can be found in the redirect_urls Request.meta key.
Hope this helps you
You need to override BaseSpider's make_requests_from_url(url) function to assign the start_url to the item and then use the Request.meta special keys to pass that item to the parse function
from scrapy.http import Request
# override method
def make_requests_from_url(self, url):
    """Build the request for a start URL, carrying an item that records it."""
    item = MyItem()
    item['start_url'] = url
    # meta travels with the request, so the item is available again in the
    # next callback as response.meta['item'].
    return Request(url, dont_filter=True, meta={'item': item})
def parse(self, response):
    """Complete the item carried in ``response.meta`` and return it.

    The URL the response actually came from is stored under ``other_url``.
    """
    carried = response.meta['item']
    carried['other_url'] = response.url
    return carried
Hope that helps.
Python 3.5
Scrapy 1.5.0
from scrapy.http import Request
# override method
def start_requests(self):
    """Yield one request per start URL, each remembering its start URL."""
    for start_url in self.start_urls:
        # meta['item'] is handed to the next callback together with the
        # response.
        yield Request(
            start_url,
            dont_filter=True,
            meta={'item': {'start_url': start_url}},
        )
# use meta variable
def parse(self, response):
    """Read back the start URL that was stored in the request's meta."""
    start_url = response.meta['item']['start_url']

How to avoid Django URLField adding the trailing slash?

Django URLField likes to add a trailing slash (/) at the end of the user input, forcing all URLs to be stored with the extra character, this is wrong. How can I stop this behavior and save URLs as submitted by users?
Check to_python of URLField at https://github.com/django/django/blob/master/django/forms/fields.py.
You can see it has a line url_fields[2] = '/' almost at the end of method to_python. It appends a trailing slash / at the end of url. You can see the logic for doing this as a comment before this line.
This slash is necessary in case some query params are given.
If you want to avoid this behaviour, write your own field that extends URLField and override to_python in your custom class.
I've been struggling with this as well, because it's causing a problem for certain urls. For example, http://www.nasa.gov/mission_pages/kepler/news/kepler-62-kepler-69.html/ fails, but it works without the slash.
To expand on akshar's answer, the method to do this is explained here. For example, defining this in my models.py file and setting url = NoSlashURLField() rather than models.URLField() in my model removes the slash:
try:
from urllib.parse import urlsplit, urlunsplit
except ImportError: # Python 2
from urlparse import urlsplit, urlunsplit
class NoSlashURLField(models.URLField):
    """Model URL field whose ``to_python`` never appends a trailing slash."""

    description = "Remove the goddamn slash"
    __metaclass__ = models.SubfieldBase

    def __init__(self, *args, **kwargs):
        super(NoSlashURLField, self).__init__(*args, **kwargs)

    def to_python(self, value):
        """Normalise ``value`` into a URL string without forcing a path.

        A missing scheme is filled in as ``http``; when no domain was parsed,
        the path segment is taken to be the domain. Falsy values are returned
        as the parent class produced them.
        """
        def split_url(url):
            """
            Returns a list of url parts via ``urlparse.urlsplit`` (or raises a
            ``ValidationError`` exception for certain).
            """
            try:
                pieces = urlsplit(url)
            except ValueError:
                # urlparse.urlsplit can raise a ValueError with some
                # misformatted URLs.
                raise ValidationError(self.error_messages['invalid'])
            return list(pieces)

        value = super(NoSlashURLField, self).to_python(value)
        if not value:
            return value
        parts = split_url(value)
        if not parts[0]:
            # If no URL scheme given, assume http://
            parts[0] = 'http'
        if not parts[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            parts[1], parts[2] = parts[2], ''
            # Rebuild the parts list, since the domain segment may now
            # contain the path too.
            parts = split_url(urlunsplit(parts))
        # Deliberately omitted: replacing an empty path (parts[2]) with '/'.
        # That step is what adds the trailing slash this field avoids.
        return urlunsplit(parts)
For those using the usual Django admin forms for their site, and also using South for DB migrations, you may want to use the following method instead of stonefury's. His method changes the model field, which confuses South unless you add some special code. The below method changes only the admin code, allowing South to remain blissfully unaware.
Define this class somewhere in your app:
class NoSlashURLFormField(forms.URLField):
    """Form URL field whose ``to_python`` never appends a trailing slash."""

    def to_python(self, value):
        """Normalise ``value`` into a URL string without forcing a path.

        A missing scheme is filled in as ``http``; when no domain was parsed,
        the path segment is taken to be the domain. Falsy values are returned
        unchanged.
        """
        def split_url(url):
            """
            Returns a list of url parts via ``urlparse.urlsplit`` (or raises a
            ``ValidationError`` exception for certain).
            """
            try:
                pieces = urlsplit(url)
            except ValueError:
                # urlparse.urlsplit can raise a ValueError with some
                # misformatted URLs.
                raise ValidationError(self.error_messages['invalid'])
            return list(pieces)

        if not value:
            return value
        parts = split_url(value)
        if not parts[0]:
            # If no URL scheme given, assume http://
            parts[0] = 'http'
        if not parts[1]:
            # Assume that if no domain is provided, that the path segment
            # contains the domain.
            parts[1], parts[2] = parts[2], ''
            # Rebuild the parts list, since the domain segment may now
            # contain the path too.
            parts = split_url(urlunsplit(parts))
        return urlunsplit(parts)
Then edit your admin.py file as follows:
from your_app.path.to.noslash import NoSlashURLFormField
from django.contrib.admin.widgets import AdminURLFieldWidget
class MyModelAdmin(admin.ModelAdmin):
    ...
    # Swap the form field used for every models.URLField in this admin for
    # the slash-preserving NoSlashURLFormField.
    formfield_overrides = {
        models.URLField: {
            'form_class': NoSlashURLFormField,
            # Need to specify the AdminURLFieldWidget here because it would
            # otherwise get defaulted back to URLInput.
            'widget': AdminURLFieldWidget,
        }
    }

Django: custom 404 handler that returns 404 status code

The project I'm working on has some data that needs to get passed to every view, so we have a wrapper around render_to_response called master_rtr. Ok.
Now, I need our 404 pages to run through this as well. Per the instructions, I created a custom 404 handler (cleverly called custom_404) that calls master_rtr. Everything looks good, but our tests are failing, because we're receiving back a 200 OK.
So, I'm trying to figure out how to return a 404 status code instead. There seems to be an HttpResponseNotFound class that's kinda what I want, but I'm not quite sure how to construct all of that nonsense instead of using render_to_response. Or rather, I could probably figure it out, but it seems like there must be an easier way; is there?
The appropriate parts of the code:
def master_rtr(request, template, data=None):
    """Render ``template`` with the site-wide entries added to ``data``.

    ``data`` gains ``courses`` (the enrolled courses of an authenticated
    user, otherwise the session's ``anonCourses`` or an empty list) and
    ``THEME`` before being rendered with a ``RequestContext``. A dict passed
    by the caller is updated in place, as before.
    """
    # A fresh dict per call: a ``{}`` default would be one shared object
    # that every call without ``data`` mutates.
    if data is None:
        data = {}
    if request.user.is_authenticated():
        # Since we're only grabbing the enrollments to get at the courses,
        # doing select_related() will save us from having to hit database for
        # every course the user is enrolled in
        enrollments = Enrollment.objects.select_related().filter(user=request.user)
        data['courses'] = [e.course for e in enrollments if e.view]
    else:
        data['courses'] = request.session.get('anonCourses', [])
    data['THEME'] = settings.THEME
    return render_to_response(template, data, context_instance=RequestContext(request))
def custom_404(request, exception=None):
    """Render ``404.html`` through ``master_rtr`` with a 404 status code.

    ``exception`` is accepted (and ignored) so the view can also be used as
    ``handler404`` on Django versions that pass the triggering exception.
    """
    response = master_rtr(request, '404.html')
    # The rendered response starts out as 200 OK; mark it as Not Found.
    response.status_code = 404
    return response
The easy way:
def custom_404(request):
    # `...` stands in for the real master_rtr arguments.
    response = master_rtr(...)
    # Overwrite the status on the already-rendered response.
    response.status_code = 404
    return response
But I have to ask: why aren't you just using a context processor along with a RequestContext to pass the data to the views?
Just set status_code on the response.
Into your application's views.py add:
# Imports
from django.shortcuts import render
from django.http import HttpResponse
from django.template import Context, loader
##
# Handle 404 Errors
# @param request   the incoming HTTP request
# @param exception the exception that triggered the 404 (optional, unused)
def error404(request, exception=None):
    """Render ``404.htm`` with a 404 status code.

    The template receives ``message``, a string containing the request.
    """
    # render() accepts a plain dict on every Django version, whereas handing
    # a Context object to a loader template's render() fails on newer ones.
    # The secret is still the last argument: status=404.
    return render(
        request,
        '404.htm',
        {'message': 'All: %s' % request},
        content_type='text/html; charset=utf-8',
        status=404,
    )
The secret is in the last line: status=404
Hope it helped!