Prevent Django from blocking while proxying an HTTP request

I'm working on a Django site that allows connecting to devices in restricted networks through a cloud service. The devices connect to a cloud server through a VPN or SSH tunnel and clients connect to a virtual host via HTTP. The Django part is required for managing complex organization-role-access-user relationships.
Currently I'm doing access control in a custom Django middleware module that parses HTTP_HOST, authenticates the client, fetches the requested page from the device and forwards it to the original requester. The problem is that while one of these proxied requests is in flight, Django does not handle any other requests. Celery does not solve the problem because this isn't really a background task. Clients are served through a single address and port, which makes firewall rules unsuitable for this task.
The relevant code is below:
class NodeProxyMiddleware:

    def process_request(self, request, *args, **kwargs):
        if "HTTP_HOST" not in request.META:
            return None
        hardware_id = match_hwid.match(request.META["HTTP_HOST"])
        if not hardware_id:
            return None
        kwargs["hardware_id"] = hardware_id.group("hwid")
        if not authenticate(request, *args, **kwargs):
            return HttpResponseForbidden("No access")
        return proxy_request(request, *args, **kwargs)
@csrf_exempt
def proxy_request(request, *args, **kwargs):
    # Get the port of the target Node
    hardware_id = kwargs.get("hardware_id", "")
    try:
        port = Node.objects.filter(hardware_id=hardware_id)[0].port
    except IndexError:  # Node with the given hwid was not found
        raise Http404
    # We have to convert request.META back to its original form manually
    headers = convert_headers(request)  # HTTP_FOO_BAR to Foo-Bar
    headers["connection"] = "close"
    connection = httplib2.Http(timeout=5)
    url = "http://127.0.0.1:%d%s" % (port, request.META["PATH_INFO"])
    method = request.method
    # GET -- the query string ?d=a&t=a has to be urlencoded
    if method == "GET":
        data = None
        if request.GET:
            url += "?" + request.GET.urlencode()
    # POST -- the body has to be urlencoded
    elif method == "POST":
        data = request.POST.urlencode()
        headers["content-type"] = "application/x-www-form-urlencoded"
    try:
        response, content = connection.request(
            url, method, data, headers=headers)
    except Exception as e:
        print e
        return HttpResponse(content=e, status=503)
    django_response = HttpResponse(
        content=content,
        status=int(response["status"]),
        mimetype=response["content-type"],
    )
    # Strip hop-by-hop headers -- see RFC 2616 on semantically transparent
    # proxying. Also, WSGI forbids passing such headers back to it.
    hop_by_hop_headers = [
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailers",
        "transfer-encoding",
        "upgrade",
    ]
    for key, value in response.iteritems():
        if key.lower() in hop_by_hop_headers:
            continue
        django_response[key] = value
    return django_response
Is it possible to do this kind of proxying at all in Django by tweaking the code above or other settings? The software stack I'm running on is Nginx + uWSGI + Django 1.6. The uWSGI configuration is:
[uwsgi]
chdir = /home/foo/production/
file = /home/foo/production/wsgi.py
home = /home/foo/virtualenv
master = true
processes = 8
socket = /var/nginx/foo.socket
chmod-socket = 666
vacuum = true
daemonize = /home/foo/production/uwsgi.log
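With this configuration each worker is single-threaded, so the eight uWSGI workers can hold at most eight simultaneous proxied requests before everything else queues up, and a few slow upstream devices will starve the whole site. A minimal sketch of one mitigation, assuming threaded mode suits the rest of the app, is to let each worker run several threads:

[uwsgi]
; ... existing options as above ...
enable-threads = true
threads = 8   ; each worker can now block on up to 8 upstream requests concurrently

Another common pattern is to keep only the authentication in Django and offload the actual proxying to Nginx via X-Accel-Redirect: the view returns an empty response whose X-Accel-Redirect header points at an internal location, and Nginx streams the upstream response without occupying a Django worker for the duration. A hedged sketch, where the /internal-proxy/ location name is made up for illustration:

# Django: instead of fetching the page with httplib2, delegate it to Nginx
def proxy_request(request, *args, **kwargs):
    hardware_id = kwargs.get("hardware_id", "")
    try:
        port = Node.objects.filter(hardware_id=hardware_id)[0].port
    except IndexError:
        raise Http404
    response = HttpResponse()
    response["X-Accel-Redirect"] = "/internal-proxy/%d%s" % (
        port, request.META["PATH_INFO"])
    return response

# Nginx: internal-only location that performs the actual proxying
location ~ ^/internal-proxy/(?<port>\d+)(?<path>/.*)$ {
    internal;
    proxy_pass http://127.0.0.1:$port$path$is_args$args;
}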

Related

Celery not returning any results after success?

Here I am crawling some websites with different keywords. Previously it was plain scraping and it worked, but then I moved the scraping into Celery. Since adding Celery I can no longer get the scraping results, although no error is shown. I am using RabbitMQ as the message broker.
tasks.py
@shared_task()
def schedule_task(pk):
    task = Task.objects.get(pk=pk)
    keywords = ''
    # for keys in ast.literal_eval(obj.keywords.all()):  # keywords changed to csv
    for keys in task.keywords.all():
        if keywords:
            keywords += ', ' + keys.title
        else:
            keywords += keys.title
    task_ids = []  # one Task/Project contains one or multiple scrapy tasks
    settings = {
        'spider_count': len(task.targets.all()),
        'keywords': keywords,
        'unique_id': str(uuid4()),  # unique ID for each record for DB
        'USER_AGENT': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
    }
    # res = ast.literal_eval(ini_list)
    for site_url in task.targets.all():
        domain = urlparse(site_url.address).netloc  # parse the url and extract the domain
        spider_name = domain.replace('.com', '')
        task = scrapyd.schedule('default', spider_name, settings=settings,
                                url=site_url.address, domain=domain, keywords=keywords)
views
def post(self, request, *args, **kwargs):
    form = CreateTaskForm(request.POST)
    if form.is_valid():
        unique_id = str(uuid4())  # create a unique ID
        obj = form.save(commit=False)
        obj.created_by = request.user
        obj.unique_id = unique_id
        obj.status = 0
        obj.save()
        form.save_m2m()
        print(obj.pk)
        schedule_task.delay(pk=obj.pk)
        return redirect('crawler:task-list')
The view before using Celery (which returned the scraped results just fine) is below. I just split the scraping part into tasks.py and call it from the view with .delay, but now it doesn't return the result (before, it did).
form = CreateTaskForm(request.POST)
if form.is_valid():
    unique_id = str(uuid4())  # create a unique ID
    obj = form.save(commit=False)
    obj.created_by = request.user
    obj.unique_id = unique_id
    obj.status = 0
    obj.save()
    form.save_m2m()
    keywords = ''
    # for keys in ast.literal_eval(obj.keywords.all()):  # keywords changed to csv
    for keys in obj.keywords.all():
        if keywords:
            keywords += ', ' + keys.title
        else:
            keywords += keys.title
    task_ids = []  # one Task/Project contains one or multiple scrapy tasks
    settings = {
        'spider_count': len(obj.targets.all()),
        'keywords': keywords,
        'unique_id': unique_id,  # unique ID for each record for DB
        'USER_AGENT': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
    }
    # res = ast.literal_eval(ini_list)
    for site_url in obj.targets.all():
        domain = urlparse(site_url.address).netloc  # parse the url and extract the domain
        spider_name = domain.replace('.com', '')
        task = scrapyd.schedule('default', spider_name, settings=settings,
                                url=site_url.address, domain=domain, keywords=keywords)
    return redirect('crawler:task-list')
celery console
[2020-06-10 20:42:55,885: INFO/MainProcess] celery@DESKTOP-ENPLHOS ready.
[2020-06-10 20:42:55,900: INFO/MainProcess] pidbox: Connected to amqp://guest:**@127.0.0.1:5672//.
[2020-06-10 20:43:13,730: INFO/MainProcess] Received task: crawler.tasks.schedule_task[10e7bf06-5e4e-413c-85a3-79d61b9835cf]
[2020-06-10 20:43:17,077: INFO/MainProcess] Task crawler.tasks.schedule_task[10e7bf06-5e4e-413c-85a3-79d61b9835cf] succeeded in 3.3590000000040163s: None
At http://localhost:6800/jobs I can see the spiders are running, but the results do not appear in my view.
"views before using celery (which returned the scraped results) worked fine"
That is because that code ran synchronously, one step after the other, with the view waiting for the scraping to finish.
Celery, on the other hand, runs asynchronously, and from the view's perspective you will always get None back as the returned value.
If you chain two or more Celery tasks (all of which run asynchronously) you can make use of their returned values, but you cannot chain a synchronous view with an async Celery task.
Celery tasks are meant to be dispatched and run in the background, while your view is supposed to return something else, without waiting for your spiders to finish.
To be able to make use of the Celery results:
the collected data needs to be stored somewhere (a file such as CSV or JSON, or a database), and the Django view has to handle it in two steps:
first, trigger the Celery task;
second, collect the stored results and display them, as sketched below.
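A minimal sketch of that two-step pattern, assuming a hypothetical ScrapeResult model keyed by the unique_id that is already handed to the spiders (the model, URL and template names are illustrative, not part of the original code):

# models.py -- somewhere for the spiders (or an item pipeline) to store rows
from django.db import models

class ScrapeResult(models.Model):
    unique_id = models.CharField(max_length=36, db_index=True)
    url = models.URLField()
    data = models.TextField()

# views.py -- step 1 stays as the post() above; step 2 reads back stored rows
from django.shortcuts import render

def task_results(request, unique_id):
    rows = ScrapeResult.objects.filter(unique_id=unique_id)
    return render(request, 'crawler/results.html', {'rows': rows})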

Append a new Http header using django request.META

I am using Django REST framework, and I am able to get and set custom headers using the META information shown below:
class log_middleware:

    def __init__(self, get_response):
        self.get_response = get_response
        # One-time configuration and initialization.

    def __call__(self, request):
        # Set thread-local values. This executes before every request.
        correlation_id = request.META['HTTP_X_REQUEST_ID'] if 'HTTP_X_REQUEST_ID' in request.META else str(uuid.uuid4())
        request.META['HTTP_X_REQUEST_ID'] = correlation_id
        # logger.debug("Entered service")
        response = self.get_response(request)
        response['HTTP_X_REQUEST_ID'] = correlation_id
        # logger.debug("Processed response")
        return response
Now in my views.py I can read this header as request.META['HTTP_X_REQUEST_ID'], and it is also available in the response headers.
But when I try to log the HTTP header values in uWSGI using the config below, the field is logged as '-' (empty). That is because uWSGI only exposes the actual request headers through the %(var.XXX) variables; response headers go into %(headers), which reports only their count, not their values.
Issue: https://github.com/unbit/uwsgi/issues/1407
So is there any way in Django to append the data to the actual request headers instead of the response headers?
[uwsgi]
master = 1
memory-report = true
module = my_service.wsgi
http = 0.0.0.0:8080
max-requests = 50
processes = 16
log-format = { "ctime": "%(ctime)", "addr": "%(addr)", "method": "%(method)", "uri": "%(uri)", "correlation_id": "%(var.HTTP_X_REQUEST_ID)" }
But the same thing works if I set the HTTP_X_REQUEST header when sending the request itself from the REST client utils.
If you need middleware, you can use this:
middlewares.py:
def add_header_middleware(get_response):
    def middleware(request):
        request.META['hello'] = 'world'
        response = get_response(request)
        response['world'] = 'hello'
        return response
    return middleware
views.py:
@api_view(['GET'])
def sample_view(request):
    return Response(request.META['hello'])
settings.py:
MIDDLEWARE = [
    # ...
    'your_app.middlewares.add_header_middleware'
]
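To sanity-check both halves, a quick sketch with Django's test client, assuming sample_view is routed at /sample/ (a made-up path):

from django.test import Client

client = Client()
response = client.get('/sample/')
print(response.content)   # body built from request.META['hello'], i.e. "world"
print(response['world'])  # 'hello' -- the header the middleware set on the response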

Why is HttpResponseRedirect.set_cookie not working when I use it in a Django project?

I use Google OAuth to verify my users. After verification passes I want to redirect back to the page the user was visiting before authentication, so I save that page path in a cookie. My implementation looks like this:
def get_login_resp(request, redirect):
    print(redirect)
    auth_url = "https://accounts.google.com/o/oauth2/auth?" + urlencode({
        "client_id": GOOGLE_CLIENT_ID,
        "response_type": "code",
        "redirect_uri": make_redirect_url(request, redirect),
        "scope": "profile email",
        "max_auth_age": 0
    })
    resp = HttpResponseRedirect(auth_url)
    max_age = 3600 * 24
    expires = datetime.strftime(datetime.utcnow() + timedelta(seconds=max_age),
                                "%a, %d-%b-%Y %H:%M:%S GMT")
    print(expires)
    resp.set_cookie('google_auth_redirect', redirect, max_age=max_age, expires=expires,
                    domain=LOGIN_COOKIE_DOMAIN, secure=True, httponly=True)
    print(resp._headers)
    print(resp.cookies)
    return resp
PS: redirect is the page path I want to save.
But when I request the login URL with Postman, I can only see these headers:
[screenshot: response headers]
And these cookies:
[screenshot: cookies]
So what can I do about this problem? There is no error info at all.
I have tried everything I could think of to find out what's wrong, but still failed.
Then I tried running the server on another machine (a Linux server), and it worked!
BTW: my development machine is a MacBook Pro (15-inch, 2017) running macOS High Sierra 10.13.1.
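One plausible culprit, not confirmed in the original post: with secure=True, browsers and most HTTP clients will only store and resend the cookie over HTTPS, so on a plain http:// development setup it silently disappears; a mismatch between LOGIN_COOKIE_DOMAIN and the host actually being used has the same effect. A sketch that relaxes the flag during development, where the settings.DEBUG test is an assumption about how environments are distinguished:

from django.conf import settings

resp.set_cookie(
    'google_auth_redirect', redirect,
    max_age=max_age, expires=expires,
    domain=LOGIN_COOKIE_DOMAIN,
    secure=not settings.DEBUG,  # assumption: plain HTTP is only used in development
    httponly=True,
)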
Update, 14 Jan 2020:
I never found the root cause, but I solved the issue by saving redirect_url to the session data. In this solution you check whether the auth is still valid using another request, then call Google auth to re-authenticate; the code is below:
class GoogleAuthView(RedirectView):
    # Google auth view
    def get(self, request, *args, **kwargs):
        # Get the redirect url from the url params; frontend code should pass it in the request url
        redirect_url = request.GET.get('redirect_url', None)
        if redirect_url:
            redirect_url = parse.unquote(redirect_url)
        credentials = request.session.get("credentials", None)
        if (not credentials) or ('expire_time' not in credentials) or (credentials['expire_time'] < time.time()):
            request.session['redirect_url'] = redirect_url  # if google auth is needed, save the redirect url to the session first
        else:
            if redirect_url:
                return HttpResponseRedirect(redirect_url)
        flow = google_auth_oauthlib.flow.Flow.from_client_config(
            client_config=settings.GOOGLE_AUTH_CONFIG,
            scopes=settings.GOOGLE_AUTH_SCOPES
        )
        flow.redirect_uri = settings.GOOGLE_AUTH_CONFIG['web']['redirect_uris'][0]
        authorization_url, state = flow.authorization_url(
            access_type='offline',
            include_granted_scopes='true'
        )
        request.session['state'] = state
        return HttpResponseRedirect(authorization_url)


class GoogleAuthCallBackView(BasicView):
    # Google callback view
    def get(self, request, *args, **kwargs):
        state = request.session.get('state')
        flow = google_auth_oauthlib.flow.Flow.from_client_config(
            client_config=settings.GOOGLE_AUTH_CONFIG,
            scopes=settings.GOOGLE_AUTH_SCOPES,
            state=state
        )
        flow.redirect_uri = settings.GOOGLE_AUTH_CONFIG['web']['redirect_uris'][0]
        # Get the redirect url from the session data if it exists
        redirect_url = request.session.get('redirect_url') or settings.ADMIN_LOGIN_REDIRECT_URL
        response = HttpResponseRedirect(redirect_url)
        try:
            del request.session['redirect_url']
        except KeyError:
            logger.info('Delete `redirect_url` in session got KeyError.')
        try:
            flow.fetch_token(authorization_response=request.build_absolute_uri())
        except Exception as e:
            logger.error(str(e))
            return response
        # Save the credentials to the session
        credentials = flow.credentials
        request.session["credentials"] = {
            'token': credentials.token,
            'refresh_token': credentials.refresh_token,
            'token_uri': credentials.token_uri,
            'client_id': credentials.client_id,
            'client_secret': credentials.client_secret,
            'scopes': credentials.scopes,
            'expire_time': time.time() + TOKEN_EXPIRE_TIME,
        }
        profile_client = googleapiclient.discovery.build(
            serviceName='oauth2',
            version='v2',
            credentials=credentials
        )
        profile = profile_client.userinfo().v2().me().get().execute()
        email = profile['email']
        user = user_manager.get_user_by_email(email)
        if user:
            user.username = profile['name']  # sync username from google
            user.picture = profile['picture']  # sync avatar from google
            user.save()
            request.session["user"] = user.to_dict()
        else:
            return HttpResponseRedirect("/api/non_existent_user/")  # show non-existent user
        return response

Django system check stuck on unreachable url

In my project I use the requests library to send a POST request. The URL for that request is hardcoded in a function that is called from views.py.
The problem is that when I don't have an internet connection, or the host the URL points to is down, I can't launch the development server: it gets stuck on "Performing system checks". However, if I comment out the line with the URL, or change it to a host that is guaranteed to be up, the check passes fine.
What is a good workaround here?
views.py
def index(request):
    s = Sync()
    s.do()
    return HttpResponse("Hello, world. You're at the polls index.")
sync.py
class Sync:
    def do(self):
        reservations = Reservation.objects.filter(is_synced=False)
        for reservation in reservations:
            serializer = ReservationPKSerializer(reservation)
            dictionary = {'url': 'url', 'hash': 'hash', 'json': serializer.data}
            encoded_data = json.dumps(dictionary)
            r = requests.post('http://gservice.ca29983.tmweb.ru/gdocs/do.php',
                              headers={'Content-Type': 'application/json'},
                              data=encoded_data)
            if r.status_code == 200:
                reservation.is_synced = True
                reservation.save()
It might appear to be stuck because requests automatically retries the connection a few times. Try reducing the retry count to 0 or 1, as described in:
Can I set max_retries for requests.request?
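A sketch of that idea against the endpoint above: mounting an HTTPAdapter with max_retries=0 disables the automatic retries, and an explicit timeout makes the call fail fast when the host is unreachable instead of hanging the system check:

import json
import requests
from requests.adapters import HTTPAdapter

session = requests.Session()
session.mount('http://', HTTPAdapter(max_retries=0))  # no automatic retries

encoded_data = json.dumps({'url': 'url', 'hash': 'hash', 'json': {}})  # built as in sync.py
try:
    r = session.post('http://gservice.ca29983.tmweb.ru/gdocs/do.php',
                     headers={'Content-Type': 'application/json'},
                     data=encoded_data,
                     timeout=5)  # seconds; fail fast instead of blocking startup
except requests.RequestException:
    r = None  # skip this sync round; is_synced stays False and can be retried later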

Not getting notified on return/cancel_return using django-paypal

I am using django-paypal for my e-commerce site and payments are all working correctly, but once payment is done the user is not redirected back to my site. I am using PayPal IPN on my localhost. Is it because I am running on my local machine? The following is the code for sending data to PayPal:
def checkout(request, name):
    product = Products.objects.get(name=name)
    print "producttttttttttttttttttttt", product
    # What you want the button to do.
    paypal_dict = {
        "business": settings.PAYPAL_RECEIVER_EMAIL,
        "amount": product.price,
        "item_name": product.name,
        "invoice": "unique-invoice-id",
        "notify_url": "192.168.5.108:8000" + reverse('paypalipn'),
        "return_url": "192.168.5.108:8000/payment-complete/",
        "cancel_return": "192.168.5.108:8000",
    }
    form = PayPalPaymentsForm(initial=paypal_dict)
    context = {"form": form}
    return render_to_response("payment.html", context)
The following view receives the IPN data from PayPal:
def paypalipn(request, item_check_callable=None):
    """
    Django view to store the IPN; the notify_url executes this view.

    PayPal IPN endpoint (notify_url).
    Used by both PayPal Payments Pro and Payments Standard to confirm transactions.
    https://www.paypal.com/it/home
    PayPal IPN Simulator:
    https://developer.paypal.com/cgi-bin/devscr?cmd=_ipn-link-session
    """
    print "haaaaaaaaaaaaaaaaaaaaaaaaaaiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii"
    # TODO: Clean up code so that we don't need to set None here and have a lot
    # of if checks just to determine if flag is set.
    flag = None
    ipn_obj = None
    # Clean up the data as PayPal sends some weird values such as "N/A".
    # Also, we need to cope with custom encoding, which is stored in the body (!).
    # Assuming the tolerant parsing of QueryDict and an ASCII-like encoding,
    # such as windows-1252, latin1 or UTF-8, the following will work:
    encoding = request.POST.get('charset', None)
    if encoding is None:
        flag = "Invalid form - no charset passed, can't decode"
        data = None
    else:
        try:
            data = QueryDict(request.body, encoding=encoding)
        except LookupError:
            data = None
            flag = "Invalid form - invalid charset"
    if data is not None:
        date_fields = ('time_created', 'payment_date', 'next_payment_date',
                       'subscr_date', 'subscr_effective')
        for date_field in date_fields:
            if data.get(date_field) == 'N/A':
                del data[date_field]
        form = PayPalIPNForm(data)
        if form.is_valid():
            try:
                # When commit = False, the object is returned without saving to the DB.
                ipn_obj = form.save(commit=False)
            except Exception, e:
                flag = "Exception while processing. (%s)" % e
        else:
            flag = "Invalid form. (%s)" % form.errors
    if ipn_obj is None:
        ipn_obj = PayPalIPN()
    # Set query params and the sender's IP address
    ipn_obj.initialize(request)
    if flag is not None:
        # We save errors in the flag field
        ipn_obj.set_flag(flag)
    else:
        # Secrets should only be used over SSL.
        if request.is_secure() and 'secret' in request.GET:
            ipn_obj.verify_secret(form, request.GET['secret'])
        else:
            ipn_obj.verify(item_check_callable)
    ipn_obj.save()
    return HttpResponse("OKAY")
Please help?
The issue happened because I was working on localhost; when I moved to the development server it worked for me, and the page was redirected back to my site.
I will quote this directly from the django-paypal documentation:
If you are attempting to test this in development, using the PayPal sandbox, and your machine is behind a firewall/router and therefore is not publicly accessible on the internet (this will be the case for most developer machines), PayPal will not be able to post back to your view. You will need to use a tool like https://ngrok.com/ to make your machine publicly accessible, and ensure that you are sending PayPal your public URL, not localhost, in the notify_url, return and cancel_return fields.
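A sketch of how the checkout dict from the question might look once a public tunnel is in place (the ngrok hostname is a made-up placeholder; running "ngrok http 8000" prints the real one). Note that besides not being publicly reachable, the original values also lacked an http:// scheme:

PUBLIC_HOST = "https://abc123.ngrok.io"  # hypothetical tunnel to localhost:8000

paypal_dict = {
    "business": settings.PAYPAL_RECEIVER_EMAIL,
    "amount": product.price,
    "item_name": product.name,
    "invoice": "unique-invoice-id",
    "notify_url": PUBLIC_HOST + reverse('paypalipn'),
    "return_url": PUBLIC_HOST + "/payment-complete/",
    "cancel_return": PUBLIC_HOST + "/",
}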