My core code is as follows:
import requests
url='https://www.xxxx.top' #for example
data=dict()
session = requests.session()
session.get(url)
token = session.cookies.get('csrftoken')
data['csrfmiddlewaretoken'] = token
res = session.post(url=url, data=data, headers=session.headers, cookies=session.cookies)
print(res)
# <Response [403]>
The variable url is my own website, which is based on Django. I know I can use @csrf_exempt to disable CSRF, but I don't want to do that.
However, it returns a 403 response when I use requests to make a POST request. I wish someone could tell me what is wrong with my approach.
I have solved the problem. In this case, just add a Referer header to the request: over HTTPS, Django's CSRF protection also checks the Referer, so a POST without one is rejected even when the token is valid.
import requests
url='https://www.xxxx.top' #for example
data=dict()
session = requests.session()
session.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                                 'Chrome/51.0.2704.63 Safari/537.36',
                   'Referer': url}
session.get(url)
token = session.cookies.get('csrftoken')
data['csrfmiddlewaretoken'] = token
res = session.post(url=url, data=data, headers=session.headers, cookies=session.cookies)
print(res)
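An equivalent variant, in case it helps anyone: Django also accepts the CSRF token in a request header (X-CSRFToken by default, configurable via CSRF_HEADER_NAME), so the hidden form field isn't strictly needed. A minimal sketch, assuming the default header name:
import requests
url = 'https://www.xxxx.top'  # for example
session = requests.session()
session.headers['Referer'] = url  # Django's CSRF check over HTTPS requires a Referer
session.get(url)  # makes Django set the csrftoken cookie on the session
token = session.cookies.get('csrftoken')
# send the token back in the X-CSRFToken header instead of the form body
res = session.post(url, data={}, headers={'X-CSRFToken': token})
print(res)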
With Python 2:
I have two different problems.
With a URL, I get this error:
urllib2.HTTPError: HTTP Error 302: The HTTP server returned a redirect error that would lead to an infinite loop.
So I am trying to set up cookielib.
But then I get this error:
urllib2.HTTPError: HTTP Error 403: Forbidden
I tried to combine the two, without success; it always displays the same error:
urllib2.HTTPError: HTTP Error 403: Forbidden
import urllib2, sys
from bs4 import BeautifulSoup
import cookielib
hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.9 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'fr,fr-fr;q=0.8,en-us;q=0.5,en;q=0.3',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Connection': 'close'}
req = urllib2.Request(row['url'], None, hdr)
cookie = cookielib.CookieJar() # CookieJar object to store cookie
handler = urllib2.HTTPCookieProcessor(cookie) # create cookie processor
opener = urllib2.build_opener(handler) # a general opener
page = opener.open(req)
pagedata = BeautifulSoup(page,"html.parser")
Or:
req = urllib2.Request(row['url'],None,headers=hdr)
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
page = opener.open(req)
pagedata = BeautifulSoup(page,"html.parser")
And many other combinations, with the same result.
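For what it's worth, if switching libraries is acceptable, the same idea (browser-like headers plus cookies that persist across the redirect chain) is shorter with requests, since a Session stores cookies automatically and follows redirects by default. A rough sketch, with a placeholder URL in place of row['url']:
import requests
from bs4 import BeautifulSoup

hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.9 Safari/537.36',
       'Accept-Language': 'fr,fr-fr;q=0.8,en-us;q=0.5,en;q=0.3'}
session = requests.Session()
session.headers.update(hdr)
# cookies set by the first response are re-sent on the redirect target,
# which usually breaks the redirect loop
page = session.get('https://example.com/some-page')  # placeholder for row['url']
pagedata = BeautifulSoup(page.text, "html.parser")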
I have a Django 3.1.7 API.
Before Django 3.1, I was adding the SameSite and Secure attributes to cookies in the responses through a custom middleware, depending on the user agent, with automated tests.
Now that Django 3.1 can set those cookie attributes itself, I removed the custom middleware, but I still want to test for the presence of the SameSite and Secure attributes in the responses.
So I added the following settings in settings.py, as the Django docs describe:
CSRF_COOKIE_SECURE = True
SESSION_COOKIE_SECURE = True
CSRF_COOKIE_SAMESITE = 'None'
SESSION_COOKIE_SAMESITE = 'None'
But when I look at the content of the responses in my tests, I no longer see any SameSite or Secure attributes on the cookies. I printed the content of the cookies, and they're not there.
Why?
Here are my tests:
agent_string = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.2227.0 Safari/537.36"
from django.test import Client
test_client = Client()
res = test_client.get("/", HTTP_USER_AGENT=agent_string)
print(res.cookies.items())
I also tried with the DRF test client just in case, with the same result:
agent_string = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.2227.0 Safari/537.36"
from rest_framework.test import APIClient
test_client = APIClient()
res = test_client.get("/", HTTP_USER_AGENT=agent_string)
print(res.cookies.items())
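In case it is useful for comparison: those settings only affect cookies that a response actually sets, so a plain GET to "/" may legitimately produce an empty cookie jar. Here is a rough sketch of a test that forces the session cookie to be set and then inspects its attributes; the "/login/" URL and the credentials are placeholders for whatever view sets a cookie in the project:
from django.contrib.auth.models import User
from django.test import Client, TestCase

class CookieFlagTests(TestCase):
    def test_session_cookie_flags(self):
        User.objects.create_user(username="alice", password="secret")
        client = Client()
        # a successful login makes Django emit the sessionid cookie in the response
        res = client.post("/login/", {"username": "alice", "password": "secret"})  # placeholder URL
        morsel = res.cookies.get("sessionid")
        self.assertIsNotNone(morsel)
        self.assertTrue(morsel["secure"])
        self.assertEqual(morsel["samesite"], "None")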
BS4 error: 'NoneType' object has no attribute 'find_all'. Cannot parse the HTML data.
import requests
from bs4 import BeautifulSoup as bs

session = requests.session()

def get_sizes_in_stock():
    global session
    endpoint = 'https://www.jimmyjazz.com/mens/footwear/nike-air-max-270/AH8050-100?color=White'
    response = session.get(endpoint)
    soup = bs(response.text, 'html.parser')
    div = soup.find('div', {'class': 'box_wrapper'})
    all_sizes = div.find_all('a')
    sizes_in_stock = []
    for size in all_sizes:
        if 'piunavailable' not in size['class']:
            size_id = size['id']
            sizes_in_stock.append(size_id.split('_')[1])
    return sizes_in_stock

print(get_sizes_in_stock())
Try passing a User-Agent in the headers parameter.
Change:
response = session.get(endpoint)
to:
response = session.get(endpoint, headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'})
import requests
from bs4 import BeautifulSoup as bs

session = requests.session()

def get_sizes_in_stock():
    global session
    endpoint = "https://www.sneakers76.com/en/nike/5111-nike-af1-type-ci0054-001-.html"
    response = session.get(endpoint, headers={'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Mobile Safari/537.36'})
    soup = bs(response.text, "html.parser")
    var = soup.find("var", {"blockwishlist_viewwishlist": "View your wishlist"})
    all_sizes = var.find_all("var combinations")
    sizes_in_stock = []
    for size in all_sizes:
        if "0" not in size["quantity"]:
            size_id = size["attributes"]
            sizes_in_stock.append(size_id)
    return sizes_in_stock

print(get_sizes_in_stock())
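As a side note, the original 'NoneType' error simply means find() matched nothing in the HTML that actually came back (typically a blocked or different page when no browser User-Agent is sent), so a small guard makes the failure easier to diagnose. A sketch based on the first snippet's selector, with the User-Agent assumed to be required:
import requests
from bs4 import BeautifulSoup as bs

def get_sizes_in_stock(endpoint):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}
    response = requests.get(endpoint, headers=headers)
    soup = bs(response.text, 'html.parser')
    div = soup.find('div', {'class': 'box_wrapper'})
    if div is None:
        # the expected markup is missing: inspect what the server actually returned
        print(response.status_code, response.text[:300])
        return []
    return [a['id'].split('_')[1] for a in div.find_all('a')
            if 'piunavailable' not in a['class']]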
I'm trying to get Python to make a POST request to "global.sitesafety.trendmicro.com". I've tried getting the cookie and adding it to the headers. I'm not getting the results I should be.
import requests
from bs4 import BeautifulSoup
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings()
session = requests.Session()
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
"Host": "global.sitesafety.trendmicro.com"
}
response = session.get('https://global.sitesafety.trendmicro.com/',verify=False, headers=headers)
cookies = session.cookies.get_dict()
domain = "http://hsdfsdfam.com"
url = 'https://global.sitesafety.trendmicro.com/result.php'
payload = {'urlname': domain, 'getinfo': 'Check+Now'}
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
"Referer": "https://global.sitesafety.trendmicro.com/result.php",
"Origin": "https://global.sitesafety.trendmicro.com",
"Host": "global.sitesafety.trendmicro.com"
}
result = session.post(url, params=payload, headers=headers, cookies=cookies,verify=False)
if result.status_code == 200:
    soup = BeautifulSoup(result.content, "lxml")
    matching_divs = soup.find_all('div', class_='labeltitleresult')
    for div in matching_divs:
        print(div.text)
else:
    print('failed to get the page somehow, see: {}'.format(result.status_code))
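One detail worth double-checking there: params= puts urlname and getinfo into the query string, while a normal HTML form submit sends them in the POST body. A sketch of the body variant (whether this endpoint actually expects that is an assumption on my part):
# send the form fields in the request body; the session already carries the
# cookies from the earlier GET, so passing them explicitly is optional
result = session.post(url, data=payload, headers=headers, verify=False)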
I have tried with headers, cookies, form data and a raw body too, but I get 401 and 500 status codes. On this site, the first page is fetched with GET and returns HTML, while further pages are fetched with POST and return JSON. These status codes usually indicate an authorization problem, but I have searched and couldn't find any CSRF token or auth token in the page headers.
import scrapy
from SouthShore.items import Product
from scrapy.http import Request, FormRequest

class OjcommerceDSpider(scrapy.Spider):
    handle_httpstatus_list = [401, 500]
    name = "ojcommerce_d"
    allowed_domains = ["ojcommerce.com"]
    #start_urls = ['http://www.ojcommerce.com/search?k=south%20shore%20furniture']

    def start_requests(self):
        return [FormRequest('http://www.ojcommerce.com/ajax/search.aspx/FetchDataforPaging',
                            method="POST",
                            body='''{"searchTitle" : "south shore furniture","pageIndex" : '2',"sortBy":"1"}''',
                            headers={'Content-Type': 'application/json; charset=UTF-8',
                                     'Accept': 'application/json, text/javascript, */*; q=0.01',
                                     'Cookie': '''vid=eAZZP6XwbmybjpTWQCLS+g==;
                                                  _ga=GA1.2.1154881264.1480509732;
                                                  ASP.NET_SessionId=rkklowbpaxzpp50btpira1yp'''},
                            callback=self.parse)]

    def parse(self, response):
        with open("ojcommerce.json", "wb") as f:
            f.write(response.body)
I got it working with the following code:
import json
from scrapy import Request, Spider

class OjcommerceDSpider(Spider):
    name = "ojcommerce"
    allowed_domains = ["ojcommerce.com"]
    custom_settings = {
        'LOG_LEVEL': 'DEBUG',
        'COOKIES_DEBUG': True,
        'DEFAULT_REQUEST_HEADERS': {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36',
        },
    }

    def start_requests(self):
        yield Request(
            url='http://www.ojcommerce.com/search?k=furniture',
            callback=self.parse_search_page,
        )

    def parse_search_page(self, response):
        yield Request(
            url='http://www.ojcommerce.com/ajax/search.aspx/FetchDataforPaging',
            method='POST',
            body=json.dumps({'searchTitle': 'furniture', 'pageIndex': '2', 'sortBy': '1'}),
            callback=self.parse_json_page,
            headers={
                'Content-Type': 'application/json; charset=UTF-8',
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'X-Requested-With': 'XMLHttpRequest',
            },
        )

    def parse_json_page(self, response):
        data = json.loads(response.body)
        with open('ojcommerce.json', 'wb') as f:
            json.dump(data, f, indent=4)
Two observations:
a previous request to another page of the site is needed to get a "fresh" ASP.NET_SessionId cookie
I couldn't make it work with FormRequest, probably because it is meant for form-encoded data rather than a raw JSON body; use Request instead