Python SSL web page scraping - python-2.7

I am trying to scrape a web page using Python 2.7 and BeautifulSoup, but I can't get past a protocol error that doesn't make much sense to me. This only happens on the specific website that I need to do this for: https://edd.telstra.com/telstra
The code I use for a basic test:
#! /usr/bin/python
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
import re
# Copy all of the content from the provided web page
webpage = urlopen("https://edd.telstra.com/telstra/").read()
And I get the following error (running on Ubuntu 12.10):
Traceback (most recent call last):
File "e.py", line 8, in <module>
webpage = urlopen("https://edd.telstra.com/telstra/").read()
File "/usr/lib/python2.7/urllib.py", line 86, in urlopen
return opener.open(url)
File "/usr/lib/python2.7/urllib.py", line 207, in open
return getattr(self, name)(url)
File "/usr/lib/python2.7/urllib.py", line 436, in open_https
h.endheaders(data)
File "/usr/lib/python2.7/httplib.py", line 958, in endheaders
self._send_output(message_body)
File "/usr/lib/python2.7/httplib.py", line 818, in _send_output
self.send(msg)
File "/usr/lib/python2.7/httplib.py", line 780, in send
self.connect()
File "/usr/lib/python2.7/httplib.py", line 1165, in connect
self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
File "/usr/lib/python2.7/ssl.py", line 381, in wrap_socket
ciphers=ciphers)
File "/usr/lib/python2.7/ssl.py", line 143, in __init__
self.do_handshake()
File "/usr/lib/python2.7/ssl.py", line 305, in do_handshake
self._sslobj.do_handshake()
IOError: [Errno socket error] [Errno 1] _ssl.c:504: error:1408F119:SSL routines:SSL3_GET_RECORD:decryption failed or bad record mac
Could someone tell me if there is some parameter that I need to specify to get this page to download in Python? The problem seems to be specific to this web page, as the code above (plus lots of other code I tried) works fine on the other HTTPS/SSL pages I tried.
Thanks for any help!

I recommend using the requests library:
def get_page(login, password, url='https://qwe.qwe'):
    '''Log in with a form POST and return the page body fetched afterwards.

    One ``requests.Session`` is used for both requests so that state kept by
    the session (such as cookies set by the login response) is reused for
    the follow-up GET.

    Args:
        login: Value sent as the ``user`` form field.
        password: Value sent as the ``pass`` form field.
        url: Address that receives both the login POST and the subsequent
            GET. Defaults to the placeholder address of the example.

    Returns:
        The decoded body (``Response.text``) of the GET response.
    '''
    import requests  # third-party; imported here so the snippet is self-contained

    payload = {
        'user': login,
        'pass': password,
    }
    with requests.Session() as my_session:
        # The POST result is not inspected; it is sent only to log in.
        my_session.post(url, data=payload)
        data = my_session.get(url)
    return data.text
More info : http://docs.python-requests.org/en/latest/user/advanced/#session-objects

Related

Flask-Security Login Functional testing

I'm trying to do some functional testing on Flask view functions.
Currently I'm using login and logout from the Flask-Security module, and when I try to follow the login and logout guide from Flask's documentation (http://flask.pocoo.org/docs/0.12/testing/#logging-in-and-out), the login 'post' does not seem to work. I get the same error when I try to post using the requests module too.
My Flask-Security's login endpoint is /login_test/
Below is a piece of my unit test code.
class TestUser(unittest.TestCase):
    """Functional test case exercising the login and logout endpoints."""

    def setUp(self):
        """Run before each test: build a test client and create the tables."""
        self.client = app.test_client()
        db.create_all()

    def tearDown(self):
        """Run after each test; currently a no-op (cleanup is commented out)."""
        #db.session.remove()
        #DropEverything().drop_db()

    def login(self, email, password):
        """POST the credentials to '/login_test/' without following redirects."""
        credentials = {'email': email, 'password': password}
        return self.client.post(
            '/login_test/',
            data=credentials,
            follow_redirects=False,
        )

    def logout(self):
        """GET '/logout' and follow any redirects."""
        return self.client.get('/logout', follow_redirects=True)

    def test_login_logout(self):
        """Post the admin credentials and expect the login confirmation text."""
        # NOTE: this posts to '/login_test' (no trailing slash), whereas
        # login() above posts to '/login_test/'.
        form = {'email': 'admin', 'password': 'admin'}
        response = self.client.post(
            '/login_test',
            data=form,
            follow_redirects=False,
        )
        self.assertIn(b'You logged in', response.data)
The error message that I get after running test_login_logout is shown below. This is what happens when I hit the URL '/login_test':
Ran 1 test in 0.187s
FAILED (failures=1)
Failure
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/unittest/case.py", line 58, in testPartExecutor
yield
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/unittest/case.py", line 600, in run
testMethod()
File "/Users/genom003dm/PycharmProjects/sample_accessioning_dev/app/tests/user_management_testing.py", line 38, in test_login_logout
), follow_redirects=False)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/werkzeug/test.py", line 801, in post
return self.open(*args, **kw)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/testing.py", line 127, in open
follow_redirects=follow_redirects)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/werkzeug/test.py", line 764, in open
response = self.run_wsgi_app(environ, buffered=buffered)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/werkzeug/test.py", line 677, in run_wsgi_app
rv = run_wsgi_app(self.application, environ, buffered=buffered)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/werkzeug/test.py", line 884, in run_wsgi_app
app_rv = app(environ, start_response)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1997, in __call__
return self.wsgi_app(environ, start_response)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1985, in wsgi_app
response = self.handle_exception(e)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1540, in handle_exception
reraise(exc_type, exc_value, tb)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/_compat.py", line 33, in reraise
raise value
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1982, in wsgi_app
response = self.full_dispatch_request()
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1614, in full_dispatch_request
rv = self.handle_user_exception(e)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1517, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/_compat.py", line 33, in reraise
raise value
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1612, in full_dispatch_request
rv = self.dispatch_request()
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1590, in dispatch_request
self.raise_routing_exception(req)
File "/Users/genom003dm/sample_accessioning_dev_virtual_env/lib/python3.5/site-packages/flask/app.py", line 1576, in raise_routing_exception
raise FormDataRoutingRedirect(request)
flask.debughelpers.FormDataRoutingRedirect: b'A request was sent to this URL (http://localhost/login_test) but a redirect was issued automatically by the routing system to "http://localhost/login_test/". The URL was defined with a trailing slash so Flask will automatically redirect to the URL with the trailing slash if it was accessed without one. Make sure to directly send your POST-request to this URL since we can\'t make browsers or HTTP clients redirect with form data reliably or without user interaction.\n\nNote: this exception is only raised in debug mode'
If I change the URL to /login_test/ then I get HTTP 400 errors. I'm assuming that this is happening because I'm missing the form object for login? (But in this case I don't have a form object, because I'm just trying to log in with a POST API call.)
I want to know whether there is a way to log in using Flask-Security's /login_test/ URL.
Thanks
Ok, I found an answer. The reason I was only seeing a bare HTTP 400 error instead of its details was that I had put an HTTP 400 error handler on the Flask app, and it just showed me 400 rather than the actual error. Once I removed the HTTP 400 error handler, the response said that the CSRF token was missing. So what I did was set WTF_CSRF_ENABLED = False in the app config file.

Permission denied for access to folder on Python3.6 with Flask

EDIT: Ignore this question entirely. It was a filesystem problem.
Whenever I try to open the posts folder, I get an error:
Traceback (most recent call last):
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1997, in __call__
return self.wsgi_app(environ, start_response)
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1985, in wsgi_app
response = self.handle_exception(e)
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1540, in handle_exception
reraise(exc_type, exc_value, tb)
File "C:\Python36-32\lib\site-packages\flask\_compat.py", line 33, in reraise
raise value
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1982, in wsgi_app
response = self.full_dispatch_request()
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1614, in full_dispatch_request
rv = self.handle_user_exception(e)
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1517, in handle_user_exception
reraise(exc_type, exc_value, tb)
File "C:\Python36-32\lib\site-packages\flask\_compat.py", line 33, in reraise
raise value
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1612, in full_dispatch_request
rv = self.dispatch_request()
File "C:\Python36-32\lib\site-packages\flask\app.py", line 1598, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "C:\Users\vikas\Projects\blg\main.py", line 19, in index
return render_template("index.html", newest_post=find_recent_post())
File "C:\Users\vikas\Projects\blg\main.py", line 12, in find_recent_post
with open(str(find_recent_post_name())) as post:
PermissionError: [Errno 13] Permission denied: 'posts/'
My main.py file:
from flask import Flask
from flask import render_template
app = Flask(__name__)
def find_recent_post_name():
    """Return the path of the most recently modified file in ``posts/``.

    The glob pattern is ``"posts/*"`` so that the directory's entries are
    listed; a bare ``"posts/"`` pattern matches only the directory itself,
    and calling ``open()`` on a directory fails (``PermissionError`` on
    Windows).

    Returns:
        The path of the regular file directly under ``posts/`` that has the
        newest modification time.

    Raises:
        FileNotFoundError: If ``posts/`` contains no regular files.
    """
    import glob
    import os

    # Keep regular files only; sub-directories cannot be opened as posts.
    posts = [path for path in glob.glob("posts/*") if os.path.isfile(path)]
    if not posts:
        raise FileNotFoundError("no post files found in 'posts/'")
    # glob() returns entries in arbitrary order, so pick the newest explicitly.
    return max(posts, key=os.path.getmtime)
def find_recent_post():
    """Return the lines of the file named by ``find_recent_post_name()``."""
    path = str(find_recent_post_name())
    with open(path) as handle:
        return handle.readlines()
@app.route("/")
def index():
    """Render ``index.html``, passing the newest post's lines as ``newest_post``."""
    return render_template("index.html", newest_post=find_recent_post())
if __name__ == '__main__':
app.run(debug=True)
My Python version is v3.6.1 and Flask v1.12.0
EDIT: Debugging code and using PowerShell seems to work.
It is not a problem with Flask. You can either change the permissions on the folder /posts/ or run Flask as a user who has read access:
$ chmod 755 <folder names>
to give access to the user who runs the Flask app, or you could use:
$ sudo python main.py
to run as admin, or run Flask as a user who already has access. Note that you should apply the same settings on deployment, so the workers for uWSGI or Gunicorn (or however you run your app in deployment) should run with the same credentials.
Note that your user needs both read (4) access to read the files' contents and execute (1) access to be able to cd into the folder.

How to connect to HTTPS through proxy using urllib2 (in Python)

If the website I'm trying to connect to via a proxy is unsecured (HTTP), then I'm able to connect, however if it's secured (HTTPS), then I can't.
The following code works:
import urllib2
proxy_support = urllib2.ProxyHandler({'http':'xxx.xxx.xxx.xx'})
opener = urllib2.build_opener(proxy_support)
urllib2.install_opener(opener)
html = urllib2.urlopen('http://www.example.com').read()
However the code below does not work,
proxy_support = urllib2.ProxyHandler({'https':'xxx.xxx.xxx.xx'})
opener = urllib2.build_opener(proxy_support)
urllib2.install_opener(opener)
html = urllib2.urlopen('https://www.example.com').read()
Instead I get the following traceback:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 431, in open
response = self._open(req, data)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 449, in _open
'_open', req)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1240, in https_open
context=self._context)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1197, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [Errno 61] Connection refused>
According to https://docs.python.org/2/library/urllib2.html:
Changed in version 2.7.9: cafile, capath, cadefault, and context were added.
This one allowed me to connect to my local HTTPS site that is using a self-signed SSL certificate:
import ssl

# Build a context that skips certificate verification and pass it as a
# keyword argument inside the urlopen() call. Only do this for a host you
# trust, such as a local site that uses a self-signed certificate.
html = urllib2.urlopen(
    'https://www.example.com',
    context=ssl._create_unverified_context()
).read()
I noticed that your traceback is similar to mine. The code, just as you posted it, works on Ubuntu 14.04 (Python 2.7.6) but not on 16.04 (Python 2.7.13); the tracebacks match except for the last line:
File "/usr/lib/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 429, in open
response = self._open(req, data)
File "/usr/lib/python2.7/urllib2.py", line 447, in _open
'_open', req)
File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 1241, in https_open
context=self._context)
File "/usr/lib/python2.7/urllib2.py", line 1198, in do_open
raise URLError(err)
urllib2.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:590)>
I'm not sure if this works on your end.

Selenium error with Firefox: does firefox need to be in applications folder?

I'd like to use selenium to download a series of outputs from a web form
>>> from selenium import webdriver
>>> driver = webdriver.Firefox()
the above code gives the following error -
File "<stdin>", line 1, in <module>
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/firefox/webdriver.py", line 78, in __init__
self.binary, timeout)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/firefox/extension_connection.py", line 51, in __init__
self.binary.launch_browser(self.profile, timeout=timeout)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/firefox/firefox_binary.py", line 67, in launch_browser
self._start_from_profile_path(self.profile.path)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/selenium/webdriver/firefox/firefox_binary.py", line 90, in _start_from_profile_path
env=self._firefox_env)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/subprocess.py", line 710, in __init__
errread, errwrite)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/subprocess.py", line 1335, in _execute_child
raise child_exception
OSError: [Errno 2] No such file or directory
Firefox is currently installed on the desktop rather than in my Applications folder, could that be the cause of the problem? If so, is there a workaround that means I don't have to move it to Applications?
You can and probably in your case should specify the path to the firefox binary explicitly:
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
binary = FirefoxBinary('/path/to/binary')
driver = webdriver.Firefox(firefox_binary=binary)

sending "GET" request over proxy using httplib in python

I am trying to send a basic GET request to "www.python.org" to fetch "www.python.org/index.html" using the httplib module in Python. I use a proxy server, "10.1.1.19:80". My code and the error message are shown below. Please point out the mistakes I'm making. Regards.
>>> import httplib
>>> conn=httplib.HTTPConnection("http://www.python.org",80)
>>> conn.set_tunnel("10.1.1.19:80")
>>> conn.request("GET","www.python.org/index.html",headers={"Proxy-Authorization":"Basic "+"MzEwNTMzOmdveWFs"})
Traceback (most recent call last):
File "<pyshell#3>", line 1, in <module>
conn.request("GET","www.python.org/index.html",headers={"Proxy-Authorization":"Basic "+"MzEwNTMzOmdveWFs"})
File "C:\Python27\lib\httplib.py", line 973, in request
self._send_request(method, url, body, headers)
File "C:\Python27\lib\httplib.py", line 1007, in _send_request
self.endheaders(body)
File "C:\Python27\lib\httplib.py", line 969, in endheaders
self._send_output(message_body)
File "C:\Python27\lib\httplib.py", line 829, in _send_output
self.send(msg)
File "C:\Python27\lib\httplib.py", line 791, in send
self.connect()
File "C:\Python27\lib\httplib.py", line 772, in connect
self.timeout, self.source_address)
File "C:\Python27\lib\socket.py", line 553, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
gaierror: [Errno 11004] getaddrinfo failed
Try this
import httplib

# Connect to the proxy (10.1.1.19:80) rather than to the origin server.
conn = httplib.HTTPConnection("10.1.1.19", 80)
# The request target is the absolute URL of the page to fetch via the proxy.
# Replace the placeholder with the base64-encoded "user:password" pair.
proxy_headers = {"Proxy-Authorization": "Basic " + "<base64 credentials>"}
conn.request("GET", "http://www.python.org/index.html", headers=proxy_headers)