I'm scraping some data off the web using Python, BeautifulSoup and Selenium. I am also using PyVirtualDisplay so that I do not need a display.
It works perfectly from my laptop, but when I run it from a server I get the following error:
httplib.BadStatusLine: ''
I got this the second time it scraped a page. It now does it all the time. What is the issue?
EDIT
Code Added:
import requests, bs4
import csv
import re
import datetime
import time
import os
from contextlib import closing
from selenium import webdriver
from selenium.webdriver import Firefox # pip install selenium
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1500, 1200))
display.start()
url_base = "https://www.seek.com.au/jobs?page="
# open web browser and login
binary = FirefoxBinary('/home/firefox/firefox/firefox')
driver = webdriver.Firefox(firefox_binary=binary)
overlap = False
page = 0
while not overlap:
page += 1
driver.get(url_base+str(page))
...
And here is the traceback:
Traceback (most recent call last):
File "manage.py", line 22, in <module>
execute_from_command_line(sys.argv)
File "/var/www/matt/env/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 367, in execute_from_command_line
utility.execute()
File "/var/www/matt/env/local/lib/python2.7/site-packages/django/core/management/__init__.py", line 359, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/var/www/matt/env/local/lib/python2.7/site-packages/django/core/management/base.py", line 294, in run_from_argv
self.execute(*args, **cmd_options)
File "/var/www/matt/env/local/lib/python2.7/site-packages/django/core/management/base.py", line 345, in execute
output = self.handle(*args, **options)
File "/var/www/matt/matt/management/commands/mattv3.py", line 109, in handle
driver.get(url_base+str(page))
File "/var/www/matt/env/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 245, in get
self.execute(Command.GET, {'url': url})
File "/var/www/matt/env/local/lib/python2.7/site-packages/selenium/webdriver/remote/webdriver.py", line 231, in execute
response = self.command_executor.execute(driver_command, params)
File "/var/www/matt/env/local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 395, in execute
return self._request(command_info[0], url, body=data)
File "/var/www/matt/env/local/lib/python2.7/site-packages/selenium/webdriver/remote/remote_connection.py", line 426, in _request
resp = self._conn.getresponse()
File "/usr/lib/python2.7/httplib.py", line 1136, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 453, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 417, in _read_status
raise BadStatusLine(line)
httplib.BadStatusLine: ''
I was running this on a very small server (512MB, 20GB SSD). I've increased it and it is running fine. If someone could explain the issue to me I would love to understand.
Related
I'm running Django 2.0.7 in a Python 3 virtual environment, and when I run **django-admin startproject trydjango1 .** the following error occurs. Can you please help me solve this problem?
Traceback (most recent call last):
File "C:\Users\TG\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\TG\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "D:\django-projects\Test1\Scripts\django-admin.exe\__main__.py", line 7, in <module>
File "D:\django-projects\Test1\lib\site-packages\django\core\management\__init__.py", line 371, in execute_from_command_line
utility.execute()
File "D:\django-projects\Test1\lib\site-packages\django\core\management\__init__.py", line 365, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "D:\django-projects\Test1\lib\site-packages\django\core\management\base.py", line 288, in run_from_argv
self.execute(*args, **cmd_options)
File "D:\django-projects\Test1\lib\site-packages\django\core\management\base.py", line 335, in execute
output = self.handle(*args, **options)
File "D:\django-projects\Test1\lib\site-packages\django\core\management\commands\startproject.py", line 20, in handle
super().handle('project', project_name, target, **options)
File "D:\django-projects\Test1\lib\site-packages\django\core\management\templates.py", line 117, in handle
django.setup()
File "D:\django-projects\Test1\lib\site-packages\django\__init__.py", line 16, in setup
from django.urls import set_script_prefix
File "D:\django-projects\Test1\lib\site-packages\django\urls\__init__.py", line 1, in <module>
from .base import (
File "D:\django-projects\Test1\lib\site-packages\django\urls\base.py", line 8, in <module>
from .exceptions import NoReverseMatch, Resolver404
File "D:\django-projects\Test1\lib\site-packages\django\urls\exceptions.py", line 1, in <module>
from django.http import Http404
File "D:\django-projects\Test1\lib\site-packages\django\http\__init__.py", line 5, in <module>
from django.http.response import (
File "D:\django-projects\Test1\lib\site-packages\django\http\response.py", line 13, in <module>
from django.core.serializers.json import DjangoJSONEncoder
File "D:\django-projects\Test1\lib\site-packages\django\core\serializers\__init__.py", line 23, in <module>
from django.core.serializers.base import SerializerDoesNotExist
File "D:\django-projects\Test1\lib\site-packages\django\core\serializers\base.py", line 6, in <module>
from django.db import models
File "D:\django-projects\Test1\lib\site-packages\django\db\models\__init__.py", line 3, in <module>
from django.db.models.aggregates import * # NOQA
File "D:\django-projects\Test1\lib\site-packages\django\db\models\aggregates.py", line 5, in <module>
from django.db.models.expressions import Case, Func, Star, When
File "D:\django-projects\Test1\lib\site-packages\django\db\models\expressions.py", line 486, in <module>
class TemporalSubtraction(CombinedExpression):
File "D:\django-projects\Test1\lib\site-packages\django\db\models\expressions.py", line 487, in TemporalSubtraction
output_field = fields.DurationField()
File "D:\django-projects\Test1\lib\site-packages\django\db\models\fields\__init__.py", line 155, in __init__
if isinstance(choices, collections.Iterator):
AttributeError: module 'collections' has no attribute 'Iterator'
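You should try a different Python and/or Django version, because the two you are using are not compatible with each other: Django 2.0.7 still refers to `collections.Iterator`, and that alias was removed from the `collections` module in Python 3.10 (it now lives only in `collections.abc`).
Try upgrading Django to version 2.2.28, or run the project under Python 3.6 (or an even lower version).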
I think you should install a newer version of django. It could be that some packages in the version of django you are using are not supported by python 3. I suggest you upgrade to django 3+
I am trying to start a django project using the django-admin startproject but it keeps bringing this error:
Traceback (most recent call last):
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
exec(code, run_globals)
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\Scripts\django-admin.exe\__main__.py", line 7, in <module>
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\management\__init__.py", line 371, in execute_from_command_line
utility.execute()
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\management\__init__.py", line 365, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\management\base.py", line 288, in run_from_argv
self.execute(*args, **cmd_options)
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\management\base.py", line 335, in execute
output = self.handle(*args, **options)
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\management\commands\startproject.py", line 20, in handle
super().handle('project', project_name, target, **options)
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\management\templates.py", line 117, in handle
django.setup()
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\__init__.py", line 16, in setup
from django.urls import set_script_prefix
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\urls\__init__.py", line 1, in <module>
from .base import (
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\urls\base.py", line 8, in <module>
from .exceptions import NoReverseMatch, Resolver404
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\urls\exceptions.py", line 1, in <module>
from django.http import Http404
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\http\__init__.py", line 5, in <module>
from django.http.response import (
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\http\response.py", line 13, in <module>
from django.core.serializers.json import DjangoJSONEncoder
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\serializers\__init__.py", line 23, in <module>
from django.core.serializers.base import SerializerDoesNotExist
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\core\serializers\base.py", line 6, in <module>
from django.db import models
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\db\models\__init__.py", line 3, in <module>
from django.db.models.aggregates import * # NOQA
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\db\models\aggregates.py", line 5, in <module>
from django.db.models.expressions import Case, Func, Star, When
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\db\models\expressions.py", line 486, in <module>
class TemporalSubtraction(CombinedExpression):
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\db\models\expressions.py", line 487, in TemporalSubtraction
output_field = fields.DurationField()
File "C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\lib\site-packages\django\db\models\fields\__init__.py", line 155, in __init__
if isinstance(choices, collections.Iterator):
AttributeError: module 'collections' has no attribute 'Iterator'
How can i fix this? At first, I created a virtual environment where I installed django and created a project using the same command and I did not get the error, but when I tried to run the python manage.py runserver command, I got this error:
(venv) C:\Users\GAbraham.AECIGROUP\DEV\trydjango\venv>python manage.py runserver
C:\Users\GAbraham.AECIGROUP\AppData\Local\Programs\Python\Python310\python.exe: can't open file 'C:\\Users\\GAbraham.AECIGROUP\\DEV\\trydjango\\venv\\manage.py': [Errno 2] No such file or directory
You should be in the project's root folder in order to run the command python manage.py runserver
According to your error, you are somewhere in venv folder.
You should be in the trydjango folder (according to the screenshots) and then run the above command.
Hope this helps!
I was about to publish this as a bug on the Pub/Sub repo, but I thought I'd ask here first.
I'm just trying to set up an HTTP-triggered Cloud Function that will act as a Webhook and upon receiving a POST request will publish a message. It seems simple enough, so I coded it up and deployed it. However, it kept crashing. So I tried running it locally using functions-framework, and found the culprit to be from google.cloud import pubsub_v1 which is causing the error AttributeError: module 'grpc.experimental.aio' has no attribute 'Call'
Environment details
Running on a Cloud Function with Python 3.7
Code example
main.py
from flask import Flask, request, abort, Response
import hmac
import hashlib
import base64
import os
import requests
from google.cloud import pubsub_v1
app = Flask(__name__)
#app.route('/', methods=['POST'])
def connectToSheets(request):
# Do stuff
if __name__ == '__main__':
app.run()
requirements.txt
# Dependencies
flask==1.1.2
requests==2.24.0
google-cloud-pubsub==1.7.0
Stack trace
Traceback (most recent call last):
File "c:\users\user\appdata\local\programs\python\python37\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "c:\users\user\appdata\local\programs\python\python37\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\user\AppData\Local\Programs\Python\Python37\Scripts\functions-framework.exe\__main__.py", line 7, in <module>
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\click\core.py", line 764, in __call__
return self.main(*args, **kwargs)
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\click\core.py", line 717, in main
rv = self.invoke(ctx)
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\click\core.py", line 956, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\click\core.py", line 555, in invoke
return callback(*args, **kwargs)
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\functions_framework\_cli.py", line 37, in _cli
app = create_app(target, source, signature_type)
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\functions_framework\__init__.py", line 229, in create_app
spec.loader.exec_module(source_module)
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "C:\Users\user\Desktop\Cookiestruct - Summer 2020 Internship\Automation\Artist\cloudpy\connectToShopify\main.py", line 8, in <module>
from google.cloud import pubsub_v1
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\google\cloud\pubsub_v1\__init__.py", line 18, in <module>
from google.cloud.pubsub_v1 import publisher
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\google\cloud\pubsub_v1\publisher\__init__.py", line 17, in <module>
from google.cloud.pubsub_v1.publisher.client import Client
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\google\cloud\pubsub_v1\publisher\client.py", line 32, in <module>
from google.cloud.pubsub_v1.gapic import publisher_client
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\google\cloud\pubsub_v1\gapic\publisher_client.py", line 28, in <module>
import google.api_core.gapic_v1.client_info
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\google\api_core\gapic_v1\__init__.py", line 26, in <module>
from google.api_core.gapic_v1 import method_async # noqa: F401
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\google\api_core\gapic_v1\method_async.py", line 20, in <module>
from google.api_core import general_helpers, grpc_helpers_async
File "c:\users\user\appdata\local\programs\python\python37\lib\site-packages\google\api_core\grpc_helpers_async.py", line 38, in <module>
class _WrappedCall(aio.Call):
AttributeError: module 'grpc.experimental.aio' has no attribute 'Call'
Any help is appreciated thanks!
# test.py
# python 3.4.5

import time
from concurrent.futures import ThreadPoolExecutor
def a():
    time.sleep(1)  # line 7 of the script: where the traceback reports the NameError
    print("success")

executor = ThreadPoolExecutor(1)
executor.submit(a).result()  # line 11: .result() re-raises the exception from the worker
The above snippet works when run like
$ python test.py
success
But fails when run like
$ python manage.py shell < test.py
Traceback (most recent call last):
File "manage.py", line 22, in <module>
execute_from_command_line(sys.argv)
File "/var/www/cgi-bin/tracking/lib64/python3.4/site-packages/django/core/management/__init__.py", line 363, in execute_from_command_line
utility.execute()
File "/var/www/cgi-bin/tracking/lib64/python3.4/site-packages/django/core/management/__init__.py", line 355, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/var/www/cgi-bin/tracking/lib64/python3.4/site-packages/django/core/management/base.py", line 283, in run_from_argv
self.execute(*args, **cmd_options)
File "/var/www/cgi-bin/tracking/lib64/python3.4/site-packages/django/core/management/base.py", line 330, in execute
output = self.handle(*args, **options)
File "/var/www/cgi-bin/tracking/lib64/python3.4/site-packages/django/core/management/commands/shell.py", line 101, in handle
exec(sys.stdin.read())
File "<string>", line 11, in <module>
File "/usr/lib64/python3.4/concurrent/futures/_base.py", line 395, in result
return self.__get_result()
File "/usr/lib64/python3.4/concurrent/futures/_base.py", line 354, in __get_result
raise self._exception
File "/usr/lib64/python3.4/concurrent/futures/thread.py", line 54, in run
result = self.fn(*self.args, **self.kwargs)
File "<string>", line 7, in a
NameError: name 'time' is not defined
Which is really strange to me. What is it about running the script with the manage.py shell command that results in the time module being undefined in the function a?
Checking in the Django implementation (django/core/management/commands/shell.py line 83):
# Execute stdin if it has anything to read and exit.
# Not supported on Windows due to select.select() limitations.
if sys.platform != 'win32' and select.select([sys.stdin], [], [], 0)[0]:
exec(sys.stdin.read())
return
The developers did not pass a globals() scope to exec(). That means time and ThreadPoolExecutor are imported into the locals() dictionary of the handle() scope (in shell.py). Later, when a() runs, the name time is looked up in the locals() dictionary of the "a" scope and in the globals() dictionary, but not in handle()'s locals, so it raises a NameError. You can see an example in this snippet:
# Minimal reproduction: source code exec()'d inside a function without an
# explicit globals dictionary.
command = """
import time
def b():
    time.sleep(1)
b()
"""


def a():
    # With no namespace arguments, exec() runs `command` against a()'s local
    # namespace, so `import time` binds `time` there. b() then looks `time`
    # up as a global, does not find it, and fails with NameError.
    exec(command)


a()
Then try replacing exec(command) with exec(command, globals()).
I think it's not working because you did not set the environment variable DJANGO_SETTINGS_MODULE to your settings and call django.setup(), or add the project path with sys.path.append('path/')
(NOT SURE)
But these 2 options can work like a charm:
Either you import the module time inside the function:
from concurrent.futures import ThreadPoolExecutor


def a():
    # Import inside the function so that `time` is bound in a()'s own scope,
    # regardless of how the surrounding script was executed.
    import time

    time.sleep(1)
    print("success")


executor = ThreadPoolExecutor(1)
executor.submit(a).result()
or just import time at the beginning like you did, and use the module as a global one:
from concurrent.futures import ThreadPoolExecutor
import time


def a():
    # NOTE(review): `global time` only affects how a() resolves the name; it
    # does not change where the top-level `import time` binds it. Under
    # exec() with a separate locals dict this may therefore still raise
    # NameError -- verify before relying on this variant.
    global time
    time.sleep(1)
    print("success")


executor = ThreadPoolExecutor(1)
executor.submit(a).result()
I'm having a bit of trouble using str.format(*args, **kwargs):
import datetime
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.mail import send_mass_mail
from django.contrib.auth.models import User
from django.db.models import Count
class Command(BaseCommand):
    """Send an 'Enroll in a course' email to users who have joined no courses.

    Only users whose ``date_joined`` is at least ``--days`` days in the past
    are selected.
    """

    def add_arguments(self, parser):
        # --days: integer number of days, read in handle() as options['days'].
        parser.add_argument('--days', dest='days', type=int)

    def handle(self, *args, **options):
        emails = []
        subject = 'Enroll in a course'
        # Cut-off date: users who joined on or before this date are selected.
        date_joined = datetime.date.today() - datetime.timedelta(days=options['days'])
        # Parenthesised so the chained .filter() can continue on its own line.
        users = (
            User.objects.annotate(course_count=Count('courses_joined'))
            .filter(course_count=0, date_joined__lte=date_joined)
        )
        for user in users:
            message = 'Dear {},\n\n What are you waiting for?'.format(user.first_name)
            emails.append((subject, message, settings.DEFAULT_FROM_EMAIL, [user.email]))
        send_mass_mail(emails)
        countEmails = len(emails)
        if countEmails:
            # self.stdout.write(countEmails)
            # NOTE: the next statement is the one the question is about; it
            # applies the % operator to a '{}' template and raises the
            # TypeError shown in the traceback. Left exactly as posted.
            messageConsole = 'Sent {} reminders.' % format(int(countEmails))
            self.stdout.write(messageConsole)
Run the command "python manage.py enroll_reminder --days=1",
then the error occurs:
Traceback (most recent call last):
File "manage.py", line 10, in <module>
execute_from_command_line(sys.argv)
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/__init__.py", line 354, in execute_from_command_line
utility.execute()
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/__init__.py", line 346, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/base.py", line 394, in run_from_argv
self.execute(*args, **cmd_options)
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/base.py", line 445, in execute
output = self.handle(*args, **options)
File "/home/polar/PythonLab/DjangoByExample/educa/students/management/commands/enroll_reminder.py", line 30, in handle
messageConsole = 'Sent {} reminders.\n' % format(int(countEmails))
TypeError: not all arguments converted during string formatting
when I uncomment "self.stdout.write(countEmails)", there comes another problem:
Traceback (most recent call last):
File "manage.py", line 10, in <module>
execute_from_command_line(sys.argv)
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/__init__.py", line 354, in execute_from_command_line
utility.execute()
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/__init__.py", line 346, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/base.py", line 394, in run_from_argv
self.execute(*args, **cmd_options)
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/base.py", line 445, in execute
output = self.handle(*args, **options)
File "/home/polar/PythonLab/DjangoByExample/educa/students/management/commands/enroll_reminder.py", line 29, in handle
self.stdout.write(countEmails)
File "/home/polar/.pyenv/versions/my_env/lib/python3.5/site-packages/django/core/management/base.py", line 111, in write
if ending and not msg.endswith(ending):
AttributeError: 'int' object has no attribute 'endswith'
Can anyone help show me what I am doing wrong?
Thank you!
You're confused between the old C-style string substitution and the new format method. Either do:
'Sent {} reminders.'.format(int(countEmails))
or
'Sent %s reminders.' % (int(countEmails),)
You are confusing % and .format() formatting
You could do either:
'Sent {} reminders'.format(countEmails)
or
'Sent %d reminders.' % countEmails
Note that countEmails is already an integer, there is no need to call int(countEmails).