PS C:\users\steve\tutorial> scrapy crawl dmoz
Traceback (most recent call last):
File "c:\python27\scripts\scrapy-script.py", line 9, in <module>
load_entry_point('scrapy==1.0.3', 'console_scripts', 'scrapy')()
File "C:\Python27\lib\site-packages\scrapy-1.0.3-py2.7.egg\scrapy\cmdline.py",
cmd.crawler_process = CrawlerProcess(settings)
File "C:\Python27\lib\site-packages\scrapy-1.0.3-py2.7.egg\scrapy\crawler.py",
super(CrawlerProcess, self).__init__(settings)
File "C:\Python27\lib\site-packages\scrapy-1.0.3-py2.7.egg\scrapy\crawler.py",
self.spider_loader = _get_spider_loader(settings)
File "C:\Python27\lib\site-packages\scrapy-1.0.3-py2.7.egg\scrapy\crawler.py",
return loader_cls.from_settings(settings.frozencopy())
File "C:\Python27\lib\site-packages\scrapy-1.0.3-py2.7.egg\scrapy\spiderloader.
return cls(settings)
File "C:\Python27\lib\site-packages\scrapy-1.0.3-py2.7.egg\scrapy\spiderloader.
for module in walk_modules(name):
File "C:\Python27\lib\site-packages\scrapy-1.0.3-py2.7.egg\scrapy\utils\misc.py
submod = import_module(fullpath)
File "C:\Python27\lib\importlib\__init__.py", line 37, in import_module
__import__(name)
File "C:\users\steve\tutorial\tutorial\spiders\dmoz.py", line 4, in <module>
class dmozspider(spiders):
TypeError: Error when calling the metaclass bases module.__init__() takes at most 2 arguments (3 given)
My dmoz spider python script is here
from scrapy import spiders
# Spider exactly as posted in the question (indentation was lost in this listing).
# NOTE: `spiders` is the module imported from scrapy, not a class; using it as the
# base class is what produces the "module.__init__() takes at most 2 arguments"
# TypeError shown in the traceback above.
class dmozspider(spiders):
name = "dmoz"
allowed_domains = ["dmoz.org"]
start_urls = [
"http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
"http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/"
]
# Writes each response body to "<second-to-last URL path segment>.html".
def parse(self, response):
filename = response.url.split("/")[-2] + '.html'
with open(filename, 'wb') as f:
f.write(response.body)
The problem is you're importing "spiders", and using it as your base class. "spiders" is the package that contains the spiders, namely the Spider class. To use it, use:
from scrapy.spiders import Spider
# Corrected declaration: inherit from the Spider class, not from the package.
class dmozspider(Spider):
... # Rest of your code
Related
I get the following error when I try to run a pyVows test:
Traceback (most recent call last):
File "C:\Program Files\Python38\lib\unittest\loader.py", line 436, in _find_test_path
module = self._get_module_from_name(name)
File "C:\Program Files\Python38\lib\unittest\loader.py", line 377, in _get_module_from_name
__import__(name)
File "D:\Programmierung\Python-Projekte\JourneyMap\JourneyMap\tests.py", line 2, in <module>
from django_pyvows.context import DjangoHTTPContext
File "C:\Users\malo0\AppData\Roaming\Python\Python38\site-packages\django_pyvows\__init__.py", line 11, in <module>
from context import DjangoContext, DjangoHTTPContext
ModuleNotFoundError: No module named 'context'
I installed pyVows and django-pyvows. Am I missing something? I found no helpful documentation.
REQUESTED EDIT:
from pyvows import Vows, expect
from django_pyvows.context import DjangoHTTPContext
# Create your tests here.
@Vows.batch
class LanguageTest(Vows.Context):
    """Batch of language-related vows, registered through ``Vows.batch``."""

    class LanguageSelectIntegrationTest(DjangoHTTPContext):
        """Nested context whose topic is the result of requesting '/'."""

        def topic(self):
            """Return whatever the context's ``get`` yields for the root URL."""
            return self.get('/')

        def test(self, topic):
            """Placeholder vow; no assertion is made on ``topic`` yet."""
            # Test will be here
            pass
I am upgrading my Django application from Django 1.5 to Django 1.11. I know it's a huge jump, so I am getting many errors and trying to fix them one by one. This is my project structure. I think I have made mistakes in the AppConfig setup, but I couldn't figure it out.
Now I am stuck on this error: ImportError: cannot import name TrainingProfile
settings.py
# App configs registered by the question's settings.py.
# NOTE(review): these dotted paths start at `admin.` / `account.`, while the
# AppConfig classes shown next declare name = 'apps.admin' / 'apps.account' --
# confirm which import root the project really uses.
INSTALLED_APPS = (
'admin.apps.AdminConfig',
'account.apps.AccountConfig',
'.............'
)
apps/admin/apps.py
# AppConfig for the project's own admin app (file apps/admin/apps.py).
class AdminConfig(AppConfig):
name = 'apps.admin'
# NOTE(review): the Training model shown later sets Meta.app_label = 'admin',
# which differs from this label -- confirm that is intended.
label = 'admin_app'
apps/account/apps.py
# AppConfig for the project's account app (file apps/account/apps.py).
class AccountConfig(AppConfig):
name = 'apps.account'
label = 'account_app'
apps/admin/models/__init__.py
from apps.admin.models.sector import *
from apps.admin.models.track import *
from apps.admin.models.training import *
...............
traceback
Traceback (most recent call last):
File "/home/sample-applications/upgrade/venv/local/lib/python2.7/site-packages/django/utils/autoreload.py", line 228, in wrapper
fn(*args, **kwargs)
File "/home/sample-applications/upgrade/venv/local/lib/python2.7/site-packages/django/core/management/commands/runserver.py", line 117, in inner_run
autoreload.raise_last_exception()
File "/home/sample-applications/upgrade/venv/local/lib/python2.7/site-packages/django/utils/autoreload.py", line 251, in raise_last_exception
six.reraise(*_exception)
File "/home/sample-applications/upgrade/venv/local/lib/python2.7/site-packages/django/utils/autoreload.py", line 228, in wrapper
fn(*args, **kwargs)
File "/home/sample-applications/upgrade/venv/local/lib/python2.7/site-packages/django/__init__.py", line 27, in setup
apps.populate(settings.INSTALLED_APPS)
File "/home/sample-applications/upgrade/venv/local/lib/python2.7/site-packages/django/apps/registry.py", line 108, in populate
app_config.import_models()
File "/home/sample-applications/upgrade/venv/local/lib/python2.7/site-packages/django/apps/config.py", line 202, in import_models
self.models_module = import_module(models_module_name)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
File "/home/sample-applications/upgrade/pursuite/django-pursuite/apps/admin/models/__init__.py", line 17, in <module>
from apps.admin.models.job import *
File "/home/sample-applications/upgrade/pursuite/django-pursuite/apps/admin/models/job.py", line 13, in <module>
from account.models import IndustryProfile
File "./apps/account/models.py", line 13, in <module>
from admin.models import Company, Track
File "./apps/admin/models/__init__.py", line 18, in <module>
from apps.admin.models.training import *
File "/home/sample-applications/upgrade/pursuite/django-pursuite/apps/admin/models/training.py", line 12, in <module>
from account.models import TrainingProfile
ImportError: cannot import name TrainingProfile
apps/admin/models/training.py
from django.db import models
from django.contrib import admin
from django.core.urlresolvers import reverse
from tinymce.models import HTMLField
from account.models import TrainingProfile
from analytics.models import State
from admin.common import html2text
__all__ = ['Training']
class Training(models.Model):
    '''
    Training offered by a provider, aimed at either trainers or students.
    '''

    class Meta:
        '''
        Meta properties for this model
        '''
        # NOTE(review): the AppConfig shown for this app declares
        # label = 'admin_app'; confirm 'admin' is the intended app label.
        app_label = 'admin'

    # A tuple rather than a set literal: a set has no defined iteration
    # order, so the options could appear in a different order between runs.
    TRAINING_CHOICES = (
        ('T', 'Trainers'),
        ('S', 'Students'),
    )

    training_title = models.CharField(max_length=100, db_index=True)
    # Still references the imported TrainingProfile class, as in the question.
    provider = models.ForeignKey(TrainingProfile, db_index=True)
    # Stores the one-letter key from TRAINING_CHOICES.
    training_for = models.CharField(max_length=1, choices=TRAINING_CHOICES)
    description = HTMLField()
    location = models.ForeignKey(State, db_index=True)
    # Set once on creation / refreshed on every save, respectively.
    create_date = models.DateTimeField(auto_now_add=True)
    write_date = models.DateTimeField(auto_now=True)
The relevant part in your traceback is this:
File "/home/sample-applications/upgrade/pursuite/django-pursuite/apps/admin/models/__init__.py", line 17, in <module>
from apps.admin.models.job import *
File "/home/sample-applications/upgrade/pursuite/django-pursuite/apps/admin/models/job.py", line 13, in <module>
from account.models import IndustryProfile
File "./apps/account/models.py", line 13, in <module>
from admin.models import Company, Track
File "./apps/admin/models/__init__.py", line 18, in <module>
from apps.admin.models.training import *
File "/home/sample-applications/upgrade/pursuite/django-pursuite/apps/admin/models/training.py", line 12, in <module>
from account.models import TrainingProfile
ImportError: cannot import name TrainingProfile
From here I see that you are importing admin models from account models (Company and Track) and vice versa (IndustryProfile, TrainingProfile) which is making a circular import. It's very strange that this was working in Django 1.5 at all.
To fix your problem, check how you are using these models: if the only use is as the argument to models.ForeignKey, you can remove the import and pass a string instead ('account.IndustryProfile', 'account.TrainingProfile', 'admin.Company' and 'admin.Track').
If you can't replace all of them, try to replace at least those that will fix your problem.
More info at Django documentation https://docs.djangoproject.com/en/1.11/ref/models/fields/#foreignkey
Your model is in a file named training.py, but that file name does not appear in the import at all. It is not in the file structure shown above either, so take a good look at your files and fix your import statement.
I am building a fully open-source testrunner for my needs, but I am running into some problems. The test runner parses a YAML file for a set of scripts in various paths, executes the scripts, and uses a lib that I will be creating to return the outcome. Right now I have a simple ping script that I'm working to get running and testing as I progress, but I am getting a lot of errors. The errors are below, and all the source code is shown below the errors.
The GitHub repo for this is here. Feel free to pull it in and test the issues I am seeing.
https://github.com/castaway2000/testrunner
The issue:
I am trying to use the testrunner I built to parse a YAML file for paths to scripts I am writing for the projects I'm using.
For example, if I want to use a group of certain tests on a target, I can make a YAML file for each set of test types.
There is a problem I am seeing with this, however: scripts run through a relative or exact path are not able to use the Django libraries, because Python is unable to find the libraries unless the script is run from the top level of the Django app (i.e. ./ping_google.py vs ./testcases/ping_google.py).
On top of that, Django says the app is not running when the independent libraries reference models.py, and admin.py can't import models from the same directory. I need help fixing and understanding this issue.
Here is the rundown(stacktrace):
Enterprize:testrunner xwing$ python3 ping_google.py
Traceback (most recent call last):
File "ping_google.py", line 1, in <module>
from testrunnerlib.test import HostInterface
File "/Users/xwing/PycharmProjects/testrunner/testrunnerlib/test.py", line 11, in <module>
from testrunner.models import Host, TestSuite
File "/Users/xwing/PycharmProjects/testrunner/testrunner/models.py", line 5, in <module>
class Host(models.Model):
File "/usr/local/lib/python3.6/site-packages/django/db/models/base.py", line 105, in __new__
app_config = apps.get_containing_app_config(module)
File "/usr/local/lib/python3.6/site-packages/django/apps/registry.py", line 237, in get_containing_app_config
self.check_apps_ready()
File "/usr/local/lib/python3.6/site-packages/django/apps/registry.py", line 124, in check_apps_ready
raise AppRegistryNotReady("Apps aren't loaded yet.")
django.core.exceptions.AppRegistryNotReady: Apps aren't loaded yet.
After I put `import django` and `django.setup()` in the Django settings file, the above error goes away, but I get the following error:
Enterprize:testrunner xwing$ python3 ping_google.py
Traceback (most recent call last):
File "ping_google.py", line 1, in <module>
from testrunnerlib.test import HostInterface
File "/Users/xwing/PycharmProjects/testrunner/testrunnerlib/test.py", line 11, in <module>
from testrunner.models import Host, TestSuite
File "/Users/xwing/PycharmProjects/testrunner/testrunner/models.py", line 5, in <module>
class Host(models.Model):
File "/Users/xwing/PycharmProjects/testrunner/testrunner/models.py", line 6, in Host
ip_address = models.CharField(max_length=16)
File "/usr/local/lib/python3.6/site-packages/django/db/models/fields/__init__.py", line 1043, in __init__
super(CharField, self).__init__(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/django/db/models/fields/__init__.py", line 166, in __init__
self.db_tablespace = db_tablespace or settings.DEFAULT_INDEX_TABLESPACE
File "/usr/local/lib/python3.6/site-packages/django/conf/__init__.py", line 53, in __getattr__
self._setup(name)
File "/usr/local/lib/python3.6/site-packages/django/conf/__init__.py", line 41, in _setup
self._wrapped = Settings(settings_module)
File "/usr/local/lib/python3.6/site-packages/django/conf/__init__.py", line 97, in __init__
mod = importlib.import_module(self.SETTINGS_MODULE)
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "/Users/xwing/PycharmProjects/testrunner/testrunner/settings.py", line 133, in <module>
django.setup()
File "/usr/local/lib/python3.6/site-packages/django/__init__.py", line 27, in setup
apps.populate(settings.INSTALLED_APPS)
File "/usr/local/lib/python3.6/site-packages/django/apps/registry.py", line 115, in populate
app_config.ready()
File "/usr/local/lib/python3.6/site-packages/django/contrib/admin/apps.py", line 23, in ready
self.module.autodiscover()
File "/usr/local/lib/python3.6/site-packages/django/contrib/admin/__init__.py", line 26, in autodiscover
autodiscover_modules('admin', register_to=site)
File "/usr/local/lib/python3.6/site-packages/django/utils/module_loading.py", line 50, in autodiscover_modules
import_module('%s.%s' % (app_config.name, module_to_search))
File "/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "/Users/xwing/PycharmProjects/testrunner/testrunner/admin.py", line 3, in <module>
from testrunner.models import Host, TestSuite
ImportError: cannot import name 'Host'
Fixing this will help with testing the rest of the build-out scenarios for the testrunner, but I will still need advice on the relative path and environment Python needs in order to know where to look for these libraries. If possible, I can put the libs in the root Python directory so that their location is irrelevant to the problem.
problem file:
from testrunnerlib.test import HostInterface
from testrunnerlib.outcomes import Outcomes
from ping3 import ping
def pinger(host):
    """Ping ``host`` and translate the reply into an ``Outcomes`` result.

    A truthy reply yields ``passed()``, a falsy one yields ``failed(msg)``,
    and any exception raised along the way yields ``aborted(exception=...)``.
    The raw reply is printed before it is evaluated.
    """
    outcome = Outcomes()
    try:
        reply = ping(host)
        print(reply)
        if not reply:
            return outcome.failed(
                'ping had an issue, the following is all we know %s' % reply
            )
        return outcome.passed()
    except Exception as exc:
        return outcome.aborted(exception=exc)
# Script entry point: run the ping check against the target of a new HostInterface.
if __name__ == '__main__':
pinger(HostInterface().target)
only lib with django imports:
import yaml
import subprocess
from testrunner.models import Host, TestSuite
class HostInterface(object):
    """Holds the identifier of the host under test and resolves its name.

    ``target`` starts out as the string ``'not set'`` and is exposed as a
    property so that every assignment and deletion is traced on stdout.
    """

    def __init__(self):
        self._target = 'not set'

    @property
    def target(self):
        """Return the currently stored target value."""
        return self._target

    @target.setter
    def target(self, value):
        print("setter of target called", value)
        self._target = value

    @target.deleter
    def target(self):
        print("deleter of target called")
        del self._target

    def host(self):
        """Return the ``name`` of the ``Host`` row whose id equals ``target``.

        Any exception raised by ``Host.objects.get`` is propagated unchanged.
        """
        out = Host.objects.get(id=self.target).name
        return out
class YamlInterface:
    """Runs every script listed in a stored test suite's YAML document."""

    def __init__(self, yamlfile):
        # Despite the name, this value is used as the id of a TestSuite row
        # (see handle_yaml), not as a filesystem path.
        self.file = yamlfile

    def handle_yaml(self):
        """Load the suite's YAML text and run each ``testsuite`` entry.

        Each entry is treated as the path of one script and executed with
        ``python3``; the exit status of every run is printed.
        """
        data = TestSuite.objects.get(id=self.file)
        yamldata = yaml.safe_load(data.text)
        for script in yamldata['testsuite']:
            # An argument list (no shell) hands the path to the interpreter
            # verbatim, so spaces or shell metacharacters in a stored entry
            # can neither break the command nor be executed by a shell.
            status = subprocess.call(['python3', str(script)])
            print(status)
def run_tests(host, yaml):
    """Point a HostInterface at ``host``, look the host up, then run the suite.

    ``yaml`` is passed to ``YamlInterface`` unchanged.
    """
    interface = HostInterface()
    interface.target = host
    interface.host()  # return value is not used
    YamlInterface(yaml).handle_yaml()
the models:
from __future__ import unicode_literals
from django.db import models
class Host(models.Model):
    """Host record: an IP address, a port and a display name."""

    ip_address = models.CharField(max_length=16)
    port = models.IntegerField()
    name = models.CharField(max_length=256)
class TestSuite(models.Model):
    """Stored test suite: a name, the suite text, an active flag, timestamps."""

    name = models.CharField(max_length=256)
    text = models.TextField()
    is_active = models.BooleanField(default=True)
    # Set on insert only / refreshed on every save, respectively.
    created = models.DateTimeField(auto_now_add=True, auto_now=False)
    updated = models.DateTimeField(auto_now_add=False, auto_now=True)

    def __str__(self):
        """Render the suite as its name."""
        return "%s" % self.name
admin.py
from django.contrib import admin
from django import forms
from testrunner.models import Host, TestSuite
class HostAdmin(admin.ModelAdmin):
    """Admin options for Host: list by name; edit name, IP address and port."""

    list_display = ['name']
    fields = ('name', 'ip_address', 'port')
    # No custom __str__ here: an admin object has no ``name`` attribute
    # ('name' above is only a field name inside the option lists), so
    # returning ``self.name`` raised AttributeError whenever str() was
    # called on it. The inherited ModelAdmin.__str__ is used instead.


admin.site.register(Host, HostAdmin)
class TestSuiteAdmin(admin.ModelAdmin):
    """Admin options for TestSuite that edit the ``text`` field in a textarea."""

    def formfield_for_dbfield(self, db_field, **kwargs):
        """Build the default form field, swapping in a Textarea for ``text``.

        The replacement widget reuses the attrs of the widget it replaces;
        every other field is returned exactly as the parent class built it.
        """
        field = super(TestSuiteAdmin, self).formfield_for_dbfield(db_field, **kwargs)
        if db_field.name != 'text':
            return field
        field.widget = forms.Textarea(attrs=field.widget.attrs)
        return field


admin.site.register(TestSuite, TestSuiteAdmin)
You need to make a Django Management Command. This will let you create scripts that will allow you to use all of Django's features.
And you would run this command as python3 manage.py ping_google
To create a management command,
In your app's folder, create a module called management (make a folder called management and place an __init__.py file in it).
Inside the management folder, create a commands module (a folder with an __init__.py file).
Inside the commands folder, create your ping_google.py file.
Commands are written like this,
from django.core.management.base import BaseCommand, CommandError
class Command(BaseCommand):
    """Skeleton of a custom management command."""

    help = 'Desc of your command'

    def handle(self, *args, **options):
        """Run the command; called with the parsed arguments and options.

        The docstring also serves as the method body, so the skeleton is
        valid Python even before any logic is added.
        """
        # Your logic goes here
You can read more on custom django commands here
I took this example from scrapy.org. It worked fine until I tried to save everything in an items object. The items.append(item) is apparently invalid syntax, but all other examples on this website have the same assignment.
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from tutorial.items import DmozItem
class DmozSpider(BaseSpider):
    """Spider that collects title, link and description from dmoz listings."""

    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        """Return a list with one DmozItem per ``//ul/li`` node in ``response``."""
        hxs = HtmlXPathSelector(response)
        sites = hxs.select('//ul/li')
        items = []
        for site in sites:
            item = DmozItem()
            item['title'] = site.select('a/text()').extract()
            # '@href' selects the anchor's href attribute; the '#href' form
            # that appeared here is not valid XPath.
            item['link'] = site.select('a/@href').extract()
            item['desc'] = site.select('text()').extract()
            items.append(item)
        return items
Error is:
computerito@computerito-the-great ~/SHITSHOW/tutorial $ scrapy crawl dmoz
2015-03-10 22:00:40-0700 [scrapy] INFO: Scrapy 0.14.4 started (bot: tutorial)
2015-03-10 22:00:40-0700 [scrapy] DEBUG: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, MemoryUsage, SpiderState
Traceback (most recent call last):
File "/usr/bin/scrapy", line 4, in <module>
execute()
File "/usr/lib/python2.7/dist-packages/scrapy/cmdline.py", line 132, in execute
_run_print_help(parser, _run_command, cmd, args, opts)
File "/usr/lib/python2.7/dist-packages/scrapy/cmdline.py", line 97, in _run_print_help
func(*a, **kw)
File "/usr/lib/python2.7/dist-packages/scrapy/cmdline.py", line 139, in _run_command
cmd.run(args, opts)
File "/usr/lib/python2.7/dist-packages/scrapy/commands/crawl.py", line 43, in run
spider = self.crawler.spiders.create(spname, **opts.spargs)
File "/usr/lib/python2.7/dist-packages/scrapy/command.py", line 34, in crawler
self._crawler.configure()
File "/usr/lib/python2.7/dist-packages/scrapy/crawler.py", line 36, in configure
self.spiders = spman_cls.from_crawler(self)
File "/usr/lib/python2.7/dist-packages/scrapy/spidermanager.py", line 37, in from_crawler
return cls.from_settings(crawler.settings)
File "/usr/lib/python2.7/dist-packages/scrapy/spidermanager.py", line 33, in from_settings
return cls(settings.getlist('SPIDER_MODULES'))
File "/usr/lib/python2.7/dist-packages/scrapy/spidermanager.py", line 23, in __init__
for module in walk_modules(name):
File "/usr/lib/python2.7/dist-packages/scrapy/utils/misc.py", line 65, in walk_modules
submod = __import__(fullpath, {}, {}, [''])
File "/home/computerito/SHITSHOW/tutorial/tutorial/spiders/dmoz_spider.py", line 25
items.append(item)
^
SyntaxError: invalid syntax
I have two classes which import each other:
profile/models.py
# Company model from profile/models.py; its fields are elided in the question.
class Company(models.Model):
name = ...
# Review model as posted: CompanyAction is imported between the two fields,
# right before the ForeignKey that uses it. action/models.py in turn imports
# Company from this module, which is the circular import being asked about.
class CompanyReview(models.Model):
company = models.ForeignKey(Company)
from action.models import CompanyAction
action = models.ForeignKey(CompanyAction)
action/models.py
from profile.models import Company
# Action model from action/models.py; its optional ForeignKey takes the Company
# class imported from profile.models on the line above.
class CompanyAction(models.Model):
company = models.ForeignKey(Company, null = True, blank = True)
The circular import works when the Django app is executed on the server or when I call view functions in the shell. However, when I import one of the classes, Django command will fail with an error (see Traceback below).
Why is that the case and only causing a problem in the command method?
How can I avoid the error? I have tried a lazy import of the CompanyAction class, but it led to the same error message.
not working alternative:
# Attempted workaround from the question: look the model up with get_model
# instead of importing it. Per the traceback below, the lookup yields None when
# the management command imports this module, so ForeignKey(None) is rejected.
class CompanyReview(models.Model):
company = models.ForeignKey(Company)
from django.db.models import get_model
_model = get_model('action', 'CompanyAction')
action = models.ForeignKey(_model)
Interestingly, the variable _model is empty if I execute my command function and the classes are imported. When I load ./manage.py shell, the variable contains the correct class name. Why is that the case?
Traceback
(virtual-env)PC:neurix$ python manage.py close_action
Traceback (most recent call last):
File "manage.py", line 10, in <module>
execute_from_command_line(sys.argv)
File "/Users/Development/virtual-re/lib/python2.7/site-packages/django/core/management/__init__.py", line 453, in execute_from_command_line
utility.execute()
File "/Users/Development/virtual-re/lib/python2.7/site-packages/django/core/management/__init__.py", line 392, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/Users/Development/virtual-re/lib/python2.7/site-packages/django/core/management/__init__.py", line 272, in fetch_command
klass = load_command_class(app_name, subcommand)
File "/Users/Development/virtual-re/lib/python2.7/site-packages/django/core/management/__init__.py", line 77, in load_command_class
module = import_module('%s.management.commands.%s' % (app_name, name))
File "/Users/Development/virtual-re/lib/python2.7/site-packages/django/utils/importlib.py", line 35, in import_module
__import__(name)
File "/Users/Development/project/apps/action/management/commands/close_action.py", line 2, in <module>
from action.models import CompanyAction
File "/Users/Development/project/apps/action/models.py", line 26, in <module>
from profile.models import Company
File "/Users/Development/apps/profile/models.py", line 436, in <module>
class CompanyReview(models.Model):
File "/Users/Development/project/apps/profile/models.py", line 446, in CompanyReview
action = models.ForeignKey(_model)
File "/Users/Development/virtual-re/lib/python2.7/site-packages/django/db/models/fields/related.py", line 993, in __init__
assert isinstance(to, six.string_types), "%s(%r) is invalid. First parameter to ForeignKey must be either a model, a model name, or the string %r" % (self.__class__.__name__, to, RECURSIVE_RELATIONSHIP_CONSTANT)
AssertionError: ForeignKey(None) is invalid. First parameter to ForeignKey must be either a model, a model name, or the string 'self'
Django has a system for stopping circular imports on foreign keys detailed here: https://docs.djangoproject.com/en/dev/ref/models/fields/#foreignkey
You would want to do something like:
# Answer's version: the related model is named by an 'app_label.ModelName'
# string, so action.models does not have to be imported in this module.
class CompanyReview(models.Model):
company = models.ForeignKey(Company)
action = models.ForeignKey('action.CompanyAction')
# Same approach on the action side: Company is referenced by string, not imported.
class CompanyAction(models.Model):
company = models.ForeignKey('profile.Company', null = True, blank = True)