Python 3 urlopen context manager mocking - unit-testing

I am new to testing and need some help here.
Assume we have this function:
from urllib.request import urlopen
def get_posts():
    """Fetch the posts document and return it as parsed JSON.

    Opens 'some url here' with ``urlopen``, reads the whole response body,
    decodes it as UTF-8 and parses the text with ``json.loads``.
    """
    # Imported locally because the snippet itself only imports ``urlopen``.
    import json

    with urlopen('some url here') as data:
        return json.loads(data.read().decode('utf-8'))
The question is how to test this method (using mock.patch decorator if possible)?
What I have now:
@mock.patch('mymodule.urlopen')
def test_get_post(self, mocked_urlopen):
    # NOTE(review): the attributes below are set on the patched callable
    # itself, whereas ``with urlopen(...) as data`` enters the object that
    # calling it returns (``mocked_urlopen.return_value``).
    mocked_urlopen.__enter__ = Mock(return_value=self.test_data)
    mocked_urlopen.__exit__ = Mock(return_value=False)
    ...
But it does not seem to be working.
P.S. Is there a convenient way to work with the data variable (whose type is HTTPResponse) in a test, so that it could just be a simple string?

I was fighting with this as well, and finally figured it out. (Python 3 syntax):
import urllib.request
import unittest
from unittest.mock import patch, MagicMock
class TestUrlopen(unittest.TestCase):
    """Show how to mock ``urllib.request.urlopen`` with and without ``with``."""

    @patch('urllib.request.urlopen')
    def test_cm(self, mock_urlopen):
        """``urlopen`` used as a context manager."""
        cm = MagicMock()
        cm.getcode.return_value = 200
        cm.read.return_value = 'contents'
        # ``with ... as response`` binds whatever __enter__ returns, so it
        # has to hand back the configured mock itself.
        cm.__enter__.return_value = cm
        mock_urlopen.return_value = cm

        with urllib.request.urlopen('http://foo') as response:
            self.assertEqual(response.getcode(), 200)
            self.assertEqual(response.read(), 'contents')

    @patch('urllib.request.urlopen')
    def test_no_cm(self, mock_urlopen):
        """``urlopen`` used as a plain call whose result is closed by hand."""
        cm = MagicMock()
        cm.getcode.return_value = 200
        cm.read.return_value = 'contents'
        mock_urlopen.return_value = cm

        response = urllib.request.urlopen('http://foo')
        self.assertEqual(response.getcode(), 200)
        self.assertEqual(response.read(), 'contents')
        response.close()

here is my take on this
from urllib.request import urlopen
from unittest.mock import patch
class Mock():
    """Minimal stand-in for the object returned by ``urlopen``.

    It can be entered with ``with``, ``read()`` returns the object itself,
    ``decode()`` returns an empty string, and iterating it yields nothing.
    """

    def __init__(self, request, context=None):
        # ``context`` is optional so that a one-argument call such as
        # ``urlopen('some url here')`` can construct the stand-in.
        self.request = request
        self.context = context

    def __enter__(self):
        # Needed for ``with urlopen(...) as data``; ``data`` is this object.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returning False lets any exception from the ``with`` body propagate.
        return False

    def read(self):
        return self

    def decode(self, arg):
        return ''

    def __iter__(self):
        return self

    def __next__(self):
        raise StopIteration
with patch('urllib.request.urlopen', Mock):
    # do whatever over here
    # NOTE(review): this replaces the ``urlopen`` attribute on the
    # ``urllib.request`` module; the name bound earlier by
    # ``from urllib.request import urlopen`` is not rebound by it.

`with urlopen('some url here') as data` uses the object returned by urlopen as a context manager.
A file-like object can also be used as a context manager, so a simpler approach here is to use io.StringIO:
import io
import json
import urllib.request
from unittest.mock import patch
def get_posts():
    """Open 'some url here' and return the response parsed as JSON.

    The response object is handed straight to ``json.load``, so anything
    file-like works in its place.
    """
    with urllib.request.urlopen('some url here') as data:
        return json.load(data)
def test_get_posts():
    """Check ``get_posts`` against an in-memory response."""
    # StringIO is file-like and usable in ``with``, so it can stand in for
    # the object ``urlopen`` returns.
    data = io.StringIO('{"id": 123}')
    with patch.object(urllib.request, 'urlopen', return_value=data):
        assert get_posts() == {"id": 123}

OK, so I have written a simple class to simulate a context manager.
class PatchContextManager:
    """Patch a callable so that whatever it returns can be used in ``with``.

    While active, the target named by ``method`` is replaced by a mock whose
    return value is a ``MagicMock`` context manager: its ``__enter__``
    returns ``enter_return`` and its ``__exit__`` returns ``exit_return``.
    """

    def __init__(self, method, enter_return, exit_return=False):
        self._patched = patch(method)
        self._enter_return = enter_return
        self._exit_return = exit_return

    def __enter__(self):
        res = self._patched.__enter__()
        res.context = MagicMock()
        res.context.__enter__.return_value = self._enter_return
        res.context.__exit__.return_value = self._exit_return
        # Calling the patched target now yields the fake context manager.
        res.return_value = res.context
        return res

    def __exit__(self, type, value, tb):
        # Forward the exception details to the underlying patcher instead of
        # dropping them.
        return self._patched.__exit__(type, value, tb)
Usage:
with PatchContextManager('mymodule.method', 'return_string') as mocked:
    # ``a`` is the mocked context manager; ``with a as x`` gives
    # x == 'return_string'.
    a = mymodule.method(47)
    mocked.assert_called_with(47)
    ...

Related

Second request doesn't call the callback

My method parse_ads_info is never called and I don't know why. No error occurs. I want to get the links for each ad (parse), go to the ads one by one (parse_ads_urls) and scrape the data (parse_ads_info), but this last method is never called.
Here is my code:
# -*- coding: utf-8 -*-
from scrapy import Request, Spider
#from zapimoveis.items import ads_info
from scrapy.selector import Selector
#from scrapy.loader import ItemLoader
proxy_list = ["###","###"]
PROXY = "###"
class AdsSpider(Spider):
    """Crawl the result pages of a zapimoveis search and visit every ad.

    ``parse`` reads the number of result pages, ``parse_ads_urls`` collects
    the detail link of each ad on a page, and ``parse_ads_info`` prints a
    value taken from the ad page.
    """
    name = "zapimoveis"
    # allowed_domains takes bare domain names, not URLs.
    allowed_domains = ["zapimoveis.com.br"]

    def __init__(self, start_url='', *args, **kwargs):
        super(AdsSpider, self).__init__(*args, **kwargs)
        self.start_urls = [start_url]
        # Fragment appended to the start URL; ``%d`` is filled with the page
        # number in ``parse``.
        self.json = (
            '#{"precomaximo":"2147483647","parametrosautosuggest":[{'
            '"Bairro":"JD CAMBURI","Zona":"","Cidade":"VITORIA",'
            '"Agrupamento":"","Estado":"ES"}],"pagina":"%d",'
            '"ordem":"DataAtualizacao","paginaOrigem":"ResultadoBusca",'
            '"semente":"2137391350","formato":"Lista"}'
        )

    def start_requests(self):
        yield Request(url=self.start_urls[0], callback=self.parse,
                      meta={'proxy': PROXY})

    def parse(self, response):
        """Request every result page listed in the page counter."""
        n_pages = response.css(
            'span[class="pull-right num-of"]::text').extract_first()
        if n_pages is None:
            # Without the counter there is nothing to paginate over.
            self.logger.warning("Page counter not found on %s", response.url)
            return
        n_pages = int(n_pages.replace("de ", ""))
        for i in range(1, n_pages + 1):
            yield Request(url=self.start_urls[0] + (self.json % i),
                          callback=self.parse_ads_urls, dont_filter=True,
                          meta={'proxy': PROXY})

    def parse_ads_urls(self, response):
        """Request the detail page of each ad found on a result page."""
        for article in response.css('article[class=minificha]'):
            url_to_ads = article.css(
                'a[class=btn-ver-detalhes]::attr(href)').extract_first()
            if not url_to_ads:
                # An article without a detail link cannot be followed.
                continue
            # urljoin turns a relative href into an absolute URL.
            yield Request(url=response.urljoin(url_to_ads),
                          callback=self.parse_ads_info, dont_filter=True,
                          meta={'proxy': proxy_list[0]})

    def parse_ads_info(self, response):
        """Print the first 'value-ficha' text of an ad page."""
        print("#--------->")
        print(response.css('span[class=value-ficha]::text').extract_first())
I removed my personal proxies.
(2017-06-06) EDIT 1:
Output log : https://pastebin.com/4jv2r9um

How to perform the function, after all crawling is done in scrapy?

The spider_closed() function is not running properly. If I put just a print statement in it, it prints, but if I make a function call and return the value, it does not work.
import scrapy
import re
from pydispatch import dispatcher
from scrapy import signals
from SouthShore.items import Product
from SouthShore.internalData import internalApi
from scrapy.http import Request
class bestbuycaspider(scrapy.Spider):
    """Spider over Best Buy Canada searches that reports once crawling ends.

    ``spider_closed`` is connected to the ``spider_closed`` signal in
    ``__init__`` and hands the collected data to ``internalApi``.
    """
    name = "bestbuy_dca"
    allowed_domains = ["bestbuy.ca"]
    start_urls = [
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+beds",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+night+stand",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+headboard",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+desk",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+bookcase",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+dresser",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+tv+stand",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+armoire",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+kids",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+changing+table",
        "http://www.bestbuy.ca/Search/SearchResults.aspx?type=product&page=1&sortBy=relevance&sortDir=desc&pageSize=96&query=south+shore+furniture+baby",
    ]

    def __init__(self, jsondetails="", serverdetails="", *args, **kwargs):
        super(bestbuycaspider, self).__init__(*args, **kwargs)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
        self.jsondetails = jsondetails
        self.serverdetails = serverdetails
        self.data = []
        # Initialised here so spider_closed can store into it.
        self.results = {}

    def parse(self, response):
        #my stuff here
        pass

    def spider_closed(self, spider):
        """Signal handler run when the spider closes.

        It must be a plain function: with a ``yield`` in the body, calling
        it only creates a generator and none of the statements execute.
        """
        print("returning values")
        self.results['extractedData'] = self.data
        # NOTE(review): this rebinding discards the 'extractedData' entry
        # stored just above - confirm that is intended.
        self.results = internalApi(self.jsondetails, self.serverdetails)
        print(self.results)
1) I want to call some function and return the scraped values
You can create an Item Pipeline with close_spider() method:
class MyPipeline(object):
    """Item pipeline that runs ``do_something_here()`` when the spider closes."""

    def close_spider(self, spider):
        do_something_here()
Just don't forget to activate it in settings.py as described in the documentation linked above.

How to mock sqlite3.connect in Python

I am using Python 3.3, under the Anaconda environment.
I would like to mock sqlite3.connect. For example in MyTests (see below), I would like test_sqlite3_connect to return the string connection rather than an actual sqlite3.Connection object.
I have tried patching it, but that does not work.
from unittest.mock import patch
import unittest
import sqlite3
@patch('sqlite3.connect')
def sqlite3_connect(self, connection_string):
    # NOTE(review): used as a decorator, ``patch`` replaces sqlite3.connect
    # only while this function is being called and passes the mock in as an
    # extra argument; it does not install this function as the replacement.
    print('connect with : {0}'.format(connection_string))
    return 'connection '
class MyTests(unittest.TestCase):
    """Test case that constructs a ``DataBaseClass``."""

    def test_sqlite3_connect(self):
        print('testing connection')
        dbc = DataBaseClass()
class DataBaseClass():
    """Holds the connection returned by ``sqlite3.connect('test database')``."""

    def __init__(self):
        print('initialising database class')
        self.connection = sqlite3.connect('test database')
I managed to solve the question using information from the Quick Guide section of
http://www.voidspace.org.uk/python/mock/
The following code shows two ways of mocking sqlite3.connect.
''' An example of how to mock the sqlite3.connection method '''
from unittest.mock import MagicMock,Mock
import unittest
import sqlite3
class MyTests(unittest.TestCase):
    """Two ways of mocking ``sqlite3.connect``: return_value and side_effect.

    The replacement is installed through a patcher that is undone after
    each test, so the mock does not leak into other tests.
    """

    def _patch_connect(self, replacement):
        """Install ``replacement`` as sqlite3.connect for the current test."""
        patcher = unittest.mock.patch.object(sqlite3, 'connect', replacement)
        patcher.start()
        self.addCleanup(patcher.stop)
        return replacement

    def test_sqlite3_connect_success(self):
        self._patch_connect(MagicMock(return_value='connection succeeded'))

        dbc = DataBaseClass()
        sqlite3.connect.assert_called_with('test_database')
        self.assertEqual(dbc.connection, 'connection succeeded')

    def test_sqlite3_connect_fail(self):
        self._patch_connect(MagicMock(return_value='connection failed'))

        dbc = DataBaseClass()
        sqlite3.connect.assert_called_with('test_database')
        self.assertEqual(dbc.connection, 'connection failed')

    def test_sqlite3_connect_with_sideaffect(self):
        self._setup_mock_sqlite3_connect()

        dbc = DataBaseClass('good_connection_string')
        self.assertTrue(dbc.connection)
        sqlite3.connect.assert_called_with('good_connection_string')

        dbc = DataBaseClass('bad_connection_string')
        self.assertFalse(dbc.connection)
        sqlite3.connect.assert_called_with('bad_connection_string')

    def _setup_mock_sqlite3_connect(self):
        """Make sqlite3.connect return a value chosen by its argument."""
        values = {'good_connection_string': True,
                  'bad_connection_string': False}

        def side_effect(arg):
            return values[arg]

        self._patch_connect(Mock(side_effect=side_effect))
class DataBaseClass():
    """Holds the object returned by ``sqlite3.connect(connection_string)``."""

    def __init__(self, connection_string='test_database'):
        self.connection = sqlite3.connect(connection_string)

Django Multilingual Text Field using JSON

I recently asked the question "Custom Django MultilingualTextField model field", but I found no good reason why I should not do this, so I created a model field that supports multilingual text and automatically returns the text in the current language. Basically, the field stores a custom Language object in the database in JSON format. Here is the code:
Github: https://github.com/james4388/django-multilingualfield
Usage:
from django.db import models
from multilingualfield import MLTextField, MLHTMLField
class MyModel(models.Model):
    """Example model with one multilingual text field and one HTML field."""
    text = MLTextField()
    html = MLHTMLField()
Use it like a normal text field; the translation is chosen automatically based on the active language (translation.get_language).
>>>from django.utils import translation
>>>translation.activate('en')
>>>m = MyModel.objects.create(text='Hello world',html='<b>Hello world</b>');
>>>m.text
Hello world
>>>translation.activate('fr')
>>>m.text #Auto fallback to first language (if any).
Hello world
>>>m.text.value('Bonjour')
>>>m.text.value('Ciao','es')
>>>m.text
Bonjour
>>>m.save()
>>>m.text.get_available_language()
['en', 'fr', 'es']
>>>m.text.remove_language('en')
Field.py
from __future__ import unicode_literals
from django.core.exceptions import ValidationError
from django.conf import settings
from django.db import models, DatabaseError, transaction
from django.utils.translation import ugettext_lazy as _, get_language
from django.utils import six
try:
import json
except ImportError:
from django.utils import simplejson as json
def get_base_language(lang):
    """Return the part of a language code before the first '-'.

    'en-us' gives 'en'; a code without '-' is returned unchanged. Empty
    values (``None`` or '') are returned as they are instead of raising.
    """
    if not lang:
        return lang
    return lang.split('-')[0]
def get_current_language(base=True):
    """Return the value of ``get_language()``.

    With ``base`` true the result is first passed through
    ``get_base_language``.
    """
    language = get_language()
    if base:
        return get_base_language(language)
    return language
from .widgets import MultilingualWidget, MultilingualHTMLWidget
from .forms import MultilingualTextFormField, MultilingualHTMLFormField
from .language import LanguageText
class MultilingualTextField(six.with_metaclass(models.SubfieldBase, models.Field)):
    """
    A field that supports multilingual text for your model.

    Values are ``LanguageText`` objects in Python and are stored in a text
    column as the JSON dump of ``LanguageText.values``.
    """
    default_error_messages = {
        'invalid': _("'%s' is not a valid JSON string.")
    }
    description = "Multilingual text field"

    def __init__(self, *args, **kwargs):
        self.lt_max_length = kwargs.pop('max_length', -1)
        # Popped (not just read) so the option is not forwarded to
        # models.Field.__init__ as an unknown keyword argument.
        self.default_language = kwargs.pop('default_language', get_current_language())
        super(MultilingualTextField, self).__init__(*args, **kwargs)

    def _new_language_text(self, *args, **kwargs):
        """Build a LanguageText carrying this field's length and default language."""
        return LanguageText(*args, max_length=self.lt_max_length,
                            default_language=self.default_language, **kwargs)

    def formfield(self, **kwargs):
        defaults = {
            'form_class': MultilingualTextFormField,
            'widget': MultilingualWidget
        }
        defaults.update(**kwargs)
        return super(MultilingualTextField, self).formfield(**defaults)

    def validate(self, value, model_instance):
        if not self.null and value is None:
            raise ValidationError(self.error_messages['null'])
        try:
            self.get_prep_value(value)
        except Exception:
            raise ValidationError(self.error_messages['invalid'] % value)

    def get_internal_type(self):
        return 'TextField'

    def db_type(self, connection):
        return 'text'

    def to_python(self, value):
        """Turn a stored or assigned string into a LanguageText.

        Non-string values are returned untouched. A string holding a JSON
        object becomes the ``values`` of a new LanguageText; any other
        string is taken as plain text for the current language.
        """
        if not isinstance(value, six.string_types):
            return value
        if value == "":
            if self.null:
                return None
            if self.blank:
                return ""
        try:
            decoded = json.loads(value)
        except ValueError:
            decoded = None
        # Only a JSON object can serve as the language -> text mapping;
        # strings such as "123" are valid JSON but are plain text here.
        if isinstance(decoded, dict):
            lang = self._new_language_text()
            lang.values = decoded
            return lang
        try:
            return self._new_language_text(value, language=None)
        except Exception:
            msg = self.error_messages['invalid'] % value
            raise ValidationError(msg)

    def get_db_prep_value(self, value, connection=None, prepared=None):
        return self.get_prep_value(value)

    def get_prep_value(self, value):
        """Return the JSON text to store, or None / "" for empty values."""
        if value is None:
            if not self.null and self.blank:
                return ""
            return None
        if isinstance(value, six.string_types):
            value = self._new_language_text(value, language=None)
        if isinstance(value, LanguageText):
            value.max_length = self.lt_max_length
            value.default_language = self.default_language
            return json.dumps(value.values)
        return None

    def get_prep_lookup(self, lookup_type, value):
        if lookup_type in ["exact", "iexact"]:
            return self.to_python(self.get_prep_value(value))
        if lookup_type == "in":
            return [self.to_python(self.get_prep_value(v)) for v in value]
        if lookup_type == "isnull":
            return value
        if lookup_type in ["contains", "icontains"]:
            if isinstance(value, (list, tuple)):
                raise TypeError("Lookup type %r not supported with argument of %s" % (
                    lookup_type, type(value).__name__
                ))
            # Need a way to combine the values with '%', but don't escape that.
            return self.get_prep_value(value)[1:-1].replace(', ', r'%')
        raise TypeError('Lookup type %r not supported' % lookup_type)

    def value_to_string(self, obj):
        return self._get_val_from_obj(obj)
Forms.py
from django import forms
from django.utils import simplejson as json
from .widgets import MultilingualWidget, MultilingualHTMLWidget
from .language import LanguageText
class MultilingualTextFormField(forms.CharField):
    """Form field whose cleaned value is a ``LanguageText``."""
    widget = MultilingualWidget

    def __init__(self, *args, **kwargs):
        kwargs['widget'] = MultilingualWidget
        super(MultilingualTextFormField, self).__init__(*args, **kwargs)

    def clean(self, value):
        """
        Run the CharField cleaning, then convert the resulting string.

        A string that parses as JSON becomes the ``values`` of a new
        LanguageText; any other string is stored as plain text. Empty and
        non-string values are returned unchanged.
        """
        value = super(MultilingualTextFormField, self).clean(value)
        if not value:
            return value
        # NOTE: ``basestring`` and ``unicode`` below are Python 2 names.
        if not isinstance(value, basestring):
            return value
        try:
            valuejson = json.loads(value)
        except ValueError:
            try:
                return LanguageText(value, language=None)
            except Exception as exc:
                # Bind the exception so the message can include it.
                raise forms.ValidationError(
                    'JSON decode error: %s' % (unicode(exc),)
                )
        Lang = LanguageText()
        Lang.values = valuejson
        return Lang
Language object in language.py
from __future__ import unicode_literals
from django.core.exceptions import ValidationError
from django.conf import settings
from django.db import models, DatabaseError, transaction
from django.utils.translation import ugettext_lazy as _, get_language
try:
import json
except ImportError:
from django.utils import simplejson as json
def get_base_language(lang):
    """Return the part of a language code before the first '-'.

    'en-us' gives 'en'; a code without '-' is returned unchanged. Empty
    values (``None`` or '') are returned as they are instead of raising.
    """
    if not lang:
        return lang
    return lang.split('-')[0]
def get_current_language(base=True):
    """Return the value of ``get_language()``.

    With ``base`` true the result is first passed through
    ``get_base_language``.
    """
    language = get_language()
    if base:
        return get_base_language(language)
    return language
class LanguageText(object):
    '''
    Text available in several languages.

    ``values`` maps a language code to the text in that language.
    ``default_language`` is one of the fallbacks used by ``get``, and
    ``max_length`` (-1 for no limit) truncates texts when they are set.
    '''
    values = {}
    default_language = None
    max_length = -1

    def __init__(self, value=None, language=None, default_language=None, max_length=-1):
        self.max_length = max_length
        self.default_language = default_language
        # Per-instance dict; the class-level ``values`` is never mutated.
        self.values = {}
        if value is not None:
            self.value(value, language)

    def __call__(self, value=None, language=None):
        self.value(value, language)
        return self

    def get_available_language(self):
        """Return the language codes that have a text, as a list."""
        return list(self.values.keys())

    def get_current_language(self, base=False):
        return get_current_language(base)

    def remove_language(self, lang):
        """Remove and return the text for ``lang`` (None when absent)."""
        return self.values.pop(lang, None)

    def has_language(self, lang):
        return lang in self.values

    def get(self, language=None, fallback=True):
        """Return the text for ``language`` (default: the current language).

        With ``fallback`` true, tries in order: the exact code, its base
        language ('fr' for 'fr-ca'), ``default_language``, then the first
        stored text. Returns None when nothing matches.
        """
        if language is None:
            curr_lang = get_current_language(False)
        else:
            curr_lang = language
        if curr_lang in self.values:
            return self.values[curr_lang]
        if not fallback:
            return None
        # Base of the language being looked up, so an explicit 'fr-ca'
        # falls back to 'fr'.
        curr_lang_base = curr_lang.split('-')[0] if curr_lang else curr_lang
        if curr_lang_base in self.values:
            return self.values[curr_lang_base]
        if self.default_language in self.values:
            return self.values[self.default_language]
        if self.values:
            return next(iter(self.values.values()))
        return None

    def value(self, value=None, language=None):
        """Get the text (``value`` is None) or set it for ``language``."""
        if value is None:  # Get value
            return self.get(language)
        # Set value
        if language is None:
            language = get_current_language(False)
        if self.max_length != -1:
            value = value[:self.max_length]
        self.values[language] = value
        return None

    def __unicode__(self):
        return self.value()

    def __str__(self):
        text = self.value()
        if str is bytes:  # Python 2: str() must return encoded bytes
            return unicode(text).encode('utf-8')
        return str(text)

    def __repr__(self):
        return self.__str__()
widgets.py
from django import forms
from django.utils import simplejson as json
from django.conf import settings
from .language import LanguageText
from django.template import loader, Context
class MultilingualWidget(forms.Textarea):
    """Textarea widget rendered through 'multilingualtextarea.html'."""

    def __init__(self, *args, **kwargs):
        forms.Widget.__init__(self, *args, **kwargs)

    def render(self, name, value, attrs=None):
        """Render the template for ``None`` or a LanguageText value.

        Any other value type yields the plain string
        "Invalid data '<value>'".
        """
        if value is None:  # New create or edit none
            vjson = '{}'
            Lang = '[]'
        elif isinstance(value, LanguageText):
            vjson = json.dumps(value.values)
            Lang = json.dumps(list(value.get_available_language()))
        else:
            return "Invalid data '%s'" % value

        # Both supported cases render the same template and context keys.
        Langs = json.dumps(dict(settings.LANGUAGES))
        t = loader.get_template('multilingualtextarea.html')
        c = Context({"data": value, "vjson": vjson, "lang": Lang, "langs": Langs,
                     "langobjs": settings.LANGUAGES, "fieldname": name})
        return t.render(c)
So I would like to know: is this a good approach? Why shouldn't I do this? Please help.
Code looks good to me.
The only thing that could impact performance is the frequent json encoding/decoding... yet, that shouldn't have a major impact unless you are facing thousands of users on a server with minimal resources.
The previous question you linked to contains some comments noting that adding additional languages might be easier using other means. But in the end - that's a mixture between personal preferences and maintainability. If it fits your project goals, I can't see any reason not to do it the way you've coded it.
Providing proof that your implementation is the best is near to impossible. That is, unless you prove it yourself by creating a different, non-json based implementation and benchmark both on your production server. You'll notice differences will be rather minimal on regular machines. Yet, only the individual numbers will provide actual proof and can help you decide if it's "tuned" and "resource -friendly" enough for your project's purposes. I think it will fit your needs... but that's only my 2 cents.

Django soaplib error

I'm trying to make a little "Hello World" webservice with Django following a few tutorials, but I'm hitting the same barrier over and over. I've defined a view.py and soaplib_handler.py:
view.py:
from soaplib_handler import DjangoSoapApp, soapmethod, soap_types
class HelloWorldService(DjangoSoapApp):
    """SOAP service exposing a single ``hello`` method."""
    __tns__ = 'http://saers.dk/soap/'

    # NOTE(review): the declared return type is an array of strings while
    # the method returns a single string.
    @soapmethod(_returns=soap_types.Array(soap_types.String))
    def hello(self):
        return "Hello World"
soaplib_handler.py:
from soaplib.wsgi_soap import SimpleWSGISoapApp
from soaplib.service import soapmethod
from soaplib.serializers import primitive as soap_types
from django.http import HttpResponse
class DjangoSoapApp(SimpleWSGISoapApp):
    """Adapter that lets a soaplib WSGI application be used as a Django view."""

    def __call__(self, request):
        django_response = HttpResponse()

        def start_response(status, headers):
            # ``status`` looks like "200 OK"; only the numeric part is kept.
            status, reason = status.split(' ', 1)
            django_response.status_code = int(status)
            for header, value in headers:
                django_response[header] = value

        # NOTE(review): super() is given SimpleWSGISoapApp rather than this
        # class, so the lookup starts after SimpleWSGISoapApp in the MRO -
        # confirm this is intended.
        response = super(SimpleWSGISoapApp, self).__call__(request.META, start_response)
        django_response.content = "\n".join(response)
        return django_response
And it seems the "response = super...." line is giving me trouble. When I load up /hello_world/service.wsdl mapped in url.py I get:
AttributeError at /hello_world/service.wsdl
'module' object has no attribute 'tostring'
For the full error message, see here:
http://saers.dk:8000/hello_world/service.wsdl
Do you have any suggestion as to why I get this error? And where is ElementTree defined?
@zdmytriv The line
soap_app_response = super(BaseSOAPWebService, self).__call__(environ, start_response)
should look like
soap_app_response = super(DjangoSoapApp, self).__call__(environ, start_response)
then your example works.
Not sure if this will solve your problem, but the decorator on your function hello says that it is supposed to return a String Array, while you are actually returning a String.
Try _returns=soap_types.String instead
Ray
Copy/paste from my service:
# SoapLib Django workaround: http://www.djangosnippets.org/snippets/979/
class DumbStringIO(StringIO):
    """ Helper class for BaseWebService """

    def read(self, n=-1):
        # The requested size is ignored: every call returns the whole
        # buffer. ``n`` has a default so a bare ``read()`` also works.
        return self.getvalue()
class DjangoSoapApp(SimpleWSGISoapApp):
    """Adapter that lets a soaplib WSGI application be used as a Django view."""

    def __call__(self, request):
        """ Makes Django request suitable for SOAPlib SimpleWSGISoapApp class """
        http_response = HttpResponse()

        def start_response(status, headers):
            # ``status`` looks like "200 OK"; only the numeric part is kept.
            status, reason = status.split(' ', 1)
            http_response.status_code = int(status)
            for header, value in headers:
                http_response[header] = value

        environ = request.META.copy()
        body = ''.join(['%s=%s' % v for v in request.POST.items()])
        environ['CONTENT_LENGTH'] = len(body)
        environ['wsgi.input'] = DumbStringIO(body)
        environ['wsgi.multithread'] = False

        # super() must name this class; the snippet referred to
        # BaseSOAPWebService, which is not defined here.
        soap_app_response = super(DjangoSoapApp, self).__call__(environ, start_response)
        http_response.content = "\n".join(soap_app_response)
        return http_response
Django snippet has a bug. Read last two comments from that url.