Configure Django to find all doctests in all modules?

If I run the following command:
>python manage.py test
Django looks at tests.py in my application and runs any doctests or unit tests in that file. It also looks at the __test__ dictionary for extra tests to run, so I can link in doctests from other modules like so:
# tests.py
from myapp.module1 import _function1, _function2

__test__ = {
    "_function1": _function1,
    "_function2": _function2,
}
If I want to include more doctests, is there an easier way than enumerating them all in this dictionary? Ideally, I just want to have Django find all doctests in all modules in the myapp application.
Is there some kind of reflection hack that would get me where I want to be?

I solved this for myself a while ago:
import sys
from django.conf import settings

apps = settings.INSTALLED_APPS
for app in apps:
    try:
        a = app + '.test'
        __import__(a)
        m = sys.modules[a]
    except ImportError:  # no test jobs for this module, continue to the next one
        continue
    # run your tests using the imported module m
This allowed me to put per-module tests in their own test.py file, so they didn't get mixed up with the rest of my application code. It would be easy to modify this to just look for doctests in each of your modules and run them if it finds any.
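For instance, a minimal sketch of that last step, using the standard doctest module on each imported module m:
import doctest

# Run any doctests found in the imported module; testmod returns
# a (failure_count, test_count) pair and prints failures to stdout.
failures, attempted = doctest.testmod(m)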

Use django-nose, since nose automatically finds all tests recursively.

Here are the key elements of the solution:
tests.py:
import doctest, imp, os, re, unittest
import myapp.tests

def find_modules(package):
    """Return list of imported modules from given package"""
    files = [re.sub(r'\.py$', '', f) for f in os.listdir(os.path.dirname(package.__file__))
             if f.endswith(".py") and os.path.basename(f) not in ('__init__.py', 'test.py')]
    return [imp.load_module(file, *imp.find_module(file, package.__path__)) for file in files]

def suite(package=None):
    """Assemble test suite for Django default test loader"""
    if not package:
        package = myapp.tests  # default argument required for the Django test runner
    return unittest.TestSuite([doctest.DocTestSuite(m) for m in find_modules(package)])
To add recursion, use os.walk() to traverse the module tree and find Python packages.
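A minimal sketch of that recursion, assuming packages are marked by an __init__.py file (the names here are illustrative):
import os

def find_packages(root_dir):
    """Yield dotted package names for every directory under root_dir
    that contains an __init__.py."""
    for dirpath, dirnames, filenames in os.walk(root_dir):
        if '__init__.py' in filenames:
            rel = os.path.relpath(dirpath, os.path.dirname(root_dir))
            yield rel.replace(os.sep, '.')
Each yielded dotted name can be passed straight to doctest.DocTestSuite().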

Thanks to Alex and Paul. This is what I came up with:
# tests.py
import sys, settings, re, os, doctest, unittest, imp

# import your base Django project
import myapp

# Django already runs these, don't include them again
ALREADY_RUN = ['tests.py', 'models.py']

def find_untested_modules(package):
    """Gets all modules not already included in Django's test suite"""
    files = [re.sub(r'\.py$', '', f)
             for f in os.listdir(os.path.dirname(package.__file__))
             if f.endswith(".py")
             and os.path.basename(f) not in ALREADY_RUN]
    return [imp.load_module(file, *imp.find_module(file, package.__path__))
            for file in files]

def modules_callables(module):
    return [m for m in dir(module) if callable(getattr(module, m))]

def has_doctest(docstring):
    return ">>>" in docstring

__test__ = {}
for module in find_untested_modules(myapp.module1):
    for method in modules_callables(module):
        docstring = str(getattr(module, method).__doc__)
        if has_doctest(docstring):
            print "Found doctest(s) " + module.__name__ + "." + method
            # import the method itself, so doctest can find it
            _temp = __import__(module.__name__, globals(), locals(), [method])
            locals()[method] = getattr(_temp, method)
            # Django looks in __test__ for doctests to run
            __test__[method] = getattr(module, method)

I'm not up to speed on Django's testing, but as I understand it, it uses automatic unittest discovery, just like python -m unittest discover and Nose.
If so, just put the following file somewhere the discovery will find it (usually just a matter of naming it test_doctest.py or similar).
Change your_package to the package to test. All modules (including subpackages) will be doctested.
import doctest
import pkgutil

import your_package as root_package

def load_tests(loader, tests, ignore):
    modules = pkgutil.walk_packages(root_package.__path__, root_package.__name__ + '.')
    for _, module_name, _ in modules:
        try:
            suite = doctest.DocTestSuite(module_name)
        except ValueError:
            # Presumably a "no docstrings" error. That's OK.
            pass
        else:
            tests.addTests(suite)
    return tests

Related

Python script on Django shell not seeing import if import not set as global?

I have searched Stack Overflow and wasn't able to find this. I have noticed something I cannot wrap my head around. When run as a normal Python script, the import works fine, but when run from the Django shell it behaves strangely: the import needs to be declared global to be seen.
You can reproduce it like this: make a file test.py in the folder with manage.py. The code you can test with is below.
This doesn't work; code of test.py:
#!/usr/bin/env python3
import chardet

class LoadList():
    def __init__(self):
        self.email_list_path = '/home/omer/test.csv'

    @staticmethod
    def check_file_encoding(file_to_check):
        encoding = chardet.detect(open(file_to_check, "rb").read())
        return encoding

    def get_encoding(self):
        return self.check_file_encoding(self.email_list_path)['encoding']

print(LoadList().get_encoding())
This works fine when chardet is declared global inside the test.py file:
#!/usr/bin/env python3
import chardet

class LoadList():
    def __init__(self):
        self.email_list_path = '/home/omer/test.csv'

    @staticmethod
    def check_file_encoding(file_to_check):
        global chardet
        encoding = chardet.detect(open(file_to_check, "rb").read())
        return encoding

    def get_encoding(self):
        return self.check_file_encoding(self.email_list_path)['encoding']

print(LoadList().get_encoding())
The first run is without global chardet and you can see the error. The second run is with global chardet set and you can see it works.
What is going on? Can someone explain this to me? Why isn't the import seen until it is declared global?
Piping a file into manage.py shell is the same as piping it into the python command. It's not the same as running the file with python test.py. I suspect it has something to do with how the newlines are interpreted and how the file is really parsed, but I don't have time to check.
Instead of this approach I'd recommend you write a custom management command.
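For example, a minimal sketch of such a command; the file path, command name, and argument are illustrative, not from the question:
# myapp/management/commands/check_encoding.py
import chardet
from django.core.management.base import BaseCommand

class Command(BaseCommand):
    help = "Detect the encoding of a file"

    def add_arguments(self, parser):
        parser.add_argument('path')

    def handle(self, *args, **options):
        with open(options['path'], 'rb') as f:
            result = chardet.detect(f.read())
        self.stdout.write(result['encoding'])
You would then run it with python manage.py check_encoding /home/omer/test.csv, and module-level imports behave normally.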

How to create a pyspark udf, calling a class function from another class function in the same file?

I'm creating a pyspark udf inside a class-based view, and I have the function I want to call inside another class-based view, both of them in the same file (api.py), but when I inspect the content of the resulting dataframe, I get this error:
ModuleNotFoundError: No module named 'api'
I can't understand why this happens. I tried similar code in the pyspark console and it worked fine. A similar question was asked here, but the difference is that I'm trying to do it in the same file.
This is a piece of my full code:
api.py
class TextMiningMethods():
    def clean_tweet(self, tweet):
        '''
        some logic here
        '''
        return "Hello: " + tweet

class BigDataViewSet(TextMiningMethods, viewsets.ViewSet):

    @action(methods=['post'], detail=False)
    def word_cloud(self, request, *args, **kwargs):
        '''
        some previous logic here
        '''
        spark = SparkSession \
            .builder \
            .master("spark://" + SPARK_WORKERS) \
            .appName('word_cloud') \
            .config("spark.executor.memory", '2g') \
            .config('spark.executor.cores', '2') \
            .config('spark.cores.max', '2') \
            .config("spark.driver.memory", '2g') \
            .getOrCreate()
        spark.sparkContext.addPyFile('path/to/udfFile.py')

        cols = ['text']
        rows = []
        for tweet_account_index, tweet_account_data in enumerate(tweets_list):
            tweet_data_aux_pandas_df = pd.Series(tweet_account_data['tweet']).dropna()
            for tweet_index, tweet in enumerate(tweet_data_aux_pandas_df):
                row = [tweet['text']]
                rows.append(row)

        # Create a Pandas Dataframe of tweets
        tweet_pandas_df = pd.DataFrame(rows, columns=cols)

        schema = StructType([
            StructField("text", StringType(), True)
        ])

        # Converts to Spark DataFrame
        df = spark.createDataFrame(tweet_pandas_df, schema=schema)

        clean_tweet_udf = udf(TextMiningMethods().clean_tweet, StringType())
        clean_tweet_df = df.withColumn("clean_tweet", clean_tweet_udf(df["text"]))
        clean_tweet_df.show()  # This line produces the error
A similar test in pyspark works fine:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
from pyspark.sql.functions import udf

def clean_tweet(name):
    return "This is " + name

schema = StructType([StructField("Id", IntegerType(), True), StructField("tweet", StringType(), True)])
data = [[1, "tweet 1"], [2, "tweet 2"], [3, "tweet 3"]]
df = spark.createDataFrame(data, schema=schema)

clean_tweet_udf = udf(clean_tweet, StringType())
clean_tweet_df = df.withColumn("clean_tweet", clean_tweet_udf(df["tweet"]))
clean_tweet_df.show()
So these are my questions:
What is this error related to, and how can I fix it?
What is the right way to create a pyspark udf when you're working with class-based views? Is it bad practice to write the functions you will use as pyspark udfs in the same file where you call them? (In my case, all my api endpoints work with django rest framework.)
Any help will be appreciated; thanks in advance.
UPDATE:
This link and this link explain how to use custom classes with pyspark using SparkContext, but not with SparkSession, which is my case, but I used this:
spark.sparkContext.addPyFile('path/to/udfFile.py')
The problem is that I defined the class holding the functions to use as pyspark udfs in the same file where I'm creating the udf function for the dataframe (as shown in my code). I couldn't find how to reach that behaviour when the path of addPyFile() is in the same code. In spite of that, I moved my code and followed these steps (which also fixed another error):
Create a new folder called udf
Create a new empty __init__.py file, to make the directory into a package.
And create a file.py for my udf functions.
core/
udf/
├── __init__.py
├── __pycache__
└── pyspark_udf.py
api/
├── admin.py
├── api.py
├── apps.py
├── __init__.py
In this file, I tried to import the dependencies either at the beginning of the file or inside the function. In all cases I receive ModuleNotFoundError: No module named 'udf'.
pyspark_udf.py
import re
import string
import unidecode
from nltk.corpus import stopwords

class TextMiningMethods():
    """docstring for TextMiningMethods"""
    def clean_tweet(self, tweet):
        # some logic here
I have tried all of these. At the beginning of my api.py file:
from udf.pyspark_udf import TextMiningMethods
# or
from udf.pyspark_udf import *
And inside the word_cloud function
class BigDataViewSet(viewsets.ViewSet):
    def word_cloud(self, request, *args, **kwargs):
        from udf.pyspark_udf import TextMiningMethods
In the python debugger this line works:
from udf.pyspark_udf import TextMiningMethods
But when I show the dataframe, I receive the error:
clean_tweet_df.show()
ModuleNotFoundError: No module named 'udf'
Obviously, the original problem changed into another one; now my problem is more related to this question, but I couldn't find a satisfactory way to import the file and create a pyspark udf calling a class function from another class function.
What am I missing?
After different tries, I couldn't find a solution by referencing a method in the path of addPyFile(), whether located in the same file where I was creating the udf (I would like to know if this is a bad practice) or in another file. Technically, the addPyFile(path) documentation says:
Add a .py or .zip dependency for all tasks to be executed on this SparkContext in the future. The path passed can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI.
So what I mentioned should be possible. Based on that, I had to use this solution and zip the whole udf folder from its highest level with:
zip -r udf.zip udf
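With the archive built, it can be shipped to the executors; a sketch assuming the SparkSession from the question is named spark and udf.zip sits next to the driver script:
spark.sparkContext.addPyFile('udf.zip')
from udf.pyspark_udf import TextMiningMethods  # now resolvable on the executors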
Also, in pyspark_udf.py I had to import my dependencies as below to avoid this problem:
class TextMiningMethods():
    """docstring for TextMiningMethods"""
    def clean_tweet(self, tweet):
        import re
        import string
        import unidecode
        from nltk.corpus import stopwords
Instead of:
import re
import string
import unidecode
from nltk.corpus import stopwords

class TextMiningMethods():
    """docstring for TextMiningMethods"""
    def clean_tweet(self, tweet):
Then, finally, this line worked fine:
clean_tweet_df.show()
I hope this is useful for someone else.
Thank you! Your approach worked for me.
Just to clarify my steps:
Made a udf module with __init__.py and pyspark_udfs.py
Made a bash file to zip udfs first and then run my files on the top level:
runner.sh
echo "zipping udfs..."
zip -r udf.zip udf
echo "udfs zipped"
echo "running script..."
/opt/conda/bin/python runner.py
echo "script ended."
In the actual code, I imported my udfs from the udf.pyspark_udfs module and initialized them in the Python function where I needed them, like so:
def _produce_period_statistics(self, df: pyspark.sql.DataFrame, period: str) -> pyspark.sql.DataFrame:
    """Produces basic and trend statistics based on user visits."""
    # udfs
    get_hist_vals_udf = F.udf(lambda array, bins, _range: get_histogram_values(array, bins, _range), ArrayType(IntegerType()))
    get_hist_edges_udf = F.udf(lambda array, bins, _range: get_histogram_edges(array, bins, _range), ArrayType(FloatType()))
    get_mean_udf = F.udf(get_mean, FloatType())
    get_std_udf = F.udf(get_std, FloatType())
    get_lr_coefs_udf = F.udf(lambda bar_height, bar_edges, hist_upper: get_linear_regression_coeffs(bar_height, bar_edges, hist_upper), StringType())
    ...

Test different invocation patterns

I want to test how a project generated by cookiecutter behaves with multiple invocation patterns.
Given the following generated project:
proj/
    proj/
        __init__.py
        __main__.py
Content of __init__.py:
def func():
    pass
Content of __main__.py:
from proj import func

def main():
    func()

if __name__ == '__main__':
    main()
(I have read about the issues with __main__.__spec__; that is not what this question is about. In fact, I would like to test-drive (TDD) my workarounds for those issues.)
Now I write tests in which the generated project structure and location are known (available via pytest fixtures, for instance):
def test_run_proj_script():
    """Test behavior of ``python3 ./proj``"""
    pass  # how to run this?

def test_run_proj_module():
    """Test behavior of ``python3 -m proj``"""
    pass  # how to run this?
Bonus: how do I inject values for sys.argv?
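One possible approach, sketched under the assumption that a fixture hands the test the generated project's root directory: shell out with subprocess, which also answers the bonus, since sys.argv values are just extra command-line arguments:
import subprocess
import sys

def run_as_script(proj_root, *argv):
    """Run ``python3 ./proj`` with optional extra sys.argv values."""
    return subprocess.run([sys.executable, 'proj', *argv],
                          capture_output=True, text=True, cwd=proj_root)

def run_as_module(proj_root, *argv):
    """Run ``python3 -m proj`` from the directory containing the package."""
    return subprocess.run([sys.executable, '-m', 'proj', *argv],
                          capture_output=True, text=True, cwd=proj_root)

def test_script_invocation(generated_project):  # generated_project: hypothetical fixture with the project root
    result = run_as_script(generated_project)
    assert result.returncode == 0, result.stderr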

How to test custom django-admin commands

I created custom django-admin commands, but I don't know how to test them in standard Django tests.
If you're using a coverage tool, it is good to call the command from the test code with:
from django.core.management import call_command
from django.test import TestCase

class CommandsTestCase(TestCase):
    def test_mycommand(self):
        "Test my custom command."
        args = []
        opts = {}
        call_command('mycommand', *args, **opts)
        # Some asserts.
From the official documentation
Management commands can be tested with the call_command() function. The output can be redirected into a StringIO instance
You should make your actual command script the minimum possible, so that it just calls a function elsewhere. The function can then be tested via unit tests or doctests as normal.
You can see an example on GitHub:
def test_command_style(self):
    out = StringIO()
    management.call_command('dance', style='Jive', stdout=out)
    self.assertEqual(out.getvalue(),
                     "I don't feel like dancing Jive.")
To add to what has already been posted here: if your django-admin command takes a file as a parameter, you can do something like this:
from django.test import TestCase
from django.core.management import call_command
from io import StringIO
import os

class CommandTestCase(TestCase):
    def test_command_import(self):
        out = StringIO()
        call_command(
            'my_command', os.path.join('path/to/file', 'my_file.txt'),
            stdout=out
        )
        self.assertIn(
            'Expected Value',
            out.getvalue()
        )
This works when your django-admin command is used in a manner like this:
$ python manage.py my_command my_file.txt
A simple alternative to parsing stdout is to make your management command exit with an error code if it doesn't run successfully, for example using sys.exit(1).
You can catch this in a test with:
with self.assertRaises(SystemExit):
    call_command('mycommand')
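For completeness, a hedged sketch of a command written that way (the command body is illustrative):
# myapp/management/commands/mycommand.py
import sys
from django.core.management.base import BaseCommand

class Command(BaseCommand):
    def handle(self, *args, **options):
        success = False  # stand-in for the command's real work
        if not success:
            sys.exit(1)  # surfaces as SystemExit, catchable via assertRaises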
I agree with Daniel that the actual command script should do the minimum possible, but you can also test it directly in a Django unit test using os.popen4.
From within your unit test you can have a command like
fin, fout = os.popen4('python manage.py yourcommand')
result = fout.read()
You can then analyze the contents of result to test whether your Django command was successful.
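Note that os.popen4 is deprecated and gone in Python 3; a subprocess-based sketch of the same idea:
import subprocess

proc = subprocess.run(['python', 'manage.py', 'yourcommand'],
                      capture_output=True, text=True)
result = proc.stdout + proc.stderr  # popen4's fout combined both streams
Like popen4, though, this spawns a separate process, so it won't use the test database set up by manage.py test.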

How can I call a custom Django manage.py command directly from a test driver?

I want to write a unit test for a Django manage.py command that does a backend operation on a database table. How would I invoke the management command directly from code?
I don't want to execute the command on the operating system's shell from tests.py, because then I can't use the test environment set up by manage.py test (test database, test dummy email outbox, etc.).
The best way to test such things is to extract the needed functionality from the command itself into a standalone function or class. That abstracts away the "command execution stuff" and lets you write tests without additional requirements.
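A minimal sketch of that pattern; the module and function names are illustrative:
# myapp/utils.py -- the testable logic lives outside the command
def do_backend_operation():
    pass  # the actual work on the database table goes here

# myapp/management/commands/my_command.py -- a thin wrapper
from django.core.management.base import BaseCommand
from myapp.utils import do_backend_operation

class Command(BaseCommand):
    def handle(self, *args, **options):
        do_backend_operation()

# tests import and call do_backend_operation() directly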
But if for some reason you cannot decouple the logic from the command, you can call it from any code using the call_command function, like this:
from django.core.management import call_command
call_command('my_command', 'foo', bar='baz')
Rather than doing the call_command trick, you can run your task by doing:
from myapp.management.commands import my_management_task

cmd = my_management_task.Command()
opts = {}  # kwargs for your command -- lets you override stuff for testing...
cmd.handle_noargs(**opts)
The following code:
from django.core.management import call_command
call_command('collectstatic', verbosity=3, interactive=False)
call_command('migrate', 'myapp', verbosity=3, interactive=False)
...is equal to the following commands typed in terminal:
$ ./manage.py collectstatic --noinput -v 3
$ ./manage.py migrate myapp --noinput -v 3
See running management commands from django docs.
The Django documentation on call_command fails to mention that sys.stdout must also be redirected to out. The example code should read:
from django.core.management import call_command
from django.test import TestCase
from django.utils.six import StringIO
import sys

class ClosepollTest(TestCase):
    def test_command_output(self):
        out = StringIO()
        sys.stdout = out
        call_command('closepoll', stdout=out)
        self.assertIn('Expected output', out.getvalue())
Building on Nate's answer, I have this:
from optparse import OptionParser

def make_test_wrapper_for(command_module):
    def _run_cmd_with(*args):
        """Run the possibly_add_alert command with the supplied arguments"""
        cmd = command_module.Command()
        (opts, args) = OptionParser(option_list=cmd.option_list).parse_args(list(args))
        cmd.handle(*args, **vars(opts))
    return _run_cmd_with
Usage:
from myapp.management import mycommand
cmd_runner = make_test_wrapper_for(mycommand)
cmd_runner("foo", "bar")
The advantage here is that if you've used additional options and OptParse, this will sort them out for you. It isn't quite perfect (it doesn't pipe outputs yet), but it will use the test database. You can then test for database effects.
I am sure that using Michael Foord's mock module, and rewiring stdout for the duration of a test, would let you get even more out of this technique: testing the output, exit conditions, etc.
An advanced way to run a manage command with flexible arguments and captured output:
argv = self.build_argv(short_dict=kwargs)
cmd = self.run_manage_command_raw(YourManageCommandClass, argv=argv)
# Output is saved in cmd.stdout.getvalue() / cmd.stderr.getvalue()
Add this code to your base test class:
# at module level, for the helpers below
import io
from unittest import mock

@classmethod
def build_argv(cls, *positional, short_names=None, long_names=None, short_dict=None, **long_dict):
    """
    Build argv list which can be provided for manage command "run_from_argv".

    1) positional will be passed first as is
    2) short_names will be passed after with a one-dash (-) prefix
    3) long_names will be passed after with a two-dash (--) prefix
    4) short_dict will be passed after with a one-dash (-) prefixed key and the next item as its value
    5) long_dict will be passed after with a two-dash (--) prefixed key and the next item as its value
    """
    argv = [__file__, None] + list(positional)[:]
    for name in short_names or []:
        argv.append(f'-{name}')
    for name in long_names or []:
        argv.append(f'--{name}')
    for name, value in (short_dict or {}).items():
        argv.append(f'-{name}')
        argv.append(str(value))
    for name, value in long_dict.items():
        argv.append(f'--{name}')
        argv.append(str(value))
    return argv

@classmethod
def run_manage_command_raw(cls, cmd_class, argv):
    """Run any manage.py command as a python object."""
    command = cmd_class(stdout=io.StringIO(), stderr=io.StringIO())
    with mock.patch('django.core.management.base.connections.close_all'):
        # patched to prevent closing the db connection
        command.run_from_argv(argv)
    return command