How can I make a library dependency graph with waf? - c++

I'd like to generate a simple DOT file when building a C++ project with waf. Ideally I'd like to just use the use and target attributes of the bld command to generate the file. Is this easily injectable into the system?
e.g. This wscript file (just mentioning the parts I'd like to use)
def build(bld):
    bld(use = ['lib1',
               'lib2'],
        target = 'lib3')
Would produce output of
lib3 -> lib1
lib3 -> lib2
Where would be the best place to inject this behavior?
Thanks!

You can add a tool like this easily via add_post_fun in the build step, something along these lines:
from waflib.Errors import WafError
from waflib import Utils

def filter_uses(ctx, uses):
    filtered = []
    for use in uses:
        try:
            ctx.get_tgen_by_name(use)
            filtered.append(use)
        except WafError:
            pass
    return filtered

@Utils.run_once  # print only once, even if used in multiple scripts
def make_dot_file(ctx):
    for group in ctx.groups:
        for taskgen in group:
            uses = Utils.to_list(getattr(taskgen, 'use', []))
            uses = filter_uses(ctx, uses)  # Optional, only print TaskGens
            try:
                name = taskgen.name  # Sometimes this fails, don't know why
                print("{} -> {}".format(name, ", ".join(uses)))
            except AttributeError:
                pass

def build(bld):
    # Build stuff ...
    bld.add_post_fun(make_dot_file)
Note: to get really nice output, some more filtering might be useful.
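Since the question asks for an actual DOT file rather than console output, here is a hedged sketch of how the same loop could write one instead. The file name dependency_graph.dot and the helper name write_dot_file are arbitrary choices, not waf conventions:

from waflib import Utils

def write_dot_file(ctx):
    # Collect "target -> use" edges and emit them as a DOT digraph.
    lines = ['digraph dependencies {']
    for group in ctx.groups:
        for taskgen in group:
            try:
                name = taskgen.name
            except AttributeError:
                continue
            for use in Utils.to_list(getattr(taskgen, 'use', [])):
                lines.append('    "%s" -> "%s";' % (name, use))
    lines.append('}')
    # Write the graph into the build directory
    ctx.bldnode.make_node('dependency_graph.dot').write('\n'.join(lines) + '\n')

def build(bld):
    # Build stuff ...
    bld.add_post_fun(write_dot_file)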

I improved and adjusted @CK1's idea to my needs. My solution generates a DAG with Graphviz and uses helper functions from this article by Matthias Eisen to display dependencies and targets.
The main part of the code looks like this:
import functools
import os
import sys

import graphviz as gv
from pathlib import Path
from waflib import Utils

# Make sure that dot.exe is in your system path. I had to do this as
# Graphviz (the program, not the package) is installed with conda. I am
# sure there is a proper way to do this with Waf.
library_bin = Path(sys.executable).parent / 'Library' / 'bin' / 'graphviz'
os.environ['PATH'] += os.pathsep + str(library_bin)

def make_dot_file(ctx):
    # Create DAG
    dag = digraph()
    # Loop over task groups
    for group in ctx.groups:
        # Loop over tasks
        for taskgen in group:
            # Get name and add node for task
            name = taskgen.get_name()
            add_nodes(dag, [name])
            # Add nodes for dependencies and edges to task
            deps = Utils.to_list(getattr(taskgen, 'deps', []))
            for dep in deps:
                dep = Path(dep).name
                add_nodes(dag, [dep])
                add_edges(dag, [(dep, name)])
            # Add nodes for targets and edges to task
            targets = Utils.to_list(getattr(taskgen, 'target', []))
            for target in targets:
                target = Path(target).name
                add_nodes(dag, [target])
                add_edges(dag, [(name, target)])
    # Make the DAG pretty
    dag = apply_styles(dag, styles)
    # Save DAG
    dag.render(<output path of graphic>)

def build(bld):
    # Build stuff ...
    bld.add_post_fun(make_dot_file)
The helper functions used for this example are here:
# -------------------- Start helper functions ----------------------------
graph = functools.partial(gv.Graph, format='png')
digraph = functools.partial(gv.Digraph, format='png')

styles = {
    'graph': {
        'label': 'Pretty Graph',
        'fontsize': '16',
        'fontcolor': 'white',
        'bgcolor': '#333333',
        'rankdir': 'BT',
    },
    'nodes': {
        'fontname': 'Helvetica',
        'shape': 'hexagon',
        'fontcolor': 'white',
        'color': 'white',
        'style': 'filled',
        'fillcolor': '#006699',
    },
    'edges': {
        'style': 'dashed',
        'color': 'white',
        'arrowhead': 'open',
        'fontname': 'Courier',
        'fontsize': '12',
        'fontcolor': 'white',
    }
}

def apply_styles(graph, styles):
    graph.graph_attr.update(
        ('graph' in styles and styles['graph']) or {}
    )
    graph.node_attr.update(
        ('nodes' in styles and styles['nodes']) or {}
    )
    graph.edge_attr.update(
        ('edges' in styles and styles['edges']) or {}
    )
    return graph

def add_nodes(graph, nodes):
    for n in nodes:
        if isinstance(n, tuple):
            graph.node(n[0], **n[1])
        else:
            graph.node(n)
    return graph

def add_edges(graph, edges):
    for e in edges:
        if isinstance(e[0], tuple):
            graph.edge(*e[0], **e[1])
        else:
            graph.edge(*e)
    return graph
# ----------------------- End helper functions -----------------------------

Related

How to test a transformation in Palantir Foundry?

We try to create a test function for the whole transformation.
import os
from transforms.verbs.testing.TransformRunner import TransformRunner
from transforms.api import Pipeline
from .myproject.datasets import my_transform

# This assumes your test data exists in the folder /test/fixtures/data/ within the repo next to this test
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'fixtures', 'data')

def test_my_transform(spark_session):
    pipeline = Pipeline()
    pipeline.add_transforms(my_transform)
    runner = TransformRunner(pipeline, '/my_fabulous_project', TEST_DATA_DIR)
    output = runner.build_dataset(spark_session, '/my_fabulous_project/output/test')
    assert output.first()['col_c'] == 3
Based on the documentation and this post, we tried to modify the import of the function, but we always get one of these errors:
transforms._errors.TransformTypeError: Expected arguments to be of type <class 'transforms.api._transform.Transform'>
ModuleNotFoundError: No module named 'test.myproject'
ValueError: attempted relative import beyond top-level package
How to create a working end-to-end testing function for a transformation?
The following transformation tests work for functions decorated both with @transform and @transform_df.
my_transform.py is located in the src/myproject/datasets folder of the repository.
from transforms.api import Input, Output, transform_df
from pyspark.sql import functions as F

@transform_df(
    Output('/some_foundry_path/my_dir/out'),
    input_a=Input('/some_foundry_path/my_dir/in'))
def compute_sum(input_a):
    df = input_a.withColumn('col_c', F.col('col_a') + F.col('col_b'))
    return df
Input file: the dataset at /some_foundry_path/my_dir/in, with columns col_a and col_b.
Approach where test inputs are stored in-memory
test_my_transform.py is located in the src/test folder of the repository.
from transforms.api import Pipeline
from transforms.verbs.testing.TransformRunner import TransformRunner
from transforms.verbs.testing.datastores import InMemoryDatastore
from myproject.datasets.my_transform import compute_sum

def test_compute_sum(spark_session):
    df_in = spark_session.createDataFrame([
        (0, 1)
    ], ['col_a', 'col_b'])
    df_expected = spark_session.createDataFrame([
        (0, 1, 1)
    ], ['col_a', 'col_b', 'col_c'])

    path_in = '/some_foundry_path/my_dir/in'
    path_out = '/some_foundry_path/my_dir/out'

    pipeline = Pipeline()
    pipeline.add_transforms(compute_sum)

    store = InMemoryDatastore()
    store.store_dataframe(path_in, df_in)

    runner = TransformRunner(pipeline, datastore=store)
    df_out = runner.build_dataset(spark_session, path_out)

    assert df_out.subtract(df_expected).count() == 0
    assert df_expected.subtract(df_out).count() == 0
    assert df_out.schema == df_expected.schema
path_in and path_out are exactly the same as the Input and Output paths of the transformation, so the script is easy to follow.
Approach where test inputs are stored in .csv in repository
This approach is shown in the official documentation. It is more elaborate, it is not so easy to understand what paths should be created, and it may be hard to maintain: if a dataset path changes, a new repository tree might need to be created.
test_my_transform.py is located in the src/test folder of the repository.
from transforms.api import Pipeline
from transforms.verbs.testing.TransformRunner import TransformRunner
import os
from myproject.datasets.my_transform import compute_sum

# Taking this .py file's dir and appending the path to the test data
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'fixtures/data/input')

def test_compute_sum(spark_session):
    path_in_prefix = '/some_foundry_path/my_dir'
    path_out = '/some_foundry_path/my_dir/out'

    pipeline = Pipeline()
    pipeline.add_transforms(compute_sum)

    runner = TransformRunner(pipeline, path_in_prefix, TEST_DATA_DIR)
    df_out = runner.build_dataset(spark_session, path_out)

    assert df_out.head()['col_c'] == 1
The test CSV file (in.csv; it has the same name, in, as the transformation Input) is created inside the repository:
col_a,col_b
0,1
Note: for all the inputs, the Input path (/some_foundry_path/my_dir/in) less path_in_prefix (/some_foundry_path/my_dir/) should be equal to the CSV test file's full path (...src/test/fixtures/data/input/in) less TEST_DATA_DIR (...src/test/fixtures/data/input). In both cases the remainder is in.
To make tests run automatically together with checks, uncomment the relevant line in transforms-python/build.gradle.
After trying out several approaches with different conditions, the following approach seems cleanest to me.
- no hard-coded dataset paths
- it is very explicit about adding/removing transformation inputs
- in-memory dataframes are used as test inputs
test_my_transform.py
from transforms.api import Pipeline
from transforms.verbs.testing.TransformRunner import TransformRunner
from transforms.verbs.testing.datastores import InMemoryDatastore
from myproject.datasets.my_transform import compute_sum

def test_compute_sum(spark_session):
    df_input1 = spark_session.createDataFrame([
        (0, 2)
    ], ['col_a', 'col_b'])
    df_input2 = spark_session.createDataFrame([
        (0, 1)
    ], ['col_a', 'col_b'])
    df_expected = spark_session.createDataFrame([
        (0, 1, 1),
        (0, 2, 2)
    ], ['col_a', 'col_b', 'col_c'])

    # If @transform_df or @transform_pandas, the key is 'bound_output'
    # If @transform, the key is the name of the Output variable
    output_map = {'out': df_expected}
    input_map = {
        'input_a': df_input1,
        'input_b': df_input2,
    }

    pipeline = Pipeline()
    pipeline.add_transforms(compute_sum)
    store = InMemoryDatastore()

    for inp_name, inp_obj in pipeline.transforms[0].inputs.items():
        store.store_dataframe(inp_obj.alias, input_map[inp_name])

    path_out = pipeline.transforms[0].outputs[list(output_map)[0]].alias

    runner = TransformRunner(pipeline, datastore=store)
    df_out = runner.build_dataset(spark_session, path_out)

    assert df_out.subtract(df_expected).count() == 0
    assert df_expected.subtract(df_out).count() == 0
    assert df_out.schema == df_expected.schema
my_transform.py
from transforms.api import Input, Output, transform
from pyspark.sql import functions as F

@transform(
    out=Output('/some_foundry_path/my_dir/out3'),
    input_a=Input('/some_foundry_path/my_dir/in'),
    input_b=Input('/some_foundry_path/my_dir/in2'))
def compute_sum(input_a, input_b, out):
    input_a = input_a.dataframe()
    input_b = input_b.dataframe()
    df = input_a.unionByName(input_b)
    df = df.withColumn('col_c', F.col('col_a') + F.col('col_b'))
    out.write_dataframe(df)

Dynamic task generators

I am evaluating waf for an existing project that has tasks similar to this:
1. preprocessing phase: datafile => TransformationTask => library name list
2. for each library name:
2.1 import files from repository
2.2 build library
The library list depends on the preprocessing task and is naturally not known in advance.
How can this be achieved with waf?
You have to generate a file containing the library list with a first task. Another task then takes the output of the first as its input and processes it to generate what you need.
It is essentially the example given in §11.4.2 of the waf book; you just replace the compiler-output parsing with the parsing of your library description file. Copy the example and change the run method in mytool.py like this:
from threading import Lock

from waflib import Task

g_lock = Lock()  # protects task creation when several src2c tasks run in parallel

class src2c(Task.Task):
    color = 'PINK'
    quiet = True
    before = ['cstlib']

    def run(self):
        libnode = self.inputs[0]
        libinfo = libnode.read_json()
        name = libinfo['name']
        files = [f"repo/{file}" for file in libinfo['files']]
        taskgen = self.generator
        # library name
        taskgen.link_task.outputs = []
        taskgen.link_task.add_target(name)
        # library source files
        nodes = [taskgen.path.make_node(f) for f in files]
        # update discovered dependencies
        taskgen.bld.raw_deps[self.uid()] = [self.signature()] + nodes
        with g_lock:
            self.add_c_tasks(nodes)

    # cf waf book § 11.4.2
    def add_c_tasks(self, lst):
        ...

    # cf waf book § 11.4.2
    def runnable_status(self):
        ...
In the wscript, I simulate the datafile transformation with a copy:
def options(opt):
    opt.load("compiler_c")

def configure(cnf):
    cnf.load("compiler_c")
    cnf.load("mytool", tooldir=".")

def build(bld):
    bld(source = "libs.json", target = "libs.src", features = "subst")
    bld(source = "libs.src", features = ["c", "cstlib"])
With a simple libs.json:
{
    "name": "mylib2",
    "files": ["f1.c", "f2.c"]
}
And files repo/f1.c and repo/f2.c containing void f1(){} and void f2(){}, respectively.
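The answer does not show how the generated .src file gets mapped to the src2c task; in waf that is normally done with an extension hook in the tool. A minimal sketch of what mytool.py might also contain (the hook name process_src is an arbitrary choice):

# mytool.py (sketch): map .src files to the src2c task shown above
from waflib.TaskGen import extension

@extension('.src')
def process_src(self, node):
    # One src2c task per .src file; the task's run() then discovers the real
    # C sources and attaches them to this task generator.
    self.create_task('src2c', node)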

Django models aren't loaded yet

I am getting this error when I run make html in Sphinx: http://pastebin.com/FjyigBJ9
Is this because the utils file is calling the Frequency module before the models are registered?
What should be done to rectify it?
This is my conf.py:
#!/usr/bin/env python3
import sys
import os
import shlex
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('..'))
from django.conf import settings
settings.configure()
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.coverage',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'Locality Management'
copyright = '2015, DOne'
author = 'DOne'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.0.1'
# The full version, including alpha/beta/rc tags.
release = '0.0.1'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
#html_search_language = 'en'
# A dictionary with options for the search language support, empty by default.
# Now only 'ja' uses this config value
#html_search_options = {'type': 'default'}
# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
#html_search_scorer = 'scorer.js'
# Output file base name for HTML help builder.
htmlhelp_basename = 'LocalityManagementdoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',

    # Latex figure (float) alignment
    #'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'LocalityManagement.tex', 'Locality Management Documentation',
     'DOne', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'localitymanagement', 'Locality Management Documentation',
     [author], 1)
]
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'LocalityManagement', 'Locality Management Documentation',
     author, 'LocalityManagement', 'One line description of project.',
     'Miscellaneous'),
]
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
Writing
import django
os.environ['DJANGO_SETTINGS_MODULE'] = 'myproject.settings'
django.setup()
instead of
from django.conf import settings
settings.configure()
should do the trick.
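For context, a minimal sketch of how the top of conf.py might then look; the settings module name myproject.settings is a placeholder for your actual project:

# conf.py (top of file)
import os
import sys

# Make the project importable from the docs directory
sys.path.insert(0, os.path.abspath('..'))

# Point Django at your settings and populate the app registry
# before autodoc imports any modules that use models.
os.environ['DJANGO_SETTINGS_MODULE'] = 'myproject.settings'

import django
django.setup()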

Speeding up build process with distutils

I am programming a C++ extension for Python and I am using distutils to compile the project. As the project grows, rebuilding it takes longer and longer. Is there a way to speed up the build process?
I read that parallel builds (as with make -j) are not possible with distutils. Are there any good alternatives to distutils which might be faster?
I also noticed that it's recompiling all object files every time I call python setup.py build, even when I only changed one source file. Should this be the case or might I be doing something wrong here?
In case it helps, here are some of the files which I try to compile: https://gist.github.com/2923577
Thanks!
Try building with the environment variable CC="ccache gcc"; that will speed up the build significantly when the source has not changed. (Strangely, distutils also uses CC for C++ source files.) Install the ccache package, of course.
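If you prefer to keep this inside setup.py rather than in the shell environment, a minimal sketch (assuming gcc and ccache are installed):

import os

# distutils picks the compiler up from the CC environment variable;
# setdefault keeps any value the user already exported.
os.environ.setdefault('CC', 'ccache gcc')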
Since you have a single extension which is assembled from multiple compiled object files, you can monkey-patch distutils to compile those in parallel (they are independent); put this into your setup.py (adjust N=2 as you wish):
# monkey-patch for parallel compilation
def parallelCCompile(self, sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None):
    # those lines are copied from distutils.ccompiler.CCompiler directly
    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(output_dir, macros, include_dirs, sources, depends, extra_postargs)
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
    # parallel code
    N = 2  # number of parallel compilations
    import multiprocessing.pool
    def _single_compile(obj):
        try:
            src, ext = build[obj]
        except KeyError:
            return
        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
    # convert to list, imap is evaluated on-demand
    list(multiprocessing.pool.ThreadPool(N).imap(_single_compile, objects))
    return objects

import distutils.ccompiler
distutils.ccompiler.CCompiler.compile = parallelCCompile
For the sake of completeness, if you have multiple extensions, you can use the following solution:
import os
import multiprocessing

try:
    from concurrent.futures import ThreadPoolExecutor as Pool
except ImportError:
    from multiprocessing.pool import ThreadPool as LegacyPool

    # To ensure the with statement works. Required for some older 2.7.x releases
    class Pool(LegacyPool):
        def __enter__(self):
            return self

        def __exit__(self, *args):
            self.close()
            self.join()


def build_extensions(self):
    """Function to monkey-patch
    distutils.command.build_ext.build_ext.build_extensions
    """
    self.check_extensions_list(self.extensions)

    try:
        num_jobs = os.cpu_count()
    except AttributeError:
        num_jobs = multiprocessing.cpu_count()

    with Pool(num_jobs) as pool:
        pool.map(self.build_extension, self.extensions)


def compile(
    self, sources, output_dir=None, macros=None, include_dirs=None,
    debug=0, extra_preargs=None, extra_postargs=None, depends=None,
):
    """Function to monkey-patch distutils.ccompiler.CCompiler"""
    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs
    )
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    for obj in objects:
        try:
            src, ext = build[obj]
        except KeyError:
            continue
        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

    # Return *all* object filenames, not just the ones we just built.
    return objects


from distutils.ccompiler import CCompiler
from distutils.command.build_ext import build_ext

build_ext.build_extensions = build_extensions
CCompiler.compile = compile
I've got this working on Windows with clcache, derived from eudoxos's answer:
# Python modules
import datetime
import distutils
import distutils.ccompiler
import distutils.sysconfig
import multiprocessing
import multiprocessing.pool
import os
import sys

from distutils.core import setup
from distutils.core import Extension
from distutils.errors import CompileError
from distutils.errors import DistutilsExecError

now = datetime.datetime.now
ON_LINUX = "linux" in sys.platform
N_JOBS = 4

#------------------------------------------------------------------------------
# Enable ccache to speed up builds
if ON_LINUX:
    os.environ['CC'] = 'ccache gcc'

# Windows
else:
    # Using clcache.exe, see: https://github.com/frerich/clcache
    # Insert path to clcache.exe into the path.
    prefix = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(prefix, "bin")
    print "Adding %s to the system path." % path
    os.environ['PATH'] = '%s;%s' % (path, os.environ['PATH'])
    clcache_exe = os.path.join(path, "clcache.exe")

#------------------------------------------------------------------------------
# Parallel Compile
#
# Reference:
#
# http://stackoverflow.com/questions/11013851/speeding-up-build-process-with-distutils
#
def linux_parallel_cpp_compile(
        self,
        sources,
        output_dir=None,
        macros=None,
        include_dirs=None,
        debug=0,
        extra_preargs=None,
        extra_postargs=None,
        depends=None):
    # Copied from distutils.ccompiler.CCompiler
    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs)
    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

    def _single_compile(obj):
        try:
            src, ext = build[obj]
        except KeyError:
            return
        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)

    # convert to list, imap is evaluated on-demand
    list(multiprocessing.pool.ThreadPool(N_JOBS).imap(
        _single_compile, objects))
    return objects


def windows_parallel_cpp_compile(
        self,
        sources,
        output_dir=None,
        macros=None,
        include_dirs=None,
        debug=0,
        extra_preargs=None,
        extra_postargs=None,
        depends=None):
    # Copied from distutils.msvc9compiler.MSVCCompiler
    if not self.initialized:
        self.initialize()
    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
        output_dir, macros, include_dirs, sources, depends, extra_postargs)
    compile_opts = extra_preargs or []
    compile_opts.append('/c')
    if debug:
        compile_opts.extend(self.compile_options_debug)
    else:
        compile_opts.extend(self.compile_options)

    def _single_compile(obj):
        try:
            src, ext = build[obj]
        except KeyError:
            return
        input_opt = "/Tp" + src
        output_opt = "/Fo" + obj
        try:
            self.spawn(
                [clcache_exe]
                + compile_opts
                + pp_opts
                + [input_opt, output_opt]
                + extra_postargs)
        except DistutilsExecError, msg:
            raise CompileError(msg)

    # convert to list, imap is evaluated on-demand
    list(multiprocessing.pool.ThreadPool(N_JOBS).imap(
        _single_compile, objects))
    return objects

#------------------------------------------------------------------------------
# Only enable parallel compile on 2.7 Python
if sys.version_info[1] == 7:
    if ON_LINUX:
        distutils.ccompiler.CCompiler.compile = linux_parallel_cpp_compile
    else:
        import distutils.msvccompiler
        import distutils.msvc9compiler
        distutils.msvccompiler.MSVCCompiler.compile = windows_parallel_cpp_compile
        distutils.msvc9compiler.MSVCCompiler.compile = windows_parallel_cpp_compile

# ... call setup() as usual
You can do this easily if you have Numpy 1.10 available. Just add:
try:
    from numpy.distutils.ccompiler import CCompiler_compile
    import distutils.ccompiler
    distutils.ccompiler.CCompiler.compile = CCompiler_compile
except ImportError:
    print("Numpy not found, parallel compile not available")
Use -j N or set NPY_NUM_BUILD_JOBS.
In the limited examples you provided in the link, it seems fairly obvious that you have some misunderstanding of what some of the features of the language are. For example, gsminterface.h has a whole lot of namespace-level statics, which is probably unintended. Every translation unit that includes that header will compile its own version of every one of the symbols declared in that header. The side effects of this are not only longer compile times but also code bloat (larger binaries) and longer link times, as the linker needs to process all those symbols.
There are still many questions that affect the build process that you have not answered, for example whether you clean every time before you recompile. If you are doing that, then you might want to consider ccache, a tool that caches the result of the build process, so that if you run make clean; make target, only the preprocessor will be run for any translation unit that has not changed. Note that as long as you keep most of the code in headers, this will not offer much of an advantage, as a change in a header modifies all translation units that include it. (I don't know your build system, so I cannot tell you whether python setup.py build will clean or not.)
The project does not seem large otherwise, so I would be surprised if it took more than a few seconds to compile.

Configure Django to find all doctests in all modules?

If I run the following command:
>python manage.py test
Django looks at tests.py in my application, and runs any doctests or unit tests in that file. It also looks at the __test__ dictionary for extra tests to run. So I can link doctests from other modules like so:
# tests.py
from myapp.module1 import _function1, _function2

__test__ = {
    "_function1": _function1,
    "_function2": _function2
}
If I want to include more doctests, is there an easier way than enumerating them all in this dictionary? Ideally, I just want to have Django find all doctests in all modules in the myapp application.
Is there some kind of reflection hack that would get me where I want to be?
I solved this for myself a while ago:
apps = settings.INSTALLED_APPS
for app in apps:
    try:
        a = app + '.test'
        __import__(a)
        m = sys.modules[a]
    except ImportError:  # no test jobs for this module, continue to next one
        continue
    # run your test using the imported module m
This allowed me to put per-module tests in their own test.py file, so they didn't get mixed up with the rest of my application code. It would be easy to modify this to look for doctests in each of your modules and run them if any are found, for example along the lines of the sketch below.
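For illustration, a hedged sketch of that modification, assuming the modules to scan are the ones listed in INSTALLED_APPS (the helper name doctest_suite is not from the original answer):

import doctest
import sys
import unittest

from django.conf import settings

def doctest_suite():
    """Collect doctests from every importable app module in INSTALLED_APPS."""
    suite = unittest.TestSuite()
    for app in settings.INSTALLED_APPS:
        try:
            __import__(app)
            module = sys.modules[app]
            suite.addTests(doctest.DocTestSuite(module))
        except (ImportError, ValueError):
            # ValueError is what DocTestSuite raises for modules with no docstrings
            continue
    return suite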
Use django-nose, since nose automatically finds all tests recursively.
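A minimal settings sketch for that route, assuming django-nose is installed (--with-doctest is the nose flag that enables doctest collection):

# settings.py
INSTALLED_APPS = [
    # ... your apps ...
    'django_nose',
]

# Let nose discover and run the tests
TEST_RUNNER = 'django_nose.NoseTestSuiteRunner'

# Also collect doctests from your modules
NOSE_ARGS = ['--with-doctest']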
Here are the key elements of the solution:
tests.py:
import doctest, imp, os, re, unittest
import myapp.tests

def find_modules(package):
    """Return list of imported modules from given package"""
    files = [re.sub(r'\.py$', '', f) for f in os.listdir(os.path.dirname(package.__file__))
             if f.endswith(".py") and os.path.basename(f) not in ('__init__.py', 'test.py')]
    return [imp.load_module(file, *imp.find_module(file, package.__path__)) for file in files]

def suite(package=None):
    """Assemble test suite for Django default test loader"""
    if not package:
        package = myapp.tests  # Default argument required for Django test runner
    return unittest.TestSuite([doctest.DocTestSuite(m) for m in find_modules(package)])
To add recursion, use os.walk() to traverse the module tree and find Python packages; see the sketch below.
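A hedged sketch of that recursion, reusing the find_modules helper above (find_packages and recursive_suite are illustrative names, not part of the original answer):

import doctest
import importlib
import os
import unittest

def find_packages(root_package):
    """Recursively import every package under root_package (every directory with an __init__.py)."""
    root_dir = os.path.dirname(root_package.__file__)
    for dirpath, dirnames, filenames in os.walk(root_dir):
        if '__init__.py' not in filenames:
            continue
        # Turn the directory path back into a dotted package name
        rel = os.path.relpath(dirpath, root_dir)
        dotted = root_package.__name__ if rel == '.' else (
            root_package.__name__ + '.' + rel.replace(os.sep, '.'))
        yield importlib.import_module(dotted)

def recursive_suite(root_package):
    """Doctest suite for the modules of root_package and all of its subpackages."""
    suite = unittest.TestSuite()
    for pkg in find_packages(root_package):
        for m in find_modules(pkg):
            try:
                suite.addTests(doctest.DocTestSuite(m))
            except ValueError:
                pass  # module has no docstrings
    return suite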
Thanks to Alex and Paul. This is what I came up with:
# tests.py
import sys, settings, re, os, doctest, unittest, imp

# import your base Django project
import myapp

# Django already runs these, don't include them again
ALREADY_RUN = ['tests.py', 'models.py']

def find_untested_modules(package):
    """ Gets all modules not already included in Django's test suite """
    files = [re.sub(r'\.py$', '', f)
             for f in os.listdir(os.path.dirname(package.__file__))
             if f.endswith(".py")
             and os.path.basename(f) not in ALREADY_RUN]
    return [imp.load_module(file, *imp.find_module(file, package.__path__))
            for file in files]

def modules_callables(module):
    return [m for m in dir(module) if callable(getattr(module, m))]

def has_doctest(docstring):
    return ">>>" in docstring

__test__ = {}
for module in find_untested_modules(myapp.module1):
    for method in modules_callables(module):
        docstring = str(getattr(module, method).__doc__)
        if has_doctest(docstring):
            print "Found doctest(s) " + module.__name__ + "." + method
            # import the method itself, so doctest can find it
            _temp = __import__(module.__name__, globals(), locals(), [method])
            locals()[method] = getattr(_temp, method)
            # Django looks in __test__ for doctests to run
            __test__[method] = getattr(module, method)
I'm not up to speed on Django's testing, but as I understand it, it uses automatic unittest discovery, just like python -m unittest discover and Nose.
If so, just put the following file somewhere the discovery will find it (usually just a matter of naming it test_doctest.py or similar).
Change your_package to the package to test. All modules (including subpackages) will be doctested.
import doctest
import pkgutil

import your_package as root_package

def load_tests(loader, tests, ignore):
    modules = pkgutil.walk_packages(root_package.__path__, root_package.__name__ + '.')
    for _, module_name, _ in modules:
        try:
            suite = doctest.DocTestSuite(module_name)
        except ValueError:
            # Presumably a "no docstrings" error. That's OK.
            pass
        else:
            tests.addTests(suite)
    return tests