I am working on a function that will recurse a directory tree from a given starting point over a given number of directory levels and return files and/or folders as a list. I want to have the option of passing a condition to this function to be evaluated for each file or folder before it is added to the list. For example, I may want my function to return only a list of files with the extension '.py', or I might want only filenames that match with some regex expression.
My code below works fine without passing any conditions to get_items(), but when using conditions I am unable to get any files and folders below the top directory, regardless of what recursion_level is set to.
If the condition and condition args are set to None, the code below prints a list of all '.py' files found in the top four directory levels. With the given arguments, though, only '.py' files in the top directory are captured.
It seems that the code under if depth != 0: in def get_items() is not being executed when a condition is given
#! /usr/bin/env python
import os
import os.path as path
import re
### --------------------------------------------------
# Get files and/or directories from input dir (defines a generator object)
# (Solution inspired by post & comments at
def get_items(input_dir,mode='f',depth=0,condition=None,arg=None,show_hidden=False):
    """Generate directory listings for input_dir and, optionally, its sub-levels.

    One object is yielded per directory visited, selected by ``mode``:
        'f' -- dict {name: path} of the files in that directory
        'd' -- dict {name: path} of the sub-directories in that directory
        'w' -- tuple (input_dir, dirs, files), similar to os.walk

    Arguments:
        input_dir   -- directory to list
        mode        -- 'f', 'd' or 'w' (see above); any other key raises KeyError
        depth       -- how many further levels to descend; 0 lists only
                       input_dir, a negative value never reaches 0 and so
                       descends through the whole tree
        condition   -- optional callable; an entry is reported only when
                       condition(path) -- or condition(path, arg) -- is truthy
        arg         -- optional second argument handed to condition
        show_hidden -- when False, names starting with '.' are ignored

    The condition only filters what is *reported*. Sub-directories are
    remembered separately for the descent, so a file-only condition (for
    example an extension check) no longer stops the recursion at the top level.
    """
    dirs,files = {},{}
    # Every visible sub-directory, whether or not it satisfies the condition;
    # this is what the recursion below walks.
    subdirs = {}
    # Use dictionary to assign generator outputs from mode
    modes = {'f':files,'d':dirs,'w':(input_dir,dirs,files)}
    for name in os.listdir(input_dir):
        # Ignore hidden files (Mac OS)
        if name.startswith('.') and not show_hidden:
            continue
        path_name = path.join(input_dir,name)
        is_dir = path.isdir(path_name)
        if is_dir:
            subdirs[name] = path_name
        # Check if the entry satisfies the given conditional function.
        # 'is not None' lets falsy arguments such as [] or '' be passed through.
        cond_args = (path_name,arg) if arg is not None else (path_name,)
        if condition and not condition(*cond_args):
            continue
        # Add item to appropriate dictionary with 'name' as key and 'path' as value
        (dirs if is_dir else files)[name] = path_name
    yield modes[mode]
    # If depth not yet zero, call the function for the next level down
    if depth != 0:
        for d in subdirs:
            for i in get_items(subdirs[d],mode,depth-1,condition,arg,show_hidden):
                yield i
# Parse output from get_items() to simple lists
def parse_trees(dir_trees):
    """Flatten the objects produced by get_items() into two plain lists.

    Arguments:
        dir_trees -- iterable of dicts {name: path} (files/dirs mode) and/or
                     tuples (top, dirs_dict, files_dict) (walk mode)

    Returns a tuple (all_files, all_dirs) of path lists. Elements that are
    neither a dict nor a tuple contribute nothing.
    """
    all_dirs,all_files = [],[]
    for tree in dir_trees:
        # Start from empty lists so an unexpected element type cannot reuse
        # the previous iteration's results.
        dirs,files = [],[]
        # If a tuple object is encountered (walk mode)
        if isinstance(tree, tuple):
            # tree[0] is the top directory; the rest are the dirs/files dicts
            dirs,files = [list(branch.values()) for branch in tree[1:]]
        # If a dictionary is encountered (files or dirs mode)
        elif isinstance(tree, dict):
            for branch in tree:
                # Access dictionary values to get path
                branch_path = tree[branch]
                # Append path to appropriate list
                (dirs if path.isdir(branch_path) else files).append(branch_path)
        all_dirs += dirs
        all_files += files
    return all_files,all_dirs
### --------------------------------------------------
# Check the basename of a given file for a regex pattern match
def pattern_check(file_path,pattern):
    """Return True when the file name (last path component) matches pattern.

    Arguments:
        file_path -- path whose final component is tested
        pattern   -- regular expression (string or compiled pattern)
    """
    filename = path.split(file_path)[1]
    # NOTE(review): the regex call on this line was lost from the source;
    # re.search (match anywhere in the name) is assumed -- confirm whether
    # re.match (anchored at the start) was intended.
    return bool(re.search(pattern,filename))
# Check for the extension in a given list argument
def ext_check(file_path,extensions):
    """Return True when the extension of file_path is one of extensions.

    Arguments:
        file_path  -- path to test; only its final component is examined
        extensions -- container of extensions including the dot, e.g. ['.py']
    """
    filename = path.split(file_path)[1]
    ext = path.splitext(filename)[1]
    return ext in extensions
### --------------------------------------------------
# Demo: list the '.py' files found in the current directory and the three
# levels below it.
mydir = os.getcwd()
recursion_depth = 3
condition_method,condition_arg = ext_check,['.py']
dir_trees = get_items(mydir,'f',recursion_depth,condition_method,condition_arg,False)
file_list,dir_list = parse_trees(dir_trees)
# Single-argument print(...) calls produce the same output under Python 2
# and Python 3 (the Python 2 print statement adds no space after a tab).
if dir_list:
    print('\nDIRS:')
    for d in dir_list:
        print('\t%s' % d)
if file_list:
    print('\nFILES:')
    for f in file_list:
        print('\t%s' % f)
print('\n')


Python 2.7 and PrettyTables

I am trying to get PrettyTable to work with the following script. I can get it to look almost right, but it keeps separating my output, so it prints 16 separate tables. I need all the information in one table that I can sort. I appreciate all the help I can get.
import sys
import os
import datetime
import hashlib
import logging
def getScanPath(): #12
    """Prompt the user for a directory and return it.

    Exits the script via sys.exit() with an error message when the entered
    path is not an existing directory.
    """
    # Prompt User for path to scan
    path = raw_input('Please enter the directory to scan: ')
    # Verify that the path is a directory
    if not os.path.isdir(path):
        sys.exit('Invalid File Path ... Script Aborted')
    return path
def getFileList(filePath):
    """Return the absolute paths of the regular files directly inside filePath.

    Entries that are not files (e.g. sub-directories) are skipped and reported
    through logging.error. Sub-directories are not descended into.
    """
    # Create an empty list to hold the resulting files
    pathList = []
    # os.listdir gives just the names of the entries, NOT the full path
    for eachFile in os.listdir(filePath):
        # 1) Get the full path by joining the directory with the filename
        # 2) Make sure the full path is an absolute path
        absPath = os.path.abspath(os.path.join(filePath, eachFile))
        # 3) Make sure the absolute path is a file i.e. not a folder or directory
        if os.path.isfile(absPath):
            # 4) if all is well, add the absolute path to the list
            pathList.append(absPath)
        else:
            logging.error('A Non-File has been identified')
    # 5) Once all files have been identified, return the list to the caller
    return pathList
def getFileName(theFile):
    """Return the final component (base name) of the path theFile."""
    return os.path.basename(theFile)
def getFileSize(theFile):
    """Return the size of theFile in bytes (raises OSError if it is missing)."""
    return os.path.getsize(theFile)
def getFileLastModified(theFile):
    """Return the last-modification time of theFile as seconds since the epoch."""
    return os.path.getmtime(theFile)
def getFileHash(theFile):
    """Return the MD5 digest of the contents of theFile as a hex string.

    The file is read in fixed-size chunks so large files are never loaded
    into memory in one piece.
    """
    hash_md5 = hashlib.md5()
    with open(theFile, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        # The chunk size does not affect the digest.
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
# Main Script Starts Here
if __name__ == '__main__':
    from prettytable import PrettyTable
    # Welcome Message
    print("\nWelcome to the file scanner\n")
    # prompt user for directory path
    scanPath = getScanPath()
    # Get a list of files with full path
    scanFileList = getFileList(scanPath)
    # Output Filenames
    print("Files found in directory")
    # Build ONE table before the loop; creating it inside the loop produced a
    # separate single-row table for every file.
    pTable = PrettyTable()
    pTable.field_names = ["File Name", "File Size", "Last Modified", "Md5 Hash Value"]
    for eachFilePath in scanFileList:
        fileName = getFileName(eachFilePath)
        fileSize = getFileSize(eachFilePath)
        lastModified = getFileLastModified(eachFilePath)
        hashValue = getFileHash(eachFilePath)
        # Convert the epoch timestamp into a datetime for display
        fileModified = datetime.datetime.fromtimestamp(lastModified)
        pTable.add_row([fileName, fileSize, fileModified, hashValue])
    # Print once, after every file has been added as a row
    print(pTable)
This should show me one big table using all the values from a set directory that the user chooses. This will allow me to sort the table later using prettytables.
I have no experience with prettyTables, but I noticed you have lastModified and fileModified yet only fileModified is used for a column in your table. Are you sure pretty table doesn't have some kind of row limit?

Moving only Files in Directories

I have looked extensively on this site and I can't see an example that fits the bill. I have 4 directories each of which contains a number of files and another directory called 'Superseded'. I am trying to write a script that will move all files in each folder into the 'Superseded' folder but I'm not having any luck.
import os, shutil
# Move every regular file found directly inside `source` into `dest`.
source = r'U:\Data\All\Python_Test\Exports\GLA'
dest = r'U:\Data\All\Python_Test\Exports\Superseded'
listofFiles = os.listdir(source)
for f in listofFiles:
    # os.path.join uses the correct separator for the platform
    fullPath = os.path.join(source, f)
    # Skip anything that is not a file, so sub-directories (such as a
    # 'Superseded' folder inside the source directory) are left in place.
    if os.path.isfile(fullPath):
        shutil.move(fullPath, dest)
I can only get this to work for one directory, and even then only when the destination directory is outside of the GLA directory, if that makes sense.
I know there is an os.path.isfile() function that would let me move only the files, but I can't seem to get it to work. Does anybody have any ideas?
This works for me:
import os
# I use this to create some empty file to move around later
def touch(fname, times=None):
    """Create fname if it does not exist and set its access/modification times.

    Arguments:
        fname -- path of the file to create or update
        times -- (atime, mtime) tuple passed to os.utime; None means "now"
    """
    # Opening in append mode creates the file without truncating existing
    # content; the with-block guarantees the handle is closed again.
    with open(fname, 'a'):
        os.utime(fname, times)
# this function is only to create the folders and files to be moved
def create_files_in_known_folders():
nameList=["source_dir_{:02d}".format(x) for x in range(4)]
for name in nameList:
if not os.path.exists(path):
if not os.path.exists(ssPath):
for i in range(3):
filepath=os.path.join(path, filename)
if not os.path.exists(filepath):
# THIS is actually the function doing what the OP asked for
# there many details that can be tweaked
def move_from_known_to_dest():
# here my given names from above
nameList=["source_dir_{:02d}".format(x) for x in range(4)]
# and my destination path
# not interested in files that are in subfolders
# if those would exist change to os.walk and
# exclude the destination folder with according if...:
for name in nameList:
print path
for fileName in dirList:
filePath=os.path.join(path, fileName)
print filePath
if os.path.isfile(filePath):
print destPath
#alternatively you can chose to 1) overwrite ()might not work 2)delete first 3) not copy
# another option is to check for existence and if
# present add a number to the dest-file-name
# use while loop to check for first non-present number
assert not os.path.exists(destPath), "file {} already exits".format(destPath)
os.rename( filePath, destPath)
if __name__=="__main__":
#break here and check that filestructure and files have been created
But think carefully about what to do if the file already exists in your destination folder.
os.walk might also be something you want to look at.
Implementing several options for the copy behaviour may look like this:
import warnings
formatwarning_orig = warnings.formatwarning
warnings.formatwarning = lambda message, category, filename, lineno, line=None: \
formatwarning_orig(message, category, filename, lineno, line='')
def move_from_known_to_dest_extra(behaviour='overwrite'):
assert behaviour in ['overwrite','leave','accumulate'], "unknown behaviour: {}".format(behaviour)
nameList=["source_dir_{:02d}".format(x) for x in range(4)]
for name in nameList:
for fileName in dirList:
filePath=os.path.join(path, fileName)
if os.path.isfile(filePath):
# simplest case...does not exist so copy
if not os.path.exists(destPath):
os.rename( filePath, destPath)
if behaviour=='leave':
warnings.warn( "Warning! Not copying file: {}; file {} already exists!".format(filePath, destPath))
elif behaviour =='overwrite':
# documentation states:
# On Windows, if dst already exists, OSError will be raised even if it is a file.
os.rename( filePath, destPath)
warnings.warn( "Warning!Overwriting file: {}.".format(destPath))
elif behaviour=='accumulate': #redundant but OK
while True:
if not os.path.exists(newDestPath):
assert addPost < 10000, "Clean up the mess!"
os.rename( filePath, newDestPath)
assert 0, "Unknown copy behaviour requested."
Additionally one might check for file permissions as, e.g., os.remove() may raise an exception. In this case, however, I assume that permissions are properly set by the OP.

How do I confirm with python that required files are in a particular folder and are accessible or not?

I have 5 files in a folder App:
|--Other Folders or Files
Now I want to know whether the files (A.txt, B.txt, C.txt, D.txt, E.txt) are present, and if they are, I want to call a function Cleaner and supply the names of these files to it. I have written this code but nothing is happening. The function is not getting called.
import glob
import csv
import itertools
files = glob.glob("*.txt")
i = 0
def sublist(a, b):
    """Return True when the items of a occur in b in the same relative order.

    b is consumed through a single iterator, so each item of a must be found
    *after* the position where the previous one was found (a subsequence
    test, not a plain membership test). An empty a is always a sublist.
    """
    seq = iter(b)
    try:
        for x in a:
            # Advance through b until x is found; next() raises StopIteration
            # when b runs out first.
            while next(seq) != x:
                pass
        return True
    except StopIteration:
        return False
required_files = ['Alternate_ADR6_LFB1.txt', 'Company_Code.txt', 'Left_LIFNR.txt', 'LFA1.txt', 'LFB1.TXT', 'LFBK.TXT']
if sublist(required_files,files):
for files in required_files:
f = open(files , 'r')
except IOError as e:
print 'Error opening or accessing files'
i = 1
print 'Required files are not in correct folder'
if i == 1:
for files in required_files:
def Cleansing(filename):
with open('filename', 'rb') as f_input:
with open('filename', 'rb') as f_input, open('filename_Cleaned.csv', 'wb') as f_output:
csv_output = csv.writer(f_output)
I think I am now able to call the function and to check for the valid files, but it's not very Pythonic. I am also not able to open or create a file named after the input file plus _cleaned: filename_cleaned.csv.
You want to check if a list of files (required_files) are in a folder.
You successfully get the complete list of text files in the folder with files = glob.glob("*.txt")
So the first question is: Checking for sublist in list
As the order is not important, we can use sets:
if set(required_files) <= set(files):
# do stuff
#print warning
Next question: How to open the files and create an outputs with names like "filename_Cleaned.csv"
A very important thing you have to understand: "filename" is not the same thing as filename. The first is a string, it will always be the same thing, it will not be replaced by real filenames. When writing open('filename', 'rb') you're trying to open a file called "filename".
filename however can be a variable name and take different values.
for filename in required_files:
def Cleansing(filename):
with open(filename, 'rb') as f_input, open(filename+'_Cleaned.csv', 'wb') as f_output:
#read stuff in f_input
#write stuff in f_output

Django models aren't loaded yet

I am getting this error when I run make html in sphinx:
Is this because the utils file is calling Frequency module before the models are registered?
What should be done to rectify it?
This is my
#!/usr/bin/env python3
import sys
import os
import shlex
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('..'))
from django.conf import settings
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The encoding of source files.
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
# General information about the project.
project = 'Locality Management'
copyright = '2015, DOne'
author = 'DOne'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
# The short X.Y version.
version = '0.0.1'
# The full version, including alpha/beta/rc tags.
release = '0.0.1'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
# The reST default role (used for this markup: `text`) to use for all
# documents.
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built documents.
#keep_warnings = False
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'alabaster'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
#html_extra_path = []
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}
# If false, no module index is generated.
#html_domain_indices = True
# If false, no index is generated.
#html_use_index = True
# If true, the index is split into individual pages for each letter.
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a <link> tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None
# Language to be used for generating the HTML full-text search index.
# Sphinx supports the following languages:
# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
#html_search_language = 'en'
# A dictionary with options for the search language support, empty by default.
# Now only 'ja' uses this config value
#html_search_options = {'type': 'default'}
# The name of a javascript file (relative to the configuration directory) that
# implements a search results scorer. If empty, the default will be used.
#html_search_scorer = 'scorer.js'
# Output file base name for HTML help builder.
htmlhelp_basename = 'LocalityManagementdoc'
# -- Options for LaTeX output ---------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#'preamble': '',
# Latex figure (float) alignment
#'figure_align': 'htbp',
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'LocalityManagement.tex', 'Locality Management Documentation',
'DOne', 'manual'),
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False
# If true, show page references after internal links.
#latex_show_pagerefs = False
# If true, show URL addresses after external links.
#latex_show_urls = False
# Documents to append as an appendix to all manuals.
#latex_appendices = []
# If false, no module index is generated.
#latex_domain_indices = True
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'localitymanagement', 'Locality Management Documentation',
[author], 1)
# If true, show URL addresses after external links.
#man_show_urls = False
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'LocalityManagement', 'Locality Management Documentation',
author, 'LocalityManagement', 'One line description of project.',
# Documents to append as an appendix to all manuals.
#texinfo_appendices = []
# If false, no module index is generated.
#texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
# If true, do not generate a #detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False
import django
os.environ['DJANGO_SETTINGS_MODULE'] = 'myproject.settings'
instead of
from django.conf import settings
should do the trick.

Writing between characters in a text file?

I have a module that I want to write into, and I'm having several problems. One is locating a string within the file. Currently I open the file, iterate with for line in (filename), and use an if to check whether the line matches a string; all of that works. However, before that (it is commented out now) I tried to determine the current position using tell(). This gave me an incorrect position, 1118 I believe, instead of 660-something. So I determined the position manually to use with seek.
The second problem is that if I write to this file at that position, it just overwrites all the data from there on. I want to insert the data instead of overwriting it.
Unless I insert a string equal in character length to the text where I want the write to happen, it will just overwrite most of the if statements and other code below.
Is there any way to do this natively?
Here is the file i want to write into
# Filename:
# Created By: Gregory Smith
# Description: A script containing a library of user created curves
# Purpose: A library to store names of all the user curves, and deletes curves
# if specified to do so
import os
import maya.cmds as mc
import module_locator
my_path = module_locator.module_path()
def usercurve_lib(fbxfile=None, remove=None):
"""All control/curve objects created by user
Keyword Arguments:
fbxfile -- (string) name of fbx file to import
remove -- (boolean) will remove an entry from the library and delete the
associated fbx file
curves_dict = {
if remove is None:
return curves_dict
elif not remove:
name = mc.file(curves_dict[fbxfile], typ='FBX', i=1,
iv=True, pmt=False)
return name[0]
except RuntimeError:
return None
os.remove('%s\%s.fbx' %(my_path, fbxfile))
return '%s.fbx' %(fbxfile)
except OSError:
print 'File %s does not exist.' %(fbxfile)
return None
This is the code below that i'm running in a module called (this is not the complete code, and 'my_path' is just the path of the current directory is being run in)
def create_entry(self, crv):
"""Exports user curve to user data directory and adds entry into
Keyword Arguments:
crv -- (PyNode) the object to export
# set settings
mel.eval('FBXExportFileVersion "FBX201400"')
mel.eval('FBXExportInputConnections -v 0')
mc.file('%s\userdat\%s.fbx' %(my_path, str(crv)), force=True, options='',
typ='FBX export', pr=True, es=True)
with open('%s\userdat\\' %(my_path), 'r+') as usercrvs:
for line in usercrvs:
if line.strip() == '#crvstart':
#linepos = usercrvs.tell()
#linepos = int(linepos), 0), 0)
usercrvs.write("\n "+str(crv)+" : '%s\%s' %(my_path, '"+
This will give me this result below:
# Filename:
# Created By: Gregory Smith
# Description: A script containing a library of user created curves
# Purpose: A library to store names of all the user curves, and deletes curves
# if specified to do so
import os
import maya.cmds as mc
import module_locator
my_path = module_locator.module_path()
def usercurve_lib(fbxfile=None, remove=None):
"""All control/curve objects created by user
Keyword Arguments:
fbxfile -- (string) name of fbx file to import
remove -- (boolean) will remove an entry from the library and delete the
associated fbx file
curves_dict = {
loop_crv : '%s\%s' %(my_path, 'loop_crv.fbx') return curves_dict
elif not remove:
name = mc.file(curves_dict[fbxfile], typ='FBX', i=1,
iv=True, pmt=False)
return name[0]
except RuntimeError:
return None
os.remove('%s\%s.fbx' %(my_path, fbxfile))
return '%s.fbx' %(fbxfile)
except OSError:
print 'File %s does not exist.' %(fbxfile)
return None
In short: on most operating systems you can not insert into files without rewriting if the lengths are not the same.
Have a look at a long discussion here: Why can we not insert into files without the additional writes? (I neither mean append, nor over-write)