Have all file updated in a directory - python-2.7

I'm needing a bit of help.
I'm looking to use python 2.7 to see if all files in a directory have the same modified date.
I'll be adding this into an if statement so I'm only looking for a True & False value. So if all file have been modified then true else false.
Any help would awesome.
Update:-
The method below works but is this the most effective way?
import os,time
def get_information(directory):
file_list = []
for i in os.listdir(directory):
a = os.stat(os.path.join(directory,i))
file_list.append(str(time.ctime(a.st_atime))[:3])
return file_list
def checkEqual2(iterator):
return len(set(iterator)) <= 1
print get_information("C:\\Auto_Import\\")
print checkEqual2 (get_information("C:\\Auto_Import\\"))

You can use os.scandir for better efficiency since it pulls the information of all files in a given directory in one single call:
from os import scandir
def has_same_mtime(path):
return len(set(entry.stat().st_mtime for entry in scandir(path))) == 1
If you're using Python 3.4 or before you would have to fall back to os.listdir instead:
import os
def has_same_mtime(path):
return len(set(os.stat(os.path.join(path, name)).st_mtime for name in os.listdir(path))) == 1

Related

Reading multiple files in a directory with pyyaml

I'm trying to read all yaml files in a directory, but I am having trouble. First, because I am using Python 2.7 (and I cannot change to 3) and all of my files are utf-8 (and I also need them to keep this way).
import os
import yaml
import codecs
def yaml_reader(filepath):
with codecs.open(filepath, "r", encoding='utf-8') as file_descriptor:
data = yaml.load_all(file_descriptor)
return data
def yaml_dump(filepath, data):
with open(filepath, 'w') as file_descriptor:
yaml.dump(data, file_descriptor)
if __name__ == "__main__":
filepath = os.listdir(os.getcwd())
data = yaml_reader(filepath)
print data
When I run this code, python gives me the message:
TypeError: coercing to Unicode: need string or buffer, list found.
I want this program to show the content of the files. Can anyone help me?
I guess the issue is with filepath.
os.listdir(os.getcwd()) returns the list of all the files in the directory. so you are passing the list to codecs.open() instead of filename
There are multiple problems with your code, apart from that it is invalide Python, in the way you formatted this.
def yaml_reader(filepath):
with codecs.open(filepath, "r", encoding='utf-8') as file_descriptor:
data = yaml.load_all(file_descriptor)
return data
however it is not necessary to do the decoding, PyYAML is perfectly capable of processing UTF-8:
def yaml_reader(filepath):
with open(filepath, "rb") as file_descriptor:
data = yaml.load_all(file_descriptor)
return data
I hope you realise your trying to load multiple documents and always get a list as a result in data even if your file contains one document.
Then the line:
filepath = os.listdir(os.getcwd())
gives you a list of files, so you need to do:
filepath = os.listdir(os.getcwd())[0]
or decide in some other way, which of the files you want to open. If you want to combine all files (assuming they are YAML) in one big YAML file, you need to do:
if __name__ == "__main__":
data = []
for filepath in os.listdir(os.getcwd()):
data.extend(yaml_reader(filepath))
print data
And your dump routine would need to change to:
def yaml_dump(filepath, data):
with open(filepath, 'wb') as file_descriptor:
yaml.dump(data, file_descriptor, allow_unicode=True, encoding='utf-8')
However this all brings you to the biggest problem: that you are using PyYAML, that will mangle your YAML, dropping flow-style, comment, anchor names, special int/float, quotes around scalars etc. Apart from that PyYAML has not been updated to support YAML 1.2 documents (which has been the standard since 2009). I recommend you switch to using ruamel.yaml (disclaimer: I am the author of that package), which supports YAML 1.2 and leaves comments etc in place.
And even if you are bound to use Python 2, you should use the Python 3 like syntax e.g. for print that you can get with from __future__ imports.
So I recommend you do:
pip install pathlib2 ruamel.yaml
and then use:
from __future__ import absolute_import, unicode_literals, print_function
from pathlib import Path
from ruamel.yaml import YAML
if __name__ == "__main__":
data = []
yaml = YAML()
yaml.preserve_quotes = True
for filepath in Path('.').glob('*.yaml'):
data.extend(yaml.load_all(filepath))
print(data)
yaml.dump(data, Path('your_output.yaml'))

How do I confirm with python that required files are in a particular folder and are accessible or not?

I have 5 files in a folder App:
App|
|--A.txt
|--B.txt
|--C.txt
|--D.txt
|--E.txt
|--Run.py
|--Other Folders or Files
Now I want to know if files (A.txt,B.txtC.txt,C.txt,D.txt,E.txt) is present or not and if its there than I want to call a function Cleaner which will supply names of these files to that function. I have written this code but nothing is happening.The function is not getting called.
import glob
import csv
import itertools
files = glob.glob("*.txt")
i = 0
def sublist(a, b):
seq = iter(b)
try:
for x in a:
while next(seq) != x: pass
else:
return True
except StopIteration:
pass
return False
required_files = ['Alternate_ADR6_LFB1.txt', 'Company_Code.txt', 'Left_LIFNR.txt', 'LFA1.txt', 'LFB1.TXT', 'LFBK.TXT']
if sublist(required_files,files):
for files in required_files:
try:
f = open(files , 'r')
f.close()
except IOError as e:
print 'Error opening or accessing files'
i = 1
else:
print 'Required files are not in correct folder'
if i == 1:
for files in required_files:
Cleansing(files)
def Cleansing(filename):
with open('filename', 'rb') as f_input:
...
...
break
with open('filename', 'rb') as f_input, open('filename_Cleaned.csv', 'wb') as f_output:
csv_output = csv.writer(f_output)
csv_output.writerow('something')
Upadate
I think now I am able to call the function and also able to check the valid files but its not that pythonic. And I am not able to open or create a file with the name of the file plus _cleaned :filename_cleaned.csv.
You want to check if a list of files (required_files) are in a folder.
You successfully get the complete list of text files in the folder with files = glob.glob("*.txt")
So the first question is: Checking for sublist in list
As the order is not important, we can use sets:
if set(required_files) <= set(files):
# do stuff
else:
#print warning
Next question: How to open the files and create an outputs with names like "filename_Cleaned.csv"
A very important thing you have to understand: "filename" is not the same thing as filename. The first is a string, it will always be the same thing, it will not be replaced by real filenames. When writing open('filename', 'rb') you're trying to open a file called "filename".
filename however can be a variable name and take different values.
for filename in required_files:
Cleansing(filename)
def Cleansing(filename):
with open(filename, 'rb') as f_input, open(filename+'_Cleaned.csv', 'wb') as f_output:
#read stuff in f_input
#write stuff in f_output

Writing between between characters in a text file?

I have a module that i want to write into. I'm having several problems. One of which locating a string within the file. Currently I open the file, then use a for line in (filename), then do an if to determine if it finds a string, and all of that works. However before (it is commented out now) i tried to determine what position it was at using tell(). However this gave me an incorrect position, giving me 1118 i believe, instead of 660 something. So i determined the position manually to use seek.
However the second problem was, if i write to this file at the position in the file, it just overwrites all the data from thereon. I would want to insert the data instead of overwriting it.
Unless i insert a string equal in character length where i want the write to happen, it will just override most of the if statements and things like that below.
Is there any way to naively do this?
Here is the file i want to write into
# Filename: neo_usercurves.py
# Created By: Gregory Smith
# Description: A script containing a library of user created curves
# Purpose: A library to store names of all the user curves, and deletes curves
# if specified to do so
import os
import maya.cmds as mc
import module_locator
my_path = module_locator.module_path()
def usercurve_lib(fbxfile=None, remove=None):
"""All control/curve objects created by user
Keyword Arguments:
fbxfile -- (string) name of fbx file to import
remove -- (boolean) will remove an entry from the library and delete the
associated fbx file
"""
curves_dict = {
#crvstart
#crvend
}
if remove is None:
return curves_dict
elif not remove:
try:
name = mc.file(curves_dict[fbxfile], typ='FBX', i=1,
iv=True, pmt=False)
return name[0]
except RuntimeError:
return None
else:
try:
os.remove('%s\%s.fbx' %(my_path, fbxfile))
return '%s.fbx' %(fbxfile)
except OSError:
print 'File %s does not exist.' %(fbxfile)
return None
This is the code below that i'm running in a module called neo_curves.py (this is not the complete code, and 'my_path' is just the path of the current directory neo_curves.py is being run in)
def create_entry(self, crv):
"""Exports user curve to user data directory and adds entry into
neo_usercurves.py
Keyword Arguments:
crv -- (PyNode) the object to export
"""
# set settings
mel.eval('FBXExportFileVersion "FBX201400"')
mel.eval('FBXExportInputConnections -v 0')
select(crv)
mc.file('%s\userdat\%s.fbx' %(my_path, str(crv)), force=True, options='',
typ='FBX export', pr=True, es=True)
with open('%s\userdat\\neo_usercurves.py' %(my_path), 'r+') as usercrvs:
for line in usercrvs:
if line.strip() == '#crvstart':
#linepos = usercrvs.tell()
#linepos = int(linepos)
#usercrvs.seek(linepos, 0)
usercrvs.seek(665, 0)
usercrvs.write("\n "+str(crv)+" : '%s\%s' %(my_path, '"+
str(crv)+".fbx')")
break
This will give me this result below:
# Filename: neo_usercurves.py
# Created By: Gregory Smith
# Description: A script containing a library of user created curves
# Purpose: A library to store names of all the user curves, and deletes curves
# if specified to do so
import os
import maya.cmds as mc
import module_locator
my_path = module_locator.module_path()
def usercurve_lib(fbxfile=None, remove=None):
"""All control/curve objects created by user
Keyword Arguments:
fbxfile -- (string) name of fbx file to import
remove -- (boolean) will remove an entry from the library and delete the
associated fbx file
"""
curves_dict = {
#crvstart
loop_crv : '%s\%s' %(my_path, 'loop_crv.fbx') return curves_dict
elif not remove:
try:
name = mc.file(curves_dict[fbxfile], typ='FBX', i=1,
iv=True, pmt=False)
return name[0]
except RuntimeError:
return None
else:
try:
os.remove('%s\%s.fbx' %(my_path, fbxfile))
return '%s.fbx' %(fbxfile)
except OSError:
print 'File %s does not exist.' %(fbxfile)
return None
In short: on most operating systems you can not insert into files without rewriting if the lengths are not the same.
Have a look at a long discussion here: Why can we not insert into files without the additional writes? (I neither mean append, nor over-write)

Manually building a deep copy of a ConfigParser in Python 2.7

Just starting in on my Python learning curve, and hitting a snag in porting some code up to Python 2.7. It appears that in Python 2.7 it is no longer possible to perform a deepcopy() on instances of ConfigParser. It also appears that the Python team isn't terribly interested in restoring such a capability:
http://bugs.python.org/issue16058
Can someone propose an elegant solution for manually constructing a deepcopy/duplicate of an instance of ConfigParser?
Many thanks, -Pete
This is just an example implementation of Jan Vlcinsky answer written in Python 3 (I don't have enough reputation to post this as a comment to Jans answer). Many thanks to Jan for the push in the right direction.
To make a full (deep) copy of base_config into new_config just do the following;
import io
import configparser
config_string = io.StringIO()
base_config.write(config_string)
# We must reset the buffer ready for reading.
config_string.seek(0)
new_config = configparser.ConfigParser()
new_config.read_file(config_string)
Based on #Toenex answer, modified for Python 2.7:
import StringIO
import ConfigParser
# Create a deep copy of the configuration object
config_string = StringIO.StringIO()
base_config.write(config_string)
# We must reset the buffer to make it ready for reading.
config_string.seek(0)
new_config = ConfigParser.ConfigParser()
new_config.readfp(config_string)
The previous solution doesn't work in all python3 use cases. Specifically if the original parser is using Extended Interpolation the copy may fail to work correctly. Fortunately, the easy solution is to use the pickle module:
def deep_copy(config:configparser.ConfigParser)->configparser.ConfigParser:
"""deep copy config"""
rep = pickle.dumps(config)
new_config = pickle.loads(rep)
return new_config
If you need new independent copy of ConfigParser, then one option is:
have original version of ConfigParser
serialize the config file into temporary file or StringIO buffer
use that tmpfile or StringIO buffer to create new ConfigParser.
And you have it done.
If you are using Python 3 (3.2+) you can use the Mapping Protocol Access to copy (actually deep copy) the sections and options of a source configuration to another ConfigParser object.
You can use read_dict() to copy the state of a configuration parser.
Here is a demo:
import configparser
# the configuration to deep copy:
src_cfg = configparser.ConfigParser()
src_cfg.add_section("Section A")
src_cfg["Section A"]["key1"] = "value1"
src_cfg["Section A"]["key2"] = "value2"
# the destination configuration
dst_cfg = configparser.ConfigParser()
dst_cfg.read_dict(src_cfg)
dst_cfg.add_section("Section B")
dst_cfg["Section B"]["key3"] = "value3"
To display the resulting configuration, you can try:
import io
output = io.StringIO()
dst_cfg.write(output)
print(output.getvalue())
You get:
[Section A]
key1 = value1
key2 = value2
[Section B]
key3 = value3
After reading this article, I am more familiar with config.ini.
Record as follows:
import io
import configparser
def copy_config_demo():
with io.StringIO() as memory_file:
memory_file.write(str(test_config_data.__doc__)) # original_config.write(memory_file)
memory_file.seek(0)
new_config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
new_config.read_file(memory_file)
# below is just for test
for section_name, list_item in [(section_name, new_config.items(section_name)) for section_name in new_config.sections()]:
print('\n[' + section_name + ']')
for key, value in list_item:
print(f'{key}: {value}')
def test_config_data():
"""
[Common]
home_dir: /Users
library_dir: /Library
system_dir: /System
macports_dir: /opt/local
[Frameworks]
Python: >=3.2
path: ${Common:system_dir}/Library/Frameworks/
[Arthur]
name: Carson
my_dir: ${Common:home_dir}/twosheds
my_pictures: ${my_dir}/Pictures
python_dir: ${Frameworks:path}/Python/Versions/${Frameworks:Python}
"""
output:
[Common]
home_dir: /Users
library_dir: /Library
system_dir: /System
macports_dir: /opt/local
[Frameworks]
python: >=3.2
path: /System/Library/Frameworks/
[Arthur]
name: Carson
my_dir: /Users/twosheds
my_pictures: /Users/twosheds/Pictures
python_dir: /System/Library/Frameworks//Python/Versions/>=3.2
hoping it is helpful to you.

How to tell whether a file is executable on Windows in Python?

I'm writing grepath utility that finds executables in %PATH% that match a pattern.
I need to define whether given filename in the path is executable (emphasis is on command line scripts).
Based on "Tell if a file is executable" I've got:
import os
from pywintypes import error
from win32api import FindExecutable, GetLongPathName
def is_executable_win(path):
try:
_, executable = FindExecutable(path)
ext = lambda p: os.path.splitext(p)[1].lower()
if (ext(path) == ext(executable) # reject *.cmd~, *.bat~ cases
and samefile(GetLongPathName(executable), path)):
return True
# path is a document with assoc. check whether it has extension
# from %PATHEXT%
pathexts = os.environ.get('PATHEXT', '').split(os.pathsep)
return any(ext(path) == e.lower() for e in pathexts)
except error:
return None # not an exe or a document with assoc.
Where samefile is:
try: samefile = os.path.samefile
except AttributeError:
def samefile(path1, path2):
rp = lambda p: os.path.realpath(os.path.normcase(p))
return rp(path1) == rp(path2)
How is_executable_win could be improved in the given context? What functions from Win32 API could help?
P.S.
time performance doesn't matter
subst drives and UNC, unicode paths are not under consideration
C++ answer is OK if it uses functions available on Windows XP
Examples
notepad.exe is executable (as a rule)
which.py is executable if it is associated with some executable (e.g., python.exe) and .PY is in %PATHEXT% i.e., 'C:\> which' could start:
some\path\python.exe another\path\in\PATH\which.py
somefile.doc most probably is not executable (when it is associated with Word for example)
another_file.txt is not executable (as a rule)
ack.pl is executable if it is associated with some executable (most probably perl.exe) and .PL is in %PATHEXT% (i.e. I can run ack without specifing extension if it is in the path)
What is "executable" in this question
def is_executable_win_destructive(path):
#NOTE: it assumes `path` <-> `barename` for the sake of example
barename = os.path.splitext(os.path.basename(path))[0]
p = Popen(barename, stdout=PIPE, stderr=PIPE, shell=True)
stdout, stderr = p.communicate()
return p.poll() != 1 or stdout != '' or stderr != error_message(barename)
Where error_message() depends on language. English version is:
def error_message(barename):
return "'%(barename)s' is not recognized as an internal" \
" or external\r\ncommand, operable program or batch file.\r\n" \
% dict(barename=barename)
If is_executable_win_destructive() returns when it defines whether the path points to an executable for the purpose of this question.
Example:
>>> path = r"c:\docs\somefile.doc"
>>> barename = "somefile"
After that it executes %COMSPEC% (cmd.exe by default):
c:\cwd> cmd.exe /c somefile
If output looks like this:
'somefile' is not recognized as an internal or external
command, operable program or batch file.
Then the path is not an executable else it is (lets assume there is one-to-one correspondence between path and barename for the sake of example).
Another example:
>>> path = r'c:\bin\grepath.py'
>>> barename = 'grepath'
If .PY in %PATHEXT% and c:\bin is in %PATH% then:
c:\docs> grepath
Usage:
grepath.py [options] PATTERN
grepath.py [options] -e PATTERN
grepath.py: error: incorrect number of arguments
The above output is not equal to error_message(barename) therefore 'c:\bin\grepath.py' is an "executable".
So the question is how to find out whether the path will produce the error without actually running it? What Win32 API function and what conditions used to trigger the 'is not recognized as an internal..' error?
shoosh beat me to it :)
If I remember correctly, you should try to read the first 2 characters in the file. If you get back "MZ", you have an exe.
hnd = open(file,"rb")
if hnd.read(2) == "MZ":
print "exe"
I think, that this should be sufficient:
check file extension in PATHEXT - whether file is directly executable
using cmd.exe command "assoc .ext" you can see whether file is associated with some executable (some executable will be launched when you launch this file). You can parse capture output of assoc without arguments and collect all extensions that are associated and check tested file extension.
other file extensions will trigger error "command is not recognized ..." therefore you can assume that such files are NOT executable.
I don't really understand how you can tell the difference between somefile.py and somefile.txt because association can be really the same. You can configure system to run .txt files the same way as .py files.
A windows PE always starts with the characters "MZ". This includes however also any kind of DLLs which are not necessarily executables.
To check for this however you'll have to open the file and read the header so that's probably not what you're looking for.
Here's the grepath.py that I've linked in my question:
#!/usr/bin/env python
"""Find executables in %PATH% that match PATTERN.
"""
#XXX: remove --use-pathext option
import fnmatch, itertools, os, re, sys, warnings
from optparse import OptionParser
from stat import S_IMODE, S_ISREG, ST_MODE
from subprocess import PIPE, Popen
def warn_import(*args):
"""pass '-Wd' option to python interpreter to see these warnings."""
warnings.warn("%r" % (args,), ImportWarning, stacklevel=2)
class samefile_win:
"""
http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html
"""
#staticmethod
def get_read_handle (filename):
return win32file.CreateFile (
filename,
win32file.GENERIC_READ,
win32file.FILE_SHARE_READ,
None,
win32file.OPEN_EXISTING,
0,
None
)
#staticmethod
def get_unique_id (hFile):
(attributes,
created_at, accessed_at, written_at,
volume,
file_hi, file_lo,
n_links,
index_hi, index_lo
) = win32file.GetFileInformationByHandle (hFile)
return volume, index_hi, index_lo
#staticmethod
def samefile_win(filename1, filename2):
"""Whether filename1 and filename2 represent the same file.
It works for subst, ntfs hardlinks, junction points.
It works unreliably for network drives.
Based on GetFileInformationByHandle() Win32 API call.
http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html
"""
if samefile_generic(filename1, filename2): return True
try:
hFile1 = samefile_win.get_read_handle (filename1)
hFile2 = samefile_win.get_read_handle (filename2)
are_equal = (samefile_win.get_unique_id (hFile1)
== samefile_win.get_unique_id (hFile2))
hFile2.Close ()
hFile1.Close ()
return are_equal
except win32file.error:
return None
def canonical_path(path):
"""NOTE: it might return wrong path for paths with symbolic links."""
return os.path.realpath(os.path.normcase(path))
def samefile_generic(path1, path2):
return canonical_path(path1) == canonical_path(path2)
class is_executable_destructive:
#staticmethod
def error_message(barename):
r"""
"'%(barename)s' is not recognized as an internal or external\r\n
command, operable program or batch file.\r\n"
in Russian:
"""
return '"%(barename)s" \xad\xa5 \xef\xa2\xab\xef\xa5\xe2\xe1\xef \xa2\xad\xe3\xe2\xe0\xa5\xad\xad\xa5\xa9 \xa8\xab\xa8 \xa2\xad\xa5\xe8\xad\xa5\xa9\r\n\xaa\xae\xac\xa0\xad\xa4\xae\xa9, \xa8\xe1\xaf\xae\xab\xad\xef\xa5\xac\xae\xa9 \xaf\xe0\xae\xa3\xe0\xa0\xac\xac\xae\xa9 \xa8\xab\xa8 \xaf\xa0\xaa\xa5\xe2\xad\xeb\xac \xe4\xa0\xa9\xab\xae\xac.\r\n' % dict(barename=barename)
#staticmethod
def is_executable_win_destructive(path):
# assume path <-> barename that is false in general
barename = os.path.splitext(os.path.basename(path))[0]
p = Popen(barename, stdout=PIPE, stderr=PIPE, shell=True)
stdout, stderr = p.communicate()
return p.poll() != 1 or stdout != '' or stderr != error_message(barename)
def is_executable_win(path):
"""Based on:
http://timgolden.me.uk/python/win32_how_do_i/tell-if-a-file-is-executable.html
Known bugs: treat some "*~" files as executable, e.g. some "*.bat~" files
"""
try:
_, executable = FindExecutable(path)
return bool(samefile(GetLongPathName(executable), path))
except error:
return None # not an exe or a document with assoc.
def is_executable_posix(path):
"""Whether the file is executable.
Based on which.py from stdlib
"""
#XXX it ignores effective uid, guid?
try: st = os.stat(path)
except os.error:
return None
isregfile = S_ISREG(st[ST_MODE])
isexemode = (S_IMODE(st[ST_MODE]) & 0111)
return bool(isregfile and isexemode)
try:
#XXX replace with ctypes?
from win32api import FindExecutable, GetLongPathName, error
is_executable = is_executable_win
except ImportError, e:
warn_import("is_executable: fall back on posix variant", e)
is_executable = is_executable_posix
try: samefile = os.path.samefile
except AttributeError, e:
warn_import("samefile: fallback to samefile_win", e)
try:
import win32file
samefile = samefile_win.samefile_win
except ImportError, e:
warn_import("samefile: fallback to generic", e)
samefile = samefile_generic
def main():
parser = OptionParser(usage="""
%prog [options] PATTERN
%prog [options] -e PATTERN""", description=__doc__)
opt = parser.add_option
opt("-e", "--regex", metavar="PATTERN",
help="use PATTERN as a regular expression")
opt("--ignore-case", action="store_true", default=True,
help="""[default] ignore case when --regex is present; for \
non-regex PATTERN both FILENAME and PATTERN are first \
case-normalized if the operating system requires it otherwise \
unchanged.""")
opt("--no-ignore-case", dest="ignore_case", action="store_false")
opt("--use-pathext", action="store_true", default=True,
help="[default] whether to use %PATHEXT% environment variable")
opt("--no-use-pathext", dest="use_pathext", action="store_false")
opt("--show-non-executable", action="store_true", default=False,
help="show non executable files")
(options, args) = parser.parse_args()
if len(args) != 1 and not options.regex:
parser.error("incorrect number of arguments")
if not options.regex:
pattern = args[0]
del args
if options.regex:
filepred = re.compile(options.regex, options.ignore_case and re.I).search
else:
fnmatch_ = fnmatch.fnmatch if options.ignore_case else fnmatch.fnmatchcase
for file_pattern_symbol in "*?":
if file_pattern_symbol in pattern:
break
else: # match in any place if no explicit file pattern symbols supplied
pattern = "*" + pattern + "*"
filepred = lambda fn: fnmatch_(fn, pattern)
if not options.regex and options.ignore_case:
filter_files = lambda files: fnmatch.filter(files, pattern)
else:
filter_files = lambda files: itertools.ifilter(filepred, files)
if options.use_pathext:
pathexts = frozenset(map(str.upper,
os.environ.get('PATHEXT', '').split(os.pathsep)))
seen = set()
for dirpath in os.environ.get('PATH', '').split(os.pathsep):
if os.path.isdir(dirpath): # assume no expansion needed
# visit "each" directory only once
# it is unaware of subst drives, junction points, symlinks, etc
rp = canonical_path(dirpath)
if rp in seen: continue
seen.add(rp); del rp
for filename in filter_files(os.listdir(dirpath)):
path = os.path.join(dirpath, filename)
isexe = is_executable(path)
if isexe == False and is_executable == is_executable_win:
# path is a document with associated program
# check whether it is a script (.pl, .rb, .py, etc)
if not isexe and options.use_pathext:
ext = os.path.splitext(path)[1]
isexe = ext.upper() in pathexts
if isexe:
print path
elif options.show_non_executable:
print "non-executable:", path
if __name__=="__main__":
main()
Parse the PE format.
http://code.google.com/p/pefile/
This is probably the best solution you will get other than using python to actually try to run the program.
Edit: I see you also want files that have associations. This will require mucking in the registry which I don't have the information for.
Edit2: I also see that you differentiate between .doc and .py. This is a rather arbitrary differentiation which must be specified with manual rules, because to windows, they are both file extensions that a program reads.
Your question can't be answered. Windows can't tell the difference between a file which is associated with a scripting language vs. some other arbitrary program. As Windows is concerned, a .PY file is simply a document which is opened by python.exe.