Writing between between characters in a text file? - python-2.7

I have a module that i want to write into. I'm having several problems. One of which locating a string within the file. Currently I open the file, then use a for line in (filename), then do an if to determine if it finds a string, and all of that works. However before (it is commented out now) i tried to determine what position it was at using tell(). However this gave me an incorrect position, giving me 1118 i believe, instead of 660 something. So i determined the position manually to use seek.
However the second problem was, if i write to this file at the position in the file, it just overwrites all the data from thereon. I would want to insert the data instead of overwriting it.
Unless i insert a string equal in character length where i want the write to happen, it will just override most of the if statements and things like that below.
Is there any way to naively do this?
Here is the file i want to write into
# Filename: neo_usercurves.py
# Created By: Gregory Smith
# Description: A script containing a library of user created curves
# Purpose: A library to store names of all the user curves, and deletes curves
# if specified to do so
import os
import maya.cmds as mc
import module_locator
my_path = module_locator.module_path()
def usercurve_lib(fbxfile=None, remove=None):
"""All control/curve objects created by user
Keyword Arguments:
fbxfile -- (string) name of fbx file to import
remove -- (boolean) will remove an entry from the library and delete the
associated fbx file
"""
curves_dict = {
#crvstart
#crvend
}
if remove is None:
return curves_dict
elif not remove:
try:
name = mc.file(curves_dict[fbxfile], typ='FBX', i=1,
iv=True, pmt=False)
return name[0]
except RuntimeError:
return None
else:
try:
os.remove('%s\%s.fbx' %(my_path, fbxfile))
return '%s.fbx' %(fbxfile)
except OSError:
print 'File %s does not exist.' %(fbxfile)
return None
This is the code below that i'm running in a module called neo_curves.py (this is not the complete code, and 'my_path' is just the path of the current directory neo_curves.py is being run in)
def create_entry(self, crv):
"""Exports user curve to user data directory and adds entry into
neo_usercurves.py
Keyword Arguments:
crv -- (PyNode) the object to export
"""
# set settings
mel.eval('FBXExportFileVersion "FBX201400"')
mel.eval('FBXExportInputConnections -v 0')
select(crv)
mc.file('%s\userdat\%s.fbx' %(my_path, str(crv)), force=True, options='',
typ='FBX export', pr=True, es=True)
with open('%s\userdat\\neo_usercurves.py' %(my_path), 'r+') as usercrvs:
for line in usercrvs:
if line.strip() == '#crvstart':
#linepos = usercrvs.tell()
#linepos = int(linepos)
#usercrvs.seek(linepos, 0)
usercrvs.seek(665, 0)
usercrvs.write("\n "+str(crv)+" : '%s\%s' %(my_path, '"+
str(crv)+".fbx')")
break
This will give me this result below:
# Filename: neo_usercurves.py
# Created By: Gregory Smith
# Description: A script containing a library of user created curves
# Purpose: A library to store names of all the user curves, and deletes curves
# if specified to do so
import os
import maya.cmds as mc
import module_locator
my_path = module_locator.module_path()
def usercurve_lib(fbxfile=None, remove=None):
"""All control/curve objects created by user
Keyword Arguments:
fbxfile -- (string) name of fbx file to import
remove -- (boolean) will remove an entry from the library and delete the
associated fbx file
"""
curves_dict = {
#crvstart
loop_crv : '%s\%s' %(my_path, 'loop_crv.fbx') return curves_dict
elif not remove:
try:
name = mc.file(curves_dict[fbxfile], typ='FBX', i=1,
iv=True, pmt=False)
return name[0]
except RuntimeError:
return None
else:
try:
os.remove('%s\%s.fbx' %(my_path, fbxfile))
return '%s.fbx' %(fbxfile)
except OSError:
print 'File %s does not exist.' %(fbxfile)
return None

In short: on most operating systems you can not insert into files without rewriting if the lengths are not the same.
Have a look at a long discussion here: Why can we not insert into files without the additional writes? (I neither mean append, nor over-write)

Related

Python 2.7 and PrettyTables

I am trying to get PrettyTables to work with the following script. I can get it almost to look right but it keeps separating my tables so it is printing 16 separate tables. I need all information in one table that I can sort. I appreciate all the help i can get.
import sys
import os
import datetime
import hashlib
import logging
def getScanPath(): #12
# Prompt User for path to scan
path = raw_input('Please enter the directory to scan: ')
# Verify that the path is a directory
if os.path.isdir(path):
return path
else:
sys.exit('Invalid File Path ... Script Aborted')
def getFileList(filePath):
# Create an empty list to hold the resulting files
pathList =[]
# Get a list of files, note these will be just the names of the files
# NOT the full path
simpleFileNameList = os.listdir(filePath)
# Now process each filename in the list
for eachFile in simpleFileNameList:
# 1) Get the full path by join the directory with the filename
fullPath = os.path.join(filePath, eachFile)
# 2) Make sure the full path is an absolute path
absPath = os.path.abspath(fullPath)
# 3) Make sure the absolute path is a file i.e. not a folder or directory
if os.path.isfile(absPath):
# 4) if all is well, add the absolute path to the list
pathList.append(absPath)
else:
logging.error('A Non-File has been identified')
# 5) Once all files have been identified, return the list to the caller
return pathList
def getFileName(theFile):
return os.path.basename(theFile)
def getFileSize(theFile):
return os.path.getsize(theFile)
def getFileLastModified(theFile):
return os.path.getmtime(theFile)
def getFileHash(theFile):
hash_md5 = hashlib.md5()
with open(theFile, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
# Main Script Starts Here
if __name__ == '__main__':
#Welcome Message
print "\nWelcome to the file scanner\n"
# prompt user for directory path
scanPath = getScanPath()
# Get a list of files with full path
scanFileList = getFileList(scanPath)
# Output Filenames
print "Files found in directory"
for eachFilePath in scanFileList:
fileName = getFileName(eachFilePath)
fileSize = getFileSize(eachFilePath)
lastModified = getFileLastModified(eachFilePath)
hashValue = getFileHash(eachFilePath)
fileModified = (datetime.datetime.fromtimestamp(lastModified))
from prettytable import PrettyTable
pTable = PrettyTable()
pTable.field_names = ["File Name", "File Size", "Last Modified", "Md5 Hash Value"]
pTable.add_row ([fileName, fileSize, fileModified, hashValue])
print (pTable)enter code here
This should show me one big table using all the values from a set directory that the user chooses. This will allow me to sort the table later using prettytables.
I have no experience with prettyTables, but I noticed you have lastModified and fileModified yet only fileModified is used for a column in your table. Are you sure pretty table doesn't have some kind of row limit?

Moving only Files in Directories

I have looked extensively on this site and I can't see an example that fits the bill. I have 4 directories each of which contains a number of files and another directory called 'Superseded'. I am trying to write a script that will move all files in each folder into the 'Superseded' folder but I'm not having any luck.
import os, shutil
source = r'U:\Data\All\Python_Test\Exports\GLA'
dest = r'U:\Data\All\Python_Test\Exports\Superseded'
listofFiles = os.listdir(source)
for f in listofFiles:
fullPath = source + "/" + f
shutil.move(fullPath, dest)
I can only get this to work for one directory and even then only when I've made the destination directory outside of the GLA directory if that makes sense.
I know there is a a os.path.isfile() module so that I can only move the files but I can't seem to get it to work. Does anybody have any ideas?
This works for me:
import os
#from:
# https://stackoverflow.com/questions/1158076/implement-touch-using-python
# I use this to create some empty file to move around later
def touch(fname, times=None):
fhandle = open(fname, 'a')
try:
os.utime(fname, times)
finally:
fhandle.close()
# this function is only to create the folders and files to be moved
def create_files_in_known_folders():
nameList=["source_dir_{:02d}".format(x) for x in range(4)]
for name in nameList:
path=os.path.expanduser(os.path.join("~",name))
if not os.path.exists(path):
os.mkdir(path)
ssPath=os.path.join(path,"superseded")
if not os.path.exists(ssPath):
os.mkdir(ssPath)
for i in range(3):
filename="{}_{:02d}.dat".format(name,i)
filepath=os.path.join(path, filename)
if not os.path.exists(filepath):
touch(filepath)
# THIS is actually the function doing what the OP asked for
# there many details that can be tweaked
def move_from_known_to_dest():
# here my given names from above
nameList=["source_dir_{:02d}".format(x) for x in range(4)]
# and my destination path
destPath=os.path.expanduser(os.path.join("~","dest"))
# not interested in files that are in subfolders
# if those would exist change to os.walk and
# exclude the destination folder with according if...:
for name in nameList:
path=os.path.expanduser(os.path.join("~",name))
dirList=os.listdir(path)
print path
for fileName in dirList:
filePath=os.path.join(path, fileName)
print filePath
if os.path.isfile(filePath):
destPath=os.path.join(path,"superseded",fileName)
print destPath
#alternatively you can chose to 1) overwrite ()might not work 2)delete first 3) not copy
# another option is to check for existence and if
# present add a number to the dest-file-name
# use while loop to check for first non-present number
assert not os.path.exists(destPath), "file {} already exits".format(destPath)
#https://stackoverflow.com/questions/8858008/how-to-move-a-file-in-python
os.rename( filePath, destPath)
if __name__=="__main__":
create_files_in_known_folders()
#break here and check that filestructure and files have been created
move_from_known_to_dest()
But, think carefully what to do if the file already exits in your destination folder.
os.walk might also be something you want to look at.
Implementing several options for the copy behaviour may look like this:
import warnings
#from:
# https://stackoverflow.com/questions/2187269/python-print-only-the-message-on-warnings
formatwarning_orig = warnings.formatwarning
warnings.formatwarning = lambda message, category, filename, lineno, line=None: \
formatwarning_orig(message, category, filename, lineno, line='')
def move_from_known_to_dest_extra(behaviour='overwrite'):
assert behaviour in ['overwrite','leave','accumulate'], "unknown behaviour: {}".format(behaviour)
nameList=["source_dir_{:02d}".format(x) for x in range(4)]
destPath=os.path.expanduser(os.path.join("~","dest"))
for name in nameList:
path=os.path.expanduser(os.path.join("~",name))
dirList=os.listdir(path)
for fileName in dirList:
filePath=os.path.join(path, fileName)
if os.path.isfile(filePath):
destPath=os.path.join(path,"superseded",fileName)
# simplest case...does not exist so copy
if not os.path.exists(destPath):
os.rename( filePath, destPath)
else:
if behaviour=='leave':
warnings.warn( "Warning! Not copying file: {}; file {} already exists!".format(filePath, destPath))
elif behaviour =='overwrite':
os.remove(destPath)
# documentation states:
# On Windows, if dst already exists, OSError will be raised even if it is a file.
os.rename( filePath, destPath)
warnings.warn( "Warning!Overwriting file: {}.".format(destPath))
elif behaviour=='accumulate': #redundant but OK
addPost=0
while True:
newDestPath=destPath+"{:04d}".format(addPost)
if not os.path.exists(newDestPath):
break
addPost+=1
assert addPost < 10000, "Clean up the mess!"
os.rename( filePath, newDestPath)
else:
assert 0, "Unknown copy behaviour requested."
Additionally one might check for file permissions as, e.g., os.remove() may raise an exception. In this case, however, I assume that permissions are properly set by the OP.

How do I confirm with python that required files are in a particular folder and are accessible or not?

I have 5 files in a folder App:
App|
|--A.txt
|--B.txt
|--C.txt
|--D.txt
|--E.txt
|--Run.py
|--Other Folders or Files
Now I want to know if files (A.txt,B.txtC.txt,C.txt,D.txt,E.txt) is present or not and if its there than I want to call a function Cleaner which will supply names of these files to that function. I have written this code but nothing is happening.The function is not getting called.
import glob
import csv
import itertools
files = glob.glob("*.txt")
i = 0
def sublist(a, b):
seq = iter(b)
try:
for x in a:
while next(seq) != x: pass
else:
return True
except StopIteration:
pass
return False
required_files = ['Alternate_ADR6_LFB1.txt', 'Company_Code.txt', 'Left_LIFNR.txt', 'LFA1.txt', 'LFB1.TXT', 'LFBK.TXT']
if sublist(required_files,files):
for files in required_files:
try:
f = open(files , 'r')
f.close()
except IOError as e:
print 'Error opening or accessing files'
i = 1
else:
print 'Required files are not in correct folder'
if i == 1:
for files in required_files:
Cleansing(files)
def Cleansing(filename):
with open('filename', 'rb') as f_input:
...
...
break
with open('filename', 'rb') as f_input, open('filename_Cleaned.csv', 'wb') as f_output:
csv_output = csv.writer(f_output)
csv_output.writerow('something')
Upadate
I think now I am able to call the function and also able to check the valid files but its not that pythonic. And I am not able to open or create a file with the name of the file plus _cleaned :filename_cleaned.csv.
You want to check if a list of files (required_files) are in a folder.
You successfully get the complete list of text files in the folder with files = glob.glob("*.txt")
So the first question is: Checking for sublist in list
As the order is not important, we can use sets:
if set(required_files) <= set(files):
# do stuff
else:
#print warning
Next question: How to open the files and create an outputs with names like "filename_Cleaned.csv"
A very important thing you have to understand: "filename" is not the same thing as filename. The first is a string, it will always be the same thing, it will not be replaced by real filenames. When writing open('filename', 'rb') you're trying to open a file called "filename".
filename however can be a variable name and take different values.
for filename in required_files:
Cleansing(filename)
def Cleansing(filename):
with open(filename, 'rb') as f_input, open(filename+'_Cleaned.csv', 'wb') as f_output:
#read stuff in f_input
#write stuff in f_output

Created temporary file is not accessable in production

I have written a custom filefield AudioFileField. For this i created a check if a file really is a valid audiofile. To be able to do that, i use the sox commandlinetool, so i have to create a file on disk first. As sox depends on the suffix to do that validation, i needed to write my own TemporaryUploadedAudioFile, using the original suffix (instead of .upload):
class TemporaryUploadedAudioFile(TemporaryUploadedFile):
"""
A file uploaded to a temporary location (i.e. stream-to-disk).
"""
def __init__(self, name, content_type, size, charset, suffix='.upload'):
"""
The init method overrides the name creation to allow passing
an extension, so that sox is able to test the file
"""
if settings.FILE_UPLOAD_TEMP_DIR:
file = tempfile.NamedTemporaryFile(suffix=suffix,
dir=settings.FILE_UPLOAD_TEMP_DIR)
else:
file = tempfile.NamedTemporaryFile(suffix=suffix)
super(TemporaryUploadedFile, self).__init__(file, name, content_type, size, charset)
That file i use to do the audiovalidation in the AudioFileForm to_python method:
def to_python(self, data):
"""
checks that the file-upload field data contains a valid audio file.
"""
f = super(AudioFileForm, self).to_python(data)
if f is None:
return None
# get the file suffix, sox needs this to be able to test the file
suffix = os.path.splitext(data.name)[1]
# We need to get a temporary file for sox. Even if we allready have a temporary
# file, we have to create a new one ending with the correct suffix
file = TemporaryUploadedAudioFile(data.name, data.content_type, 0, data.charset,suffix = suffix)
with open(file.temporary_file_path(), 'w') as f:
f.write(data.read())
# Do the validation of the audiofile.
filetype=subprocess.Popen([sox,'--i','-t','%s'%file.temporary_file_path()], shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
filetype=filetype.communicate()[0]
filetype=filetype.replace('\n','')
if not filetype in ['wav','aiff','flac']:
raise forms.ValidationError('Not a valid audiofile (valid are: aif, flac & wav | 16 or 24 bit | 44.1 or 48 kHz)')
return data
Now to the strange things happening: this works like a charm on the development server, but as soon as i switch to apache2/mod_wsgi it stops working. sox returns an error telling me that the file is missing.
I have allready checked rights, tmp-location on the production server is /tmp, all rights are granted there (777). What else could be happening here?
mod-wsgi is known to have problems with standard output and this subprocess thingy with django. There are already lot of questions answered about this in stackoverflow.com.
A quick Google search should help you!

How to tell whether a file is executable on Windows in Python?

I'm writing grepath utility that finds executables in %PATH% that match a pattern.
I need to define whether given filename in the path is executable (emphasis is on command line scripts).
Based on "Tell if a file is executable" I've got:
import os
from pywintypes import error
from win32api import FindExecutable, GetLongPathName
def is_executable_win(path):
try:
_, executable = FindExecutable(path)
ext = lambda p: os.path.splitext(p)[1].lower()
if (ext(path) == ext(executable) # reject *.cmd~, *.bat~ cases
and samefile(GetLongPathName(executable), path)):
return True
# path is a document with assoc. check whether it has extension
# from %PATHEXT%
pathexts = os.environ.get('PATHEXT', '').split(os.pathsep)
return any(ext(path) == e.lower() for e in pathexts)
except error:
return None # not an exe or a document with assoc.
Where samefile is:
try: samefile = os.path.samefile
except AttributeError:
def samefile(path1, path2):
rp = lambda p: os.path.realpath(os.path.normcase(p))
return rp(path1) == rp(path2)
How is_executable_win could be improved in the given context? What functions from Win32 API could help?
P.S.
time performance doesn't matter
subst drives and UNC, unicode paths are not under consideration
C++ answer is OK if it uses functions available on Windows XP
Examples
notepad.exe is executable (as a rule)
which.py is executable if it is associated with some executable (e.g., python.exe) and .PY is in %PATHEXT% i.e., 'C:\> which' could start:
some\path\python.exe another\path\in\PATH\which.py
somefile.doc most probably is not executable (when it is associated with Word for example)
another_file.txt is not executable (as a rule)
ack.pl is executable if it is associated with some executable (most probably perl.exe) and .PL is in %PATHEXT% (i.e. I can run ack without specifing extension if it is in the path)
What is "executable" in this question
def is_executable_win_destructive(path):
#NOTE: it assumes `path` <-> `barename` for the sake of example
barename = os.path.splitext(os.path.basename(path))[0]
p = Popen(barename, stdout=PIPE, stderr=PIPE, shell=True)
stdout, stderr = p.communicate()
return p.poll() != 1 or stdout != '' or stderr != error_message(barename)
Where error_message() depends on language. English version is:
def error_message(barename):
return "'%(barename)s' is not recognized as an internal" \
" or external\r\ncommand, operable program or batch file.\r\n" \
% dict(barename=barename)
If is_executable_win_destructive() returns when it defines whether the path points to an executable for the purpose of this question.
Example:
>>> path = r"c:\docs\somefile.doc"
>>> barename = "somefile"
After that it executes %COMSPEC% (cmd.exe by default):
c:\cwd> cmd.exe /c somefile
If output looks like this:
'somefile' is not recognized as an internal or external
command, operable program or batch file.
Then the path is not an executable else it is (lets assume there is one-to-one correspondence between path and barename for the sake of example).
Another example:
>>> path = r'c:\bin\grepath.py'
>>> barename = 'grepath'
If .PY in %PATHEXT% and c:\bin is in %PATH% then:
c:\docs> grepath
Usage:
grepath.py [options] PATTERN
grepath.py [options] -e PATTERN
grepath.py: error: incorrect number of arguments
The above output is not equal to error_message(barename) therefore 'c:\bin\grepath.py' is an "executable".
So the question is how to find out whether the path will produce the error without actually running it? What Win32 API function and what conditions used to trigger the 'is not recognized as an internal..' error?
shoosh beat me to it :)
If I remember correctly, you should try to read the first 2 characters in the file. If you get back "MZ", you have an exe.
hnd = open(file,"rb")
if hnd.read(2) == "MZ":
print "exe"
I think, that this should be sufficient:
check file extension in PATHEXT - whether file is directly executable
using cmd.exe command "assoc .ext" you can see whether file is associated with some executable (some executable will be launched when you launch this file). You can parse capture output of assoc without arguments and collect all extensions that are associated and check tested file extension.
other file extensions will trigger error "command is not recognized ..." therefore you can assume that such files are NOT executable.
I don't really understand how you can tell the difference between somefile.py and somefile.txt because association can be really the same. You can configure system to run .txt files the same way as .py files.
A windows PE always starts with the characters "MZ". This includes however also any kind of DLLs which are not necessarily executables.
To check for this however you'll have to open the file and read the header so that's probably not what you're looking for.
Here's the grepath.py that I've linked in my question:
#!/usr/bin/env python
"""Find executables in %PATH% that match PATTERN.
"""
#XXX: remove --use-pathext option
import fnmatch, itertools, os, re, sys, warnings
from optparse import OptionParser
from stat import S_IMODE, S_ISREG, ST_MODE
from subprocess import PIPE, Popen
def warn_import(*args):
"""pass '-Wd' option to python interpreter to see these warnings."""
warnings.warn("%r" % (args,), ImportWarning, stacklevel=2)
class samefile_win:
"""
http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html
"""
#staticmethod
def get_read_handle (filename):
return win32file.CreateFile (
filename,
win32file.GENERIC_READ,
win32file.FILE_SHARE_READ,
None,
win32file.OPEN_EXISTING,
0,
None
)
#staticmethod
def get_unique_id (hFile):
(attributes,
created_at, accessed_at, written_at,
volume,
file_hi, file_lo,
n_links,
index_hi, index_lo
) = win32file.GetFileInformationByHandle (hFile)
return volume, index_hi, index_lo
#staticmethod
def samefile_win(filename1, filename2):
"""Whether filename1 and filename2 represent the same file.
It works for subst, ntfs hardlinks, junction points.
It works unreliably for network drives.
Based on GetFileInformationByHandle() Win32 API call.
http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html
"""
if samefile_generic(filename1, filename2): return True
try:
hFile1 = samefile_win.get_read_handle (filename1)
hFile2 = samefile_win.get_read_handle (filename2)
are_equal = (samefile_win.get_unique_id (hFile1)
== samefile_win.get_unique_id (hFile2))
hFile2.Close ()
hFile1.Close ()
return are_equal
except win32file.error:
return None
def canonical_path(path):
"""NOTE: it might return wrong path for paths with symbolic links."""
return os.path.realpath(os.path.normcase(path))
def samefile_generic(path1, path2):
return canonical_path(path1) == canonical_path(path2)
class is_executable_destructive:
#staticmethod
def error_message(barename):
r"""
"'%(barename)s' is not recognized as an internal or external\r\n
command, operable program or batch file.\r\n"
in Russian:
"""
return '"%(barename)s" \xad\xa5 \xef\xa2\xab\xef\xa5\xe2\xe1\xef \xa2\xad\xe3\xe2\xe0\xa5\xad\xad\xa5\xa9 \xa8\xab\xa8 \xa2\xad\xa5\xe8\xad\xa5\xa9\r\n\xaa\xae\xac\xa0\xad\xa4\xae\xa9, \xa8\xe1\xaf\xae\xab\xad\xef\xa5\xac\xae\xa9 \xaf\xe0\xae\xa3\xe0\xa0\xac\xac\xae\xa9 \xa8\xab\xa8 \xaf\xa0\xaa\xa5\xe2\xad\xeb\xac \xe4\xa0\xa9\xab\xae\xac.\r\n' % dict(barename=barename)
#staticmethod
def is_executable_win_destructive(path):
# assume path <-> barename that is false in general
barename = os.path.splitext(os.path.basename(path))[0]
p = Popen(barename, stdout=PIPE, stderr=PIPE, shell=True)
stdout, stderr = p.communicate()
return p.poll() != 1 or stdout != '' or stderr != error_message(barename)
def is_executable_win(path):
"""Based on:
http://timgolden.me.uk/python/win32_how_do_i/tell-if-a-file-is-executable.html
Known bugs: treat some "*~" files as executable, e.g. some "*.bat~" files
"""
try:
_, executable = FindExecutable(path)
return bool(samefile(GetLongPathName(executable), path))
except error:
return None # not an exe or a document with assoc.
def is_executable_posix(path):
"""Whether the file is executable.
Based on which.py from stdlib
"""
#XXX it ignores effective uid, guid?
try: st = os.stat(path)
except os.error:
return None
isregfile = S_ISREG(st[ST_MODE])
isexemode = (S_IMODE(st[ST_MODE]) & 0111)
return bool(isregfile and isexemode)
try:
#XXX replace with ctypes?
from win32api import FindExecutable, GetLongPathName, error
is_executable = is_executable_win
except ImportError, e:
warn_import("is_executable: fall back on posix variant", e)
is_executable = is_executable_posix
try: samefile = os.path.samefile
except AttributeError, e:
warn_import("samefile: fallback to samefile_win", e)
try:
import win32file
samefile = samefile_win.samefile_win
except ImportError, e:
warn_import("samefile: fallback to generic", e)
samefile = samefile_generic
def main():
parser = OptionParser(usage="""
%prog [options] PATTERN
%prog [options] -e PATTERN""", description=__doc__)
opt = parser.add_option
opt("-e", "--regex", metavar="PATTERN",
help="use PATTERN as a regular expression")
opt("--ignore-case", action="store_true", default=True,
help="""[default] ignore case when --regex is present; for \
non-regex PATTERN both FILENAME and PATTERN are first \
case-normalized if the operating system requires it otherwise \
unchanged.""")
opt("--no-ignore-case", dest="ignore_case", action="store_false")
opt("--use-pathext", action="store_true", default=True,
help="[default] whether to use %PATHEXT% environment variable")
opt("--no-use-pathext", dest="use_pathext", action="store_false")
opt("--show-non-executable", action="store_true", default=False,
help="show non executable files")
(options, args) = parser.parse_args()
if len(args) != 1 and not options.regex:
parser.error("incorrect number of arguments")
if not options.regex:
pattern = args[0]
del args
if options.regex:
filepred = re.compile(options.regex, options.ignore_case and re.I).search
else:
fnmatch_ = fnmatch.fnmatch if options.ignore_case else fnmatch.fnmatchcase
for file_pattern_symbol in "*?":
if file_pattern_symbol in pattern:
break
else: # match in any place if no explicit file pattern symbols supplied
pattern = "*" + pattern + "*"
filepred = lambda fn: fnmatch_(fn, pattern)
if not options.regex and options.ignore_case:
filter_files = lambda files: fnmatch.filter(files, pattern)
else:
filter_files = lambda files: itertools.ifilter(filepred, files)
if options.use_pathext:
pathexts = frozenset(map(str.upper,
os.environ.get('PATHEXT', '').split(os.pathsep)))
seen = set()
for dirpath in os.environ.get('PATH', '').split(os.pathsep):
if os.path.isdir(dirpath): # assume no expansion needed
# visit "each" directory only once
# it is unaware of subst drives, junction points, symlinks, etc
rp = canonical_path(dirpath)
if rp in seen: continue
seen.add(rp); del rp
for filename in filter_files(os.listdir(dirpath)):
path = os.path.join(dirpath, filename)
isexe = is_executable(path)
if isexe == False and is_executable == is_executable_win:
# path is a document with associated program
# check whether it is a script (.pl, .rb, .py, etc)
if not isexe and options.use_pathext:
ext = os.path.splitext(path)[1]
isexe = ext.upper() in pathexts
if isexe:
print path
elif options.show_non_executable:
print "non-executable:", path
if __name__=="__main__":
main()
Parse the PE format.
http://code.google.com/p/pefile/
This is probably the best solution you will get other than using python to actually try to run the program.
Edit: I see you also want files that have associations. This will require mucking in the registry which I don't have the information for.
Edit2: I also see that you differentiate between .doc and .py. This is a rather arbitrary differentiation which must be specified with manual rules, because to windows, they are both file extensions that a program reads.
Your question can't be answered. Windows can't tell the difference between a file which is associated with a scripting language vs. some other arbitrary program. As Windows is concerned, a .PY file is simply a document which is opened by python.exe.