Tell gdb to skip standard files - c++

I'm debugging C++ code with GDB and when it enters a constructor of some object containing standard library objects, it shows me the constructor of these objects (like std::map) and everything that's underneath.
I know about the next operator, but I'd prefer to basically black list any standard library code, which is never the source of the error I'm investigating. The wished behavior is that a simple skip would send me to the next "user-land" code.

gdb 7.12 supports file globbing to specify the files to skip in the debugger. The documentation for the same is as below:
https://sourceware.org/gdb/onlinedocs/gdb/Skipping-Over-Functions-and-Files.html
To skip stepping into all library headers in the directory /usr/include/c++/5/bits, add the below lines to ~/.gdbinit
# To skip all .h files in /usr/include/c++/5/bits
skip -gfi /usr/include/c++/5/bits/*.h
Instead to skip a specific file, say stl_vector.h, add the below lines to ~/.gdbinit
# To skip the file /usr/include/c++/5/bits/stl_vector.h
skip file /usr/include/c++/5/bits/stl_vector.h
Doing the above with gdb 7.11 and below version leads to the below error:
Ignore function pending future shared library load? (y or [n]) [answered N; input not from terminal]
However, gdb 7.12 seems to have solved the above issue.
This blog addresses the same problem for gdb version 7.11 or below.
Note - You can use the below command from the gdb command prompt to list all the files marked for skipping
info skip

* Changes in GDB 7.4
GDB now allows you to skip uninteresting functions and files when stepping with the "skip function" and "skip file" commands.

Step instructions and skip all files without source
This will be too slow for most applications, but it is fun!
Based on: Displaying each assembly instruction executed in gdb
class ContinueUntilSource(gdb.Command):
def __init__(self):
super().__init__(
'cus',
gdb.COMMAND_BREAKPOINTS,
gdb.COMPLETE_NONE,
False
)
def invoke(self, argument, from_tty):
argv = gdb.string_to_argv(argument)
if argv:
gdb.write('Does not take any arguments.\n')
else:
done = False
thread = gdb.inferiors()[0].threads()[0]
while True:
message = gdb.execute('si', to_string=True)
if not thread.is_valid():
break
try:
path = gdb.selected_frame().find_sal().symtab.fullname()
except:
pass
else:
if os.path.exists(path):
break
ContinueUntilSource()
Tested in Ubuntu 16.04, GDB 7.11. GitHub upstream.
std::function case
How to step debug into std::function user code from C++ functional with GDB?

Modified from Ciro Santilli's answer command ss steps inside specific source. You may specify source file name or the current one will be stepped. Very handy for stepping through bison/yacc sources or other meta-sources that generate С code and insert #line directives.
import os.path
class StepSource(gdb.Command):
def __init__(self):
super().__init__(
'ss',
gdb.COMMAND_BREAKPOINTS,
gdb.COMPLETE_NONE,
False
)
def invoke(self, argument, from_tty):
argv = gdb.string_to_argv(argument)
if argv:
if len(argv) > 1:
gdb.write('Usage:\nns [source-name]]\n')
return
source = argv[0]
full_path = False if os.path.basename(source) == source else True
else:
source = gdb.selected_frame().find_sal().symtab.fullname()
full_path = True
thread = gdb.inferiors()[0].threads()[0]
while True:
message = gdb.execute('next', to_string=True)
if not thread.is_valid():
break
try:
cur_source = gdb.selected_frame().find_sal().symtab.fullname()
if not full_path:
cur_source = os.path.basename(cur_source)
except:
break
else:
if source == cur_source:
break
StepSource()
Known bugs
it doesn't interrupt debugger on SIGINT while running;
changed pass to break on exception as not sure whether it is right.

Related

Step to Next Branching Instruction in GDB

Is there a way to continue execution until a branching instruction is reached in GDB? Much like the WinDbg "ph" command. If not, can it be scripted in Python?
I finally came across this question:
Does GDB have a "step-to-next-call" instruction?
I was able to modify the code there to create my own "ph" command:
import gdb
mips_branches = ["beq", "beqz", "bne", "bnez", "bgtz", "bltz", "bgez", "blez", "j", "jr", "jal", "jalr"]
arm_branches = ["b", "bl", "blx", "bx", "beq"]
class StepToNextBranch (gdb.Command):
def __init__ (self):
super (StepToNextBranch, self).__init__ ("ph", gdb.COMMAND_OBSCURE)
def invoke (self, arg, from_tty):
arch = gdb.selected_frame().architecture()
while True:
SILENT=True
gdb.execute("nexti", to_string=SILENT)
current_pc = int(gdb.selected_frame().read_register("pc"))
disa = arch.disassemble(current_pc)[0]
opcode = disa["asm"].split("\t")[0]
if opcode in mips_branches or opcode in arm_branches:
break
gdb.execute("context")
StepToNextBranch()
I'm not as familiar with ARM as MIPS so I'm not sure this covers everything. Eventually, I'll add x86 branching instructions.
Also, that gdb.execute("context") line is there because I use GEF.

How to avoid step into built in types in gdb? [duplicate]

I'm debugging C++ code with GDB and when it enters a constructor of some object containing standard library objects, it shows me the constructor of these objects (like std::map) and everything that's underneath.
I know about the next operator, but I'd prefer to basically black list any standard library code, which is never the source of the error I'm investigating. The wished behavior is that a simple skip would send me to the next "user-land" code.
gdb 7.12 supports file globbing to specify the files to skip in the debugger. The documentation for the same is as below:
https://sourceware.org/gdb/onlinedocs/gdb/Skipping-Over-Functions-and-Files.html
To skip stepping into all library headers in the directory /usr/include/c++/5/bits, add the below lines to ~/.gdbinit
# To skip all .h files in /usr/include/c++/5/bits
skip -gfi /usr/include/c++/5/bits/*.h
Instead to skip a specific file, say stl_vector.h, add the below lines to ~/.gdbinit
# To skip the file /usr/include/c++/5/bits/stl_vector.h
skip file /usr/include/c++/5/bits/stl_vector.h
Doing the above with gdb 7.11 and below version leads to the below error:
Ignore function pending future shared library load? (y or [n]) [answered N; input not from terminal]
However, gdb 7.12 seems to have solved the above issue.
This blog addresses the same problem for gdb version 7.11 or below.
Note - You can use the below command from the gdb command prompt to list all the files marked for skipping
info skip
* Changes in GDB 7.4
GDB now allows you to skip uninteresting functions and files when stepping with the "skip function" and "skip file" commands.
Step instructions and skip all files without source
This will be too slow for most applications, but it is fun!
Based on: Displaying each assembly instruction executed in gdb
class ContinueUntilSource(gdb.Command):
def __init__(self):
super().__init__(
'cus',
gdb.COMMAND_BREAKPOINTS,
gdb.COMPLETE_NONE,
False
)
def invoke(self, argument, from_tty):
argv = gdb.string_to_argv(argument)
if argv:
gdb.write('Does not take any arguments.\n')
else:
done = False
thread = gdb.inferiors()[0].threads()[0]
while True:
message = gdb.execute('si', to_string=True)
if not thread.is_valid():
break
try:
path = gdb.selected_frame().find_sal().symtab.fullname()
except:
pass
else:
if os.path.exists(path):
break
ContinueUntilSource()
Tested in Ubuntu 16.04, GDB 7.11. GitHub upstream.
std::function case
How to step debug into std::function user code from C++ functional with GDB?
Modified from Ciro Santilli's answer command ss steps inside specific source. You may specify source file name or the current one will be stepped. Very handy for stepping through bison/yacc sources or other meta-sources that generate С code and insert #line directives.
import os.path
class StepSource(gdb.Command):
def __init__(self):
super().__init__(
'ss',
gdb.COMMAND_BREAKPOINTS,
gdb.COMPLETE_NONE,
False
)
def invoke(self, argument, from_tty):
argv = gdb.string_to_argv(argument)
if argv:
if len(argv) > 1:
gdb.write('Usage:\nns [source-name]]\n')
return
source = argv[0]
full_path = False if os.path.basename(source) == source else True
else:
source = gdb.selected_frame().find_sal().symtab.fullname()
full_path = True
thread = gdb.inferiors()[0].threads()[0]
while True:
message = gdb.execute('next', to_string=True)
if not thread.is_valid():
break
try:
cur_source = gdb.selected_frame().find_sal().symtab.fullname()
if not full_path:
cur_source = os.path.basename(cur_source)
except:
break
else:
if source == cur_source:
break
StepSource()
Known bugs
it doesn't interrupt debugger on SIGINT while running;
changed pass to break on exception as not sure whether it is right.

Create version number variations for info.plist using #define and clang?

Years ago, when compiling with GCC, the following defines in a #include .h file could be pre-processed for use in info.plist:
#define MAJORVERSION 2
#define MINORVERSION 6
#define MAINTVERSION 4
<key>CFBundleShortVersionString</key> <string>MAJORVERSION.MINORVERSION.MAINTVERSION</string>
...which would turn into "2.6.4". That worked because GCC supported the "-traditional" flag. (see Tech Note TN2175 Info.plist files in Xcode Using the C Preprocessor, under "Eliminating whitespace between tokens in the macro expansion process")
However, fast-forward to 2016 and Clang 7.0.2 (Xcode 7.2.1) apparently does not support either "-traditional" or "-traditional-cpp" (or support it properly), yielding this string:
"2 . 6 . 4"
(see Bug 12035 - Preprocessor inserts spaces in macro expansions, comment 4)
Because there are so many different variations (CFBundleShortVersionString, CFBundleVersion, CFBundleGetInfoString), it would be nice to work around this clang problem, and define these once, and concatenate / stringify the pieces together. What is the commonly-accepted pattern for doing this now? (I'm presently building on MacOS but the same pattern would work for IOS)
Here is the Python script I use to increment my build number, whenever a source code change is detected, and update one or more Info.plist files within the project.
It was created to solve the issue raised in this question I asked a while back.
You need to create buildnum.ver file in the source tree that looks like this:
version 1.0
build 1
(you will need to manually increment version when certain project milestones are reached, but buildnum is incremented automatically).
NOTE the location of the .ver file must be in the root of the source tree (see SourceDir, below) as this script will look for modified files in this directory. If any are found, the build number is incremented. Modified means source files changes after the .ver file was last updated.
Then create a new Xcode target to run an external build tool and run something like:
tools/bump_buildnum.py SourceDir/buildnum.ver SourceDir/Info.plist
(make it run in ${PROJECT_DIR})
and then make all the actual Xcode targets dependent upon this target, so it runs before any of them are built.
#!/usr/bin/env python
#
# Bump build number in Info.plist files if a source file have changed.
#
# usage: bump_buildnum.py buildnum.ver Info.plist [ ... Info.plist ]
#
# andy#trojanfoe.com, 2014.
#
import sys, os, subprocess, re
def read_verfile(name):
version = None
build = None
verfile = open(name, "r")
for line in verfile:
match = re.match(r"^version\s+(\S+)", line)
if match:
version = match.group(1).rstrip()
match = re.match(r"^build\s+(\S+)", line)
if match:
build = int(match.group(1).rstrip())
verfile.close()
return (version, build)
def write_verfile(name, version, build):
verfile = open(name, "w")
verfile.write("version {0}\n".format(version))
verfile.write("build {0}\n".format(build))
verfile.close()
return True
def set_plist_version(plistname, version, build):
if not os.path.exists(plistname):
print("{0} does not exist".format(plistname))
return False
plistbuddy = '/usr/libexec/Plistbuddy'
if not os.path.exists(plistbuddy):
print("{0} does not exist".format(plistbuddy))
return False
cmdline = [plistbuddy,
"-c", "Set CFBundleShortVersionString {0}".format(version),
"-c", "Set CFBundleVersion {0}".format(build),
plistname]
if subprocess.call(cmdline) != 0:
print("Failed to update {0}".format(plistname))
return False
print("Updated {0} with v{1} ({2})".format(plistname, version, build))
return True
def should_bump(vername, dirname):
verstat = os.stat(vername)
allnames = []
for dirname, dirnames, filenames in os.walk(dirname):
for filename in filenames:
allnames.append(os.path.join(dirname, filename))
for filename in allnames:
filestat = os.stat(filename)
if filestat.st_mtime > verstat.st_mtime:
print("{0} is newer than {1}".format(filename, vername))
return True
return False
def upver(vername):
(version, build) = read_verfile(vername)
if version == None or build == None:
print("Failed to read version/build from {0}".format(vername))
return False
# Bump the version number if any files in the same directory as the version file
# have changed, including sub-directories.
srcdir = os.path.dirname(vername)
bump = should_bump(vername, srcdir)
if bump:
build += 1
print("Incremented to build {0}".format(build))
write_verfile(vername, version, build)
print("Written {0}".format(vername))
else:
print("Staying at build {0}".format(build))
return (version, build)
if __name__ == "__main__":
if os.environ.has_key('ACTION') and os.environ['ACTION'] == 'clean':
print("{0}: Not running while cleaning".format(sys.argv[0]))
sys.exit(0)
if len(sys.argv) < 3:
print("Usage: {0} buildnum.ver Info.plist [... Info.plist]".format(sys.argv[0]))
sys.exit(1)
vername = sys.argv[1]
(version, build) = upver(vername)
if version == None or build == None:
sys.exit(2)
for i in range(2, len(sys.argv)):
plistname = sys.argv[i]
set_plist_version(plistname, version, build)
sys.exit(0)
First, I would like to clarify what each key is meant to do:
CFBundleShortVersionString
A string describing the released version of an app, using semantic versioning. This string will be displayed in the App Store description.
CFBundleVersion
A string specifing the build version (released or unreleased). It is a string, but Apple recommends to use numbers instead.
CFBundleGetInfoString
Seems to be deprecated, as it is no longer listed in the Information Property List Key Reference.
During development, CFBundleShortVersionString isn't changed that often, and I normally set CFBundleShortVersionString manually in Xcode. The only string I change regularly is CFBundleVersion, because you can't submit a new build to iTunes Connect/TestFlight, if the CFBundleVersion wasn't changed.
To change the value, I use a Rake task with PlistBuddy to write a time stamp (year, month, day, hour, and minute) to CFBundleVersion:
desc "Bump bundle version"
task :bump_bundle_version do
bundle_version = Time.now.strftime "%Y%m%d%H%M"
sh %Q{/usr/libexec/PlistBuddy -c "Set CFBundleVersion #{bundle_version}" "DemoApp/DemoApp-Info.plist"}
end
You can use PlistBuddy, if you need to automate CFBundleShortVersionString as well.

Does GDB have a "step-to-next-call" instruction?

WinDBG and the related windows kernel debuggers support a "pc" command which runs the target until reaching the next call statement (in assembly). In other words, it breaks just prior to creating a new stack frame, sort of the opposite of "finish". "Start" in GDB runs until main starts, but in essence I want 'start' but with a wildcard of "any next frame".
I'm trying to locate a similar functionality in GDB, but have not found it.
is this possible?
Example WinDBG doc: http://windbg.info/doc/1-common-cmds.html#4_expr_and_cmds
Simple answer: no, step-to-next-call is not part of GDB commands.
GDB/Python-aware answer: no, it's not part of GDB commands, but it's easy to implement!
I'm not sure to understand if you want to stop before or after the call instruction execution.
To stop before, you need to stepi/nexti (next assembly instruction) until you see call in the current instruction:
import gdb
class StepBeforeNextCall (gdb.Command):
def __init__ (self):
super (StepBeforeNextCall, self).__init__ ("step-before-next-call",
gdb.COMMAND_OBSCURE)
def invoke (self, arg, from_tty):
arch = gdb.selected_frame().architecture()
while True:
current_pc = addr2num(gdb.selected_frame().read_register("pc"))
disa = arch.disassemble(current_pc)[0]
if "call" in disa["asm"]: # or startswith ?
break
SILENT=True
gdb.execute("stepi", to_string=SILENT)
print("step-before-next-call: next instruction is a call.")
print("{}: {}".format(hex(int(disa["addr"])), disa["asm"]))
def addr2num(addr):
try:
return int(addr) # Python 3
except:
return long(addr) # Python 2
StepBeforeNextCall()
To stop after the call, you compute the current stack depth, then step until it's deeper:
import gdb
def callstack_depth():
depth = 1
frame = gdb.newest_frame()
while frame is not None:
frame = frame.older()
depth += 1
return depth
class StepToNextCall (gdb.Command):
def __init__ (self):
super (StepToNextCall, self).__init__ ("step-to-next-call",
gdb.COMMAND_OBSCURE)
def invoke (self, arg, from_tty):
start_depth = current_depth =callstack_depth()
# step until we're one step deeper
while current_depth == start_depth:
SILENT=True
gdb.execute("step", to_string=SILENT)
current_depth = callstack_depth()
# display information about the new frame
gdb.execute("frame 0")
StepToNextCall()
just put that in a file, source it with GDB (or in your .gdbinit) and that will provide you the new commands step-before-next-call and step-to-next-call.
Relevant documentation is there:
Python API table of content
Basic Python
Python representation of architectures
Accessing inferior stack frames from Python.

How to tell whether a file is executable on Windows in Python?

I'm writing grepath utility that finds executables in %PATH% that match a pattern.
I need to define whether given filename in the path is executable (emphasis is on command line scripts).
Based on "Tell if a file is executable" I've got:
import os
from pywintypes import error
from win32api import FindExecutable, GetLongPathName
def is_executable_win(path):
try:
_, executable = FindExecutable(path)
ext = lambda p: os.path.splitext(p)[1].lower()
if (ext(path) == ext(executable) # reject *.cmd~, *.bat~ cases
and samefile(GetLongPathName(executable), path)):
return True
# path is a document with assoc. check whether it has extension
# from %PATHEXT%
pathexts = os.environ.get('PATHEXT', '').split(os.pathsep)
return any(ext(path) == e.lower() for e in pathexts)
except error:
return None # not an exe or a document with assoc.
Where samefile is:
try: samefile = os.path.samefile
except AttributeError:
def samefile(path1, path2):
rp = lambda p: os.path.realpath(os.path.normcase(p))
return rp(path1) == rp(path2)
How is_executable_win could be improved in the given context? What functions from Win32 API could help?
P.S.
time performance doesn't matter
subst drives and UNC, unicode paths are not under consideration
C++ answer is OK if it uses functions available on Windows XP
Examples
notepad.exe is executable (as a rule)
which.py is executable if it is associated with some executable (e.g., python.exe) and .PY is in %PATHEXT% i.e., 'C:\> which' could start:
some\path\python.exe another\path\in\PATH\which.py
somefile.doc most probably is not executable (when it is associated with Word for example)
another_file.txt is not executable (as a rule)
ack.pl is executable if it is associated with some executable (most probably perl.exe) and .PL is in %PATHEXT% (i.e. I can run ack without specifing extension if it is in the path)
What is "executable" in this question
def is_executable_win_destructive(path):
#NOTE: it assumes `path` <-> `barename` for the sake of example
barename = os.path.splitext(os.path.basename(path))[0]
p = Popen(barename, stdout=PIPE, stderr=PIPE, shell=True)
stdout, stderr = p.communicate()
return p.poll() != 1 or stdout != '' or stderr != error_message(barename)
Where error_message() depends on language. English version is:
def error_message(barename):
return "'%(barename)s' is not recognized as an internal" \
" or external\r\ncommand, operable program or batch file.\r\n" \
% dict(barename=barename)
If is_executable_win_destructive() returns when it defines whether the path points to an executable for the purpose of this question.
Example:
>>> path = r"c:\docs\somefile.doc"
>>> barename = "somefile"
After that it executes %COMSPEC% (cmd.exe by default):
c:\cwd> cmd.exe /c somefile
If output looks like this:
'somefile' is not recognized as an internal or external
command, operable program or batch file.
Then the path is not an executable else it is (lets assume there is one-to-one correspondence between path and barename for the sake of example).
Another example:
>>> path = r'c:\bin\grepath.py'
>>> barename = 'grepath'
If .PY in %PATHEXT% and c:\bin is in %PATH% then:
c:\docs> grepath
Usage:
grepath.py [options] PATTERN
grepath.py [options] -e PATTERN
grepath.py: error: incorrect number of arguments
The above output is not equal to error_message(barename) therefore 'c:\bin\grepath.py' is an "executable".
So the question is how to find out whether the path will produce the error without actually running it? What Win32 API function and what conditions used to trigger the 'is not recognized as an internal..' error?
shoosh beat me to it :)
If I remember correctly, you should try to read the first 2 characters in the file. If you get back "MZ", you have an exe.
hnd = open(file,"rb")
if hnd.read(2) == "MZ":
print "exe"
I think, that this should be sufficient:
check file extension in PATHEXT - whether file is directly executable
using cmd.exe command "assoc .ext" you can see whether file is associated with some executable (some executable will be launched when you launch this file). You can parse capture output of assoc without arguments and collect all extensions that are associated and check tested file extension.
other file extensions will trigger error "command is not recognized ..." therefore you can assume that such files are NOT executable.
I don't really understand how you can tell the difference between somefile.py and somefile.txt because association can be really the same. You can configure system to run .txt files the same way as .py files.
A windows PE always starts with the characters "MZ". This includes however also any kind of DLLs which are not necessarily executables.
To check for this however you'll have to open the file and read the header so that's probably not what you're looking for.
Here's the grepath.py that I've linked in my question:
#!/usr/bin/env python
"""Find executables in %PATH% that match PATTERN.
"""
#XXX: remove --use-pathext option
import fnmatch, itertools, os, re, sys, warnings
from optparse import OptionParser
from stat import S_IMODE, S_ISREG, ST_MODE
from subprocess import PIPE, Popen
def warn_import(*args):
"""pass '-Wd' option to python interpreter to see these warnings."""
warnings.warn("%r" % (args,), ImportWarning, stacklevel=2)
class samefile_win:
"""
http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html
"""
#staticmethod
def get_read_handle (filename):
return win32file.CreateFile (
filename,
win32file.GENERIC_READ,
win32file.FILE_SHARE_READ,
None,
win32file.OPEN_EXISTING,
0,
None
)
#staticmethod
def get_unique_id (hFile):
(attributes,
created_at, accessed_at, written_at,
volume,
file_hi, file_lo,
n_links,
index_hi, index_lo
) = win32file.GetFileInformationByHandle (hFile)
return volume, index_hi, index_lo
#staticmethod
def samefile_win(filename1, filename2):
"""Whether filename1 and filename2 represent the same file.
It works for subst, ntfs hardlinks, junction points.
It works unreliably for network drives.
Based on GetFileInformationByHandle() Win32 API call.
http://timgolden.me.uk/python/win32_how_do_i/see_if_two_files_are_the_same_file.html
"""
if samefile_generic(filename1, filename2): return True
try:
hFile1 = samefile_win.get_read_handle (filename1)
hFile2 = samefile_win.get_read_handle (filename2)
are_equal = (samefile_win.get_unique_id (hFile1)
== samefile_win.get_unique_id (hFile2))
hFile2.Close ()
hFile1.Close ()
return are_equal
except win32file.error:
return None
def canonical_path(path):
"""NOTE: it might return wrong path for paths with symbolic links."""
return os.path.realpath(os.path.normcase(path))
def samefile_generic(path1, path2):
return canonical_path(path1) == canonical_path(path2)
class is_executable_destructive:
#staticmethod
def error_message(barename):
r"""
"'%(barename)s' is not recognized as an internal or external\r\n
command, operable program or batch file.\r\n"
in Russian:
"""
return '"%(barename)s" \xad\xa5 \xef\xa2\xab\xef\xa5\xe2\xe1\xef \xa2\xad\xe3\xe2\xe0\xa5\xad\xad\xa5\xa9 \xa8\xab\xa8 \xa2\xad\xa5\xe8\xad\xa5\xa9\r\n\xaa\xae\xac\xa0\xad\xa4\xae\xa9, \xa8\xe1\xaf\xae\xab\xad\xef\xa5\xac\xae\xa9 \xaf\xe0\xae\xa3\xe0\xa0\xac\xac\xae\xa9 \xa8\xab\xa8 \xaf\xa0\xaa\xa5\xe2\xad\xeb\xac \xe4\xa0\xa9\xab\xae\xac.\r\n' % dict(barename=barename)
#staticmethod
def is_executable_win_destructive(path):
# assume path <-> barename that is false in general
barename = os.path.splitext(os.path.basename(path))[0]
p = Popen(barename, stdout=PIPE, stderr=PIPE, shell=True)
stdout, stderr = p.communicate()
return p.poll() != 1 or stdout != '' or stderr != error_message(barename)
def is_executable_win(path):
"""Based on:
http://timgolden.me.uk/python/win32_how_do_i/tell-if-a-file-is-executable.html
Known bugs: treat some "*~" files as executable, e.g. some "*.bat~" files
"""
try:
_, executable = FindExecutable(path)
return bool(samefile(GetLongPathName(executable), path))
except error:
return None # not an exe or a document with assoc.
def is_executable_posix(path):
"""Whether the file is executable.
Based on which.py from stdlib
"""
#XXX it ignores effective uid, guid?
try: st = os.stat(path)
except os.error:
return None
isregfile = S_ISREG(st[ST_MODE])
isexemode = (S_IMODE(st[ST_MODE]) & 0111)
return bool(isregfile and isexemode)
try:
#XXX replace with ctypes?
from win32api import FindExecutable, GetLongPathName, error
is_executable = is_executable_win
except ImportError, e:
warn_import("is_executable: fall back on posix variant", e)
is_executable = is_executable_posix
try: samefile = os.path.samefile
except AttributeError, e:
warn_import("samefile: fallback to samefile_win", e)
try:
import win32file
samefile = samefile_win.samefile_win
except ImportError, e:
warn_import("samefile: fallback to generic", e)
samefile = samefile_generic
def main():
parser = OptionParser(usage="""
%prog [options] PATTERN
%prog [options] -e PATTERN""", description=__doc__)
opt = parser.add_option
opt("-e", "--regex", metavar="PATTERN",
help="use PATTERN as a regular expression")
opt("--ignore-case", action="store_true", default=True,
help="""[default] ignore case when --regex is present; for \
non-regex PATTERN both FILENAME and PATTERN are first \
case-normalized if the operating system requires it otherwise \
unchanged.""")
opt("--no-ignore-case", dest="ignore_case", action="store_false")
opt("--use-pathext", action="store_true", default=True,
help="[default] whether to use %PATHEXT% environment variable")
opt("--no-use-pathext", dest="use_pathext", action="store_false")
opt("--show-non-executable", action="store_true", default=False,
help="show non executable files")
(options, args) = parser.parse_args()
if len(args) != 1 and not options.regex:
parser.error("incorrect number of arguments")
if not options.regex:
pattern = args[0]
del args
if options.regex:
filepred = re.compile(options.regex, options.ignore_case and re.I).search
else:
fnmatch_ = fnmatch.fnmatch if options.ignore_case else fnmatch.fnmatchcase
for file_pattern_symbol in "*?":
if file_pattern_symbol in pattern:
break
else: # match in any place if no explicit file pattern symbols supplied
pattern = "*" + pattern + "*"
filepred = lambda fn: fnmatch_(fn, pattern)
if not options.regex and options.ignore_case:
filter_files = lambda files: fnmatch.filter(files, pattern)
else:
filter_files = lambda files: itertools.ifilter(filepred, files)
if options.use_pathext:
pathexts = frozenset(map(str.upper,
os.environ.get('PATHEXT', '').split(os.pathsep)))
seen = set()
for dirpath in os.environ.get('PATH', '').split(os.pathsep):
if os.path.isdir(dirpath): # assume no expansion needed
# visit "each" directory only once
# it is unaware of subst drives, junction points, symlinks, etc
rp = canonical_path(dirpath)
if rp in seen: continue
seen.add(rp); del rp
for filename in filter_files(os.listdir(dirpath)):
path = os.path.join(dirpath, filename)
isexe = is_executable(path)
if isexe == False and is_executable == is_executable_win:
# path is a document with associated program
# check whether it is a script (.pl, .rb, .py, etc)
if not isexe and options.use_pathext:
ext = os.path.splitext(path)[1]
isexe = ext.upper() in pathexts
if isexe:
print path
elif options.show_non_executable:
print "non-executable:", path
if __name__=="__main__":
main()
Parse the PE format.
http://code.google.com/p/pefile/
This is probably the best solution you will get other than using python to actually try to run the program.
Edit: I see you also want files that have associations. This will require mucking in the registry which I don't have the information for.
Edit2: I also see that you differentiate between .doc and .py. This is a rather arbitrary differentiation which must be specified with manual rules, because to windows, they are both file extensions that a program reads.
Your question can't be answered. Windows can't tell the difference between a file which is associated with a scripting language vs. some other arbitrary program. As Windows is concerned, a .PY file is simply a document which is opened by python.exe.