How to add file size next to the files name? - python-2.7

I'm trying to export all files according to their extensions and add the size next to them.
# Python 2 snippet: ask the user for a file extension (e.g. "txt") and
# list every matching file in the current directory.
import glob2
ext = raw_input("extension:")
ext = "*." + ext  # build a glob pattern such as "*.txt"
print glob2.glob(ext)
How can I add the file size to the list?

Use os.stat() to find out file metadata (size, date created, etc). It's not clear to me what you mean by next to them so this code produces a list of tuples, [(filename,filesize), ... ].
# Python 2 snippet: glob for files matching the requested extension and
# pair each filename with its size in bytes.
import glob2
import os
ext = raw_input("extension:")
ext = "*." + ext  # glob pattern such as "*.txt"
filenames = glob2.glob(ext)
# os.stat() returns file metadata; st_size is the size in bytes.
filenames_with_sizes = [(filename, os.stat(filename).st_size) for filename in filenames]
print filenames_with_sizes
If you want a pretty list that is nice to print but not amenable to computation then you can do
filenames_with_sizes = ["%-40.40s %d" % (filename, os.stat(filename).st_size) for filename in filenames]

Related

Python 2.7 and PrettyTables

I am trying to get PrettyTables to work with the following script. I can get it almost to look right but it keeps separating my tables so it is printing 16 separate tables. I need all information in one table that I can sort. I appreciate all the help i can get.
import sys
import os
import datetime
import hashlib
import logging
def getScanPath():
    """Prompt the user for a directory path and return it.

    Aborts the whole script via sys.exit() when the input is not an
    existing directory.
    """
    target = raw_input('Please enter the directory to scan: ')
    # Guard clause: bail out immediately on anything that is not a directory.
    if not os.path.isdir(target):
        sys.exit('Invalid File Path ... Script Aborted')
    return target
def getFileList(filePath):
    """Return absolute paths of the regular files directly inside filePath.

    Entries that are not regular files (directories, etc.) are logged as
    errors and skipped.
    """
    foundFiles = []
    for entry in os.listdir(filePath):
        # os.listdir gives bare names; join with the directory and
        # normalise to an absolute path.
        candidate = os.path.abspath(os.path.join(filePath, entry))
        if not os.path.isfile(candidate):
            logging.error('A Non-File has been identified')
            continue
        foundFiles.append(candidate)
    return foundFiles
def getFileName(theFile):
    """Return just the final path component (the file's name)."""
    _, name = os.path.split(theFile)
    return name
def getFileSize(theFile):
    """Return the file's size in bytes (same value os.path.getsize reads)."""
    return os.stat(theFile).st_size
def getFileLastModified(theFile):
    """Return the file's last-modification time as a Unix timestamp."""
    return os.stat(theFile).st_mtime
def getFileHash(theFile):
    """Return the MD5 hex digest of the file's contents.

    Reads in 4 KiB chunks so arbitrarily large files never have to be
    held in memory at once.
    """
    digest = hashlib.md5()
    with open(theFile, "rb") as handle:
        while True:
            chunk = handle.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
# Main Script Starts Here
if __name__ == '__main__':
    from prettytable import PrettyTable

    # Welcome Message
    print("\nWelcome to the file scanner\n")

    # Prompt user for directory path
    scanPath = getScanPath()

    # Get a list of files with full path
    scanFileList = getFileList(scanPath)

    # Build ONE table before the loop.  The original created (and
    # printed) a fresh PrettyTable inside the per-file loop, which is
    # why it produced 16 separate one-row tables instead of one
    # sortable table.
    pTable = PrettyTable()
    pTable.field_names = ["File Name", "File Size", "Last Modified", "Md5 Hash Value"]

    print("Files found in directory")
    for eachFilePath in scanFileList:
        fileName = getFileName(eachFilePath)
        fileSize = getFileSize(eachFilePath)
        lastModified = getFileLastModified(eachFilePath)
        hashValue = getFileHash(eachFilePath)
        # Convert the raw timestamp to a readable datetime for display.
        fileModified = datetime.datetime.fromtimestamp(lastModified)
        pTable.add_row([fileName, fileSize, fileModified, hashValue])

    # Print the single complete table once, after all rows are added.
    print(pTable)
This should show me one big table using all the values from a set directory that the user chooses. This will allow me to sort the table later using prettytables.
I have no experience with prettyTables, but I noticed you have lastModified and fileModified yet only fileModified is used for a column in your table. Are you sure pretty table doesn't have some kind of row limit?

Avoid having urllib replace an existing file; instead give the new file a _1, _2 style name

I have a csv file with image urls and given file names in two columns. In the file some file names are repetitive, but their respective links are unique. I want to save all the images. So if
A given filename.jpg image exists I want the next images to be saved as filename_2,filename_3.
I use a simple urllib.urlretrieve line to get images
The imports:
import csv
import os
import re
import urllib
First, store your csv data.
# Read (file_name, url) pairs out of data.csv into two parallel lists.
file_names = []
urls = []
# NOTE: the original named the handle 'file' (shadowing the builtin)
# and called close() inside the with-block; the context manager
# already closes the handle, so both are fixed here.
with open('data.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    for file_name, url in reader:
        file_names.append(file_name)
        urls.append(url)
Make a new list to store your new file names in.
new_file_names = []
Iterate through the file_names list.
for file_name in file_names:
Grab the file extension. There are many image extensions: .jpg, .png, etc.
This is assuming the file extension is only 4 characters long including the . Anytime you see [-4:] throughout the document, be careful of that. If it is an issue, use regex to get the file extension instead.
file_ext = file_name[-4:]
Next iterate through the new_file_names list to see if we grab any matches with file_name from the file_names list.
for temp_file_name in new_file_names:
if temp_file_name == file_name:
When we get a match, first check if it already has a '_\d+' + file_ext. What this means is _ + any numbers + file_ext.
check = re.search('_\d+' + file_ext, temp_file_name)
If the check is True, we now want to see what that number is and add one.
if check:
number = int(check.group(0)[1:-4]) + 1
Now we want to pretty much do the opposite regex as before so we only get the file name + _ but without all the numbers. Then add on the new number and the file_ext.
inverse = re.search('.*_(?=\d+' + file_ext + ')', file_name)
file_name = inverse.group(0) + str(number) + file_ext
This else is for when the match is the very first occurence adding a _1 to the end of the file_name.
else:
file_name = file_name[:-4] + '_1' + file_ext
Append the file_name to the new_file_names list.
new_file_names.append(file_name)
Set a folder (if you want) to store your images. If the folder doesn't exist, it will create one for you.
# Destination folder for the downloaded images.
path = 'img/'
try:
    os.makedirs(path)
except OSError:
    # makedirs raises OSError when the directory already exists;
    # only re-raise when the path is genuinely not a directory.
    if not os.path.isdir(path):
        raise
Finally, to save the images, we use a for loop and zip up new_file_names and urls. Inside the loop we use urllib.urlretrieve to download the images.
# Download each image with the Python 2 urllib API; urlretrieve saves
# the resource at `url` to the local path `path + file_name`.
for file_name, url in zip(new_file_names, urls):
    urllib.urlretrieve(url, path + file_name)

Using filters in pyGtk

I am writing a script to display a GUI in which certain files can be chosen. I am using pyGtk and as of now, my code can display all the zip files. I want to add another filter to display only the zip files with the latest date.
Below is my function that displays only zip files.
def open_file( self, w, data=None):
    # Open a GTK file-chooser dialog, let the user pick a file, and
    # display its name, directory and size via self.label_template.
    d = gtk.FileChooserDialog( title="Select a file",
                               parent=self.window,
                               action=gtk.FILE_CHOOSER_ACTION_OPEN,
                               # button label/response pairs: "OK" maps to
                               # the truthy response True, "Cancel" to False.
                               buttons=("OK",True,"Cancel",False)
                               )
    #create filters
    filter1 = gtk.FileFilter()
    filter1.set_name("All files")
    filter1.add_pattern("*")
    d.add_filter(filter1)
    filter2 = gtk.FileFilter()
    filter2.set_name("Zip files")
    filter2.add_pattern("*.zip")
    d.add_filter(filter2)
    # run() blocks until the dialog closes and returns the response
    # value of the button pressed (True for "OK" per the mapping above).
    ok = d.run()
    if ok:
        import os
        fullname = d.get_filename()
        dirname, fname = os.path.split( fullname)
        size = "%d bytes" % os.path.getsize( fullname)
        text = self.label_template % (fname, dirname, size)
    else:
        # Cancelled: clear all three template fields.
        text = self.label_template % ("","","")
    self.label.set_label( text)
    d.destroy()
Is there a way I can choose a filter to display only the latest zip files in a each folder?
Thanks in advance for your help!
Instead of using filter2.add_pattern("*.zip"), use filter2.add_pattern("filename"), where filename is the name of the file with the latest date. You can write a function that returns a list with the file names of the latest zip files.

Concat a String in python

this is are my files
2015125_0r89_PEO.txt
2015125_0r89_PED.txt
2015125_0r89_PEN.txt
2015126_0r89_PEO.txt
2015126_0r89_PED.txt
2015126_0r89_PEN.txt
2015127_0r89_PEO.txt
2015127_0r89_PED.txt
2015127_0r89_PEN.txt
and I want to change to this:
US.CAR.PEO.D.2015.125.txt
US.CAR.PED.D.2015.125.txt
US.CAR.PEN.D.2015.125.txt
US.CAR.PEO.D.2015.126.txt
US.CAR.PED.D.2015.126.txt
US.CAR.PEN.D.2015.126.txt
US.CAR.PEO.D.2015.127.txt
US.CAR.PED.D.2015.127.txt
US.CAR.PEN.D.2015.127.txt
this is my code so far,
import os
# Collect the full path of every file under the data directory.
paths = (os.path.join(root, filename)
         for root, _, filenames in os.walk('C:\\data\\MAX\\') #location files
         for filename in filenames)
for path in paths:
    a = path.split("_")
    b = a[2].split(".")
    c = "US.CAR."+ b[0] + ".D." + a[0]
    # NOTE(review): this only builds and prints the new name; it never
    # calls os.rename, which is why no file on disk is renamed.
    print c
when I run the script it does not raise any error, but it also does not change the names of the .txt files, which is what it is supposed to do
any help?
The way you do it by first getting the path and then manipulating it will get bad results, in this case is best first get the name of the file, make the changes to it and then change the name of the file itself, like this
for root,_,filenames in os.walk('C:\\data\\MAX\\'):
for name in filenames:
print "original:", name
a = name.split("_")
b = a[2].split(".")
new = "US.CAR.{}.D.{}.{}".format(b[0],a[0],b[1]) #don't forget the file extention
print "new",new
os.rename( os.path.join(root,name), os.path.join(root,new) )
String concatenation is less efficient here; the better way is to use string formatting.

writing instead of printing, Python 2.7

My code for this works perfectly. I can print to the screen exactly how I want it. However, I want it to write to a file so that I can view the file instead of the print screen. So I've tried to do the following but I'm coming up with a few issues. Error message:
from xml.dom import minidom
import sys
import os, fnmatch
def find_files(directory, pattern):
    """Yield the full path of every file under *directory* whose
    basename matches the fnmatch *pattern* (e.g. '*file.xml')."""
    for dirpath, _dirs, names in os.walk(directory):
        matches = (n for n in names if fnmatch.fnmatch(n, pattern))
        for n in matches:
            yield os.path.join(dirpath, n)
# Walk c:/Python27, parse every *file.xml found, and print each Game
# element's name attribute together with its Year child's value.
for filename in find_files('c:/Python27','*file.xml'):
    print ('Found file.xml:', filename)  # Python 2: this prints a tuple
    xmldoc = minidom.parse(filename)
    itemlist = xmldoc.getElementsByTagName('Game')
    for item in itemlist:
        year = item.getElementsByTagName('Year')
        for s in year:
            print item.attributes['name'].value, s.attributes['value'].value
TypeError: function takes exactly 1 argument (2 given). Here is the code with the write function instead:
from xml.dom import minidom
import sys
import os, fnmatch
def find_files(directory, pattern):
    # Recursively yield the full path of every file under *directory*
    # whose basename matches the fnmatch *pattern* (e.g. '*file.xml').
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                yield filename
# Write the results to test.txt instead of printing.  file.write()
# takes exactly ONE string argument, so the pieces are joined with '+'
# (passing two arguments was the cause of the reported TypeError).
# A with-block also guarantees the file is closed/flushed, which the
# original never did.
with open('test.txt', 'w') as f:
    for filename in find_files('c:/Python27', '*file.xml'):
        f.write('Found file.xml: ' + filename + '\n')
        xmldoc = minidom.parse(filename)
        itemlist = xmldoc.getElementsByTagName('Game')
        for item in itemlist:
            year = item.getElementsByTagName('Year')
            for s in year:
                # Name and year on one line; write() does not append a
                # newline, so add it explicitly.
                f.write(item.attributes['name'].value + ' ' + s.attributes['value'].value + '\n')
If you want to make your two arguments into a single line (that f.write will accept) you can do something like
f.write("Found file.xml:" + filename + "\n")
+ will concatenate the elements and give you a single string with a newline at the end, for a neat stack of the elements you were looking for in a final file.
As it is, the Error message looks like it's telling you exactly what the problem is -- f.write really does take only one argument, and having a comma in the function call indicates a second argument.