shutil.make_archive creates zip file , but skips empty directories - python-2.7

I'm not sure what I'm doing wrong, but what I need is to create a zip file with all files and folders (empty or not) of a given directory. At the moment I have this simple code that "works", but it doesn't add empty folders :|
content of c:\temp\trashtests
c:\temp\trashtests\a\a.txt
c:\temp\trashtests\b\b.txt
c:\temp\trashtests\c
c:\temp\trashtests\d
Current code:
class ZipTools:
    """Build a zip archive from the contents of a directory.

    ``path`` holds the directory to archive and ``zip_name`` the base name
    that is handed to ``shutil.make_archive``.
    """

    def __init__(self, target_path=None, zip_name=None):
        self.zip_name = zip_name
        self.path = target_path

    def create_zip(self):
        """Archive everything under ``self.path`` in zip format."""
        # Positional form of make_archive(base_name, format, root_dir).
        shutil.make_archive(self.zip_name, 'zip', self.path)
execution:
# Raw strings keep the backslashes literal; without them '\t' and '\a' are
# read as a tab and a bell character and both paths are corrupted.
# NOTE(review): shutil.make_archive appends the format extension to the base
# name itself, so passing a name that already ends in '.zip' may yield
# 'a.zip.zip' -- confirm the intended output name.
ab = self.ZipTools(r'c:\temp\trashtests', r'c:\test\a.zip')
ab.create_zip()
The output is a zip file only with:
\a\a.txt
\b\b.txt
So, how can I create a zip file with all the contents of a given directory using shutil? If that is not possible, how could I do it using zipfile?
Thanks in advance.
EDIT:
As suggested by J.F. Sebastian, I tried the solution from this question, but it didn't work: it created a zip file with the following structure:
File: a.zip
Content:
c:
a\a.txt
b\b.txt
I'm still trying to figure it out a solution :)

I was able to solve this problem using this code:
An important note, this code works for what i need, which is:
Zip an existing folder with the same name of the zip file to create.
import os
import sys
import zipfile
class ZipTools:
    """Zip a folder, including its empty sub-folders.

    Entries are stored relative to the zipped folder, so the archive never
    contains the drive or the parent directories of that folder.
    """

    def __init__(self, folder_path=None, pkg_zip=None):
        """Remember the folder to zip and where the archive is written.

        folder_path -- directory whose contents are archived.
        pkg_zip     -- name kept in ``self.zip_name``; the output file
                       itself is given by ``self.archive``.
        """
        self.path = folder_path
        self.zip_name = pkg_zip
        self.temp_dir = 'c:\\temp'
        self.archive = '{p}\\{z}'.format(p=self.temp_dir, z='a.zip')

    def zip(self):
        """Write every file and sub-folder of ``self.path`` to the archive.

        Exits the process with status 1 if zipfile raises BadZipfile.
        """
        try:
            # The context manager closes the archive on every exit path and
            # never touches an unbound name when opening the archive fails.
            with zipfile.ZipFile(self.archive, 'w',
                                 zipfile.ZIP_DEFLATED) as zip_file:
                for root, folders, files in os.walk(self.path):
                    # Directories are written explicitly so that empty ones
                    # are kept in the archive.
                    for folder_name in folders:
                        ab_path = os.path.join(root, folder_name)
                        # relpath works for any folder name and separator,
                        # unlike stripping a hand-built prefix string.
                        rel_path = os.path.relpath(ab_path, self.path)
                        print(rel_path)
                        zip_file.write(ab_path, rel_path)
                    for file_name in files:
                        ab_path = os.path.join(root, file_name)
                        rel_path = os.path.relpath(ab_path, self.path)
                        zip_file.write(ab_path, rel_path)
        except zipfile.BadZipfile as message:
            print(message)
            sys.exit(1)


if __name__ == '__main__':
    ab = ZipTools('c:\\temp\\trashtests', 'a.zip')
    ab.zip()

Related

Django howto progessively write data into a huge zipfile FileField

I have a django model like this:
class Todo(models.Model):
    """Task record holding a file field plus status and progress counters."""

    # File attached to the task; blank=True lets the field be left empty.
    big_file = models.FileField(blank=True)
    # Small non-negative integer state, 0 by default.
    status = models.PositiveSmallIntegerField(default=0)
    # Integer progress value, 0 by default.
    progress = models.IntegerField(default=0)
I'd like to do two operations:
first make an empty zipfile out of big_file (less important)
and then progressively add files into my zipfile (and save it iteratively)
The overall process would look like that:
from django.core.files.base import File
import io, zipfile
def generate_data(todo):
    """Sketch of the intended flow: save an empty archive, then fill it.

    The nested ``save()`` call is the part the question reports as not
    working; it is kept as written.
    """
    io_bytes = io.BytesIO(b'')
    # 1. save an empty Zip archive:
    with zipfile.ZipFile(io_bytes, 'w') as zip_fd:
        todo.generated_data.save('heavy_file.zip', File(zip_fd))
    # 2. Progressively fill the Zip archive:
    with zipfile.ZipFile(io_bytes, 'w') as zip_fd:
        for filename, data_bytes in long_iteration(todo):
            with zip_fd.open(filename, 'w') as in_zip:
                in_zip.write(data_bytes)
            if condition(something):
                todo.generated_data.save() # that does not work
                todo.status = 1
                todo.progress = 123
                todo.save()
    todo.status = 2
    todo.save()
But I can't figure out the right filedescriptor / file-like object / Filepath / django-File object combination ...
And it seems that in django I always have to save(filename, content). But my content could be Gigabytes, so it does not sound reasonable to store it all into a "content" variable?
Ok I found following solution for myself; first create an empty file and then use the <my_file_field>.path attribute:
def generate_data(todo):
    """Create the file through the field, then write to it via its path."""
    # 1. save an empty Zip archive:
    todo.big_file.save('filename.zip', ContentFile(''))
    with zipfile.ZipFile(todo.big_file.path, 'w') as zip_fd:
        pass
    # 2. Progressively fill the Zip archive:
    with zipfile.ZipFile(todo.big_file.path, 'w') as zip_fd:
        ... # do the stuff

How to ignore exception and continue loop in python?

I have a python function that adds the directory to zip file in TEMP directory, but when I launch it I get an exception -> IOError: [Errno 13] Permission denied: 'NTUSER.DAT'. Of course, I will not have access to this file and after exception loop breaks and application exit, so I wanted to know how to make loop continue even after exception?
import zipfile
import sys
import os
_DIR_TO_ZIP = os.environ['USERPROFILE']
_ZIPPED_FILE = os.environ['TEMP'] +"\\" +os.environ['USERNAME'] +".zip"
def zip_folder(folder_path, output_path):
    """Zip the contents of an entire folder (with that folder included
    in the archive). Empty subfolders will be included in the archive
    as well.

    Entries that cannot be read (for example files locked by the system)
    are reported and skipped, so one failure does not abort the archive.

    folder_path -- directory to archive.
    output_path -- path of the zip file to create.

    Raises IOError/OSError if the archive itself cannot be created.
    """
    # abspath drops a trailing separator, so dirname is really the parent.
    parent_folder = os.path.dirname(os.path.abspath(folder_path))
    # The context manager closes the archive even if the walk fails.
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for root, folders, files in os.walk(folder_path):
            # Folders first, so empty ones get an entry of their own.
            for name in folders + files:
                absolute_path = os.path.join(root, name)
                relative_path = os.path.relpath(absolute_path, parent_folder)
                print("Adding '%s' to archive." % absolute_path)
                try:
                    zip_file.write(absolute_path, relative_path)
                except (IOError, OSError) as error:
                    # Only this entry is lost; the loop carries on.
                    print("Skipping '%s': %s" % (absolute_path, error))
    print("'%s' created successfully." % output_path)
if __name__ == '__main__':
zip_folder(_DIR_TO_ZIP, _ZIPPED_FILE)
If you want to keep your current function:
Use the tempfile module, it's cleaner and handles the temp paths
Simply insert try/except statements at a lower level, where you expect some elements of the yielded objects to fail.

Python 2.7 and PrettyTables

I am trying to get PrettyTables to work with the following script. I can get it almost to look right but it keeps separating my tables so it is printing 16 separate tables. I need all information in one table that I can sort. I appreciate all the help i can get.
import sys
import os
import datetime
import hashlib
import logging
def getScanPath():
    """Ask the user for a directory and return it; abort if it is not one."""
    candidate = raw_input('Please enter the directory to scan: ')
    # Guard clause: anything that is not an existing directory ends the script.
    if not os.path.isdir(candidate):
        sys.exit('Invalid File Path ... Script Aborted')
    return candidate
def getFileList(filePath):
    """Return the absolute paths of the regular files directly in filePath.

    Entries that are not files (such as sub-directories) are left out; each
    one is reported through ``logging.error``.
    """
    found = []
    # os.listdir yields bare names, so each is joined back onto the directory
    # and made absolute before it is tested.
    for entry in os.listdir(filePath):
        candidate = os.path.abspath(os.path.join(filePath, entry))
        if not os.path.isfile(candidate):
            logging.error('A Non-File has been identified')
            continue
        found.append(candidate)
    return found
def getFileName(theFile):
    """Return the final component of the path theFile."""
    _, tail = os.path.split(theFile)
    return tail
def getFileSize(theFile):
    """Return the size of theFile in bytes."""
    info = os.stat(theFile)
    return info.st_size
def getFileLastModified(theFile):
    """Return the last-modification time of theFile as a timestamp."""
    info = os.stat(theFile)
    return info.st_mtime
def getFileHash(theFile):
    """Return the hexadecimal MD5 digest of the contents of theFile."""
    digest = hashlib.md5()
    with open(theFile, "rb") as stream:
        # Read in 4096-byte pieces so the whole file is never held in memory.
        while True:
            block = stream.read(4096)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
# Main Script Starts Here
if __name__ == '__main__':
    # Imported once, up front, instead of on every loop iteration.
    from prettytable import PrettyTable

    #Welcome Message
    print("\nWelcome to the file scanner\n")
    # prompt user for directory path
    scanPath = getScanPath()
    # Get a list of files with full path
    scanFileList = getFileList(scanPath)
    # Output Filenames
    print("Files found in directory")
    # Build ONE table before the loop; creating it inside the loop produced
    # a separate single-row table for every file.
    pTable = PrettyTable()
    pTable.field_names = ["File Name", "File Size", "Last Modified", "Md5 Hash Value"]
    for eachFilePath in scanFileList:
        fileName = getFileName(eachFilePath)
        fileSize = getFileSize(eachFilePath)
        lastModified = getFileLastModified(eachFilePath)
        hashValue = getFileHash(eachFilePath)
        # Convert the raw timestamp into a datetime for display.
        fileModified = datetime.datetime.fromtimestamp(lastModified)
        pTable.add_row([fileName, fileSize, fileModified, hashValue])
    # Print once, after every row has been added.
    print(pTable)
This should show me one big table using all the values from a set directory that the user chooses. This will allow me to sort the table later using prettytables.
I have no experience with prettyTables, but I noticed you have lastModified and fileModified yet only fileModified is used for a column in your table. Are you sure pretty table doesn't have some kind of row limit?

I am trying to rename a bunch of files in a directory but I am getting an error("NameError: name 'root' is not defined") when saving renamed files

html_files is my directory, and I have to remove the "https:##www.wisdomjobs.com#e-university#" part from the name of every file in it, saving the renamed files in the same directory.
import os
file_names=os.listdir('html_files')
for file_name in file_names:
    #print file_name
    # From here on file_name holds the stripped name; the original name is
    # no longer available for building the source path.
    file_name = file_name.replace("https:##www.wisdomjobs.com#e-university#","")
    #print filename
    # `root` and `html_files` are used as bare names but never assigned in
    # this snippet, so this is the line that raises the NameError.
    fullpath = os.path.join(root/html_files, file_name)
    os.rename(fullpath, file_name)
neither root nor html_files are defined in your code. what you probably want is this:
import os
source_dir = 'html_files'
unwanted = "https:##www.wisdomjobs.com#e-university#"
for orig_name in os.listdir(source_dir):
    # Both paths stay inside source_dir; only the unwanted text is dropped.
    os.rename(
        os.path.join(source_dir, orig_name),
        os.path.join(source_dir, orig_name.replace(unwanted, "")),
    )

How do I confirm with python that required files are in a particular folder and are accessible or not?

I have 5 files in a folder App:
App|
|--A.txt
|--B.txt
|--C.txt
|--D.txt
|--E.txt
|--Run.py
|--Other Folders or Files
Now I want to know whether the files (A.txt, B.txt, C.txt, D.txt, E.txt) are present, and if they are, I want to call a function Cleaner, passing it the names of these files. I have written this code but nothing is happening. The function is not getting called.
import glob
import csv
import itertools
files = glob.glob("*.txt")
i = 0
def sublist(a, b):
    """Return True when the items of a occur in b in the same order.

    Other items may sit between them in b; an empty a always matches.
    """
    remaining = iter(b)
    for wanted in a:
        # Keep consuming b until the wanted item shows up; the iterator is
        # shared, so later items must be found after this position.
        for candidate in remaining:
            if not candidate != wanted:
                break
        else:
            # b ran out before the wanted item was seen.
            return False
    return True
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below (in particular where `i = 1` belongs) cannot be confirmed;
# the lines are left exactly as found.
# sublist() matches items in order, so this check also depends on the order
# in which glob returned the names -- TODO confirm that is intended.
required_files = ['Alternate_ADR6_LFB1.txt', 'Company_Code.txt', 'Left_LIFNR.txt', 'LFA1.txt', 'LFB1.TXT', 'LFBK.TXT']
if sublist(required_files,files):
# The loop variable reuses the name `files`, replacing the glob result.
for files in required_files:
try:
f = open(files , 'r')
f.close()
except IOError as e:
print 'Error opening or accessing files'
i = 1
else:
print 'Required files are not in correct folder'
if i == 1:
for files in required_files:
# Cleansing is defined only further down the file, after this call site.
Cleansing(files)
def Cleansing(filename):
# NOTE(review): parts of the body are elided ("...") in this listing.
# 'filename' below is a quoted string literal, not the parameter, so the
# same literal path is opened whatever argument is passed in.
with open('filename', 'rb') as f_input:
...
...
break
# Likewise, the output name is the fixed literal 'filename_Cleaned.csv'.
with open('filename', 'rb') as f_input, open('filename_Cleaned.csv', 'wb') as f_output:
csv_output = csv.writer(f_output)
csv_output.writerow('something')
Update
I think I am now able to call the function and check for the valid files, but it's not very Pythonic. I am also still unable to open or create a file named after the input file plus "_cleaned", i.e. filename_cleaned.csv.
You want to check if a list of files (required_files) are in a folder.
You successfully get the complete list of text files in the folder with files = glob.glob("*.txt")
So the first question is: Checking for sublist in list
As the order is not important, we can use sets:
if set(required_files) <= set(files):
# do stuff
else:
#print warning
Next question: How to open the files and create an outputs with names like "filename_Cleaned.csv"
A very important thing you have to understand: "filename" is not the same thing as filename. The first is a string, it will always be the same thing, it will not be replaced by real filenames. When writing open('filename', 'rb') you're trying to open a file called "filename".
filename however can be a variable name and take different values.
for filename in required_files:
Cleansing(filename)
def Cleansing(filename):
with open(filename, 'rb') as f_input, open(filename+'_Cleaned.csv', 'wb') as f_output:
#read stuff in f_input
#write stuff in f_output