Looping CSV Concat in Python Pandas - python-2.7

I have multiple folders, each containing CSVs. I am trying to concatenate the CSVs in each subdirectory and then export the result, so that I end up with the same number of output files as folders: Folder1.csv, Folder2.csv, ... Folder99.csv, etc. This is what I have so far:
import os
from glob import glob
import pandas as pd
import numpy as np
# Question snippet (reproduced as pasted, indentation lost): walk rootDir,
# read every CSV found and write a concatenated CSV to OutDirectory.
rootDir = 'D:/Data'
OutDirectory = 'D:/OutPut'
os.chdir(rootDir)
# The directory has folders as follows
# D:/Data/Folder1
# D:/Data/Folder2
# D:/Data/Folder3
# ....
# .....
# D:/Data/Folder99
# Each folder (Folder1, Folder2, etc.) has many CSVs.
frame = pd.DataFrame()
# NOTE(review): list_ is created once, before the walk, so it keeps
# accumulating frames across all folders instead of per folder.
list_ = []
for (dirname, dirs, files) in os.walk(rootDir):
for filename in files:
if filename.endswith('.csv'):
# NOTE(review): filename is the bare name yielded by os.walk; it is not
# joined with dirname, so it is resolved against the current working
# directory (rootDir after the chdir above). This matches the reported
# "File ... does not exist" error.
df = pd.read_csv(filename,index_col=None, na_values=['-999'], delim_whitespace= True, header = 0, skiprows = 2)
# NOTE(review): OutputFname is not assigned anywhere in this snippet.
OutFile = '%s.csv' % OutputFname
list_.append(df)
frame = pd.concat(list_)
# NOTE(review): this writes df (the last file read), not the concatenated
# frame, and OutDirectory+OutFile has no path separator between the parts.
df.to_csv(OutDirectory+OutFile, sep = ',', header= True)
I am getting the following error:
IOError: File file200150101.csv does not exist

You need to concatenate dirname and filename for a full path to your files. Change this line like so:
df = pd.read_csv(os.path.join(dirname, filename) ,index_col=None, na_values=['-999'], delim_whitespace= True, header = 0, skiprows = 2)
Edit:
I don't know how pandas works because I have never used it. But I think your problem is that you defined everything you wanted to be done to the CSVs in the inner loop, which loops over files only (at least the indentation looks that way - but that could also be a formatting problem that occurred when you pasted your code here on SO).
I rewrote your code and fixed some things that I think might be the problem:
First, I renamed your variables starting with big letters because,
for me, it always looks weird to have vars with big starting letters.
I moved your list variable to the outer loop because it should be
reset every time you enter a new directory as you want all CSVs to be
merged per folder.
And finally, I fixed the indentation. In python indentation tells
the compiler which commands are in the inner or outer loop.
My code now looks like this. You might have to change some things because I can't test it right now:
import os
from glob import glob
import pandas as pd
import numpy as np
# Merge the CSVs of every sub-folder of rootDir into one CSV per folder,
# written to outDir as <folder name>.csv.
rootDir = 'D:/Data'
outDir = 'D:/OutPut'
os.chdir(rootDir)
dirs = os.listdir(rootDir)
frame = pd.DataFrame()
for dirname in dirs:
    # the outer loop loops over directories; os.listdir also returns plain
    # files, so skip everything that is not a folder
    if not os.path.isdir(os.path.join(rootDir, dirname)):
        continue
    # collect csv data for every directory, not in general
    # (named 'frames' so the builtin 'list' is not shadowed)
    frames = []
    # sorted() makes the row order of the merged file repeatable
    for filename in sorted(glob(os.path.join(rootDir, dirname, '*.csv'))):
        # the inner loop loops over the files in the 'dirname' folder
        df = pd.read_csv(filename, index_col=None, na_values=['-999'], delim_whitespace=True, header=0, skiprows=2)
        frames.append(df)
    # at this point, all files in the current directory were processed
    if not frames:
        # pd.concat raises ValueError on an empty list; nothing to write
        continue
    frame = pd.concat(frames)  # merge the CSVs of this folder
    outFile = '%s.csv' % dirname  # define the name for the output csv
    frame.to_csv(os.path.join(outDir, outFile), sep=',', header=True)  # save the data

Related

Python 2.7 and PrettyTables

I am trying to get PrettyTables to work with the following script. I can get it almost to look right but it keeps separating my tables so it is printing 16 separate tables. I need all information in one table that I can sort. I appreciate all the help i can get.
import sys
import os
import datetime
import hashlib
import logging
def getScanPath():
    """Prompt the user for a directory to scan and return the entered path.

    Terminates the script through sys.exit() with an error message when the
    entered path is not an existing directory.
    """
    # Prompt User for path to scan
    path = raw_input('Please enter the directory to scan: ')
    # Verify that the path is a directory
    if not os.path.isdir(path):
        sys.exit('Invalid File Path ... Script Aborted')
    return path
def getFileList(filePath):
    """Return the absolute paths of all regular files directly in filePath.

    Entries that are not files (e.g. sub-directories) are left out and an
    error is logged for each of them. Sub-directories are not descended into.
    """
    # Create an empty list to hold the resulting files
    pathList = []
    # os.listdir gives just the names of the entries, NOT the full path
    for eachFile in os.listdir(filePath):
        # Join the directory with the name and make the result absolute
        absPath = os.path.abspath(os.path.join(filePath, eachFile))
        # Keep the path only when it is a file, i.e. not a folder
        if os.path.isfile(absPath):
            pathList.append(absPath)
        else:
            logging.error('A Non-File has been identified')
    # Once all files have been identified, return the list to the caller
    return pathList
def getFileName(theFile):
    """Return the final component (base name) of the path theFile."""
    return os.path.basename(theFile)
def getFileSize(theFile):
    """Return the size of the file theFile in bytes."""
    return os.path.getsize(theFile)
def getFileLastModified(theFile):
    """Return the last-modification time of theFile as a timestamp.

    The value is what os.path.getmtime reports (seconds since the epoch).
    """
    return os.path.getmtime(theFile)
def getFileHash(theFile):
    """Return the MD5 digest of the file theFile as a hex string.

    The file is read in 4096-byte chunks so that large files are not loaded
    into memory at once.
    """
    hash_md5 = hashlib.md5()
    with open(theFile, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file)
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
# Main Script Starts Here
# Question snippet (reproduced as pasted, indentation lost): prompts for a
# directory, collects name, size, modification time and MD5 hash of every
# file in it and prints them with PrettyTable.
if __name__ == '__main__':
#Welcome Message
print "\nWelcome to the file scanner\n"
# prompt user for directory path
scanPath = getScanPath()
# Get a list of files with full path
scanFileList = getFileList(scanPath)
# Output Filenames
print "Files found in directory"
for eachFilePath in scanFileList:
fileName = getFileName(eachFilePath)
fileSize = getFileSize(eachFilePath)
lastModified = getFileLastModified(eachFilePath)
hashValue = getFileHash(eachFilePath)
# convert the raw timestamp into a datetime for display
fileModified = (datetime.datetime.fromtimestamp(lastModified))
# NOTE(review): as pasted, the import, the PrettyTable() construction and the
# field_names assignment follow the per-file statements. If they sit inside
# the for loop, a new one-row table is created (and printed) for every file,
# which would explain the separate tables. Presumably the table should be
# created once before the loop, rows added inside it, and the table printed
# once after it - confirm against the original indentation.
from prettytable import PrettyTable
pTable = PrettyTable()
pTable.field_names = ["File Name", "File Size", "Last Modified", "Md5 Hash Value"]
pTable.add_row ([fileName, fileSize, fileModified, hashValue])
print (pTable)
This should show me one big table using all the values from a set directory that the user chooses. This will allow me to sort the table later using prettytables.
I have no experience with prettyTables, but I noticed you have lastModified and fileModified yet only fileModified is used for a column in your table. Are you sure pretty table doesn't have some kind of row limit?

Python script to move specific files from one folder to another

I am trying to write a script (python 2.7) that will use a regex to identify specific files in a folder and move them to another folder. When I run the script, however, the source folder is moved to the target folder instead of just the files within it.
import os, shutil, re
# Question snippet (reproduced as pasted, indentation lost): move the files
# in src whose name starts with "CTASK" to dst.
src = "C:\\Users\\****\\Desktop\\test1\\"
#src = os.path.join('C:', os.sep, 'Users','****','Desktop','test1\\')
dst = "C:\\Users\\****\\Desktop\\test2\\"
#dst = os.path.join('C:', os.sep, 'Users','****','Desktop','test2')
files = os.listdir(src)
#regexCtask = "CTASK"
print files
#regex =re.compile(r'(?<=CTASK:)')
files.sort()
#print src, dst
# NOTE(review): 'CTASK*' means "CTAS" followed by zero or more "K", so it
# also matches names starting with just "CTAS".
regex = re.compile('CTASK*')
for f in files:
if regex.match(f):
# NOTE(review): this joins src with files (the whole list) rather than f,
# and assigns to filescr while the next line reads filesrc.
filescr= os.path.join(src, files)
shutil.move(filesrc,dst)
#shutil.move(src,dst)
So basically there are files in "test1" folder that I want to move to "test2", but not all the files, just the ones that contain "CTASK" at the beginning.
The **** in the path is to protect my work username.
Sorry if it is messy, I am still trying a few things out.
You need to assign the path of the exact file (f) to the filescr variable on each loop iteration, not the path joined with files (files is a list!).
Try below code
import os
from os import path
import shutil
# Move every regular file in src whose name starts with "CTASK" to dst.
src = "C:\\Users\\****\\Desktop\\test1\\"
dst = "C:\\Users\\****\\Desktop\\test2\\"
# path.isfile keeps sub-folders named CTASK... out of the selection
files = [i for i in os.listdir(src) if i.startswith("CTASK") and path.isfile(path.join(src, i))]
for f in files:
    # the question asks for the files to be moved, not copied
    shutil.move(path.join(src, f), dst)
I wanted to move following folders : 1.1,1.2,1.45,1.7 to folder with name '1'
I Have posted solution below:
import shutil
import os
# Move every entry of src_path into a folder under dest_path that is named
# after the part of the entry's name before the first dot,
# e.g. '1.1', '1.2', '1.45' all end up in '<dest_path>/1'.
src_path = '/home/user/Documents/folder1'
dest_path = '/home/user/Documents/folder2/'
source = os.listdir(src_path)
for folder in source:
    # folder = '1.1 -anything'
    newf = folder.split('.')[0]
    # newf is the name of the new folder the entry is moved into;
    # change the folder name as per your requirement
    destination = os.path.join(dest_path, newf)
    if not os.path.exists(destination):
        os.makedirs(destination)
    # change move to copy if you want to copy instead of moving
    shutil.move(os.path.join(src_path, folder), destination)
# parenthesised so the line works on both Python 2 and Python 3
print('done moving')

How to open concurrently two files with same name and different extension in python?

I have a folder with multiple couple of files:
a.txt
a.json
b.txt
b.json
and so on:
Using a for loop i want to open a couple of file (a.txt and a.json) concurrently.
Is there a way to do it using the 'with' statement in python?
You could do something like the following which constructs a dictionary keyed by the file name sans extension, and with a count of the number of files matching the required extensions. Then you can iterate over the dictionary opening pairs of files:
import os
from collections import defaultdict
EXTENSIONS = {'.json', '.txt'}
directory = '/path/to/your/files'
# Count, per base path (full path without extension), how many of the
# required extensions exist in the directory.
grouped_files = defaultdict(int)
for f in os.listdir(directory):
    name, ext = os.path.splitext(os.path.join(directory, f))
    if ext in EXTENSIONS:
        grouped_files[name] += 1
# Only base paths for which every required extension was found form a pair.
for name in grouped_files:
    if grouped_files[name] == len(EXTENSIONS):
        # a single 'with' statement opens (and later closes) both files
        with open('{}.txt'.format(name)) as txt_file, \
                open('{}.json'.format(name)) as json_file:
            # process files
            print(txt_file, json_file)
I have two folders of different files, one with .jpg and another with .xml; this is how I put them into another folder:
import os
from pathlib import Path
import shutil
# Move every .xml/.jpg pair that shares a base name into one folder.
# Python does not expand %USERNAME% in a path by itself; on Windows
# os.path.expandvars replaces it with the environment variable's value.
xmlPath = os.path.expandvars('C:\\Users\\%USERNAME%\\Desktop\\img+xml\\XML')
picPath = os.path.expandvars('C:\\Users\\%USERNAME%\\Desktop\\img+xml\\img')
dest = os.path.expandvars(r'C:\Users\%USERNAME%\Desktop\img+xml\i')
# listing of both source directories
xmlDir = os.listdir(xmlPath)
picDir = os.listdir(picPath)
# base names (file name without extension) found in each directory; the
# picture names come from the picture folder, not from the XML folder
xmlList = [Path(a).stem for a in xmlDir if Path(a).suffix.lower() == '.xml']
picList = [Path(b).stem for b in picDir if Path(b).suffix.lower() == '.jpg']
# a set gives constant-time lookups instead of a nested loop over both lists
picStems = set(picList)
# matching and putting both files of each pair in the destination
for a in xmlList:
    if a not in picStems:
        continue
    try:
        shutil.move(os.path.join(xmlPath, f'{a}.xml'), dest)
        shutil.move(os.path.join(picPath, f'{a}.jpg'), dest)
    except OSError as e:
        # best effort: report the failing pair and carry on with the rest
        print(e)

How to rename JPG files with running order using Python

I am quite new to Python programming and I am trying to rename 100 files with the ".jpg" extension, located in a specific folder, using Python.
I need that the files will be renamed by running order start from number 1. This is the code i start writing:
import os,glob,fnmatch
# Question snippet (reproduced as pasted, indentation lost): change into the
# picture folder and print the name of every *.jpg file found there.
os.chdir(r"G:\desktop\Project\test")
for files in glob.glob("*.jpg"):
print files
When i run it, i get:
>>>
er3.jpg
IMG-20160209-ssdeWA0000.jpg
IMG-20160209-WA0000.jpg
sd4.jpg
tyu2.jpg
uj7.jpg
we3.jpg
yh7.jpg
>>>
so the code, till now is OK.
For example my folder is:
and i need that all the files name will be:
1,2,3,4 - with running order names. Is it possible with python 2.7?
If you simply want to rename all files as 1.jpg, 2.jpg etc. you can do this:
import os
import glob
# Rename all *.jpg files in the folder to 1.jpg, 2.jpg, ...
os.chdir(r"G:\desktop\Project\test")
# glob() returns the names in arbitrary order; sorting makes the numbering
# repeatable between runs
# NOTE(review): if the folder already holds a file named like a later target
# (e.g. an existing '2.jpg'), os.rename fails on Windows and overwrites it on
# POSIX - run this on a folder without numeric names, or rename via temporary
# names first.
for index, oldfile in enumerate(sorted(glob.glob("*.jpg")), start=1):
    newfile = '{}.jpg'.format(index)
    os.rename(oldfile, newfile)
enumerate() is used to get the index of each file from the list returned by glob(), so that it can be used to create the new filename. Note that it allows you to specify the start index, so I've started from 1 rather than the Python standard of zero.
If you want this list of files to be sortable properly, you'll want the filename to be padded with zeros as well (001.jpg, etc.). In that case simply replace newfile = '{}.jpg'.format(index) with newfile = '{:03}.jpg'.format(index).
See the docs for more on str.format()
To rename all the JPG files from a particular folder First, get the list of all the files contain in the folder.
os.listdir will give you list all the files in images path.
use enumerate to get the index numbers to get the new name for
images.
import os
# Rename the JPG files in images_path to 1.jpg, 2.jpg, ...
images_path = r"D:\shots_images"
# Keep only the JPG files before numbering: enumerating every directory
# entry would leave gaps in the numbers wherever another file type occurs.
image_list = sorted(
    image for image in os.listdir(images_path)
    if os.path.splitext(image)[1] == '.jpg'
)
# start=1 gives the running order starting at 1 that the question asks for
for i, image in enumerate(image_list, start=1):
    src = os.path.join(images_path, image)
    dst = os.path.join(images_path, str(i) + '.jpg')
    os.rename(src, dst)
import os
from os import path
# Give every .jpeg file in the folder a .jpg extension instead.
os.chdir("//Users//User1//Desktop//newd//pics")
for file in os.listdir():
    name, ext = path.splitext(file)
    # only .jpeg files are renamed; everything else is left untouched
    if ext == '.jpeg':
        dst = '{}.jpg'.format(name)
        os.rename(file, dst)

Os.walk - WindowsError: [Error 123] The filename, directory name, or volume label syntax is incorrect:

new to python and looking for some help on a problem I am having with os.walk. I have had a solid look around and cannot find the right solution to my problem.
What the code does:
Scans a users selected HD or folder and returns all the filenames, subdirs and size. This is then manipulated in pandas (not in code below) and exported to an excel spreadsheet in the formatting I desired.
However, in the first part of the code, in Python 2.7, I am currently experiencing the below error:
WindowsError: [Error 123] The filename, directory name, or volume label syntax is incorrect: 'E:\03. Work\Bre\Files\folder2\icons greyscale flatten\._Icon_18?10 Stainless Steel.psd'
I have explored using raw string (r') but to no avail. Perhaps I am writing it wrong.
I will note that I never get this in 3.5 or on cleanly labelled selected folders. Due to Pandas and pysinstaller problems with 3.5, I am hoping to stick with 2.7 until the error with 3.5 is resolved.
import pandas as pd
import xlsxwriter
import os
from io import StringIO
# Question snippet (reproduced as pasted, indentation lost): walk a
# user-selected directory and record path, name, extension and size of every
# file in four parallel lists.
#Lists for Pandas Dataframes
fpath = []
fname = []
fext = []
sizec = []
# START #Select file directory to scan
# NOTE(review): in Python 2 raw_input returns a byte string; the answer below
# suggests passing a unicode path to os.walk (os.walk(unicode(filed))) so that
# file names with non-ASCII characters can be handled.
filed = raw_input("\nSelect a directory to scan: ")
#Scan the Hard-Drive and add to lists for Pandas DataFrames
print "\nGetting details..."
for root, dirs, files in os.walk(filed):
for filename in files:
f = os.path.abspath(root) #File path
fpath.append(f)
fname.append(filename) #File name
s = os.path.splitext(filename)[1] #File extension
s = str(s)
fext.append(s)
p = os.path.join(root, filename) #File size
# os.stat is the call named in the reported WindowsError traceback path
si = os.stat(p).st_size
sizec.append(si)
print "\nDone!"
Any help would be greatly appreciated :)
In order to traverse filenames with unicode characters, you need to give os.walk a unicode path name.
Your path contains a unicode character, which is being displayed as ? in the exception.
If you pass in the unicode path, like this os.walk(unicode(filed)) you should not get that exception.
As noted in Convert python filenames to unicode sometimes you'll get a bytestring if the path is "undecodable" by Python 2.