Read multiple images from a folder in OpenCV (Python) - python-2.7

I want to read multiple images from the same folder using OpenCV (Python). Do I need to use a for loop or a while loop with the imread function? If so, how? Please help me.
I want to get the images into an array and then process them one at a time through a loop.

import glob
import cv2
images = [cv2.imread(file) for file in glob.glob("path/to/files/*.png")]
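If you then want to process the loaded images one at a time, as the question asks, a minimal follow-up sketch could look like this (the grayscale conversion is only a placeholder for your own per-image processing):
for img in images:
    if img is None:  # cv2.imread returns None for unreadable files
        continue
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    print(gray.shape)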

The snippet below gets the names of all the files in a folder into onlyfiles, then reads each one and stores it in the array images.
from os import listdir
from os.path import isfile, join
import numpy
import cv2
mypath='/path/to/folder'
onlyfiles = [ f for f in listdir(mypath) if isfile(join(mypath,f)) ]
images = numpy.empty(len(onlyfiles), dtype=object)
for n in range(0, len(onlyfiles)):
    images[n] = cv2.imread(join(mypath, onlyfiles[n]))

import glob
import cv2 as cv
path = glob.glob("/path/to/folder/*.jpg")
cv_img = []
for img in path:
    n = cv.imread(img)
    cv_img.append(n)

This one has better time efficiency.
def read_img(img_list, img):
    n = cv2.imread(img, 0)
    img_list.append(n)
    return img_list

path = glob.glob("*.bmp")  # or jpg
list_ = []
cv_image = [read_img(list_, img) for img in path]

import cv2
from pathlib import Path
path = Path(".")
images = []
for imagepath in path.glob("*.jpg"):
    img = cv2.imread(str(imagepath))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (200, 200))
    images.append(img)
print(images)

import os
import numpy as np
import matplotlib.pyplot as plt

def flatten_images(folder):  # path of the folder (dataset), ending with a separator
    images = []  # list containing all images
    for filename in os.listdir(folder):
        print(filename)
        img = plt.imread(folder + filename)  # read image (folder path + image name)
        img = np.array(img)
        img = img.flatten()  # flatten the image into a 1-D array
        images.append(img)  # append every image to the 'images' list
    return images
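For example, a minimal usage sketch (the dataset path is a placeholder and must end with a separator, since the function concatenates folder and file name directly):
flat = flatten_images('/path/to/dataset/')
print(len(flat), flat[0].shape)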

Here is how I did it without glob, using the os module instead, since I could not get glob to work on my computer:
import os
import cv2

# Get the names of all the files in the desired directory
# (here I assume that they are all images)
original_images = os.listdir('./path/containing/images')
# Construct a list of relative path strings for each image
original_images = [f"./path/containing/images/{file_name}" for file_name in original_images]
original_images = [cv2.imread(file) for file in original_images]

Related

Creating HDF5 format for image segmentation task

I started writing a Python script for creating HDF5 files for an image segmentation task. I used the code in this link and the link provided by Shai. My images are single-channel and in .mat format. I have written the following code and only want to check with experts whether it is correct. Could experts please have a look? Thanks.
import os, h5py
import caffe
import numpy as np
import scipy
import scipy.io as sio
from array import array
import cv2
import matplotlib.pyplot as plt

caffe_root = '/home/ss/caffe/'
import sys
sys.path.insert(0, caffe_root + 'python')

def img_to_hdf5(paths_src_file, paths_lbl_file, path_dst, msg):
    """
    paths_src_file : path to a txt file listing the image paths
    paths_lbl_file : path to a txt file listing the label image paths
    path_dst       : path to the hdf5 file
    """
    print(msg)
    arrays = {}
    SIZE = 256  # fixed size of all images
    # read the img and lbl paths from the text files into paths_src and paths_lbl
    paths_src = []
    with open(paths_src_file) as f:
        for line in f.readlines():
            line = line.strip('\n')
            paths_src.append(line)
    paths_lbl = []
    with open(paths_lbl_file) as f:
        for line in f.readlines():
            line = line.strip('\n')
            paths_lbl.append(line)
    data = np.zeros((len(paths_src), 1, SIZE, SIZE), dtype='f4')   # 1-channel grayscale images
    label = np.zeros((len(paths_lbl), 1, SIZE, SIZE), dtype='f4')  # 1-channel label images
    for in_idx, in_ in enumerate(paths_src):
        print(in_idx, in_)
        f = h5py.File(in_, 'r')
        mat = f['image'].value
        im = np.array(mat, dtype=np.float32)
        #im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
        #im = im[:, :, ::-1]  # switch from RGB to BGR
        im = im.reshape(im.shape[0], im.shape[1], 1)
        im = im.transpose((2, 0, 1))  # convert to CxHxW
        data[in_idx] = im
    for in_idx, in_ in enumerate(paths_lbl):
        print(in_idx, in_)
        f = h5py.File(in_, 'r')
        mat = f['image'].value
        im = np.array(mat, dtype=np.float32)
        #im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
        #im = im[:, :, ::-1]  # switch from RGB to BGR
        im = im.reshape(im.shape[0], im.shape[1], 1)
        im = im.transpose((2, 0, 1))  # convert to CxHxW
        label[in_idx] = im
    h5_train = os.path.join(path_dst, 'train_data.h5')
    with h5py.File(h5_train, 'w') as H:
        H.create_dataset('data', data=data)    # note the name 'data' given to the dataset!
        H.create_dataset('label', data=label)  # note the name 'label' given to the dataset!
    text_train = os.path.join(path_dst, 'train-path.txt')
    with open(text_train, 'w') as L:
        L.write(h5_train)  # list all h5 files you are going to use

train_img_paths = './train_img.txt'      # text file of paths to images
train_label_paths = './train_label.txt'  # text file of paths to label images (ground truth)
train_img_hdf5 = '/home/ss/workspace/create_hdf5/'  # your path to the h5 file
st = 'Creating Training Data HDF5 File .....'
img_to_hdf5(train_img_paths, train_label_paths, train_img_hdf5, st)
print('DONE...')
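Since the question asks whether the code is correct, one quick way to sanity-check the result (a hedged suggestion, not part of the original script) is to reopen the written HDF5 file and inspect the dataset names and shapes:
import h5py

# reopen the file written above and confirm the dataset names and shapes
with h5py.File('/home/ss/workspace/create_hdf5/train_data.h5', 'r') as H:
    print(H['data'].shape)   # expected: (num_images, 1, 256, 256)
    print(H['label'].shape)  # expected: (num_labels, 1, 256, 256)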

I'm trying to write a script to search a folder full of images and compare them to one specific image and find the image that is the most similar

This is what I have so far: the output of the script doesn't even print the difference values yet.
import os
import sys
from skimage.measure import compare_ssim
import cv2
im1 = cv2.imread("7.jpeg")
dir = '/Users/Desktop/images'

# I'm trying to search the specified directory and compare each image
# in the "images" folder to im1, then compute the difference between the
# two images for each image in the folder. Lastly I want the program to
# output the name of the image that has the smallest difference with im1.
def get_nb_files(dir):
    for r in dir:
        grayA = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
        grayB = cv2.cvtColor(r, cv2.COLOR_BGR2GRAY)
        diff = (diff * 255).astype("uint8")
        (score, diff) = compare_ssim(im1, r, full=True)
        print("SSIM: {}".format(score))

How to get python to read all images in a directory one by one

My experience with Python is very limited, so I don't fully understand what the code does in this instance. This is part of the TensorFlow for Poets lab code.
import os, sys
import tensorflow as tf
import numpy as np
from PIL import Image

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# change this as you see fit
image_path = sys.argv[1]
# Read in the image_data
image_data = tf.gfile.FastGFile(image_path, 'rb').read()
image = Image.open(image_path)
image_array = image.convert('RGB')
# Loads label file, strips off carriage return
label_lines = [line.rstrip() for line
               in tf.gfile.GFile("retrained_labels.txt")]
# Unpersists graph from file
with tf.gfile.FastGFile("retrained_graph.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')
with tf.Session() as sess:
    # Feed the image_data as input to the graph and get first prediction
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    predictions = sess.run(softmax_tensor, {'DecodeJpeg:0': image_array})
    # Sort to show labels of first prediction in order of confidence
    top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
    for node_id in top_k:
        human_string = label_lines[node_id]
        score = predictions[0][node_id]
        print('%s (score = %.5f)' % (human_string, score))
    filename = "results.txt"
    with open(filename, 'a+') as f:
        f.write('\n**%s**\n' % (image_path))
        for node_id in top_k:
            human_string = label_lines[node_id]
            score = predictions[0][node_id]
            f.write('%s (score = %.5f)\n' % (human_string, score))
I want the above code to read in a directory instead of a single image and then process them all and output the scores to the results.txt file.
Currently I can call this like so:
python this_file.py /root/images/1.jpg
How would I get this code to take the following input and process it?
python this_file.py /root/images/
Use os.listdir to list all files in the directory. Qualify it with a filter as well. Join the resulting files to their directory. Read them from the list with a for loop.
python this_file.py /root/images/
image_path = sys.argv[1]
image_paths = [os.path.join(image_path,img) for img in os.listdir(image_path) if '.jpg' in img]
I also recommend re-examining your training function and strategy. It is good practice to abstract your entire network with tf variable placeholders as far as you can. In addition, it would be much more efficient to implement batching and possibly to convert your dataset to TF records. A rough sketch of the per-image loop is below.
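Putting the pieces together, a minimal sketch of that per-image loop might look like this. It reuses label_lines and the graph loading from the original script, and assumes image_paths was built with the list comprehension above; this is a sketch, not code tested against your setup.
with tf.Session() as sess:
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    with open("results.txt", 'a+') as f:
        for image_path in image_paths:
            # same per-image preprocessing as the original script
            image_array = Image.open(image_path).convert('RGB')
            predictions = sess.run(softmax_tensor, {'DecodeJpeg:0': image_array})
            top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
            f.write('\n**%s**\n' % image_path)
            for node_id in top_k:
                human_string = label_lines[node_id]
                score = predictions[0][node_id]
                print('%s (score = %.5f)' % (human_string, score))
                f.write('%s (score = %.5f)\n' % (human_string, score))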

Looping CSV Concat in Python Pandas

I have multiple folders, each containing CSVs. I am trying to concatenate the CSVs in each subdirectory and then export the result, so at the end I would have the same number of output files as folders: Folder1.csv, Folder2.csv, ... Folder99.csv. This is what I have so far:
import os
from glob import glob
import pandas as pd
import numpy as np
rootDir = 'D:/Data'
OutDirectory = 'D:/OutPut'
os.chdir(rootDir)
# The directory has folders as follows
# D:/Data/Folder1
# D:/Data/Folder2
# D:/Data/Folder3
# ....
# .....
# D:/Data/Folder99
# Each folders (Folder1, Folder2,..etc.) has many csvs.
frame = pd.DataFrame()
list_ = []
for (dirname, dirs, files) in os.walk(rootDir):
    for filename in files:
        if filename.endswith('.csv'):
            df = pd.read_csv(filename, index_col=None, na_values=['-999'], delim_whitespace=True, header=0, skiprows=2)
            OutFile = '%s.csv' % OutputFname
            list_.append(df)
            frame = pd.concat(list_)
            df.to_csv(OutDirectory + OutFile, sep=',', header=True)
I am getting the following error:
IOError: File file200150101.csv does not exist
You need to concatenate dirname and filename for a full path to your files. Change this line like so:
df = pd.read_csv(os.path.join(dirname, filename) ,index_col=None, na_values=['-999'], delim_whitespace= True, header = 0, skiprows = 2)
Edit:
I don't know how pandas works because I never used it. But I think your problem is that you defined everything you wanted done to the CSVs inside the inner loop, which loops over files only (at least the indentation looks that way, but that could also be a formatting problem that occurred when you pasted your code here on SO).
I rewrote your code and fixed some things that I think might be the problem:
First, I renamed your variables that start with capital letters, because to me it always looks odd to have variables with capitalized names.
I moved your list variable into the outer loop, because it should be reset every time you enter a new directory, since you want the CSVs merged per folder.
And finally, I fixed the indentation. In Python, indentation tells the interpreter which commands are in the inner or outer loop.
My code now looks like this. You might have to change some things because I can't test it right now:
import os
from glob import glob
import pandas as pd
import numpy as np
rootDir = 'D:/Data'
outDir = 'D:/OutPut'
os.chdir(rootDir)
dirs = os.listdir(rootDir)
frame = pd.DataFrame()
for dirname in dirs:
    # the outer loop loops over directories; the current directory is stored in dirname
    list_ = []  # collect csv data per directory, not globally
    files = glob('%s/*.csv' % (dirname))
    for filename in files:
        # the inner loop loops over the files in the 'dirname' folder
        df = pd.read_csv(filename, index_col=None, na_values=['-999'], delim_whitespace=True, header=0, skiprows=2)
        outFile = '%s.csv' % dirname  # define the name for the output csv
        list_.append(df)  # do that for every file
    # at this point, all files in the current directory were processed
    frame = pd.concat(list_)  # then merge the CSVs for this folder
    # ...actually not sure how pd.concat works, but I guess it does merge the data
    frame.to_csv(os.path.join(outDir, outFile), sep=',', header=True)  # save the data

Reading csv zipped files in python

I'm trying to get data from a zipped CSV file. Is there a way to do this without unzipping the whole file? If not, how can I unzip the files and read them efficiently?
I used the zipfile module to import the ZIP directly to pandas dataframe.
Let's say the file name is "intfile" and it's in .zip named "THEZIPFILE":
import pandas as pd
import zipfile
zf = zipfile.ZipFile('C:/Users/Desktop/THEZIPFILE.zip')
df = pd.read_csv(zf.open('intfile.csv'))
If you aren't using Pandas it can be done entirely with the standard lib. Here is Python 3.7 code:
import csv
from io import TextIOWrapper
from zipfile import ZipFile
with ZipFile('yourfile.zip') as zf:
    with zf.open('your_csv_inside_zip.csv', 'r') as infile:
        reader = csv.reader(TextIOWrapper(infile, 'utf-8'))
        for row in reader:
            # process the CSV here
            print(row)
A quick solution is to use the code below.
import pandas as pd
# pandas supports reading zip files directly
df = pd.read_csv("/path/to/file.csv.zip")
zipfile also supports the with statement.
So adding onto yaron's answer of using pandas:
with zipfile.ZipFile('file.zip') as zip:
    with zip.open('file.csv') as myZip:
        df = pd.read_csv(myZip)
I thought Yaron had the best answer, but I would add code that iterates through multiple files inside a zip archive and then appends the results:
import os
import pandas as pd
import zipfile
curDir = os.getcwd()
zf = zipfile.ZipFile(curDir + '/targetfolder.zip')
text_files = zf.infolist()
list_ = []
print ("Uncompressing and reading data... ")
for text_file in text_files:
    print(text_file.filename)
    df = pd.read_csv(zf.open(text_file.filename))
    # do df manipulations
    list_.append(df)
df = pd.concat(list_)
Yes. You want the module 'zipfile'
You open the zip file itself with zipfile.ZipFile(filename[, mode]).
You can then use ZipFile.infolist() to enumerate each file within the zip, and extract it with ZipFile.open(name[, mode[, pwd]])
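A minimal sketch of that approach (the archive and member names are placeholders), assuming the members are UTF-8 CSV files:
import csv
import zipfile
from io import TextIOWrapper

with zipfile.ZipFile('yourfile.zip') as zf:        # open the archive itself
    for info in zf.infolist():                      # enumerate each file inside the zip
        with zf.open(info.filename) as member:      # extract one member as a file object
            for row in csv.reader(TextIOWrapper(member, 'utf-8')):
                print(row)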
This is the simplest thing I always use:
import pandas as pd
df = pd.read_csv("Train.zip",compression='zip')
Supposing you are downloading a zip file that contains a CSV and you don't want to use temporary storage. Here is what a sample implementation looks like:
#!/usr/bin/env python3
from csv import DictReader
from io import TextIOWrapper, BytesIO
from zipfile import ZipFile
import requests
def all_tickers():
    url = "https://simfin.com/api/bulk/bulk.php?dataset=industries&variant=null"
    r = requests.get(url)
    zip_ref = ZipFile(BytesIO(r.content))
    for name in zip_ref.namelist():
        print(name)
        with zip_ref.open(name) as file_contents:
            reader = DictReader(TextIOWrapper(file_contents, 'utf-8'), delimiter=';')
            for item in reader:
                print(item)
This takes care of all python3 bytes/str issues.
Modern pandas (since version 0.18.1) natively supports compressed CSV files: its read_csv method has a compression parameter: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'.
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
If you have a file named my_big_file.csv and you zip it under the same name as my_big_file.zip, you may simply do this:
df = pd.read_csv("my_big_file.zip")
Note: check your pandas version first (not applicable for older versions)
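To check which pandas version is installed, something like this works:
import pandas as pd
print(pd.__version__)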