converting tiff to jpeg in python - python-2.7

Can anyone help me read a .tiff image and convert it into JPEG format?
from PIL import Image
im = Image.open('test.tiff')
im.save('test.jpeg')
The above code was not working.

I have successfully solved the issue. Below is the code that reads the TIFF files in a folder and converts them to JPEG automatically.
import os
from PIL import Image

yourpath = os.getcwd()
for root, dirs, files in os.walk(yourpath, topdown=False):
    for name in files:
        print(os.path.join(root, name))
        if os.path.splitext(os.path.join(root, name))[1].lower() == ".tiff":
            if os.path.isfile(os.path.splitext(os.path.join(root, name))[0] + ".jpg"):
                print "A jpeg file already exists for %s" % name
            # If a jpeg is *NOT* present, create one from the tiff.
            else:
                outfile = os.path.splitext(os.path.join(root, name))[0] + ".jpg"
                try:
                    im = Image.open(os.path.join(root, name))
                    print "Generating jpeg for %s" % name
                    im.thumbnail(im.size)
                    im.save(outfile, "JPEG", quality=100)
                except Exception, e:
                    print e

I tried to save directly to JPEG, but the error indicated that the mode was P and incompatible with the JPEG format, so you have to convert the image to RGB mode first, as follows.
import os, sys
from PIL import Image

for infile in os.listdir("./"):
    print "file : " + infile
    if infile[-3:] == "tif" or infile[-3:] == "bmp":
        # print "is tif or bmp"
        outfile = infile[:-3] + "jpeg"
        im = Image.open(infile)
        print "new filename : " + outfile
        out = im.convert("RGB")
        out.save(outfile, "JPEG", quality=90)

This can be solved with the help of OpenCV. It worked for me.
OpenCV version == 4.3.0
import cv2, os

base_path = "data/images/"
new_path = "data/ims/"
for infile in os.listdir(base_path):
    print("file : " + infile)
    read = cv2.imread(base_path + infile)
    outfile = infile.split('.')[0] + '.jpg'
    # JPEG quality is valid in the range 0-100
    cv2.imwrite(new_path + outfile, read, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

I believe none of the answers is complete.
The TIFF image format is a container for various formats. It can contain BMP, uncompressed TIFF, LZW compression, Zip compression and some others, among them JPEG.
Image.open (from PIL) opens these files but can't do anything with them. At least you can find out that it is a TIFF file (from its contents, not only by its name). Then one can use
pytiff.Tiff (from the pytiff package). For some reason, when the TIFF has JPEG compression (and probably some others too) it cannot decode the correct information.
Something is rotten in the state of Denmark (C)
P.S. One can convert the file with the help of Paint (in old Windows, Paint Brush; something is rotten in that state too) or Photoshop, any version. Then it can be opened from Python. I'm looking for a simple exe which can do it, to then call it from Python. Probably Bulk Image Converter will do.
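Before reaching for an external converter, here is a minimal inspection sketch (assuming Pillow; the 'compression' info key reflects Pillow's TIFF plugin behaviour, so treat it as an assumption) for checking what is actually inside the file before converting:
from PIL import Image

im = Image.open('test.tiff')
print(im.format)                   # 'TIFF' if the contents really are TIFF
print(im.mode)                     # e.g. 'P', 'RGB', 'CMYK', 'I;16'
print(im.info.get('compression'))  # e.g. 'raw', 'tiff_lzw', 'jpeg', 'packbits'

# JPEG only supports a few modes, so convert before saving
im.convert('RGB').save('test.jpg', 'JPEG', quality=90)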

I liked the solution suggested in this answer: https://stackoverflow.com/a/28872806/12808155
But checking for TIFF by extension is, in my opinion, not entirely correct, since there are situations where the .tif extension does not determine the file format: for example, when indexing, macOS creates hidden files (._DSC_123.tif).
For a more universal solution, I suggest using the python-magic library (https://pypi.org/project/python-magic).
The code for checking for the TIFF format may look like this:
import magic

def check_is_tif(filepath: str) -> bool:
    allowed_types = [
        'image/tiff',
        'image/tif'
    ]
    if magic.from_file(filepath, mime=True) not in allowed_types:
        return False
    return True
The complete code may look like this:
import argparse
import os

import magic
from PIL import Image
from tqdm import tqdm

def check_is_tif(filepath: str) -> bool:
    allowed_types = [
        'image/tiff',
        'image/tif'
    ]
    if magic.from_file(filepath, mime=True) not in allowed_types:
        return False
    return True

def count_total(path: str) -> int:
    print('Please wait till total files are counted...')
    result = 0
    for root, _, files in os.walk(path):
        for name in files:
            if check_is_tif(os.path.join(root, name)) is True:
                result += 1
    return result

def convert(path) -> None:
    progress = tqdm(total=count_total(path))
    for root, _, files in os.walk(path):
        for name in files:
            if check_is_tif(os.path.join(root, name)) is True:
                file_path = os.path.join(root, name)
                outfile = os.path.splitext(file_path)[0] + ".jpg"
                try:
                    im = Image.open(file_path)
                    im.thumbnail(im.size)
                    im.save(outfile, "JPEG", quality=80)
                    os.unlink(file_path)
                except Exception as e:
                    print(e)
                progress.update()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Recursive TIFF to JPEG converter')
    parser.add_argument('path', type=str, help='Path to directory with TIFF files')
    args = parser.parse_args()
    convert(args.path)

Related

Reading multiple files in a directory with pyyaml

I'm trying to read all YAML files in a directory, but I am having trouble. First, because I am using Python 2.7 (and I cannot change to 3) and all of my files are UTF-8 (and I also need them to stay that way).
import os
import yaml
import codecs

def yaml_reader(filepath):
    with codecs.open(filepath, "r", encoding='utf-8') as file_descriptor:
        data = yaml.load_all(file_descriptor)
    return data

def yaml_dump(filepath, data):
    with open(filepath, 'w') as file_descriptor:
        yaml.dump(data, file_descriptor)

if __name__ == "__main__":
    filepath = os.listdir(os.getcwd())
    data = yaml_reader(filepath)
    print data
When I run this code, python gives me the message:
TypeError: coercing to Unicode: need string or buffer, list found.
I want this program to show the content of the files. Can anyone help me?
I guess the issue is with filepath.
os.listdir(os.getcwd()) returns the list of all the files in the directory, so you are passing the whole list to codecs.open() instead of a single filename.
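A minimal sketch of looping over the listing instead (the .yaml filter is an assumption about the file names):
import os
import yaml
import codecs

for filename in os.listdir(os.getcwd()):
    if not filename.endswith('.yaml'):
        continue
    with codecs.open(filename, "r", encoding='utf-8') as file_descriptor:
        # load_all is lazy, so iterate while the file is still open
        for document in yaml.load_all(file_descriptor):
            print document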
There are multiple problems with your code, apart from the fact that, the way it is formatted here, it is invalid Python. Your reader explicitly decodes the files:
def yaml_reader(filepath):
    with codecs.open(filepath, "r", encoding='utf-8') as file_descriptor:
        data = yaml.load_all(file_descriptor)
    return data
However, it is not necessary to do the decoding yourself; PyYAML is perfectly capable of processing UTF-8:
def yaml_reader(filepath):
    with open(filepath, "rb") as file_descriptor:
        data = yaml.load_all(file_descriptor)
    return data
I hope you realise you're loading multiple documents and will always get a list as a result in data, even if your file contains only one document.
Then the line:
filepath = os.listdir(os.getcwd())
gives you a list of files, so you need to do:
filepath = os.listdir(os.getcwd())[0]
or decide in some other way which of the files you want to open. If you want to combine all files (assuming they are all YAML) into one big YAML file, you need to do:
if __name__ == "__main__":
    data = []
    for filepath in os.listdir(os.getcwd()):
        data.extend(yaml_reader(filepath))
    print data
And your dump routine would need to change to:
def yaml_dump(filepath, data):
    with open(filepath, 'wb') as file_descriptor:
        yaml.dump(data, file_descriptor, allow_unicode=True, encoding='utf-8')
However, this all brings you to the biggest problem: you are using PyYAML, which will mangle your YAML, dropping flow style, comments, anchor names, special ints/floats, quotes around scalars, etc. Apart from that, PyYAML has not been updated to support YAML 1.2 documents (which has been the standard since 2009). I recommend you switch to ruamel.yaml (disclaimer: I am the author of that package), which supports YAML 1.2 and leaves comments etc. in place.
And even if you are bound to Python 2, you should use Python 3-like syntax, e.g. for print, which you can get with from __future__ imports.
So I recommend you do:
pip install pathlib2 ruamel.yaml
and then use:
from __future__ import absolute_import, unicode_literals, print_function
from pathlib2 import Path  # use "from pathlib import Path" on Python 3
from ruamel.yaml import YAML

if __name__ == "__main__":
    data = []
    yaml = YAML()
    yaml.preserve_quotes = True
    for filepath in Path('.').glob('*.yaml'):
        data.extend(yaml.load_all(filepath))
    print(data)
    yaml.dump(data, Path('your_output.yaml'))

Creating HDF5 format for image segmentation task

I started writing Python code for creating HDF5 files for an image segmentation task. I used the code in this link and the link provided by Shai. My images are one channel and in .mat format. I have written the following code; I only want to check with experts whether it is correct. Could experts please have a look? Thanks.
import os, h5py
import caffe
import numpy as np
import scipy
import scipy.io as sio
from array import array
import cv2
import matplotlib.pyplot as plt

caffe_root = '/home/ss/caffe/'
import sys
sys.path.insert(0, caffe_root + 'python')

def img_to_hdf5(paths_src_file, paths_lbl_file, path_dst, msg):
    """
    paths_src_file : path to the image paths in a txt file
    paths_lbl_file : path to the label image paths in a txt file
    path_dst       : path to the hdf5 file
    """
    print(msg)
    arrays = {}
    SIZE = 256  # fixed size of all images

    # read the lines of img and lbl paths from the text files into paths_src and paths_lbl
    paths_src = []
    with open(paths_src_file) as f:
        for line in f.readlines():
            line = line.strip('\n')
            paths_src.append(line)
    paths_lbl = []
    with open(paths_lbl_file) as f:
        for line in f.readlines():
            line = line.strip('\n')
            paths_lbl.append(line)

    data = np.zeros((len(paths_src), 1, SIZE, SIZE), dtype='f4')   # 1-channel grayscale image
    label = np.zeros((len(paths_lbl), 1, SIZE, SIZE), dtype='f4')  # 1-channel label image

    for in_idx, in_ in enumerate(paths_src):
        print in_idx, in_
        f = h5py.File(in_, 'r')
        mat = f['image'].value
        im = np.array(mat, dtype=np.float32)
        #im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
        #im = im[:,:,::-1]  # switch from RGB to BGR
        im = im.reshape(im.shape[0], im.shape[1], 1)
        im = im.transpose((2, 0, 1))  # convert to CxHxW
        data[in_idx] = im

    for in_idx, in_ in enumerate(paths_lbl):
        print in_idx, in_
        f = h5py.File(in_, 'r')
        mat = f['image'].value
        im = np.array(mat, dtype=np.float32)
        #im = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
        #im = im[:,:,::-1]  # switch from RGB to BGR
        im = im.reshape(im.shape[0], im.shape[1], 1)
        im = im.transpose((2, 0, 1))  # convert to CxHxW
        label[in_idx] = im

    h5_train = os.path.join(path_dst, 'train_data.h5')
    with h5py.File(h5_train, 'w') as H:
        H.create_dataset('data', data=data)    # note the name 'data' given to the dataset!
        H.create_dataset('label', data=label)  # note the name 'label' given to the dataset!
    text_train = os.path.join(path_dst, 'train-path.txt')
    with open(text_train, 'w') as L:
        L.write(h5_train)  # list all h5 files you are going to use

train_img_paths = './train_img.txt'      # text file of paths to images
train_label_paths = './train_label.txt'  # text file of paths to label images (ground truth)
train_img_hdf5 = '/home/ss/workspace/create_hdf5/'  # your path to the h5 file
st = 'Creating Training Data HDF5 File .....'
img_to_hdf5(train_img_paths, train_label_paths, train_img_hdf5, st)
print('DONE...')
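Not a full review, but a quick way to sanity-check the result: a minimal sketch (the file name matches the train_data.h5 written above) that reads the HDF5 file back and checks shapes and value ranges:
import h5py
import numpy as np

with h5py.File('/home/ss/workspace/create_hdf5/train_data.h5', 'r') as f:
    data = f['data'][...]
    label = f['label'][...]

# Expect (N, 1, 256, 256) for both, with label values in the expected class range
print(data.shape, data.dtype, data.min(), data.max())
print(label.shape, label.dtype, np.unique(label)[:10])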

How to get python to read all images in a directory one by one

My experience with Python is very limited, so I don't fully understand what the code does in this instance. This is part of the code for the poets lab from the TensorFlow framework.
import os, sys
import tensorflow as tf
import sys
import numpy as np
from PIL import Image

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# change this as you see fit
image_path = sys.argv[1]

# Read in the image_data
image_data = tf.gfile.FastGFile(image_path, 'rb').read()
image = Image.open(image_path)
image_array = image.convert('RGB')

# Loads label file, strips off carriage return
label_lines = [line.rstrip() for line
               in tf.gfile.GFile("retrained_labels.txt")]

# Unpersists graph from file
with tf.gfile.FastGFile("retrained_graph.pb", 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')

with tf.Session() as sess:
    # Feed the image_data as input to the graph and get first prediction
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    predictions = sess.run(softmax_tensor, {'DecodeJpeg:0': image_array})
    # Sort to show labels of first prediction in order of confidence
    top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
    for node_id in top_k:
        human_string = label_lines[node_id]
        score = predictions[0][node_id]
        print('%s (score = %.5f)' % (human_string, score))
    filename = "results.txt"
    with open(filename, 'a+') as f:
        f.write('\n**%s**\n' % (image_path))
        for node_id in top_k:
            human_string = label_lines[node_id]
            score = predictions[0][node_id]
            f.write('%s (score = %.5f)\n' % (human_string, score))
I want the above code to read in a directory instead of a single image and then process them all and output the scores to the results.txt file.
Currently I can call this like so:
python this_file.py /root/images/1.jpg
How would I get this code to take the following input and process it?
python this_file.py /root/images/
Use os.listdir to list all files in the directory. Qualify it with a filter as well. Join the resulting files to their directory. Read them from the list with a for loop.
python this_file.py /root/images/
image_path = sys.argv[1]
image_paths = [os.path.join(image_path,img) for img in os.listdir(image_path) if '.jpg' in img]
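A minimal sketch of that loop (it assumes the graph-loading code from the question stays as-is and reuses its approach of feeding the PIL image to 'DecodeJpeg:0'):
with tf.Session() as sess:
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    with open("results.txt", 'a+') as f:
        for path in image_paths:
            # same preprocessing as the single-image version
            image_array = Image.open(path).convert('RGB')
            predictions = sess.run(softmax_tensor, {'DecodeJpeg:0': image_array})
            top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
            f.write('\n**%s**\n' % path)
            for node_id in top_k:
                f.write('%s (score = %.5f)\n' % (label_lines[node_id], predictions[0][node_id]))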
I also recommend re-examining your training function and strategy. It is also good practice to abstract your entire network with tf variable placeholders as far as you can. In addition, it would be much more efficient to implement batching, and possibly to convert your dataset to TFRecords.

Why does OleFileIO_PL only work with .doc file types and not .docx in Python?

Right, so I'm working on a Python script (Python 2.7) that will extract the metadata from OLE files. I am using OleFileIO_PL and it works perfectly with Office 97-2003 OLE files, but for anything later than that it just says that it is not an OLE2 file type.
Is there any way I can modify my code to support both .doc and .docx? Same with .ppt and .pptx, etc.
Thank you in advance
Source Code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import OleFileIO_PL
import StringIO
import optparse
import sys
import os

def printMetadata(fileName):
    data = open(fileName, 'rb').read()
    f = StringIO.StringIO(data)
    OLEFile = OleFileIO_PL.OleFileIO(f)
    meta = OLEFile.get_metadata()
    print('Author:', meta.author)
    print('Title:', meta.title)
    print('Creation date:', meta.create_time)
    meta.dump()
    OLEFile.close()

def main():
    parser = optparse.OptionParser('usage = -F + Name of the OLE file with the extension. For example: python "Ms Office Metadata Extraction Script.py" -F myfile.docx')
    parser.add_option('-F', dest='fileName', type='string',
                      help='specify OLE (MS Office) file name')
    (options, args) = parser.parse_args()
    fileName = options.fileName
    if fileName == None:
        print parser.usage
        exit(0)
    else:
        printMetadata(fileName)

if __name__ == '__main__':
    main()
To answer your question, this is because the newer MS Office 2007+ files (docx, xlsx, xlsb, pptx, etc) have a completely different structure from the legacy MS Office 97-2003 formats.
It is mainly a collection of XML files within a Zip archive. So with a little bit of work, you can extract everything you need using zipfile and ElementTree from the standard library.
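A minimal sketch of that approach (the docProps/core.xml path and the namespace URIs are standard OPC/Dublin Core conventions, but treat the exact fields as assumptions about what your files contain):
import zipfile
import xml.etree.ElementTree as ET

NS = {
    'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'dcterms': 'http://purl.org/dc/terms/',
}

def print_docx_metadata(filename):
    # docx/xlsx/pptx files are Zip archives; core properties live in docProps/core.xml
    with zipfile.ZipFile(filename) as z:
        root = ET.fromstring(z.read('docProps/core.xml'))
    print('Author: %s' % root.findtext('dc:creator', default='', namespaces=NS))
    print('Title: %s' % root.findtext('dc:title', default='', namespaces=NS))
    print('Creation date: %s' % root.findtext('dcterms:created', default='', namespaces=NS))

print_docx_metadata('myfile.docx')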
If openxmllib does not work for you, you may try other solutions:
officedissector: https://www.officedissector.com/
python-opc: https://pypi.python.org/pypi/python-opc
openpack: https://pypi.python.org/pypi/openpack
paradocx: https://pypi.python.org/pypi/paradocx
BTW, OleFileIO_PL has been renamed to olefile, and the new project page is https://github.com/decalage2/olefile
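For completeness, a minimal sketch of the same metadata dump with the renamed olefile package (the API mirrors OleFileIO_PL, but treat the exact attribute names as assumptions against your installed version):
import olefile

def print_ole_metadata(filename):
    # Only legacy .doc/.xls/.ppt are OLE2 containers; .docx etc. will fail this check
    if not olefile.isOleFile(filename):
        print('%s is not an OLE2 file' % filename)
        return
    ole = olefile.OleFileIO(filename)
    meta = ole.get_metadata()
    print('Author: %s' % meta.author)
    print('Title: %s' % meta.title)
    print('Creation date: %s' % meta.create_time)
    ole.close()

print_ole_metadata('myfile.doc')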

Saving a stream while playing it using LibVLC

Using LibVLC, I'm trying to save a stream while playing it. This is the python code:
import os
import sys
import vlc

if __name__ == '__main__':
    filepath = <either-some-url-or-local-path>
    movie = os.path.expanduser(filepath)
    if 'http://' not in filepath:
        if not os.access(movie, os.R_OK):
            print('Error: %s file is not readable' % movie)
            sys.exit(1)
    instance = vlc.Instance("--sub-source marq --sout=file/ps:example.mpg")
    try:
        media = instance.media_new(movie)
    except NameError:
        print('NameError: %s (%s vs LibVLC %s)' % (sys.exc_info()[1],
              vlc.__version__, vlc.libvlc_get_version()))
        sys.exit(1)
    player = instance.media_player_new()
    player.set_media(media)
    player.play()

    # don't exit!
    while(1):
        continue
It saves the video stream to a file example.mpg. As per this doc, the command to save a stream is this :
--sout=file/ps:example.mpg
which I've used when creating the vlc.Instance:
instance = vlc.Instance("--sub-source marq --sout=file/ps:example.mpg")
But the problem is that it only saves the stream; it doesn't play the stream simultaneously.
Is there any way (in LibVLC) I can save the stream (to a local file) while playing it?
Although I'm looking for a solution in Python 3.3.1, a C or C++ solution is fine as well.
I created a similar, but not duplicate, topic yesterday.
Idea:
The basic idea is simple enough. You have to duplicate the output stream and redirect it to a file. This is done, as Maresh correctly pointed out, using the sout=#duplicate{...} directive.
Working Solution:
The following solution works on my machine ™. I've tested it on Ubuntu 12.10 with VLC v2.0.3 (TwoFlower) and Python 2.7.1. I think it should also work on Python 3 since most of the heavy lifting is done by libVlc anyway.
import os
import sys
import vlc

if __name__ == '__main__':
    #filepath = <either-some-url-or-local-path>
    movie = os.path.expanduser(filepath)
    if 'http://' not in filepath:
        if not os.access(movie, os.R_OK):
            print('Error: %s file is not readable' % movie)
            sys.exit(1)
    instance = vlc.Instance("--sout=#duplicate{dst=file{dst=example.mpg},dst=display}")
    try:
        media = instance.media_new(movie)
    except NameError:
        print('NameError: %s (%s vs LibVLC %s)' % (sys.exc_info()[1],
              vlc.__version__, vlc.libvlc_get_version()))
        sys.exit(1)
    player = instance.media_player_new()
    player.set_media(media)
    player.play()

    # don't exit!
    while(1):
        continue
Helpful Links
The Command-Line help was essential to decipher the plethora of VLC's command-line options.
Chapter 3 of the VLC streaming HowTo explains the structure of the stream output and its directives, and describes the various available modules. Chapter 4 shows some examples.
The LibVLC API documentation, in case you want to change media options at runtime.
Update - Saving YouTube videos:
The above code doesn't play nice with YouTube. I searched around and discovered that an additional transcode directive can be used to convert YouTube's video stream to a regular video format. I used #transcode{vcodec=mp4v,acodec=mpga,vb=800,ab=128,deinterlace}
vcodec=mp4v is the video format you want to encode in (mp4v is MPEG-4, mpgv is MPEG-1, and there is also h263, DIV1, DIV2, DIV3, I420, I422, I444, RV24, YUY2).
acodec=mpga is the audio format you want to encode in (mpga is MPEG audio layer 2, a52 is A52 i.e. AC3 sound).
vb=800 is the video bitrate in Kbit/s.
ab=128 is the audio bitrate in Kbit/s.
deinterlace tells VLC to deinterlace the video on the fly.
The updated code looks like this:
import os
import sys
import vlc

if __name__ == '__main__':
    #filepath = <either-some-url-or-local-path>
    filepath = "http://r1---sn-nfpnnjvh-1gil.c.youtube.com/videoplayback?source=youtube&newshard=yes&fexp=936100%2C906397%2C928201%2C929117%2C929123%2C929121%2C929915%2C929906%2C929907%2C929125%2C929127%2C925714%2C929917%2C929919%2C912512%2C912515%2C912521%2C906838%2C904485%2C906840%2C931913%2C904830%2C919373%2C933701%2C904122%2C932216%2C936303%2C909421%2C912711%2C907228%2C935000&sver=3&expire=1373237257&mt=1373214031&mv=m&ratebypass=yes&id=1907b7271247a714&ms=au&ipbits=48&sparams=cp%2Cid%2Cip%2Cipbits%2Citag%2Cratebypass%2Csource%2Cupn%2Cexpire&itag=45&key=yt1&ip=2a02%3A120b%3Ac3c6%3A7190%3A6823%3Af2d%3A732c%3A3577&upn=z3zzcrvPC0U&cp=U0hWSFJOVV9KUUNONl9KSFlDOmt4Y3dEWFo3dDFu&signature=D6049FD7CD5FBD2CC6CD4D60411EE492AA0E9A77.5D0562CCF4E10A6CC53B62AAFFF6CB3BB0BA91C0"
    movie = os.path.expanduser(filepath)
    savedcopy = "yt-stream.mpg"
    if 'http://' not in filepath:
        if not os.access(movie, os.R_OK):
            print('Error: %s file is not readable' % movie)
            sys.exit(1)
    instance = vlc.Instance("--sout=#transcode{vcodec=mp4v,acodec=mpga,vb=800,ab=128,deinterlace}:duplicate{dst=file{dst=%s},dst=display}" % savedcopy)
    try:
        media = instance.media_new(movie)
    except NameError:
        print('NameError: %s (%s vs LibVLC %s)' % (sys.exc_info()[1],
              vlc.__version__, vlc.libvlc_get_version()))
        sys.exit(1)
    player = instance.media_player_new()
    player.set_media(media)
    player.play()

    # don't exit!
    while(1):
        continue
A couple of important points:
I've used MPEG audio and video codecs in the transcode directive. It seems to be important to use a matching extension for the output file (mpg in this case); otherwise VLC gets confused when opening the saved file for playback. Keep that in mind if you decide to switch to another video format.
You cannot add a regular YouTube URL as filepath. Instead you have to specify the location of the video itself. That's the reason why the filepath that I've used looks so cryptic. That filepath corresponds to the video at http://www.youtube.com/watch?v=GQe3JxJHpxQ. VLC itself is able to extract the video location from a given YouTube URL, but libVLC doesn't do that out of the box. You'll have to write your own resolver to do that. See this related SO question. I followed this approach to manually resolve the video location for my tests.
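Not part of the original answer, but for illustration, a minimal resolver sketch using the youtube-dl library (an assumption; it is not mentioned above, and its output format can change between releases):
import youtube_dl  # pip install youtube-dl

def resolve_youtube_url(page_url):
    # Extract the direct media URL for a YouTube page without downloading it
    opts = {'quiet': True, 'format': 'best'}
    with youtube_dl.YoutubeDL(opts) as ydl:
        info = ydl.extract_info(page_url, download=False)
    return info['url']

filepath = resolve_youtube_url('http://www.youtube.com/watch?v=GQe3JxJHpxQ')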
I think you need to duplicate the output in order to play and record it at the same time:
vlc.Instance("--sub-source marq --sout=#stream_out_duplicate{dst=display,dst=std{access=file,mux=ts,dst=/path/file.mpg}}")
or
libvlc_media_add_option(media, ":sout=#stream_out_duplicate{dst=display,dst=std{access=file,mux=ts,dst=/path/file.mpg}}")
Did you try adding the following option to the list of options?
--sout-display
i.e.
instance = vlc.Instance("--sub-source marq --sout=file/ps:example.mpg --sout-display")
Some time ago, in sample code on the ActiveState website, I saw someone play and record an MP3 file with VLC using the vlc.py module. You can take a look at that sample code to see how to duplicate a stream. I copied the code here for you (from http://code.activestate.com/recipes/577802-using-vlcpy-to-record-an-mp3-and-save-a-cue-file/):
import vlc
import time
import os

def new_filename(ext = '.mp3'):
    "find a free filename in 00000000..99999999"
    D = set(x[:8] for x in os.listdir('.')
            if (x.endswith(ext) or x.endswith('.cue')) and len(x) == 12)
    for i in xrange(10**8):
        s = "%08i" % i
        if s not in D:
            return s

def initialize_cue_file(name, instream, audiofile):
    "create a cue file and write some data, then return it"
    cueout = '%s.cue' % name
    outf = file(cueout, 'w')
    outf.write('PERFORMER "%s"\n' % instream)
    outf.write('TITLE "%s"\n' % name)
    outf.write('FILE "%s" WAVE\n' % audiofile)
    outf.flush()
    return outf

def initialize_player(instream, audiofile):
    "initialize a vlc player which plays locally and saves to an mp3file"
    inst = vlc.Instance()
    p = inst.media_player_new()
    cmd1 = "sout=#duplicate{dst=file{dst=%s},dst=display}" % audiofile
    cmd2 = "no-sout-rtp-sap"
    cmd3 = "no-sout-standard-sap"
    cmd4 = "sout-keep"
    med = inst.media_new(instream, cmd1, cmd2, cmd3, cmd4)
    med.get_mrl()
    p.set_media(med)
    return p, med

def write_track_meta_to_cuefile(outf, instream, idx, meta, millisecs):
    "write the next track info to the cue file"
    outf.write('  TRACK %02i AUDIO\n' % idx)
    outf.write('    TITLE "%s"\n' % meta)
    outf.write('    PERFORMER "%s"\n' % instream)
    m = millisecs // 60000
    s = (millisecs - (m*60000)) // 1000
    hs = (millisecs - (m*60000) - (s*1000)) // 10
    ts = '%02i:%02i:%02i' % (m, s, hs)
    outf.write('    INDEX 01 %s\n' % ts)
    outf.flush()

def test():
    # some online audio stream for which this currently works ....
    instream = 'http://streamer-mtc-aa05.somafm.com:80/stream/1018'
    # if the output filename ends with mp3, vlc knows which mux to use
    ext = '.mp3'
    name = new_filename(ext)
    audiofile = '%s%s' % (name, ext)
    outf = initialize_cue_file(name, instream, audiofile)
    p, med = initialize_player(instream, audiofile)
    p.play()
    np = None
    i = 0
    while 1:
        time.sleep(.1)
        new = med.get_meta(12)
        if new != np:
            i += 1
            t = p.get_time()
            print "millisecs: %i" % t
            write_track_meta_to_cuefile(outf, instream, i, new, t)
            np = new
            print "now playing: %s" % np

if __name__ == '__main__':
    test()
Perhaps you need to clone your output, as suggested on the forum?