Transcode video using celery and ffmpeg in django - django

I would like to transcode user uploaded videos using celery. I think first I should upload the video, and spawn a celery task for transcoding.
Maybe something like this in the tasks.py:
subprocess.call('ffmpeg -i path/.../original path/.../output')
I have just completed the "First Steps with Celery" tutorial, so I am unsure how to do this in views.py and tasks.py. Also, is this a good solution? I would really appreciate your help and advice. Thank you.
models.py:
class Video(models.Model):
    """An uploaded video together with its optional transcoded renditions."""

    # Owner of the upload. on_delete is mandatory from Django 2.0 on; CASCADE
    # is what older versions applied implicitly when it was omitted.
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    title = models.CharField(max_length=100)
    # File exactly as the user uploaded it.
    original = models.FileField(upload_to=get_upload_file_name)
    # Transcoded renditions; optional (blank/null) so the row can be saved
    # before any transcoding has happened.
    mp4_480 = models.FileField(upload_to=get_upload_file_name, blank=True, null=True)
    mp4_720 = models.FileField(upload_to=get_upload_file_name, blank=True, null=True)
    privacy = models.CharField(max_length=1, choices=PRIVACY, default='F')
    pub_date = models.DateTimeField(auto_now_add=True, auto_now=False)
my incomplete views.py:
@login_required
def upload_video(request):
    """Show the upload form and handle its submission.

    On a valid POST the user is redirected to '/'; otherwise the form
    (unbound on GET, bound with its errors on an invalid POST) is rendered
    with 'upload_video.html'.
    """
    if request.method == 'POST':
        form = VideoForm(request.POST, request.FILES)
        if form.is_valid():
            if form.cleaned_data:
                user = request.user
                #
                #
                # No IDEA WHAT TO DO NEXT
                #
                #
                return HttpResponseRedirect('/')
    else:
        form = VideoForm()
    # Reached on GET and on an invalid POST.
    return render(request, 'upload_video.html', {
        'form': form
    })

I guess you have already solved the problem, but I will add a bit more information to what GwynBleidD already said, because I had the same issue.
So, as GwynBleidD said, you need to call Celery tasks, but how do you code those tasks? Here is the structure:
the task get the video from the database
it encodes it with ffmpeg and outputs it anywhere you want
when done with the encoding, it sets the corresponding attribute to the model and saves it (be careful, if you run various tasks on the same video, do not save with the old instance, as you may lose information from other tasks running)
First, set a FFMPEG_PATH variable in your settings, then:
import os, subprocess
from .models import Video
@app.task
def encode_mp4(video_id, height):
    """Transcode a video's original upload to MP4 at the given height.

    Runs a two-pass ffmpeg encode of ``Video.original`` and, on success,
    stores the path of the result in the video's ``mp4_<height>`` field.

    Args:
        video_id: Primary key of the ``Video`` row to transcode.
        height: Target frame height in pixels. The model must have a matching
            ``mp4_<height>`` field (e.g. ``mp4_480`` or ``mp4_720``).

    Raises:
        ValueError: If ``Video`` has no ``mp4_<height>`` field.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status;
            the database is left untouched in that case.
    """
    # Function-scope imports keep this snippet self-contained.
    # NOTE(review): `app` is assumed to be the project's Celery application.
    import logging

    from django.conf import settings

    field_name = 'mp4_{}'.format(height)
    if not hasattr(Video, field_name):
        raise ValueError('Video has no field {!r}'.format(field_name))

    try:
        video = Video.objects.get(id=video_id)
    except Video.DoesNotExist:
        logging.getLogger(__name__).warning(
            "Video %s does not exist; nothing to encode", video_id)
        return

    input_file_path = video.original.path
    # get the filename (without extension)
    base_name = os.path.splitext(os.path.basename(input_file_path))[0]
    # path to the new file, change it according to where you want to put it;
    # the height is part of the name so renditions do not overwrite each other
    output_file_name = os.path.join(
        'videos', 'mp4', '{}_{}.mp4'.format(base_name, height))
    output_file_path = os.path.join(settings.MEDIA_ROOT, output_file_name)
    output_dir = os.path.dirname(output_file_path)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # 16:9 width as an integer, rounded down to an even number because many
    # encoders reject odd frame dimensions.
    width = height * 16 // 9 // 2 * 2

    # 2-pass encoding: ffmpeg numbers its passes 1 and 2.
    for pass_number in (1, 2):
        subprocess.check_call([
            settings.FFMPEG_PATH,
            '-y',  # let pass 2 overwrite the file written by pass 1
            '-i', input_file_path,
            '-s', '{}x{}'.format(width, height),
            '-vcodec', 'mpeg4',
            '-acodec', 'libvo_aacenc',
            '-b', '10000k',
            '-pass', str(pass_number),  # argv entries must be strings
            '-r', '30',
            output_file_path,
        ])

    # Save the new file in the database. Only this field is written, so other
    # tasks working on the same video do not lose their updates.
    getattr(video, field_name).name = output_file_name
    video.save(update_fields=[field_name])

Modify your model so you can save the original (uploaded) video without the transcoded version(s), and maybe add a flag to your model that records whether the video has been transcoded (based on that flag you can show the user that transcoding is still in progress).
After uploading the video and saving its model to the database, run a Celery task, passing the ID of your video to it. In the Celery task, retrieve the video from the database, transcode it, and save it back to the database with the flag updated.

Related

Passing Audio Files To Celery Task

I have a music uploading app and believe that it would be smart to pass the files to a celery task to handle uploading. However, when attempting to pass the files, as I will show in my code below, I get a message stating that they are not JSON serializable. What would be the correct way to handle this operation?
Everything below uploaded_songs in .views.py is my current code that successfully uploads the audio tracks. It doesn't, however, utilize celery yet.
.task.py
from django.contrib.auth import get_user_model
from Beyond_April_Base_Backend.celery import app
from django.contrib.auth.models import User
@app.task
def upload_songs(songs, user_id):
    """Look up the uploading user and print the user and the songs.

    Args:
        songs: The songs handed to the task; only printed here.
        user_id: Primary key of the ``User`` the songs belong to.

    A missing user is logged as a warning and the task returns without
    printing anything.
    """
    # The snippet never imports logging at module level.
    import logging

    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        logging.warning("Tried to find non-exisiting user '%s'", user_id)
        return
    print('user and songs')
    print(user)
    print(songs)
.views.py
class ConcertUploadView(APIView):
    """Create a Concert and one Song per uploaded audio file."""

    permission_classes = [permissions.IsAuthenticated]

    def post(self, request):
        """Store the posted concert and its tracks, then answer 201."""
        track_files = request.FILES.getlist('files')
        current_user = self.request.user
        # NOTE(review): Celery serializes task arguments (JSON by default) and
        # uploaded file objects are not JSON-serializable; this is the call
        # the question is about.
        upload_songs.delay(track_files, current_user.pk)

        try:
            selected_band = Band.objects.get(name=request.data['band'])
        except ObjectDoesNotExist:
            print('band not received from form')
            selected_band = Band.objects.get(name='Band')

        venue_name = request.data['venue']
        # Only the text before the first "(" is parsed as the date.
        date_text = request.data['concertDate'].split('(')[0]
        concert_date = datetime.strptime(date_text, '%a %b %d %Y %H:%M:%S %Z%z ')

        new_concert = Concert(
            venue=venue_name,
            date=concert_date,
            city=request.data['city'],
            state=request.data['state'],
            country=request.data['country'],
            band=selected_band,
            user=current_user,
        )
        new_concert.save()

        # concert_order is 1-based and follows the upload order.
        for position, song in enumerate(track_files, start=1):
            audio_metadata = music_tag.load_file(song.temporary_file_path())
            Song(
                title=str(audio_metadata['title']),
                concert=new_concert,
                user=current_user,
                concert_order=position,
                audio_file=song,
            ).save()
        return Response(status=status.HTTP_201_CREATED)
When you create a celery task, it serializes the arguments so that it can store the message in the queue backend (RabbitMQ, Redis, etc). The default serializer is JSON, and a binary file is not JSON-serializable. See celery's serialization docs for more info.
You could base64 encode the binary file to text, but you shouldn't: it will increase the size of the data, and you'll be passing around potentially very large messages. With lots of large messages, you could run out of memory/space in your backend, and it will make it hard to inspect or log messages.
Instead, you should store the binary file somewhere, and pass a reference (filename, S3 URL, database key, etc) to the task. The task can then load the file, do what it needs to, and delete the original (if appropriate).

How to classify an image from Azure Storage in Django using a Tensorflow model

I am developing a django application where the user chooses a machine learning model from a drop down list and uploads an image for classification. This image was initially saved in the project directory (bad, I know) so that I can use it in the classification.
Now I save these images in Azure Storage, but at the moment I can't find a way to access them without having to save them locally to classify them, so I think I'll have to temporarily save them in the project directory and once I use them in the ml model, then I remove the images.
I would like to deploy this application to Azure web service, so I consider it is a bad idea to save and delete images in the project directory.
You can see app's form here
models.py
image is saved in Azure Storage, the other fields in Azure Database for PostgreSQL.
class UploadedImage(models.Model):
    """An uploaded image and the prediction computed for it."""

    # Stored under a year/month/day directory.
    image = models.ImageField(upload_to='%Y/%m/%d/')
    # Refreshed on every save (auto_now), not only on creation.
    uploaded = models.DateTimeField(auto_now=True, auto_now_add=False)
    title = models.CharField(max_length=50)
    # Optional; may be left empty.
    prediction = models.FloatField(blank=True, null=True)

    def __str__(self):
        """Use the title as the display name."""
        return self.title
forms.py
class UploadImageForm(forms.ModelForm):
    """Upload form for an image plus the choice of predictive model."""

    # (stored key, display name) pairs offered in the drop-down.
    EXTRA_CHOICES = [
        ('MOB', 'MobileNetV2'),
        ('VGG', 'VGG-19'),
        ('CNN', 'CNN 3BI'),
    ]

    # Extra, non-model field: which network should classify the image.
    predicted_with = forms.ChoiceField(
        label="Modelo Predictivo",
        choices=EXTRA_CHOICES,
        required=True,
        widget=forms.Select(attrs={'class': 'form-control'}),
    )

    class Meta:
        model = UploadedImage
        fields = ['image']
        widgets = {'image': forms.FileInput(attrs={'class': 'custom-file-input'})}
views.py
def make_prediction(image_to_predict, model='MOB'):
    """Run one image through the selected Keras model and return its output.

    Args:
        image_to_predict: The image reference handed on, wrapped in a
            one-element list, to ``img_preprocessing.create_data_batches``.
        model: Key selecting the model folder: 'VGG', 'MOB', 'CNN' or 'MSG'.

    Returns:
        Whatever ``predict`` of the loaded model returns for the batch.

    Raises:
        KeyError: If ``model`` is not one of the known keys.
    """
    tf.keras.backend.reset_uids()
    folders = {'VGG': 'vgg', 'MOB': 'mobilenet', 'CNN': 'cnn3', 'MSG': 'mobile_sin_gpu'}
    model_dir = 'upload_images/model/%s' % folders[model]

    # The architecture is stored as JSON; `with` closes the file even if the
    # read fails.
    with open('%s/modelo.json' % model_dir, 'r') as json_file:
        loaded_json_model = json_file.read()

    # Separate name so the `model` argument is not shadowed.
    network = tf.keras.models.model_from_json(loaded_json_model)
    network.load_weights('%s/modelo.h5' % model_dir)

    data = img_preprocessing.create_data_batches([image_to_predict])
    return network.predict(data)
def upload_image_view(request):
if request.method == 'POST':
form = forms.UploadImageForm(request.POST, request.FILES)
if form.is_valid():
m = form.save(commit=False)
try:
pred = make_prediction(m.image.path, form.cleaned_data['predicted_with'])[0][0]
if pred > 0.5:
# Code continue...
if status == 200:
m.prediction = pred
m.title = m.image.path
m.save()
# Code continue...
The above snippet worked when I initially saved the images in the project directory but when I started saving the images in Azure Storage I started getting this error:
This backend doesn't support absolute paths.
So I changed the following line: pred = make_prediction(m.image.name, form.cleaned_data['predicted_with'])[0][0]
However now I have this error: NewRandomAccessFile failed to Create/Open: image-100.png : The system cannot find the file specified. ; No such file or directory [[{{node ReadFile}}]] [[IteratorGetNext]] [Op:__inference_predict_function_845] Function call stack: predict_function
For this reason I think my solution would be to temporarily save the image in the project directory, use it in the model and then delete it, however, I do not think it is ideal.
What approach is appropriate to follow in this case?

Populate model with metadata of file uploaded through django admin

I have two models,Foto and FotoMetadata. Foto just has one property called upload, that is an upload field. FotoMetadata has a few properties and should receive metadata from the foto uploaded at Foto. This can be done manually at the admin interface, but I want to do it automatically, i.e: when a photo is uploaded through admin interface, the FotoMetadata is automatically filled.
In my model.py I have a few classes, including Foto and FotoMetadata:
class Foto(models.Model):
    """A single uploaded photo file."""

    # Files are stored below the "fotos" directory.
    upload = models.FileField(upload_to="fotos")

    def __str__(self):
        """Display the photo by the string form of its upload."""
        return str(self.upload)
class FotoMetadata(models.Model):
    """Metadata describing exactly one Foto."""

    # Allowed values for `format`.
    image_formats = (
        ('RAW', 'RAW'),
        ('JPG', 'JPG'),
    )

    date = models.DateTimeField()
    # PROTECT: a Camera referenced by metadata cannot be deleted.
    camera = models.ForeignKey(Camera, on_delete=models.PROTECT)
    format = models.CharField(max_length=8, choices=image_formats)
    exposure = models.CharField(max_length=8)
    fnumber = models.CharField(max_length=8)
    iso = models.IntegerField()
    # One-to-one with Foto; doubles as this table's primary key and is
    # removed together with its Foto (CASCADE).
    foto = models.OneToOneField(Foto, on_delete=models.CASCADE, primary_key=True)
When I log in to the admin site, I have an upload form related to Foto, and this is working fine. My problem is that I can't insert metadata into FotoMetadata on the go. I made a function that parses the photo and gives me a dictionary with the info I need. This function is called GetExif and lives in a file called getexif.py. This is a simplified version of it:
def GetExif(foto):
# Open image file for reading (binary mode)
f = open(foto, 'rb')
# Parse file
...
<parsing code>
...
f.close()
#create dictionary to receive data
meta={}
meta['date'] = str(tags['EXIF DateTimeOriginal'].values)
meta['fnumber'] = str(tags['EXIF FNumber'])
meta['exposure'] = str(tags['EXIF ExposureTime'])
meta['iso'] = str(tags['EXIF ISOSpeedRatings'])
meta['camera'] =str( tags['Image Model'].values)
return meta
So, basically, what I'm trying to do is use this function at admin.py to automatically populate the FotoMetadata when uploading a photo at Foto, but I really couldn't figure out how to make it. Does any one have a clue?
Edit 24/03/2016
Ok, after a lot more failures, I'm trying to use save_model in admin.py:
from django.contrib import admin
from .models import Autor, Camera, Lente, Foto, FotoMetadata
from fotomanager.local.getexif import GetExif
admin.site.register(Autor)
admin.site.register(Camera)
admin.site.register(Lente)
admin.site.register(FotoMetadata)
class FotoAdmin(admin.ModelAdmin):
    """Admin for Foto that tries to build a FotoMetadata on save (first attempt)."""

    def save_model(self, request, obj, form, change):
        """Save the Foto, then read its EXIF data into a FotoMetadata object."""
        # populate the model
        obj.save()
        # get metadata
        # NOTE(review): .url is the public URL of the upload, not a filesystem
        # path, while GetExif opens its argument with open().
        exif = GetExif(obj.upload.url)
        # Create instance of FotoMetadata
        fotometa = FotoMetadata()
        # FotoMetadata.id = Foto.id
        # NOTE(review): assigns the primary key rather than the Foto instance,
        # and the metadata object is never saved in this version.
        fotometa.foto = obj.pk
        # save exposure
        fotometa.exposure = exif['exposure']
admin.site.register(Foto, FotoAdmin)
I thought it would work, or that I would have problems saving data to the model, but actually I got stuck before that. I got this error:
Exception Type: FileNotFoundError
Exception Value:
[Errno 2] No such file or directory: 'http://127.0.0.1:8000/media/fotos/IMG_8628.CR2'
Exception Location: /home/ricardo/Desenvolvimento/fotosite/fotomanager/local/getexif.py in GetExif, line 24
My GetExif function can't read the file; however, the file path is right! If I copy and paste it into my browser, it downloads the file. I'm trying to figure out a way to correct the address, or to pass the internal path, or to pass the real file to the function instead of its path. I'm also thinking about a different way to access the file in the GetExif() function. Any idea how to solve it?
Solution
I solved the problem above! By reading the FileField source, I found a property called path, which solves the problem. I also made a few other modifications and the code is working. The class FotoAdmin, in admin.py, now looks like this:
class FotoAdmin(admin.ModelAdmin):
    """Admin for Foto that stores the photo's exposure in FotoMetadata on save."""

    def save_model(self, request, obj, form, change):
        """Save the Foto, then save a FotoMetadata holding its exposure."""
        # The Foto is saved first, before GetExif reads the upload.
        obj.save()
        # .path is the filesystem location of the upload, which GetExif opens.
        exif = GetExif(obj.upload.path)
        # The metadata row is linked to the Foto instance itself.
        FotoMetadata(foto=obj, exposure=exif['exposure']).save()
I also had to set null=True at some properties in models.py and everything is working as it should.
I guess you want to use a post_save signal.
Read: Django signals.
Connect a handler to the post_save signal, so that after you save a Foto you have a hook to do other stuff; in your case, parse the photo metadata and create a FotoMetadata instance.
Moreover, if you want to save the Foto only if the metadata extraction succeeds (or under any other condition), you may use the pre_save signal and save the Foto only after its metadata has been saved.

upload images from URL to easy_thumbnails field

I want to upload many images from URLs while I create objects with a script.
#models.py
class Widget(TimeStampedModel):
name = CharField ... etc, etc
pic = ThumbnailerImageField(_('Widget Pic'),
upload_to='widget/pic/',
help_text = _('Please submit your picture here.'),
null=True, blank=True)
so I thought of using the save method in that class to download and save the images. So my script creates the Widget objects and saves the image url, and then the save method tries to download and save the image. My save method so far is:
def save(self, *args, **kwargs):
    """Save the widget, first replacing a remote picture URL by a thumbnail.

    When ``pic`` holds an ``http://`` URL ending in a known image extension,
    the image is downloaded to the scratch file 'test.image' and ``pic`` is
    replaced by a thumbnail generated from it. The model is saved either way.
    """
    image_suffixes = ('.png', '.gif', '.jpg', '.jpeg', '.svg')
    if (self.pic
            and self.pic.name.startswith( 'http://')
            and self.pic.name.endswith(image_suffixes)):
        # Download into the scratch file...
        with open('test.image', 'w') as scratch:
            scratch.write(urllib2.urlopen(self.pic.name).read())
        # ...and reopen it for reading so the thumbnailer can consume it.
        downloaded = open('test.image')
        extension = self.pic.name.split('.')[-1]
        thumbnailer = get_thumbnailer(
            downloaded, relative_name=self.slug + '.' + extension)
        self.pic = thumbnailer.get_thumbnail({'size': (200, 0), 'crop': False})
    super(Widget, self).save(*args, **kwargs)
I've tried to open the file in different ways with .read() or .open() ... but the only way I found (above) feels quite hackish (save some temp file with the image, re-open, then save). Is there a better way? Am I missing a straightforward way to do this?
Saving a temporary file is the only solution I know of too. Check this: http://djangosnippets.org/snippets/1890/
So basically you don't need hacks like calling close() and open() again. You can do:
from django.core.files import File
from django.core.files.temp import NamedTemporaryFile
# ... your code here ...
# Temporary file for the download; with delete=True it is presumably removed
# automatically once it is closed, so no fixed scratch file is left behind.
my_temp_pic = NamedTemporaryFile(delete=True)
my_temp_pic.write(urllib2.urlopen(self.pic.name).read())
# Flush buffered data so the whole download is in the file before it is used.
my_temp_pic.flush()
# "<slug>.<extension of the source URL>"
relative_name = '%s.%s' % (self.slug, self.pic.name.split('.')[-1])
# The same file object is handed over directly; no close()/open() round trip.
thumbnailer = get_thumbnailer(my_temp_pic, relative_name=relative_name)
# ... your code again ...
Hope it helps.

Processing file uploads before object is saved

I've got a model like this:
class Talk(BaseModel):
    """A recorded talk stored as an audio file."""

    title = models.CharField(max_length=200)
    # Uploaded audio, stored below "talks/".
    mp3 = models.FileField(max_length=200, upload_to=u'talks/')
    # Optional; may be left empty.
    seconds = models.IntegerField(null=True, blank=True)
I want to validate before saving that the uploaded file is an MP3, like this:
def is_mp3(path_to_file):
    """Return True if mutagen parses path_to_file as a non-sketchy MP3.

    Args:
        path_to_file: The file to inspect, passed straight to ``MP3``.

    Returns:
        False when mutagen finds no MPEG header at all or flags the stream
        info as sketchy; True otherwise.
    """
    from mutagen.mp3 import MP3, HeaderNotFoundError

    try:
        audio = MP3(path_to_file)
    except HeaderNotFoundError:
        # No MPEG frame could be found: a predicate should answer False here
        # rather than raise.
        return False
    return not audio.info.sketchy
Once I'm sure I've got an MP3, I want to save the length of the talk in the seconds attribute, like this:
# Parse the file with mutagen and keep its duration on the model.
audio = MP3(path_to_file)
# NOTE(review): seconds is an IntegerField; confirm whether info.length
# needs rounding before it is stored.
self.seconds = audio.info.length
The problem is, before saving, the uploaded file doesn't have a path (see this ticket, closed as wontfix), so I can't process the MP3.
I'd like to raise a nice validation error so that ModelForms can display a helpful error ("You idiot, you didn't upload an MP3" or something).
Any idea how I can go about accessing the file before it's saved?
p.s. If anyone knows a better way of validating files are MP3s I'm all ears - I also want to be able to mess around with ID3 data (set the artist, album, title and probably album art, so I need it to be processable by mutagen).
You can access the file data in request.FILES while in your view.
I think that best way is to bind uploaded files to a form, override the forms clean method, get the UploadedFile object from cleaned_data, validate it anyway you like, then override the save method and populate your models instance with information about the file and then save it.
A cleaner way to get the file before it is saved is like this:
from django.core.exceptions import ValidationError
#this go in your class Model
def clean(self):
    """Validate that a file was supplied and hand it to processfile."""
    try:
        uploaded = self.mp3.file  # the file in memory
    except ValueError:
        raise ValidationError("A File is needed")
    # At this point type(uploaded) is e.g.
    # <class 'django.core.files.uploadedfile.InMemoryUploadedFile'>
    processfile(uploaded)
And if we need a path, the answer is in this other question.
You could follow the technique used by ImageField where it validates the file header and then seeks back to the start of the file.
class ImageField(FileField):
# ...
def to_python(self, data):
f = super(ImageField, self).to_python(data)
# ...
# We need to get a file object for Pillow. We might have a path or we might
# have to read the data into memory.
if hasattr(data, 'temporary_file_path'):
file = data.temporary_file_path()
else:
if hasattr(data, 'read'):
file = BytesIO(data.read())
else:
file = BytesIO(data['content'])
try:
# ...
except Exception:
# Pillow doesn't recognize it as an image.
six.reraise(ValidationError, ValidationError(
self.error_messages['invalid_image'],
code='invalid_image',
), sys.exc_info()[2])
if hasattr(f, 'seek') and callable(f.seek):
f.seek(0)
return f