I want to edit an uploaded file on byte level (i.e. searching and removing a certain byte sequence) before saving it.
I have a pre_save signal set up in the following way:
class Snippet(models.Model):
    """Model with a unique name and an optional uploaded audio file."""

    name = models.CharField(max_length=256, unique=True)
    audio_file = models.FileField(upload_to=generate_file_name, blank=True, null=True)


# The decorator is what connects this function to pre_save for Snippet;
# without it the handler is never called.
@receiver(models.signals.pre_save, sender=Snippet)
def prepare_save(sender, instance, **kwargs):
    """Run remove_headers on the instance before it is saved.

    Nothing happens when the instance has no audio file attached.
    """
    if instance.audio_file:
        remove_headers(instance)
Now I have had problems implementing the remove_headers function in a way that I can edit the file while it is still in memory and have it stored afterwards. I tried among others the following:
def remove_headers(instance):
    """Rewrite ``instance.audio_file`` without the marker byte sequence.

    The content is read, the marker is removed on the hexadecimal text form
    of the data, and the result is passed back to the file field through a
    temporary file.
    """
    byte_sequence = b'bytestoremove'
    # Reads from wherever the file position currently is.
    f = instance.audio_file.read()
    file_in_hex = f.hex()
    # NOTE(review): the substitution works on hex characters, so a match is
    # not guaranteed to start on a byte boundary -- confirm this is acceptable.
    stripped = bytes.fromhex(re.sub(byte_sequence.hex(), '', file_in_hex))

    tmp_file = TemporaryFile()
    tmp_file.write(stripped)
    tmp_file.flush()
    tmp_file.seek(0)  # rewind so the field reads the data from the start
    # save=True asks the file field to save the model instance as well.
    instance.audio_file.save(instance.audio_file.name, tmp_file, save=True)
This first of all would result in an infinite loop. But this can be mitigated by e.g. only calling the remove_headers method on create or so. It did however not work, the file was unchanged. I also tried replacing the last line with:
instance.audio_file = File(tmp_file, name=instance.audio_file.name)
This however resulted in an empty file to be written/saved.
Curiously when writing a test, this method seems to work:
def test_header_removed(self):
    """Saving a snippet leaves the audio content without the marker bytes."""
    snippet = mommy.make(Snippet)
    upload = ContentFile('contentbytestoremovecontent')
    snippet.audio_file.save('newname.mp3', upload)
    snippet.save()
    stored = snippet.audio_file.read()
    self.assertEqual(stored, b'contentcontent')
This test does not fail, despite the file being zero bytes in the end.
What am I missing here?
The second solution was almost correct. The reason the files ended up being empty (actually this only happened to bigger files) was that sometimes you have to seek to the beginning of the file after opening it. So the beginning of remove_headers needs to be changed:
def remove_headers(instance):
byte_sequence = b'bytestoremove'
instance.audio_file.seek(0)
f = instance.audio_file.read()
file_in_hex = f.hex()
Related
I have the following model in Django:
class AksOrder(models.Model):
    """Model holding an optional ``zip_file`` upload."""

    # NOTE(review): in strftime terms %M and %S are minute and second --
    # confirm that this is the intended upload directory layout.
    zip_file = models.FileField(
        upload_to='aks_zips/%M/%S/',
        blank=True,
    )
and in my views I have in essential these functions:
def gen_zip(pk, name, vars):
    """Write the first two entries of ``pdf_files`` into ``/tmp/<name>_<pk>.zip``.

    The archive object is not closed here and nothing is returned.
    """
    archive_name = str(name) + '_' + str(pk) + '.zip'
    zipObj = ZipFile(os.path.join('/tmp/', archive_name), 'w')
    # Fixed member names inside the archive, in the order they are written.
    for index, member_name in enumerate(('/filea.pdf', '/fileb.pdf')):
        zipObj.write(pdf_files[index].path, member_name)
def aksorder_complete(request, pk):
    """Look up the order, generate its zip and store it on ``zip_file``.

    Responds with a 404 when no AksOrder with the given id exists.
    """
    ao = get_object_or_404(AksOrder, id=pk)
    zip_args = (ao.c.pk, ao.dl, ao.vars)
    zipObj = generate_shop_zip(*zip_args)
    # Whatever generate_shop_zip returned is handed to the file field as content.
    ao.zip_file.save('file.zip', zipObj)
I did not only try this version, but this one seems the most reasonable and logical one to me. I get a "There is no item named 65536 in the archive" error. When I modify it slightly and close the file at the end of zip-writing in the first function, I get a "ValueError: Attempt to use ZIP archive that was already closed" message. Both times, the zip file is generated properly in /tmp/. I could not work around it. And that's only locally; I need to do it for S3 later...
I finally achieved it: I added a zipObj.close() to the first function at the end and I modified the 2nd function like so:
# Open the finished archive in binary mode and make sure the handle is
# closed again once the file field has stored its content.
with open('path/to/file.zip', 'rb') as zip_fh:
    ao.zip_file.save('name.zip', zip_fh)
Apparently, opening the file in rb (binary read) mode was decisive.
I have 2 large postgres tables which have an index so that I can perform a full text search on each.
Typically, they look like:
class Post_1(db.Model):
    """Post table with a search vector over its title and content columns."""

    query_class = PostQuery

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String)
    content = db.Column(db.Text)
    datestamp = db.Column(db.Float)
    # Search vector declared over the 'title' and 'content' columns.
    search_vector = db.Column(TSVectorType('title', 'content'))
and
class Post_2(db.Model):
    """Second post table, declared with the same columns as Post_1."""

    query_class = PostQuery

    id = db.Column(db.Integer, primary_key=True)
    title = db.Column(db.String)
    content = db.Column(db.Text)
    datestamp = db.Column(db.Float)
    # Search vector declared over the 'title' and 'content' columns.
    search_vector = db.Column(TSVectorType('title', 'content'))
In my flask application, to get the documents which have a specific keyword in one of the tables, I would do:
Post_1.query.search(keyword).\
order_by(Post_1.datestamp.desc()).limit(1)
Since I want to run the same search simultaneously on both tables, I wanted to use flask-executor and wrote the following code:
from flask_executor import Executor

executor = Executor(app)
futures = []
keyword = "covid"

# Submit the same "latest match" search once per table, Post_1 first.
for model in (Post_1, Post_2):
    latest_match = (
        model.query.search(keyword)
        .order_by(model.datestamp.desc())
        .limit(1)
    )
    # The query object itself is what gets handed to submit().
    future = executor.submit(latest_match)
    futures.append(future)
This does not work and I get the following error:
RuntimeError: This decorator can only be used at local scopes when a request context is on the stack. For instance within view functions.
Could anyone help me please?
The error you're getting is because flask-executor is intended to run tasks inside view functions - that is, as part of a request from a user. You're running your code outside of a view (i.e. outside of a scope that would normally be in place when your user is interacting with your application).
Do you need to do this, or is this simply part of a test? If you do something like this, just to test it out:
@app.route('/test')
def testroute():
    """Submit both searches from inside a view, where a request context exists.

    Returns a short text body because a Flask view has to return a response.
    """
    future = executor.submit(
        Post_1.query.search(keyword)
        .order_by(Post_1.datestamp.desc())
        .limit(1)
    )
    futures.append(future)
    future = executor.submit(
        Post_2.query.search(keyword)
        .order_by(Post_2.datestamp.desc())
        .limit(1)
    )
    futures.append(future)
    return 'submitted'
Then you should no longer get the error about running outside of a request context, because the code will be running as part of a request (i.e. inside a view function).
As a side note, the SQLAlchemy tasks you're submitting aren't callable - they're not function objects. Executors, whether the one created by Flask-Executor or the vanilla ones you can get via concurrent.futures, expect you to pass a "callable". I suspect your code still wouldn't work unless it was something like:
query = Post_1.query.search(keyword).\
order_by(Post_1.datestamp.desc()).limit(1).all
future = executor.submit(query)
Notice the lack of brackets at the end, because I want to use the callable object itself, not the result it will return
The executor would then "call" the object that had been passed:
executor.submit(Post_1.query.search(keyword).\
order_by(Post_1.datestamp.desc()).limit(1).all()
Folks, I need help understanding some details about how Django saves model files. I've written a test that involves creation of files (in a temporary directory via tempfile) and has the following lines:
TEMP_DIR = tempfile.TemporaryDirectory()
TEMP_DIR_PATH = TEMP_DIR.name
...
@override_settings(MEDIA_ROOT=TEMP_DIR_PATH)
def create_photo(self, album_number, photo_number):
    """Create, validate, save and return a Photo with an uploaded JPEG.

    MEDIA_ROOT is overridden with the temporary directory for the duration
    of this call only.
    """
    ...
    # Read the image bytes up front so the source file is closed again.
    with open(..., 'rb') as image_source:
        image_bytes = image_source.read()
    p = Photo.objects.create(
        number=photo_number,
        album=album,
        added_by=self.user,
        image=SimpleUploadedFile(
            name=...,
            content=image_bytes,
            content_type='image/jpeg',
        ),
        remarks='-',
    )
    p.full_clean()
    p.save()
    return p
This code works, except for one thing that confuses me. The line p = Photo.objects.create causes a file to appear in the temporary directory. Then p.full_clean() does nothing to the file. However when I execute p.save(), the file disappears from the temporary directory. If I remove p.save(), the file stays there when the function returns.
So my test function
def test_image_file_present(self):
    """When a photo is added to DB, the file actually appears in MEDIA."""
    photo = self.create_photo(3, 2)
    stored_path = photo.image.file.name
    # Guard clause: the test passes as soon as the file is found on disk.
    if os.path.exists(stored_path):
        return
    self.fail('Image file not found')
fails if p.save() is there but passes if I remove p.save().
Why would object.save() cause the file to disappear?
As a bonus question, what's the purpose of .save() if the file and the Django model object appear already during Photo.objects.create? I've checked that the pre-save signal is sent by Photo.objects.create() as well as by p.save().
I am getting an index range error when I try to use multiple findalls, but if I just use one, then the code works.
from re import findall
# Open the saved feed; this one file object is shared by every search below.
news = open('download7.html', 'r')
# Each findall call is given the result of its own news.read() call.
title = findall('<item>[^<]+<title>(.*)</title>', news.read())
link = findall('<item>[^<]+<title>[^<]+</title>[^<]+<link>(.*)</link>', news.read())
description = findall('<!\[CDATA\[[^<]+<p>(.*)</p>', news.read())
pubdate = findall('<pubDate>([^<]+)</pubDate>', news.read())
image_regex = findall('url="([^"]+627.jpg)', news.read())
# Prints the first captured image URL; indexing assumes at least one match.
print(image_regex[0])
Calling .read() on a file object reads all remaining data from the file, and leaves the file pointer at the end of the file (so subsequent calls to .read() return the empty string).
Cache the file contents once, and reuse it:
from re import findall

# Read the document once; every search below reuses the same text.
with open('download7.html', 'r') as news:
    newsdata = news.read()

# Raw strings keep regex escapes such as \[ out of Python's own
# string-escape processing; the patterns themselves are unchanged.
title = findall(r'<item>[^<]+<title>(.*)</title>', newsdata)
link = findall(r'<item>[^<]+<title>[^<]+</title>[^<]+<link>(.*)</link>', newsdata)
description = findall(r'<!\[CDATA\[[^<]+<p>(.*)</p>', newsdata)
pubdate = findall(r'<pubDate>([^<]+)</pubDate>', newsdata)
image_regex = findall(r'url="([^"]+627.jpg)', newsdata)
# Prints the first captured image URL; indexing assumes at least one match.
print(image_regex[0])
Note: You could re-read from the file object by seeking back to the beginning after each read (calling news.seek(0)), but that's far less efficient when you need the complete file data over and over.
I'm very new to Python and I was trying to use a nice library (audiotools) to play an mp3 playlist, just as an exercise.
This is the class to play the tracklist (loosely based on THIS, once I discovered there is a "callback function with no arguments which is called by the player when the current track is finished" (*) ):
class Trackplay:
    """Steps through a list of tracks using an audiotools Player."""

    def __init__(self,
                 track_list,
                 audio_output=audiotools.player.open_output('ALSA'),
                 replay_gain=audiotools.player.RG_NO_REPLAYGAIN):
        """Store the track list and build the player.

        Both defaults are evaluated once, when the class body is executed.
        """
        self.track_list = track_list
        # Start one position before INDEX; next_track() increments first.
        self.track_index = INDEX - 1
        # next_track() is called right here and its return value becomes the
        # third Player argument; self.player has not been assigned yet.
        third_argument = self.next_track()
        self.player = audiotools.player.Player(
            audio_output, replay_gain, third_argument)

    def next_track(self):
        """Advance to the next track, open it and start playing.

        Prints 'playing finished' when an IndexError is raised anywhere in
        the sequence.
        """
        try:
            self.track_index += 1
            current_track = self.track_list[self.track_index]
            print(str(current_track))
            audio_file = audiotools.open(current_track)
            self.player.open(audio_file)  # <---------- error
            self.player.play()
        except IndexError:
            print('playing finished')
Then I'm calling:
tp = Trackplay(get_track_list(PATH))
where get_track_list is a method returning a list of mp3s from the dir PATH.
The error I get (at the line marked with the "error" comment) is:
AttributeError: Trackplay instance has no attribute 'player'
I don't understand what's happening ...but reading all the AttributeError questions here, must be something stupid...
player seems to me to be exactly a Trackplay attribute. Other attributes, such as track_index and track_list, seem OK, since the line print str(current_track) prints the current track.
Thanks for any help.
See this code here?
self.player = audiotools.player.Player(
audio_output,
replay_gain,
self.next_track())
As part of creating the Player you're going to assign to self.player, you call self.next_track(). self.next_track tries to use self.player, before self.player exists!
def next_track(self):
try:
self.track_index += 1
current_track = self.track_list[self.track_index]
print str(current_track)
audio_file = audiotools.open(current_track)
self.player.open(audio_file)
self.player.play()
except IndexError:
print('playing finished')
next_track doesn't even return anything, so it's baffling why you're trying to pass self.next_track() as an argument to Player.
Was that supposed to be a callback? If so, you should pass self.next_track to Player without calling it.
self.player = audiotools.player.Player(
audio_output,
replay_gain,
self.next_track)
# ^ no call parentheses