Is Django's save() method thread-safe? That is, if I have multiple threads, can concurrent save() calls interfere with each other's data? I ran into this while running save() on multiple threads: some field values were interchanged, meaning a field value from one thread's save() ended up on the record saved by another thread. Is there a way to tackle this situation?
from project.views import start_new_thread
from django.db import connection

@start_new_thread
def site(site, loop):
    try:
        from bs4 import BeautifulSoup
        from django.db.models import Max
        import re
        import pytz
        from datetime import datetime
        from .sentiment_analysis import calculate_sentiment
        import aiohttp
        import asyncio
        import async_timeout
        from project.models import thread, post
        import dateparser

        href = []      # queue of forum URLs to scrape
        link = set()   # every URL seen so far
        tthread = []   # queue of thread URLs to scrape
        author = {}    # thread URL -> screen name of the opening poster

        def convert_si_to_number(x):
            total_stars = 0
            if 'K' in x:
                if len(x) > 1:
                    total_stars = float(x.replace('K', '')) * 1000  # convert K to a thousand
            elif 'M' in x:
                if len(x) > 1:
                    total_stars = float(x.replace('M', '')) * 1000000  # convert M to a million
            elif 'B' in x:
                total_stars = float(x.replace('B', '')) * 1000000000  # convert B to a billion
            else:
                total_stars = int(x)  # less than 1000
            return int(total_stars)

        async def fetch(session, url):
            async with async_timeout.timeout(30):
                async with session.get(url) as response:
                    return await response.text()

        async def forum(response, lockhref, lockthread):
            soup = BeautifulSoup(response, 'html5lib')
            # Collect links to sub-forums.
            table = soup.findAll('a', href=re.compile("forums/"))
            for row in table:
                url = 'site.com' + row['href']
                async with lockhref:
                    if url not in link:
                        href.append(url)
                        link.add(url)
            # Collect the threads listed on this page.
            Th = soup.findAll('div', {'class': re.compile('structItem structItem--thread')})
            for t in Th:
                json_data = {}
                divs = t.findAll("div")
                url = "site.com" + (divs[2].div.find('a', href=re.compile('threads/')))['href']
                json_data["id"] = url
                dl = divs[5].findAll("dl")
                json_data["views"] = convert_si_to_number(dl[1].dd.text)
                f = thread.objects.filter(id=url)
                async with lockthread:
                    if url not in link:
                        link.add(url)
                        if not len(f):
                            tthread.append(url)
                        else:
                            try:
                                if f[0].posts < convert_si_to_number(dl[0].dd.text) + 1:
                                    tthread.append(url)
                            except:
                                if f[0].posts < 1:
                                    tthread.append(url)
                json_data["thread_title"] = divs[2].div.a.text
                json_data["site"] = "site.com"
                json_data["posts"] = 0
                json_data["timestamp_ms"] = None
                json_data["author"] = None
                json_data["date_created"] = None
                if not len(f):
                    t = thread(id=json_data["id"], thread_title=json_data["thread_title"],
                               posts=json_data["posts"], views=json_data["views"],
                               site=json_data["site"], timestamp_ms=json_data["timestamp_ms"],
                               author=json_data["author"], date_created=json_data["date_created"])
                    t.save()
                elif f[0].views < json_data["views"]:
                    thread.objects.filter(id=json_data["id"]).update(views=json_data["views"])

        async def showthread(url, j, lock):
            async with aiohttp.ClientSession() as session:
                try:
                    response = await fetch(session, url)
                    await threa(response, lock, url)
                except (aiohttp.ClientConnectionError, aiohttp.ClientConnectorError):
                    print("Going to sleep for 5min")
                    await asyncio.sleep(300)
                    await showthread(url, j, lock)
                except asyncio.TimeoutError:
                    print("Timeout Retrying")
                    await showthread(url, j, lock)
                except Exception as e:
                    if j < 2:
                        j += 1
                        print("error " + url + " " + str(e))
                        await showthread(url, j, lock)

        async def threa(response, lock, url):
            soup = BeautifulSoup(response, 'html5lib')
            # Queue the other pages of this thread.
            table = soup.findAll('a', href=re.compile(url + r"page-\d+"))
            for row in table:
                async with lock:
                    if "site.com" + row["href"] not in link:
                        tthread.append("site.com" + row["href"])
                        link.add("site.com" + row["href"])
            table = soup.findAll('div', {'class': 'message-inner'})
            # Strip any trailing "page-N" suffix to recover the thread id.
            match = re.finditer("page", url)
            index = -1
            for m in match:
                index = m.start()
            if index == -1:
                id = url
            else:
                id = url[:index]
            count = 0
            for t in table:
                count = count + 1
                json_data = {}
                h4 = t.find("h4", {'class': 'message-name'})
                try:
                    json_data["screen_name"] = h4.text
                except:
                    json_data["screen_name"] = "None"
                div = t.find('div', {'class': 'message-attribution-main'})
                try:
                    json_data["created_at"] = dateparser.parse(' '.join(div.text.split()))
                    json_data["created_at"] = pytz.utc.localize(json_data["created_at"])
                except Exception as e:
                    print(str(e))
                    json_data['created_at'] = datetime(1970, 1, 1, 1, 1, 1, 0, pytz.UTC)
                json_data['timestamp_ms'] = datetime.timestamp(json_data['created_at'])
                div = t.find('div', {'class': 'bbWrapper'})
                try:
                    full_text = ''.join((div.text).split())
                except:
                    full_text = ''
                text, sentiment = calculate_sentiment('ar', full_text)
                json_data['sentiment_analysis'] = sentiment
                json_data["text"] = full_text
                json_data["cleaned_text"] = text.split()
                json_data["hashtags"] = ''
                json_data["id"] = id
                try:
                    ul = t.find('ul', {'class': re.compile('message-attribution-opposite')})
                    li = ul.find('a', {'class': 'qimahostma'})
                    no = int((li.text).replace("#", ''))
                except:
                    f = post.objects.filter(link=id)
                    if not len(f):
                        no = 1
                    else:
                        max = f.aggregate(Max('no'))
                        no = max['no__max'] + 1
                if int(no) == 1:
                    # First post: it opens the thread, so record its author.
                    json_data["quoted_screen_name"] = ''
                    json_data["is_quote"] = False
                    author[url] = json_data["screen_name"]
                    thread_existing, created = thread.objects.get_or_create(id=json_data["id"])
                    if created:
                        thread_existing.date_created = json_data["created_at"]
                        thread_existing.timestamp = json_data["timestamp_ms"]
                        thread_existing.author = json_data["screen_name"]
                        thread_existing.thread_title = (soup.find('h1', {'class': 'p-title-value'})).text
                        thread_existing.posts = 0
                        thread_existing.views = -1
                        thread_existing.site = "site.com"
                        thread_existing.save()
                    else:
                        thread_existing.site = "site.com"
                        thread_existing.date_created = json_data["created_at"]
                        thread_existing.timestamp = json_data["timestamp_ms"]
                        thread_existing.author = json_data["screen_name"]
                        thread_existing.save()
                else:
                    json_data["quoted_screen_name"] = author[url]
                    json_data["is_quote"] = True
                json_data["no"] = int(no)
                json_data["site"] = "site.com"
                try:
                    p = post(link=json_data["id"], no=json_data["no"], created_at=json_data["created_at"],
                             hashtags=[], text=json_data["text"], cleaned_text=json_data["cleaned_text"],
                             sentiment_analysis=json_data["sentiment_analysis"],
                             quoted_screen_name=json_data["quoted_screen_name"],
                             is_quote=json_data["is_quote"], site=json_data["site"],
                             timestamp_ms=json_data["timestamp_ms"], screen_name=json_data["screen_name"])
                    p.save()
                except Exception as e:
                    print(e)
            if count > 0:
                t = thread.objects.get(id=id)
                t.posts = t.posts + count
                t.save()

        async def scrapping(url, j, lockhref, lockthread):
            async with aiohttp.ClientSession() as session:
                try:
                    response = await fetch(session, url)
                    await forum(response, lockhref, lockthread)
                except (aiohttp.ClientConnectionError, aiohttp.ClientConnectorError):
                    print("Going to sleep for 5min")
                    await asyncio.sleep(300)
                    await scrapping(url, j, lockhref, lockthread)
                except asyncio.TimeoutError:
                    print("Timeout Retrying")
                    await scrapping(url, j, lockhref, lockthread)
                except Exception as e:
                    if j < 2:
                        j += 1
                        print("error " + url + " " + str(e))
                        await scrapping(url, j, lockhref, lockthread)

        href.append("site.com/index.php")
        link.add("site.com/index.php")
        asyncio.set_event_loop(loop)
        lockhref = asyncio.Lock()
        lockthread = asyncio.Lock()
        no_of_concurrent_connections = 50
        # First pass: scrape the forum pages in batches.
        i = 0
        while i < len(href):
            if i + no_of_concurrent_connections < len(href):
                tasks = [loop.create_task(scrapping(href[j], 0, lockhref, lockthread))
                         for j in range(i, i + no_of_concurrent_connections)]
                i += no_of_concurrent_connections
            else:
                tasks = [loop.create_task(scrapping(href[j], 0, lockhref, lockthread))
                         for j in range(i, len(href))]
                i = len(href)
            loop.run_until_complete(asyncio.gather(*tasks))
        # Second pass: scrape the collected threads in batches.
        i = 0
        while i < len(tthread):
            if i + no_of_concurrent_connections < len(tthread):
                tasks = [loop.create_task(showthread(tthread[j], 0, lockthread))
                         for j in range(i, i + no_of_concurrent_connections)]
                i += no_of_concurrent_connections
            else:
                tasks = [loop.create_task(showthread(tthread[j], 0, lockthread))
                         for j in range(i, len(tthread))]
                i = len(tthread)
            loop.run_until_complete(asyncio.gather(*tasks))
    finally:
        print('ended')
        connection.close()
These threads are created using a decorator:

from threading import Thread

threads = {}  # keeps a reference to each started thread by name

def start_new_thread(function):
    def decorator(*args, **kwargs):
        name = str(args[0])
        t = Thread(name=name, target=function, args=args, kwargs=kwargs, daemon=True)
        threads[name] = t  # the original wrote `list[name] = t`, which shadows the builtin
        t.start()
    return decorator
I fixed the problem by closing the connection whenever I created a new process or thread; Django then automatically creates a new connection on the new process or thread whenever one is needed.
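For reference, a minimal sketch of that fix, assuming the same decorator pattern as above (the wrapper name is illustrative): close the inherited connection as the first thing the new thread does, so Django lazily opens a fresh per-thread connection on the next query.

from threading import Thread
from django.db import connection

def start_new_thread(function):
    def decorator(*args, **kwargs):
        def run(*a, **kw):
            # Discard any connection state inherited from the parent;
            # Django opens a new, per-thread connection on first use.
            connection.close()
            function(*a, **kw)
        t = Thread(target=run, args=args, kwargs=kwargs, daemon=True)
        t.start()
    return decorator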
I have code where the bot needs to read both of a user's reactions on a message, but with this code the bot never prints "second check": it stops after the first reaction comes true and never reads the second one.
reacttn = True

def check(reaction, user):
    return user == members.users[members.leader].user and reaction.message.id == ttreact.id

while reacttn == True:
    reaction, user = await client.wait_for("reaction_add", check=check)
    if len(members.users) == 2:
        if str(reaction.emoji) == "1️⃣":
            print("first check")
        if str(reaction.emoji) == "2️⃣":
            print("second check")
            await asyncio.sleep(5)
            reacttn = False
You can do something like this:
from collections.abc import Sequence

def sequence(seq):
    if seq is None:
        return ()
    if isinstance(seq, Sequence) and not isinstance(seq, str):
        return seq
    else:
        return (seq,)

def reaction_check(message=None, emoji=None, author=None, ignore_bot=True):
    message = sequence(message)
    message = tuple(m.id for m in message)
    emoji = sequence(emoji)
    author = sequence(author)

    def check(reaction, user):
        if ignore_bot and user.bot:
            return False
        if message and reaction.message.id not in message:
            return False
        if emoji and reaction.emoji not in emoji:
            return False
        if author and user not in author:
            return False
        return True

    return check
In the command:
msg = await ctx.send("react to this message!")
await msg.add_reaction("1️⃣")
await msg.add_reaction("2️⃣")
check = reaction_check(message=msg, author=member, emoji=("1️⃣", "2️⃣"))
reaction, user = await client.wait_for("reaction_add", check=check)
if reaction.emoji == "1️⃣":
    ...  # first logic
elif reaction.emoji == "2️⃣":
    ...  # second logic
from unittest.mock import patch

from aiohttp import web
from aiohttp.test_utils import AioHTTPTestCase, unittest_run_loop

async def test_coro(id):
    print("pass")

async def test_coro2():
    print("pass")

async def mock_test_coro(id):
    print("pass")

async def mock_test_coro2():
    print("hj")

class Test(AioHTTPTestCase):
    async def get_application(self):
        app = web.Application(debug=True)
        return app

    @unittest_run_loop
    @patch(__name__ + '.test_coro', side_effect=(lambda id: mock_test_coro(id)))
    @patch(__name__ + '.test_coro2', side_effect=(lambda: mock_test_coro2()))
    async def test_1(self, test_coro, test_coro2):
        await test_coro2()
        await test_coro(0)
This test fails because the mocks arrive in the wrong order (the no-parameter mock lands in the parameterized slot):

> test_coro = <AsyncMock name='test_coro2' id='139942609979472'>
> test_coro2 = <AsyncMock name='test_coro' id='139942609417456'>

Why is that? (If there are more than two mocks, they appear to be shuffled in random order.) For sync functions it was working fine.
What matters is the order in which the patches are applied: the last argument corresponds to the patch at the top and the first argument to the patch at the bottom.
@unittest_run_loop
@patch(__name__ + '.test_coro2', side_effect=(lambda: mock_test_coro2()))
@patch(__name__ + '.test_coro', side_effect=(lambda id: mock_test_coro(id)))
async def test_1(self, test_coro, test_coro2):
    await test_coro2()
    await test_coro(0)
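To see the rule in isolation, here is a self-contained sketch with plain synchronous patches (the module-level names are illustrative): the bottom @patch supplies the first mock argument and the top one the last, because decorators are applied bottom-up.

from unittest import mock

def f():
    return "real f"

def g():
    return "real g"

@mock.patch(__name__ + ".g")  # top decorator -> last argument
@mock.patch(__name__ + ".f")  # bottom decorator -> first argument
def demo(mock_f, mock_g):
    print(mock_f)  # <MagicMock name='f' id=...>
    print(mock_g)  # <MagicMock name='g' id=...>

demo()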
When I use self.send(content) in a loop, all the messages are sent at once instead of one by one.
The first self.send() in the "connecting" branch is executed perfectly, but all of the loop's self.send() messages are received by the client at once, after a delay of about 60 seconds. How do I make it send one at a time?
consumer.py
import time

from channels.generic.websockets import JsonWebsocketConsumer

class MyConsumer(JsonWebsocketConsumer):
    # Set to True if you want it, else leave it out
    strict_ordering = False

    def connect(self, message, **kwargs):
        super(MyConsumer, self).connect(message)

    def receive(self, content, **kwargs):
        if content['status'] == "connecting":
            content['status'] = "connected"
            self.send(content)
        elif content['status'] == "data":
            for p in range(5):
                content = {
                    'status': 'sending',
                    'polygon': p
                }
                self.send(content)
                time.sleep(15)
            self.close()

    def disconnect(self, message, **kwargs):
        pass
clientside.js
socket = new WebSocket("ws://" + location.host + "/mahaplans/");

socket.onopen = function () {
    var msg = {
        status: "connecting"
    };
    socket.send(JSON.stringify(msg));
};

socket.onmessage = function (e) {
    let status = JSON.parse(e.data);
    if (status["status"] == "connected") {
        var imageData = {
            // some data
        };
        socket.send(JSON.stringify(imageData));
    }
    if (status["status"] == "sending") {
        console.log(status["polygon"]);
    }
};

socket.onclose = function (event) {
    console.log("bye bye");
};

if (socket.readyState == WebSocket.OPEN) socket.onopen();
I'm using Python 2.7, Django 1.7 and uWSGI to stream a video/mp4 file to an iPhone player.
My code is as below:
def stream(request):
    with open('/path/video.mp4', 'r') as video_file:
        response = HttpResponse(video_file.read(), content_type='video/mp4')
        response['Content-Disposition'] = 'inline; filename=%s' % 'video.mp4'
        return response
When I use a small video (less than 1 MB), it streams in the browser, but in the iPhone player I get this error:
[uwsgi-http key: 127.0.0.1:8008 client_addr: 192.168.0.172
client_port: 14563] hr_write(): Broken pipe [plugins/http/http.c line
564]
And when the video is larger than 5 MB, it doesn't stream in either the browser or the iPhone player, failing with the same error.
I tried returning the file chunk by chunk using StreamingHttpResponse, as below:
def stream(request):
    def read(chunksize=8192):
        with open('/path/video.mp4', 'rb') as video_file:
            byte = video_file.read(chunksize)
            while byte:
                yield byte
                byte = video_file.read(chunksize)
    return StreamingHttpResponse(read(), content_type='video/mp4')
But there is the same error: Broken pipe.
FYI, I can stream PDF and image files; this problem only occurs with mp4 files. I also changed the content_type to 'video-mpeg', but then the browser downloaded the file, while I want to prevent the file from being downloaded.
What's your idea? Any solution?
I had the same problem and did a lot of digging before finding a workable solution!
Apparently the Accept-Ranges header is needed for HTML5 video controls to work (https://stackoverflow.com/a/24977085/4264463). So we need to both parse the requested range from HTTP_RANGE and return Content-Range with the response. The generator passed to StreamingHttpResponse also needs to return content based on this range (by offset and length). I've found the following snippet, which works great (from http://codegist.net/snippet/python/range_streamingpy_dcwatson_python):
import os
import re
import mimetypes
from wsgiref.util import FileWrapper

from django.http.response import StreamingHttpResponse

range_re = re.compile(r'bytes\s*=\s*(\d+)\s*-\s*(\d*)', re.I)

class RangeFileWrapper(object):
    def __init__(self, filelike, blksize=8192, offset=0, length=None):
        self.filelike = filelike
        self.filelike.seek(offset, os.SEEK_SET)
        self.remaining = length
        self.blksize = blksize

    def close(self):
        if hasattr(self.filelike, 'close'):
            self.filelike.close()

    def __iter__(self):
        return self

    def __next__(self):
        if self.remaining is None:
            # If remaining is None, we're reading the entire file.
            data = self.filelike.read(self.blksize)
            if data:
                return data
            raise StopIteration()
        else:
            if self.remaining <= 0:
                raise StopIteration()
            data = self.filelike.read(min(self.remaining, self.blksize))
            if not data:
                raise StopIteration()
            self.remaining -= len(data)
            return data
def stream_video(request, path):
    range_header = request.META.get('HTTP_RANGE', '').strip()
    range_match = range_re.match(range_header)
    size = os.path.getsize(path)
    content_type, encoding = mimetypes.guess_type(path)
    content_type = content_type or 'application/octet-stream'
    if range_match:
        first_byte, last_byte = range_match.groups()
        first_byte = int(first_byte) if first_byte else 0
        last_byte = int(last_byte) if last_byte else size - 1
        if last_byte >= size:
            last_byte = size - 1
        length = last_byte - first_byte + 1
        resp = StreamingHttpResponse(RangeFileWrapper(open(path, 'rb'), offset=first_byte, length=length),
                                     status=206, content_type=content_type)
        resp['Content-Length'] = str(length)
        resp['Content-Range'] = 'bytes %s-%s/%s' % (first_byte, last_byte, size)
    else:
        resp = StreamingHttpResponse(FileWrapper(open(path, 'rb')), content_type=content_type)
        resp['Content-Length'] = str(size)
    resp['Accept-Ranges'] = 'bytes'
    return resp
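For completeness, a minimal sketch of wiring this view up; the URL pattern, media root, and wrapper view are illustrative assumptions, not part of the original snippet (Django 1.x-style URLs to match the era of the answer):

import os

from django.conf.urls import url

from .views import stream_video

MEDIA_ROOT = '/var/www/media'  # assumption: where the video files live

def stream_media(request, name):
    # Resolve against a fixed directory so clients can't request arbitrary paths.
    return stream_video(request, os.path.join(MEDIA_ROOT, name))

urlpatterns = [
    url(r'^stream/(?P<name>[\w.-]+)$', stream_media),
]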
After a lot of searching, I didn't find a solution.
So I created a simple streaming server using Node.js, based on the html5-video-streamer.js reference, as below:
var http = require('http'),
    fs = require('fs'),
    url = require('url'),
    basePath = '/var/www/my_project/media/',
    baseUrl = 'Your Domain or IP',
    basePort = 8081;

http.createServer(function (req, res) {
    // Get params from request.
    var params = url.parse(req.url, true).query,
        filePath = basePath + params.type + '/' + params.name,
        stat = fs.statSync(filePath),
        total = stat.size;

    if (req.headers['range']) {
        var range = req.headers.range,
            parts = range.replace(/bytes=/, "").split("-"),
            partialstart = parts[0],
            partialend = parts[1],
            start = parseInt(partialstart, 10),
            end = partialend ? parseInt(partialend, 10) : total - 1,
            chunksize = (end - start) + 1;

        var file = fs.createReadStream(filePath, {start: start, end: end});
        res.writeHead(206, { 'Content-Range': 'bytes ' + start + '-' + end + '/' + total,
                             'Accept-Ranges': 'bytes',
                             'Content-Length': chunksize,
                             'Content-Type': 'video/mp4' });
        file.pipe(res);
        // Close file at end of stream.
        file.on('end', function () {
            file.close();
        });
    } else {
        // No Range header: serve the whole file with a plain 200.
        res.writeHead(200, { 'Content-Length': total,
                             'Content-Type': 'video/mp4' });
        var file = fs.createReadStream(filePath);
        file.pipe(res);
        // Close file at end of stream.
        file.on('end', function () {
            file.close();
        });
    }
}).listen(basePort, baseUrl);
Now I have a separate Node.js stream-server that serves the mp4 files (addressed via query parameters, e.g. ?type=<dir>&name=<file>) beside the Python project that provides my APIs.
I'm aware it's not a Django solution, but it works for me ;)
In my django-piston handler functions, it looks like I can basically do two things. Either I can return a specific status code with non-formatted content:
def create(...):
    ...
    resp = rc.BAD_REQUEST
    resp.content = 'Some string error message'
    return resp
Or I can return a dictionary of error messages, which gets formatted according to the specified emitter, but with a 200 status code:
def create(...):
    ...
    return error_dict
How can I return a dictionary or model object, formatted by the specified emitter, but with a customized status code?
How about this?
def create(...):
    ...
    resp = rc.BAD_REQUEST
    resp.content = error_dict
    return resp
To solve this, I added a new function to my subclass of BaseHandler, although it could be added to any handler. The function manually calls the emitter to format the content properly, then sets the content type; the status code comes from the rc response passed in.
class MyBaseHandler(BaseHandler):
    def render_response(self, request, response, content):
        em_info = None
        for ext in Emitter.EMITTERS:
            if request.path.find('.' + ext) > -1:
                em_info = Emitter.get(ext)
        if not em_info:
            return rc.NOT_FOUND
        RequestEmitter = em_info[0]
        emitter = RequestEmitter(content, typemapper, self, self.fields, False)
        response.content = emitter.render(request)
        response['Content-Type'] = em_info[1]
        return response
Called like so:
def create(...):
    ...
    return self.render_response(request, rc.BAD_REQUEST, error_dict)