I am trying to create a REST API for a database that already exists. The problem is that the data in the database are refreshed by a bash script every hour, so there is no related Django model for them. I am therefore working on a GET endpoint in Django to retrieve the data. Currently I am using an APIView like this:
class RetrieveData(APIView):
    def get(self, request):
        # a new connection is opened on every request
        conn = psycopg2.connect(host=..., database=..., user=..., password=..., port=...)
        cur = conn.cursor()
        cur.execute(f'Select * from ....')
        fetched_data = cur.fetchone()
        cur.close()
        res_list = [x for x in fetched_data]
        json_res_data = {"id": res_list[0],
                         "date": res_list[1],
                         "data": res_list[2]}
        conn.close()
        return Response({"data": json_res_data})
The problem I have is that connecting to the database on every request to retrieve the data and then return the response is quite slow (~2 s per request). I am also worried about how this will behave if many requests are made at the same time.
So my question is whether there are any suggestions or solutions you would propose.
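For illustration, one way to avoid paying the connection cost on every request is to keep a connection pool open at module level and borrow from it in the view. This is a minimal sketch using psycopg2's SimpleConnectionPool; the pool size and the elided connection/query details are placeholders, not part of the original question:

from psycopg2 import pool
from rest_framework.views import APIView
from rest_framework.response import Response

# created once at import time and reused by every request (sizes are illustrative)
db_pool = pool.SimpleConnectionPool(1, 10, host=..., database=..., user=..., password=..., port=...)

class RetrieveData(APIView):
    def get(self, request):
        conn = db_pool.getconn()          # borrow a connection from the pool
        try:
            cur = conn.cursor()
            cur.execute('Select * from ....')
            row = cur.fetchone()
            cur.close()
            return Response({"data": {"id": row[0], "date": row[1], "data": row[2]}})
        finally:
            db_pool.putconn(conn)         # hand it back instead of closing it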
Related
I'm currently using a Docker & Django setup. I have to fill a database with data from API requests. I was hoping to do this every time someone visited a certain page (pretty easy: just have your views.py call the function that fills the database, and voilà).
But the problem is that the function takes a long time, several minutes from within Django (and about half that time from Spyder).
So I usually just get a TimeOut and the page never loads (I admit I am making a lot of API requests).
I've read some stuff on using Celery but am not quite sure how it's supposed to work.
Anyone know how I could get around this to be able to load the database?
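For reference, since Celery is mentioned: the usual pattern is to move the slow work into a task and have the view only enqueue it, so the request returns immediately. A rough sketch, assuming Celery is already configured with a broker; the module layout and the task name fill_db_task are made up for illustration, and fill_db is the function shown further down:

# tasks.py (hypothetical module)
from celery import shared_task
from .fill_functions import fill_db   # wherever fill_db actually lives

@shared_task
def fill_db_task():
    fill_db()

# views.py
def index(request):
    fill_db_task.delay()      # enqueue the work and return immediately
    ...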
Edit: some code
Views.py
def index(request):
    fill_db()
    context = {}
    context['segment'] = 'index'
    html_template = loader.get_template('index.html')
    return HttpResponse(html_template.render(context, request))
fill_db function
def fill_db():
    fill_agencies()
    fill_companies()
    fill_contracts()
    fill_orders()
    fill_projects()
    fill_resources()
Example of a fill function:
r = pip._vendor.requests.get(BASE_URL + EXTENSION, auth=(USER, PASS))
data0 = json.loads(r.text)
conn = sqlite3.connect('/app/database.sqlite3')
c = conn.cursor()
for client in data0['data']:
    BoondID = client['id']
    name = client['attributes']['name']
    expertiseArea = client['attributes']['expertiseArea']
    town = client['attributes']['town']
    country = client['attributes']['country']
    mainManager = client['relationships']['mainManager']['data']['id']
    values = (BoondID, name, expertiseArea, town, country, mainManager)
    c.execute("INSERT OR REPLACE INTO COMPANIES (BoondID,name,expertiseArea,town,country,mainManager) VALUES (?,?,?,?,?,?);", values)
conn.commit()
conn.close()
Solved.
I used python's threading library.
I defined
agencies_thread = threading.Thread(target=fill_agencies, name="Database Updater")
and called agencies_thread.start() inside my views function.
This works fine.
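A rough sketch of how the view ends up looking with this approach, using only the pieces shown above (only the agencies thread is shown; the other fill functions could be started the same way):

import threading

def index(request):
    # start the slow database fill in the background so the page can render immediately
    agencies_thread = threading.Thread(target=fill_agencies, name="Database Updater")
    agencies_thread.start()
    context = {'segment': 'index'}
    html_template = loader.get_template('index.html')
    return HttpResponse(html_template.render(context, request))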
Let's say that we have a database with existing data; the data is updated by a bash script and there is no related Django model for it. What is the best way to create an endpoint in Django that handles a GET request and retrieves the data?
What I mean is that if there were a model, we could use something like:
class ModelList(generics.ListCreateAPIView):
    queryset = Model.objects.first()
    serializer_class = ModelSerializer
The workaround I tried was to create an APIView and do something like this inside it:
class RetrieveData(APIView):
    def get(self, request):
        conn = None
        try:
            conn = psycopg2.connect(host=..., database=..., user=..., password=..., port=...)
            cur = conn.cursor()
            cur.execute(f'Select * from ....')
            fetched_data = cur.fetchone()
            cur.close()
            res_list = [x for x in fetched_data]
            json_res_data = {"id": res_list[0],
                             "date": res_list[1],
                             "data": res_list[2]}
            return Response({"data": json_res_data})
        except Exception as e:
            return Response({"error": 'Error'})
        finally:
            if conn is not None:
                conn.close()
I don't believe this is a good solution, and it is also a bit slow (~2 s per request). Apart from that, if many GET requests are made at the same time, isn't that going to create a problem on the DB instance, e.g. table locks?
So I was wondering what a better (or the best) solution for this kind of problem would be.
Appreciate your time!
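For what it's worth, another direction often considered for a table that exists and is maintained outside Django is an unmanaged model: Django never creates or migrates the table, but the ORM (and therefore the DRF generics) can still read from it, and connections are reused between requests. This is only a sketch; the model name, db_table, and field types below are placeholders that would have to match the real table:

from django.db import models
from rest_framework import generics, serializers

class ExternalData(models.Model):
    date = models.DateTimeField()        # adjust field types to the real columns
    data = models.TextField()

    class Meta:
        managed = False                  # Django will not create or migrate this table
        db_table = 'external_data'       # the existing table filled by the bash script

class ExternalDataSerializer(serializers.ModelSerializer):
    class Meta:
        model = ExternalData
        fields = ['id', 'date', 'data']

class ExternalDataList(generics.ListAPIView):
    queryset = ExternalData.objects.all()
    serializer_class = ExternalDataSerializer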
I am adding data through SQLAlchemy, but sometimes the data does not get updated or inserted into the database, even though the commit succeeds and I can see the data in memory in the session object, i.e.
session.identity_map
Running SQLAlchemy 1.3.3, Python 2.7, Ubuntu 18.04.
from sqlalchemy.orm import Session
from . import Errors as ExecuteErrors

class Errors(object):

    def __init__(self, sqlalchemy_engine, d):
        self.sqlalchemy_engine = sqlalchemy_engine
        self.d = d

    def upsert(self, error):
        session = Session(self.sqlalchemy_engine)
        row = session.query(ExecuteErrors).filter_by(**{'c_name': error['c_name'], 'c_type': error['c_type'],
                                                        'f_name': error['f_name']}).scalar()
        session.close()
        if row:
            self.update(error)
        else:
            self.insert(error)

    def insert(self, error):
        e = ExecuteErrors(**{'c_name': error['c_name'], 'c_type': error['c_type'], 'f_name': error['f_name'],
                             'msg': error['msg'], 'details': error['details']})
        session = Session(self.sqlalchemy_engine, expire_on_commit=False)
        session.add(e)
        session.identity_map
        session.commit()
        session.close()

    def update(self, error):
        session = Session(self.sqlalchemy_engine, expire_on_commit=False)
        session.query(ExecuteErrors).filter_by(**{'c_name': error['c_name'], 'c_type': error['c_type'],
                                                  'f_name': error['f_name']}).update({'msg': error['msg'], 'details': error['details']})
        session.commit()
        session.close()

    def get_errors(self):
        session = Session(self.sqlalchemy_engine)
        e = session.query(ExecuteErrors).all()
        session.close()
        return e

    def clear(self):
        session = Session(self.sqlalchemy_engine)
        session.query(ExecuteErrors).delete()
        session.commit()
        session.close()
Calling this with:
e = Errors(engine, 'emp')
e.upsert({'c_name': 'filter', 'c_type': 'task', 'f_name': 'f1', 'msg': 'TypeError', 'details': 'xyz'})
This should add a row to the database or update an existing row with the new data.
It works for some inserts and not for others.
You could find a possible workaround by explicitly flushing your session when needed.
That said, I think you should reconsider the way you're using sessions. Sessions are intended to manage database connections, but you're using them as if they were the actual connections.
IMHO, a better approach would be to create a session at Errors instantiation and use it when needed in all your methods, as sketched below.
An even better way would be to create a session at the beginning of your "calling" module and pass it to the Errors instantiation, and to any other object that needs database access.
Doing this, you might even see better performance, and it may solve your problem.
More details about how to manage sessions are in the SQLAlchemy docs.
EDIT: In addition, the SQLAlchemy docs list some potential problems when used with SQLite. One of them could be the cause of your issue.
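A minimal sketch of the single-session idea, reusing the names from the question (illustrative only, not a drop-in fix):

from sqlalchemy.orm import Session
from . import Errors as ExecuteErrors

class Errors(object):

    def __init__(self, sqlalchemy_engine, d):
        self.d = d
        # one session created at instantiation and reused by every method
        self.session = Session(sqlalchemy_engine, expire_on_commit=False)

    def upsert(self, error):
        keys = {'c_name': error['c_name'], 'c_type': error['c_type'], 'f_name': error['f_name']}
        row = self.session.query(ExecuteErrors).filter_by(**keys).scalar()
        if row:
            self.session.query(ExecuteErrors).filter_by(**keys).update(
                {'msg': error['msg'], 'details': error['details']})
        else:
            self.session.add(ExecuteErrors(msg=error['msg'], details=error['details'], **keys))
        self.session.commit()

    def close(self):
        self.session.close()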
I would like to know the number of SQL queries that were executed on a psycopg2 connection.
Is there a way to get this number?
I would like to emit a warning if an HTTP request produces too many statements.
I am running a Django application. If DEBUG is True, then I have connection.queries, but I would like to get this value from a production server.
Update
I want numbers (statistics) from the prod environment. This question is not about debugging a particular HTTP request.
Have a look at django-silk. It is a profiling tool that records metrics like response times and the number of queries.
If you want to roll your own solution and you are using Django 2.0+, you can create a middleware with a connection execute wrapper. The documentation even showcases a QueryLogger class:
import time
from django.db import connection

class QueryLogger:

    def __init__(self):
        self.queries = []

    def __call__(self, execute, sql, params, many, context):
        current_query = {'sql': sql, 'params': params, 'many': many}
        start = time.time()
        try:
            result = execute(sql, params, many, context)
        except Exception as e:
            current_query['status'] = 'error'
            current_query['exception'] = e
            raise
        else:
            current_query['status'] = 'ok'
            return result
        finally:
            duration = time.time() - start
            current_query['duration'] = duration
            self.queries.append(current_query)

class QueryLoggingMiddleware:

    def __init__(self, get_response):
        self.get_response = get_response

    def __call__(self, request):
        ql = QueryLogger()
        with connection.execute_wrapper(ql):
            response = self.get_response(request)
        # do something with ql.queries here
        return response
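To actually enable it, the middleware has to be registered in settings; the dotted path below is a placeholder for wherever the class lives:

# settings.py
MIDDLEWARE = [
    # ... existing middleware ...
    'myproject.middleware.QueryLoggingMiddleware',
]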
The number of queries made in production and in development is the same, provided the database and everything else in the environment are the same.
I recommend using Django Debug Toolbar, as mentioned, to see how many queries your view is making and to rethink your code based on that. If you want to look at the performance of those queries, I recommend PostgreSQL's EXPLAIN command.
I usually copy the query and paste it with EXPLAIN into my PostgreSQL database shell. See this: http://recordit.co/rGZ2SAo7PX
New to django. I'm doing my best to implement CRUD using Django, mongodb, and mongoengine. I'm able to query the database and render my page with the correct information from the database. I'm also able to change some document fields using javascript and do an Ajax POST back to the original Django View class with the correct csrf token.
The data payload I'm sending back and forth is a list of each Document Model (VirtualPageModel) serialized to json (each element contains ObjectId string along with the other specific fields from the Model.)
This is where it starts getting murky. In order to update the original document in my View Class post function I do an additional query using the object id and loop through the dictionary items, setting the respective fields each time. I then call save and any new data is pushed to the Mongo collection correctly.
I'm not sure if what I'm doing to update existing documents is correct or in the spirit of django's abstracted database operations. The deeper I get the more I feel like I'm not using some fundamental facility earlier on (provided by either django or mongoengine) and because of this I'm having to make things up further downstream.
The way my code is now, I would not be able to create a new document (although that's easy enough to fix). What I'm really curious about is how I would know when to delete a document that existed in the initial query but was removed by the user/JavaScript code. Am I overthinking things, and should the contents of my POST simply contain a list of ObjectIds to delete (which sounds like a security risk, although this would be an internal tool)?
I was assuming that my View class might maintain either the original document objects (or simply their ObjectIds) from the query, so I could do my comparisons against that set, but I can't seem to get that information to persist (as a class variable in VolumeSplitterView) from its inception to when I receive the POST at the end.
I would appreciate if anyone could take a look at my code. It really seems like the "ease of use" facilities of Django start to break when paired with Mongo and/or a sufficiently complex Model schema which needs to be directly available to javascript as opposed to simple Forms.
I was going to use this dev work to become Django battle-hardened in order to tackle a future app which will be much more complicated and important. I can hack on this thing all day and make it functional, but what I'm really interested in is anyone's experience using Django + MongoDB + MongoEngine to implement CRUD on a database schema which is not very Form-centric (think more nested metadata).
Thanks.
model.py: uses mongoengine Field types.
class MongoEncoder(JSONEncoder):
    def default(self, o):
        if isinstance(o, VirtualPageModel):
            data_dict = (o.to_mongo()).to_dict()
            if isinstance(data_dict.get('_id'), ObjectId):
                data_dict.update({'_id': str(data_dict.get('_id'))})
            return data_dict
        else:
            return JSONEncoder.default(self, o)

class SubTypeModel(EmbeddedDocument):
    filename = StringField(max_length=200, required=True)
    page_num = IntField(required=True)

class VirtualPageModel(Document):
    volume = StringField(max_length=200, required=True)
    start_physical_page_num = IntField()
    physical_pages = ListField(EmbeddedDocumentField(SubTypeModel),
                               default=list)
    error_msg = ListField(StringField(),
                          default=list)

    def save(self, *args, **kwargs):
        print('In save: {}'.format(kwargs))
        for k, v in kwargs.items():
            if k == 'physical_pages':
                self.physical_pages = []
                for a_page in v:
                    tmp_pp = SubTypeModel()
                    for p_k, p_v in a_page.items():
                        setattr(tmp_pp, p_k, p_v)
                    self.physical_pages.append(tmp_pp)
            else:
                setattr(self, k, v)
        return super(VirtualPageModel, self).save(*args, **kwargs)
views.py: My attempt at a view
class VolumeSplitterView(View):
    # initial = {'key': 'value'}
    template_name = 'click_model/index.html'
    vol = None
    start = 0
    end = 20

    def get(self, request, *args, **kwargs):
        self.vol = self.kwargs.get('vol', None)
        records = self.get_records()
        records = records[self.start:self.end]
        vp_json_list = []
        img_filepaths = []
        for vp in records:
            vp_json = json.dumps(vp, cls=MongoEncoder)
            vp_json_list.append(vp_json)
            for pp in vp.physical_pages:
                filepath = get_file_path(vp, pp.filename)
                img_filepaths.append(filepath)
        data_dict = {
            'img_filepaths': img_filepaths,
            'vp_json_list': vp_json_list
        }
        return render_to_response(self.template_name,
                                  {'data_dict': data_dict},
                                  RequestContext(request))

    def get_records(self):
        return VirtualPageModel.objects(volume=self.vol)

    def post(self, request, *args, **kwargs):
        if request.is_ajax():  # is_ajax is a method, not an attribute
            vp_dict_list = json.loads(request.POST.get('data', []))
            for vp_dict in vp_dict_list:
                o_id = vp_dict.pop('_id')
                original_doc = VirtualPageModel.objects.get(id=o_id)
                try:
                    original_doc.save(**vp_dict)
                except Exception:
                    print(traceback.format_exc())
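As for knowing what to delete: one possible sketch, assuming the client posts back every document it still wants to keep (the trade-off the question already raises), is to diff the posted ObjectIds against what is stored for that volume before saving. Names are reused from the code above; this is illustrative only:

def post(self, request, *args, **kwargs):
    vp_dict_list = json.loads(request.POST.get('data', '[]'))
    posted_ids = {vp_dict['_id'] for vp_dict in vp_dict_list}   # collect before popping ids
    vol = self.kwargs.get('vol', None)
    stored_ids = {str(doc.id) for doc in VirtualPageModel.objects(volume=vol)}
    # anything that was stored but not posted back is treated as deleted by the user
    for gone_id in stored_ids - posted_ids:
        VirtualPageModel.objects(id=gone_id).delete()
    # ... then update/save the posted documents exactly as in the original post() ...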