EDIT 2022-10-04 18:40
I've tried using bulk_update and bulk_create as these method only query database once but still have the same issue
would appreciate any help/explanation on this issue
'''
Task to edit data correction forms (DCF) online
'''
#shared_task(bind=True)
def DCF_edition(self):
print(timezone.now())
DCF_BEFORE_UPDATE = pd.DataFrame.from_records(DataCorrectionForm.objects.all().values())
if not DCF_BEFORE_UPDATE.empty :
DCF_BEFORE_UPDATE = DCF_BEFORE_UPDATE.rename(columns={"patient": "pat"})
DCF_BEFORE_UPDATE = DCF_BEFORE_UPDATE.astype({'record_date': str,'created_date': str})
DCF_BEFORE_UPDATE['dcf_status'] = DCF_BEFORE_UPDATE.apply(lambda status: 0, axis=1)
# list of dataframe to concat
data = []
# load queries definition
queries = queries_definitions()
# print(queries)
if not queries.empty:
for index, row in queries.iterrows():
print('Query ide',row['ide'])
# print(row['ide'],row['query_type'],row['crf_name'].lower(),row['crf identification date'],row['variable_name'],row['variable_label'],row['query_condition'],row['fields_to_display'])
try:
missing_or_inconsistent = missing_or_inconsistent_data(row['ide'],row['query_type'],row['crf_name'].lower(),row['crf identification date'],row['variable_name'],row['variable_label'],row['query_condition'],row['fields_to_display']) #.iloc[:10] #to limit rows
missing_or_inconsistent.columns.values[2] = 'record_date' # rename the date column (that have database name)
missing_or_inconsistent['dcf_ide'] = str(row['ide']) + '_' + row['variable_name'] + '_' + missing_or_inconsistent[row['crf primary key']].astype(str)
missing_or_inconsistent['category'] = row['query_type']
missing_or_inconsistent['crf'] = row['crf_name']
missing_or_inconsistent['crf_ide'] = missing_or_inconsistent[row['crf primary key']]
missing_or_inconsistent['field_name'] = row['variable_name']
missing_or_inconsistent['field_label'] = row['variable_label']
missing_or_inconsistent['field_value'] = missing_or_inconsistent[row['variable_name']]
missing_or_inconsistent['message'] = row['query_message']
missing_or_inconsistent['query_id'] = 'Query ide ' + str(row['ide'])
missing_or_inconsistent['dcf_status'] = 1
missing_or_inconsistent['created_date'] = timezone.now()
missing_or_inconsistent['deactivated'] = False
missing_or_inconsistent['comments'] = None
data.append(missing_or_inconsistent[['ide','dcf_ide','category','crf','crf_ide','pat','record_date','field_name','field_label','message','field_value','dcf_status','created_date','query_id','deactivated','comments']])
dcf = pd.concat(data)
except Exception as e:
Log.objects.create(dcf_edition_status=0,dcf_edition_exception=str(e)[:200])
continue
DCF_AFTER_UPDATE = pd.concat([DCF_BEFORE_UPDATE,dcf])
DCF_AFTER_UPDATE['duplicate'] = DCF_AFTER_UPDATE.duplicated(subset=['dcf_ide'],keep='last')
DCF_AFTER_UPDATE['dcf_status'] = DCF_AFTER_UPDATE.apply(lambda row: 2 if row['duplicate'] else row['dcf_status'],axis=1)
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.drop_duplicates(subset=['dcf_ide'],keep='first').drop(columns=['duplicate'])
DCF_AFTER_UPDATE.rename(columns = {'pat':'patient',}, inplace = True)
# Cast date into string format to be able to dumps data
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.astype({'record_date': str}) if not DCF_AFTER_UPDATE.empty else DCF_AFTER_UPDATE
records_to_update = [
DataCorrectionForm(
ide=record['ide'],
dcf_ide=record['dcf_ide'],
category=record['category'],
crf=record['crf'],
crf_ide=record['crf_ide'],
patient=record['patient'],
record_date=record['record_date'],
field_name=record['field_name'],
field_label=record['field_label'],
message=record['message'],
field_value=record['field_value'],
dcf_status=record['dcf_status'],
created_date=record['created_date'],
query_id=record['query_id'],
deactivated=record['deactivated'],
comments=record['comments']
) for i, record in DCF_AFTER_UPDATE[(DCF_AFTER_UPDATE['dcf_status'] != 1)].iterrows()
]
if records_to_update:
DataCorrectionForm.objects.bulk_update(records_to_update,['dcf_status'])
records_to_create = [
DataCorrectionForm(
dcf_ide=record['dcf_ide'],
category=record['category'],
crf=record['crf'],
crf_ide=record['crf_ide'],
patient=record['patient'],
record_date=record['record_date'],
field_name=record['field_name'],
field_label=record['field_label'],
message=record['message'],
field_value=record['field_value'],
dcf_status=record['dcf_status'],
created_date=record['created_date'],
query_id=record['query_id'],
deactivated=record['deactivated'],
comments=record['comments']
) for i, record in DCF_AFTER_UPDATE[(DCF_AFTER_UPDATE['dcf_status'] == 1)].iterrows()
]
if records_to_create:
DataCorrectionForm.objects.bulk_create(records_to_create)
EDIT 2022-10-04 13:40
I've tried to "optimized" code using update_or_create() method but doesn't change anything
I still have an OperationalError with the line DataCorrectionForm.objects.update_or_create(...)
How can I update my database?
'''
Task to edit data correction forms (DCF) online
'''
#shared_task(bind=True)
def DCF_edition(self):
DCF_BEFORE_UPDATE = pd.DataFrame.from_records(DataCorrectionForm.objects.all().values())
if not DCF_BEFORE_UPDATE.empty :
DCF_BEFORE_UPDATE.drop(columns=['ide'])
DCF_BEFORE_UPDATE = DCF_BEFORE_UPDATE.rename(columns={"patient": "pat"})
DCF_BEFORE_UPDATE = DCF_BEFORE_UPDATE.astype({'record_date': str,'created_date': str})
DCF_BEFORE_UPDATE['dcf_status'] = DCF_BEFORE_UPDATE.apply(lambda status: 0, axis=1)
# list of dataframe to concat
data = []
# load queries definition
queries = queries_definitions()
if not queries.empty:
for index, row in queries.iterrows()
try:
missing_or_inconsistent = missing_or_inconsistent_data(row['ide'],row['query_type'],row['crf_name'].lower(),row['crf identification date'],row['variable_name'],row['variable_label'],row['query_condition'],row['fields_to_display']) #.iloc[:10] #to limit rows
missing_or_inconsistent.columns.values[2] = 'record_date' # rename the date column (that have database name)
missing_or_inconsistent['dcf_ide'] = str(row['ide']) + '_' + row['variable_name'] + '_' + missing_or_inconsistent[row['crf primary key']].astype(str)
missing_or_inconsistent['category'] = row['query_type']
missing_or_inconsistent['crf'] = row['crf_name']
missing_or_inconsistent['crf_ide'] = missing_or_inconsistent[row['crf primary key']]
missing_or_inconsistent['field_name'] = row['variable_name']
missing_or_inconsistent['field_label'] = row['variable_label']
missing_or_inconsistent['field_value'] = missing_or_inconsistent[row['variable_name']]
missing_or_inconsistent['message'] = row['query_message']
missing_or_inconsistent['DEF'] = 'Query ide ' + str(row['ide'])
missing_or_inconsistent['dcf_status'] = 1
missing_or_inconsistent['created_date'] = timezone.now()
missing_or_inconsistent['deactivated'] = False
missing_or_inconsistent['comments'] = None
data.append(missing_or_inconsistent[['dcf_ide','category','crf','crf_ide','pat','record_date','field_name','field_label','message','field_value','dcf_status','created_date','DEF','deactivated','comments']])
dcf = pd.concat(data)
except Exception as e:
Log.objects.create(dcf_edition_status=0,dcf_edition_exception=str(e)[:200])
continue
DCF_AFTER_UPDATE = pd.concat([DCF_BEFORE_UPDATE,dcf])
DCF_AFTER_UPDATE['duplicate'] = DCF_AFTER_UPDATE.duplicated(subset=['dcf_ide'],keep='last')
DCF_AFTER_UPDATE['dcf_status'] = DCF_AFTER_UPDATE.apply(lambda row: 2 if row['duplicate'] else row['dcf_status'],axis=1)
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.drop_duplicates(subset=['dcf_ide'],keep='first').drop(columns=['duplicate'])
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.drop(['DEF'], axis=1)
DCF_AFTER_UPDATE.rename(columns = {'pat':'patient',}, inplace = True)
# Cast date into string format to be able to dumps data
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.astype({'record_date': str}) if not DCF_AFTER_UPDATE.empty else DCF_AFTER_UPDATE
records = DCF_AFTER_UPDATE.to_dict(orient='records')
for record in records:
DataCorrectionForm.objects.update_or_create(
dcf_ide=record['dcf_ide'], # filter to search for existing objects => should not be pass to default (if not IntegrityError)
defaults = {
'category':record['category'],
'crf':record['crf'],
'crf_ide':record['crf_ide'],
'patient':record['patient'],
'record_date':record['record_date'],
'field_name':record['field_name'],
'field_label':record['field_label'],
'message':record['message'],
'field_value':record['field_value'],
'dcf_status':record['dcf_status'],
'created_date':record['created_date'],
# 'DEF':record['DEF'],
'deactivated':record['deactivated'],
'comments':record['comments']
}
)
Log.objects.create(dcf_edition_status=1)
return True
EDIT 2022-10-03 17:00
in fact reading CAVEATS:
The development server creates a new thread for each request it
handles, negating the effect of persistent connections. Don’t enable
them during development.
EDIT 2022-10-03 16:00
Django 2.2.5
I have tried to set DATABASES parameter CONN_MAX_AGE as per Django documentation but it doesn't change anythings
Default: 0
The lifetime of a database connection, as an integer of seconds. Use 0
to close database connections at the end of each request — Django’s
historical behavior — and None for unlimited persistent connections.
I use Celery task and got an error I do not understand.
I loop over a table (that contain query definitions) to edit missing/inconsistent data in a database (using API) and registered discrepencies in another table.
If I run query one at a time, it works but when I try to loop over queries, I got an error
OperationalError('server closed the connection unexpectedly\n\tThis probably means the server terminated abnormally\n\tbefore or while processing the request.\n')
def DCF_edition(self):
DCF_BEFORE_UPDATE = pd.DataFrame.from_records(DataCorrectionForm.objects.all().values())
DCF_BEFORE_UPDATE = DCF_BEFORE_UPDATE.astype({'record_date': str,'created_date': str}) if not DCF_BEFORE_UPDATE.empty else DCF_BEFORE_UPDATE
data = []
# load queries definition
queries = queries_definitions()
if not queries.empty:
for index, row in queries.iterrows():
try:
missing_or_inconsistent = missing_or_inconsistent_data(row['ide'],row['query_type'],row['crf_name'].lower(),row['crf identification
data.append(missing_or_inconsistent[['dcf_ide','category','crf','crf_ide','pat','record_date','field_name','field_label','message','field_value','dcf_status','DEF','deactivated']])
DCF_AFTER_UPDATE = pd.concat(data)
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.drop_duplicates(keep='last')
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.drop(['DEF'], axis=1)
DCF_AFTER_UPDATE.rename(columns = {'pat':'patient',}, inplace = True)
except Exception as e:
Log.objects.create(dcf_edition_status=0,dcf_edition_exception=str(e)[:200])
continue
# Cast date into string format to be able to dumps data
DCF_AFTER_UPDATE = DCF_AFTER_UPDATE.astype({'record_date': str}) if not DCF_AFTER_UPDATE.empty else DCF_AFTER_UPDATE
records = json.loads(json.dumps(list(DCF_AFTER_UPDATE.T.to_dict().values())))
for record in records:
if not DCF_BEFORE_UPDATE.empty and record['dcf_ide'] in DCF_BEFORE_UPDATE.values:
DataCorrectionForm.objects.filter(dcf_ide=record['dcf_ide']).update(dcf_status=2)
else:
DataCorrectionForm.objects.get_or_create(**record)
# resolved dcf => status=0
if not DCF_BEFORE_UPDATE.empty:
records = json.loads(json.dumps(list(DCF_BEFORE_UPDATE.T.to_dict().values())))
for record in records:
if record['dcf_ide'] not in DCF_AFTER_UPDATE.values:
DataCorrectionForm.objects.filter(dcf_ide=record['dcf_ide']).update(dcf_status=0)
Log.objects.create(dcf_edition_status=1)
return True
The lifetime of a database connection, as an integer of seconds. Use 0 to close database connections at the end of each request — Django’s historical behavior — and None for unlimited persistent connections.
It seems that your task is long running task and need to hold the db connection for a long period. Did you try to set it to None
DATABASES = {
'default': env.db(),
}
# https://docs.djangoproject.com/en/3.1/ref/settings/#conn-max-age
DATABASES['default']['CONN_MAX_AGE'] = None
How long does your task need to finish? It could be another problem with server database setting, ex tcp_keepalives_ilde..
I am trying to create a website for myself. It is kind of a Youtube on a local network. I am using video.js (have also tried Plyr.io) to play the video, but i can not fast forward the video. Or go back in the video. I can only play it from begining to the end. If i try to skip forward it only resets. What am I doing wrong?
Thanks in advance!
The behaviour sounds like the server doesn't implement range headers. When you try to seek, it returns the start of the file and not the part requested. If you try Safari you'll probably find it won't play at all. Check questions like Byte Ranges in Django
Yes, I all face the similar issue in using the video js library. But with the help from Byte range in django I solve this issue in video.js library by adding the RangeMiddleware. But I can skip or forward the video.
class RangesMiddleware(MiddlewareMixin):
def process_response(self, request, response):
if response.status_code != 200 or not hasattr(response, "file_to_stream"):
return response
http_range = request.META.get("HTTP_RANGE")
if not (
http_range
and http_range.startswith("bytes=")
and http_range.count("-") == 1
):
return response
if_range = request.META.get("HTTP_IF_RANGE")
if (
if_range
and if_range != response.get("Last-Modified")
and if_range != response.get("ETag")
):
return response
f = response.file_to_stream
statobj = os.fstat(f.fileno())
start, end = http_range.split("=")[1].split("-")
if not start: # requesting the last N bytes
start = max(0, statobj.st_size - int(end))
end = ""
start, end = int(start or 0), int(end or statobj.st_size - 1)
assert 0 <= start < statobj.st_size, (start, statobj.st_size)
end = min(end, statobj.st_size - 1)
f.seek(start)
old_read = f.read
f.read = lambda n: old_read(min(n, end + 1 - f.tell()))
response.status_code = 206
response["Content-Length"] = end + 1 - start
response["Content-Range"] = "bytes %d-%d/%d" % (start, end, statobj.st_size)
return response
Hello I was hoping someone could help me with my college coursework, I have an issue with my code. I keep running into a memory error with my data export.
Is there any way I can reduce the memory that is being used or is there a different approach I can take?
For the course work I am given a file of 300 records about customer orders from a CSV file and then I have to export the Friday records to a new CSV file. Also I am required to print the most popular method for customer's orders and the total money raised from the orders but I have an easy plan for that.
This is my first time working with CSV so I'm not sure how to do it. When I run the program it tends to crash instantly or stop responding. Once it appeared with 'MEMORY ERROR' however that is all it appeared with. I'm using a college provided computer so I am not sure on the exact specs but I know it runs 4GB of memory.
defining count occurences predefined function
def countOccurences(target,array):
counter = 0
for element in array:
if element == target:
counter= counter + 1
print counter
return counter
creating user defined functions for the program
dataInput function used for collecting data from provided file
def dataInput():
import csv
recordArray = []
customerArray = []
f = open('E:\Portable Python 2.7.6.1\Choral Shield Data File(CSV).csv')
csv_f = csv.reader(f)
for row in csv_f:
customerArray.append(row[0])
ticketID = row[1]
day, area = datasplit(ticketID)
customerArray.append(day)
customerArray.append(area)
customerArray.append(row[2])
customerArray.append(row[3])
recordArray.append(customerArray)
f.close
return recordArray
def datasplit(variable):
day = variable[0]
area = variable[1]
return day,area
def dataProcessing(recordArray):
methodArray = []
wed_thursCost = 5
friCost = 10
record = 0
while record < 300:
method = recordArray[record][4]
methodArray.append(method)
record = record+1
school = countOccurences('S',methodArray)
website = countOccurences('W',methodArray)
if school > website:
school = True
elif school < website:
website = True
dayArray = []
record = 0
while record < 300:
day = recordArray[record][1]
dayArray.append(day)
record = record + 1
fridays = countOccurences('F',dayArray)
wednesdays = countOccurences('W',dayArray)
thursdays = countOccurences('T', dayArray)
totalFriCost = fridays * friCost
totalWedCost = wednesdays * wed_thursCost
totalThurCost = thursdays * wed_thursCost
totalCost = totalFriCost + totalWedCost + totalThurCost
return totalCost,school,website
My first attempt to writing to a csv file
def dataExport(recordArray):
import csv
fridayRecords = []
record = 0
customerIDArray = []
ticketIDArray = []
numberArray = []
methodArray = []
record = 0
while record < 300:
if recordArray[record][1] == 'F':
fridayRecords.append(recordArray[record])
record = record + 1
with open('\Courswork output.csv',"wb") as f:
writer = csv.writer(f)
for record in fridayRecords:
writer.writerows(fridayRecords)
f.close
My second attempt at writing to the CSV file
def write_file(recordArray): # write selected records to a new csv file
CustomerID = []
TicketID = []
Number = []
Method = []
counter = 0
while counter < 300:
if recordArray[counter][2] == 'F':
CustomerID.append(recordArray[counter][0])
TicketID.append(recordArray[counter][1]+recordArray[counter[2]])
Number.append(recordArray[counter][3])
Method.append(recordArray[counter][4])
fridayRecords = [] # a list to contain the lists before writing to file
for x in range(len(CustomerID)):
one_record = CustomerID[x],TicketID[x],Number[x],Method[x]
fridayRecords.append(one_record)
#open file for writing
with open("sample_output.csv", "wb") as f:
#create the csv writer object
writer = csv.writer(f)
#write one row (item) of data at a time
writer.writerows(recordArray)
f.close
counter = counter + 1
#Main Program
recordArray = dataInput()
totalCost,school,website = dataProcessing(recordArray)
write_file(recordArray)
In the function write_file(recordArray) in your second attempt the counter variable counter in the first while loop is never updated so the loop continues for ever until you run out of memory.
im uploading a .csv file in django, and im taking groups of 500 lines of the csv and upload them with bulk_create, but im getting this error...
here's what im doing:
personas = []
Aux_USER = []
reader = csv.reader(path)
reader.next()
row_count = sum(1 for row in reader)
reader = csv.reader(path)
reader.next()
for row in reader:
if not Usuario.objects.filter(casillero=if_empty(row[0])).exists():
clave = "%32x" % random.getrandbits(128)
hash = clave[0: 6]
if len(row[4])>30 or len(row[3]) > 30:
ErrorLog.objects.create(casillero=row[4])
else:
usr = User.objects.create(
username=row[1],
first_name=row[3],
last_name=row[4],
password=hash,
email=row[5],
)
if not (row[5] == ""):
# usuario_aux(
usuario_aux.user=usr.id
usuario_aux.clave=hash
usuario_aux.correo=False
# )
# usuario_aux.objects.create(
# user=usr.id,
# clave=hash,
# correo=True
# )
#person_user_email(
Usuario.user=usr.id
Usuario.casillero=if_empty(row[0])
Usuario.cuenta_individual=row[1]
Usuario.integrante=row[2]
Usuario.telefono=row[6]
Usuario.plan_inscripcion=row[9]
Usuario.estado=row[10]
Usuario.municipio=row[11]
Usuario.parroquia=row[12]
Usuario.ciudad=row[13]
Usuario.urbanizacion=row[14]
Usuario.avenida=row[15]
Usuario.tipo_inmueble=row[16]
Usuario.codigo_postal=if_empty(row[17])
Usuario.status=row[29]
usr = None
# )
else:
# usuario_aux(
usuario_aux.user=usr.id
usuario_aux.clave=hash
usuario_aux.correo=False
# )
#usuario_aux.objects.create(user=usr.id, clave=hash)
#person_user_email(
Usuario.user=usr.id
Usuario.casillero=if_empty(row[0])
Usuario.cuenta_individual=row[1]
Usuario.integrante=row[2]
Usuario.telefono=row[6]
Usuario.plan_inscripcion=row[9]
Usuario.estado=row[10]
Usuario.municipio=row[11]
Usuario.parroquia=row[12]
Usuario.ciudad=row[13]
Usuario.urbanizacion=row[14]
Usuario.avenida=row[15]
Usuario.tipo_inmueble=row[16]
Usuario.codigo_postal=if_empty(row[17])
Usuario.status=row[29]
usr = None
#)
personas.append(Usuario)
Aux_USER.append(usuario_aux)
n_objects = len(personas)
if row_count < 500:
Usuario.objects.bulk_create(personas)
usuario_aux.bulk_create(Aux_USER)
print "listo"
personas = []
if n_objects == 500:
Usuario.objects.bulk_create(personas)
usuario_aux.bulk_create(Aux_USER)
print "500 creados"
row_count -= 500
personas = []
when i see the admin, it only create "488" users, and never shows the print "listo" why is this happening?
The create() method saves the record as you create it. That's a nice convenience in many cases, but it completely defeats the purpose of using bulk_create().
I don't think that's the only problem with this code, but it explains how you're saving 488 records without printing "listo". As you refactor this, I'd recommend focusing on processing the first 10 records in your CSV correctly and then worry about optimizing for performance for the whole set.
Unsure of why I am getting this error. I'm reading from a file called columns_unsorted.txt, then trying to write to columns_unsorted.txt. There error is on fan_on = string_j[1], saying list index out of range. Here's my code:
#!/usr/bin/python
import fileinput
import collections
# open document to record results into
j = open('./columns_unsorted.txt', 'r')
# note this is a file of rows of space-delimited date in the format <1384055277275353 0 0 0 1 0 0 0 0 22:47:57> on each row, the first term being unix times, the last human time, the middle binary indicating which machine event happened
# open document to read from
l = open('./columns_sorted.txt', 'w')
# CREATE ARRAY CALLED EVENTS
events = collections.deque()
i = 1
# FILL ARRAY WITH "FACTS" ROWS; SPLIT INTO FIELDS, CHANGE TYPES AS APPROPRIATE
for line in j: # columns_unsorted
line = line.rstrip('\n')
string_j = line.split(' ')
time = str(string_j[0])
fan_on = int(string_j[1])
fan_off = int(string_j[2])
heater_on = int(string_j[3])
heater_off = int(string_j[4])
space_on = int(string_j[5])
space_off = int(string_j[6])
pump_on = int(string_j[7])
pump_off = int(string_j[8])
event_time = str(string_j[9])
row = time, fan_on, fan_off, heater_on, heater_off, space_on, space_off, pump_on, pump_off, event_time
events.append(row)
You are missing the readlines function, no?
You have to do:
j = open('./columns_unsorted.txt', 'r')
l = j.readlines()
for line in l:
# what you want to do with each line
In the future, you should print some of your variables, just to be sure the code is working as you want it to, and to help you identifying problems.
(for example, if in your code you would print string_j you would see what kind of problem you have)
Problem was an inconsistent line in the data file. Forgive my haste in posting