Based on other posts I understand Django doesn't have a memory leak issue, but I have a web application where, when a specific routine is called, a lot of memory is used and not all of it is freed up afterwards. I don't know if that is the correct terminology, but if I track mem_used_perc on AWS while only calling this routine from the webpage, I see the memory usage increase and not return to previous levels.
It is a recursive routine that can iterate up to 7 times. This is the code:
def autosearch(self, phase=1, report="", num=10):
    """
    This is an ES search following specific rules to identify and populate
    the lead notifications
    """
    if phase == 1:
        self.referred_to.clear()
        if self.no_of_providers:
            num = self.no_of_providers
        else:
            num = 10
    sqs = OrganisationDocument.search()
    service_type = None
    # filter by care type
    if self.type_of_care_care_home:
        service_type = "service_care_home"
    elif self.type_of_care_home_care:
        service_type = "service_home_care"
    elif self.type_of_care_live_in_care:
        service_type = "service_live_in_care"
    elif self.type_of_care_retirement_village:
        service_type = "service_retirement_village"
    if service_type == "service_retirement_village":
        sqs = sqs.query(Q("multi_match", query=True, fields=service_type))
    elif service_type:
        sqs = sqs.query(
            Q("multi_match", query=True, fields=service_type)
            & Q("match", care_over_65=True)
        )
    else:
        sqs = sqs.query(Q("match", care_over_65=True))
    if self.budget_type:
        ranges = self.filter_by_budget_range(phase)
        sqs = sqs.query(Q("bool", should=list(ranges)))
    # filter on location and distance
    if self.radius:
        radius = self.radius
    else:
        radius = 5
    # Increase radius by 2 (or by 10% when over 20) for phases 2 and 6
    if phase in [2, 6]:
        if radius < 20:
            radius += 2
        else:
            radius *= 1.1
    sqs = sqs.query(
        "geo_distance",
        distance=f"{radius}mi",
        location={
            "lat": self.searcharea_set.all()[0].lat,
            "lon": self.searcharea_set.all()[0].lng,
        },
    )
    # Filter by care_category_type
    categories = []
    if self.care_need_category_residential:
        categories.append("care_residential")
    if self.care_need_category_nursing:
        categories.append("care_nursing")
    if self.care_need_category_dementia:
        categories.append("care_dementia")
    if self.care_need_category_nursing_dementia:
        pass
    if self.care_need_category_personal_care:
        categories.append("care_residential")
    if self.care_need_category_respite_care:
        categories.append("care_respite")
    if self.care_need_palliative:
        pass
    if self.care_need_end_of_life:
        pass
    if self.care_need_retirement_housing:
        categories.append("retirement_living_housing")
    if self.care_need_retirement_village:
        categories.append("retirement_living_village")
    if self.care_need_retirement_community:
        categories.append("retirement_living_community")
    if self.care_need_retirement_full_time:
        pass
    query = []
    for category in categories:
        if self.type_of_care_live_in_care or self.type_of_care_home_care:
            if category == "care_residential":
                category = "regulated_personal_care"
        if category == "care_nursing":
            query.append(
                Q(
                    Q("match", regulated_nursing_care=True)
                    | Q("match", care_nursing=True)
                )
            )
        else:
            query.append(Q("match", **{f"{category}": True}))
    if self.type_of_care_retirement_village:
        sqs = sqs.query("bool", should=list(query))
    else:
        sqs = sqs.filter(Q("bool", must=query))
    # CQC Regulator filter
    sqs = sqs.query(
        Q(
            Q("match", cqc_rating_overall=1)
            | Q("match", cqc_rating_overall=2)
            | Q("match", cqc_rating_overall=3)
            | Q("match", cqc_rating_overall=99)
        )
    )
    # filter on profile
    if phase >= 4:
        sqs = sqs.query(Q("match", has_thumbnail_image=1))
        # Exclude Standard profiles in Brand with Premium Profile
        prems = [x.id for x in self.referred_to.all() if x.is_premium]
        sqs = sqs.query(~Q('bool', brand_link=list(prems)))
    else:
        sqs = sqs.query(Q("match", is_premium=1))
    # filter on budget
    if self.budget_type:
        ranges = self.filter_by_budget_range(phase)
        sqs = sqs.query(Q("bool", should=list(ranges)))
    # funding method
    if self.funding_method == choices.LOCAL_AUTHORITY:
        sqs = sqs.query(~Q("match", fees_local_authority_funded=False))
        # sqs = sqs.query('match', **{'fees_local_authority_funded': True})
    elif self.funding_method == choices.SELF_FUNDING:
        sqs = sqs.query("match", self_funding_clients=True)
    elif self.funding_method == choices.CONTINUING_HEALTHCARE:
        sqs = sqs.query(Q("match", fees_continuous_health_top_up=True))
    elif self.funding_method == choices.TOP_UP:
        sqs = sqs.query(~Q("match", fees_family_top_up=False))
    # Red crossed
    sqs = sqs.query(~Q("match", autumna_flag=2))
    # amber flagged
    if phase < 7:
        sqs = sqs.query(~Q("match", autumna_flag=1))
    # email only
    if not self.may_contact_provider:
        sqs = sqs.query(~Q("match", leads_accepted=1))
        sqs = sqs.query(~Q("match", leads_accepted=2))
    # no permission
    if not self.may_contact_phone:
        sqs = sqs.query(~Q("match", leads_accepted=1))
    # timescales
    if self.timescales in ["ASAP", "2-4 weeks"]:
        sqs = sqs.query(
            Q(
                Q("match", availability_overall=1)
                | Q("match", availability_overall=2)
            )
        )
    sqs.sort("-is_premium", "-has_thumbnail_image", "-is_claimed", self.sort_geo_location_dict())
    sqs = sqs[:num]
    report += f"<p>Phase {phase}:"
    added_count = 0
    for organisation in sqs.to_queryset():
        if organisation not in self.referred_to.all():
            added_count += 1
            self.referred_to.add(organisation)
            report += f"{organisation}, "
    report += "</p>"
    if added_count >= num or phase >= 7:
        self.autosearch_interim_report = report
        self.save()
    else:
        phase += 1
        num -= added_count
        return self.autosearch(phase, report, num)
    return True
Is there any reason this should cause the symptoms I am seeing and how do I fix it?
EDIT
The function is invoked from admin with the following:
def autosearch(self, request, pk):
    """
    Populates the lead notifications
    """
    lead = get_object_or_404(models.Lead, pk=pk)
    # first, add organisations to favourites
    result = lead.autosearch()
    logger.debug(f'Auto search result: {result}')
    return redirect(reverse("admin:lead_management_lead_change", args=(pk,)))
EDIT
I am using Memcached, which is configured as follows:
CACHES = {
    "default": {
        "BACKEND": "django.core.cache.backends.memcached.MemcachedCache",
        "LOCATION": os.environ.get("CACHE_LOCATION", "127.0.0.1:11211"),
    }
}
So locally I run Memcached on the same machine, but in production I use an Elastic Beanstalk environment variable to define CACHE_LOCATION, which is a string something like: my-site-name-prod.abcdef.cfg.euw2.cache.amazonaws.com:11211
EDIT
def filter_by_budget_range(self, phase):
    """
    How the function works:
    - case 1: When both min and max budget are given
      - It simply filters organisations within this range.
    - case 2: When only max budget is given
      - The min budget is derived from the max budget:
        min_budget = max_budget - max_budget * 0.2
      - e.g. max_budget = 100, then min_budget = 100 - 100 * 0.2 => 80
    - case 3: When only min budget is given
      - The max budget is derived from the min budget:
        max_budget = min_budget + min_budget * 0.2
      - e.g. min_budget = 100, then max_budget = 100 + 100 * 0.2 => 120
    """
    _budget_min, _budget_max = self.min_max_handler(phase)
    if _budget_max is not None:
        if self.type_of_care_live_in_care or self.type_of_care_care_home:
            yield {
                "range": {
                    "fees_weekly_residential_costs_from": {
                        "gte": _budget_min * 100,
                        "lte": _budget_max * 100,
                    }
                }
            }
        else:
            yield {
                "range": {
                    "fees_hourly_start_from": {
                        "gte": _budget_min * 100,
                        "lte": _budget_max * 100,
                    }
                }
            }
There is nothing in the provided code that could explain a memory leak.
The issue must come from somewhere else (possibly self.filter_by_budget_range(phase)).
More often than not, memory leaks in Django come from side effects when using objects that are created at server startup and that you keep feeding with new data without realizing it, or without being aware that the object is bound to the server process rather than to a single request.
For instance, if you have something like that:
class Foobar(models.Model):
    ...
    baz = []  # class attribute: created once, shared by every instance
    ...
    def barfoo(self, item):
        ...
        self.baz.append(item)  # mutates the shared class attribute
        ...
For every request where you call obj.barfoo(some_value), the shared Foobar.baz will keep growing until the server is restarted. (Note the attribute has to be a shared mutable object for this to happen; with an immutable class attribute such as a string, self.baz += baz would quietly create a per-instance attribute instead.)
Similarly, in the following example:
def foobar(baz=[]):
    ...
    baz.append(something)
    ...
the function foobar (and its default argument) is created once at server startup, so every time you call foobar() relying on the default argument, baz keeps growing until the server is restarted.
These two examples are of course contrived; they just show simple cases of side effects affecting memory.
Another possible issue would be if you cache some stuff using a memory-based backend such as Memcached.
If you have no idea what it could be, your best bet is probably to try to reproduce the issue in development and use pdb (or even django-pdb) to inspect the memory, either while running the development server or directly in the shell (which can be handier if you can reproduce it there).
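For instance, a rough way to watch the process's memory while reproducing in ./manage.py shell (just a sketch; it assumes the psutil package, which is not part of your setup, and some_pk is a placeholder primary key):
import os
import psutil

proc = psutil.Process(os.getpid())
print(proc.memory_info().rss / 1024 / 1024, "MiB")  # resident memory before
lead = models.Lead.objects.get(pk=some_pk)
lead.autosearch()
print(proc.memory_info().rss / 1024 / 1024, "MiB")  # resident memory after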
Does it help if you manually run gc.collect() in your code?
https://docs.python.org/3/library/gc.html
collect(): this method frees unreferenced objects in the list maintained by the collector. Some unreferenced objects are not freed immediately because of the way they are implemented (objects in reference cycles, for example).
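For example (a sketch, triggering the collection at the end of the admin view shown in the question):
import gc

def autosearch(self, request, pk):
    lead = get_object_or_404(models.Lead, pk=pk)
    result = lead.autosearch()
    gc.collect()  # force a full collection pass to reclaim cyclic garbage
    logger.debug(f'Auto search result: {result}')
    return redirect(reverse("admin:lead_management_lead_change", args=(pk,)))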
Are you sure your issue is with the memory not being freed up again? Are you actually having any memory problems?
On Linux it is typical for memory to remain 'used' even after a program has freed it, since what good is your RAM if you keep it empty all the time?
The OS will keep things cached in RAM even if your application doesn't really need them at the moment, because it might benefit from increased performance when the application later wants that data again and it is still in RAM. This is called caching.
I tried looking up the documentation of the mem_used[_percent] metric, and it was not really clear to me whether it means memory in use by applications plus caches, or only memory used by applications:
https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/metrics-collected-by-CloudWatch-agent.html
Do you have any issues with free memory? Is your server going out of memory? E.g. do mem_available and mem_free go back up after the call is done, or not?
What you typically see is memory being used by an application, then freed, but cached by the OS. So mem_free goes back up, but the data is cached in memory, so mem_cached goes up and mem_used stays the same. This could explain what you are seeing without there being a memory leak.
For example, on my current system, when I run cat /proc/meminfo I see almost no memory free, but most of it (68%) is taken up by caches which will be cleared as soon as an application needs the memory:
MemTotal:       16068712 kB
MemFree:          178332 kB
MemAvailable:   10290764 kB
Buffers:          812504 kB
Cached:         10203120 kB
SwapCached:         1596 kB
Active:          4852664 kB
Inactive:       10034536 kB
Related
I'm working on a project to automate updates to Cloud Scheduler jobs with Python.
I have already written the logic in Python, but I'm facing one problem: updating a Cloud Scheduler job with Python looks similar to creating one, in that you have to pass most of the properties of the job in the code. That is the problem; I only want to update the retry_config and nothing else. I want to leave the schedule and the target as they are, so I don't have to pass those again every time.
Of course I could fetch the job's current schedule and target using another class such as GetJobRequest, so that wouldn't be a problem, but I wish I didn't have to, since I don't want to update those fields.
Help?
from google.cloud import scheduler_v1
from google.protobuf import duration_pb2
client = scheduler_v1.CloudSchedulerClient()
retry_config = scheduler_v1.RetryConfig()
retry_config.retry_count = 4
retry_config.max_doublings = 4
retry_config.min_backoff_duration = duration_pb2.Duration(seconds=5)
retry_config.max_backoff_duration = duration_pb2.Duration(seconds=60)
job = scheduler_v1.Job()
job.name = f"projects/{PROJECT_ID}/locations/{DATAFLOW_REGION}/jobs/test"
job.retry_config = retry_config
job.schedule = "* * * * 1"
method = scheduler_v1.HttpMethod(2)
target = scheduler_v1.HttpTarget()
target.uri = "https://xxxx"
target.http_method = method
job.http_target = target
request = scheduler_v1.UpdateJobRequest(
    job=job
)
response = client.update_job(request=request)
print(response)
It is possible to specify the properties that need to be changed using the update_mask parameter.
The final code will be as follows:
from google.cloud import scheduler_v1
from google.protobuf import duration_pb2, field_mask_pb2
client = scheduler_v1.CloudSchedulerClient()
retry_config = scheduler_v1.RetryConfig()
retry_config.retry_count = 4
retry_config.max_doublings = 4
retry_config.min_backoff_duration = duration_pb2.Duration(seconds=5)
retry_config.max_backoff_duration = duration_pb2.Duration(seconds=60)
job = scheduler_v1.Job()
job.name = f"projects/{PROJECT_ID}/locations/{DATAFLOW_REGION}/jobs/test"
job.retry_config = retry_config
update_mask = field_mask_pb2.FieldMask(paths=['retry_config'])
request = scheduler_v1.UpdateJobRequest(
    job=job,
    update_mask=update_mask
)
response = client.update_job(request=request)
print(response)
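To confirm afterwards that only retry_config changed, you can read the job back (a short sketch using the same client):
get_request = scheduler_v1.GetJobRequest(name=job.name)
current = client.get_job(request=get_request)
print(current.retry_config)  # the updated values
print(current.schedule)      # untouched by the masked update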
I am writing a Lambda function that has an array of words that I want to put into a slotType, updating it every time. Here is how it goes: initially, the slotType has the values ['car', 'bus']. The next time I run the Lambda function, the values get updated to ['car', 'bus', 'train', 'flight'], which is the result of appending a new array onto the old one.
I want to know how to publish the bot every time the Lambda function gets invoked, so that the next time I hit the Lex bot from the front end, it uses the latest slotType in the intent and a newly published bot alias. Yep, also the alias!
I know for a fact that put_slot_type() is working, because the slot is getting updated in the bot.
Here is the function which basically takes in new labels as parameters.
def lex_extend_slots(new_labels):
    print('entering lex model...')
    lex = boto3.client('lex-models')
    slot_name = 'keysDb'
    intent_name = 'searchKeys'
    bot_name = 'photosBot'
    res = lex.get_slot_type(
        name=slot_name,
        version='$LATEST'
    )
    current_labels = res['enumerationValues']
    latest_checksum = res['checksum']
    arr = [x['value'] for x in current_labels]
    labels = arr + new_labels
    print('arr: ', arr)
    print('new_labels: ', new_labels)
    print('labels in lex: ', labels)
    labels = list(set(labels))
    enumerationList = [{'value': label, 'synonyms': []} for label in labels]
    print('getting ready to push enum..: ', enumerationList)
    res_slot = lex.put_slot_type(
        name=slot_name,
        description='updated slots...',
        enumerationValues=enumerationList,
        valueSelectionStrategy='TOP_RESOLUTION',
    )
    res_build_intent = lex.create_intent_version(
        name=intent_name
    )
    res_build_bot = lex.create_bot_version(
        name=bot_name,
        checksum=latest_checksum
    )
    return current_labels
It looks like you're using Version 1 of the Lex Models API on Boto3.
You can use the put_bot method in the lex-models client to effectively create or update your Lex bot.
The put_bot method expects the full list of intents to be used for building the bot.
It is worth mentioning that you will first need to use put_intent to update your intents to ensure they use the latest version of your updated slotType.
Here's the documentation for put_intent.
The appropriate methods for creating and updating aliases are contained in the same link that I've shared above.
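A minimal sketch of that flow, using the names from the question (the 'prod' alias name is hypothetical; the get_* calls fetch the checksums that the Lex v1 API requires when updating existing resources):
import boto3

lex = boto3.client('lex-models')

# 1. Re-save the intent so $LATEST picks up the updated slot type, then snapshot it
intent = lex.get_intent(name='searchKeys', version='$LATEST')
put_res = lex.put_intent(
    name='searchKeys',
    checksum=intent['checksum'],
    sampleUtterances=intent.get('sampleUtterances', []),
    slots=intent.get('slots', []),
)
intent_version = lex.create_intent_version(name='searchKeys', checksum=put_res['checksum'])

# 2. Rebuild the bot so it references the new intent version
bot = lex.get_bot(name='photosBot', versionOrAlias='$LATEST')
bot_res = lex.put_bot(
    name='photosBot',
    checksum=bot['checksum'],
    childDirected=bot['childDirected'],
    locale=bot['locale'],
    # pass through any other fields from get_bot you need to preserve,
    # e.g. clarificationPrompt and abortStatement
    intents=[{'intentName': 'searchKeys', 'intentVersion': intent_version['version']}],
    processBehavior='BUILD',  # build the bot as part of the update
)

# 3. Publish a new bot version and repoint the alias at it
bot_version = lex.create_bot_version(name='photosBot', checksum=bot_res['checksum'])
alias = lex.get_bot_alias(name='prod', botName='photosBot')
lex.put_bot_alias(
    name='prod',
    botName='photosBot',
    botVersion=bot_version['version'],
    checksum=alias['checksum'],
)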
Ray saves a bunch of checkpoints during a call to agent.train(). How do I know which checkpoint holds the best agent to load?
Is there any function like tune-analysis-output.get_best_checkpoint(path, mode="max") to explore different loading possibilities over the checkpoints?
As answered in https://discuss.ray.io/t/ray-restore-checkpoint-in-rllib/3186/2 you can use:
analysis = tune.Analysis(experiment_path) # can also be the result of `tune.run()`
trial_logdir = analysis.get_best_logdir(metric="metric", mode="max") # Can also just specify trial dir directly
checkpoints = analysis.get_trial_checkpoints_paths(trial_logdir) # Returns tuples of (logdir, metric)
best_checkpoint = analysis.get_best_checkpoint(trial_logdir, metric="metric", mode="max")
See https://docs.ray.io/en/master/tune/api_docs/analysis.html#id1
analysis = tune.run(
    "A2C",
    name=model_name,
    config=config,
    ...
    checkpoint_freq=5,
    checkpoint_at_end=True,
    restore=best_checkpoint
)
trial_logdir = analysis.get_best_logdir(metric="episode_reward_mean", mode="max")
best_checkpoint = analysis.get_best_checkpoint(trial_logdir, metric="episode_reward_mean", mode="max")
I have this function that uses PrettyTable to gather information about the virtual machines owned by a user. Right now it only shows information, and it works well. I have a new idea: I want to add a button in a new column which allows the user to reboot the virtual machine.
I already know how to restart the virtual machines, but what I'm struggling to figure out is the best way to create a dataset which I can iterate through and then render as an HTML table. I've done similar stuff with PHP/SQL in the past and it was straightforward. I don't think I can iterate through PrettyTable, so I'm wondering what my best option is. PrettyTable does a very good job of making it simple to create the table (as you can see below). I'm hoping to use another method but also keep it very simple: basically, making it relational and easy to iterate through. Any other suggestions are welcome. Thanks!
Here is my current code:
x = PrettyTable()
x.field_names = ["VM Name", "OS", "IP", "Power State"]
for uuid in virtual_machines:
    vm = search_index.FindByUuid(None, uuid, True, False)
    if vm.summary.guest.ipAddress is None:
        ip = "Unavailable"
    else:
        ip = vm.summary.guest.ipAddress
    if vm.summary.runtime.powerState == "poweredOff":
        power_state = "OFF"
    else:
        power_state = "ON"
    if vm.summary.guest.guestFullName is None:
        os = "Unavailable"
    else:
        os = vm.summary.guest.guestFullName
    x.add_row([vm.summary.config.name, os, ip, power_state])
table = x.get_html_string(attributes={"class": "table table-striped"})
return table
Here is a sample of what it looks like and also what I plan to do with the button. http://prntscr.com/nki3ci
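One simple relational structure is a plain list of dicts built in the same loop, which is easy to iterate when rendering HTML yourself or in a template (a sketch using the same summary fields as the code above):
rows = []
for uuid in virtual_machines:
    vm = search_index.FindByUuid(None, uuid, True, False)
    rows.append({
        "name": vm.summary.config.name,
        "os": vm.summary.guest.guestFullName or "Unavailable",
        "ip": vm.summary.guest.ipAddress or "Unavailable",
        "power": "OFF" if vm.summary.runtime.powerState == "poweredOff" else "ON",
        "uuid": uuid,  # kept so a reboot button can reference the VM
    })
# each row is now addressable, e.g. rows[0]["name"], and easy to loop over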
Figured out how to query the PrettyTable. It was a minor addition without having to redo it all.
html = '<table class="table"><tr><th>VM Name</th><th>OS</th><th>IP</th><th>Power State</th></tr>'
htmlend = '</tr></table>'
body = ''
for vmm in x:
    vmm.border = False
    vmm.header = False
    vm_name = vmm.get_string(fields=["VM Name"])
    operating_system = vmm.get_string(fields=["OS"])
    ip_addr = vmm.get_string(fields=["IP"])
    body += '<tr><td>' + vm_name + '</td><td>' + operating_system + '</td><td>' + ip_addr + '</td><td>ON</td></tr>'
html += body
html += htmlend
print(html)
Now I'm developing a project with the SoftLayer API. I want to get the OS list via the API, just like the portal site shows. Is there a method to get the correct OS list? Regards.
Is there a specific language example you are looking for? If you use the SoftLayer CLI, you can do this with the following commands:
slcli vs create-options # For Virtual Guests
slcli server create-options # For Bare Metal Servers
Unfortunately, it's not possible to retrieve the same result as the Control Portal with a single call, but it is possible using a programming language.
To see programming languages supported by SoftLayer:
SoftLayer Development Network
Take a look at the following Python script:
"""
List OSs for VSI similar than Portal
See below references for more details.
Important manual pages:
http://sldn.softlayer.com/reference/services/SoftLayer_Product_Package/getItemPrices
http://sldn.softlayer.com/article/object-filters
http://sldn.softlayer.com/article/object-Masks
#License: http://sldn.softlayer.com/article/License
#Author: SoftLayer Technologies, Inc. <sldn#softlayer.com>
"""
import SoftLayer
import datetime
import time
# Your SoftLayer's username and api Key
USERNAME = 'set me'
API_KEY = 'set me'
# Package id
packageId = 46
# Datacenter
datacenter = 'wdc04'
# Computing INstance
core = '1 x 2.0 GHz Core'
# Creating service
client = SoftLayer.Client(username=USERNAME, api_key=API_KEY)
packageService = client['SoftLayer_Product_Package']
# Declaring filters and mask to get additional information for items
filterDatacenter = {"itemPrices": {"pricingLocationGroup": {"locations": {"name": {"operation": datacenter}}}}}
objectMaskDatacenter = 'mask[pricingLocationGroup[locations]]'
objectMask = 'mask[pricingLocationGroup[locations],categories,item[id, description, capacity,softwareDescription[manufacturer],availabilityAttributeCount, availabilityAttributes[attributeType]]]'
filterInstance = {
'itemPrices': {
'categories': {
'categoryCode': {
'operation': 'os'
}
}
}
}
# Define a variable to get capacity
coreCapacity = 0
# To get item id information
itemId = 0
flag = False
# Define the manufacturers from which you like to get information
manufacturers = ["CentOS", "CloudLinux", "CoreOS", "Debian", "Microsoft", "Redhat", "Ubuntu"]
# Declare time to avoid list OS expired
now = time.strftime("%m/%d/%Y")
nowTime = time.mktime(datetime.datetime.strptime(now, "%m/%d/%Y").timetuple())
try:
conflicts = packageService.getItemConflicts(id=packageId)
itemPrices = packageService.getItemPrices(id=packageId, filter=filterDatacenter, mask=objectMask)
if len(itemPrices) == 0:
filterDatacenter = {"itemPrices":{"locationGroupId":{"operation":"is null"}}}
itemPrices = packageService.getItemPrices(id=packageId, filter=filterDatacenter, mask=objectMask)
for itemPrice in itemPrices:
if itemPrice['item']['description'] == core:
itemId = itemPrice['item']['id']
coreCapacity = itemPrice['item']['capacity']
result = packageService.getItemPrices(id=packageId, mask=objectMask, filter=filterInstance)
filtered_os = []
for item in result:
for attribute in item['item']['availabilityAttributes']:
expireTime = time.mktime(datetime.datetime.strptime(attribute['value'], "%m/%d/%Y").timetuple())
if ((attribute['attributeType']['keyName'] == 'UNAVAILABLE_AFTER_DATE_NEW_ORDERS') and (expireTime >= nowTime)):
filtered_os.append(item)
if item['item']['availabilityAttributeCount'] == 0:
filtered_os.append(item)
for manufacturer in manufacturers:
print(manufacturer)
for itemOs in filtered_os:
for conflict in conflicts:
if (((itemOs['item']['id'] == conflict['itemId']) and (itemId == conflict['resourceTableId'])) or ((itemId == conflict['itemId']) and (itemOs['item']['id'] == conflict['resourceTableId']))):
flag = False
break
else:
flag = True
if flag:
if itemOs['item']['softwareDescription']['manufacturer'] == manufacturer:
if 'capacityRestrictionMinimum' in itemOs:
if((itemOs['capacityRestrictionMinimum'] <= coreCapacity) and (coreCapacity <= itemOs['capacityRestrictionMaximum'])):
print("%s Price Id: %s Item Id: %s" % (itemOs['item']['description'], itemOs['id'], itemOs['item']['id']))
else:
print("%s Price Id: %s Item Id: %s" % (itemOs['item']['description'], itemOs['id'], itemOs['item']['id']))
print("---------------------------------------------------")
except SoftLayer.SoftLayerAPIError as e:
print('Unable to get Item Prices faultCode=%s, faultString=%s'
% (e.faultCode, e.faultString))
I added the core variable because the OSes have restrictions on core capacity. I also added the datacenter to get the specific core item price for a specific datacenter; perhaps that is unnecessary, but you can edit this script according to your requirements.
The same idea can be applied in other programming languages.
I hope it helps. Please let me know if you have any doubts or comments, or if you need further assistance.
Updated
I improved the script: I added the ability to check conflicts between items, in order to get the same result for each kind of computing instance.