Memory leak - python - counter - lists in dicts - list

# Question exhibit: for every ordered pair of names drawn from `feats`, sum
# 1 / (jj - ii) over all index pairs with jj > ii and store the total under
# the key "<calli> <callj> distance".
# `feats`, `seqd` and `itertools` are defined outside this fragment.
file_dict = {}
for calli, callj in itertools.product(feats,feats):
keys = seqd.keys()
# `|` is the bitwise operator: it gives the right answer on these two bools
# but, unlike `or`, always evaluates both sides.
if (not calli in keys) | ((not callj in keys)):
continue
else:
lst = []
##### this is the problematic part !!!!!!!!!
# Every positive-gap term is appended to `lst` before anything is summed, so
# the list can hold up to len(seqd[callj]) * len(seqd[calli]) floats at once.
for jj, ii in itertools.product(seqd[callj], seqd[calli]):
if (jj - ii) > 0:
lst.append(1. / (jj - ii))
del jj,ii
entry = sum(lst)
del lst
file_dict[str(calli) + " " + str(callj) + " distance"] = entry
I am using the above snippet in some code and iterating over files. I have a memory leak of some sort. If I just comment out those 4 lines which are in the loop I highlighted, my code stays at constant RAM ~100mb. However, when I uncomment this, it goes up to ~8-9GB super fast. Please help!!

For context, I am replicating the algorithm on page 5 of this paper, generating a system call dependency graph.
Here is the full code fix. It basically involved using a list comprehension within a sum, but without putting brackets around the comprehension. That way items were summed as they were generated instead of first building up the list...
Here is the code:
def graph_maker_dict(feats, calls):
    """Return summed inverse index gaps for every ordered pair of features.

    The positions at which each entry of ``calls`` occurs are collected
    first.  Then, for every ordered pair ``(calli, callj)`` drawn from
    ``feats`` where both names occur in ``calls``, the result maps the key
    ``"<calli> <callj> distance"`` to the sum of ``1 / (jj - ii)`` over all
    positions ``ii`` of ``calli`` and ``jj`` of ``callj`` with ``jj > ii``.

    Args:
        feats: Feature (call) names to pair up with each other.
        calls: Sequence of call names, in order of occurrence.

    Returns:
        dict mapping ``"<calli> <callj> distance"`` to the summed value
        (``0`` when no position of ``callj`` follows one of ``calli``).
    """
    # Positions at which each call appears.
    seqd = defaultdict(list)
    for v, k in enumerate(calls):
        seqd[k].append(v)

    # Features that never occur contribute no pairs; filtering once up front
    # replaces the membership test that was repeated for every pair.
    present = [feat for feat in feats if feat in seqd]

    file_dict = {}
    for calli, callj in itertools.product(present, present):
        # Generator expression: terms are summed as they are produced, so no
        # intermediate list of len(seqd[callj]) * len(seqd[calli]) floats.
        entry = sum(
            1. / (jj - ii)
            for jj in seqd[callj]
            for ii in seqd[calli]
            if jj > ii
        )
        # Use the full names in the key.  Truncating them to two characters
        # made distinct calls with a shared prefix overwrite each other.
        file_dict[str(calli) + " " + str(callj) + " distance"] = entry
    return file_dict

Related

Implementation of Karger's Algorithm in Python Taking too Long

Wondering if you can help me understand where the critical flaw may be with my attempt at implementing Karger's algorithm in python. My program appears to take far too long to run and my computer starts to overwork running large sets of vertices. The purpose of the program is to output the minimum cut of the graph.
from random import choice
from statistics import mode
import math
# Load the graph description.  The context manager closes the file as soon as
# the lines are read instead of leaving the handle open for the whole run.
with open("mincuts.txt", "r") as fhand:
    vertices = fhand.readlines()

# Map the 1-based line number (as a string) to that line's
# whitespace-separated tokens.
# NOTE(review): presumably each line is "<vertex> <neighbour> ..."; confirm
# against the data file.
d = {}
for index, line in enumerate(vertices):
    d["{0}".format(index+1)] = line.split()
def randy(graph, x):
    """Return a randomly chosen key of ``graph``, as a string, that is not ``x``.

    A key is drawn uniformly from ``graph``; whenever the draw equals ``x``
    the function calls itself to draw again.
    """
    picked = str(choice(list(graph)))
    return randy(graph, x) if picked == x else picked
count = 0


def contract(graph):
    """Contract random vertex pairs until two remain, then tally into ``count``.

    While ``graph`` does not have exactly two keys, a random key ``x`` and a
    different random key ``y`` are picked, ``y``'s list is appended to
    ``x``'s, ``y`` is removed and duplicate entries are dropped.  Once two
    keys remain, the module-level ``count`` is increased by the number of
    entries of the first key's list (skipping index 0) that also appear in
    the second key's list.

    ``graph`` is modified in place (lists are replaced, never mutated).
    Nothing is returned.

    NOTE(review): entries naming a removed vertex are left untouched in the
    other adjacency lists, and de-duplication also collapses parallel edges;
    confirm this is the intended contraction rule.
    """
    global count
    # Iterative rather than recursive: the recursive form used one stack
    # frame per contraction and hit the recursion limit on large graphs.
    all_lists_deduplicated = False
    while len(graph) != 2:
        x = str(choice(list(graph)))
        y = randy(graph, x)
        graph[x] = graph[x] + graph[y]
        graph.pop(y)
        if all_lists_deduplicated:
            # Only the merged list can have gained duplicates since the
            # previous pass, so the others need not be rebuilt again.
            graph[x] = list(dict.fromkeys(graph[x]))
        else:
            # First contraction: the input lists may contain duplicates.
            for key in graph:
                graph[key] = list(dict.fromkeys(graph[key]))
            all_lists_deduplicated = True

    a, b = graph
    # Index 0 of the first list is skipped, as in the original loop.
    count += sum(1 for entry in graph[a][1:] if entry in graph[b])
# Number of independent trials: N^2 * ln(N), truncated to an integer.
N = len(d)
runs = int(math.log(N) * (N * N))

outcomes = []
for _ in range(runs):
    # Each trial works on its own copy of the top-level mapping and starts
    # with the shared counter reset.
    trial_graph = d.copy()
    count = 0
    contract(trial_graph)
    outcomes.append(count)

print(outcomes)
# Report the value that occurred most often across all trials.
print(mode(outcomes))
Below is a link to the graph I am running in mincuts.txt:
https://github.com/BigSoundCode/Misc-Algorithm-Implementations/blob/main/mincuts.txt

Getting only one answer from a list python

I may have a noob question, but here it goes. I need to parse through a list and I need the return to be specific. Here is the code:
# Question exhibit (Python 2): look for `a` in `bla` and record the outcome.
a = 11
bla = [1,2,3,4,5,6,7,8,9,10]
lista = []
for element in bla:
if element == a:
lista.append(element)
else:
# Runs once per non-matching element, which is why this input produces ten
# 'not found' entries instead of one.
lista.append('not found')
print lista
This way - the return is 10 times - 'not found' - and I need it only one time.
lista = ['not found']
Any ideas?
Use python's built in lst.count()
a = 11
bla = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# How many times does `a` occur in `bla`?
cnt = bla.count(a)

# One copy of `a` per occurrence, or a single placeholder when there is none.
if cnt >= 1:
    lista = [a] * cnt
else:
    lista = ["not found"]
print(lista)
>>>["not found"]
Edit:
Jean-François Fabre suggested a more Pythonic version for building lista:
lista = [a] * cnt or ["not found"]
Edit 2:
If you want to do something similar for dictionaries you only have to implement the count function for yourself. I turned user into a string literal since making it a list and doing user[0] each time is pointless:
user = 'john'
user_details = [
    {u'UserName': u'fred', u'LastSeen': u'a'},
    {u'UserName': u'freddy', u'LastSeen': u'b'},
]

# Hand-rolled count: number of records whose UserName equals `user`.
cnt = len([record for record in user_details if record['UserName'] == user])

# Same fallback as before: an empty repetition is falsy, so `or` supplies
# the placeholder list.
lista = [user] * cnt or ["not found"]
Edit 3:
Finally if you want to use a for loop, check if it's not found at the end instead of at each step:
user = ['john']
user_details = [
    {u'UserName': u'fred', u'LastSeen': u'a'},
    {u'UserName': u'freddy', u'LastSeen': u'b'},
]

lis = []
for element in user_details:
    if element['UserName'] != user[0]:
        continue
    lis.append(element)

# Decide about the placeholder once, after the whole scan, not per element.
if not lis:
    lis.append("not found")

Made two identical lists in Python - my function is not updating the one I intend to

This is my first post in Stack Overflow. It's my pleasure to say hello to everyone, after all these years on being merely a lurker!
I am currently learning to code in my free time (specifically, Python). While developing my second project, I got stuck with a function that refuses to work as I intended. Since I can't find any documentation that helps me find out why, I decided to ask here. The code is the following:
def GridConstructor(InputGrid, RedPosition, BluePosition, SquareSize, StartingPositionA, StartingPositionB):
    """Fill ``InputGrid`` with one entry per cell of a 19 x 18 grid.

    For ``x`` in 1..19 and ``y`` in 1..18 the key ``"x, y"`` is set to the
    tuple ``([position_a, position_b], marker)``.  ``marker`` is 1 when
    ``[y, x] == BluePosition``, 2 when ``[y, x] == RedPosition`` and 0
    otherwise.  Both positions start at the given starting positions; index
    1 grows by ``SquareSize`` per ``y`` step and is reset for every new
    ``x``, index 0 grows by ``SquareSize`` per ``x`` step.

    Note that cells are compared to the colour positions as ``[y, x]`` while
    the dictionary key is written as ``"x, y"``.

    Args:
        InputGrid: dict that receives the entries (modified in place).
        RedPosition, BluePosition: two-element lists compared to ``[y, x]``.
        SquareSize: increment applied per grid step.
        StartingPositionA, StartingPositionB: sequences whose items 0 and 1
            are the starting coordinates; they are not modified.

    The progress values are printed for debugging, as before.
    """
    # Work on copies.  Plain assignment would only bind a second name to the
    # caller's lists, so the "+=" below would also move the starting
    # positions and the per-row reset would have nothing to reset to.
    CurrentPositionA = list(StartingPositionA)
    CurrentPositionB = list(StartingPositionB)

    for x in range(1, 20):
        for y in range(1, 19):
            if [y, x] == BluePosition:
                marker = 1
            elif [y, x] == RedPosition:
                marker = 2
            else:
                marker = 0
            # Store snapshots: storing the running lists themselves would make
            # every cell refer to the same two objects and show their final
            # values.
            InputGrid[str(x) + ", " + str(y)] = [list(CurrentPositionA), list(CurrentPositionB)], marker
            print("%s %s" % (CurrentPositionA, CurrentPositionB))
            CurrentPositionA[1] += SquareSize
            CurrentPositionB[1] += SquareSize

        CurrentPositionA[0] += SquareSize
        print(CurrentPositionA[0])
        CurrentPositionB[0] += SquareSize
        print(CurrentPositionB[0])
        # Back to the first cell of the next line.
        CurrentPositionA[1] = StartingPositionA[1]
        print(CurrentPositionA[1])
        print(StartingPositionA[1])
        CurrentPositionB[1] = StartingPositionB[1]
        print(CurrentPositionB[1])
        print(StartingPositionB[1])
(feel free to ignore the 'print', they're there for debugging purposes).
As you can see it's a pretty simple function to create a dictionary storing [x, y] coordinates in a grid pattern. Judging by the debugging, the problem seems to be that at the moment of adding 'SquareSize' to 'CurrentPositionA' and 'CurrentPositionB' in the second 'for' loop, the value of 'SquareSize' is for some reason added to 'StartingPositionA' and 'StartingPositionB', which means that the y value of both current position trackers do not reset to its original value at the end of each iteration of the first for loop. I have absolutely no idea why the function is doing that, and I can't find any documentation that can help me. I'm sure it is some very stupid, elementary error in action, but I'd be very grateful if someone can help me with this.
Cheers!
The issue is occurring because of these two assignment statements.
CurrentPositionA = StartingPositionA
CurrentPositionB = StartingPositionB
Let me explain the problem
# Demonstration: plain assignment binds a second name to the same list.
list1 = [1,2]
list2 = list1
# Changing the list through one name is visible through the other.
list1[1] = 3
print(list1)
print (list2)
# id() of both names, to compare object identity.
print (id(list1))
print (id(list2))
output:-
[1, 3]
[1, 3]
139674729442312
139674729442312
This occurs because the variables list1 and list2 refer to the same list object, so changing the content through one variable also changes what the other variable shows,
which is not what you expected.
To avoid this problem you need to copy (e.g. deepcopy) your objects rather than just assigning them.
# Demonstration: copy.deepcopy() produces an independent list.
import copy
list1 = [1,2]
list2 = copy.deepcopy(list1)
# Only list1 is changed here; list2 is a separate object.
list1[1] = 3
print(list1)
print (list2)
# id() of both names, to compare object identity.
print (id(list1))
print (id(list2))
output:-
[1, 3]
[1, 2]
139997249899528
139997259457928
Basically, replace the CurrentPositionA and CurrentPositionB assignments with the commands below (after importing copy) and see if it works.
# Independent copies, so later in-place changes leave the starting positions alone.
CurrentPositionA = copy.deepcopy(StartingPositionA)
CurrentPositionB = copy.deepcopy(StartingPositionB)

python2.7: TypeError: unhashable type: 'list'

I have tried to find an answer to this in vain, so here goes:
The goal is to have a dictionary that has a few lists as values, and then have a function that (depending on user input) will take one of those lists and combine it with other lists, and finally I should get the final list printed.
Seems simple enough but what I get is a type error (lists being unhashable). The combine2 function seems to be working perfectly fine with any other two lists I try to feed it, except for when it tries to get a list that is a dictionary value (??). Does anybody know what I'm doing wrong?
# Animal names grouped by category; the keys are the group names that
# animals_mix() uses to index this dict.
dic = {
'reptiles': ['lizzard', 'crocodile', 'T-Rex'],
'birds': ['canary', 'parrot', 'seagul'],
'mammals': ['monkey', 'cat', 'dog'],
'insects': ['ant', 'bee', 'wasp']
}
# Extra list that animals_mix() combines with the chosen group.
FishList = ['goldfish', 'shark', 'trout']
def combine2(a, b):
    """Return the concatenations of every item of ``a`` with every item of ``b``.

    Items are converted with ``str`` before joining.  Only results that are
    8 to 16 characters long (inclusive) are kept, ordered by item of ``a``
    first and item of ``b`` second.
    """
    joined = (str(left) + str(right) for left in a for right in b)
    return [word for word in joined if 8 <= len(word) <= 16]
def animals_mix(k, l):
    """Fill ``l`` with the group ``dic[k]`` plus some combinations of it.

    Args:
        k: key of ``dic`` naming the animal group (e.g. ``'birds'``), not
            the group's list itself.
        l: list that receives the result; its contents are replaced.

    Returns:
        ``l``, now holding the group followed by three ``combine2`` mixes
        of it with ``FishList``.
    """
    group = dic[k]
    list1 = combine2(FishList, group)
    list2 = combine2(list1, group)
    list3 = combine2(group, FishList)
    # Slice assignment changes the caller's list.  A plain "l = ..." only
    # rebinds the local name and the result would be lost on return.
    l[:] = group + list1 + list2 + list3
    return l
def animals():
    """Ask for an animal group and return the list ``animals_mix`` fills for it.

    Prompts until the user enters 1-4 or just presses ENTER.  A valid number
    calls ``animals_mix`` with the matching group name and a fresh list;
    ENTER leaves that list empty.  Anything else prints an error and asks
    again.

    Returns:
        The list that was handed to ``animals_mix`` (empty after ENTER).
    """
    print('''\n\nwhat's your favourite animal group?\n
1) reptiles
2) birds
3) mammals
4) insects
''')
    # Menu number -> key of ``dic``.  animals_mix() looks the group up
    # itself, so it must receive the key: passing dic[...] made it evaluate
    # dic[<list>], which raises "unhashable type: 'list'".
    groups = {'1': 'reptiles', '2': 'birds', '3': 'mammals', '4': 'insects'}
    tmp = []
    while True:
        x = raw_input("[+] pick a number > ")
        if x == '':
            break
        if x in groups:
            animals_mix(groups[x], tmp)
            break
        print("\nError: That wasn't in the list of options\nType one of the numbers or press ENTER to move on\n")
    return tmp
print animals()
For "TypeError: unhashable type: 'list'", it is because you are actually passing the list in your dict when you seemingly intend to pass the key then access that list:
animals_mix(dic['reptiles'], tmp)
...
def animals_mix(k, l):
list1 = combine2(FishList, dic[k])
in the first line of animals_mix() you are actually trying to do dic[dic['reptiles']] and dicts can not be keyed by un-hashable types, hence the error.

Using for loops to create multiple lists with lower lists

I have a certain function:
# Question exhibit: attempt at building n nested sequences.
def create4(n):
output = []
for i in range(n):
# range(1+i, 1) stops at 1, which is never above its start (1+i >= 1), so
# every appended range is empty.
output.append(range(1+i,1))
return output
I want it to produce:
[[1,2,3,4,5], [2,3,4,5],[3,4,5],[4,5],[5]]
when I print create4(5). How would I change my for loop to make the code produce the desired output?
Add a second for loop. The first for will iterate over each nested list. The second for will add the elements.
def create4(n):
    """Return ``n`` lists where list ``i`` (0-based) holds ``i + 1`` up to ``n``.

    For example ``create4(3)`` gives ``[[1, 2, 3], [2, 3], [3]]``.
    """
    return [list(range(start, n + 1)) for start in range(1, n + 1)]
print create4(5)