I am having trouble storing the IDs as keys in a sub (parent-child) kind of structure. I spent hours on it and could not figure out a way to accomplish this. The output I am expecting is at the end of this post. Any help would be great.
import sys
import collections
dict = collections.OrderedDict()
dict["A.1"] = {"parent_child":0}
dict["A.1.1"] = {"parent_child":1}
dict["A.1.1.1"] = {"parent_child":2}
dict["A.1.1.2"] = {"parent_child":2}
dict["A.1.1.3"] = {"parent_child":2}
dict["A.1.2"] = {"parent_child":1}
dict["A.1.2.1"] = {"parent_child":2}
dict["A.1.2.2"] = {"parent_child":2}
dict["A.1.2.2.1"] = {"parent_child":3}
dict["A.1.2.2.2"] = {"parent_child":3}
dict["A.1.2.3"] = {"parent_child":2}
dict["A.1.3"] = {"parent_child":1}
dict["A.1.4"] = {"parent_child":1}
print(dict)
new_dict = {}
p = 0 # previous index
i = 0 # current
n = 1 # next index
current_PC = 0 # current parent_child
next_PC = 0 # next parent_child
previous_id = ""
current_id = ""
next_id = ""
change_current = True
change = True
lst = []
while(True):
if change_current:
current_id = dict.keys()[i]
current_PC = dict.values()[i]["parent_child"]
change_current = False
try:
next_id = dict.keys()[n]
next_PC = dict.values()[n]["parent_child"]
except:
pass # it will go out of index
print("KEY {0}".format(current_id))
if next_PC > current_PC:
if next_PC - current_PC == 1:
lst.append(next_PC)
next_PC += 1
print("next_PC: {0}".format(next_PC))
if next_PC == current_PC:
new_dict[current_id] = lst
lst = []
break
print(new_dict)
I am trying to make the output look like this (or something similar); new_dict should look like:
new_dict["A.1"] = ["A.1.1", "A.1.2", "A.1.3", "A.1.4"]
new_dict["A.1.1"] = ["A.1.1.1", "A.1.1.2", "A.1.1.3"]
new_dict["A.1.1.1"] = []
new_dict["A.1.1.2"] = []
new_dict["A.1.1.3"] = []
new_dict["A.1.2"] = ["A.1.2.1", "A.1.2.2", "A.1.2.3"]
new_dict["A.1.2.1"] = []
new_dict["A.1.2.2"] = ["A.1.2.2.1", "A.1.2.2.2"]
new_dict["A.1.2.2.1"] = []
new_dict["A.1.2.2.2"] = []
new_dict["A.1.2.3"] = []
new_dict["A.1.3"] = []
new_dict["A.1.4"] = []
This gives you the output you are asking for. Since I did not see a {"parent_child": ...} in your desired output, I did not proceed with anything else.
options = ["A.1","A.1.1","A.1.1.1","A.1.1.2","A.1.1.3","A.1.2","A.1.2.1","A.1.2.2","A.1.2.2.1","A.1.2.2.2","A.1.2.3","A.1.3","A.1.4"]
new_dict = {}
for i, key in enumerate(options):
new_dict[key] = []
ls = []
for j, opt in enumerate(options):
if (key in opt) and (len(opt)-len(key)==2):
new_dict[key].append(opt)
print(new_dict)
EDIT
Using the comment from @Ranbir Aulakh
options = ["A.1","A.1.1","A.1.1.1","A.1.1.2","A.1.1.3","A.1.2","A.1.2.1","A.1.2.2","A.1.2.2.1","A.1.2.2.2","A.1.2.3","A.1.3","A.1.4"]
new_dict = {}
for i, key in enumerate(options):
new_dict[key] = []
ls = []
for j, opt in enumerate(options):
if (key in opt) and (len(opt.split("."))-len(key.split("."))==1):#(len(opt)-len(key)==2):
new_dict[key].append(opt)
print(new_dict)
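As an alternative sketch (not required for the output above): since every key's parent is just the key with its last dotted segment removed, the same mapping can be built in a single pass over the keys instead of comparing every pair. This assumes Python 3.7+ so a plain dict keeps insertion order; the name keys is only for illustration.
keys = ["A.1", "A.1.1", "A.1.1.1", "A.1.1.2", "A.1.1.3", "A.1.2", "A.1.2.1",
        "A.1.2.2", "A.1.2.2.1", "A.1.2.2.2", "A.1.2.3", "A.1.3", "A.1.4"]
new_dict = {key: [] for key in keys}
for key in keys:
    parent = key.rsplit(".", 1)[0]  # drop the last ".<segment>" to get the parent
    if parent in new_dict:          # the top-level key "A.1" has no parent here
        new_dict[parent].append(key)
print(new_dict)
This also avoids the substring check, which could mis-match keys that merely contain another key as text.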
Related
I am new to GLPK, so my apologies in advance if I'm missing something simple!
I have a largeish LP that I am feeding through GLPK to model an energy market. I'm running the following command line to have GLPK process it:
winglpk-4.65\glpk-4.65\w64\glpsol --lp problem.lp --data ExampleDataFile.dat --output results2.txt
When I open the resulting text file I can see the outputs, which all look sensible. I have one big problem: each record is split over two rows, making it very difficult to clean the file. See an extract below:
No. Row name St Activity Lower bound Upper bound Marginal
------ ------------ -- ------------- ------------- ------------- -------------
1 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1990)_
NS 0 0 = < eps
2 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1991)_
NS 0 0 = < eps
3 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1992)_
NS 0 0 = < eps
4 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1993)_
NS 0 0 = < eps
5 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1994)_
NS 0 0 = < eps
6 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1995)_
NS 0 0 = < eps
7 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1996)_
NS 0 0 = < eps
8 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1997)_
NS 0 0 = < eps
9 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1998)_
NS 0 0 = < eps
10 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1999)_
NS 0 0 = < eps
11 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2000)_
NS 0 0 = < eps
12 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2001)_
NS 0 0 = < eps
13 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2002)_
NS 0 0 = < eps
14 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2003)_
NS 0 0 = < eps
15 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2004)_
NS 0 0 = < eps
I would be very grateful for any suggestions on either:
How I can get each record in the output text file onto a single row, or
Ideas on how to clean / post-process the existing text file output.
I'm sure I'm missing something simple here, but the output is in a very unhelpful format at the moment!
Thanks!
I wrote a Python parser for the GLPK output file. It is not beautiful and not safe (little error handling), but it works (for pure simplex problems).
You can call it on the output file:
outp = GLPKOutput('myoutputfile')
print(outp)
val1 = outp.getCol('mycolvar','Activity')
val2 = outp.getRow('myrowname','Upper_bound') # row names should be defined
The class is as follows:
import re
import sys

class GLPKOutput:
def __init__(self,filename):
self.rows = {}
self.columns = {}
self.nRows = 0
self.nCols = 0
self.nNonZeros = 0
self.Status = ""
self.Objective = ""
self.rowHeaders = []
self.rowIdx = {}
self.rowWidth = []
self.Rows = []
self.hRows = {}
self.colHeaders = []
self.colIdx = {}
self.colWidth = []
self.Cols = []
self.hCols = {}
self.wcols = ['Activity','Lower_bound','Upper_bound','Marginal']
self.readFile(filename)
# split columns with weird line break
def smartSplit(self,line,type,job):
ret = []
line = line.rstrip()
if type == 'ROWS':
cols = len(self.rowHeaders)
idx = self.rowWidth
else:
cols = len(self.colHeaders)
idx = self.colWidth
if job == 'full':
start = 0
for i in range(cols):
stop = start+idx[i]+1
ret.append(line[start:stop].strip())
start = stop
elif job == 'part1':
entries = line.split()
ret = entries[0:2]
elif job == 'part2':
start = 0
for i in range(cols):
stop = start+idx[i]+1
ret.append(line[start:stop].strip())
start = stop
ret = ret[2:]
# print()
# print("SMART:",job,line.strip())
# print(" TO:",ret)
return ret
def readFile(self,filename):
fp = open(filename,"r")
lines = fp.readlines()
fp.close()
i = 0
pos = "HEAD"
while pos == 'HEAD' and i<len(lines):
entries = lines[i].split()
if len(entries)>0:
if entries[0] == 'Rows:':
self.nRows = int(entries[1])
elif entries[0] == 'Columns:':
self.nCols = int(entries[1])
elif entries[0] == 'Non-zeros:':
self.nNonZeros = int(entries[1])
elif entries[0] == 'Status:':
self.Status = entries[1]
elif entries[0] == 'Objective:':
self.Objective = float(entries[3]) #' '.join(entries[1:])
elif re.search('Row name',lines[i]):
lines[i] = lines[i].replace('Row name','Row_name')
lines[i] = lines[i].replace('Lower bound','Lower_bound')
lines[i] = lines[i].replace('Upper bound','Upper_bound')
entries = lines[i].split()
pos = 'ROWS'
self.rowHeaders = entries
else:
pass
i+= 1
# formatting of row width
self.rowWidth = lines[i].split()
for k in range(len(self.rowWidth)): self.rowWidth[k] = len(self.rowWidth[k])
# print("Row Widths:",self.rowWidth)
i+= 1
READY = False
FOUND = False
while pos == 'ROWS' and i<len(lines):
if re.match(r'^\s*[0-9]+',lines[i]): # new line
if len(lines[i].split())>2: # no line break
entries = self.smartSplit(lines[i],pos,'full')
READY = True
else: # line break
entries = self.smartSplit(lines[i],pos,'part1')
READY = False
FOUND = True
else:
if FOUND and not READY: # second part of line
entries += self.smartSplit(lines[i],pos,'part2')
READY = True
FOUND = False
if READY:
READY = False
FOUND = False
# print("ROW:",entries)
if re.match('[0-9]+',entries[0]): # valid line with solution data
self.Rows.append(entries)
self.hRows[entries[1]] = len(self.Rows)-1
else:
print("wrong line format ...")
print(entries)
sys.exit()
elif re.search('Column name',lines[i]):
lines[i] = lines[i].replace('Column name','Column_name')
lines[i] = lines[i].replace('Lower bound','Lower_bound')
lines[i] = lines[i].replace('Upper bound','Upper_bound')
entries = lines[i].split()
pos = 'COLS'
self.colHeaders = entries
else:
pass #print("NOTHING: ",lines[i])
i+= 1
# formatting of column width
self.colWidth = lines[i].split()
for k in range(len(self.colWidth)): self.colWidth[k] = len(self.colWidth[k])
# print("Col Widths:",self.colWidth)
i+= 1
READY = False
FOUND = False
while pos == 'COLS' and i<len(lines):
if re.match(r'^\s*[0-9]+',lines[i]): # new line
if len(lines[i].split())>2: # no linebreak
entries = self.smartSplit(lines[i],pos,'full')
READY = True
else: # linebreak
entries = self.smartSplit(lines[i],pos,'part1')
READY = False
FOUND = True
else:
if FOUND and not READY: # second part of line
entries += self.smartSplit(lines[i],pos,'part2')
READY = True
FOUND = False
if READY:
READY = False
FOUND = False
# print("COL:",entries)
if re.match('[0-9]+',entries[0]): # valid line with solution data
self.Cols.append(entries)
self.hCols[entries[1]] = len(self.Cols)-1
else:
print("wrong line format ...")
print(entries)
sys.exit()
elif re.search('Karush-Kuhn-Tucker',lines[i]):
pos = 'TAIL'
else:
pass #print("NOTHING: ",lines[i])
i+= 1
for i,e in enumerate(self.rowHeaders): self.rowIdx[e] = i
for i,e in enumerate(self.colHeaders): self.colIdx[e] = i
def getRow(self,name,attr):
if name in self.hRows:
if attr in self.rowIdx:
try:
val = float(self.Rows[self.hRows[name]][self.rowIdx[attr]])
except:
val = self.Rows[self.hRows[name]][self.rowIdx[attr]]
return val
else:
return -1
def getCol(self,name,attr):
if name in self.hCols:
if attr in self.colIdx:
try:
val = float(self.Cols[self.hCols[name]][self.colIdx[attr]])
except:
val = self.Cols[self.hCols[name]][self.colIdx[attr]]
return val
else:
print("key error:",name,"not known ...")
return -1
def __str__(self):
retString = '\n'+"="*80+'\nSOLUTION\n'
retString += "nRows: "+str(self.nRows)+'/'+str(len(self.Rows))+'\n'
retString += "nCols: "+str(self.nCols)+'/'+str(len(self.Cols))+'\n'
retString += "nNonZeros: "+str(self.nNonZeros)+'\n'
retString += "Status: "+str(self.Status)+'\n'
retString += "Objective: "+str(self.Objective)+'\n\n'
retString += ' '.join(self.rowHeaders)+'\n'
for r in self.Rows: retString += ' # '.join(r)+' #\n'
retString += '\n'
retString += ' '.join(self.colHeaders)+'\n'
for c in self.Cols: retString += ' # '.join(c)+' #\n'
return retString
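If all you need is to undo the two-row wrapping before cleaning the file yourself, a much smaller post-processing sketch may already be enough. It assumes, as in the extract above, that a wrapped record is a line holding only the number and the long row/column name, with its values on the very next line; the output file name results2_flat.txt is just an example.
import re

with open('results2.txt') as src, open('results2_flat.txt', 'w') as dst:
    pending = None
    for line in src:
        line = line.rstrip('\n')
        if pending is not None:
            # continuation line: glue the values back onto the wrapped record
            dst.write(pending + ' ' + line.strip() + '\n')
            pending = None
        elif re.match(r'^\s*\d+\s+\S+\s*$', line):
            # only a record number and a long name on this line -> it was wrapped
            pending = line
        else:
            dst.write(line + '\n')
    if pending is not None:
        dst.write(pending + '\n')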
The below script I have to compare Test1 vs Test2. The Test1 and Test2 data is mentioned at the bottom. I tried to make it a generic one so that it will also work for different devices.
import re
data_cleaned = {}
current_key = ''
action_flag = False
data_group = []
if_found_vlan = True
output = open('./output.txt','r').read()
switch_red = re.findall(r'(\w*-RED\d{0,1})', output)[0]
switch_blue = re.findall(r'(\w*-BLUE\d{0,1})', output)[0]
for line in open('./output.txt'):
m = re.match(r'(\w*-RED\d{0,1}|\w*-BLUE\d{0,1})# sh run vlan \d+', line)
if m:
if not if_found_vlan:
data_cleaned[current_key].append([])
if_found_vlan = False
current_key = m.group(1)
if not data_cleaned.has_key(current_key):
data_cleaned[current_key] = []
continue
mm = re.match(r'vlan \d+', line)
if mm:
if_found_vlan = True
action_flag = True
data_group = []
if action_flag and '' == line.strip():
action_flag = False
data_cleaned[current_key].append(data_group)
if action_flag:
data_group.append(line.replace('\r', '').replace('\n', ''))
if not if_found_vlan:
data_cleaned[current_key].append([])
#print ("+++++++++++++++++ The missing configuration ++++++++++++++\n")
print switch_blue + "#" + " has below missing VLAN config\n "
p = [item for index, item in enumerate(data_cleaned[switch_blue]) if [] != [it for it in item if it not in data_cleaned[switch_red][index]]]
print('\n'.join(['\n'.join(item) for item in p]))
print ("+++++++++++++++++++++++++++++++\n")
print switch_red + "#" + " has below missing VLAN config\n "
q = [item for index, item in enumerate(data_cleaned[switch_red]) if [] != [it for it in item if it not in data_cleaned[switch_blue][index]]]
print('\n'.join(['\n'.join(item) for item in q]))
Updated for your raw output: I think using a one-dimensional list to represent your output is not a good way for further handling.
When we handle data, we first need to clean it and set up a model that is easy for the rest of the program to process, so I use a dict with a two-dimensional list inside it to model your output, which makes it easier to process.
import re
data_cleaned = {}
current_key = ''
action_flag = False
data_group = []
if_found_vlan = True
for line in open('./output.txt'):
m = re.match(r'(Test\d+)# sh run vlan \d+', line)
if m:
if not if_found_vlan:
data_cleaned[current_key].append([])
if_found_vlan = False
current_key = m.group(1)
if not data_cleaned.has_key(current_key):
data_cleaned[current_key] = []
continue
mm = re.match(r'vlan \d+', line)
if mm:
if_found_vlan = True
action_flag = True
data_group = []
if action_flag and '' == line.strip():
action_flag = False
data_cleaned[current_key].append(data_group)
if action_flag:
data_group.append(line.replace('\r', '').replace('\n', ''))
if not if_found_vlan:
data_cleaned[current_key].append([])
print ("+++++++++++++++++ The missing configuration is++++++++++++++\n")
p = [item for index, item in enumerate(data_cleaned['Test2']) if [] != [it for it in item if it not in data_cleaned['Test1'][index]]]
print('\n'.join(['\n'.join(item) for item in p]))
print ("+++++++++++++++++ The missing configuration is++++++++++++++\n")
q = [item for index, item in enumerate(data_cleaned['Test1']) if [] != [it for it in item if it not in data_cleaned['Test2'][index]]]
print('\n'.join(['\n'.join(item) for item in q]))
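To make the model concrete, for a small hypothetical output.txt such as (each block terminated by a blank line, as in real "sh run" output)
Test1# sh run vlan 10
vlan 10
  name USERS

Test2# sh run vlan 10
vlan 10

the code above would build roughly
data_cleaned == {'Test1': [['vlan 10', '  name USERS']],
                 'Test2': [['vlan 10']]}
so the two comparison loops only have to diff lists of lines per device, group by group.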
I am working with the ExactTarget FUEL SDK to retrieve data from the Salesforce Marketing Cloud. More specifically, I am working on calling "Unsub Events" (https://github.com/salesforce-marketingcloud/FuelSDK-Python/blob/master/objsamples/sample_unsubevent.py#L15), but the structure of the SOAP response has a couple of deeply nested dictionary objects, which I need to iterate over and place into dataframes. Here is what the response looks like; I need to place each of the variables into a separate dataframe.
(UnsubEvent){
Client =
(ClientID){
ID = 11111111
}
PartnerKey = None
CreatedDate = 2016-07-13 13:37:46.000663
ModifiedDate = 2016-07-13 13:37:46.000663
ID = 11111111
ObjectID = "11111111"
SendID = 11111111
SubscriberKey = "aaa#aaa.com"
EventDate = 2016-07-13 13:37:46.000663
EventType = "Unsubscribe"
TriggeredSendDefinitionObjectID = None
BatchID = 1
List =
(List){
PartnerKey = None
ID = 11111111
ObjectID = None
Type = "aaaa"
ListClassification = "aaa"
}
IsMasterUnsubscribed = False
}]
I have successfully placed all variables in dataframes except one, "ListClassification". I am getting the error "List instance has no attribute 'ListClassification'". My question is: why is this happening if I can see the attribute in the response, and is there a fix for the issue?
My Code:
import ET_Client
import pandas as pd
try:
debug = False
stubObj = ET_Client.ET_Client(False, debug)
print '>>>UnsubEvents'
getUnsubEvent = ET_Client.ET_UnsubEvent()
getUnsubEvent.auth_stub = stubObj
getResponse3 = getUnsubEvent.get()
ResponseResultsUnsubEvent = getResponse3.results
#print ResponseResultsUnsubEvent
ClientIDUnsubEvents = []
partner_keys3 = []
created_dates3 = []
modified_date3 = []
ID3 = []
ObjectID3 = []
SendID3 = []
SubscriberKey3 = []
EventDate3 = []
EventType3 = []
TriggeredSendDefinitionObjectID3 = []
BatchID3 = []
IsMasterUnsubscribed = []
ListPartnerKey = []
ListID = []
ListObjectID = []
ListType = []
ListClassification = []
for UnsubEvent in ResponseResultsUnsubEvent:
ClientIDUnsubEvents.append(str(UnsubEvent['Client']['ID']))
partner_keys3.append(UnsubEvent['PartnerKey'])
created_dates3.append(UnsubEvent['CreatedDate'])
modified_date3.append(UnsubEvent['ModifiedDate'])
ID3.append(UnsubEvent['ID'])
ObjectID3.append(UnsubEvent['ObjectID'])
SendID3.append(UnsubEvent['SendID'])
SubscriberKey3.append(UnsubEvent['SubscriberKey'])
EventDate3.append(UnsubEvent['EventDate'])
EventType3.append(UnsubEvent['EventType'])
TriggeredSendDefinitionObjectID3.append(UnsubEvent['TriggeredSendDefinitionObjectID'])
BatchID3.append(UnsubEvent['BatchID'])
IsMasterUnsubscribed.append(UnsubEvent['IsMasterUnsubscribed'])
ListPartnerKey.append(str(UnsubEvent['List']['PartnerKey']))
ListID.append(str(UnsubEvent['List']['ID']))
ListObjectID.append(str(UnsubEvent['List']['ObjectID']))
ListType.append(str(UnsubEvent['List']['Type']))
ListClassification.append(str(UnsubEvent['List']['ListClassification']))
df3 = pd.DataFrame({'ListPartnerKey':ListPartnerKey,'ListID':ListID,'ListObjectID':ListObjectID,'ListType':ListType,
'ClientID':ClientIDUnsubEvents,'PartnerKey':partner_keys3,'CreatedDate':created_dates3,
'ModifiedDate':modified_date3,'ID':ID3,'ObjectID':ObjectID3,'SendID':SendID3,'SubscriberKey':SubscriberKey3,
'EventDate':EventDate3,'EventType':EventType3,'TriggeredSendDefinitionObjectID':TriggeredSendDefinitionObjectID3,
'BatchID':BatchID3,'ListClassification':ListClassification,'IsMasterUnsubscribed':IsMasterUnsubscribed})
print df3
Literally all the other attributes are going into the dataframe, but I am not sure why "ListClassification" is not being picked up.
Thank you in advance for your help!
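One hedged debugging sketch (an assumption, not a confirmed cause): suds-style response objects typically expose only the elements that actually came back, so an optional element like ListClassification can be missing from some UnsubEvent records even though it appears in the one you printed. Guarding the access inside the loop avoids the AttributeError; None here is just an illustrative placeholder for the missing value.
lst = UnsubEvent['List']
ListClassification.append(str(lst.ListClassification) if hasattr(lst, 'ListClassification') else None)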
I am getting the error "need more than 1 value to unpack". I am using defaultdict to turn my list into a dictionary.
def getTabsCols(request):
proj_name = request.GET.get('projname')
cursor = connection.cursor()
proj_details = TProjects.objects.get(
attr_project_name=proj_name, attr_project_type='Structure', attr_is_active=1)
pid = proj_details.project_id
query = 'call SP_Get_TABCOL_NAMES('+str(pid)+')'
cursor.execute(query)
proj_details = TProjects.objects.get(
attr_project_name=proj_name, attr_project_type='Structure', attr_is_active=1)
pid = proj_details.project_id
query = 'call SP_Get_TABCOL_NAMES('+str(pid)+')'
cursor.execute(query)
result = cursor.fetchall()
tab_list = []
tab_col_list = []
prd = {}
res = []
for row in result:
tabs = collections.OrderedDict()
schema_name = row[1]
table_name = row[3]
tab = (schema_name + '.' + table_name).encode('utf8')
tabs[tab] = row[5].encode('utf8')
tab_list.append(tabs)
d = collections.defaultdict(set)
for k, v in tab_list:
d1[k].append(v)
d = dict((k, tuple(v)) for k, v in d1.iteritems())
return HttpResponse(d, content_type="text/html")
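For what it's worth, the "need more than 1 value to unpack" error usually means the loop is iterating over something that does not yield (key, value) pairs: tab_list is a list of one-entry dicts, so for k, v in tab_list tries to unpack each dict rather than a tuple. A minimal sketch of the grouping pattern on its own, with hypothetical (schema.table, column) data outside the view:
import collections

pairs = [('public.users', 'id'), ('public.users', 'email'), ('public.orders', 'id')]

d1 = collections.defaultdict(list)   # list, because values are appended
for k, v in pairs:                   # (key, value) tuples unpack cleanly
    d1[k].append(v)

d = {k: tuple(v) for k, v in d1.items()}
# {'public.users': ('id', 'email'), 'public.orders': ('id',)}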
Does anyone know how I can get a Django filter to build an OR statement? I'm not sure if I have to use the Q object or not; I thought I needed some type of OR pipe, but this doesn't seem right:
filter_mfr_method = request.GET.getlist('filter_mfr_method')
for m in filter_mfr_method:
designs = designs.filter(Q(mfr_method = m) | m if m else '')
# Or should I do it this way?
#designs = designs.filter(mfr_method = m | m if m else '')
I would like this to be:
SELECT * FROM table WHERE mfr_method = 1 OR mfr_method = 2 OR mfr_method = 3
EDIT: Here is what worked
filter_mfr_method = request.GET.getlist('filter_mfr_method')
list = []
for m in filter_mfr_method:
list.append(Q(mfr_method = m))
designs = designs.filter(reduce(operator.or_, list))
What about:
import operator
filter_mfr_method = request.GET.getlist('filter_mfr_method')
filter_params = reduce(operator.or_, (Q(mfr_method=m) for m in filter_mfr_method), Q())
designs = designs.filter(filter_params)
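One caveat with the reduce version: on Python 3, reduce is no longer a builtin, so the snippet above also needs
from functools import reduce
from django.db.models import Q
next to import operator; the Q() seed keeps the call valid (a no-op filter) when the list is empty.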
Something I used before:
qry = None
for val in request.GET.getlist('filter_mfr_method'):
v = {'mfr_method': val}
q = Q(**v)
if qry:
qry = qry | q
else:
qry = q
designs = designs.filter(qry)
That is taken from one of my query builders.
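Worth noting as an alternative to building Q objects at all: when every OR term is an equality test on the same field, Django's __in lookup produces an equivalent WHERE mfr_method IN (...) query in one line. A short sketch using the same request parameter as above:
filter_mfr_method = request.GET.getlist('filter_mfr_method')
if filter_mfr_method:
    designs = designs.filter(mfr_method__in=filter_mfr_method)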