Defining mutable matrix and manually setting the values during run - pyomo

So my interest is to generate a hessian and gradient during a run to evaluate a constraint -
def shifted_obj(model):
for i in range(1,1+model.m_set):
M.g_shift[i] =((-1)**(i-1))*math.cos(model.v[i])
for i in range(1,1+model.m_set+1):
for j in range(1,1+model.m_set+1):
if(i==j):
model.H_shift[i,j] = -((-1)**(i-1))*math.sin(model.v[i])
return .5*sum(model.g_shift[ i ] * model.g_shift[ i ] for i in model.m_set) + .5*sum( model.g_shift[i]*model.H_shift[i,j]*model.pk[j] for i in model.m_set for j in model.m_set ) + .5*sum( model.pk[i]*model.H_shift[j,i]*model.g_shift[j] for i in model.m_set for j in model.m_set ) + .5*sum( model.pk[i]*model.H_shift[j,i]*model.H_shift[i,j]*model.pk[j] for i in model.m_set for j in model.m_set )
I set two mutable parameters, and I try to index over them to set them to the correct value but looks like this isn't the way to do this:
ERROR: Rule failed when generating expression for constraint de_constraint2:
TypeError: unsupported operand type(s) for +: 'int' and
'FiniteSimpleRangeSet'
ERROR: Constructing component 'de_constraint2' from data=None failed:
TypeError: unsupported operand type(s) for +: 'int' and
'FiniteSimpleRangeSet'
Traceback (most recent call last):
File "line_search.py", line 199, in <module>
line_search(init_x,eps)
File "line_search.py", line 156, in line_search
instance = M.create_instance()
File "/Users/drvogt/opt/anaconda3/lib/python3.7/site-packages/pyomo/core/base/PyomoModel.py", line 726, in create_instance
profile_memory=profile_memory )
File "/Users/drvogt/opt/anaconda3/lib/python3.7/site-packages/pyomo/core/base/PyomoModel.py", line 783, in load
profile_memory=profile_memory)
File "/Users/drvogt/opt/anaconda3/lib/python3.7/site-packages/pyomo/core/base/PyomoModel.py", line 834, in _load_model_data
self._initialize_component(modeldata, namespaces, component_name, profile_memory)
File "/Users/drvogt/opt/anaconda3/lib/python3.7/site-packages/pyomo/core/base/PyomoModel.py", line 885, in _initialize_component
declaration.construct(data)
File "/Users/drvogt/opt/anaconda3/lib/python3.7/site-packages/pyomo/core/base/constraint.py", line 755, in construct
tmp = _init_rule(_self_parent)
File "line_search.py", line 152, in <lambda>
sufficentprogress_rule = lambda M: sufficentprogress(M) <=0.0
File "line_search.py", line 80, in sufficentprogress
return shifted_obj(model) - extra_term - line_obj(model)
File "line_search.py", line 66, in shifted_obj
for i in range(1,1+model.m_set):
TypeError: unsupported operand type(s) for +: 'int' and 'FiniteSimpleRangeSet'
If someone can tell me how to manually set these fields I would appreciate it. Also do I have to do some reconstruct after I set these matrices? I think I may because this function is being called in the de_constraint2constraint.
Here is my entire code for context, thank you.
from pyomo.core import *
from pyomo.environ import *
import numpy as np
import random
import math
#More Thuente linesearch
def objective_func(xk):
x_len = len(xk)
my_sum=0.0
for i in range(1,x_len+1):
#my_sum = my_sum + ((-1)**(i-1))*(xk[i-1]-1)**2
my_sum = my_sum + ((-1)**(i-1))*math.sin(xk[i-1])
return my_sum
def grad_func(xk):
x_len = len(xk)
grad ={}
for i in range(1,len(xk)+1):
grad[i] = ((-1)**(i-1))*math.cos(xk[i-1])
return grad
# grad=np.zeros((x_len,1))
# my_sum=0.0
# for i in range(0,x_len):
# grad[i,0]= ((-1)**i)*2.0*(xk[i]-1)
# return grad
def hess_func(xk):
# H = np.zeros((len(xk),len(xk)))
# for i in range(0,len(xk)):
# H[i,i] = ((-1)**i)*2.0
# return H
H = {}
for i in range(1,len(xk)+1):
for j in range(1,len(xk)+1):
#print((i,j))
if(i==j):
H[(i,j)] = -((-1)**(i-1))*math.sin(xk[i-1])
else:
H[(i,j)] = 0.0
return H
def convert_xk(xk):
new_xk = {}
for i in range(1,len(xk)+1):
new_xk[i] = xk[i-1]
return new_xk
def line_search(init_x,eps):
def c_param_init ( model, v ):
grad_set=init_grad
return grad_set[ v -1 ]
def x_param_init ( model, v ):
return init_x[ v -1 ]
def hessian_init(model,v1,v2):
return init_hessian[ v1 -1 ][ v2 -1 ]
def line_obj(model):
return .5*sum(model.g[ i ] * model.g[ i ] for i in model.m_set) + .5*sum( model.g[i]*model.H[i,j]*model.pk[j] for i in model.m_set for j in model.m_set ) + .5*sum( model.pk[i]*model.H[j,i]*model.g[j] for i in model.m_set for j in model.m_set ) + .5*sum( model.pk[i]*model.H[j,i]*model.H[i,j]*model.pk[j] for i in model.m_set for j in model.m_set )
def shifted_obj(model):
for i in range(1,1+model.m_set):
M.g_shift[i] =((-1)**(i-1))*math.cos(model.v[i])
for i in range(1,1+model.m_set+1):
for j in range(1,1+model.m_set+1):
if(i==j):
model.H_shift[i,j] = -((-1)**(i-1))*math.sin(model.v[i])
return .5*sum(model.g_shift[ i ] * model.g_shift[ i ] for i in model.m_set) + .5*sum( model.g_shift[i]*model.H_shift[i,j]*model.pk[j] for i in model.m_set for j in model.m_set ) + .5*sum( model.pk[i]*model.H_shift[j,i]*model.g_shift[j] for i in model.m_set for j in model.m_set ) + .5*sum( model.pk[i]*model.H_shift[j,i]*model.H_shift[i,j]*model.pk[j] for i in model.m_set for j in model.m_set )
def sufficentprogress(model):
extra_term = .0001*model.alpha[1]*sum(model.g[i] * model.pk[i] for i in model.m_set)
print("YO")
print(extra_term)
return shifted_obj(model) - extra_term - line_obj(model)
def build_line_constraint(M,i):
return (M.v[i] - M.xk[i] - M.alpha[1]*M.pk[i]==0.0)
counter=0
curr_xk = init_x
#print(curr_xk)
#current_grad = grad_func(curr_xk)
#current_grad=current_grad.tolist()
#current_H = hess_func(curr_xk)
#print(current_H)
#current_H = current_H.tolist()
#print(current_H)
first_obj = objective_func(init_x)
#print(first_obj)
obj_list=[]
while(counter <10):
current_grad = grad_func(curr_xk)
#print(current_grad)
current_H = hess_func(curr_xk)
#current_grad = current_grad.tolist()
#current_H=current_H.tolist()
# print(curr_xk)
# print(current_grad)
# temp_grad = []
# for i in range(0,len(current_grad)):
# temp_grad.append(current_grad[i][0])
# print(current_H)
# current_grad = temp_grad
# print(current_grad)
M = AbstractModel()
M.m_set = RangeSet(1, len(curr_xk))
M.n_set = RangeSet(1, 1)
M.a_set = RangeSet(1)
dic_xk = convert_xk(curr_xk)
print("xk" +str(dic_xk))
#print(dic_xk)
# M.H = Param( M.m_set, M.m_set, initialize=hessian_init)
# M.g = Param( M.m_set, initialize=c_param_init)
# M.xk = Param( M.m_set, initialize=x_param_init)
M.H = Param(M.m_set,M.m_set,initialize=current_H)
M.H_shift = Param(M.m_set,M.m_set,mutable=True)
M.g_shift = Param(M.m_set,mutable=True)
#print(current_grad)
M.g = Param(M.m_set,initialize=current_grad)
M.xk = Param(M.m_set,initialize=dic_xk)
#print(eps)
M.epsilon = Param(M.n_set, initialize={1:eps})
#M.x = Var( M.m_set, within=Binary,initialize=x_param_init)
ones_init ={}
for i in range(1,len(curr_xk)+1):
ones_init[i]=1.0
M.v = Var( M.m_set, within=Binary, initialize=ones_init)
M.pk = Var( M.m_set, domain=Reals, initialize=ones_init)
#M.alpha = Var(M.a_set,domain=NonNegativeReals,initialize={1:1.0})
M.alpha = Var(M.a_set,bounds=(.001, 1.0),initialize={1:1.0})
M.i =RangeSet(len(curr_xk))
sufficentprogress_rule = lambda M: sufficentprogress(M) <=0.0
M.de_constraint2= Constraint(rule=sufficentprogress_rule)
M.Co1 = Constraint(M.i, rule = build_line_constraint)
M.obj = Objective( rule=line_obj, sense=minimize )
instance = M.create_instance()
#print(instance.pprint())
#results=SolverFactory('mindtpy').solve(instance, mip_solver='cplex', nlp_solver='ipopt',tee=True)
#results=SolverFactory('mindtpy').solve(instance, mip_solver='gurobi', nlp_solver='ipopt',tee=True)
results=SolverFactory('mindtpy').solve(instance, mip_solver='glpk', nlp_solver='ipopt', tee=True)
#print(dir(results))
instance.solutions.store_to(results)
print(results)
new_xk=[]
for p in instance.m_set:
#print(instance.v[p].value)
new_xk.append(instance.v[p].value)
#print("lol")
#print(new_xk)
curr_xk = new_xk
counter=counter+1
obj_list.append(instance.obj.value())
first_obj = objective_func(init_x)
final_obj = objective_func(curr_xk)
print(first_obj)
print(final_obj)
print(instance.display())
print(curr_xk)
print(obj_list)
mult=10
run=1
eps=10**(-4)
for i in range(0,run):
size = mult*(i+1)
x_vec =[]
for i in range(0,size):
#x_vec.append(np.random.randint(0,2))
x_vec.append(1.0)
#x_vec.append(random.randint(0,2))
#init_obj = objective_func(x_vec)
init_x = x_vec
print(x_vec)
line_search(init_x,eps)
print(x_vec)

Related

Google Cloud Function HTTP Trigger

I am trying to give arguments to my function by adding URL parameter.
import json
import flask
def location_sort(request):
request_json = request.get_json()
location = json.dumps(request_json)
location = json.loads(location)
reverse_location = {v: k for k, v in location.items()}
x = location.keys()
harf_x = (float(max(x)) + float(min(x))) / 2
y_right = []
y_left = []
sorted_location = []
for i in location:
if float(i) < harf_x:
y_left.append(location[i])
else:
y_right.append(location[i])
y_left.sort()
y_right.sort(reverse=True)
sorted_input = y_left + y_right
for i in sorted_input:
sorted_location.append([reverse_location[i], i])
for i in sorted_location:
i[0],i[1] = float(i[0]),float(i[1])
return sorted_location
def cal_centroid(location):
area = 0 # 면적
centroid_x = 0
centroid_y = 0
temp = 0
for i in range(len(location)):
if i == len(location)-1:
temp = location[i][0]*location[0][1] - location[0][0]*location[i][1]
area += temp*0.5
centroid_x += (location[i][0] + location[0][0]) * temp
centroid_y += (location[i][1] + location[0][1]) * temp
else:
temp = location[i][0]*location[i+1][1] - location[i+1][0]*location[i][1]
area += temp*0.5
centroid_x += (location[i][0] + location[i+1][0]) * temp
centroid_y += (location[i][1] + location[i+1][1]) * temp
centroid_x = round(centroid_x / (6*area), 6)
centroid_y = round(centroid_y / (6*area), 6)
x = [centroid_x, centroid_y]
return json.dumps(x)
def main(request):
request_args = request.args
if request_args and "location" in request_args:
request = request["location"]
request = json.dumps(request)
a = location_sort(request)
return cal_centroid(a)
This is my code for Cloud Function and i run main function. And i tried the URL as
https://<REGION>-<GOOGLE_CLOUD_PROJECT>.cloudfunctions.net/FUNCTION_NAME?location={"37.284213":"127.006481","37.562045":"127.034809","37.528694":"126.907483","37.411124":"127.124356"}
And it returns
Error: could not handle the request
What could be the problem to my code? I am very beginner for GCF and i would be very thankful for your help:)

Pyomo assign value to constrain TypeError: Problem inserting

model = ConcreteModel()
model.time = Set(initialize = range(24*3))
model.option = Set(initialize = range(4))
model.time_soc = Var(model.time, bounds = (0.1,0.9), domain=PositiveReals)
model.time_option = Var(model.time, model.option, domain = Binary)
model.soc_param = Param(model.option, initialize={0:0, 1:-0.025, 2:-0.05, 3:0.125})
model.soc_ini = Param(initialize = 0.5)
def cons_time_opt(model, i):
total_choice = sum(model.time_option[i,j] for j in model.option)
return total_choice == 1
model.opt = Constraint(model.time, rule = cons_time_opt)
model.soc_con0 = Constraint(0.5 + model.time_option[0,j]*model.soc_param[j] == model.time_soc[0] for j in model.option)
I got the following error:
ERROR: Constructing component 'soc_con0_index' from data=None failed:
TypeError: Problem inserting time_soc[0] == 0.5 into set soc_con0_index
I'm guessing the soc_con0 constraint is meant to be an indexed constraint in which case you are missing a constraint rule. It should be:
def soc_con0_rule(model, j):
return 0.5 + model.time_option[0,j]*model.soc_param[j] == model.time_soc[0]
model.soc_con0 = Constraint(model.option, rule=soc_con0_rule)

GLPK output formats

I am new to GLPK, so my apologies in advance if I'm missing something simple!
I have a largeish LP that I am feeding through GLPK to model an energy market. I'm running the following command line to GLPK to process this:
winglpk-4.65\glpk-4.65\w64\glpsol --lp problem.lp --data ExampleDataFile.dat --output results2.txt
When I open the resulting text file I can see the outputs, which all look sensible. I have one big problem: each record is split over two rows, making it very difficult to clean the file. See an extract below:
No. Row name St Activity Lower bound Upper bound Marginal
------ ------------ -- ------------- ------------- ------------- -------------
1 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1990)_
NS 0 0 = < eps
2 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1991)_
NS 0 0 = < eps
3 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1992)_
NS 0 0 = < eps
4 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1993)_
NS 0 0 = < eps
5 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1994)_
NS 0 0 = < eps
6 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1995)_
NS 0 0 = < eps
7 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1996)_
NS 0 0 = < eps
8 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1997)_
NS 0 0 = < eps
9 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1998)_
NS 0 0 = < eps
10 c_e_SpecifiedDemand(UTOPIA_CSV_ID_1999)_
NS 0 0 = < eps
11 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2000)_
NS 0 0 = < eps
12 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2001)_
NS 0 0 = < eps
13 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2002)_
NS 0 0 = < eps
14 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2003)_
NS 0 0 = < eps
15 c_e_SpecifiedDemand(UTOPIA_CSV_ID_2004)_
NS 0 0 = < eps
I would be very grateful of any suggestions for either:
How I can get each record in the output text file onto a single row, or
Ideas on how to clean / post-process the existing text file output.
I'm sure I'm missing something simple here, but the output is in a very unhelpful format at the moment!
Thanks!
I wrote a Python parser for the GLPK output file. It is not beautiful and not save (try-catch) but it is working (for pure simplex problems).
You can call it on output file:
outp = GLPKOutput('myoutputfile')
print(outp)
val1 = outp.getCol('mycolvar','Activity')
val2 = outp.getRow('myrowname','Upper_bound') # row names should be defined
The class is as follows:
class GLPKOutput:
def __init__(self,filename):
self.rows = {}
self.columns = {}
self.nRows = 0
self.nCols = 0
self.nNonZeros = 0
self.Status = ""
self.Objective = ""
self.rowHeaders = []
self.rowIdx = {}
self.rowWidth = []
self.Rows = []
self.hRows = {}
self.colHeaders = []
self.colIdx = {}
self.colWidth = []
self.Cols = []
self.hCols = {}
self.wcols = ['Activity','Lower_bound','Upper bound','Marginal']
self.readFile(filename)
# split columns with weird line break
def smartSplit(self,line,type,job):
ret = []
line = line.rstrip()
if type == 'ROWS':
cols = len(self.rowHeaders)
idx = self.rowWidth
else:
cols = len(self.colHeaders)
idx = self.colWidth
if job == 'full':
start = 0
for i in range(cols):
stop = start+idx[i]+1
ret.append(line[start:stop].strip())
start = stop
elif job == 'part1':
entries = line.split()
ret = entries[0:2]
elif job == 'part2':
start = 0
for i in range(cols):
stop = start+idx[i]+1
ret.append(line[start:stop].strip())
start = stop
ret = ret[2:]
# print()
# print("SMART:",job,line.strip())
# print(" TO:",ret)
return ret
def readFile(self,filename):
fp = open(filename,"r")
lines = fp.readlines()
fp.close
i = 0
pos = "HEAD"
while pos == 'HEAD' and i<len(lines):
entries = lines[i].split()
if len(entries)>0:
if entries[0] == 'Rows:':
self.nRows = int(entries[1])
elif entries[0] == 'Columns:':
self.nCols = int(entries[1])
elif entries[0] == 'Non-zeros:':
self.nNonZeros = int(entries[1])
elif entries[0] == 'Status:':
self.Status = entries[1]
elif entries[0] == 'Objective:':
self.Objective = float(entries[3]) #' '.join(entries[1:])
elif re.search('Row name',lines[i]):
lines[i] = lines[i].replace('Row name','Row_name')
lines[i] = lines[i].replace('Lower bound','Lower_bound')
lines[i] = lines[i].replace('Upper bound','Upper_bound')
entries = lines[i].split()
pos = 'ROWS'
self.rowHeaders = entries
else:
pass
i+= 1
# formatting of row width
self.rowWidth = lines[i].split()
for k in range(len(self.rowWidth)): self.rowWidth[k] = len(self.rowWidth[k])
# print("Row Widths:",self.rowWidth)
i+= 1
READY = False
FOUND = False
while pos == 'ROWS' and i<len(lines):
if re.match('^\s*[0-9]+',lines[i]): # new line
if len(lines[i].split())>2: # no linebrak
entries = self.smartSplit(lines[i],pos,'full')
READY = True
else: # line break
entries = self.smartSplit(lines[i],pos,'part1')
READY = False
FOUND = True
else:
if FOUND and not READY: # second part of line
entries += self.smartSplit(lines[i],pos,'part2')
READY = True
FOUND = False
if READY:
READY = False
FOUND = False
# print("ROW:",entries)
if re.match('[0-9]+',entries[0]): # valid line with solution data
self.Rows.append(entries)
self.hRows[entries[1]] = len(self.Rows)-1
else:
print("wrong line format ...")
print(entries)
sys.exit()
elif re.search('Column name',lines[i]):
lines[i] = lines[i].replace('Column name','Column_name')
lines[i] = lines[i].replace('Lower bound','Lower_bound')
lines[i] = lines[i].replace('Upper bound','Upper_bound')
entries = lines[i].split()
pos = 'COLS'
self.colHeaders = entries
else:
pass #print("NOTHING: ",lines[i])
i+= 1
# formatting of row width
self.colWidth = lines[i].split()
for k in range(len(self.colWidth)): self.colWidth[k] = len(self.colWidth[k])
# print("Col Widths:",self.colWidth)
i+= 1
READY = False
FOUND = False
while pos == 'COLS' and i<len(lines):
if re.match('^\s*[0-9]+',lines[i]): # new line
if len(lines[i].split())>2: # no linebreak
entries = self.smartSplit(lines[i],pos,'full')
READY = True
else: # linebreak
entries = self.smartSplit(lines[i],pos,'part1')
READY = False
FOUND = True
else:
if FOUND and not READY: # second part of line
entries += self.smartSplit(lines[i],pos,'part2')
READY = True
FOUND = False
if READY:
READY = False
FOUND = False
# print("COL:",entries)
if re.match('[0-9]+',entries[0]): # valid line with solution data
self.Cols.append(entries)
self.hCols[entries[1]] = len(self.Cols)-1
else:
print("wrong line format ...")
print(entries)
sys.exit()
elif re.search('Karush-Kuhn-Tucker',lines[i]):
pos = 'TAIL'
else:
pass #print("NOTHING: ",lines[i])
i+= 1
for i,e in enumerate(self.rowHeaders): self.rowIdx[e] = i
for i,e in enumerate(self.colHeaders): self.colIdx[e] = i
def getRow(self,name,attr):
if name in self.hRows:
if attr in self.rowIdx:
try:
val = float(self.Rows[self.hRows[name]][self.rowIdx[attr]])
except:
val = self.Rows[self.hRows[name]][self.rowIdx[attr]]
return val
else:
return -1
def getCol(self,name,attr):
if name in self.hCols:
if attr in self.colIdx:
try:
val = float(self.Cols[self.hCols[name]][self.colIdx[attr]])
except:
val = self.Cols[self.hCols[name]][self.colIdx[attr]]
return val
else:
print("key error:",name,"not known ...")
return -1
def __str__(self):
retString = '\n'+"="*80+'\nSOLUTION\n'
retString += "nRows: "+str(self.nRows)+'/'+str(len(self.Rows))+'\n'
retString += "nCols: "+str(self.nCols)+'/'+str(len(self.Cols))+'\n'
retString += "nNonZeros: "+str(self.nNonZeros)+'\n'
retString += "Status: "+str(self.Status)+'\n'
retString += "Objective: "+str(self.Objective)+'\n\n'
retString += ' '.join(self.rowHeaders)+'\n'
for r in self.Rows: retString += ' # '.join(r)+' #\n'
retString += '\n'
retString += ' '.join(self.colHeaders)+'\n'
for c in self.Cols: retString += ' # '.join(r)+' #\n'
return retString

Why does python return an error when I write two functions within one class object

my code is:
class Solution(object):
def getRow(self, rowIndex):
"""
:type rowIndex: int
:rtype: List[int]
"""
if rowIndex == 0:
return [1]
elif rowIndex == 1:
return [1, 1]
else:
ini_row = [1, 2, 1]
def GenNextRow(ini_row):
return map(lambda x, y: x+y, [0] + ini_row, ini_row + [0])
while len(ini_row) < rowIndex+1:
ini_row = GenNextRow(ini_row)
return ini_row
def getRow2(self, rowIndex):
result = [0] * (rowIndex + 1)
for i in xrange(rowIndex + 1):
old = result[0] = 1
for j in xrange(1, i+1):
old, result[j] = result[j], old + result[j]
return result
if __name__ == '__main__':
import time
start_time = time.time()
result = Solution().getRow2(4)
end_time = time.time()
print 'result: {0}'.format(result)
print 'time: {0}'.format(end_time-start_time)
However, when I run it from terminal, I get an error message:
Traceback (most recent call last):
File "array-Pascal's_Triangle2.py", line 46, in
result = Solution().getRow2(4)
AttributeError: 'Solution' object has no attribute 'getRow2'
Then I tried to annotate the first function getRow(), the Solution.getRow2() run successfully...
You need to move the GenNextRow function outside the class as Python doesn't understand why there is a badly indented method in the class. Do this instead:
def GenNextRow(ini_row):
return map(lambda x, y: x+y, [0] + ini_row, ini_row + [0])
class Solution(object):
[...]

AssertionError when running K means Main Function

When Running the below code, I receive an AssertionError in the Main Function, assert len(args) > 1. Any idea where in the code the issue occurs?
K-Means clustering implementation
import numpy as np
from math import sqrt
import csv
import sys
====
Define a function that computes the distance between two data points
GAP = 2
MIN_VAL = 1000000
def get_distance(point1, point2):
dis = sqrt(pow(point1[0] - point2[0],2) + pow(point1[1] - point2[1],2))
return dis
====
Define a function that reads data in from the csv
def csvreader(data_file):
sampleData = []
global Countries
with open(data_file, 'r') as csvfile:
read_data = csv.reader(csvfile, delimiter=' ', quotechar='|')
for row in read_data:
print ', '.join(row)
if read_data <> None:
for f in read_data:
values = f.split(",")
if values[0] <> 'Countries':
sampleData.append([values[1],values[2]])
return sampleData
====
Write the initialisation procedure
def cluster_dis(centroid, cluster):
dis = 0.0
for point in cluster:
dis += get_distance(centroid, point)
return dis
def update_centroids(centroids, cluster_id, cluster):
x, y = 0.0, 0.0
length = len(cluster)
if length == 0:
return
for item in cluster:
x += item[0]
y += item[1]
centroids[cluster_id] = (x / length, y / length)
====
Implement the k-means algorithm, using appropriate looping
def kmeans(data, k):
assert k <= len(data)
seed_ids = np.random.randint(0, len(data), k)
centroids = [data[idx] for idx in seed_ids]
clusters = [[] for _ in xrange(k)]
cluster_idx = [-1] * len(data)
pre_dis = 0
while True:
for point_id, point in enumerate(data):
min_distance, tmp_id = MIN_VAL, -1
for seed_id, seed in enumerate(centroids):
distance = get_distance(seed, point)
if distance < min_distance:
min_distance = distance
tmp_id = seed_id
if cluster_idx[point_id] != -1:
dex = clusters[cluster_idx[point_id]].index(point)
del clusters[cluster_idx[point_id]][dex]
clusters[tmp_id].append(point)
cluster_idx[point_id] = tmp_id
now_dis = 0.0
for cluster_id, cluster in enumerate(clusters):
now_dis += cluster_dis(centroids[cluster_id], cluster)
update_centroids(centroids, cluster_id, cluster)
delta_dis = now_dis - pre_dis
pre_dis = now_dis
if delta_dis < GAP:
break
print(centroids)
print(clusters)
return centroids, clusters
def main():
args = sys.argv[1:]
assert len(args) > 1
data_file, k = args[0], int(args[1])
data = csvreader(data_file)
kmeans(data, k)
if __name__ == '__main__':
main()