What is an alternative to len() that can be used with a float? - python-2.7

I can't get Python to display the number of columns in the array. The rows show up just fine, though.
def getDataArray1D(filename):
    fileHandle = open(filename, 'r')
    fileData = map(float, fileHandle)  # !!
    fileHandle.close()
    return fileData

data = getDataArray1D("HEIGHT.csv")
#print data
rows = len(data)
columns = len(data[0])
print rows, columns

I'm not 100% sure what your question is, but to get the length of a float, you can do

def returnLength(number):
    return len(str(number))
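The underlying issue in the question is that map(float, fileHandle) produces a flat list of floats, so data[0] is a single float and has no len(). If the goal is really a row/column count, a sketch that keeps each row as a list of floats (assuming HEIGHT.csv is comma-separated) would work instead:

def getDataArray2D(filename):
    # each line becomes a list of floats, so the result is a list of rows
    with open(filename, 'r') as fileHandle:
        return [map(float, line.split(',')) for line in fileHandle]

data = getDataArray2D("HEIGHT.csv")
print len(data), len(data[0])  # rows, columns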

Related

Converting strings in a list to integers python

I am trying to open a CSV file and convert the values from strings to integers so I can sort the list. Currently, when I sort the list, the result I get is "[[], ['190'], ['200'], ['250'], ['350'], ['90']]". Here is my code.
import csv

def bubbleSort(scores):
    for length in range(len(scores)-1, 0, -1):
        for i in range(length):
            if scores[i] > scores[i+1]:
                temp = scores[i]
                scores[i] = scores[i+1]
                scores[i+1] = temp

with open("rec_Scores.csv", "rb") as csvfile:
    r = csv.reader(csvfile)
    scores = list(r)

bubbleSort(scores)
print(scores)
This is probably really easy to fix, but I am still new to Python, so if anyone could help me solve this problem it would be much appreciated.
Each row that csv.reader returns is a list (for example ['190']), so you need to add scores_int = [int(score[0]) for score in scores if score] in order to convert the string numbers in your scores list to ints (the if score part skips the empty row). This is what your code should look like:
import csv

def bubbleSort(scores):
    for length in range(len(scores)-1, 0, -1):
        for i in range(length):
            if scores[i] > scores[i+1]:
                temp = scores[i]
                scores[i] = scores[i+1]
                scores[i+1] = temp

with open("rec_Scores.csv", "rb") as csvfile:
    r = csv.reader(csvfile)
    scores = list(r)

scores_int = [int(score[0]) for score in scores if score]
bubbleSort(scores_int)
print(scores_int)
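A slightly shorter variant (a sketch) converts while reading, so the sort works on plain integers from the start:

with open("rec_Scores.csv", "rb") as csvfile:
    scores = [int(row[0]) for row in csv.reader(csvfile) if row]  # skip blank rows

bubbleSort(scores)
print(scores)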

Python - Convert dictionary (having "list" as values) into csv file

Trying to write the dictionary below into a CSV file, with the desired output shown underneath.
dict_data = {"1": ["xyz"],
             "2": ["abc", "def"],
             "3": ["zzz"]
             }
desired output:
1,3,2
xyz,zzz,abc
def
The code below doesn't work as expected, as it keeps both "abc" and "def" in the same cell, as shown below.
with open('k.csv', 'wb') as out_file:
    writer = csv.writer(out_file, dialect='excel')
    headers = [k for k in dict_data]
    items = [dict_data[k] for k in dict_data]
    writer.writerow(headers)
    writer.writerow(items)
output:
1,3,2
xyz,zzz,abc,def
Here is the complete solution:
import csv
import os

class CsvfileWriter:
    '''
    Takes a dictionary as input and writes its items into a CSV file.
    For example:
    Input dictionary:
    dict_data = {"1":["xyz"],"2":["abc","def"],"3":["zzz"]}
    Output (CSV file):
    1,3,2
    xyz,zzz,abc
    ,,def
    '''
    def __init__(self, dictInput, maxLength=0):
        '''
        Creates an instance with the following variables:
        dictInput & maxLength
        dictInput -> dictionary whose values (lists) all have the same length, e.g.
        dict_data = {"1":["xyz",""],"2":["abc","def"],"3":["zzz",""]}
        maxLength -> length of the lists
        '''
        self.dictInput = dictInput
        self.maxLength = maxLength

    @classmethod
    def list_padding(cls, dictInput):
        '''
        Converts an input dictionary whose list values have varying lengths into one
        whose lists all have the same length, and returns an instance built from
        dictInput & maxLength.
        Note:
        dictInput represents the dictionary after padding is applied.
        maxLength represents the length of the longest list (value) in the dictionary.
        For example:
        input dictionary:
        dict_data = {"1":["xyz"],"2":["abc","def"],"3":["zzz"]}
        output dictionary:
        dict_data = {"1":["xyz",""],"2":["abc","def"],"3":["zzz",""]}
        '''
        cls.dictInput = dictInput
        listValues = dictInput.values()
        listValues.sort(key=lambda i: len(i))
        maxLength = len(listValues[-1])
        for i in listValues:
            while len(i) < maxLength:
                i.append('')
        return cls(dictInput, maxLength)

    def write_to_csv(self):
        with open('sample_file.csv', 'wb') as out_file:
            writer = csv.writer(out_file, dialect='excel')
            headers = [k for k in self.dictInput]
            items = [self.dictInput[k] for k in self.dictInput]
            writer.writerow(headers)
            c = 0
            while c < self.maxLength:
                writer.writerow([i[c] for i in items])
                c += 1

dict_data = {"1":["xyz"],"2":["abc","def"],"3":["zzz"]}
cf = CsvfileWriter.list_padding(dict_data)
cf.write_to_csv()
The following works in Python 2:
import csv

dict_data = {
    "1": ["xyz"],
    "2": ["abc", "def"],
    "3": ["zzz"]
}

def transpose(cols):
    return map(lambda *row: list(row), *cols)

with open('k.csv', 'w') as out_file:
    writer = csv.writer(out_file, dialect='excel')
    headers = dict_data.keys()
    items = transpose(dict_data.values())
    writer.writerow(headers)
    writer.writerows(items)
I can't take credit for the transpose function, which I picked up from here. It turns a list of columns into a list of rows, automatically padding columns that are too short with None. Fortunately, the csv writer outputs blanks for None values, which is exactly what's needed.
(In Python 3, map behaves differently (no padding), so it would require some changes.)
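One way to get the same padding behaviour on Python 3 (a sketch, not part of the original answer) is itertools.zip_longest:

from itertools import zip_longest

def transpose(cols):
    # zip_longest pads short columns, here with '' so the csv writer emits blanks
    return zip_longest(*cols, fillvalue='')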
Edit: A replacement transpose function that works for both Python 2 and 3 is:
def transpose(cols):
    def mypop(l):
        try:
            return l.pop(0)
        except IndexError:
            return ''
    while any(cols):
        yield [mypop(l) for l in cols]
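A usage sketch with this generator version, mirroring the answer above (on Python 3, open the file with newline=''):

with open('k.csv', 'w') as out_file:
    writer = csv.writer(out_file, dialect='excel')
    writer.writerow(dict_data.keys())
    # the generator pops items off the column lists, so pass copies if dict_data is reused
    writer.writerows(transpose([list(v) for v in dict_data.values()]))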

Importing and analysing text data using Python 2.7

I have created code in Python 2.7 which saves sales data for various products into a text file using the write() method. My limited Python skills have hit the wall with the next step - I need code which can read this data from the text file and then calculate and display the mean average number of sales of each item. The data is stored in the text file like the data shown below (but I am able to format it differently if that would help).
Product A,30
Product B,26
Product C,4
Product A,40
Product B,18
Product A,31
Product B,13
Product C,3
After far too long Googling around this to no avail, any pointers on the best way to manage this would be greatly appreciated. Thanks in advance.
You can read the file, split each line on the space to isolate the product letter, and split the remainder on the comma to get the value. Then it is just a matter of creating a dictionary, appending each new value (converted to int) to the list stored under each letter key, and using sum and len to get the average.
Example
products = {}
with open("myfile.txt") as product_info:
    data = product_info.read().split('\n')  # split into lines

for item in data:
    if not item:
        continue  # skip blank lines
    _temp = item.split(' ')[1].split(',')  # "Product A,30" -> ['A', '30']
    if _temp[0] not in products:
        products[_temp[0]] = [int(_temp[1])]
    else:
        products[_temp[0]] = products[_temp[0]] + [int(_temp[1])]

product_list = [[item, float(sum(values)) / len(values)] for item, values in products.items()]
product_list.sort(key=lambda x: x[0])
for item in product_list:
    print 'The average of {} is {}'.format(item[0], item[1])
from __future__ import division

dict1 = {}  # running total of sales per product
dict2 = {}  # number of entries per product
file1 = open("input.txt", 'r')
for line in file1:
    if len(line) > 2:  # skip blank/short lines
        data = line.split(",")
        a, b = data[0].strip(), data[1].strip()
        if a in dict1:
            dict1[a] = dict1[a] + int(b)
        else:
            dict1[a] = int(b)
        if a in dict2:
            dict2[a] = dict2[a] + 1
        else:
            dict2[a] = 1

for k, v in dict1.items():
    for m, n in dict2.items():
        if k == m:
            avg = v / n  # true division because of the __future__ import
            print "%s Average is: %0.6f" % (k, avg)
Output:
Product A Average is: 33.666667
Product B Average is: 19.000000
Product C Average is: 3.500000
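Since dict1 and dict2 are keyed by the same product names, the nested matching loop at the end could be collapsed into a single pass (a sketch):

for k in dict1:
    # true division because of the __future__ import above
    print "%s Average is: %0.6f" % (k, dict1[k] / dict2[k])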

Python2.7: Too many Values to Unpack - Number of Columns unknown

I have a file that I want to unpack and whose columns I want to use in different files. The issue is that the number of columns varies from row to row (for example, row 1 could have 7 columns and row 2 could have 15).
How do I unpack the file without receiving the error "Too many values to unpack"?
filehandle3 = open('output_steps.txt', 'r')
filehandle4 = open('head_cluster.txt', 'w')
for line in iter(filehandle3):
    id, category = line.strip('\n').split('\t')
    filehandle4.write(id + "\t" + category + "\n")
filehandle3.close()
filehandle4.close()
Any help would be great. Thanks!
You should extract the values separately, if present, e.g. like this:
for line in iter(filehandle3):
    values = line.strip('\n').split('\t')
    id = values[0] if len(values) > 0 else None
    category = values[1] if len(values) > 1 else None
    ...
You could also create a helper function for this:
def safe_get(values, index, default=None):
    return values[index] if len(values) > index else default
or using try/except:
def safe_get(values, index, default=None):
    try:
        return values[index]
    except IndexError:
        return default
and use it like this:
category = safe_get(values, 1)
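Putting safe_get together with the original loop might look like this (a sketch; missing fields are written as blanks):

with open('output_steps.txt', 'r') as filehandle3, open('head_cluster.txt', 'w') as filehandle4:
    for line in filehandle3:
        values = line.strip('\n').split('\t')
        id = safe_get(values, 0, '')
        category = safe_get(values, 1, '')
        filehandle4.write(id + "\t" + category + "\n")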
With Python 3, and if the rows always have at least as many elements as you need, you can use
for line in iter(filehandle3):
    id, category, *junk = line.strip('\n').split('\t')
This will bind the first element to id, the second to category, and the rest to junk.
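Since the question targets Python 2.7, where the *junk syntax is not available, a rough equivalent (a sketch) uses slicing instead:

for line in iter(filehandle3):
    values = line.strip('\n').split('\t')
    id, category, junk = values[0], values[1], values[2:]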

IndexError, but more likely I/O error

Unsure of why I am getting this error. I'm reading from a file called columns_unsorted.txt, then trying to write to columns_sorted.txt. The error is on fan_on = int(string_j[1]), saying list index out of range. Here's my code:
#!/usr/bin/python
import fileinput
import collections

# open document to read from
j = open('./columns_unsorted.txt', 'r')
# note this is a file of rows of space-delimited data in the format
# <1384055277275353 0 0 0 1 0 0 0 0 22:47:57> on each row, the first term being unix time,
# the last human time, the middle binary flags indicating which machine event happened

# open document to record results into
l = open('./columns_sorted.txt', 'w')

# CREATE ARRAY CALLED EVENTS
events = collections.deque()
i = 1

# FILL ARRAY WITH "FACTS" ROWS; SPLIT INTO FIELDS, CHANGE TYPES AS APPROPRIATE
for line in j:  # columns_unsorted
    line = line.rstrip('\n')
    string_j = line.split(' ')
    time = str(string_j[0])
    fan_on = int(string_j[1])
    fan_off = int(string_j[2])
    heater_on = int(string_j[3])
    heater_off = int(string_j[4])
    space_on = int(string_j[5])
    space_off = int(string_j[6])
    pump_on = int(string_j[7])
    pump_off = int(string_j[8])
    event_time = str(string_j[9])
    row = time, fan_on, fan_off, heater_on, heater_off, space_on, space_off, pump_on, pump_off, event_time
    events.append(row)
You are missing the readlines function, no?
You have to do:
j = open('./columns_unsorted.txt', 'r')
l = j.readlines()
for line in l:
    # what you want to do with each line
In the future, you should print some of your variables, just to be sure the code is working as you want it to, and to help you identify problems (for example, if you printed string_j in your code, you would see what kind of problem you have).
The problem was an inconsistent line in the data file. Forgive my haste in posting.
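Given that resolution, a defensive sketch (not from the original thread) that simply skips rows without the expected 10 fields would avoid the IndexError:

for line in j:  # columns_unsorted
    string_j = line.rstrip('\n').split(' ')
    if len(string_j) != 10:
        continue  # inconsistent line; skip it rather than raising IndexError
    time, event_time = string_j[0], string_j[9]
    flags = [int(x) for x in string_j[1:9]]  # fan/heater/space/pump on/off flags
    events.append(tuple([time] + flags + [event_time]))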