Python - Convert dictionary (having "list" as values) into csv file - python-2.7

Trying to write below dictionary into csv file with desired output as mentioned below.
dict_data = {"1":["xyz"],
"2":["abc","def"],
"3":["zzz"]
}
desired output:
1,3,2
xyz,zzz,abc
def
Below code doesn't work as expected as it keeps both "abc" & "def" in same cell as shown below.
with open('k.csv','wb') as out_file:
writer = csv.writer(out_file,dialect = 'excel')
headers = [k for k in dict_data]
items = [dict_data[k] for k in dict_data]
writer.writerow(headers)
writer.writerow(items)
output:
1,3,2
xyz,zzz,abc,def

Here is the complete solution:
import csv
import os
class CsvfileWriter:
'''
Takes dictionary as input and writes items into a CSV file.
For ex:-
Input dictionary:
dict_data = {"1":["xyz"],"2":["abc","def"],"3":["zzz"]}
Output: (CSV file)
1,3,2
xyz,zzz,abc
,,def
'''
def __init__(self,dictInput,maxLength=0):
'''
Creates a instance with following variables.
dictInput & maxLength
dictInput -> dictionary having values(list) of same length
ex:-
dict_data = {"1":["xyz",""],"2":["abc","def"],"3":["zzz",""]}
maxLength -> length of the list
'''
self.dictInput = dictInput
self.maxLength = maxLength
#classmethod
def list_padding(cls,dictInput):
'''
converts input dictionary having list (as values) of varying lenghts into constant length.
Also returns class variables dictInput & maxLength
Note:
dictInput represents the dictionary after padding is applied.
maxLength represents the length of the list(values in dictionary) having maximum number of items.
Ex:-
input dictionary:
dict_data = {"1":["xyz"],"2":["abc","def"],"3":["zzz"]}
output dictionary:
dict_data = {"1":["xyz",""],"2":["abc","def"],"3":["zzz",""]}
'''
cls.dictInput = dictInput
listValues = dictInput.values()
listValues.sort(key = lambda i: len(i))
maxLength = len(listValues[-1])
for i in listValues:
while(len(i) < maxLength):
i.append('')
return cls(dictInput,maxLength)
def write_to_csv(self):
with open('sample_file.csv','wb') as out_file:
writer = csv.writer(out_file,dialect = 'excel')
headers = [k for k in self.dictInput]
items = [self.dictInput[k] for k in self.dictInput]
writer.writerow(headers)
c = 0
while (c < self.maxLength):
writer.writerow([i[c] for i in items])
c += 1
dict_data = {"1":["xyz"],"2":["abc","def"],"3":["zzz"]}
cf = CsvfileWriter.list_padding(dict_data)
cf.write_to_csv()

The following works in Python 2:
import csv
dict_data = {
"1":["xyz"],
"2":["abc","def"],
"3":["zzz"]
}
def transpose(cols):
return map(lambda *row: list(row), *cols)
with open('k.csv','w') as out_file:
writer = csv.writer(out_file,dialect = 'excel')
headers = dict_data.keys()
items = transpose(dict_data.values())
writer.writerow(headers)
writer.writerows(items)
I can't take credit for the transpose function, which I picked up from here. It turns a list of columns into a list of rows, automatically padding columns that are too short with None. Fortunately, the csv writer outputs blanks for None values, which is exactly what's needed.
(In Python 3, map behaves differently (no padding), so it would require some changes.)
Edit: A replacement transpose function that works for both Python 2 and 3 is:
def transpose(cols):
def mypop(l):
try:
return l.pop(0)
except IndexError:
return ''
while any(cols):
yield [mypop(l) for l in cols]

Related

Practice assignment AWS Computer Vision : get_Cifar10_dataset

I have problem with this methode which should return both the training and the validation dataset and examine it to return the index that corresponds to the first occurrence of each class in CIFAR10.
this is code:
def get_cifar10_dataset(): """ Should create the cifar 10 network and identify the dataset index of the first time each new class
appears
:return: tuple of training and validation dataset as well as label indices
:rtype: (gluon.data.Dataset, 'dict_values' object is not subscriptable, gluon.data.Dataset,
dict[int:int])
"""
train_data = None
val_data = None
# YOUR CODE HERE
train_data = datasets.CIFAR10(train=True, root=M5_IMAGES)
val_data = datasets.CIFAR10(train=False, root=M5_IMAGES)
You are asked to return a dictionary with labels and the corresponding indexes. Using the following function can solve your problem.
def get_idx_dict(data):
lis = []
idx = []
indices = {}
for i in range(len(data)):
if data[i][1] not in lis:
lis.append(data[i][1])
idx.append(i)
indices = {lis[i]: idx[i] for i in range(len(lis))}
return indices
The function returns a dictionary with desired output. Use this function on data from train and validation set.
train_indices = get_idx_dict(train_data)
val_indices = get_idx_dict(val_data)
You can do it this
def get_cifar10_dataset():
"""
Should create the cifar 10 network and identify the dataset index of the first time each new class appears
:return: tuple of training and validation dataset as well as label indices
:rtype: (gluon.data.Dataset, dict[int:int], gluon.data.Dataset, dict[int:int])
"""
train_data = None
val_data = None
train_indices = {}
val_indices = {}
# Use `root=M5_IMAGES` for your dataset
train_data = gluon.data.vision.datasets.CIFAR10(train=True, root=M5_IMAGES)
val_data = gluon.data.vision.datasets.CIFAR10(train=False, root=M5_IMAGES)
#for train
for i in range(len(train_data)):
if train_data[i][1] not in train_indices:
train_indices[train_data[i][1]] = i
#for valid
for i in range(len(val_data)):
if val_data[i][1] not in val_indices:
val_indices[val_data[i][1]] = i
#raise NotImplementedError()
return train_data, train_indices, val_data, val_indices

Accessing only the values in a dictionary

wordDict = {'y': 1, 'H': 1, 'e': 1}
The above is what I need to get just the value from. I am supposed to mark whether or not each word I have to evaluate is unique or not, which depends on if the values are 1 or above. How would I get access to only the value? I have tried wordDict[0][0] to get the first element of the whole array and then the first element within the dictionary, but that does not work.
import collections;
def main():
reading = read_file();
uniqueOrNot = isUnique(reading);
print uniqueOrNot;
def read_file():
with open('BWA5.in') as fp:
lines = fp.read().split();
fp.close(); #close file
return lines; #return lines to main function
def isUnique(words):
wordDict = [dict(collections.Counter(word)) for word in words];
for wordDict in wordDicts:
values = wordDict.values()
unique = all(value == 1 for value in values)
print wordDict;
main();
Get the values by
values = wordDict.values()
Now the unique thingy,
unique = all(value == 1 for value in values)
Now unique is a Boolean value holding True/False.
We could have also done:
notUnique = any(value > 1 for value in values)
This takes into consideration any 0 frequency letters which you might or might not have, depending on your algorithm.
For a list of dictionaries for multiple words, you can traverse through them all at a time by using a loop.
EDIT:
Final code:
import collections;
def main():
reading = read_file();
uniqueOrNot = isUnique(reading);
print uniqueOrNot;
def read_file():
with open('BWA5.in') as fp:
lines = fp.read().split();
fp.close(); #close file
return lines; #return lines to main function
def isUnique(words):
wordDicts = [dict(collections.Counter(word)) for word in words];
for wordDict in wordDicts:
values = wordDict.values()
unique = all(value == 1 for value in values)
print unique;
main();

Python dictionary: how to take the input string and define dynamic dictionaries with dict names as words in the string, while execution

I want to create a funtion which takes 3 arguments which are as follows.
path = "['pma']['device']['clock']"
key = 'key'
value = 'value'
the function should take these values as input and return a dictionary with sub-dictionaries inside and key,value added at the end dict.
Expected output = {'pma':{'device':{'clock':{'key':'value'}}}}
The challenge I am facing is, how to take the input string and define dynamic dictionaries with dict names as words in the string, while execution.
see this code
path = "['pma']['device']['clock']"
key = 'key'
value = 'value'
pl = path.replace('][', ',').replace(']', '').replace('[','').replace("'","").split(',')
res = {}
for item in reversed(pl):
if item == pl[-1]:
res[pl[-1]]={key:value}
else :
res = {item: res}
Output :
{'pma': {'device': {'clock': {'key': 'value'}}}}
The solution using re.findall() function and recursive makeDictFromPath() function:
import re
path = "['pma']['device']['clock']"
key = 'key'
val = 'value'
def makeDictFromPath(result, path):
result[path[0]] = {}
if len(path) > 1: return makeDictFromPath(result[path[0]], path[1:])
return result
result = {}
keys = re.findall(r'\[\'(\w+)\'\]', path)
makeDictFromPath(result, keys).get('clock', {})[key] = val
print(result)
The output:
{'pma': {'device': {'clock': {'key': 'value'}}}}

Python 2.7 dictionary value not taking float as the input

I am working with Python 2.7 and trying to insert a value which is a float to a key. However, all the values are being inserted as 0.0. The polarity value is being inserted as 0.0 and not the actual value.
Code Snippet:
from textblob import TextBlob
import json
with open('new-webmd-answer.json') as data_file:
data = json.load(data_file, strict=False)
data_new = {}
lst = []
for d in data:
string = d["answerContent"]
blob = TextBlob(string)
#print blob
#print blob.sentiment
#print d["questionId"]
data_new['questionId'] = d["questionId"]
data_new['answerMemberId'] = d["answerMemberId"]
string1 = str(blob.sentiment.polarity)
print string1
data_new['polarity'] = string1
#print blob.sentiment.polarity
lst.append((data_new))
json_data = json.dumps(lst)
#print json_data
with open('polarity.json', 'w') as outfile:
json.dump(json_data, outfile)
The way your code is currently written, you're overwriting the dictionary with each iteration. Then you append that dictionary to the list multiple times.
lets say your dictionary was dict = {"a" : 1} and then you append that to a list
alist.append(dict)
alist
[{'a' : 1}]
Then you change the value of dict, dict{"a" : 0} and append it to the list again alist.append(dict)
alist
[{'a' : 0}, {'a' : 0}]
This occurs because dictionaries are mutable. For a more complete overview on mutable vs unmutable objects see the docs here
To achieve your expected output, make a new dictionary with each iteration of data
lst = []
for d in data:
data_new = {} # makes a new dictionary with each iteration
string = d["answerContent"]
blob = TextBlob(string)
# print blob
# print blob.sentiment
# print d["questionId"]
data_new['questionId'] = d["questionId"]
data_new['answerMemberId'] = d["answerMemberId"]
string1 = str(blob.sentiment.polarity)
print string1
data_new['polarity'] = string1
# print blob.sentiment.polarity
lst.append((data_new))

Printing Results from Loops

I currently have a piece of code that works in two segments. The first segment opens the existing text file from a specific path on my local drive and then arranges, based on certain indices, into a list of sub list. In the second segment I take the sub-lists I have created and group them on a similar index to simplify them (starts at def merge_subs). I am getting no error code but I am not receiving a result when I try to print the variable answer. Am I not correctly looping the original list of sub-lists? Ultimately I would like to have a variable that contains the final product from these loops so that I may write the contents of it to a new text file. Here is the code I am working with:
from itertools import groupby, chain
from operator import itemgetter
with open ("somepathname") as g:
# reads text from lines and turns them into a list sub-lists
lines = g.readlines()
for line in lines:
matrix = line.split()
JD = matrix [2]
minTime= matrix [5]
maxTime= matrix [7]
newLists = [JD,minTime,maxTime]
L = newLists
def merge_subs(L):
dates = {}
for sub in L:
date = sub[0]
if date not in dates:
dates[date] = []
dates[date].extend(sub[1:])
answer = []
for date in sorted(dates):
answer.append([date] + dates[date])
new code
def openfile(self):
filename = askopenfilename(parent=root)
self.lines = open(filename)
def simplify(self):
g = self.lines.readlines()
for line in g:
matrix = line.split()
JD = matrix[2]
minTime = matrix[5]
maxTime = matrix[7]
self.newLists = [JD, minTime, maxTime]
print(self.newLists)
dates = {}
for sub in self.newLists:
date = sub[0]
if date not in dates:
dates[date] = []
dates[date].extend(sub[1:])
answer = []
for date in sorted(dates):
print(answer.append([date] + dates[date]))
enter code here
enter code here