creating Pcollection from json string input - google-cloud-platform

I am trying to create a unit test for my Dataflow code. However, I am getting an error while creating a PCollection using beam.create().
In the original function, I am using json.loads(element) which takes json string as input and gives the dictionary as o/p.
Test code:
def test(self):
input = '{"name": "xyz"}'
expected_output = {'name': 'xyz'}
input_string = p | beam.create(input) #pipeline object is already defined as p
output = input_string | beam.pardo(_splitdata) #calling the original function here
assert_that(output,equal_to(expected_output))
Errors :
beam.create() does not take a string as input; it only accepts iterables. But if I wrap the input in a list or any other iterable, json.loads() inside _splitdata() still expects the JSON object to be a plain str.
How do I resolve this issue? Please help.

I got into similar scenario today, here is my solution:
import apache_beam as beam
# Sample in-memory records; each dict becomes one element of the PCollection.
data = [
    {
        'id': 1,
        'name': 'abc',
    },
    {
        'id': 2,
        'name': 'xyz'
    }
]

# Using the pipeline as a context manager runs it when the block exits.
with beam.Pipeline() as pipeline:
    plant_details = (
        pipeline
        # beam.Create takes an iterable and emits one element per item.
        | 'Read Input Data' >> beam.Create(data)
        | beam.Map(print))
Output:
{'id': 1, 'name': 'abc'}
{'id': 2, 'name': 'xyz'}

Related

Converting csv for fixtures

I am unable to convert test data in CSV format using csv2json.py so that I can use the same in fixtures which should have the format pk, model, and then the fields.
[
{
"pk": 1,
"model": "wkw2.Lawyer",
"fields": {
"school": "The George Washington University Law School",
"last": "Babbas",
"firm_url": "http://www.graychase.com/babbas",
"year_graduated": "2005",
"firm_name": "Gray & Chase",
"first": "Amr A"
}
}
]
Here is the code from one time that helped me convert CSV to JSON. Let me know if you need something else:
import csv
import json
# Column names applied to every CSV row; because they are passed explicitly,
# the first line of the file is treated as data, not as a header.
fieldnames = ("FirstName", "LastName", "IDNumber", "Message")

# Context managers guarantee both files are closed (and the JSON output is
# flushed) even if a row fails to serialize.
with open('file.csv', 'r') as csvfile, open('file.json', 'w') as jsonfile:
    reader = csv.DictReader(csvfile, fieldnames)
    for row in reader:
        # One JSON object per line.
        json.dump(row, jsonfile)
        jsonfile.write('\n')

How to format json response in django?

I am retrieving data from multiple tables in Django.
my current response is :
{
"status": 0,
"message": "Client details retrived successfully...!!!",
"results": [
{
"id": 11,
"client_id": "CL15657917080578748000",
"client_name": "Pruthvi Katkar",
"client_pan_no": "RGBB004A11",
"client_adhar_no": "12312312313",
"legal_entity_name": "ABC",
"credit_period": "6 months",
"client_tin_no": 4564565,
"client_email_id": "abc#gmail.com",
"head_office_name": "ABC",
"office_name": "asd234",
"office_email_id": "zxc#gmail.com",
"office_contact": "022-27547119",
"gst_number": "CGST786876876",
"office_country": "India",
"office_state": "gujrat",
"office_district": "vadodara",
"office_taluka": "kachh",
"office_city": "vadodara",
"office_street": "New rode 21",
"office_pincode": 2344445,
"contact_person_name": "prasad",
"contact_person_designation": "DM",
"contact_person_number": "456754655",
"contact_person_email": "asd#gmail.com",
"contact_person_mobile": "5675545654",
"created_at": "2019-08-14T14:08:28.057Z",
"created_by": "Prathamseh",
"updated_at": "2019-08-14T14:08:28.057Z",
"updated_by": "prasad",
"is_deleted": false
},
{
"id": 11,
"user_id": "CL15657917080578748000",
"bank_details_id": "BL15657917080778611000",
"bank_name": "Pruthvi",
"branch": "vashi",
"ifsc_code": "BOI786988",
"account_number": 56756765765765,
"account_name": "Pruthvi",
"is_deleted": false
},
{
"id": 10,
"document_details_id": "DL15657917080808598000",
"user_id": "CL15657917080578748000",
"document_type": "Pruthvi ID",
"document": "www.sendgrid.com/pan",
"is_deleted": false
}
]
}
Expected Response :
I am getting the querysets from the db in models.py, sending them to views.py, and iterating over the dict there, but I am not getting the expected response.
views.py
#csrf_exempt
def get_client_details(request):
try:
# Initialising lists for storing results
result = []
temp_array = []
# Getting data from request body
client_master_dict = json.loads(request.body)
# Response from get client data
records = ClientDetails.get_client_data(client_master_dict)
# Create response object
# Iterating over the records object for getting data
for i in range(len(records)):
# Converting the querysets objects to json array format
record_result_list = list(records[i].values())
# If multiple records are present
if(len(record_result_list) > 1):
for j in range(len(record_result_list)):
user_info = record_result_list[j]
temp_array.append(user_info)
result.append(temp_array)
temp_array=[]
# For single record
else:
result.append(record_result_list[0])
# Success
returnObject = {
"status" : messages.SUCCESS,
"message" : messages.CLIENT_RETRIVE_SUCCESS,
"results" : result
}
return JsonResponse(returnObject,safe=False)
I think the issue might be in my inner for loop, can anyone help me out with this, is there any way to iterate over the nested JSON object.
Models.py
@classmethod
def get_client_data(cls, client_master_dict):
    """Collect the client, bank and document querysets for one client.

    Args:
        client_master_dict: mapping that must contain a 'client_id' key.

    Returns:
        A list of three querysets in the order [client details, bank
        details, document details], each filtered to rows that belong to
        the client id and are not marked deleted. Returns False if anything
        raises while building the list (including a missing 'client_id').
    """
    try:
        client_id = client_master_dict['client_id']
        client_details = cls.objects.filter(client_id=client_id, is_deleted=False)
        bank_details = BankDetails.objects.filter(user_id=client_id, is_deleted=False)
        document_details = DocumentDetails.objects.filter(user_id=client_id, is_deleted=False)
        return [client_details, bank_details, document_details]
    except Exception as error:
        # NOTE(review): callers that call len() on or index the result must
        # check for False first.
        print("Error in get_client_data", error)
        return False
Here i'm fetching data from 3 tables and adding it into list.
After printing the data on console i am getting :
[{'id': 11, 'client_id': 'CL15657917080578748000', 'client_name': 'Pruthvi Katkar', 'client_pan_no': 'RGBB004A11', 'client_adhar_no': '12312312313', 'legal_entity_name': 'ABC', 'credit_period': '6 months', 'client_tin_no': 4564565, 'client_email_id': 'abc#gmail.com', 'head_office_name': 'ABC', 'office_name': 'asd234', 'office_email_id': 'zxc#gmail.com', 'office_contact': '022-27547119', 'gst_number': 'CGST786876876', 'office_country': 'India', 'office_state': 'gujrat', 'office_district': 'vadodara', 'office_taluka': 'kachh', 'office_city': 'vadodara', 'office_street': 'New rode 21', 'office_pincode': 2344445, 'contact_person_name': 'prasad', 'contact_person_designation': 'DM', 'contact_person_number': '456754655', 'contact_person_email': 'asd#gmail.com', 'contact_person_mobile': '5675545654', 'created_at': datetime.datetime(2019, 8, 14, 14, 8, 28, 57874, tzinfo=<UTC>), 'created_by': 'Prathamseh', 'updated_at': datetime.datetime(2019, 8, 14, 14, 8, 28, 57874, tzinfo=<UTC>), 'updated_by': 'prasad', 'is_deleted': False}]
[{'id': 11, 'user_id': 'CL15657917080578748000', 'bank_details_id': 'BL15657917080778611000', 'bank_name': 'Pruthvi', 'branch': 'vashi', 'ifsc_code': 'BOI786988', 'account_number': 56756765765765, 'account_name': 'Pruthvi', 'is_deleted': False}]
[{'id': 10, 'document_details_id': 'DL15657917080808598000', 'user_id': 'CL15657917080578748000', 'document_type': 'Pruthvi ID', 'document': 'www.sendgrid.com/pan', 'is_deleted': False}]
Did you check the output of record_result_list? You can tell right there whether it's returning the data in the format you requested. Try printing it to the screen to debug.
As far as I can see, the expected output and the hierarchy of results for bank details do not match. I don't know how you are handling the hierarchy. Are you taking the hierarchy directly from the JSON? Or are you just taking the data and creating the hierarchy in the expected output?

AWS boto3 unable to put tags after creating an AMI

I'm trying to put tags after creating AMI from an instance using boto3 and getting an error:
botocore.exceptions.ParamValidationError: Parameter validation failed:
Unknown parameter in input: "TagSpecifications", must be one of:
BlockDeviceMappings, Description, DryRun, InstanceId, Name, NoReboot
Here is my code, can you please check what I'm doing wrong?
It works for snapshot but failing for image.
import xlrd
import boto3
import datetime
client = boto3.client('ec2')

# Location of the spreadsheet: column 0 holds the instance name and
# column 1 the instance id.
loc = "/Users/user1/Documents/aws-python/aws-tag-test (1).xlsx"

# Open the workbook and use its first sheet.
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)

# Timestamp shared by every AMI name/description created in this run.
nowtime = datetime.datetime.now()
nowdate = nowtime.strftime("%Y-%m-%d %H-%M")
print(nowdate)

# Row 0 is skipped (presumably a header row -- confirm against the sheet).
server_ids = []
instancename = []
for i in range(1, sheet.nrows):
    server_ids.append(sheet.cell_value(i, 1))
    instancename.append(sheet.cell_value(i, 0))

# Create one AMI per instance and tag it in the same call.
for server_id, name in zip(server_ids, instancename):
    print(server_id, name)
    response = client.create_image(
        Description='ami ' + name + ' ' + nowdate,
        InstanceId=server_id,
        Name='ami ' + name + ' ' + nowdate,
        NoReboot=True,
        DryRun=False,
        # Needs a boto3/botocore release whose create_image accepts
        # TagSpecifications; older releases reject it with
        # ParamValidationError.
        TagSpecifications=[
            {
                'ResourceType': 'image',
                'Tags': [
                    {
                        'Key': 'Name',
                        'Value': 'ami-' + name + '-' + nowdate
                    },
                    {
                        'Key': 'date',
                        'Value': datetime.datetime.now().strftime("%Y-%m-%d")
                    }
                ]
            },
        ]
    )
    print(response)
Really appreciate your help.
Yes, TagSpecifications is now available for create_image. Not sure when, but it was definitely added sometime after the original comments.

Python : update an initialized dictionary with new lists as value for each key

I have an initialized dictionary like:
nemas = {'PERSON' : '', 'ORGANIZATION':'' , 'LOCATION': ''}
and three lists of names :
person_names = [u'Albert Einstein', u'Hermann Einstein', u'Pauline Koch', u'Einstein', u'Jakob']
organization_names = [u'Elektrotechnische Fabrik J. Einstein & Cie']
location_names = [u'Ulm', u'Kingdom of Britain', u'Munich']
I intend to update the dictionary and get:
names = { 'PERSON' : [u'Albert Einstein', u'Hermann Einstein', u'Pauline Koch', u'Einstein', u'Jakob'],
'ORGANIZATION': [u'Elektrotechnische Fabrik J. Einstein & Cie'],
'LOCATION': [u'Ulm', u'Kingdom of Britain', u'Munich'] }
I tried :
name_dict = {"PERSON":dict(person_names), "ORGANIZATION": dict(organization_names), "LOCATION":dict(locatoin_names)}
print(names.update(name_dict))
but it didn't work. Is there any Pythonic way to solve this problem?
Let's say we ignore your first line:
nemas = {'PERSON' : , 'ORGANIZATION': , 'LOCATION': }
You simply can't do that. However you could do
nemas = {'PERSON' : None, 'ORGANIZATION': None, 'LOCATION': None}
Then in the end what you want is a dictionary of lists but you try to make a dict of dicts. Try this:
name_dict = {"PERSON":person_names, "ORGANIZATION": organization_names, "LOCATION":location_names}
Please note that I fixed some typos.
Then you can get the expected output by
print(name_dict)

How can I merge two or more dictionaries in a list?

Is there any nice pythonic way of merging dictionaries within a list?
What I have:
[
{ 'name': "Jack" },
{ 'age': "28" }
]
What I would like:
[
{ 'name': "Jack", 'age': "28" }
]
Here's a method that uses dict.update(). In my opinion it's a very readable solution:
data = [{'name': 'Jack'}, {'age': '28'}]

# Fold every dict into one; on duplicate keys the later dict wins.
new_dict = {}
for d in data:
    new_dict.update(d)

new_data = [new_dict]
# Parenthesised so it runs on Python 3 and prints the same thing on Python 2.
print(new_data)
OUTPUT
[{'age': '28', 'name': 'Jack'}]
If you're using Python 3, you can use collections.ChainMap:
>>> from collections import ChainMap
>>> ld = [
... { 'name': "Jack" },
... { 'age': "28" }
... ]
>>> [dict(ChainMap(*ld))]
[{'name': 'Jack', 'age': '28'}]
You could use list comprehension:
# Flatten every key/value pair of each dict in initial_list into a single
# dict (later dicts win on duplicate keys), wrapped in a one-element list.
final_list = [{
    key: value
    for one_dict in initial_list
    for key, value in one_dict.items()
}]
Edit: the list comprehension was backwards
from functools import reduce  # a builtin on Python 2, functools-only on Python 3

# Merge left to right; on duplicate keys the right-hand dict wins.
# items() is wrapped in list() because Python 3 dict views do not support "+".
out = reduce(lambda one, two: dict(list(one.items()) + list(two.items())),
             [{'name': 'Jack'}, {'age': '28'}, {'last_name': 'Daniels'}])
print(out)
OUTPUT
{'age': '28', 'last_name': 'Daniels', 'name': 'Jack'}