When you create a new AWS EMR cluster through the AWS Management Console you're able to provide JSON Software Configurations. You can put the JSON file in an S3 bucket and point the Software Configurations to the S3 bucket via the following field,
I need to do this through the AWS Python SDK Boto3 library, but I don't see where to do it in the available fields in their example:
response = client.run_job_flow(
Name='string',
LogUri='string',
AdditionalInfo='string',
AmiVersion='string',
ReleaseLabel='string',
Instances={
'MasterInstanceType': 'string',
'SlaveInstanceType': 'string',
'InstanceCount': 123,
'InstanceGroups': [
{
'Name': 'string',
'Market': 'ON_DEMAND'|'SPOT',
'InstanceRole': 'MASTER'|'CORE'|'TASK',
'BidPrice': 'string',
'InstanceType': 'string',
'InstanceCount': 123,
'Configurations': [
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
],
'EbsConfiguration': {
'EbsBlockDeviceConfigs': [
{
'VolumeSpecification': {
'VolumeType': 'string',
'Iops': 123,
'SizeInGB': 123
},
'VolumesPerInstance': 123
},
],
'EbsOptimized': True|False
},
'AutoScalingPolicy': {
'Constraints': {
'MinCapacity': 123,
'MaxCapacity': 123
},
'Rules': [
{
'Name': 'string',
'Description': 'string',
'Action': {
'Market': 'ON_DEMAND'|'SPOT',
'SimpleScalingPolicyConfiguration': {
'AdjustmentType': 'CHANGE_IN_CAPACITY'|'PERCENT_CHANGE_IN_CAPACITY'|'EXACT_CAPACITY',
'ScalingAdjustment': 123,
'CoolDown': 123
}
},
'Trigger': {
'CloudWatchAlarmDefinition': {
'ComparisonOperator': 'GREATER_THAN_OR_EQUAL'|'GREATER_THAN'|'LESS_THAN'|'LESS_THAN_OR_EQUAL',
'EvaluationPeriods': 123,
'MetricName': 'string',
'Namespace': 'string',
'Period': 123,
'Statistic': 'SAMPLE_COUNT'|'AVERAGE'|'SUM'|'MINIMUM'|'MAXIMUM',
'Threshold': 123.0,
'Unit': 'NONE'|'SECONDS'|'MICRO_SECONDS'|'MILLI_SECONDS'|'BYTES'|'KILO_BYTES'|'MEGA_BYTES'|'GIGA_BYTES'|'TERA_BYTES'|'BITS'|'KILO_BITS'|'MEGA_BITS'|'GIGA_BITS'|'TERA_BITS'|'PERCENT'|'COUNT'|'BYTES_PER_SECOND'|'KILO_BYTES_PER_SECOND'|'MEGA_BYTES_PER_SECOND'|'GIGA_BYTES_PER_SECOND'|'TERA_BYTES_PER_SECOND'|'BITS_PER_SECOND'|'KILO_BITS_PER_SECOND'|'MEGA_BITS_PER_SECOND'|'GIGA_BITS_PER_SECOND'|'TERA_BITS_PER_SECOND'|'COUNT_PER_SECOND',
'Dimensions': [
{
'Key': 'string',
'Value': 'string'
},
]
}
}
},
]
}
},
],
'InstanceFleets': [
{
'Name': 'string',
'InstanceFleetType': 'MASTER'|'CORE'|'TASK',
'TargetOnDemandCapacity': 123,
'TargetSpotCapacity': 123,
'InstanceTypeConfigs': [
{
'InstanceType': 'string',
'WeightedCapacity': 123,
'BidPrice': 'string',
'BidPriceAsPercentageOfOnDemandPrice': 123.0,
'EbsConfiguration': {
'EbsBlockDeviceConfigs': [
{
'VolumeSpecification': {
'VolumeType': 'string',
'Iops': 123,
'SizeInGB': 123
},
'VolumesPerInstance': 123
},
],
'EbsOptimized': True|False
},
'Configurations': [
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
]
},
],
'LaunchSpecifications': {
'SpotSpecification': {
'TimeoutDurationMinutes': 123,
'TimeoutAction': 'SWITCH_TO_ON_DEMAND'|'TERMINATE_CLUSTER',
'BlockDurationMinutes': 123
}
}
},
],
'Ec2KeyName': 'string',
'Placement': {
'AvailabilityZone': 'string',
'AvailabilityZones': [
'string',
]
},
'KeepJobFlowAliveWhenNoSteps': True|False,
'TerminationProtected': True|False,
'HadoopVersion': 'string',
'Ec2SubnetId': 'string',
'Ec2SubnetIds': [
'string',
],
'EmrManagedMasterSecurityGroup': 'string',
'EmrManagedSlaveSecurityGroup': 'string',
'ServiceAccessSecurityGroup': 'string',
'AdditionalMasterSecurityGroups': [
'string',
],
'AdditionalSlaveSecurityGroups': [
'string',
]
},
Steps=[
{
'Name': 'string',
'ActionOnFailure': 'TERMINATE_JOB_FLOW'|'TERMINATE_CLUSTER'|'CANCEL_AND_WAIT'|'CONTINUE',
'HadoopJarStep': {
'Properties': [
{
'Key': 'string',
'Value': 'string'
},
],
'Jar': 'string',
'MainClass': 'string',
'Args': [
'string',
]
}
},
],
BootstrapActions=[
{
'Name': 'string',
'ScriptBootstrapAction': {
'Path': 'string',
'Args': [
'string',
]
}
},
],
SupportedProducts=[
'string',
],
NewSupportedProducts=[
{
'Name': 'string',
'Args': [
'string',
]
},
],
Applications=[
{
'Name': 'string',
'Version': 'string',
'Args': [
'string',
],
'AdditionalInfo': {
'string': 'string'
}
},
],
Configurations=[
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
],
VisibleToAllUsers=True|False,
JobFlowRole='string',
ServiceRole='string',
Tags=[
{
'Key': 'string',
'Value': 'string'
},
],
SecurityConfiguration='string',
AutoScalingRole='string',
ScaleDownBehavior='TERMINATE_AT_INSTANCE_HOUR'|'TERMINATE_AT_TASK_COMPLETION',
CustomAmiId='string',
EbsRootVolumeSize=123,
RepoUpgradeOnBoot='SECURITY'|'NONE',
KerberosAttributes={
'Realm': 'string',
'KdcAdminPassword': 'string',
'CrossRealmTrustPrincipalPassword': 'string',
'ADDomainJoinUser': 'string',
'ADDomainJoinPassword': 'string'
}
)
How can I provide an S3 bucket location that has the Software Configuration JSON file for creating an EMR cluster through the Boto3 library?
Right now the boto3 SDK can't directly import the configuration settings from s3 for you as part of the run_job_flow() function. You would need to setup an S3 client in boto3, download the data as an S3 object and then update the Configuration List part of your EMR dictionary with the JSON data in your S3 file.
An example of how to download a JSON file from S3 and then load it into memory as a Python dict can be found here - Reading a JSON file from S3 using Python boto3
The Configuring Applications - Amazon EMR documentation says:
Supplying a Configuration in the Console
To supply a configuration, you navigate to the Create cluster page and choose Edit software settings. You can then enter the configuration directly (in JSON or using shorthand syntax demonstrated in shadow text) in the console or provide an Amazon S3 URI for a file with a JSON Configurations object.
That seems to be the capability you showed in your question.
The documentation then shows how you can do it via the CLI:
aws emr create-cluster --use-default-roles --release-label emr-5.14.0 --instance-type m4.large --instance-count 2 --applications Name=Hive --configurations https://s3.amazonaws.com/mybucket/myfolder/myConfig.json
This maps to the Configurations options in the JSON you show above:
'Configurations': [
{
'Classification': 'string',
'Configurations': {'... recursive ...'},
'Properties': {
'string': 'string'
}
},
]
Configurations: A configuration classification that applies when provisioning cluster instances, which can include configurations for applications and software that run on the cluster.
It would contain settings such as:
[
{
"Classification": "core-site",
"Properties": {
"hadoop.security.groups.cache.secs": "250"
}
},
{
"Classification": "mapred-site",
"Properties": {
"mapred.tasktracker.map.tasks.maximum": "2",
"mapreduce.map.sort.spill.percent": "0.90",
"mapreduce.tasktracker.reduce.tasks.maximum": "5"
}
}
]
Short answer: Configurations
Related
Can anyone please guide me through the steps to create multiple trigger types — one conditional and the other scheduled — in a single workflow?
So far I have used the create_trigger function, but I am not sure how to address the above requirement.
Can anyone help here, please?
I have tried the below syntax, but it didn't work:
# NOTE(review): this call cannot work as written — the keyword arguments
# Type= and Actions= are each given twice, which is a SyntaxError in
# Python ("keyword argument repeated").  create_trigger creates exactly
# ONE trigger per call; a scheduled trigger and a conditional trigger
# require two separate create_trigger calls.
response = client.create_trigger(
Name='two_triggers',
WorkflowName='wf_With_two_tirggers',
Type='SCHEDULED',
Schedule='cron(0 12 * * ? *)',
Actions=[
{
'JobName': 'abc_dev',
'Arguments': {
'string': 'string'
},
'Timeout': 123,
'SecurityConfiguration': 'string',
'NotificationProperty': {
'NotifyDelayAfter': 123
},
'Trigger': 'string'
},
],
# SyntaxError: keyword argument 'Type' repeated (already given above).
Type='CONDITIONAL',
Predicate={
'Logical': 'ANY',
'Conditions': [
{
'LogicalOperator': 'EQUALS',
'JobName': 'def_dev',
'State': 'SUCCEEDED'
},
]
},
# SyntaxError: keyword argument 'Actions' repeated (already given above).
Actions=[
{
'JobName': 'ghi_dev',
'Arguments': {
'string': 'string'
},
'Timeout': 123,
'SecurityConfiguration': 'string',
'NotificationProperty': {
'NotifyDelayAfter': 123
},
'CrawlerName': 'string'
},
],
Description='string',
StartOnCreation=True,
Tags={
'string': 'string'
}
)
Below is the design of the workflow I am struggling to write code for. I tried the above boto3 code for this design, but it didn't work.
Yes, I figured out an answer. Below is the code for the design given in the question:
import boto3
import os
import logging
# Glue client; the workflow and both triggers are created in us-east-1.
glue = boto3.client(service_name="glue", region_name='us-east-1')
# The workflow that both triggers below attach to via WorkflowName.
response = glue.create_workflow(
Name="dual_trigger_wf")
# Trigger 1 (one-to-many): fires on a daily 08:00 UTC schedule and
# starts jobs "abc" and "def" in parallel.
response1 = glue.create_trigger(
Name="trigger_one_to_many",
WorkflowName="dual_trigger_wf",
Type="SCHEDULED",
Schedule="cron(0 8 * * ? *)",
Actions=[
{
"JobName": "abc",
"Arguments": {"string": "string"},
"Timeout": 123,
"SecurityConfiguration": "string",
"NotificationProperty": {"NotifyDelayAfter": 123},
},
{
"JobName": "def",
"Arguments": {"string": "string"},
"Timeout": 123,
"SecurityConfiguration": "string",
"NotificationProperty": {"NotifyDelayAfter": 123},
},
],
Description="string",
StartOnCreation=False,
)
# Trigger 2 (many-to-one): CONDITIONAL with Logical AND — starts job
# "ghi" only after BOTH "abc" and "def" have SUCCEEDED.
response2 = glue.create_trigger(
Name="trigger_many_to_one",
WorkflowName="dual_trigger_wf",
Type="CONDITIONAL",
Predicate={
"Logical": "AND",
"Conditions": [
{
"LogicalOperator": "EQUALS",
"JobName": "abc",
"State": "SUCCEEDED",
},
{
"LogicalOperator": "EQUALS",
"JobName": "def",
"State": "SUCCEEDED",
},
],
},
Actions=[
{
"JobName": "ghi",
"Arguments": {"string": "string"},
"Timeout": 123,
"SecurityConfiguration": "string",
"NotificationProperty": {"NotifyDelayAfter": 123},
}
],
Description="string",
StartOnCreation=False,
)
I am trying to automate the EMR cluster creation through boto3. Unfortunately, I'm getting the following warning:
The Auto Scaling policy for instance group ig-MI0ANZ0C3WNN in Amazon EMR cluster j-BS3Y2OAO65R6 (qidv2_historical_3.0.1) could not attach and failed at 2021-09-20 17:41 UTC.
I cannot figure out what the issue is. This was adapted from an AWS CLI command which didn't raise any warnings or issues, but after transitioning to boto3, I was getting this autoscaling policy warning:
# NOTE(review): the CORE instance group below is created with
# InstanceCount=1 and Market='SPOT', while its AutoScalingPolicy
# constraints demand MinCapacity=3 — the group starts outside its own
# scaling bounds, which is presumably why EMR reports the policy "could
# not attach"; confirm the constraints against the group's initial size
# and market type.
cluster_id = self.boto_client().run_job_flow(
Name=self.cluster_name,
LogUri='s3n://aws-logs',
JobFlowRole='EMR_EC2_DefaultRole',
ReleaseLabel=self.release_label,
Applications=[{'Name': 'Spark'},{'Name': 'Hive'},{'Name': 'Hadoop'},{'Name': 'Pig'},{'Name': 'Hue'},
{'Name': 'Zeppelin'},{'Name': 'Livy'},{'Name': 'JupyterHub'},{'Name': 'Tensorflow'}
],
AutoScalingRole='EMR_AutoScaling_DefaultRole',
BootstrapActions=[
{
'Name': 'Custom action',
'ScriptBootstrapAction': {
'Path': 's3://ml-data/emr-bootstrap_spk3.0.1.sh'
}
}
],
ServiceRole='EMR_DefaultRole',
ScaleDownBehavior='TERMINATE_AT_TASK_COMPLETION',
EbsRootVolumeSize=25,
# Three sequential steps: enable debugging, sync sources from S3, then
# run the Spark pipeline script.
Steps=[
{
'Name': 'Setup Debugging',
'ActionOnFailure': 'TERMINATE_CLUSTER',
'HadoopJarStep': {
'Jar': 'command-runner.jar',
'Args': ['state-pusher-script']
}
},
{
'Name': 'Setup - Sync with S3',
'ActionOnFailure': 'CANCEL_AND_WAIT',
'HadoopJarStep': {
'Jar': 'command-runner.jar',
'Args': ['aws', 's3', 'sync',
's3://ch-ml-data/',
'/mnt/src/']
}
},
{
'Name': 'Spark Application',
'ActionOnFailure': 'CANCEL_AND_WAIT',
'HadoopJarStep': {
'Jar': 'command-runner.jar',
'Args': ['cd /mnt/src; bash spark_jobs/qid_pipeline_historical_run.sh']
}
}
],
# Software configurations: Zeppelin environment variables plus the Glue
# Data Catalog as the (Spark-)Hive metastore.
Configurations=[
{
'Classification': 'zeppelin-env',
'Properties': {},
'Configurations': [
{
'Classification': 'export',
'Properties': {
'ZEPPELIN_PORT': '8890',
'HADOOP_CONF_DIR': '/etc/hadoop/conf',
'ZEPPELIN_LOG_DIR': '/var/log/zeppelin',
'ZEPPELIN_PID': '$ZEPPELIN_PID_DIR/zeppelin.pid',
'MASTER': 'yarn-client',
'SPARK_SUBMIT_OPTIONS': "$SPARK_SUBMIT_OPTIONS --conf '\''spark.executorEnv.PYTHONPATH=/usr/lib/spark/python/lib/py4j-src.zip:/usr/lib/spark/python/:<CPS>{{PWD}}/pyspark.zip<CPS>{{PWD}}/py4j-src.zip'\'' --conf spark.yarn.isPython=true",
'PYSPARK_DRIVER_PYTHON': '/mnt/anaconda3/envs/question-identification-v2/bin/python',
'ZEPPELIN_NOTEBOOK_USER': 'user',
'CLASSPATH': ':/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar',
'ZEPPELIN_PID_DIR': '/var/run/zeppelin',
'PYSPARK_PYTHON': '/mnt/anaconda3/envs/question-identification-v2/bin/python',
'SPARK_HOME': '/usr/lib/spark',
'ZEPPELIN_NOTEBOOK_S3_BUCKET': 'ch-ml-data',
'ZEPPELIN_WAR_TEMPDIR': '/var/run/zeppelin/webapps',
'ZEPPELIN_CONF_DIR': '/etc/zeppelin/conf',
'ZEPPELIN_NOTEBOOK_STORAGE': 'org.apache.zeppelin.notebook.repo.S3NotebookRepo',
'ZEPPELIN_NOTEBOOK_DIR': '/var/lib/zeppelin/notebook',
'ZEPPELIN_ADDR': '0.0.0.0'
}
}
]
},
{
'Classification': 'hive-site',
'Properties': {
'hive.metastore.client.factory.class': 'com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory'
}
},
{
'Classification': 'spark-hive-site',
'Properties': {
'hive.metastore.client.factory.class': 'com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory'
}
}
],
Instances={
'Ec2KeyName': 'emr-temporary',
'KeepJobFlowAliveWhenNoSteps': False,
'TerminationProtected': False,
'Ec2SubnetId': 'subnet-063735e4fa63e3bac',
'AdditionalSlaveSecurityGroups': ["sg-012970517d0a88bae", "sg-01813cf2115b55874", "sg-04563fc7e8ed9e1ec", "sg-07ab30655981361ad"],
'ServiceAccessSecurityGroup': 'sg-00dd6e63d7004176d',
'EmrManagedSlaveSecurityGroup': 'sg-048b83d1a20550b43',
'EmrManagedMasterSecurityGroup': 'sg-017402b74e879aaa5',
'AdditionalMasterSecurityGroups': ["sg-012970517d0a88bae", "sg-01813cf2115b55874", "sg-04563fc7e8ed9e1ec", "sg-07ab30655981361ad"],
# One TASK, one MASTER and one CORE group, all i3.2xlarge.
'InstanceGroups': [
{
'Name': 'Task',
'InstanceRole': 'TASK',
'InstanceType': 'i3.2xlarge',
'InstanceCount': 1
},
{
'Name': 'Master - 1',
'InstanceRole': 'MASTER',
'InstanceType': 'i3.2xlarge',
'InstanceCount': 1,
},
{
'Name': 'Core - 2',
'InstanceRole': 'CORE',
'InstanceType': 'i3.2xlarge',
# NOTE(review): InstanceCount=1 is below the policy's MinCapacity=3.
'InstanceCount': 1,
'Market': 'SPOT',
'AutoScalingPolicy': {
'Constraints': {
'MinCapacity': 3,
'MaxCapacity': 100
},
# Scale out by 10 when available YARN memory drops below 25%;
# scale in by 5 when it stays at/above 50%.
'Rules': [
{
'Name': 'memory',
'Description': '',
'Action': {
'SimpleScalingPolicyConfiguration': {
'ScalingAdjustment': 10,
'CoolDown': 300,
'AdjustmentType': 'CHANGE_IN_CAPACITY'
}
},
'Trigger': {
'CloudWatchAlarmDefinition': {
'MetricName': 'YARNMemoryAvailablePercentage',
'ComparisonOperator': 'LESS_THAN',
'Statistic': 'AVERAGE',
'Period': 300,
'EvaluationPeriods': 2,
'Unit': 'PERCENT',
'Namespace': 'AWS/ElasticMapReduce',
'Threshold': 25,
'Dimensions': [
{
'Value': '${emr.clusterId}',
'Key': 'JobFlowId'
}
]
}
}
},
{
'Name': 'mem',
'Description': '',
'Action': {
'SimpleScalingPolicyConfiguration': {
'ScalingAdjustment': -5,
'CoolDown': 300,
'AdjustmentType': 'CHANGE_IN_CAPACITY'
}
},
'Trigger': {
'CloudWatchAlarmDefinition': {
'MetricName': 'YARNMemoryAvailablePercentage',
'ComparisonOperator': 'GREATER_THAN_OR_EQUAL',
'Statistic': 'AVERAGE',
'Period': 300,
'EvaluationPeriods': 18,
'Unit': 'PERCENT',
'Namespace': 'AWS/ElasticMapReduce',
'Threshold': 50,
'Dimensions': [
{
'Value': '${emr.clusterId}',
'Key': 'JobFlowId'
}
],
}
}
}
]
}
}
]
}
)
I'm trying to enable logging on all S3 buckets in my account, but I am getting an error while executing the code:
def s3_log():
    """Enable server-access logging on every S3 bucket in the account.

    Each bucket is configured to deliver its access logs to itself under
    a "<bucket-name>/" prefix.

    PutBucketLogging rejects the target bucket with
    InvalidTargetBucketForLogging unless the *bucket ACL* grants the S3
    log-delivery group WRITE and READ_ACP, so those ACL grants are added
    first.  The TargetGrants passed to put_bucket_logging only apply to
    the delivered log objects and are not sufficient on their own —
    which is what caused the original error.
    """
    log_delivery_uri = 'http://acs.amazonaws.com/groups/s3/LogDelivery'
    s3 = boto3.client('s3')
    response = s3.list_buckets()
    for i in response['Buckets']:
        name = i['Name']
        # Add the required log-delivery grants to the bucket ACL while
        # preserving the existing owner and grants.
        acl = s3.get_bucket_acl(Bucket=name)
        grants = acl['Grants']
        for permission in ('WRITE', 'READ_ACP'):
            grant = {
                'Grantee': {'Type': 'Group', 'URI': log_delivery_uri},
                'Permission': permission,
            }
            if grant not in grants:
                grants.append(grant)
        s3.put_bucket_acl(
            Bucket=name,
            AccessControlPolicy={'Owner': acl['Owner'], 'Grants': grants},
        )
        # Now the bucket qualifies as a logging target.
        s3.put_bucket_logging(
            Bucket=name,
            BucketLoggingStatus={
                'LoggingEnabled': {
                    'TargetBucket': name,
                    'TargetGrants': [
                        {
                            'Grantee': {
                                'Type': 'Group',
                                'URI': log_delivery_uri,
                            },
                            'Permission': 'READ',
                        },
                        {
                            'Grantee': {
                                'Type': 'Group',
                                'URI': log_delivery_uri,
                            },
                            'Permission': 'WRITE',
                        },
                    ],
                    'TargetPrefix': name + '/',
                }
            },
        )
Error :
"errorMessage": "An error occurred (InvalidTargetBucketForLogging) when calling the PutBucketLogging operation: You must give the log-delivery group WRITE and READ_ACP permissions to the target bucket"
I have added target grants to grant permission to the log-delivery group, but it seems something is missing in my code. So I went ahead and tried to add a bucket ACL, but then it gave me a malformed-XML error, so the ACL code is commented out at the moment.
You must grant the READ_ACP permission. You can do it as follows:
# Grant the S3 log-delivery group the bucket-ACL permissions (WRITE and
# READ_ACP) that PutBucketLogging requires on the target bucket.
# NOTE(review): `s3c` (an S3 client) and `bucket` (the target bucket
# name) are defined outside this snippet; the Owner ID below is a
# placeholder for the bucket owner's canonical user ID.
s3c.put_bucket_acl(
AccessControlPolicy = {
"Owner": {
"ID": "canonical_user_id_sdakfjldsakjf" # see https://docs.aws.amazon.com/general/latest/gr/acct-identifiers.html
},
'Grants': [
{
'Grantee': {
'Type': 'Group',
'URI': 'http://acs.amazonaws.com/groups/s3/LogDelivery'
},
'Permission': 'WRITE'
},
{
'Grantee': {
'Type': 'Group',
'URI': 'http://acs.amazonaws.com/groups/s3/LogDelivery'
},
'Permission': 'READ_ACP'
}
]
},
Bucket=bucket
)
more on that here
I am using AWS boto3 pricing api to get the prices of instances.
But I am not getting the results for the combination (us-west-2, r3.2xlarge, Linux, no pre-installed software, tenancy = shared).
Here is my code:
# The Pricing API is only served from selected regions, hence us-east-1.
pricing = boto3.client('pricing', region_name='us-east-1')
hourlyTermCode = 'JRTCKXETXF'  # on-demand offer-term code
rateCode = '6YS6EN2CT7'        # hourly rate code

# The paginator already walks every result page itself, so the original
# `while True:` / NextToken bookkeeping was redundant — and, since
# nothing ever broke out of it, that loop never terminated.  Create the
# paginator once and iterate its pages directly.
paginator = pricing.get_paginator('get_products')
pages = paginator.paginate(
    ServiceCode='AmazonEC2',
    Filters=[
        {'Type': 'TERM_MATCH', 'Field': 'operatingSystem', 'Value': 'Linux'},
        {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': 'US West (Oregon)'}
    ]
)
for response in pages:
    for price in response['PriceList']:
        # Each PriceList entry is a JSON document encoded as a string.
        resp = json.loads(price)
        product = resp['product']
        sku = product['sku']
        attributes = product['attributes']
        # NOTE(review): amazon_instance_type (e.g. 'r3.2xlarge') is
        # defined elsewhere in the caller's script.
        if (product['productFamily'] == 'Compute Instance'
                and str(attributes['instanceType']) == str(amazon_instance_type)
                and str(attributes['operatingSystem']) == 'Linux'
                and str(attributes['preInstalledSw']) == 'NA'
                and str(attributes['tenancy']) == 'Shared'):
            sku_key = resp['terms']['OnDemand'].get(sku)
            if sku_key:
                price = sku_key[sku + '.' + hourlyTermCode + '.' + rateCode]['pricePerUnit']['USD']
                print(price)
This works:
import json
import boto3

client = boto3.client('pricing', region_name='us-east-1')

# One TERM_MATCH filter per attribute narrows the result to the single
# (US West (Oregon), r3.2xlarge, Linux, shared tenancy, no pre-installed
# software) product.
response = client.get_products(
    ServiceCode='AmazonEC2',
    Filters=[
        {'Type': 'TERM_MATCH', 'Field': 'operatingSystem', 'Value': 'Linux'},
        {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': 'US West (Oregon)'},
        {'Type': 'TERM_MATCH', 'Field': 'instanceType', 'Value': 'r3.2xlarge'},
        {'Type': 'TERM_MATCH', 'Field': 'tenancy', 'Value': 'Shared'},
        {'Type': 'TERM_MATCH', 'Field': 'preInstalledSw', 'Value': 'NA'}
    ]
)
for pricelist_json in response['PriceList']:
    # Each PriceList entry is a JSON document encoded as a string.
    pricelist = json.loads(pricelist_json)
    product = pricelist['product']
    if product['productFamily'] == 'Compute Instance':
        # dict.values() is a non-indexable view in Python 3 (the original
        # .values()[0] was Python-2-only); next(iter(...)) takes the
        # first — and only — entry instead.
        on_demand_term = next(iter(pricelist['terms']['OnDemand'].values()))
        price_dimension = next(iter(on_demand_term['priceDimensions'].values()))
        print(price_dimension['pricePerUnit']['USD'])
It is based on the output of:
{u'FormatVersion': u'aws_v1', u'PriceList': [u'{
"product": {
"productFamily": "Compute Instance",
"attributes": {
"enhancedNetworkingSupported": "Yes",
"memory": "61 GiB",
"vcpu": "8",
"capacitystatus": "Used",
"locationType": "AWS Region",
"storage": "1 x 160 SSD",
"instanceFamily": "Memory optimized",
"operatingSystem": "Linux",
"physicalProcessor": "Intel Xeon E5-2670 v2 (Ivy Bridge)",
"clockSpeed": "2.5 GHz",
"ecu": "26",
"networkPerformance": "High",
"servicename": "Amazon Elastic Compute Cloud",
"instanceType": "r3.2xlarge",
"tenancy": "Shared",
"usagetype": "USW2-BoxUsage:r3.2xlarge",
"normalizationSizeFactor": "16",
"processorFeatures": "Intel AVX; Intel Turbo",
"servicecode": "AmazonEC2",
"licenseModel": "No License required",
"currentGeneration": "No",
"preInstalledSw": "NA",
"location": "US West (Oregon)",
"processorArchitecture": "64-bit",
"operation": "RunInstances"
},
"sku": "GMTWE5CTY4FEUYDN"
},
"serviceCode": "AmazonEC2",
"terms": {
"OnDemand": {
"GMTWE5CTY4FEUYDN.JRTCKXETXF": {
"priceDimensions": {
"GMTWE5CTY4FEUYDN.JRTCKXETXF.6YS6EN2CT7": {
"unit": "Hrs",
"endRange": "Inf",
"description": "$0.665 per On Demand Linux r3.2xlarge Instance Hour",
"appliesTo": [],
"rateCode": "GMTWE5CTY4FEUYDN.JRTCKXETXF.6YS6EN2CT7",
"beginRange": "0",
"pricePerUnit": {
"USD": "0.6650000000"
}
}
},
"sku": "GMTWE5CTY4FEUYDN",
"effectiveDate": "2018-07-01T00:00:00Z",
"offerTermCode": "JRTCKXETXF",
"termAttributes": {}
}
},
...
},
"version": "20180726190848",
"publicationDate": "2018-07-26T19:08:48Z"
}'
]
}
I am trying to get the SnapshotId of the below output with no success. I can get the value of the AMI description and the value of AMI_ID.
{
'Images': [
{
'Architecture': 'i386'|'x86_64',
'CreationDate': 'string',
'ImageId': 'string',
'ImageLocation': 'string',
'ImageType': 'machine'|'kernel'|'ramdisk',
'Public': True|False,
'KernelId': 'string',
'OwnerId': 'string',
'Platform': 'Windows',
'ProductCodes': [
{
'ProductCodeId': 'string',
'ProductCodeType': 'devpay'|'marketplace'
},
],
'RamdiskId': 'string',
'State': 'pending'|'available'|'invalid'|'deregistered'|'transient'|'failed'|'error',
'BlockDeviceMappings': [
{
'DeviceName': 'string',
'VirtualName': 'string',
'Ebs': {
'Encrypted': True|False,
'DeleteOnTermination': True|False,
'Iops': 123,
'SnapshotId': 'string',
'VolumeSize': 123,
'VolumeType': 'standard'|'io1'|'gp2'|'sc1'|'st1'
},
'NoDevice': 'string'
},
],
'Description': 'string',
'EnaSupport': True|False,
'Hypervisor': 'ovm'|'xen',
'ImageOwnerAlias': 'string',
'Name': 'string',
'RootDeviceName': 'string',
'RootDeviceType': 'ebs'|'instance-store',
'SriovNetSupport': 'string',
'StateReason': {
'Code': 'string',
'Message': 'string'
},
'Tags': [
{
'Key': 'string',
'Value': 'string'
},
],
'VirtualizationType': 'hvm'|'paravirtual'
},
]
}
Using the following code:
import boto3

client = boto3.client('ec2', region_name='us-east-1')


def verifica_imagem(imagem):
    """Look up an AMI whose description matches *imagem* and read its ids.

    Returns 1 (after printing a message) when no matching image exists;
    returns None on success.
    """
    amiresponse = client.describe_images(
        Filters=[
            {
                'Name': 'description',
                'Values': [imagem],
            },
        ],
        DryRun=False
    )
    try:
        data = str(amiresponse['Images'][0]['Description'])
        ami_id = str(amiresponse['Images'][0]['ImageId'])
        # SnapshotId is nested inside each block-device mapping's 'Ebs'
        # dict — it does not exist at the image's top level, which is why
        # ['Images'][0]['SnapshotId'] always raised KeyError.
        snapshot_id = str(
            amiresponse['Images'][0]['BlockDeviceMappings'][0]['Ebs']['SnapshotId']
        )
    except (IndexError, KeyError):
        # Narrowed from a bare except: only "no such image / key" means
        # the AMI does not exist; other errors should propagate.
        print("AMI not exists! Exiting....")
        return 1


if __name__ == "__main__":
    verifica_imagem('IMAGE_XXXXXXX')
I can't understand how to use the key of SnapshotId. I have tried:
snapshot_id = str(amiresponse['Images']['BlockDeviceMappings']['Ebs'][0]['SnapshotId']) but it is not working either.
The value of Images and BlockDeviceMappings is an array and Ebs is a dict.
Use this to fetch the value of SnapshotId,
snapshot_id = amiresponse['Images'][0]['BlockDeviceMappings'][0]['Ebs']['SnapshotId']