How to manage stored procedures in AWS Redshift using an automation or command-line tool?

We've got a number of stored procedures that have been built in Redshift on AWS.
We need to download and upload these stored procedures so that they can be kept in GitHub as a means of tracking changes.
These procedures eventually need to be part of a CloudFormation template so that the infrastructure can be maintained as well.
Ideally this could be done using the AWS CLI, but there doesn't seem to be a command for it.
How are AWS Redshift stored procedures managed in an automation/CI-CD environment?

I have a portion of a working solution.
import json
import psycopg2
import os


def run_sql_commands(input_command, database="mydatabasename", host_port=1234,
                     host_url="datawarehouse.redshift.amazonaws.com"):
    """
    :param input_command: sql string to execute
    :param database: which database to run the query
    :return:
    """
    results = None
    # db_user and db_pass will need to be set as environment variables
    db_user = os.environ["db_user"]
    db_pass = os.environ["db_pass"]
    db_host = host_url
    db_port = host_port
    db_name = database
    try:
        conn = psycopg2.connect(
            "dbname={} port={} user={} host={} password={}".format(db_name, db_port, db_user, db_host, db_pass))
        cursor = conn.cursor()
        cursor.execute(input_command)
        results = cursor.fetchall()
    except Exception as e:
        return None
    return results
def get_arg_type(oid, database):
    sql = f"SELECT typname FROM pg_catalog.pg_type WHERE oid={oid}"
    r = run_sql_commands(sql, database)
    return r[0][0]


def download_all_procedures(database, file_location="./local-code-store"):
    get_all_stored_procedure_sql = """
    SELECT
        n.nspname,
        b.usename,
        p.proname,
        p.proargnames,
        p.proargtypes,
        p.prosrc
    FROM
        pg_catalog.pg_namespace n
    JOIN pg_catalog.pg_proc p ON
        pronamespace = n.oid
    JOIN pg_user b ON
        b.usesysid = p.proowner
    WHERE
        nspname NOT IN ('information_schema', 'pg_catalog');
    """
    r = run_sql_commands(get_all_stored_procedure_sql, database)
    counted_items = []
    for item in r:
        table = item[0]
        author = item[1]
        procedure_name = item[2]
        procedure_arguments = item[3]
        procedure_argtypes = item[4]
        procedure = item[5]
        t_list = []
        for this_oid in procedure_argtypes.split():
            t = get_arg_type(this_oid, database="mydatabasename")
            t_list.append(t)
        meta_data = {'table': table,
                     'author': author,
                     'arguments': procedure_arguments,
                     'argument_types': t_list}
        filename = f'{file_location}/{database}/Schemas/{table}/{procedure_name}.sql'
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        f = open(filename, 'w')
        count = f.write(procedure.replace('\r\n', '\n'))
        f.close()
        filename = f'{file_location}/{database}/Schemas/{table}/{procedure_name}.json'
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        f = open(filename, 'w')
        counted_items.append(f.write(json.dumps(meta_data)))
        f.close()
    return counted_items


if __name__ == "__main__":
    print("Starting...")
    c = download_all_procedures("mydatabase")
    print("... finished")

Related

Unable to launch Multiple Streaming Pipelines (N to N Pipeline) Dynamically (Using Runtime Value Provider) in a Single Dataflow Job in Python

I am trying to launch a streaming Dataflow job that contains n pipelines.
Based on a configured topic and the corresponding BigQuery table for each topic, I want to launch a pipeline inside one streaming job.
My actual problem is that I have to create and upload a template for each and every project. What I want is to be able to reuse an uploaded template and only pass configuration files to launch a new Dataflow job with a different topic, subscription, dataset and BigQuery table.
At the moment I am unable to reuse the template.
Please help me with this and let me know whether it is possible, because Google only provides one-to-one templates, not many-to-many templates (e.g. three topics to three BigQuery tables, i.e. three data pipelines, n-to-n).
import logging
import os
import json

from google.cloud import storage
from apache_beam import Pipeline, ParDo, DoFn
from apache_beam.io import ReadFromPubSub, WriteToBigQuery, BigQueryDisposition
from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions, WorkerOptions, GoogleCloudOptions, \
    SetupOptions


def _get_storage_service():
    storage_client = storage.Client \
        .from_service_account_json(
            json_credentials_path=r'C:\Users\dneema\PycharmProjects\iot_dataflow\df_stm_iot_pubsub_bq\service_account_credentials.json')
    print('storage service fetched')
    return storage_client


class RuntimeOptions(PipelineOptions):

    def __init__(self, flags=None, **kwargs):
        super(RuntimeOptions, self).__init__(flags, **kwargs)

    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument('--bucket_name', type=str)
        parser.add_value_provider_argument('--config_json_path', type=str)


class PipelineCreator:

    def __init__(self):
        self.options = PipelineOptions()
        storage_client = storage.Client.from_service_account_json(
            'service_account_credentials_updated.json')

        runtime_options = self.options.view_as(RuntimeOptions)
        bucket_name = str(runtime_options.bucket_name)
        config_json_path = str(runtime_options.config_json_path)

        # get the bucket with name
        bucket = storage_client.get_bucket(bucket_name)

        # get bucket file as blob
        blob = bucket.get_blob(config_json_path)

        # convert to string and load config
        json_data = blob.download_as_string()
        self.configData = json.loads(json_data)

        dataflow_config = self.configData['dataflow_config']
        self.options.view_as(StandardOptions).streaming = bool(dataflow_config['streaming'])
        self.options.view_as(SetupOptions).save_main_session = True

        worker_options = self.options.view_as(WorkerOptions)
        worker_options.max_num_workers = int(dataflow_config['max_num_worker'])
        worker_options.autoscaling_algorithm = str(dataflow_config['autoscaling_algorithm'])
        # worker_options.machine_type = str(dataflow_config['machine_type'])
        # worker_options.zone = str(dataflow_config['zone'])
        # worker_options.network = str(dataflow_config['network'])
        # worker_options.subnetwork = str(dataflow_config['subnetwork'])

    def run(self):
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'dataflow-service-account.json'

        project_id = self.configData['project_id']
        dataset_id = self.configData['dataset_id']
        topics = self.configData['topics']
        table_ids = self.configData['bq_table_ids']
        error_table_id = self.configData['error_table_id']

        logger = logging.getLogger(project_id)
        logger.info(self.options.display_data())

        pipeline = Pipeline(options=self.options)

        size = len(topics)
        for index in range(size):
            print(topics[index])
            pipeline_name = "pipeline_" + str(index)
            logger.info("Launch pipeline :: " + pipeline_name)
            messages = pipeline | 'Read PubSub Message in ' + pipeline_name >> ReadFromPubSub(topic=topics[index])
            logger.info("Read PubSub Message")
            valid_messages, invalid_messages = messages | 'Convert Messages to TableRows in ' + pipeline_name >> ParDo(
                TransformMessageToTableRow()).with_outputs('invalid', main='valid')
            valid_messages | 'Write Messages to BigQuery in ' + pipeline_name >> WriteToBigQuery(
                table=table_ids[index],
                dataset=dataset_id,
                project=project_id,
                write_disposition=BigQueryDisposition.WRITE_APPEND)

        pipeline.run().wait_until_finish()


class TransformMessageToTableRow(DoFn):

    def process(self, element, *args, **kwargs):
        logging.getLogger('dataflow').log(logging.INFO, element)
        print(element)
        print("element type ", type(element))
        print("inside bq pardo")
        import json
        try:
            message_rows = json.loads(element)
            # if using emulator, uncomment below line
            message_rows = json.loads(message_rows)
            print('loaded element')
        except:
            try:
                element = "[" + element + "]"
                message_rows = json.loads(element)
            except Exception as e:
                print(e)
                from apache_beam import pvalue
                yield [pvalue.TaggedOutput('invalid', [element, str(e)])]
        print(message_rows)
        print("message rows", type(message_rows))
        if not isinstance(message_rows, list):
            message_rows = [message_rows]
        # rows = list()
        if isinstance(message_rows, list):
            for row in message_rows:
                try:
                    new_row = dict()
                    for k, v in row.items():
                        new_row[str(k)] = v
                    # rows.append(new_row)
                    print(new_row)
                    yield new_row
                except Exception as e:
                    print(e)
                    from apache_beam import pvalue
                    yield pvalue.TaggedOutput('invalid', [row, str(e)])


if __name__ == '__main__':
    PipelineCreator().run()
Here the runtime arguments are bucket_name and config_json_path, which point to all of the configuration-related settings such as the dataset, BigQuery tables, topics/subscriptions and the worker options.
Is this possible or not? Google only provides one-to-one templates, not many-to-many templates (e.g. three topics to three BigQuery tables, i.e. three data pipelines, n-to-n).
Regarding the previously answered thread Unable to run multiple Pipelines in desired order by creating template in Apache Beam: you can only run one pipeline inside a template at any time.
You'll have to delegate the template creation to another service and pass the configuration with it; just follow the link inside that thread and you'll find how-to examples.
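To make that delegation concrete, here is a minimal sketch of launching an already-staged template several times with different parameters through the Dataflow REST API using googleapiclient; the project, bucket, template path and parameter names are placeholders, and the staged template must declare matching ValueProvider arguments:
from googleapiclient.discovery import build

dataflow = build("dataflow", "v1b3")

# Hypothetical configuration: one job per topic/table pair.
jobs = [
    {"topic": "projects/my-project/topics/topic-a", "table": "dataset.table_a"},
    {"topic": "projects/my-project/topics/topic-b", "table": "dataset.table_b"},
]

for i, cfg in enumerate(jobs):
    request = dataflow.projects().locations().templates().launch(
        projectId="my-project",
        location="us-central1",
        gcsPath="gs://my-bucket/templates/my_streaming_template",
        body={
            "jobName": "streaming-job-{}".format(i),
            "parameters": {
                # Must match the ValueProvider arguments defined in the template.
                "input_topic": cfg["topic"],
                "output_table": cfg["table"],
            },
        },
    )
    response = request.execute()
    print(response["job"]["id"])
Each launch creates a separate job from the same staged template, so the n-to-n behaviour comes from launching a one-pipeline template n times rather than from a single template that contains n pipelines.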

How to get boto3 to display _all_ RDS instances?

Trying to get all RDS instances with boto3 does not return all RDS instances.
When I look at my RDS instances in Oregon (us-west-2) in the console, I can see several of them.
However, if I run the Python 3 script below, I only get one result:
$ python3 ./stackoverflow.py
RDS instances in Oregon
------------------------------
aurora-5-7-yasmin.cazdggrmkpt1.us-west-2.rds.amazonaws.com qa test db.t2.small aurora-5-7-yasmin
$
Can you suggest a way to get boto3 to display all RDS instances?
$ cat ./stackoverflow.py
import collections
import boto3
import datetime
import pygsheets

REGIONS = ('us-west-2',)
REGIONS_H = ('Oregon',)
currentDT = str(datetime.datetime.now())


def create_spreadsheet(outh_file, spreadsheet_name="AWS usage"):
    client = pygsheets.authorize(outh_file=outh_file, outh_nonlocal=True)
    client.list_ssheets(parent_id=None)
    spread_sheet = client.create(spreadsheet_name)
    return spread_sheet


def rds_worksheet_creation(spread_sheet):
    for i in range(len(REGIONS)):
        region = REGIONS[i]
        region_h = REGIONS_H[i]
        print()
        print("{} instances in {}".format("RDS", region_h))
        print("------------------------------")
        client = boto3.client('rds', region_name=region)
        db_instances = client.describe_db_instances()
        for i in range(len(db_instances)):
            j = i - 1
            try:
                DBName = db_instances['DBInstances'][j]['DBName']
                MasterUsername = db_instances['DBInstances'][0]['MasterUsername']
                DBInstanceClass = db_instances['DBInstances'][0]['DBInstanceClass']
                DBInstanceIdentifier = db_instances['DBInstances'][0]['DBInstanceIdentifier']
                Endpoint = db_instances['DBInstances'][0]['Endpoint']
                Address = db_instances['DBInstances'][0]['Endpoint']['Address']
                print("{} {} {} {} {}".format(Address, MasterUsername, DBName, DBInstanceClass,
                                              DBInstanceIdentifier))
            except KeyError:
                continue


if __name__ == "__main__":
    spread_sheet = create_spreadsheet(spreadsheet_name="AWS usage", outh_file='../client_secret.json')
    spread_sheet.link(syncToCloud=False)
    rds_worksheet_creation(spread_sheet)
$ cat ../client_secret.json
{"installed":{"client_id":"362799999999-uml0m2XX4v999999mr2s03XX9g8l9odi.apps.googleusercontent.com","project_id":"amiable-shuttle-198516","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://accounts.google.com/o/oauth2/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"XXXXxQH434Qg-xxxx99_n0vW","redirect_uris":["urn:ietf:wg:oauth:2.0:oob","http://localhost"]}}
$
Edit 1:
Following Michael's comment, I changed the script to the following, but even though one more related line appeared, most of the RDS instances are still not returned:
$ python3 ./stackoverflow.py
RDS instances in Oregon
------------------------------
aurora-5-7-yasmin.cazdggrmkpt1.us-west-2.rds.amazonaws.com qa +++ DBName gave KeyError +++ db.t2.small aurora-5-7-yasmin
aurora-5-7-yasmin.cazdggrmkpt1.us-west-2.rds.amazonaws.com qa test db.t2.small aurora-5-7-yasmin
$
$ cat ./stackoverflow.py
import collections
import boto3
import datetime
import pygsheets

REGIONS = ('us-west-2',)
REGIONS_H = ('Oregon',)
currentDT = str(datetime.datetime.now())


def create_spreadsheet(outh_file, spreadsheet_name="AWS usage"):
    client = pygsheets.authorize(outh_file=outh_file, outh_nonlocal=True)
    client.list_ssheets(parent_id=None)
    spread_sheet = client.create(spreadsheet_name)
    return spread_sheet


def rds_worksheet_creation(spread_sheet):
    for i in range(len(REGIONS)):
        region = REGIONS[i]
        region_h = REGIONS_H[i]
        print()
        print("{} instances in {}".format("RDS", region_h))
        print("------------------------------")
        client = boto3.client('rds', region_name=region)
        db_instances = client.describe_db_instances()
        for i in range(len(db_instances)):
            j = i - 1
            try:
                DBName = db_instances['DBInstances'][j]['DBName']
            except KeyError:
                DBName = "+++ DBName gave KeyError +++"
            MasterUsername = db_instances['DBInstances'][0]['MasterUsername']
            DBInstanceClass = db_instances['DBInstances'][0]['DBInstanceClass']
            DBInstanceIdentifier = db_instances['DBInstances'][0]['DBInstanceIdentifier']
            Endpoint = db_instances['DBInstances'][0]['Endpoint']
            Address = db_instances['DBInstances'][0]['Endpoint']['Address']
            print("{} {} {} {} {}".format(Address, MasterUsername, DBName, DBInstanceClass,
                                          DBInstanceIdentifier))


if __name__ == "__main__":
    spread_sheet = create_spreadsheet(spreadsheet_name="AWS usage", outh_file='../client_secret.json')
    spread_sheet.link(syncToCloud=False)
    rds_worksheet_creation(spread_sheet)
You have an error in your original code but if you want this code to scale to a large number of instances (it is unlikely you'll need this) then you'll want to use something like the following:
import boto3

available_regions = boto3.Session().get_available_regions('rds')
for region in available_regions:
    rds = boto3.client('rds', region_name=region)
    paginator = rds.get_paginator('describe_db_instances').paginate()
    for page in paginator:
        for dbinstance in page['DBInstances']:
            print("{DBInstanceClass}".format(**dbinstance))
You can get rid of the paginator and just use the first loop if you know each region will have fewer than 100s of instances:
for region in available_regions:
    rds = boto3.client('rds', region_name=region)
    for dbinstance in rds.describe_db_instances()['DBInstances']:
        print("{DBInstanceClass}".format(**dbinstance))
Additionally you can provide a simple
dbinstance.get('DBName', 'No Name Set')
instead of excepting around the KeyError.
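Putting the paginator and the .get() fallback together, a hedged sketch of the per-region listing (the field names are those returned by describe_db_instances; the fallback strings are placeholders):
import boto3

for region in boto3.Session().get_available_regions('rds'):
    rds = boto3.client('rds', region_name=region)
    for page in rds.get_paginator('describe_db_instances').paginate():
        for dbinstance in page['DBInstances']:
            # DBName and Endpoint are optional depending on engine and state,
            # so fall back to defaults instead of catching KeyError.
            print("{} {} {} {}".format(
                dbinstance.get('Endpoint', {}).get('Address', 'no-endpoint'),
                dbinstance['MasterUsername'],
                dbinstance.get('DBName', 'No Name Set'),
                dbinstance['DBInstanceClass'],
            ))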
Your for loop range only gets a value of 2 because db_instances is a dict.
Instead of
for i in range(len(db_instances)):
It should be
for i in range(len(db_instances['DBInstances'])):
which gives the list and the correct length to iterate over.
This code will list all RDS instances present in the account.
Try this code:
#!/usr/bin/env python
import boto3

client = boto3.client('rds')
response = client.describe_db_instances()
for i in response['DBInstances']:
    db_name = i['DBName']
    db_instance_name = i['DBInstanceIdentifier']
    db_type = i['DBInstanceClass']
    db_storage = i['AllocatedStorage']
    db_engine = i['Engine']
    print(db_instance_name, db_type, db_storage, db_engine)
FYI, the more Pythonic way to do loops in this case would be:
for instance in db_instances['DBInstances']:
    MasterUsername = instance['MasterUsername']
    DBInstanceClass = instance['DBInstanceClass']
    # etc.
This avoids the need for i-type iterators.

Postgres vs SQLite page-not-found discrepancy

My dev environment uses Django with SQLite and my prod environment uses Django with Postgres.
I have a view that works perfectly with SQLite:
@login_required
def receipt_pdf(request, pk):
    try:
        Receipt_payment_id_dict = Receipt_payment.objects.filter(is_active=True).aggregate(Max('id'))
        if Receipt_payment_id_dict:
            Receipt_payment_id = Receipt_payment_id_dict['id__max']
            Receipt_payment_dict = get_object_or_404(Receipt_payment, pk=Receipt_payment_id)
        else:
            Receipt_payment_dict = "no data"
    except ValueError:
        raise Http404("At least one active value should be in -Receipt_payment-, contact your administrator")

    try:
        general_id_dict = General_configurations.objects.filter(is_active=True).aggregate(Max('id'))
        if general_id_dict:
            general_id = general_id_dict['id__max']
            general_dict = get_object_or_404(General_configurations, pk=general_id)
        else:
            general_dict = "no data"
    except ValueError:
        raise Http404("At least one active value should be in -General_configurations-, contact your administrator")

    payment = get_object_or_404(LeasePayment, pk=pk)
    # leasetenant = lease.tenant_set.all().order_by('-id')

    # Create the HttpResponse object with the appropriate PDF headers.
    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename="Receipt.pdf"'
    buffer = BytesIO()

    # Create the PDF object, using the BytesIO object as its "file."
    p = canvas.Canvas(buffer)
But when I execute the same code against Postgres, I get:
No Receipt_payment matches the given query.
What could be the problem?
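One hedged possibility, assuming the production database simply has no active Receipt_payment rows: aggregate(Max('id')) returns a dict like {'id__max': None} even for an empty queryset, so the if Receipt_payment_id_dict: check always passes and get_object_or_404(Receipt_payment, pk=None) raises exactly this 404. A sketch of a more defensive lookup, reusing the names from the view above:
from django.db.models import Max
from django.http import Http404

# Check the aggregated value itself, not the dict, which is always truthy.
receipt_payment_id = Receipt_payment.objects.filter(is_active=True).aggregate(Max('id'))['id__max']
if receipt_payment_id is None:
    raise Http404("At least one active value should be in -Receipt_payment-, contact your administrator")
Receipt_payment_dict = get_object_or_404(Receipt_payment, pk=receipt_payment_id)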

java.sql.SQLExceptionPyRaisable on the second attempt connecting to Athena using Django

I am using the Python module PyAthenaJDBC in order to query Athena via the provided JDBC driver.
Here is the link: https://pypi.python.org/pypi/PyAthenaJDBC/
I have been facing a persistent issue: I keep getting a Java error whenever I use the Athena connection twice in a row.
As a matter of fact, I was able to connect to Athena, show databases, create new tables and even query their content. I am building an application using Django and running its server to use Athena.
However, I am obliged to restart the server in order for the Athena connection to work again.
Here is a glimpse of the class I have built:
import os
import configparser
import pyathenajdbc

# Get aws credentials for the moment
aws_config_file = '~/.aws/config'
Config = configparser.ConfigParser()
Config.read(os.path.expanduser(aws_config_file))
access_key_id = Config['default']['aws_access_key_id']
secret_key_id = Config['default']['aws_secret_access_key']

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
athena_jdbc_driver_path = BASE_DIR + "/lib/static/AthenaJDBC.jar"
log_path = BASE_DIR + "/lib/static/queries.log"


class PyAthenaLoader():

    def __init__(self):
        pyathenajdbc.ATHENA_JAR = athena_jdbc_driver_path

    def connecti(self):
        self.conn = pyathenajdbc.connect(
            s3_staging_dir="s3://aws-athena-query-results--us-west-2",
            access_key=access_key_id,
            secret_key=secret_key_id,
            # profile_name="default",
            # credential_file=aws_config_file,
            region_name="us-west-2",
            log_path=log_path,
            driver_path=athena_jdbc_driver_path
        )

    def databases(self):
        dbs = self.query("show databases;")
        return dbs

    def tables(self, database):
        tables = self.query("show tables in {0};".format(database))
        return tables

    def create(self):
        self.connecti()
        try:
            with self.conn.cursor() as cursor:
                cursor.execute(
                    """CREATE EXTERNAL TABLE IF NOT EXISTS sales4 (
                    Day_ID date,
                    Product_Id string,
                    Store_Id string,
                    Sales_Units int,
                    Sales_Cost float,
                    Currency string
                    )
                    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
                    WITH SERDEPROPERTIES (
                    'serialization.format' = '|',
                    'field.delim' = '|',
                    'collection.delimm' = 'undefined',
                    'mapkey.delim' = 'undefined'
                    ) LOCATION 's3://athena-internship/';
                    """)
                res = cursor.description
        finally:
            self.conn.close()
        return res

    def query(self, req):
        self.connecti()
        try:
            with self.conn.cursor() as cursor:
                cursor.execute(req)
                print(cursor.description)
                res = cursor.fetchall()
        finally:
            self.conn.close()
        return res

    def info(self):
        res = []
        for i in dir(pyathenajdbc):
            temp = i + ' = ' + str(dic[i])
            # print(temp)
            res.append(temp)
        return res
Example of usage:
def test(request):
    athena = jdbc.PyAthenaLoader()
    res = athena.query('Select * from sales;')
    return render(request, 'test.html', {'data': res})
This works just fine!
However, refreshing the page causes the error again.
Note that I am using a local .jar file; I thought that would solve the issue, but I was wrong.
Even if I remove the path of the JDBC driver and let the module download it from S3, the error persists:
File "/home/tewfikghariani/.virtualenvs/venv/lib/python3.4/site-packages/pyathenajdbc/connection.py", line 69, in init
ATHENA_CONNECTION_STRING.format(region=self.region_name, schema=schema_name), props)
jpype._jexception.java.sql.SQLExceptionPyRaisable:
java.sql.SQLException: No suitable driver found for
jdbc:awsathena://athena.us-west-2.amazonaws.com:443/hive/default/
Furthermore, when I run the module on its own, it works just fine.
When I set up multiple connections inside my view before rendering the template, that works just fine as well.
I guess the issue is related to the Django view: once one of the views has performed a connection to Athena, the next connection is no longer possible and the error is raised unless I restart the server.
Any help? If other details are missing I will provide them immediately.
Update:
After posting the issue on GitHub, the author solved this problem and released a new version that works perfectly.
It was a multi-threading problem with JPype.
Question answered!
Ref: https://github.com/laughingman7743/PyAthenaJDBC/pull/8

DynamoDB pagination using Boto3

We are using boto3 for our DynamoDB and we need to do a full scan of our tables. Based on other posts, we need to use pagination to do that. However, we are unable to find a working sample of pagination. Here is what we did.
import boto3

client_setting = boto3.client('dynamodb', region_name='ap-southeast-2')
paginator = client_setting.get_paginator('scan')
esk = {}
data = []
unconverted_ga = ourQuery(params1, params2)
for page in unconverted_ga:
    data.append(page)
    esk = page['LastEvaluatedKey']
We don't know exactly how to use esk as the ExclusiveStartKey of our next query. What should the expected value of the ExclusiveStartKey parameter be? We are still new to DynamoDB and there are many things we need to learn, including this. Thanks!
From the answer by Tay B at https://stackoverflow.com/a/38619425/3176550
import boto3

dynamodb = boto3.resource('dynamodb',
                          aws_session_token=aws_session_token,
                          aws_access_key_id=aws_access_key_id,
                          aws_secret_access_key=aws_secret_access_key,
                          region_name=region
                          )

table = dynamodb.Table('widgetsTableName')

response = table.scan()
data = response['Items']

while 'LastEvaluatedKey' in response:
    response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
    data.extend(response['Items'])
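Alternatively, the scan paginator from the question handles ExclusiveStartKey for you: each page it yields corresponds to one Scan call, and the previous page's LastEvaluatedKey is passed along automatically. A minimal sketch, assuming a table named 'widgetsTableName' (this is the low-level client, so items come back in DynamoDB's typed attribute format):
import boto3

client = boto3.client('dynamodb', region_name='ap-southeast-2')
paginator = client.get_paginator('scan')

items = []
for page in paginator.paginate(TableName='widgetsTableName'):
    items.extend(page['Items'])

print(len(items))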
After hours of searching, I've finally found a better solution. For those who are new to DynamoDB, we shouldn't miss this: http://docs.aws.amazon.com/amazondynamodb/latest/gettingstartedguide/GettingStarted.Python.04.html
from __future__ import print_function  # Python 2/3 compatibility
import boto3
import json
import decimal
from boto3.dynamodb.conditions import Key, Attr


# Helper class to convert a DynamoDB item to JSON.
class DecimalEncoder(json.JSONEncoder):
    def default(self, o):
        if isinstance(o, decimal.Decimal):
            if o % 1 > 0:
                return float(o)
            else:
                return int(o)
        return super(DecimalEncoder, self).default(o)


dynamodb = boto3.resource('dynamodb', region_name='us-west-2', endpoint_url="http://localhost:8000")

table = dynamodb.Table('Movies')

fe = Key('year').between(1950, 1959)
pe = "#yr, title, info.rating"
# Expression Attribute Names for Projection Expression only.
ean = {"#yr": "year", }
esk = None

response = table.scan(
    FilterExpression=fe,
    ProjectionExpression=pe,
    ExpressionAttributeNames=ean
)

for i in response['Items']:
    print(json.dumps(i, cls=DecimalEncoder))

# As long as LastEvaluatedKey is in the response, there are still items related to the query.
while 'LastEvaluatedKey' in response:
    response = table.scan(
        ProjectionExpression=pe,
        FilterExpression=fe,
        ExpressionAttributeNames=ean,
        ExclusiveStartKey=response['LastEvaluatedKey']
    )
    for i in response['Items']:
        print(json.dumps(i, cls=DecimalEncoder))
You can try the following code:
esk = None
while True:
    scan_generator = YourTableName.scan(max_results=10, exclusive_start_key=esk)
    for item in scan_generator:
        pass  # your code for processing
    # condition to check if the entire table is scanned
    else:
        break
    # Load the last keys
    esk = scan_generator.kwargs['exclusive_start_key'].values()
Here is the reference documentation link.
Hope that helps
A bit more verbose, but I like it.
from boto3.dynamodb.conditions import Key

# `table` is assumed to be a boto3 DynamoDB Table resource defined elsewhere.


def fetch_from_table(last_key=None):
    if last_key:
        response = table.query(
            IndexName='advertCatalogIdx',
            KeyConditionExpression=Key('sk').eq('CATALOG'),
            Limit=5,
            ExclusiveStartKey=last_key
        )
    else:
        response = table.query(
            IndexName='advertCatalogIdx',
            KeyConditionExpression=Key('sk').eq('CATALOG'),
            Limit=5
        )
    # print(response)
    for item in response['Items']:
        print(item['address'])
    print('***************************')
    return response.get('LastEvaluatedKey')


last_key = fetch_from_table()
while last_key is not None:
    print("Running again : ")
    last_key = fetch_from_table(last_key)
import sys
import boto3

client = boto3.client('dynamodb')
marker = None

while True:
    paginator = client.get_paginator('list_tables')
    page_iterator = paginator.paginate(
        PaginationConfig={
            'MaxItems': 1000,
            'PageSize': 100,
            'StartingToken': marker})
    for page in page_iterator:
        tables = page['TableNames']
        for table in tables:
            print(table)
    try:
        marker = page['NextToken']
    except KeyError:
        sys.exit()