Attempting to retrieve binary secret value: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte - amazon-web-services

I have a token file that is too long to store as-is in Secrets Manager, so I am compressing the file and storing the compressed bytes in Secrets Manager. I then try to retrieve the secret value so I can use the token in my application. The secret value is the raw bytes of the compressed file. I'm running into this error: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte
Looking to see how I can fix this error and retrieve the secret value. Here is what I have:
# declare tar file name
tar = "token.cache.tar.gz"
file = tarfile.open(tar, "w:gz")
file.add("token.cache")
file.close()

# store cache in aws secrets manager
client = boto3.client('secretsmanager')
with open("token.cache.tar.gz", "rb") as bts:
    response = client.create_secret(
        Name='ms-graph-binary',
        SecretBinary=bts.read()
    )

response = client.get_secret_value(
    SecretId='ac-demo-ms-graph-binary'
)['SecretBinary']

with tarfile.open(fileobj=response.decode('utf-8'), mode='r') as cf:
    cf.extractall("token.cache.decompressed")
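For context, the decode fails because get_secret_value returns SecretBinary as raw bytes, and gzip data begins with the magic bytes 0x1f 0x8b; the 0x8b at position 1 is not valid UTF-8. A minimal check illustrates this (secret name taken from the create_secret call above):

# Sketch: inspect the stored bytes rather than decoding them as text.
import boto3

client = boto3.client('secretsmanager')
blob = client.get_secret_value(SecretId='ms-graph-binary')['SecretBinary']
print(blob[:2])  # b'\x1f\x8b' -- the gzip magic bytes, not UTF-8 text
# blob.decode('utf-8') raises UnicodeDecodeError here;
# the bytes should be wrapped in io.BytesIO and handed to tarfile.open instead.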
Edit: I ended up fixing the code, and this is what I used to retrieve the secret:
import tarfile
import io
import boto3

client = boto3.client('secretsmanager')

# compress the token cache
with tarfile.open('token.cache.tar.gz', "w:gz") as tar:
    tar.add('token.cache')

bts = open('token.cache.tar.gz', 'rb').read()
print("Length before", len(bts))

# store the compressed bytes as a binary secret, then read them back
sec = client.update_secret(SecretId="ac-demo-ms-graph", SecretBinary=bts)
sec = client.get_secret_value(SecretId="ac-demo-ms-graph")['SecretBinary']
print("Length after", len(sec))

# wrap the raw bytes in BytesIO instead of decoding them as text
with tarfile.open(fileobj=io.BytesIO(sec), mode='r:gz') as t:
    d = t.extractfile('token.cache')
    # print file content
    print("File content", str(d.read()))

Related

Re-encoding audio file to linear16 for google cloud speech api fails with '[Errno 30] Read-only file system'

I'm trying to convert an audio file to linear16 format using the ffmpeg module. I've stored the audio file in one Cloud Storage bucket and want to move the converted file to a different bucket. The code works perfectly in VS Code and deploys successfully to Cloud Functions, but it fails with [Errno 30] Read-only file system when run in the cloud.
Here's the code:
from google.cloud import speech
from google.cloud import storage
import ffmpeg
import sys

out_bucket = 'encoded_audio_landing'
input_bucket_name = 'audio_landing'


def process_audio(input_bucket_name, in_filename, out_bucket):
    '''
    converts audio encoding for GSK call center call recordings to linear16 encoding and 16,000
    hertz sample rate
    Params:
        in_filename: a gsk call audio file
    returns an audio file encoded so that google speech to text api can transcribe
    '''
    storage_client = storage.Client()
    bucket = storage_client.bucket(input_bucket_name)
    blob = bucket.blob(in_filename)
    blob.download_to_filename(blob.name)
    print('type contents: ', type('processedfile'))
    #print('blob name / len / type', blob.name, len(blob.name), type(blob.name))

    try:
        out, err = (
            ffmpeg.input(blob.name)
            #ffmpeg.input()
            .output('pipe: a', format="s16le", acodec="pcm_s16le", ac=1, ar="16k")
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        print(e.stderr, file=sys.stderr)
        sys.exit(1)

    up_bucket = storage_client.bucket(out_bucket)
    up_blob = up_bucket.blob(blob.name)
    #print('type / len out', type(out), len(out))
    up_blob.upload_from_string(out)
    # delete source file
    blob.delete()


def hello_gcs(event, context):
    """Background Cloud Function to be triggered by Cloud Storage.
    This generic function logs relevant data when a file is changed,
    and works for all Cloud Storage CRUD operations.
    Args:
        event (dict): The dictionary with data specific to this type of event.
                      The `data` field contains a description of the event in
                      the Cloud Storage `object` format described here:
                      https://cloud.google.com/storage/docs/json_api/v1/objects#resource
        context (google.cloud.functions.Context): Metadata of triggering event.
    Returns:
        None; the output is written to Cloud Logging
    """
    #print('Event ID: {}'.format(context.event_id))
    #print('Event type: {}'.format(context.event_type))
    print('Bucket: {}'.format(event['bucket']))
    print('File: {}'.format(event['name']))
    print('Metageneration: {}'.format(event['metageneration']))
    #print('Created: {}'.format(event['timeCreated']))
    #print('Updated: {}'.format(event['updated']))

    # convert audio encoding
    print('begin process_audio')
    process_audio(input_bucket_name, event['name'], out_bucket)
The problem was that I was downloading the file to my local directory, which obviously wouldn't work in the cloud. I read another article where someone added a get_file_path function and used its result as the input to blob.download_to_filename(). I'm not sure why that worked.
I did try just removing the whole download_to_filename bit, but it didn't work without it.
I'd very much appreciate an explanation if someone knows why this works.
# additional imports needed for the temp-path workaround
import os
import tempfile
from werkzeug.utils import secure_filename


# this gets around downloading the file to a local folder: it builds a path in a temp location
def get_file_path(filename):
    file_name = secure_filename(filename)
    return os.path.join(tempfile.gettempdir(), file_name)


def process_audio(input_bucket_name, in_filename, out_bucket):
    '''
    converts audio encoding for GSK call center call recordings to linear16 encoding and 16,000
    hertz sample rate
    Params:
        in_filename: a gsk call audio file
        input_bucket_name: location of the source file that needs to be re-encoded
        out_bucket: where to put the newly encoded file
    returns an audio file encoded so that google speech to text api can transcribe
    '''
    storage_client = storage.Client()
    bucket = storage_client.bucket(input_bucket_name)
    blob = bucket.blob(in_filename)
    print(blob.name)

    # build a path in the temp location and download the blob there
    file_path = get_file_path(blob.name)
    blob.download_to_filename(file_path)
    print('type contents: ', type('processedfile'))
    #print('blob name / len / type', blob.name, len(blob.name), type(blob.name))

    # invokes the ffmpeg library to re-encode the audio file; ffmpeg is a command-line tool
    # exposed through Python, and the arguments in .output() are the ffmpeg options passed to it
    try:
        out, err = (
            ffmpeg.input(file_path)
            #ffmpeg.input()
            .output('pipe: a', format="s16le", acodec="pcm_s16le", ac=1, ar="16k")
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as e:
        print(e.stderr, file=sys.stderr)
        sys.exit(1)
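As for why the temp path works: in the Cloud Functions runtime the deployed code sits on a read-only filesystem, and /tmp is, to my knowledge, the only writable location, so tempfile.gettempdir() gives a path the download can actually write to. A minimal sketch, assuming the default runtime layout:

import os
import tempfile

# In the Cloud Functions runtime the deployment directory is read-only;
# tempfile.gettempdir() resolves to the one writable path.
print(tempfile.gettempdir())                        # typically '/tmp'
print(os.access(tempfile.gettempdir(), os.W_OK))    # True
print(os.access(os.getcwd(), os.W_OK))              # typically False in a deployed function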

How to decode celery message in SQS

Some of my Celery tasks in SQS are pending forever, and I want to read those messages (tasks) before deleting them.
In the SQS console I can see the encoded message. I tried decoding it with
value = base64.b64decode(value.encode('utf-8')).decode('utf-8')
This gives me a dict dump with the keys
['body', 'headers', 'content-type', 'properties', 'content-encoding']
In this dict, body also looks encoded.
I tried to decode it the same way
value = base64.b64decode(value.encode('utf-8')).decode('utf-8')
but it gives an error saying
UnicodeDecodeError: 'utf8' codec can't decode byte 0x87 in position 1: invalid start byte
Am I missing something?
How do I decode these messages? Is there any way to decode them?
It seems that Celery uses pickle.dumps to turn the payload of the task into bytes and then encodes it as base64. Doing the reverse operations gives us the payload back.
import base64
import boto3
import json
import pickle

queue_name = 'your-queue-name'
sqsr = boto3.resource('sqs')
queue = sqsr.get_queue_by_name(QueueName=queue_name)

for message in queue.receive_messages(MaxNumberOfMessages=10):
    print(f'{message.message_id} >>> {message.receipt_handle}'
          f' >>> {message.body} >>> {message.message_attributes}')
    # the outer envelope is base64-encoded JSON
    body_dict = json.loads(base64.b64decode(message.body))
    # the inner body is the pickled task payload, base64-encoded again
    celery_payload = pickle.loads(base64.b64decode(body_dict.get('body')))
    print(celery_payload)

request_spot_instance API is not accepting user data in file format

I am using the AWS API to create a spot instance. When I try to give the user data in file format, I get an error:
botocore.exceptions.ClientError: An error occurred (InvalidParameterValue) when calling the RequestSpotInstances operation: Invalid BASE64 encoding of user data
session = boto3.Session()
client = session.client('ec2', region_name='us-west-2')
response = client.request_spot_instances(
    InstanceCount=1,
    LaunchSpecification={
        'ImageId': 'ami-012345',
        'InstanceType': 't2.medium',
        'Placement': {'AvailabilityZone': 'us-west-2a'},
        'SecurityGroupIds': ['sg-012345'],
        'SubnetId': 'subnet-012345',
        'UserData': 'file://my_script0.txt'
    },
    SpotPrice='0.0139',
    Type='persistent',
    InstanceInterruptionBehavior='stop'
)
As per the AWS documentation, if we want to pass the user data as a file, we can do so by preceding the file name with "file://".
But it seems that this API expects the entire file content to be base64-encoded first.
According to the docs http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html, the base64 encoding is handled for you by the CLI but is required for direct API calls.
So when I pass the base64-encoded data of my file, the API works correctly:
# read the script and base64-encode its contents
with open("my_script0.txt", "rb") as f:
    encoded_data = base64.b64encode(f.read()).decode("utf-8")
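For completeness, a minimal sketch of passing the encoded string into the request (the resource IDs are the same placeholders used in the question):

# Sketch: pass the base64-encoded user data instead of the file:// reference.
response = client.request_spot_instances(
    InstanceCount=1,
    LaunchSpecification={
        'ImageId': 'ami-012345',
        'InstanceType': 't2.medium',
        'SecurityGroupIds': ['sg-012345'],
        'SubnetId': 'subnet-012345',
        'UserData': encoded_data  # base64 string built above
    },
    SpotPrice='0.0139',
    Type='persistent',
    InstanceInterruptionBehavior='stop'
)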

Google Cloud KMS: Unable to decrypt

I'm trying to decrypt a KMS-encrypted file and am running into the following error:
UnicodeDecodeError: 'utf8' codec can't decode byte 0x80 in position 3: invalid start byte
I'm using the sample decrypt code.
I'm able to decrypt the file using the command line.
The exception is being thrown from here:
cipher_text.decode('utf-8')
Code: https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/kms/api-client/snippets.py
Please let me know if I'm missing something here.
When you use the Python library, all inputs must be base64-encoded, and the outputs will be base64-encoded as well. In the encrypt function in snippets.py, you can see that the code is base64-encoding the plaintext before passing it to the KMS encrypt API.
encoded_text = base64.b64encode(plaintext)
When you use the gcloud kms encrypt command, you do not have to base64 encode the plaintext yourself, and the ciphertext is not base64-encoded.
So, when you pass the ciphertext from gcloud kms encrypt to the Python library to decrypt, you must base64-encode it first. Change the decrypt function in snippets.py to base64-encode the file data before sending it on.
# Read cipher text from the input file.
with io.open(encrypted_file_name, 'rb') as encrypted_file:
    ciphertext = encrypted_file.read()
encoded_text = base64.b64encode(ciphertext)

# Use the KMS API to decrypt the text.
cryptokeys = kms_client.projects().locations().keyRings().cryptoKeys()
request = cryptokeys.decrypt(
    name=name, body={'ciphertext': encoded_text.decode('utf-8')})
response = request.execute()
You can think of the base64-encoding as being a transport-layer implementation detail: it's only necessary so that arbitrary binary data can be sent in JSON, which only accepts Unicode strings. So, the Cloud KMS API requires this data to be base64-encoded, and must base64-encode the output as well. But the gcloud command does this work for you, so you don't have to do it.
I think the Python sample code is misleading. It should always base64-encode inputs to the API and base64-decode outputs, instead of only doing it sometimes. I'll look at updating the Python sample code shortly, and double check the sample code for the other languages.
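In the same vein, the response's plaintext field is base64-encoded too, so here is a short sketch of handling it with the same discovery-based client (decrypted_file_name is a placeholder):

# The JSON API returns the plaintext base64-encoded; decode it to recover the raw bytes.
plaintext = base64.b64decode(response['plaintext'])
with io.open(decrypted_file_name, 'wb') as decrypted_file:
    decrypted_file.write(plaintext)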
Given the date of the question, the accepted answer should be the one from @Russ (also, thank you for updating the repo).
Since the documentation has changed a little, here is a function that deals with an already-encrypted JSON file.
The file was encrypted using the gcloud command line:
gcloud kms encrypt \
    --plaintext-file=[SECRETS.json] \
    --ciphertext-file=[ENCRYPTED-SECRETS.json.enc] \
    --location=[REGION] \
    --keyring=[KEYRING-NAME] \
    --key=[KEY-NAME]
Here is the function for decrypting said file (cipher_file being the path to [ENCRYPTED-SECRETS.json.enc]):
import io
import json

from google.cloud import kms_v1


def decrypt(cipher_file):
    project_id = "project"
    location_id = "region"
    key_ring_id = "key-ring"
    crypto_key_id = "key"

    # Creates an API client for the KMS API.
    client = kms_v1.KeyManagementServiceClient()

    # The resource name of the CryptoKey.
    name = client.crypto_key_path_path(project_id, location_id, key_ring_id,
                                       crypto_key_id)

    # Use the KMS API to decrypt the data.
    with io.open(cipher_file, "rb") as file:
        c_text = file.read()
    response = client.decrypt(name, c_text)
    secret_dict = json.loads(response.plaintext.decode("utf-8"))
    return secret_dict
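A quick usage sketch of the helper above (the file name is just a placeholder for the .enc file produced by the gcloud command):

# Hypothetical usage of the decrypt helper defined above.
secrets = decrypt("ENCRYPTED-SECRETS.json.enc")
print(list(secrets.keys()))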

AWS Python Lambda Function - Upload File to S3

I have an AWS Lambda function written in Python 2.7 in which I want to:
1) Grab an .xls file from an HTTP address.
2) Store it in a temp location.
3) Store the file in an S3 bucket.
My code is as follows:
from __future__ import print_function
import urllib
import datetime
import boto3
from botocore.client import Config


def lambda_handler(event, context):
    """Make a variable containing the date format based on YYYYMMDD"""
    cur_dt = datetime.datetime.today().strftime('%Y%m%d')

    """Make a variable containing the url and current date based on the variable cur_dt"""
    dls = "http://11.11.111.111/XL/" + cur_dt + ".xlsx"
    urllib.urlretrieve(dls, cur_dt + "test.xls")

    ACCESS_KEY_ID = 'Abcdefg'
    ACCESS_SECRET_KEY = 'hijklmnop+6dKeiAByFluK1R7rngF'
    BUCKET_NAME = 'my-bicket'
    FILE_NAME = cur_dt + "test.xls"

    data = open('/tmp/' + FILE_NAME, 'wb')

    # S3 Connect
    s3 = boto3.resource(
        's3',
        aws_access_key_id=ACCESS_KEY_ID,
        aws_secret_access_key=ACCESS_SECRET_KEY,
        config=Config(signature_version='s3v4')
    )

    # Uploaded File
    s3.Bucket(BUCKET_NAME).put(Key=FILE_NAME, Body=data, ACL='public-read')
However, when I run this function, I receive the following error:
'IOError: [Errno 30] Read-only file system'
I've spent hours trying to address this issue but I'm falling on my face. Any help would be appreciated.
'IOError: [Errno 30] Read-only file system'
You seem to lack some write access rights. If your Lambda has another policy, try attaching this policy to your role:
arn:aws:iam::aws:policy/AWSLambdaFullAccess
It has full access to S3 as well, in case you can't write to your bucket. If it solves your issue, you can remove some of those rights afterwards.
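A minimal sketch of attaching that policy with boto3, in case you prefer doing it programmatically (the role name is a placeholder for your function's execution role):

import boto3

iam = boto3.client('iam')
# Hypothetical role name; substitute your Lambda function's execution role.
iam.attach_role_policy(
    RoleName='my-lambda-execution-role',
    PolicyArn='arn:aws:iam::aws:policy/AWSLambdaFullAccess'
)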
I have uploaded an image to an S3 bucket this way. In the Lambda test event, I created a JSON test event that contains the base64 of the image to be uploaded to the S3 bucket and the image name.
The Lambda test JSON event is as follows:
{
    "ImageName": "Your Image Name",
    "img64": "BASE64 of Your Image"
}
Following is the code to upload an image (or any file) to S3:
import boto3
import base64


def lambda_handler(event, context):
    s3 = boto3.resource(u's3')
    bucket = s3.Bucket(u'YOUR-BUCKET-NAME')

    path_test = '/tmp/output'      # temp path in lambda.
    key = event['ImageName']       # assign filename to 'key' variable
    data = event['img64']          # assign base64 of an image to data variable
    data1 = data
    img = base64.b64decode(data1)  # decode the encoded image data (base64)

    with open(path_test, 'wb') as data:
        #data.write(data1)
        data.write(img)

    bucket.upload_file(path_test, key)  # Upload image directly inside bucket
    #bucket.upload_file(path_test, 'FOLDERNAME-IN-YOUR-BUCKET/{}'.format(key))  # Upload image inside a folder of your s3 bucket.
    print('res---------------->', path_test)
    print('key---------------->', key)
    return {
        'status': 'True',
        'statusCode': 200,
        'body': 'Image Uploaded'
    }
Change data = open('/tmp/' + FILE_NAME, 'wb') so the mode is 'r' instead of 'wb' (you want to read the file, not open it for writing).
Also, I assume your IAM user has full access to S3, right?
Or maybe the problem is in the request to that URL...
Try making the download path start with "/tmp/":
urllib.urlretrieve(dls, "/tmp/" + cur_dt + "test.xls")