'no SavedModel bundles found!' on tensorflow_hub model deployment to AWS SageMaker - amazon-web-services

I am attempting to deploy the universal-sentence-encoder model to an AWS SageMaker endpoint and am getting the error raise ValueError('no SavedModel bundles found!').
I have shown my code below; I have a feeling that one of my paths is incorrect.
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from sagemaker import get_execution_role, Session
from sagemaker.tensorflow.serving import Model

def tfhub_to_savedmodel(model_name, uri):
    tfhub_uri = uri
    model_path = 'encoder_model/' + model_name
    with tf.Session(graph=tf.Graph()) as sess:
        module = hub.Module(tfhub_uri)
        input_params = module.get_input_info_dict()
        dtype = input_params['text'].dtype
        shape = input_params['text'].get_shape()
        # define the model inputs
        inputs = {'text': tf.placeholder(dtype, shape, 'text')}
        # define the model outputs: the sentence embedding vector
        logits = module(inputs['text'])
        outputs = {
            'vector': logits,
        }
        # export the model
        sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
        tf.saved_model.simple_save(
            sess,
            model_path,
            inputs=inputs,
            outputs=outputs)
    return model_path

sagemaker_role = get_execution_role()
!tar -C "$PWD" -czf encoder.tar.gz encoder_model/
model_data = Session().upload_data(path='encoder.tar.gz', key_prefix='model')
env = {'SAGEMAKER_TFS_DEFAULT_MODEL_NAME': 'universal-sentence-encoder-large'}
model = Model(model_data=model_data, role=sagemaker_role, framework_version='1.12', env=env)
predictor = model.deploy(initial_instance_count=1, instance_type='ml.t2.medium')

I suppose you started from this example? https://github.com/awslabs/amazon-sagemaker-examples/tree/master/sagemaker-python-sdk/tensorflow_serving_container
It looks like you're not saving the TF Serving bundle properly: the model version number is missing because of this line:
model_path = 'encoder_model/' + model_name
Replacing it with this should fix your problem:
model_path = '{}/{}/00000001'.format('encoder_model', model_name)
Your model artefact should look like this (I used the model in the notebook above):
mobilenet/
mobilenet/mobilenet_v2_140_224/
mobilenet/mobilenet_v2_140_224/00000001/
mobilenet/mobilenet_v2_140_224/00000001/saved_model.pb
mobilenet/mobilenet_v2_140_224/00000001/variables/
mobilenet/mobilenet_v2_140_224/00000001/variables/variables.data-00000-of-00001
mobilenet/mobilenet_v2_140_224/00000001/variables/variables.index
Then, upload to S3 and deploy.
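For what it's worth, a quick way to confirm the bundle is laid out correctly before uploading is to list the export directory and inspect it with saved_model_cli (which ships with TensorFlow); this sketch assumes the model was exported under encoder_model/universal-sentence-encoder-large as in the question:

# the version directory must be numeric and sit directly under the model name
!find encoder_model -maxdepth 3
# prints the signatures of the exported SavedModel
!saved_model_cli show --dir encoder_model/universal-sentence-encoder-large/00000001 --all
# only then build the tarball that gets uploaded to S3
!tar -C "$PWD" -czf encoder.tar.gz encoder_model/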

Related

Regarding attribute error BERT Arch during model deployment using Django

I am a beginner with NLP Transformers.
I am facing this issue while deploying a model using the Django framework. Locally the model works fine, but not when deployed.
I am importing a BERT model that was trained and saved using PyTorch; I follow the same procedure to load the model, but beforehand I define the architecture that was defined during training.
Still, I face this issue after deploying the model:
AttributeError: Can't get attribute 'BERT_Arch' on <module '__main__' from '/home/ubuntu/kc_env/bin/gunicorn'>
I tried a couple of things, like defining the BERT architecture before loading the model:
####Utils.py
from django.apps import AppConfig
# import torch
import torch.nn as nn

class BERT_Arch(nn.Module):
    def __init__(self):
        super(BERT_Arch, self).__init__()
        # dropout layer
        self.dropout = nn.Dropout(0.2)
        # relu activation function
        self.relu = nn.ReLU()
        # dense layers
        self.fc1 = nn.Linear(768, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 3)
        # softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    # define the forward pass
    def forward(self, sent_id, mask):
        cls_hs = self.bert(sent_id, attention_mask=mask)[0][:, 0]
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        # output layer
        x = self.fc3(x)
        # apply softmax activation
        x = self.softmax(x)
        return x

###main.py
from .utils import BERT_Arch

model = BERT_Arch()
def func():
    model = torch.load('Path to load model.pt')
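The AttributeError typically comes from unpickling: torch.load on a fully pickled model looks up BERT_Arch at the module path recorded when it was saved (often __main__ when training ran in a script or notebook), and that path does not exist inside the gunicorn process. As a hedged illustration only, not the poster's code, a common way around this is to persist just the state_dict and rebuild the model from an importable class; the file names below are placeholders, and the class used for loading must match the training-time definition exactly (including the self.bert submodule that forward references but the posted __init__ never assigns).

# illustrative sketch, assuming the weights were re-saved as a state_dict
import torch
from utils import BERT_Arch  # the class must be importable in the serving process

# one-off conversion, run wherever the original pickled model still loads:
# full_model = torch.load('model.pt', map_location='cpu')
# torch.save(full_model.state_dict(), 'model_weights.pt')

model = BERT_Arch()
model.load_state_dict(torch.load('model_weights.pt', map_location='cpu'))
model.eval()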

Using a custom trained Keras model with a SageMaker endpoint results in ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation

I am trying to predict by loading a pre-trained model in SageMaker, but I am getting the error below:
ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received client error (400) from primary with message "{ "error": "Session was not created with a graph before Run()!" }"
My code:
def convert_h5_to_aws(loaded_model):
    """
    given a pre-trained keras model, this function converts it to a TF protobuf format
    and saves it in the file structure which aws expects
    """
    import tensorflow as tf
    if tf.executing_eagerly():
        tf.compat.v1.disable_eager_execution()
    from tensorflow.python.saved_model import builder
    from tensorflow.python.saved_model.signature_def_utils import predict_signature_def
    from tensorflow.python.saved_model import tag_constants

    # This is the file structure which AWS expects. Cannot be changed.
    model_version = '1'
    export_dir = 'export/Servo/' + model_version

    # Build the Protocol Buffer SavedModel at 'export_dir'
    builder = builder.SavedModelBuilder(export_dir)

    # Create prediction signature to be used by TensorFlow Serving Predict API
    signature = predict_signature_def(
        inputs={"inputs": loaded_model.input}, outputs={"score": loaded_model.output})

    from keras import backend as K
    with K.get_session() as sess:
        # Save the meta graph and variables
        builder.add_meta_graph_and_variables(
            sess=sess, tags=[tag_constants.SERVING], signature_def_map={"serving_default": signature})
        builder.save()

    # create a tarball/tar file and zip it
    import tarfile
    with tarfile.open('model.tar.gz', mode='w:gz') as archive:
        archive.add('export', recursive=True)

convert_h5_to_aws(model)

import sagemaker
sagemaker_session = sagemaker.Session()
inputs = sagemaker_session.upload_data(path='model.tar.gz', key_prefix='model')
!touch train.py #create an empty python file
import boto3, re
from sagemaker import get_execution_role
# the (default) IAM role you created when creating this notebook
role = get_execution_role()
# Create a Sagemaker model (see AWS console>SageMaker>Models)
from sagemaker.tensorflow.model import TensorFlowModel
sagemaker_model = TensorFlowModel(model_data='s3://' + sagemaker_session.default_bucket() + '/model/model.tar.gz',
                                  role=role,
                                  framework_version='1.12',
                                  entry_point='train.py')

# Deploy the SageMaker model to an endpoint
predictor = sagemaker_model.deploy(initial_instance_count=1,
                                   instance_type='ml.m4.xlarge')

# Create a predictor which uses this new endpoint
import sagemaker
from sagemaker.tensorflow.model import TensorFlowModel
#endpoint = '' #get endpoint name from SageMaker > endpoints
predictor = sagemaker.tensorflow.model.TensorFlowPredictor(endpoint, sagemaker_session)

# .predict sends the data to our endpoint
data = X_test #<-- update this to have inputs for your model
predictor.predict(data)
I also tried using different versions of TensorFlowModel
Is all of this code in a notebook? You want to make sure you are properly tarring your model artifacts and inference code. Make sure that the metadata for your saved model is stored properly, and if you have an inference script with inference functions (handling pre- and post-processing) it should be wrapped in a code directory inside the tar file as well. Here's an example of deploying a pre-trained Sklearn model on SageMaker; you can do the same with your pre-trained TensorFlow model.
Sklearn pre-trained example: https://github.com/RamVegiraju/Pre-Trained-Sklearn-SageMaker
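As a rough sketch of the packaging the answer describes: the tarball for the TensorFlow Serving container generally holds the numbered SavedModel directory plus an optional code directory with the inference script. The layout below is an assumption based on the structure shown in the first answer on this page, and inference.py / requirements.txt are hypothetical file names.

import tarfile

# assumed layout inside model.tar.gz:
#   export/Servo/1/saved_model.pb
#   export/Servo/1/variables/...
#   code/inference.py       <- optional pre/post-processing handlers
#   code/requirements.txt   <- optional extra pip dependencies
with tarfile.open('model.tar.gz', mode='w:gz') as archive:
    archive.add('export', recursive=True)
    archive.add('code', recursive=True)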

How to deploy a custom model in Amazon SageMaker

I am a newbie to AWS SageMaker. I am trying to deploy a custom time series LSTM model in SageMaker; please help me out with how to prepare the script for script mode.
This is my script file time_series.py:
import sagemaker
import boto3
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from sklearn.metrics import mean_squared_error

if __name__ == '__main__':
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=72)
    parser.add_argument('--n_train_hours', type=int, default=24*365*2)
    parser.add_argument('--n_validation_hours', type=int, default=24*365*4)
    # input data and model directories
    parser.add_argument('--model_dir', type=str)
    args, _ = parser.parse_known_args()

    train_dataset_dir = os.environ.get('SM_INPUT_DIR') + '/data/training/'
    output_model_dir = os.environ.get('SM_MODEL_DIR')
    output_object_dir = os.environ.get('SM_OUTPUT_DATA_DIR')

    epochs = args.epochs
    batch_size = args.batch_size
    input_data = {args.input_data}

    dataset = read_csv(train_dataset_dir + 'dataset.csv', header=0, index_col='Date')
    dataset.sort_index(inplace=True)
    train = dataset.iloc[:109]
    test = dataset.iloc[109:]

    scaler = MinMaxScaler()
    scaled_train = scaler.fit_transform(train)
    scaled_test = scaler.fit_transform(test)

    n_input = 12
    n_feature = 1
    train_generator = TimeseriesGenerator(scaled_train, scaled_train, length=n_input, batch_size=1)

    model = Sequential()
    model.add(LSTM(128, activation='relu', input_shape=(n_input, n_feature), return_sequences=True))
    model.add(LSTM(128, activation='relu', return_sequences=True))
    model.add(LSTM(128, activation='relu', return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    history = model.fit_generator(train_generator, epochs=50, batch_size=1, verbose=1)

    # Get a SageMaker-compatible role used by this Notebook Instance.
    role = get_execution_role()

    with open(output_model_dir + '/history.json', 'w') as f:
        json.dump(history.history, f)

    # Save the Scaler
    dump(scaler, output_model_dir + '/scaler.model', protocol=2)

    # Save the trained model and weights
    model_json = model.to_json()
    with open(output_model_dir + "/model.json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights(output_model_dir + "/model.h5")
Here is the launcher code that produces the error:
train_instance_type = "ml.m4.xlarge"
tf_estimator = TensorFlow(entry_point='time_series.py', role=get_execution_role(),
                          train_instance_count=1, train_instance_type=train_instance_type,
                          framework_version='1.12', py_version='py3', script_mode=True,
                          output_path='s3://' + s3Bucket, base_job_name="sales-forecasting-lstm",
                          hyperparameters={'batch_size': 2,
                                           'epochs': 50})
tf_estimator.fit(uploaded_data_path)
Here I got the error; I don't understand what it means:
UnexpectedStatusException: Error for Training job sales-forecasting-lstm-2020-04-13-10-17-34-919: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
Command "/usr/bin/python time_series.py --batch_size 2 --epochs 50 --model_dir s3://sagemaker12/sales-forecasting-lstm-2020-04-13-10-17-34-919/model"
I would recommend you change the time_series.py channels to your S3 paths; you do not have to concatenate onto train_dataset_dir. Also add SageMaker-specific arguments such as:
parser.add_argument('--output-data-dir', type=str, default='s3://bucket_name/folder_name/output')
The line above tells the job where your output data should be stored. However, that is something you have to specify when calling the batch transform function; otherwise it is stored in the default bucket.
Second, in order to debug (and for us to be able to help you), take a look at CloudWatch for that specific training job to better understand what is failing in your script, time_series.py. I suspect it is a problem with the specification and reading of your training data.
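As a minimal sketch of the argument handling the answer alludes to (the posted script calls parser.add_argument without ever creating a parser), a script-mode entry point usually builds an ArgumentParser and reads the SageMaker channel and output locations from the standard SM_* environment variables; the defaults below are illustrative:

import argparse
import os

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=72)
    # SageMaker injects these locations, so reading them from the environment
    # avoids hard-coding paths or concatenating onto SM_INPUT_DIR
    parser.add_argument('--model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--output-data-dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAINING'))
    args, _ = parser.parse_known_args()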

Unable to Launch Multiple Streaming Pipelines (N to N Pipelines) Dynamically (Using Runtime Value Providers) in a Single Dataflow Job in Python

I am trying to launch a streaming Dataflow job which contains n pipelines.
Based on the configured topic and corresponding BQ table for each topic, I want to launch a pipeline inside one streaming job.
My actual problem is that I have to create and upload a template for each and every project. What I want is to reuse the uploaded template, passing only a configuration file to launch a new Dataflow job by changing topic, subscription, dataset and BQ table.
Currently I am unable to reuse the template.
Please help me with this and let me know whether it is possible or not, because Google has only provided one-to-one templates, not many-to-many templates (e.g. three topics - three BQ tables (three data pipelines), n-n).
import logging
import os
import json
from google.cloud import storage
from apache_beam import Pipeline, ParDo, DoFn
from apache_beam.io import ReadFromPubSub, WriteToBigQuery, BigQueryDisposition
from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions, WorkerOptions, GoogleCloudOptions, \
    SetupOptions

def _get_storage_service():
    storage_client = storage.Client \
        .from_service_account_json(
            json_credentials_path=r'C:\Users\dneema\PycharmProjects\iot_dataflow\df_stm_iot_pubsub_bq\service_account_credentials.json')
    print('storage service fetched')
    return storage_client

class RuntimeOptions(PipelineOptions):
    def __init__(self, flags=None, **kwargs):
        super(RuntimeOptions, self).__init__(flags, **kwargs)

    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument('--bucket_name', type=str)
        parser.add_value_provider_argument('--config_json_path', type=str,)

class PipelineCreator:
    def __init__(self):
        self.options = PipelineOptions()
        storage_client = storage.Client.from_service_account_json(
            'service_account_credentials_updated.json')
        runtime_options = self.options.view_as(RuntimeOptions)
        bucket_name = str(runtime_options.bucket_name)
        config_json_path = str(runtime_options.config_json_path)
        # get the bucket with name
        bucket = storage_client.get_bucket(bucket_name)
        # get bucket file as blob
        blob = bucket.get_blob(config_json_path)
        # convert to string and load config
        json_data = blob.download_as_string()
        self.configData = json.loads(json_data)
        dataflow_config = self.configData['dataflow_config']
        self.options.view_as(StandardOptions).streaming = bool(dataflow_config['streaming'])
        self.options.view_as(SetupOptions).save_main_session = True
        worker_options = self.options.view_as(WorkerOptions)
        worker_options.max_num_workers = int(dataflow_config['max_num_worker'])
        worker_options.autoscaling_algorithm = str(dataflow_config['autoscaling_algorithm'])
        #worker_options.machine_type = str(dataflow_config['machine_type'])
        #worker_options.zone = str(dataflow_config['zone'])
        #worker_options.network = str(dataflow_config['network'])
        #worker_options.subnetwork = str(dataflow_config['subnetwork'])

    def run(self):
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'dataflow-service-account.json'
        project_id = self.configData['project_id']
        dataset_id = self.configData['dataset_id']
        topics = self.configData['topics']
        table_ids = self.configData['bq_table_ids']
        error_table_id = self.configData['error_table_id']

        logger = logging.getLogger(project_id)
        logger.info(self.options.display_data())

        pipeline = Pipeline(options=self.options)
        size = len(topics)
        for index in range(size):
            print(topics[index])
            pipeline_name = "pipeline_" + str(index)
            logger.info("Launch pipeline :: " + pipeline_name)
            messages = pipeline | 'Read PubSub Message in ' + pipeline_name >> ReadFromPubSub(topic=topics[index])
            logger.info("Read PubSub Message")
            valid_messages, invalid_messages = messages | 'Convert Messages to TableRows in ' + pipeline_name >> ParDo(TransformMessageToTableRow()).with_outputs('invalid', main='valid')
            valid_messages | 'Write Messages to BigQuery in ' + pipeline_name >> WriteToBigQuery(table=table_ids[index],
                                                                                                 dataset=dataset_id,
                                                                                                 project=project_id,
                                                                                                 write_disposition=BigQueryDisposition.WRITE_APPEND)
        pipeline.run().wait_until_finish()

class TransformMessageToTableRow(DoFn):
    def process(self, element, *args, **kwargs):
        logging.getLogger('dataflow').log(logging.INFO, element)
        print(element)
        print("element type ", type(element))
        print("inside bq pardo")
        import json
        try:
            message_rows = json.loads(element)
            # if using emulator, uncomment below line
            message_rows = json.loads(message_rows)
            print('loaded element')
        except:
            try:
                element = "[" + element + "]"
                message_rows = json.loads(element)
            except Exception as e:
                print(e)
                from apache_beam import pvalue
                yield [pvalue.TaggedOutput('invalid', [element, str(e)])]
        print(message_rows)
        print("message rows", type(message_rows))
        if not isinstance(message_rows, list):
            message_rows = [message_rows]
        #rows = list()
        if isinstance(message_rows, list):
            for row in message_rows:
                try:
                    new_row = dict()
                    for k, v in row.items():
                        new_row[str(k)] = v
                    #rows.append(new_row)
                    print(new_row)
                    yield new_row
                except Exception as e:
                    print(e)
                    from apache_beam import pvalue
                    yield pvalue.TaggedOutput('invalid', [row, str(e)])

if __name__ == '__main__':
    PipelineCreator().run()
Here the runtime arguments are bucket_name and config_json_path, covering all the configuration-related items (dataset, BQ tables, topics/subscriptions) and all workflow options.
Is this possible or not? Because Google has only provided one-to-one templates, not many-to-many templates (e.g. three topics - three BQ tables (three data pipelines), n-n).
As discussed in the previously answered thread Unable to run multiple Pipelines in desired order by creating template in Apache Beam, you can run only one pipeline inside a template at any time.
You'll have to delegate the template creation to another service and pass the configuration with it; just follow the link inside that thread and you'll find how-to examples.
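For illustration only, a launcher along these lines could start one templated job per (topic, table) entry from an external service using the Dataflow templates launch API; the project, region, template path and parameter names are placeholders, and the template itself would need to expose matching runtime parameters:

# hedged sketch: launch one templated streaming job per configuration entry
from googleapiclient.discovery import build

def launch_jobs(project, region, template_gcs_path, configs):
    dataflow = build('dataflow', 'v1b3')
    for i, cfg in enumerate(configs):
        body = {
            'jobName': 'pubsub-to-bq-{}'.format(i),
            'parameters': {
                'input_topic': cfg['topic'],   # placeholder parameter names
                'output_table': cfg['table'],
            },
        }
        request = dataflow.projects().locations().templates().launch(
            projectId=project,
            location=region,
            gcsPath=template_gcs_path,
            body=body,
        )
        print(request.execute())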

'NoneType' object has no attribute 'name'

I was building a wine recommendation system using a k-means approach in Django. I made the cluster module in admin and added 3 clusters manually. However, when I try to recommend wine to the logged-in user I get this error. Can you please help?
AttributeError at /reviews/recommendation/
'NoneType' object has no attribute 'name'
I am getting the error on this line:
User.objects.get(username=request.user.username).cluster_set.first().name
Here is the code for view.py:
@login_required
def user_recommendation_list(request):
    # get request user reviewed wines
    user_reviews = Review.objects.filter(user_name=request.user.username).prefetch_related('wine')
    user_reviews_wine_ids = set(map(lambda x: x.wine.id, user_reviews))

    # get request user cluster name (just the first one right now)
    try:
        user_cluster_name = \
            User.objects.get(username=request.user.username).cluster_set.first().name
    except: # if no cluster assigned for a user, update clusters
        update_clusters()
        user_cluster_name = \
            User.objects.get(username=request.user.username).cluster_set.first().name

    # get usernames for other members of the cluster
    user_cluster_other_members = \
        Cluster.objects.get(name=user_cluster_name).users \
        .exclude(username=request.user.username).all()
    other_members_usernames = set(map(lambda x: x.username, user_cluster_other_members))

    # get reviews by those users, excluding wines reviewed by the request user
    other_users_reviews = \
        Review.objects.filter(user_name__in=other_members_usernames) \
        .exclude(wine__id__in=user_reviews_wine_ids)
    other_users_reviews_wine_ids = set(map(lambda x: x.wine.id, other_users_reviews))

    # then get a wine list including the previous IDs, order by rating
    wine_list = sorted(
        list(Wine.objects.filter(id__in=other_users_reviews_wine_ids)),
        key=lambda x: x.average_rating,
        reverse=True
    )

    return render(
        request,
        'reviews/user_recommendation_list.html',
        {'username': request.user.username, 'wine_list': wine_list}
    )
and here is the code for suggestions.py
from .models import Review, Wine, Cluster
from django.contrib.auth.models import User
from sklearn.cluster import KMeans
from scipy.sparse import dok_matrix, csr_matrix
import numpy as np

def update_clusters():
    num_reviews = Review.objects.count()
    update_step = ((num_reviews/100)+1) * 5
    if num_reviews % update_step == 0: # using some magic numbers here, sorry...
        # Create a sparse matrix from user reviews
        all_user_names = map(lambda x: x.username, User.objects.only("username"))
        all_wine_ids = set(map(lambda x: x.wine.id, Review.objects.only("wine")))
        num_users = len(all_user_names)
        ratings_m = dok_matrix((num_users, max(all_wine_ids)+1), dtype=np.float32)
        for i in range(num_users): # each user corresponds to a row, in the order of all_user_names
            user_reviews = Review.objects.filter(user_name=all_user_names[i])
            for user_review in user_reviews:
                ratings_m[i, user_review.wine.id] = user_review.rating

        # Perform kmeans clustering
        k = int(num_users / 10) + 2
        kmeans = KMeans(n_clusters=k)
        clustering = kmeans.fit(ratings_m.tocsr())

        # Update clusters
        Cluster.objects.all().delete()
        new_clusters = {i: Cluster(name=i) for i in range(k)}
        for cluster in new_clusters.values(): # clusters need to be saved before referring to users
            cluster.save()
        for i, cluster_label in enumerate(clustering.labels_):
            new_clusters[cluster_label].users.add(User.objects.get(username=all_user_names[i]))
When you add data to the cluster table, you need to include your currently logged-in username among the cluster's users. For instance:
jadianes, carlos, and lluis
<username>, teus, yasset
...
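As a small, hedged illustration of why the view fails when that membership is missing (not the tutorial's code): cluster_set.first() returns None for a user who belongs to no cluster, so guarding the lookup avoids the AttributeError:

# illustrative guard only: check for a missing cluster before touching .name
user = User.objects.get(username=request.user.username)
cluster = user.cluster_set.first()
if cluster is None:
    update_clusters()                  # rebuild clusters, then retry once
    cluster = user.cluster_set.first()
user_cluster_name = cluster.name       # still fails if the user was never added to any cluster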