ValidationException in Sagemaker pipeline creation - amazon-web-services

I am new to SageMaker. I am creating a pipeline in SageMaker where I define the number of epochs as a pipeline parameter. But when I upsert the pipeline, it shows the error below.
Check the following code for reference, please.
# Integer pipeline parameter named "EpochCount", defaulting to 1.
# NOTE(review): the CreatePipeline error below says Parameters.EpochCount is
# assigned to an argument of type String -- presumably wherever epoch_count is
# consumed by one of the steps (not shown in this snippet); confirm there.
epoch_count = ParameterInteger(name="EpochCount", default_value=1)
# Assemble the pipeline from its declared parameters and its three steps.
pipeline = Pipeline(
    name=f"a_name",
    parameters=[
        training_instance_type,
        training_instance_count,
        epoch_count,
        hugging_face_model_name,
        endpoint_instance_type,
        endpoint_instance_type_alternate,
    ],
    steps=[step_train, step_register, step_deploy_lambda],
    sagemaker_session=sagemaker_session,
)
Error - ---
---------------------------------------------------------------------------
ClientError Traceback (most recent call last)
<ipython-input-54-138a517611f0> in <module>
----> 1 pipeline.upsert(role_arn=role)
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/pipeline.py in upsert(self, role_arn, description, tags, parallelism_config)
217 """
218 try:
--> 219 response = self.create(role_arn, description, tags, parallelism_config)
220 except ClientError as e:
221 error = e.response["Error"]
/opt/conda/lib/python3.7/site-packages/sagemaker/workflow/pipeline.py in create(self, role_arn, description, tags, parallelism_config)
119 Tags=tags,
120 )
--> 121 return self.sagemaker_session.sagemaker_client.create_pipeline(**kwargs)
122
123 def _create_args(
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
389 "%s() only accepts keyword arguments." % py_operation_name)
390 # The "self" in this scope is referring to the BaseClient.
--> 391 return self._make_api_call(operation_name, kwargs)
392
393 _api_call.__name__ = str(py_operation_name)
/opt/conda/lib/python3.7/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
717 error_code = parsed_response.get("Error", {}).get("Code")
718 error_class = self.exceptions.from_code(error_code)
--> 719 raise error_class(parsed_response, operation_name)
720 else:
721 return parsed_response
ClientError: An error occurred (ValidationException) when calling the CreatePipeline operation: Cannot assign property reference [Parameters.EpochCount] to argument of type [String]

I replace
epoch_count = ParameterInteger(name="EpochCount", default_value=1)
with
epoch_count = ParameterString(name="EpochCount", default_value="1")
With that change it works. Maybe integer pipeline parameters can only be used for values that SageMaker itself consumes from the notebook. However, epoch_count is used inside the Docker container, which is not directly part of SageMaker; at least, that's my understanding.

Related

InternalError when trying to predict with an endpoint on AWS

I use AWS SageMaker for a ML project and my TrainingJob for image classification has successfully finished. I use a separate inference.py for the inference, that looks like this:
import json
import logging
import sys
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.transforms as T
from PIL import Image
import io
import requests
# Module-level logger: DEBUG and above, with a handler writing to stdout.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))
# define model
def Net():
    """Build a pretrained ResNet-50 with frozen weights and a new 5-output head.

    Returns:
        The torchvision ResNet-50 whose ``fc`` layer is replaced by two
        stacked linear layers (in_features -> in_features/2 -> 5).
    """
    build_resnet50 = models.__dict__['resnet50']
    net = build_resnet50(pretrained=True)

    # Freeze every pretrained weight; the head assigned below is created
    # afterwards, so its parameters are not touched by this loop.
    for weight in net.parameters():
        weight.requires_grad = False

    in_dim = net.fc.in_features
    hidden_dim = int(in_dim / 2)
    head = nn.Sequential(
        nn.Linear(in_dim, hidden_dim),
        nn.Linear(hidden_dim, 5),
    )
    net.fc = head
    return net
# load model parameters
def model_fn(model_dir):
    """Load the trained network from ``model_dir`` and prepare it for inference.

    Args:
        model_dir: Directory that contains the ``model.pth`` state-dict file.

    Returns:
        The network built by ``Net()``, moved to CUDA when available
        (otherwise CPU), with the saved weights loaded and eval mode enabled.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Net().to(device)
    with open(os.path.join(model_dir, "model.pth"), "rb") as f:
        # map_location remaps the stored tensors onto the device available
        # here; without it, a checkpoint saved from a GPU cannot be loaded on
        # a CPU-only host.
        state_dict = torch.load(f, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model
# deserialize input
def input_fn(request_body, content_type):
    """Decode a JPEG request payload into a PIL image.

    Args:
        request_body: Raw bytes of the request.
        content_type: Content type of the request; only ``image/jpeg`` is accepted.

    Returns:
        The image opened from ``request_body``.

    Raises:
        ValueError: If ``content_type`` is anything other than ``image/jpeg``.
    """
    if content_type != 'image/jpeg':
        raise ValueError("This model only supports jpeg input")
    payload = io.BytesIO(request_body)
    return Image.open(payload)
# inference
def predict_fn(input_object, model):
    """Run the model on a single PIL image and return its raw output.

    Args:
        input_object: PIL image, as returned by ``input_fn``.
        model: Network, as returned by ``model_fn``.

    Returns:
        The model output tensor for a batch containing the one image.
    """
    transform = T.Compose([
        T.Resize((224, 224)),
        T.ToTensor(),
    ])
    # JPEGs can be grayscale or CMYK; the ResNet-50 built in Net() takes
    # 3-channel input, so normalise the mode before converting to a tensor.
    batch = transform(input_object.convert("RGB")).unsqueeze(0)
    # model_fn places the model on CUDA when available; the input has to live
    # on the same device as the weights.
    device = next(model.parameters()).device
    batch = batch.to(device)
    with torch.no_grad():
        prediction = model(batch)
    return prediction
I created a predictor and made a prediction like this:
from sagemaker.pytorch import PyTorchModel
from sagemaker.predictor import Predictor
# Model artifact location reported by the training estimator.
model_location=estimator.model_data
# Wrap the artifact in a PyTorchModel that uses inference.py as entry point.
# NOTE(review): `sagemaker` itself is referenced here but its import is not
# shown in this snippet -- presumably imported earlier in the notebook.
pytorch_model = PyTorchModel(model_data=model_location,
                             role=sagemaker.get_execution_role(),
                             entry_point='inference.py',
                             py_version='py3',
                             framework_version='1.4')
# Deploy to a single ml.m5.large instance.
predictor = pytorch_model.deploy(initial_instance_count=1, instance_type='ml.m5.large')
import io
# Send the raw JPEG bytes, declaring the content type that input_fn checks for.
with open("test1.jpg", "rb") as image:
    img = image.read()
response=predictor.predict(img, initial_args={"ContentType": "image/jpeg"})
I got the following error message:
InternalFailure Traceback (most recent call last)
/tmp/ipykernel_7063/3976847146.py in <cell line: 1>()
----> 1 response=predictor.predict(img, initial_args={"ContentType": "image/jpeg"})
~/anaconda3/envs/python3/lib/python3.8/site-packages/sagemaker/predictor.py in predict(self, data, initial_args, target_model, target_variant, inference_id)
159 data, initial_args, target_model, target_variant, inference_id
160 )
--> 161 response = self.sagemaker_session.sagemaker_runtime_client.invoke_endpoint(**request_args)
162 return self._handle_response(response)
163
~/anaconda3/envs/python3/lib/python3.8/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
393 "%s() only accepts keyword arguments." % py_operation_name)
394 # The "self" in this scope is referring to the BaseClient.
--> 395 return self._make_api_call(operation_name, kwargs)
396
397 _api_call.__name__ = str(py_operation_name)
~/anaconda3/envs/python3/lib/python3.8/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
723 error_code = parsed_response.get("Error", {}).get("Code")
724 error_class = self.exceptions.from_code(error_code)
--> 725 raise error_class(parsed_response, operation_name)
726 else:
727 return parsed_response
InternalFailure: An error occurred (InternalFailure) when calling the InvokeEndpoint operation (reached max retries: 4): An exception occurred while sending request to model. Please contact customer support regarding request 34c901aa-76... .
I also tried this code in different accounts, but the issue remained.

Django queryset count() method raise "TypeError: unorderable types: NoneType() > int()"

My environment is Python3.5, Django1.8.3 and cx_Oracle5.3(They are checked by pip3 freeze).
Django query set raises a Type Error exception when count() method is called.
Python 2 + cx_Oracle and Python 3 + sqlite3 both work fine without any exception; only Python 3 + cx_Oracle fails.
Thus, I tried to update cx_Oracle to version 6.1 (the latest version) because I thought there could be a compatibility problem between cx_Oracle and Python 3. However, that generates a different error.
The details are in the code blocks below; please refer to them.
P.S: I Need to keep Django version to 1.8.3 for compatibility with my Apps.
models.py
from django.db import models
# Django model stored in the table named by Meta.db_table.
class Device(models.Model):
    # Character field limited to 10 characters.
    deviceClass = models.CharField(max_length=10)

    class Meta:
        # Explicit table name for this model.
        db_table = 'TST_G2S_DEVICE'
cx_Oracle5.3
$ python3 manage.py shell
Python 3.5.2 (default, Nov 23 2017, 16:37:01)
Type "copyright", "credits" or "license" for more information.
IPython 2.4.1 -- An enhanced Interactive Python.
? -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help -> Python's own help system.
object? -> Details about 'object', use 'object??' for extra details.
In [1]: from polls.models import Device;
In [2]: dev = Device.objects.all()
In [3]: dev
Out[3]: []
In [4]: type(dev)
Out[4]: django.db.models.query.QuerySet
In [5]: dev.count()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-72a7bdf9f7f7> in <module>()
----> 1 dev.count()
/usr/local/lib/python3.5/dist-packages/django/db/models/query.py in count(self)
316 return len(self._result_cache)
317
--> 318 return self.query.get_count(using=self.db)
319
320 def get(self, *args, **kwargs):
/usr/local/lib/python3.5/dist-packages/django/db/models/sql/query.py in get_count(self, using)
462 obj = self.clone()
463 obj.add_annotation(Count('*'), alias='__count', is_summary=True)
--> 464 number = obj.get_aggregation(using, ['__count'])['__count']
465 if number is None:
466 number = 0
/usr/local/lib/python3.5/dist-packages/django/db/models/sql/query.py in get_aggregation(self, using, added_aggregate_names)
443 outer_query.select_related = False
444 compiler = outer_query.get_compiler(using)
--> 445 result = compiler.execute_sql(SINGLE)
446 if result is None:
447 result = [None for q in outer_query.annotation_select.items()]
/usr/local/lib/python3.5/dist-packages/django/db/models/sql/compiler.py in execute_sql(self, result_type)
838 if result_type == SINGLE:
839 try:
--> 840 val = cursor.fetchone()
841 if val:
842 return val[0:self.col_count]
/usr/local/lib/python3.5/dist-packages/django/db/utils.py in inner(*args, **kwargs)
102 def inner(*args, **kwargs):
103 with self:
--> 104 return func(*args, **kwargs)
105 return inner
106
/usr/local/lib/python3.5/dist-packages/django/db/backends/oracle/base.py in fetchone(self)
507 if row is None:
508 return row
--> 509 return _rowfactory(row, self.cursor)
510
511 def fetchmany(self, size=None):
/usr/local/lib/python3.5/dist-packages/django/db/backends/oracle/base.py in _rowfactory(row, cursor)
575 # This comes from FloatField columns.
576 value = float(value)
--> 577 elif precision > 0:
578 # NUMBER(p,s) column: decimal-precision fixed point.
579 # This comes from IntField and DecimalField columns.
TypeError: unorderable types: NoneType() > int()
cx_Oracle6.1
Python 3.5.2 (default, Nov 23 2017, 16:37:01)
Type "copyright", "credits" or "license" for more information.
IPython 2.4.1 -- An enhanced Interactive Python.
? -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help -> Python's own help system.
object? -> Details about 'object', use 'object??' for extra details.
In [1]: from polls.models import Device;
In [2]: dev = Device.objects.all()
In [3]: dev
Out[3]: <repr(<django.db.models.query.QuerySet at 0x7f0ab12e8fd0>) failed: AttributeError: 'cx_Oracle.Cursor' object has no attribute 'numbersAsStrings'>
In [4]: type(dev)
Out[4]: django.db.models.query.QuerySet
In [5]: dev.count()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-5-72a7bdf9f7f7> in <module>()
----> 1 dev.count()
/usr/local/lib/python3.5/dist-packages/django/db/models/query.py in count(self)
316 return len(self._result_cache)
317
--> 318 return self.query.get_count(using=self.db)
319
320 def get(self, *args, **kwargs):
/usr/local/lib/python3.5/dist-packages/django/db/models/sql/query.py in get_count(self, using)
462 obj = self.clone()
463 obj.add_annotation(Count('*'), alias='__count', is_summary=True)
--> 464 number = obj.get_aggregation(using, ['__count'])['__count']
465 if number is None:
466 number = 0
/usr/local/lib/python3.5/dist-packages/django/db/models/sql/query.py in get_aggregation(self, using, added_aggregate_names)
443 outer_query.select_related = False
444 compiler = outer_query.get_compiler(using)
--> 445 result = compiler.execute_sql(SINGLE)
446 if result is None:
447 result = [None for q in outer_query.annotation_select.items()]
/usr/local/lib/python3.5/dist-packages/django/db/models/sql/compiler.py in execute_sql(self, result_type)
825 return
826
--> 827 cursor = self.connection.cursor()
828 try:
829 cursor.execute(sql, params)
/usr/local/lib/python3.5/dist-packages/django/db/backends/base/base.py in cursor(self)
160 self.validate_thread_sharing()
161 if self.queries_logged:
--> 162 cursor = self.make_debug_cursor(self._cursor())
163 else:
164 cursor = self.make_cursor(self._cursor())
/usr/local/lib/python3.5/dist-packages/django/db/backends/base/base.py in _cursor(self)
135 self.ensure_connection()
136 with self.wrap_database_errors:
--> 137 return self.create_cursor()
138
139 def _commit(self):
/usr/local/lib/python3.5/dist-packages/django/db/backends/oracle/base.py in create_cursor(self)
260
261 def create_cursor(self):
--> 262 return FormatStylePlaceholderCursor(self.connection)
263
264 def _commit(self):
/usr/local/lib/python3.5/dist-packages/django/db/backends/oracle/base.py in __init__(self, connection)
417 self.cursor = connection.cursor()
418 # Necessary to retrieve decimal values without rounding error.
--> 419 self.cursor.numbersAsStrings = True
420 # Default arraysize of 1 is highly sub-optimal.
421 self.cursor.arraysize = 100
AttributeError: 'cx_Oracle.Cursor' object has no attribute 'numbersAsStrings'
Unfortunately for you, as long as you are restricted to Django 1.8 this issue will not be fixed.
This GitHub issue describes the same situation you've encountered, with a very similar stack trace. A fix has been implemented in Django, but it apparently affects only 1.11 and later releases, with no backport to 1.8, for the following reason:
"This patch isn't completely compatible with cx_Oracle < 5.2, hence it
won't be backported to Django < 1.11"

Unpickle sklearn.tree.DecisionTreeRegressor in Python 2 from Python 3

I want to fit a model in Python 3.5 (numpy 1.11.2, sklearn 0.18.1):
import pickle
from sklearn.tree import DecisionTreeRegressor
# NOTE(review): np, join and path_to_data are used below but are not defined
# in this snippet -- presumably imported/defined earlier in the session.
clf = DecisionTreeRegressor()
# Three rows of four values, transposed: 4 samples with 3 features each.
X = np.array([[1,2,3,4],[1,1,2,2],[1,2,1,2]]).T
y = [1,1,0,0]
clf.fit(X,y)
# Write with pickle protocol 2, which Python 2 is able to read.
with open(join(path_to_data, 'models', 'debug.model'), 'wb') as f:
    pickle.dump(clf, f, protocol=2)
After pickling I try to unpickle model in python 2.7 (numpy 1.11.2, sklearn 0.18.1)
import pickle
# Read the model file back in binary mode and unpickle it.
# NOTE(review): the traceback below shows joblib.load() on
# 'queryforest_debug.model', not this pickle.load() call -- confirm which
# loader and file actually produced the error.
with open(join(path_to_data, 'models', 'debug.model'), 'rb') as f:
    clf = pickle.load(f)
but it raises an error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-78-2eaf35b8e6d9> in <module>()
----> 1 joblib.load(join(path_to_data,'models','queryforest_debug.model'))
/home/iiivanitskiy/.local/lib/python2.7/site-packages/sklearn/externals/joblib/numpy_pickle.pyc in load(filename, mmap_mode)
573 return load_compatibility(fobj)
574
--> 575 obj = _unpickle(fobj, filename, mmap_mode)
576
577 return obj
/home/iiivanitskiy/.local/lib/python2.7/site-packages/sklearn/externals/joblib/numpy_pickle.pyc in _unpickle(fobj, filename, mmap_mode)
505 obj = None
506 try:
--> 507 obj = unpickler.load()
508 if unpickler.compat_mode:
509 warnings.warn("The file '%s' has been generated with a "
/usr/lib/python2.7/pickle.pyc in load(self)
856 while 1:
857 key = read(1)
--> 858 dispatch[key](self)
859 except _Stop, stopinst:
860 return stopinst.value
/home/iiivanitskiy/.local/lib/python2.7/site-packages/sklearn/externals/joblib/numpy_pickle.pyc in load_build(self)
325 NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
326 """
--> 327 Unpickler.load_build(self)
328
329 # For backward compatibility, we support NDArrayWrapper objects.
/usr/lib/python2.7/pickle.pyc in load_build(self)
1215 setstate = getattr(inst, "__setstate__", None)
1216 if setstate:
-> 1217 setstate(state)
1218 return
1219 slotstate = None
ValueError: non-string names in Numpy dtype unpickling
Is there a way to unpickle, in Python 2, a DecisionTreeRegressor that was pickled in Python 3?

Python Kombu consumer get queue message but the callback can't trigger

If I run the following code, I can see that the message is fetched from the queue, but the callback is never triggered:
from kombu.mixins import ConsumerMixin
from kombu import Exchange, Queue
# Direct exchange named 'nginx', and the queue list bound to it with routing key 'nginx'.
task_exchange = Exchange('nginx', type='direct')
task_queues = [Queue(exchange=task_exchange, routing_key='nginx')]
class Worker(ConsumerMixin):
    """Consumer that prints each message body from task_queues and acknowledges it."""

    def __init__(self, connection):
        # Stored on the instance; the run() loop listed below reads self.connection.
        self.connection = connection

    def get_consumers(self, Consumer, channel):
        # A single consumer on the module-level task_queues, dispatching to self.task.
        return [Consumer(queues=task_queues,
                         callbacks=[self.task]
                         )]

    def task(self, body, message):
        # Python 2 print statement: write the body, then acknowledge the message.
        print body
        message.ack()
if __name__ == '__main__':
    from kombu import Connection
    # NOTE(review): the URL has '#' where an AMQP URL normally has '@' between
    # the credentials and the host -- possibly obfuscation in the post; confirm.
    with Connection('amqp://test:test#localhost:5672/test') as conn:
        # Run the consumer loop on this connection.
        worker = Worker(conn)
        worker.run()
I try to run python -m pdb test.py
170 -> def run(self, _tokens=1):
171 restart_limit = self.restart_limit
172 errors = (self.connection.connection_errors +
173 self.connection.channel_errors)
174 while not self.should_stop:
175 try:
(Pdb) l
176 if restart_limit.can_consume(_tokens):
177 for _ in self.consume(limit=None): # pragma: no cover
178 pass
179 else:
180 sleep(restart_limit.expected_time(_tokens))
181 except errors:
182 warn(W_CONN_LOST, exc_info=1)
It keeps looping at
for _ in self.consume(limit=None): # pragma: no cover
pass
Running python -m pdb test.py and stepping into connection.drain_events(), I found that the content.body encoding is binary:
if (content and
309 channel.auto_decode and
310 hasattr(content, 'content_encoding')):
311 -> try:
312 content.body = content.body.decode(content.content_encoding) #here get a error
313 except Exception:
314 pass
fix it
def get_consumers(self, Consumer, channel):
    # Same consumer as before, now with an explicit list of accepted content types.
    # NOTE(review): accepting 'pickle' means message bodies are unpickled;
    # only do this when every producer on the queue is trusted.
    return [Consumer(queues=task_queues,
                     accept=['json', 'pickle'],
                     callbacks=[self.task]
                     )]

Reverse URL of named wrapped methods with parameters

I'm trying to do a reverse resolution of URL for named wrapped methods.
I have the following set in my urls.py (based on this answer):
urlpatterns = patterns('',
('param_select/$',session_check(param_select),{}, 'param_select'),
('registration/$',registration),
('result_show/(\d+)',session_check(result_show),{}, 'result_show'),
('^expofit/$',media_clean(start),{}, 'start'),
('result_pick/$',session_check(result_pick),{}, 'result_pick'),
('mail_report/$',session_check(mail_report)),
('notification/$',session_check(notification)),
It worked for all the methods without parameters. However it failed on the one which takes parameters.
In [50]: from django.core.urlresolvers import reverse
In [51]: reverse('result_pick')
Out[51]: '/result_pick/'
In [52]: reverse('start')
Out[52]: '/expofit/'
In [53]: reverse('result_show', args=(2,))
---------------------------------------------------------------------------
NoReverseMatch Traceback (most recent call last)
/home/alan/Desktop/expofit/expofit_env/local/lib/python2.7/site-packages/django/core/management/commands/shell.pyc in <module>()
----> 1 reverse('result_show', args=(2,))
/home/alan/Desktop/expofit/expofit_env/local/lib/python2.7/site-packages/django/core/urlresolvers.pyc in reverse(viewname, urlconf, args, kwargs, prefix, current_app)
474 resolver = get_ns_resolver(ns_pattern, resolver)
475
--> 476 return iri_to_uri(resolver._reverse_with_prefix(view, prefix, *args, **kwargs))
477
478 reverse_lazy = lazy(reverse, str)
/home/alan/Desktop/expofit/expofit_env/local/lib/python2.7/site-packages/django/core/urlresolvers.pyc in _reverse_with_prefix(self, lookup_view, _prefix, *args, **kwargs)
394 lookup_view_s = lookup_view
395 raise NoReverseMatch("Reverse for '%s' with arguments '%s' and keyword "
--> 396 "arguments '%s' not found." % (lookup_view_s, args, kwargs))
397
398 class LocaleRegexURLResolver(RegexURLResolver):
NoReverseMatch: Reverse for 'result_show' with arguments '(2,)' and keyword arguments '{}' not found.
I've been following this by the book, and except for the named URL everything looks the same. Any ideas where the flaw is?
The problem was that I was using Django 1.4 while reading the Django 1.5 docs. After the update, everything works normally.
In [5]: reverse('result_show',args=(3,))
Out[5]: '/result_show/3'