Following is my flow:-
class APLWorkflow(Flow):
start = (
flow.StartFunction(function1)
.Next(this.request_quotes)
)
request_quotes = (
flow.Handler(function2)
.Next(this.move_package)
)
move_package = (
flow.Handler(function3)
.Next(this.shipment_create)
)
shipment_create = (
flow.Function(function4)
.Next(this.end)
)
end = flow.End()
Following are my util functions:-
def function1():
return 1
def function2():
return 2
def function3():
return 3
def function4():
return 4
The problem is when I start the flow, it runs perfectly well. However, the response returned is that of start node, not the last executed node.
Following is my code:-
activation.prepare()
response = APLWorkFLow.start.run(**some_kwargs)
activation.done() # stops the flow at `move_package`.
print(response) # prints 1, not 3.
How do I return the response of the last executed node, in this Handler (move_package)?
Related
Is there a way to skip calling inner method during pytests ? I am in need to skip validate() method when invoked from test_handle()
# module.py
def handle(request):
try:
token_validator = TokenValidator()
token_validator.**validate**(request['headers'].get("auth_token"))
except Exception:
return response(status=400)
try:
firstname = request['body'].get("first_nm")
except Exception:
return response(status=400)
return firstname
# pytest.py
def test_handle():
mock_event = dict(
headers={},
body={}
)
expected = "Somename"
result = module.handle(mock_event)
TestCase().assertEqual(expected, result)
I am running a Vertex AI batch prediction using the python API.
The function I am using is from the google cloud docs:
def create_batch_prediction_job_dedicated_resources_sample(
key_path,
project: str,
location: str,
model_display_name: str,
job_display_name: str,
gcs_source: Union[str, Sequence[str]],
gcs_destination: str,
machine_type: str = "n1-standard-2",
sync: bool = True,
):
credentials = service_account.Credentials.from_service_account_file(
key_path)
# Initilaize an aiplatfrom object
aiplatform.init(project=project, location=location, credentials=credentials)
# Get a list of Models by Model name
models = aiplatform.Model.list(filter=f'display_name="{model_display_name}"')
model_resource_name = models[0].resource_name
# Get the model
my_model = aiplatform.Model(model_resource_name)
batch_prediction_job = my_model.batch_predict(
job_display_name=job_display_name,
gcs_source=gcs_source,
gcs_destination_prefix=gcs_destination,
machine_type=machine_type,
sync=sync,
)
#batch_prediction_job.wait_for_resource_creation()
batch_prediction_job.wait()
print(batch_prediction_job.display_name)
print(batch_prediction_job.resource_name)
print(batch_prediction_job.state)
return batch_prediction_job
datetime_today = datetime.datetime.now()
model_display_name = 'test_model'
key_path = 'vertex_key.json'
project = 'my_project'
location = 'asia-south1'
job_display_name = 'batch_prediction_' + str(datetime_today)
model_name = '1234'
gcs_source = 'gs://my_bucket/Cleaned_Data/user_item_pairs.jsonl'
gcs_destination = 'gs://my_bucket/prediction'
create_batch_prediction_job_dedicated_resources_sample(key_path,project,location,model_display_name,job_display_name,
gcs_source,gcs_destination)
OUTPUT:
92 current state:
JobState.JOB_STATE_RUNNING
INFO:google.cloud.aiplatform.jobs:BatchPredictionJob projects/my_project/locations/asia-south1/batchPredictionJobs/37737350127597649
The above output is being printed on the terminal over and over after every few seconds.
The issue that I have is that the python program calling this function keeps on running until it is force stopped. I have tried both batch_prediction_job.wait() & batch_prediction_job.wait_for_resource_creation() with the same results.
How do I start a batch_prediction_job without waiting for it to complete and terminating the program just after the job has be created?
I gave you the wrong instruction on the comments, change the parameter sync=False and the function should return just after be executed.
Whether this function call should be synchronous (wait for pipeline run to finish before terminating) or asynchronous (return immediately)
sync=False
def create_batch_prediction_job_dedicated_resources_sample(
# ...
sync: bool = False,
):
UPDATE - Adding more details:
Check here my notebook code where I tested it and its working:
You have to change the sync=False AND remove/comment the following print lines:
#batch_prediction_job.wait()
#print(batch_prediction_job.display_name)
#print(batch_prediction_job.resource_name)
#print(batch_prediction_job.state)
Your code edited:
def create_batch_prediction_job_dedicated_resources_sample(
key_path,
project: str,
location: str,
model_display_name: str,
job_display_name: str,
gcs_source: Union[str, Sequence[str]],
gcs_destination: str,
machine_type: str = "n1-standard-2",
sync: bool = False,
):
credentials = service_account.Credentials.from_service_account_file(key_path)
# Initilaize an aiplatfrom object
aiplatform.init(project=project, location=location, credentials=credentials)
# Get a list of Models by Model name
models = aiplatform.Model.list(filter=f'display_name="{model_display_name}"')
model_resource_name = models[0].resource_name
# Get the model
my_model = aiplatform.Model(model_resource_name)
batch_prediction_job = my_model.batch_predict(
job_display_name=job_display_name,
gcs_source=gcs_source,
gcs_destination_prefix=gcs_destination,
machine_type=machine_type,
sync=sync,
)
return batch_prediction_job
datetime_today = datetime.datetime.now()
model_display_name = 'test_model'
key_path = 'vertex_key.json'
project = '<my_project_name>'
location = 'asia-south1'
job_display_name = 'batch_prediction_' + str(datetime_today)
model_name = '1234'
gcs_source = 'gs://<my_bucket_name>/Cleaned_Data/user_item_pairs.jsonl'
gcs_destination = 'gs://<my_bucket_name>/prediction'
create_batch_prediction_job_dedicated_resources_sample(key_path,
project,location,
model_display_name,
job_display_name,
gcs_source,
gcs_destination,
sync=False,)
Results sync=False:
Results sync=True:
Once a single function needs to be awaited, it seems like a cascading of functions that need to be awaited, until you reach a point where adding await breaks stuff:
I have dash code that intermittently gets called:
def get_data(symbolStr):
print("Getting AR for", symbolStr)
df = amp.get_data_job(symbolStr) #calls async code and need to be awaited
return df
...
html.Div(
dcc.Interval(id='interval1', interval = 900 * 1000, n_intervals=0),
),
html.Div(
dash_table.DataTable(
id='table',
columns=[{"name": i, "id": i} for i in magic_columns],
editable=True,
row_deletable=True
),
)
#app.callback(
[dash.dependencies.Output('table', 'data')],
[dash.dependencies.Input('interval1', 'n_intervals')],
[dash.dependencies.State('table', 'data')]
)
def update_interval(n, data):
allSymbolsDf = pd.DataFrame()
for symbolStr in symbolsStr:
df = get_data(symbolStr)
allSymbolsDf = allSymbolsDf.append(df)
time.sleep(3)
return [allSymbolsDf.to_dict('rows')]
Thing is, get_data is a function that calls asynchronous code
df = amp.get_data_job(symbolStr)
I can't turn the callback update_interval into an asynchronous function (AFAIK). What is the recommended way out of this. I mean, still keeping dash.
Ran into the same situation using FastApi and serving dash on the side. Use asyncio.run()
import asyncio
async def asyn_fun(foo):
return True if foo == "bar" else False
def sync_fun(foo):
return asyncio.run(asyn_fun(foo))
print(sync_fun("bar"))
I'm trying to use Paho MQTT Client and Multiprocessing to send temperature with defined interval. However, publish command is not working inside class. I've checked self.mqtt_client inside scheduler it has the Client object.
Is there anyone that can address problem for me?
Everything inside class is working except Scheduler.
def scheduler(self, topic, interval):
if interval != 0:
while True:
if topic == "temp":
print("Temperature published " + interval) #It's working.
self.mqtt_client.publish(topic, interval , 0 , False) #There is no error/output about this line
time.sleep(int(interval))
else:
pass
Class:
class Switcher:
config = None
mqtt_client = None
mqtt_connected = False
switches = {}
stages = {}
def __init__(self, config):
self.config = config
for switch_cfg in self.config['switches']:
self.switches[switch_cfg['topic_set']] = Switch(int(switch_cfg['gpio']), switch_cfg['topic_status'], switch_cfg['initial'])
def scheduler(self, topic, interval):
if interval != 0:
while True:
if topic == "temp":
print("Temperature published " + interval) #It's working.
self.mqtt_client.publish(topic, interval , 0 , False) #There is no error/output about this line
time.sleep(int(interval))
else:
pass
def mqtt_connect(self):
if self.mqtt_broker_reachable():
self.verbose('Connecting to ' + self.config['mqtt_host'] + ':' + self.config['mqtt_port'])
self.mqtt_client = mqtt.Client(self.config['mqtt_client_id'])
if 'mqtt_user' in self.config and 'mqtt_password' in self.config:
self.mqtt_client.username_pw_set(self.config['mqtt_user'], self.config['mqtt_password'])
self.mqtt_client.on_connect = self.mqtt_on_connect
self.mqtt_client.on_disconnect = self.mqtt_on_disconnect
self.mqtt_client.on_message = self.mqtt_on_message
try:
self.mqtt_client.connect(self.config['mqtt_host'], int(self.config['mqtt_port']), 10)
for switch_cfg in self.config['switches']:
self.mqtt_client.subscribe(switch_cfg['topic_set'], 0)
self.mqtt_client.loop_forever()
except:
self.error(traceback.format_exc())
self.mqtt_client = None
else:
self.error(self.config['mqtt_host'] + ':' + self.config['mqtt_port'] + ' not reachable!')
def mqtt_on_connect(self, mqtt_client, userdata, flags, rc):
self.mqtt_connected = True
for switch_ios in self.config['switches']:
self.mqtt_client.publish(self.config['station_status'], "available", 0, False)
self.mqtt_client.publish(switch_ios['topic_status'], self.switches[switch_ios['topic_set']].get_state(), 0, False)
temp_interval = 1
temp_process = multiprocessing.Process(target=self.scheduler, args=("temp",str(temp_interval),))
temp_process.start()
self.verbose('...mqtt_connected!')
def mqtt_broker_reachable(self):
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(5)
try:
s.connect((self.config['mqtt_host'], int(self.config['mqtt_port'])))
s.close()
return True
except socket.error:
return False
def start(self):
self.mqtt_connect()
You mqtt_connect function will never return.
self.mqtt_client.loop_forever() Will block until self.mqtt_client.disconnect() is called.
You should probably be using self.mqtt_client.loop_start() which will run the client loop on it's own thread in the background. You can call self.mqtt_client.loop_stop() when you want to shut the client down.
I am trying to send a file across the network using Twisted with the LineReceiver protocol. The issue I am seeing is that when I read a binary file and try to send the chunks they simply don't send.
I am reading the file using:
import json
import time
import threading
from twisted.internet import reactor, threads
from twisted.protocols.basic import LineReceiver
from twisted.internet import protocol
MaximumMsgSize = 15500
trySend = True
connectionToServer = None
class ClientInterfaceFactory(protocol.Factory):
def buildProtocol(self, addr):
return WoosterInterfaceProtocol(self._msgProcessor, self._logger)
class ClientInterfaceProtocol(LineReceiver):
def connectionMade(self):
connectionToServer = self
def _DecodeMessage(self, rawMsg):
header, body = json.loads(rawMsg)
return (header, json.loads(body))
def ProcessIncomingMsg(self, rawMsg, connObject):
# Decode raw message.
decodedMsg = self._DecodeMessage(rawMsg)
self.ProccessTransmitJobToNode(decodedMsg, connObject)
def _BuildMessage(self, id, msgBody = {}):
msgs = []
fullMsgBody = json.dumps(msgBody)
msgBodyLength = len(fullMsgBody)
totalParts = 1 if msgBodyLength <= MaximumMsgSize else \
int(math.ceil(msgBodyLength / MaximumMsgSize))
startPoint = 0
msgBodyPos = 0
for partNo in range(totalParts):
msgBodyPos = (partNo + 1) * MaximumMsgSize
header = {'ID' : id, 'MsgParts' : totalParts,
'MsgPart' : partNo }
msg = (header, fullMsgBody[startPoint:msgBodyPos])
jsonMsg = json.dumps(msg)
msgs.append(jsonMsg)
startPoint = msgBodyPos
return (msgs, '')
def ProccessTransmitJobToNode(self, msg, connection):
rootDir = '../documentation/configs/Wooster'
exportedFiles = ['consoleLog.txt', 'blob.dat']
params = {
'Status' : 'buildStatus',
'TaskID' : 'taskID',
'Name' : 'taskName',
'Exports' : len(exportedFiles),
}
msg, statusStr = self._BuildMessage(101, params)
connection.sendLine(msg[0])
for filename in exportedFiles:
with open (filename, "rb") as exportFileHandle:
data = exportFileHandle.read().encode('base64')
params = {
ExportFileToMaster_Tag.TaskID : taskID,
ExportFileToMaster_Tag.FileContents : data,
ExportFileToMaster_Tag.Filename : filename
}
msgs, _ = self._BuildMessage(MsgID.ExportFileToMaster, params)
for m in msgs:
connection.sendLine(m)
def lineReceived(self, data):
threads.deferToThread(self.ProcessIncomingMsg, data, self)
def ConnectFailed(reason):
print 'Connection failed..'
reactor.callLater(20, reactor.callFromThread, ConnectToServer)
def ConnectToServer():
print 'Connecting...'
from twisted.internet.endpoints import TCP4ClientEndpoint
endpoint = TCP4ClientEndpoint(reactor, 'localhost', 8181)
deferItem = endpoint.connect(factory)
deferItem.addErrback(ConnectFailed)
netThread = threading.Thread(target=reactor.run, kwargs={"installSignalHandlers": False})
netThread.start()
reactor.callFromThread(ConnectToServer)
factory = ClientInterfaceFactory()
protocol = ClientInterfaceProtocol()
while 1:
time.sleep(0.01)
if connectionToServer == None: continue
if trySend == True:
protocol.ProccessTransmitJobToNode(None, None)
trySend = False
Is there something I am doing wrong?file is sent, it's when the write is multi part or there are more than one file it struggles.
If a single write occurs then the m
Note: I have updated the question with a crude piece of sample code in the hope it makes sense.
_BuildMessage returns a two-tuple: (msgs, '').
Your network code iterates over this:
msgs = self._BuildMessage(MsgID.ExportFileToMaster, params)
for m in msgs:
So your network code first tries to send a list of json encoded data and then tries to send the empty string. It most likely raises an exception because you cannot send a list of anything using sendLine. If you aren't seeing the exception, you've forgotten to enable logging. You should always enable logging so you can see any exceptions that occur.
Also, you're using time.sleep and you shouldn't do this in a Twisted-based program. If you're doing this to try to avoid overloading the receiver, you should use TCP's native backpressure instead by registering a producer which can receive pause and resume notifications. Regardless, time.sleep (and your loop over all the data) will block the entire reactor thread and prevent any progress from being made. The consequence is that most of the data will be buffered locally before being sent.
Also, your code calls LineReceiver.sendLine from a non-reactor thread. This has undefined results but you can probably count on it to not work.
This loop runs in the main thread:
while 1:
time.sleep(0.01)
if connectionToServer == None: continue
if trySend == True:
protocol.ProccessTransmitJobToNode(None, None)
trySend = False
while the reactor runs in another thread:
netThread = threading.Thread(target=reactor.run, kwargs={"installSignalHandlers": False})
netThread.start()
ProcessTransmitJobToNode simply calls self.sendLine:
def ProccessTransmitJobToNode(self, msg, connection):
rootDir = '../documentation/configs/Wooster'
exportedFiles = ['consoleLog.txt', 'blob.dat']
params = {
'Status' : 'buildStatus',
'TaskID' : 'taskID',
'Name' : 'taskName',
'Exports' : len(exportedFiles),
}
msg, statusStr = self._BuildMessage(101, params)
connection.sendLine(msg[0])
You should probably remove the use of threading entirely from the application. Time-based events are better managed using reactor.callLater (your main-thread loop effectively generates a call to ProcessTransmitJobToNode once hundred times a second (modulo effects of the trySend flag)).
You may also want to take a look at https://github.com/twisted/tubes as a better way to manage large amounts of data with Twisted.