Celery task queuing - flask

I have created a Flask application and it consists of 2 Celery tasks.
Task 1: Generate a file through a process
Task 2: Email the generated file
Normally task 1 needs more time than task 2. I want to execute task 1 and then task 2, but the problem is that both start executing at the same time inside Celery.
How can I resolve this issue?
@celery.task(name='celery_example.process')
def process(a, b, c, d, e, f):
    command = 'rnx2rtkp -p ' + a + ' -f ' + b + ' -m ' + c + ' -n -o oout.pos ' + d + ' ' + e + ' ' + f
    os.system(command)
    return 'Successfully created POS file'
@celery.task(name='celery_example.emailfile')
def emailfile(recipientemail):
    email_user = ''
    email_password = ''
    subject = 'subject'
    msg = MIMEMultipart()
    msg['From'] = email_user
    msg['To'] = recipientemail
    msg['Subject'] = subject
    body = 'This is your Post-Processed position file'
    msg.attach(MIMEText(body, 'plain'))
    filename = 'oout.pos'
    attachment = open(filename, 'rb')
    part = MIMEBase('application', 'octet-stream')
    part.set_payload(attachment.read())
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', "attachment; filename= " + filename)
    msg.attach(part)
    text = msg.as_string()
    server = smtplib.SMTP('smtp.gmail.com', 587)
    server.starttls()
    server.login(email_user, email_password)
    server.sendmail(email_user, recipientemail, text)
    server.quit()
    return 'Email has been successfully sent'
This is the app.route
@app.route('/pp.php', methods=['GET', 'POST'])
def pp():
    pp = My1Form()
    target = os.path.join(APP_ROOT)
    print(target)
    for fileBase in request.files.getlist("fileBase"):
        print(fileBase)
        filename = fileBase.filename
        destination = "/".join([target, filename])
        print(destination)
        fileBase.save(destination)
    for fileObsRover in request.files.getlist("fileObsRover"):
        print(fileObsRover)
        filename = fileObsRover.filename
        destination = "/".join([target, filename])
        print(destination)
        fileObsRover.save(destination)
    for fileNavRover in request.files.getlist("fileNavRover"):
        print(fileNavRover)
        filename = fileNavRover.filename
        destination = "/".join([target, filename])
        print(destination)
        fileNavRover.save(destination)
    a = fileObsRover.filename
    b = fileBase.filename
    c = fileNavRover.filename
    elevation = pp.ema.data
    Freq = pp.frq.data
    posMode = pp.pmode.data
    emailAdd = pp.email.data
    process.delay(posMode, Freq, elevation, a, b, c)
    emailfile.delay(emailAdd)
    return render_template('results.html', email=pp.email.data, Name=pp.Name.data, ema=elevation, frq=Freq, pmode=posMode, fileBase=a)
    return render_template('pp.php', pp=pp)

As it currently stands your code does the following:
# schedule process to run asynchronously
process.delay(posMode,Freq,elevation,a,b,c)
# schedule emailfile to run asynchronously
emailfile.delay(emailAdd)
Both of these will immediately be picked up by workers and executed. You have provided nothing to inform Celery that emailfile should wait until process is complete.
Instead you should:
alter the signature of emailfile to include another parameter that will be the output of a successful process call; then
call process using link.
For example:
deferred = process.apply_async(
    (posMode, Freq, elevation, a, b, c),
    link=emailfile.s())
deferred.get()
An alternative to using link, but semantically identical in this case, would be to use a chain.
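For reference, a minimal sketch of the chain variant, assuming emailfile is altered to accept the result of process as its first argument:
from celery import chain

# process runs first; its return value is prepended to emailfile's arguments
chain(process.s(posMode, Freq, elevation, a, b, c), emailfile.s(emailAdd)).apply_async()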

Load topic file to NAO robot 2.1

Hello, I want to know how to load a Dialog Topic file using Python.
I made sure that the file path is right, but it keeps saying that it isn't. I have also used the tutorials in NAO 2.1's documentation, ALDialog and ALModule.
Please send me code that works or tell me the error. I tried using the following code:
NAO_IP = "nao.local"
dialog_p = None
ModuleInstance = None
class NaoFalanteModule(ALModule):
def __init__(self, name):
ALModule.__init__(self, name)
self.tts = ALProxy("ALTextToSpeech")
self.tts.setLanguage("Brazilian")
global dialog_p
try:
dialog_p = ALProxy("ALDialog")
except Exception, e:
print "Error dialog"
print str(e)
exit(1)
dialog_p.setLanguage("Brazilian")
self.naoAlc()
def naoAlc(self):
topf_path = "/simpleTestes/diaSimples/testeSimples_ptb.top"
topf_path = topf_path.decode("utf-8")
topic = dialog_p.loadTopic(topf_path.encode("utf-8"))
# Start dialog
dialog_p.subscribe("NaoFalanteModule")
dialog_p.activateTopic(topic)
raw_input(u"Press 'Enter' to exit.")
dialog_p.unload(topic)
dialog_p.unsubscribe
def main():
parser = OptionParser()
parser.add_option("--pip",
help="Parent broker port. The IP address or your robot",
dest="pip")
parser.add_option("--pport",
help="Parent broker port. The port NAOqi is listening to",
dest="pport",
type="int")
parser.set_defaults(
pip=NAO_IP,
pport=9559)
(opts, args_) = parser.parse_args()
pip = opts.pip
pport = opts.pport
myBroker = ALBroker("myBroker",
"0.0.0.0",
0,
pip,
pport)
global ModuleInstance
ModuleInstance = NaoFalanteModule("ModuleInstance")
try:
while True:
time.sleep(1)
except KeyboardInterrupt:
printI tried using the following code:
print "Interrupted by user, shutting down"
myBroker.shutdown()
sys.exit(0)
if __name__ == "__main__":
main()
The path to the topic needs to be the absolute path to that file, whereas you're passing a path relative to your current execution directory. The reason is that ALDialog is a separate service running in its own process and knows nothing about the execution context of whoever is calling it.
And the .top file must be uploaded to the robot using Choregraphe.
So, your absolute path in this case might be something like
topf_path = "/home/nao/simpleTestes/diaSimples/testeSimples_ptb.top"
... or if you want to be a bit cleaner, if you know your script is being executed at the root of your application package, use os.path:
topf_path = os.path.abspath("diaSimples/testeSimples_ptb.top")
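If the script may be launched from a different working directory, a small sketch (assuming the .top file is shipped in a diaSimples folder next to the script) is to build the path from the script's own location:
import os

# hypothetical layout: diaSimples/testeSimples_ptb.top sits next to this script
script_dir = os.path.dirname(os.path.abspath(__file__))
topf_path = os.path.join(script_dir, "diaSimples", "testeSimples_ptb.top")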

Is there any faster way for downloading multiple files from s3 to local folder?

I am trying to download 12,000 files from an S3 bucket using a Jupyter notebook, which is estimated to take about 21 hours because each file is downloaded one at a time. Can I run multiple downloads in parallel to speed up the process?
Currently, I am using the following code to download all files:
### Get unique full-resolution image basenames
images = df['full_resolution_image_basename'].unique()
print(f'No. of unique full-resolution images: {len(images)}')

### Create a folder for full-resolution images
images_dir = './images/'
os.makedirs(images_dir, exist_ok=True)

### Download images
images_str = "','".join(images)
limiting_clause = f"CONTAINS(ARRAY['{images_str}'], full_resolution_image_basename)"
_ = download_full_resolution_images(images_dir,
                                    limiting_clause=limiting_clause)
See the code below. This will only work with Python 3.6+, because of the f-strings (PEP 498); use a different method of string formatting for older versions of Python.
Provide the relative_path, bucket_name and s3_object_keys. In addition, max_workers is optional, and if not provided it defaults to 5 times the number of processors on the machine.
Most of the code for this answer came from an answer to "How to create an async generator in Python?", which sources from this example documented in the library.
import boto3
import os
from concurrent import futures

relative_path = './images'
bucket_name = 'bucket_name'
s3_object_keys = []  # List of S3 object keys
max_workers = 5

abs_path = os.path.abspath(relative_path)
s3 = boto3.client('s3')

def fetch(key):
    file = f'{abs_path}/{key}'
    os.makedirs(file, exist_ok=True)
    with open(file, 'wb') as data:
        s3.download_fileobj(bucket_name, key, data)
    return file

def fetch_all(keys):
    with futures.ThreadPoolExecutor(max_workers=5) as executor:
        future_to_key = {executor.submit(fetch, key): key for key in keys}
        print("All URLs submitted.")
        for future in futures.as_completed(future_to_key):
            key = future_to_key[future]
            exception = future.exception()
            if not exception:
                yield key, future.result()
            else:
                yield key, exception

for key, result in fetch_all(s3_object_keys):
    print(f'key: {key} result: {result}')
Thank you for this. I had over 9,000 JPEG images to download from my S3 bucket. I tried to incorporate this directly into my Colab Pro but couldn't get it to work; I kept getting an "Errno 21: Is a directory" error.
I had to add two things: 1) a makedirs call to create the directory I want, and 2) use mknod instead of mkdir.
fetch_all is almost the same, except for a small edit so that max_workers actually takes effect. s3c is just my boto3.client with my keys and all.
My download time went from 30+ minutes to 5 minutes with 1000 workers.
os.makedirs('/*some dir you want*/*prefix*')

def fetch(key):
    file = f'{abs_path}/{key}'
    os.mknod(file, mode=384)
    with open(file, 'wb') as data:
        s3c.download_fileobj(bucket_name, key, data)
    return file

def fetch_all(keys):
    with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_key = {executor.submit(fetch, key): key for key in keys}
        print("All URLs submitted.")
        for future in futures.as_completed(future_to_key):
            key = future_to_key[future]
            exception = future.exception()
            if not exception:
                yield key, future.result()
            else:
                yield key, exception
You can try this out; it is fast:
import os
import boto3
from datetime import datetime
from multiprocessing import Pool

bucket_name = 'BUCKET_NAME'
prefix = 'PREFIX'
local_dir = './downloads/'  # PUT YOUR LOCAL DIR
max_process = 20  # CAN BE CHANGED
debug_en = True

# pass your credentials and region name
s3_client = boto3.client('s3', aws_access_key_id=' ',
                         aws_secret_access_key=' ', region_name=' ')

def downfiles(bucket_name, src_obj, dest_path):
    try:
        s3_client.download_file(bucket_name, src_obj, dest_path)
        if debug_en:
            print("[debug] downloading object: %s to %s" % (src_obj, dest_path))
    except:
        pass

def download_dir(bucket_name, sub_prefix):
    paginator = s3_client.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=sub_prefix)
    pool = Pool(max_process)
    print(pool)
    mp_data = []
    for page in pages:
        if 'Contents' in page:
            for obj in page['Contents']:
                src_obj = obj['Key']
                dest_path = local_dir + src_obj
                mp_data.append((bucket_name, src_obj, dest_path))
                os.path.dirname(dest_path) and os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    pool.starmap(downfiles, mp_data)
    return len(mp_data)

if __name__ == '__main__':
    print("starting script...")
    start_time = datetime.now()
    s3_dirs = [prefix]  # list of prefixes, so the loop iterates prefixes rather than characters
    total_files = 0
    for s3_dir in s3_dirs:
        print("[Information] %s directory is downloading" % s3_dir)
        no_files = download_dir(bucket_name, s3_dir)
        total_files = total_files + no_files
    end_time = datetime.now()
    print('Duration: {}'.format(end_time - start_time))
    print('Total File numbers: %d' % total_files)
    print("ended")

How to change host value for ansible while running using ansible-python module?

Here is my code. I am passing a value for the host, but it is executing the command on localhost. I also tried passing hardcoded values, and the output shows that it ran on the given host.
Here is the code (the class definition, and then):
def __init__(self):
    self.variable_manager = VariableManager()
    self.loader = DataLoader()
    self.inventory = Inventory(loader=self.loader, variable_manager=self.variable_manager, host_list="host")
    Options = namedtuple('Options', ['listtags', 'listtasks', 'listhosts', 'syntax', 'connection', 'module_path', 'forks', 'remote_user', 'private_key_file', 'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args', 'scp_extra_args', 'become', 'become_method', 'become_user', 'verbosity', 'check'])
    self.options = Options(listtags=False, listtasks=False, listhosts=True, syntax=False, connection='local', module_path=None, forks=100, remote_user='ubuntu', private_key_file="/tmp/xxx-key2.pem", ssh_common_args=None, ssh_extra_args=None, sftp_extra_args=None, scp_extra_args=None, become=False, become_method=None, become_user='root', verbosity=None, check=False)

def execute_playbook(self, playbook, host, scriptname=None, command=None,
                     path=None, username=None, password=None, key=None):
    if not os.path.exists(playbook):
        print '[INFO] The playbook does not exist'
        sys.exit()
    script_path = None
    if scriptname is not None:
        script_path = os.getcwd() + '/' + scriptname
        if not os.path.exists(script_path):
            print '[INFO] The script does not exist'
            sys.exit()
    self.variable_manager.extra_vars = {'scriptname': script_path,
                                        'host': host, 'command': command, 'path': path}  # This can accommodate various other command line arguments.
    passwords = {}
    if password is not None:
        self.loader.set_vault_password(password)
    play_source = dict(
        name="Ansible Play",
        hosts=host,
        gather_facts='no',
        tasks=[
            dict(action=dict(module='shell', args='sudo mkdir /tmp/test-ansible'), register='shell_out'),
            dict(action=dict(module='debug', args=dict(msg='{{shell_out.stdout}}')))
        ]
    )
    play = Play.load(play_source, self.variable_manager, self.loader)
    tqm = TaskQueueManager(
        inventory=self.inventory,
        variable_manager=self.variable_manager,
        loader=self.loader,
        options=self.options,
        passwords=passwords,
    )
    try:
        result = tqm.run(play)
    except Exception as e:
        print e, "Exception in Ansible tqm.run()"
The output is:
PLAY [Ansible Play] *************************************************************************************************************************
TASK [command] ******************************************************************************************************************************
[WARNING]: Consider using 'become', 'become_method', and 'become_user' rather than running sudo
changed: [110.110.112.139]
TASK [debug] ********************************************************************************************************************************
ok: [110.110.112.139] => {
"msg": ""
}
But it creates the directory on my localhost, not on "110.110.112.139".
You set connection='local' as an option inside __init__.
This tells Ansible that, no matter what the target host is, tasks are executed on localhost.
Don't set it (leave the default) or use connection='ssh' to execute tasks remotely:
def __init__(self):
    self.variable_manager = VariableManager()
    self.loader = DataLoader()
    self.inventory = Inventory(loader=self.loader, variable_manager=self.variable_manager, host_list="host")
    Options = namedtuple('Options', ['listtags', 'listtasks', 'listhosts', 'syntax', 'connection', 'module_path', 'forks', 'remote_user', 'private_key_file', 'ssh_common_args', 'ssh_extra_args', 'sftp_extra_args', 'scp_extra_args', 'become', 'become_method', 'become_user', 'verbosity', 'check'])
    self.options = Options(listtags=False,
                           listtasks=False,
                           listhosts=True,
                           syntax=False,
                           connection='ssh',  # changed from 'local'
                           module_path=None,
                           forks=100, remote_user='ubuntu',
                           private_key_file="/tmp/xxx-key2.pem",
                           ssh_common_args=None,
                           ssh_extra_args=None,
                           sftp_extra_args=None,
                           scp_extra_args=None,
                           become=False,
                           become_method=None,
                           become_user='root', verbosity=None,
                           check=False)

AWS SQS: Sending dynamic message using boto

I have a working python/boto script which posts a message to my AWS SQS queue. The message body, however, is hardcoded into the script.
I created a file called ~/file which contains two values:
$ cat ~/file
Username 'encrypted_password_string'
I would like my boto script (see below) to send a message to my AWS SQS queue that contains these two values.
Can anyone please advise how to modify my script below so that the message body sent to SQS contains the contents of the file ~/file? Please also take note of the special characters that exist within an encrypted password string.
Example:
~/file
username d5MopV/EsfSKk8BExCyLHFwNfBrOTzQ1
#!/usr/bin/env python
conf = {
    "sqs-access-key": "xxxx",
    "sqs-secret-key": "xxxx",
    "sqs-queue-name": "UserPassChange",
    "sqs-region": "xxxx",
    "sqs-path": "sqssend"
}

import boto.sqs
conn = boto.sqs.connect_to_region(
    conf.get('sqs-region'),
    aws_access_key_id=conf.get('sqs-access-key'),
    aws_secret_access_key=conf.get('sqs-secret-key')
)
q = conn.create_queue(conf.get('sqs-queue-name'))

from boto.sqs.message import RawMessage
m = RawMessage()
m.set_body('hardcoded message')
retval = q.write(m)
print 'added message, got retval: %s' % retval
One way to get it working: in the script I added
import json
import commands
then added
USERNAME = commands.getoutput("echo $(who am i | awk '{print $1}')")
PASS = commands.getoutput("cat /tmp/.s")
and then added these values to my message body:
MSG = RawMessage()
MSG.set_body(json.dumps({'pass': PASS, 'user': USERNAME}))
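For completeness, a minimal sketch that reads ~/file directly instead of shelling out, reusing q and RawMessage from the script above and assuming the file holds the username and encrypted password separated by whitespace on one line:
import json
import os

with open(os.path.expanduser('~/file')) as fh:
    username, password = fh.read().split(None, 1)

m = RawMessage()
# json.dumps escapes any special characters in the encrypted password string
m.set_body(json.dumps({'user': username, 'pass': password.strip()}))
q.write(m)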
The following example shows how to use Boto3 to send the contents of a file as an SQS message (tested with moto).
test_sqs.py
import boto3
from moto import mock_sqs

@mock_sqs
def test_sqs():
    sqs = boto3.resource('sqs', 'us-east-1')
    queue = sqs.create_queue(QueueName='votes')
    queue.send_message(MessageBody=open('beer.txt').read())
    messages = queue.receive_messages()
    assert len(messages) == 1
    assert messages[0].body == 'tasty\n'

Python Multiprocessing - Passing values between child process

I have a single method named grep_phalanx_log whose job is to SSH to a machine and grep for some values.
My main method will call this method with different host names/credentials, log file names, and grep patterns.
So, I need to grep for the SAME pattern on two different servers in PARALLEL. If a match is found on one server, I want the other server to stop grepping. If the pattern is not found on either server within a specified time, grep_phalanx_log will return a negative value. Based on the negative value, I have to proceed with some other requirement.
class eventFlowTestNfx(object):
    def grep_phalanx_log(self, host_name, username, password, grep_cmd, timeout=10, time_to_monitor=20):
        ssh_client = paramiko.SSHClient()
        ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        Log.info("Grep command to be executed: %r" % grep_cmd)
        try:
            ssh_client.connect(host_name, username=username, password=password, timeout=timeout)
            end_time = time.time() + time_to_monitor
            while time.time() < end_time:
                ssh_stdin, ssh_stdout, ssh_stderr = ssh_client.exec_command(grep_cmd)
                output = ssh_stdout.read()
                if not output:
                    time.sleep(1)
                else:
                    Log.info("NFX: Match message from %r is %r" % (host_name, output))
                    return output
            if not output:
                Log.error("FAILED: Message not processed.")
                Log.error("Host Name: %r and grep command: %r" % (host_name, grep_cmd))
                raise Exception("NFX agent could not process message")
        except:
            Log.error("End to End flow is broken, check the logs!")
            return -1

    def main(self):
        for cr_dict in correlation_list:
            cr_process = multiprocessing.Process(target=self.grep_phalanx_log(), args=(cr_dict["host"], cr_dict["username"], cr_dict["password"], cr_received_cmd_skeleton,))
            cr_process.start()
So my code starts 2 processes, and I am not sure how they will talk to each other and how one will terminate the other.
You could replace time.sleep(1) with:
if not output.strip():  # blank output
    is_found = found.wait(1)  # sleep >= 1 second unless found
    if is_found:
        break  # stop grepping
else:  # found something
    found.set()
    ...
    return output
where found = multiprocessing.Event(): create it in the parent process and pass it to each child.
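A minimal sketch of the parent side, assuming grep_phalanx_log is changed to accept the shared Event as an extra argument (the parameter name found used here is hypothetical):
import multiprocessing

def main(self):
    found = multiprocessing.Event()  # shared flag: set by whichever child matches first
    workers = []
    for cr_dict in correlation_list:
        p = multiprocessing.Process(
            # pass the method itself (no parentheses) so it runs in the child process
            target=self.grep_phalanx_log,
            args=(cr_dict["host"], cr_dict["username"], cr_dict["password"],
                  cr_received_cmd_skeleton),
            kwargs={"found": found})
        p.start()
        workers.append(p)
    for p in workers:
        p.join()  # each child exits once it finds a match, sees the event, or times out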