Attempting to Obtain Taxonomic Information from Biopython - list

I am attempting to alter a previous script that utilizes biopython to fetch information about a species phylum. This script was written to retrieve information one species at a time. I would like to modify the script so that I can do this for 100 organisms at a time.
Here is the initial code
import sys
from Bio import Entrez
def get_tax_id(species):
"""to get data from ncbi taxomomy, we need to have the taxid. we can
get that by passing the species name to esearch, which will return
the tax id"""
species = species.replace(" ", "+").strip()
search = Entrez.esearch(term = species, db = "taxonomy", retmode = "xml")
record = Entrez.read(search)
return record['IdList'][0]
def get_tax_data(taxid):
"""once we have the taxid, we can fetch the record"""
search = Entrez.efetch(id = taxid, db = "taxonomy", retmode = "xml")
return Entrez.read(search)
Entrez.email = ""
if not Entrez.email:
print "you must add your email address"
sys.exit(2)
taxid = get_tax_id("Erodium carvifolium")
data = get_tax_data(taxid)
lineage = {d['Rank']:d['ScientificName'] for d in
data[0]['LineageEx'] if d['Rank'] in ['family', 'order']}
I have managed to modify the script so that it accepts a local file that contains one of the organisms I am using. But I need to extend this to a 100 organisms.
So the idea was to generate a list from the file of my organisms and somehow separately fed each item generated from the list into the line taxid = get_tax_id("Erodium carvifolium") and replace "Erodium carvifolium" with my organisms name. But I have no idea how to do that.
Here is the sample version of the code with some of my adjustments
import sys
from Bio import Entrez
def get_tax_id(species):
"""to get data from ncbi taxomomy, we need to have the taxid. we can
get that by passing the species name to esearch, which will return
the tax id"""
species = species.replace(' ', "+").strip()
search = Entrez.esearch(term = species, db = "taxonomy", retmode = "xml")
record = Entrez.read(search)
return record['IdList'][0]
def get_tax_data(taxid):
"""once we have the taxid, we can fetch the record"""
search = Entrez.efetch(id = taxid, db = "taxonomy", retmode = "xml")
return Entrez.read(search)
Entrez.email = ""
if not Entrez.email:
print "you must add your email address"
sys.exit(2)
list = ['Helicobacter pylori 26695', 'Thermotoga maritima MSB8', 'Deinococcus radiodurans R1', 'Treponema pallidum subsp. pallidum str. Nichols', 'Aquifex aeolicus VF5', 'Archaeoglobus fulgidus DSM 4304']
i = iter(list)
item = i.next()
for item in list:
???
taxid = get_tax_id(?)
data = get_tax_data(taxid)
lineage = {d['Rank']:d['ScientificName'] for d in
data[0]['LineageEx'] if d['Rank'] in ['phylum']}
print lineage, taxid
The question marks refer to places where I am stumped as what to do next. I don't see how I can connect my loop to replace the ? in get_tax_id(?). Or do I need to somehow append each of the items in the list so that they are modified each time to contain get_tax_id(Helicobacter pylori 26695) and then find some way to place them in the line containing taxid =

Here's what you need, place this below your function definitions, i.e. after the line that says: sys.exit(2)
species_list = ['Helicobacter pylori 26695', 'Thermotoga maritima MSB8', 'Deinococcus radiodurans R1', 'Treponema pallidum subsp. pallidum str. Nichols', 'Aquifex aeolicus VF5', 'Archaeoglobus fulgidus DSM 4304']
taxid_list = [] # Initiate the lists to store the data to be parsed in
data_list = []
lineage_list = []
print('parsing taxonomic data...') # message declaring the parser has begun
for species in species_list:
print ('\t'+species) # progress messages
taxid = get_tax_id(species) # Apply your functions
data = get_tax_data(taxid)
lineage = {d['Rank']:d['ScientificName'] for d in data[0]['LineageEx'] if d['Rank'] in ['phylum']}
taxid_list.append(taxid) # Append the data to lists already initiated
data_list.append(data)
lineage_list.append(lineage)
print('complete!')

Related

Practice assignment AWS Computer Vision : get_Cifar10_dataset

I have problem with this methode which should return both the training and the validation dataset and examine it to return the index that corresponds to the first occurrence of each class in CIFAR10.
this is code:
def get_cifar10_dataset(): """ Should create the cifar 10 network and identify the dataset index of the first time each new class
appears
:return: tuple of training and validation dataset as well as label indices
:rtype: (gluon.data.Dataset, 'dict_values' object is not subscriptable, gluon.data.Dataset,
dict[int:int])
"""
train_data = None
val_data = None
# YOUR CODE HERE
train_data = datasets.CIFAR10(train=True, root=M5_IMAGES)
val_data = datasets.CIFAR10(train=False, root=M5_IMAGES)
You are asked to return a dictionary with labels and the corresponding indexes. Using the following function can solve your problem.
def get_idx_dict(data):
lis = []
idx = []
indices = {}
for i in range(len(data)):
if data[i][1] not in lis:
lis.append(data[i][1])
idx.append(i)
indices = {lis[i]: idx[i] for i in range(len(lis))}
return indices
The function returns a dictionary with desired output. Use this function on data from train and validation set.
train_indices = get_idx_dict(train_data)
val_indices = get_idx_dict(val_data)
You can do it this
def get_cifar10_dataset():
"""
Should create the cifar 10 network and identify the dataset index of the first time each new class appears
:return: tuple of training and validation dataset as well as label indices
:rtype: (gluon.data.Dataset, dict[int:int], gluon.data.Dataset, dict[int:int])
"""
train_data = None
val_data = None
train_indices = {}
val_indices = {}
# Use `root=M5_IMAGES` for your dataset
train_data = gluon.data.vision.datasets.CIFAR10(train=True, root=M5_IMAGES)
val_data = gluon.data.vision.datasets.CIFAR10(train=False, root=M5_IMAGES)
#for train
for i in range(len(train_data)):
if train_data[i][1] not in train_indices:
train_indices[train_data[i][1]] = i
#for valid
for i in range(len(val_data)):
if val_data[i][1] not in val_indices:
val_indices[val_data[i][1]] = i
#raise NotImplementedError()
return train_data, train_indices, val_data, val_indices

Looking for a best way to insert a records from one model to another based on selection in odoo

I did the code for insert records from so_parts table to so_bo table using Query...How can I use ORM method to do this kind of job. Is there any other way(best)to do that? Here is my code`
`
#api.multi
def save_rapair_parts(self, vals):
#get todays date and convert it to string
created_date = datetime.datetime.today().strftime("%m/%d/%Y")
str_date = str(created_date)
so_p_id = self.so_p_id.id
bo_status = self.bo_status
so_part_t = self.so_part_t
so_part_sno = self.so_part_sno
product = self.so_part_product
product_str = 'Repair '+str(product)
part_id = self.id
bench_order_table.search(['id','bo_sno','created_date','bo_number','rep_description','bo_status'])
#insert details intoso bench orders
`
if so_part_t=='r_b':
try:
sequence = self.env['ir.sequence'].next_by_code('so.benchorder') or '/'
str_sequence = str(sequence)
query = """SELECT so_work_authorization FROM my_depots_so WHERE id=%d """ % (so_p_id)
self.env.cr.execute(query)
result = self.env.cr.fetchall()
result_number = json.dumps(result, ensure_ascii=False)
strip_number = result_number.strip('\' \" [] ')
work_auth_no = str(strip_number)
work_auth_no += "-"
work_auth_no += str_sequence
insert ="""INSERT INTO my_depots_so_bo(id,so_bo_id,bo_sno,created_date,bo_number,rep_description,bo_status) values %s """
parameters = (part_id,so_p_id,so_part_sno,str_date,work_auth_no,product_str,bo_status)
self.env.cr.execute(insert,(parameters,))
my_depots_bo(id,bo_sno,created_date,bo_number,rep_description,bo_status) values %s """
# self.env.cr.execute(insert_query, (parameters,))
except Exception:
print "Error in inserting values"`
yes there is a better way because when you use ORM
method you also checks access right for user to:
for your select query:
rec = self.env['my.depots.so'].search_read(['id', '=', so_p_id], ['so_work_authorization'])
if rec:
rec = rec[0] # search_read return a list of dictionary
so_work_authorization = rec['so_work_authorization']
# and do what ever you want with the result
# to create
# call create method witch accept a dictionary
# field_name : value
new_rec = self.env['my.depots.so.bo'].create({
'so_bo_id': so_p_id, # many2one must be an integer value
'bo_sno': bo_nso_value,
'bo_number': value_of_number,
# ....
# ....
# add al field
}) # create return the new created record as model object
for inserting use: self.env['model.name'].create(vals)
for updating use : self.env['model.name'].write(vals)
using ORM method makes sure that user don't pass the security access rigths
Hope you get the idea

How to iterate over multiple lists with different values but same variables while placing all values in one Pandas DataFrame

Sorry for the long title, didn't know how to ask it:
I am working with ExactTarget Salesforce Marketing API, trying to iterate over multiple dictionary objects from the API call but some of them are nested and have the same name as the other API responses and I am getting confused on how to iterate over the same named variables into a dataframe.
This is the output of the API Call:
(ClickEvent){
Client =
(ClientID){
ID = 11111111
}
PartnerKey = None
CreatedDate = 2016-07-12 00:40:17
ModifiedDate = 2016-07-12 00:40:17
ID = 11111111
ObjectID = "11111111"
SendID = 11111111
SubscriberKey = "azfull#usa.net"
EventDate = 2016-07-12 00:40:17
EventType = "Click"
TriggeredSendDefinitionObjectID = None
BatchID = 1
URLID = 11111111
URL = aaa.com
I want to create a separate dataframe column for the "ID" under "ClientID" but I am running into the trouble of another variable already being named "ID". How can I iterate over "ClientID" and get the ID value plus also get the other values and place them in the dataframe?
My code has been able to place the data in the dataframe but I am not getting the particular Client ID. this is what output looks like now:
BatchID ClientID CreatedDate \
0 1 (ClientID){\n ID = 10914162\n } 2016-02-23 13:08:59
1 1 (ClientID){\n ID = 10914162\n } 2016-02-23 13:11:49
As you can see only want the ID number not the other garbage under "ClientID"
Code:
import ET_Client
import pandas as pd
try:
debug = False
stubObj = ET_Client.ET_Client(False, debug)
## Modify the date below to reduce the number of results returned from the request
## Setting this too far in the past could result in a very large response size
retrieveDate = '2016-07-11T13:00:00.000'
#ET call for clicks
print '>>>ClickEvents'
getClickEvent = ET_Client.ET_ClickEvent()
getClickEvent.auth_stub = stubObj
getResponse = getClickEvent.get()
ResponseResults = getResponse.results
#print ResponseResults
Client = []
partner_keys = []
created_dates = []
modified_date = []
ID = []
ObjectID = []
SendID = []
SubscriberKey = []
EventDate = []
EventType = []
TriggeredSendDefinitionObjectID = []
BatchID = []
URLID = []
URL = []
for ClickEvent in ResponseResults:
Client.append(str(ClickEvent['Client']))
partner_keys.append(ClickEvent['PartnerKey'])
created_dates.append(ClickEvent['CreatedDate'])
modified_date.append(ClickEvent['ModifiedDate'])
ID.append(ClickEvent['ID'])
ObjectID.append(ClickEvent['ObjectID'])
SendID.append(ClickEvent['SendID'])
SubscriberKey.append(ClickEvent['SubscriberKey'])
EventDate.append(ClickEvent['EventDate'])
EventType.append(ClickEvent['EventType'])
TriggeredSendDefinitionObjectID.append('TriggeredSendDefinitionObjectID')
BatchID.append(ClickEvent['BatchID'])
URLID.append(ClickEvent['URLID'])
URL.append(ClickEvent['URL'])
df = pd.DataFrame({'ClientID': Client, 'PartnerKey': partner_keys,
'CreatedDate' : created_dates, 'ModifiedDate': modified_date,
'ID':ID, 'ObjectID': ObjectID,'SendID':SendID,'SubscriberKey':SubscriberKey,
'EventDate':EventDate,'EventType':EventType,'TriggeredSendDefinitionObjectID':TriggeredSendDefinitionObjectID,
'BatchID':BatchID,'URLID':URLID,'URL':URL})
print df
I have been trying this solution but not working:
for ClickEvent in ResponseResults():
if 'ClientID' in ClickEvent:
ID.append(ClickEvent['Client']:
print Client
Thank you in advance.
-EDIT-
The output of the API call above is exactly how the systems outputs it, how should I make it an actual JSON response?
Data frame I want to look like this:
BatchID ClientID CreatedDate \
0 1 111111111 2016-02-23 13:08:59
1 1 111111111 2016-02-23 13:11:49
Just dont want other stuff in the "ClientID" portion of the data I submitted above. Hope this helps.
Instead of appending the entire Client object to your list :
Client.append(str(ClickEvent['Client']))
Have you tried storing just the ID field of the object? Maybe something like:
Client.append(str(ClickEvent['Client']['ID']))

cleaning up my SQLAlchemy operations (reducing repetition)

I have some server side processing of some data (client-side library = jQuery DataTables)
I am using POST as my ajax method. In my Flask webapp, I can access the POST data with request.values
The data type / structure of request.values is werkzeug.datastructures.CombinedMultiDict
If the user wants to sort a column, the request contains a key called action with a value of filter (note the below printouts are obtained with for v in request.values: print v, request.values[v])
...
columns[7][data] role
columns[8][search][regex] false
action filter
columns[10][name]
columns[3][search][value]
...
all the column names are also contained in the request as keys. The columns that have search terms will have the search string as a value for the column name key (as opposed to empty for columns with no search term entered. So, If I want to search for firstname containing bill, I would see the following in my request
columns[7][searchable] true
...
columns[6][name]
firstname bill
columns[0][search][value]
columns[2][searchable] true
...
columns[5][data] phone
role
columns[10][data] registered_on
...
columns[0][searchable] true
email
columns[7][orderable] true
...
columns[2][search][value]
Notice how role and email are empty. So my code below is very non-DRY
rv = request.values
if rv.get('action') == 'filter':
if len(rv.get('firstname')):
q = q.filter(User.firstname.ilike('%{0}%'.format(rv.get('firstname'))))
if len(rv.get('lastname')):
q = q.filter(User.lastname.ilike('%{0}%'.format(rv.get('lastname'))))
if len(rv.get('username')):
q = q.filter(User.username.ilike('%{0}%'.format(rv.get('username'))))
if len(rv.get('email')):
q = q.filter(User.email.ilike('%{0}%'.format(rv.get('email'))))
if len(rv.get('phone')):
q = q.filter(User.phone.ilike('%{0}%'.format(rv.get('phone'))))
if len(rv.get('region')):
q = q.filter(User.region.name.ilike('%{0}%'.format(rv.get('region'))))
if len(rv.get('role')):
q = q.filter(User.role.name.ilike('%{0}%'.format(rv.get('role'))))
if len(rv.get('is_active')):
q = q.filter(User.is_active_ == '{0}'.format(rv.get('is_active')))
if len(rv.get('is_confirmed')):
q = q.filter(User.is_confirmed == '{0}'.format(rv.get('is_confirmed')))
if len(rv.get('registered_on_from')):
fdate = datetime.strptime(rv.get('registered_on_from'), '%Y-%m-%d')
q = q.filter(User.registered_on > fdate)
if len(rv.get('registered_on_to')):
tdate = datetime.strptime(rv.get('registered_on_to'), '%Y-%m-%d')
q = q.filter(User.registered_on < tdate)
I was building the sorting functionality, and I found the following statement that greatly simplified my life (see this answer)
q = q.order_by('{name} {dir}'.format(name=sort_col_name, dir=sort_dir))
I was wondering if there was a way to simplify this set of filtering queries like the above sorting code since I will have to do this for many other models.
This should help:
from sqlalchemy import inspect
from sqlalchemy.sql.sqltypes import String,Boolean
def filter_model_by_request(qry,model,rv):
if rv.get('action') == 'filter':
mapper = inspect(model).attrs # model mapper
col_names = list(set([c.key for c in mapper]) & set(rv.keys()))
# col_names is a list generated by intersecting the request values and model column names
for col_name in col_names:
col = mapper[col_name].columns[0]
col_type = type(col.type)
if col_type == String: # filter for String
qry = qry.filter(col.ilike('%{0}%'.format(rv.get(col_name))))
elif col_type == Boolean: # filter for Boolean
qry = qry.filter(col == '{0}'.format(rv.get(col_name)))
return qry
Example call (I used it with a #app.before_request and a cURL call to verify):
qry = db.session.query(User)
print filter_model_by_request(qry,User,request.values).count()
The date range filtering is not included in the function, add this feature if you wish, your code is fine for that purpose.
side note: be careful with the bigger/smaller operators for the dates. You're excluding the actual requested dates. Use <= or >= to include dates in filtering action. It's always a pitfall for me..

How to convert a python print function on html

I am trying to use web2py to build an app. I have a simple print function that a user submits a key word . The string or int key word is directed to an sqlite db to retrieve a row and output the data. I need to know
1. how to use the print on html.
2. How to split the string...so far i did the list:string
Here is my code:
def first():
form = SQLFORM.factory(Field('visitor_name',
label = 'Please Type Your keyword here!',
requires= [IS_NOT_EMPTY(), IS_LOWER(),'list:string']))
form.element('input[type=submit]')['_onclick'] = "return \
confirm('Are you sure you want to submit:');"
if form.process().accepted:
session.visitor_name = form.vars.visitor_name
redirect(URL('main'))
return dict(form=form)
def main():
while True:
name = request.vars.visitor_name or redirect(URL('first'))
name2 = name[:]
for item in name2:break
name3 = ' '.join(name2)
import sqlite3
id = 0
location = ""
conn = sqlite3.connect("keywords.db")
c = conn.cursor()
c.execute('select * from kmedicals')
records = c.fetchall()
for record in records:
id = record[0]
location = record[15]
if id == name3:
print name3.capitalize(),':' '\n',location
break
sys.exit()
return dict(name=name)
my view...default/main.html:
{{extend 'layout.html'}}
{{=name}}