Odoo 10 search active and inactive records using search() method - python-2.7

I have a many2many field, location_from_ids, and I am trying to find all the children of the selected locations.
location_from_ids = fields.Many2many(
    comodel_name='stock.location',
    relation='report_stock_config_location_from_rel',
    column1='report_id',
    column2='location_id',
    string='Locations From',
    context={'active_test': False})
I am using the search() method to get all the children of those locations:
def _get_filter(self, report):
    res = ''
    if report.location_from_ids:
        location_ids = [l.id for l in report.location_from_ids]
        locations = self.env['stock.location'].search(
            [('id', 'child_of', location_ids), ('active', 'in', ('t', 'f'))])
I need to get all the locations (active and inactive), but I am only getting active records. How can I retrieve all the records, both active and inactive?

Just "deactivate" the active test on searches:
locations = self.env['stock.location'].with_context(
    active_test=False).search(
        [('id', 'child_of', location_ids)])
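Applied to the question's method, a minimal sketch might look like this (the rest of the _get_filter body is assumed unchanged from the question):

def _get_filter(self, report):
    res = ''
    if report.location_from_ids:
        location_ids = report.location_from_ids.ids
        # active_test=False disables the implicit ('active', '=', True) filter,
        # so both active and archived locations are returned
        locations = self.env['stock.location'].with_context(
            active_test=False).search([('id', 'child_of', location_ids)])
        # ... build `res` from `locations` as in the original method
    return res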

As a complement to the answer, it is worth reviewing the operations that recordsets support in Odoo:
https://odoo-new-api-guide-line.readthedocs.io/en/latest/environment.html#environment
Supported Operations
Recordsets also support set operations; you can add, union, intersect, etc.:
record in recset1 # include
record not in recset1 # not include
recset1 + recset2 # extend
recset1 | recset2 # union
recset1 & recset2 # intersect
recset1 - recset2 # difference
recset.copy() # to copy recordset (not a deep copy)
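As an illustrative sketch (not from the original answer), these operations, together with the standard filtered() recordset method, can split the active_test=False search result back into active and archived subsets, assuming location_ids is defined as in the question:

all_locations = self.env['stock.location'].with_context(
    active_test=False).search([('id', 'child_of', location_ids)])

active_locations = all_locations.filtered('active')     # keep only active records
archived_locations = all_locations - active_locations   # set difference
all_again = active_locations | archived_locations       # union puts the two subsets back together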

PyFlink on Kinesis Analytics Studio - Cannot convert DataStream to Amazon Kinesis Data Stream

I have a DataStream <pyflink.datastream.data_stream.DataStream> coming from a CoFlatMapFunction (simplified here):
%flink.pyflink
# join two streams and update the rule-set
class MyCoFlatMapFunction(CoFlatMapFunction):

    def open(self, runtime_context: RuntimeContext):
        state_desc = MapStateDescriptor('map', Types.STRING(), Types.BOOLEAN())
        self.state = runtime_context.get_map_state(state_desc)

    def bool_from_user_number(self, user_number: int):
        '''Returns True if user_number is greater than 0, False otherwise.'''
        if user_number > 0:
            return True
        else:
            return False

    def flat_map1(self, value):
        '''This method is called for each element in the first of the connected streams'''
        self.state.put(value[1], self.bool_from_user_number(value[2]))

    def flat_map2(self, value):
        '''This method is called for each element in the second of the connected streams (exchange_server_tickers_data_py)'''
        current_dateTime = datetime.now()
        dt = current_dateTime
        x = value[1]
        y = value[2]
        yield Row(dt, x, y)
def generate__ds(st_env):
    # interpret the updating Tables as DataStreams
    type_info1 = Types.ROW([Types.SQL_TIMESTAMP(), Types.STRING(), Types.INT()])
    ds1 = st_env.to_append_stream(table_1, type_info=type_info1)
    type_info2 = Types.ROW([Types.SQL_TIMESTAMP(), Types.STRING(), Types.STRING()])
    ds2 = st_env.to_append_stream(table_2, type_info=type_info2)
    output_type_info = Types.ROW([Types.PICKLED_BYTE_ARRAY(), Types.STRING(), Types.STRING()])
    # Connect the two streams
    connected_ds = ds1.connect(ds2)
    # Apply the CoFlatMapFunction
    ds = connected_ds.key_by(lambda a: a[0], lambda a: a[0]).flat_map(MyCoFlatMapFunction(), output_type_info)
    return ds

ds = generate__ds(st_env)
However, I am unable to view the output, either by registering it as a view/table, writing to a sink table, or (the preferred option) using a Kinesis Streams sink to write data from the Flink stream into a Kinesis stream. Firehose would also not fit my use case, as the 30-second latency would be too long. Any help would be appreciated, thanks!
What I have tried:
Registering it as a view / table like so:
# interpret the DataStream as a Table
input_table = st_env.from_data_stream(ds).alias("dt", "x", "y")
z.show(input_table, stream_type="update")
Which gives an error of:
Query schema: [dt: RAW('[B', '...'), x: STRING, y: STRING]
Sink schema: [dt: RAW('[B', ?), x: STRING, y: STRING]
I have also tried writing to a sink table, like so:
%flink.pyflink
# create a sink table to emit results
st_env.execute_sql("""DROP TABLE IF EXISTS table_sink""")
st_env.execute_sql("""
    CREATE TABLE table_sink (
        dt RAW('[B', '...'),
        x VARCHAR(32),
        y STRING
    ) WITH (
        'connector' = 'print'
    )
""")
# convert the Table API table to a SQL view
table = st_env.from_data_stream(ds).alias("dt", "spread", "spread_orderbook")
st_env.execute_sql("""DROP TEMPORARY VIEW IF EXISTS table_api_table""")
st_env.create_temporary_view('table_api_table', table)
# emit the Table API table
st_env.execute_sql("INSERT INTO table_sink SELECT * FROM table_api_table").wait()
I get the error:
org.apache.flink.table.api.ValidationException: Unable to restore the RAW type of class '[B' with serializer snapshot '...'.
I have also tried to use a sink and add_sink to write the data to a sink, which would be an AWS Kinesis Data Stream, as in these docs, like so:
%flink.pyflink
from pyflink.common.serialization import JsonRowSerializationSchema
from pyflink.datastream.connectors import KinesisStreamsSink
output_type_info = Types.ROW([Types.SQL_TIMESTAMP(), Types.STRING(), Types.STRING()])
serialization_schema = JsonRowSerializationSchema.Builder().with_type_info(output_type_info).build()
# Required
sink_properties = {
    'aws.region': 'eu-west-2'
}

kds_sink = KinesisStreamsSink.builder() \
    .set_kinesis_client_properties(sink_properties) \
    .set_serialization_schema(SimpleStringSchema()) \
    .set_partition_key_generator(PartitionKeyGenerator.fixed()) \
    .set_stream_name("test_stream") \
    .set_fail_on_error(False) \
    .set_max_batch_size(500) \
    .set_max_in_flight_requests(50) \
    .set_max_buffered_requests(10000) \
    .set_max_batch_size_in_bytes(5 * 1024 * 1024) \
    .set_max_time_in_buffer_ms(5000) \
    .set_max_record_size_in_bytes(1 * 1024 * 1024) \
    .build()

ds.sink_to(kds_sink)
Which I assume would work, but KinesisStreamsSink is not found in pyflink.datastream.connectors and I am unable to find any documentation on how to do this within AWS Kinesis Analytics Studio. Any help would be much appreciated, thank you! How would I go about writing the data to a Kinesis Streams sink / converting it to a table?
Okay, I have figured it out. There were a couple of issues with the particular PyFlink version available on AWS Kinesis Analytics Studio (1.13). The error messages themselves were not that useful, so for anyone who is having similar issues I would really recommend viewing the errors in the Flink Web UI. Firstly, the MapStateDescriptor data types must be specified using Types.PICKLED_BYTE_ARRAY(). Secondly, not shown in the question, each MapStateDescriptor must have a distinct name. I also found that using Row from pyflink.common threw errors for me. It worked better to switch to tuples by specifying Types.TUPLE(), as is done in this example. I also had to switch to specifying the output as a tuple.
Another thing I have not done is specify a watermark strategy for the DataStream, which could potentially be done by extracting the timestamp from the first field and assigning watermarks based on knowledge of the stream:
class MyTimestampAssigner(TimestampAssigner):
    def extract_timestamp(self, value, record_timestamp: int) -> int:
        return int(value[0])

watermark_strategy = WatermarkStrategy.for_bounded_out_of_orderness(Duration.of_seconds(5)).with_timestamp_assigner(MyTimestampAssigner())
ds = ds.assign_timestamps_and_watermarks(watermark_strategy)

# the first field has been used for timestamp extraction, and is no longer necessary
# replace first field with a logical event time attribute
table = st_env.from_data_stream(ds, col("dt").rowtime, col('f0'), col('f1'))
Instead, I have created a sink table that writes to a Kinesis Data Stream as the output. In total, the corrected code looks something like this:
from pyflink.table.expressions import col
from pyflink.datastream.state import MapStateDescriptor
from pyflink.datastream.functions import RuntimeContext, CoFlatMapFunction
from pyflink.common.typeinfo import Types
from pyflink.common import Duration as Time, WatermarkStrategy, Duration
from pyflink.common.watermark_strategy import TimestampAssigner
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import KeyedProcessFunction, RuntimeContext
from pyflink.datastream.state import ValueStateDescriptor
from datetime import datetime

# Register the tables in the env
table1 = st_env.from_path("sql_table_1")
table2 = st_env.from_path("sql_table_2")

# interpret the updating Tables as DataStreams
type_info1 = Types.TUPLE([Types.SQL_TIMESTAMP(), Types.STRING(), Types.INT()])
ds1 = st_env.to_append_stream(table2, type_info=type_info1)
type_info2 = Types.TUPLE([Types.SQL_TIMESTAMP(), Types.STRING(), Types.STRING()])
ds2 = st_env.to_append_stream(table1, type_info=type_info2)

# join two streams and update the rule-set state
class MyCoFlatMapFunction(CoFlatMapFunction):

    def open(self, runtime_context: RuntimeContext):
        '''This method is called when the function is opened in the runtime. It is used for initialization purposes.'''
        # Map state that we use to maintain the filtering and rules
        state_desc = MapStateDescriptor('map', Types.PICKLED_BYTE_ARRAY(), Types.PICKLED_BYTE_ARRAY())
        self.state = runtime_context.get_map_state(state_desc)
        # maintain state 2
        ob_state_desc = MapStateDescriptor('map_OB', Types.PICKLED_BYTE_ARRAY(), Types.PICKLED_BYTE_ARRAY())
        self.ob_state = runtime_context.get_map_state(ob_state_desc)

    # called on ds1
    def flat_map1(self, value):
        '''This method is called for each element in the first of the connected streams'''
        list_res = value[1].split('|')
        for i in list_res:
            time = datetime.utcnow().replace(microsecond=0)
            yield (time, f"{i}_one")

    # called on ds2
    def flat_map2(self, value):
        '''This method is called for each element in the second of the connected streams'''
        list_res = value[1].split('|')
        for i in list_res:
            time = datetime.utcnow().replace(microsecond=0)
            yield (time, f"{i}_two")

connectedStreams = ds1.connect(ds2)
output_type_info = Types.TUPLE([Types.SQL_TIMESTAMP(), Types.STRING()])
ds = connectedStreams.key_by(lambda value: value[1], lambda value: value[1]).flat_map(MyCoFlatMapFunction(), output_type=output_type_info)

name = 'output_table'
ds_table_name = 'temporary_table_dump'
st_env.execute_sql(f"""DROP TABLE IF EXISTS {name}""")

def create_table(table_name, stream_name, region, stream_initpos):
    return """ CREATE TABLE {0} (
                   f0 TIMESTAMP(3),
                   f1 STRING,
                   WATERMARK FOR f0 AS f0 - INTERVAL '5' SECOND
               )
               WITH (
                   'connector' = 'kinesis',
                   'stream' = '{1}',
                   'aws.region' = '{2}',
                   'scan.stream.initpos' = '{3}',
                   'sink.partitioner-field-delimiter' = ';',
                   'sink.producer.collection-max-count' = '100',
                   'format' = 'json',
                   'json.timestamp-format.standard' = 'ISO-8601'
               ) """.format(
        table_name, stream_name, region, stream_initpos
    )

# Creates a sink table writing to a Kinesis Data Stream
st_env.execute_sql(create_table(name, 'output-test', 'eu-west-2', 'LATEST'))

table = st_env.from_data_stream(ds)
st_env.execute_sql(f"""DROP TEMPORARY VIEW IF EXISTS {ds_table_name}""")
st_env.create_temporary_view(ds_table_name, table)

# emit the Table API table
st_env.execute_sql(f"INSERT INTO {name} SELECT * FROM {ds_table_name}").wait()

How to speed up importing an Excel file containing more than 5000 lines into an SQLite database with Django

Importing an xls file (more than 5000 lines) into my SQLite database takes too long.
def importeradsl(request):
    if "GET" == request.method:
        pass  # (GET branch omitted in the question)
    else:
        excel_file = request.FILES["excel_file"]
        # you may put validations here to check extension or file size
        wb = openpyxl.load_workbook(excel_file)
        # getting a particular sheet by name out of many sheets
        worksheet = wb["Sheet 1"]
        # iterating over the rows and getting value from each cell in row
        for row in worksheet.iter_rows(min_row=2):
            row_data = list()
            for cell in row:
                row_data.append(str(cell.value))
            # Get content fields for the DerangementCuivre models
            # Client
            nd = row_data[0]
            nom_client = row_data[3]
            nd_contact = row_data[4]
            # Categorie
            code_categorie = row_data[6]
            acces_reseau = row_data[8]
            etat = row_data[9]
            origine = row_data[10]
            code_sig = row_data[11]
            agent_sig = row_data[13]
            date_sig = dt.datetime.strftime(parse(row_data[14]), '%Y-%m-%d %H:%M:%S')
            date_essai = dt.datetime.strftime(parse(row_data[15]), '%Y-%m-%d %H:%M:%S')
            agent_essai = row_data[18]
            try:
                date_ori = dt.datetime.strptime(row_data[19], '%Y-%m-%d %H:%M:%S')
            except ValueError as e:
                print("Vous", e)
            else:
                date_ori = dt.datetime.strftime(parse(row_data[19]), '%Y-%m-%d %H:%M:%S')
            agent_ori = row_data[20]
            code_ui = row_data[21]
            equipe = row_data[22]
            sous_traitant = row_data[23]
            date_pla = dt.datetime.strftime(parse(row_data[24]), '%Y-%m-%d %H:%M:%S')
            date_rel = dt.datetime.strftime(parse(row_data[25]), '%Y-%m-%d %H:%M:%S')
            date_releve = dt.datetime.strptime(row_data[25], '%Y-%m-%d %H:%M:%S')
            date_essais = dt.datetime.strptime(row_data[15], '%Y-%m-%d %H:%M:%S')
            pst = pytz.timezone('Africa/Dakar')
            date_releve = pst.localize(date_releve)
            utc = pytz.UTC
            date_releve = date_releve.astimezone(utc)
            date_essais = pst.localize(date_essais)
            date_essais = date_essais.astimezone(utc)
            code_rel = row_data[26]
            localisation = row_data[27]
            cause = row_data[28]
            commentaire = row_data[29]
            agent_releve = row_data[30]
            centre_racc = row_data[32]
            rep = row_data[33]
            srp = row_data[34]
            delai = (date_releve - date_essais).total_seconds()
            dali = divmod(delai, 86400)[0]
            semaine = date_releve.isocalendar()[1]
            mois = date_releve.month
            annee = date_releve.year
            if dali > 7:
                etats = "PEX PLUS"
            else:
                etats = "PEX"
            # Save a client
            Client(nd=nd, nom=nom_client, mobile=nd_contact).save()
            # Save the category
            # Code for the category name - to fill in later
            Categorie(code_categorie=code_categorie, nom="Public").save()
            # Save the signaling agent
            AgentSig(matricule=agent_sig, nom="Awa").save()
            # Save the testing agent
            AgentEssai(matricule=agent_essai).save()
            # Save the routing agent
            AgentOri(matricule=agent_ori).save()
            # Save the clearing agent
            AgentRel(matricule=agent_releve).save()
            # Save the subcontractor
            SousTraitant(nom=sous_traitant).save()
            # Save the center
            Centre(code=centre_racc).save()
            # Save the intervention unit
            UniteIntervention(code_ui=code_ui,
                              sous_traitant=SousTraitant.objects.get(nom=sous_traitant)).save()
            # Save the distribution frame
            Repartiteur(code=rep, crac=Centre.objects.get(code=centre_racc)).save()
            # Save the team
            Equipe(nom=equipe, unite=UniteIntervention.objects.get(code_ui=code_ui)).save()
            # Save the sub-distribution frame
            SousRepartiteur(code=srp, rep=Repartiteur.objects.get(code=rep)).save()
            # Save the fault record
            DerangementAdsl(acces_reseau=acces_reseau,
                            nd_client=Client.objects.get(nd=nd),
                            categorie=Categorie(code_categorie=code_categorie),
                            etat=etat,
                            origine=origine,
                            code_sig=code_sig,
                            agent_sig=AgentSig.objects.get(matricule=agent_sig),
                            date_sig=date_sig,
                            date_essai=date_essai,
                            agent_essai=AgentEssai.objects.get(matricule=agent_essai),
                            date_ori=date_ori,
                            agent_ori=AgentOri.objects.get(matricule=agent_ori),
                            sous_traitant=SousTraitant.objects.get(nom=sous_traitant),
                            unite_int=UniteIntervention.objects.get(code_ui=code_ui),
                            date_pla=date_pla,
                            date_rel=date_rel,
                            code_rel=code_rel,
                            code_local=localisation,
                            cause=cause,
                            comment_cause=commentaire,
                            agent_rel=AgentRel.objects.get(matricule=agent_releve),
                            centre=Centre.objects.get(code=centre_racc),
                            rep=Repartiteur.objects.get(code=rep),
                            srep=SousRepartiteur.objects.get(code=srp),
                            delai=dali,
                            etat_vr=etats,
                            semaine=semaine,
                            mois=mois,
                            annee=annee).save()
There are a few things that are incorrect.
I propose to you the following approach:
Make your code more readable
Remove useless queries
Avoid related records duplication
Cache out your related instances.
Use bulk_create
Looking at your code, as a rough estimate you will issue over 30 SQL queries per row, which is a bit much...
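(As a side note, not from the original answer: you can measure that yourself in a dev environment, since with DEBUG=True Django records every executed query on the connection. The import_one_row helper below is hypothetical, standing in for the per-row logic from the question.)

from django.db import connection, reset_queries

reset_queries()       # clear previously recorded queries (requires settings.DEBUG = True)
import_one_row(row)   # hypothetical helper wrapping the per-row logic from the question
print(len(connection.queries), "queries for one row")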
1. Make your code more readable.
Your parsing logic can be DRYed, a lot.
First, identify what you do with your data.
From my point of view, 2 main functions:
Do nothing:
def no_transformation(value):
    return str(value)
Parse dates
def strptime(value):
    """
    I can't really tell what your 'parse' function does, I let it be but it might
    be interesting adding your logic in here
    """
    # mirrors the strftime(parse(...)) pattern from the question
    return dt.datetime.strftime(parse(str(value)), '%Y-%m-%d %H:%M:%S')
Now, you can declare your parser configuration:
PARSER_CONFIG = (
    #(column_index, variable_name, transformation_function)
    (0, 'nd', no_transformation),
    (10, 'origine', no_transformation),
    (11, 'code_sig', no_transformation),
    (13, 'agent_sig', no_transformation),
    (14, 'date_sig', strptime),
    (15, 'date_essai', strptime),
    (18, 'agent_essai', no_transformation),
    (19, 'date_ori', strptime),
    (20, 'agent_ori', no_transformation),
    (21, 'code_ui', no_transformation),
    (22, 'equipe', no_transformation),
    (23, 'sous_traitant', no_transformation),
    (24, 'date_pla', strptime),
    (25, 'date_rel', strptime),
    (26, 'code_rel', no_transformation),
    (27, 'localisation', no_transformation),
    (28, 'cause', no_transformation),
    (29, 'commentaire', no_transformation),
    (3, 'nom_client', no_transformation),
    (30, 'agent_releve', no_transformation),
    (32, 'centre_racc', no_transformation),
    (33, 'rep', no_transformation),
    (34, 'srp', no_transformation),
    (4, 'nd_contact', no_transformation),
    (6, 'code_categorie', no_transformation),
    (8, 'acces_reseau', no_transformation),
    (9, 'etat', no_transformation),
    (15, 'date_essais', strptime),
    (19, 'date_ori', strptime),
    (25, 'date_releve', strptime),
)
Now, you know how to parse your data, and how to name it.
Let's just put that stuff into a dict.
def parse(row):
    """Transform a row into a dict

    Args:
        row (tuple): Your row's data

    Returns:
        dict: Your parsed data, named into a dict.
    """
    return {
        key: transform(row[index]) for index, key, transform in PARSER_CONFIG
    }
From here, your parser is way more readable, you know exactly what you're doing with your data.
Wrapping this up all together, you should get:
PARSER_CONFIG = (
    #(column_index, variable_name, transformation_function)
    #...
)

def no_transformation(value):
    return str(value)

def strptime(value):
    return str(value)  # placeholder - see the real implementation above

def parse(row):
    """Transform a row into a dict

    Args:
        row (tuple): Your row's data

    Returns:
        dict: Your parsed data, named into a dict.
    """
    return {
        key: transform(row[index]) for index, key, transform in PARSER_CONFIG
    }

for row in rows:
    item = parse(row)  # < Your data, without related instances yet....
Still have some work to create your related instances, but we'll get there eventually.
2. Removing useless queries.
You do:
#...First, you create a record
Client(nd=nd, nom=nom_client, mobile=nd_contact).save()
#... Then you fetch it when saving DerangementAdsl
nd_client=Client.objects.get(nd=nd)
While a more pythonic way of doing this would be:
#... You create and assign your instance.
# objects.create() returns the saved instance (instance.save() returns None)
client = Client.objects.create(nd=item.get('nd'),
                               nom=item.get('nom_client'),
                               mobile=item.get('nd_contact'))
#...
nd_client = client
You just saved one SQL query per row! Apply the same logic to each model, and you'll save around 20 queries per row!
categorie=Categorie.objects.create(code_categorie=item.get('code_categorie'), nom="Public"),
# Save the signaling agent
agent_sig=AgentSig.objects.create(matricule=item.get('agent_sig'), nom="Awa"),
# Save the testing agent
agent_essai=AgentEssai.objects.create(matricule=item.get('agent_essai')),
# Save the routing agent
agent_ori=AgentOri.objects.create(matricule=item.get('agent_ori')),
# Save the clearing agent
agent_rel=AgentRel.objects.create(matricule=item.get('agent_releve')),
# Save the subcontractor
sous_traitant=SousTraitant.objects.create(nom=item.get('sous_traitant')),
# Save the center
centre=Centre.objects.create(code=item.get('centre_racc')),
# Save the intervention unit
unite_int=UniteIntervention.objects.create(code_ui=item.get('code_ui'), sous_traitant=sous_traitant),  # < You save one extra query with sous_traitant
# Save the distribution frame
rep=Repartiteur.objects.create(code=item.get('rep'), crac=centre),  # < You save one extra query with centre
# Save the team
equipe=Equipe.objects.create(nom=item.get('equipe'), unite=unite_int),  # < You save one extra query with unite_int
# Save the sub-distribution frame
srep=SousRepartiteur.objects.create(code=item.get('srp'), rep=rep),  # < You save one extra query with rep
3. Avoid related records duplication
Now there is one big issue:
Considering you have multiple rows for each client,
you'll eventually find yourself with many duplicates, and you do not want that.
Instead of using create, you should go with get_or_create.
Please note it returns a tuple: (instance, created)
So.... your code should go like:
categorie, categorie_created = Categorie.objects.get_or_create(code_categorie=item.get('code_categorie'), nom="Public")
agent_sig, agent_sig_created = AgentSig.objects.get_or_create(matricule=item.get('agent_sig'), nom="Awa")
agent_essai, agent_essai_created = AgentEssai.objects.get_or_create(matricule=item.get('agent_essai'))
agent_ori, agent_ori_created = AgentOri.objects.get_or_create(matricule=item.get('agent_ori'))
agent_rel, agent_rel_created = AgentRel.objects.get_or_create(matricule=item.get('agent_releve'))
sous_traitant, sous_traitant_created = SousTraitant.objects.get_or_create(nom=item.get('sous_traitant'))
centre, centre_created = Centre.objects.get_or_create(code=item.get('centre_racc'))
unite_int, unite_int_created = UniteIntervention.objects.get_or_create(code_ui=item.get('code_ui'), sous_traitant=sous_traitant)
rep, rep_created = Repartiteur.objects.get_or_create(code=item.get('rep'), crac=centre)
equipe, equipe_created = Equipe.objects.get_or_create(nom=item.get('equipe'), unite=unite_int)
srep, srep_created = SousRepartiteur.objects.get_or_create(code=item.get('srp'), rep=rep)
Tadaaaaam, you'll only create the records that are actually necessary for your related objects.
4. Caching out your related objects.
As in the previous topic, I assume you have multiple rows for each related instance, and for each row you will still fetch it from your DB.
It's OK I guess if you're using SQLite in memory, it won't be as slow as with other DBs, but it will still be a bottleneck.
You could use an approach like:
MODEL_CACHE = {}

def get_related_instance(model, **kwargs):
    # dicts are not hashable, so build a hashable cache key from the model and the lookup kwargs
    key = (model, tuple(sorted(kwargs.items())))
    if key in MODEL_CACHE:
        return MODEL_CACHE[key]
    else:
        instance, created = model.objects.get_or_create(**kwargs)
        MODEL_CACHE[key] = instance
        return instance
# Instead of the previous lines, you now end up with:
categorie = get_related_instance(Categorie, code_categorie=item.get('code_categorie'), nom="Public")
agent_sig = get_related_instance(AgentSig, matricule=item.get('agent_sig'), nom="Awa")
agent_essai = get_related_instance(AgentEssai, matricule=item.get('agent_essai'))
agent_ori = get_related_instance(AgentOri, matricule=item.get('agent_ori'))
agent_rel = get_related_instance(AgentRel, matricule=item.get('agent_releve'))
sous_traitant = get_related_instance(SousTraitant, nom=item.get('sous_traitant'))
centre = get_related_instance(Centre, code=item.get('centre_racc'))
unite_int = get_related_instance(UniteIntervention, code_ui=item.get('code_ui'), sous_traitant=sous_traitant)
rep = get_related_instance(Repartiteur, code=item.get('rep'), crac=centre)
equipe = get_related_instance(Equipe, nom=item.get('equipe'), unite=unite_int)
srep = get_related_instance(SousRepartiteur, code=item.get('srp'), rep=rep)
I cannot tell how much you'll gain thanks to that, it really depends on the data set you're trying to import,
but from experience, it's quite drastic!
5. Use bulk_create
You are doing
for row in rows:
    DerangementAdsl(...your data...).save()  # < That's one DB call
That's one SQL query per row, while you could do:
ITEMS = []
for row in rows:
    #...Your parsing we saw previously...
    ITEMS.append(DerangementAdsl(**item))
DerangementAdsl.objects.bulk_create(ITEMS)  # < That's one DB call
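One caveat, as an aside (not from the original answer): SQLite caps the number of variables per SQL statement, so for wide models it can be safer to chunk the insert with bulk_create's batch_size argument:

DerangementAdsl.objects.bulk_create(ITEMS, batch_size=500)  # chunked inserts, still far fewer queries than one per row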
Putting it all together!
PARSER_CONFIG = (
    #(column_index, variable_name, transformation_function)
    #...
)

def no_transformation(value):
    return str(value)

def strptime(value):
    return str(value)  # placeholder - see the real implementation above

MODEL_CACHE = {}

def get_related_instance(model, **kwargs):
    # build a hashable cache key from the model and the lookup kwargs
    key = (model, tuple(sorted(kwargs.items())))
    if key in MODEL_CACHE:
        return MODEL_CACHE[key]
    else:
        instance, created = model.objects.get_or_create(**kwargs)
        MODEL_CACHE[key] = instance
        return instance

def parse(row):
    """Transform a row into a dict

    Args:
        row (tuple): Your row's data

    Returns:
        dict: Your parsed data, named into a dict.
    """
    item = {
        key: transform(row[index]) for index, key, transform in PARSER_CONFIG
    }
    # create (or fetch from cache) the related instances that depend on each other first
    sous_traitant = get_related_instance(SousTraitant, nom=item.get('sous_traitant'))
    centre = get_related_instance(Centre, code=item.get('centre_racc'))
    unite_int = get_related_instance(UniteIntervention, code_ui=item.get('code_ui'), sous_traitant=sous_traitant)
    rep = get_related_instance(Repartiteur, code=item.get('rep'), crac=centre)
    item.update({
        'categorie': get_related_instance(Categorie, code_categorie=item.get('code_categorie'), nom="Public"),
        'agent_sig': get_related_instance(AgentSig, matricule=item.get('agent_sig'), nom="Awa"),
        'agent_essai': get_related_instance(AgentEssai, matricule=item.get('agent_essai')),
        'agent_ori': get_related_instance(AgentOri, matricule=item.get('agent_ori')),
        'agent_rel': get_related_instance(AgentRel, matricule=item.get('agent_releve')),
        'sous_traitant': sous_traitant,
        'centre': centre,
        'unite_int': unite_int,
        'rep': rep,
        'equipe': get_related_instance(Equipe, nom=item.get('equipe'), unite=unite_int),
        'srep': get_related_instance(SousRepartiteur, code=item.get('srp'), rep=rep),
    })
    return item

def importeradsl(request):
    #I skip your conditions for readability
    ITEMS = []
    for row in worksheet.iter_rows(min_row=2):
        ITEMS.append(DerangementAdsl(**parse(row)))
    DerangementAdsl.objects.bulk_create(ITEMS)
Conclusion
Following those recommendations, you should end up with an optimized script that runs way faster than the original one, and is way more readable and Pythonic.
Roughly, depending on your dataset, 5k lines should run in somewhere between 10 seconds and a few minutes.
If each row's related instances (client, category...) are unique, I'd use a more sophisticated approach, looping multiple times over your dataset to create the related models using bulk_create and caching them, like:
CLIENTS = []
for row in rows:
    CLIENTS.append(Client(**client_parser(row)))
clients = Client.objects.bulk_create(CLIENTS)  # You create *all* your clients with only one DB call!
Then, you cache all created clients. You do the same for all your related models, and eventually you'll load your data making a dozen DB calls, but it really depends on your business logic here: it should be engineered to handle duplicated records too.
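A minimal sketch of that two-pass idea, assuming the client_parser helper mentioned above and that nd uniquely identifies a client (both assumptions, not a definitive implementation):

# First pass: create all clients in one query
CLIENTS = [Client(**client_parser(row)) for row in rows]
Client.objects.bulk_create(CLIENTS)

# Cache them by their natural key ('nd' is assumed to be unique per client)
client_cache = {c.nd: c for c in Client.objects.all()}

# Second pass: build the fault records, resolving clients from the cache
items = []
for row in rows:
    item = parse(row)
    item['nd_client'] = client_cache[item.pop('nd')]
    # (keys that are not DerangementAdsl fields would still need to be dropped here)
    items.append(DerangementAdsl(**item))
DerangementAdsl.objects.bulk_create(items)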

Extracting Boundary information from a shapefile

I'm using shapefiles for the first time, and I'm trying to create a database with the boundaries of each polygon in it. So far, using QGIS and the .dbf file, I have been unable to figure out how to do this. Is there a way to get the boundaries from a shapefile?
I am using the zip code shapefile from the Census Bureau. Here is a link.
https://www.census.gov/geo/maps-data/data/cbf/cbf_zcta.html
Thanks
For the bounding box, you can do it through PyQGIS or the QGIS GUI. Remember that the bounding box is the minimum rectangle that encloses the geometry, so it's made of 4 coordinates:
c1 = [x_min, y_min]
c2 = [x_min, y_max]
c3 = [x_max, y_min]
c4 = [x_max, y_max]
So you need x_min, x_max, y_min and y_max to construct these coordinates. I'll post the PyQGIS answer first (we are on Stack Overflow) for extracting these 4 values:
from qgis.core import *
from qgis.utils import *
from PyQt4.QtCore import QVariant

# Import layer
layer = QgsVectorLayer('/path/to/cb_2016_us_zcta510_500k.shp', 'census_boundaries', 'ogr')
if not layer.isValid():
    print "Layer failed to load!"
else:
    print "Layer was loaded successfully!"

# add to the canvas
QgsMapLayerRegistry.instance().addMapLayer(layer)

# start editing
layer.startEditing()

# for field name and expression
fields = 'x_min', 'x_max', 'y_min', 'y_max'

for i in range(0, 4):
    field = QgsField(fields[i], QVariant.Double)  # create field
    layer.dataProvider().addAttributes([field])
    layer.updateFields()
    idx = layer.fieldNameIndex(fields[i])  # extract field index
    e = QgsExpression(fields[i] + '($geometry)')  # use a field expression to calculate value, i.e. x_min($geometry)
    e.prepare(layer.pendingFields())
    for f in layer.getFeatures():  # do it for all features
        f[idx] = e.evaluate(f)
        layer.updateFeature(f)

layer.commitChanges()  # save changes
For the QGIS GUI, simply use the Field Calculator with the same expressions as in the code above, creating 4 new fields (using double as the data type):
Field 1: x_min($geometry)
Field 2: x_max($geometry)
Field 3: y_min($geometry)
Field 4: y_max($geometry)
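To tie this back to the four corner coordinates listed at the top of the answer, here is a small sketch (same PyQGIS 2.x API as above; the ZCTA5CE10 field name is an assumption about the Census shapefile) that reads the computed attributes back for each feature:

# print the four bounding-box corners for every feature,
# using the x_min/x_max/y_min/y_max attributes created above
for f in layer.getFeatures():
    x_min, x_max = f['x_min'], f['x_max']
    y_min, y_max = f['y_min'], f['y_max']
    c1 = [x_min, y_min]
    c2 = [x_min, y_max]
    c3 = [x_max, y_min]
    c4 = [x_max, y_max]
    print f['ZCTA5CE10'], c1, c2, c3, c4  # ZCTA5CE10 is assumed to be the zip-code field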

How to write lists as input filters to mysql table in django model instance

I have the below fields in my Django model:
class cube(models.Model):
    pid = models.IntegerField()
    lc = models.CharField(max_length=100)
    sid = models.IntegerField()
    st = models.IntegerField()
    wid = models.IntegerField()
    wt = models.IntegerField()
I have the below input sets coming from clients:
input_set1 object -
[{"sid":1,"st":7},{"sid":7,"st":5},{"sid":5,"st":9},{"sid":2,"st":7}]
input_set2 object -
[{"wid":3,"wt":5},{"wid":1,"wt":7},{"wid":4,"wt":8},{"wid":2,"wt":5},{"wid":5,"wt":5}]
Below is my requirement, in MySQL notation:
select pid,lc from cube
where (((sid=1) AND (st>=7)) AND ((sid=7) AND (sid>=5)) AND ((sid=5) AND
(st>=9)) AND ((sid=2) AND (st>=7)))
AND (((wid=3) AND (wt>=5)) AND ((wid=1) AND (wt>=7)) AND ((wid=4) AND
(wt>=8)) AND ((wid=2) AND (wt>=5)) AND ((wid=5) AND (wt>=5)))
The input {sid,st} and {wid,wt} sets can each contain up to 10 items.
How can I write this SQL in Django?
I would like to use the input parameters as lists like below in my view, so that it is generic for the input sets:
input_set1=[{1,7},{7,5},{5,9},{2,7}]
input_set2=[{3,5},{1,7},{4,8},{2,5},{5,5}]
fieldsReq=['pid','lc']
queryset=cube.objects.values_list(fieldsReq).filter(reduce(operator.and_, (Q(sid__contains=x) for x in ['3', '5', '6']),(Q(rt__contains=x) for x in ['4', '8', '3']))) #Am not sure how to write the condition here
I would like to map input_set1 to {sid,st} and input_set2 to {wid,wt}, but am finding it difficult to write.
How can I achieve this in Django to get the fields from the MySQL table?
Here's my attempt for sid, it should apply to wid as well.
from functools import reduce  # needed on Python 3; reduce is a builtin on Python 2
from django.db.models import Q

# original input
input = [{"sid":1,"st":7},{"sid":7,"st":5},{"sid":5,"st":9},{"sid":2,"st":7}]

# a list of Q objects
queries = [Q(sid=i['sid'], st__gte=i['st']) for i in input]

# 'AND' all conditions
sid_query = reduce(lambda x, y: x & y, queries)

fieldsReq = ['pid', 'lc']
queryset = cube.objects.values_list(*fieldsReq).filter(sid_query)  # note the * to unpack the field names
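As a sketch extending the same idea to both input sets (not part of the original answer), the wid/wt conditions can be built the same way and combined with &, mirroring the AND between the two groups in the SQL:

# build the same kind of AND-ed condition for the wid/wt input set
input_set2 = [{"wid":3,"wt":5},{"wid":1,"wt":7},{"wid":4,"wt":8},{"wid":2,"wt":5},{"wid":5,"wt":5}]
wid_query = reduce(lambda x, y: x & y,
                   [Q(wid=i['wid'], wt__gte=i['wt']) for i in input_set2])

# combine both composite conditions
queryset = cube.objects.filter(sid_query & wid_query).values_list('pid', 'lc')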

JPQL query syntax exception

I am trying to run the following query:
select new br.com.edipo.ada.entity.Resultado (et, avg(es.vlEscolha) as vlCalculado)
from Escolha es
join fetch Resolucao re on re.idResolucao = es.idResolucao
join fetch Alternativa al on al.idAlternativa = es.idAlternativa
join fetch Questao qu on qu.idQuestao = al.idQuestao
join fetch QuestaoEtiqueta qe on qe.idQuestao = qu.idQuestao
join fetch Etiqueta et on et.idEtiqueta = qe.idEtiqueta
where es.blSelecionada = 1
and re.idAvaliacao = :idAvaliacao
and re.idUsuario = :idUsuario
group by et.dsEtiqueta
But I am getting the following error:
Caused by: org.hibernate.hql.internal.ast.QuerySyntaxException: unexpected token: on near line 1, column 149 [select new br.com.edipo.ada.entity.Resultado (et, avg(es.vlEscolha) as vlCalculado) from br.com.edipo.ada.entity.Escolha es join fetch Resolucao re on re.idResolucao = es.idResolucao join fetch Alternativa al on al.idAlternativa = es.idAlternativa join fetch Questao qu on qu.idQuestao = al.idQuestao join fetch QuestaoEtiqueta qe on qe.idQuestao = qu.idQuestao join fetch Etiqueta et on et.idEtiqueta = qe.idEtiqueta where es.blSelecionada = 1 and re.idAvaliacao = :idAvaliacao and re.idUsuario = :idUsuario group by et.dsEtiqueta]
According to it, the error is on column 149 ("... Resolucao re ON ..."), but I cannot see what is wrong.
I am using JPA 2.0 on JBoss AS 7.
Indeed, the problem is with the ON keyword, as it is not used in JPQL. Try replacing your query with:
select new br.com.edipo.ada.entity.Resultado (et,avg(es.vlEscolha) as vlCalculado)
from Escolha es
join fetch Resolucao re
join fetch Alternativa al
join fetch Questao qu
join fetch QuestaoEtiqueta qe
join fetch Etiqueta et
where es.blSelecionada = 1
and re.idAvaliacao = :idAvaliacao
and re.idUsuario = :idUsuario
group by et.dsEtiqueta
Thanks! Here is how it ended up:
select new br.com.edipo.ada.entity.Resultado (et.dsEtiqueta,avg(es.vlEscolha) as vlCalculado)
from Escolha es
join es.resolucao re
join es.alternativa al
join al.questao qu
join qu.etiquetas et
where es.blSelecionada = 1
and re.avaliacao.id = :idAvaliacao
and re.idUsuario = :idUsuario
group by et.dsEtiqueta