I have a model with a Postgres JSON field.
from django.contrib.postgres.fields import JSONField
from django.db import models

class MyModel(models.Model):
    data = JSONField(null=True)
then, I do:
m1 = MyModel.objects.create(data={'10':'2017-12-1'})
m2 = MyModel.objects.create(data={'10':'2018-5-1'})
I want to query all the MyModel rows whose value under key '10' starts with '2017', so I want to write:
MyModel.objects.filter(data__10__startswith='2017')
The problem is that the 10 is interpreted as an integer, so in the generated query it is treated as a list index rather than an object key.
Is there any way to solve this (other than writing raw queries)?
This is the generated query:
SELECT "systools_mymodel"."id", "systools_mymodel"."data" FROM "systools_mymodel" WHERE ("systools_mymodel"."data" ->> 10)::text LIKE '2017%' LIMIT 21;
And I want the 10 to be quoted (which would give me the right answer).
Thanks!
A very hackish solution (use at your own risk, tested under Django 2.0.5, voids warranty...):
# patch_jsonb.py
from django.contrib.postgres.fields.jsonb import KeyTransform


def as_sql(self, compiler, connection):
    key_transforms = [self.key_name]
    previous = self.lhs
    while isinstance(previous, KeyTransform):
        key_transforms.insert(0, previous.key_name)
        previous = previous.lhs
    lhs, params = compiler.compile(previous)
    if len(key_transforms) > 1:
        return "(%s %s %%s)" % (lhs, self.nested_operator), [key_transforms] + params
    try:
        int(self.key_name)
    except ValueError:
        # Non-numeric key: "K10" fails int(), lands here, and gets unwrapped
        # to a quoted '10' so Postgres treats it as an object key.
        if self.key_name.startswith("K") and self.key_name[1:].isnumeric():
            lookup = "'%s'" % self.key_name[1:]
        else:
            lookup = "'%s'" % self.key_name
    else:
        # Plain numeric key: keep the unquoted list-index semantics.
        lookup = "%s" % self.key_name
    return "(%s %s %s)" % (lhs, self.operator, lookup), params


def patch():
    KeyTransform.as_sql = as_sql
Usage:
Add this to the bottom of your settings.py:
import patch_jsonb
patch_jsonb.patch()
Instead of __123__ lookups use __K123__ lookups - the uppercase K will be stripped by this patch:
MyModel.objects.filter(data__K10__startswith='2017')
And consider avoiding using numbers as jsonb object keys...
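For example, as a sketch of that last suggestion: prefixing keys with a letter at write time sidesteps the integer-vs-key ambiguity without any patching (the 'k10' key name is a made-up convention):

m1 = MyModel.objects.create(data={'k10': '2017-12-1'})
MyModel.objects.filter(data__k10__startswith='2017')  # 'k10' can never look like a list index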
I'm new to Python and looking for some assistance with formatting print output into rows and columns. This data will eventually be sent to a CSV file.
The script grabs data from multiple hosts. The number of lines is variable, as are the lengths of the interface names and descriptions.
Currently the output looks like this:
hostname IF-MIB::ifDescr.1 = GigabitEthernet0/0/0
hostname IF-MIB::ifAlias.1 = --> InterfaceDesc
hostname IF-MIB::ifOperStatus.1 = 'up'
hostname IF-MIB::ifDescr.2 = GigabitEthernet0/0/1
hostname IF-MIB::ifAlias.2 = --> InterfaceDesc
hostname IF-MIB::ifOperStatus.2 = 'up'
hostname IF-MIB::ifDescr.3 = GigabitEthernet0/0/2
hostname IF-MIB::ifAlias.3 = --> InterfaceDesc
hostname IF-MIB::ifOperStatus.3 = 'up'
I'm trying to format it into the following rows and columns, with a header for each column (hostname, interface, interface desc, and status).
hostname    interface               interface desc    status
hostname    GigabitEthernet0/0/0    InterfaceDesc     up
hostname    GigabitEthernet0/0/1    InterfaceDesc     up
hostname    GigabitEthernet0/0/2    InterfaceDesc     up
The print code I currently have is here. I want to keep the print statements for errors.
for errorIndication, errorStatus, errorIndex, varBinds in snmp_iter:
    # Check for errors and print out results
    if errorIndication:
        print(errorIndication)
    elif errorStatus:
        print('%s at %s' % (errorStatus.prettyPrint(),
                            errorIndex and varBinds[int(errorIndex) - 1][0] or '?'))
    else:
        for varBind in varBinds:
            print(hostip),
            print(' = '.join([x.prettyPrint() for x in varBind]))
Full script:
from pysnmp.hlapi import *

routers = ["router1"]

# adds routers to bulkCmd
def snmpquery(hostip):
    snmp_iter = bulkCmd(SnmpEngine(),
                        CommunityData('Community'),
                        UdpTransportTarget((hostip, 161)),
                        ContextData(),
                        0, 50,  # fetch up to 50 OIDs
                        ObjectType(ObjectIdentity('IF-MIB', 'ifDescr')),
                        ObjectType(ObjectIdentity('IF-MIB', 'ifAlias')),
                        ObjectType(ObjectIdentity('IF-MIB', 'ifOperStatus')),
                        lexicographicMode=False)  # End bulk request once outside of OID child objects

    for errorIndication, errorStatus, errorIndex, varBinds in snmp_iter:
        # Check for errors and print out results
        if errorIndication:
            print(errorIndication)
        elif errorStatus:
            print('%s at %s' % (errorStatus.prettyPrint(),
                                errorIndex and varBinds[int(errorIndex) - 1][0] or '?'))
        else:
            for rowId, varBind in enumerate(varBindTable):
                oid, value = varBind
                print('%20.20s' % value)
                if not rowId and rowId % 3 == 0:
                    print('\n')

# calls snmpquery for all routers in list
for router in routers:
    snmpquery(router)
Any help you can provide is much appreciated.
Thanks!
Assuming the snmp_iter is initialized with three SNMP table columns:
snmp_iter = bulkCmd(SnmpEngine(),
                    UsmUserData('usr-md5-des', 'authkey1', 'privkey1'),
                    Udp6TransportTarget(('demo.snmplabs.com', 161)),
                    ContextData(),
                    0, 25,
                    ObjectType(ObjectIdentity('IF-MIB', 'ifDescr')),
                    ObjectType(ObjectIdentity('IF-MIB', 'ifAlias')),
                    ObjectType(ObjectIdentity('IF-MIB', 'ifOperStatus')))
you can be sure that (for the GETNEXT and GETBULK commands) pysnmp always returns a rectangular table in a row-by-row fashion.
Knowing the number of columns you requested (3), you can print the output row by row:
row = []
for rowId, varBind in enumerate(varBinds):
    oid, value = varBind
    row.append('%-20.20s' % value)
    if rowId % 3 == 2:  # third requested column seen -> the row is complete
        print(' '.join(row))
        row = []
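To get the exact layout asked for in the question (a header line plus aligned columns), one option is to walk the varBinds three at a time and format the fields explicitly. A minimal sketch, assuming hostip holds the hostname and the columns arrive in ifDescr, ifAlias, ifOperStatus order:

ROW_FORMAT = '%-15s %-22s %-18s %-8s'
print(ROW_FORMAT % ('hostname', 'interface', 'interface desc', 'status'))
for i in range(0, len(varBinds), 3):
    descr, alias, status = varBinds[i:i + 3]
    # each varBind is an (OID, value) pair; [1] is the value
    print(ROW_FORMAT % (hostip,
                        descr[1].prettyPrint(),
                        alias[1].prettyPrint(),
                        status[1].prettyPrint()))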
Is there a Pythonic way to refer to columns of 2D lists by name?
I import a lot of tables from the web so I made a general purpose function that creates 2 dimensional lists out of various HTML tables. So far so good. But the next step is often to parse the table row by row.
# Sample table.
# In real life I would do something like: table = HTML_table('url', 'table id')
table = [
    ['Column A', 'Column B', 'Column C'],
    ['One', 'Two', 3],
    ['Four', 'Five', 6]
]
# Current code:
iA = table[0].index('Column A')
iC = table[0].index('Column C')
for row in table[1:]:
    process_row(row[iA], row[iC])
# Desired code:
for row in table[1:]:
    process_row(row['Column A'], row['Column C'])
I think you'll really like the pandas module! http://pandas.pydata.org/
Put your list into a DataFrame
This could also be done directly from html, csv, etc.
import pandas as pd

df = pd.DataFrame(table[1:], columns=table[0]).astype(str)
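As a sketch of the direct-from-HTML route: pd.read_html returns a list of DataFrames, one per table found on the page (it needs an HTML parser such as lxml installed; the URL below is a placeholder):

dfs = pd.read_html('http://example.com/page-with-tables.html')
df = dfs[0]  # pick whichever table you want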
Access columns
df['Column A']
Access first row by index
df.iloc[0]
Process row by row
df.apply(lambda x: '_'.join(x), axis=1)  # axis=1 applies the function to each row
for index, row in df.iterrows():
    process_row(row['Column A'], row['Column C'])
Process a column
df['Column C'].astype(int).sum()
Wouldn't an ordered dict, with column names as keys and lists of row values as values, be a better approach for your problem? I would go with something like:
table = {
    'Column A': [1, 4],
    'Column B': [2, 5],
    'Column C': [3, 6]
}

# And you would parse column by column...
for col, rows in table.items():  # .iteritems() on Python 2
    # do something with the column
    ...
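For completeness, a minimal sketch of building such a column dict from the question's row-oriented table (names here mirror the question; nothing else is assumed):

from collections import OrderedDict

header, rows = table[0], table[1:]   # table as defined in the question
columns = OrderedDict(zip(header, (list(col) for col in zip(*rows))))

columns['Column A']   # -> ['One', 'Four']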
My QueryList is simple to use.
ql.filter(portfolio='123')
ql.group_by(['portfolio', 'ticker'])
from collections import OrderedDict, defaultdict


class QueryList(list):
    """filter and/or group_by a list of objects."""

    def group_by(self, attrs) -> dict:
        """Like a database group_by function.

        args:
            attrs: str or list.
        Returns:
            {value_of_the_group: list_of_matching_objects, ...}
            When attrs is a list, each key is a tuple.
            Ex:
                {'AMZN': QueryList(),
                 'MSFT': QueryList(),
                 ...
                }
                -- or --
                {('Momentum', 'FB'): QueryList(),
                 ...,
                }
        """
        result = defaultdict(QueryList)
        if isinstance(attrs, str):
            for item in self:
                result[getattr(item, attrs)].append(item)
        else:
            for item in self:
                result[tuple(getattr(item, x) for x in attrs)].append(item)
        return result

    def filter(self, **kwargs):
        """Returns the subset of this QueryList that has matching attributes.

        args:
            kwargs: Attribute name/value pairs.
        Example:
            foo.filter(portfolio='123', account='ABC').
        """
        ordered_kwargs = OrderedDict(kwargs)
        match = tuple(ordered_kwargs.values())

        def is_match(item):
            return tuple(getattr(item, y) for y in ordered_kwargs.keys()) == match

        result = QueryList(x for x in self if is_match(x))
        return result

    def scalar(self, default=None, attr=None):
        """Returns the first item in this QueryList.

        args:
            default: The value to return if there is less than one item,
                or if the attr is not found.
            attr: Returns getattr(item, attr) if not None.
        """
        item, = self[0:1] or [default]
        if attr is None:
            result = item
        else:
            result = getattr(item, attr, default)
        return result
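A quick usage sketch (Position is a hypothetical record type, made up for illustration):

from collections import namedtuple

Position = namedtuple('Position', 'portfolio ticker qty')

ql = QueryList([
    Position('123', 'MSFT', 100),
    Position('123', 'AMZN', 50),
    Position('456', 'MSFT', 25),
])

ql.filter(portfolio='123', ticker='MSFT')        # -> QueryList of one Position
ql.group_by('ticker')['MSFT']                    # -> QueryList of both MSFT positions
ql.filter(portfolio='999').scalar(default=None)  # -> None (no match)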
I tried pandas. I wanted to like it, I really did. But ultimately it is too complicated for my needs.
For example:
df[(df['portfolio'] == '123') & (df['ticker'] == 'MSFT')]
is not as simple as
ql.filter(portfolio='123', ticker='MSFT')
Furthermore, creating a QueryList is simpler than creating a df.
That's because you tend to use custom classes with a QueryList. The data conversion code naturally lives in the custom class, which keeps it separate from the rest of the logic; data conversion for a df would normally be done inline with the rest of the code.
Can I filter the query by this kind of criteria? Something like
Model.objects.filter(department__is_contained=x).values_list('department')
where
x = 'AAA-BBB-CCC-DDD'
and my results should display stuff like
['AAA', 'AAA-BBB-CCC', 'BBB-CCC', 'AAA-BBB-CCC-DDD', None]
No, there isn't. The SQL you want to execute would look something like this:
SELECT *
FROM model
WHERE 'AAA-BBB-CCC-DDD' LIKE '%' + model.department + '%'
Django doesn't currently support filter terms on the left-hand side. It may be possible to implement your own reverse_contains lookup with Django 1.7 using custom lookups.
from django.db.models import Lookup
from django.db.models.fields import CharField


class ReverseContains(Lookup):
    lookup_name = 'rcontains'

    def as_sql(self, qn, connection):
        # untested! you'll have to validate this
        lhs, lhs_params = self.process_lhs(qn, connection)
        rhs, rhs_params = self.process_rhs(qn, connection)
        # note we're putting the rhs sql and params on the lhs
        params = rhs_params + lhs_params
        return "%s LIKE '%%' + %s + '%%' " % (rhs, lhs), params


CharField.register_lookup(ReverseContains)
And use it like so:
x = 'AAA-BBB-CCC-DDD'
Model.objects.filter(department__rcontains=x).values_list('department')
Or you could use .extra(where=) to implement the search directly in SQL for that one query.
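A rough sketch of the .extra() route (note the string concatenation in the WHERE clause is database-specific: || works on PostgreSQL and SQLite, while MySQL would want CONCAT(...)):

x = 'AAA-BBB-CCC-DDD'
Model.objects.extra(
    where=["%s LIKE '%%' || department || '%%'"],
    params=[x],
).values_list('department', flat=True)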
Here is a one-line solution:
import operator
from functools import reduce  # needed on Python 3, where reduce is not a builtin
from django.db.models import Q

x = 'AAA-BBB-CCC-DDD'
User.objects.filter(reduce(operator.or_, (Q(department__contains=y) for y in x.split('-')))).values_list('department', flat=True)
# Result will be a list of 'department' values that contain
# "AAA" or "BBB" or "CCC" or "DDD"
That one-liner can be hard to read, so here is a more detailed variant:
total_filter_q = Q()
for y in x.split("-"):
    total_filter_q |= Q(department__contains=y)

# total_filter_q now represents:
# Q(department__contains="AAA") | Q(department__contains="BBB") |
# Q(department__contains="CCC") | Q(department__contains="DDD")

User.objects.filter(total_filter_q).values_list('department', flat=True)
Result is the same as in one-line solution.
Here are some useful links:
reduce
operator
Q objects
values_list
I'm trying to build a complex queryset that joins other tables. What is puzzling me is that I can see the query when I print queryset.query, but when I call it from my code it returns no results.
class CarManager(models.Manager):
    def get_query_set(self):
        return super(CarManager, self).get_query_set().filter(version_to__isnull=True)

    def my_cars(self, language, user):
        qs = self.extra(
            select={
                'make_display': '`%s`.`%s`' % (ModelLookUpI18n._meta.db_table, ModelLookUpI18n._meta.get_field('make_display').column),
                'model_display': '`%s`.`%s`' % (ModelLookUpI18n._meta.db_table, ModelLookUpI18n._meta.get_field('model_display').column),
                'trim_display': '`%s`.`%s`' % (ModelLookUpI18n._meta.db_table, ModelLookUpI18n._meta.get_field('trim_display').column),
            },
            tables=[
                '`%s`' % ModelLookUpI18n._meta.db_table,
            ],
            where=[
                '`%s`.`%s` = `%s`.`%s`' % (ModelLookUpI18n._meta.db_table, ModelLookUpI18n._meta.get_field('model').column, ModelLookup._meta.db_table, ModelLookup._meta.get_field('id').column),
                "`%s`.`%s`='%s'" % (ModelLookUpI18n._meta.db_table, ModelLookUpI18n._meta.get_field('language').column, language)
            ]
        ).select_related().filter(created_by=user).order_by('-created_at')
        print qs.query  # prints the SQL statement
        print qs        # always prints []
        return qs
When I copy the query printed to the console and paste it into the MySQL terminal, I get 21 records and it looks very normal to me. Yet the queryset is always empty. Any ideas on how to debug further?
I even monitored the MySQL execution log to make sure that what goes to the engine matches what is printed by qs.query, and it's the same.
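One sketch for narrowing this down: execute the printed SQL through Django's own connection and compare against the queryset (keeping in mind that str(qs.query) is not guaranteed to be valid SQL, since parameters are interpolated unquoted; here the values are already inlined by .extra()):

from django.db import connection

cursor = connection.cursor()
cursor.execute(str(qs.query))
print cursor.fetchall()   # compare against list(qs)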
When I use extra in a certain way on a Django queryset (call it qs), the result of qs.count() is different from len(qs.all()). To reproduce:
Make an empty Django project and app, then add a trivial model:
class Baz(models.Model):
    pass
Now make a few objects:
>>> Baz(id=1).save()
>>> Baz(id=2).save()
>>> Baz(id=3).save()
>>> Baz(id=4).save()
Using the extra method to select only some of them produces the expected count:
>>> Baz.objects.extra(where=['id > 2']).count()
2
>>> Baz.objects.extra(where=['-id < -2']).count()
2
But add a select clause to the extra and refer to it in the where clause, and the count is suddenly wrong, even though the result of all() is correct:
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).all()
[<Baz: Baz object>, <Baz: Baz object>] # As expected
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).count()
0 # Should be 2
I think the problem has to do with django.db.models.sql.query.BaseQuery.get_count(). It checks whether the BaseQuery's select or aggregate_select attributes have been set; if so, it uses a subquery. But django.db.models.sql.query.BaseQuery.add_extra adds only to the BaseQuery's extra attribute, not select or aggregate_select.
How can I fix the problem? I know I could just use len(qs.all()), but it would be nice to be able to pass the extra'ed queryset to other parts of the code, and those parts may call count() without knowing that it's broken.
Redefining get_count() and monkeypatching appears to fix the problem:
def get_count(self):
    """
    Performs a COUNT() query using the current filter constraints.
    """
    obj = self.clone()
    if len(self.select) > 1 or self.aggregate_select or self.extra:
        # If a select clause exists, then the query has already started to
        # specify the columns that are to be returned.
        # In this case, we need to use a subquery to evaluate the count.
        from django.db.models.sql.subqueries import AggregateQuery
        subquery = obj
        subquery.clear_ordering(True)
        subquery.clear_limits()
        obj = AggregateQuery(obj.model, obj.connection)
        obj.add_subquery(subquery)
    obj.add_count_column()
    number = obj.get_aggregation()[None]
    # Apply offset and limit constraints manually, since using LIMIT/OFFSET
    # in SQL (in variants that provide them) doesn't change the COUNT
    # output.
    number = max(0, number - self.low_mark)
    if self.high_mark is not None:
        number = min(number, self.high_mark - self.low_mark)
    return number

django.db.models.sql.query.BaseQuery.get_count = quuux.get_count
Testing:
>>> Baz.objects.extra(select={'negid': '0 - id'}, where=['"negid" < -2']).count()
2
Updated to work with Django 1.2.1:
def basequery_get_count(self, using):
    """
    Performs a COUNT() query using the current filter constraints.
    """
    obj = self.clone()
    if len(self.select) > 1 or self.aggregate_select or self.extra:
        # If a select clause exists, then the query has already started to
        # specify the columns that are to be returned.
        # In this case, we need to use a subquery to evaluate the count.
        from django.db.models.sql.subqueries import AggregateQuery
        subquery = obj
        subquery.clear_ordering(True)
        subquery.clear_limits()
        obj = AggregateQuery(obj.model)
        obj.add_subquery(subquery, using=using)
    obj.add_count_column()
    number = obj.get_aggregation(using=using)[None]
    # Apply offset and limit constraints manually, since using LIMIT/OFFSET
    # in SQL (in variants that provide them) doesn't change the COUNT
    # output.
    number = max(0, number - self.low_mark)
    if self.high_mark is not None:
        number = min(number, self.high_mark - self.low_mark)
    return number

models.sql.query.Query.get_count = basequery_get_count
I'm not sure if this fix will have other unintended consequences, however.