I have a sqlite table like this. Which has 3 columns. I want to
count the number of rows where user_id = 1
is this possible with SQLAlchemy?
class UserImage(db.Model):
id = db.Column(db.Integer, primary_key=True)
user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
photo = db.Column(db.String(250))
This will return all the rows. How to modify this to get my expected result.
rows = db.session.query(func.count(UserImage.user_id)).scalar()
Thank you so much in advance.
You can do it like this:
UserImage.query.filter(UserImage.user_id == 1).count()
or
db.session.query(UserImage).filter(UserImage.user_id == 1).count()
For SQLAlchemy 2.x style, you can get the count number like this:
from sqlalchemy import select, func
db.session.execute(
select(func.count()).
select_from(User)
).one()
# or
db.session.execute(select(func.count(User.id))).scalars().one()
Now with the condition:
db.session.execute(
select(func.count()).
select_from(UserImage).
filter(UserImage.user_id == 1)
).scalars().one()
# or
db.session.execute(
select(func.count(UserImage.id)).
filter(UserImage.user_id == 1)
).scalars().one()
P.S. If you are using pure SQLAlchemy instead of Flask-SQlAlchemy, just replace db.session with session.
Related
I have a flask app that uses a blend of sqlalchemy (ORM) and flask sqlalchemy.
I have a User model, a Post model, and a followers table as follows:
from app import db
from flask_login import UserMixin
from sqlalchemy.orm import relationship
class User(UserMixin, db.Model)
id = db.Column(db.Integer, primary_key=True)
followed = db.relationship('User', secondary=followers,
primaryjoin=(followers.c.follower_id == id),
secondaryjoin=(followers.c.followed_id == id),
backref=db.backref('followers', lazy='dynamic'), lazy='dynamic') # many to many relationship with other Users
posts = relationship('Post') # one to many relationship with posts
# ...
class Post(db.Model)
id = db.Column(db.Integer, primary_key=True)
user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
#...
followers= db.Table('followers',
db.Column('follower_id', db.Integer, db.ForeignKey('user.id')),
db.Column('followed_id', db.Integer, db.ForeignKey('user.id'))
)
My question is how to query the Post table/model so that it returns only posts written by users that follow you back (i.e. friends).
The flask mega tutorial by Miguel Grinberg (which these models are based off of) uses the following sqlalchemy query to accomplish something similar to what I'm after.
def followed_posts(self):
return Post.query.join(
followers, (followers.c.followed_id == Post.user_id)).filter(
followers.c.follower_id == self.id).order_by(
Post.timestamp.desc())
I've been trying to modify the above query so that the Post table joins only with the subset of followers, characterized by if (a,b) in followers, then (b,a) in followers. I am just not sure how to implement this.
Any help would be appreciated. Also if anyone has a recommendation for a guide that goes over more 'advanced' sqlalchemy queries like the one above without a ton of pure SQL prerequisite knowledge please let me know.
Thanks in advance!
In order to execute and get what you want in one query, the following two options should produce the same result, but are quite different in being pythonic or SQL-ic.
Option-1:
I find this to be quite pythonic as the Poster.followed.any(...) reads as "where one of the followers is me.". But the SQL it produces is a bit "ugly", although it gets the job done:
def followed_posts__elegant_any(self):
user_id = self.id
Poster = aliased(User, name="poster")
return (
object_session(self)
.query(Post)
.join(Poster, Post.user)
.filter(Poster.followed.any(User.id == user_id))
.filter(Poster.followers.any(User.id == user_id))
.order_by(Post.timestamp.desc())
)
Option-2:
This is much more straightforward and is expressed in very "pure" SQL logic:
create aliases so that can join on followers table twice
perform join twice to ensure that the relationship between current user and "Poster" are both ways. The join itself is not really used for anything else, but filtering:
def followed_posts__optimal_join(self):
user_id = self.id
f1 = aliased(followers)
f2 = aliased(followers)
return (
object_session(self)
.query(Post)
.join(User, Post.user)
.join(f1, and_(User.id == f1.c.follower_id, f1.c.followed_id == user_id))
.join(f2, and_(User.id == f2.c.followed_id, f2.c.follower_id == user_id))
.order_by(Post.timestamp.desc())
)
I figured it out.
def get_friend_posts():
return Post.query.filter(Post.user_id.in_([f.id for f in self.followed if f.is_following(self)]))
# helper function
def is_following(self, user):
return self.followed.filter(followers.c.followed_id == user.id).count() > 0
I have the following model in django:
class task(models.Model):
admin = models.BooleanField()
date = modesl.DateField()
I am trying to achieve a filter which provides me with a query_set that prioritize if admin = True
So assume I have these 3 rows :
admin = True , date = 01-01-2019
admin = False , date = 01-01-2019
admin = False , date = 02-02-2019
The output of the query set will be :
admin = True , date = 01-01-2019
admin = False , date = 02-02-2019
it should filter out the row with 01-01-2019 which admin=False because there is already an admin=True row which should take prioritization.
I could do it by fetching all and removing it from the query_set myself, but want to make sure there is no other way of doing it before.
Rather than looping through the QuerySet and removing them yourself, one thing you could do is:
Fetch all the dates where admin is True
Fetch all the objects where either:
i. admin is True
ii. The date is not in part 1 (e.g. admin is False)
This can be achieved with the following:
from django.db.models import Q
true_dates = task.objects.filter(admin=True).values_list("date", flat=True)
results = task.objects.filter(Q(admin=True)|~Q(date__in=true_dates))
This will most likely be more efficient than looping through your results yourself.
Note that since querysets are 'lazy' (this means only evaluated when they absolutely have to be) this will result in just 1 db hit
Tim's answer is close, but incomplete, because he doesn't use Subquery().
This answer provides the same results, without having an additional query hit the database:
from django.db.models import Subquery, Q
dates = Task.objects.filter(admin=True)
tasks = Task.objects.filter(Q(admin=True) | ~Q(date__in=Subquery(dates.values('date')))
I have this query that is trying to find a record given the same day and a status:
ld=LunchDay.query.filter(and_(func.DATE(LunchDay.timestamp == datetime.date.today()), LunchDay.status==1))
The model:
class LunchDay(db.Model):
__tablename__ = 'lunch_day'
id = db.Column(db.Integer, primary_key=True)
user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
user = relationship("User", foreign_keys=[user_id])
timestamp = db.Column(db.DateTime, index=True, default=datetime.today())
status = db.Column(db.Integer)
It runs and doesn't throw an error, yet it seems to not regard date. It will find entries with dates like yesterdays in the DateTimeField of the database:
2018-11-13 00:00:00.000000
Which is yesterdays date, but it is picking it up based just on the status almost just like it is looking at it as an OR. The imports I use are:
from sqlalchemy import func, and_
Print out the actual sql that is being generated by your query to see what is happening. E.g.:
ld=LunchDay.query.filter(and_(func.DATE(LunchDay.timestamp == datetime.date.today()), LunchDay.status==1))
print(ld)
Prints:
SELECT lunch_day.id AS lunch_day_id, lunch_day.timestamp AS lunch_day_timestamp, lunch_day.status AS lunch_day_status
FROM lunch_day
WHERE DATE(lunch_day.timestamp = %(timestamp_1)s) AND lunch_day.status = %(status_1)s
There you can see that the equality of lunch_day.timestamp and the param timestamp_1 is being passed to the DATE function.
Which is actually pretty easy to see in your sqlalchemy query: func.DATE(LunchDay.timestamp == datetime.date.today()). I assume you want to convert LunchDay.timestamp to a date and then compare it to datetime.date.today() which should be db.func.DATE(LunchDay.timestamp) == date.today().
print(LunchDay.query.filter(and_(func.DATE(LunchDay.timestamp) == datetime.date.today(), LunchDay.status == 1)))
prints:
SELECT lunch_day.id AS lunch_day_id, lunch_day.timestamp AS lunch_day_timestamp, lunch_day.status AS lunch_day_status
FROM lunch_day
WHERE DATE(lunch_day.timestamp) = %(DATE_1)s AND lunch_day.status = %(status_1)s
One other thing to note is that multiple statements passed to filter() are automatically treated as an and_(), so you can simplify your query a little by removing that:
LunchDay.query.filter(func.DATE(LunchDay.timestamp) == datetime.date.today(), LunchDay.status == 1)
I have a problem. I have to do this query:
#app.route('/api/subscriptions/<string:id>', methods=('DELETE',))
#decorators.login_required
def delete_subscription(id):
dbsession = DBSession()
session = Session()
favorit = (dbsession.query(StudentsFavorites)
.filter(Exams.number == str(id))
.filter(StudentsFavorites.exam_id)
.filter(Students.id == StudentsFavorites.student_id)
.filter(Students.id == str(session.get_user_id()))
.delete() )
dbsession.flush()
return jsonify(error=False)
But when I do this query I get this exception:
OperationalError: (OperationalError) no such column: exams.number u'DELETE FROM students_favorites WHERE exams.number = ? AND students_favorites.exam_id AND students.id = students_favorites.student_id AND students.id = ?' ('123123123', 'a24213')
The tables are very big and got lots of information, so i can't post all of it. But this query works:
#app.route('/api/subscriptions/<string:id>', methods=('PUT',))
#decorators.login_required
def add_subscription(id):
dbsession = DBSession()
session = Session()
examID = (dbsession.query(Exams.id)
.filter(Exams.number == id).first()
)
favorit=StudentsFavorites(student_id=session.get_user_id(), exam_id=examID.id)
dbsession.add(favorit)
dbsession.flush()
return jsonify(error=False)
Short view to the table:
table: Exams
rows: id, number (number is the id i put into the function)
table: StudentsFavorites
rows: student_id, exams_id
table: Students
rows: id
I really didn't understand, why he didn't find the number row in the exception.
EDIT:
Database StudentsFavorites:
class StudentsFavorites(Base):
"""N:M resolve model for the exams to the semester.
"""
__tablename__ = "students_favorites"
student_id = Column(Unicode(255), ForeignKey("students.id"), primary_key=True, autoincrement=False)
exam_id = Column(Integer, ForeignKey("exams.id"), primary_key=True, autoincrement=False)
created_at = Column(DateTime, nullable = False, default = datetime.now)
student = relationship("Students", uselist = False, lazy="joined")
exam = relationship("Exams", uselist=False, lazy="joined")
Something like this? I tried this:
(dbsession.query(StudentsFavorites)
.filter(StudentsFavorites.exam.id == str(id))
.filter(StudentsFavorites.student.id == str(session.get_user_id()))
.delete()
)
But got the error, that id didn't exist in exams / student
You have two cases of the same problem. Your query has information for StudentFavorites which means it knows about StudentFavorites.student_id and StudentFaovrites.exams_id. It doesn't know anything about Students.id, Exames.id and Exames.number. In order for you to query a StudentFavorites object and have it know about those other values you're going to have to perform a sql join.
Join's can be a bit of a pain in the ass to get working in sqlalchemy (well... in regular sql as well). Since I don't know what your table schema is I can't talk about that but the view should look something like this.
#app.route('/api/subscriptions/<string:id>', methods=('DELETE',))
#decorators.login_required
def delete_subscription(id):
dbsession = DBSession()
session = Session()
favorit = (dbsession.query(StudentsFavorites)
.join(Exames)
.join(students)
.filter(Exams.number == str(id))
.filter(StudentsFavorites.exam_id)
.filter(Students.id == StudentsFavorites.student_id)
.filter(Students.id == str(session.get_user_id()))
.delete() )
dbsession.flush()
return jsonify(error=False)
Alternatively, you can look into setting up Foreign key relationships in your table statements if you use the ORM to create your tables
The reason your second example works is because you're specifying a query over an exam table and only using values found in that table.
Response to Edit:
Right now your table relationships aren't set up correctly. Specifically the sections: Many To Many and Deleting Rows from the Many to Many Table
This example code is explained in much more (and better) detail in the posted link but the basic idea is that you have a associate_table (in your case StudentFavorites) contains foreign keys which have a relationship which is specified in one or more of your other tables. I personally advise that you go with the table example and not the object example.
association_table = Table('association', Base.metadata,
Column('left_id', Integer, ForeignKey('left.id')),
Column('right_id', Integer, ForeignKey('right.id'))
)
class Parent(Base):
__tablename__ = 'left'
id = Column(Integer, primary_key=True)
children = relationship("Child",
secondary=association_table,
backref="parents")
class Child(Base):
__tablename__ = 'right'
id = Column(Integer, primary_key=True)
For example, I have a model like this:
Class Doggy(models.Model):
name = models.CharField(u'Name', max_length = 40)
color = models.CharField(u'Color', max_length = 20)
How can i select doggies with the same color? Or with the same name :)
UPD. Of course, I don't know the name or the color. I want to.. kind of, group by their values.
UPD2. I'm trying to do something like that, but using Django:
SELECT *
FROM table
WHERE tablefield IN (
SELECT tablefield
FROM table
GROUP BY tablefield
HAVING (COUNT(tablefield ) > 1)
)
UPD3. I'd like to do it via Django ORM, without having to iterate over the objects. I just want to get rows with duplicate values for one particular field.
I'm late to the party, but here you go:
Doggy.objects.values('color', 'name').annotate(Count('pk'))
This will give you results that have a count of how many of each Doggy you have grouped by color and name.
If you're looking for Doggy's of a certain colour - you'd do something like.
Doggy.objects.filter(color='blue')
If you want to find Doggys based on the colour of the current Doggy
def GetSimilarColoredDoggys(self):
return Doggy.objects.filter(color=self.color)
The same would go for names:-
def GetDoggysWithSameName(self):
return Doggy.objects.filter(color=self.name)
You can use itertools.groupby() for this:
import operator
import itertools
from django.db import models
def group_model_by_attr(model_class, attr_name):
assert issubclass(model_class, models.Model), \
"%s is not a Django model." % (model_class,)
assert attr_name in [field.name for field in Event._meta.fields], \
"The %s field doesn't exist on model %s" % (attr_name, model_class)
all_instances = model_class.objects.all().order_by(attr_name)
keyfunc = operator.attrgetter(attr_name)
return [{k: list(g)} for k, g in itertools.groupby(all_instances, keyfunc)]
grouped_by_color = group_model_by_attr(Doggy, 'color')
grouped_by_name = group_model_by_attr(Doggy, 'name')
grouped_by_color (for example) will be a list of dicts like [{'purple': [doggy1, doggy2], {'pink': [doggy3,]}] where doggy1,2, etc. are Doggy instances.
UPDATE:
From your update it looks like you just want a list of ids for each event type. I tested this with 250k records in postgresql on my ubuntu laptop w/ a core 2 duo & 3gb of ram, and it took .35 seconds (the itertools.group_by took .72 seconds btw) to generate the dict. You mention that you have 900K records, so this should be fast enough. If it's not it should be easy to cache/update as the records change.
from collections import defaultdict
doggies = Doggy.objects.values_list('color', 'id').order_by('color').iterator()
grouped_doggies_by_color = defaultdict(list)
for color, id in doggies:
grouped_doggies_by_color[color].append(id)
I would change your data model so that the color and name are a one-to-many relationship with Doggy as follows:
class Doggy(models.Model):
name = models.ForeignKey('DoggyName')
color = models.ForeignKey('DoggyColor')
class DoggyName(models.Model):
name = models.CharField(max_length=40, unique=True)
class DoggyColor(models.Model):
color = models.CharField(max_length=20, unique=True)
Now DoggyName and DoggyColor do not contain duplicate names or colors, and you can use them to select dogs with the same name or color.
Okay, apparently, there's no way to do such thing with ORM only.
If you have to do it, you have to use .extra() to execute needed SQL-statement (if you are using SQL database, of course)