Django annotate group by month - django

How to sum price group by month?
I try.
import itertools
qs = Contract.objects.values('created', 'proposal__price')
grouped = itertools.groupby(qs, lambda d: d.get('created').strftime('%Y-%m'))
data = [{'month': month, 'quant': sum(list(this_day))} for month, this_day in grouped]
print(data)
But the result is not what I expected.
I need this similar result
[{'month': '2016-04', 'quant': 8650}, {'month': '2016-05', 'quant': 9050}]

Your this_day inside sum(list(this_day)) is an iterator of dicts, not of numbers, so you need to pull the value you want to sum out of each dict with a list comprehension. Example:
>>> import itertools
>>> from django.contrib.auth.models import User
>>> li = User.objects.all().values('date_joined', 'username')
>>> gr = itertools.groupby(li, lambda d: d.get('date_joined').strftime('%Y-%m'))
>>> dt = [{'m': m, 'q': sum([len(x['username']) for x in q])} for m, q in gr]
>>> dt
[{'m': '2005-06', 'q': 11}, {'m': '2006-10', 'q': 22},
{'m': '2005-06', 'q': 179}, {'m': '2006-08', 'q': 10},
{'m': '2006-09', 'q': 30}, {'m': '2005-06', 'q': 74}, ... ]
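Or, for your code, probably something like this (note that itertools.groupby only merges consecutive items with the same key, so order the queryset by 'created' first; otherwise the same month can appear more than once, as '2005-06' does in the output above):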
data = [{'month': month, 'quant': sum([x['proposal__price'] for x in this_day])}
for month, this_day in grouped]

Start by extracting the month and all your values
from django.db import connection
select = {'month': connection.ops.date_trunc_sql('month', 'created')}
qs = Contract.objects.extra(select=select).values('month').annotate(my_total=Sum('proposal__price'))
Now we can use a function to group by dict keys like so:
from itertools import groupby
from operator import attrgetter
get_y = attrgetter('month')
from collections import defaultdict, Counter
def solve(dataset, group_by_key, sum_value_keys):
    """Sum selected fields of dict-like rows, grouped by one key.

    Args:
        dataset: Iterable of mappings (for example a ``.values()`` queryset).
        group_by_key: Key whose value identifies the group of each row.
        sum_value_keys: Keys whose values are accumulated per group.

    Returns:
        A ``defaultdict`` mapping each group value to a ``Counter`` holding
        the running total of every key in ``sum_value_keys``.

    Raises:
        KeyError: If a row lacks ``group_by_key`` or one of ``sum_value_keys``.
    """
    totals = defaultdict(Counter)
    for item in dataset:
        # Counter.update() adds to the existing totals instead of replacing
        # them, which is what turns this into a per-group sum.
        totals[item[group_by_key]].update(
            {key: item[key] for key in sum_value_keys}
        )
    return totals
Apply it to your queryset's newly annotated my_total, grouped by month:
solved = solve(qs, 'month', ['my_total'])
And you'll have grouped sums by month (month being a datetime object that you can manipulate to meet your needs):
for i in solved: print(i, ":", solved[i]['my_total'])
>>> datetime.datetime(2015, 9, 1, 0, 0, tzinfo=<UTC>) : 67614.23
>>> datetime.datetime(2015, 1, 1, 0, 0, tzinfo=<UTC>) : 54792.39
Now you can extract those values :)

My code, adapted from @C14L's answer:
import itertools
# from .models import Contract
# itertools.groupby only merges *consecutive* rows that share a key, so the
# rows must arrive ordered by the date the month key is derived from;
# otherwise the same month can show up in several separate groups.
c = Contract.objects.all().order_by('created').values('created', 'proposal__price')
gr = itertools.groupby(c, lambda d: d['created'].strftime('%Y-%m'))
# One dict per month: the 'YYYY-MM' label and the sum of that month's prices.
dt = [
    {'month': month, 'quant': sum(x['proposal__price'] for x in quant)}
    for month, quant in gr
]
dt
Thanks.

Related

Pyomo: What dataformats can I pass into .create_instance()

I want to import parameters from 1 excel sheet, (in the future also 1 csv file) and some parameters that I want to set in the code.
I am importing these values using pandas. But then I don't know how to pass them to the instance. I tried various options but I am only guessing...
I saw variable examples but I am not able to understand and adopt them.
import pandas as pd
from pyomo.environ import *
from pyomo.opt import SolverFactory
from pyomo.core import Var
infinity = float('inf')
opt = SolverFactory('glpk') # GNU Linear Programming Kit for solving large-scale linear programming (LP), mixed integer programming (MIP), and other
df1 = pd.read_excel("datosPvaD.xlsx")
df2 = pd.read_excel("otrosDatos.xlsx")
#demand = consumption['Consumo (Wh)']
#demand.index += 1
#demand_list = demand.tolist()
data1 = df1.to_dict()
#data2 = df2.to_dict(orient='index')
#data2 = df2.to_dict()
"""
# is the same as otros datos
data2 = {None: {
'pRdImp': {None: 0.35},
'pRdExp': {None: 0.1},
'rend': {None: 0.9},
'CAB': {None: 0.082},
'CABasic': {None: 0.082},
'CAPV': {None: 0.224},
'CI': {None: 0.06849},
'M': {None: 1000},
'dt': {None: 1},
}}
"""
data2 = {'pRdImp': 0.35,
'pRdExp': 0.1,
'rend': 0.9,
'CAB': 0.08,
'CABasic': 0.082,
'CAPV': 0.224,
'CI': 0.06849,
'M': 1000,
'dt': 1
}
#z = {**x, **y}
data = {**data1, **data2}
#from Fotovoltaica_V2_csvread import model # import model
from Fotovoltaica_V1 import model # import model
#instance = model.create_instance('Fotovoltaica_V2.dat')
#instance = model.create_instance(data)
instance = model.create_instance(data1,'Fotovoltaica_V2.dat')
It's hard to tell without seeing your entire model, but the section you have commented out for data2 should work:
data2 = {
None:{
'param':{None:val},
...
}
}
I'm assuming that all of your parameters are not indexed. If they are indexed, then you would need something as follows:
model = AbstractModel()
model.t = Set()
# The index set has to be referenced through the model: a bare ``t`` is an
# undefined name at this point.
model.thing = Param(model.t)
# Data for create_instance(): the outer ``None`` key is the namespace, and
# each component name maps to its values (keyed by ``None`` when the
# component is not indexed, by index otherwise).
input_data = {
    None: {
        't': {None: [1, 2, 3]},
        'thing': {1: 100, 2: 200, 3: 300},
    }
}
You would then create a model instance by calling
model.create_instance(input_data)
You can import the data from a csv into python as you normally would with pandas, but then there will be a little rework you need to do to get it in the correct pyomo format
Take a look at this example: https://github.com/Pyomo/pyomo/blob/master/examples/doc/pyomobook/overview-ch/wl_excel.py
I would suggest using a ConcreteModel instead of an AbstractModel when using Pandas to load the data. Instead of creating Param objects the dataframe can be used directly in the Constraint.

Merge generator objects to calculate frequency in NLTK

I am trying to count frequency of various ngrams using ngram and freqDist functions in nltk.
Due to the fact that the ngram function output is a generator object, I would like to merge the output from each ngram before calculating frequency.
However, I am running into problems to merge the various generator objects.
I have tried itertools.chain, which created an itertools object, rather than merge the generators.
I have finally settled on permutations, but to parse the objects afterwards seems redundant.
The working code thus far is:
import nltk
from nltk import word_tokenize, pos_tag
from nltk.collocations import *
from itertools import *
from nltk.util import ngrams
import re
corpus = 'testing sentences to see if if if this works'
token = word_tokenize(corpus)
unigrams = ngrams(token,1)
bigrams = ngrams(token,2)
trigrams = ngrams(token,3)
perms = list(permutations([unigrams,bigrams,trigrams]))
fdist = nltk.FreqDist(perms)
for x,y in fdist.items():
for k in x:
for v in k:
words = '_'.join(v)
print words, y
As you can see in the results, freq dist is not calculating the words from the individual generator objects properly as each has a frequency of 1.
Is there a more pythonic way to do this properly?
Use everygrams, it returns the all n-grams given a range of n.
>>> from nltk import everygrams
>>> from nltk import FreqDist
>>> corpus = 'testing sentences to see if if if this works'
>>> everygrams(corpus.split(), 1, 3)
<generator object everygrams at 0x7f4e272e9730>
>>> list(everygrams(corpus.split(), 1, 3))
[('testing',), ('sentences',), ('to',), ('see',), ('if',), ('if',), ('if',), ('this',), ('works',), ('testing', 'sentences'), ('sentences', 'to'), ('to', 'see'), ('see', 'if'), ('if', 'if'), ('if', 'if'), ('if', 'this'), ('this', 'works'), ('testing', 'sentences', 'to'), ('sentences', 'to', 'see'), ('to', 'see', 'if'), ('see', 'if', 'if'), ('if', 'if', 'if'), ('if', 'if', 'this'), ('if', 'this', 'works')]
To combine the counting of different orders of ngrams:
>>> from nltk import everygrams
>>> from nltk import FreqDist
>>> corpus = 'testing sentences to see if if if this works'.split()
>>> fd = FreqDist(everygrams(corpus, 1, 3))
>>> fd
FreqDist({('if',): 3, ('if', 'if'): 2, ('to', 'see'): 1, ('sentences', 'to', 'see'): 1, ('if', 'this'): 1, ('to', 'see', 'if'): 1, ('works',): 1, ('testing', 'sentences', 'to'): 1, ('sentences', 'to'): 1, ('sentences',): 1, ...})
Alternatively, FreqDist is essentially a collections.Counter sub-class, so you can combine counters as such:
>>> from collections import Counter
>>> x = Counter([1,2,3,4,4,5,5,5])
>>> y = Counter([1,1,1,2,2])
>>> x + y
Counter({1: 4, 2: 3, 5: 3, 4: 2, 3: 1})
>>> x
>>> from nltk import FreqDist
>>> FreqDist(['a', 'a', 'b'])
FreqDist({'a': 2, 'b': 1})
>>> a = FreqDist(['a', 'a', 'b'])
>>> b = FreqDist(['b', 'b', 'c', 'd', 'e'])
>>> a + b
FreqDist({'b': 3, 'a': 2, 'c': 1, 'e': 1, 'd': 1})
Alvas is right, nltk.everygrams is the perfect tool for this job. But merging several iterators is really not that hard, nor that uncommon, so you should know how to do it. The key is that any iterator can be converted to a list, but it's best to do that only once:
Make a list out of several iterators
Just use lists (simple but inefficient)
allgrams = list(unigrams) + list(bigrams) + list(trigrams)
Or build a single list, properly
allgrams = list(unigrams)
allgrams.extend(bigrams)
allgrams.extend(trigrams)
Or use itertools.chain(), then make a list
allgrams = list(itertools.chain(unigrams, bigrams, trigrams))
The above produce identical results (as long as you don't try to reuse the iterators unigrams etc.-- you need to redefine them between examples).
Use the iterators themselves
Don't fight iterators, learn to work with them. Many Python functions accept them instead of lists, saving you much space and time.
You could form a single iterator and pass it to nltk.FreqDist():
fdist = nltk.FreqDist(itertools.chain(unigrams, bigrams, trigrams))
You can work with multiple iterators. FreqDist, like Counter, has an update() method you can use to count things incrementally:
fdist = nltk.FreqDist(unigrams)
fdist.update(bigrams)
fdist.update(trigrams)

python itertools groupby find the max value

use:
# groupby merges consecutive items sharing a key; itemgetter builds key functions.
from itertools import groupby
from operator import itemgetter
like this:
input:
test = {('a','b'):1,('a','c'):2,('a','d'):3,('x','b'):4,('x','c'):5}
find the max value groupby the key[0]
output:
output_test = {('a','d'):3,('x','c'):5}
To do this using itertools.groupby, and assuming you do not care which entry is returned if there are multiple entries with the same max value:
import itertools

test = {('a', 'b'): 1, ('a', 'c'): 2, ('a', 'd'): 3, ('x', 'b'): 4, ('x', 'c'): 5}
output_test = {('a', 'd'): 3, ('x', 'c'): 5}
# groupby only merges consecutive items, so sort first: sorting the
# (key, value) pairs places entries with the same key[0] next to each other.
grouped = itertools.groupby(sorted(test.items()), lambda x: x[0][0])
# Keep the single (key, value) pair with the largest value in each group.
winners = {max(v, key=lambda q: q[1]) for k, v in grouped}
maxEntries = dict(winners)
print(maxEntries)
print(maxEntries == output_test)
Outputs:
{('x', 'c'): 5, ('a', 'd'): 3}
True
from itertools import groupby
max([sum(1 for _ in g) for k, g in groupby(input)])

django compare date with date

I am trying to refuse importing lines whose date is earlier than the latest date already imported.
timelimit = Operation.objects.filter(account = 3).aggregate(Max('date'))
for row in csv.reader(reencode(f), delimiter=';', quotechar='"')
if row != []:
if row[0]>timelimit:
operation.date=row[0]
row looks like:
2012-01-12,something,0,something2
Of course comparison row[0]>timelimit is wrong - but what is correct?
#this will convert your string("2012-01-12") to a datetime object
from datetime import datetime
>>> x = datetime.strptime(row[0], "%Y-%m-%d")
>>> x
>>> datetime.datetime(2012, 1, 12, 0, 0)
And then you can convert timelimit in a datetime object too like so:
timelimit = datetime(2011, 10, 10)
and then comparing these two is trivial:
x > timelimit

Django: Total birthdays each day for the next 30 days

I've got a model similar to this:
class Person(models.Model):
name = models.CharField(max_length=40)
birthday = DateTimeField() # their next birthday
I would like to get a list of the total birthdays for each day for the next 30 days. So for example, the list would look like this:
[[9, 0], [10, 3], [11, 1], [12, 1], [13, 5], ... #30 entries in list
Each list entry in the list is a date number followed by the number of birthdays on that day. So for example on the 9th of May there are 0 birthdays.
UPDATES
My db is sqlite3 - will be moving to postgres in the future.
from django.db.models import Count
import datetime
today = datetime.date.today()
thirty_days = today + datetime.timedelta(days=30)
birthdays = dict(Person.objects.filter(
birthday__range=[today, thirty_days]
).values_list('birthday').annotate(Count('birthday')))
for day in range(30):
date = today + datetime.timedelta(day)
print "[%s, %s]" % (date, birthdays.get(date, 0))
I would get the list of days and birthday count this way:
from datetime import date, timedelta
today = date.today()
thirty_days = today + timedelta(days=30)
# get everyone with a birthday in the window
people = Person.objects.filter(birthday__range=[today, thirty_days])
# One [day_of_month, count] entry per day, starting today.
birthday_counts = []
for offset in range(30):
    # A distinct loop name keeps the imported ``date`` class from being shadowed.
    day = today + timedelta(days=offset)
    # Compare month *and* day: a 30-day window can contain the same
    # day-of-month twice (e.g. Feb 1 and Mar 1), which a day-only comparison
    # would count on both dates.  sum() over a generator also works on
    # Python 3, where filter() returns an iterator that has no len().
    total = sum(
        1
        for person in people
        if (person.birthday.month, person.birthday.day) == (day.month, day.day)
    )
    birthday_counts.append([day.day, total])
Something like this --
from datetime import date, timedelta
class Person(models.Model):
    """A person with a name and a birthday."""

    name = models.CharField(max_length=40)
    birthday = models.DateField()

    @staticmethod
    def upcoming_birthdays(days=30):
        """Count birthdays for each of the next ``days`` days.

        Args:
            days: Number of days to report, starting with today.

        Returns:
            A list of ``(day_of_month, count)`` tuples, one per day.
        """
        today = date.today()
        # NOTE(review): DATE_ADD / INTERVAL / YEAR() look MySQL-specific, and
        # the clause appears to miss birthdays that fall after a year
        # rollover inside the window — confirm before relying on it.
        where = (
            'DATE_ADD(birthday, INTERVAL (YEAR(NOW()) - YEAR(birthday)) YEAR) '
            'BETWEEN DATE(NOW()) AND DATE_ADD(NOW(), INTERVAL %s DAY)'
        )
        # extra() expects ``where`` to be a list of SQL fragments, and the
        # parameter placeholder must be a lowercase ``%s``.
        birthdays = Person.objects.extra(
            where=[where], params=[days]
        ).values_list('birthday', flat=True)
        data = []
        for offset in range(days):
            d = today + timedelta(days=offset)
            # Birthdays recur yearly, so match on month and day only.
            count = sum(
                1 for b in birthdays if (b.month, b.day) == (d.month, d.day)
            )
            data.append((d.day, count))
        return data
print Person.upcoming_birthdays()
(Queryset of people with a birthday in the next X days)
Found cool solution for this!
For me it works!
from datetime import datetime, timedelta
import operator
from django.db.models import Q
def birthdays_within(days):
    """Return the people whose birthday falls within the next ``days`` days.

    Args:
        days: Size of the look-ahead window in days, counted from now.
            Today is always included in the window.

    Returns:
        The result of ``Person.objects.filter(...)`` with one OR-ed
        (``birthday__month``, ``birthday__day``) condition per day.
    """
    # ``reduce`` is not a builtin on Python 3; functools provides it on both.
    from functools import reduce

    now = datetime.now()
    then = now + timedelta(days)
    # Build the list of month/day tuples, starting with today and adding
    # each following day up to ``then`` (today is only listed once).
    monthdays = [(now.month, now.day)]
    cursor = now + timedelta(days=1)
    while cursor <= then:
        monthdays.append((cursor.month, cursor.day))
        cursor += timedelta(days=1)
    # Transform each into queryset keyword args.
    lookups = (
        {"birthday__month": month, "birthday__day": day}
        for month, day in monthdays
    )
    # Compose the django.db.models.Q objects together for a single query.
    query = reduce(operator.or_, (Q(**lookup) for lookup in lookups))
    # Run the query.
    return Person.objects.filter(query)
But it returns the persons who have a birthday in the date range rather than a count per day, so you will need to adapt it a bit.