So I have 2 views: the first one generates the HTML on request, and the second generates the chart displayed by the first view.
HTML View
def activation_signupcount(request):
    if 'datestart' not in request.GET:
        return render_to_response('activation/activation_signupcount.html', {'datestart': ''})
    else:
        datestart = request.GET['datestart']
        dateend = request.GET['dateend']
        return render_to_response('activation/activation_signupcount.html', {'datestart': datestart, 'dateend': dateend})
Chart View
def activation_signupcount_graph(request):
    datestart = request.GET['datestart']  # this doesn't work
    dateend = request.GET['dateend']  # this doesn't work
    print datestart, dateend
    # open sql connection
    cursor = connection.cursor()
    # execute query (parameterized -- concatenating the dates into the SQL string would invite SQL injection)
    cursor.execute("SELECT COUNT(1), JoinDate FROM users WHERE JoinDate BETWEEN %s AND %s GROUP BY JoinDate;", [datestart, dateend])
    data = cursor.fetchall()
    # close connection
    cursor.close()
    connection.close()
    fig = Figure()
    ax = fig.add_subplot(111)
    x = [k[1] for k in data]
    y = [k[0] for k in data]
    ax.plot_date(x, y, '-')
    ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d'))
    fig.autofmt_xdate()
    canvas = FigureCanvas(fig)
    response = HttpResponse(content_type='image/png')
    canvas.print_png(response)
    return response
So on the page activation/activation_signupcount.html I have 2 date fields, start and end, which submit a GET request. My question is: how can I pass these 2 date variables to my function activation_signupcount_graph so it can use the start/end dates to generate the chart?
I hope that was clear!
You can access your chart view in your template using the url template tag with the appropriate parameters.
So it should look like:
<img src="{% url yourapp.chart_view start_date end_date %}" />
Or, since you are using GET parameters:
<img src="{% url yourapp.chart_view %}?datestart={{ datestart }}&dateend={{ dateend }}" />
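In case it helps, here is a minimal sketch of the kind of urlconf that tag resolves against (old-style Django patterns; the module name and URL paths are placeholders, and the string you pass to {% url %} must match the view's path or name):

# urls.py -- hypothetical old-style Django URL configuration
from django.conf.urls.defaults import patterns

urlpatterns = patterns('yourapp.views',
    (r'^signupcount/$', 'activation_signupcount'),
    (r'^signupcount/graph/$', 'activation_signupcount_graph'),
)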
Have been doing this for years (it works with pySVG-generated SVG graphs as well, by the way); however, recently I encountered many problems installing matplotlib in virtualenvs.
I resorted to adding the system-wide matplotlib libraries (from the Ubuntu repositories) to the virtualenvs instead of the usual pip install ....
So I've created a pdf file from a ListView called "OrderListView". Now I would like to pass a queryset to the pdf file.
I rewrote my listview as a function view for more clarity. I need to find a way to pass the queryset to the pdf view. I'm using django-filter to create the filtered view. I have the following:
filters.py
class OrderFilter(django_filters.FilterSet):
    class Meta:
        model = Order
        fields = {
            'start_date': ['gte'],
            'end_date': ['lte'],
        }
views.py
from .filters import *

# View to show a filtered list using django-filter
def order_list(request):
    order_list = Order.objects.all()
    order_filter = OrderFilter(request.GET, queryset=order_list)
    start_date__gte = request.GET.get('start_date__gte', '')
    start_date__lte = request.GET.get('start_date__lte', '')
    return render(request, 'orders/order_list.html', {
        'filter': order_filter,
        'start_date__gte': start_date__gte,
        'start_date__lte': start_date__lte,
    })
# View to create a pdf file from the filtered view using WeasyPrint
def order_list_pdf(request):
    # Edited: create the queryset
    start_date__gte = request.GET.get('start_date__gte', '')
    start_date__lte = request.GET.get('start_date__lte', '')
    order_list = Order.objects.filter(
        Q(start_date__gte=start_date__gte) |
        Q(start_date__lte=start_date__lte)
    )
    order_filter = OrderFilter(request.GET, queryset=order_list)
    response = HttpResponse(content_type="application/pdf")
    response['Content-Disposition'] = 'attachment; filename=filtered_list.pdf'
    html = render_to_string('pdf/pdf_booking_list_arrivals.html', {
        'filtered_list': order_list,
    })
    font_config = FontConfiguration()
    HTML(string=html).write_pdf(response, font_config=font_config)
    return response
So I have tried using:
    start_date__gte = request.GET.get('start_date__gte', '')
    start_date__lte = request.GET.get('start_date__lte', '')
and passed the query string in the URL:
#edited
<a class="btn" href="{% url 'order_list_pdf' %}?start_date__gte={{ start_date__gte }}&start_date__lte={{ start_date__lte }}">Create PDF</a>
This does put the query string in the URL, but the list is not filtered. The generated PDF is working; I just need a way to send only the filtered results to the PDF view.
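One idea I am considering is to reuse the same OrderFilter in the PDF view and hand its filtered queryset to the template via django-filter's .qs property; a rough sketch of that idea (untested):

def order_list_pdf(request):
    # Re-run the same FilterSet against the incoming GET parameters and
    # use its .qs property, which applies all configured filters for us
    order_filter = OrderFilter(request.GET, queryset=Order.objects.all())
    response = HttpResponse(content_type="application/pdf")
    response['Content-Disposition'] = 'attachment; filename=filtered_list.pdf'
    html = render_to_string('pdf/pdf_booking_list_arrivals.html', {
        'filtered_list': order_filter.qs,
    })
    font_config = FontConfiguration()
    HTML(string=html).write_pdf(response, font_config=font_config)
    return response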
Any help would be appreciated!
I'm trying to use a Scrapy spider on oneblockdown.it to get all the products from the latest-products page and store them in a DB.
Some sites in my monitor are working, but some, such as OBD, are not working and are not uploading anything to the DB. This is my function:
class OneBlockDownSpider(Spider):
    name = "OneBlockDownSpider"
    allowed_domains = ["oneblockdown.it"]
    start_urls = [OneBlockDownURL]

    def __init__(self):
        logging.critical("OneBlockDown STARTED.")

    def parse(self, response):
        products = Selector(response).xpath("//div[@id='product-list']")
        for product in products:
            item = OneBlockDownItem()
            item['name'] = product.xpath('.//div[@class="catalogue-product-title"]//h3').extract.first
            item['link'] = product.xpath('.//div[@class="catalogue-product-title"]//h3/a/@href').extract.first
            # item['image'] = "http:" + product.xpath("/div[@class='catalogue-product-cover']/a[@class='catalogue-product-cover-image']/img/@src").extract()[0]
            # item['size'] = '**NOT SUPPORTED YET**'
            yield item
        yield Request(OneBlockDownURL, callback=self.parse, dont_filter=True, priority=15)
I guess I'm using the wrong XPath, but I can't solve it.
First of all, the site is Cloudflare-protected (which prevents scraping).
You also have several issues with your code:
Your products selector matches a single node (the whole product list), not the individual products
You're using extract.first instead of extract_first()
products = response.xpath("//div[@id='product-list']/div")
for product in products:
    item = OneBlockDownItem()
    item['name'] = product.xpath('.//div[@class="catalogue-product-title"]//h3').extract_first()
    item['link'] = product.xpath('.//div[@class="catalogue-product-title"]//h3/a/@href').extract_first()
    yield item
You should start all your XPaths with '.' when using a relative selector like product:
item['image'] = "http:" + product.xpath("./div[@class='catalogue-product-cover']/a[@class='catalogue-product-cover-image']/img/@src").extract()[0]
Otherwise, it will try to get the element with this XPath: /body/div[@class='catalogue-product-cover']
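To make the difference concrete, here is a small illustration of how the leading characters change what a selector matches (a sketch using the same product node):

# relative vs. absolute XPath from a scrapy selector
product.xpath('./div')           # direct div children of this product node
product.xpath('.//div')          # any div descendant of this product node
product.xpath('//div')           # every div in the whole document
product.xpath('/html/body/div')  # divs addressed absolutely from the document root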
So I am successfully storing a complex object (non-model) in my session in development. I've tried every session engine and cache type, and they all work in development (PyCharm). However, when I move the code to production, while no errors are thrown, the session loses the object.
Here is the method I use to set the session object:
def instantiate_command_object(request):
    try:
        ssc = request.session['specimen_search_criteria']
        logger.debug('found ssc session variable')
    except KeyError:
        logger.debug('failed to find ssc session variable')
        ssc = SpecimenSearchCommand()
    return ssc
Then in a method that runs asynchronously via an ajax call I start making changes to the object in the session:
def ajax_add_collection_to_search(request):
    ssc = instantiate_command_object(request)
    collection_id = request.GET.get('collection')
    collection = Collection.objects.get(pk=collection_id)
    if collection and collection not in ssc.collections:
        ssc.collections.append(collection)
        # save change to session
        request.session['specimen_search_criteria'] = ssc
    # refresh search results
    ssc.search()
    return render(request, '_search.html')
All this works as far as it goes. However, if I then refresh the browser, the session is lost. Here is a snippet from the template:
{% with criteria=request.session.specimen_search_criteria %}
<div class="search-criteria" id="search-criteria">
    <div class="row">
        Session:
        {{ request.session }}<br/>
        Search:
        {{ request.session.specimen_search_criteria }}<br/>
        Created:
        {{ request.session.specimen_search_criteria.key }}<br/>
        Collections:
        {{ request.session.specimen_search_criteria.collections }}<br/>
Again, in development I can refresh all day and the same object will be returned. In production, it will either create a new object or occasionally return a previously created copy.
A few relevant items:
The production server is running Apache httpd with mod_wsgi.
I've tried memcached, databasecache, etc. the behavior remains the same. Always works in development, never in production.
I've tried it with
SESSION_SERIALIZER = 'django.contrib.sessions.serializers.PickleSerializer'
and without. I can see the session info in the database and when I unpickle it it just seems to be pointing to a location in memory for the complex object.
I'm guessing this might have something to do with running in a multi-user environment, but again, I'm not using locmem and I've tried all of the caching approaches to no effect.
To be clear, the session itself seems to be fine, I can store a string or other simple item in it and it will stick. It's the complex object within the session that seems to be getting lost.
Edit: I might also point out that if I refresh the browser immediately following the return of the search criteria it will actually return successfully. Anything more than about a second and it will disappear.
Edit (adding code of SpecimenSearchCommand):
class SpecimenSearchCommand:
    def __init__(self):
        pass

    created = datetime.datetime.now()
    key = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(6))
    jurisdictions = []
    taxa = []
    strata = []
    collections = []
    chrons = []
    has_images = False
    query = None  # The active SQL query, not the actual result records
    page_size = 50
    current_page = 1
    sort_order = 'number'
    results = []  # Page of results from paginator

    def is_empty(self):
        if len(self.jurisdictions) == 0 and len(self.taxa) == 0 and len(self.strata) == 0 and \
                len(self.collections) == 0 and len(self.chrons) == 0 and self.has_images is False:
            return True
        else:
            return False

    def get_results(self):
        paginator = Paginator(self.query, self.page_size)
        try:
            self.results = paginator.page(self.current_page)
        except PageNotAnInteger:
            self.results = paginator.page(1)
        except TypeError:
            return []
        except EmptyPage:
            self.results = paginator.page(paginator.num_pages)
        return self.results

    def get_results_json(self):
        points = []
        for s in self.results:
            if s.locality.latitude and s.locality.longitude:
                points.append({"type": "Feature",
                               "geometry": {"type": "Point",
                                            "coordinates": [s.locality.longitude, s.locality.latitude]},
                               "properties": {"specimen_id": s.id,
                                              "sci_name": s.taxon.scientific_name(),
                                              "cat_num": s.specimen_number(),
                                              "jurisdiction": s.locality.jurisdiction.full_name()}})
        return json.dumps({"type": "FeatureCollection", "features": points})

    def search(self):
        if self.is_empty():
            self.query = None
            return
        query = Specimen.objects.filter().distinct().order_by(self.sort_order)
        if len(self.taxa) > 0:
            query = query.filter(taxon__in=get_hierarchical_search_elements(self.taxa))
        if len(self.jurisdictions) > 0:
            query = query.filter(locality__jurisdiction__in=get_hierarchical_search_elements(self.jurisdictions))
        if len(self.strata) > 0:
            query = query.filter(stratum__in=get_hierarchical_search_elements(self.strata))
        if len(self.chrons) > 0:
            query = query.filter(chron__in=get_hierarchical_search_elements(self.chrons))
        if len(self.collections) > 0:
            query = query.filter(collection__in=get_hierarchical_search_elements(self.collections))
        if self.has_images:
            query = query.filter(images__isnull=False)
        self.query = query
        return

def get_hierarchical_search_elements(elements):
    search_elements = []
    for element in elements:
        search_elements = set().union(search_elements, element.get_descendants(True))
    return search_elements
OK, so as Daniel pointed out, the attributes of the SSC class were class-level instead of instance-level. The correct version looks like this now:
def __init__(self):
    self.created = datetime.datetime.now()
    self.key = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(6))
    self.jurisdictions = []
    self.taxa = []
    self.strata = []
    self.collections = []
    self.chrons = []
    self.has_images = False
    self.query = None  # The active SQL query, not the actual result records
    self.page_size = 50
    self.current_page = 1
    self.sort_order = 'number'
    self.results = []  # Page of results from paginator
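For anyone who hits the same thing, here is a minimal sketch (a generic example, not the project code) of why class-level attributes break session storage: pickle serializes only an instance's own __dict__, so state kept on the class never makes it into the session, and it is also shared between requests served by the same process.

import pickle

class Broken:
    collections = []              # class attribute: one list shared by every instance

class Fixed:
    def __init__(self):
        self.collections = []     # instance attribute: stored in the instance __dict__

a, b = Broken(), Broken()
a.collections.append('x')
print(b.collections)              # ['x'] -- both instances share the same list

data = pickle.dumps(a)            # the pickle contains an empty __dict__, no list data
print(pickle.loads(data).collections)
# prints ['x'] here only because this process's class object still holds the data;
# a fresh worker process unpickling the same bytes would see [] instead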
So I am trying to build a dynamic web crawler to get all URL links within links.
So far I am able to get all the links for chapters, but when I try to get the section links from each chapter, my output does not print out anything.
The code I used:
#########################Chapters#######################
import requests
from bs4 import BeautifulSoup, SoupStrainer
import re

base_url = "http://law.justia.com/codes/alabama/2015/title-{title:01d}/"
for title in range(1, 4):
    url = base_url.format(title=title)
    r = requests.get(url)
    for link in BeautifulSoup((r.content), "html.parser", parse_only=SoupStrainer('a')):
        if link.has_attr('href'):
            if 'chapt' in link['href']:
                href = "http://law.justia.com" + link['href']
                leveltwo(href)

#########################Sections#######################
def leveltwo(item_url):
    r = requests.get(item_url)
    soup = BeautifulSoup((r.content), "html.parser")
    section = soup.find('div', {'class': 'primary-content'})
    for sublinks in section.find_all('a'):
        sectionlinks = sublinks.get('href')
        print(sectionlinks)
With some minor modifications to your code, I was able to get it to run and output the sections. Mainly, you needed to fix your indentation, and define a function before you call it.
#########################Chapters#######################
import requests
from bs4 import BeautifulSoup, SoupStrainer
import re

def leveltwo(item_url):
    r = requests.get(item_url)
    soup = BeautifulSoup((r.content), "html.parser")
    section = soup.find('div', {'class': 'primary-content'})
    for sublinks in section.find_all('a'):
        sectionlinks = sublinks.get('href')
        print(sectionlinks)

base_url = "http://law.justia.com/codes/alabama/2015/title-{title:01d}/"
for title in range(1, 4):
    url = base_url.format(title=title)
    r = requests.get(url)
    for link in BeautifulSoup((r.content), "html.parser", parse_only=SoupStrainer('a')):
        try:
            if 'chapt' in link['href']:
                href = "http://law.justia.com" + link['href']
                leveltwo(href)
            else:
                continue
        except KeyError:
            continue
#########################Sections#######################
output:
/codes/alabama/2015/title-3/chapter-1/section-3-1-1/index.html
/codes/alabama/2015/title-3/chapter-1/section-3-1-2/index.html
/codes/alabama/2015/title-3/chapter-1/section-3-1-3/index.html
/codes/alabama/2015/title-3/chapter-1/section-3-1-4/index.html etc.
You don't need any try/except blocks: you can use href=True with find or find_all to select only the anchor tags that have an href, or a CSS select a[href] as below. The chapter links are in the first ul inside the article tag with the id #maincontent, so you don't need to filter at all:
import requests
from bs4 import BeautifulSoup

base_url = "http://law.justia.com/codes/alabama/2015/title-{title:01d}/"

def leveltwo(item_url):
    r = requests.get(item_url)
    soup = BeautifulSoup(r.content, "html.parser")
    section_links = [a["href"] for a in soup.select('div .primary-content a[href]')]
    print(section_links)

for title in range(1, 4):
    url = base_url.format(title=title)
    r = requests.get(url)
    for link in BeautifulSoup(r.content, "html.parser").select("#maincontent ul:nth-of-type(1) a[href]"):
        href = "http://law.justia.com" + link['href']
        leveltwo(href)
If you were to use find_all you simply need to pass find_all(.., href=True) to filter your anchor tags to only select ones that have hrefs.
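For instance, a short sketch of that find_all variant (assuming the same request/parsing setup as above):

# keep only the anchor tags that actually carry an href attribute
for link in BeautifulSoup(r.content, "html.parser").find_all("a", href=True):
    print(link["href"])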
Good afternoon. I have a model with a class like this:
class Reportage:
    def get_num(self):
        end_date = self.date.end_date
        start_date = self.date.start_date
        # work in whole seconds so the values can drive range() below
        duration = int(time.mktime(end_date.timetuple()) - time.mktime(start_date.timetuple()))
        delta_t = duration / 60
        num = []
        for t in range(0, duration, delta_t):
            start = datetime.timedelta(0, t) + start_date
            end = datetime.timedelta(0, t + delta_t) + start_date
            n_num = self.get_num_in_interval(start, end)
            num.append([t, n_num])
        return num
I want to serialize the num list with simplejson in views.py, so that I can later pass the array to a jQuery script to plot it in a graph.
What's the code to serialize that array?
I hope I was clear. Thanks in advance to all those who respond.
Following @ninefingers' response, I think your question is about how to make that dumped JSON string available to a jQuery plugin.
# views.py
def my_view(request):
    # do stuff
    num = reportage_instance.get_num()
    num_json = simplejson.dumps(num)
    return render(request, 'template.html', {
        'num_json': num_json,
    })
In your template, make that JSON object available as a JavaScript variable:
# template.html
<html>
<body>
    <script>
        var NUM_JSON = {{ num_json|safe }};
        myScript.doSomething(NUM_JSON);
    </script>
</body>
</html>
Now you can call regular JS with the NUM_JSON variable.
If you're looking to do this in a model, something like this would work:
# if this were a models.py file:
import simplejson
# other django imports:

class SomeModel(models.Model):
    property = models.SomeField()  # ...

    def some_function(self):
        num = []
        # fill num
        return simplejson.dumps(num)
That'll dump num to a string representation of the JSON, which you can then return (as above), write to a file, and so on.
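For example, a hypothetical usage of that method (the instance lookup and file name are made up for illustration):

report = SomeModel.objects.first()
json_str = report.some_function()   # the JSON string produced above
with open('num.json', 'w') as f:
    f.write(json_str)               # or return it from a view instead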
From a view you have a choice, but if your view is an AJAX view returning some JSON for processing, you might do this:
# views.py
# ...
def my_ajax_call(request, model_pk):
    try:
        mymodel = SomeModel.objects.get(pk=model_pk)
    except SomeModel.DoesNotExist:
        return HttpResponse("Unknown model", status=404)
    else:
        return HttpResponse(mymodel.some_function())  # returns simplejson.dumps(num)
This can then be used from a template in Javascript - the other answer shows you how you might approach that :)