Django: optimize query update

I have a function which opens a txt file (350k lines), reads each line, and updates fields in the database. The database has around 250k-300k records.
It runs far too slowly for me.
Could you help me optimize the query or the way the import is being done?
Query:
PlaceGroupMatch.objects.filter(**query_kwargs).update(sym_ul=int(line[5]), updated=True, updated_date=datetime.now())
Whole code:
def street_postal_codes():
    output_file_name = os.path.join(settings.MEDIA_ROOT, "postal", "ulice_GUS.txt")
    with open(output_file_name, 'r+') as handle:
        next(handle)
        lines = csv.reader(handle, delimiter='\t')
        lines = list(lines)

    def update_data(lines):
        transaction.set_autocommit(False)
        for index, line in enumerate(lines):
            query_kwargs = {
                "sym_ext": int(line[10]),
                "sym_pod": int(line[4]),
                "updated": False
            }
            qs = PlaceGroupMatch.objects.filter(**query_kwargs).update(
                sym_ul=int(line[5]), updated=True, updated_date=datetime.now())
            print index
            if index % 5000 == 0:
                print index, datetime.now()
                transaction.commit()
        transaction.set_autocommit(True)

    update_data(lines)
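The main cost here is issuing one UPDATE per file line (350k round trips), and each filter on sym_ext/sym_pod will also be slow if those columns are not indexed. One direction worth trying, sketched below under the assumption that you are on Django 2.2+ (for bulk_update) and that the column layout of ulice_GUS.txt is the one used above, is to match the rows in Python once and write them back in large batches:

from django.utils import timezone

def update_data_bulk(lines, batch_size=5000):
    # Sketch only: build a lookup of the file rows keyed by (sym_ext, sym_pod).
    wanted = {(int(line[10]), int(line[4])): int(line[5]) for line in lines}

    to_update = []
    qs = PlaceGroupMatch.objects.filter(updated=False).only('sym_ext', 'sym_pod', 'sym_ul')
    for obj in qs.iterator():
        new_sym_ul = wanted.get((obj.sym_ext, obj.sym_pod))
        if new_sym_ul is not None:
            obj.sym_ul = new_sym_ul
            obj.updated = True
            obj.updated_date = timezone.now()
            to_update.append(obj)

    # One UPDATE statement per batch instead of one per file line.
    PlaceGroupMatch.objects.bulk_update(
        to_update, ['sym_ul', 'updated', 'updated_date'], batch_size=batch_size)

On older Django versions the same idea works by keeping the per-row .update() but wrapping each chunk of a few thousand rows in transaction.atomic() so they share a single commit, and making sure (sym_ext, sym_pod) is indexed.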

Related

Django: can't interrupt update function with redirect. Is it possible?

I use a function for updating a Model.
def update_mapping(request, pk):
    flow = Flow.objects.get(pk=pk)
    mappings = MappingField.objects.filter(fl_id=pk)
    headers_samples = GetCsvHeadersAndSamples(request, pk)
    [...]
In this function, I call another one (GetCsvHeadersAndSamples) to get data from a CSV. Later, I use that data with JS in the template.
def GetCsvHeadersAndSamples(request, flow_id):
    get_file_and_attribs = get_csv(request, flow_id)
    file = get_file_and_attribs[0]
    separator = get_file_and_attribs[1]
    encoding = get_file_and_attribs[2]
    with open(file, newline='') as f:
        reader = csv.reader(f, delimiter=separator,
                            encoding=encoding)
        headers = next(reader)
        samples = next(itertools.islice(csv.reader(f), 1, None))
        headersAndSamples = {'headers': headers, 'samples': samples}
    return headersAndSamples
To access the CSV data, I use another function that checks whether the CSV still exists, in which case I retrieve the data from it.
def get_csv(request, flow_id):
    flow = Flow.objects.get(pk=flow_id)
    file = flow.fl_file_name
    separator = flow.fl_separator
    media_folder = settings.MEDIA_ROOT
    file = os.path.join(media_folder, str(file))
    if os.path.isfile(file):
        file_2_test = urllib.request.urlopen('file://' + file).read()
        encoding = (chardet.detect(file_2_test))['encoding']
        return (file, separator, encoding)
    else:
        # print('No file')
        messages.error(request, "File not found or corrupted.")
        return HttpResponseRedirect(reverse('mappings-list', args=(flow_id,)))
I hoped that the return would "break" out of my original function and redirect to the 'mappings-list' page with the error message. But it continues and returns to the GetCsvHeadersAndSamples function, which generates an error because the CSV data was not found. Note: the commented-out print does show that the file is not found.
It seems that the way I'm doing things is not the right one.
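For what it's worth, a return inside get_csv only hands the HttpResponseRedirect back to its caller; Django will only send it to the browser if the view itself returns it. Below is a minimal sketch of one common pattern (checking the helper's return value and propagating it), using the names from the question; the exact control flow is a suggestion, not the original code:

from django.http import HttpResponseRedirect

def GetCsvHeadersAndSamples(request, flow_id):
    result = get_csv(request, flow_id)
    if isinstance(result, HttpResponseRedirect):
        return result  # propagate the redirect instead of unpacking it
    file, separator, encoding = result
    # ... read headers and samples as before ...

def update_mapping(request, pk):
    headers_samples = GetCsvHeadersAndSamples(request, pk)
    if isinstance(headers_samples, HttpResponseRedirect):
        return headers_samples  # only the view's return value reaches the browser
    # ... continue and render the template with headers_samples ...

Raising a custom exception in get_csv and catching it in the view is another way to get the same early exit.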

How should I be formatting my yield requests?

My scrapy spider is very confused, or I am, but one of us is not working as intended. My spider pulls start URLs from a file and is supposed to: start on an Amazon search page, crawl the page and grab the URL of each search result, follow the link to the item's page, crawl the item page for information on the item, and once all items on the first page have been crawled, follow pagination up to page X, then rinse and repeat.
I am using ScraperAPI and Scrapy-user-agent to randomize my middlewares. I have formatted my start_requests with a priority based on their index in the file, so they should be crawled in order. I have checked and ensured that I AM receiving a successful 200 HTML response with the actual HTML from the Amazon page. Here is the code for the spider:
class AmazonSpiderSpider(scrapy.Spider):
    name = 'amazon_spider'
    page_number = 2
    current_keyword = 0
    keyword_list = []

    payload = {'api_key': 'mykey', 'url': 'https://httpbin.org/ip'}
    r = requests.get('http://api.scraperapi.com', params=payload)
    print(r.text)

    #/////////////////////////////////////////////////////////////////////
    def start_requests(self):
        with open("keywords.txt") as f:
            for index, line in enumerate(f):
                try:
                    keyword = line.strip()
                    AmazonSpiderSpider.keyword_list.append(keyword)
                    formatted_keyword = keyword.replace(' ', '+')
                    url = "http://api.scraperapi.com/?api_key=mykey&url=https://www.amazon.com/s?k=" + formatted_keyword + "&ref=nb_sb_noss_2"
                    yield scrapy.Request(url, meta={'priority': index})
                except:
                    continue

    #/////////////////////////////////////////////////////////////////////
    def parse(self, response):
        print("========== starting parse ===========")
        for next_page in response.css("h2.a-size-mini a").xpath("@href").extract():
            if next_page is not None:
                if "https://www.amazon.com" not in next_page:
                    next_page = "https://www.amazon.com" + next_page
                yield scrapy.Request('http://api.scraperapi.com/?api_key=mykey&url=' + next_page, callback=self.parse_dir_contents)
        second_page = response.css('li.a-last a').xpath("@href").extract_first()
        if second_page is not None and AmazonSpiderSpider.page_number < 3:
            AmazonSpiderSpider.page_number += 1
            yield scrapy.Request('http://api.scraperapi.com/?api_key=mykey&url=' + second_page, callback=self.parse_pagination)
        else:
            AmazonSpiderSpider.current_keyword = AmazonSpiderSpider.current_keyword + 1

    #/////////////////////////////////////////////////////////////////////
    def parse_pagination(self, response):
        print("========== starting pagination ===========")
        for next_page in response.css("h2.a-size-mini a").xpath("@href").extract():
            if next_page is not None:
                if "https://www.amazon.com" not in next_page:
                    next_page = "https://www.amazon.com" + next_page
                yield scrapy.Request(
                    'http://api.scraperapi.com/?api_key=mykey&url=' + next_page,
                    callback=self.parse_dir_contents)
        second_page = response.css('li.a-last a').xpath("@href").extract_first()
        if second_page is not None and AmazonSpiderSpider.page_number < 3:
            AmazonSpiderSpider.page_number += 1
            yield scrapy.Request(
                'http://api.scraperapi.com/?api_key=mykey&url=' + second_page,
                callback=self.parse_pagination)
        else:
            AmazonSpiderSpider.current_keyword = AmazonSpiderSpider.current_keyword + 1

    #/////////////////////////////////////////////////////////////////////
    def parse_dir_contents(self, response):
        items = ScrapeAmazonItem()
        print("============= parsing page ==============")
        temp = response.css('#productTitle::text').extract()
        product_name = ''.join(temp)
        product_name = product_name.replace('\n', '')
        product_name = product_name.strip()
        temp = response.css('#priceblock_ourprice::text').extract()
        product_price = ''.join(temp)
        product_price = product_price.replace('\n', '')
        product_price = product_price.strip()
        temp = response.css('#SalesRank::text').extract()
        product_score = ''.join(temp)
        product_score = product_score.strip()
        product_score = re.sub(r'\D', '', product_score)
        product_ASIN = response.css('li:nth-child(2) .a-text-bold+ span').css('::text').extract()
        keyword = AmazonSpiderSpider.keyword_list[AmazonSpiderSpider.current_keyword]
        items['product_keyword'] = keyword
        items['product_ASIN'] = product_ASIN
        items['product_name'] = product_name
        items['product_price'] = product_price
        items['product_score'] = product_score
        yield items
For the FIRST start URL, it will crawl three or four items and then it will jump to the SECOND start URL. It will skip processing the remaining items and pagination pages, going directly to the second start URL. For the second URL, it will crawl three or four items, then it again will skip to the THIRD start URL. It continues in this way, grabbing three or four items, then skipping to the next URL until it reaches the final start URL. It will completely gather all information on this URL. Sometimes the spider COMPLETELY SKIPS the first or second starting URL. This happens infrequently, but I have no idea as to what could cause this.
My code for following result item URLs works fine, but I never get the print statement for "starting pagination", so it is not correctly following pages. Also, there is something odd with the middlewares: it begins parsing before it has assigned a middleware.
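Two things in the spider above are worth noting and may be related to the behaviour. First, meta={'priority': index} only stores a value in the request's meta dict; the scheduler looks at the priority argument of Request. Second, current_keyword is a class attribute that is incremented while responses for several keywords are still in flight, so parse_dir_contents can read the wrong keyword. A hedged sketch of carrying the keyword with each request instead of a shared counter (names and URL layout taken from the spider above, the control flow is a suggestion):

    def start_requests(self):
        with open("keywords.txt") as f:
            for index, line in enumerate(f):
                keyword = line.strip()
                formatted_keyword = keyword.replace(' ', '+')
                url = ("http://api.scraperapi.com/?api_key=mykey&url="
                       "https://www.amazon.com/s?k=" + formatted_keyword + "&ref=nb_sb_noss_2")
                # priority= is what the scheduler actually uses; meta carries per-request state.
                yield scrapy.Request(url, priority=-index,
                                     meta={'keyword': keyword})

    def parse(self, response):
        keyword = response.meta['keyword']
        for href in response.css("h2.a-size-mini a::attr(href)").extract():
            if "https://www.amazon.com" not in href:
                href = "https://www.amazon.com" + href
            yield scrapy.Request('http://api.scraperapi.com/?api_key=mykey&url=' + href,
                                 callback=self.parse_dir_contents,
                                 meta={'keyword': keyword})

parse_dir_contents would then read response.meta['keyword'] instead of the keyword_list/current_keyword pair.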

Automating Date Range while extracting

The script below is what I am using to extract data from Google Analytics; here I am extracting data for the last week. I want to automate the date range so that I don't have to change date_ranges every week.
I also want to avoid sampling of the data by GA. Please guide me on the correct way to automate this, in detail.
author = 'test@gmail.com (test)'

import argparse
import sys
import csv
import string
import datetime
import json
import time

from apiclient.errors import HttpError
from apiclient import sample_tools
from oauth2client.client import AccessTokenRefreshError

cam_name = sys.argv[1:]

class SampledDataError(Exception): pass

def main(argv):
    # Authenticate and construct service.
    service, flags = sample_tools.init(
        argv[0], 'analytics', 'v3', __doc__, __file__,
        scope='https://www.googleapis.com/auth/analytics.readonly')

    # Try to make a request to the API. Print the results or handle errors.
    try:
        profile_id = profile_ids[profile]
        if not profile_id:
            print('Could not find a valid profile for this user.')
        else:
            metrics = argv[1]
            dimensions = argv[2]
            reportName = argv[3]
            sort = argv[4]
            filters = argv[5]
            for start_date, end_date in date_ranges:
                limit = ga_query(service, profile_id, 0,
                                 start_date, end_date, metrics, dimensions, sort, filters).get('totalResults')
                for pag_index in range(0, limit, 10000):
                    results = ga_query(service, profile_id, pag_index,
                                       start_date, end_date, metrics, dimensions, sort, filters)
                    # if results.get('containsSampledData'):
                    #     raise SampledDataError
                    print_results(results, pag_index, start_date, end_date, reportName)

    except TypeError as error:
        # Handle errors in constructing a query.
        print('There was an error in constructing your query : %s' % error)
    except HttpError as error:
        # Handle API errors.
        print('Arg, there was an API error : %s : %s' %
              (error.resp.status, error._get_reason()))
    except AccessTokenRefreshError:
        # Handle Auth errors.
        print('The credentials have been revoked or expired, please re-run '
              'the application to re-authorize')
    except SampledDataError:
        # force an error if ever a query returns data that is sampled!
        print('Error: Query contains sampled data!')

def ga_query(service, profile_id, pag_index, start_date, end_date, metrics, dimensions, sort, filters):
    return service.data().ga().get(
        ids='ga:' + profile_id,
        start_date=start_date,
        end_date=end_date,
        metrics=metrics,
        dimensions=dimensions,
        sort=sort,
        filters=filters,
        samplingLevel='HIGHER_PRECISION',
        start_index=str(pag_index + 1),
        max_results=str(pag_index + 10000)).execute()

def print_results(results, pag_index, start_date, end_date, reportName):
    """Prints out the results.

    This prints out the profile name, the column headers, and all the rows of
    data.

    Args:
      results: The response returned from the Core Reporting API.
    """
    # New write header
    if pag_index == 0:
        if (start_date, end_date) == date_ranges[0]:
            print('Profile Name: %s' % results.get('profileInfo').get('profileName'))
            columnHeaders = results.get('columnHeaders')
            cleanHeaders = [str(h['name']) for h in columnHeaders]
            writer.writerow(cleanHeaders)
        print(reportName, 'Now pulling data from %s to %s.' % (start_date, end_date))

    # Print data table.
    if results.get('rows', []):
        for row in results.get('rows'):
            for i in range(len(row)):
                old, new = row[i], str()
                for s in old:
                    new += s if s in string.printable else ''
                row[i] = new
            writer.writerow(row)
    else:
        print('No Rows Found')

    limit = results.get('totalResults')
    print(pag_index, 'of about', int(round(limit, -4)), 'rows.')
    return None

# Uncomment this line & replace with 'profile name': 'id' to query a single profile
# Delete or comment out this line to loop over multiple profiles.

# Brands
profile_ids = {'abc-Mobile': '12345',
               'abc-Desktop': '23456',
               'pqr-Mobile': '34567',
               'pqr-Desktop': '45678',
               'xyz-Mobile': '56789',
               'xyz-Desktop': '67890'}

date_ranges = [
    ('2017-01-24', '2017-01-24'),
    ('2017-01-25', '2017-01-25'),
    ('2017-01-26', '2017-01-26'),
    ('2017-01-27', '2017-01-27'),
    ('2017-01-28', '2017-01-28'),
    ('2017-01-29', '2017-01-29'),
    ('2017-01-30', '2017-01-30')
]

for profile in sorted(profile_ids):
    print("Sequence 1", profile)
    with open('qwerty.json') as json_data:
        d = json.load(json_data)
    for getThisReport in d["Reports"]:
        print("Sequence 2", getThisReport["ReportName"])
        reportName = getThisReport["ReportName"]
        metrics = getThisReport["Metrics"]
        dimensions = getThisReport["Dimensions"]
        sort = getThisReport["sort"]
        filters = getThisReport["filter"]
        path = 'C:\\Projects\\DataExport\\test\\'  # replace with path to your folder where csv file with data will be written
        today = time.strftime('%Y%m%d')
        filename = profile + '_' + reportName + '_' + today + '.csv'  # replace with your filename. Note %s is a placeholder variable and the profile name you specified on row 162 will be written here
        with open(path + filename, 'wt') as f:
            writer = csv.writer(f, delimiter='|', lineterminator='\n', quoting=csv.QUOTE_MINIMAL)
            args = [sys.argv, metrics, dimensions, reportName, sort, filters]
            if __name__ == '__main__': main(args)
    print("Profile done. Next profile...")

print("All profiles done.")
The Core Reporting API supports some interesting things as far as dates go.
All Analytics data requests must specify a date range. If you do not include start-date and end-date parameters in the request, the server returns an error. Date values can be for a specific date by using the pattern YYYY-MM-DD or relative by using today, yesterday, or the NdaysAgo pattern. Values must match [0-9]{4}-[0-9]{2}-[0-9]{2}|today|yesterday|[0-9]+(daysAgo).
So you can do something like:
start_date = '7daysAgo'
end_date = 'today'
Just remember that data hasn't completed processing for 24-48 hours, so your data for today, yesterday, and the day before may not be 100% accurate.
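If you still want explicit YYYY-MM-DD pairs (for example, one query per day so each request covers fewer sessions, which together with samplingLevel='HIGHER_PRECISION' makes sampling less likely), the date_ranges list can be built programmatically instead of being edited every week. A minimal, stdlib-only sketch, assuming you want the seven days ending yesterday:

import datetime

def last_n_days(n=7):
    # One (start, end) pair per day, oldest first, for the n days ending yesterday.
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    days = [yesterday - datetime.timedelta(days=i) for i in range(n)]
    return [(d.isoformat(), d.isoformat()) for d in reversed(days)]

date_ranges = last_n_days(7)
# e.g. [('2017-01-24', '2017-01-24'), ..., ('2017-01-30', '2017-01-30')]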

unable to use session variables outside view function

I have a login page where the user inputs a username and password. I have made the username a session variable in the /login view function, and I would like to use this variable outside the view function, in the main body of the code, in an if-else block.
session['username'] = request.form['username'].lower()
How do I do this?
Here is part of the code for this:
import os
import csv
import pymysql
import pymysql.cursors
from datetime import date
import calendar
import ssl
from ldap3 import Connection, Server, ANONYMOUS, SIMPLE, SYNC, ASYNC, ALL
from flask import Flask, make_response, render_template, url_for, redirect, request, session, escape
from validusers import users

app = Flask(__name__)

IT = pymysql.connect(host='xx.xx.xx.xx', user='xxxxx', password='xxxxx',
                     db='xxxx')  # Connect to the IT database
Others = pymysql.connect(host='xxxxx', user='xxxxxx', password='xxxxxx',
                         db='xxxxx')  # Connect to the non IT database
a = IT.cursor()  # Open Cursor for IT database
b = Others.cursor()  # Open Cursor for non-IT database

@app.route('/')
@app.route('/login', methods=['GET', 'POST'])
def login():
    error = None
    if request.method == 'POST':
        #if not request.form['username']:
            #error='You forgot to enter "Username", please try again'
            #return render_template('login.html',error=error)
        if request.form['username'].lower() not in users:
            error = 'You are not authorized to view this page !!'
            return render_template('login.html', error=error)
        #if not request.form['password']:
            #error='You forgot to enter "Password", please try again'
            #return render_template('login.html',error=error)
        #else:
            #s = Server('appauth.corp.domain.com:636', use_ssl=True, get_info=ALL)
            #c = Connection(s,user=request.form['username'],password=request.form['password'],check_names=True, lazy=False,raise_exceptions=False)
            #c.open()
            #c.bind()
            #if (c.bind() != True) is True:
                #error='Invalid credentials. Please try again'
            #else:
                #session['username'] = request.form['username'].lower()
                #return redirect(url_for('index'))
    return render_template('login.html', error=error)

@app.route('/index', methods=['GET', 'POST'])
def index():
    if 'username' in session:
        return render_template('index.html')

Filename = os.getenv("HOMEDRIVE") + os.getenv("HOMEPATH") + "\\Desktop\RosterUnified.csv"  # Create/write a CSV file on the user's desktop
Filename1 = os.getenv("HOMEDRIVE") + os.getenv("HOMEPATH") + "\\Desktop\RosterCurrentMonth.csv"
d = open(Filename, 'w', newline='\n')  # Format for CSV input
c = csv.writer(d)
c.writerow(["Manager NT ID", " Vertical Org", "Employee ID"] + dayssl)  # Write the header list of strings in the first row
for row in result_IT:
    c.writerow(row)  # Write output for IT to csv
d.close()

# The result_IT and result_Others part of the code is omitted

e = open(Filename, 'a', newline='\n')
f = csv.writer(e)
for row in result_Others:
    f.writerow(row)  # Append to the existing CSV file with non IT data
e.close()

x = session['username']
sql = "select verticalorg from tbl_employeedetails where empntid=(%s)"
args = x
a.execute(sql, args)
b.execute(sql, args)
c = a.fetchall()
d1 = b.fetchall()
s = c + d1
q = [x[0] for x in s]
sql1 = "select role from tbl_employeedetails where empntid=(%s)"
a.execute(sql1, args)
b.execute(sql1, args)
c1 = a.fetchall()
d2 = b.fetchall()
Role = c1 + d2
r = [x[0] for x in Role]
if r == 'O':
    if q == 27:
        f1 = open(Filename, 'r', newline='\n')
        f2 = open(Filename1, 'w', newline='\n')
        reader = csv.DictReader(f1)
        writer = csv.writer(f2)
        writer.writerow(["Manager NT ID", " Vertical Org", "Employee ID"] + dayssl)
        rows = [row for row in reader if row['Vertical Org'] == 'HR']
        writer.writerow[row in rows]
    elif q == 2:
        f1 = open(Filename, 'r', newline='\n')
        f2 = open(Filename1, 'w', newline='\n')
        reader = csv.DictReader(f1)
        writer = csv.writer(f2)
        writer.writerow(["Manager NT ID", " Vertical Org", "Employee ID"] + dayssl)
    f2.close()
    z = open(Filename1)
    with z as f:
        p = f.read()
else:
    z = open(Filename)
    with z as f:
        p = f.read()

@app.route('/csv/')
def download_csv():
    csv = p
    response = make_response(csv)
    cd = 'attachment; filename=RosterCurrentMonth.csv'
    response.headers['Content-Disposition'] = cd
    response.mimetype = 'text/csv'
    return response

z.close()
os.remove(Filename)

@app.route('/logout')
def logout():
    # remove the username from the session if it's there
    session.pop('username', None)
    return redirect(url_for('login'))

app.secret_key = 'secret key generated'

if __name__ == '__main__':
    context = ('RosterWeb.crt', 'RosterWeb.key')
    app.run(ssl_context=context, threaded=True, debug=True)
I am getting this error:
Traceback (most recent call last):
  File "roster.py", line 175, in <module>
    x=session['username']
  File "C:\Users\dasa17\Envs\r_web\lib\site-packages\werkzeug\local.py", line 373, in <lambda>
    __getitem__ = lambda x, i: x._get_current_object()[i]
  File "C:\Users\dasa17\Envs\r_web\lib\site-packages\werkzeug\local.py", line 302, in _get_current_object
    return self.__local()
  File "C:\Users\dasa17\Envs\r_web\lib\site-packages\flask\globals.py", line 37, in _lookup_req_object
    raise RuntimeError(_request_ctx_err_msg)
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
You need to place the code you want to run before any routing into a function and use the before_first_request decorator on it. It will be executed before your first request is handled, and you can make use of your session variable from there.
@app.route('/')
def index():
    # ... index route here ...
    pass

@app.before_first_request
def init_app():
    # ... do some preparation here ...
    session['username'] = 'me'
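Applied to the code in the question, that means the block that currently runs at import time (everything from x = session['username'] onward) would move into such a function. A rough sketch of that shape, where build_roster() is a hypothetical stand-in for the CSV-and-SQL logic above, not an existing helper:

@app.before_first_request
def prepare_roster():
    username = session['username']      # now runs inside a request context
    p = build_roster(username)          # hypothetical: the CSV/SQL code from the question
    app.config['ROSTER_CSV'] = p        # stash the result where the /csv/ route can read it

@app.route('/csv/')
def download_csv():
    response = make_response(app.config['ROSTER_CSV'])
    response.headers['Content-Disposition'] = 'attachment; filename=RosterCurrentMonth.csv'
    response.mimetype = 'text/csv'
    return response

Note that on the very first request the user may not have logged in yet, so in practice it may be simpler to do this work inside the /index view (or any view reached after login), where 'username' in session can be checked first.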

GeoDJango: retrieve last inserted primary key from LayerMapping

I am building an application with GeoDjango and I have the following problem:
I need to read track data from a GPX file, and that data should be stored in a model's MultiLineStringField field.
This should happen in the admin interface, where the user uploads a GPX file.
That is what I am trying to achieve: the data grabbed from the file should be assigned to the MultiLineStringField, while the other fields should get their values from the form.
My model is:
class GPXTrack(models.Model):
    nome = models.CharField("Nome", blank=False, max_length=255)
    slug = models.SlugField("Slug", blank=True)
    # sport natura arte/cultura
    tipo = models.CharField("Tipologia", blank=False, max_length=2, choices=TIPOLOGIA_CHOICES)
    descrizione = models.TextField("Descrizione", blank=True)
    gpx_file = models.FileField(upload_to='uploads/gpx/')
    track = models.MultiLineStringField(blank=True)
    objects = models.GeoManager()
    published = models.BooleanField("Pubblicato")
    rel_files = generic.GenericRelation(MyFiles)
    #publish_on = models.DateTimeField("Pubblicare il", auto_now_add = True)
    created = models.DateTimeField("Created", auto_now_add=True)
    updated = models.DateTimeField("Updated", auto_now=True)

    class Meta:
        #verbose_name = "struttura'"
        #verbose_name_plural = "strutture"
        ordering = ['-created']

    def __str__(self):
        return str(self.nome)

    def __unicode__(self):
        return '%s' % (self.nome)

    def put(self):
        self.slug = sluggy(self.nome)
        key = super(Foresta, self).put()
        # do something after save
        return key
While in the admin.py file I have overwritten the save method as follows:
from django.contrib.gis import admin
from trails.models import GPXPoint, GPXTrack
from django.contrib.contenttypes import generic
from django.contrib.gis.gdal import DataSource
#from gpx_mapping import GPXMapping
from django.contrib.gis.utils import LayerMapping
from django.template import RequestContext
import tempfile
import os
import pprint

class GPXTrackAdmin(admin.OSMGeoAdmin):
    list_filter = ('tipo', 'published')
    search_fields = ['nome']
    list_display = ('nome', 'tipo', 'published', 'gpx_file')
    inlines = [TrackImagesInline, TrackFilesInline]
    prepopulated_fields = {"slug": ("nome",)}

    def save_model(self, request, obj, form, change):
        """When creating a new object, set the creator field.
        """
        if 'gpx_file' in request.FILES:
            # Get
            gpxFile = request.FILES['gpx_file']
            # Save
            targetPath = tempfile.mkstemp()[1]
            destination = open(targetPath, 'wt')
            for chunk in gpxFile.chunks():
                destination.write(chunk)
            destination.close()
            # define fields of interest for LayerMapping
            track_point_mapping = {'timestamp': 'time',
                                   'point': 'POINT',
                                   }
            track_mapping = {'track': 'MULTILINESTRING'}
            gpx_file = DataSource(targetPath)
            mytrack = LayerMapping(GPXTrack, gpx_file, track_mapping, layer='tracks')
            mytrack.save()
            # remove the temp file saved
            os.remove(targetPath)
            orig = GPXTrack.objects.get(pk=mytrack.pk)
            # assign the parsed values from LayerMapping to the appropriate field
            obj.track = orig.track
        obj.save()
As far as I know:
LayerMapping cannot be used to update a field, only to save a new one;
I cannot access a specific field of the LayerMapping object (i.e., in the code above, mytrack.track) and assign its value to a model field (i.e., obj.track) in the save_model method;
I cannot retrieve the primary key of the last saved LayerMapping object (i.e., in the code above, mytrack.pk) in order to update it with the values passed in the form for the fields not mapped in LayerMapping.mapping.
What can I do then?!?!
I sorted it out by subclassing LayerMapping and adding a get_values() method that, instead of saving the retrieved data, returns it for further use or manipulation. The get_values method is a copy of the LayerMapping.save() method that returns the values instead of saving them.
I am using Django 1.5.
import os
import sys

# Extra imports needed by the body copied from LayerMapping.save()
from django.contrib.gis.gdal import OGRGeometry
from django.contrib.gis.utils import LayerMapping
from django.contrib.gis.utils.layermapping import LayerMapError
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction

class MyMapping(LayerMapping):
    def get_values(self, verbose=False, fid_range=False, step=False,
                   progress=False, silent=False, stream=sys.stdout, strict=False):
        """
        Returns the contents from the OGR DataSource Layer
        according to the mapping dictionary given at initialization.

        Keyword Parameters:
         verbose:
           If set, information will be printed subsequent to each model save
           executed on the database.

         fid_range:
           May be set with a slice or tuple of (begin, end) feature ID's to map
           from the data source. In other words, this keyword enables the user
           to selectively import a subset range of features in the geographic
           data source.

         step:
           If set with an integer, transactions will occur at every step
           interval. For example, if step=1000, a commit would occur after
           the 1,000th feature, the 2,000th feature etc.

         progress:
           When this keyword is set, status information will be printed giving
           the number of features processed and successfully saved. By default,
           progress information will be printed every 1000 features processed,
           however, this default may be overridden by setting this keyword with an
           integer for the desired interval.

         stream:
           Status information will be written to this file handle. Defaults to
           using `sys.stdout`, but any object with a `write` method is supported.

         silent:
           By default, non-fatal error notifications are printed to stdout, but
           this keyword may be set to disable these notifications.

         strict:
           Execution of the model mapping will cease upon the first error
           encountered. The default behavior is to attempt to continue.
        """
        # Getting the default Feature ID range.
        default_range = self.check_fid_range(fid_range)

        # Setting the progress interval, if requested.
        if progress:
            if progress is True or not isinstance(progress, int):
                progress_interval = 1000
            else:
                progress_interval = progress

        # Defining the 'real' save method, utilizing the transaction
        # decorator created during initialization.
        #self.transaction_decorator
        def _get_values(feat_range=default_range, num_feat=0, num_saved=0):
            if feat_range:
                layer_iter = self.layer[feat_range]
            else:
                layer_iter = self.layer

            for feat in layer_iter:
                num_feat += 1
                # Getting the keyword arguments
                try:
                    kwargs = self.feature_kwargs(feat)
                except LayerMapError, msg:
                    # Something borked the validation
                    if strict: raise
                    elif not silent:
                        stream.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg))
                else:
                    # Constructing the model using the keyword args
                    is_update = False
                    if self.unique:
                        # If we want unique models on a particular field, handle the
                        # geometry appropriately.
                        try:
                            # Getting the keyword arguments and retrieving
                            # the unique model.
                            u_kwargs = self.unique_kwargs(kwargs)
                            m = self.model.objects.using(self.using).get(**u_kwargs)
                            is_update = True

                            # Getting the geometry (in OGR form), creating
                            # one from the kwargs WKT, adding in additional
                            # geometries, and update the attribute with the
                            # just-updated geometry WKT.
                            geom = getattr(m, self.geom_field).ogr
                            new = OGRGeometry(kwargs[self.geom_field])
                            for g in new: geom.add(g)
                            setattr(m, self.geom_field, geom.wkt)
                        except ObjectDoesNotExist:
                            # No unique model exists yet, create.
                            m = self.model(**kwargs)
                    else:
                        m = self.model(**kwargs)

                    try:
                        # Attempting to save.
                        pippo = kwargs
                        num_saved += 1
                        if verbose: stream.write('%s: %s\n' % (is_update and 'Updated' or 'Saved', m))
                    except SystemExit:
                        raise
                    except Exception, msg:
                        if self.transaction_mode == 'autocommit':
                            # Rolling back the transaction so that other model saves
                            # will work.
                            transaction.rollback_unless_managed()
                        if strict:
                            # Bailing out if the `strict` keyword is set.
                            if not silent:
                                stream.write('Failed to save the feature (id: %s) into the model with the keyword arguments:\n' % feat.fid)
                                stream.write('%s\n' % kwargs)
                            raise
                        elif not silent:
                            stream.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg))

                # Printing progress information, if requested.
                if progress and num_feat % progress_interval == 0:
                    stream.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved))

            # Only used for status output purposes -- incremental saving uses the
            # values returned here.
            return pippo

        nfeat = self.layer.num_feat
        if step and isinstance(step, int) and step < nfeat:
            # Incremental saving is requested at the given interval (step)
            if default_range:
                raise LayerMapError('The `step` keyword may not be used in conjunction with the `fid_range` keyword.')
            beg, num_feat, num_saved = (0, 0, 0)
            indices = range(step, nfeat, step)
            n_i = len(indices)

            for i, end in enumerate(indices):
                # Constructing the slice to use for this step; the last slice is
                # special (e.g, [100:] instead of [90:100]).
                if i + 1 == n_i: step_slice = slice(beg, None)
                else: step_slice = slice(beg, end)

                try:
                    pippo = _get_values(step_slice, num_feat, num_saved)
                    beg = end
                except:
                    stream.write('%s\nFailed to save slice: %s\n' % ('=-' * 20, step_slice))
                    raise
        else:
            # Otherwise, just calling the previously defined _get_values() function.
            return _get_values()
In a custom save or save_model method you can then use:
track_mapping = {'nome': 'name',
'track' : 'MULTILINESTRING'}
targetPath = "/my/gpx/file/path.gpx"
gpx_file = DataSource(targetPath)
mytrack = MyMapping(GPXTrack, gpx_file, track_mapping, layer='tracks')
pippo = mytrack.get_values()
obj.track = pippo['track']
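In save_model, the object built from the form still has to be saved after the geometry is attached. A sketch of how the snippet above slots into the admin override from the question (the field names and temporary-file handling come from that code; this combined version is an illustration, not the poster's final code):

def save_model(self, request, obj, form, change):
    if 'gpx_file' in request.FILES:
        targetPath = tempfile.mkstemp()[1]
        with open(targetPath, 'wb') as destination:
            for chunk in request.FILES['gpx_file'].chunks():
                destination.write(chunk)
        gpx_file = DataSource(targetPath)
        mytrack = MyMapping(GPXTrack, gpx_file,
                            {'track': 'MULTILINESTRING'}, layer='tracks')
        values = mytrack.get_values()   # parsed data, nothing written to the DB yet
        obj.track = values['track']     # geometry goes onto the object built from the form
        os.remove(targetPath)
    obj.save()                          # one row containing both form data and the GPX track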