IMPORTING CSV DATAS INTO MODEL - DJANGO - django

I'm implementing a list of clients. I want to give the user the possibility of importing new clients through a CSV file.
The client model has these fields: Client, name, surname, email, phone
So I created this model:
class CsvClient(models.Model):
    """Record of one uploaded CSV file of clients."""

    # The uploaded file itself, stored under the 'csv-cliente' upload path.
    file_name = models.FileField(upload_to='csv-cliente')
    # Filled in automatically when the record is first created.
    uploaded = models.DateTimeField(auto_now_add=True)
    # Defaults to False when the record is created.
    activated = models.BooleanField(default=False)

    def __str__(self):
        return f"File id: {self.id}"
and this function in views.py:
import csv
def importa_csv_clienti(request):
    """Handle the CSV upload form and create one ``Cliente`` per data row.

    On a valid submission the uploaded file is saved, the ``CsvClient``
    record with ``activated=False`` is fetched, every row after the header
    is turned into a ``Cliente``, and the record is then marked as
    activated.  The upload form is rendered in every case.
    """
    form = CVSForm(request.POST or None, request.FILES or None)
    if form.is_valid():
        form.save()
        form = CVSForm()  # show an empty form again after a successful upload
        clients = CsvClient.objects.get(activated=False)
        with open(clients.file_name.path, 'r') as f:
            reader = csv.reader(f)
            for i, row in enumerate(reader):
                if i == 0:
                    continue  # first row is the header
                # Glue the parsed cells back together, then re-split by hand.
                row = "".join(row)
                row = row.replace(";", " ")
                # NOTE: splitting on spaces also breaks up multi-word values
                # (e.g. a two-word surname), shifting the later columns.
                row = row.split(" ")
                client = row[0].capitalize()
                name = row[1].capitalize()
                surname = row[2].capitalize()
                value = Cliente.objects.create(
                    cliente=client,
                    nome=name,
                    cognome=surname,
                    email=row[3],
                    telefono=row[4],
                )
                print('oggetto creato:', value.cliente, value.nome, value.cognome, value.email, value.telefono)
        # Mark the upload as processed so it is not picked up again.
        clients.activated = True
        clients.save()
    context = {'form': form}
    template = 'importa.html'
    return render(request, template, context)
It works, except for the fact that if in the CSV file I have the row:
Nutella Antonio Dello Iudice
where
nutella is client
Antonio is name
Dello Iudice is surname
and email and phone are blank
basically it interprets it as if
Dello is the surname and Iudice is the email.
How do I tell it not to split the surname if it is composed of two or more words?
PS: this is just a curiosity, but how do I know whether the CSV file imported by the user is separated by ; and not by , ? I set ";" as the separator, so if the user imports a file that is separated by "," then my code won't work, right?

Right now you are replacing ";" with a space. So a row like Nutella;Antonio;Dello Iudice is converted to Nutella Antonio Dello Iudice.
After this replacement you do a split for whitespace.
Why not remove row = row.replace(";", " ") and just do row = row.split(";")?

If I understood your question right, you want to check what separator is used right?
One simple method would be to check the file for ; - and if the count of them is bigger or equals 4 (0;1;2;3;4) the separator is ;.
For the first question about splitting your surname:
# Current approach: rejoin the parsed cells, turn every ';' into a space,
# then split on spaces.
row = "".join(row)
row = row.replace(";", " ")
row = row.split(" ")
Why do you replace ; with blank spaces? Just split the row by your separator, since in a valid CSV the separator cannot appear inside an unquoted field.
# Rejoin the parsed cells, then split only on ';' so that spaces inside a
# value are left alone.
row = "".join(row)
row = row.split(";")
Now your surname is not split when containing a blankspace.

Related

Getting a variable from a field using arcpy

I am creating a toolbox tool using a python script that creates maps based on user input. I have created a map template that gets saved and altered with python. I am struggling on how to update some text in text boxes in the layout view using Arcpy. I was able to do it with dynamic text with data driven pages, but I couldn't find any python code to get data driven pages to refresh so I decided to try to update the text with python directly. With data driven pages, the dynamic text was pulling the text from an attribute table. I'm fairly new to python so am struggling with how to pull values from a table to use as part of the text. I am able to update text as long as I have the variable defined somewhere else (not from a table), but the only method I found to pull data from a table was with a search cursor but that returns a list rather than a value so I get an error. The feature classes with the text values I want only have one row in them so it is a list of one. How can I convert that list to a value. I am only including the applicable parts of the script. I also removed the actual paths from the code.
import arcpy
import os
# Script-tool inputs, read as text from parameter indexes 1 and 3.
ID = arcpy.GetParameterAsText(1)
city = arcpy.GetParameterAsText(3)
WS = os.path.join('path to gdb', "WS")
# Names of the attribute fields to read from the tables.
dfield = 'name'
datefield = 'date'
cfield = "county"
#Use SearchCursor - these features only have one row, but these are my problem because they are lists
# Each comprehension collects column 0 of every cursor row, so the result is
# a list even when the table has a single row.
# NOTE(review): `overview` and `mxd` are not defined in this excerpt —
# presumably set in the parts of the script that were left out.
wsname = [row[0] for row in arcpy.da.SearchCursor(WS, dfield)]
wsdate = [row[0] for row in arcpy.da.SearchCursor(WS, datefield)]
county = [row[0] for row in arcpy.da.SearchCursor(overview, cfield)]
#update text
# str.replace needs a string as its second argument; passing a list fails.
for elm in arcpy.mapping.ListLayoutElements(mxd, "TEXT_ELEMENT"):
elm.text = elm.text.replace('WS',wsname) #this doesn't work because wsname is a list
elm.text = elm.text.replace('City',city) #this works
elm.text = elm.text.replace('text2',"words"+ ID +" -more words") #This works
elm.text = elm.text.replace('Name', county) #this doesn't work because county is a list
elm.text = elm.text.replace('Date',wsdate) #this doesn't work because wsdate is a list
arcpy.RefreshActiveView()
mxd.save()
This code will work when run from an ArcGIS toolbox script tool.
# Define the .aprx project file and pick the layout to update.
import arcpy

aprx = arcpy.mp.ArcGISProject(r'path\to\the\arcgis\aprxfile.aprx')
# Index 0 selects the first layout in the list when the project has more
# than one layout.
aprxLayout = aprx.listLayouts()[0]

# Get the attribute values to use for the layout text elements.
fieldNames = ['FieldName1', 'FieldName2']
with arcpy.da.SearchCursor(r'path\to.gdb\featureclass', fieldNames) as sc:
    for row in sc:
        # Use the literal text 'Null' when an attribute has no value.
        # row[0] belongs to FieldName1, row[1] to FieldName2.
        field1Value = row[0] if row[0] is not None else 'Null'
        field2Value = row[1] if row[1] is not None else 'Null'

# Assign the attribute values to the layout text elements.  Replace each
# placeholder string with the name set in that element's properties.
for textElem in aprxLayout.listElements('TEXT_ELEMENT'):
    if textElem.name == 'name of layout text element in the element properties':
        textElem.text = field1Value
    if textElem.name == 'name of layout text element in the element properties':
        textElem.text = field2Value

# Write the result to a new project file rather than overwriting the source.
aprx.saveACopy(r'path/to/folder/projectname')
del aprx
I was able to tweak armedwiththeword's code to come up with this.
import arcpy
mxd = arcpy.mapping.MapDocument(path_to_mxd)

# Read the 'name' and 'date' attributes; a missing value becomes the text
# 'Null'.  After the loop the variables hold the values of the last row read.
fieldNames = ['name', 'date']
with arcpy.da.SearchCursor(WS, fieldNames) as ws_cursor:
    for ws_row in ws_cursor:
        field1Value = 'Null' if ws_row[0] is None else ws_row[0]
        field2Value = 'Null' if ws_row[1] is None else ws_row[1]

# Same treatment for the county name read from the overview table.
fieldName = ['CTY_NAME']
with arcpy.da.SearchCursor(overview, fieldName) as county_cursor:
    for county_row in county_cursor:
        field3Value = 'Null' if county_row[0] is None else county_row[0]

# Assign the attribute value to the layout text element, matched by the
# element's name.
for textElem in arcpy.mapping.ListLayoutElements(mxd, 'TEXT_ELEMENT'):
    element_name = textElem.name
    if element_name == 'title':
        textElem.text = field1Value + " words"
    elif element_name == 'subtitle':
        textElem.text = "WS -0" + ID + " -more words"
    elif element_name == 'city':
        textElem.text = city
    elif element_name == 'county':
        textElem.text = field3Value
    elif element_name == 'date':
        textElem.text = field2Value

Django : can't interrupt update function with redirect. Is it possible?

I use a function for updating a Model.
# Excerpt of the update view; the rest of the body is elided by the author.
def update_mapping(request, pk):
flow = Flow.objects.get(pk=pk)
mappings = MappingField.objects.filter(fl_id=pk)
# Whatever GetCsvHeadersAndSamples returns is used as-is from here on.
headers_samples = GetCsvHeadersAndSamples(request, pk)
[...]
In this function, I call another one (GetCsvHeadersAndSamples) to get data from a CSV file. Later, I use that data with JS in the template.
def GetCsvHeadersAndSamples(request, flow_id):
    """Return the header row and one sample row of a flow's CSV file.

    The file path, separator and encoding are taken from ``get_csv``.

    Returns:
        A dict with two keys: ``'headers'`` is the first row of the file and
        ``'samples'`` is the row found after skipping one further row.
    """
    get_file_and_attribs = get_csv(request, flow_id)
    file = get_file_and_attribs[0]
    separator = get_file_and_attribs[1]
    encoding = get_file_and_attribs[2]
    # The encoding belongs to open(); csv.reader only accepts dialect
    # parameters and raises TypeError for an `encoding` keyword.
    with open(file, newline='', encoding=encoding) as f:
        reader = csv.reader(f, delimiter=separator)
        headers = next(reader)
        # Keep using the same reader so the sample row is parsed with the
        # configured separator; skip one row and take the next.
        samples = next(itertools.islice(reader, 1, None))
    headersAndSamples = {'headers': headers, 'samples': samples}
    return headersAndSamples
For accessing CSV datas, I use another function for checking if the CSV still exists, in which case, I retrieve datas in it.
def get_csv(request, flow_id):
    """Locate a flow's CSV file and detect its encoding.

    Returns:
        ``(path, separator, encoding)`` when the file exists under
        ``settings.MEDIA_ROOT``.  Otherwise an error message is queued on
        the request and an ``HttpResponseRedirect`` to 'mappings-list' is
        returned instead, so callers receive one of two different types.
    """
    flow = Flow.objects.get(pk=flow_id)
    separator = flow.fl_separator
    path = os.path.join(settings.MEDIA_ROOT, str(flow.fl_file_name))
    if os.path.isfile(path):
        # Read the raw bytes directly.  Building a file:// URL from the path
        # misreads characters such as '#', '?' or '%' in the file name.
        with open(path, 'rb') as raw:
            file_2_test = raw.read()
        encoding = (chardet.detect(file_2_test))['encoding']
        return (path, separator, encoding)
    else:
        # print('No file')
        messages.error(request, "File not found or corrupted.")
        return HttpResponseRedirect(reverse('mappings-list', args=(flow_id,)))
I hoped that the return would "break" my original function and redirect to the 'mappings-list' page with the error message. But execution continues and returns to the GetCsvHeadersAndSamples function, which raises an error because the CSV data was not found. Note: the commented-out print does confirm that the file is not found.
It seems that the way I'm doing things is not the good one.

Django bulk create for non-repetitive entries

I want to insert data from an Excel file to the database, but I want to insert only non-repetitive ones.
I wrote this code, but the if statement is always False!
def ca_import(request):
    """Import stock names from the first sheet of an uploaded Excel file.

    Only names that are not already stored are created, and a name that
    occurs several times in the sheet is created once.  The upload form is
    rendered in every case; success or failure is reported through the
    messages framework.
    """
    uploadform = UploadFileForm(None)
    if request.method == 'POST':
        uploadform = UploadFileForm(request.POST, request.FILES)
        if uploadform.is_valid():
            file = uploadform.cleaned_data['docfile']
            workbook = openpyxl.load_workbook(filename=file, read_only=True)
            # Get name of the first sheet and then open sheet by name
            first_sheet = workbook.get_sheet_names()[0]
            worksheet = workbook.get_sheet_by_name(first_sheet)
            data = []
            # Names already handled in this sheet.  The database check alone
            # cannot catch repeats inside the file, because nothing is
            # written until bulk_create runs after the loop.
            seen = set()
            try:
                for row in worksheet.iter_rows(row_offset=1):  # Offset for header
                    name = row[0].value
                    if name in seen:
                        continue
                    seen.add(name)
                    if not StocksName.objects.filter(name=name).exists():
                        data.append(StocksName(name=name))
                StocksName.objects.bulk_create(data)
                messages.success(request, "Successful", extra_tags="saveexcel")
            except Exception:
                # Report import failures to the user, but let KeyboardInterrupt
                # and SystemExit propagate instead of swallowing them.
                messages.error(request, _('Error'), extra_tags="excelerror")
    return render(request, 'BallbearingSite/excelfile.html', {'uploadform': uploadform})
Any suggestion to solve it?
If your data has a unique id, then you can use get_or_create() or update_or_create() instead of bulk_create().
Otherwise you will have to write the logic to check if each line already exists in your model.

Django : Aggregate Sum works on view only after a refresh

I have an Invoice model in Django that has multiple Line models.(line items that have a title, unit price , and qty)
I have a 'addline' view that allows to add item lines to the invoice.
The view also displays all current invoice lines and a calculated sum of the total price for all line items.
when I submit a new line item, the view refreshes to the same page and the line item appears properly, but the total (totalservices or totalgoods) of line items is not updated .
It becomes updated when i refresh the page manually , or when i add another line -with the previous line total.
here is my relevant view
# View that adds a line item to an invoice and renders the invoice's lines
# together with the summed totals per line type.
def addline(request, id):
form = AddLineForm(request.POST or None )
invoice = get_object_or_404(Invoice, id = id)
linelist = Line.objects.filter(invoice = id).order_by('created_at')
# Split the invoice's lines by line_type: "S" and "G".
servicelines = linelist.filter(line_type = "S")
goodslines = linelist.filter(line_type = "G")
# NOTE: both sums are computed here, before the submitted form is saved
# further down, so they cannot include a line created by this request.
totalservice = servicelines.aggregate(Sum('line_total'))['line_total__sum']
print servicelines.aggregate(Sum('line_total'))
totalgoods = goodslines.aggregate(Sum('line_total'))['line_total__sum']
if form.is_valid():
instance = form.save(commit=False) #do schtuff with data
instance.invoice = invoice
# Line total is unit price times quantity.
instance.line_total = instance.unit_price *instance.qty
#print instance.line_total
if form.cleaned_data.get('overwrite'):
invoice.invoiced_service = totalservice or 0 #or zero to prevent fuss if list is empty
invoice.invoiced_goods = totalgoods or 0
invoice.save()
form.save()
# Replace the bound form with an empty one for the next entry.
form = AddLineForm()
context = {'inv': invoice, 'form': form, 'lines':linelist, 'goods': goodslines, 'services': servicelines ,'totalservice' : totalservice, 'totalgoods':totalgoods }
return render(request,'testpaper.html', context)
Thanks in advance, I'm not sure what could be the problem. Maybe that the Sum is lazy and not evaluated ?
*edited to reflect actual view
Found the problem : the 'if form.is_valid 'block that ends in form.save() needed to be before the block that calculates lines , otherwise lines are calculated before current item has been saved .

How to access the values inside the 'files' field in scrapy

I have downloaded some files using the file pipeline and i want to get the values of the files field. I tried to print item['files'] and it gives me a key error. Why is this so and how can i do it?
# Crawl spider restricted to flu-card.com: follows every <a> link and hands
# each fetched page to parse_page.
class testspider2(CrawlSpider):
name = 'genspider'
URL = 'flu-card.com'
URLhttp = 'http://www.flu-card.com'
allowed_domains = [URL]
start_urls = [URLhttp]
rules = (
[Rule(LxmlLinkExtractor(allow = (),restrict_xpaths = ('//a'),unique = True,),callback='parse_page',follow=True),]
)
# Collects the links found on a page and yields items for them.
def parse_page(self, response):
# NOTE(review): '//a/#href' is not valid XPath; '//a/@href' was probably
# intended — confirm against the original source.
List = response.xpath('//a/#href').extract()
item = GenericspiderItem()
date = strftime("%Y-%m-%d %H:%M:%S")#get date&time dd-mm-yyyy hh:mm:ss
MD5hash = '' #store as part of the item, some links crawled are not file links so they do not have values on these fields
fileSize = ''
newFilePath = ''
# NOTE: this handle is opened in append mode and is never closed in the
# code shown.
File = open('c:/users/kevin123/desktop//ext.txt','a')
for links in List:
# Links that do not contain the site URL are resolved against the page URL
# (with any trailing slash removed).
if re.search('http://www.flu-card.com', links) is None:
responseurl = re.sub('\/$','',response.url)
url = urljoin(responseurl,links)
else:
url = links
#File.write(url+'\n')
filename = url.split('/')[-1]
# Last three characters of the file name, used as a rough extension.
fileExt = ''.join(re.findall('.{3}$',filename))
if (fileExt != ''):
blackList = ['tml','pdf','com','php','aspx','xml','doc']
# NOTE(review): the any() test below does not use `word`, so every pass of
# this loop evaluates the same condition.
for word in blackList:
if any(x in fileExt for x in blackList):
pass #url is blacklisted
else:
item['filename'] = filename
item['URL'] = url
item['date'] = date
# 'files' is never assigned on `item` anywhere in this method.
print item['files']
File.write(fileExt+'\n')
# A second, separate item carrying only file_urls is yielded here, in
# addition to `item` below.
yield GenericspiderItem(
file_urls=[url]
)
yield item
It is not possible to access item['files'] in your spider. That is because the files are downloaded by the FilesPipeline, and items only reach the pipelines after they leave your spider.
You first yield the item, then it gets to the FilesPipeline, then the files are downloaded, and only then is the files field populated with the info you want. To access it, you have to write a pipeline and schedule it after the FilesPipeline. Inside your pipeline, you can access the files field.
Also note that, in your spider, you are yielding two different kinds of items!