What is wrong with Django csv upload code? - django

Here is my code. I would like to import csv and save it to database via model.
class DataInput(forms.Form):
file = forms.FileField(label="Select CSV file")
def save(self, mdl):
records = csv.reader(self.cleaned_data["file"].read().decode('utf-8'), delimiter=',')
if mdl=='auction':
auction = Auction()
for line in records:
auction.auction_name = line[0]
auction.auction_full_name = line[1]
auction.auction_url = line[2]
auction.is_group = line[3]
auction.save()
Now, it throws the following error.
Exception Type: IndexError
Exception Value: list index out of range
csv file
RTS,Rapid Trans System,www.rts.com,TRUE
ZAA,Zelon Advanced Auton,www.zaa.info,FALSE
Really stuck. Please, help.

First of all, the full stacktrace should reveal exactly where the error is. Give Django the --traceback argument, e.g. ./manage.py --traceback runserver.
As Burhan Khalid mentioned 10 minutes ago you miss the 5th column in your csv file (index 4), so that is the root of the error.

Once you read the file with .read(), you are passing in the complete string - which is why each row is an individual character.
You need to pass the entire file object, without reading it first:
records = csv.reader(self.cleaned_data["file"], delimiter=',')
If you need to decode it first, then you had better run through the file yourself:
for line in self.cleaned_data['file'].read().decode('utf-8').split('\n'):
if line.strip():
try:
name, full_name, url, group = line.split(',')
except ValueError:
print('Invalid line: {}'.format(line))
continue
i = Auction()
i.auction_name = name
i.action_full_name = full_name
i.auction_url = url
i.is_group = group
i.save()

Related

Read custom input file(ldif) type/format in GCP DatafLow

I have ldif extension file from LDAP system. I am able to easily parse this in python and extract required data from file and insert into SQL server. My sample python looks like below.
import os
from ldif3 import LDIFParser
import pymssql
parser = LDIFParser(open('temp.ldiff', 'rb'))
def return_dictionary_element_if_present(dict_entry, element):
if dict_entry.get(element):
return dict_entry.get(element)[0]
return ''
def add_new_user():
for dn, entry in parser.parse():
dict_entry = dict(entry)
email = return_dictionary_element_if_present(dict_entry,'email')
password = return_dictionary_element_if_present(dict_entry,'password')
#some code to insert into SQL server
add_new_user()
But when i am looking to convert this to dataflow, iam unable to understand what and where to modify. My Data flow code looks something like below
class sqlserverwriteDoFn(beam.DoFn):
#insert statement
class CreateEntities(beam.DoFn):
def process(self, element):
#figure out how to return dictionary if parsed correctly
return [{"email": email, "password": password}]
def dataflow(input_file, pipeline_options):
print("starting")
options = GoogleCloudOptions.from_dictionary(pipeline_options)
with beam.Pipeline(options=options) as p:
(p | 'Reading Ldif data from GCS' >> beam.io.ReadFromText(input_file)
| 'Create entities' >> beam.ParDo(CreateEntities())
| 'Insert data to SQLSERVER' >> beam.ParDo(sqlserverwriteDoFn(pipeline_options['project']))
)
I think ReadFromText converts each line into pcollection which in my case does not work. Sample ldif file looks like this
dn: uid=12345,ab=users,xy=random
phone: 111
address: someaddress
email: true
username:abc
password:abc
dn: uid=12345,ab=users,xy=random
objectClass: inetOrgPerson
objectClass: top
phone: 111
address: someaddress
email: true
username:abcd
password:abcd
Any ideas is really appreciated as I am looking to import 50 million user names and passwords from LDIF file and definitely simple python for loop can not be scaled.
[Edit1] As per comments, modified code and getting some other error
def return_dictionary_element_if_present(dict_entry, element):
if dict_entry.get(element):
return dict_entry.get(element)[0]
return ''
class CreateEntities(beam.DoFn):
def process(self, file):
parser = LDIFParser(open(file, 'rb'))
arr=[]
for dn, entry in parser.parse():
dict1 ={}
dict_entry = dict(entry)
email = return_dictionary_element_if_present(dict_entry,'email')
password = return_dictionary_element_if_present(dict_entry,'password')
dict1['email'] = email
dict1['password'] = password
arr.append(dict1)
return arr
def dataflow(pipeline_options):
print("starting")
options = GoogleCloudOptions.from_dictionary(pipeline_options)
with beam.Pipeline(options=options) as p:
(p | 'Reading data from GCS' >> MatchFiles(file_pattern="temp.ldiff")
| 'file match' >> ReadMatches()
| 'Create entities' >> beam.ParDo(CreateEntities())
| 'print to screen' >> beam.Map(print)
)
Getting follwoing error
File "dataflow.py", line 26, in process
parser = LDIFParser(open(file, 'rb'))
TypeError: expected str, bytes or os.PathLike object, not ReadableFile [while running 'Create entities']
Edit2
changed one line of python code as below
parser = LDIFParser(file)
Got this error
File "dataflow.py", line 28, in process
for dn, entry in parser.parse():
File "C:\Users\sande\anaconda3\envs\saopaulo\lib\site-packages\ldif3.py", line 383, in parse
for block in self._iter_blocks():
File "C:\Users\sande\anaconda3\envs\saopaulo\lib\site-packages\ldif3.py", line 282, in _iter_blocks
for line in self._iter_unfolded_lines():
File "C:\Users\sande\anaconda3\envs\saopaulo\lib\site-packages\ldif3.py", line 263, in _iter_unfolded_lines
line = self._input_file.readline()
AttributeError: 'ReadableFile' object has no attribute 'readline' [while running 'Create entities']
How should i change my code so that error is resolved?
You are correct, TextIO in the Python SDK uses newline as the delimiter to separate elements. So each element produced is a single line of the input file.
In your original code you already have a parser that can read LDIF files. You can use that in your pipeline via a ParDo transform. I would recommend beginning with FileIO to create a PCollection of LDIF files, and then use those as input to your own ParDo which parses those files and outputs your records. Note that you will likely want to read on managing Beam Python dependencies if you want to use the existing parser on Dataflow, as your Dataflow worker will need access to that dependency.

file.write return invalid syntax only for one element

here is something I can't figure out.
I'm trying to get different things from a file (games.log) and store them into another.
But i'm getting "SyntaxError: invalid syntax" here : file2.write(f"USER_{index}\n")
def get_users():
# Open game log file
file = open('games.log', 'r')
lines = file.readlines()
# open/create user file to store data
file2 = open('users.log', 'w')
index = 0
for line in lines:
name = ''
guid = ''
ip = ''
player = ''
if 'spawned' in line:
line = remove_colorcode(line)
# Get IP
ip = get_ip(line)
# Get player number
player = line[line.find('Player') + 7: line.find('spawned')]
player.strip(' ')
# Get name
name = get_name(line)
# Get guid
guid = get_guid(line)
# Write to file
file2.write(f"USER_{index}\n")
file2.write(f"IP:{ip}\n")
file2.write(f"Name: {name}\n")
file2.write(f"ja_guid:{guid}\n\n")
index += 1
file.close()
file2.close()
file2.write(f"USER_{index}\n") is invalid syntax. But I can't figure why ?
SyntaxError: invalid syntax
Formatted string literals (f"...") are a feature of Python 3. (And only since Python 3.6.) Python 2 doesn't have them.
If you want to use them, use Python 3. That's a good plan in any case.

download log - modify and use last line

I'm trying to shorten or simplify my code.
I want to download a log file from an internal server which is updated every 10 seconds, but I'm only running my script every 10 or 15 minutes.
The log file is semicolon seperated and has many rows in it I don't use. So my workflow is as following.
get current date in YYYYMMDD format
download the file
delay for waiting that the file is finished downloading
trim the file to the rows I need
only process last line of the file
delete the files
I'm new to python and if you could help me to shorten/simplify my code in less steps I would be thankful.
import urllib
import time
from datetime import date
today = str(date.today())
import csv
url = "http://localserver" + today + ".log"
urllib.urlretrieve (url, "output.log")
time.sleep(15)
with open("output.log","rb") as source:
rdr= csv.reader(source, delimiter=';')
with open("result.log","wb") as result:
wtr= csv.writer( result )
for r in rdr:
wtr.writerow( (r[0], r[1], r[2], r[3], r[4], r[5], r[15], r[38], r[39], r[42], r[54], r[90], r[91], r[92], r[111], r[116], r[121], r[122], r[123], r[124]) )
with open('result.log') as myfile:
print (list(myfile)[-1]) #how do I access certain rows here?
You could probably make use of the advanced module, requests as below. The timeout can be increased depending on the time it takes for the download to complete successfully. Furthermore, the two with open statements can be consolidated in a single line. What is more, in order to load the line one by one in to the memory, we can make use of iter_lines generator. Note that stream=True should be set in order to load line one at a time.
from datetime import date
import csv
import requests
# Declare variables
today = str(date.today())
url = "http://localserver" + today + ".log"
outfile = 'output.log'
# Instead of waiting for 15 seconds explicitly consider using requests module
# with timeout parameter
response = requests.get(url, timeout=15, stream=True)
if response.status_code != 200:
print('Failed to get data:', response.status_code)
with open(outfile, 'w') as dest:
writer = csv.writer(dest)
# Walk through the request response line by line w/o loadin gto memory
line = list(response.iter_lines())[-1]
# Decode the response to string and split line by line
reader = csv.reader(line.decode('utf-8').splitlines(), delimiter=';')
# Read line by line for the splitted content and write to file
for r in reader:
writer.writerow((r[0], r[1], r[2], r[3], r[4], r[5], r[15], r[38], r[39], r[42], r[54], r[90], r[91], r[92],
r[111], r[116], r[121], r[122], r[123], r[124]))
print('File written successfully: ' + outfile)

3D Drawing from a file in an extra directory [duplicate]

I'm trying to get a data parsing script up and running. It works as far as the data manipulation is concerned. What I'm trying to do is set this up so I can enter multiple user defined CSV's with a single command.
e.g.
> python script.py One.csv Two.csv Three.csv
If you have any advice on how to automate the naming of the output CSV so that if input = test.csv, output = test1.csv, I'd appreciate that as well.
Getting
TypeError: coercing to Unicode: need string or buffer, list found
for the line
for line in csv.reader(open(args.infile)):
My code:
import csv
import pprint
pp = pprint.PrettyPrinter(indent=4)
res = []
import argparse
parser = argparse.ArgumentParser()
#parser.add_argument("infile", nargs="*", type=str)
#args = parser.parse_args()
parser.add_argument ("infile", metavar="CSV", nargs="+", type=str, help="data file")
args = parser.parse_args()
with open("out.csv","wb") as f:
output = csv.writer(f)
for line in csv.reader(open(args.infile)):
for item in line[2:]:
#to skip empty cells
if not item.strip():
continue
item = item.split(":")
item[1] = item[1].rstrip("%")
print([line[1]+item[0],item[1]])
res.append([line[1]+item[0],item[1]])
output.writerow([line[1]+item[0],item[1].rstrip("%")])
I don't really understand what is going on with the error. Can someone explain this in layman's terms?
Bear in mind I am new to programming/python as a whole and am basically learning alone, so if possible could you explain what is going wrong/how to fix it so I can note it for future reference.
args.infile is a list of filenames, not one filename. Loop over it:
for filename in args.infile:
base, ext = os.path.splitext(filename)
with open("{}1{}".format(base, ext), "wb") as outf, open(filename, 'rb') as inf:
output = csv.writer(outf)
for line in csv.reader(inf):
Here I used os.path.splitext() to split extension and base filename so you can generate a new output filename adding 1 to the base.
If you specify an nargs argument to .add_argument, the argument will always be returned as a list.
Assuming you want to deal with all of the files specified, loop through that list:
for filename in args.infile:
for line in csv.reader(open(filename)):
for item in line[2:]:
#to skip empty cells
[...]
Or if you really just want to be able to specify a single file; just get rid of nargs="+".

How to solve AttributeError in python active_directory?

Running the below script works for 60% of the entries from the MasterGroupList however suddenly fails with the below error. although my questions seem to be poor ou guys have been able to help me before. Any idea how I can avoid getting this error? or what is trhoughing off the script? The masterGroupList looks like:
Groups Pulled from AD
SET00 POWERUSER
SET00 USERS
SEF00 CREATORS
SEF00 USERS
...another 300 entries...
Error:
Traceback (most recent call last):
File "C:\Users\ks185278\OneDrive - NCR Corporation\Active Directory Access Scr
ipt\test.py", line 44, in <module>
print group.member
File "C:\Python27\lib\site-packages\active_directory.py", line 805, in __getat
tr__
raise AttributeError
AttributeError
Code:
from active_directory import *
import os
file = open("C:\Users\NAME\Active Directory Access Script\MasterGroupList.txt", "r")
fileAsList = file.readlines()
indexOfTitle = fileAsList.index("Groups Pulled from AD\n")
i = indexOfTitle + 1
while i <= len(fileAsList):
fileLocation = 'C:\\AD Access\\%s\\%s.txt' % (fileAsList[i][:5], fileAsList[i][:fileAsList[i].find("\n")])
#Creates the dir if it does not exist already
if not os.path.isdir(os.path.dirname(fileLocation)):
os.makedirs(os.path.dirname(fileLocation))
fileGroup = open(fileLocation, "w+")
#writes group members to the open file
group = find_group(fileAsList[i][:fileAsList[i].find("\n")])
print group.member
for group_member in group.member: #this is line 44
fileGroup.write(group_member.cn + "\n")
fileGroup.close()
i+=1
Disclaimer: I don't know python, but I know Active Directory fairly well.
If it's failing on this:
for group_member in group.member:
It could possibly mean that the group has no members.
Depending on how phython handles this, it could also mean that the group has only one member and group.member is a plain string rather than an array.
What does print group.member show?
The source code of active_directory.py is here: https://github.com/tjguk/active_directory/blob/master/active_directory.py
These are the relevant lines:
if name not in self._delegate_map:
try:
attr = getattr(self.com_object, name)
except AttributeError:
try:
attr = self.com_object.Get(name)
except:
raise AttributeError
So it looks like it just can't find the attribute you're looking up, which in this case looks like the 'member' attribute.