Downloading a CSV file from a URL using Python 2.7

I'm trying to download a CSV file from a URL and save it to a file on my hard drive. I'm using the following code on Python 2.7 but I'm getting an error. The CSV is located on a SharePoint site.
import urllib
import csv
url = 'https://office.com/sites/20Reporting/Lists/Reports/476%20-%2050%20DaySDShrink%20Report/476-%2030%20DaySDShrink.csv'
csv = urllib.urlopen(url).read() # returns type 'str'
with open('C:\Users\Documents\DPAM.csv', 'wb') as fx:
    fx.write(csv)
I'm getting the following error message.
IOError: ('http error', 401, 'Unauthorized', <httplib.HTTPMessage instance at 0x037541E8>)

Try something like this:
import urllib2,base64
import csv
username = "username"
password = "password"
url = 'https://office.com/sites/20Reporting/Lists/Reports/476%20-%2050%20DaySDShrink%20Report/476-%2030%20DaySDShrink.csv'
request = urllib2.Request(url)
base64string = base64.encodestring('%s:%s' % (username, password)).replace('\n', '')
request.add_header("Authorization", "Basic %s" % base64string)
csv = urllib2.urlopen(request).read()
with open('C:\Users\Documents\DPAM.csv', 'wb') as fx:
    fx.write(csv)
You can also try communicating with SharePoint via SOAP using urllib2.
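For reference, a rough sketch of what a SOAP call to SharePoint's Lists web service could look like with urllib2. This is an untested outline, not a drop-in solution: the site URL, list name and credentials are placeholders, and it assumes the classic Lists.asmx endpoint is reachable with Basic auth.
import urllib2, base64
# Placeholder values - adjust to your site, list and credentials.
site = 'https://office.com/sites/20Reporting'
username = 'username'
password = 'password'
soap_body = """<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
  <soap:Body>
    <GetListItems xmlns="http://schemas.microsoft.com/sharepoint/soap/">
      <listName>Reports</listName>
    </GetListItems>
  </soap:Body>
</soap:Envelope>"""
request = urllib2.Request(site + '/_vti_bin/Lists.asmx', data=soap_body)
request.add_header('Content-Type', 'text/xml; charset=utf-8')
request.add_header('SOAPAction', 'http://schemas.microsoft.com/sharepoint/soap/GetListItems')
request.add_header('Authorization', 'Basic %s' % base64.b64encode('%s:%s' % (username, password)))
print urllib2.urlopen(request).read()  # raw SOAP XML response containing the list items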

Related

AWS Lambda - Generate CSV In Memory and send it as an attachment to an Email

I'm trying to write an AWS Lambda service using Python 2.7 that will generate an In-Memory CSV file and email it as an attachment. I feel like I'm close with this script based on what I've learned but I'm not quite there.
# Import smtplib for the actual sending function
import smtplib
import sys
import csv
import cStringIO
from os.path import basename
# Import the email modules we'll need
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
server = smtplib.SMTP('smtp.postmarkapp.com', 587)
server.starttls()
server.login('.....','.....')
list = []
row1 = ["One","Two","Three"]
list.append(row1)
msg = MIMEMultipart()
msg['To'] = "daniel@mydomain.com"
msg['From'] = "noreply@mydomain.com"
msg['Subject'] = "DG Test subject"
msg.attach(MIMEText("Test Message"))
csv_buffer = cStringIO.StringIO()
writer = csv.writer(csv_buffer, lineterminator='\n')
writer.writerow(["1","2","3"])
for row in list:
    writer.writerow(row)
print(csv_buffer.getvalue())
msg.attach(csv_buffer)
try:
    response = server.sendmail(msg['From'], ["daniel@mydomain.com"], msg.as_string())
    server.quit()
except AttributeError as error:
    print(error)
else:
    print(response)
This gives me the following output, followed by an error:
1,2,3
One,Two,Three
'cStringIO.StringO' object has no attribute 'get_content_maintype'
Basically it comes down to not being sure how to use the csv_buffer object. I assume I just need to add that attribute to the object somehow, but I'm not quite sure how. If I try to add any additional arguments to the .attach() line, it complains that I have too many arguments.
Thanks!
I figured it out, thanks to stitching together a few SO posts.
import cStringIO
import csv
csv_buffer = cStringIO.StringIO()
writer = csv.writer(csv_buffer, delimiter=',', quoting=csv.QUOTE_ALL)
writer.writerow(["1","2","3"])
for row in list:
    writer.writerow(row)
print(csv_buffer.getvalue())
# new lines
csv_file = MIMEText(csv_buffer.getvalue())
csv_file.add_header('Content-Disposition', 'attachment', filename="csv_file.csv")
msg.attach(csv_file)
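Alternatively, since MIMEApplication is already imported in the original script, the buffer contents could be wrapped with it instead of MIMEText. A minimal sketch of that variant (the attachment filename here is just a placeholder):
from email.mime.application import MIMEApplication
# Wrap the CSV text in a MIME part and flag it as an attachment.
part = MIMEApplication(csv_buffer.getvalue(), Name='report.csv')
part['Content-Disposition'] = 'attachment; filename="report.csv"'
msg.attach(part)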

Python mechanize implementation of HTTP Basic Auth

I could get HTTP Basic Authentication to work using requests:
import requests
request = requests.post(url, auth=(user, password), data={'a': 'whatever'})
And also using urllib2 and urllib:
import urllib2, urllib
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, user, password)
auth_handler = urllib2.HTTPBasicAuthHandler(passman)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
content = urllib2.urlopen(url, urllib.urlencode({'a': 'whatever'}))
The problem is I get an unauthorized error when I try the same thing with mechanize:
import mechanize, urllib
from base64 import b64encode
browser = mechanize.Browser()
b64login = b64encode('%s:%s' % (user, password))
browser.addheaders.append(('Authorization', 'Basic %s' % b64login))
request = mechanize.Request(url)
response = mechanize.urlopen(request, data=urllib.urlencode({'a': 'whatever'}))
error:
HTTPError: HTTP Error 401: UNAUTHORIZED
The code I tried with mechanize could be trying to authenticate in a different way than the other two code snippets, so the question is how the same authentication process could be achieved in mechanize.
I am using Python 2.7.12.
The header should have been added to the request instead of the browser. In fact the browser variable isn't even needed.
import mechanize, urllib
from base64 import b64encode
b64login = b64encode('%s:%s' % (user, password))
request = mechanize.Request(url)
request.add_header('Authorization', 'Basic %s' % b64login)
response = mechanize.urlopen(request, data=urllib.urlencode({'a':'whatever'}))
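If you would rather keep using a Browser object, mechanize also exposes an add_password helper on the browser itself, which should answer the 401 challenge for you. A small untested sketch of that approach, using the same placeholder url, user and password as above:
import mechanize, urllib
browser = mechanize.Browser()
browser.add_password(url, user, password)  # register credentials for this URL
response = browser.open(url, data=urllib.urlencode({'a': 'whatever'}))
Note that challenge-based handlers like this only send credentials after the server replies 401, whereas the add_header approach above sends them preemptively with the first request.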

Using python to update a file on google drive

I have the following script to upload a file onto Google Drive, using Python 2.7. As it is now it uploads a new copy of the file, but I want the existing file to be updated/overwritten. I can't find help in the Google Drive API references and guides for Python. Any suggestions?
from __future__ import print_function
import os
from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    flags = None
# Gain access to Google Drive
SCOPES = 'https://www.googleapis.com/auth/drive.file'
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('client_secret.json', SCOPES)
    creds = tools.run_flow(flow, store, flags) \
        if flags else tools.run(flow, store)
DRIVE = build('drive', 'v3', http=creds.authorize(Http()))
#The file that is being uploaded
FILES = (
    ('all-gm-keys.txt', 'application/vnd.google-apps.document'),  # in google doc format
)
# Where the file ends up on google drive
for filename, mimeType in FILES:
    folder_id = '0B6V-MONTYPYTHONROCKS-lTcXc'  # Not the real folder id
    metadata = {'name': filename, 'parents': [folder_id]}
    if mimeType:
        metadata['mimeType'] = mimeType
    res = DRIVE.files().create(body=metadata, media_body=filename).execute()
    if res:
        print('Uploaded "%s" (%s)' % (filename, res['mimeType']))
I think you are looking for the update method. There is a link to the documentation at the end of this answer, with an example of overwriting the file in Python.
I think that using the official Google client API instead of raw HTTP requests should make your task easier.
from apiclient import errors
from apiclient.http import MediaFileUpload
# ...
def update_file(service, file_id, new_title, new_description, new_mime_type,
                new_filename, new_revision):
    """Update an existing file's metadata and content.

    Args:
        service: Drive API service instance.
        file_id: ID of the file to update.
        new_title: New title for the file.
        new_description: New description for the file.
        new_mime_type: New MIME type for the file.
        new_filename: Filename of the new content to upload.
        new_revision: Whether or not to create a new revision for this file.

    Returns:
        Updated file metadata if successful, None otherwise.
    """
    try:
        # First retrieve the file from the API.
        file = service.files().get(fileId=file_id).execute()
        # File's new metadata.
        file['title'] = new_title
        file['description'] = new_description
        file['mimeType'] = new_mime_type
        # File's new content.
        media_body = MediaFileUpload(
            new_filename, mimetype=new_mime_type, resumable=True)
        # Send the request to the API.
        updated_file = service.files().update(
            fileId=file_id,
            body=file,
            newRevision=new_revision,
            media_body=media_body).execute()
        return updated_file
    except errors.HttpError, error:
        print 'An error occurred: %s' % error
        return None
Link to the example: https://developers.google.com/drive/api/v2/reference/files/update#examples
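Note that the example above targets the v2 API (title, description, newRevision), while the script in the question builds a v3 service. A rough v3 equivalent, assuming you already know the ID of the file you want to overwrite, might look like this:
from apiclient.http import MediaFileUpload

def update_file_v3(service, file_id, filename, mime_type):
    """Overwrite the content of an existing Drive file using the v3 API."""
    media_body = MediaFileUpload(filename, mimetype=mime_type, resumable=True)
    # v3 uses 'name' instead of 'title' and has no newRevision flag.
    return service.files().update(fileId=file_id,
                                  body={'name': filename},
                                  media_body=media_body).execute()

# Example use with the DRIVE service built in the question ('your-file-id' is a placeholder):
# update_file_v3(DRIVE, 'your-file-id', 'all-gm-keys.txt', 'text/plain')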

How can I go to a specific page of a website, fetch the desired data using Python, and save it into an Excel sheet? This code needs the URL of the desired page.

import requests
from bs4 import BeautifulSoup
import xlrd
file = "C:\Users\Ashadeep\PycharmProjects\untitled1\xlwt.ashadee.xls"
workbook = xlrd.open_workbook(file)
sheet = workbook.sheet_by_index(0)
print(sheet.cell_value(0, 0))
r = requests.get(sheet.cell_value(0, 0))
soup = BeautifulSoup(r.content, "html.parser")
g_data = soup.find_all("div", {"class": "admissionhelp-left"})
print(g_data)
text = soup.find_all("Tel")
for item in g_data:
    print(item.text)
Are you trying to download an Excel file from the web and save it to your HDD? I don't see any URL, but you can try one of these 3 ideas.
import urllib
dls = "http://www.muellerindustries.com/uploads/pdf/UW SPD0114.xls"
urllib.urlretrieve(dls, "test.xls")
import requests
dls = "http://www.muellerindustries.com/uploads/pdf/UW SPD0114.xls"
resp = requests.get(dls)
with open('test.xls', 'wb') as output:
    output.write(resp.content)
Or, if you don't necessarily need to go through the browser, you can use the urllib module to save a file to a specified location.
import urllib
url = 'http://www.example.com/file/processing/path/excelfile.xls'
local_fname = '/home/John/excelfile.xls'
filename, headers = urllib.urlretrieve(url, local_fname)
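If instead the goal from the title is the other direction, writing the scraped text into an Excel sheet, the xlwt package hinted at in the question's file path can do that. A small sketch, assuming g_data holds the div elements from the question's code and using a placeholder output filename:
import xlwt
book = xlwt.Workbook()
out_sheet = book.add_sheet('scraped')
for row_num, item in enumerate(g_data):
    out_sheet.write(row_num, 0, item.text)  # one scraped block per row
book.save('scraped_data.xls')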

Downloading images with Python

I am working on a project where I need to download some images using Python. I have tried to fix it by doing different things, but it is still not working. Here is some code I found and tried to use, but it does not seem to work. To be honest I am a newbie at programming, so I would be grateful for some help.
Here is the code:
import json
import os
import time
import requests
import Image
from StringIO import StringIO
from requests.exceptions import ConnectionError
def go(query, pathA):
    BASE_URL = 'https://ajax.googleapis.com/ajax/services/search/images?'\
        'v=1.0&q=' + query + '&start=%d'
    BASE_PATH = os.path.join(pathA, query)
    if not os.path.exists(BASE_PATH):
        os.makedirs(BASE_PATH)
    start = 0
    while start < 60:
        r = requests.get(BASE_URL % start)
        for image_info in json.loads(r.text)['responseData']['results']:
            url = image_info['unescapedUrl']
            try:
                image_r = requests.get(url)
            except ConnectionError, e:
                print 'could not download %s' % url
                continue
            # Remove file-system path characters from name.
            title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '')
            fileII = open(os.path.join(BASE_PATH, '%s.jpg') % title, 'w')
            try:
                Image.open(StringIO(image_r.content)).save(fileII, 'JPEG')
            except IOError, e:
                # Throw away some gifs...blegh.
                print 'could not save %s' % url
                continue
            finally:
                fileII.close()
        print start
        start += 4  # 4 images per page.
        time.sleep(1.5)

# Example use
go('landscape', 'myDirectory')
The error I get when I run the code above is:
IOError: [Errno 22] invalid mode ('w') or filename: u'myDirectory\landscape\Na
ture - Photo gallery | MIRIADNA.COM.jpg'
Thanks in advance
This bit of code defines the filename your image will be saved under:
# Remove file-system path characters from name.
title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '')
Reading your error message: since 'w' is a valid mode for opening a file, the problem must be the filename (or the directory it points into), not the mode.
Try hardcoding the title to a simple and local path, such as
title = 'test'
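If you want to keep the real titles, here is a small sketch of stripping out the characters Windows rejects in filenames (and collapsing the newline that appears in your error message) before opening the file. It also opens the file in binary mode, which is safer for images on Windows:
import re

def safe_filename(title):
    """Drop characters that are invalid in Windows filenames."""
    title = re.sub(r'[\\/:*?"<>|]', '', title)
    return ' '.join(title.split())  # collapse newlines and repeated spaces

fileII = open(os.path.join(BASE_PATH, '%s.jpg' % safe_filename(title)), 'wb')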