I have been working on Python code that searches for and downloads SMAP satellite data from the NSIDC HTTPS website. My code worked until last week, when it started failing with this error:
urllib2.HTTPError: HTTP Error 404: Not Found
Any help?
The code is an adaptation of an example script published on the NSIDC website that does exactly what I need. The example is below:
"""This script,, defines an HTML parser to scrape data files from an earthdata HTTPS URL and bulk downloads all files to your working directory.
This code was adapted from
Last edited Jan 26, 2017 G. Deemer"""
import urllib2
import os
from cookielib import CookieJar
from HTMLParser import HTMLParser
# Define a custom HTML parser to scrape the contents of the HTML data table
class MyHTMLParser(HTMLParser):
    """Collect the link texts of data files listed in the first HTML table.

    After ``feed()`` has been called, ``dataList`` holds the text of every
    ``<a>`` element found inside the first ``<table>`` whose ``href`` does
    not look like a directory link (contains ``/``) or a column-sort link
    (contains ``;``).
    """

    def __init__(self):
        # The base initialiser creates the parser's internal buffers;
        # feed() cannot work without it.
        HTMLParser.__init__(self)
        self.inLink = False      # True while inside a link to a data file
        self.dataList = []       # collected file names (link texts)
        self.directory = '/'     # marker of a directory link
        self.indexcol = ';'      # marker of an index-column sort link
        self.Counter = 0         # counts <table> and </table> tags seen

    def handle_starttag(self, tag, attrs):
        """Track table boundaries and flag links that point at data files."""
        self.inLink = False
        if tag == 'table':
            self.Counter += 1
        if tag == 'a':
            for name, value in attrs:
                if name == 'href':
                    if self.directory in value or self.indexcol in value:
                        # Navigation or sorting link, not a data file.
                        break
                    self.inLink = True
                    self.lasttag = tag

    def handle_endtag(self, tag):
        """Count the closing table tag so later tables are ignored."""
        if tag == 'table':
            self.Counter += 1

    def handle_data(self, data):
        """Store the text of a flagged link inside the first table."""
        # Counter == 1 means exactly one <table> has been opened and it has
        # not been closed yet.
        if self.Counter == 1:
            if self.lasttag == 'a' and self.inLink and data.strip():
                self.dataList.append(data)
parser = MyHTMLParser()
# Define function for batch downloading
def BatchJob(Files, cookie_jar):
    """Download every file named in *Files* into the working directory.

    Files: iterable of file names; each one is appended to the module-level
        ``url`` to form the request URL and is also used as the local
        file name.
    cookie_jar: value sent as the ``cookie`` header of the first request.

    Errors raised by ``urllib2.urlopen`` propagate to the caller.
    """
    for dat in Files:
        print("downloading:  %s" % dat)
        JobRequest = urllib2.Request(url + dat)
        JobRequest.add_header('cookie', cookie_jar)  # Pass the saved cookie into additional HTTP request
        JobRedirect_url = urllib2.urlopen(JobRequest).geturl() + '&app_type=401'
        # Request the resource at the modified redirect url
        Response = urllib2.urlopen(urllib2.Request(JobRedirect_url))
        try:
            # Write the payload and close the file; previously the file was
            # opened but nothing was written to it.
            with open(dat, 'wb') as f:
                f.write(Response.read())
        finally:
            Response.close()
    # Files are created relative to the current working directory, so that
    # is the location to report.
    print("Files downloaded to:  %s" % os.getcwd())
# The following code block is used for HTTPS authentication
# The user credentials that will be used to authenticate access to the data
username = "user"
password = "password"
# The FULL url of the directory which contains the files you would like to bulk download
url = "" # Example URL
# Create a password manager to deal with the 401 reponse that is returned from
# Earthdata Login
password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_manager.add_password(None, "", username, password)
# Create a cookie jar for storing cookies. This is used to store and return
# the session cookie given to use by the data server (otherwise it will just
# keep sending us back to Earthdata Login to authenticate). Ideally, we
# should use a file based cookie jar to preserve cookies between runs. This
# will make it much more efficient.
cookie_jar = CookieJar()
# Install all the handlers.
opener = urllib2.build_opener(
#urllib2.HTTPHandler(debuglevel=1), # Uncomment these two lines to see
#urllib2.HTTPSHandler(debuglevel=1), # details of the requests/responses
# Create and submit the requests. There are a wide range of exceptions that
# can be thrown here, including HTTPError and URLError. These should be
# caught and handled.
# Open a requeset to grab filenames within a directory. Print optional
DirRequest = urllib2.Request(url)
DirResponse = urllib2.urlopen(DirRequest)
# Get the redirect url and append 'app_type=401'
# to do basic http auth
DirRedirect_url = DirResponse.geturl()
DirRedirect_url += '&app_type=401'
# Request the resource at the modified redirect url
DirRequest = urllib2.Request(DirRedirect_url)
DirResponse = urllib2.urlopen(DirRequest)
DirBody =
# Uses the HTML parser defined above to pring the content of the directory containing data
Files = parser.dataList
# Display the contents of the python list declared in the HTMLParser class
# print Files #Uncomment to print a list of the files
# Call the function to download all files in url
BatchJob(Files, cookie_jar) # Comment out to prevent downloading to your working directory

I fixed the bug by loading the directory page directly and selecting the files to download from its HTML, as in the code below.
"""This script,, defines an HTML parser to scrape data files from an earthdata HTTPS URL and bulk downloads all files to your working directory.
This code was adapted from Last edited Jan 26, 2017 G. Deemer"""
import urllib2
import os
from cookielib import CookieJar
# Define function for batch downloading
def BatchJob(Files, cookie_jar):
    """Download every file named in *Files* into the working directory.

    Files: iterable of file names; each one is appended to the module-level
        ``url`` to form the request URL and is also used as the local
        file name.
    cookie_jar: value sent as the ``cookie`` header of the first request.

    Errors raised by ``urllib2.urlopen`` propagate to the caller.
    """
    for dat in Files:
        print("downloading:  %s" % dat)
        JobRequest = urllib2.Request(url + dat)
        JobRequest.add_header('cookie', cookie_jar)  # Pass the saved cookie into additional HTTP request
        JobRedirect_url = urllib2.urlopen(JobRequest).geturl() + '&app_type=401'
        # Request the resource at the modified redirect url
        Response = urllib2.urlopen(urllib2.Request(JobRedirect_url))
        try:
            # Write the payload and close the file; previously the file was
            # opened but nothing was written to it.
            with open(dat, 'wb') as f:
                f.write(Response.read())
        finally:
            Response.close()
    # Files are created relative to the current working directory, so that
    # is the location to report.
    print("Files downloaded to:  %s" % os.getcwd())
# The following code block is used for HTTPS authentication
# The user credentials that will be used to authenticate access to the data
username = "user"
password = "password"
# The FULL url of the directory which contains the files you would like to bulk download
url = "" # Example URL
# Create a password manager to deal with the 401 reponse that is returned from # Earthdata Login
password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
username, password)
# Create a cookie jar for storing cookies. This is used to store and return
# the session cookie given to use by the data server (otherwise it will just
# keep sending us back to Earthdata Login to authenticate). Ideally, we
# should use a file based cookie jar to preserve cookies between runs. This
# will make it much more efficient.
cookie_jar = CookieJar()
# Install all the handlers.
opener = urllib2.build_opener(
#urllib2.HTTPHandler(debuglevel=1), # Uncomment these two lines to see
#urllib2.HTTPSHandler(debuglevel=1), # details of the requests/responses
# Create and submit the requests. There are a wide range of exceptions that
# can be thrown here, including HTTPError and URLError. These should be
# caught and handled.
# Open a requeset to grab filenames within a directory. Print optional
# Fetch the directory listing page and read its HTML.
DirResponse = urllib2.urlopen(url)
htmlPage = DirResponse.read()
# Pull out every href that ends in .h5: split the page on anchor openings,
# keep the quoted href that starts each piece, and strip its quotes.
listFiles = []
for chunk in htmlPage.split("><a href="):
    href = chunk.split(">")[0]
    if href.endswith('.h5"'):
        listFiles.append(href.replace('"', ""))
# Display the file names scraped from the directory page
# print listFiles #Uncomment to print a list of the files
# Call the function to download all files in url
# (the list built above is named listFiles; ``Files`` is not defined here)
BatchJob(listFiles, cookie_jar) # Comment out to prevent downloading to your working directory


Authentication with GitLab to a terminal

I have a terminal that is served in a web browser with wetty. I want to authenticate the user through GitLab before letting them interact with the terminal. (The terminal runs inside a Docker container; once the user is authenticated I will allow them to see the container's terminal.)
I am trying to use OAuth 2.0 but have not managed to get it working.
This is what I tried:
I created an application on GitLab.
I took the client ID and secret and made an HTTP call with a Python script.
The script directed me to the login and authorization page.
I tried to get the code but failed (I don't think there is a mistake in the code).
This is where the problem starts. I need to get the authorization code from the redirect URL in order to obtain an access token, but I couldn't figure out how. I used the Flask library to receive the code.
from flask import Flask, abort, request
from uuid import uuid4
import requests
import requests.auth
import urllib2
import urllib
CLIENT_ID = "clientid"
CLIENT_SECRET = "clientsecret"
def user_agent():
raise NotImplementedError()
def base_headers():
return {"User-Agent": user_agent()}
app = Flask(__name__)
# NOTE(review): no route decorator is visible for this view — confirm it is
# registered with ``app``.
def homepage():
    """Return a link that starts the GitLab authorization flow."""
    # The template needs a %s placeholder; formatting a string without one
    # raises TypeError ("not all arguments converted").
    text = '<a href="%s">Authenticate with gitlab</a>'
    return text % make_authorization_url()
def make_authorization_url():
    """Build the OAuth authorization URL for a new login attempt.

    Returns:
        The authorize URL, carrying a freshly generated ``state`` value.
    """
    # Generate a random string for the state parameter
    # Save it for use later to prevent xsrf attacks
    state = str(uuid4())
    save_created_state(state)
    params = {"client_id": CLIENT_ID,
              "response_type": "code",
              "state": state,
              "redirect_uri": REDIRECT_URI,
              "scope": "api"}
    url = "https://GitlapDomain/oauth/authorize?" + urllib.urlencode(params)
    # NOTE(review): get_redirected_url is not defined in the visible code —
    # confirm it exists before relying on this debug output.
    print(get_redirected_url(url))
    return url
# Left as an exercise to the reader.
# You may want to store valid states in a database or memcache.
def save_created_state(state):
    """Remember *state* for later validation (placeholder: stores nothing)."""
    pass


def is_valid_state(state):
    """Report whether *state* was issued by us (placeholder: always True)."""
    return True
# NOTE(review): no route decorator is visible for this view — confirm it is
# registered at the path used in REDIRECT_URI.
def reddit_callback():
    """Handle the OAuth redirect and exchange the code for an access token.

    Returns an error string when the provider reports an error, aborts with
    HTTP 403 when the ``state`` parameter is not one we issued, and
    otherwise returns a message containing the authenticated username.
    """
    error = request.args.get('error', '')
    if error:
        return "Error: " + error
    state = request.args.get('state', '')
    if not is_valid_state(state):
        # Uh-oh, this request wasn't started by us!
        # Stop here instead of falling through to the token exchange.
        abort(403)
    code = request.args.get('code')
    access_token = get_token(code)
    # Note: In most cases, you'll want to store the access token, in, say,
    # a session for use in other parts of your web app.
    return "Your gitlab username is: %s" % get_username(access_token)
def get_token(code):
    """Exchange an authorization *code* for an access token.

    Args:
        code: the ``code`` query parameter received on the redirect.

    Returns:
        The ``access_token`` value from the token endpoint's JSON reply.

    Raises:
        requests.exceptions.HTTPError: if the endpoint answers with an
            error status.
    """
    client_auth = requests.auth.HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET)
    post_data = {"grant_type": "authorization_code",
                 "code": code,
                 "redirect_uri": REDIRECT_URI}
    headers = base_headers()
    # Send the credentials, headers and form data prepared above.
    response = requests.post("https://MyGitlabDomain/oauth/token",
                             auth=client_auth,
                             headers=headers,
                             data=post_data)
    # Surface HTTP failures directly rather than as a KeyError below.
    response.raise_for_status()
    token_json = response.json()
    return token_json["access_token"]
if __name__ == '__main__':"",debug=True, port=65010)
I think my problem is with my redirect URL, because it is just an unrelated link from GitLab and there is no API there that I can call.
If I could fire
that line from Python, my problem would probably be solved.
I need either a correction to my Python script or a different approach to solve my problem. Please help.
I had completely misunderstood the concept of OAuth 2.0. The main aim is to obtain the access_token. When I corrected the callback URL to localhost, it worked like a charm.

Python error "Connection reset by peer" in requests module

My goal is to create a persistent cookie on the fly by supplying a user ID and password, and then use that cookie in a POST request made through a session object. But the code below raises the following exception:
('Connection aborted.', error(54, 'Connection reset by peer'))
class CreatePersistentCookie(): """This class is created to generate a persistent cookie that can further be used through out session for all the service requests being executed"""
class CreatePersistentCookie():
"""This class is created to generate a persistent cookie that can further be
used through out session for all the service requests being executed"""
def __init__(self, headers, data, url, params, authserver):
self.headers = headers = data
self.url = url
self.params = params
self.authserver = authserver
def generateCookie(self):
with requests.session() as s:
reqsessionObj =,params = self.params)
reqCookie = reqsessionObj.request.headers['Cookie'] # this returns the Cookie i need
regexObj = re.compile(r'act-uat=\S+') # this is my app specific pattern search that returns the exact cookie text i need.
matchObj =
sessionCookie =
self.headers['Cookie'] = sessionCookie # adding Cookie attribute in headers.
r =, data=json.dumps(, headers=self.headers)
return r.raise_for_status()
except requests.exceptions.RequestException as err:
print err
def main():
# Defining the params variable. This contains authentication details such as user id,password & App id.
params = {"accountId": "John",
"accountPassword": "password",
"appIdKey": "5c9773e36fd6ea7cc2f9f8ffd9da3e3"
# Defining the authserver variable that contains the host details where authentication happens.
authserver = ''
# creating a object cookieObj from class CreatePersistentCookie that returns persistent cookie.
#print cookies
headers = {'Content-Type': 'application/json;charset=UTF-8',
data = {"appName":"abc","appKey":"abc","type":"jdbc","queryName":"xyz","version":"v1.2","useCache":"false","bindVars":[{"bindVarName":"In_dt","bindVarVal":"2014-05-13"},{"bindVarName":"In_Location","bindVarVal":"USA"}]}
url = ''
cookieObj = CreatePersistentCookie(headers, data, url, params, authserver)
if __name__ == '__main__':
"Connection reset by peer" indicates that the server you are trying to connect to closed the connection abruptly. Normally there is a handshake between your computer and the website's server, but here, for some reason, the server is dropping the connection. I would use the urllib, requests, mechanize, and cookielib modules (some of which only work in Python 2.7). Then, using urllib, you can attach a User-Agent header like Firefox's, which will trick the server into accepting the connection because it will think you are a regular person surfing the web, not a robot.
Try the command below in a terminal; it worked for me:
pip install requests[security]
In my case it worked from Postman but not from the Python script. Restarting the system fixed it.

Deleting a video from youtube YouTube Data API v3 and python

I'm developing an application using Django and AngularJS.
One of the major things the worker server (written in Python with Flask) does is download videos from S3 (uploaded by users) and upload them to YouTube.
Is there a way to delete a YouTube video in Python?
I could not find a code example for this written in Python.
Does anyone know how to do this simply, like the code example below?
This is the sample code for uploading a video. I referred to this code to implement the upload feature.
def get_authenticated_service(args):
flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE,
storage = Storage("%s-oauth2.json" % sys.argv[0])
credentials = storage.get()
if credentials is None or credentials.invalid:
credentials = run_flow(flow, storage, args)
def initialize_upload(youtube, options):
tags = None
if options.keywords:
tags = options.keywords.split(",")
# Call the API's videos.insert method to create and upload the video.
insert_request = youtube.videos().insert(
media_body=MediaFileUpload(options.file, chunksize=-1, resumable=True)
Make a file called:
Usage: python --id=MY_VID_ID
import httplib
import httplib2
import os
import random
import sys
import time
from apiclient.discovery import build
from apiclient.errors import HttpError
from apiclient.http import MediaFileUpload
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from import argparser, run_flow
# Explicitly tell the underlying HTTP transport library not to retry, since
# we are handling retry logic ourselves.
httplib2.RETRIES = 1
# Maximum number of times to retry before giving up.
# Always retry when these exceptions are raised.
RETRIABLE_EXCEPTIONS = (httplib2.HttpLib2Error, IOError, httplib.NotConnected,
httplib.IncompleteRead, httplib.ImproperConnectionState,
httplib.CannotSendRequest, httplib.CannotSendHeader,
httplib.ResponseNotReady, httplib.BadStatusLine)
# Always retry when an apiclient.errors.HttpError with one of these status
# codes is raised.
RETRIABLE_STATUS_CODES = [500, 502, 503, 504]
# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret. You can acquire an OAuth 2.0 client ID and client secret from
# the Google Developers Console at
# Please ensure that you have enabled the YouTube Data API for your project.
# For more information about using OAuth2 to access the YouTube Data API, see:
# For more information about the client_secrets.json file format, see:
CLIENT_SECRETS_FILE = "client_secrets.json"
# This OAuth 2.0 access scope allows an application to upload files to the
# authenticated user's YouTube channel, but doesn't allow other types of access.
# This variable defines a message to display if the CLIENT_SECRETS_FILE is
# missing.
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
with information from the Developers Console
For more information about the client_secrets.json file format, please visit:
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
VALID_PRIVACY_STATUSES = ("public", "private", "unlisted")
def get_authenticated_service(args):
flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE,
storage = Storage("%s-oauth2.json" % sys.argv[0])
credentials = storage.get()
if credentials is None or credentials.invalid:
credentials = run_flow(flow, storage, args)
if __name__ == '__main__':
argparser.add_argument("--id", required=True, help="Video youtube ID")
args = argparser.parse_args()
if not
exit("Please specify a youtube ID using the --id= parameter.")
youtube = get_authenticated_service(args)
resp = youtube.videos().delete(, onBehalfOfContentOwner=None).execute()
except HttpError, e:
print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
Assuming that you are using the python client library I found this in the documentation.
delete(id=*, onBehalfOfContentOwner=None) Deletes a YouTube video.
Args: id: string, The id parameter specifies the YouTube video ID
for the resource that is being deleted. In a video resource, the id
property specifies the video's ID. (required)
onBehalfOfContentOwner: string, Note: This parameter is intended
exclusively for YouTube content partners.

Serving Zip file Django

I'm following this solution (Serving dynamically generated ZIP archives in Django) to serve some zip files from django.
The idea is to select the files from a database using some check boxes, but I'm trying to make the example work with just 2 images.
import os
import zipfile
import StringIO
from django.http import HttpResponse
def getfiles(request):
    """Build an in-memory ZIP of a fixed list of files and return it.

    Args:
        request: the incoming Django request (not inspected here).

    Returns:
        An ``HttpResponse`` carrying the ZIP bytes as an attachment.

    Raises:
        OSError/IOError: if one of the listed files cannot be read.
    """
    import io  # local import: the file's import block does not provide it

    # Files (local path) to put in the .zip
    # FIXME: Change this (get paths from DB etc)
    filenames = ["/home/../image1.png", "/home/../image2.png"]
    # Folder name in ZIP archive which contains the above files
    # FIXME: Set this to something better
    zip_subdir = "somefiles"
    # The template needs a %s placeholder; "" % value raises TypeError.
    zip_filename = "%s.zip" % zip_subdir

    # In-memory buffer that receives the ZIP bytes
    buf = io.BytesIO()
    # The with-block closes the archive, which is what writes the central
    # directory; without closing, the archive is incomplete.
    with zipfile.ZipFile(buf, "w") as zf:
        for fpath in filenames:
            # Store each file under zip_subdir using only its base name
            fname = os.path.basename(fpath)
            zf.write(fpath, os.path.join(zip_subdir, fname))

    # ``content_type`` replaces the ``mimetype`` keyword, which newer Django
    # versions no longer accept.
    resp = HttpResponse(buf.getvalue(),
                        content_type="application/x-zip-compressed")
    # ..and correct content-disposition
    resp['Content-Disposition'] = 'attachment; filename=%s' % zip_filename
    return resp
I wrote getfiles(request) in my views file and I call it from the index view:
def index(request):
    """Render the index page, or return the ZIP download on POST."""
    if request.method == 'POST': # If the form has been submitted...
        form = FilterForm(request.POST) # A form bound to the POST data
        # do some validation and get latest_events from database
        # The download only reaches the browser if the view returns the
        # response built by getfiles(); previously it was discarded and the
        # HTML page was rendered instead.
        return getfiles(request)
    # NOTE(review): ``form`` and ``latest_events_list`` for non-POST requests
    # are presumably set up by code not shown here — confirm.
    context = {'latest_events_list': latest_events_list, 'form': form}
    return render(request, 'db_interface/index.html', context)
I know the getfiles() function is called, because if I put in names of nonexistent files I get an error, but I get neither a download nor an error if the filenames are correct (I used the full path /home/myuser/xxx/yyy/Project/app/static/app/image1.png).
I tried with the Django development server and with the Apache2/nginx server I have for production.
I also tried using content_type = 'application/force-download'.

Automating pulling csv files off google Trends

pyGTrends does not seem to work; it gives errors in Python.
pyGoogleTrendsCsvDownloader seems to work and logs in, but after 1-3 requests (per day!) it complains about an exhausted quota, even though manual download with the same login/IP works flawlessly.
Bottom line: neither works. Searching through Stack Overflow, I found many questions from people trying to pull CSVs from Google, but no workable solution.
Thank you in advance to whoever is able to help. How should the code be changed? Do you know of another solution that works?
Here's the code of pyGoogleTrendsCsvDownloader:
import httplib
import urllib
import urllib2
import re
import csv
import lxml.etree as etree
import lxml.html as html
import traceback
import gzip
import random
import time
import sys
from cookielib import Cookie, CookieJar
from StringIO import StringIO
class pyGoogleTrendsCsvDownloader(object):
Google Trends Downloader
Recommended usage:
from pyGoogleTrendsCsvDownloader import pyGoogleTrendsCsvDownloader
r = pyGoogleTrendsCsvDownloader(username, password)
r.get_csv(cat='0-958', geo='US-ME-500')
def __init__(self, username, password):
Provide login and password to be used to connect to Google Trends
All immutable system variables are also defined here
# The amount of time (in secs) that the script should wait before making a request.
# This can be used to throttle the downloading speed to avoid hitting servers too hard.
# It is further randomized.
self.download_delay = 0.25
self.service = "trendspro"
self.url_service = ""
self.url_download = self.url_service + "trendsReport?"
self.login_params = {}
# These headers are necessary, otherwise Google will flag the request at your account level
self.headers = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
("Accept-Language", "en-gb,en;q=0.5"),
("Accept-Encoding", "gzip, deflate"),
("Connection", "keep-alive")]
self.url_login = ''+self.service+'&passive=1209600&continue='+self.url_service+'&followup='+self.url_service
self.url_authenticate = ''
self.header_dictionary = {}
self._authenticate(username, password)
def _authenticate(self, username, password):
Authenticate to Google:
1 - make a GET request to the Login webpage so we can get the login form
2 - make a POST request with email, password and login form input values
# Make sure we get CSV results in English
ck = Cookie(version=0, name='I4SUserLocale', value='en_US', port=None, port_specified=False, domain='', domain_specified=False,domain_initial_dot=False, path='/trends', path_specified=True, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest=None)
self.cj = CookieJar()
self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
self.opener.addheaders = self.headers
# Get all of the login form input values
find_inputs = etree.XPath("//form[#id='gaia_loginform']//input")
resp =
if'Content-Encoding') == 'gzip':
buf = StringIO(
f = gzip.GzipFile(fileobj=buf)
data =
data =
xmlTree = etree.fromstring(data, parser=html.HTMLParser(recover=True, remove_comments=True))
for input in find_inputs(xmlTree):
name = input.get('name')
if name:
name = name.encode('utf8')
value = input.get('value', '').encode('utf8')
self.login_params[name] = value
print("Exception while parsing: %s\n" % traceback.format_exc())
self.login_params["Email"] = username
self.login_params["Passwd"] = password
params = urllib.urlencode(self.login_params), params)
def get_csv(self, throttle=False, **kwargs):
Download CSV reports
# Randomized download delay
if throttle:
r = random.uniform(0.5 * self.download_delay, 1.5 * self.download_delay)
params = {
'export': 1
params = urllib.urlencode(params)
r = + params)
# Make sure everything is working ;)
if not'Content-Disposition'):
print "You've exceeded your quota. Continue tomorrow..."
if'Content-Encoding') == 'gzip':
buf = StringIO(
f = gzip.GzipFile(fileobj=buf)
data =
data =
myFile = open('trends_%s.csv' % '_'.join(['%s-%s' % (key, value) for (key, value) in kwargs.items()]), 'w')
Although I don't know Python, I may have a solution. I am currently doing the same thing in C#, and though I didn't get the .csv file, I created a custom URL in code, then downloaded that HTML and saved it to a text file (also in code). In this HTML (at line 12) is all the information needed to create the graph that is used on Google Trends. However, it contains a lot of unnecessary text that needs to be cut down. Either way, you end up with the same result: the Google Trends data. I posted a more detailed answer to my question here:
Downloading .csv file from Google Trends
There is an alternative module named pytrends - It is really cool. I would recommend this.
Example usage:
import numpy as np
import pandas as pd
from pytrends.request import TrendReq
# Create the pytrends request object with its default settings
pytrend = TrendReq()
# The keyword list holds the term(s) that you want to search
pytrend.build_payload(kw_list=["Eminem is the Rap God"])
# Find which regions have searched the term; the result is kept in df
df = pytrend.interest_by_region()
If you have a list of terms to search, you could use a for loop to automate gathering the insights as you wish.