fetching various data corresponding to a tweet - python-2.7

I am trying to fetch data from Twitter for processing. Please see the code below. I want various fields for each tweet on a given topic. I am able to fetch created_at, text, username and user_id, but it throws an error when I try to fetch location, followers_count, friends_count and retweet_count.
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
import json

ckey = '***********************'
csecret = '************************'
atoken = '*************************'
asecret = '**********************'

class listener(StreamListener):
    def on_data(self, data):
        try:
            all_data = json.loads(data)
            tweet = all_data["text"]
            username = all_data["user"]["screen_name"]
            timestamp = all_data["created_at"]
            user_id = all_data["id_str"]
            location = all_data["location"]
            followers_count = all_data["followers_count"]
            friends_count = all_data["friends_count"]
            retweet_count = all_data["retweet_count"]
            saveThis = str(time.time())+'::'+timestamp+'::'+username+'::'+user_id+'::'+tweet+'::'+followers_count+'::'+friends_count+'::'+retweet_count+'::'+location
            saveFile = open('clean2.txt', 'a')
            saveFile.write(saveThis)
            saveFile.write('\n')
            saveFile.close()
            return True
        except BaseException, e:
            print 'failed on data,', str(e)
            time.sleep(5)

    def on_error(self, status):
        print status

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=["tweepy"])  # topic

The reason it fails on all_data["location"] is that tweets don't have such a property: https://dev.twitter.com/overview/api/tweets
The same goes for friends_count and followers_count - they are properties of users, not tweets.
The code should not be failing on all_data["retweet_count"], as tweets do have that property.
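A minimal fix, assuming the same all_data dictionary from the code above, is to read those fields from the nested "user" object. Note the counts come back as integers, so cast them to str before concatenating them into saveThis:

location = all_data["user"]["location"]
followers_count = str(all_data["user"]["followers_count"])
friends_count = str(all_data["user"]["friends_count"])
retweet_count = str(all_data.get("retweet_count", 0))  # tweet-level field; .get() guards against messages that omit it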
P.S. Please include the error message (even if you skip the full traceback) when reporting errors. It makes it easier to help you; otherwise one has to guess what the error might be.

Related

Zoho CRM Python SDK v2 initialization problem for Django

I'm trying to integrate the Zoho CRM v2 SDK with my Django app.
On the Django runserver, I'm able to get access tokens using the refresh method and store them in the zcrm_oauthtokens.pkl file. The SDK then automatically refreshes the access token using the refresh token, so no problem there. However, on my production server (Heroku) I'm getting this error message:
2019-01-16T11:07:22.314759+00:00 app[web.1]: 2019-01-16 11:07:22,314 - Client_Library_OAUTH - ERROR - Exception occured while fetching oauthtoken from db; Exception Message::'NoneType' object has no attribute 'accessToken'
It seems to me that the tokens are being saved to the file, but when the SDK tries to access them it looks for them in a DB rather than in the file specified by token_persistence_path.
In my settings.py I have this:
ZOHO_CLIENT_ID = config('ZOHO_CLIENT_ID')
ZOHO_CLIENT_SECRET = config('ZOHO_CLIENT_SECRET')
ZOHO_REDIRECT_URI = config('ZOHO_REDIRECT_URI')
ZOHO_CURRENT_USER_EMAIL = 'jamesalexander#mylastwill.co.uk'
ZOHO_PATH = os.path.join(BASE_DIR, 'wills_online', 'zoho')
zoho_config = {'apiBaseUrl': "https://www.zohoapis.com",
               'currentUserEmail': ZOHO_CURRENT_USER_EMAIL,
               'client_id': ZOHO_CLIENT_ID,
               'client_secret': ZOHO_CLIENT_SECRET,
               'redirect_uri': ZOHO_REDIRECT_URI,
               'token_persistence_path': ZOHO_PATH}
and in a views file I have this:
from zcrmsdk import *
import logging
from django.shortcuts import HttpResponse
from wills.models import PersonalDetails, ZoHoRecord, WillDocument
from wills_online.decorators import start_new_thread
from wills_online.settings import zoho_config

logger = logging.getLogger(__name__)

class ZohoRunOnce:
    def __init__(self):
        self.already_run = False

    def run_once(self):
        if not self.already_run:
            print('zoho init run once')
            ZCRMRestClient.initialize(zoho_config)
            self.already_run = True

zoho_init = ZohoRunOnce()
zoho_init.run_once()
print(zoho_config['token_persistence_path'])

def zoho_callback():
    return HttpResponse(200)

@start_new_thread
def zoho_personal_details(request):
    """ updates or creates a user account on zoho on profile completion """
    personal_details_ob = PersonalDetails.objects.get(user=request.user)
    zoho_ob = ZoHoRecord.objects.get(user=request.user)
    try:
        if zoho_ob.account:
            record = ZCRMRecord.get_instance('Accounts', zoho_ob.account)
            record.set_field_value('Account_Name', request.user.email)
            record.set_field_value('Name', personal_details_ob.full_name)
            record.set_field_value('Email', request.user.email)
            record.set_field_value('Address_Line_1', personal_details_ob.address_line_1)
            record.set_field_value('Address_Line_2', personal_details_ob.address_line_2)
            record.set_field_value('Post_Town', personal_details_ob.post_town)
            record.set_field_value('Post_Code', personal_details_ob.post_code)
            record.set_field_value('Dob_Day', personal_details_ob.dob_day)
            record.set_field_value('Dob_Month', personal_details_ob.dob_month)
            record.set_field_value('Dob_Year', personal_details_ob.dob_year)
            record.set_field_value('Gender', personal_details_ob.sex)
            record.set_field_value('Marital_Status', personal_details_ob.marital_status)
            record.set_field_value('Partner_Name', personal_details_ob.partner_full_name)
            record.set_field_value('Partner_Gender', personal_details_ob.partner_gender)
            record.set_field_value('Partner_Email', personal_details_ob.partner_email)
            record.set_field_value('Children', personal_details_ob.children)
            record.set_field_value('Pets', personal_details_ob.pets)
            record.update()
        else:
            user = ZCRMUser.get_instance(name='James Alexander')
            record = ZCRMRecord.get_instance('Accounts')
            record.set_field_value('Account_Owner', user)
            record.set_field_value('Account_Name', request.user.email)
            record.set_field_value('Name', personal_details_ob.full_name)
            record.set_field_value('Email', request.user.email)
            record.set_field_value('Address_Line_1', personal_details_ob.address_line_1)
            record.set_field_value('Address_Line_2', personal_details_ob.address_line_2)
            record.set_field_value('Post_Town', personal_details_ob.post_town)
            record.set_field_value('Post_Code', personal_details_ob.post_code)
            record.set_field_value('Dob_Day', personal_details_ob.dob_day)
            record.set_field_value('Dob_Month', personal_details_ob.dob_month)
            record.set_field_value('Dob_Year', personal_details_ob.dob_year)
            record.set_field_value('Gender', personal_details_ob.sex)
            record.set_field_value('Marital_Status', personal_details_ob.marital_status)
            record.set_field_value('Partner_Name', personal_details_ob.partner_full_name)
            record.set_field_value('Partner_Gender', personal_details_ob.partner_gender)
            record.set_field_value('Partner_Email', personal_details_ob.partner_email)
            record.set_field_value('Children', personal_details_ob.children)
            record.set_field_value('Pets', personal_details_ob.pets)
            response = record.create()
            # save account id to db for future updates
            zoho_ob.account = response.details['id']
            zoho_ob.save()
    except ZCRMException as ex:
        logger.log(1, ex.status_code)
        logger.log(1, ex.error_message)
        logger.log(1, ex.error_details)
        logger.log(1, ex.error_content)
        print(ex.status_code)
        print(ex.error_message)
        print(ex.error_content)
        print(ex.error_details)
I've tried running ZCRMRestClient.initialize(zoho_config) in settings.py, with no luck.
My method for getting the access token and refresh token, which seems to work, is:
import os
import pprint
from sys import argv

import django
import requests
import zcrmsdk
from django.conf import settings

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'wills_online.settings')
django.setup()

def zoho_refresh_token(code):
    """ supply a self client token from the zoho api credentials from web site """
    zoho_config = {"apiBaseUrl": "https://www.zohoapis.com",
                   "currentUserEmail": settings.ZOHO_CURRENT_USER_EMAIL,
                   "client_id": settings.ZOHO_CLIENT_ID,
                   "client_secret": settings.ZOHO_CLIENT_SECRET,
                   "redirect_uri": settings.ZOHO_REDIRECT_URI,
                   "token_persistence_path": settings.ZOHO_PATH}
    pprint.pprint(zoho_config)
    print('working')
    address = f'https://accounts.zoho.com/oauth/v2/token?code={code}&redirect_uri={settings.ZOHO_REDIRECT_URI}&client_id={settings.ZOHO_CLIENT_ID}&client_secret={settings.ZOHO_CLIENT_SECRET}&grant_type=authorization_code'
    response = requests.post(address)
    data = response.json()
    pprint.pprint(data)
    zcrmsdk.ZCRMRestClient.initialize(zoho_config)
    oauth_client = zcrmsdk.ZohoOAuth.get_client_instance()
    refresh_token = data['refresh_token']
    print(type(refresh_token))
    oauth_client.generate_access_token_from_refresh_token(refresh_token, settings.ZOHO_CURRENT_USER_EMAIL)
    print(refresh_token)
    print('finished')

if __name__ == '__main__':
    zoho_refresh_token(argv[1])
This is driving me mad. Help would be greatly appreciated. This is my first post so go easy, lol.
For future reference, you will need to define persistence_handler_class and persistence_handler_path in your configuration dictionary. You will also need a handler class and a user-defined model to store the results. Sample code follows:
# settings.py
import zcrmsdk

configuration_dictionary = {
    'apiBaseUrl': 'https://www.zohoapis.com',
    'apiVersion': 'v2',
    'currentUserEmail': ZOHO_CURRENT_USER_EMAIL,
    'sandbox': 'False',
    'applicationLogFilePath': '',
    'client_id': ZOHO_CLIENT_ID,
    'client_secret': ZOHO_CLIENT_SECRET,
    'redirect_uri': ZOHO_REDIRECT_URI,
    'accounts_url': 'https://accounts.zoho.com',
    'access_type': 'online',
    'persistence_handler_class': ZOHO_HANDLER_CLASS,
    'persistence_handler_path': ZOHO_HANDLER_PATH,
}

zcrmsdk.ZCRMRestClient.initialize(configuration_dictionary)
# zoho.models.py
from django.db import models
from zcrmsdk.OAuthClient import ZohoOAuthTokens

class ZohoOAuthHandler:
    @staticmethod
    def get_oauthtokens(email_address):
        oauth_model_instance = ZohoOAuth.objects.get(user_email=email_address)
        return ZohoOAuthTokens(oauth_model_instance.refresh_token,
                               oauth_model_instance.access_token,
                               oauth_model_instance.expiry_time,
                               user_email=oauth_model_instance.user_email)

    @staticmethod
    def save_oauthtokens(oauth_token):
        defaults = {
            'refresh_token': oauth_token.refreshToken,
            'access_token': oauth_token.accessToken,
            'expiry_time': oauth_token.expiryTime,
        }
        ZohoOAuth.objects.update_or_create(user_email=oauth_token.userEmail, defaults=defaults)

class ZohoOAuth(models.Model):
    refresh_token = models.CharField(max_length=250)
    access_token = models.CharField(max_length=250)
    expiry_time = models.BigIntegerField()
    user_email = models.EmailField()
In this example ZOHO_HANDLER_CLASS = 'ZohoOAuthHandler' and ZOHO_HANDLER_PATH = 'zoho.models'
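That is, assuming the settings module shown above, those two extra settings would look something like this:

ZOHO_HANDLER_CLASS = 'ZohoOAuthHandler'
ZOHO_HANDLER_PATH = 'zoho.models'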
The first time you go to use this you will need a grant_token from https://accounts.zoho.com/developerconsole. For the scope use aaaserver.profile.READ,ZohoCRM.modules.ALL to start (see https://www.zoho.com/crm/developer/docs/api/oauth-overview.html#scopes)
Before you can use the api you'll need to run the code below in a django shell. This uses a grant token to generate your initial access and refresh tokens. Afterwards, the api should handle refreshing your access token.
grant_token = GRANT_TOKEN
import zcrmsdk
oauth_client = zcrmsdk.ZohoOAuth.get_client_instance()
oauth_tokens = oauth_client.generate_access_token(grant_token)

Automating Date Range while extracting

The script below is what I am using to extract data from Google Analytics; here I am extracting data for the last week. I want to automate the date range so that I don't have to change date_ranges every week.
I also want to avoid sampling of the data by GA. Please guide me on the correct way to automate this.
__author__ = 'test#gmail.com (test)'

import argparse
import sys
import csv
import string
import datetime
import json
import time
from apiclient.errors import HttpError
from apiclient import sample_tools
from oauth2client.client import AccessTokenRefreshError

cam_name = sys.argv[1:]

class SampledDataError(Exception): pass

def main(argv):
    # Authenticate and construct service.
    service, flags = sample_tools.init(
        argv[0], 'analytics', 'v3', __doc__, __file__,
        scope='https://www.googleapis.com/analytics.readonly')

    # Try to make a request to the API. Print the results or handle errors.
    try:
        profile_id = profile_ids[profile]
        if not profile_id:
            print ('Could not find a valid profile for this user.')
        else:
            metrics = argv[1]
            dimensions = argv[2]
            reportName = argv[3]
            sort = argv[4]
            filters = argv[5]
            for start_date, end_date in date_ranges:
                limit = ga_query(service, profile_id, 0,
                                 start_date, end_date, metrics, dimensions, sort, filters).get('totalResults')
                for pag_index in range(0, limit, 10000):
                    results = ga_query(service, profile_id, pag_index,
                                       start_date, end_date, metrics, dimensions, sort, filters)
                    # if results.get('containsSampledData'):
                    #     raise SampledDataError
                    print_results(results, pag_index, start_date, end_date, reportName)
    except TypeError as error:
        # Handle errors in constructing a query.
        print ('There was an error in constructing your query : %s' % error)
    except HttpError as error:
        # Handle API errors.
        print ('Arg, there was an API error : %s : %s' %
               (error.resp.status, error._get_reason()))
    except AccessTokenRefreshError:
        # Handle Auth errors.
        print ('The credentials have been revoked or expired, please re-run '
               'the application to re-authorize')
    except SampledDataError:
        # force an error if ever a query returns data that is sampled!
        print ('Error: Query contains sampled data!')

def ga_query(service, profile_id, pag_index, start_date, end_date, metrics, dimensions, sort, filters):
    return service.data().ga().get(
        ids='ga:' + profile_id,
        start_date=start_date,
        end_date=end_date,
        metrics=metrics,
        dimensions=dimensions,
        sort=sort,
        filters=filters,
        samplingLevel='HIGHER_PRECISION',
        start_index=str(pag_index+1),
        max_results=str(pag_index+10000)).execute()

def print_results(results, pag_index, start_date, end_date, reportName):
    """Prints out the results.

    This prints out the profile name, the column headers, and all the rows of
    data.

    Args:
        results: The response returned from the Core Reporting API.
    """
    # New write header
    if pag_index == 0:
        if (start_date, end_date) == date_ranges[0]:
            print ('Profile Name: %s' % results.get('profileInfo').get('profileName'))
            columnHeaders = results.get('columnHeaders')
            cleanHeaders = [str(h['name']) for h in columnHeaders]
            writer.writerow(cleanHeaders)
        print (reportName, 'Now pulling data from %s to %s.' % (start_date, end_date))

    # Print data table.
    if results.get('rows', []):
        for row in results.get('rows'):
            for i in range(len(row)):
                old, new = row[i], str()
                for s in old:
                    new += s if s in string.printable else ''
                row[i] = new
            writer.writerow(row)
    else:
        print ('No Rows Found')

    limit = results.get('totalResults')
    print (pag_index, 'of about', int(round(limit, -4)), 'rows.')
    return None

# Uncomment this line & replace with 'profile name': 'id' to query a single profile
# Delete or comment out this line to loop over multiple profiles.

# Brands
profile_ids = {'abc-Mobile': '12345',
               'abc-Desktop': '23456',
               'pqr-Mobile': '34567',
               'pqr-Desktop': '45678',
               'xyz-Mobile': '56789',
               'xyz-Desktop': '67890'}

date_ranges = [
    ('2017-01-24', '2017-01-24'),
    ('2017-01-25', '2017-01-25'),
    ('2017-01-26', '2017-01-26'),
    ('2017-01-27', '2017-01-27'),
    ('2017-01-28', '2017-01-28'),
    ('2017-01-29', '2017-01-29'),
    ('2017-01-30', '2017-01-30')
]

for profile in sorted(profile_ids):
    print("Sequence 1", profile)
    with open('qwerty.json') as json_data:
        d = json.load(json_data)
    for getThisReport in d["Reports"]:
        print("Sequence 2", getThisReport["ReportName"])
        reportName = getThisReport["ReportName"]
        metrics = getThisReport["Metrics"]
        dimensions = getThisReport["Dimensions"]
        sort = getThisReport["sort"]
        filters = getThisReport["filter"]
        path = 'C:\\Projects\\DataExport\\test\\'  # replace with the path to the folder where the csv file will be written
        today = time.strftime('%Y%m%d')
        filename = profile + '_' + reportName + '_' + today + '.csv'  # replace with your filename
        with open(path + filename, 'wt') as f:
            writer = csv.writer(f, delimiter='|', lineterminator='\n', quoting=csv.QUOTE_MINIMAL)
            args = [sys.argv, metrics, dimensions, reportName, sort, filters]
            if __name__ == '__main__': main(args)
    print ("Profile done. Next profile...")
print ("All profiles done.")
The Core Reporting API supports some interesting things as far as dates go.
All Analytics data requests must specify a date range. If you do not include start-date and end-date parameters in the request, the server returns an error. Date values can be for a specific date by using the pattern YYYY-MM-DD or relative by using today, yesterday, or the NdaysAgo pattern. Values must match [0-9]{4}-[0-9]{2}-[0-9]{2}|today|yesterday|[0-9]+(daysAgo).
So you can do something like:
start_date = '7daysAgo'
end_date = 'today'
Just remember that data hasn't completed processing for 24 - 48 hours so your data for today, yesterday and the day before that may not be 100% accurate.
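As a sketch of how that could plug into the script above, you can either hand the relative values straight to date_ranges, or compute explicit dates with datetime so the window always ends a couple of days back (the variable names follow the question's script; the two-day offset is just one cautious choice):

import datetime

# Option 1: let the API resolve the relative dates itself.
date_ranges = [('7daysAgo', 'yesterday')]

# Option 2: explicit YYYY-MM-DD strings ending two days ago,
# so the data has (probably) finished processing.
end = datetime.date.today() - datetime.timedelta(days=2)
start = end - datetime.timedelta(days=6)
date_ranges = [(start.isoformat(), end.isoformat())]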

extracting tweet from Twitter API using Python

I want to get every tweet from HousingWire on Twitter (https://twitter.com/HousingWire). I understand how to authenticate into the Twitter account, but how can I get HousingWire's tweets?
I know how to stream data based on keywords, but I want to stream HousingWire's tweets. How can I do that?
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener

ckey = ''
csecret = ''
atoken = ''
asecret = ''

class listener(StreamListener):
    def on_data(self, data):
        try:
            print data
            #tweet=data.split(',"text":"')[1].split('","source')[0]
            #print tweet
            #savethis=str(time.time())+'::'+tweet
            savefile = open('tweetdb.txt', 'a')
            savefile.write(data)
            savefile.write('\n')
            savefile.close()
            return True
        except BaseException, e:
            print 'failed on data', str(e)
            time.sleep(5)

    def on_error(self, status):
        print status

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=["stock"])
You can use the Python script below to grab the last 3,240 tweets from HousingWire (Twitter only allows access to that many tweets from a user; there is no way to grab the complete history). Usage: simply put their Twitter screen name in the script.
#!/usr/bin/env python
# encoding: utf-8

import tweepy  # https://github.com/tweepy/tweepy
import csv

# Twitter API credentials
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""

def get_all_tweets(screen_name):
    # Twitter only allows access to a user's most recent 3240 tweets with this method

    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # initialize a list to hold all the tweepy Tweets
    alltweets = []

    # make initial request for most recent tweets (200 is the maximum allowed count)
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # save most recent tweets
    alltweets.extend(new_tweets)

    # save the id of the oldest tweet less one
    oldest = alltweets[-1].id - 1

    # keep grabbing tweets until there are no tweets left to grab
    while len(new_tweets) > 0:
        print "getting tweets before %s" % (oldest)

        # all subsequent requests use the max_id param to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)

        # save most recent tweets
        alltweets.extend(new_tweets)

        # update the id of the oldest tweet less one
        oldest = alltweets[-1].id - 1

        print "...%s tweets downloaded so far" % (len(alltweets))

    # transform the tweepy tweets into a 2D array that will populate the csv
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode("utf-8")] for tweet in alltweets]

    # write the csv
    with open('%s_tweets.csv' % screen_name, 'wb') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(outtweets)

    pass

if __name__ == '__main__':
    # pass in the username of the account you want to download
    get_all_tweets("J_tsar")
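If you specifically want to stream HousingWire's tweets live rather than download the history, the streaming filter also takes a follow parameter with numeric user IDs instead of track keywords. A minimal sketch, reusing the auth and listener from the question (the ID below is a placeholder, not HousingWire's real ID):

housingwire_id = "123456789"  # placeholder - look up the real user ID via the REST API first
twitterStream = Stream(auth, listener())
twitterStream.filter(follow=[housingwire_id])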

scrapy: request url must be str or unicode, got Selector

I am writing a spider using Scrapy to scrape user details from Pinterest. I am trying to get the details of a user and their followers (and so on, down to the last node).
Below is the spider code:
from scrapy.spider import BaseSpider
import scrapy
from pinners.items import PinterestItem
from scrapy.http import FormRequest
from urlparse import urlparse

class Sample(BaseSpider):
    name = 'sample'
    allowed_domains = ['pinterest.com']
    start_urls = ['https://www.pinterest.com/banka/followers', ]

    def parse(self, response):
        for base_url in response.xpath('//div[@class="Module User gridItem"]/a/@href'):
            list_a = response.urljoin(base_url.extract())
            for new_urls in response.xpath('//div[@class="Module User gridItem"]/a/@href'):
                yield scrapy.Request(new_urls, callback=self.Next)
            yield scrapy.Request(list_a, callback=self.Next)

    def Next(self, response):
        href_base = response.xpath('//div[@class = "tabs"]/ul/li/a')
        href_board = href_base.xpath('//div[@class="BoardCount Module"]')
        href_pin = href_base.xpath('.//div[@class="Module PinCount"]')
        href_like = href_base.xpath('.//div[@class="LikeCount Module"]')
        href_followers = href_base.xpath('.//div[@class="FollowerCount Module"]')
        href_following = href_base.xpath('.//div[@class="FollowingCount Module"]')
        item = PinterestItem()
        item["Board_Count"] = href_board.xpath('.//span[@class="value"]/text()').extract()[0]
        item["Pin_Count"] = href_pin.xpath('.//span[@class="value"]/text()').extract()
        item["Like_Count"] = href_like.xpath('.//span[@class="value"]/text()').extract()
        item["Followers_Count"] = href_followers.xpath('.//span[@class="value"]/text()').extract()
        item["Following_Count"] = href_following.xpath('.//span[@class="value"]/text()').extract()
        item["User_ID"] = response.xpath('//link[@rel="canonical"]/@href').extract()[0]
        yield item
I get the following error:
raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)
TypeError: Request url must be str or unicode, got Selector:
I did check the type of list_a (the extracted URLs); it gives me unicode.
The error is generated by the inner for loop in the parse method:

for new_urls in response.xpath('//div[@class="Module User gridItem"]/a/@href'):
    yield scrapy.Request(new_urls, callback=self.Next)

The new_urls variable is actually a Selector, so try something like this:

for base_url in response.xpath('//div[@class="Module User gridItem"]/a/@href'):
    list_a = response.urljoin(base_url.extract())
    yield scrapy.Request(list_a, callback=self.Next)
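Equivalently, if you prefer to keep a single loop, you can extract the href string from each Selector before building the Request. A small sketch, assuming the same parse method and markup:

def parse(self, response):
    for base_url in response.xpath('//div[@class="Module User gridItem"]/a/@href'):
        # .extract() turns the Selector into a unicode string, which Request accepts
        yield scrapy.Request(response.urljoin(base_url.extract()), callback=self.Next)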

Automating pulling csv files off google Trends

pyGTrends does not seem to work; it gives errors in Python.
pyGoogleTrendsCsvDownloader seems to work and logs in, but after 1-3 requests (per day!) it complains about an exhausted quota, even though a manual download with the same login/IP works flawlessly.
Bottom line: neither works. Searching through Stack Overflow turns up many questions from people trying to pull CSVs from Google, but no workable solution that I could find...
Thank you in advance to whoever is able to help. How should the code be changed? Do you know of another solution that works?
Here's the code of pyGoogleTrendsCsvDownloader.py
import httplib
import urllib
import urllib2
import re
import csv
import lxml.etree as etree
import lxml.html as html
import traceback
import gzip
import random
import time
import sys
from cookielib import Cookie, CookieJar
from StringIO import StringIO

class pyGoogleTrendsCsvDownloader(object):
    '''
    Google Trends Downloader
    Recommended usage:
    from pyGoogleTrendsCsvDownloader import pyGoogleTrendsCsvDownloader
    r = pyGoogleTrendsCsvDownloader(username, password)
    r.get_csv(cat='0-958', geo='US-ME-500')
    '''
    def __init__(self, username, password):
        '''
        Provide login and password to be used to connect to Google Trends
        All immutable system variables are also defined here
        '''
        # The amount of time (in secs) that the script should wait before making a request.
        # This can be used to throttle the downloading speed to avoid hitting servers too hard.
        # It is further randomized.
        self.download_delay = 0.25
        self.service = "trendspro"
        self.url_service = "http://www.google.com/trends/"
        self.url_download = self.url_service + "trendsReport?"
        self.login_params = {}
        # These headers are necessary, otherwise Google will flag the request at your account level
        self.headers = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20100101 Firefox/12.0'),
                        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
                        ("Accept-Language", "en-gb,en;q=0.5"),
                        ("Accept-Encoding", "gzip, deflate"),
                        ("Connection", "keep-alive")]
        self.url_login = 'https://accounts.google.com/ServiceLogin?service='+self.service+'&passive=1209600&continue='+self.url_service+'&followup='+self.url_service
        self.url_authenticate = 'https://accounts.google.com/accounts/ServiceLoginAuth'
        self.header_dictionary = {}
        self._authenticate(username, password)

    def _authenticate(self, username, password):
        '''
        Authenticate to Google:
        1 - make a GET request to the Login webpage so we can get the login form
        2 - make a POST request with email, password and login form input values
        '''
        # Make sure we get CSV results in English
        ck = Cookie(version=0, name='I4SUserLocale', value='en_US', port=None, port_specified=False, domain='www.google.com', domain_specified=False, domain_initial_dot=False, path='/trends', path_specified=True, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest=None)
        self.cj = CookieJar()
        self.cj.set_cookie(ck)
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        self.opener.addheaders = self.headers

        # Get all of the login form input values
        find_inputs = etree.XPath("//form[@id='gaia_loginform']//input")
        try:
            resp = self.opener.open(self.url_login)
            if resp.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(resp.read())
                f = gzip.GzipFile(fileobj=buf)
                data = f.read()
            else:
                data = resp.read()
            xmlTree = etree.fromstring(data, parser=html.HTMLParser(recover=True, remove_comments=True))
            for input in find_inputs(xmlTree):
                name = input.get('name')
                if name:
                    name = name.encode('utf8')
                    value = input.get('value', '').encode('utf8')
                    self.login_params[name] = value
        except:
            print("Exception while parsing: %s\n" % traceback.format_exc())
        self.login_params["Email"] = username
        self.login_params["Passwd"] = password
        params = urllib.urlencode(self.login_params)
        self.opener.open(self.url_authenticate, params)

    def get_csv(self, throttle=False, **kwargs):
        '''
        Download CSV reports
        '''
        # Randomized download delay
        if throttle:
            r = random.uniform(0.5 * self.download_delay, 1.5 * self.download_delay)
            time.sleep(r)
        params = {
            'export': 1
        }
        params.update(kwargs)
        params = urllib.urlencode(params)
        r = self.opener.open(self.url_download + params)
        # Make sure everything is working ;)
        if not r.info().has_key('Content-Disposition'):
            print "You've exceeded your quota. Continue tomorrow..."
            sys.exit(0)
        if r.info().get('Content-Encoding') == 'gzip':
            buf = StringIO(r.read())
            f = gzip.GzipFile(fileobj=buf)
            data = f.read()
        else:
            data = r.read()
        myFile = open('trends_%s.csv' % '_'.join(['%s-%s' % (key, value) for (key, value) in kwargs.items()]), 'w')
        myFile.write(data)
        myFile.close()
Although I don't know Python, I may have a solution. I am currently doing the same thing in C#, and though I didn't get the .csv file, I created a custom URL through code, then downloaded that HTML and saved it to a text file (also through code). That HTML (at line 12) contains all the information needed to create the graph that is used on Google Trends. However, it has a lot of unnecessary text that needs to be cut away. Either way, you end up with the same result: the Google Trends data. I posted a more detailed answer to my question here:
Downloading .csv file from Google Trends
There is an alternative module named pytrends (https://pypi.org/project/pytrends/). It is really cool; I would recommend it.
Example usage:
import numpy as np
import pandas as pd
from pytrends.request import TrendReq
pytrend = TrendReq()
#It is the term that you want to search
pytrend.build_payload(kw_list=["Eminem is the Rap God"])
# Find which region has searched the term
df = pytrend.interest_by_region()
df.to_csv("path\Eminem_InterestbyRegion.csv")
Potentially, if you have a list of terms to search, you could use a for loop to automate the insights, as in the sketch below.
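A minimal sketch of that loop, assuming the same pytrend object from the example and a hypothetical list of terms:

terms = ["Eminem is the Rap God", "Housing market"]  # hypothetical list of search terms
for term in terms:
    pytrend.build_payload(kw_list=[term])
    df = pytrend.interest_by_region()
    # one CSV per term; the filename pattern is just an example
    df.to_csv("{}_InterestbyRegion.csv".format(term.replace(" ", "_")))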