Has anyone done a Twitter sentiment analysis using PySpark?

Has anyone done a Twitter sentiment analysis using PySpark? - python-2.7

Has anyone done a Twitter sentiment analysis using Apache Spark? I have tried this:
class Tweet(dict):
    """Dictionary holding the subset of a raw tweet payload that is kept.

    For a regular tweet the keys ``id``, ``geo``, ``text``, ``user_id``,
    ``hashtags``, ``timestamp`` and ``screen_name`` are set.  A falsy payload,
    or one that contains a ``'delete'`` key, produces an empty dictionary.
    """

    def __init__(self, tweet_in, encoding='utf-8'):
        """Populate the dictionary from ``tweet_in``.

        Args:
            tweet_in: Decoded tweet payload (presumably a dict-like object
                as yielded by the Twitter stream) or a falsy value.
            encoding: Codec used to encode the text, hashtag and
                screen-name fields.
        """
        # The original passed ``self`` as the initial mapping, which merely
        # copied an empty dict into itself.
        super(Tweet, self).__init__()
        if not tweet_in or 'delete' in tweet_in:
            return

        user = tweet_in['user']
        # Tolerate payloads that carry no 'geo' key at all, not only a
        # null 'geo' value.
        geo = tweet_in.get('geo')

        self['id'] = tweet_in['id']
        self['geo'] = geo['coordinates'] if geo else None
        self['text'] = tweet_in['text'].encode(encoding)
        self['user_id'] = user['id']
        self['hashtags'] = [
            tag['text'].encode(encoding)
            for tag in tweet_in['entities']['hashtags']
        ]
        # Drop the timezone info and store the creation time as an
        # ISO-8601 string.
        created_at = dateutil.parser.parse(tweet_in[u'created_at'])
        self['timestamp'] = created_at.replace(tzinfo=None).isoformat()
        self['screen_name'] = user['screen_name'].encode(encoding)
def connect_twitter():
    """Return a ``twitter.TwitterStream`` authenticated through OAuth.

    The four credential strings below are placeholders and have to be
    replaced with real application credentials.
    """
    credentials = {
        'token': "personal_info",
        'token_secret': "personal_info",
        'consumer_key': "personal_info",
        'consumer_secret': "personal_info",
    }
    oauth = twitter.OAuth(**credentials)
    return twitter.TwitterStream(auth=oauth)
def get_next_tweet(twitter_stream, i):
    """Return the next usable tweet of the sample stream as a JSON string.

    Args:
        twitter_stream: Object exposing ``statuses.sample(block=...)``.
        i: Unused; kept so that existing callers keep working.

    Returns:
        ``json.dumps`` of the ``Tweet`` built from the first stream item
        that is neither empty nor a delete notice.
    """
    stream = twitter_stream.statuses.sample(block=False)
    tweet_in = None
    # Skip empty items and delete notices.
    while not tweet_in or 'delete' in tweet_in:
        # The builtin next() works on Python 2.6+ and 3, whereas the
        # ``.next()`` method only exists on Python 2 iterators.
        tweet_in = next(stream)
    return json.dumps(Tweet(tweet_in))
def process_rdd_queue(twitter_stream, nb_tweets=5):
    """Queue one RDD per fetched tweet and print the resulting stream.

    Uses the module-level ``ssc`` StreamingContext.

    Args:
        twitter_stream: Stream handle passed on to ``get_next_tweet``.
        nb_tweets: Number of tweets to fetch and enqueue.
    """
    # Each tweet (a JSON string) becomes its own single-element RDD.
    rddQueue = [
        ssc.sparkContext.parallelize([get_next_tweet(twitter_stream, i)], 5)
        for i in range(nb_tweets)
    ]
    lines = ssc.queueStream(rddQueue, oneAtATime=False)
    lines.pprint()
# Release a SparkContext left over from a previous run (e.g. when the code is
# re-executed in the same interpreter); on the first run ``sc`` is undefined.
try:
    sc.stop()
except NameError:
    pass

sc = SparkContext(appName="PythonStreamingQueueStream")
ssc = StreamingContext(sc, 1)
twitter_stream = connect_twitter()
process_rdd_queue(twitter_stream)

# The context must not be stopped before start(): once a StreamingContext has
# been stopped it cannot be started again.
ssc.start()
try:
    time.sleep(2)
finally:
    # Always shut the streaming and Spark contexts down, even if the wait
    # above is interrupted.
    ssc.stop(stopSparkContext=True, stopGraceFully=True)
Do I need a static file containing positive and negative words for the sentiment analysis mechanism?
I am a beginner at this.

Related

can not add or update child row, foreign key constraint failed

# Forms of the subscriber's package.  NOTE(review): presumably stored as a
# stringified list such as "['1', '2']" - the replaces below strip brackets
# and quotes; confirm against the model.
subscriber_package_forms = subscriber.package.form

# Strip list punctuation and quotes, leaving comma-separated ids.
cleaned_forms = subscriber_package_forms
for unwanted in ('[', ']', "'", '"'):
    cleaned_forms = cleaned_forms.replace(unwanted, '')
# Blank tokens (e.g. from an empty list) are skipped instead of crashing int().
form_ids = [int(token) for token in cleaned_forms.split(',') if token.strip()]

# Copy the column configuration of every form from the main database.
for frm in form_ids:
    try:
        form_columns_object = (
            form_columns.objects.using(main_db).filter(form_id=frm).first()
        )
    except Exception:
        # Best effort, as before - but skip this form explicitly instead of
        # falling through with an undefined or stale ``form_columns_object``.
        continue
    if form_columns_object is None:
        continue

    form_columns_dict = model_to_dict(form_columns_object)
    form_instance = FormModel.objects.using(main_db).get(
        id=int(form_columns_dict['form'])
    )
    user_instance = User.objects.using(main_db).first()
    # NOTE(review): ``form_instance`` and ``user_instance`` are loaded from
    # ``main_db`` while the row is written to ``db_name``.  If ``db_name`` has
    # no rows with the same primary keys, this insert presumably fails with a
    # foreign-key constraint error - confirm on the server database.
    form_columns.objects.using(db_name).create(
        columns=str(form_columns_dict['columns']),
        form=form_instance,
        user=user_instance,
    )
This code works fine in my local setup, but I am facing a problem after deploying it to the server.
I guess the error is in the last line of the code.

How to do multiple request POST in django?

I have a view that sends a form to another view so that the user can review the form, and then makes a second POST request to save it. However, when the user enters the review view the request is already a POST, so the form ends up being saved at the wrong time.
def resumo(request, avaliado_id):
    """Render the evaluation summary and, on POST, persist the evaluation.

    Args:
        request: Incoming HTTP request; grades and weights are read from
            ``request.POST``.
        avaliado_id: Id used to look up the evaluated ``Perfil``.

    Returns:
        The rendered ``admin/resumo.html`` response.
    """
    perfil = Perfil.objects.filter(user_id=avaliado_id)
    queryDict = request.POST
    criterios = list(queryDict.keys())
    notas = list(queryDict.values())

    # The first two POST entries are skipped - presumably the CSRF token and
    # one other non-grade field; TODO confirm against the form.
    valores = [float(x.replace(",", ".")) for x in notas[2:]]
    pesos = [float(x.replace(",", ".")) for x in queryDict.getlist('pesos')]
    # Indexing (rather than zip) keeps the IndexError raised when there are
    # fewer weights than grades, instead of silently truncating.
    media = sum(valor * pesos[i] for i, valor in enumerate(valores))

    if request.method == 'POST':
        # NOTE(review): this view is itself reached through a POST from the
        # previous form, so this branch also runs when the summary is first
        # displayed.  Separating "review" from "confirm" needs a distinct
        # marker in the confirming request - not something this view can
        # decide from ``request.method`` alone.
        lista_criterio_id = list(
            Criterio.objects.filter(ativo=True).values_list('id', flat=True)
        )
        avaliado = Perfil.objects.get(pk=avaliado_id)
        avaliador = request.user.perfil
        # create() already saves the row; no additional save() is required.
        avaliacao = Avaliacao.objects.create(
            avaliador=avaliador, avaliado=avaliado, media=media
        )
        for nota, criterio in zip(notas[2:], lista_criterio_id):
            # The freshly created ``avaliacao`` is reused instead of being
            # fetched again by primary key.
            Notas.objects.create(
                avaliacao=avaliacao,
                notas=nota,
                criterios=Criterio.objects.get(pk=criterio),
            )

    context = {
        'media': media,
        'chaves_e_valores': zip(criterios[2:], notas[2:]),
        'perfis': perfil,
    }
    return render(request, 'admin/resumo.html', context)
In the first line, `request.method == 'POST'` returns True, and because of that the code enters the `if` branch.

How to save variables into mysql database in django

So I have pulled the data from a movie website and saved it into variables (for example, title), but now I'm struggling to send this data to a MySQL DB.
def index(request):
    """Fetch up to 20 movies from the remote API and render ``index.html``.

    The fields of every movie are extracted and some of them printed; only
    the title of the last movie is handed to the template.

    Args:
        request: Incoming HTTP request.

    Returns:
        The rendered ``index.html`` response with ``title`` in its context.
    """
    response = requests.get(
        'https://fmovies.to/api/list_movies.json',
        params={'limit': '20'},
    )
    json_response = response.json()
    movies = json_response['data']['movies']

    # Defined up front so that render() below does not hit an undefined name
    # when the API returns no movies.
    title = None
    for value in movies:
        title = value['title']
        # Extracted for the planned database insert; not used further yet.
        movie_url = value['url']
        description = value['description_full']
        movie_torrent_link = value['torrents'][0]['url']
        image = value['medium_cover_image']
        rating = value['rating']
        genre = value['genres']
        runtime = value['runtime']
        year_of_production = value['year']
        slug = value['slug']
        print(image)
        print(rating)
        print(runtime)
        print(year_of_production)
        print(slug)
    return render(request, 'index.html', {'title': title})

I managed to save directly, without going the long way I was going:
def index(request):
    """Fetch up to 20 movies from the remote API and store each as a ``Movie``.

    Every call of this view inserts one row per movie returned by the API.

    Args:
        request: Incoming HTTP request.

    Returns:
        The rendered ``index.html`` response.
    """
    response = requests.get(
        'https://fmovies.to/api/list_movies.json',
        params={'limit': '20'},
    )
    json_response = response.json()
    movies = json_response['data']['movies']
    for value in movies:
        # objects.create() builds and saves the row in one step, so the
        # additional save() call of the original was redundant.
        Movie.objects.create(
            title=value['title'],
            description=value['description_full'],
            image=value['medium_cover_image'],
            category=value['genres'],
            year_of_production=value['year'],
            movie_url=value['url'],
            movie_torrent_link=value['torrents'][0]['url'],
            rating=value['rating'],
            runtime=value['runtime'],
        )
    return render(request, 'index.html')

How to use timefield__week_day__lt?

I need to use the following statement:
from django.db.models import DurationField, ExpressionWrapper
from django.db.models.functions import ExtractWeekDay

# Annotate every activity with the weekday of its start and with its total
# duration so that both can be compared inside the database query.
qs = FamilyActivity.objects.annotate(
    start_week_day=ExtractWeekDay('start_time'),
    span=ExpressionWrapper(
        F('end_time') - F('start_time'),
        output_field=DurationField(),
    ),
)
# Q objects are OR-ed with ``|``.  Python's ``or`` evaluates to a single
# operand, so only the first condition would have been applied.
# NOTE(review): the original third condition used ``end_time__week_time__gte``;
# Django has no ``week_time`` lookup, so ``week_day`` is presumably what was
# meant - confirm.
qs = qs.filter(
    Q(end_time__week_day__lt=F('start_week_day'))
    | Q(span__gte=timedelta(days=5))
    | Q(end_time__week_day__gte=6)
)
But I can't get it to work. I want to search for future activities, including weekend activities. Can `__week_day__lt` not be used?
My models.py :
class FamilyActivity(models.Model):
    """Model with a link to an ``Org``, a name, a start/end time and a cost text."""

    # ``on_delete`` is spelled out: it is a required argument since
    # Django 2.0, and CASCADE was the implicit default before that.
    org = models.ForeignKey(Org, on_delete=models.CASCADE)
    name = models.CharField(verbose_name=_('name'), max_length=200)
    start_time = models.DateTimeField(verbose_name=_('start_time'))
    end_time = models.DateTimeField(verbose_name=_('end_time'))
    cost = models.CharField(verbose_name=_('cost'), max_length=4000)
My views.py :
# Narrow the FamilyActivity queryset according to the ids encoded in ``cat``.
# NOTE(review): this snippet is truncated - the ``try`` below has no visible
# ``except``/``finally`` and no return statement is shown.
def search_fa_Map(request, cat=''):
qs = FamilyActivity.objects.all()
if cat != '':
# ``cat`` is a "-"-separated string; each position is parsed as an integer id.
cat_ids = cat.split("-")
try :
# At least six ids are expected: district, time, type, age, cost, state.
if(len(cat_ids) >= 6):
fa_district_id = int(cat_ids[0])
fa_time_id = int(cat_ids[1])
fa_type_id = int(cat_ids[2])
fa_age_id = int(cat_ids[3])
fa_cost_id = int(cat_ids[4])
fa_state_id = int(cat_ids[5])
# The block id is optional (seventh position); 0 when it is absent.
fa_block_id = 0
if(len(cat_ids) >= 7):
fa_block_id = int(cat_ids[6])
# A time id of 0 leaves the queryset unfiltered by time.
if fa_time_id != 0:
now = datetime.datetime.now()
oneday = datetime.timedelta(days=1)
if fa_time_id == 45:#today
# Activities that have started and have not ended yet.
qs = qs.filter(Q(start_time__lte=now,end_time__gte=now))
elif fa_time_id == 46:#weekend
# NOTE(review): Q objects are combined with ``|``; Python's ``or`` evaluates
# to a single operand instead of OR-ing the three conditions.
# NOTE(review): this line uses bare ``timedelta`` while ``oneday`` above uses
# ``datetime.timedelta``, and ``week_time`` is presumably meant to be
# ``week_day`` - confirm both.
qs = qs.filter(Q(end_time__week_day__lt = F('start_time__week_day')) or Q(F('end_time')-F('start_time')>=timedelta(days=5)) or Q(end_time__week_time__gte=6))
I only want the following statement to work:
# NOTE(review): Q objects are combined with ``|``; Python's ``or`` evaluates to
# a single operand instead of OR-ing the three conditions.  ``week_time`` is
# presumably meant to be ``week_day`` - confirm.
qs = qs.filter(Q(end_time__week_day__lt = F('start_time__week_day')) or Q(F('end_time')-F('start_time')>=timedelta(days=5)) or Q(end_time__week_time__gte=6))

`or` is the wrong operator. You want `|`.

Getting Input Data from a formular and use it as a parameter for a function

Hello, I wanted to know if it is possible to get input data from a form and use this data in another function.
my forms.py:
class TextPathForm(forms.Form):
    """Form with two mandatory text inputs and three optional ones."""

    # Mandatory inputs (form fields are required unless stated otherwise).
    text = forms.CharField()
    language = forms.CharField()

    # Optional inputs.
    stopwords = forms.CharField(required=False)
    search = forms.CharField(required=False)
    filterword = forms.CharField(required=False)
my view.py:
def textpath(request):
    """Show the text-path form and echo validated input back to the template.

    Args:
        request: Incoming HTTP request; a POST carries the submitted form.

    Returns:
        The rendered ``index.html`` response.  Its context holds the form,
        the submitted ``text``, ``language``, ``stopwords`` and ``search``
        values (empty strings unless a valid form was posted) and the
        ``succes`` flag.
    """
    # Imported locally so that this block does not depend on the module's
    # import section.
    from django.shortcuts import render

    # Defaults used for GET requests and for invalid submissions.
    success = False
    text = stopwords = language = search = ''

    if request.method == 'POST':
        textpath_form = TextPathForm(request.POST)
        if textpath_form.is_valid():
            success = True
            cleaned = textpath_form.cleaned_data
            text = cleaned['text']
            stopwords = cleaned['stopwords']
            language = cleaned['language']
            search = cleaned['search']
    else:
        textpath_form = TextPathForm()

    # NOTE(review): the key is spelled 'succes'; it is kept unchanged because
    # the template presumably reads it under that name.
    ctx = {
        'textpath_form': textpath_form,
        'text': text,
        'language': language,
        'stopwords': stopwords,
        'search': search,
        'succes': success,
    }
    # render() passes the request along, so context processors run for the
    # template; render_to_response() did not and was removed in Django 3.0.
    return render(request, 'index.html', ctx)
def any_function(text):
    """Placeholder for a function that works on the text entered in the form.

    Args:
        text: Value to process, e.g. ``textpath_form.cleaned_data['text']``
            passed in by ``textpath()``.
    """
    pass
Thanks for helping me.

What do you mean? You can always pass textpath_form.cleaned_data to your function as an argument or even pass textpath_form.cleaned_data['text'] and do something with it there.

We Keep Coding

c++ django amazon-web-services regex python-2.7 google-cloud-platform list unit-testing opengl ember.js

Has anyone done a Twitter sentiment analysis using PySpark? - python-2.7

Related

can not add or update child row, foreign key constraint failed

How to do multiple request POST in django?

How to save variables into mysql database in django

How to use timefield__week_day__lt?

Getting Input Data from a formular and use it as a parameter for a function

Categories

Resources