Given a table that looks like the following:
ts,open,high,low,close,adj_close,volume
2014-08-20T12:00:00.123456Z,198.119995,199.160004,198.080002,198.919998,180.141846,72763000
2014-08-21T12:00:00.123456Z,199.089996,199.759995,198.929993,199.500000,180.667160,67791000
2014-08-22T12:00:00.123456Z,199.339996,199.690002,198.740005,199.190002,180.386368,76107000
2014-08-25T12:00:00.123456Z,200.139999,200.589996,199.149994,200.199997,181.301010,63855000
2014-08-26T12:00:00.123456Z,200.330002,200.820007,200.279999,200.330002,181.418716,47298000
2014-08-27T12:00:00.123456Z,200.429993,200.570007,199.940002,200.250000,181.346298,47874000
2014-08-28T12:00:00.123456Z,199.589996,200.270004,199.389999,200.139999,181.246689,58330000
2014-08-29T12:00:00.123456Z,200.449997,200.729996,199.820007,200.710007,181.762909,65907000
2014-09-02T12:00:00.123456Z,200.970001,201.000000,199.860001,200.610001,181.672318,72426000
How can I plot the moving average of the results? I am connecting to the database and loading the results into a dataframe with the following:
import psycopg2
import pandas as pd

df_trades = pd.DataFrame()
connection = None  # so the finally block is safe if connect() fails
try:
    connection = psycopg2.connect(user="admin",
                                  password="quest",
                                  host="127.0.0.1",
                                  port="8812",
                                  database="qdb")
    cursor = connection.cursor()
    df_trades = pd.read_sql_query("select * from my_table", connection)
except (Exception, psycopg2.Error) as error:
    print("Error while connecting to QuestDB", error)
finally:
    if connection:
        cursor.close()
        connection.close()
        print("QuestDB connection closed")

print(df_trades.head())

# 10-point moving average of the close price
df_trades['10point_ma'] = df_trades['close'].rolling(window=10).mean()
What you can do in this case is use Plotly:

import psycopg2
import pandas as pd
import plotly.graph_objects as go

df_trades = pd.DataFrame()
connection = None  # so the finally block is safe if connect() fails
try:
    connection = psycopg2.connect(user="admin",
                                  password="quest",
                                  host="127.0.0.1",
                                  port="8812",
                                  database="qdb")
    cursor = connection.cursor()
    df_trades = pd.read_sql_query("select * from my_table", connection)
except (Exception, psycopg2.Error) as error:
    print("Error while connecting to QuestDB", error)
finally:
    if connection:
        cursor.close()
        connection.close()
        print("QuestDB connection closed")

print(df_trades.head())

fig = go.Figure()
fig.update_layout(title_text="Table candlestick")

# 10-point moving average, drawn as a line on top of the candlesticks
df_trades['10point_ma'] = df_trades['close'].rolling(window=10).mean()
fig.add_trace(go.Scatter(x=df_trades['ts'], y=df_trades['10point_ma'],
                         name='10 point moving average',
                         mode='lines',
                         opacity=1,
                         marker=dict(color='MediumPurple',
                                     size=1)))

# original table as candlestick chart
fig.add_trace(go.Candlestick(x=df_trades['ts'],
                             open=df_trades['open'],
                             high=df_trades['high'],
                             low=df_trades['low'],
                             close=df_trades['close'],
                             name='My Awesome Chart'))

# hide the range slider below the chart
fig.update(layout_xaxis_rangeslider_visible=False)
fig.show()
This will generate a candlestick chart of the table data with the 10-point moving average drawn on top.
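Note that rolling(window=10) is NaN until ten rows are available, so with a short sample like the one in the question the moving-average line will be empty. If you want the line to start immediately, pandas' min_periods argument can help; a minimal sketch:

# Sketch: min_periods=1 starts the average from the first row instead of
# leaving the first nine rows as NaN.
df_trades['10point_ma'] = df_trades['close'].rolling(window=10, min_periods=1).mean()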
I was trying to read from a table in Snowflake, manipulate the data, and write it back. I was able to connect to Snowflake and read the data as a DataFrame, but I cannot write back to the table.
Code to connect to Snowflake:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from py4j.java_gateway import java_import

## @params: [JOB_NAME, URL, WAREHOUSE, DB, SCHEMA, USERNAME, PASSWORD, ROLE]
SNOWFLAKE_SOURCE_NAME = "net.snowflake.spark.snowflake"

# 'ROLE' added here since sfOptions below reads args['ROLE']
args = getResolvedOptions(sys.argv, ['JOB_NAME', 'URL', 'WAREHOUSE', 'DB', 'SCHEMA', 'USERNAME', 'PASSWORD', 'ROLE'])
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)
try:
    job.init(args['JOB_NAME'], args)
except Exception as e:
    pass

java_import(spark._jvm, SNOWFLAKE_SOURCE_NAME)
spark._jvm.net.snowflake.spark.snowflake.SnowflakeConnectorUtils.enablePushdownSession(spark._jvm.org.apache.spark.sql.SparkSession.builder().getOrCreate())

sfOptions = {
    "sfURL": args['URL'],
    "sfUser": args['USERNAME'],
    "sfPassword": args['PASSWORD'],
    "sfDatabase": args['DB'],
    "sfSchema": args['SCHEMA'],
    "sfWarehouse": args['WAREHOUSE'],
    "sfRole": args['ROLE']
}

df = spark.read.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("dbtable", "<>").load().select('<>')
df.printSchema()
df.show()
df.write.format(SNOWFLAKE_SOURCE_NAME).options(**sfOptions).option("dbtable", "GLUE_DEMO").mode("append").save()
But when executing it, I get the error below:
File "/home/glue_user/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o81.save.
: java.sql.SQLException: Status of query associated with resultSet is FAILED_WITH_ERROR. Results not generated.
at net.snowflake.client.jdbc.SFAsyncResultSet.getRealResults(SFAsyncResultSet.java:127)
at net.snowflake.client.jdbc.SFAsyncResultSet.getMetaData(SFAsyncResultSet.java:262)
If I look at the query history in Snowflake, it shows that no warehouse was selected:
No active warehouse selected in the current session. Select an active warehouse with the 'use warehouse' command
The easiest way is to assign the default warehouse to the user:
ALTER USER <name> SET DEFAULT_WAREHOUSE = <string>
Reference: ALTER USER
The read worked because the data was already cached, and a cached read does not require an active warehouse.
The real error is visible in the Snowflake query history.
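If changing the user's default warehouse is not an option, the Snowflake Spark connector also supports a preactions option that runs SQL before the transfer, so the session can select a warehouse explicitly. A sketch reusing the question's sfOptions and args (check that your connector version supports preactions):

# Sketch: select a warehouse in the session right before the write.
# Reuses sfOptions/args from the question's code.
df.write.format(SNOWFLAKE_SOURCE_NAME) \
    .options(**sfOptions) \
    .option("preactions", "USE WAREHOUSE " + args['WAREHOUSE']) \
    .option("dbtable", "GLUE_DEMO") \
    .mode("append") \
    .save()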
I am trying to display a plot with matplotlib and Django, following this and this question. It does not work, however: I tried both solutions, and with IO I only get an empty canvas, but when I try to plot a 'real' plot I get the error in the title.
This is my view:
import django
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import numpy as np
import matplotlib.pyplot as plt
import io

def mplimage(request):
    fig = Figure()
    canvas = FigureCanvas(fig)
    x = np.arange(-2, 1.5, .01)
    y = np.sin(np.exp(2 * x))
    plt.plot(x, y)
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close(fig)
    response = django.http.HttpResponse(content_type='image/png')
    canvas.print_png(response)
    return response
and here is the URL pattern in urls.py:

import mpl.views

url(r'mplimage.png', mpl.views.mplimage)
This works if you render the response as JPEG (requires PIL) instead of PNG, using the print_jpg() method instead of print_png().
Change:

response = django.http.HttpResponse(content_type='image/png')
canvas.print_png(response)

To:

response = django.http.HttpResponse(content_type='image/jpg')
canvas.print_jpg(response)
The deeper problem is that the view plots through pyplot while the response is rendered from the explicit FigureCanvas, so the canvas's figure stays empty. Plot on that figure directly instead:

from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import numpy as np
from django.http import HttpResponse

def showimage(request):
    fig = Figure()
    canvas = FigureCanvas(fig)
    ax = fig.add_subplot(111)
    x = np.arange(-2, 1.5, .01)
    y = np.sin(np.exp(2 * x))
    ax.plot(x, y)
    response = HttpResponse(content_type='image/jpg')
    canvas.print_jpg(response)
    return response
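If you go with this view, the URL pattern from the question just needs to point at the new view name; illustrative only, mirroring the question's urls.py:

# Illustrative: the question's urls.py pattern, pointed at the new view
import mpl.views

url(r'mplimage.png', mpl.views.showimage)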
I'm trying to write an AWS Lambda service using Python 2.7 that will generate an in-memory CSV file and email it as an attachment. I feel like I'm close with this script based on what I've learned, but I'm not quite there.
# Import smtplib for the actual sending function
import smtplib
import sys
import csv
import cStringIO
from os.path import basename

# Import the email modules we'll need
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication

server = smtplib.SMTP('smtp.postmarkapp.com', 587)
server.starttls()
server.login('.....', '.....')

rows = []  # renamed from "list" to avoid shadowing the built-in
row1 = ["One", "Two", "Three"]
rows.append(row1)

msg = MIMEMultipart()
msg['To'] = "daniel#mydomain.com"
msg['From'] = "noreply#mydomain.com"
msg['Subject'] = "DG Test subject"
msg.attach(MIMEText("Test Message"))

csv_buffer = cStringIO.StringIO()
writer = csv.writer(csv_buffer, lineterminator='\n')
writer.writerow(["1", "2", "3"])
for row in rows:
    writer.writerow(row)
print(csv_buffer.getvalue())

msg.attach(csv_buffer)

try:
    response = server.sendmail(msg['From'], ["daniel#mydomain.com"], msg.as_string())
    server.quit()
except AttributeError as error:
    print(error)
else:
    print(response)
This prints the CSV contents and then gives the following error:
1,2,3
One,Two,Three
'cStringIO.StringO' object has no attribute 'get_content_maintype'
Basically, it comes down to not being sure how to use the csv_buffer object. I assume I just need to add that attribute to the object somehow, but I'm not quite sure how. If I try to add any additional arguments to the .attach() line, it complains that I have too many arguments.
Thanks!
I figured it out, thanks to stitching together a few SO posts.
import cStringIO
import csv

csv_buffer = cStringIO.StringIO()
writer = csv.writer(csv_buffer, delimiter=',', quoting=csv.QUOTE_ALL)
writer.writerow(["1", "2", "3"])
for row in rows:
    writer.writerow(row)
print(csv_buffer.getvalue())

# new lines: wrap the CSV text in a MIME part before attaching it
# (add_header returns None, so there is no point assigning its result)
csv_file = MIMEText(csv_buffer.getvalue())
csv_file.add_header('Content-Disposition', 'attachment', filename="csv_file.csv")
msg.attach(csv_file)
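Since MIMEApplication is already imported in the question, the attachment can also be built with it; a sketch (the filename is just an example):

# Sketch: the same CSV attached via MIMEApplication instead of MIMEText.
attachment = MIMEApplication(csv_buffer.getvalue(), Name="csv_file.csv")
attachment.add_header('Content-Disposition', 'attachment', filename="csv_file.csv")
msg.attach(attachment)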
I'm scraping URLs from a txt file and exporting the results to a CSV file, but after the whole process my code writes only the information from the last URL. My guess is that I'm forgetting a loop, but where?
Here's my code:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib import urlopen

file = open('urls.txt', 'r')
filelines = (line.strip() for line in file)

for code in filelines:
    site = urlopen(code)
    soup = BeautifulSoup(site, "html.parser")
    final = soup.find_all("span", {"class": "bd js-title-main-info"})
    print final

records = []
for pagetxt in final:
    print pagetxt.text
    records.append((pagetxt.text))

df = pd.DataFrame(records, columns=['product name'])
df.to_csv('test.csv', index=False, encoding='utf-8')
Thanks
When you loop over the file, you keep only the last value in the variable final. Append the data inside the loop instead (I've marked the changes with #####):
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib import urlopen

file = open('urls.txt', 'r')
filelines = (line.strip() for line in file)

records = []                                          ######
for code in filelines:
    site = urlopen(code)
    soup = BeautifulSoup(site, "html.parser")
    final = soup.find_all("span", {"class": "bd js-title-main-info"})
    print final
    for pagetxt in final:                             ######
        print pagetxt.text                            ######
        records.append((pagetxt.text))                ######

df = pd.DataFrame(records, columns=['product name'])
df.to_csv('test.csv', index=False, encoding='utf-8')
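As a side note, requests is imported but never used; the fetch inside the loop could use it instead of urllib, e.g. (a sketch under the same assumptions):

# Sketch: fetch each page with requests instead of urllib's urlopen
response = requests.get(code)
soup = BeautifulSoup(response.text, "html.parser")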
Is there any way in Pandas to capture the warning produced by setting error_bad_lines = False and warn_bad_lines = True? For instance the following script:
import pandas as pd
from StringIO import StringIO
data = StringIO("""a,b,c
1,2,3
4,5,6
6,7,8,9
1,2,5
3,4,5""")
pd.read_csv(data, warn_bad_lines=True, error_bad_lines=False)
produces the warning:
Skipping line 4: expected 3 fields, saw 4
I'd like to store this output to a string so that I can eventually write it to a log file to keep track of records that are being skipped.
I tried using the warnings module, but it doesn't appear as though this "warning" is a traditional one. I'm using Python 2.7 and pandas 0.16.
I don't think this is implemented in pandas.
source1, source2
My solutions:
1. Pre- or post-processing
import pandas as pd
import csv

df = pd.read_csv('data.csv', warn_bad_lines=True, error_bad_lines=False)

# compare length of rows against a recommended value:
RECOMMENDED = 3
with open('data.csv') as csv_file:
    reader = csv.reader(csv_file, delimiter=',')
    for row in reader:
        if len(row) != RECOMMENDED:
            print ("Length of row is: %r" % len(row))
            print row

# compare length of rows against the number of columns in df
lencols = len(df.columns)
print lencols
with open('data.csv') as csv_file:
    reader = csv.reader(csv_file, delimiter=',')
    for row in reader:
        if len(row) != lencols:
            print ("Length of row is: %r" % len(row))
            print row
2. Replace sys.stdout

import pandas as pd
import sys

class RedirectStdStreams(object):
    def __init__(self, stdout=None, stderr=None):
        self._stdout = stdout or sys.stdout
        self._stderr = stderr or sys.stderr

    def __enter__(self):
        self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
        self.old_stdout.flush(); self.old_stderr.flush()
        sys.stdout, sys.stderr = self._stdout, self._stderr

    def __exit__(self, exc_type, exc_value, traceback):
        self._stdout.flush(); self._stderr.flush()
        sys.stdout = self.old_stdout
        sys.stderr = self.old_stderr

if __name__ == '__main__':
    devnull = open('log.txt', 'w')
    # replaces sys.stdout and sys.stderr, see http://stackoverflow.com/a/6796752/2901002
    with RedirectStdStreams(stdout=devnull, stderr=devnull):
        df = pd.read_csv('data.csv', warn_bad_lines=True, error_bad_lines=False)
I can't help you with anything older than Python 3, but I've had very good success with the following:
import pandas as pd
from contextlib import redirect_stderr
import io

# Redirect stderr to something we can report on.
f = io.StringIO()
with redirect_stderr(f):
    # new_file_name, header_types and logger come from the surrounding application code
    df = pd.read_csv(
        new_file_name, header=None, error_bad_lines=False, warn_bad_lines=True, dtype=header_types
    )
if f.getvalue():
    logger.warning("Had parsing errors: {}".format(f.getvalue()))
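For what it's worth, pandas 1.3+ replaces error_bad_lines/warn_bad_lines with a single on_bad_lines argument, so the equivalent call there would be (same assumed variables as above):

# pandas >= 1.3: on_bad_lines="warn" replaces warn_bad_lines=True / error_bad_lines=False
df = pd.read_csv(new_file_name, header=None, dtype=header_types, on_bad_lines="warn")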
I searched for this issue a number of times and kept being pointed to this question. Hope it helps someone else later on.