How do I fill a column with a user inputted value? - python-2.7

I want to add a date column to data exported to csv file. Luckily the files are identified by the date they represent. However, I can't get the column to fill up with the appropriate user inputted value. Here's the code I have so far:
def read_file():
user_input = raw_input("Please put cost folder with date in form Costmm.dd: ")
path = r'C:\\Users\\CP\\documents\\' + user_input
allFiles = glob.glob(path + '/*.csv')
frame = pd.DataFrame()
frame['Date'] = pd.Series()
frame['Date'] = frame['Date'].astype(object).fillna(user_input)
list = []
for file in allFiles:
df = pd.read_csv(file,index_col=None,header=0)
list.append(df)
frame =pd.concat(list,ignore_index=True)
frame['Date'] = pd.Series()
return(frame)
Any and all help is greatly appreciated!

If I understand what you're after the following should work:
import datetime as dt
def read_file():
user_input = raw_input("Please put cost folder with date in form Costmm.dd: ")
path = r'C:\\Users\\CP\\documents\\' + user_input
allFiles = glob.glob(path + '/*.csv')
l = []
for file in allFiles:
df = pd.read_csv(file,index_col=None,header=0)
l.append(df)
frame =pd.concat(l,ignore_index=True)
frame['Date'] = dt.datetime.strptime(user_input, '%m.%d')
return frame
A few notes:
frame = pd.DataFrame()
frame['Date'] = pd.Series()
frame['Date'] = frame['Date'].astype(object).fillna(user_input)
The above was redundant as you overwrite this with the result of pd.concat anyway.
Don't user list for a variable name of type list use something like df_list.
To convert a string to datetime you can use dt.datetime.strptime and pass the format string.

Related

Input query for python code

So I have created this code for my research, but I want to use it for plenty of data files, I do not want to do it manually, which means retyping some lines in my code to use desired file. How to use input command in python (I work with python 2.7 on Windows OS) to use it faster, just by typing name of desired datafile. My code so far:
import iodata as io
import matplotlib.pyplot as plt
import numpy as np
import time
from scipy.signal import welch
from scipy import signal
testInstance = io.InputConverter()
start = time.time()
conversionError = io.ConversionError()
#data = testInstance.convert(r"S:\Doktorat\Python\", 1", conversionError)
data = testInstance.convert(r"/Users/PycharmProjects/Hugo/20160401", "201604010000", conversionError)
end = time.time()
print("time elapsed " + str(end - start))
if(conversionError.conversionSucces):
print("Conversion succesful")
if(conversionError.conversionSucces == False):
print("Conversion failed: " + conversionError.conversionErrorLog)
print "Done!"
# Create a new subplot for two cannals 1 & 3
a = np.amin(data.data)
Bx = data.data[0,]
By = data.data[1,]
dt = float(300)/266350
Fs = 1/dt
t = np.arange(0,300,dt*1e3)
N = len(Bx)
M = len(By)
time = np.linspace(0,300,N)
time2 = np.linspace(0,300,M)
filename = 'C:/Users/PycharmProjects/Hugo/20160401/201604010000.dat'
d = open(filename,'rb')
degree = u"\u00b0"
headersize = 64
header = d.read(headersize)
ax1 = plt.subplot(211)
ax1.set_title(header[:16] + ', ' + # station name
'Canals: '+header[32:33]+' and '+header[34:35]+ ', ' # canals
+'Temp'+header[38:43]+degree+'C' # temperature
+', '+'Time:'+header[26:32]+', '+'Date'+' '+header[16:26]) # date
plt.ylabel('Pico Tesle [pT]')
plt.xlabel('Time [ms]')
plt.grid()
plt.plot(time[51:-14], Bx[51:-14], label='Canal 1', color='r', linewidth=0.1, linestyle="-")
plt.plot(time2[1:-14], By[1:-14], label='Canal 3', color='b', linewidth=0.1, linestyle="-")
plt.legend(loc='upper right', frameon=False, )
# Create a new subplot for FFT
plt.subplot(212)
plt.title('Fast Fourier Transform')
plt.ylabel('Power [a.u.]')
plt.xlabel('Frequency Hz')
xaxis2 = np.arange(0,470,10)
plt.xticks(xaxis2)
fft1 = (Bx[51:-14])
fft2 = (By[1:-14])
plt.grid()
# Loop for FFT data
for dataset in [fft1]:
dataset = np.asarray(dataset)
freqs, psd = welch(dataset, fs=266336/300, window='hamming', nperseg=8192)
plt.semilogy(freqs, psd/dataset.size**0, color='r')
for dataset2 in [fft2]:
dataset2 = np.asarray(dataset2)
freqs2, psd2 = welch(dataset2, fs=266336/300, window='hamming', nperseg=8192)
plt.semilogy(freqs2, psd2/dataset2.size**0, color='b')
plt.show()
As you can see there are some places where it would be better to put input and when I run the code I can write names of filenames etc. to python instead of creating every single pythonfile, with specified info in the code.
Btw. I use Pycharm to my python.
If all you are trying to do is get rid of the hardcoded pathname, you should be able to format your name string with input variables
name = raw_input("Name: ")
measurement = raw_input("Measurement: ")
filename = "C:/Users/PycharmProjects/{0}/{1}".format(name, measurement)
see raw_input and string formatting

Python - reading text file delimited by semicolon, ploting chart using openpyxl

I have copied the text file to excel sheet separating cells by ; delimiter.
I need to plot a chart using the same file which I achieved. Since all the values copied are type=str my chart gives me wrong points.
Please suggest to overcome this. Plot is should be made of int values
from datetime import date
from openpyxl import Workbook,load_workbook
from openpyxl.chart import (
LineChart,
Reference,
Series,
)
from openpyxl.chart.axis import DateAxis
excelfile = "C:\Users\lenovo\Desktop\how\openpychart.xlsx"
wb = Workbook()
ws = wb.active
f = open("C:\Users\lenovo\Desktop\sample.txt")
data = []
num = f.readlines()
for line in num:
line = line.split(";")
ws.append(line)
f.close()
wb.save(excelfile)
wb.close()
wb = load_workbook(excelfile, data_only=True)
ws = wb.active
c1 = LineChart()
c1.title = "Line Chart"
##c1.style = 13
c1.y_axis.title = 'Size'
c1.x_axis.title = 'Test Number'
data = Reference(ws, min_col=6, min_row=2, max_col=6, max_row=31)
series = Series(data, title='4th average')
c1.append(series)
data = Reference(ws, min_col=7, min_row=2, max_col=7, max_row=31)
series = Series(data, title='Defined Capacity')
c1.append(series)
##c1.add_data(data, titles_from_data=True)
# Style the lines
s1 = c1.series[0]
s1.marker.symbol = "triangle"
s1.marker.graphicalProperties.solidFill = "FF0000" # Marker filling
s1.marker.graphicalProperties.line.solidFill = "FF0000" # Marker outline
s1.graphicalProperties.line.noFill = True
s2 = c1.series[1]
s2.graphicalProperties.line.solidFill = "00AAAA"
s2.graphicalProperties.line.dashStyle = "sysDot"
s2.graphicalProperties.line.width = 100050 # width in EMUs
##s2 = c1.series[2]
##s2.smooth = True # Make the line smooth
ws.add_chart(c1, "A10")
##
##from copy import deepcopy
##stacked = deepcopy(c1)
##stacked.grouping = "stacked"
##stacked.title = "Stacked Line Chart"
##ws.add_chart(stacked, "A27")
##
##percent_stacked = deepcopy(c1)
##percent_stacked.grouping = "percentStacked"
##percent_stacked.title = "Percent Stacked Line Chart"
##ws.add_chart(percent_stacked, "A44")
##
### Chart with date axis
##c2 = LineChart()
##c2.title = "Date Axis"
##c2.style = 12
##c2.y_axis.title = "Size"
##c2.y_axis.crossAx = 500
##c2.x_axis = DateAxis(crossAx=100)
##c2.x_axis.number_format = 'd-mmm'
##c2.x_axis.majorTimeUnit = "days"
##c2.x_axis.title = "Date"
##
##c2.add_data(data, titles_from_data=True)
##dates = Reference(ws, min_col=1, min_row=2, max_row=7)
##c2.set_categories(dates)
##
##ws.add_chart(c2, "A61")
### setup and append the first series
##values = Reference(ws, (1, 1), (9, 1))
##series = Series(values, title="First series of values")
##chart.append(series)
##
### setup and append the second series
##values = Reference(ws, (1, 2), (9, 2))
##series = Series(values, title="Second series of values")
##chart.append(series)
##
##ws.add_chart(chart)
wb.save(excelfile)
wb.close()
I have modified below code in for loop and it worked.
f = open("C:\Users\lenovo\Desktop\sample.txt")
data = []
num = f.readlines()
for line in num:
line = line.split(";")
new_line=[]
for x in line:
if x.isdigit():
x=int(x)
new_line.append(x)
else:
new_line.append(x)
ws.append(new_line)
f.close()
wb.save(excelfile)
wb.close()
For each list,for each value check if its a digit, if yes converts to integer and store in another list.
Using x=map(int,x) didnt work since I have character values too.
I felt above is much more easy than using x=map(int,x) with try and Except
Thanks
Basha

python 2.7 program to extract data from excel workbooks - why does it need to be saved in the same folder as the files?

I've got a program that uses openpyxl, os and tkinter that lets a person choose a file directory and then extracts data from certain cells from excel files in that directory. As-is, it will only run if the python file is in the same folder as the files from which data is being extracted are located.
I want to make it so that the program file can be stored outside that folder, but I can't figure out why it needs to be within that folder based on my code. Can someone point me to the place in the code that is making this be necessary?
Thank you
#!/usr/bin/env python
import os
import openpyxl
import Tkinter as tk
from Tkinter import *
import tkFileDialog, tkMessageBox, ttk
def file_open():
file_path = tkFileDialog.askdirectory()
if file_path == "":
tkMessageBox.showinfo("Error", "No Folder Selected")
else:
ALL_SHEETS = [f for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
HEAD = 1
ROW = 2
START = 1
END = 11
OUTFILE = 'empty_book.xlsx'
def get_row(sht, start, end, row):
row_data = []
for col in range(start, end):
d = sht.cell(row=row, column=col)
row_data.append(d.value)
return row_data
def get_all(files):
data_rows = []
for f in files:
wb = openpyxl.load_workbook(filename=f, data_only=True)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, ROW)
data_rows.append(row)
return data_rows
def get_headings(sheets):
first = sheets[1]
wb = openpyxl.load_workbook(filename=first)
sheet = wb.get_sheet_by_name('Data')
row = get_row(sheet, START, END, HEAD)
return row
def write_new(header, data, f):
wb = openpyxl.Workbook()
ws1 = wb.active
ws1.title = 'Data'
ws1.append(header)
for row in data:
ws1.append(row)
wb.save(filename=f)
def together():
sheets = sorted(ALL_SHEETS)
header = get_headings(sheets)
data = get_all(sheets)
write_new(header, data, OUTFILE)
together()
tkMessageBox.showinfo("Great Job!", "Data Extraction Successful!")
class NSC(tk.Frame):
def __init__(self, parent):
tk.Frame.__init__(self, parent)
self.parent = parent
self.parent.title("Degree Planner Data Extractor")
l1 = tk.Label(text="Degree Planner Data Extractor", font=('Segui',
20))
l1.place(x = 35, y = 20)
nscButton = tk.Button(text=' Extract data from degree planners ',
command=file_open)
nscButton.place(x= 80, y=100)
quitButton = tk.Button(text=" Quit ", command=self.quit)
quitButton.place(x=155, y=155)
def main():
root = Tk()
w = 400
h = 250
ws = root.winfo_screenwidth() # width of the screen
hs = root.winfo_screenheight() # height of the screen
x = (ws/2) - (w/2)
y = (hs/2) - (h/2)
root.geometry('%dx%d+%d+%d' % (w, h, x, y))
root.resizable(0,0)
app = NSC(root)
root.mainloop()
if __name__ == '__main__':
main()
You've kinda solved the problem in your code already. os.listdir returns file names without path so you needed os.path.join for the isfile test. You need to add that joined name to your list.
ALL_SHEETS = [os.path.join(file_path, f) for f in os.listdir(file_path)
if os.path.isfile(os.path.join(file_path, f))
and f.endswith('.xlsx')]
glob.glob does almost the same thing with the small risk that somebody named a directory ".xlsx".
from glob import glob
ALL_SHEETS = [f for f in glob(os.path.join(file_path, "*.xlsx"))
if os.path.isfile(f)]]

How to use user-defined functions in python?

I'm just learning python so be gentle. I want to read a file and that to be one function and then have another function work on what the previous function "read". I am having trouble passing the result on one function to another. Here is what I have no far:
I want to call read_file more than once and to be able to pass its result to more than one function, therefore, I do not want frame to be a global variable. How do I get read_file to pass 'frame' to cost_channelID directly? Perhaps, for cost_channelID to call read_file?
def read_file():
user_input = raw_input("please put date needed in x.xx form: ")
path = r'C:\\Users\\CP\\documents\\' + user_input
allFiles = glob.glob(path + '/*.csv')
frame = pd.DataFrame()
list = []
for file in allFiles:
df = pd.read_csv(file,index_col=None,header=0)
list.append(df)
frame =pd.concat(list,ignore_index=True)
def cost_channelID():
numbers =r'[0,1,2,3,4,5,6,7,8,9]'
Ads = frame['Ad']
ID = []
for ad in Ads:
num = ''.join(re.findall(numbers,ad)[1:7])
ID.append(num)
ID = pd.Series(ID)
pieces = [frame,ID]
frame2 = pd.concat(pieces,ignore_index=True,axis=1)
frame2 = frame2.rename(columns={0:'Ad',1:'Ad Impressions',2:'Total Ad Spend',3:'eCPM (Total Ad Spend/Ad)',4:'Ad Attempts',5:'ID'})
Any and all help is greatly appreciated!
Here is your modified code (comments in uppercase for easier finding, not rudeness):
def read_file():
user_input = raw_input("please put date needed in x.xx form: ")
path = r'C:\\Users\\CP\\documents\\' + user_input
allFiles = glob.glob(path + '/*.csv')
frame = pd.DataFrame()
list = []
for file in allFiles:
df = pd.read_csv(file,index_col=None,header=0)
list.append(df)
frame =pd.concat(list,ignore_index=True)
return(frame) #YOUR FUNCTION WILL BE RETURNING THE READ FRAME
def cost_channelID(read_frame): #YOU WILL BE RECEIVING A DIFFERENT FRAME EVERY TIME
numbers =r'[0,1,2,3,4,5,6,7,8,9]'
Ads = read_frame['Ad']
ID = []
for ad in Ads:
num = ''.join(re.findall(numbers,ad)[1:7])
ID.append(num)
ID = pd.Series(ID)
pieces = [frame,ID]
frame2 = pd.concat(pieces,ignore_index=True,axis=1)
frame2 = frame2.rename(columns={0:'Ad',1:'Ad Impressions',2:'Total Ad Spend',3:'eCPM (Total Ad Spend/Ad)',4:'Ad Attempts',5:'ID'})
#HERE YOU MEANT TO USE FRAME2 WITHIN THE FUNCTION? IT IS WHAT HAPPENING BECAUSE OF THE INDENTATION
frame1 = read_file() #YOU CAN READ AS MANY FRAMES AS YOU WANT AND THEY'LL BE KEPT IN SEPARATED FRAMES
frame2 = read_file()
#...framexx = read_file()
#AND YOU CAN JUST CALL cost_channelID in any of them (or any other function)
const_channelID(frame1)
const_channelID(frame2)
#... AND SO ON
If you are trying to pass frame from one function to the other, you need to declare it outside the scope of the function. Otherwise we need more information about what you are trying to accomplish.
frame = None
def read_file():
user_input = raw_input("please put date needed in x.xx form: ")
path = r'C:\\Users\\CP\\documents\\' + user_input
allFiles = glob.glob(path + '/*.csv')
frame = pd.DataFrame()
list = []
for file in allFiles:
df = pd.read_csv(file,index_col=None,header=0)
list.append(df)
frame =pd.concat(list,ignore_index=True)
def cost_channelID():
numbers =r'[0,1,2,3,4,5,6,7,8,9]'
Ads = frame['Ad']
ID = []
for ad in Ads:
num = ''.join(re.findall(numbers,ad)[1:7])
ID.append(num)
ID = pd.Series(ID)
pieces = [frame,ID]
frame2 = pd.concat(pieces,ignore_index=True,axis=1)
frame2 = frame2.rename(columns={0:'Ad',1:'Ad Impressions',2:'Total Ad Spend',3:'eCPM (Total Ad Spend/Ad)',4:'Ad Attempts',5:'ID'})
Here is a good reference for scope rules in python
Short Description of the Scoping Rules?

Printing Results from Loops

I currently have a piece of code that works in two segments. The first segment opens the existing text file from a specific path on my local drive and then arranges, based on certain indices, into a list of sub list. In the second segment I take the sub-lists I have created and group them on a similar index to simplify them (starts at def merge_subs). I am getting no error code but I am not receiving a result when I try to print the variable answer. Am I not correctly looping the original list of sub-lists? Ultimately I would like to have a variable that contains the final product from these loops so that I may write the contents of it to a new text file. Here is the code I am working with:
from itertools import groupby, chain
from operator import itemgetter
with open ("somepathname") as g:
# reads text from lines and turns them into a list sub-lists
lines = g.readlines()
for line in lines:
matrix = line.split()
JD = matrix [2]
minTime= matrix [5]
maxTime= matrix [7]
newLists = [JD,minTime,maxTime]
L = newLists
def merge_subs(L):
dates = {}
for sub in L:
date = sub[0]
if date not in dates:
dates[date] = []
dates[date].extend(sub[1:])
answer = []
for date in sorted(dates):
answer.append([date] + dates[date])
new code
def openfile(self):
filename = askopenfilename(parent=root)
self.lines = open(filename)
def simplify(self):
g = self.lines.readlines()
for line in g:
matrix = line.split()
JD = matrix[2]
minTime = matrix[5]
maxTime = matrix[7]
self.newLists = [JD, minTime, maxTime]
print(self.newLists)
dates = {}
for sub in self.newLists:
date = sub[0]
if date not in dates:
dates[date] = []
dates[date].extend(sub[1:])
answer = []
for date in sorted(dates):
print(answer.append([date] + dates[date]))
enter code here
enter code here