customized summary in proc report sas - sas

I need a report with a customized summary. The report below shows the fields in the test data set, and the summary row shows the sum of each variable. I want the summary value for rate to be the total of the variable num divided by the total of the variable tot, i.e. 68/194 = 35.05%, and not the sum of the percentages in the rate variable.
/* Sample data: one row per rank with a count (num), a denominator (tot)
   and the row-level rate (num / tot). */
data test;
input rank num tot rate;
datalines;
1 20 50 0.4
2 15 30 0.5
3 28 52 0.538461538461538
4 5 62 0.0806451612903226
;
run;
/* Report on the most recently created data set (_last_); the style()
   options set the same font for every report area. */
proc report data = _last_ box spacing = 1 split = "/"
style(header) = [font_face = "courier new"] style(column) = [font_face =
"courier new"]
style(lines) = [font_face = "courier new"] style(report) = [font_face =
"courier new"]
style(summary) = [font_face = "courier new" font_style = roman];
column(rank num tot rate);
/* rank orders the rows (kept in data order); the other columns carry no
   usage keyword. */
define rank / " Rank " center width = 6 format = 5. order
order = data;
define num / " N " center width = 6 format = 5.;
define tot / " Total " center width = 6 format = 5.;
define rate / " Rate " center width = 6 format =
percent9.2 ;
/* Bold summary row at the end of the report. As the question describes,
   every column (including rate) is summed here, so rate shows the sum of
   the row percentages rather than total num / total tot. */
rbreak after / summarize style = [font_weight = bold];
run;

Add compute block after summarize.
/* Same report as in the question, plus a COMPUTE AFTER block that replaces
   the summed rate on the summary row. */
proc report data = _last_ box spacing = 1 split = "/"
style(header) = [font_face = "courier new"] style(column) = [font_face =
"courier new"]
style(lines) = [font_face = "courier new"] style(report) = [font_face =
"courier new"]
style(summary) = [font_face = "courier new" font_style = roman];
column(rank num tot rate);
define rank / " Rank " center width = 6 format = 5. order
order = data;
define num / " N " center width = 6 format = 5.;
define tot / " Total " center width = 6 format = 5.;
define rate / " Rate " center width = 6 format =
percent9.2 ;
rbreak after / summarize style = [font_weight = bold];
/* Runs for the end-of-report summary row: overwrite rate with
   total num / total tot (68/194 for the sample data). The columns are
   summed analysis variables, hence the variable.sum references. */
compute after;
rate.sum=num.sum/tot.sum;
endcomp;
run;

You're nearly there - I think you just need to specify analysis mean when you define the variable rate:
/* Sample data: one row per rank with a count (num), a denominator (tot)
   and the row-level rate (num / tot). */
data test;
input rank num tot rate;
datalines;
1 20 50 0.4
2 15 30 0.5
3 28 52 0.538461538461538
4 5 62 0.0806451612903226
;
run;
proc report data = _last_ box spacing = 1 split = "/"
style(header) = [font_face = "courier new"] style(column) = [font_face =
"courier new"]
style(lines) = [font_face = "courier new"] style(report) = [font_face =
"courier new"]
style(summary) = [font_face = "courier new" font_style = roman];
column(rank num tot rate);
define rank / " Rank " center width = 6 format = 5. order order = data;
define num / " N " center width = 6 format = 5.;
define tot / " Total " center width = 6 format = 5.;
/* NOTE(review): ANALYSIS MEAN makes the summary row show the unweighted
   average of the four rate values (about 37.98% for this data). That is
   not the same as total num / total tot = 68/194 = 35.05%, which is what
   the question asks for; a COMPUTE AFTER block dividing num.sum by
   tot.sum gives that figure. */
define rate / " Rate " center width = 6 format = percent9.2 analysis mean;
rbreak after / summarize style = [font_weight = bold];
run;
quit;

Related

How to improve my python code to speed it up?

Below is my current code:
import pandas as pd
import math
import csv
# Moving-average crossover screener: for each trading date, scan every stock
# in the list file, record "buy"/"sell" triggers in `result`, and export them
# to a per-date CSV.
# NOTE(review): indentation was lost in this listing, so remarks about which
# statement sits inside which loop are inferred from variable use.
fund = 10000
print("investment",fund)
pval = 0
oldportfolio = []
dts = ["06 Feb 2017", "07 Feb 2017", "08 Feb 2017", "09 Feb 2017", "10 Feb 2017", "13 Feb 2017", "14 Feb 2017", "15 Feb 2017", "16 Feb 2017", "17 Feb 2017",
"20 Feb 2017", "21 Feb 2017", "22 Feb 2017", "23 Feb 2017", "27 Feb 2017"]
for dt in dts:
files = ["stocklistcustom.csv"]
for file in files:
# NOTE(review): the stock list does not depend on dt, yet it is re-read
# for every date.
df = pd.read_csv(file, header=None)
i = 0
filecount = len(df)
result = []
while i < filecount:
# while i < 10:
name = df[0][i]
link = df[1][i]
mcsym = df[2][i]
i = i + 1
filepath = "data/nse/his/" + mcsym + ".csv"
try:
# NOTE(review): each stock's history file is parsed again for every date;
# this is presumably the main cost and could be loaded once per symbol.
sp = pd.read_csv(filepath, header=None)
# row just past the one whose first column equals the current date
endrow = sp[sp[0] == dt].index[0] + 1
parray = []
tarray = []
starray = []
intdate = []
p1 = 0
p2 = 0
p3 = 0
p4 = 0
j = 0
mavg15 = ''
mavg60 = ''
olddiff = 0
days = 2
# window of 60 + days rows ending at the current date
strtrow = endrow - days - 60
for k in range (strtrow, endrow):
date = sp[0][k]
price = float(sp[4][k])
# no effect on iteration: the for loop rebinds k from range() on each pass
k = k + 1
parray.append(price)
j = j + 1
strtavg = j - 15
mavg15 = sum(parray[strtavg:j]) / 15
strtavg = j - 60
mavg60 = sum(parray[strtavg:j]) / 60
# buy criteria
# triggered when (60-day avg - 15-day avg) turns negative after being positive
if j > 59:
diff = mavg60 - mavg15
if diff < 0 and olddiff > 0:
trigger = 1
intdate.append(date)
else:
trigger = 0
tarray.append(trigger)
olddiff = diff
# sell criteria
# evaluated only on the last row of the window (j == days + 60)
if j == (days + 60):
pricep = (price - p1) * 100 / p1
# p1p, p2p and p3p are computed but not used anywhere in this listing
p1p = (p1 - p2) * 100 / p2
p2p = (p2 - p3) * 100 / p3
p3p = (p3 - p4) * 100 / p4
if pricep < -5 or pricep > 8:
sell = 1
# NOTE(review): the if/else below assigns sell in both branches, so the
# result of the percentage check above is presumably overwritten --
# confirm against the original indentation.
if price < p1 and p1 < p2 and p2 < p3:
sell = 1
else:
sell = 0
# shift the previous-price history by one day
p4 = p3
p3 = p2
p2 = p1
p1 = price
if sum(tarray) > 0:
result.append([name,mcsym,"buy",price])
if sell > 0:
result.append([name,mcsym,"sell",price])
# NOTE(review): a bare except hides every failure (missing file, date not
# found in the file, arithmetic errors), not only missing data.
except:
# print(name,"not found")
pass
# print(result)
output = "output/triggers/"+dt+"trigger.csv"
# "wb" is the Python 2 convention for csv.writer; on Python 3 the file must be
# opened in text mode instead: open(output, "w", newline="").
with open(output, "wb") as f:
writer = csv.writer(f)
writer.writerows(result)
print(output,"exported")
The above code create an array named result and exports various csv files with calls...
The code below now process the data in result array to compute portfolio value
# Code for calculating investment
# Applies the day's triggers in `result` to the running cash balance (`fund`)
# and holdings (`oldportfolio`, one list of [name, price, qty, val] per day).
# NOTE(review): indentation was lost in this listing; nesting is inferred.
portfolio = []
for row in result:
if row[2] == "sell" and len(oldportfolio) > 0:
pindex = 0
for buys in oldportfolio:
bindex = 0
for stock in buys:
# holdings are matched to the sell trigger by stock name
if row[0] == stock[0]:
sellqty = stock[2]
sellp = row[3]
sellval = sellqty * sellp
purchasep = stock[1]
sellcost = purchasep * sellqty
print(dt,"selling",row[0],row[1],sellp,sellqty,sellval)
# print(oldportfolio)
# NOTE(review): this deletes from the list that `for stock in buys` is
# iterating (assuming buys is oldportfolio[pindex]), so the element that
# follows the deleted one is skipped.
del oldportfolio[pindex][bindex]
# print(oldportfolio)
fund = fund + sellval
pval = pval - sellcost
bindex = bindex + 1
pindex = pindex + 1
# print("op", oldportfolio)
# print(dt,"fund after selling",fund)
# split the available cash equally across the day's buy triggers
buycount = sum(1 for row in result if row[2]==("buy"))
if buycount > 0:
maxinvest = fund / buycount
for row in result:
if row[2] == "buy":
name = row[0]
price = row[3]
qty = math.floor(maxinvest / price)
if qty > 0:
val = qty * price
print(dt,"buying",name,row[1],price,qty,val)
portfolio.append([name,price,qty,val])
fund = fund - val
# print("portfolio",portfolio)
# NOTE(review): pval accumulates purchase values (row[3] is qty * buy price)
# and is reduced by purchase cost on a sale, so it tracks cost rather than
# the holdings' value at the current day's prices.
pval = pval + sum(row[3] for row in portfolio)
print(dt,"cash",fund,"portfolio value",pval,"total",fund+pval)
oldportfolio.append(portfolio)
print(oldportfolio)
It gives me the value of portfolio for each day after trading based on certain rules. But its execution time is too much. How to reduce its execution time?
Also, I need to change pval as it is calculated incorrectly in current code. It must be calculated based on that particular day's prices.
Your code has multiple nested loops, which is probably why it is so slow.
But your biggest problem isn't speed, it's readability. It is really hard to reason about your code, consider refactoring.
I'm sure you'll find some bottlenecks and be able to improve your code while refactoring.

Python 2.7; How to clear an entry in the GUI

I have some example code, here just for a BMI index calculation. I would like to clear the input and output fields in the GUI. It seems that I can clear the entries, but the BMI calculation is not being removed (row 79 does not seem to have an effect) (# self.text.delete(0, 'end'))
Thanks
Emin
import math
from Tkinter import *
class Application(Frame):
    """BMI calculator form: four entry fields, a result box and two buttons."""

    def __init__(self, master):
        """Initialize the Frame, build its widgets and add the heading label."""
        Frame.__init__(self, master)
        self.grid()
        self.create_widgets()
        self.title = Label(self, text = "BMI index calculation")
        self.title.grid(row = 0, column = 0, columnspan = 2 , sticky =W)

    def create_widgets(self):
        """Create the label/entry pairs, the result text box and the buttons."""
        self.name = Label(self, text = "What is your name?")
        self.name.grid(row = 1, column = 0, columnspan = 2 , sticky =W)
        self.name_io = Entry(self)
        self.name_io.grid(row = 1, column =2, sticky = W)

        self.age = Label(self, text = "How old are you?")
        self.age.grid(row = 2, column = 0, columnspan = 2 , sticky =W)
        self.age_io = Entry(self)
        self.age_io.grid(row = 2, column =2, sticky = W)

        self.height = Label(self, text = "How tall are you?")
        self.height.grid(row = 3, column = 0, columnspan = 2 , sticky =W)
        self.height_io = Entry(self)
        self.height_io.grid(row = 3, column =2, sticky = W)

        self.weight = Label(self, text = "How much do you weigh in kg?")
        self.weight.grid(row = 4, column = 0, columnspan = 2 , sticky =W)
        self.weight_io = Entry(self)
        self.weight_io.grid(row = 4, column =2, sticky = W)

        self.submit_button = Button(self, text = "Calculate", command = self.reveal)
        self.submit_button.grid(row = 5, column = 0, sticky = W)

        self.text = Text(self, width = 40, height = 5, wrap = WORD)
        self.text.grid(row = 6, column = 0, columnspan = 3, sticky = W)

        self.clear_button = Button(self, text = "Clear", command = self.clear_text)
        self.clear_button.grid(row = 7, column = 0, sticky = W)

    def reveal(self):
        """Compute the BMI from the entered height and weight and display a message.

        The height is divided by 100 before squaring, so it is presumably
        entered in centimetres; the weight is asked for in kg. Raises
        ValueError when age, height or weight is not a number.
        """
        content_name = self.name_io.get()
        # Age is not part of the formula; it is still parsed so that a
        # non-numeric entry is rejected exactly as before.
        float(self.age_io.get())
        content_height = float(self.height_io.get())
        content_weight = float(self.weight_io.get())
        BMI = round((content_weight/(content_height/100)**2.),1)

        # Upper bounds (inclusive) of the underweight / normal / overweight bands.
        underBMI = 18.5
        NormalBMI = 24.9
        OverweightBMI = 29.9

        prefix = content_name + ", " + "your BMI index is" + " " + str(BMI) + ", "
        if BMI <= underBMI:
            message = prefix + "you are underweight, so you need to eat!"
        elif (BMI > underBMI) and (BMI <= NormalBMI):
            message = prefix + "your BMI is Normal"
        elif (BMI > NormalBMI) and (BMI <= OverweightBMI):
            message = prefix + "you are Overweight - need to exercise!"
        elif (BMI > OverweightBMI):
            message = prefix + "you are in Obesity"
        # Text widget indexes are "line.column" strings; "1.0" is the first character.
        self.text.insert("1.0", message)

    def clear_text(self):
        """Empty all four entry fields and the result text box."""
        self.name_io.delete(0, 'end')
        self.age_io.delete(0, 'end')
        self.height_io.delete(0, 'end')
        self.weight_io.delete(0, 'end')
        # An Entry takes integer positions, but a Text widget needs a
        # "line.column" index, so the result box is cleared from "1.0".
        self.text.delete("1.0", 'end')
root = Tk()
root.title("BMI Index")
root.geometry("600x350")
app = Application(root)
root.mainloop ()
The problem is that you're giving an index that is 0.0. Text widget indexes are a string of the form line.column but you're giving it a floating point number.
The proper index for the first character is the string "1.0".
self.text.delete("1.0", 'end')
Simply
your_entry.delete(0,END)

SAS Proc Template: Only Outside Table Border Control

I have a custom PROC TEMPLATE essentially all set up, except that I cannot get ONLY the outside frame of the table to be double lines. I can get the top of the table to be double when I use:
STYLE TABLEHEADERCONTAINER /
BORDERTOPSTYLE=DOUBLE
;
But the bottom of the table will not show a double line when I use:
STYLE TABLEFOOTERCONTAINER /
BORDERTOPSTYLE=DOUBLE
;
Nor does the above work with BORDERBOTTOMSTYLE. On top of this issue, I am unable to get the left and right sides of the table to have double lines. Can anyone offer any insight into this? Below is my entire template:
** CREATE CUSTOM STYLE WITH ARIAL 10 PT **;
/* Defines STYLES.STARFOX, inheriting from STYLES.RTF and overriding the
   fonts, table/cell layout, colours and page margins below. */
PROC TEMPLATE;
DEFINE STYLE STYLES.STARFOX;
PARENT=STYLES.RTF;
STYLE USERTEXT FROM USERTEXT / FONTSIZE=3.5 FONTSTYLE=ROMAN JUST=L; /*INFO FOR THE ODS TEXT STATEMENTS*/
/* Font table: each entry is (font family list, size, optional weight). */
STYLE FONTS /
'TITLEFONT2' = ("<SANS-SERIF>, <MTSANS-SERIF>, ARIAL",2,BOLD)
'TITLEFONT' = ("<SANS-SERIF>, <MTSANS-SERIF>, ARIAL",3,BOLD)
'STRONGFONT' = ("<SANS-SERIF>, <MTSANS-SERIF>, ARIAL",2,BOLD)
'EMPHASISFONT' = ("<SANS-SERIF>, <MTSANS-SERIF>, ARIAL",2)
'FIXEDFONT' = ("<MONOSPACE>, COURIER",2)
'BATCHFIXEDFONT' = ("SAS MONOSPACE, <MONOSPACE>, COURIER, MONOSPACE",2)
'FIXEDHEADINGFONT' = ("<MONOSPACE>, COURIER, MONOSPACE",2)
'FIXEDSTRONGFONT' = ("<MONOSPACE>, COURIER, MONOSPACE",2,BOLD)
'FIXEDEMPHASISFONT' = ("<MONOSPACE>, COURIER, MONOSPACE",2)
'HEADINGEMPHASISFONT' = ("<SANS-SERIF>, <MTSANS-SERIF>, ARIAL",2,BOLD )
'HEADINGFONT' = ("<SANS-SERIF>, <MTSANS-SERIF>, ARIAL",2,BOLD)
'DOCFONT' = ("<SANS-SERIF>, <MTSANS-SERIF>, ARIAL",2);
/* NOTE(review): the FONTS statement already ends with the semicolon on the
   previous line; the one below is an extra null statement. */
;
/* Double rule on the top edge of the header container; per the question
   this is the only part of the outer frame that currently renders double. */
STYLE TABLEHEADERCONTAINER /
BORDERTOPSTYLE=DOUBLE
;
/* NOTE(review): this sets the TOP border of the footer container. The
   question reports that neither this nor BORDERBOTTOMSTYLE produces a
   double rule at the bottom of the table. */
STYLE TABLEFOOTERCONTAINER /
BORDERTOPSTYLE=DOUBLE
;
/* Data cells: Arial 10pt, centred horizontally and vertically. */
STYLE DATA /
FONT_FACE = "ARIAL"
FONT_SIZE = 10PT
JUST=CENTER
VJUST=C
;
/* Table-level layout; FRAME=BOX requests a border on all four sides, but
   no border style is set here, so the left and right edges are not double. */
STYLE TABLE /
CELLSPACING = 0.7
CELLPADDING = 1.4
FONT_SIZE = 10
JUST=CENTER
VJUST=C
BORDERWIDTH=1
FRAME=BOX
;
/* Column headers: Arial 10pt bold, centred. */
STYLE HEADER /
FONT_FACE = "ARIAL"
FONT_SIZE = 10PT
FONT_WEIGHT = BOLD
JUST=CENTER
VJUST=C
;
/* Row headers: Arial, size 10, centred. */
STYLE ROWHEADER /
FONT_FACE = "ARIAL"
FONT_SIZE = 10
JUST=CENTER
VJUST=C
;
/* Footer cells: Arial, size 10, centred. */
STYLE FOOTER /
FONT_FACE = "ARIAL"
FONT_SIZE=10
JUST=CENTER VJUST=C
;
/* Colour list: white backgrounds, black foreground, blue links. */
REPLACE COLOR_LIST /
"BG" = WHITE
"FG" = BLACK
"BGH" = WHITE
"LINK" = BLUE;
/* Page margins for the document body. */
STYLE BODY FROM DOCUMENT /
TOPMARGIN=.8IN
BOTTOMMARGIN=.8IN
RIGHTMARGIN=.9IN;
END;
RUN;
Below is how the output table appears:
And I would like it to appear as:

MSChart - Multiple columns grouped by date

I need to display % utilization for several production lines over several days. So my Y axis will be the % values. I need bars for each of the production lines on each of the days. So my X axis would be groups of columns each labeled for the production line and then grouped and labeled for the date. How would I do this with the MSchart.
Below is a sample of what I need. It shows only 2 production lines (I will need to display more than 2 production lines) and does not include the production line name in the X-axis label.
I am almost there. Here is an image of the chart I am creating:
And here is the code that created it:
' Configures cuChart and binds it to dvCapacityUtilization, creating one
' column series per CutUpSet value; any exception is passed to ErrHandler.
Private Sub ChartSetup()
Try
' Clear any filter and sort by period, then by line, before binding.
dvCapacityUtilization.RowFilter = ""
dvCapacityUtilization.Sort = "Period ASC, CutUpSet ASC"
Me.cuChart.BeginInit()
With Me.cuChart
.ChartAreas(0).AxisX.Interval = 30
.ChartAreas(0).AxisX.LabelStyle.Format = "MM/yy"
.ChartAreas(0).AxisX.LabelStyle.Angle = -90
.ChartAreas(0).AxisY.MajorGrid.LineColor = Color.Gray
.ChartAreas(0).AxisX.MajorGrid.LineColor = Color.White
.ChartAreas(0).AxisX.MinorGrid.LineColor = Color.White
' Arguments: data source, series group-by field, X field, Y field, other
' fields. NOTE(review): the "Label = CapacityUtilization" entry binds every
' point's Label property to the value, which is presumably why the labels
' stay visible even with IsValueShownAsLabel = False and why LabelFormat
' has no visible effect. Dropping that entry (or clearing point.Label, as
' in the follow-up) removes them.
.DataBindCrossTable(dvCapacityUtilization, _
"CutUpSet", "Period", "CapacityUtilization", "Label = CapacityUtilization")
End With
'
' Per-series appearance; the series exist only after the cross-table bind.
For Each series In Me.cuChart.Series
series.IsValueShownAsLabel = False
series.LabelFormat = "0.0%"
series.SetCustomProperty("PointWidth", "0.5")
series.SetCustomProperty("DrawingStyle", "Cylinder")
series.XValueType = DataVisualization.Charting.ChartValueType.Date
Next
Me.cuChart.EndInit()
Catch ex As Exception
ErrHandler(Me.Name & " - Chart Setup", ex)
End Try
End Sub
How do I turn the value labels off? In my code I used IsValueShownAsLabel = False but they are still displayed. I eventually will allow the user to turn the values on or off.
How do I format the values as XX.X%? In my code I used LabelFormat = 0.0% but that did not work.
How can I get the X axis labels to be the dates associated with the data values. In my code I used AxisX.Interval = 30 just to get the labels on the chart.
Follow up:
As stated in #1 above, I used series.IsValueShownAsLabel = False but the value labels were still displayed. To remove them I had to do the following:
For Each point in series.Points
point.Label = String.Empty
Next
Why should I have to do this instead of using IsValueShownAsLabel = False?
WinForms: I finally worked this out after some experimentation. Please follow the steps below.
1. Drag and Drop new chart control from ToolBox in new winform.
2. Remove default "Series1" as it's dynamically generated as per column values.
3. Assume that we have following sample datatable.
/// <summary>
/// Builds the sample utilization table: three production lines
/// ("Proy01", "Proy02", "Proy03") for the current date and for one and two
/// months later, nine rows in total.
/// </summary>
/// <returns>
/// A table with the columns UtilizationDate (DateTime), ProductionLine
/// (string) and UtilizationValue (int).
/// </returns>
public DataTable GetDataTable()
{
    DataTable dt = new DataTable();
    dt.Columns.Add("UtilizationDate", typeof(DateTime));
    dt.Columns.Add("ProductionLine", typeof(string));
    dt.Columns.Add("UtilizationValue", typeof(int));

    // Read the clock once. Separate DateTime.Now calls return slightly
    // different values, so rows meant to share a period would otherwise
    // carry different X values when the chart groups them by date.
    DateTime start = DateTime.Now;

    string[] productionLines = { "Proy01", "Proy02", "Proy03" };

    // One inner array per period (month offsets 0, 1, 2); values are in the
    // same order as productionLines.
    int[][] valuesByMonth =
    {
        new int[] { 25, 15, 125 },
        new int[] { 13, 111, 77 },
        new int[] { 13, 111, 77 }
    };

    for (int month = 0; month < valuesByMonth.Length; month++)
    {
        DateTime period = start.AddMonths(month);
        for (int line = 0; line < productionLines.Length; line++)
        {
            dt.Rows.Add(period, productionLines[line], valuesByMonth[month][line]);
        }
    }
    return dt;
}
4. Now add following line of code using DataBindCrossTable.
// One X-axis label per month, formatted as month-year.
// NOTE(review): the area is addressed both by name ("ChartArea1") and by
// index (0); presumably these are the same chart area -- confirm.
chart1.ChartAreas["ChartArea1"].AxisX.IntervalType = DateTimeIntervalType.Months;
chart1.ChartAreas["ChartArea1"].AxisX.Interval = 1;
chart1.ChartAreas[0].AxisX.LabelStyle.Format = "MM-yy";
DataTable dt = GetDataTable();
// Arguments: data source, series group-by field, X field, Y field, other
// fields. This creates one series per ProductionLine value and binds each
// point's Label to its UtilizationValue.
chart1.DataBindCrossTable(dt.DefaultView, "ProductionLine", "UtilizationDate", "UtilizationValue", "Label=UtilizationValue");
// Per-series appearance; the series exist only after the bind above.
foreach (Series item in chart1.Series)
{
item.IsValueShownAsLabel = true;
item["PixelPointWidth"] = "50";
item["DrawingStyle"] = "Cylinder";
item.XValueType = ChartValueType.DateTime;
}

Python 2.7 for loop confusion

I'm trying to build a table from user input to export via Cheetah to fill a template to use as a report. I'm having trouble separating each iteration of the loop "for j in range(1, numErrors):" and putting table row tags at the beginning and end of each concatenation.
# Prompts for measurements and their error sources and builds HTML table
# markup from the answers (Python 2: raw_input / print statement).
# NOTE(review): indentation was lost in this listing; the notes below follow
# from the statements themselves and from the "Current output" shown after it.
table = ""
cells = ""
row = ""
numMeas = int(raw_input("Enter total number of measurements: "))
numMeas = numMeas + 1 #number of measurements compensated for iteration behavior
for i in range(1, numMeas):
typeMeas = raw_input("Enter type of measurement "+str(i)+": ")
numErrors = int(raw_input("Enter number of error sources: "))
numErrors = numErrors + 1
for j in range(1, numErrors): #builds dataSet from number of errors
# one [type, description, uncertainty] record per error source
inputData = []
inputData.append(typeMeas)
description = raw_input("Enter source of uncertainty "+str(j)+": ")
inputData.append(description)
estUncert = raw_input("Enter estimated uncertainty "+str(j)+": ")
estUncert = float(estUncert)
inputData.append(str(estUncert))
# this loop reuses the name i from the outer measurement loop
for i in inputData:
cell = "<td>"+str(i)+"</td>"
# NOTE(review): cells is initialised once at the top and never cleared, so
# every record's cells are appended to the same ever-growing string.
cells += cell
# NOTE(review): "=" replaces table instead of appending a row, so only one
# <tr> wrapping all accumulated cells is left when it is printed.
table = "<tr>"+cells+"</tr>"+"\n"
print table
Current output:
<tr><td>mass</td><td>scale</td><td>1.0</td><td>mass</td><td>human</td><td>2.0</td> <td>temp</td><td>room</td><td>3.0</td><td>temp</td><td>therm</td><td>4.0</td></tr>
Desired output:
<tr><td>mass</td><td>scale</td><td>1.0</td></tr>
<tr><td>mass</td><td>human</td><td>2.0</td></tr>
<tr><td>temp</td><td>room</td><td>3.0</td></tr>
<tr><td>temp</td><td>therm</td><td>4.0</td></tr>
I am guessing it probably needs to look like this:
# Build HTML table rows from user input: one <tr> per uncertainty source,
# each holding the measurement type, the source description and the
# estimated uncertainty.
table = ""
numMeas = int(raw_input("Enter total number of measurements: "))
numMeas = numMeas + 1 #number of measurements compensated for iteration behavior
for i in range(1, numMeas):
    typeMeas = raw_input("Enter type of measurement "+str(i)+": ")
    numErrors = int(raw_input("Enter number of error sources: "))
    numErrors = numErrors + 1
    for j in range(1, numErrors): #one table row per error source
        description = raw_input("Enter source of uncertainty "+str(j)+": ")
        estUncert = float(raw_input("Enter estimated uncertainty "+str(j)+": "))
        # The record is created fresh for every error source; keeping one
        # list or one cells string across iterations would leak earlier
        # cells into later rows.
        inputData = [typeMeas, description, str(estUncert)]
        cells = "".join("<td>"+str(value)+"</td>" for value in inputData)
        table += "<tr>"+cells+"</tr>"+"\n"
# The values are inserted into the markup unescaped; escape them first if
# the input may contain <, > or &.
# A single parenthesised argument prints the same under Python 2 and 3.
print(table)