How to adjust table size in markdown knitr - r-markdown

I'm trying to use a package expss, and I followed some of the examples, but my table is huge. How can I adjust the table so that it is small, like in the link?
For the tables in the link please see: expss introduction
My tables have wide margins:
I think I need to make changes to the setup, but I don't know how. My set up is:
# Load knitr so its chunk options can be configured for this document.
library(knitr)
# Set the default chunk option `echo = TRUE` for every chunk that follows.
knitr::opts_chunk$set(echo = TRUE)

Try using the css.cell argument of htmlTable to remove the cell padding:
# Label the mtcars variables and values, then print a cross-tabulation whose
# HTML cells carry no padding so the rendered table stays compact.
library(expss)
data(mtcars)

mtcars <- apply_labels(
  mtcars,
  mpg = "Miles/(US) gallon",
  cyl = "Number of cylinders",
  disp = "Displacement (cu.in.)",
  hp = "Gross horsepower",
  drat = "Rear axle ratio",
  wt = "Weight (1000 lbs)",
  qsec = "1/4 mile time",
  # A character string sets the variable label; a named vector sets the
  # value labels for the same variable.
  vs = "Engine",
  vs = c(
    "V-engine" = 0,
    "Straight engine" = 1
  ),
  am = "Transmission",
  am = c(
    "Automatic" = 0,
    "Manual" = 1
  ),
  gear = "Number of forward gears",
  carb = "Number of carburetors"
)

# `css.cell` is the inline CSS applied to the table cells.
cro(mtcars$am, mtcars$vs) %>%
  htmlTable(css.cell = "padding: 0px;")

Related

Making Power Bi - R (HTMLwidgets pbiviz based) custom visuals interactive with other Power BI visuals

I have made a pbiviz custom visual using developer tools of Normal distribution curve over a Histogram plot with R - ggplot2 and plotly libraries in a pbiviz.package
The visual works fine. Now I want to add interactivity of the Histogram with other Power BI visuals.
i.e. If user clicks on a bar of the Histogram, it should filter out a Table on my PBI report with rows relevant to the histogram bar data.
Considering the limitations of using R script with Power BI, I do not know if it is possible with my current visual as I am new to scripting.
Is there a better way (Typescript, JS, Python, etc.) other than what I have done to make an Interactive Histogram & Distribution Curve in Power BI?
This is the R script along with sample data and Visual Image
Histogram represents the projects falling in different durations
There are two bell curves - One for closed projects and Other for Active Projects
# Pull in the helper script shipped with the visual.
# NOTE(review): libraryRequireInstall(), internalSaveWidget() and
# ReadFullFileReplaceString() are presumably defined in this file - confirm.
source('./r_files/flatten_HTML.r')
############### Library Declarations ###############
# Make sure each package the visual needs is available.
libraryRequireInstall("ggplot2");
libraryRequireInstall("plotly");
libraryRequireInstall("tidyverse");
libraryRequireInstall("scales");
libraryRequireInstall("htmlwidgets");
# Attach the packages whose functions are called unqualified below.
library(ggplot2)
library(tidyverse)
library(scales)
library(plotly)
# Use the black-and-white theme as the default for every ggplot.
theme_set(theme_bw())
##### Making DataSet for All Selected Projects #####
# `dataset` is supplied from outside this script; keep only the duration and
# status columns in a small working data frame.
Duration <- dataset[["Duration"]]
Status <- rep(dataset[["ProjectStatus"]])
da <- data.frame(Duration = Duration, Status = Status)

# Summary statistics for all projects, used to scale and position the curve.
lenx <- length(Duration)
meanall <- mean(da[["Duration"]])
sdx <- sd(da[["Duration"]])

# Histogram bin width, in the same unit as Duration.
binwidth <- 30

# Shared font and hover-label settings for the plotly widget.
font_label <- list(family = "Segoe UI", size = 21, colour = "black")
hovlabel <- list(bordercolor = "black", font = font_label)
# Filtering out closed projects from the dataset
# Creating a data frame for closed projects
closedproj <- dataset %>%
  select(Duration, ProjectStatus) %>%
  filter(ProjectStatus == "Closed")

closed <- closedproj$Duration
df <- data.frame(closed)

# The column name after `$` must be spelled out: a dangling `closedproj$`
# swallows the following line and leaves `xclosed` undefined.
xclosed <- closedproj$Duration
df2 <- data.frame(xclosed)

# Summary statistics for closed projects only (second bell curve).
lenc <- length(xclosed)
mean_closed <- mean(df2$xclosed)
sdc <- sd(df2$xclosed)
# Build the ggplot object `a`: a histogram of project durations filled by
# status, two scaled normal curves (all projects in red, closed projects in
# blue), dashed vertical lines at each mean and at mean +/- one standard
# deviation, and the theme/axis settings. The `text` aesthetic is what the
# plotly tooltip displays later.
a <-
(ggplot(da,aes(x=Duration, fill = Status, text = paste("Duration: ",x,"-", x + binwidth,"<br />Project Count", ..count..)))+
#Histogram
# Overlapping, semi-transparent bars (position = 'identity') of raw counts.
geom_histogram(aes(y=..count..),alpha=0.5, position='identity',binwidth = binwidth)+
# #Distribution Curve
# Normal density for all projects, multiplied by the number of observations
# and the bin width.
# NOTE(review): `width` does not look like a line aesthetic in ggplot2
# (`size`/`linewidth` is the usual one) - confirm it has any effect.
annotate(
geom = "line",
x = da$Duration,
y = dnorm(da$Duration, mean = meanall, sd = sdx) * lenx * binwidth,
width = 3,
color = "red"
) +
# Same curve for closed projects only.
annotate(
geom = "line",
x = df2$xclosed,
y = dnorm(df2$xclosed, mean = mean_closed, sd = sdc)* lenc * binwidth,
width = 3,
color = "blue"
) +
labs(
x = "Project Duration (Days)",
y = "Project_Count",
fill = "Project Status")+
#Mean
# Dashed vertical lines at the two means.
# NOTE(review): `label` is probably not an argument geom_vline() uses - confirm.
geom_vline(aes(xintercept=meanall),color="red",linetype="dashed",size = 0.8,label=paste("Mean :",round(meanall,0)))+
geom_vline(aes(xintercept=mean_closed),color="blue",linetype="dashed",size = 0.8,label=paste("Mean (Closed):",round(mean_closed,0)))+
# 1 Sigma
# Dashed vertical lines one standard deviation either side of each mean.
geom_vline(aes(xintercept = (meanall + sdx)), color = "red", size = 1, linetype = "dashed") +
geom_vline(aes(xintercept = (meanall - sdx)), color = "red", size = 1, linetype = "dashed")+
geom_vline(aes(xintercept = (mean_closed + sdc)), color = "blue", size = 1, linetype = "dashed") +
geom_vline(aes(xintercept = (mean_closed - sdc)), color = "blue", size = 1, linetype = "dashed")+
# Theme
# Transparent plot background, grey legend box, bold axis titles, no grid.
theme(
plot.background = element_rect(fill = "transparent"),
legend.background = element_rect(fill = "lightgray"),
axis.title.x = element_text(colour = "Black",size = 18,face = "bold"),
axis.title.y = element_text(colour = "Black",size = 18,face = "bold"),
axis.text.x = element_text(colour = "Black",size = 15),
axis.text.y = element_text(colour = "Black",size = 15),
panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
# Comma-formatted tick labels; x breaks every 50 up to the longest duration.
scale_x_continuous(labels = comma,
breaks = seq(0, max(Duration),50)) +
# NOTE(review): the y breaks are also derived from max(Duration) although the
# y axis shows counts - confirm this is intended.
scale_y_continuous(labels = comma,
breaks = seq(0,max(Duration),10)))
############# Create and save widget ###############
# Convert the ggplot to a plotly widget that shows only the custom `text`
# tooltip, then style the hover label, legend, axis titles, summary
# annotation and mode bar.
p <- ggplotly(a, tooltip = c("text")) %>%
  style(hoverlabel = hovlabel) %>%
  layout(
    # Horizontal legend placed above the plotting area.
    legend = list(
      orientation = "h",
      x = 0,
      y = 1.13,
      title = list(
        text = "Project Status",
        font = list(family = "Segoe UI", size = 23)
      ),
      font = font_label
    ),
    yaxis = list(title = list(standoff = 25L)),
    xaxis = list(title = list(standoff = 25L)),
    # Summary box in paper coordinates (top-right corner of the plot).
    annotations = list(
      showarrow = FALSE,
      align = "left",
      valign = "top",
      x = 0.95,
      xref = "paper",
      yref = "paper",
      y = 0.955,
      font = list(family = "Segoe UI", size = 22, color = "#cc0000"),
      text = paste(
        "Max Duration: ", comma(round(max(da$Duration), 0)),
        "<br>Mean (Closed): ", comma(round(mean_closed, 0)),
        "<br>Mean (All) : ", comma(round(meanall, 0))
      )
    )
  ) %>%
  config(
    modeBarButtonsToRemove = c(
      "select2d", "hoverClosestCartesian", "lasso2d",
      "hoverCompareCartesian", "toggleSpikelines"
    ),
    displaylogo = FALSE
  )

# Write the widget to the HTML file the visual loads. No closing brace follows:
# nothing in this script opens a block here, and an unmatched `}` stops the
# script from parsing.
internalSaveWidget(p, 'out.html')
####################################################
################ Reduce paddings ###################
ReadFullFileReplaceString('out.html', 'out.html', ',"padding":[0-5]*,', ',"padding":0,')
What I expect is -- If user clicks on a bar of the Histogram, it should reflect on a Table visual on my PBI report with rows relevant to the histogram bar data.
Any help will be highly appreciated !
Regards

How do I get the word-embedding matrix from ft_word2vec (sparklyr-package)?

I have another question in the word2vec universe.
I am using the 'sparklyr'-package. Within this package I call the ft_word2vec() function. I have some trouble understanding the output:
Whatever number of sentences/paragraphs I provide to the ft_word2vec() function, I always get the same number of vectors back, even if I have more sentences/paragraphs than words. To me, that looks like I am getting paragraph vectors. Maybe a code example helps to explain my problem?
# add your spark_connection here as 'spark_connection = '

# create example data frame
FK_data <- data.frame(
  sentences = c(
    "This is my first sentence",
    "It is followed by the second sentence",
    "At the end there is the last sentence"
  )
)

# move the data to spark
sc_FK_data <- copy_to(
  spark_connection, FK_data,
  name = "FK_data", overwrite = TRUE
)

# prepare data for ft_word2vec: sentences have to be tokenized (a list of
# words instead of one string in each row). The input column must be the
# column that actually exists in the data, i.e. "sentences".
sc_FK_data <- ft_tokenizer(
  sc_FK_data,
  input_col = "sentences",
  output_col = "tokens"
)

# split data into test and training sets
partitions <- sc_FK_data %>%
  sdf_random_split(training = 0.7, test = 0.3, seed = 123456)
FK_train <- partitions$training
FK_test <- partitions$test

# given a training data set (FK_train) with a column "tokens" (for each row a
# list of strings).
# NOTE(review): per the sparklyr docs, passing a tbl_spark as the first
# argument returns a transformed table rather than a model, which would
# explain one vector per row - confirm against the reference.
mymodel <- ft_word2vec(
  FK_train,
  input_col = "tokens",
  output_col = "word2vec",
  vector_size = 15,
  min_count = 1,
  max_sentence_length = 4444,
  num_partitions = 1,
  step_size = 0.1,
  max_iter = 10,
  seed = 123456,
  uid = random_string("word2vec_")
)

# I tried to get the data from spark with:
myemb <- mymodel %>% sparklyr::collect()
Has somebody had similar experiences? Can someone explain what exactly the ft_word2vec() function returns? Do you have an example on how to get the word embedding vectors with this function? Or does the returned column indeed contain the paragraph vectors?
My colleague found a solution! Once you know how to do it, the instructions really begin to make sense:
# add your spark_connection here as 'spark_connection = '

# create example data frame
FK_data <- data.frame(
  sentences = c(
    "This is my first sentence",
    "It is followed by the second sentence",
    "At the end there is the last sentence"
  )
)

# move the data to spark
sc_FK_data <- copy_to(
  spark_connection, FK_data,
  name = "FK_data", overwrite = TRUE
)

# prepare data for ft_word2vec: sentences have to be tokenized (a list of
# words instead of one string in each row). The input column must be the
# column that actually exists in the data, i.e. "sentences".
sc_FK_data <- ft_tokenizer(
  sc_FK_data,
  input_col = "sentences",
  output_col = "tokens"
)

# split data into test and training sets
partitions <- sc_FK_data %>%
  sdf_random_split(training = 0.7, test = 0.3, seed = 123456)
FK_train <- partitions$training
FK_test <- partitions$test

# CHANGES FOLLOW HERE:
# We have to use the spark connection instead of the data. For me this was the
# confusing part, since I thought no data -> no model.
# Maybe we can think of this step as an initialization: it only creates the
# (unfitted) estimator.
mymodel <- ft_word2vec(
  spark_connection,
  input_col = "tokens",
  output_col = "word2vec",
  vector_size = 15,
  min_count = 1,
  max_sentence_length = 4444,
  num_partitions = 1,
  step_size = 0.1,
  max_iter = 10,
  seed = 123456,
  uid = random_string("word2vec_")
)

# now that we have our model initialized, we fit it on the training data;
# the estimator created above and the training table are the inputs
w2v_model <- ml_fit(mymodel, FK_train)

# now we can collect the embedding vectors (one row per word) from the
# fitted model
emb <- w2v_model$vectors %>% collect()

Shiny implementation

Hello, my question is about user interactivity with my code. I developed this simple loop for time series using the great forecastHybrid package; here is the Rmd for it.
The variable names are in Portuguese. Sorry, I was too lazy to translate the whole thing, but it shouldn't matter anyway.
```{r Primeira Vez, clique no play, include=FALSE, include=FALSE}
# First-run setup: work from ~/R and make forecastHybrid available.
# The working directory only needs to be set once.
setwd("~/R")
# Install the package only when it is missing, so re-running this chunk does
# not download and reinstall it every time.
if (!requireNamespace("forecastHybrid", quietly = TRUE)) {
  install.packages("forecastHybrid")
}
library(forecastHybrid)
```
```{r Inputs}
# Input data: a data set that already exists in the global environment.
Data <- SemZero

# File name for the results of the regression without dummies.
ComoSalvar <- "Exemplo2.csv"
# File name for the results of the regression with dummies.
ComoSalvarReg <- "ExemploparaDummies.csv"
# Directory in which both files are saved.
OndeSalvar <- "~/R"

# Start of the time series: month, year and day.
Mes <- 9
Ano <- 2012
Dia <- 1

# Frequency of the series.
Freq <- 12

# Forecast period.
Forecast <- 12

# Confidence intervals.
IC <- c(0)

# First and last column of Data used in the lapply regression; usually I
# would read an Excel file with headers.
VStart <- 1
VFinish <- 2

# Simple regressor data set. The regressors can be matrices as well as single
# vectors, since data.frame() combines them all, for example
# data.frame(OddMonths, Christmas, WasTrumpPresident?, RainInThatSeason).
Regressores <- data.frame(0)
```
```{r Logic for rolling test and data simplification}
# Work inside the output directory chosen in the inputs chunk.
setwd(OndeSalvar)

# Prediction intervals are requested only when a positive confidence level
# was supplied.
# NOTE(review): `pi` shadows base R's constant; the name is kept because the
# regression chunk reads it.
pi <- IC[1] > 0

# Pick the model weights and the cross-validation window multiplier from the
# amount of data available. Thresholds are tested from largest to smallest so
# that every branch can be reached; previously a `<=` test directly after the
# first `>=` test caught all remaining cases, so the 2.5 window and the
# "equal" fallback never ran.
n_obs <- nrow(Data)
if (n_obs >= Freq * 3 + Forecast * 2) {
  # Plenty of data: use as many whole seasons as fit before the hold-out.
  Weights <- "cv.error"
  Multiplicador <- floor((n_obs - Forecast * 2) / Freq)
} else if (n_obs >= Freq * 2.5 + Forecast * 2) {
  # Enough data for a 2.5-season window.
  Weights <- "cv.error"
  Multiplicador <- 2.5
} else {
  # Series shorter than the smallest threshold: fall back to equal weights.
  # NOTE(review): assumes cross-validation cannot run on so few points -
  # confirm. Multiplicador stays defined (at its previous value for short
  # series) because the regression chunk still reads it.
  Weights <- "equal"
  Multiplicador <- 3
}
```
```{r The bunk of the regression process}
# Fit one hybrid forecast per selected column of `Data` and save the result
# to CSV. A column whose model errors is reported with a warning and skipped,
# so one bad series no longer aborts the whole run.

# Apply `forecaster` to every column; a failure becomes a warning and the
# column is dropped from the result. The error handler has to wrap the call
# itself: wrapping the function object in try() catches nothing.
forecast_columns <- function(columns, forecaster) {
  fits <- Map(
    function(series, series_name) {
      tryCatch(
        forecaster(series),
        error = function(e) {
          warning(
            "Skipping '", series_name, "': ", conditionMessage(e),
            call. = FALSE
          )
          NULL
        }
      )
    },
    columns,
    names(columns)
  )
  Filter(Negate(is.null), fits)
}

# Without prediction intervals only the point forecasts are kept, one column
# per series; with intervals the list of forecast objects is passed on as is.
collect_forecasts <- function(fits) {
  if (!pi) {
    as.data.frame(lapply(fits, `[[`, "mean"))
  } else {
    fits
  }
}

# drop = FALSE keeps a data frame even when a single column is selected, so
# lapply/Map always iterate over columns rather than over single values.
series_cols <- Data[, VStart:VFinish, drop = FALSE]

if (nrow(Regressores) < nrow(Data)) {
  # No usable regressors: plain hybrid model.
  my_forecast1 <- function(x) {
    print(x)
    print(summary(x))
    x[is.na(x)] <- 0
    # Leave the tbats component ("f") out when the series is almost empty or
    # its first season averages to zero.
    if (sum(abs(x)) < Freq || mean(x[1:Freq]) == 0) {
      Model <- "aenst"
    } else {
      Model <- "aenstf"
    }
    x <- ts(x, start = c(Ano, Mes, Dia), frequency = Freq)
    hm <- hybridModel(
      x,
      models = Model, lambda = NULL,
      a.args = list(
        trace = FALSE, test = "kpss", ic = "aicc",
        max.P = 2, max.p = 9, max.q = 9, max.Q = 2, max.d = 2, max.D = 2,
        start.p = 9, start.P = 2, start.Q = 2, start.q = 9,
        allowdrift = TRUE, allowmean = TRUE
        # If you have time, delete the # below for a higher-quality ARIMA model.
        #,stepwise = FALSE,parallel = TRUE,num.cores = NULL
      ),
      e.args = list(ic = "aicc"),
      n.args = list(repeats = nrow(Data)),
      s.args = NULL,
      t.args = NULL,
      weights = Weights,
      errorMethod = "RMSE", cvHorizon = Forecast,
      windowSize = frequency(x) * Multiplicador, horizonAverage = FALSE,
      verbose = TRUE
    )
    forecast(hm, h = Forecast, level = IC, PI = pi)
  }

  Listas <- forecast_columns(series_cols, my_forecast1)
  # NOTE(review): with intervals enabled a list of forecast objects is handed
  # to write.csv() unchanged - confirm that this is the intended output.
  ListaResultado <- collect_forecasts(Listas)
  write.csv(ListaResultado, file = ComoSalvar)
} else {
  # Regressors available: the first nrow(Data) rows train the models and the
  # next `Forecast` rows drive the forecast.
  n_train <- nrow(Data)
  if (nrow(Regressores) < n_train + Forecast) {
    stop(
      "Regressores needs at least nrow(Data) + Forecast rows (",
      n_train + Forecast, ") to cover the forecast horizon; it has ",
      nrow(Regressores), ".",
      call. = FALSE
    )
  }
  xreg_train <- Regressores[1:n_train, ]
  # Future rows start right after the training rows; starting at n_train would
  # reuse the last training row and shift every future regressor by one period.
  xreg_future <- Regressores[(n_train + 1):(n_train + Forecast), ]

  my_forecastreg <- function(x) {
    print(x)
    print(summary(x))
    x[is.na(x)] <- 0
    x <- ts(x, start = c(Ano, Mes, Dia), frequency = Freq)
    hmreg <- hybridModel(
      x,
      models = "ans",
      a.args = list(
        xreg = xreg_train, trace = TRUE, test = "kpss", ic = "aicc",
        max.P = 2, max.p = 9, max.q = 9, max.Q = 2, max.d = 2, max.D = 2,
        start.p = 9, start.P = 2, start.Q = 2, start.q = 9,
        allowdrift = TRUE, allowmean = TRUE
        # If you have time, delete the # below for a higher-quality ARIMA model.
        #,stepwise = FALSE,parallel = TRUE,num.cores = NULL
      ),
      n.args = list(xreg = xreg_train, repeats = nrow(Regressores)),
      s.args = list(xreg = xreg_train, method = "arima")
    )
    forecast(hmreg, h = Forecast, level = IC, PI = pi, xreg = xreg_future)
  }

  Listas2 <- forecast_columns(series_cols, my_forecastreg)
  ListaResultado2 <- collect_forecasts(Listas2)
  write.csv(ListaResultado2, file = ComoSalvarReg)
}
```
I want to develop something that takes the user's input and runs the regressions. My end users don't usually like how ugly an R Markdown file looks, so I was looking into using Shiny, but I don't know a few details:
Who runs these regressions if I successfully upload the whole thing to Shiny: my computer, the server, or the user? I have no idea.
Can the user input go into the user's own global environment, so that the whole thing is kept as a strictly offline process (using Shiny as a beautification app that replaces this input chunk)?
Can someone please give an example of an Shiny app that does something similar?
Can the user read.xlsm into shiny server, or use his global environment to define a data for the shiny app to use as input?
Also, is it possible to use the thief package inside this lapply function as a way to increase the forecast quality? I would of course drop the stlm and theta options from the model, as they behave rather poorly in a wide range of simulations that I performed with toy sets: stlm crashes on cross-validation with few observations, and the theta model just doesn't work.
Can someone teach me how to, on an error inside the function, ignore that variable and just keep applying the function to the next variable, or change the model to something less problematic? My solution was to try to catch the cases where the model would crash and drop the theta model before it happens, but that is just an ugly hack around the underlying problem.
Also if you see something ugly in the code itself feel free to criticize.

Importing many to many relationship (Django)

I want to import data to many to many fields.
My code below imports the data, but only applies one relationship to the model (not two, as I've asked it to do below by importing it twice, where I wrote "cat = ...").
In my example below, I want it to import cat (category) from columns 4 & 11. My code below only applies one category to the model (not both).
How can I get it to apply both fields to the model? I'm using Python 2.7.
import csv

# Python 2's csv module expects the file in binary mode; the `with` block
# closes the handle once every row has been read.
with open('test_data.csv', 'rb') as csv_file:
    l = list(csv.reader(csv_file))

# Map the CSV text values to the integer choices stored on the models.
Gender_CHOICES = {
    'Male': 1,
    'Female': 2,
    'Unisex': 3,
}
Stock_CHOICES = {
    'in stock': 1,
    'low stock': 2,
    'out of stock': 3,
    'discountinued': 4
}

# Zero-based CSV columns that each hold one category name for the product.
# NOTE(review): in the sample CSV the second category ("category1") sits at
# zero-based index 10, not 11 - confirm against the real file.
CATEGORY_COLUMNS = (4, 11)

# Skip the header row and create one product (with its variation, image,
# size, price and stock) per CSV row.
for row in l[1:]:
    prod = m.Product(
        name=row[0],
        link=row[1],
        description=row[6],
        brand=row[7],
        colour=row[10],
        # Unknown gender values fall back to Unisex (3).
        gender=Gender_CHOICES.get(row[8], 3),
        store=m.Store.objects.get_or_create(store_name=row[2])[0]
    )
    prod.save()

    var = m.Variation(
        product=prod,
        variation="default"
    )
    var.save()

    img = m.Image(
        variation=var,
        image=row[5]
    )
    img.save()

    size = m.Size(
        variation=var
    )
    size.save()

    price = m.Price(
        variation=var,
        price=float(row[3])
    )
    price.save()

    stock = m.Stock(
        size=size,
        # Unknown stock values fall back to discontinued (4).
        stock=Stock_CHOICES.get(row[9], 4)
    )
    stock.save()

    # A many-to-many field needs one related object per category, so each
    # category column gets its own get_or_create() and is added to the
    # product. Missing or empty cells are skipped so no blank category is
    # created.
    for column in CATEGORY_COLUMNS:
        category_name = row[column] if column < len(row) else ''
        if not category_name:
            continue
        # get_or_create() returns (object, created); only the object is needed.
        category = m.Category.objects.get_or_create(
            category_name=category_name)[0]
        prod.category.add(category)
Sample of CSV:
prod_name,prod_link,store_name,prod_price,category,image_default,prod_description,prod_brand,gender,prod_stock,category1
Bliss Firm Baby Firm Lifting & Volumising Serum 30ml - Serum,http://click.linksynergy.com/link?id=dnw*50nuNL8&offerid=287549.2554637&type=15&murl=http%3A%2F%2Fwww.asos.com%2Fau%2FBliss%2FBliss-Firm-Baby-Firm-Lifting-Volumising-Serum-30ml%2FProd%2Fpgeproduct.aspx%3Fiid%3D3936070%26istCompanyId%3Df448b47d-6b90-4b9b-a52d-eb6058c99b1c%26istItemId%3Dwxqqpxxmi%26istBid%3Dt,Asos,117,Skin Care Body Creams & Moisturisers,http://images.asos-media.com/inv/media/0/7/0/6/3936070/serum/image1xxl.jpg,Firm Baby Firm Lifting & Volumising Serum by Bliss Designed to boost collagen and elasticity Concentrated formula with a water-free aloe base Aims to plump skin from the inside out,Bliss,Female,
Yes To Carrots Day Cream 50ml - Carrots,http://click.linksynergy.com/link?id=dnw*50nuNL8&offerid=287549.2825448&type=15&murl=http%3A%2F%2Fwww.asos.com%2Fau%2FYES-TO%2FYes-To-Carrots-Day-Cream-50ml%2FProd%2Fpgeproduct.aspx%3Fiid%3D4254119%26istCompanyId%3Df448b47d-6b90-4b9b-a52d-eb6058c99b1c%26istItemId%3Dwiqwwawpm%26istBid%3Dt,Asos,21,Skin Care Body Creams & Moisturisers,http://images.asos-media.com/inv/media/9/1/1/4/4254119/carrots/image1xxl.jpg,Day cream by Yes To Carrots 95% natural ingredients Including carrots and sweet almond oil Rich moisturising formula Naturally nourishes to promote softer skin Suitable for normal to dry skin types Product size: 50ml,YES TO,Female,Belts
I believe the problem is with object creation. I think the following is what you are looking for:
....
....
for i in l[1:]:
    # get_or_create() returns an (object, created) tuple; unpack it so that
    # the Category instance itself, not the tuple, is added to the product.
    cat_1, _ = m.Category.objects.get_or_create(category_name=i[4])
    cat_2, _ = m.Category.objects.get_or_create(category_name=i[11])
    # ... create and save `prod` and its related objects as in the question ...
    # add() accepts several related objects at once.
    prod.category.add(cat_1, cat_2)

How to store the user selected input from a Listbox

This is much like a traveling salesman problem. I have a Listbox with college names in it (backed by coordinates I grabbed from the Facebook Graph). I have the selection mode set to multiple. I need to know the code that will allow me to use the colleges the user selected so I can put them through a distance method. I only need to know the code to see what they selected. I tried using curselection() but I still do not understand it.
Here is some code:
# Multi-select listbox that will hold the college names.
self.listbox = Listbox(self.mid_frame, width=42, selectmode="multiple",
                       highlightcolor="orange",
                       highlightthickness="10", bd="5")
coordinates = []
collegelist = []
# Grab each college's name, latitude and longitude from the Facebook Graph.
# The `with` block closes the input file when the loop is done.
with open(sys.argv[1], 'r') as f:
    for identity in f:
        # Drop the trailing newline so it does not end up in the URL, and
        # skip blank lines.
        identity = identity.strip()
        if not identity:
            continue
        urlquery = 'https://graph.facebook.com/' + identity
        obj = json.load(urllib2.urlopen(urlquery))
        college = obj["name"]
        latitude = obj["location"]["latitude"]
        longitude = obj["location"]["longitude"]
        coordinates.append((college, latitude, longitude))
        collegelist.append(college)

# Sort the colleges so they appear in alphabetical order.
sortcollege = sorted(collegelist)
# Fill the Listbox with the college names imported from the text file.
for college in sortcollege:
    self.listbox.insert(END, college)
self.listbox.pack(side=LEFT)

# The StringVar has to exist before any Label refers to it through
# `textvariable`.
self.totaldistance = StringVar()
# The label where the total distance is shown.
self.output_totaldist_label = Label(self.mid_frame,
                                    width=11,
                                    textvariable=self.totaldistance)
self.output_label = Label(self.mid_frame,
                          textvariable=self.totaldistance)
self.output_totaldist_label.pack(side=LEFT)
self.output_label.pack(side=LEFT)
It would have been nice to see how you tried curselection to see what went wrong.
Something like:
# curselection() yields the indices of the selected rows; get() turns each
# index back into the text shown in the listbox. Collect the items in a list
# so that all of them, not just the last one, are available afterwards.
selected = []
for idx in self.listbox.curselection():
    selitem = self.listbox.get(idx)
    selected.append(selitem)
should do the trick. Have you tried that?