SQL Server 2005 IF statement not giving the expected result

I've written an IF statement to check whether a customer's discount is correct. I take the customer's sales volume for the previous year, check whether it falls within the expected range, and if it does not, I need to report the correct discount to apply. My problem is that the suggested discount is not the right one. I'll show you the code.
First, sample data for one customer:
Discount applied: 60 %
Sales volume 2016: € 13.895.90
Sales volume 2015: € 25.686.92
This is my query:
-- DECLARE all the variables I need ......
SET @Anno1 = YEAR(GETDATE());
SET @Anno2 = YEAR(DATEADD(year,-1,GETDATE()));
SET @Anno3 = YEAR(DATEADD(year,-2,GETDATE()));
SET @datada = DATEADD(DAY, -15, GETDATE());
SET @dataa = GETDATE();
----- set discount sales volume ---
SET @40 = '€ '+ REPLACE(CONVERT(varchar, CAST('1500.0000' AS money), 105),',','.');
SET @50 = '€ '+ REPLACE(CONVERT(varchar, CAST('15000.0000' AS money), 105),',','.');
SET @60 = '€ '+ REPLACE(CONVERT(varchar, CAST('150000.0000' AS money), 105),',','.');
SET @70 = '€ '+ REPLACE(CONVERT(varchar, CAST('200000.0000' AS money), 105),',','.');
SET @80 = '€ '+ REPLACE(CONVERT(varchar, CAST('500000.0000' AS money), 105),',','.');
---create cursor---
DECLARE c CURSOR FOR
SELECT DISTINCT
CODCONTO,
DSCCONTO1
FROM .dbo.TESTEDOCUMENTI
WHERE (.dbo.TESTEDOCUMENTI.TIPODOC = 'PCL' OR .dbo.TESTEDOCUMENTI.TIPODOC = 'ORC') AND .dbo.TESTEDOCUMENTI.DATADOC BETWEEN @datada AND @dataa
----take each customer----
OPEN c
FETCH NEXT FROM c INTO @CodiceCliente,@Cliente
--IF @CodiceCliente IS NULL goto finescript;
WHILE @@FETCH_STATUS = 0
BEGIN
-------------------------------------------------------------------
----------------------set sales volumes to variables---
-------------------------------Current year -----
SET @FattAnnoCorrente =
(SELECT '€ '+ REPLACE(CONVERT(varchar, CAST(SUM(TOTIMPONIBILE) AS money), 105),',','.') FROM .dbo.TESTEDOCUMENTI
WHERE CODCLIFOR = @CodiceCliente AND .dbo.TESTEDOCUMENTI.TIPODOC = 'FVC' AND .dbo.TESTEDOCUMENTI.ESERCIZIO = YEAR(GETDATE()));
-------------------------------Previous Year -----
SET @FattAnnoPrecedente =
(SELECT '€ '+ REPLACE(CONVERT(varchar, CAST(SUM(TOTIMPONIBILE) AS money), 105),',','.') FROM .dbo.TESTEDOCUMENTI
WHERE CODCLIFOR = @CodiceCliente AND .dbo.TESTEDOCUMENTI.TIPODOC = 'FVC' AND .dbo.TESTEDOCUMENTI.ESERCIZIO = YEAR(DATEADD(year,-1,GETDATE())));
------------------------------2 Previous years -----
SET @Fatt2AnniPrecedenti =
(SELECT '€ '+ REPLACE(CONVERT(varchar, CAST(SUM(TOTIMPONIBILE) AS money), 105),',','.') FROM .dbo.TESTEDOCUMENTI
WHERE CODCLIFOR = @CodiceCliente AND .dbo.TESTEDOCUMENTI.TIPODOC = 'FVC' AND .dbo.TESTEDOCUMENTI.ESERCIZIO = YEAR(DATEADD(year,-2,GETDATE())));
----------- Take the last document discount and set to variable -----
SET @Sconto =
(SELECT DISTINCT MAX(SCONTORIGA)
FROM .dbo.TESTEDOCUMENTI
WHERE SCONTORIGA IS NOT NULL AND CODCLIFOR = @CodiceCliente AND (.dbo.TESTEDOCUMENTI.TIPODOC = 'PCL' OR .dbo.TESTEDOCUMENTI.TIPODOC = 'ORC') AND .dbo.TESTEDOCUMENTI.DATADOC BETWEEN @datada AND @dataa);
--------------------------verify condition---THIS IS WHERE THE TROUBLE IS----
---------------------------PREVIOUS YEAR SALES VOLUME----
IF @FattAnnoCorrente IS NULL SET @FattAnnoCorrente = '0'
IF @FattAnnoPrecedente IS NULL SET @FattAnnoPrecedente = '0'
IF @Fatt2AnniPrecedenti IS NULL SET @Fatt2AnniPrecedenti = '0'
IF @FattAnnoPrecedente = '0' goto fatturatocorrente;
IF (@FattAnnoPrecedente > '0' and @FattAnnoPrecedente < @40) and @Sconto < '40' goto finescript;
IF (@FattAnnoPrecedente > @40 and @FattAnnoPrecedente < @50) and (@Sconto > '40' and @Sconto < '50') goto finescript;
IF (@FattAnnoPrecedente > @50 and @FattAnnoPrecedente < @60) and (@Sconto > '50' and @Sconto < '60') goto finescript;
IF (@FattAnnoPrecedente > @60 and @FattAnnoPrecedente < @70) and (@Sconto > '60' and @Sconto < '70') goto finescript;
IF (@FattAnnoPrecedente > @70 and @FattAnnoPrecedente < @80) and (@Sconto > '70' and @Sconto < '80') goto finescript;
IF (@FattAnnoPrecedente > @80 and @FattAnnoPrecedente < '999999999999999999') and @Sconto > '80' goto finescript;
------------------------------------FIND THE SUGGESTED DISCOUNT ------ THIS IS WRONG
IF (@FattAnnoPrecedente > '0' and @FattAnnoPrecedente < @40)
SET @ScontoPrevisto = 'inferiore al 40%';
IF (@FattAnnoPrecedente > @40 and @FattAnnoPrecedente < @50)
SET @ScontoPrevisto = 'compreso tra 40% e 50%';
IF (@FattAnnoPrecedente > @50 and @FattAnnoPrecedente < @60)
SET @ScontoPrevisto = 'compreso tra 50% e 60%';
IF (@FattAnnoPrecedente > @60 and @FattAnnoPrecedente < @70)
SET @ScontoPrevisto = 'compreso tra 60% e 70%';
IF (@FattAnnoPrecedente > @70 and @FattAnnoPrecedente < @80)
SET @ScontoPrevisto = 'compreso tra 70% e 80%';
IF (@FattAnnoPrecedente > @80 and @FattAnnoPrecedente < '999999999999999999')
SET @ScontoPrevisto = 'superiore all''80%';
SET @AnnoConsiderato = 'ANNO PRECEDENTE';
fatturatocorrente:
------------USE CURRENT YEAR IF PREVIOUS SALES VOLUME IS 0---------
IF @FattAnnoPrecedente NOT LIKE '0' goto fatturatoesistente;
IF (@FattAnnoCorrente > '0' and @FattAnnoCorrente < @40) and @Sconto < '40' goto finescript;
IF (@FattAnnoCorrente > @40 and @FattAnnoCorrente < @50) and (@Sconto > '40' and @Sconto < '50') goto finescript;
IF (@FattAnnoCorrente > @50 and @FattAnnoCorrente < @60) and (@Sconto > '50' and @Sconto < '60') goto finescript;
IF (@FattAnnoCorrente > @60 and @FattAnnoCorrente < @70) and (@Sconto > '60' and @Sconto < '70') goto finescript;
IF (@FattAnnoCorrente > @70 and @FattAnnoCorrente < @80) and (@Sconto > '70' and @Sconto < '80') goto finescript;
IF (@FattAnnoCorrente > @80 and @FattAnnoCorrente < '999999999999999999') and @Sconto > '80' goto finescript;
------------------------------------FIND SUGGESTED DISCOUNT ------
--SET @FattAnnoCorrente = '1';
IF (@FattAnnoCorrente > '0' and @FattAnnoCorrente < @40)
SET @ScontoPrevisto = 'inferiore al 40%';
IF (@FattAnnoCorrente > @40 and @FattAnnoCorrente < @50)
SET @ScontoPrevisto = 'compreso tra 40% e 50%';
IF (@FattAnnoCorrente > @50 and @FattAnnoCorrente < @60)
SET @ScontoPrevisto = 'compreso tra 50% e 60%';
IF (@FattAnnoCorrente > @60 and @FattAnnoCorrente < @70)
SET @ScontoPrevisto = 'compreso tra 60% e 70%';
IF (@FattAnnoCorrente > @70 and @FattAnnoCorrente < @80)
SET @ScontoPrevisto = 'compreso tra 70% e 80%';
IF (@FattAnnoCorrente > @80 and @FattAnnoCorrente < '999999999999999999')
SET @ScontoPrevisto = 'superiore all''80%';
SET @AnnoConsiderato = 'ANNO CORRENTE';
IF @Sconto LIKE '0.0%' SET @ScontoPrevisto = 'da stabilire in base alla merce ordinata'
fatturatoesistente:
-----------
--- HERE THERE WERE SOME TABLES CALLED BELOW BUT THEY WORK FINE, SO I REMOVED THEM ---
---------------------------------
---HTML EMAIL BODY SET WITH ALL VARIABLES, ALL WORKING FINE BUT @SCONTOPREVISTO IS THE WRONG ONE----
SET @Email =
N'......HTML CODE....' + @ScontoPrevisto + '..HTML CODE...';
SET @oggettomail = 'ERRATA SCONTISTICA PER ' + @Cliente;
IF @Email IS NULL goto finescript;
EXEC msdb.dbo.sp_send_dbmail
@recipients = 'email@gmail.com',
@subject = @oggettomail,
@body = @Email,
@body_format = 'HTML' ;
finescript:
--take the next customer---
FETCH NEXT FROM c INTO @CodiceCliente,@Cliente
END
--clean---
CLOSE c
DEALLOCATE c
The result of this query for @ScontoPrevisto, the suggested discount, is wrong: it says between 70% and 80%, but as you can see the previous year's sales volume is about 25000, so the right discount should be 50-60%. I don't understand why. For some customers, the result is correct.
Another customer has:
Sales volume 2016: 0
Sales volume 2015: 0
Discount: 60%
The result is greater than 80% when it should be smaller than 40%.
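To make it easier to see what the IF conditions are actually comparing, here is a small standalone test; the literal values are my assumption of what the '€ ' + CONVERT/REPLACE formatting produces (for example '€ 200.000.00' for the @70 threshold):
-- Standalone check of how the '€ '-prefixed varchar values compare as text
-- (sample 2015 sales volume against the assumed @70 and @80 thresholds).
DECLARE @prev varchar(30);
DECLARE @t70 varchar(30);
DECLARE @t80 varchar(30);
SET @prev = '€ 25.686.92';   -- previous-year sales volume of the sample customer
SET @t70 = '€ 200.000.00';   -- assumed formatted @70 threshold
SET @t80 = '€ 500.000.00';   -- assumed formatted @80 threshold
IF @prev > @t70 AND @prev < @t80
    PRINT 'falls in the 70-80 bracket when compared as varchar';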
Any help is appreciated. Thank you, guys!

Related

If/Else Statement One Line

I'd like to incorporate whether they currently have MMSA or JMMSA. MMSA is a balance over 2,500 and JMMSA is a balance over 100,000.
combined_2 = (
    combined
    .withColumn('mmsa_eligible',
        F.when(
            (F.col('min_bal_0_90_days') >= 2500) & (F.col('current_bal') >= 2500), 1
        ).otherwise(0)
    )
    .withColumn('jmmsa_eligible',
        F.when(
            (F.col('min_bal_0_90_days') >= 100000) & (F.col('current_bal') >= 100000), 1
        ).otherwise(0)
    )
)

if jmmsa_eligible == 1 and jmmsa_current_flag == 0:
    print('Y')
else:
    print('N')
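If the goal is to turn that last check into a column rather than a Python if, a sketch along these lines should work (jmmsa_current_flag is assumed to already be a column of combined_2, and the new column name is just an example):
from pyspark.sql import functions as F

# Row-level version of the check above: 'Y' when the account is JMMSA-eligible
# but not currently flagged as JMMSA, otherwise 'N'.
combined_3 = combined_2.withColumn(
    'jmmsa_upgrade',
    F.when(
        (F.col('jmmsa_eligible') == 1) & (F.col('jmmsa_current_flag') == 0), F.lit('Y')
    ).otherwise(F.lit('N'))
)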

Pinescript - Syntax error at input 'long_entry'

I have an issue with Pine Script. I don't get how to fix this error on line 23. When I use an if statement I get the error; when I use the ternary ?: operator instead, I don't.
//@version=5
indicator("Test 1", overlay = true)
// Define the indicators
ema = ta.ema(close, 200)
atr14 = ta.atr(14)
long_entry = false
short_entry = false
// Define the conditions for consecutive open and close prices
long_condition = (close[2] < open[2] and close[1] < open[1] and close > open[1]) and (close > open)
     or (close[3] < open[3] and close[2] < open[2] and close > open[2]) and (close > open)
     or (close[4] < open[4] and close[3] < open[3] and close > open[3]) and (close > open)
     or (close[5] < open[5] and close[4] < open[4] and close > open[4]) and (close > open)
     or (close[6] < open[6] and close[5] < open[5] and close > open[5]) and (close > open)
short_condition = (close[2] > open[2] and close[1] > open[1] and close < open[1]) and (close > open)
     or (close[3] > open[3] and close[2] > open[2] and close < open[2]) and (close < open)
     or (close[4] > open[4] and close[3] > open[3] and close < open[3]) and (close < open)
     or (close[5] > open[5] and close[4] > open[4] and close < open[4]) and (close < open)
     or (close[6] > open[6] and close[5] > open[5] and close < open[5]) and (close < open)
// Define the conditions for long and short positions
if long_condition and close > ema and barstate.isconfirmed
     long_entry := true
if short_condition and close < ema and barstate.isconfirmed
     short_entry := true
// Define the stop loss
long_stop = close - (atr14 * 2)
short_stop = close + (atr14 * 2)
I looked around but couldn't find a solution.
If blocks must be indented by four spaces or a tab. You have five spaces in the if statements below:
// Define the conditions for long and short positions
if long_condition and close > ema and barstate.isconfirmed
     long_entry := true
if short_condition and close < ema and barstate.isconfirmed
     short_entry := true
So, change it to:
// Define the conditions for long and short positions
if long_condition and close > ema and barstate.isconfirmed
    long_entry := true
if short_condition and close < ema and barstate.isconfirmed
    short_entry := true
Note: The script must have at least one output function call (e.g. plot, barcolor, etc.).
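For example, adding any single output call to the script is enough:
// any one output call satisfies the requirement
plot(ema, "EMA 200")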

Need some help writing a strategy with different positions depending on three signal variables

Hi, I'm brand new to coding and I get stuck on every new line of code I try to write, but hey, it's a learning process.
I'm doing a strategy based on the MACD variables.
-MACD_Line is either positive or negative.
-Signal_Line is either positive or negative.
-Histogram is either positive or negative.
Based on the historical prices there are 6 possible combined signals: ---, -++, --+, +--, ++- or +++.
What I want to do is pre-set different position sizes depending on these 6 possible output signals.
So for example: if "---" then short 50% of equity,
if "+++" then long 100% of equity,
if "-++" then short 25% of equity.
Therefore the equity position would change after one of the initial 3 variables changes.
My attempt:
strategy("HIST", overlay= false, initial_capital = 1, default_qty_type= strategy.percent_of_equity, default_qty_value= 100 )
//time inputs
startDate = input(title="Start Date", type=input.integer, defval=1, minval=1, maxval=31)
startMonth = input(title="Start Month", type=input.integer, defval=1, minval=1, maxval=12)
startYear = input(title="Start Year", type=input.integer, defval=2014, minval=1800, maxval=2100)
endDate = input(title="End Date", type=input.integer, defval=29, minval=1, maxval=31)
endMonth = input(title="End Month", type=input.integer, defval=3, minval=1, maxval=12)
endYear = input(title="End Year", type=input.integer, defval=2021, minval=1800, maxval=2100)
inDateRange = (time >= timestamp(syminfo.timezone, startYear, startMonth, startDate, 0, 0)) and
     (time < timestamp(syminfo.timezone, endYear, endMonth, endDate, 0, 0))
//variable
ema26= ema(close,26)
ema12= ema(close,12 )
macdl= ema12-ema26
signal= ema(macdl, 9)
hist= macdl-signal
enterLong = crossover(macdl,0)
enterShort = crossunder(macdl,0)
s000 = if (hist <= 0 and macdl <= 0 and signal <=0)
s001 = if (hist > 0 and macdl <= 0 and signal <= 0)
s011 = if (hist > 0 and macdl > 0 and signal <= 0)
s111 = if (hist > 0 and macdl > 0 and signal > 0)
s011 = if (hist <= 0 and macdl > 0 signal > 0)
s001 = if (hist <= 0 and macdl <= 0 signal > 0)
if (inDateRange and s111)
    strategy.entry(id="+", long=true)
if (inDateRange and s000)
    strategy.entry(id="-", long=false)
if (not inDateRange)
    strategy.close_all()
This should get you started in the right direction. You'll need to finish coding all the conditions yourself. See comments in code:
//@version=4
strategy("HIST", overlay= false, initial_capital = 1, default_qty_type= strategy.percent_of_equity, default_qty_value= 100 )
//time inputs
startDate = input(title="Start Date", type=input.integer, defval=1, minval=1, maxval=31)
startMonth = input(title="Start Month", type=input.integer, defval=1, minval=1, maxval=12)
startYear = input(title="Start Year", type=input.integer, defval=2014, minval=1800, maxval=2100)
endDate = input(title="End Date", type=input.integer, defval=29, minval=1, maxval=31)
endMonth = input(title="End Month", type=input.integer, defval=3, minval=1, maxval=12)
endYear = input(title="End Year", type=input.integer, defval=2021, minval=1800, maxval=2100)
inDateRange = (time >= timestamp(syminfo.timezone, startYear, startMonth, startDate, 0, 0)) and
     (time < timestamp(syminfo.timezone, endYear, endMonth, endDate, 0, 0))
//variable
ema26= ema(close,26)
ema12= ema(close,12 )
macdl= ema12-ema26
signal= ema(macdl, 9)
hist= macdl-signal
enterLong = crossover(macdl,0)
enterShort = crossunder(macdl,0)
// Two last var names clashed with others, so used "X" in them.
s000 = hist <= 0 and macdl <= 0 and signal <= 0
s001 = hist > 0 and macdl <= 0 and signal <= 0
s011 = hist > 0 and macdl > 0 and signal <= 0
s111 = hist > 0 and macdl > 0 and signal > 0
s01X = hist <= 0 and macdl > 0 and signal > 0
s00X = hist <= 0 and macdl <= 0 and signal > 0
// Detect changes in conditions.
f_changeIn(_cond) => _cond and not _cond[1]
c000 = f_changeIn(s000)
c001 = f_changeIn(s001)
c011 = f_changeIn(s011)
c111 = f_changeIn(s111)
c01X = f_changeIn(s01X)
c00X = f_changeIn(s00X)
// Function calculates position size from a % (0 - 1.0).
f_positionSize(_percentEquity) => strategy.equity * _percentEquity / close
// Generate orders on transitions into conditions.
float positionSize = na
if inDateRange
    if c000
        positionSize := f_positionSize(0.5)
        strategy.entry(id="+", long=false, qty = positionSize)
    else if c011
        positionSize := f_positionSize(1.0)
        strategy.entry(id="+", long=false, qty = positionSize)
    else if c111
        positionSize := f_positionSize(1.0)
        strategy.entry(id="+", long=true, qty = positionSize)
else
    strategy.close_all()
// For debugging.
plot(positionSize, "Position size", color.orange, 2, plot.style_circles)

PySpark: How to add columns whose data come from a query (similar to subquery for each row)

I have a holidays table
start: Date
end: Date
type: Enum(HOLIDAY|LONG_WEEKENDS)
Some example data:
"start","end","type"
"2019-01-01","2019-01-01","HOLIDAY"
"2019-02-05","2019-02-06","HOLIDAY"
"2019-03-16","2019-03-24","HOLIDAY"
"2019-04-19","2019-04-19","HOLIDAY"
"2019-10-04","2019-10-04","HOLIDAY"
"2019-08-08","2019-08-13","LONG_WEEKENDS"
"2019-10-25","2019-10-29","LONG_WEEKENDS"
"2019-12-20","2020-01-02","LONG_WEEKENDS"
And a flights table, for simplicity, it has
id: varchar
out_date: Date
in_date: Date
Some example data:
"id","out_date","in_date"
"25997661","2019-02-08","2019-02-12"
"25997658","2019-02-08","2019-02-12"
"25997659","2019-02-08","2019-02-12"
"25997662","2019-02-08","2019-02-12"
"25997663","2019-02-08","2019-02-12"
"25997657","2019-02-08","2019-02-12"
"25997660","2019-02-08","2019-02-12"
"25997397","2019-02-08","2019-02-12"
I want to add 4 columns into the flights table like:
out_date_is_holiday: Boolean
out_date_is_longweekends: Boolean
in_date_is_holiday: Boolean
in_date_is_longweekends: Boolean
So the "stupid" way is to download the holidays table and then, for each flight, do something like this (in pyspark):
add column out_date_is_holiday if out_date is between holidays.start and holidays.end AND holidays.type = 'HOLIDAYS' then true else false
and likewise for the other 3 columns. How can I do this efficiently? I am doing this on AWS Glue, if that matters. A rough rendering of what I mean is sketched below.
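In code, the per-date check I have in mind is roughly this (a sketch only, covering just out_date_is_holiday; it assumes the two tables are already loaded as DataFrames named flights and holidays):
from pyspark.sql import functions as F

# Flag out_date as a holiday when it falls inside any HOLIDAY range.
# The join can match several holiday rows per flight, which is exactly
# the part I would like to handle efficiently.
flagged = flights.join(
    holidays,
    (F.col('out_date') >= F.col('start')) & (F.col('out_date') <= F.col('end')),
    'left'
).withColumn(
    'out_date_is_holiday',
    F.when(F.col('type') == 'HOLIDAY', True).otherwise(False)
)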
UPDATE
Following @stack0114106's suggestion, I tried:
WITH t (
SELECT
f.outboundlegid,
f.inboundlegid,
f.agent,
f.querydatetime,
CASE WHEN type = 'HOLIDAY' AND (out_date BETWEEN start AND end)
THEN true
ELSE false
END out_is_holiday,
CASE WHEN type = 'LONG_WEEKENDS' AND (out_date BETWEEN start AND end)
THEN true
ELSE false
END out_is_longweekends,
CASE WHEN type = 'HOLIDAY' AND (in_date BETWEEN start AND end)
THEN true
ELSE false
END in_is_holiday,
CASE WHEN type = 'LONG_WEEKENDS' AND (in_date BETWEEN start AND end)
THEN true
ELSE false
END in_is_longweekends
FROM flights f
CROSS JOIN holidays h
)
SELECT
f.*,
CASE WHEN array_contains(collect_set(out_is_holiday), true)
THEN true
ELSE false
END out_is_holiday,
CASE WHEN array_contains(collect_set(out_is_longweekends), true)
THEN true
ELSE false
END out_is_longweekends,
CASE WHEN array_contains(collect_set(in_is_holiday), true)
THEN true
ELSE false
END in_is_holiday,
CASE WHEN array_contains(collect_set(in_is_longweekends), true)
THEN true
ELSE false
END in_is_longweekends
FROM t f
GROUP BY
f.querydatetime,
f.outboundlegid,
f.inboundlegid,
f.agent
LIMIT 1000000
But got
pyspark.sql.utils.AnalysisException: u"expression 'f.`out_is_holiday`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.;;\nGlobalLimit 1000000\n+- LocalLimit 1000000\n +- Aggregate [querydatetime#231, outboundlegid#208, inboundlegid#209, agent#205], [outboundlegid#208, inboundlegid#209, agent#205, querydatetime#231, out_is_holiday#347, out_is_longweekends#348, in_is_holiday#349, in_is_longweekends#350, CASE WHEN array_contains(collect_set(out_is_holiday#347, 0, 0), true) THEN true ELSE false END AS out_is_holiday#343, CASE WHEN array_contains(collect_set(out_is_longweekends#348, 0, 0), true) THEN true ELSE false END AS out_is_longweekends#344, CASE WHEN array_contains(collect_set(in_is_holiday#349, 0, 0), true) THEN true ELSE false END AS in_is_holiday#345, CASE WHEN array_contains(collect_set(in_is_longweekends#350, 0, 0), true) THEN true ELSE false END AS in_is_longweekends#346]\n +- SubqueryAlias f\n +- SubqueryAlias t\n +- Project [outboundlegid#208, inboundlegid#209, agent#205, querydatetime#231, CASE WHEN ((type#57 = HOLIDAY) && ((out_date#267 >= start#55) && (out_date#267 <= end#56))) THEN true ELSE false END AS out_is_holiday#347, CASE WHEN ((type#57 = LONG_WEEKENDS) && ((out_date#267 >= start#55) && (out_date#267 <= end#56))) THEN true ELSE false END AS out_is_longweekends#348, CASE WHEN ((type#57 = HOLIDAY) && ((in_date#304 >= start#55) && (in_date#304 <= end#56))) THEN true ELSE false END AS in_is_holiday#349, CASE WHEN ((type#57 = LONG_WEEKENDS) && ((in_date#304 >= start#55) && (in_date#304 <= end#56))) THEN true ELSE false END AS in_is_longweekends#350]\n +- Join Cross\n :- SubqueryAlias f\n : +- SubqueryAlias flights\n : +- Project [Id#198, QueryTaskId#199, QueryOriginPlace#200, QueryOutboundDate#201, QueryInboundDate#202, QueryCabinClass#203, QueryCurrency#204, Agent#205, QuoteAgeInMinutes#206, Price#207, OutboundLegId#208, InboundLegId#209, OutDeparture#210, OutArrival#211, OutDuration#212, OutJourneyMode#213, OutStops#214, OutCarriers#215, OutOperatingCarriers#216, NumberOutStops#217, NumberOutCarriers#218, NumberOutOperatingCarriers#219, InDeparture#220, InArrival#221, ... 12 more fields]\n : +- Project [Id#198, QueryTaskId#199, QueryOriginPlace#200, QueryOutboundDate#201, QueryInboundDate#202, QueryCabinClass#203, QueryCurrency#204, Agent#205, QuoteAgeInMinutes#206, Price#207, OutboundLegId#208, InboundLegId#209, OutDeparture#210, OutArrival#211, OutDuration#212, OutJourneyMode#213, OutStops#214, OutCarriers#215, OutOperatingCarriers#216, NumberOutStops#217, NumberOutCarriers#218, NumberOutOperatingCarriers#219, InDeparture#220, InArrival#221, ... 11 more fields]\n : +- LogicalRDD [Id#198, QueryTaskId#199, QueryOriginPlace#200, QueryOutboundDate#201, QueryInboundDate#202, QueryCabinClass#203, QueryCurrency#204, Agent#205, QuoteAgeInMinutes#206, Price#207, OutboundLegId#208, InboundLegId#209, OutDeparture#210, OutArrival#211, OutDuration#212, OutJourneyMode#213, OutStops#214, OutCarriers#215, OutOperatingCarriers#216, NumberOutStops#217, NumberOutCarriers#218, NumberOutOperatingCarriers#219, InDeparture#220, InArrival#221, ... 10 more fields]\n +- SubqueryAlias h\n +- SubqueryAlias holidays\n +- LogicalRDD [start#55, end#56, type#57]\n"
It seems overkill to try to reduce the code with foldLeft/reduce functions just for adding these 4 columns. To me it looks simpler to get this done by constructing the SQL strings rather than falling back to dataframe operations. Check this out:
scala> val holiday = Seq(("2019-01-01","2019-01-01","HOLIDAY"),
| ("2019-02-05","2019-02-06","HOLIDAY"),
| ("2019-03-16","2019-03-24","HOLIDAY"),
| ("2019-04-19","2019-04-19","HOLIDAY"),
| ("2019-10-04","2019-10-04","HOLIDAY"),
| ("2019-08-08","2019-08-13","LONG_WEEKENDS"),
| ("2019-10-25","2019-10-29","LONG_WEEKENDS"),
| ("2019-12-20","2020-01-02","LONG_WEEKENDS")
| ).toDF("start","end","type")
holiday: org.apache.spark.sql.DataFrame = [start: string, end: string ... 1 more field]
scala> val flight = Seq(("25997661","2019-02-08","2019-02-12"),
| ("25997658","2019-02-05","2019-02-12"), // modified to get "true" values
| ("25997659","2019-02-08","2019-02-12"),
| ("25997662","2019-02-08","2019-02-12"),
| ("25997663","2019-02-08","2019-02-12"),
| ("25997657","2019-02-08","2019-02-12"),
| ("25997660","2019-02-08","2019-02-12"),
| ("25997397","2019-02-08","2019-02-12")
| ).toDF("id","out_date","in_date")
flight: org.apache.spark.sql.DataFrame = [id: string, out_date: string ... 1 more field]
scala> val df = flight.crossJoin(holiday).withColumn("out_date",to_date('out_date)).withColumn("in_date",to_date('in_date)).withColumn("start",to_date('start)).withColumn("endx",to_date('end)).withColumn("typex",'type)
df: org.apache.spark.sql.DataFrame = [id: string, out_date: date ... 6 more fields]
scala> df.createOrReplaceTempView("jiew")
scala> val od_holiday = """ case when out_date >= start and out_date <= endx and typex='HOLIDAY' then true else false end out_date_is_holiday """
od_holiday: String = " case when out_date >= start and out_date <= endx and typex='HOLIDAY' then true else false end out_date_is_holiday "
scala> val od_longweek = """ case when out_date >= start and out_date <= endx and typex='LONG_WEEKENDS' then true else false end out_date_is_longweekends """
od_longweek: String = " case when out_date >= start and out_date <= endx and typex='LONG_WEEKENDS' then true else false end out_date_is_longweekends "
scala> val id_holiday = """ case when in_date >= start and in_date <= endx and typex='HOLIDAY' then true else false end in_date_is_holiday """
id_holiday: String = " case when in_date >= start and in_date <= endx and typex='HOLIDAY' then true else false end in_date_is_holiday "
scala> val id_longweek = """ case when in_date >= start and in_date <= endx and typex='LONG_WEEKENDS' then true else false end in_date_is_longweekends """
id_longweek: String = " case when in_date >= start and in_date <= endx and typex='LONG_WEEKENDS' then true else false end in_date_is_longweekends "
scala> val sel_columns = Array(od_holiday,od_longweek,id_holiday,id_longweek).mkString(",")
sel_columns: String = " case when out_date >= start and out_date <= endx and typex='HOLIDAY' then true else false end out_date_is_holiday , case when out_date >= start and out_date <= endx and typex='LONG_WEEKENDS' then true else false end out_date_is_longweekends , case when in_date >= start and in_date <= endx and typex='HOLIDAY' then true else false end in_date_is_holiday , case when in_date >= start and in_date <= endx and typex='LONG_WEEKENDS' then true else false end in_date_is_longweekends "
scala> val new_columns = Array("out_date_is_holiday","out_date_is_longweekends","in_date_is_holiday","in_date_is_longweekends")
new_columns: Array[String] = Array(out_date_is_holiday, out_date_is_longweekends, in_date_is_holiday, in_date_is_longweekends)
scala> val group_sel_columns = new_columns.map( x => s"case when array_contains(collect_set("+x+"),true) then true else false end "+x )
group_sel_columns: Array[String] = Array(case when array_contains(collect_set(out_date_is_holiday),true) then true else false end out_date_is_holiday, case when array_contains(collect_set(out_date_is_longweekends),true) then true else false end out_date_is_longweekends, case when array_contains(collect_set(in_date_is_holiday),true) then true else false end in_date_is_holiday, case when array_contains(collect_set(in_date_is_longweekends),true) then true else false end in_date_is_longweekends)
scala> val group_sel_columns_str = group_sel_columns.mkString(",")
group_sel_columns_str: String = case when array_contains(collect_set(out_date_is_holiday),true) then true else false end out_date_is_holiday,case when array_contains(collect_set(out_date_is_longweekends),true) then true else false end out_date_is_longweekends,case when array_contains(collect_set(in_date_is_holiday),true) then true else false end in_date_is_holiday,case when array_contains(collect_set(in_date_is_longweekends),true) then true else false end in_date_is_longweekends
scala> spark.sql( s""" with t1 ( select t.*, ${sel_columns} from jiew t) select id,out_date,in_date, ${group_sel_columns_str} from t1 group by id,out_date,in_date """).show(false)
+--------+----------+----------+-------------------+------------------------+------------------+-----------------------+
|id |out_date |in_date |out_date_is_holiday|out_date_is_longweekends|in_date_is_holiday|in_date_is_longweekends|
+--------+----------+----------+-------------------+------------------------+------------------+-----------------------+
|25997663|2019-02-08|2019-02-12|false |false |false |false |
|25997657|2019-02-08|2019-02-12|false |false |false |false |
|25997662|2019-02-08|2019-02-12|false |false |false |false |
|25997397|2019-02-08|2019-02-12|false |false |false |false |
|25997660|2019-02-08|2019-02-12|false |false |false |false |
|25997659|2019-02-08|2019-02-12|false |false |false |false |
|25997661|2019-02-08|2019-02-12|false |false |false |false |
|25997658|2019-02-05|2019-02-12|true |false |false |false |
+--------+----------+----------+-------------------+------------------------+------------------+-----------------------+
scala> val df2 = spark.sql( s""" with t1 ( select t.*, ${sel_columns} from jiew t) select id,out_date,in_date, ${group_sel_columns_str} from t1 group by id,out_date,in_date """)
df2: org.apache.spark.sql.DataFrame = [id: string, out_date: date ... 5 more fields]
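Since the question is in PySpark, the same two-step idea (cross join, flag each flight/holiday pair, then collapse to one row per flight) translates fairly directly; a rough, untested sketch for one of the four flags:
from pyspark.sql import functions as F

# flight and holiday are the DataFrames from the question.
pairs = flight.crossJoin(holiday).withColumn(
    'out_hit',
    (F.col('type') == 'HOLIDAY') &
    (F.col('out_date') >= F.col('start')) & (F.col('out_date') <= F.col('end'))
)

# One row per flight: true if any holiday row matched out_date.
result = pairs.groupBy('id', 'out_date', 'in_date').agg(
    (F.max(F.col('out_hit').cast('int')) == 1).alias('out_date_is_holiday')
)

The other three flags follow the same pattern, each with its own condition added before the groupBy.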

Oracle CLOB slow running report

We have a report that pulls data from an Oracle 10g database.
We are using Coldfusion 9 to display the report.
The report contains 3 CLOB columns that are killing the report processing time.
We tested this by removing the three CLOB columns, and the report then displays in 2 - 3 seconds.
Is there anything that can be done to improve the processing time of the report?
I understand why it's running slow, given how large the CLOB fields are.
Just looking to improve the processing time.
To give an idea of size, a typical report returns 200-300 rows, so a very small number of rows.
Many thanks
Matthew
UPDATE - We have tried using dbms_lob.substr() to return a maximum of 4000 characters, but this does not improve the processing time.
Here's the query:
SELECT
R.NC_REQUEST_ID REQUEST_ID,
R.TC_REQUEST_NAME REQUEST_NAME,
R.TC_REQUEST_NAME_2 REQUEST_NAME_2,
R.TC_INITIATOR INITIATOR,
R.NC_EXPENSE EXPENSE,
S.NC_FORM_STEP_ID FORM_STEP_ID,
S.NC_FORM_ID FORM_ID,
S.NC_STEP_ID STEP_ID,
S.TC_STEP_CODE STEP_CODE,
S.TC_STEP_NAME STEP_NAME,
TO_CHAR(R.DC_SUBMIT_DATE,'DD-Mon-YYYY') SUBMIT_DATE,
TO_CHAR(R.DC_ORIGINAL_DATE,'DD-Mon-YYYY') ORIGINAL_DATE,
TO_CHAR(R.DC_REVISION_DATE,'DD-Mon-YYYY') REVISION_DATE,
R.NC_BUS_GROUP_ID_FK BUS_GROUP_ID,
R.NC_BUS_UNIT_ID_FK BUS_UNIT_ID,
R.NC_BUS_SUB_ID_FK BUS_SUB_ID,
R.TC_ACT_NUMBER ACT_NUMBER,
R.TC_REQUEST_NAME REQUEST_NAME,
R.NC_YEAR YEAR_NUM,
R.NC_ACT_END_NUMBER ACT_END_NUMBER,
R.DC_START_DATE START_DATE,
R.DC_END_DATE END_DATE,
R.NC_ACT_TYPE_ID_FK ACT_TYPE_ID_FK,
TO_CHAR(R.DC_APPROVAL_DATE,'DD-Mon-YYYY') APPROVAL_DATE,
CC_ATTACHED_EXHIBIT ATTACHED_EXHIBIT,
CC_ROUTED ROUTED,
R.IC_RESTRICTED RESTRICTED,
ROI.NC_PI_OFF PI_OFF,
ROI.NC_PI_CA COST_AVOIDANCE_PI,
(SELECT COUNT(NC_REQUEST_ID_FK) FROM TBXS31_REMOVE_SECTION WHERE NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND NC_TEMPLATE_ID_FK IN(346,1455,1456,1458,1459,1460,1464,1641) GROUP BY NC_REQUEST_ID_FK) AS COUNT_OF_REMOVE_SECTIONS,
<cfoutput query="variables.qryEnergyListItems">
(SELECT NVL(ED.NC_EXISTINGUSAGE - ED.NC_PROPOSEDUSAGE,0) FROM TBYB28_SI_ENERGY_DATA ED,TBXP73_LIST_ITEMS LI WHERE LI.NC_VALUE_ID = ED.NC_ENERGYFORM(+) AND LI.NC_VALUE_ID = #variables.qryEnergyListItems.NC_VALUE_ID# AND ED.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND LI.IC_ACTIVE = 'T' AND LI.NC_VALUE_ID <> 808) AS "#variables.qryEnergyListItems.TC_MED_DESC#",
</cfoutput>
(SELECT TC_MED_DESC FROM TBXP73_LIST_ITEMS WHERE NC_VALUE_ID = SD.NC_BUSINESSUNIT) AS TC_BUS_UNIT,
(SELECT TC_MED_DESC FROM TBXP73_LIST_ITEMS WHERE NC_VALUE_ID = SD.NC_BUSINESSSUBUNIT) AS TC_BUS_SUB_UNIT,
(SELECT TC_MED_DESC FROM TBXP73_LIST_ITEMS WHERE NC_VALUE_ID = SD.NC_SITE) AS TC_BUS_SITE,
(SELECT NC_PROPOSEDGHG - NC_EXISTINGGHG FROM TBYB27_SUSTAINABILITY_DATA WHERE NC_REQUEST_ID_FK = R.NC_REQUEST_ID) AS ANNUAL_IMPACT_C02E,
(SELECT DECODE(TBYB14_SI_BASELINE.NC_BASE_CO2E , 0, NULL, ((TBYB27_SUSTAINABILITY_DATA.NC_PROPOSEDGHG - TBYB27_SUSTAINABILITY_DATA.NC_EXISTINGGHG) / TBYB14_SI_BASELINE.NC_BASE_CO2E) * 100) FROM TBYB14_SI_BASELINE,TBYB27_SUSTAINABILITY_DATA WHERE TBYB27_SUSTAINABILITY_DATA.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBYB27_SUSTAINABILITY_DATA.NC_SITE = TBYB14_SI_BASELINE.NC_SITE_ID_FK(+) AND TBYB27_SUSTAINABILITY_DATA.NC_BASELINEYEAR = TBYB14_SI_BASELINE.NC_YEAR(+) ) AS PCT_IMPACT_CO2E,
(SELECT NC_PROPOSEDWASTE - NC_EXISTINGWASTE FROM TBYB27_SUSTAINABILITY_DATA WHERE NC_REQUEST_ID_FK = R.NC_REQUEST_ID) AS ANNUAL_IMPACT_WASTE,
(SELECT DECODE(TBYB14_SI_BASELINE.NC_BASE_WASTE, 0, NULL, ((TBYB27_SUSTAINABILITY_DATA.NC_PROPOSEDWASTE - TBYB27_SUSTAINABILITY_DATA.NC_EXISTINGWASTE) / TBYB14_SI_BASELINE.NC_BASE_WASTE) * 100) FROM TBYB14_SI_BASELINE,TBYB27_SUSTAINABILITY_DATA WHERE TBYB27_SUSTAINABILITY_DATA.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBYB27_SUSTAINABILITY_DATA.NC_SITE = TBYB14_SI_BASELINE.NC_SITE_ID_FK(+) AND TBYB27_SUSTAINABILITY_DATA.NC_BASELINEYEAR = TBYB14_SI_BASELINE.NC_YEAR(+) ) AS PCT_IMPACT_WASTE,
(SELECT NC_PROPOSEDWATER - NC_EXISTINGWATER FROM TBYB27_SUSTAINABILITY_DATA WHERE NC_REQUEST_ID_FK = R.NC_REQUEST_ID) AS ANNUAL_IMPACT_WATER,
(SELECT DECODE(TBYB14_SI_BASELINE.NC_BASE_WATER, 0, NULL, ((TBYB27_SUSTAINABILITY_DATA.NC_PROPOSEDWATER - TBYB27_SUSTAINABILITY_DATA.NC_EXISTINGWATER) / TBYB14_SI_BASELINE.NC_BASE_WATER) * 100) FROM TBYB14_SI_BASELINE,TBYB27_SUSTAINABILITY_DATA WHERE TBYB27_SUSTAINABILITY_DATA.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBYB27_SUSTAINABILITY_DATA.NC_SITE = TBYB14_SI_BASELINE.NC_SITE_ID_FK(+) AND TBYB27_SUSTAINABILITY_DATA.NC_BASELINEYEAR = TBYB14_SI_BASELINE.NC_YEAR(+) ) AS PCT_IMPACT_WATER,
(SELECT TBXS32_TEXT_SECTIONS.TC_TEXT FROM TBXS32_TEXT_SECTIONS WHERE TBXS32_TEXT_SECTIONS.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBXS32_TEXT_SECTIONS.NC_TEMPLATE_ID_FK = 570 AND NC_SUBSECTION_ID = 0) AS PROPOSAL_TEXT,
(SELECT TBXS32_TEXT_SECTIONS.TC_TEXT FROM TBXS32_TEXT_SECTIONS WHERE TBXS32_TEXT_SECTIONS.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBXS32_TEXT_SECTIONS.NC_TEMPLATE_ID_FK = 1456 AND NC_SUBSECTION_ID = 0) AS COMMENTS_EXPLANATIONS,
(SELECT TBXS32_TEXT_SECTIONS.TC_TEXT FROM TBXS32_TEXT_SECTIONS WHERE TBXS32_TEXT_SECTIONS.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBXS32_TEXT_SECTIONS.NC_TEMPLATE_ID_FK = 1458 AND NC_SUBSECTION_ID = 0) AS COMMENTS_ON_ADD_SI_IMPACTS,
<!--- Get Capital --->
(
SELECT
SUM(NVL((NC_QUANTITY * COST) * (1 + NC_TAX_RATE),0))
FROM
(
SELECT
E.*,
NVL(E.MC_COST,0) * FROM_CURR.NC_EXCHANGE_RATE / TO_CURR.NC_EXCHANGE_RATE COST,
E.MC_MAINT_COST * FROM_CURR.NC_EXCHANGE_RATE / TO_CURR.NC_EXCHANGE_RATE MAINT_COST
FROM
TBXS23_EXPENDITURES E,
TBXS41_EXCHANGE_RATE_CIT FROM_CURR,
TBXS41_EXCHANGE_RATE_CIT TO_CURR,
TBXP78_REQUESTS R,
V_TBXS13_STEPS SS
WHERE
R.NC_ACT_TYPE_ID_FK != 40
AND
R.NC_FORM_STEP_ID_FK = SS.NC_FORM_STEP_ID
AND
E.NC_REQUEST_ID_FK = R.NC_REQUEST_ID
AND
TO_CURR.NC_CURRENCY_ID = <cfqueryparam value="#session.objUser.getCurrencyPreference()#" cfsqltype="cf_sql_numeric">
AND
E.NC_CURRENCY_ID_FK = FROM_CURR.NC_CURRENCY_ID
AND
E.NC_EXPENDITURE_TYPE_ID_FK = 83
AND
R.DC_APPROVAL_DATE
BETWEEN
TO_CURR.DC_VALID_FROM
AND
TO_CURR.DC_VALID_TO
AND
R.DC_APPROVAL_DATE
BETWEEN
FROM_CURR.DC_VALID_FROM
AND
FROM_CURR.DC_VALID_TO
) WHERE NC_REQUEST_ID_FK = R.NC_REQUEST_ID
)AS CAPITAL,
<!--- Get Capital --->
<!--- Project Expense One Time --->
(
SELECT
SUM(NVL((NC_QUANTITY * COST) * (1 + NC_TAX_RATE),0))
FROM
(
SELECT
E.*,
NVL(E.MC_COST,0) * FROM_CURR.NC_EXCHANGE_RATE / TO_CURR.NC_EXCHANGE_RATE COST,
E.MC_MAINT_COST * FROM_CURR.NC_EXCHANGE_RATE / TO_CURR.NC_EXCHANGE_RATE MAINT_COST
FROM
TBXS23_EXPENDITURES E,
TBXS41_EXCHANGE_RATE_CIT FROM_CURR,
TBXS41_EXCHANGE_RATE_CIT TO_CURR,
TBXP78_REQUESTS R,
V_TBXS13_STEPS SS
WHERE
R.NC_ACT_TYPE_ID_FK != 40
AND
R.NC_FORM_STEP_ID_FK = SS.NC_FORM_STEP_ID
AND
E.NC_REQUEST_ID_FK = R.NC_REQUEST_ID
AND
TO_CURR.NC_CURRENCY_ID = <cfqueryparam value="#session.objUser.getCurrencyPreference()#" cfsqltype="cf_sql_numeric">
AND
E.NC_EXPENDITURE_TYPE_ID_FK = 84
AND
E.NC_CURRENCY_ID_FK = FROM_CURR.NC_CURRENCY_ID
AND
R.DC_APPROVAL_DATE
BETWEEN
TO_CURR.DC_VALID_FROM
AND
TO_CURR.DC_VALID_TO
AND
R.DC_APPROVAL_DATE
BETWEEN
FROM_CURR.DC_VALID_FROM
AND
FROM_CURR.DC_VALID_TO
) WHERE NC_REQUEST_ID_FK = R.NC_REQUEST_ID
)AS PROJECT_EXPENSE
<!--- Project Expense One Time --->
FROM
TBXP78_REQUESTS R,
V_TBXS13_STEPS S,
TBXS39_ROI ROI,
TBYB27_SUSTAINABILITY_DATA SD
WHERE
R.NC_FORM_STEP_ID_FK = S.NC_FORM_STEP_ID
AND
R.NC_REQUEST_ID = ROI.NC_REQUEST_ID_FK(+)
AND
R.NC_REQUEST_ID = SD.NC_REQUEST_ID_FK(+)
AND
R.NC_ACT_TYPE_ID_FK != 40
These are the CLOB fields:
(SELECT TBXS32_TEXT_SECTIONS.TC_TEXT FROM TBXS32_TEXT_SECTIONS WHERE TBXS32_TEXT_SECTIONS.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBXS32_TEXT_SECTIONS.NC_TEMPLATE_ID_FK = 570 AND NC_SUBSECTION_ID = 0) AS PROPOSAL_TEXT,
(SELECT TBXS32_TEXT_SECTIONS.TC_TEXT FROM TBXS32_TEXT_SECTIONS WHERE TBXS32_TEXT_SECTIONS.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBXS32_TEXT_SECTIONS.NC_TEMPLATE_ID_FK = 1456 AND NC_SUBSECTION_ID = 0) AS COMMENTS_EXPLANATIONS,
(SELECT TBXS32_TEXT_SECTIONS.TC_TEXT FROM TBXS32_TEXT_SECTIONS WHERE TBXS32_TEXT_SECTIONS.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBXS32_TEXT_SECTIONS.NC_TEMPLATE_ID_FK = 1458 AND NC_SUBSECTION_ID = 0) AS COMMENTS_ON_ADD_SI_IMPACTS,
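For reference, the dbms_lob.substr() variant mentioned in the update would look something like this for each of the three columns (an illustration only, shown for the PROPOSAL_TEXT subquery):
-- truncate the CLOB to its first 4000 characters inside the scalar subquery
(SELECT DBMS_LOB.SUBSTR(TBXS32_TEXT_SECTIONS.TC_TEXT, 4000, 1) FROM TBXS32_TEXT_SECTIONS WHERE TBXS32_TEXT_SECTIONS.NC_REQUEST_ID_FK = R.NC_REQUEST_ID AND TBXS32_TEXT_SECTIONS.NC_TEMPLATE_ID_FK = 570 AND NC_SUBSECTION_ID = 0) AS PROPOSAL_TEXT,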