Shader looks completely different on Instagram than in SparkAR Studio - glsl

I'm trying to create an underwater filter by utilizing shaders in SparkAR.
My filter looks like intended in SparkAR, but not at all when tested in Instagram.
Here is a comparison, SparkAR on the left, Instagram on the right:
I thought it had something to do with the resolution, so I tried everything there already: upscaling, calculating the UVs by using getModelViewProjectionMatrix() instead of getRenderTargetSize(), etc.
Nothing worked, so I hope someone here has experienced something similar and can help me out!
Here is the shader code used:
#ifdef GL_ES
precision mediump float;
#endif
float length2(vec2 p) { return dot(p, p); }
float noise(vec2 p){
return fract(sin(fract(sin(p.x) * (4231.13311)) + p.y) * 3131.0011);
}
float worley(vec2 p) {
float d = 1e30;
for (int xo = -1; xo <= 1; ++xo) {
for (int yo = -1; yo <= 1; ++yo) {
vec2 tp = floor(p) + vec2(xo, yo);
d = min(d, length2(p - tp - vec2(noise(tp))));
}
}
return 3.0*exp(-4.0*abs(2.0*d - 1.0));
}
float fworley(vec2 p) {
float time = fragment(std::getTime());
return sqrt(sqrt(sqrt(
1.6 * // light
worley(p*32. + 4.3 + time*.125) *
sqrt(worley(p * 64. + 5.3 + time * -0.0625)) *
sqrt(sqrt(worley(p * -100. + 9.3))))));
}
void main(out vec4 Position, out vec4 Color) {
Position = std::getModelViewProjectionMatrix() * std::getVertexPosition();
vec2 scaling = vec2(1., 1.);
float time = fragment(std::getTime());
vec2 vertCoord = fragment(std::getVertexTexCoord());
vec2 resolution = fragment(std::getRenderTargetSize());
vec2 uv = floor(resolution * vertCoord) / resolution;
vec2 xDifference = vec2(2.0 * (sin(time / 2.0) / 2.0) - 1.5, 0.8);
float t = fworley(uv * resolution / (900.0 * scaling)) / 2.;
t *= exp(-length2(abs(1.0* (uv + xDifference) * - 1.0)));
t += fworley(uv * resolution / (450.0 * scaling)) / 2.;
xDifference = vec2(2.0 * (sin(time / 2.0) / 2.0) - 0.75, 0.7);
t *= exp(-length2(abs(1.0* (uv + xDifference) * - 1.0))) * 0.5;
t += fworley(uv * resolution / (300.0 * scaling)) / 3.;
xDifference = vec2(2.0 * (sin(time / 2.0) / 3.0) - 0.5, 0.6);
t *= exp(-length2(abs(1.0* (uv + xDifference) * - 1.0)));
Color = vec4((t+0.05) * vec3(0.3, 1.5*t, 0.3 + pow(t, 1.0-t)), 1.3);
}
I also already checked if I used something that is not supported by GLSL 1.x as that is the basis for SparkSL. But that isn't the case.

You need to read the article : https://sparkar.facebook.com/ar-studio/learn/sparksl/cross-device-shader-sparksl#opengl
In short, the issue is precision of floats.
And never use:
fract(sin(fract(sin(p.x) * (4231.13311)) + p.y) * 3131.0011);

Related

Interpreting visual studio profiler, is this subtraction slow? Can I make all this any faster?

I'm using the Visual Studio profiler for the first time and I'm trying to interpret the results. Looking at the percentages on the left, I found this subtraction's time cost a bit strange:
Other parts of the code contain more complex expressions, like:
Even a simple multiplication seems way faster than the subtraction :
Other multiplications take way longer and I really don't get why, like this :
So, I guess my question is if there is anything weird going on here.
Complex expressions take longer than that subtraction and some expressions take way longer than similar other ones. I run the profiler several times and the distribution of the percentages is always like this. Am I just interpreting this wrong?
Update:
I was asked to give the profile for the whole function so here it is, even though it's a bit big. I ran the function inside a for loop for 1 minute and got 50k samples. The function contains a double loop. I include the text first for ease, followed by the pictures of profiling. Note that the code in text is a bit updated.
for (int i = 0; i < NUMBER_OF_CONTOUR_POINTS; i++) {
vec4 contourPointV(contour3DPoints[i], 1);
float phi = angles[i];
float xW = pose[0][0] * contourPointV.x + pose[1][0] * contourPointV.y + contourPointV.z * pose[2][0] + pose[3][0];
float yW = pose[0][1] * contourPointV.x + pose[1][1] * contourPointV.y + contourPointV.z * pose[2][1] + pose[3][1];
float zW = pose[0][2] * contourPointV.x + pose[1][2] * contourPointV.y + contourPointV.z * pose[2][2] + pose[3][2];
float x = -G_FU_STRICT * xW / zW;
float y = -G_FV_STRICT * yW / zW;
x = (x + 1) * G_WIDTHo2;
y = (y + 1) * G_HEIGHTo2;
y = G_HEIGHT - y;
phi -= extraTheta;
if (phi < 0)phi += CV_PI2;
int indexForTable = phi * oneKoverPI;
//vec2 ray(cos(phi), sin(phi));
vec2 ray(cos_pre[indexForTable], sin_pre[indexForTable]);
vec2 ray2(-ray.x, -ray.y);
float outerStepX = ray.x * step;
float outerStepY = ray.y * step;
cv::Point2f outerPoint(x + outerStepX, y + outerStepY);
cv::Point2f innerPoint(x - outerStepX, y - outerStepY);
cv::Point2f contourPointCV(x, y);
cv::Point2f contourPointCVcopy(x, y);
bool cut = false;
if (!isInView(outerPoint.x, outerPoint.y) || !isInView(innerPoint.x, innerPoint.y)) {
cut = true;
}
bool outside2 = true; bool outside1 = true;
if (cut) {
outside2 = myClipLine(contourPointCV.x, contourPointCV.y, outerPoint.x, outerPoint.y, G_WIDTH - 1, G_HEIGHT - 1);
outside1 = myClipLine(contourPointCVcopy.x, contourPointCVcopy.y, innerPoint.x, innerPoint.y, G_WIDTH - 1, G_HEIGHT - 1);
}
myIterator innerRayMine(contourPointCVcopy, innerPoint);
myIterator outerRayMine(contourPointCV, outerPoint);
if (!outside1) {
innerRayMine.end = true;
innerRayMine.prob = true;
}
if (!outside2) {
outerRayMine.end = true;
innerRayMine.prob = true;
}
vec2 normal = -ray;
float dfdxTerm = -normal.x;
float dfdyTerm = normal.y;
vec3 point3D = vec3(xW, yW, zW);
cv::Point contourPoint((int)x, (int)y);
float Xc = point3D.x; float Xc2 = Xc * Xc; float Yc = point3D.y; float Yc2 = Yc * Yc; float Zc = point3D.z; float Zc2 = Zc * Zc;
float XcYc = Xc * Yc; float dfdxFu = dfdxTerm * G_FU; float dfdyFv = dfdyTerm * G_FU; float overZc2 = 1 / Zc2; float overZc = 1 / Zc;
pixelJacobi[0] = (dfdyFv * (Yc2 + Zc2) + dfdxFu * XcYc) * overZc2;
pixelJacobi[1] = (-dfdxFu * (Xc2 + Zc2) - dfdyFv * XcYc) * overZc2;
pixelJacobi[2] = (-dfdyFv * Xc + dfdxFu * Yc) * overZc;
pixelJacobi[3] = -dfdxFu * overZc;
pixelJacobi[4] = -dfdyFv * overZc;
pixelJacobi[5] = (dfdyFv * Yc + dfdxFu * Xc) * overZc2;
float commonFirstTermsSum = 0;
float commonFirstTermsSquaredSum = 0;
int test = 0;
while (!innerRayMine.end) {
test++;
cv::Point xy = innerRayMine.pos(); innerRayMine++;
int x = xy.x;
int y = xy.y;
float dx = x - contourPoint.x;
float dy = y - contourPoint.y;
vec2 dxdy(dx, dy);
float raw = -glm::dot(dxdy, normal);
float heavisideTerm = heaviside_pre[(int)raw * 100 + 1000];
float deltaTerm = delta_pre[(int)raw * 100 + 1000];
const Vec3b rgb = ante[y * 640 + x];
int red = rgb[0]; int green = rgb[1]; int blue = rgb[2];
red = red >> 3; red = red << 10; green = green >> 3; green = green << 5; blue = blue >> 3;
int colorIndex = red + green + blue;
pF = pFPointer[colorIndex];
pB = pBPointer[colorIndex];
float denAsMul = 1 / (pF + pB + 0.000001);
pF = pF * denAsMul;
float pfMinusPb = 2 * pF - 1;
float denominator = heavisideTerm * (pfMinusPb)+pB + 0.000001;
float commonFirstTerm = -pfMinusPb / denominator * deltaTerm;
commonFirstTermsSum += commonFirstTerm;
commonFirstTermsSquaredSum += commonFirstTerm * commonFirstTerm;
}
}
Visual Studio profiles by sampling: it interrupts execution often and records the value of the instruction pointer; it then maps it to the source and calculates the frequency of hitting that line.
There are few issues with that: it's not always possible to figure out which line produced a specific assembly instruction in the optimized code.
One trick I use is to move the code of interest into a separate function and declare it with __declspec(noinline) .
In your example, are you sure the subtraction was performed as many times as multiplication? I would be more puzzled by the difference in subsequent multiplication (0.39% and 0.53%)
Update:
I believe that the following lines:
float phi = angles[i];
and
phi -= extraTheta;
got moved together in assembly and the time spent getting angles[i] was added to that subtraction line.

Function to calculate angle to a point in unusual 2D space

I'm looking for a robust function to calculate the difference(delta) between an object and a point.
For example, it there was an object at point A with an orientation of 1.2 Rad, what would be the required angle for the object to turn in order to face Point B.
Furthermore, I'm working in a odd coordinate system where north(0 Rad) faces towards +X, the image below shows this.
I understand the fundamentals, but I'm struggling to produce something robust.
My c++ function template look like this,
float Robot::getDeltaHeading(float _x1, float _y1, float _x2, float _y2, float _currentHeading) {
//TODO:
return xxxxxxx;
}
Any help would be appreciated.
Cheers in Advance.
Here's the answer.
float Robot::getDeltaHeading(float _x1, float _y1, float _x2, float _y2, float _currentHeading) {
_currentHeading -= 90;
double Ux = 0.0, Uy = 0.0, Vx = 0.0, Vy = 0.0, d = 0.0;
d = sqrtf(powf(abs(_x1 - _x2), 2) + powf(abs(_y1 - _x2), 2));
Ux = (_x2 - _x1) / d;
Uy = (_y2 - _y1) / d;
Vx = cos(_currentHeading * (3.14159f / 180.0));
Vy = sin(_currentHeading * (3.14159f / 180.0));
auto ans = 90 + (atan2(((Ux * Vy) - (Uy * Vx)), ((Ux * Vx) + (Uy * Vy))) * (180.0 / 3.14159f));
while (ans > 180) ans -= 360;
while (ans < -180) ans += 360;
return ans;
}

Problem with PathTracing ShadowRay, Spheres all black

So i'm making a raytracer in OpenGL, fully shader based, and i'm struggling to know where the problem is with my Shadow rays. If i multiply the radiance of the object by the shadowRay output, it seems like only the "edge" of the sphere is lighten up
I verified multiple times the code without finding where the problem comes from.
This is what i get:
vec3 TraceShadowRay(vec3 hitPoint, vec3 normal, Object objects[3])
{
Light pointLight;
pointLight.position = vec3(0, 80, 0);
pointLight.intensity = 2;
Ray ShadowRay;
ShadowRay.origin = hitPoint + normal * 1e-4;
ShadowRay.dir = normalize(pointLight.position - ShadowRay.origin);
ShadowRay.t = 100000;
//ShadowRay.dir = vec3(0, 1, 0);
for(int i = 0; i < 3; ++i)
{
if(objects[i].type == 0)
{
if(interectSphere(objects[i].position, objects[i].radius, ShadowRay))
{
return vec3(0);
}
}
if(objects[i].type == 1)
{
if(intersectPlane(objects[i].normal, objects[i].position, ShadowRay))
{
return vec3(0);
}
}
}
float AngleNormalShadow = dot(ShadowRay.dir, normal);
clamp(AngleNormalShadow, 0, 1);
return GetLight(ShadowRay.origin, pointLight);// * AngleNormalShadow;
}
The getLight function:
vec3 GetLight(vec3 origin, Light light)
{
return vec3(1, 1, 1) * light.intensity;
//float dist = sqrt( ((origin.x - light.position.x) * (origin.x - light.position.x)) + ((origin.y - light.position.y) * (origin.y - light.position.y)));
//return (vec3(1, 1, 1) * light.intensity) / (4 * M_PI * ((origin - light.position).length * (origin - light.position).length));
}
The intersectSphere function:
bool interectSphere(const vec3 center, float radius, inout Ray r)
{
vec3 o = r.origin;
vec3 d = r.dir;
vec3 v = o - center;
float b = 2 * dot(v, d);
float c = dot(v, v) - radius*radius;
float delta = b*b - 4 * c;
if(delta < 1e-4)
return false;
float t1 = (-b - sqrt(delta))/2;
float t2 = (-b + sqrt(delta))/2;
if(t1 < t2)
{
r.t = t1;
r.t2 = t2;
}
else if(t2 < t1)
{
r.t = t2;
r.t2 = t1;
}
r.reflectionNormal = normalize((r.origin + r.dir * r.t) - center);
return true;
}
The result expected is a nice shaded sphere with light coming from the top of the spheres
Could it be a missing negation? Looks like interectSphere() returns true when there is a collision, but the calling code in TraceShadowRay() bails out when it returns true.
old:
if(interectSphere(objects[i].position, objects[i].radius, ShadowRay))
{
return vec3(0);
}
new:
if(!interectSphere(objects[i].position, objects[i].radius, ShadowRay))
{
return vec3(0);
}

How to have text centered inside each slice of a pie chart?

I would like to get the text labels (percentages) centered within each pie slice. It currently works a bit for two of the quadrants:
What am I doing wrong?
void PieChartWidget::paintEvent(QPaintEvent *) {
QPainter painter(this);
QRectF size;
painter.setPen(QPen(Qt::black, 2));
if (this->height() > this->width()) {
size = QRectF(5, 5, this->width() - 10, this->width() - 10);
} else {
size = QRectF(5, 5, this->height() - 5, this->height() - 10);
}
double sum = 0.0, startAng = 0.0;
double angle, endAng;
double percent;
for (int i = 0; i < qvValues.size(); i++) {
sum += qvValues[i];
}
for (int i = 0; i < qvValues.size(); i++) {
percent = qvValues[i] / sum;
angle = percent * 360.0;
endAng = startAng + angle;
painter.setBrush(qvColors[i]);
painter.drawPie(size, static_cast<int>(startAng * 16),
static_cast<int>(angle * 16));
startAng = endAng;
if (percent != 0) {
double draw_x = width() / 2 +
cos(PI * (endAng / 180.0 - angle / 360.0)) * this->width() / 4.0;
double draw_y = height() / 2 +
sin(PI * (endAng / 180.0 - angle / 360.0)) * this->width() / 4.0;
painter.drawText(draw_x, draw_y, QString::number(percent * 100) + "%");
}
}
}
On this line:
painter.drawText(this->width()/4,this->height(), QString::number(percent*100)+"%");
You seem to draw the percentage in the same place every time. You do successfully draw the percentage for each section, they're just being drawn in the same place every time. Try changing it to this:
painter.drawText(double(i + 1) * this->width()/4,this->height(), QString::number(percent*100)+"%");
And you'll see what I mean. By multiplying the x value by some changing value, the x position of each drawn text will change, and thus you will be able to see the different percentages being drawn.
If you want it to draw in each quadrant, then your code might look something like this:
# define PI 3.14159265358979323846
...
double draw_x = this->width / 2.0 + cos(PI * (end_angle / 180.0 - angle / 360.0)) * this->width / 4.0;
double draw_y = this->height / 2.0 - sin(PI * (end_angle / 180.0 - angle / 360.0)) * this->width / 4.0;
painter.drawText(draw_x, draw_y, QString::number(percent*100)+"%");
Basically, what's happening in the above code is I'm calculating the x and y coords of the middle of each slice. Then, I'm drawing the percentages in those positions.

OpenGL Shader vs CUDA

I was using this FXAA Shader for anti-aliasing in my OpenGL program. Now I reimplemented this code in CUDA and tested it. The resulting images are the same, but the CUDA version is much slower. (Shader runs at 60 FPS with vsync, while CUDA drops down to ~40 FPS)
Here is the CUDA code:
__device__ uchar4 readChar(int x, int y){
return surf2Dread<uchar4>( surfaceRead, (x)*sizeof(uchar4), (y),cudaBoundaryModeClamp);
}
__device__ uchar4 readFloatBilin2(float x, float y){
int x1 = floor(x);
int y1 = floor(y);
uchar4 z11 = readChar(x1,y1);
uchar4 z12 = readChar(x1,y1+1);
uchar4 z21 = readChar(x1+1,y1);
uchar4 z22 = readChar(x1+1,y1+1);
float u_ratio = x - x1;
float v_ratio = y - y1;
float u_opposite = 1 - u_ratio;
float v_opposite = 1 - v_ratio;
uchar4 result = (z11 * u_opposite + z21 * u_ratio) * v_opposite +
(z12 * u_opposite + z22 * u_ratio) * v_ratio;
return result;
}
__device__ float fluma(const uchar4 &c){
return c.x*0.299 * (1.0/255) + c.y *0.587 * (1.0/255) + c.z*0.114 * (1.0/255);
}
__global__ void filter_fxaa_opt(TextureData data)
{
int x = blockIdx.x*blockDim.x + threadIdx.x;
int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= data.w || y >= data.h)
{
return;
}
uchar4 out_color;
const float FXAA_SPAN_MAX = 8.0;
const float FXAA_REDUCE_MUL = 1.0/8.0;
const float FXAA_REDUCE_MIN = (1.0/128.0);
float lumaNW = fluma(readChar(x-1,y-1));
float lumaNE = fluma(readChar(x+1,y-1));
float lumaSW = fluma(readChar(x-1,y+1));
float lumaSE = fluma(readChar(x+1,y+1));
float lumaM = fluma(readChar(x,y));
float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE)));
float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE)));
float2 dir;
dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE));
dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE));
float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) * (0.25 * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN);
float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce);
// float2 test = dir * rcpDirMin;
dir = clamp(dir * rcpDirMin,-FXAA_SPAN_MAX,FXAA_SPAN_MAX);
uchar4 rgbA = (
readFloatBilin2(x+ dir.x * (1.0/3.0 - 0.5),y+ dir.y * (1.0/3.0 - 0.5))*0.5f+
readFloatBilin2(x+ dir.x * (2.0/3.0 - 0.5),y+ dir.y * (2.0/3.0 - 0.5))*0.5f);
uchar4 rgbB = rgbA * (1.0/2.0) + (
readFloatBilin2(x+ dir.x * (0.0/3.0 - 0.5),y+ dir.y * (0.0/3.0 - 0.5))*0.25f+
readFloatBilin2(x+ dir.x * (3.0/3.0 - 0.5),y+ dir.y * (3.0/3.0 - 0.5))*0.25f);
float lumaB = fluma(rgbB);
if((lumaB < lumaMin) || (lumaB > lumaMax)){
out_color=rgbA;
} else {
out_color=rgbB;
}
surf2Dwrite<uchar4>(out_color, surfaceWrite, x*sizeof(uchar4), y);
}
Setup:
//called for the 'src' and 'dst' texture once at the beginning
checked_cuda( cudaGraphicsGLRegisterImage(&res, gl_buffer,gl_target, cudaGraphicsRegisterFlagsSurfaceLoadStore));
//called for the 'src' and 'dst' texture every frame
checked_cuda( cudaGraphicsMapResources(1, &res, 0));
checked_cuda( cudaGraphicsSubResourceGetMappedArray(&array, res, 0,0));
//kernel call every frame
dim3 block_size(8, 8);
dim3 grid_size;
grid_size.x = (src->w) / (block_size.x) ;
grid_size.y = (src->h) / (block_size.y) ;
checked_cuda(cudaBindSurfaceToArray(surfaceRead, (cudaArray *)src->d_data));
checked_cuda(cudaBindSurfaceToArray(surfaceWrite, (cudaArray *)dst->d_data));
filter_fxaa_opt<<<grid_size, block_size>>>(*src);
System:
Ubuntu 14.04
Opengl version: 4.4.0 NVIDIA 331.113
Renderer version: GeForce GTX 760M/PCIe/SSE2
CUDA 5.5
Question:
What does the OpenGL Shader do better and why is it so much faster?
As njuffa pointed out the main problem was the manual interpolation and normalization. After using a CUDA texture instead of a CUDA surface the build in interpolation can be used by calling tex2D(..) instead of surf2Dread(...).
The modified CUDA code is now almost indentically to the OpenGL shader and does indeed perform equally well.
__global__ void filter_fxaa2(TextureData data)
{
int x = blockIdx.x*blockDim.x + threadIdx.x;
int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= data.w || y >= data.h)
{
return;
}
uchar4 out_color;
const float FXAA_SPAN_MAX = 8.0f;
const float FXAA_REDUCE_MUL = 1.0f/8.0f;
const float FXAA_REDUCE_MIN = (1.0f/128.0f);
float u = x + 0.5f;
float v = y + 0.5f;
float4 rgbNW = tex2D( texRef, u-1.0f,v-1.0f);
float4 rgbNE = tex2D( texRef, u+1.0f,v-1.0f);
float4 rgbSW = tex2D( texRef, u-1.0f,v+1.0f);
float4 rgbSE = tex2D( texRef, u+1.0f,v+1.0f);
float4 rgbM = tex2D( texRef, u,v);
const float4 luma = make_float4(0.299f, 0.587f, 0.114f,0.0f);
float lumaNW = dot(rgbNW, luma);
float lumaNE = dot(rgbNE, luma);
float lumaSW = dot(rgbSW, luma);
float lumaSE = dot(rgbSE, luma);
float lumaM = dot( rgbM, luma);
float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE)));
float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE)));
float2 dir;
dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE));
dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE));
float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) * (0.25f * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN);
float rcpDirMin = 1.0f/(min(abs(dir.x), abs(dir.y)) + dirReduce);
float2 test = dir * rcpDirMin;
dir = clamp(test,-FXAA_SPAN_MAX,FXAA_SPAN_MAX);
float4 rgbA = (1.0f/2.0f) * (
tex2D( texRef,u+ dir.x * (1.0f/3.0f - 0.5f),v+ dir.y * (1.0f/3.0f - 0.5f))+
tex2D( texRef,u+ dir.x * (2.0f/3.0f - 0.5f),v+ dir.y * (2.0f/3.0f - 0.5f)));
float4 rgbB = rgbA * (1.0f/2.0f) + (1.0f/4.0f) * (
tex2D( texRef,u+ dir.x * (0.0f/3.0f - 0.5f),v+ dir.y * (0.0f/3.0f - 0.5f))+
tex2D( texRef,u+ dir.x * (3.0f/3.0f - 0.5f),v+ dir.y * (3.0f/3.0f - 0.5f)));
float lumaB = dot(rgbB, luma);
if((lumaB < lumaMin) || (lumaB > lumaMax)){
out_color=toChar(rgbA);
} else {
out_color=toChar(rgbB);
}
surf2Dwrite<uchar4>(out_color, surfaceWrite, x*sizeof(uchar4), y);
}
Update:
Performance meassured with cudaEvents:
Old Version: ~12.8ms
New Version: ~1.2ms
Conclusion:
Use CUDA surfaces only for writing and not for reading textures!