Spline3D Curve Fitting - python-2.7

I wrote a program based on pyeq2 that fits a 3D spline to 40 points in order to create a 768x480 mask.
But after changing the 40 points to 144 points and 768x480 to 1920x1080, the result is no longer smooth.
How do I modify the code, or is there another tool that can do this?
asciiDataInColumns_3D = '''
60 60 246.09
60 180 245.07
60 300 243.63
60 420 242.64
60 540 240.39
60 660 241.09
60 780 243.78
60 900 244.35
60 1020 249.19
180 60 242.50
180 180 241.37
180 300 242.78
180 420 240.25
180 540 238.87
180 660 249.34
180 780 243.21
180 900 244.21
180 1020 247.70
300 60 242.50
300 180 241.37
300 300 241.37
300 420 240.39
300 540 240.81
300 660 243.92
300 780 240.39
300 900 251.91
300 1020 255.00
420 60 244.64
420 180 244.64
420 300 243.63
420 420 243.49
420 540 242.78
420 660 242.35
420 780 243.78
420 900 243.78
420 1020 246.23
540 60 245.80
540 180 246.23
540 300 245.22
540 420 245.36
540 540 243.78
540 660 242.78
540 780 242.35
540 900 242.50
540 1020 243.06
660 60 245.65
660 180 246.53
660 300 246.38
660 420 245.51
660 540 243.92
660 660 241.93
660 780 240.11
660 900 238.46
660 1020 240.67
780 60 245.51
780 180 247.55
780 300 247.55
780 420 248.00
780 540 244.93
780 660 241.93
780 780 240.39
780 900 238.32
780 1020 238.87
900 60 245.80
900 180 247.70
900 300 246.67
900 420 245.80
900 540 244.64
900 660 243.06
900 780 239.15
900 900 236.69
900 1020 236.15
1020 60 247.70
1020 180 248.00
1020 300 247.11
1020 420 245.51
1020 540 243.21
1020 660 240.67
1020 780 238.05
1020 900 235.62
1020 1020 235.22
1140 60 247.55
1140 180 248.15
1140 300 246.96
1140 420 245.36
1140 540 242.78
1140 660 239.28
1140 780 237.37
1140 900 234.29
1140 1020 234.55
1260 60 247.26
1260 180 246.53
1260 300 245.22
1260 420 242.78
1260 540 241.23
1260 660 237.50
1260 780 236.69
1260 900 234.16
1260 1020 234.95
1380 60 246.53
1380 180 245.07
1380 300 243.06
1380 420 240.67
1380 540 237.91
1380 660 235.35
1380 780 234.69
1380 900 233.89
1380 1020 234.55
1500 60 245.51
1500 180 243.06
1500 300 241.23
1500 420 239.01
1500 540 235.75
1500 660 234.16
1500 780 233.24
1500 900 232.84
1500 1020 233.76
1620 60 244.49
1620 180 242.21
1620 300 239.28
1620 420 236.02
1620 540 233.76
1620 660 232.98
1620 780 231.81
1620 900 232.98
1620 1020 234.16
1740 60 245.94
1740 180 242.92
1740 300 239.42
1740 420 236.29
1740 540 234.03
1740 660 232.58
1740 780 230.52
1740 900 230.14
1740 1020 234.69
1860 60 252.83
1860 180 249.19
1860 300 244.35
1860 420 240.53
1860 540 236.96
1860 660 237.37
1860 780 235.22
1860 900 235.48
1860 1020 236.56
'''
Dimension = 3
inSmoothingFactor = 1.0
XOrder = 4
YOrder = 4
#bbox = [0,768,0,480]
bbox = [0,1920,0,1080]
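# note: scipy's SmoothBivariateSpline docs recommend a smoothing factor s in the
# range [m - sqrt(2*m), m + sqrt(2*m)] for m data points (with weights ~ 1/stddev);
# s = 1.0 is very tight for 144 points and forces the surface to chase the data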
Data Conversion:
# imports used by the snippets below (Python 2.7)
import StringIO
import numpy
import scipy.interpolate
from PIL import Image  # classic PIL uses "import Image" instead

rawData = StringIO.StringIO(asciiDataInColumns_3D).readlines()
dataLists = [[], [], []]
for line in rawData:
    tokenlist = line.split()
    if len(tokenlist) < Dimension:
        continue
    try:
        a = float(tokenlist[0])
        b = float(tokenlist[1])
        c = float(tokenlist[2])
    except ValueError:
        continue
    if a > 1.0E300 or a < -1.0E300:
        continue
    if b > 1.0E300 or b < -1.0E300:
        continue
    if c > 1.0E300 or c < -1.0E300:
        continue
    dataLists[0].append(a)
    dataLists[1].append(b)
    dataLists[2].append(c)
arrayLists = numpy.array(dataLists)
Data ARGsort:
indices = numpy.argsort(arrayLists[2])
DependentData = numpy.array(arrayLists[2][indices])
IndependentData = numpy.array([arrayLists[0][indices], arrayLists[1][indices]])
scipy Spline:
mSpline = scipy.interpolate.fitpack2.SmoothBivariateSpline(IndependentData[0], IndependentData[1], DependentData, s=inSmoothingFactor, kx=XOrder, ky=YOrder, bbox=bbox)
tx = mSpline.tck[0]
ty = mSpline.tck[1]
coeff = mSpline.tck[2]
Formula:
def mfitting(x_in, y_in):
    # hand-ported FITPACK evaluation (fpbspl/fpbisp): returns the spline value at (x_in, y_in)
    global tx, ty, coeff
    nx = len(tx)
    ny = len(ty)
    kx = 4  # must match XOrder
    ky = 4  # must match YOrder
    h = [0.0] * 25
    hh = [0.0] * 25
    w_x = [0.0] * 25
    w_y = [0.0] * 25
    # locate the knot span containing x_in
    kx1 = kx+1
    nkx1 = nx-kx1
    l = kx1
    l1 = l+1
    while x_in >= tx[l1-1] and l != nkx1:
        l = l1
        l1 = l+1
    # B-spline basis functions in x (de Boor / Cox recursion)
    h[0] = 1.0
    for j in range(1, kx+1):
        for i in range(j):
            hh[i] = h[i]
        h[0] = 0.0
        for i in range(j):
            li = l+i
            lj = li-j
            if tx[li] != tx[lj]:
                f = hh[i] / (tx[li] - tx[lj])
                h[i] = h[i] + f * (tx[li] - x_in)
                h[i+1] = f * (x_in - tx[lj])
            else:
                h[i+1] = 0.0  # coincident knots; FITPACK's fpbspl sets h(i+1)=0 here
    lx = l-kx1
    for j in range(kx1):
        w_x[j] = h[j]
    # same again for y
    ky1 = ky+1
    nky1 = ny-ky1
    l = ky1
    l1 = l+1
    while y_in >= ty[l1-1] and l != nky1:
        l = l1
        l1 = l+1
    h[0] = 1.0
    for j in range(1, ky+1):
        for i in range(j):
            hh[i] = h[i]
        h[0] = 0.0
        for i in range(j):
            li = l+i
            lj = li-j
            if ty[li] != ty[lj]:
                f = hh[i] / (ty[li] - ty[lj])
                h[i] = h[i] + f * (ty[li] - y_in)
                h[i+1] = f * (y_in - ty[lj])
            else:
                h[i+1] = 0.0  # coincident knots; FITPACK's fpbspl sets h(i+1)=0 here
    ly = l-ky1
    for j in range(ky1):
        w_y[j] = h[j]
    # tensor-product sum: coefficients times the x and y basis values
    l = lx*nky1
    for i1 in range(kx1):
        h[i1] = w_x[i1]
    l1 = l+ly
    temp = 0.0
    for i1 in range(kx1):
        l2 = l1
        for j1 in range(ky1):
            l2 = l2+1
            temp = temp + coeff[l2-1] * h[i1] * w_y[j1]
        l1 = l1+nky1
    return temp
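As a sanity check (not part of the original program), scipy can evaluate the same knot/coefficient triple directly; bisplev wraps the FITPACK routine this function ports, so the two should agree closely. A minimal sketch, with a made-up check point:

import scipy.interpolate

tck = (tx, ty, coeff, XOrder, YOrder)
print scipy.interpolate.bisplev(960.0, 540.0, tck)  # FITPACK's own evaluator
print mfitting(960.0, 540.0)                        # should agree closely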
Main:
print "Creating MASK ...."
#im_RGB = Image.new('RGB',(768,480))
im_RGB = Image.new('RGB',(1920,1080))
'''
for i in range(768):
for j in range(480):
value = int(Spline_model(i,j))
im_RGB.putpixel((i,j), (value,value,value))
'''
for i in range(1920):
for j in range(1080):
value = int(mfitting(i,j))
im_RGB.putpixel((i,j), (value,value,value))
im_RGB.save("MASK.png")
print "Creating MASK Done"


Is it possible to stop a parallel process in CUDA [duplicate]

I am working with CUDA and I am trying to stop my kernel's work (i.e. terminate all running threads) after a certain if block is hit. How can I do that? I am really stuck here.
The CUDA execution model doesn't allow for inter-block communication by design. That can make this sort of abort-on-condition operation difficult to achieve reliably without resorting to assert- or trap-style approaches, which can result in context destruction and loss of data, which probably isn't what you want.
If your kernel design involves a small number of blocks with "resident" threads, then the only approach is some sort of atomic spinlock, which is hard to get to work reliably, and which will greatly degrade memory controller performance and achievable bandwidth.
If, on the other hand, your kernel design has rather large grids with a lot of blocks, and your main goal is to stop blocks which are not yet scheduled from running, then you could try something like this:
#include <iostream>
#include <vector>

__device__ unsigned int found_idx;

__global__ void setkernel(unsigned int *indata)
{
    indata[115949] = 0xdeadbeef;
    indata[119086] = 0xdeadbeef;
    indata[60534] = 0xdeadbeef;
    indata[37072] = 0xdeadbeef;
    indata[163107] = 0xdeadbeef;
}

__global__ void searchkernel(unsigned int *indata, unsigned int *outdata)
{
    if (found_idx > 0) {
        return;
    } else if (threadIdx.x == 0) {
        outdata[blockIdx.x] = blockIdx.x;
    }

    unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (indata[tid] == 0xdeadbeef) {
        unsigned int oldval = atomicCAS(&found_idx, 0, 1 + tid);
    }
}

int main()
{
    const unsigned int N = 1 << 19;
    unsigned int* in_data;
    cudaMalloc((void **)&in_data, sizeof(unsigned int) * size_t(N));
    cudaMemset(in_data, 0, sizeof(unsigned int) * size_t(N));
    setkernel<<<1,1>>>(in_data);
    cudaDeviceSynchronize();

    unsigned int block_size = 1024;
    unsigned int grid_size = N / block_size;
    unsigned int* out_data;
    cudaMalloc((void **)&out_data, sizeof(unsigned int) * size_t(grid_size));
    cudaMemset(out_data, 0xf0, sizeof(unsigned int) * size_t(grid_size));

    const unsigned int zero = 0;
    cudaMemcpyToSymbol(found_idx, &zero, sizeof(unsigned int));
    searchkernel<<<grid_size, block_size>>>(in_data, out_data);

    std::vector<unsigned int> output(grid_size);
    cudaMemcpy(&output[0], out_data, sizeof(unsigned int) * size_t(grid_size), cudaMemcpyDeviceToHost);
    cudaDeviceReset();

    std::cout << "The following blocks did not run" << std::endl;
    for(int i=0, j=0; i<grid_size; i++) {
        if (output[i] == 0xf0f0f0f0) {
            std::cout << " " << i;
            if (j++ == 20) {
                std::cout << std::endl;
                j = 0;
            }
        }
    }
    std::cout << std::endl;
    return 0;
}
Here I have a simple kernel which searches for a magic word in a large array. To get the early-exit behaviour, I use a single global word, which is set atomically by those threads which "win" or trigger the termination condition. Every new block checks the state of this global word, and if it is set, returns without doing any work.
If I compile and run this on a moderate sized Kepler device:
$ nvcc -arch=sm_30 -o blocking blocking.cu
$ ./blocking
The following blocks did not run
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
504 505 506 507 508 509 510 511
you can see that a large number of blocks in the grid saw the change in the global word and terminated early without running the search code. This might be the best you can do without a severely invasive spinlock approach, which will greatly harm performance.
I assume you want to stop a running kernel (not a single thread).
The simplest approach (and the one that I suggest) is to set up a global memory flag which is tested by the kernel.
You can set the flag using cudaMemcpy() (or directly, if you are using unified memory).
Like the following:
if (gm_flag) {
    __threadfence(); // ensure store issued before trap
    asm("trap;");    // kill kernel with error
}
ams("trap;") will stop all running thread
Note that since cuda 2.0 you can use assert() to terminate a kernel!
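A minimal sketch of the assert() route (my illustration, not the answerer's code; device-side assert requires compute capability 2.0+, and a failed assert leaves the context unusable until it is reset):

#include <cassert>
#include <cstdio>

__global__ void kernel_with_assert(const int *flag)
{
    // when any thread sees the flag set, the device-side assert fires and
    // the kernel is terminated; the host sees cudaErrorAssert afterwards
    assert(*flag == 0);
    // ... normal work would go here ...
}

int main()
{
    int *d_flag;
    cudaMalloc(&d_flag, sizeof(int));
    const int one = 1;
    cudaMemcpy(d_flag, &one, sizeof(int), cudaMemcpyHostToDevice);
    kernel_with_assert<<<1, 32>>>(d_flag);
    cudaError_t err = cudaDeviceSynchronize();  // expect cudaErrorAssert here
    printf("kernel status: %s\n", cudaGetErrorString(err));
    cudaDeviceReset();
    return 0;
}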
A different approach could be the following (I haven't tried the code!)
__device__ bool go(int val){
    return true;
}

__global__ void stopme(bool* flag, int* val, int size){
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if(idx < size){
        bool canContinue = true;
        while(canContinue && (flag[0])){
            printf("HELLO from %i\n", idx);
            if(!(*flag)){
                return;
            }
            else{
                // do some computation
                val[idx]++;
                val[idx] %= 100;
            }
            canContinue = go(val[idx]);
        }
    }
}

#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

int main(void)
{
    int size = 128;
    int* h_val = (int*)malloc(sizeof(int) * size);
    bool* h_flag = new bool;
    *h_flag = true;

    bool* d_flag;
    cudaMalloc(&d_flag, sizeof(bool));
    cudaMemcpy(d_flag, h_flag, sizeof(bool), cudaMemcpyHostToDevice);

    int* d_val;
    cudaMalloc(&d_val, sizeof(int) * size);
    for(int i = 0; i < size; i++){
        h_val[i] = i;
    }
    // note: the copy size must be sizeof(int)*size bytes, not just "size"
    cudaMemcpy(d_val, h_val, sizeof(int) * size, cudaMemcpyHostToDevice);

    int BSIZE = 32;
    int nblocks = size / BSIZE;
    printf("%i,%i", nblocks, BSIZE);
    stopme<<<nblocks, BSIZE>>>(d_flag, d_val, size);

    //--------------sleep for a while --------------------------

    *h_flag = false;
    cudaMemcpy(d_flag, h_flag, sizeof(bool), cudaMemcpyHostToDevice);

    cudaDeviceSynchronize();
    gpuErrchk( cudaPeekAtLastError() );
    printf("END\n");
}
where the kernel stopme keeps running until someone on the host side sets the flag to false. Note that your kernel could be much more complicated than this, and the effort to synchronize all threads in order to execute the return could be much greater (and can affect performance). Hope this helped.
More info here

GLSL: pow vs multiplication for integer exponent

Which is faster in GLSL:
pow(x, 3.0f);
or
x*x*x;
?
Does exponentiation performance depend on hardware vendor or exponent value?
I wrote a small benchmark, because I was interested in the results.
In my personal case, I was most interested in exponent = 5.
Benchmark code (running in Rem's Studio / LWJGL):
package me.anno.utils.bench

import me.anno.gpu.GFX
import me.anno.gpu.GFX.flat01
import me.anno.gpu.RenderState
import me.anno.gpu.RenderState.useFrame
import me.anno.gpu.framebuffer.Frame
import me.anno.gpu.framebuffer.Framebuffer
import me.anno.gpu.hidden.HiddenOpenGLContext
import me.anno.gpu.shader.Renderer
import me.anno.gpu.shader.Shader
import me.anno.utils.types.Floats.f2
import org.lwjgl.opengl.GL11.*
import java.nio.ByteBuffer
import kotlin.math.roundToInt

fun main() {

    fun createShader(code: String) = Shader(
        "", null, "" +
                "attribute vec2 attr0;\n" +
                "void main(){\n" +
                " gl_Position = vec4(attr0*2.0-1.0, 0.0, 1.0);\n" +
                " uv = attr0;\n" +
                "}", "varying vec2 uv;\n", "" +
                "void main(){" +
                code +
                "}"
    )

    fun repeat(code: String, times: Int): String {
        return Array(times) { code }.joinToString("\n")
    }

    val size = 512
    val warmup = 50
    val benchmark = 1000

    HiddenOpenGLContext.setSize(size, size)
    HiddenOpenGLContext.createOpenGL()

    val buffer = Framebuffer("", size, size, 1, 1, true, Framebuffer.DepthBufferType.NONE)

    println("Power,Multiplications,GFlops-multiplication,GFlops-floats,GFlops-ints,GFlops-power,Speedup")

    useFrame(buffer, Renderer.colorRenderer) {
        RenderState.blendMode.use(me.anno.gpu.blending.BlendMode.ADD) {
            for (power in 2 until 100) {
                // to reduce the overhead of other stuff
                val repeats = 100
                val init = "float x1 = dot(uv, vec2(1.0)),x2,x4,x8,x16,x32,x64;\n"
                val end = "gl_FragColor = vec4(x1,x1,x1,x1);\n"
                val manualCode = StringBuilder()
                for (bit in 1 until 32) {
                    val p = 1.shl(bit)
                    val h = 1.shl(bit - 1)
                    if (power == p) {
                        manualCode.append("x1=x$h*x$h;")
                        break
                    } else if (power > p) {
                        manualCode.append("x$p=x$h*x$h;")
                    } else break
                }
                if (power.and(power - 1) != 0) {
                    // not a power of two, so the result isn't finished yet
                    manualCode.append("x1=")
                    var first = true
                    for (bit in 0 until 32) {
                        val p = 1.shl(bit)
                        if (power.and(p) != 0) {
                            if (!first) {
                                manualCode.append('*')
                            } else first = false
                            manualCode.append("x$p")
                        }
                    }
                    manualCode.append(";\n")
                }
                val multiplications = manualCode.count { it == '*' }
                // println("$power: $manualCode")
                val shaders = listOf(
                    // manually optimized
                    createShader(init + repeat(manualCode.toString(), repeats) + end),
                    // can be optimized
                    createShader(init + repeat("x1=pow(x1,$power.0);", repeats) + end),
                    // can be optimized, int as power
                    createShader(init + repeat("x1=pow(x1,$power);", repeats) + end),
                    // slightly different, so it can't be optimized
                    createShader(init + repeat("x1=pow(x1,${power}.01);", repeats) + end),
                )
                for (shader in shaders) {
                    shader.use()
                }
                val pixels = ByteBuffer.allocateDirect(4)
                Frame.bind()
                glClearColor(0f, 0f, 0f, 1f)
                glClear(GL_COLOR_BUFFER_BIT or GL_DEPTH_BUFFER_BIT)
                for (i in 0 until warmup) {
                    for (shader in shaders) {
                        shader.use()
                        flat01.draw(shader)
                    }
                }
                val flops = DoubleArray(shaders.size)
                val avg = 10 // for more stability between runs
                for (j in 0 until avg) {
                    for (index in shaders.indices) {
                        val shader = shaders[index]
                        GFX.check()
                        val t0 = System.nanoTime()
                        for (i in 0 until benchmark) {
                            shader.use()
                            flat01.draw(shader)
                        }
                        // synchronize
                        glReadPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, pixels)
                        GFX.check()
                        val t1 = System.nanoTime()
                        // the first one may be an outlier
                        if (j > 0) flops[index] += multiplications * repeats.toDouble() * benchmark.toDouble() * size * size / (t1 - t0)
                        GFX.check()
                    }
                }
                for (i in flops.indices) {
                    flops[i] /= (avg - 1.0)
                }
                println(
                    "" +
                            "$power,$multiplications," +
                            "${flops[0].roundToInt()}," +
                            "${flops[1].roundToInt()}," +
                            "${flops[2].roundToInt()}," +
                            "${flops[3].roundToInt()}," +
                            (flops[0] / flops[3]).f2()
                )
            }
        }
    }
}
The shader is run over 512² pixels, 1000 draw calls per measurement, 9 measured repetitions (the first of 10 is discarded as an outlier), and each invocation evaluates the function 100 times.
I ran this code on my RX 580 (8GB, from Gigabyte) and collected the following results:
| Power | #Mult | GFlops* | GFlopsFp | GFlopsInt | GFlopsPow | Speedup |
|------:|------:|--------:|---------:|----------:|----------:|--------:|
| 2 | 1 | 1246 | 1429 | 1447 | 324 | 3.84 |
| 3 | 2 | 2663 | 2692 | 2708 | 651 | 4.09 |
| 4 | 2 | 2682 | 2679 | 2698 | 650 | 4.12 |
| 5 | 3 | 2766 | 972 | 974 | 973 | 2.84 |
| 6 | 3 | 2785 | 978 | 974 | 976 | 2.85 |
| 7 | 4 | 2830 | 1295 | 1303 | 1299 | 2.18 |
| 8 | 3 | 2783 | 2792 | 2809 | 960 | 2.90 |
| 9 | 4 | 2836 | 1298 | 1301 | 1302 | 2.18 |
| 10 | 4 | 2833 | 1291 | 1302 | 1298 | 2.18 |
| 11 | 5 | 2858 | 1623 | 1629 | 1623 | 1.76 |
| 12 | 4 | 2824 | 1302 | 1295 | 1303 | 2.17 |
| 13 | 5 | 2866 | 1628 | 1624 | 1626 | 1.76 |
| 14 | 5 | 2869 | 1614 | 1623 | 1611 | 1.78 |
| 15 | 6 | 2886 | 1945 | 1943 | 1953 | 1.48 |
| 16 | 4 | 2821 | 1305 | 1300 | 1305 | 2.16 |
| 17 | 5 | 2868 | 1615 | 1625 | 1619 | 1.77 |
| 18 | 5 | 2858 | 1620 | 1625 | 1624 | 1.76 |
| 19 | 6 | 2890 | 1949 | 1946 | 1949 | 1.48 |
| 20 | 5 | 2871 | 1618 | 1627 | 1625 | 1.77 |
| 21 | 6 | 2879 | 1945 | 1947 | 1943 | 1.48 |
| 22 | 6 | 2886 | 1944 | 1949 | 1952 | 1.48 |
| 23 | 7 | 2901 | 2271 | 2269 | 2268 | 1.28 |
| 24 | 5 | 2872 | 1621 | 1628 | 1624 | 1.77 |
| 25 | 6 | 2886 | 1942 | 1943 | 1942 | 1.49 |
| 26 | 6 | 2880 | 1949 | 1949 | 1953 | 1.47 |
| 27 | 7 | 2891 | 2273 | 2263 | 2266 | 1.28 |
| 28 | 6 | 2883 | 1949 | 1946 | 1953 | 1.48 |
| 29 | 7 | 2910 | 2279 | 2281 | 2279 | 1.28 |
| 30 | 7 | 2899 | 2272 | 2276 | 2277 | 1.27 |
| 31 | 8 | 2906 | 2598 | 2595 | 2596 | 1.12 |
| 32 | 5 | 2872 | 1621 | 1625 | 1622 | 1.77 |
| 33 | 6 | 2901 | 1953 | 1942 | 1949 | 1.49 |
| 34 | 6 | 2895 | 1948 | 1939 | 1944 | 1.49 |
| 35 | 7 | 2895 | 2274 | 2266 | 2268 | 1.28 |
| 36 | 6 | 2881 | 1937 | 1944 | 1948 | 1.48 |
| 37 | 7 | 2894 | 2277 | 2270 | 2280 | 1.27 |
| 38 | 7 | 2902 | 2275 | 2264 | 2273 | 1.28 |
| 39 | 8 | 2910 | 2602 | 2594 | 2603 | 1.12 |
| 40 | 6 | 2877 | 1945 | 1947 | 1945 | 1.48 |
| 41 | 7 | 2892 | 2276 | 2277 | 2282 | 1.27 |
| 42 | 7 | 2887 | 2271 | 2272 | 2273 | 1.27 |
| 43 | 8 | 2912 | 2599 | 2606 | 2599 | 1.12 |
| 44 | 7 | 2910 | 2278 | 2284 | 2276 | 1.28 |
| 45 | 8 | 2920 | 2597 | 2601 | 2600 | 1.12 |
| 46 | 8 | 2920 | 2600 | 2601 | 2590 | 1.13 |
| 47 | 9 | 2925 | 2921 | 2926 | 2927 | 1.00 |
| 48 | 6 | 2885 | 1935 | 1955 | 1956 | 1.47 |
| 49 | 7 | 2901 | 2271 | 2279 | 2288 | 1.27 |
| 50 | 7 | 2904 | 2281 | 2276 | 2278 | 1.27 |
| 51 | 8 | 2919 | 2608 | 2594 | 2607 | 1.12 |
| 52 | 7 | 2902 | 2282 | 2270 | 2273 | 1.28 |
| 53 | 8 | 2903 | 2598 | 2602 | 2598 | 1.12 |
| 54 | 8 | 2918 | 2602 | 2602 | 2604 | 1.12 |
| 55 | 9 | 2932 | 2927 | 2924 | 2936 | 1.00 |
| 56 | 7 | 2907 | 2284 | 2282 | 2281 | 1.27 |
| 57 | 8 | 2920 | 2606 | 2604 | 2610 | 1.12 |
| 58 | 8 | 2913 | 2593 | 2597 | 2587 | 1.13 |
| 59 | 9 | 2925 | 2923 | 2924 | 2920 | 1.00 |
| 60 | 8 | 2930 | 2614 | 2606 | 2613 | 1.12 |
| 61 | 9 | 2932 | 2946 | 2946 | 2947 | 1.00 |
| 62 | 9 | 2926 | 2935 | 2937 | 2947 | 0.99 |
| 63 | 10 | 2958 | 3258 | 3192 | 3266 | 0.91 |
| 64 | 6 | 2902 | 1957 | 1956 | 1959 | 1.48 |
| 65 | 7 | 2903 | 2274 | 2267 | 2273 | 1.28 |
| 66 | 7 | 2909 | 2277 | 2276 | 2286 | 1.27 |
| 67 | 8 | 2908 | 2602 | 2606 | 2599 | 1.12 |
| 68 | 7 | 2894 | 2272 | 2279 | 2276 | 1.27 |
| 69 | 8 | 2923 | 2597 | 2606 | 2606 | 1.12 |
| 70 | 8 | 2910 | 2596 | 2599 | 2600 | 1.12 |
| 71 | 9 | 2926 | 2921 | 2927 | 2924 | 1.00 |
| 72 | 7 | 2909 | 2283 | 2273 | 2273 | 1.28 |
| 73 | 8 | 2909 | 2602 | 2602 | 2599 | 1.12 |
| 74 | 8 | 2914 | 2602 | 2602 | 2603 | 1.12 |
| 75 | 9 | 2924 | 2925 | 2927 | 2933 | 1.00 |
| 76 | 8 | 2904 | 2608 | 2602 | 2601 | 1.12 |
| 77 | 9 | 2911 | 2919 | 2917 | 2909 | 1.00 |
| 78 | 9 | 2927 | 2921 | 2917 | 2935 | 1.00 |
| 79 | 10 | 2929 | 3241 | 3246 | 3246 | 0.90 |
| 80 | 7 | 2903 | 2273 | 2276 | 2275 | 1.28 |
| 81 | 8 | 2916 | 2596 | 2592 | 2589 | 1.13 |
| 82 | 8 | 2913 | 2600 | 2597 | 2598 | 1.12 |
| 83 | 9 | 2925 | 2931 | 2926 | 2913 | 1.00 |
| 84 | 8 | 2917 | 2598 | 2606 | 2597 | 1.12 |
| 85 | 9 | 2920 | 2916 | 2918 | 2927 | 1.00 |
| 86 | 9 | 2942 | 2922 | 2944 | 2936 | 1.00 |
| 87 | 10 | 2961 | 3254 | 3259 | 3268 | 0.91 |
| 88 | 8 | 2934 | 2607 | 2608 | 2612 | 1.12 |
| 89 | 9 | 2918 | 2939 | 2931 | 2916 | 1.00 |
| 90 | 9 | 2927 | 2928 | 2920 | 2924 | 1.00 |
| 91 | 10 | 2940 | 3253 | 3252 | 3246 | 0.91 |
| 92 | 9 | 2924 | 2933 | 2926 | 2928 | 1.00 |
| 93 | 10 | 2940 | 3259 | 3237 | 3251 | 0.90 |
| 94 | 10 | 2928 | 3247 | 3247 | 3264 | 0.90 |
| 95 | 11 | 2933 | 3599 | 3593 | 3594 | 0.82 |
| 96 | 7 | 2883 | 2282 | 2268 | 2269 | 1.27 |
| 97 | 8 | 2911 | 2602 | 2595 | 2600 | 1.12 |
| 98 | 8 | 2896 | 2588 | 2591 | 2587 | 1.12 |
| 99 | 9 | 2924 | 2939 | 2936 | 2938 | 1.00 |
As you can see, a pow() call takes exactly as long as 9 multiplication instructions. Therefore every manual rewrite of a power with fewer than 9 multiplications is faster.
Only the cases 2, 3, 4, and 8 are optimized by my driver. The optimization is independent of whether you use the .0 suffix for the exponent.
In the case of exponent = 2, my implementation seems to have lower performance than the driver's; I am not sure why.
The speedup is the manual implementation compared to pow(x,exponent+0.01), which cannot be optimized by the compiler.
Because the multiplications and the speedup align so perfectly, I created a graph to show the relationship. This relationship kind of shows that my benchmark is trustworthy :).
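To make the "fewer multiplications" rewrite concrete, this is the square-and-multiply decomposition the benchmark generates for exponent 5 (3 multiplications, matching the #Mult column above):

// pow(x1, 5.0) as 3 multiplications: 5 = 4 + 1, so x^5 = x^4 * x
float x2 = x1 * x1;  // x^2
float x4 = x2 * x2;  // x^4
x1 = x4 * x1;        // x^5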
Operating System: Windows 10 Personal
GPU: RX 580 8GB from Gigabyte
Processor: Ryzen 5 2600
Memory: 16 GB DDR4 3200
GPU Driver: 21.6.1 from 17th June 2021
LWJGL: Version 3.2.3 build 13
While this can definitely be hardware/vendor/compiler dependent, advanced mathematical functions like pow() tend to be considerably more expensive than basic operations.
The best approach is of course to try both and benchmark. But if there is a simple replacement for an advanced mathematical function, I don't think you can go very wrong by using it.
If you write pow(x, 3.0), the best you can probably hope for is that the compiler will recognize the special case, and expand it. But why take the risk, if the replacement is just as short and easy to read? C/C++ compilers don't always replace pow(x, 2.0) by a simple multiplication, so I wouldn't necessarily count on all GLSL compilers to do that.

Different output for the same input data

Why does this code return 10 on the test machine for the input data:
5
4 2 3 3 3
while on my PC it correctly returns 12?
#include <iostream>
#include <cstdlib>
using namespace std;

int n;
int b[99];
int money = 0;

int min()
{
    int min = b[0];
    int index = 0;
    for (int i = 1; i < n; i++)
        if (b[i] < min && b[i] % 2 != 0) {
            min = b[i];
            index = i;
        }
    for (int i = index; i < n - 1; i++)
    {
        b[i] = b[i + 1];
    }
    b[n - 1] = 0;
    return min;
}

int main()
{
    money = 0;
    cin >> n;
    for (int i = 0; i < n; i++)
    {
        cin >> b[i];
        money += b[i];
    }
    while (money % 2 != 0)
    {
        money -= min();
    }
    if (money % 2 == 0)
        if (money != 0)
            cout << money;
        else
            cout << "NIESTETY";
    system("pause");
}
Thanks!
Edit: It happened on my PC too, once. But for other input data it works perfectly. And I'm not talking about simple values only; I've run some recommended tests for this problem, this one for example:
581
906 369 899 998 73 717 269 51 875 61 123 237 50 924 576 52 693 394 952 24 534 452 634 139 642 570 20 643 128 165 144 867 86 256 981 26 344 912 489 524 667 375 502 944 961 61 586 834 62 240 644 602 563 758 587 973 440 920 977 785 999 186 384 231 414 184 84 492 277 787 737 312 849 595 238 892 377 103 275 797 847 652 263 526 473 643 972 701 209 70 95 852 337 387 651 451 843 649 445 172 592 299 110 479 191 148 324 600 928 258 354 693 522 637 977 180 394 306 846 885 85 339 678 355 799 297 232 929 133 173 698 362 592 426 319 690 195 944 143 571 831 765 847 106 293 460 650 4 876 891 272 987 171 767 495 486 161 243 959 431 707 761 634 574 774 176 84 903 102 223 7 759 886 113 626 917 873 177 307 46 56 415 162 537 845 956 704 725 551 266 673 255 567 520 278 1000 450 573 205 447 656 946 460 331 317 125 340 372 246 635 667 642 422 766 669 894 217 759 396 160 985 46 61 832 375 272 276 49 820 440 259 482 569 97 903 135 164 570 412 320 827 403 610 577 350 543 328 481 136 595 584 935 830 134 587 658 604 865 742 111 668 313 669 233 592 451 735 539 890 319 882 674 306 376 945 313 872 683 915 92 954 598 750 106 576 259 563 293 158 708 716 259 434 833 867 754 678 576 981 55 8 421 31 176 641 707 175 531 299 65 55 118 990 696 533 713 829 20 310 201 748 697 397 550 909 584 366 6 515 298 128 483 454 564 986 171 225 35 733 664 368 674 146 479 70 610 734 708 550 962 227 606 520 751 449 254 6 253 757 942 63 926 592 768 516 843 187 836 722 329 745 54 703 437 671 463 288 548 847 563 354 964 27 100 120 351 928 557 757 937 872 417 18 335 181 68 834 994 83 178 67 45 965 31 976 155 883 726 173 813 66 199 211 619 429 643 237 464 586 929 546 675 392 350 811 187 936 64 167 394 969 36 630 430 377 905 283 36 352 221 293 125 975 214 903 622 769 955 759 83 104 988 876 612 308 495 612 634 288 581 164 595 685 620 817 988 355 134 71 159 493 730 952 70 620 961 407 78 159 660 542 991 858 101 140 531 156 667 323 621 365 193 825 252 848 897 5 16 277 461 909 549 194 638 422 718 979 705 515 499 488 447 249 229 382 315 851 625 249 818 889 580 224 926 270 734 689 275 842 834 838 774 949 101 728 930 478 397 188 228 675 36 259 834 745 629 374 664 335 30 745 963 918 38 848 482 739 745 912 154 408 250 271 480 767 120 142 652 169 764 541 228 919 482 984 942 631 46 529 395 576
//Output: 289826

Estimator for the derivative of the local linear model (SAS)

I estimate a local linear model for the data with one continuous dependent variable and multiple explanatory variables (continuous and dichotomous).
Is it possible to estimate the derivatives of this function for each combination of the explanatory variables and save them for further use? How can one do this in SAS?
In principle, I would like to get some kind of analogue to the parameter estimates in a simple parametric regression. But here I would have not a single point estimate but rather a distribution for each of the variables.
Thanks for any suggestions, comments, clarifications.
UPDATE:
For example, if I use the SAS sample data set "ExperimentA" and run the local linear model, how can I get, after estimation, the derivative with respect to each independent variable at each row of the data?
data ExperimentA;
format Temperature f4.0 Catalyst f6.3 Yield f8.3;
input Temperature Catalyst Yield ##;
datalines;
80 0.005 6.039 80 0.010 4.719 80 0.015 6.301
80 0.020 4.558 80 0.025 5.917 80 0.030 4.365
80 0.035 6.540 80 0.040 5.063 80 0.045 4.668
80 0.050 7.641 80 0.055 6.736 80 0.060 7.255
80 0.065 5.515 80 0.070 5.260 80 0.075 4.813
80 0.080 4.465 90 0.005 4.540 90 0.010 3.553
90 0.015 5.611 90 0.020 4.586 90 0.025 6.503
90 0.030 4.671 90 0.035 4.919 90 0.040 6.536
90 0.045 4.799 90 0.050 6.002 90 0.055 6.988
90 0.060 6.206 90 0.065 5.193 90 0.070 5.783
90 0.075 6.482 90 0.080 5.222 100 0.005 5.042
100 0.010 5.551 100 0.015 4.804 100 0.020 5.313
100 0.025 4.957 100 0.030 6.177 100 0.035 5.433
100 0.040 6.139 100 0.045 6.217 100 0.050 6.498
100 0.055 7.037 100 0.060 5.589 100 0.065 5.593
100 0.070 7.438 100 0.075 4.794 100 0.080 3.692
110 0.005 6.005 110 0.010 5.493 110 0.015 5.107
110 0.020 5.511 110 0.025 5.692 110 0.030 5.969
110 0.035 6.244 110 0.040 7.364 110 0.045 6.412
110 0.050 6.928 110 0.055 6.814 110 0.060 8.071
110 0.065 6.038 110 0.070 6.295 110 0.075 4.308
110 0.080 7.020 120 0.005 5.409 120 0.010 7.009
120 0.015 6.160 120 0.020 7.408 120 0.025 7.123
120 0.030 7.009 120 0.035 7.708 120 0.040 5.278
120 0.045 8.111 120 0.050 8.547 120 0.055 8.279
120 0.060 8.736 120 0.065 6.988 120 0.070 6.283
120 0.075 7.367 120 0.080 6.579 130 0.005 7.629
130 0.010 7.171 130 0.015 5.997 130 0.020 6.587
130 0.025 7.335 130 0.030 7.209 130 0.035 8.259
130 0.040 6.530 130 0.045 8.400 130 0.050 7.218
130 0.055 9.167 130 0.060 9.082 130 0.065 7.680
130 0.070 7.139 130 0.075 7.275 130 0.080 7.544
140 0.005 4.860 140 0.010 5.932 140 0.015 3.685
140 0.020 5.581 140 0.025 4.935 140 0.030 5.197
140 0.035 5.559 140 0.040 4.836 140 0.045 5.795
140 0.050 5.524 140 0.055 7.736 140 0.060 5.628
140 0.065 6.644 140 0.070 3.785 140 0.075 4.853
140 0.080 6.006
;
run;
ods graphics on;
ods output ScoreResults=PredLOESS;
proc loess data=ExperimentA;
model Yield = Temperature Catalyst
/ scale=sd select=gcv degree=2;
score;
run;

How to fix this round-off error?

Apologies for the long code. This is as far as I could reduce it.
#include <QtGui/QApplication>
#include <QtGui/QWidget>
#include <QtGui/QImage>
#include <QtGui/QPainter>
#include <vector>
#include <cmath>    // std::floor
#include <cstring>  // memset
using namespace std;

class View : public QWidget {
    typedef pair<double, double> Point;

    unsigned char* _buffer;
    double centerx, centery, scale;
    double xmin, xmax, ymin, ymax;
    double xprec, yprec;
    double xratio, yratio;
    double fwidth, fheight;
    double xlen, ylen;
    int width;
    int height;

public:
    View(int w, int h) : width(w), height(h) {
        _buffer = new unsigned char[4 * w * h];
        fwidth = static_cast<double>(width);
        fheight = static_cast<double>(height);
        double aspectRatio = fwidth / fheight;
        centerx = 0;
        centery = 0;
        scale = 2.3;
        xlen = aspectRatio * scale;
        ylen = 1.0 * scale;
        xmin = -(xlen * 0.5) + centerx;
        xmax = -xmin;
        ymin = -(ylen * 0.5) + centery;
        ymax = -ymin;
        xprec = xlen / fwidth;
        yprec = ylen / fheight;
        xratio = fwidth / scale / aspectRatio;
        yratio = fheight / scale;
    }

    double roundX(double x) { return std::floor(x / xprec) * xprec; }
    double roundY(double y) { return std::floor(y / yprec) * yprec; }

protected:
    void paintEvent(QPaintEvent* event) {
        QPainter painter(this);
        render();
        painter.drawImage(
            QPoint(0, 0),
            QImage(_buffer, width, height, QImage::Format_RGB32));
    }

private:
    void render() {
        memset(_buffer, 0, 4 * width * height);
        for (double i = xmin; i < xmax; i += xprec) {
            for (double j = ymin; j < ymax; j += yprec) {
                Point p(roundX(i), roundY(j));
                int x = static_cast<int>((p.first * xratio) - (xmin * xratio));
                int y = static_cast<int>((p.second * yratio) - (ymin * yratio));
                _buffer[4 * (x * width + y)    ] = 255;
                _buffer[4 * (x * width + y) + 1] = 255;
                _buffer[4 * (x * width + y) + 2] = 255;
            }
        }
    }
};

int main(int argc, char* argv[])
{
    QApplication app(argc, argv);
    View view(512, 512);
    view.show();
    return app.exec();
}
Instead of producing a solid white window, the code produces a white window with black lines, which are the result of round-off error. I think the source of the problem is the roundX() and roundY() functions, but I'm not sure. I also don't know how to fix this. Any ideas?
I don't have Qt and I prefer C. So, below is a C program that imitates your render() in render1() and has a corrected implementation in render2().
The main problem of your render() is that it uses prematurely rounded values from Point p(roundX(i), roundY(j));. If you use unrounded i and j, things will improve dramatically, but you're still going to suffer from the rounding errors of your additions, multiplications and so on.
But those rounding errors aren't large and they don't accumulate to a large error in the end. So the final correction is the addition of .5 prior to conversion to the integers x and y. Without it, the floating-point value can be slightly less than the nearest integer that you want, and when you convert that double to int, truncation gives you a value that is one less.
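Before the full program, the fix in isolation (a standalone illustration with a made-up value; lround() from <math.h> is an alternative that also handles negative values):

#include <math.h>

/* truncation: (int)3.9999999999 == 3, which is what produces the missing rows/columns */
int x_truncated = (int)3.9999999999;
/* adding .5 first makes nearly-integral values land on the intended pixel (non-negative values only) */
int x_rounded = (int)(3.9999999999 + .5);  /* == 4 */
/* lround() rounds to nearest and handles negatives too */
int x_lrounded = (int)lround(3.9999999999);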
#include <stdio.h>
#include <string.h>
#include <math.h>

double centerx, centery, scale;
double xmin, xmax, ymin, ymax;
double xprec, yprec;
double xratio, yratio;
double fwidth, fheight;
double xlen, ylen;
int width;
int height;

void InitView(int w, int h)
{
    double aspectRatio;
    width = w; height = h;
    // _buffer = new unsigned char[4 * w * h];
    fwidth = width;
    fheight = height;
    aspectRatio = fwidth / fheight;
    centerx = 0;
    centery = 0;
    scale = 2.3;
    xlen = aspectRatio * scale;
    ylen = 1.0 * scale;
    xmin = -(xlen * 0.5) + centerx;
    xmax = -xmin;
    ymin = -(ylen * 0.5) + centery;
    ymax = -ymin;
    xprec = xlen / fwidth;
    yprec = ylen / fheight;
    xratio = fwidth / scale / aspectRatio;
    yratio = fheight / scale;
}

double roundX(double x) { return floor(x / xprec) * xprec; }
double roundY(double y) { return floor(y / yprec) * yprec; }

void render1(void)
{
    double i, j;
    int cnt;
    char usedx[1 + 512 + 1];
    char usedy[1 + 512 + 1];

    printf("render1():\n");

    memset(usedx, 0, sizeof usedx);
    memset(usedy, 0, sizeof usedy);

    printf("x's:\n");
    for (cnt = 0, i = xmin; i < xmax; i += xprec)
    {
        int x = ((roundX(i) * xratio) - (xmin * xratio));
        printf("%d ", x);
        cnt++;
        usedx[1 + x] = 1;
    }
    printf("\ncount: %d\n", cnt);
    for (cnt = 1; cnt <= 512; cnt++)
        if (!usedx[cnt])
            printf("missing x: %d\n", cnt - 1);

    printf("y's:\n");
    for (cnt = 0, j = ymin; j < ymax; j += yprec)
    {
        int y = ((roundY(j) * yratio) - (ymin * yratio));
        printf("%d ", y);
        cnt++;
        usedy[1 + y] = 1;
    }
    printf("\ncount: %d\n", cnt);
    for (cnt = 1; cnt <= 512; cnt++)
        if (!usedy[cnt])
            printf("missing y: %d\n", cnt - 1);
}

void render2(void)
{
    double i, j;
    int cnt;
    char usedx[1 + 512 + 1];
    char usedy[1 + 512 + 1];

    printf("render2():\n");

    memset(usedx, 0, sizeof usedx);
    memset(usedy, 0, sizeof usedy);

    printf("x's:\n");
    for (cnt = 0, i = xmin; i < xmax; i += xprec)
    {
        int x = ((i * xratio) - (xmin * xratio) + .5);
        printf("%d ", x);
        cnt++;
        usedx[1 + x] = 1;
    }
    printf("\ncount: %d\n", cnt);
    for (cnt = 1; cnt <= 512; cnt++)
        if (!usedx[cnt])
            printf("missing x: %d\n", cnt - 1);

    printf("y's:\n");
    for (cnt = 0, j = ymin; j < ymax; j += yprec)
    {
        int y = ((j * yratio) - (ymin * yratio) + .5);
        printf("%d ", y);
        cnt++;
        usedy[1 + y] = 1;
    }
    printf("\ncount: %d\n", cnt);
    for (cnt = 1; cnt <= 512; cnt++)
        if (!usedy[cnt])
            printf("missing y: %d\n", cnt - 1);
}

int main(void)
{
    InitView(512, 512);
    render1();
    render2();
    return 0;
}
Output (ideone):
render1():
x's:
0 0 0 2 2 4 5 5 7 7 9 10 10 12 12 14 15 15 17 17 19 20 20 22 22 24 25 25 27 27 29 30 30 32 32 33 35 36 36 37 38 40 41 41 42 43 45 46 46 47 48 50 51 51 52 54 55 56 56 57 59 60 61 61 62 64 65 66 66 67 69 70 71 71 72 74 75 76 76 77 79 80 81 81 82 84 85 86 86 87 89 90 91 91 92 94 95 95 96 97 99 100 100 101 103 104 105 105 106 108 109 110 110 111 113 114 115 115 116 118 119 120 120 121 123 124 125 125 126 128 129 130 130 131 133 134 135 135 136 138 139 140 140 141 143 144 145 146 146 148 148 150 151 152 153 153 155 156 157 158 158 160 162 163 163 165 166 167 168 168 170 171 172 173 174 175 175 177 178 179 180 180 182 183 184 185 185 187 188 189 190 190 192 193 194 195 195 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 371 373 374 375 376 376 378 379 380 381 381 383 384 385 386 386 388 389 390 391 391 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 420 422 423 424 425 425 427 428 429 430 430 432 433 434 435 435 437 438 439 440 440 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 479 481 482 482 484 484 486 487 487 489 489 491 492 492 494 495 496 497 497 499 500 501 502 502 504 505 506 507 507 509 510 511
count: 512
missing x: 1
missing x: 3
missing x: 6
missing x: 8
missing x: 11
missing x: 13
missing x: 16
missing x: 18
missing x: 21
missing x: 23
missing x: 26
missing x: 28
missing x: 31
missing x: 34
missing x: 39
missing x: 44
missing x: 49
missing x: 53
missing x: 58
missing x: 63
missing x: 68
missing x: 73
missing x: 78
missing x: 83
missing x: 88
missing x: 93
missing x: 98
missing x: 102
missing x: 107
missing x: 112
missing x: 117
missing x: 122
missing x: 127
missing x: 132
missing x: 137
missing x: 142
missing x: 147
missing x: 149
missing x: 154
missing x: 159
missing x: 161
missing x: 164
missing x: 169
missing x: 176
missing x: 181
missing x: 186
missing x: 191
missing x: 196
missing x: 372
missing x: 377
missing x: 382
missing x: 387
missing x: 392
missing x: 421
missing x: 426
missing x: 431
missing x: 436
missing x: 441
missing x: 480
missing x: 483
missing x: 485
missing x: 488
missing x: 490
missing x: 493
missing x: 498
missing x: 503
missing x: 508
y's:
0 0 0 2 2 4 5 5 7 7 9 10 10 12 12 14 15 15 17 17 19 20 20 22 22 24 25 25 27 27 29 30 30 32 32 33 35 36 36 37 38 40 41 41 42 43 45 46 46 47 48 50 51 51 52 54 55 56 56 57 59 60 61 61 62 64 65 66 66 67 69 70 71 71 72 74 75 76 76 77 79 80 81 81 82 84 85 86 86 87 89 90 91 91 92 94 95 95 96 97 99 100 100 101 103 104 105 105 106 108 109 110 110 111 113 114 115 115 116 118 119 120 120 121 123 124 125 125 126 128 129 130 130 131 133 134 135 135 136 138 139 140 140 141 143 144 145 146 146 148 148 150 151 152 153 153 155 156 157 158 158 160 162 163 163 165 166 167 168 168 170 171 172 173 174 175 175 177 178 179 180 180 182 183 184 185 185 187 188 189 190 190 192 193 194 195 195 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 371 373 374 375 376 376 378 379 380 381 381 383 384 385 386 386 388 389 390 391 391 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 420 422 423 424 425 425 427 428 429 430 430 432 433 434 435 435 437 438 439 440 440 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 479 481 482 482 484 484 486 487 487 489 489 491 492 492 494 495 496 497 497 499 500 501 502 502 504 505 506 507 507 509 510 511
count: 512
missing y: 1
missing y: 3
missing y: 6
missing y: 8
missing y: 11
missing y: 13
missing y: 16
missing y: 18
missing y: 21
missing y: 23
missing y: 26
missing y: 28
missing y: 31
missing y: 34
missing y: 39
missing y: 44
missing y: 49
missing y: 53
missing y: 58
missing y: 63
missing y: 68
missing y: 73
missing y: 78
missing y: 83
missing y: 88
missing y: 93
missing y: 98
missing y: 102
missing y: 107
missing y: 112
missing y: 117
missing y: 122
missing y: 127
missing y: 132
missing y: 137
missing y: 142
missing y: 147
missing y: 149
missing y: 154
missing y: 159
missing y: 161
missing y: 164
missing y: 169
missing y: 176
missing y: 181
missing y: 186
missing y: 191
missing y: 196
missing y: 372
missing y: 377
missing y: 382
missing y: 387
missing y: 392
missing y: 421
missing y: 426
missing y: 431
missing y: 436
missing y: 441
missing y: 480
missing y: 483
missing y: 485
missing y: 488
missing y: 490
missing y: 493
missing y: 498
missing y: 503
missing y: 508
render2():
x's:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
count: 512
y's:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
count: 512